diff options
Diffstat (limited to 'layout/style/nsCSSScanner.h')
-rw-r--r-- | layout/style/nsCSSScanner.h | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/layout/style/nsCSSScanner.h b/layout/style/nsCSSScanner.h new file mode 100644 index 000000000..ef03958c8 --- /dev/null +++ b/layout/style/nsCSSScanner.h @@ -0,0 +1,397 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* tokenization of CSS style sheets */ + +#ifndef nsCSSScanner_h___ +#define nsCSSScanner_h___ + +#include "nsString.h" + +namespace mozilla { +namespace css { +class ErrorReporter; +} // namespace css +} // namespace mozilla + +// Token types; in close but not perfect correspondence to the token +// categorization in section 4.1.1 of CSS2.1. (The deviations are all +// the fault of css3-selectors, which has requirements that can only be +// met by changing the generic tokenization.) The comment on each line +// illustrates the form of each identifier. + +enum nsCSSTokenType { + // White space of any kind. No value fields are used. Note that + // comments do *not* count as white space; comments separate tokens + // but are not themselves tokens. + eCSSToken_Whitespace, // + // A comment. + eCSSToken_Comment, // /*...*/ + + // Identifier-like tokens. mIdent is the text of the identifier. + // The difference between ID and Hash is: if the text after the # + // would have been a valid Ident if the # hadn't been there, the + // scanner produces an ID token. Otherwise it produces a Hash token. + // (This distinction is required by css3-selectors.) + eCSSToken_Ident, // word + eCSSToken_Function, // word( + eCSSToken_AtKeyword, // @word + eCSSToken_ID, // #word + eCSSToken_Hash, // #0word + + // Numeric tokens. mNumber is the floating-point value of the + // number, and mHasSign indicates whether there was an explicit sign + // (+ or -) in front of the number. If mIntegerValid is true, the + // number had the lexical form of an integer, and mInteger is its + // integer value. Lexically integer values outside the range of a + // 32-bit signed number are clamped to the maximum values; mNumber + // will indicate a 'truer' value in that case. Percentage tokens + // are always considered not to be integers, even if their numeric + // value is integral (100% => mNumber = 1.0). For Dimension + // tokens, mIdent holds the text of the unit. + eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3 + eCSSToken_Dimension, // 24px 8.5in + eCSSToken_Percentage, // 85% 1280.4% + + // String-like tokens. In all cases, mIdent holds the text + // belonging to the string, and mSymbol holds the delimiter + // character, which may be ', ", or zero (only for unquoted URLs). + // Bad_String and Bad_URL tokens are emitted when the closing + // delimiter or parenthesis was missing. + eCSSToken_String, // 'foo bar' "foo bar" + eCSSToken_Bad_String, // 'foo bar + eCSSToken_URL, // url(foobar) url("foo bar") + eCSSToken_Bad_URL, // url(foo + + // Any one-character symbol. mSymbol holds the character. + eCSSToken_Symbol, // . ; { } ! * + + // Match operators. These are single tokens rather than pairs of + // Symbol tokens because css3-selectors forbids the presence of + // comments between the two characters. No value fields are used; + // the token type indicates which operator. + eCSSToken_Includes, // ~= + eCSSToken_Dashmatch, // |= + eCSSToken_Beginsmatch, // ^= + eCSSToken_Endsmatch, // $= + eCSSToken_Containsmatch, // *= + + // Unicode-range token: currently used only in @font-face. + // The lexical rule for this token includes several forms that are + // semantically invalid. Therefore, mIdent always holds the + // complete original text of the token (so we can print it + // accurately in diagnostics), and mIntegerValid is true iff the + // token is semantically valid. In that case, mInteger holds the + // lowest value included in the range, and mInteger2 holds the + // highest value included in the range. + eCSSToken_URange, // U+007e U+01?? U+2000-206F + + // HTML comment delimiters, ignored as a unit when they appear at + // the top level of a style sheet, for compatibility with websites + // written for compatibility with pre-CSS browsers. This token type + // subsumes the css2.1 CDO and CDC tokens, which are always treated + // the same by the parser. mIdent holds the text of the token, for + // diagnostics. + eCSSToken_HTMLComment, // <!-- --> +}; + +// Classification of tokens used to determine if a "/**/" string must be +// inserted if pasting token streams together when serializing. We include +// values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch, +// as css-syntax does not treat these as whole tokens, but we will still +// need to insert a "/**/" string between a '|' delim and a '|=' dashmatch +// and between a '/' delim and a '*=' containsmatch. +// +// https://drafts.csswg.org/css-syntax/#serialization +enum nsCSSTokenSerializationType { + eCSSTokenSerialization_Nothing, + eCSSTokenSerialization_Whitespace, + eCSSTokenSerialization_AtKeyword_or_Hash, + eCSSTokenSerialization_Number, + eCSSTokenSerialization_Dimension, + eCSSTokenSerialization_Percentage, + eCSSTokenSerialization_URange, + eCSSTokenSerialization_URL_or_BadURL, + eCSSTokenSerialization_Function, + eCSSTokenSerialization_Ident, + eCSSTokenSerialization_CDC, + eCSSTokenSerialization_DashMatch, + eCSSTokenSerialization_ContainsMatch, + eCSSTokenSerialization_Symbol_Hash, // '#' + eCSSTokenSerialization_Symbol_At, // '@' + eCSSTokenSerialization_Symbol_Dot_or_Plus, // '.', '+' + eCSSTokenSerialization_Symbol_Minus, // '-' + eCSSTokenSerialization_Symbol_OpenParen, // '(' + eCSSTokenSerialization_Symbol_Question, // '?' + eCSSTokenSerialization_Symbol_Assorted, // '$', '^', '~' + eCSSTokenSerialization_Symbol_Equals, // '=' + eCSSTokenSerialization_Symbol_Bar, // '|' + eCSSTokenSerialization_Symbol_Slash, // '/' + eCSSTokenSerialization_Symbol_Asterisk, // '*' + eCSSTokenSerialization_Other // anything else +}; + +// A single token returned from the scanner. mType is always +// meaningful; comments above describe which other fields are +// meaningful for which token types. +struct nsCSSToken { + nsAutoString mIdent; + float mNumber; + int32_t mInteger; + int32_t mInteger2; + nsCSSTokenType mType; + char16_t mSymbol; + bool mIntegerValid; + bool mHasSign; + + nsCSSToken() + : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace), + mSymbol('\0'), mIntegerValid(false), mHasSign(false) + {} + + bool IsSymbol(char16_t aSymbol) const { + return mType == eCSSToken_Symbol && mSymbol == aSymbol; + } + + void AppendToString(nsString& aBuffer) const; +}; + +// Represents an nsCSSScanner's saved position in the input buffer. +class nsCSSScannerPosition { + friend class nsCSSScanner; +public: + nsCSSScannerPosition() : mInitialized(false) { } + + uint32_t LineNumber() { + MOZ_ASSERT(mInitialized); + return mLineNumber; + } + + uint32_t LineOffset() { + MOZ_ASSERT(mInitialized); + return mLineOffset; + } + +private: + uint32_t mOffset; + uint32_t mLineNumber; + uint32_t mLineOffset; + uint32_t mTokenLineNumber; + uint32_t mTokenLineOffset; + uint32_t mTokenOffset; + bool mInitialized; +}; + +enum nsCSSScannerExclude { + // Return all tokens, including whitespace and comments. + eCSSScannerExclude_None, + // Include whitespace but exclude comments. + eCSSScannerExclude_Comments, + // Exclude whitespace and comments. + eCSSScannerExclude_WhitespaceAndComments +}; + +// nsCSSScanner tokenizes an input stream using the CSS2.1 forward +// compatible tokenization rules. Used internally by nsCSSParser; +// not available for use by other code. +class nsCSSScanner { + public: + // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0| + // when the line number is unknown. The scanner does not take + // ownership of |aBuffer|, so the caller must be sure to keep it + // alive for the lifetime of the scanner. + nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber); + ~nsCSSScanner(); + + void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) { + mReporter = aReporter; + } + // Set whether or not we are processing SVG + void SetSVGMode(bool aSVGMode) { + mSVGMode = aSVGMode; + } + bool IsSVGMode() const { + return mSVGMode; + } + + // Reset or check whether a BAD_URL or BAD_STRING token has been seen. + void ClearSeenBadToken() { mSeenBadToken = false; } + bool SeenBadToken() const { return mSeenBadToken; } + + // Reset or check whether a "var(" FUNCTION token has been seen. + void ClearSeenVariableReference() { mSeenVariableReference = false; } + bool SeenVariableReference() const { return mSeenVariableReference; } + + // Get the 1-based line number of the last character of + // the most recently processed token. + uint32_t GetLineNumber() const { return mTokenLineNumber; } + + // Get the 0-based column number of the first character of + // the most recently processed token. + uint32_t GetColumnNumber() const + { return mTokenOffset - mTokenLineOffset; } + + uint32_t GetTokenOffset() const + { return mTokenOffset; } + + uint32_t GetTokenEndOffset() const + { return mOffset; } + + // Get the text of the line containing the first character of + // the most recently processed token. + nsDependentSubstring GetCurrentLine() const; + + // Get the next token. Return false on EOF. aTokenResult is filled + // in with the data for the token. aSkip controls whether + // whitespace and/or comment tokens are ever returned. + bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip); + + // Get the body of an URL token (everything after the 'url('). + // This is exposed for use by nsCSSParser::ParseMozDocumentRule, + // which, for historical reasons, must make additional function + // tokens behave like url(). Please do not add new uses to the + // parser. + void NextURL(nsCSSToken& aTokenResult); + + // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg, + // because "2n-1" is a single DIMENSION token, and "n-1" is a single + // IDENT token, but the :nth() selector syntax wants to interpret + // them the same as "2n -1" and "n -1" respectively. Please do not + // add new uses to the parser. + // + // Note: this function may not be used to back up over a line boundary. + void Backup(uint32_t n); + + // Starts recording the input stream from the current position. + void StartRecording(); + + // Abandons recording of the input stream. + void StopRecording(); + + // Stops recording of the input stream and appends the recorded + // input to aBuffer. + void StopRecording(nsString& aBuffer); + + // Returns the length of the current recording. + uint32_t RecordingLength() const; + +#ifdef DEBUG + bool IsRecording() const; +#endif + + // Stores the current scanner offset into the specified object. + void SavePosition(nsCSSScannerPosition& aState); + + // Resets the scanner offset to a position saved by SavePosition. + void RestoreSavedPosition(const nsCSSScannerPosition& aState); + + enum EOFCharacters { + eEOFCharacters_None = 0x0000, + + // to handle \<EOF> inside strings + eEOFCharacters_DropBackslash = 0x0001, + + // to handle \<EOF> outside strings + eEOFCharacters_ReplacementChar = 0x0002, + + // to close comments + eEOFCharacters_Asterisk = 0x0004, + eEOFCharacters_Slash = 0x0008, + + // to close double-quoted strings + eEOFCharacters_DoubleQuote = 0x0010, + + // to close single-quoted strings + eEOFCharacters_SingleQuote = 0x0020, + + // to close URLs + eEOFCharacters_CloseParen = 0x0040, + }; + + // Appends any characters to the specified string the input stream to make the + // last token not rely on special EOF handling behavior. + // + // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored. + static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters, + nsAString& aString); + + EOFCharacters GetEOFCharacters() const { +#ifdef DEBUG + AssertEOFCharactersValid(mEOFCharacters); +#endif + return mEOFCharacters; + } + +#ifdef DEBUG + static void AssertEOFCharactersValid(uint32_t c); +#endif + +protected: + int32_t Peek(uint32_t n = 0); + void Advance(uint32_t n = 1); + void AdvanceLine(); + + void SkipWhitespace(); + void SkipComment(); + + bool GatherEscape(nsString& aOutput, bool aInString); + bool GatherText(uint8_t aClass, nsString& aIdent); + + bool ScanIdent(nsCSSToken& aResult); + bool ScanAtKeyword(nsCSSToken& aResult); + bool ScanHash(nsCSSToken& aResult); + bool ScanNumber(nsCSSToken& aResult); + bool ScanString(nsCSSToken& aResult); + bool ScanURange(nsCSSToken& aResult); + + void SetEOFCharacters(uint32_t aEOFCharacters); + void AddEOFCharacters(uint32_t aEOFCharacters); + + const char16_t *mBuffer; + uint32_t mOffset; + uint32_t mCount; + + uint32_t mLineNumber; + uint32_t mLineOffset; + + uint32_t mTokenLineNumber; + uint32_t mTokenLineOffset; + uint32_t mTokenOffset; + + uint32_t mRecordStartOffset; + EOFCharacters mEOFCharacters; + + mozilla::css::ErrorReporter *mReporter; + + // True if we are in SVG mode; false in "normal" CSS + bool mSVGMode; + bool mRecording; + bool mSeenBadToken; + bool mSeenVariableReference; +}; + +// Token for the grid-template-areas micro-syntax +// http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas +struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken { + nsAutoString mName; // Empty for a null cell, non-empty for a named cell + bool isTrash; // True for a trash token, mName is ignored in this case. +}; + +// Scanner for the grid-template-areas micro-syntax +class nsCSSGridTemplateAreaScanner { +public: + explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer); + + // Get the next token. Return false on EOF. + // aTokenResult is filled in with the data for the token. + bool Next(nsCSSGridTemplateAreaToken& aTokenResult); + +private: + const char16_t *mBuffer; + uint32_t mOffset; + uint32_t mCount; +}; + +#endif /* nsCSSScanner_h___ */ |