summaryrefslogtreecommitdiffstats
path: root/layout/style/nsCSSScanner.h
diff options
context:
space:
mode:
Diffstat (limited to 'layout/style/nsCSSScanner.h')
-rw-r--r--layout/style/nsCSSScanner.h397
1 files changed, 397 insertions, 0 deletions
diff --git a/layout/style/nsCSSScanner.h b/layout/style/nsCSSScanner.h
new file mode 100644
index 000000000..ef03958c8
--- /dev/null
+++ b/layout/style/nsCSSScanner.h
@@ -0,0 +1,397 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* tokenization of CSS style sheets */
+
+#ifndef nsCSSScanner_h___
+#define nsCSSScanner_h___
+
+#include "nsString.h"
+
+namespace mozilla {
+namespace css {
+class ErrorReporter;
+} // namespace css
+} // namespace mozilla
+
+// Token types; in close but not perfect correspondence to the token
+// categorization in section 4.1.1 of CSS2.1. (The deviations are all
+// the fault of css3-selectors, which has requirements that can only be
+// met by changing the generic tokenization.) The trailing comment on
+// each enumerator illustrates the lexical form of that token type.
+
+enum nsCSSTokenType {
+ // White space of any kind. No value fields are used. Note that
+ // comments do *not* count as white space; they are reported with
+ // their own token type (eCSSToken_Comment) instead.
+ eCSSToken_Whitespace, // (white space)
+ // A comment. May be filtered out by the scanner; see nsCSSScannerExclude.
+ eCSSToken_Comment, // /*...*/
+
+ // Identifier-like tokens. mIdent is the text of the identifier.
+ // The difference between ID and Hash is: if the text after the #
+ // would have been a valid Ident if the # hadn't been there, the
+ // scanner produces an ID token. Otherwise it produces a Hash token.
+ // (This distinction is required by css3-selectors.)
+ eCSSToken_Ident, // word
+ eCSSToken_Function, // word(
+ eCSSToken_AtKeyword, // @word
+ eCSSToken_ID, // #word
+ eCSSToken_Hash, // #0word
+
+ // Numeric tokens. mNumber is the floating-point value of the
+ // number, and mHasSign indicates whether there was an explicit sign
+ // (+ or -) in front of the number. If mIntegerValid is true, the
+ // number had the lexical form of an integer, and mInteger is its
+ // integer value. Lexically integer values outside the range of a
+ // 32-bit signed number are clamped to the maximum values; mNumber
+ // will indicate a 'truer' value in that case. Percentage tokens
+ // are always considered not to be integers, even if their numeric
+ // value is integral (100% => mNumber = 1.0). For Dimension
+ // tokens, mIdent holds the text of the unit.
+ eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3
+ eCSSToken_Dimension, // 24px 8.5in
+ eCSSToken_Percentage, // 85% 1280.4%
+
+ // String-like tokens. In all cases, mIdent holds the text
+ // belonging to the string, and mSymbol holds the delimiter
+ // character, which may be ', ", or zero (only for unquoted URLs).
+ // Bad_String and Bad_URL tokens are emitted when the closing
+ // delimiter or parenthesis was missing.
+ eCSSToken_String, // 'foo bar' "foo bar"
+ eCSSToken_Bad_String, // 'foo bar
+ eCSSToken_URL, // url(foobar) url("foo bar")
+ eCSSToken_Bad_URL, // url(foo
+
+ // Any one-character symbol. mSymbol holds the character.
+ eCSSToken_Symbol, // . ; { } ! *
+
+ // Match operators. These are single tokens rather than pairs of
+ // Symbol tokens because css3-selectors forbids the presence of
+ // comments between the two characters. No value fields are used;
+ // the token type indicates which operator.
+ eCSSToken_Includes, // ~=
+ eCSSToken_Dashmatch, // |=
+ eCSSToken_Beginsmatch, // ^=
+ eCSSToken_Endsmatch, // $=
+ eCSSToken_Containsmatch, // *=
+
+ // Unicode-range token: currently used only in @font-face.
+ // The lexical rule for this token includes several forms that are
+ // semantically invalid. Therefore, mIdent always holds the
+ // complete original text of the token (so we can print it
+ // accurately in diagnostics), and mIntegerValid is true iff the
+ // token is semantically valid. In that case, mInteger holds the
+ // lowest value included in the range, and mInteger2 holds the
+ // highest value included in the range.
+ eCSSToken_URange, // U+007e U+01?? U+2000-206F
+
+ // HTML comment delimiters, ignored as a unit when they appear at
+ // the top level of a style sheet, for compatibility with websites
+ // written to also work in pre-CSS browsers. This token type
+ // subsumes the css2.1 CDO and CDC tokens, which are always treated
+ // the same by the parser. mIdent holds the text of the token, for
+ // diagnostics.
+ eCSSToken_HTMLComment, // <!-- -->
+};
+
+// Classification of tokens used to decide whether a "/**/" string must
+// be inserted between adjacent tokens when pasting token streams
+// together during serialization. css-syntax does not treat '|=' and
+// '*=' as whole tokens, but we do (eCSSToken_Dashmatch and
+// eCSSToken_Containsmatch), so they get their own values here: a "/**/"
+// is still needed between '|' and '|=', and between '/' and '*='.
+//
+// https://drafts.csswg.org/css-syntax/#serialization
+enum nsCSSTokenSerializationType {
+ eCSSTokenSerialization_Nothing,
+ eCSSTokenSerialization_Whitespace,
+ eCSSTokenSerialization_AtKeyword_or_Hash,
+ eCSSTokenSerialization_Number,
+ eCSSTokenSerialization_Dimension,
+ eCSSTokenSerialization_Percentage,
+ eCSSTokenSerialization_URange,
+ eCSSTokenSerialization_URL_or_BadURL,
+ eCSSTokenSerialization_Function,
+ eCSSTokenSerialization_Ident,
+ eCSSTokenSerialization_CDC,
+ eCSSTokenSerialization_DashMatch, // '|='
+ eCSSTokenSerialization_ContainsMatch, // '*='
+ eCSSTokenSerialization_Symbol_Hash, // '#'
+ eCSSTokenSerialization_Symbol_At, // '@'
+ eCSSTokenSerialization_Symbol_Dot_or_Plus, // '.', '+'
+ eCSSTokenSerialization_Symbol_Minus, // '-'
+ eCSSTokenSerialization_Symbol_OpenParen, // '('
+ eCSSTokenSerialization_Symbol_Question, // '?'
+ eCSSTokenSerialization_Symbol_Assorted, // '$', '^', '~'
+ eCSSTokenSerialization_Symbol_Equals, // '='
+ eCSSTokenSerialization_Symbol_Bar, // '|'
+ eCSSTokenSerialization_Symbol_Slash, // '/'
+ eCSSTokenSerialization_Symbol_Asterisk, // '*'
+ eCSSTokenSerialization_Other // anything else
+};
+
+// A single token returned from the scanner. mType is always
+// meaningful; the comments on nsCSSTokenType describe which other
+// fields are meaningful for which token types.
+struct nsCSSToken {
+ nsAutoString mIdent; // token text; for Dimension tokens, the text of the unit
+ float mNumber; // floating-point value of a numeric token
+ int32_t mInteger; // integer value when mIntegerValid; lowest value of a URange
+ int32_t mInteger2; // highest value of a URange
+ nsCSSTokenType mType; // which kind of token this is
+ char16_t mSymbol; // the Symbol character, or the delimiter of a string-like token
+ bool mIntegerValid; // number was lexically an integer / URange is semantically valid
+ bool mHasSign; // numeric token had an explicit + or - in front
+
+ nsCSSToken() // starts out as a Whitespace token with numeric and flag fields cleared
+ : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
+ mSymbol('\0'), mIntegerValid(false), mHasSign(false)
+ {}
+
+ bool IsSymbol(char16_t aSymbol) const { // true only for a Symbol token holding exactly aSymbol
+ return mType == eCSSToken_Symbol && mSymbol == aSymbol;
+ }
+
+ void AppendToString(nsString& aBuffer) const; // NOTE(review): defined out of line; presumably appends this token's text to aBuffer
+};
+
+// Represents an nsCSSScanner's saved position in the input buffer.
+class nsCSSScannerPosition {
+ friend class nsCSSScanner; // the scanner reads and writes the private fields directly
+public:
+ nsCSSScannerPosition() : mInitialized(false) { } // other fields stay unset until a position is saved
+
+ uint32_t LineNumber() const { // read-only accessor, usable through a const reference
+ MOZ_ASSERT(mInitialized);
+ return mLineNumber;
+ }
+
+ uint32_t LineOffset() const { // read-only accessor, usable through a const reference
+ MOZ_ASSERT(mInitialized);
+ return mLineOffset;
+ }
+
+private:
+ uint32_t mOffset; // NOTE(review): presumably mirrors nsCSSScanner::mOffset -- confirm in the .cpp
+ uint32_t mLineNumber; // returned by LineNumber()
+ uint32_t mLineOffset; // returned by LineOffset()
+ uint32_t mTokenLineNumber; // NOTE(review): presumably mirrors nsCSSScanner::mTokenLineNumber
+ uint32_t mTokenLineOffset; // NOTE(review): presumably mirrors nsCSSScanner::mTokenLineOffset
+ uint32_t mTokenOffset; // NOTE(review): presumably mirrors nsCSSScanner::mTokenOffset
+ bool mInitialized; // false after construction; the accessors assert it is true
+};
+
+enum nsCSSScannerExclude { // passed to nsCSSScanner::Next() as aSkip to choose which tokens are never returned
+ // Return all tokens, including whitespace and comments.
+ eCSSScannerExclude_None,
+ // Include whitespace but exclude comments.
+ eCSSScannerExclude_Comments,
+ // Exclude whitespace and comments.
+ eCSSScannerExclude_WhitespaceAndComments
+};
+
+// nsCSSScanner tokenizes an input stream using the CSS2.1 forward
+// compatible tokenization rules. Used internally by nsCSSParser;
+// not available for use by other code.
+class nsCSSScanner {
+ public:
+ // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
+ // when the line number is unknown. The scanner does not take
+ // ownership of |aBuffer|, so the caller must be sure to keep it
+ // alive for the lifetime of the scanner.
+ nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
+ ~nsCSSScanner();
+
+ void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) { // only stores the pointer
+ mReporter = aReporter;
+ }
+ // Set whether or not we are processing SVG
+ void SetSVGMode(bool aSVGMode) {
+ mSVGMode = aSVGMode;
+ }
+ bool IsSVGMode() const {
+ return mSVGMode;
+ }
+
+ // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
+ void ClearSeenBadToken() { mSeenBadToken = false; }
+ bool SeenBadToken() const { return mSeenBadToken; }
+
+ // Reset or check whether a "var(" FUNCTION token has been seen.
+ void ClearSeenVariableReference() { mSeenVariableReference = false; }
+ bool SeenVariableReference() const { return mSeenVariableReference; }
+
+ // Get the 1-based line number of the last character of
+ // the most recently processed token.
+ uint32_t GetLineNumber() const { return mTokenLineNumber; } // NOTE(review): GetColumnNumber() is documented relative to the *first* character; confirm "last" here
+
+ // Get the 0-based column number of the first character of
+ // the most recently processed token.
+ uint32_t GetColumnNumber() const
+ { return mTokenOffset - mTokenLineOffset; }
+
+ uint32_t GetTokenOffset() const // buffer offset stored in mTokenOffset
+ { return mTokenOffset; }
+
+ uint32_t GetTokenEndOffset() const // current scan offset, mOffset
+ { return mOffset; }
+
+ // Get the text of the line containing the first character of
+ // the most recently processed token.
+ nsDependentSubstring GetCurrentLine() const;
+
+ // Get the next token. Return false on EOF. aTokenResult is filled
+ // in with the data for the token. aSkip controls whether
+ // whitespace and/or comment tokens are ever returned.
+ bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);
+
+ // Get the body of an URL token (everything after the 'url(').
+ // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
+ // which, for historical reasons, must make additional function
+ // tokens behave like url(). Please do not add new uses to the
+ // parser.
+ void NextURL(nsCSSToken& aTokenResult);
+
+ // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
+ // because "2n-1" is a single DIMENSION token, and "n-1" is a single
+ // IDENT token, but the :nth() selector syntax wants to interpret
+ // them the same as "2n -1" and "n -1" respectively. Please do not
+ // add new uses to the parser.
+ //
+ // Note: this function may not be used to back up over a line boundary.
+ void Backup(uint32_t n);
+
+ // Starts recording the input stream from the current position.
+ void StartRecording();
+
+ // Abandons recording of the input stream.
+ void StopRecording();
+
+ // Stops recording of the input stream and appends the recorded
+ // input to aBuffer.
+ void StopRecording(nsString& aBuffer);
+
+ // Returns the length of the current recording.
+ uint32_t RecordingLength() const;
+
+#ifdef DEBUG
+ bool IsRecording() const;
+#endif
+
+ // Stores the current scanner offset into the specified object.
+ void SavePosition(nsCSSScannerPosition& aState);
+
+ // Resets the scanner offset to a position saved by SavePosition.
+ void RestoreSavedPosition(const nsCSSScannerPosition& aState);
+
+ enum EOFCharacters { // each enumerator is a distinct bit, so several can be combined in one value
+ eEOFCharacters_None = 0x0000,
+
+ // to handle \<EOF> inside strings
+ eEOFCharacters_DropBackslash = 0x0001,
+
+ // to handle \<EOF> outside strings
+ eEOFCharacters_ReplacementChar = 0x0002,
+
+ // to close comments
+ eEOFCharacters_Asterisk = 0x0004,
+ eEOFCharacters_Slash = 0x0008,
+
+ // to close double-quoted strings
+ eEOFCharacters_DoubleQuote = 0x0010,
+
+ // to close single-quoted strings
+ eEOFCharacters_SingleQuote = 0x0020,
+
+ // to close URLs
+ eEOFCharacters_CloseParen = 0x0040,
+ };
+
+ // Appends to aString whatever characters aEOFCharacters says are needed so
+ // that the last token no longer relies on special EOF handling behavior.
+ //
+ // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
+ static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
+ nsAString& aString);
+
+ EOFCharacters GetEOFCharacters() const { // returns mEOFCharacters; DEBUG builds validate it first
+#ifdef DEBUG
+ AssertEOFCharactersValid(mEOFCharacters);
+#endif
+ return mEOFCharacters;
+ }
+
+#ifdef DEBUG
+ static void AssertEOFCharactersValid(uint32_t c);
+#endif
+
+protected:
+ int32_t Peek(uint32_t n = 0); // NOTE(review): presumably looks n characters ahead without consuming; confirm in the .cpp
+ void Advance(uint32_t n = 1);
+ void AdvanceLine();
+
+ void SkipWhitespace();
+ void SkipComment();
+
+ bool GatherEscape(nsString& aOutput, bool aInString);
+ bool GatherText(uint8_t aClass, nsString& aIdent);
+
+ bool ScanIdent(nsCSSToken& aResult);
+ bool ScanAtKeyword(nsCSSToken& aResult);
+ bool ScanHash(nsCSSToken& aResult);
+ bool ScanNumber(nsCSSToken& aResult);
+ bool ScanString(nsCSSToken& aResult);
+ bool ScanURange(nsCSSToken& aResult);
+
+ void SetEOFCharacters(uint32_t aEOFCharacters);
+ void AddEOFCharacters(uint32_t aEOFCharacters);
+
+ const char16_t *mBuffer; // NOTE(review): presumably the caller-owned input text (see constructor comment)
+ uint32_t mOffset; // reported by GetTokenEndOffset()
+ uint32_t mCount; // NOTE(review): presumably the number of char16_t units in mBuffer
+
+ uint32_t mLineNumber;
+ uint32_t mLineOffset;
+
+ uint32_t mTokenLineNumber; // reported by GetLineNumber()
+ uint32_t mTokenLineOffset; // subtracted from mTokenOffset to get the token's column
+ uint32_t mTokenOffset; // reported by GetTokenOffset()
+
+ uint32_t mRecordStartOffset; // NOTE(review): presumably where StartRecording() began
+ EOFCharacters mEOFCharacters; // reported by GetEOFCharacters()
+
+ mozilla::css::ErrorReporter *mReporter; // set by SetErrorReporter()
+
+ // True if we are in SVG mode; false in "normal" CSS
+ bool mSVGMode;
+ bool mRecording; // NOTE(review): presumably true between StartRecording() and StopRecording()
+ bool mSeenBadToken; // see ClearSeenBadToken() / SeenBadToken()
+ bool mSeenVariableReference; // see ClearSeenVariableReference() / SeenVariableReference()
+};
+
+// Token for the grid-template-areas micro-syntax, as filled in by
+// nsCSSGridTemplateAreaScanner::Next(); http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
+struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
+ nsAutoString mName; // Empty for a null cell, non-empty for a named cell
+ bool isTrash; // True for a trash token; mName is ignored in this case.
+};
+
+// Scanner for the grid-template-areas micro-syntax
+class nsCSSGridTemplateAreaScanner {
+public:
+ explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer); // NOTE(review): presumably scans aBuffer in place; confirm lifetime requirements in the .cpp
+
+ // Get the next token. Return false on EOF.
+ // aTokenResult is filled in with the data for the token.
+ bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
+
+private:
+ const char16_t *mBuffer; // NOTE(review): presumably the text of the constructor's aBuffer
+ uint32_t mOffset; // NOTE(review): presumably the current scan offset into mBuffer
+ uint32_t mCount; // NOTE(review): presumably the number of char16_t units in mBuffer
+};
+
+#endif /* nsCSSScanner_h___ */