1 files changed, 397 insertions, 0 deletions
diff --git a/layout/style/nsCSSScanner.h b/layout/style/nsCSSScanner.h
new file mode 100644
index 000000000..ef03958c8
--- /dev/null
+++ b/layout/style/nsCSSScanner.h
@@ -0,0 +1,397 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* tokenization of CSS style sheets */
+
+#ifndef nsCSSScanner_h___
+#define nsCSSScanner_h___
+
+#include "nsString.h"
+
+namespace mozilla {
+namespace css {
+class ErrorReporter;
+} // namespace css
+} // namespace mozilla
+
+// Token types; in close but not perfect correspondence to the token
+// categorization in section 4.1.1 of CSS2.1.  (The deviations are all
+// the fault of css3-selectors, which has requirements that can only be
+// met by changing the generic tokenization.)  The comment on each line
+// illustrates the form of each identifier.
+
+enum nsCSSTokenType {
+  // White space of any kind.  No value fields are used.  Note that
+  // comments do *not* count as white space; comments separate tokens
+  // but are not themselves tokens.
+  eCSSToken_Whitespace,     //
+  // A comment.
+  eCSSToken_Comment,        // /*...*/
+
+  // Identifier-like tokens.  mIdent is the text of the identifier.
+  // The difference between ID and Hash is: if the text after the #
+  // would have been a valid Ident if the # hadn't been there, the
+  // scanner produces an ID token.  Otherwise it produces a Hash token.
+  // (This distinction is required by css3-selectors.)
+  eCSSToken_Ident,          // word
+  eCSSToken_Function,       // word(
+  eCSSToken_AtKeyword,      // @word
+  eCSSToken_ID,             // #word
+  eCSSToken_Hash,           // #0word
+
+  // Numeric tokens.  mNumber is the floating-point value of the
+  // number, and mHasSign indicates whether there was an explicit sign
+  // (+ or -) in front of the number.  If mIntegerValid is true, the
+  // number had the lexical form of an integer, and mInteger is its
+  // integer value.  Lexically integer values outside the range of a
+  // 32-bit signed number are clamped to the maximum values; mNumber
+  // will indicate a 'truer' value in that case.  Percentage tokens
+  // are always considered not to be integers, even if their numeric
+  // value is integral (100% => mNumber = 1.0).  For Dimension
+  // tokens, mIdent holds the text of the unit.
+  eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
+  eCSSToken_Dimension,      // 24px 8.5in
+  eCSSToken_Percentage,     // 85% 1280.4%
+
+  // String-like tokens.  In all cases, mIdent holds the text
+  // belonging to the string, and mSymbol holds the delimiter
+  // character, which may be ', ", or zero (only for unquoted URLs).
+  // Bad_String and Bad_URL tokens are emitted when the closing
+  // delimiter or parenthesis was missing.
+  eCSSToken_String,         // 'foo bar' "foo bar"
+  eCSSToken_Bad_String,     // 'foo bar
+  eCSSToken_URL,            // url(foobar) url("foo bar")
+  eCSSToken_Bad_URL,        // url(foo
+
+  // Any one-character symbol.  mSymbol holds the character.
+  eCSSToken_Symbol,         // . ; { } ! *
+
+  // Match operators.  These are single tokens rather than pairs of
+  // Symbol tokens because css3-selectors forbids the presence of
+  // comments between the two characters.  No value fields are used;
+  // the token type indicates which operator.
+  eCSSToken_Includes,       // ~=
+  eCSSToken_Dashmatch,      // |=
+  eCSSToken_Beginsmatch,    // ^=
+  eCSSToken_Endsmatch,      // $=
+  eCSSToken_Containsmatch,  // *=
+
+  // Unicode-range token: currently used only in @font-face.
+  // The lexical rule for this token includes several forms that are
+  // semantically invalid.  Therefore, mIdent always holds the
+  // complete original text of the token (so we can print it
+  // accurately in diagnostics), and mIntegerValid is true iff the
+  // token is semantically valid.  In that case, mInteger holds the
+  // lowest value included in the range, and mInteger2 holds the
+  // highest value included in the range.
+  eCSSToken_URange,         // U+007e U+01?? U+2000-206F
+
+  // HTML comment delimiters, ignored as a unit when they appear at
+  // the top level of a style sheet, for compatibility with websites
+  // written for compatibility with pre-CSS browsers.  This token type
+  // subsumes the css2.1 CDO and CDC tokens, which are always treated
+  // the same by the parser.  mIdent holds the text of the token, for
+  // diagnostics.
+  eCSSToken_HTMLComment,    // <!-- -->
+};
+
+// Classification of tokens used to determine if a "/**/" string must be
+// inserted if pasting token streams together when serializing.  We include
+// values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch,
+// as css-syntax does not treat these as whole tokens, but we will still
+// need to insert a "/**/" string between a '|' delim and a '|=' dashmatch
+// and between a '/' delim and a '*=' containsmatch.
+//
+// https://drafts.csswg.org/css-syntax/#serialization
+enum nsCSSTokenSerializationType {
+  eCSSTokenSerialization_Nothing,
+  eCSSTokenSerialization_Whitespace,
+  eCSSTokenSerialization_AtKeyword_or_Hash,
+  eCSSTokenSerialization_Number,
+  eCSSTokenSerialization_Dimension,
+  eCSSTokenSerialization_Percentage,
+  eCSSTokenSerialization_URange,
+  eCSSTokenSerialization_URL_or_BadURL,
+  eCSSTokenSerialization_Function,
+  eCSSTokenSerialization_Ident,
+  eCSSTokenSerialization_CDC,
+  eCSSTokenSerialization_DashMatch,
+  eCSSTokenSerialization_ContainsMatch,
+  eCSSTokenSerialization_Symbol_Hash,         // '#'
+  eCSSTokenSerialization_Symbol_At,           // '@'
+  eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
+  eCSSTokenSerialization_Symbol_Minus,        // '-'
+  eCSSTokenSerialization_Symbol_OpenParen,    // '('
+  eCSSTokenSerialization_Symbol_Question,     // '?'
+  eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
+  eCSSTokenSerialization_Symbol_Equals,       // '='
+  eCSSTokenSerialization_Symbol_Bar,          // '|'
+  eCSSTokenSerialization_Symbol_Slash,        // '/'
+  eCSSTokenSerialization_Symbol_Asterisk,     // '*'
+  eCSSTokenSerialization_Other                // anything else
+};
+
+// A single token returned from the scanner.  mType is always
+// meaningful; comments above describe which other fields are
+// meaningful for which token types.
+struct nsCSSToken {
+  nsAutoString    mIdent;
+  float           mNumber;
+  int32_t         mInteger;
+  int32_t         mInteger2;
+  nsCSSTokenType  mType;
+  char16_t       mSymbol;
+  bool            mIntegerValid;
+  bool            mHasSign;
+
+  nsCSSToken()
+    : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
+      mSymbol('\0'), mIntegerValid(false), mHasSign(false)
+  {}
+
+  bool IsSymbol(char16_t aSymbol) const {
+    return mType == eCSSToken_Symbol && mSymbol == aSymbol;
+  }
+
+  void AppendToString(nsString& aBuffer) const;
+};
+
+// Represents an nsCSSScanner's saved position in the input buffer.
+class nsCSSScannerPosition {
+  friend class nsCSSScanner;
+public:
+  nsCSSScannerPosition() : mInitialized(false) { }
+
+  uint32_t LineNumber() {
+    MOZ_ASSERT(mInitialized);
+    return mLineNumber;
+  }
+
+  uint32_t LineOffset() {
+    MOZ_ASSERT(mInitialized);
+    return mLineOffset;
+  }
+
+private:
+  uint32_t mOffset;
+  uint32_t mLineNumber;
+  uint32_t mLineOffset;
+  uint32_t mTokenLineNumber;
+  uint32_t mTokenLineOffset;
+  uint32_t mTokenOffset;
+  bool mInitialized;
+};
+
+enum nsCSSScannerExclude {
+  // Return all tokens, including whitespace and comments.
+  eCSSScannerExclude_None,
+  // Include whitespace but exclude comments.
+  eCSSScannerExclude_Comments,
+  // Exclude whitespace and comments.
+  eCSSScannerExclude_WhitespaceAndComments
+};
+
+// nsCSSScanner tokenizes an input stream using the CSS2.1 forward
+// compatible tokenization rules.  Used internally by nsCSSParser;
+// not available for use by other code.
+class nsCSSScanner {
+  public:
+  // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
+  // when the line number is unknown.  The scanner does not take
+  // ownership of |aBuffer|, so the caller must be sure to keep it
+  // alive for the lifetime of the scanner.
+  nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
+  ~nsCSSScanner();
+
+  void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
+    mReporter = aReporter;
+  }
+  // Set whether or not we are processing SVG
+  void SetSVGMode(bool aSVGMode) {
+    mSVGMode = aSVGMode;
+  }
+  bool IsSVGMode() const {
+    return mSVGMode;
+  }
+
+  // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
+  void ClearSeenBadToken() { mSeenBadToken = false; }
+  bool SeenBadToken() const { return mSeenBadToken; }
+
+  // Reset or check whether a "var(" FUNCTION token has been seen.
+  void ClearSeenVariableReference() { mSeenVariableReference = false; }
+  bool SeenVariableReference() const { return mSeenVariableReference; }
+
+  // Get the 1-based line number of the last character of
+  // the most recently processed token.
+  uint32_t GetLineNumber() const { return mTokenLineNumber; }
+
+  // Get the 0-based column number of the first character of
+  // the most recently processed token.
+  uint32_t GetColumnNumber() const
+  { return mTokenOffset - mTokenLineOffset; }
+
+  uint32_t GetTokenOffset() const
+  { return mTokenOffset; }
+
+  uint32_t GetTokenEndOffset() const
+  { return mOffset; }
+
+  // Get the text of the line containing the first character of
+  // the most recently processed token.
+  nsDependentSubstring GetCurrentLine() const;
+
+  // Get the next token.  Return false on EOF.  aTokenResult is filled
+  // in with the data for the token.  aSkip controls whether
+  // whitespace and/or comment tokens are ever returned.
+  bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);
+
+  // Get the body of an URL token (everything after the 'url(').
+  // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
+  // which, for historical reasons, must make additional function
+  // tokens behave like url().  Please do not add new uses to the
+  // parser.
+  void NextURL(nsCSSToken& aTokenResult);
+
+  // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
+  // because "2n-1" is a single DIMENSION token, and "n-1" is a single
+  // IDENT token, but the :nth() selector syntax wants to interpret
+  // them the same as "2n -1" and "n -1" respectively.  Please do not
+  // add new uses to the parser.
+  //
+  // Note: this function may not be used to back up over a line boundary.
+  void Backup(uint32_t n);
+
+  // Starts recording the input stream from the current position.
+  void StartRecording();
+
+  // Abandons recording of the input stream.
+  void StopRecording();
+
+  // Stops recording of the input stream and appends the recorded
+  // input to aBuffer.
+  void StopRecording(nsString& aBuffer);
+
+  // Returns the length of the current recording.
+  uint32_t RecordingLength() const;
+
+#ifdef DEBUG
+  bool IsRecording() const;
+#endif
+
+  // Stores the current scanner offset into the specified object.
+  void SavePosition(nsCSSScannerPosition& aState);
+
+  // Resets the scanner offset to a position saved by SavePosition.
+  void RestoreSavedPosition(const nsCSSScannerPosition& aState);
+
+  enum EOFCharacters {
+    eEOFCharacters_None =                    0x0000,
+
+    // to handle \<EOF> inside strings
+    eEOFCharacters_DropBackslash =           0x0001,
+
+    // to handle \<EOF> outside strings
+    eEOFCharacters_ReplacementChar =         0x0002,
+
+    // to close comments
+    eEOFCharacters_Asterisk =                0x0004,
+    eEOFCharacters_Slash =                   0x0008,
+
+    // to close double-quoted strings
+    eEOFCharacters_DoubleQuote =             0x0010,
+
+    // to close single-quoted strings
+    eEOFCharacters_SingleQuote =             0x0020,
+
+    // to close URLs
+    eEOFCharacters_CloseParen =              0x0040,
+  };
+
+  // Appends any characters to the specified string the input stream to make the
+  // last token not rely on special EOF handling behavior.
+  //
+  // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
+  static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
+                                         nsAString& aString);
+
+  EOFCharacters GetEOFCharacters() const {
+#ifdef DEBUG
+    AssertEOFCharactersValid(mEOFCharacters);
+#endif
+    return mEOFCharacters;
+  }
+
+#ifdef DEBUG
+  static void AssertEOFCharactersValid(uint32_t c);
+#endif
+
+protected:
+  int32_t Peek(uint32_t n = 0);
+  void Advance(uint32_t n = 1);
+  void AdvanceLine();
+
+  void SkipWhitespace();
+  void SkipComment();
+
+  bool GatherEscape(nsString& aOutput, bool aInString);
+  bool GatherText(uint8_t aClass, nsString& aIdent);
+
+  bool ScanIdent(nsCSSToken& aResult);
+  bool ScanAtKeyword(nsCSSToken& aResult);
+  bool ScanHash(nsCSSToken& aResult);
+  bool ScanNumber(nsCSSToken& aResult);
+  bool ScanString(nsCSSToken& aResult);
+  bool ScanURange(nsCSSToken& aResult);
+
+  void SetEOFCharacters(uint32_t aEOFCharacters);
+  void AddEOFCharacters(uint32_t aEOFCharacters);
+
+  const char16_t *mBuffer;
+  uint32_t mOffset;
+  uint32_t mCount;
+
+  uint32_t mLineNumber;
+  uint32_t mLineOffset;
+
+  uint32_t mTokenLineNumber;
+  uint32_t mTokenLineOffset;
+  uint32_t mTokenOffset;
+
+  uint32_t mRecordStartOffset;
+  EOFCharacters mEOFCharacters;
+
+  mozilla::css::ErrorReporter *mReporter;
+
+  // True if we are in SVG mode; false in "normal" CSS
+  bool mSVGMode;
+  bool mRecording;
+  bool mSeenBadToken;
+  bool mSeenVariableReference;
+};
+
+// Token for the grid-template-areas micro-syntax
+// http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
+struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
+  nsAutoString mName;  // Empty for a null cell, non-empty for a named cell
+  bool isTrash;  // True for a trash token, mName is ignored in this case.
+};
+
+// Scanner for the grid-template-areas micro-syntax
+class nsCSSGridTemplateAreaScanner {
+public:
+  explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer);
+
+  // Get the next token.  Return false on EOF.
+  // aTokenResult is filled in with the data for the token.
+  bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
+
+private:
+  const char16_t *mBuffer;
+  uint32_t mOffset;
+  uint32_t mCount;
+};
+
+#endif /* nsCSSScanner_h___ */