Add m-esr52 at 52.6.0

author: Matt A. Tobin <mattatobin@localhost.localdomain> 2018-02-02 04:16:08 -0500
committer: Matt A. Tobin <mattatobin@localhost.localdomain> 2018-02-02 04:16:08 -0500
commit: 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree: 10027f336435511475e392454359edea8e25895d /layout/style/nsCSSScanner.cpp
parent: 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
download: UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
1 files changed, 1380 insertions, 0 deletions
diff --git a/layout/style/nsCSSScanner.cpp b/layout/style/nsCSSScanner.cpp
new file mode 100644
index 000000000..771c8936b
--- /dev/null
+++ b/layout/style/nsCSSScanner.cpp
@@ -0,0 +1,1380 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/* tokenization of CSS style sheets */
+
+#include "nsCSSScanner.h"
+#include "nsStyleUtil.h"
+#include "nsISupportsImpl.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/css/ErrorReporter.h"
+#include "mozilla/Likely.h"
+#include <algorithm>
+
+/* Character class tables and related helper functions. */
+
+// Bit flags naming the lexical classes an ASCII character may belong to.
+// Every entry of gLexTable below is the bitwise OR of zero or more of them.
+static const uint8_t IS_HEX_DIGIT  = 0x01;
+static const uint8_t IS_IDSTART    = 0x02;
+static const uint8_t IS_IDCHAR     = 0x04;
+static const uint8_t IS_URL_CHAR   = 0x08;
+static const uint8_t IS_HSPACE     = 0x10;
+static const uint8_t IS_VSPACE     = 0x20;
+static const uint8_t IS_SPACE      = IS_HSPACE|IS_VSPACE;
+static const uint8_t IS_STRING     = 0x40;
+
+// Short aliases used only to keep the table below readable.  All of them
+// are #undef'd again immediately after the table.
+#define H    IS_HSPACE
+#define V    IS_VSPACE
+#define I    IS_IDCHAR
+#define J    IS_IDSTART
+#define U    IS_URL_CHAR
+#define S    IS_STRING
+#define X    IS_HEX_DIGIT
+
+#define SH    S|H
+#define SU    S|U
+#define SUI   S|U|I
+#define SUIJ  S|U|I|J
+#define SUIX  S|U|I|X
+#define SUIJX S|U|I|J|X
+
+// Character classes for U+0000 through U+007F, indexed by code point.
+static const uint8_t gLexTable[] = {
+// 00    01    02    03    04    05    06    07
+    0,    S,    S,    S,    S,    S,    S,    S,
+// 08   TAB    LF    0B    FF    CR    0E    0F
+    S,   SH,    V,    S,    V,    V,    S,    S,
+// 10    11    12    13    14    15    16    17
+    S,    S,    S,    S,    S,    S,    S,    S,
+// 18    19    1A    1B    1C    1D    1E    1F
+    S,    S,    S,    S,    S,    S,    S,    S,
+//SPC     !     "     #     $     %     &     '
+   SH,   SU,    0,   SU,   SU,   SU,   SU,    0,
+//  (     )     *     +     ,     -     .     /
+    S,    S,   SU,   SU,   SU,  SUI,   SU,   SU,
+//  0     1     2     3     4     5     6     7
+ SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
+//  8     9     :     ;     <     =     >     ?
+ SUIX, SUIX,   SU,   SU,   SU,   SU,   SU,   SU,
+//  @     A     B     C     D     E     F     G
+   SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
+//  H     I     J     K     L     M     N     O
+ SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+//  P     Q     R     S     T     U     V     W
+ SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+//  X     Y     Z     [     \     ]     ^     _
+ SUIJ, SUIJ, SUIJ,   SU,    J,   SU,   SU, SUIJ,
+//  `     a     b     c     d     e     f     g
+   SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
+//  h     i     j     k     l     m     n     o
+ SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+//  p     q     r     s     t     u     v     w
+ SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+//  x     y     z     {     |     }     ~    7F
+ SUIJ, SUIJ, SUIJ,   SU,   SU,   SU,   SU,    S,
+};
+
+static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128,
+              "gLexTable expected to cover all 128 ASCII characters");
+
+// Remove every table-building alias, including H and V, so that none of
+// these one-letter macros can affect code that follows.
+#undef H
+#undef V
+#undef I
+#undef J
+#undef U
+#undef S
+#undef X
+#undef SH
+#undef SU
+#undef SUI
+#undef SUIJ
+#undef SUIX
+#undef SUIJX
+
+/**
+ * Test whether 'ch' belongs to character class 'cls' (one of the IS_*
+ * flags, or an OR of several).  The class is "open": every character
+ * above U+007F is treated as a member.  Negative values (EOF) never are.
+ */
+static inline bool
+IsOpenCharClass(int32_t ch, uint8_t cls)
+{
+  if (ch < 0) {
+    return false;
+  }
+  if (ch >= 128) {
+    return true;
+  }
+  return (gLexTable[ch] & cls) != 0;
+}
+
+/**
+ * Test whether 'ch' belongs to character class 'cls' (one of the IS_*
+ * flags, or an OR of several).  The class is "closed": no character
+ * above U+007F is a member, and neither is EOF.
+ */
+static inline bool
+IsClosedCharClass(int32_t ch, uint8_t cls)
+{
+  // The unsigned comparison rejects negative values and values >= 128
+  // in a single test.
+  if (uint32_t(ch) >= 128) {
+    return false;
+  }
+  return (gLexTable[ch] & cls) != 0;
+}
+
+/**
+ * True if 'ch' is CSS whitespace: horizontal (TAB, SPC) or vertical
+ * (LF, FF, CR).
+ */
+static inline bool
+IsWhitespace(int32_t ch)
+{
+  return IsClosedCharClass(ch, IS_HSPACE | IS_VSPACE);
+}
+
+/**
+ * True if 'ch' is TAB or SPC, the two horizontal whitespace characters.
+ */
+static inline bool
+IsHorzSpace(int32_t ch)
+{
+  const bool horizontal = IsClosedCharClass(ch, IS_HSPACE);
+  return horizontal;
+}
+
+/**
+ * True if 'ch' is LF, FF, or CR.  These vertical whitespace characters
+ * must be consumed with AdvanceLine rather than Advance.
+ */
+static inline bool
+IsVertSpace(int32_t ch)
+{
+  const bool vertical = IsClosedCharClass(ch, IS_VSPACE);
+  return vertical;
+}
+
+/**
+ * True if 'ch' may appear in the middle of an identifier.  U+0000 is
+ * accepted here because it is handled as U+FFFD; it is deliberately not
+ * part of the table-driven class so that GatherText can treat it
+ * separately.
+ */
+static inline bool
+IsIdentChar(int32_t ch)
+{
+  if (ch == 0) {
+    return true;
+  }
+  return IsOpenCharClass(ch, IS_IDCHAR);
+}
+
+/**
+ * True if 'ch' on its own can begin an identifier.  U+0000 is accepted
+ * here because it is handled as U+FFFD; it is deliberately not part of
+ * the table-driven class so that GatherText can treat it separately.
+ * (This is a subset of IsIdentChar.)
+ */
+static inline bool
+IsIdentStart(int32_t ch)
+{
+  if (ch == 0) {
+    return true;
+  }
+  return IsOpenCharClass(ch, IS_IDSTART);
+}
+
+/**
+ * True if an identifier begins with the two characters
+ * aFirstChar, aSecondChar: either aFirstChar is itself an identifier
+ * start, or it is '-' followed by another '-' or an identifier start.
+ */
+static inline bool
+StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
+{
+  if (IsIdentStart(aFirstChar)) {
+    return true;
+  }
+  if (aFirstChar != '-') {
+    return false;
+  }
+  return aSecondChar == '-' || IsIdentStart(aSecondChar);
+}
+
+/**
+ * True if 'ch' is one of the ASCII decimal digits '0' through '9'.
+ */
+static inline bool
+IsDigit(int32_t ch)
+{
+  if (ch < '0') {
+    return false;
+  }
+  return ch <= '9';
+}
+
+/**
+ * True if 'ch' is an ASCII hexadecimal digit (0-9, A-F, a-f).
+ */
+static inline bool
+IsHexDigit(int32_t ch)
+{
+  const bool isHex = IsClosedCharClass(ch, IS_HEX_DIGIT);
+  return isHex;
+}
+
+/**
+ * Numeric value of the decimal digit 'ch'.  The caller must have
+ * checked that 'ch' really is a decimal digit.
+ */
+static inline uint32_t
+DecimalDigitValue(int32_t ch)
+{
+  const int32_t distanceFromZero = ch - '0';
+  return distanceFromZero;
+}
+
+/**
+ * Numeric value of the hexadecimal digit 'ch'.  The caller must have
+ * checked that 'ch' really is a hexadecimal digit.
+ */
+static inline uint32_t
+HexDigitValue(int32_t ch)
+{
+  if (!IsDigit(ch)) {
+    // 'A'..'F' and 'a'..'f' share their low three bits (1..6), so
+    // masking with 7 and adding 9 maps either case onto 10..15.
+    return (ch & 0x7) + 9;
+  }
+  return DecimalDigitValue(ch);
+}
+
+/**
+ * Map the first character of a two-character match operator to the
+ * token type of that operator.  Any character that cannot begin such
+ * an operator maps to eCSSToken_Symbol.
+ */
+static inline nsCSSTokenType
+MatchOperatorType(int32_t ch)
+{
+  if (ch == '~') {
+    return eCSSToken_Includes;
+  }
+  if (ch == '|') {
+    return eCSSToken_Dashmatch;
+  }
+  if (ch == '^') {
+    return eCSSToken_Beginsmatch;
+  }
+  if (ch == '$') {
+    return eCSSToken_Endsmatch;
+  }
+  if (ch == '*') {
+    return eCSSToken_Containsmatch;
+  }
+  return eCSSToken_Symbol;
+}
+
+/* Out-of-line nsCSSToken methods. */
+
+/**
+ * Serialize this token as CSS text and append it to |aBuffer|.
+ * Identifiers and strings are re-escaped through nsStyleUtil.
+ */
+void
+nsCSSToken::AppendToString(nsString& aBuffer) const
+{
+  switch (mType) {
+    case eCSSToken_Ident:
+      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
+      break;
+
+    // Identifier-like tokens introduced by a one-character prefix.
+    case eCSSToken_AtKeyword:
+    case eCSSToken_ID:
+    case eCSSToken_Hash:
+      aBuffer.Append(mType == eCSSToken_AtKeyword ? '@' : '#');
+      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
+      break;
+
+    case eCSSToken_Function:
+      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
+      aBuffer.Append('(');
+      break;
+
+    case eCSSToken_URL:
+    case eCSSToken_Bad_URL:
+      aBuffer.AppendLiteral("url(");
+      if (mSymbol == char16_t(0)) {
+        // No quote character recorded: emit the text as-is.
+        aBuffer.Append(mIdent);
+      } else {
+        nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
+      }
+      // Only a well-formed URL token gets its closing parenthesis.
+      if (mType == eCSSToken_URL) {
+        aBuffer.Append(char16_t(')'));
+      }
+      break;
+
+    // A Dimension is a Number followed by its unit identifier.
+    case eCSSToken_Number:
+    case eCSSToken_Dimension:
+      if (mIntegerValid) {
+        aBuffer.AppendInt(mInteger, 10);
+      } else {
+        aBuffer.AppendFloat(mNumber);
+      }
+      if (mType == eCSSToken_Dimension) {
+        nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
+      }
+      break;
+
+    case eCSSToken_Percentage:
+      aBuffer.AppendFloat(mNumber * 100.0f);
+      aBuffer.Append(char16_t('%'));
+      break;
+
+    case eCSSToken_Bad_String:
+    case eCSSToken_String:
+      nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
+      if (mType == eCSSToken_Bad_String) {
+        // remove the trailing quote character
+        aBuffer.Truncate(aBuffer.Length() - 1);
+      }
+      break;
+
+    case eCSSToken_Symbol:
+      aBuffer.Append(mSymbol);
+      break;
+
+    case eCSSToken_Whitespace:
+      aBuffer.Append(' ');
+      break;
+
+    // These tokens keep their source text verbatim in mIdent.
+    case eCSSToken_HTMLComment:
+    case eCSSToken_URange:
+      aBuffer.Append(mIdent);
+      break;
+
+    // Two-character attribute match operators.
+    case eCSSToken_Includes:      aBuffer.AppendLiteral("~="); break;
+    case eCSSToken_Dashmatch:     aBuffer.AppendLiteral("|="); break;
+    case eCSSToken_Beginsmatch:   aBuffer.AppendLiteral("^="); break;
+    case eCSSToken_Endsmatch:     aBuffer.AppendLiteral("$="); break;
+    case eCSSToken_Containsmatch: aBuffer.AppendLiteral("*="); break;
+
+    default:
+      NS_ERROR("invalid token type");
+      break;
+  }
+}
+
+/* nsCSSScanner methods. */
+
+/**
+ * Construct a scanner over the text of |aBuffer|, positioned at its
+ * start.  |aLineNumber| seeds both the current line number and the
+ * line number of the current token.
+ *
+ * NOTE(review): only the pointer returned by aBuffer.BeginReading() is
+ * stored, not a copy of the text, so presumably the caller must keep
+ * |aBuffer| alive and unmodified for the scanner's lifetime -- confirm.
+ */
+nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
+  : mBuffer(aBuffer.BeginReading())
+  , mOffset(0)
+  , mCount(aBuffer.Length())
+  , mLineNumber(aLineNumber)
+  , mLineOffset(0)
+  , mTokenLineNumber(aLineNumber)
+  , mTokenLineOffset(0)
+  , mTokenOffset(0)
+  , mRecordStartOffset(0)
+  , mEOFCharacters(eEOFCharacters_None)
+  , mReporter(nullptr)
+  , mSVGMode(false)
+  , mRecording(false)
+  , mSeenBadToken(false)
+  , mSeenVariableReference(false)
+{
+  MOZ_COUNT_CTOR(nsCSSScanner);
+}
+
+/**
+ * Destructor.  Its only statement is the MOZ_COUNT_DTOR bookkeeping that
+ * pairs with MOZ_COUNT_CTOR in the constructor.
+ */
+nsCSSScanner::~nsCSSScanner()
+{
+  MOZ_COUNT_DTOR(nsCSSScanner);
+}
+
+/**
+ * Begin recording: remember the current read position as the start of
+ * the recorded range.  Must not be called while already recording.
+ */
+void
+nsCSSScanner::StartRecording()
+{
+  MOZ_ASSERT(!mRecording, "already started recording");
+  mRecordStartOffset = mOffset;
+  mRecording = true;
+}
+
+/**
+ * End recording and discard the recorded range.  Must only be called
+ * while recording.
+ */
+void
+nsCSSScanner::StopRecording()
+{
+  MOZ_ASSERT(mRecording, "haven't started recording");
+
+  mRecording = false;
+}
+
+/**
+ * End recording and append everything read since StartRecording() to
+ * |aBuffer|.  Must only be called while recording.
+ */
+void
+nsCSSScanner::StopRecording(nsString& aBuffer)
+{
+  MOZ_ASSERT(mRecording, "haven't started recording");
+  const uint32_t recordedLength = mOffset - mRecordStartOffset;
+  aBuffer.Append(mBuffer + mRecordStartOffset, recordedLength);
+  mRecording = false;
+}
+
+/**
+ * Number of code units read since StartRecording().  Must only be
+ * called while recording.
+ */
+uint32_t
+nsCSSScanner::RecordingLength() const
+{
+  MOZ_ASSERT(mRecording, "haven't started recording");
+  const uint32_t recordedLength = mOffset - mRecordStartOffset;
+  return recordedLength;
+}
+
+#ifdef DEBUG
+/**
+ * Debug-only accessor: true between StartRecording() and
+ * StopRecording().
+ */
+bool
+nsCSSScanner::IsRecording() const
+{
+  return mRecording;
+}
+#endif
+
+/**
+ * Return the text of the line containing the current token: from the
+ * token's line start up to, but not including, the first vertical
+ * whitespace character at or after the token start (or end of input).
+ */
+nsDependentSubstring
+nsCSSScanner::GetCurrentLine() const
+{
+  uint32_t lineEnd = mTokenOffset;
+  for (; lineEnd < mCount; lineEnd++) {
+    if (IsVertSpace(mBuffer[lineEnd])) {
+      break;
+    }
+  }
+  return nsDependentSubstring(mBuffer + mTokenLineOffset,
+                              mBuffer + lineEnd);
+}
+
+/**
+ * Look at, without consuming, the UTF-16 code unit |n| positions past
+ * the current read position.  Yields -1 (end of input) when that
+ * position lies at or beyond the end of the buffer.
+ */
+inline int32_t
+nsCSSScanner::Peek(uint32_t n)
+{
+  const uint32_t position = mOffset + n;
+  return position < mCount ? int32_t(mBuffer[position]) : -1;
+}
+
+/**
+ * Move the read position forward by |n| code units, stopping silently
+ * at end of input.  Advance(0) does nothing.  Line boundaries must be
+ * crossed with AdvanceLine() instead; debug builds assert on this.
+ */
+inline void
+nsCSSScanner::Advance(uint32_t n)
+{
+#ifdef DEBUG
+  // Step one unit at a time so every skipped character can be checked.
+  for (; mOffset < mCount && n > 0; mOffset++, n--) {
+    MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
+               "may not Advance() over a line boundary");
+  }
+#else
+  const uint32_t target = mOffset + n;
+  // |target < mOffset| means the addition wrapped around.
+  const bool pastEnd = target >= mCount || target < mOffset;
+  mOffset = pastEnd ? mCount : target;
+#endif
+}
+
+/**
+ * Consume the line terminator at the current read position and update
+ * the line bookkeeping.  A CR immediately followed by LF counts as a
+ * single terminator.
+ */
+void
+nsCSSScanner::AdvanceLine()
+{
+  MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
+             "may not AdvanceLine() over a horizontal character");
+  // Advance over \r\n as a unit.
+  const bool isCRLF = mBuffer[mOffset] == '\r' &&
+                      mOffset + 1 < mCount &&
+                      mBuffer[mOffset+1] == '\n';
+  mOffset += isCRLF ? 2 : 1;
+  // 0 is a magical line number meaning that we don't know (i.e., script)
+  if (mLineNumber != 0) {
+    mLineNumber++;
+  }
+  mLineOffset = mOffset;
+}
+
+/**
+ * Move the read position backward by |n| code units, stopping silently
+ * at the beginning of input.  Backup(0) does nothing.  Must not be used
+ * to cross a line boundary; debug builds assert on this.
+ */
+void
+nsCSSScanner::Backup(uint32_t n)
+{
+#ifdef DEBUG
+  // Step one unit at a time so every re-crossed character can be checked.
+  for (; mOffset > 0 && n > 0; mOffset--, n--) {
+    MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
+               "may not Backup() over a line boundary");
+  }
+#else
+  mOffset = (mOffset < n) ? 0 : mOffset - n;
+#endif
+}
+
+/**
+ * Copy the scanner's read position, line bookkeeping, and current-token
+ * bookkeeping into |aState|, and mark |aState| as initialized.
+ */
+void
+nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
+{
+  // Read position and line bookkeeping.
+  aState.mOffset = mOffset;
+  aState.mLineOffset = mLineOffset;
+  aState.mLineNumber = mLineNumber;
+
+  // Bookkeeping for the current token.
+  aState.mTokenOffset = mTokenOffset;
+  aState.mTokenLineOffset = mTokenLineOffset;
+  aState.mTokenLineNumber = mTokenLineNumber;
+
+  aState.mInitialized = true;
+}
+
+/**
+ * Restore the position captured by SavePosition().  If |aState| was
+ * never filled in, this asserts in debug builds and otherwise leaves
+ * the scanner untouched.
+ */
+void
+nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
+{
+  MOZ_ASSERT(aState.mInitialized, "have not saved state");
+  if (!aState.mInitialized) {
+    return;
+  }
+
+  // Read position and line bookkeeping.
+  mOffset = aState.mOffset;
+  mLineOffset = aState.mLineOffset;
+  mLineNumber = aState.mLineNumber;
+
+  // Bookkeeping for the current token.
+  mTokenOffset = aState.mTokenOffset;
+  mTokenLineOffset = aState.mTokenLineOffset;
+  mTokenLineNumber = aState.mTokenLineNumber;
+}
+
+/**
+ * Consume every whitespace character, horizontal or vertical, found at
+ * the current read position.  Stops at the first non-whitespace
+ * character or at end of input.
+ */
+void
+nsCSSScanner::SkipWhitespace()
+{
+  int32_t ch = Peek();
+  while (IsWhitespace(ch)) { // EOF counts as non-whitespace
+    if (IsVertSpace(ch)) {
+      AdvanceLine();
+    } else {
+      Advance();
+    }
+    ch = Peek();
+  }
+}
+
+/**
+ * Consume one CSS comment; the read position must be at its opening
+ * slash-star.  If input ends before the comment is closed, the
+ * unexpected EOF is reported (when a reporter is attached) and the
+ * characters that would have closed the comment are recorded via
+ * SetEOFCharacters.
+ */
+void
+nsCSSScanner::SkipComment()
+{
+  MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
+  Advance(2);
+  for (;;) {
+    int32_t ch = Peek();
+    if (ch < 0) {
+      // Ran out of input inside the comment body.
+      if (mReporter)
+        mReporter->ReportUnexpectedEOF("PECommentEOF");
+      SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
+      return;
+    }
+    if (IsVertSpace(ch)) {
+      AdvanceLine();
+      continue;
+    }
+
+    Advance();
+    if (ch != '*') {
+      continue;
+    }
+
+    // Just consumed a '*'; look at what follows it.
+    ch = Peek();
+    if (ch < 0) {
+      // Input ended right after a '*', so only the '/' is missing.
+      if (mReporter)
+        mReporter->ReportUnexpectedEOF("PECommentEOF");
+      SetEOFCharacters(eEOFCharacters_Slash);
+      return;
+    }
+    if (ch == '/') {
+      Advance();
+      return;
+    }
+    // Anything else is re-examined by the next iteration; this keeps
+    // a run of several '*' characters working.
+  }
+}
+
+/**
+ * If there is a valid escape sequence starting at the current read
+ * position, consume it, decode it, append the result to |aOutput|,
+ * and return true.  Otherwise, consume nothing, leave |aOutput|
+ * unmodified, and return false.  If |aInString| is true, accept the
+ * additional form of escape sequence allowed within string-like tokens.
+ *
+ * The read position must be at a backslash on entry.  The only case
+ * that returns false is a backslash followed by a newline when
+ * |aInString| is false.  A backslash at the very end of input is
+ * always consumed and returns true.
+ */
+bool
+nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
+{
+  MOZ_ASSERT(Peek() == '\\', "should not have been called");
+  int32_t ch = Peek(1);
+  if (ch < 0) {
+    // If we are in a string (or a url() containing a string), we want to drop
+    // the backslash on the floor.  Otherwise, we want to treat it as a U+FFFD
+    // character.
+    Advance();
+    if (aInString) {
+      SetEOFCharacters(eEOFCharacters_DropBackslash);
+    } else {
+      aOutput.Append(UCS2_REPLACEMENT_CHAR);
+      SetEOFCharacters(eEOFCharacters_ReplacementChar);
+    }
+    return true;
+  }
+  if (IsVertSpace(ch)) {
+    if (aInString) {
+      // In strings (and in url() containing a string), escaped
+      // newlines are completely removed, to allow splitting over
+      // multiple lines.
+      Advance();
+      AdvanceLine();
+      return true;
+    }
+    // Outside of strings, backslash followed by a newline is not an escape.
+    return false;
+  }
+
+  if (!IsHexDigit(ch)) {
+    // "Any character (except a hexadecimal digit, linefeed, carriage
+    // return, or form feed) can be escaped with a backslash to remove
+    // its special meaning." -- CSS2.1 section 4.1.3
+    // Consume both the backslash and the escaped character.
+    Advance(2);
+    if (ch == 0) {
+      // An escaped U+0000 is emitted as U+FFFD.
+      aOutput.Append(UCS2_REPLACEMENT_CHAR);
+    } else {
+      aOutput.Append(ch);
+    }
+    return true;
+  }
+
+  // "[at most six hexadecimal digits following a backslash] stand
+  // for the ISO 10646 character with that number, which must not be
+  // zero. (It is undefined in CSS 2.1 what happens if a style sheet
+  // does contain a character with Unicode codepoint zero.)"
+  //   -- CSS2.1 section 4.1.3
+
+  // At this point we know we have \ followed by at least one
+  // hexadecimal digit, therefore the escape sequence is valid and we
+  // can go ahead and consume the backslash.
+  Advance();
+  uint32_t val = 0;
+  int i = 0;
+  // |ch| already holds the first hex digit (obtained via Peek(1) above).
+  // Each iteration folds |ch| into |val|, consumes it, and peeks the
+  // next character; at most six digits are accepted.
+  do {
+    val = val * 16 + HexDigitValue(ch);
+    i++;
+    Advance();
+    ch = Peek();
+  } while (i < 6 && IsHexDigit(ch));
+
+  // "Interpret the hex digits as a hexadecimal number. If this number is zero,
+  // or is greater than the maximum allowed codepoint, return U+FFFD
+  // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
+  if (MOZ_UNLIKELY(val == 0)) {
+    aOutput.Append(UCS2_REPLACEMENT_CHAR);
+  } else {
+    AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
+  }
+
+  // Consume exactly one whitespace character after a
+  // hexadecimal escape sequence.
+  // (|ch| is the character following the last hex digit consumed.)
+  if (IsVertSpace(ch)) {
+    AdvanceLine();
+  } else if (IsHorzSpace(ch)) {
+    Advance();
+  }
+  return true;
+}
+
+/**
+ * Starting at the current read position, consume a run of "text":
+ * characters belonging to |aClass| (interpreted as by IsOpenCharClass),
+ * U+0000 (appended as U+FFFD), and escape sequences (appended in
+ * decoded form).  Everything consumed is appended to |aText|.
+ *
+ * Returns true if the read position moved, false otherwise.
+ */
+bool
+nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
+{
+  // This is all of the character classes currently used with
+  // GatherText.  If you have a need to use this function with a
+  // different class, go ahead and add it.
+  MOZ_ASSERT(aClass == IS_STRING ||
+             aClass == IS_IDCHAR ||
+             aClass == IS_URL_CHAR,
+             "possibly-inappropriate character class");
+
+  const uint32_t startOffset = mOffset;
+  const bool inString = (aClass == IS_STRING);
+
+  for (;;) {
+    // Find the end of the longest run of unescaped class members and
+    // append that run in a single call.
+    uint32_t runEnd = mOffset;
+    for (; runEnd < mCount; runEnd++) {
+      if (!IsOpenCharClass(mBuffer[runEnd], aClass)) {
+        break;
+      }
+    }
+    if (runEnd > mOffset) {
+      aText.Append(&mBuffer[mOffset], runEnd - mOffset);
+      mOffset = runEnd;
+    }
+    if (runEnd == mCount) {
+      break;
+    }
+
+    const int32_t ch = Peek();
+    MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
+               "should not have exited the inner loop");
+    if (ch == 0) {
+      // U+0000 is never in the class table; replace it and carry on.
+      Advance();
+      aText.Append(UCS2_REPLACEMENT_CHAR);
+      continue;
+    }
+
+    // Only a valid escape sequence can extend the run any further.
+    if (ch != '\\' || !GatherEscape(aText, inString)) {
+      break;
+    }
+  }
+
+  return mOffset > startOffset;
+}
+
+/**
+ * Scan a token that starts like an identifier.  The result is an Ident,
+ * or, when the identifier is immediately followed by '(', a Function.
+ * A function named "url" is handed on to NextURL.  If no identifier
+ * text can be gathered at all (the apparent identifier was really an
+ * invalid escape), a single character is consumed into mSymbol and
+ * mType is left as the caller set it.  Always returns true.
+ */
+bool
+nsCSSScanner::ScanIdent(nsCSSToken& aToken)
+{
+  const bool gathered = GatherText(IS_IDCHAR, aToken.mIdent);
+  if (MOZ_UNLIKELY(!gathered)) {
+    MOZ_ASSERT(Peek() == '\\',
+               "unexpected IsIdentStart character that did not begin an ident");
+    aToken.mSymbol = Peek();
+    Advance();
+    return true;
+  }
+
+  if (MOZ_UNLIKELY(Peek() == '(')) {
+    // Identifier directly followed by '(': a function token.
+    Advance();
+    aToken.mType = eCSSToken_Function;
+    if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
+      NextURL(aToken);
+    } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
+      mSeenVariableReference = true;
+    }
+  } else {
+    aToken.mType = eCSSToken_Ident;
+  }
+  return true;
+}
+
+/**
+ * Scan an AtKeyword token; the read position must be at '@'.  If the
+ * '@' is not followed by an identifier, only the '@' is consumed, it is
+ * stored in mSymbol, and mType is left as the caller set it.  Always
+ * returns true.
+ */
+bool
+nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
+{
+  MOZ_ASSERT(Peek() == '@', "should not have been called");
+
+  // Consume the '@' and record it as the fallback symbol for the case
+  // where no identifier follows.
+  Advance();
+  aToken.mSymbol = '@';
+
+  const int32_t first = Peek();
+  const bool identFollows = StartsIdent(first, Peek(1));
+  if (identFollows && GatherText(IS_IDCHAR, aToken.mIdent)) {
+    aToken.mType = eCSSToken_AtKeyword;
+  }
+  return true;
+}
+
+/**
+ * Scan a token starting with '#'.  The result is eCSSToken_ID when the
+ * text after the '#' begins a valid identifier, and eCSSToken_Hash when
+ * it is made of identifier characters but does not.  If no identifier
+ * characters follow, only the '#' is consumed, it is stored in mSymbol,
+ * and mType is left as the caller set it.  Always returns true.
+ */
+bool
+nsCSSScanner::ScanHash(nsCSSToken& aToken)
+{
+  MOZ_ASSERT(Peek() == '#', "should not have been called");
+
+  // Consume the '#' and record it as the fallback symbol.
+  aToken.mSymbol = '#';
+  Advance();
+
+  const int32_t first = Peek();
+  if (!IsIdentChar(first) && first != '\\') {
+    return true;
+  }
+
+  // Classify before gathering, while the first two characters can
+  // still be peeked.
+  const nsCSSTokenType type =
+    StartsIdent(first, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
+  aToken.mIdent.SetLength(0);
+  if (GatherText(IS_IDCHAR, aToken.mIdent)) {
+    aToken.mType = type;
+  }
+  return true;
+}
+
+/**
+ * Scan a Number, Percentage, or Dimension token (all of which begin
+ * like a Number).  The caller must already have established that a
+ * number starts at the current read position: a digit, or a '.', '+'
+ * or '-' followed by a digit, or a sign followed by '.' and a digit
+ * (see the debug-only assertion below).  This function therefore
+ * always produces one of the three token types above and always
+ * returns true.
+ *
+ * On return aToken.mNumber holds the value (divided by 100 for a
+ * Percentage), aToken.mHasSign records whether an explicit sign was
+ * present, and aToken.mInteger / aToken.mIntegerValid are filled in
+ * when the number was written without a fraction or exponent and is not
+ * a Percentage.  For a Dimension the unit is appended to aToken.mIdent.
+ */
+bool
+nsCSSScanner::ScanNumber(nsCSSToken& aToken)
+{
+  int32_t c = Peek();
+#ifdef DEBUG
+  {
+    int32_t c2 = Peek(1);
+    int32_t c3 = Peek(2);
+    MOZ_ASSERT(IsDigit(c) ||
+               (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
+               (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
+               "should not have been called");
+  }
+#endif
+
+  // Sign of the mantissa (-1 or 1).
+  int32_t sign = c == '-' ? -1 : 1;
+  // Absolute value of the integer part of the mantissa.  This is a double so
+  // we don't run into overflow issues for consumers that only care about our
+  // floating-point value while still being able to express the full int32_t
+  // range for consumers who want integers.
+  double intPart = 0;
+  // Fractional part of the mantissa.  This is a double so that when we convert
+  // to float at the end we'll end up rounding to nearest float instead of
+  // truncating down (as we would if fracPart were a float and we just
+  // effectively lost the last several digits).
+  double fracPart = 0;
+  // Absolute value of the power of 10 that we should multiply by (only
+  // relevant for numbers in scientific notation).  Has to be a signed integer,
+  // because multiplication of signed by unsigned converts the unsigned to
+  // signed, so if we plan to actually multiply by expSign...
+  int32_t exponent = 0;
+  // Sign of the exponent.
+  int32_t expSign = 1;
+
+  aToken.mHasSign = (c == '+' || c == '-');
+  if (aToken.mHasSign) {
+    Advance();
+    c = Peek();
+  }
+
+  bool gotDot = (c == '.');
+
+  if (!gotDot) {
+    // Scan the integer part of the mantissa.
+    MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
+    do {
+      intPart = 10*intPart + DecimalDigitValue(c);
+      Advance();
+      c = Peek();
+    } while (IsDigit(c));
+
+    // A '.' only belongs to the number when a digit follows it.
+    gotDot = (c == '.') && IsDigit(Peek(1));
+  }
+
+  if (gotDot) {
+    // Scan the fractional part of the mantissa.
+    Advance();
+    c = Peek();
+    MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
+    // Power of ten by which we need to divide our next digit
+    double divisor = 10;
+    do {
+      fracPart += DecimalDigitValue(c) / divisor;
+      divisor *= 10;
+      Advance();
+      c = Peek();
+    } while (IsDigit(c));
+  }
+
+  bool gotE = false;
+  if (c == 'e' || c == 'E') {
+    int32_t expSignChar = Peek(1);
+    int32_t nextChar = Peek(2);
+    if (IsDigit(expSignChar) ||
+        ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
+      gotE = true;
+      if (expSignChar == '-') {
+        expSign = -1;
+      }
+      Advance(); // consumes the E
+      if (expSignChar == '-' || expSignChar == '+') {
+        Advance();
+        c = nextChar;
+      } else {
+        c = expSignChar;
+      }
+      MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
+      // Largest |exponent| for which 10*exponent + 9 still fits in an
+      // int32_t.  Once that is exceeded the remaining digits are consumed
+      // but no longer accumulated, because accumulating them would
+      // overflow a signed integer (undefined behavior).  The result is
+      // unaffected: pow(10, +/-exponent) has long since saturated to
+      // infinity or zero for exponents of this magnitude.
+      const int32_t kExponentLimit = (INT32_MAX - 9) / 10;
+      do {
+        if (exponent <= kExponentLimit) {
+          exponent = 10*exponent + int32_t(DecimalDigitValue(c));
+        }
+        Advance();
+        c = Peek();
+      } while (IsDigit(c));
+    }
+  }
+
+  nsCSSTokenType type = eCSSToken_Number;
+
+  // Set mIntegerValid for all cases (except %, below) because we need
+  // it for the "2n" in :nth-child(2n).
+  aToken.mIntegerValid = false;
+
+  // Time to reassemble our number.
+  // Do all the math in double precision so it's truncated only once.
+  double value = sign * (intPart + fracPart);
+  if (gotE) {
+    // Avoid multiplication of 0 by Infinity.
+    if (value != 0.0) {
+      // Explicitly cast expSign*exponent to double to avoid issues with
+      // overloaded pow() on Windows.
+      value *= pow(10.0, double(expSign * exponent));
+    }
+  } else if (!gotDot) {
+    // Clamp values outside of integer range.
+    if (sign > 0) {
+      aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
+    } else {
+      aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
+    }
+    aToken.mIntegerValid = true;
+  }
+
+  nsString& ident = aToken.mIdent;
+
+  // Check for Dimension and Percentage tokens.
+  if (c >= 0) {
+    if (StartsIdent(c, Peek(1))) {
+      if (GatherText(IS_IDCHAR, ident)) {
+        type = eCSSToken_Dimension;
+      }
+    } else if (c == '%') {
+      Advance();
+      type = eCSSToken_Percentage;
+      value = value / 100.0f;
+      aToken.mIntegerValid = false;
+    }
+  }
+  MOZ_ASSERT(!IsNaN(value), "The value should not be NaN");
+  aToken.mNumber = value;
+  aToken.mType = type;
+  return true;
+}
+
+/**
+ * Scan a quoted string ('foo' or "foo"); the read position must be at
+ * the opening quote.  Produces a String token, or a Bad_String token
+ * when the string is cut short by a character that may not appear
+ * inside it.  Reaching end of input before the closing quote still
+ * yields a String.  Always returns true (for convenience in Next()).
+ */
+bool
+nsCSSScanner::ScanString(nsCSSToken& aToken)
+{
+  const int32_t quote = Peek();
+  MOZ_ASSERT(quote == '"' || quote == '\'', "should not have been called");
+  aToken.mType = eCSSToken_String;
+  aToken.mSymbol = char16_t(quote); // Remember how it's quoted.
+  Advance();
+
+  for (;;) {
+    GatherText(IS_STRING, aToken.mIdent);
+
+    const int32_t ch = Peek();
+    if (ch == -1) {
+      // EOF ends a string token with no error.
+      AddEOFCharacters(quote == '"' ? eEOFCharacters_DoubleQuote :
+                                      eEOFCharacters_SingleQuote);
+      return true;
+    }
+    if (ch == quote) {
+      Advance();
+      return true;
+    }
+    if (ch != '"' && ch != '\'') {
+      // GatherText stopped on something that cannot be inside a string.
+      mSeenBadToken = true;
+      aToken.mType = eCSSToken_Bad_String;
+      if (mReporter)
+        mReporter->ReportUnexpected("SEUnterminatedString", aToken);
+      return true;
+    }
+
+    // Both " and ' are excluded from IS_STRING, so the other kind of
+    // quote ends up here and is ordinary string content.
+    aToken.mIdent.Append(ch);
+    Advance();
+  }
+}
+
+/**
+ * Scan a unicode-range token.  These match the regular expression
+ *
+ *     u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
+ *
+ * However, some such tokens are "invalid".  There are three valid forms:
+ *
+ *     u+[0-9a-f]{x}              1 <= x <= 6
+ *     u+[0-9a-f]{x}\?{y}         1 <= x+y <= 6
+ *     u+[0-9a-f]{x}-[0-9a-f]{y}  1 <= x <= 6, 1 <= y <= 6
+ *
+ * All unicode-range tokens have their text recorded in mIdent; valid ones
+ * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
+ * Note that this does not validate the numeric range, only the syntactic
+ * form.
+ *
+ * The read position must be at the 'u' or 'U' of a "u+" introducer that
+ * is followed by a hex digit or '?'.  Always returns true.
+ */
+bool
+nsCSSScanner::ScanURange(nsCSSToken& aResult)
+{
+  int32_t intro1 = Peek();
+  int32_t intro2 = Peek(1);
+  int32_t ch = Peek(2);
+
+  MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
+             intro2 == '+' &&
+             (IsHexDigit(ch) || ch == '?'),
+             "should not have been called");
+
+  // Record and consume the "u+" introducer; |ch| already holds the
+  // first character after it.
+  aResult.mIdent.Append(intro1);
+  aResult.mIdent.Append(intro2);
+  Advance(2);
+
+  bool valid = true;
+  bool haveQues = false;
+  uint32_t low = 0;
+  uint32_t high = 0;
+  int i = 0;
+
+  // First group: up to six hex digits and/or '?' wildcards.  Each
+  // character shifts both bounds by one hex digit; a '?' contributes 0
+  // to the low bound and F to the high bound.
+  do {
+    aResult.mIdent.Append(ch);
+    if (IsHexDigit(ch)) {
+      if (haveQues) {
+        valid = false; // All question marks should be at the end.
+      }
+      low = low*16 + HexDigitValue(ch);
+      high = high*16 + HexDigitValue(ch);
+    } else {
+      haveQues = true;
+      low = low*16 + 0x0;
+      high = high*16 + 0xF;
+    }
+
+    i++;
+    Advance();
+    ch = Peek();
+  } while (i < 6 && (IsHexDigit(ch) || ch == '?'));
+
+  // Optional second group: '-' followed by up to six hex digits, which
+  // replaces the high bound.
+  if (ch == '-' && IsHexDigit(Peek(1))) {
+    if (haveQues) {
+      // Wildcards may not be combined with an explicit end of range.
+      valid = false;
+    }
+
+    aResult.mIdent.Append(ch);
+    Advance();
+    ch = Peek();
+    high = 0;
+    i = 0;
+    do {
+      aResult.mIdent.Append(ch);
+      high = high*16 + HexDigitValue(ch);
+
+      i++;
+      Advance();
+      ch = Peek();
+    } while (i < 6 && IsHexDigit(ch));
+  }
+
+  // The bounds are stored even when the token is syntactically invalid;
+  // mIntegerValid tells consumers whether they may be used.
+  aResult.mInteger = low;
+  aResult.mInteger2 = high;
+  aResult.mIntegerValid = valid;
+  aResult.mType = eCSSToken_URange;
+  return true;
+}
+
+#ifdef DEBUG
+/**
+ * Debug-only sanity check: assert that |c| is one of the combinations
+ * of EOFCharacters flags enumerated below.  Any other combination
+ * triggers the assertion.
+ */
+/* static */ void
+nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
+{
+  MOZ_ASSERT(c == eEOFCharacters_None ||
+             c == eEOFCharacters_ReplacementChar ||
+             c == eEOFCharacters_Slash ||
+             c == (eEOFCharacters_Asterisk |
+                   eEOFCharacters_Slash) ||
+             c == eEOFCharacters_DoubleQuote ||
+             c == eEOFCharacters_SingleQuote ||
+             c == (eEOFCharacters_DropBackslash |
+                   eEOFCharacters_DoubleQuote) ||
+             c == (eEOFCharacters_DropBackslash |
+                   eEOFCharacters_SingleQuote) ||
+             c == eEOFCharacters_CloseParen ||
+             c == (eEOFCharacters_ReplacementChar |
+                   eEOFCharacters_CloseParen) ||
+             c == (eEOFCharacters_DoubleQuote |
+                   eEOFCharacters_CloseParen) ||
+             c == (eEOFCharacters_SingleQuote |
+                   eEOFCharacters_CloseParen) ||
+             c == (eEOFCharacters_DropBackslash |
+                   eEOFCharacters_DoubleQuote |
+                   eEOFCharacters_CloseParen) ||
+             c == (eEOFCharacters_DropBackslash |
+                   eEOFCharacters_SingleQuote |
+                   eEOFCharacters_CloseParen),
+             "invalid EOFCharacters value");
+}
+#endif
+
+/**
+ * Overwrite the stored EOF-characters state with |aEOFCharacters|,
+ * converted to the EOFCharacters type.  No validation is performed here.
+ */
+void
+nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
+{
+  const EOFCharacters replacement =
+    static_cast<EOFCharacters>(aEOFCharacters);
+  mEOFCharacters = replacement;
+}
+
+/**
+ * OR the bits in |aEOFCharacters| into the stored EOF-characters state;
+ * bits that are already set stay set.
+ */
+void
+nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
+{
+  const uint32_t merged = mEOFCharacters | aEOFCharacters;
+  mEOFCharacters = static_cast<EOFCharacters>(merged);
+}
+
+// Characters that AppendImpliedEOFCharacters may append, listed in the
+// order it walks them.  The final 0 entry terminates the table.
+static const char16_t kImpliedEOFCharacters[] = {
+  UCS2_REPLACEMENT_CHAR,
+  '*',
+  '/',
+  '"',
+  '\'',
+  ')',
+  0
+};
+
+/**
+ * Append to |aResult| the entries of kImpliedEOFCharacters selected by
+ * the bits set in |aEOFCharacters|, taking the bits lowest first.
+ */
+/* static */ void
+nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
+                                         nsAString& aResult)
+{
+  // Discard the lowest bit (eEOFCharacters_DropBackslash); it has no entry
+  // in the character table.
+  uint32_t c = aEOFCharacters >> 1;
+
+  // Walk the table and the remaining bits in step, so that bit N selects
+  // table entry N.  Stop at the table's 0 terminator or once no bits
+  // remain.
+  uint32_t slot = 0;
+  while (kImpliedEOFCharacters[slot] != 0 && c != 0) {
+    const bool wanted = (c & 1) != 0;
+    if (wanted) {
+      aResult.Append(kImpliedEOFCharacters[slot]);
+    }
+    c >>= 1;
+    slot++;
+  }
+
+  // Any bit still set at this point had no table entry.
+  MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters");
+}
+
+/**
+ * Scan the remainder of an URL token.  The caller must already have
+ * consumed the leading 'url('.  On return |aToken| is always either an
+ * URL token or a Bad_URL token.
+ *
+ * This is public so that nsCSSParser::ParseMozDocumentRule can apply the
+ * URL lexical rules in a nonstandard context.
+ */
+void
+nsCSSScanner::NextURL(nsCSSToken& aToken)
+{
+  SkipWhitespace();
+
+  // mIdent may still hold "url" from the caller; start from empty.
+  aToken.mIdent.Truncate();
+
+  const int32_t first = Peek();
+  const bool quoted = (first == '"' || first == '\'');
+  if (!quoted) {
+    // Unquoted URL body (possibly empty): collect URL characters.
+    aToken.mSymbol = char16_t(0);
+    GatherText(IS_URL_CHAR, aToken.mIdent);
+  } else {
+    // Quoted URL body: reuse the string scanner, then translate a bad
+    // string into a bad URL.
+    ScanString(aToken);
+    if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
+      aToken.mType = eCSSToken_Bad_URL;
+      return;
+    }
+    MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
+  }
+
+  // After optional trailing whitespace, only ')' or EOF (a negative
+  // value from Peek) may follow; anything else makes this a bad URL.
+  SkipWhitespace();
+  const int32_t closer = Peek();
+  if (MOZ_UNLIKELY(closer >= 0 && closer != ')')) {
+    mSeenBadToken = true;
+    aToken.mType = eCSSToken_Bad_URL;
+    return;
+  }
+
+  Advance();
+  aToken.mType = eCSSToken_URL;
+  if (closer < 0) {
+    // Hit EOF before the ')': record that one was implied.
+    AddEOFCharacters(eEOFCharacters_CloseParen);
+  }
+}
+
+/**
+ * Main entry point of the scanner: read one token into |aToken|.
+ *
+ * Whitespace and comments in front of the token are either skipped or
+ * returned as tokens of their own, according to |aSkip|:
+ *  - whitespace is returned unless |aSkip| is
+ *    eCSSScannerExclude_WhitespaceAndComments;
+ *  - comments are returned only when |aSkip| is eCSSScannerExclude_None.
+ *
+ * Returns true when a token was consumed and false when EOF was reached.
+ * Unless already at EOF, the read position always moves forward by at
+ * least one character.
+ */
+bool
+nsCSSScanner::Next(nsCSSToken& aToken, nsCSSScannerExclude aSkip)
+{
+  // Reset the common fields once here instead of in every scan routine.
+  aToken.mIdent.Truncate();
+  aToken.mType = eCSSToken_Symbol;
+
+  // Step over any run of whitespace and comments.  The token start
+  // position is re-recorded on every pass, so it ends up describing
+  // whatever token is finally returned.
+  int32_t ch;
+  bool skipping = true;
+  do {
+    mTokenOffset = mOffset;
+    mTokenLineOffset = mLineOffset;
+    mTokenLineNumber = mLineNumber;
+
+    ch = Peek();
+    if (IsWhitespace(ch)) {
+      SkipWhitespace();
+      if (aSkip != eCSSScannerExclude_WhitespaceAndComments) {
+        aToken.mType = eCSSToken_Whitespace;
+        return true;
+      }
+    } else if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
+      SkipComment();
+      if (aSkip == eCSSScannerExclude_None) {
+        aToken.mType = eCSSToken_Comment;
+        return true;
+      }
+    } else {
+      skipping = false;
+    }
+  } while (skipping);
+
+  // A negative character means EOF.
+  if (ch < 0) {
+    return false;
+  }
+
+  // "u+" followed by a hex digit or '?' is a UNICODE-RANGE; any other
+  // 'u' or 'U' begins an identifier-family token.
+  if (ch == 'u' || ch == 'U') {
+    const int32_t second = Peek(1);
+    const int32_t third = Peek(2);
+    if (second == '+' && (IsHexDigit(third) || third == '?')) {
+      return ScanURange(aToken);
+    }
+    return ScanIdent(aToken);
+  }
+
+  // Identifier family.
+  if (IsIdentStart(ch)) {
+    return ScanIdent(aToken);
+  }
+
+  // Number family: a digit, or '.' immediately followed by a digit ...
+  if (IsDigit(ch) || (ch == '.' && IsDigit(Peek(1)))) {
+    return ScanNumber(aToken);
+  }
+
+  // ... or '+' followed by either of those forms.
+  if (ch == '+') {
+    const int32_t second = Peek(1);
+    if (IsDigit(second) || (second == '.' && IsDigit(Peek(2)))) {
+      return ScanNumber(aToken);
+    }
+  }
+
+  // '-' may begin an identifier-family token, a number-family token, or
+  // the "-->" HTML-comment token.  The tests run in that order.
+  if (ch == '-') {
+    const int32_t second = Peek(1);
+    const int32_t third = Peek(2);
+    const bool doubleDash = (second == '-');
+    if (IsIdentStart(second) || (doubleDash && third != '>')) {
+      return ScanIdent(aToken);
+    }
+    if (IsDigit(second) || (second == '.' && IsDigit(third))) {
+      return ScanNumber(aToken);
+    }
+    if (doubleDash && third == '>') {
+      Advance(3);
+      aToken.mType = eCSSToken_HTMLComment;
+      aToken.mIdent.AssignLiteral("-->");
+      return true;
+    }
+  }
+
+  // The other HTML-comment token, "<!--".
+  if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
+    Advance(4);
+    aToken.mType = eCSSToken_HTMLComment;
+    aToken.mIdent.AssignLiteral("<!--");
+    return true;
+  }
+
+  // Tokens identified by their first character alone.
+  switch (ch) {
+    case '@':
+      // AT_KEYWORD
+      return ScanAtKeyword(aToken);
+    case '#':
+      // HASH
+      return ScanHash(aToken);
+    case '"':
+    case '\'':
+      // STRING
+      return ScanString(aToken);
+    default:
+      break;
+  }
+
+  // Match operators (~= |= ^= $= *=): an operator character recognized
+  // by MatchOperatorType, directly followed by '='.
+  const nsCSSTokenType matchOp = MatchOperatorType(ch);
+  if (matchOp != eCSSToken_Symbol && Peek(1) == '=') {
+    Advance(2);
+    aToken.mType = matchOp;
+    return true;
+  }
+
+  // Anything else is a one-character symbol (DELIM); mType is still
+  // eCSSToken_Symbol from the reset at the top.
+  aToken.mSymbol = ch;
+  Advance();
+  return true;
+}
+
+/* nsCSSGridTemplateAreaScanner methods. */
+
+/**
+ * Set up a scanner over |aBuffer|, positioned at offset 0.  Only the
+ * pointer returned by BeginReading() and the length are stored, not a
+ * copy of the text.
+ * NOTE(review): presumably |aBuffer| must stay alive and unmodified for
+ * as long as the scanner is used -- confirm against callers.
+ */
+nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(
+    const nsAString& aBuffer)
+  : mBuffer(aBuffer.BeginReading()),
+    mOffset(0),
+    mCount(aBuffer.Length())
+{
+}
+
+/**
+ * Read the next token from the buffer into |aTokenResult|.
+ *
+ * Leading whitespace is skipped.  The token is then one of:
+ *  - a named cell: a run of IS_IDCHAR characters, copied into mName;
+ *  - a null cell: a run of one or more '.', with mName emptied;
+ *  - trash: any other single character; isTrash is set and mName is
+ *    left as it was.
+ *
+ * Returns false if the buffer is exhausted before a token starts, and
+ * true otherwise.
+ */
+bool
+nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
+{
+  // Consume characters up to and including the first non-whitespace one.
+  int32_t ch;
+  for (;;) {
+    if (mOffset >= mCount) {
+      return false;
+    }
+    ch = mBuffer[mOffset];
+    mOffset++;
+    if (!IsWhitespace(ch)) {
+      break;
+    }
+  }
+
+  const bool namedCell = IsOpenCharClass(ch, IS_IDCHAR);
+  if (!namedCell && ch != '.') {
+    // Neither a name character nor '.': trash token.
+    aTokenResult.isTrash = true;
+    return true;
+  }
+
+  if (namedCell) {
+    // |ch| has already been consumed, so the name starts one back.
+    const uint32_t nameStart = mOffset - 1;
+    for (; mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR);
+         mOffset++) {
+      // Extend the name over every following IS_IDCHAR character.
+    }
+    aTokenResult.mName.Assign(&mBuffer[nameStart], mOffset - nameStart);
+  } else {
+    // Null cell: fold any further consecutive '.' into this token.
+    for (; mOffset < mCount && mBuffer[mOffset] == '.'; mOffset++) {
+    }
+    aTokenResult.mName.Truncate();
+  }
+
+  aTokenResult.isTrash = false;
+  return true;
+}
author	Matt A. Tobin <mattatobin@localhost.localdomain>	2018-02-02 04:16:08 -0500
committer	Matt A. Tobin <mattatobin@localhost.localdomain>	2018-02-02 04:16:08 -0500
commit	5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree	10027f336435511475e392454359edea8e25895d /layout/style/nsCSSScanner.cpp
parent	49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
download	UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip