summary | refs | log | tree | commit | diff | stats
path: root/layout/style/nsCSSScanner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'layout/style/nsCSSScanner.cpp')
-rw-r--r-- layout/style/nsCSSScanner.cpp 1380
1 files changed, 1380 insertions, 0 deletions
diff --git a/layout/style/nsCSSScanner.cpp b/layout/style/nsCSSScanner.cpp
new file mode 100644
index 000000000..771c8936b
--- /dev/null
+++ b/layout/style/nsCSSScanner.cpp
@@ -0,0 +1,1380 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+/* tokenization of CSS style sheets */
+
+#include "nsCSSScanner.h"
+#include "nsStyleUtil.h"
+#include "nsISupportsImpl.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/css/ErrorReporter.h"
+#include "mozilla/Likely.h"
+#include <algorithm>
+
+/* Character class tables and related helper functions. */
+
+static const uint8_t IS_HEX_DIGIT = 0x01; // tested by IsHexDigit()
+static const uint8_t IS_IDSTART = 0x02; // tested by IsIdentStart()
+static const uint8_t IS_IDCHAR = 0x04; // tested by IsIdentChar(); also passed to GatherText()
+static const uint8_t IS_URL_CHAR = 0x08; // passed to GatherText() by NextURL() for unquoted URLs
+static const uint8_t IS_HSPACE = 0x10; // tested by IsHorzSpace()
+static const uint8_t IS_VSPACE = 0x20; // tested by IsVertSpace()
+static const uint8_t IS_SPACE = IS_HSPACE|IS_VSPACE; // either kind of whitespace; tested by IsWhitespace()
+static const uint8_t IS_STRING = 0x40; // passed to GatherText() by ScanString()
+
+#define H IS_HSPACE // one- and few-letter abbreviations keep the table below readable
+#define V IS_VSPACE
+#define I IS_IDCHAR
+#define J IS_IDSTART
+#define U IS_URL_CHAR
+#define S IS_STRING
+#define X IS_HEX_DIGIT
+
+#define SH S|H // combinations of the single-letter flags above
+#define SU S|U
+#define SUI S|U|I
+#define SUIJ S|U|I|J
+#define SUIX S|U|I|X
+#define SUIJX S|U|I|J|X
+
+static const uint8_t gLexTable[] = { // class flags for each ASCII code point, indexed by character value
+// 00 01 02 03 04 05 06 07
+ 0, S, S, S, S, S, S, S,
+// 08 TAB LF 0B FF CR 0E 0F
+ S, SH, V, S, V, V, S, S,
+// 10 11 12 13 14 15 16 17
+ S, S, S, S, S, S, S, S,
+// 18 19 1A 1B 1C 1D 1E 1F
+ S, S, S, S, S, S, S, S,
+//SPC ! " # $ % & '
+ SH, SU, 0, SU, SU, SU, SU, 0,
+// ( ) * + , - . /
+ S, S, SU, SU, SU, SUI, SU, SU,
+// 0 1 2 3 4 5 6 7
+ SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
+// 8 9 : ; < = > ?
+ SUIX, SUIX, SU, SU, SU, SU, SU, SU,
+// @ A B C D E F G
+ SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
+// H I J K L M N O
+ SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+// P Q R S T U V W
+ SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+// X Y Z [ \ ] ^ _
+ SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ,
+// ` a b c d e f g
+ SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
+// h i j k l m n o
+ SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+// p q r s t u v w
+ SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+// x y z { | } ~ 7F
+ SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S,
+};
+
+static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128, // IsOpenCharClass/IsClosedCharClass index the table only with values below 128
+ "gLexTable expected to cover all 128 ASCII characters");
+
+// Retire every table-building abbreviation defined above, including H and V,
+// so that none of these very short macro names can collide with identifiers
+// later in the translation unit.
+#undef H
+#undef V
+#undef I
+#undef J
+#undef U
+#undef S
+#undef X
+#undef SH
+#undef SU
+#undef SUI
+#undef SUIJ
+#undef SUIX
+#undef SUIJX
+
+/**
+ * Test 'ch' against the class mask 'cls' (one of the IS_* constants or
+ * several of them OR-ed together).  This is the "open" variant: every
+ * value of 128 or more is accepted whatever 'cls' is, and negative
+ * values (EOF) are always rejected.
+ */
+static inline bool
+IsOpenCharClass(int32_t ch, uint8_t cls) {
+  if (ch < 0) {
+    return false;
+  }
+  if (ch >= 128) {
+    return true;
+  }
+  return (gLexTable[ch] & cls) != 0;
+}
+
+/**
+ * Test 'ch' against the class mask 'cls' (one of the IS_* constants or
+ * several of them OR-ed together).  This is the "closed" variant: only
+ * values in [0, 128) can match, so neither characters above U+007F nor
+ * EOF (negative) are ever in 'cls'.
+ */
+static inline bool
+IsClosedCharClass(int32_t ch, uint8_t cls) {
+  if (ch < 0 || ch >= 128) {
+    return false;
+  }
+  return (gLexTable[ch] & cls) != 0;
+}
+
+/**
+ * CSS whitespace test: true for the ASCII characters flagged IS_HSPACE
+ * or IS_VSPACE in gLexTable (TAB, LF, FF, CR and SPC), false for
+ * everything else including EOF.
+ */
+static inline bool
+IsWhitespace(int32_t ch) {
+  const uint8_t anySpace = IS_SPACE;
+  return IsClosedCharClass(ch, anySpace);
+}
+
+/**
+ * Horizontal whitespace test: true only for TAB and SPC.
+ */
+static inline bool
+IsHorzSpace(int32_t ch) {
+  const uint8_t horizontal = IS_HSPACE;
+  return IsClosedCharClass(ch, horizontal);
+}
+
+/**
+ * Vertical whitespace test: true only for LF, FF and CR.  These
+ * characters end a line, so they must be consumed with AdvanceLine()
+ * rather than Advance().
+ */
+static inline bool
+IsVertSpace(int32_t ch) {
+  const uint8_t vertical = IS_VSPACE;
+  return IsClosedCharClass(ch, vertical);
+}
+
+/**
+ * True if 'ch' may appear in the middle of an identifier.  U+0000 is
+ * accepted here explicitly because it is later handled as U+FFFD; it is
+ * deliberately left out of the IS_IDCHAR table class so that GatherText
+ * does not treat it as an ordinary character.
+ */
+static inline bool
+IsIdentChar(int32_t ch) {
+  if (ch == 0) {
+    return true;
+  }
+  return IsOpenCharClass(ch, IS_IDCHAR);
+}
+
+/**
+ * True if 'ch' can, on its own, begin an identifier.  U+0000 is
+ * accepted here explicitly because it is later handled as U+FFFD; it is
+ * deliberately left out of the IS_IDSTART table class so that GatherText
+ * does not treat it as an ordinary character.
+ * (Every character accepted here is also accepted by IsIdentChar.)
+ */
+static inline bool
+IsIdentStart(int32_t ch) {
+  if (ch == 0) {
+    return true;
+  }
+  return IsOpenCharClass(ch, IS_IDSTART);
+}
+
+/**
+ * True if the two characters aFirstChar, aSecondChar (in that order)
+ * begin an identifier: either aFirstChar is itself an identifier start,
+ * or it is '-' followed by a second '-' or by an identifier start.
+ */
+static inline bool
+StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
+{
+  if (IsIdentStart(aFirstChar)) {
+    return true;
+  }
+  if (aFirstChar != '-') {
+    return false;
+  }
+  return aSecondChar == '-' || IsIdentStart(aSecondChar);
+}
+
+/**
+ * True if 'ch' is one of the ASCII decimal digits '0' through '9'.
+ */
+static inline bool
+IsDigit(int32_t ch) {
+  return '0' <= ch && ch <= '9';
+}
+
+/**
+ * True if 'ch' is flagged IS_HEX_DIGIT in gLexTable, i.e. it is one of
+ * 0-9, a-f or A-F.
+ */
+static inline bool
+IsHexDigit(int32_t ch) {
+  const uint8_t hex = IS_HEX_DIGIT;
+  return IsClosedCharClass(ch, hex);
+}
+
+/**
+ * Numeric value of the decimal digit 'ch'.  The caller must already
+ * know that 'ch' is a decimal digit; no check is made here.
+ */
+static inline uint32_t
+DecimalDigitValue(int32_t ch)
+{
+  return uint32_t(ch - '0');
+}
+
+/**
+ * Numeric value of the hexadecimal digit 'ch'.  The caller must already
+ * know that 'ch' is a hexadecimal digit; no check is made here.
+ */
+static inline uint32_t
+HexDigitValue(int32_t ch)
+{
+  // For letters, ch & 7 keeps only the low three bits, which are 1..6 for
+  // both 'a'..'f' and 'A'..'F'; adding 9 turns that into 10..15.
+  return IsDigit(ch) ? DecimalDigitValue(ch)
+                     : uint32_t((ch & 0x7) + 9);
+}
+
+/**
+ * Map the first character of a two-character match operator (~= |= ^=
+ * $= *=) to that operator's token type.  Any other character yields
+ * eCSSToken_Symbol, which callers read as "not a match operator".
+ */
+static inline nsCSSTokenType
+MatchOperatorType(int32_t ch)
+{
+  if (ch == '~') {
+    return eCSSToken_Includes;
+  }
+  if (ch == '|') {
+    return eCSSToken_Dashmatch;
+  }
+  if (ch == '^') {
+    return eCSSToken_Beginsmatch;
+  }
+  if (ch == '$') {
+    return eCSSToken_Endsmatch;
+  }
+  if (ch == '*') {
+    return eCSSToken_Containsmatch;
+  }
+  return eCSSToken_Symbol;
+}
+
+/* Out-of-line nsCSSToken methods. */
+
+/**
+ * Append a textual form of this token to |aBuffer|.  Token types that
+ * share most of their output are handled by one case group each, with
+ * the per-type difference selected on mType inside the group.  An
+ * unrecognised token type triggers NS_ERROR and appends nothing.
+ */
+void
+nsCSSToken::AppendToString(nsString& aBuffer) const
+{
+  switch (mType) {
+    // Identifier-like tokens: optional sigil, then the escaped identifier.
+    case eCSSToken_Ident:
+    case eCSSToken_AtKeyword:
+    case eCSSToken_ID:
+    case eCSSToken_Hash:
+      if (mType == eCSSToken_AtKeyword) {
+        aBuffer.Append('@');
+      } else if (mType != eCSSToken_Ident) {
+        aBuffer.Append('#');
+      }
+      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
+      break;
+
+    case eCSSToken_Function:
+      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
+      aBuffer.Append('(');
+      break;
+
+    // url(...): mSymbol holds the quote character, or 0 when unquoted.
+    // A Bad_URL gets no closing parenthesis.
+    case eCSSToken_URL:
+    case eCSSToken_Bad_URL:
+      aBuffer.AppendLiteral("url(");
+      if (mSymbol == char16_t(0)) {
+        aBuffer.Append(mIdent);
+      } else {
+        nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
+      }
+      if (mType == eCSSToken_URL) {
+        aBuffer.Append(char16_t(')'));
+      }
+      break;
+
+    // Numbers and dimensions print the same numeric text; a dimension
+    // then appends its unit identifier.
+    case eCSSToken_Number:
+    case eCSSToken_Dimension:
+      if (mIntegerValid) {
+        aBuffer.AppendInt(mInteger, 10);
+      } else {
+        aBuffer.AppendFloat(mNumber);
+      }
+      if (mType == eCSSToken_Dimension) {
+        nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
+      }
+      break;
+
+    case eCSSToken_Percentage:
+      aBuffer.AppendFloat(mNumber * 100.0f);
+      aBuffer.Append(char16_t('%'));
+      break;
+
+    // Strings are written quoted and escaped; for a Bad_String the
+    // trailing quote character is then removed again.
+    case eCSSToken_String:
+    case eCSSToken_Bad_String:
+      nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
+      if (mType == eCSSToken_Bad_String) {
+        aBuffer.Truncate(aBuffer.Length() - 1);
+      }
+      break;
+
+    case eCSSToken_Symbol:
+      aBuffer.Append(mSymbol);
+      break;
+
+    case eCSSToken_Whitespace:
+      aBuffer.Append(' ');
+      break;
+
+    // These tokens carry their literal text in mIdent.
+    case eCSSToken_HTMLComment:
+    case eCSSToken_URange:
+      aBuffer.Append(mIdent);
+      break;
+
+    // Two-character match operators.
+    case eCSSToken_Includes:
+      aBuffer.AppendLiteral("~=");
+      break;
+    case eCSSToken_Dashmatch:
+      aBuffer.AppendLiteral("|=");
+      break;
+    case eCSSToken_Beginsmatch:
+      aBuffer.AppendLiteral("^=");
+      break;
+    case eCSSToken_Endsmatch:
+      aBuffer.AppendLiteral("$=");
+      break;
+    case eCSSToken_Containsmatch:
+      aBuffer.AppendLiteral("*=");
+      break;
+
+    default:
+      NS_ERROR("invalid token type");
+      break;
+  }
+}
+
+/* nsCSSScanner methods. */
+
+nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber) // scanner over aBuffer; aLineNumber seeds both line counters
+ : mBuffer(aBuffer.BeginReading()) // pointer into aBuffer; presumably aBuffer must outlive the scanner -- confirm
+ , mOffset(0) // reading starts at the first code unit
+ , mCount(aBuffer.Length())
+ , mLineNumber(aLineNumber) // AdvanceLine() treats 0 as "line number unknown" and never increments it
+ , mLineOffset(0)
+ , mTokenLineNumber(aLineNumber)
+ , mTokenLineOffset(0)
+ , mTokenOffset(0)
+ , mRecordStartOffset(0)
+ , mEOFCharacters(eEOFCharacters_None)
+ , mReporter(nullptr) // no error reporter attached; reporting sites check for null
+ , mSVGMode(false)
+ , mRecording(false)
+ , mSeenBadToken(false)
+ , mSeenVariableReference(false)
+{
+ MOZ_COUNT_CTOR(nsCSSScanner);
+}
+
+nsCSSScanner::~nsCSSScanner() // pairs with MOZ_COUNT_CTOR in the constructor; nothing else is released here
+{
+ MOZ_COUNT_DTOR(nsCSSScanner);
+}
+
+/**
+ * Begin recording: remember the current read position as the start of
+ * the recorded range.  Must not be called while already recording.
+ */
+void
+nsCSSScanner::StartRecording()
+{
+  MOZ_ASSERT(!mRecording, "already started recording");
+  mRecordStartOffset = mOffset;
+  mRecording = true;
+}
+
+/**
+ * End recording and discard the recorded range.  Must only be called
+ * while recording.
+ */
+void
+nsCSSScanner::StopRecording()
+{
+  MOZ_ASSERT(mRecording, "haven't started recording");
+  mRecording = false;
+}
+
+/**
+ * End recording and append to |aBuffer| everything between the position
+ * where recording started and the current read position.  Must only be
+ * called while recording.
+ */
+void
+nsCSSScanner::StopRecording(nsString& aBuffer)
+{
+  MOZ_ASSERT(mRecording, "haven't started recording");
+  const uint32_t recordedLength = mOffset - mRecordStartOffset;
+  aBuffer.Append(mBuffer + mRecordStartOffset, recordedLength);
+  mRecording = false;
+}
+
+/**
+ * Number of code units consumed since recording started.  Must only be
+ * called while recording.
+ */
+uint32_t
+nsCSSScanner::RecordingLength() const
+{
+  MOZ_ASSERT(mRecording, "haven't started recording");
+  const uint32_t recordedLength = mOffset - mRecordStartOffset;
+  return recordedLength;
+}
+
+#ifdef DEBUG
+/**
+ * DEBUG-only accessor: whether a recording is currently in progress.
+ */
+bool
+nsCSSScanner::IsRecording() const
+{
+  const bool recording = mRecording;
+  return recording;
+}
+#endif
+
+/**
+ * Return the line containing the current token: from the start of the
+ * token's line (mTokenLineOffset) up to, but not including, the first
+ * vertical-whitespace character at or after the token's start, or to
+ * the end of the buffer if there is none.
+ */
+nsDependentSubstring
+nsCSSScanner::GetCurrentLine() const
+{
+  uint32_t lineEnd = mTokenOffset;
+  for (; lineEnd < mCount; ++lineEnd) {
+    if (IsVertSpace(mBuffer[lineEnd])) {
+      break;
+    }
+  }
+  return nsDependentSubstring(mBuffer + mTokenLineOffset,
+                              mBuffer + lineEnd);
+}
+
+/**
+ * Look at, without consuming, the raw UTF-16 code unit |n| positions
+ * ahead of the current read position.  Returns -1 (end of input) when
+ * that position lies at or beyond the end of the buffer.
+ */
+inline int32_t
+nsCSSScanner::Peek(uint32_t n)
+{
+  if (mOffset + n < mCount) {
+    return mBuffer[mOffset + n];
+  }
+  return -1;
+}
+
+/**
+ * Advance |mOffset| over |n| code units. Advance(0) is a no-op.
+ * If |n| is greater than the distance to end of input, will silently
+ * stop at the end. May not be used to advance over a line boundary;
+ * AdvanceLine() must be used instead.
+ *
+ * DEBUG builds step one code unit at a time so that each unit skipped
+ * can be asserted not to be vertical whitespace; other builds perform a
+ * single clamped addition.
+ */
+inline void
+nsCSSScanner::Advance(uint32_t n)
+{
+#ifdef DEBUG
+ while (mOffset < mCount && n > 0) {
+ MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
+ "may not Advance() over a line boundary");
+ mOffset++;
+ n--;
+ }
+#else
+ if (mOffset + n >= mCount || mOffset + n < mOffset) // reached the end, or the sum wrapped around
+ mOffset = mCount;
+ else
+ mOffset += n;
+#endif
+}
+
+/**
+ * Consume the line terminator at the current read position and update
+ * the line bookkeeping.  A "\r\n" pair counts as a single terminator.
+ * The caller must ensure the current character is vertical whitespace.
+ */
+void
+nsCSSScanner::AdvanceLine()
+{
+  MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
+             "may not AdvanceLine() over a horizontal character");
+  const bool isCRLF = mBuffer[mOffset] == '\r' &&
+                      mOffset + 1 < mCount &&
+                      mBuffer[mOffset+1] == '\n';
+  mOffset += isCRLF ? 2 : 1;
+  // Line number 0 is a sentinel meaning "unknown" (i.e., script), so it
+  // is never incremented.
+  if (mLineNumber != 0) {
+    ++mLineNumber;
+  }
+  mLineOffset = mOffset;
+}
+
+/**
+ * Back up |mOffset| over |n| code units. Backup(0) is a no-op.
+ * If |n| is greater than the distance to beginning of input, will
+ * silently stop at the beginning. May not be used to back up over a
+ * line boundary.
+ *
+ * DEBUG builds step one code unit at a time so that each unit crossed
+ * can be asserted not to be vertical whitespace; other builds perform a
+ * single clamped subtraction.
+ */
+void
+nsCSSScanner::Backup(uint32_t n)
+{
+#ifdef DEBUG
+ while (mOffset > 0 && n > 0) {
+ MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
+ "may not Backup() over a line boundary");
+ mOffset--;
+ n--;
+ }
+#else
+ if (mOffset < n) // would move before the start of the buffer
+ mOffset = 0;
+ else
+ mOffset -= n;
+#endif
+}
+
+/**
+ * Copy the scanner's read position, line bookkeeping and current-token
+ * bookkeeping into |aState| and mark |aState| as initialized, so that it
+ * can later be handed to RestoreSavedPosition().
+ */
+void
+nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
+{
+  // Current-token bookkeeping.
+  aState.mTokenOffset = mTokenOffset;
+  aState.mTokenLineOffset = mTokenLineOffset;
+  aState.mTokenLineNumber = mTokenLineNumber;
+  // Read position and line bookkeeping.
+  aState.mLineOffset = mLineOffset;
+  aState.mLineNumber = mLineNumber;
+  aState.mOffset = mOffset;
+
+  aState.mInitialized = true;
+}
+
+/**
+ * Restore the position previously captured by SavePosition().  Passing
+ * a state that was never saved is a caller error (asserted); when
+ * assertions are compiled out such a state is simply ignored.
+ */
+void
+nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
+{
+  MOZ_ASSERT(aState.mInitialized, "have not saved state");
+  if (!aState.mInitialized) {
+    return;
+  }
+  // Current-token bookkeeping.
+  mTokenOffset = aState.mTokenOffset;
+  mTokenLineOffset = aState.mTokenLineOffset;
+  mTokenLineNumber = aState.mTokenLineNumber;
+  // Read position and line bookkeeping.
+  mLineOffset = aState.mLineOffset;
+  mLineNumber = aState.mLineNumber;
+  mOffset = aState.mOffset;
+}
+
+/**
+ * Consume the run of whitespace (horizontal and vertical) that starts at
+ * the current read position, stopping at the first non-whitespace
+ * character or at end of input.
+ */
+void
+nsCSSScanner::SkipWhitespace()
+{
+  int32_t ch = Peek();
+  while (IsWhitespace(ch)) { // EOF is never whitespace, so it ends the loop
+    if (IsVertSpace(ch)) {
+      AdvanceLine();
+    } else {
+      Advance();
+    }
+    ch = Peek();
+  }
+}
+
+/**
+ * Skip over one CSS comment starting at the current read position.
+ *
+ * The caller must have checked that the input here is the two
+ * characters that open a comment (asserted). If end of input is reached
+ * before the comment is closed, the error is reported through mReporter
+ * when one is set, and the characters that would have closed the
+ * comment are recorded with SetEOFCharacters().
+ */
+void
+nsCSSScanner::SkipComment()
+{
+ MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
+ Advance(2);
+ for (;;) {
+ int32_t ch = Peek();
+ if (ch < 0) { // EOF inside the comment body: both closing characters are missing
+ if (mReporter)
+ mReporter->ReportUnexpectedEOF("PECommentEOF");
+ SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
+ return;
+ }
+ if (ch == '*') {
+ Advance();
+ ch = Peek();
+ if (ch < 0) { // EOF right after an asterisk: only the slash is missing
+ if (mReporter)
+ mReporter->ReportUnexpectedEOF("PECommentEOF");
+ SetEOFCharacters(eEOFCharacters_Slash);
+ return;
+ }
+ if (ch == '/') {
+ Advance();
+ return;
+ }
+ // Any other character is left unconsumed and re-examined by the next iteration.
+ } else if (IsVertSpace(ch)) {
+ AdvanceLine();
+ } else {
+ Advance();
+ }
+ }
+}
+
+/**
+ * If there is a valid escape sequence starting at the current read
+ * position, consume it, decode it, append the result to |aOutput|,
+ * and return true. Otherwise, consume nothing, leave |aOutput|
+ * unmodified, and return false. If |aInString| is true, accept the
+ * additional form of escape sequence allowed within string-like tokens.
+ *
+ * The caller must have checked that the current character is a
+ * backslash (asserted). Note that two of the accepted forms append
+ * nothing to |aOutput| although they return true: a backslash at end of
+ * input inside a string, and a backslash-newline inside a string.
+ */
+bool
+nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
+{
+ MOZ_ASSERT(Peek() == '\\', "should not have been called");
+ int32_t ch = Peek(1);
+ if (ch < 0) {
+ // If we are in a string (or a url() containing a string), we want to drop
+ // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
+ // character.
+ Advance();
+ if (aInString) {
+ SetEOFCharacters(eEOFCharacters_DropBackslash);
+ } else {
+ aOutput.Append(UCS2_REPLACEMENT_CHAR);
+ SetEOFCharacters(eEOFCharacters_ReplacementChar);
+ }
+ return true;
+ }
+ if (IsVertSpace(ch)) {
+ if (aInString) {
+ // In strings (and in url() containing a string), escaped
+ // newlines are completely removed, to allow splitting over
+ // multiple lines.
+ Advance(); // the backslash
+ AdvanceLine(); // the line terminator
+ return true;
+ }
+ // Outside of strings, backslash followed by a newline is not an escape.
+ return false;
+ }
+
+ if (!IsHexDigit(ch)) {
+ // "Any character (except a hexadecimal digit, linefeed, carriage
+ // return, or form feed) can be escaped with a backslash to remove
+ // its special meaning." -- CSS2.1 section 4.1.3
+ Advance(2); // the backslash and the escaped character
+ if (ch == 0) { // an escaped U+0000 is emitted as the replacement character
+ aOutput.Append(UCS2_REPLACEMENT_CHAR);
+ } else {
+ aOutput.Append(ch);
+ }
+ return true;
+ }
+
+ // "[at most six hexadecimal digits following a backslash] stand
+ // for the ISO 10646 character with that number, which must not be
+ // zero. (It is undefined in CSS 2.1 what happens if a style sheet
+ // does contain a character with Unicode codepoint zero.)"
+ // -- CSS2.1 section 4.1.3
+
+ // At this point we know we have \ followed by at least one
+ // hexadecimal digit, therefore the escape sequence is valid and we
+ // can go ahead and consume the backslash.
+ Advance();
+ uint32_t val = 0;
+ int i = 0; // number of hex digits consumed so far (at most 6)
+ do {
+ val = val * 16 + HexDigitValue(ch);
+ i++;
+ Advance();
+ ch = Peek();
+ } while (i < 6 && IsHexDigit(ch));
+
+ // "Interpret the hex digits as a hexadecimal number. If this number is zero,
+ // or is greater than the maximum allowed codepoint, return U+FFFD
+ // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
+ if (MOZ_UNLIKELY(val == 0)) {
+ aOutput.Append(UCS2_REPLACEMENT_CHAR);
+ } else {
+ AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
+ }
+
+ // Consume exactly one whitespace character after a
+ // hexadecimal escape sequence.
+ if (IsVertSpace(ch)) {
+ AdvanceLine(); // AdvanceLine() takes a CR LF pair as one unit
+ } else if (IsHorzSpace(ch)) {
+ Advance();
+ }
+ return true;
+}
+
+/**
+ * Consume a run of "text" beginning with the current read position,
+ * consisting of characters in the class |aClass| (which must be a
+ * suitable argument to IsOpenCharClass) plus escape sequences.
+ * Append the text to |aText|, after decoding escape sequences.
+ * An unescaped U+0000 is consumed and appended as the replacement
+ * character.
+ *
+ * Returns true if at least one code unit of input was consumed, false
+ * otherwise. The result is computed from the movement of the read
+ * position, not from the length of |aText|, so it is also true when
+ * only an escape that appends nothing (see GatherEscape) was consumed.
+ */
+bool
+nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
+{
+ // This is all of the character classes currently used with
+ // GatherText. If you have a need to use this function with a
+ // different class, go ahead and add it.
+ MOZ_ASSERT(aClass == IS_STRING ||
+ aClass == IS_IDCHAR ||
+ aClass == IS_URL_CHAR,
+ "possibly-inappropriate character class");
+
+ uint32_t start = mOffset;
+ bool inString = aClass == IS_STRING; // selects the string-only escape forms in GatherEscape
+
+ for (;;) {
+ // Consume runs of unescaped characters in one go.
+ uint32_t n = mOffset;
+ while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) {
+ n++;
+ }
+ if (n > mOffset) {
+ aText.Append(&mBuffer[mOffset], n - mOffset);
+ mOffset = n; // set directly: characters in these classes are never vertical whitespace
+ }
+ if (n == mCount) { // reached end of input
+ break;
+ }
+
+ int32_t ch = Peek();
+ MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
+ "should not have exited the inner loop");
+ if (ch == 0) {
+ Advance();
+ aText.Append(UCS2_REPLACEMENT_CHAR);
+ continue;
+ }
+
+ if (ch != '\\') { // an ordinary character outside the class ends the run
+ break;
+ }
+ if (!GatherEscape(aText, inString)) { // not a valid escape: leave the backslash unconsumed
+ break;
+ }
+ }
+
+ return mOffset > start;
+}
+
+/**
+ * Scan an identifier-family token: Ident, Function, or (through
+ * NextURL) URL / Bad_URL, all of which start out looking like an
+ * identifier.  When no identifier text can be gathered at all, which is
+ * expected only for a backslash that does not begin a valid escape, a
+ * single character is consumed and stored in mSymbol, and the token
+ * type is left as the caller set it.  Always returns true.
+ */
+bool
+nsCSSScanner::ScanIdent(nsCSSToken& aToken)
+{
+  const bool gathered = GatherText(IS_IDCHAR, aToken.mIdent);
+  if (MOZ_UNLIKELY(!gathered)) {
+    MOZ_ASSERT(Peek() == '\\',
+               "unexpected IsIdentStart character that did not begin an ident");
+    aToken.mSymbol = Peek();
+    Advance();
+    return true;
+  }
+
+  if (MOZ_UNLIKELY(Peek() == '(')) {
+    // An identifier immediately followed by '(' is a function token;
+    // "url(" additionally switches to the URL lexical rules.
+    Advance();
+    aToken.mType = eCSSToken_Function;
+    if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
+      NextURL(aToken);
+    } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
+      mSeenVariableReference = true;
+    }
+  } else {
+    aToken.mType = eCSSToken_Ident;
+  }
+  return true;
+}
+
+/**
+ * Scan an AtKeyword token.  The '@' is always consumed and stored in
+ * mSymbol; the token type becomes AtKeyword only when an identifier
+ * follows and could be gathered into mIdent.  Otherwise the type is
+ * left as the caller set it.  Always returns true.
+ */
+bool
+nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
+{
+  MOZ_ASSERT(Peek() == '@', "should not have been called");
+
+  // Record the '@' first: it is the result when no identifier follows.
+  aToken.mSymbol = '@';
+  Advance();
+
+  const int32_t first = Peek();
+  const int32_t second = Peek(1);
+  if (StartsIdent(first, second) &&
+      GatherText(IS_IDCHAR, aToken.mIdent)) {
+    aToken.mType = eCSSToken_AtKeyword;
+  }
+  return true;
+}
+
+/**
+ * Scan a Hash token.  The '#' is always consumed and stored in mSymbol.
+ * If identifier characters (or a backslash) follow and can be gathered,
+ * the token becomes eCSSToken_ID when that text starts a valid
+ * identifier and eCSSToken_Hash otherwise.  If nothing can be gathered
+ * the type is left as the caller set it.  Always returns true.
+ */
+bool
+nsCSSScanner::ScanHash(nsCSSToken& aToken)
+{
+  MOZ_ASSERT(Peek() == '#', "should not have been called");
+
+  // Record the '#' first: it is the result when no name follows.
+  aToken.mSymbol = '#';
+  Advance();
+
+  const int32_t first = Peek();
+  if (!IsIdentChar(first) && first != '\\') {
+    return true;
+  }
+
+  // Decide between ID and Hash before any text is consumed.
+  const nsCSSTokenType hashType =
+    StartsIdent(first, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
+  aToken.mIdent.SetLength(0);
+  if (GatherText(IS_IDCHAR, aToken.mIdent)) {
+    aToken.mType = hashType;
+  }
+  return true;
+}
+
+/**
+ * Scan a Number, Percentage, or Dimension token (all of which begin
+ * like a Number).
+ *
+ * The caller must already have established that a number starts at the
+ * current read position: a digit, or '.', '+' or '-' followed by a
+ * digit, or a sign followed by '.' and a digit (asserted in DEBUG
+ * builds).
+ *
+ * On return aToken.mNumber holds the value (already divided by 100 for
+ * a Percentage), aToken.mHasSign records whether an explicit sign was
+ * present, and aToken.mIntegerValid / aToken.mInteger are set when the
+ * number was written without a fraction or exponent and is not a
+ * percentage.  A Dimension's unit is gathered into aToken.mIdent.
+ * Always returns true.
+ */
+bool
+nsCSSScanner::ScanNumber(nsCSSToken& aToken)
+{
+  int32_t c = Peek();
+#ifdef DEBUG
+  {
+    int32_t c2 = Peek(1);
+    int32_t c3 = Peek(2);
+    MOZ_ASSERT(IsDigit(c) ||
+               (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
+               (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
+               "should not have been called");
+  }
+#endif
+
+  // Sign of the mantissa (-1 or 1).
+  int32_t sign = c == '-' ? -1 : 1;
+  // Absolute value of the integer part of the mantissa.  This is a double so
+  // we don't run into overflow issues for consumers that only care about our
+  // floating-point value while still being able to express the full int32_t
+  // range for consumers who want integers.
+  double intPart = 0;
+  // Fractional part of the mantissa.  This is a double so that when we convert
+  // to float at the end we'll end up rounding to nearest float instead of
+  // truncating down (as we would if fracPart were a float and we just
+  // effectively lost the last several digits).
+  double fracPart = 0;
+  // Absolute value of the power of 10 that we should multiply by (only
+  // relevant for numbers in scientific notation).  Has to be a signed integer,
+  // because multiplication of signed by unsigned converts the unsigned to
+  // signed, so if we plan to actually multiply by expSign...
+  int32_t exponent = 0;
+  // Sign of the exponent.
+  int32_t expSign = 1;
+
+  aToken.mHasSign = (c == '+' || c == '-');
+  if (aToken.mHasSign) {
+    Advance();
+    c = Peek();
+  }
+
+  bool gotDot = (c == '.');
+
+  if (!gotDot) {
+    // Scan the integer part of the mantissa.
+    MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
+    do {
+      intPart = 10*intPart + DecimalDigitValue(c);
+      Advance();
+      c = Peek();
+    } while (IsDigit(c));
+
+    // A '.' only belongs to the number when a digit follows it.
+    gotDot = (c == '.') && IsDigit(Peek(1));
+  }
+
+  if (gotDot) {
+    // Scan the fractional part of the mantissa.
+    Advance();
+    c = Peek();
+    MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
+    // Power of ten by which we need to divide our next digit
+    double divisor = 10;
+    do {
+      fracPart += DecimalDigitValue(c) / divisor;
+      divisor *= 10;
+      Advance();
+      c = Peek();
+    } while (IsDigit(c));
+  }
+
+  bool gotE = false;
+  if (c == 'e' || c == 'E') {
+    int32_t expSignChar = Peek(1);
+    int32_t nextChar = Peek(2);
+    // The 'e' only starts an exponent when it is followed by a digit, or
+    // by a sign and then a digit; otherwise it is left for the unit.
+    if (IsDigit(expSignChar) ||
+        ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
+      gotE = true;
+      if (expSignChar == '-') {
+        expSign = -1;
+      }
+      Advance(); // consumes the E
+      if (expSignChar == '-' || expSignChar == '+') {
+        Advance();
+        c = nextChar;
+      } else {
+        c = expSignChar;
+      }
+      MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
+      // Largest accumulator value for which 10*exponent + 9 still fits in
+      // int32_t.  The exponent digits come straight from the style sheet
+      // and their count is unbounded, so accumulating blindly would
+      // overflow a signed integer, which is undefined behaviour.  Once the
+      // accumulator passes this bound pow() below already yields infinity
+      // (or zero for a negative exponent), so the remaining digits are
+      // consumed without changing the result.
+      const int32_t kMaxAccumulableExponent = (INT32_MAX - 9) / 10;
+      do {
+        if (exponent <= kMaxAccumulableExponent) {
+          exponent = 10*exponent + int32_t(DecimalDigitValue(c));
+        }
+        Advance();
+        c = Peek();
+      } while (IsDigit(c));
+    }
+  }
+
+  nsCSSTokenType type = eCSSToken_Number;
+
+  // Set mIntegerValid for all cases (except %, below) because we need
+  // it for the "2n" in :nth-child(2n).
+  aToken.mIntegerValid = false;
+
+  // Time to reassemble our number.
+  // Do all the math in double precision so it's truncated only once.
+  double value = sign * (intPart + fracPart);
+  if (gotE) {
+    // Avoid multiplication of 0 by Infinity.
+    if (value != 0.0) {
+      // Explicitly cast expSign*exponent to double to avoid issues with
+      // overloaded pow() on Windows.
+      value *= pow(10.0, double(expSign * exponent));
+    }
+  } else if (!gotDot) {
+    // Clamp values outside of integer range.
+    if (sign > 0) {
+      aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
+    } else {
+      aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
+    }
+    aToken.mIntegerValid = true;
+  }
+
+  nsString& ident = aToken.mIdent;
+
+  // Check for Dimension and Percentage tokens.
+  if (c >= 0) {
+    if (StartsIdent(c, Peek(1))) {
+      if (GatherText(IS_IDCHAR, ident)) {
+        type = eCSSToken_Dimension;
+      }
+    } else if (c == '%') {
+      Advance();
+      type = eCSSToken_Percentage;
+      value = value / 100.0f;
+      aToken.mIntegerValid = false;
+    }
+  }
+  MOZ_ASSERT(!IsNaN(value), "The value should not be NaN");
+  aToken.mNumber = value;
+  aToken.mType = type;
+  return true;
+}
+
+/**
+ * Scan a string constant ('foo' or "foo"). Will always produce
+ * either a String or a Bad_String token; the latter occurs when the
+ * close quote is missing. Always returns true (for convenience in Next()).
+ *
+ * The opening quote character is kept in aToken.mSymbol and the decoded
+ * contents are appended to aToken.mIdent. Reaching end of input before
+ * the closing quote still yields a String token; the missing quote is
+ * recorded with AddEOFCharacters().
+ */
+bool
+nsCSSScanner::ScanString(nsCSSToken& aToken)
+{
+ int32_t aStop = Peek(); // the quote character that will terminate this string
+ MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called");
+ aToken.mType = eCSSToken_String;
+ aToken.mSymbol = char16_t(aStop); // Remember how it's quoted.
+ Advance();
+
+ for (;;) {
+ GatherText(IS_STRING, aToken.mIdent);
+
+ int32_t ch = Peek();
+ if (ch == -1) {
+ AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
+ eEOFCharacters_SingleQuote);
+ break; // EOF ends a string token with no error.
+ }
+ if (ch == aStop) {
+ Advance();
+ break;
+ }
+ // Both " and ' are excluded from IS_STRING.
+ if (ch == '"' || ch == '\'') { // the other kind of quote is ordinary string content
+ aToken.mIdent.Append(ch);
+ Advance();
+ continue;
+ }
+
+ // Anything else that stopped GatherText ends the string without a close quote.
+ mSeenBadToken = true;
+ aToken.mType = eCSSToken_Bad_String;
+ if (mReporter)
+ mReporter->ReportUnexpected("SEUnterminatedString", aToken);
+ break;
+ }
+ return true;
+}
+
+/**
+ * Scan a unicode-range token. These match the regular expression
+ *
+ * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
+ *
+ * However, some such tokens are "invalid". There are three valid forms:
+ *
+ * u+[0-9a-f]{x} 1 <= x <= 6
+ * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
+ * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
+ *
+ * All unicode-range tokens have their text recorded in mIdent; valid ones
+ * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
+ * Note that this does not validate the numeric range, only the syntactic
+ * form.
+ *
+ * mInteger and mInteger2 are in fact assigned for every token scanned
+ * here; mIntegerValid tells the caller whether they may be trusted.
+ * Always returns true.
+ */
+bool
+nsCSSScanner::ScanURange(nsCSSToken& aResult)
+{
+ int32_t intro1 = Peek();
+ int32_t intro2 = Peek(1);
+ int32_t ch = Peek(2);
+
+ MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
+ intro2 == '+' &&
+ (IsHexDigit(ch) || ch == '?'),
+ "should not have been called");
+
+ aResult.mIdent.Append(intro1);
+ aResult.mIdent.Append(intro2);
+ Advance(2);
+
+ bool valid = true;
+ bool haveQues = false;
+ uint32_t low = 0; // lower bound: each '?' contributes the digit 0
+ uint32_t high = 0; // upper bound: each '?' contributes the digit F
+ int i = 0; // digits or question marks consumed so far (at most 6)
+
+ do {
+ aResult.mIdent.Append(ch);
+ if (IsHexDigit(ch)) {
+ if (haveQues) {
+ valid = false; // All question marks should be at the end.
+ }
+ low = low*16 + HexDigitValue(ch);
+ high = high*16 + HexDigitValue(ch);
+ } else {
+ haveQues = true;
+ low = low*16 + 0x0;
+ high = high*16 + 0xF;
+ }
+
+ i++;
+ Advance();
+ ch = Peek();
+ } while (i < 6 && (IsHexDigit(ch) || ch == '?'));
+
+ if (ch == '-' && IsHexDigit(Peek(1))) {
+ if (haveQues) { // wildcards cannot be combined with an explicit upper bound
+ valid = false;
+ }
+
+ aResult.mIdent.Append(ch);
+ Advance();
+ ch = Peek();
+ high = 0; // the explicit upper bound replaces the one derived above
+ i = 0;
+ do {
+ aResult.mIdent.Append(ch);
+ high = high*16 + HexDigitValue(ch);
+
+ i++;
+ Advance();
+ ch = Peek();
+ } while (i < 6 && IsHexDigit(ch));
+ }
+
+ aResult.mInteger = low;
+ aResult.mInteger2 = high;
+ aResult.mIntegerValid = valid;
+ aResult.mType = eCSSToken_URange;
+ return true;
+}
+
+#ifdef DEBUG
+/* static */ void // DEBUG-only check that |c| is one of the EOFCharacters combinations enumerated below
+nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
+{
+ MOZ_ASSERT(c == eEOFCharacters_None ||
+ c == eEOFCharacters_ReplacementChar ||
+ c == eEOFCharacters_Slash ||
+ c == (eEOFCharacters_Asterisk |
+ eEOFCharacters_Slash) ||
+ c == eEOFCharacters_DoubleQuote ||
+ c == eEOFCharacters_SingleQuote ||
+ c == (eEOFCharacters_DropBackslash |
+ eEOFCharacters_DoubleQuote) ||
+ c == (eEOFCharacters_DropBackslash |
+ eEOFCharacters_SingleQuote) ||
+ c == eEOFCharacters_CloseParen ||
+ c == (eEOFCharacters_ReplacementChar |
+ eEOFCharacters_CloseParen) ||
+ c == (eEOFCharacters_DoubleQuote |
+ eEOFCharacters_CloseParen) ||
+ c == (eEOFCharacters_SingleQuote |
+ eEOFCharacters_CloseParen) ||
+ c == (eEOFCharacters_DropBackslash |
+ eEOFCharacters_DoubleQuote |
+ eEOFCharacters_CloseParen) ||
+ c == (eEOFCharacters_DropBackslash |
+ eEOFCharacters_SingleQuote |
+ eEOFCharacters_CloseParen),
+ "invalid EOFCharacters value");
+}
+#endif
+
+/**
+ * Replace the recorded set of implied end-of-file characters with
+ * |aEOFCharacters|.
+ */
+void
+nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
+{
+  mEOFCharacters = static_cast<EOFCharacters>(aEOFCharacters);
+}
+
+/**
+ * Add the bits in |aEOFCharacters| to the recorded set of implied
+ * end-of-file characters, keeping those already present.
+ */
+void
+nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
+{
+  mEOFCharacters =
+    static_cast<EOFCharacters>(mEOFCharacters | aEOFCharacters);
+}
+
+static const char16_t kImpliedEOFCharacters[] = { // zero-terminated; walked in step with the EOFCharacters bits by AppendImpliedEOFCharacters()
+ UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0
+};
+
+/**
+ * Append to |aResult| the characters implied by |aEOFCharacters|.  The
+ * lowest bit (eEOFCharacters_DropBackslash) is ignored; each remaining
+ * bit, taken from least to most significant, selects the corresponding
+ * entry of kImpliedEOFCharacters.
+ */
+/* static */ void
+nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
+                                         nsAString& aResult)
+{
+  // Shift out eEOFCharacters_DropBackslash, which appends nothing.
+  uint32_t remaining = aEOFCharacters >> 1;
+
+  // The table is in the order in which the characters must be appended,
+  // so walk the table and the bits together.
+  const char16_t* next = kImpliedEOFCharacters;
+  while (*next && remaining) {
+    if (remaining & 1) {
+      aResult.Append(*next);
+    }
+    ++next;
+    remaining >>= 1;
+  }
+
+  MOZ_ASSERT(remaining == 0, "too many bits in mEOFCharacters");
+}
+
+/**
+ * Consume the part of an URL token after the initial 'url('. Caller
+ * is assumed to have consumed 'url(' already. Will always produce
+ * either an URL or a Bad_URL token.
+ *
+ * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
+ * the special lexical rules for URL tokens in a nonstandard context.
+ *
+ * On return aToken.mIdent holds the URL text and aToken.mSymbol holds
+ * the quote character used, or 0 for an unquoted URL. End of input in
+ * place of the closing parenthesis still yields an URL token; the
+ * missing parenthesis is recorded with AddEOFCharacters().
+ */
+void
+nsCSSScanner::NextURL(nsCSSToken& aToken)
+{
+ SkipWhitespace();
+
+ // aToken.mIdent may be "url" at this point; clear that out
+ aToken.mIdent.Truncate();
+
+ int32_t ch = Peek();
+ // Do we have a string?
+ if (ch == '"' || ch == '\'') {
+ ScanString(aToken);
+ if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
+ aToken.mType = eCSSToken_Bad_URL; // ScanString has already set mSeenBadToken
+ return;
+ }
+ MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
+
+ } else {
+ // Otherwise, this is the start of a non-quoted url (which may be empty).
+ aToken.mSymbol = char16_t(0);
+ GatherText(IS_URL_CHAR, aToken.mIdent);
+ }
+
+ // Consume trailing whitespace and then look for a close parenthesis.
+ SkipWhitespace();
+ ch = Peek();
+ // ch can be less than zero indicating EOF
+ if (MOZ_LIKELY(ch < 0 || ch == ')')) {
+ Advance(); // at end of input this does not move the read position
+ aToken.mType = eCSSToken_URL;
+ if (ch < 0) {
+ AddEOFCharacters(eEOFCharacters_CloseParen);
+ }
+ } else {
+ mSeenBadToken = true;
+ aToken.mType = eCSSToken_Bad_URL;
+ }
+}
+
+/**
+ * Primary scanner entry point. Consume one token and fill in
+ * |aToken| accordingly. Will skip over any number of comments first,
+ * and will also skip over rather than return whitespace and comment
+ * tokens, depending on the value of |aSkip|.
+ *
+ * Whitespace is returned as a token unless |aSkip| is
+ * eCSSScannerExclude_WhitespaceAndComments; a comment is returned as a
+ * token only when |aSkip| is eCSSScannerExclude_None. In SVG mode the
+ * comment opener is not recognised and is scanned as ordinary symbols.
+ *
+ * Returns true if it successfully consumed a token, false if EOF has
+ * been reached. Will always advance the current read position by at
+ * least one character unless called when already at EOF.
+ */
+bool
+nsCSSScanner::Next(nsCSSToken& aToken, nsCSSScannerExclude aSkip)
+{
+ int32_t ch;
+
+ // do this here so we don't have to do it in dozens of other places
+ aToken.mIdent.Truncate();
+ aToken.mType = eCSSToken_Symbol; // the Scan* helpers rely on this default when they fall back to a symbol
+
+ for (;;) {
+ // Consume any number of comments, and possibly also whitespace tokens,
+ // in between other tokens.
+ mTokenOffset = mOffset; // re-recorded on every iteration so it ends up at the start of the returned token
+ mTokenLineOffset = mLineOffset;
+ mTokenLineNumber = mLineNumber;
+
+ ch = Peek();
+ if (IsWhitespace(ch)) {
+ SkipWhitespace();
+ if (aSkip != eCSSScannerExclude_WhitespaceAndComments) {
+ aToken.mType = eCSSToken_Whitespace;
+ return true;
+ }
+ continue; // start again at the beginning
+ }
+ if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
+ SkipComment();
+ if (aSkip == eCSSScannerExclude_None) {
+ aToken.mType = eCSSToken_Comment;
+ return true;
+ }
+ continue; // start again at the beginning
+ }
+ break;
+ }
+
+ // EOF
+ if (ch < 0) {
+ return false;
+ }
+
+ // 'u' could be UNICODE-RANGE or an identifier-family token
+ if (ch == 'u' || ch == 'U') {
+ int32_t c2 = Peek(1);
+ int32_t c3 = Peek(2);
+ if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) {
+ return ScanURange(aToken);
+ }
+ return ScanIdent(aToken);
+ }
+
+ // identifier family
+ if (IsIdentStart(ch)) {
+ return ScanIdent(aToken);
+ }
+
+ // number family
+ if (IsDigit(ch)) {
+ return ScanNumber(aToken);
+ }
+
+ if (ch == '.' && IsDigit(Peek(1))) {
+ return ScanNumber(aToken);
+ }
+
+ if (ch == '+') {
+ int32_t c2 = Peek(1);
+ if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) {
+ return ScanNumber(aToken);
+ }
+ // otherwise fall through: a lone '+' ends up as a symbol below
+ }
+
+ // '-' can start an identifier-family token, a number-family token,
+ // or an HTML-comment
+ if (ch == '-') {
+ int32_t c2 = Peek(1);
+ int32_t c3 = Peek(2);
+ if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) {
+ return ScanIdent(aToken);
+ }
+ if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) {
+ return ScanNumber(aToken);
+ }
+ if (c2 == '-' && c3 == '>') {
+ Advance(3);
+ aToken.mType = eCSSToken_HTMLComment;
+ aToken.mIdent.AssignLiteral("-->");
+ return true;
+ }
+ // otherwise fall through: a lone '-' ends up as a symbol below
+ }
+
+ // the other HTML-comment token
+ if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
+ Advance(4);
+ aToken.mType = eCSSToken_HTMLComment;
+ aToken.mIdent.AssignLiteral("<!--");
+ return true;
+ }
+
+ // AT_KEYWORD
+ if (ch == '@') {
+ return ScanAtKeyword(aToken);
+ }
+
+ // HASH
+ if (ch == '#') {
+ return ScanHash(aToken);
+ }
+
+ // STRING
+ if (ch == '"' || ch == '\'') {
+ return ScanString(aToken);
+ }
+
+ // Match operators: ~= |= ^= $= *=
+ nsCSSTokenType opType = MatchOperatorType(ch);
+ if (opType != eCSSToken_Symbol && Peek(1) == '=') {
+ aToken.mType = opType;
+ Advance(2);
+ return true;
+ }
+
+ // Otherwise, a symbol (DELIM).
+ aToken.mSymbol = ch;
+ Advance();
+ return true;
+}
+
+/* nsCSSGridTemplateAreaScanner methods. */
+
+nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString& aBuffer) // scanner over the text of aBuffer
+ : mBuffer(aBuffer.BeginReading()) // pointer into aBuffer; presumably aBuffer must outlive the scanner -- confirm
+ , mOffset(0) // reading starts at the first code unit
+ , mCount(aBuffer.Length())
+{
+}
+
+/**
+ * Produce the next grid-template-areas token in |aTokenResult|.
+ *
+ * Leading whitespace is skipped.  A run of identifier characters gives
+ * a named-cell token (mName set to the run), a run of one or more '.'
+ * gives a null-cell token (mName emptied), and any other character is
+ * consumed on its own as a trash token (isTrash set, mName untouched).
+ *
+ * Returns false, leaving |aTokenResult| untouched, when only whitespace
+ * or nothing at all remains; true otherwise.
+ */
+bool
+nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
+{
+  // Read code units until one is not whitespace; that unit has then
+  // already been consumed.
+  int32_t ch;
+  for (;;) {
+    if (mOffset >= mCount) {
+      return false;
+    }
+    ch = mBuffer[mOffset];
+    mOffset++;
+    if (!IsWhitespace(ch)) {
+      break;
+    }
+  }
+
+  if (IsOpenCharClass(ch, IS_IDCHAR)) {
+    // Named cell token: extend over the following identifier characters.
+    const uint32_t nameStart = mOffset - 1; // offset of |ch|
+    for (; mOffset < mCount; mOffset++) {
+      if (!IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) {
+        break;
+      }
+    }
+    aTokenResult.mName.Assign(&mBuffer[nameStart], mOffset - nameStart);
+    aTokenResult.isTrash = false;
+    return true;
+  }
+
+  if (ch == '.') {
+    // Null cell token: any further '.' belongs to the same token.
+    for (; mOffset < mCount; mOffset++) {
+      if (mBuffer[mOffset] != '.') {
+        break;
+      }
+    }
+    aTokenResult.mName.Truncate();
+    aTokenResult.isTrash = false;
+    return true;
+  }
+
+  // Trash token
+  aTokenResult.isTrash = true;
+  return true;
+}