summaryrefslogtreecommitdiffstats
path: root/js/src/frontend/TokenStream.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/frontend/TokenStream.cpp')
-rw-r--r--js/src/frontend/TokenStream.cpp1962
1 files changed, 1962 insertions, 0 deletions
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
new file mode 100644
index 000000000..c166ed414
--- /dev/null
+++ b/js/src/frontend/TokenStream.cpp
@@ -0,0 +1,1962 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// JS lexical scanner.
+
+#include "frontend/TokenStream.h"
+
+#include "mozilla/IntegerTypeTraits.h"
+#include "mozilla/PodOperations.h"
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "jsatom.h"
+#include "jscntxt.h"
+#include "jscompartment.h"
+#include "jsexn.h"
+#include "jsnum.h"
+
+#include "frontend/BytecodeCompiler.h"
+#include "js/CharacterEncoding.h"
+#include "js/UniquePtr.h"
+#include "vm/HelperThreads.h"
+#include "vm/Keywords.h"
+#include "vm/StringBuffer.h"
+#include "vm/Unicode.h"
+
+using namespace js;
+using namespace js::frontend;
+
+using mozilla::Maybe;
+using mozilla::PodAssign;
+using mozilla::PodCopy;
+using mozilla::PodZero;
+
+// Pairs the spelling of one keyword with the token kind recorded for it.
+// Entries of the |keywords| table are aggregate-initialized in member order.
+struct KeywordInfo {
+ const char* chars; // C string with keyword text
+ TokenKind tokentype; // token kind associated with |chars|
+};
+
+// Keyword table: one {js_<keyword>_str, type} entry per expansion of
+// KEYWORD_INFO by FOR_EACH_JAVASCRIPT_KEYWORD. FindKeyword() indexes into
+// this array, so the entry order presumably has to agree with the indices
+// produced by jsautokw.h -- TODO confirm against the generator.
+static const KeywordInfo keywords[] = {
+#define KEYWORD_INFO(keyword, name, type) \
+ {js_##keyword##_str, type},
+ FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
+#undef KEYWORD_INFO
+};
+
+// Returns a KeywordInfo for the specified characters, or nullptr if the string
+// is not a keyword.
+//
+// |s| points at |length| characters (length must be non-zero). The lookup
+// itself is the code textually included from jsautokw.h, which talks to this
+// function only through the JSKW_* macros defined below and finishes by
+// jumping to one of the three labels:
+// got_match - keywords[i] is the answer, no further comparison is done here;
+// test_guess - keywords[i] is a candidate that must still be compared
+// character by character against |s|;
+// no_match - |s| is not a keyword.
+template <typename CharT>
+static const KeywordInfo*
+FindKeyword(const CharT* s, size_t length)
+{
+ MOZ_ASSERT(length != 0);
+
+ size_t i;
+ const KeywordInfo* kw;
+ const char* chars;
+
+ // Hooks consumed by the included lookup code.
+#define JSKW_LENGTH() length
+#define JSKW_AT(column) s[column]
+#define JSKW_GOT_MATCH(index) i = (index); goto got_match;
+#define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
+#define JSKW_NO_MATCH() goto no_match;
+#include "jsautokw.h"
+#undef JSKW_NO_MATCH
+#undef JSKW_TEST_GUESS
+#undef JSKW_GOT_MATCH
+#undef JSKW_AT
+#undef JSKW_LENGTH
+
+ got_match:
+ return &keywords[i];
+
+ test_guess:
+ kw = &keywords[i];
+ chars = kw->chars;
+ // Compare exactly |length| characters; the candidate's own terminator is
+ // never inspected, so this presumably relies on the included code only
+ // guessing keywords whose length equals |length| -- TODO confirm.
+ do {
+ if (*s++ != (unsigned char)(*chars++))
+ goto no_match;
+ } while (--length != 0);
+ return kw;
+
+ no_match:
+ return nullptr;
+}
+
+// Keyword lookup for a linear string: dispatches to the templated
+// FindKeyword() using whichever character width the string stores.
+static const KeywordInfo*
+FindKeyword(JSLinearString* str)
+{
+    // |nogc| is the token the raw-character accessors require.
+    JS::AutoCheckCannotGC nogc;
+
+    if (str->hasLatin1Chars())
+        return FindKeyword(str->latin1Chars(nogc), str->length());
+
+    return FindKeyword(str->twoByteChars(nogc), str->length());
+}
+
+// Returns true iff the |length| characters at |chars| form an identifier:
+// a non-empty run whose first character satisfies
+// unicode::IsIdentifierStart and whose remaining characters all satisfy
+// unicode::IsIdentifierPart. Each character is widened to char16_t first.
+template <typename CharT>
+static bool
+IsIdentifier(const CharT* chars, size_t length)
+{
+    // The empty string is never an identifier.
+    if (length == 0)
+        return false;
+
+    if (!unicode::IsIdentifierStart(char16_t(chars[0])))
+        return false;
+
+    for (size_t idx = 1; idx < length; idx++) {
+        if (!unicode::IsIdentifierPart(char16_t(chars[idx])))
+            return false;
+    }
+
+    return true;
+}
+
+// Identifier test for a linear string, forwarding to the file-local template
+// with the string's Latin-1 or two-byte characters as appropriate.
+bool
+frontend::IsIdentifier(JSLinearString* str)
+{
+    // |nogc| is the token the raw-character accessors require.
+    JS::AutoCheckCannotGC nogc;
+
+    if (str->hasLatin1Chars())
+        return ::IsIdentifier(str->latin1Chars(nogc), str->length());
+
+    return ::IsIdentifier(str->twoByteChars(nogc), str->length());
+}
+
+// Identifier test for a raw two-byte character range; forwards to the
+// file-local IsIdentifier template (hence the explicit global qualifier).
+bool
+frontend::IsIdentifier(const char16_t* chars, size_t length)
+{
+ return ::IsIdentifier(chars, length);
+}
+
+// Returns true iff |str| has an entry in the keyword table.
+bool
+frontend::IsKeyword(JSLinearString* str)
+{
+    const KeywordInfo* found = FindKeyword(str);
+    return found != nullptr;
+}
+
+// Builds the line-start table in its initial state: two entries, offset 0 for
+// the first line followed by the MAX_PTR sentinel. |ln| is stored as the
+// initial line number.
+TokenStream::SourceCoords::SourceCoords(ExclusiveContext* cx, uint32_t ln)
+ : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
+{
+ // This is actually necessary! Removing it causes compile errors on
+ // GCC and clang. You could try declaring this:
+ //
+ // const uint32_t TokenStream::SourceCoords::MAX_PTR;
+ //
+ // which fixes the GCC/clang error, but causes bustage on Windows. Sigh.
+ //
+ uint32_t maxPtr = MAX_PTR;
+
+ // The first line begins at buffer offset 0. MAX_PTR is the sentinel. The
+ // appends cannot fail because |lineStartOffsets_| has statically-allocated
+ // elements.
+ MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
+ MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
+ lineStartOffsets_.infallibleAppend(0);
+ lineStartOffsets_.infallibleAppend(maxPtr);
+}
+
+// Records that line |lineNum| starts at |lineStartOffset|.
+//
+// Returns false only when the table needed to grow and the append failed
+// (OOM); in that case the table is left unchanged.
+MOZ_ALWAYS_INLINE bool
+TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
+{
+    const uint32_t lineIndex = lineNumToIndex(lineNum);
+    const uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
+
+    MOZ_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
+
+    if (lineIndex != sentinelIndex) {
+        // This newline was seen before (and then ungotten): there is nothing
+        // to record, only a sanity check that the offset did not change.
+        // This path can also run after an earlier OOM, so lineIndex may lie
+        // beyond the table; hence the range check.
+        MOZ_ASSERT_IF(lineIndex < sentinelIndex, lineStartOffsets_[lineIndex] == lineStartOffset);
+        return true;
+    }
+
+    // First time this newline is seen. Grow the table by appending a new
+    // sentinel first; only once that succeeded is the old sentinel slot
+    // overwritten, so a failed append keeps the sentinel in place.
+    uint32_t maxPtr = MAX_PTR;
+    if (!lineStartOffsets_.append(maxPtr))
+        return false;
+
+    lineStartOffsets_[lineIndex] = lineStartOffset;
+    return true;
+}
+
+// Extends this line-start table with the entries |other| has beyond it, so
+// that afterwards this table is at least as long as |other|'s. Does nothing
+// if this table is already at least that long.
+//
+// Returns false on OOM. All required capacity is reserved before the table
+// is touched, so a failure leaves the table exactly as it was -- in
+// particular still terminated by the MAX_PTR sentinel that add() and
+// lineIndexOf() rely on. (Overwriting the sentinel first and then using
+// fallible appends could leave the table without its sentinel when an append
+// failed part-way through.)
+MOZ_ALWAYS_INLINE bool
+TokenStream::SourceCoords::fill(const TokenStream::SourceCoords& other)
+{
+    MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
+    MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
+
+    const size_t otherLength = other.lineStartOffsets_.length();
+    if (lineStartOffsets_.length() >= otherLength)
+        return true;
+
+    // The only fallible step; nothing has been modified yet.
+    if (!lineStartOffsets_.reserve(otherLength))
+        return false;
+
+    // Replace our sentinel with the real offset |other| has at that index,
+    // then copy the remainder of |other| (which ends with its own sentinel).
+    uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
+    lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
+
+    for (size_t i = sentinelIndex + 1; i < otherLength; i++)
+        lineStartOffsets_.infallibleAppend(other.lineStartOffsets_[i]);
+
+    return true;
+}
+
+// Returns the index into lineStartOffsets_ of the line containing |offset|,
+// i.e. the index i with lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1].
+//
+// The result of the previous query is cached in lastLineIndex_ and tried
+// first (same line, then +1, then +2) before falling back to a binary
+// search. Note that lastLineIndex_ is written from this const method, so it
+// is presumably declared mutable -- confirm in the header.
+MOZ_ALWAYS_INLINE uint32_t
+TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
+{
+ uint32_t iMin, iMax, iMid;
+
+ if (lineStartOffsets_[lastLineIndex_] <= offset) {
+ // If we reach here, offset is on a line the same as or higher than
+ // last time. Check first for the +0, +1, +2 cases, because they
+ // typically cover 85--98% of cases.
+ if (offset < lineStartOffsets_[lastLineIndex_ + 1])
+ return lastLineIndex_; // lineIndex is same as last time
+
+ // If we reach here, there must be at least one more entry (plus the
+ // sentinel). Try it.
+ lastLineIndex_++;
+ if (offset < lineStartOffsets_[lastLineIndex_ + 1])
+ return lastLineIndex_; // lineIndex is one higher than last time
+
+ // The same logic applies here.
+ lastLineIndex_++;
+ if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
+ return lastLineIndex_; // lineIndex is two higher than last time
+ }
+
+ // No luck. Oh well, we have a better-than-default starting point for
+ // the binary search.
+ iMin = lastLineIndex_ + 1;
+ MOZ_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel
+
+ } else {
+ // offset lies before the cached line: search from the very first line.
+ iMin = 0;
+ }
+
+ // This is a binary search with deferred detection of equality, which was
+ // marginally faster in this case than a standard binary search.
+ // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
+ // want one before that.
+ iMax = lineStartOffsets_.length() - 2;
+ while (iMax > iMin) {
+ iMid = iMin + (iMax - iMin) / 2;
+ if (offset >= lineStartOffsets_[iMid + 1])
+ iMin = iMid + 1; // offset is above lineStartOffsets_[iMid]
+ else
+ iMax = iMid; // offset is below or within lineStartOffsets_[iMid]
+ }
+ MOZ_ASSERT(iMax == iMin);
+ MOZ_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
+ // Remember the answer as the starting guess for the next query.
+ lastLineIndex_ = iMin;
+ return iMin;
+}
+
+// Returns the line number of the line containing buffer offset |offset|.
+uint32_t
+TokenStream::SourceCoords::lineNum(uint32_t offset) const
+{
+    return lineIndexToNum(lineIndexOf(offset));
+}
+
+// Returns the distance of |offset| from the start of the line containing it.
+uint32_t
+TokenStream::SourceCoords::columnIndex(uint32_t offset) const
+{
+    const uint32_t startOfLine = lineStartOffsets_[lineIndexOf(offset)];
+    MOZ_ASSERT(offset >= startOfLine);
+    return offset - startOfLine;
+}
+
+// Computes both the line number and the column index of |offset| with a
+// single line lookup, storing them through |lineNum| and |columnIndex|.
+void
+TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum,
+                                                 uint32_t* columnIndex) const
+{
+    const uint32_t index = lineIndexOf(offset);
+    const uint32_t startOfLine = lineStartOffsets_[index];
+
+    *lineNum = lineIndexToNum(index);
+
+    MOZ_ASSERT(offset >= startOfLine);
+    *columnIndex = offset - startOfLine;
+}
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable:4351)
+#endif
+
+// Sets up a token stream over the |length| characters at |base|. The
+// starting line number, column, filename and muted-errors setting are taken
+// from |options|; |smg| is stored as the strict mode getter.
+TokenStream::TokenStream(ExclusiveContext* cx, const ReadOnlyCompileOptions& options,
+                         const char16_t* base, size_t length, StrictModeGetter* smg)
+  : srcCoords(cx, options.lineno),
+    options_(options),
+    tokens(),
+    cursor(),
+    lookahead(),
+    lineno(options.lineno),
+    flags(),
+    linebase(0),
+    prevLinebase(size_t(-1)),
+    userbuf(cx, base, length, options.column),
+    filename(options.filename()),
+    displayURL_(nullptr),
+    sourceMapURL_(nullptr),
+    tokenbuf(cx),
+    cx(cx),
+    mutedErrors(options.mutedErrors()),
+    strictModeGetter(smg)
+{
+    // Nb: this table could be static, but filling it in here is much easier,
+    // and the per-TokenStream cost is trivial. See bug 639420.
+
+    // See Parser::assignExpr() for an explanation of isExprEnding[]: every
+    // entry is zero except those of the token kinds listed here.
+    memset(isExprEnding, 0, sizeof(isExprEnding));
+
+    const TokenKind exprEndingKinds[] = {
+        TOK_COMMA, TOK_SEMI, TOK_COLON, TOK_RP, TOK_RB, TOK_RC
+    };
+    for (TokenKind kind : exprEndingKinds)
+        isExprEnding[kind] = 1;
+}
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+// Validates the compile options this stream was created with. Reports
+// JSMSG_BAD_COLUMN_NUMBER and returns false if the starting column is too
+// large; returns true otherwise.
+bool
+TokenStream::checkOptions()
+{
+    // Constrain starting columns to half of the range of a signed 32-bit
+    // value, to avoid overflow.
+    if (options().column < mozilla::MaxValue<int32_t>::value / 2 + 1)
+        return true;
+
+    reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
+    return false;
+}
+
+// Empty body: no explicit cleanup is performed here.
+TokenStream::~TokenStream()
+{
+}
+
+// Use the fastest available getc.
+// Selection is driven by the HAVE_GETC_UNLOCKED / HAVE__GETC_NOLOCK
+// configuration macros, falling back to plain getc.
+// NOTE(review): fast_getc is not referenced in the portion of the file
+// visible here -- confirm it is still used before keeping it.
+#if defined(HAVE_GETC_UNLOCKED)
+# define fast_getc getc_unlocked
+#elif defined(HAVE__GETC_NOLOCK)
+# define fast_getc _getc_nolock
+#else
+# define fast_getc getc
+#endif
+
+// Line bookkeeping after an end-of-line has been consumed: the current
+// buffer offset becomes the new line base (the old one is kept in
+// prevLinebase so a single EOL can be ungotten), the line number is bumped
+// and the new line start is recorded in srcCoords. A failure to record it
+// is remembered in flags.hitOOM rather than reported here.
+MOZ_ALWAYS_INLINE void
+TokenStream::updateLineInfoForEOL()
+{
+    prevLinebase = linebase;
+    linebase = userbuf.offset();
+    ++lineno;
+
+    const bool recorded = srcCoords.add(lineno, linebase);
+    if (!recorded)
+        flags.hitOOM = true;
+}
+
+// Flag bookkeeping after an end-of-line: clears the isDirtyLine flag.
+MOZ_ALWAYS_INLINE void
+TokenStream::updateFlagsForEOL()
+{
+ flags.isDirtyLine = false;
+}
+
+// Reads the next character, folding every end-of-line form ('\n', '\r',
+// "\r\n", LINE_SEPARATOR, PARA_SEPARATOR) into a single '\n' and updating the
+// line bookkeeping when one is read. At the end of the input, sets
+// flags.isEOF and returns EOF.
+int32_t
+TokenStream::getChar()
+{
+    if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
+        flags.isEOF = true;
+        return EOF;
+    }
+
+    const int32_t c = userbuf.getRawChar();
+
+    bool sawEOL = false;
+    if (MOZ_UNLIKELY(c == '\n')) {
+        sawEOL = true;
+    } else if (MOZ_UNLIKELY(c == '\r')) {
+        // A \r\n pair counts as one EOL: swallow the \n if it is there.
+        if (MOZ_LIKELY(userbuf.hasRawChars()))
+            userbuf.matchRawChar('\n');
+        sawEOL = true;
+    } else if (MOZ_UNLIKELY(c == LINE_SEPARATOR || c == PARA_SEPARATOR)) {
+        sawEOL = true;
+    }
+
+    if (!sawEOL)
+        return c;
+
+    updateLineInfoForEOL();
+    return '\n';
+}
+
+// Reads the next raw character without any EOL handling: no normalization
+// and no line-counter updates. This is safe to use only if (a) any EOL
+// character obtained this way is guaranteed to be put back with
+// ungetCharIgnoreEOL(), and (b) the line-related state (lineno, linebase) is
+// not consulted before that happens. At the end of the input, sets
+// flags.isEOF and returns EOF.
+int32_t
+TokenStream::getCharIgnoreEOL()
+{
+    if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
+        flags.isEOF = true;
+        return EOF;
+    }
+
+    return userbuf.getRawChar();
+}
+
+// Puts back |c|, which must be the character most recently returned by
+// getChar(). EOF is ignored. Ungetting '\n' also rewinds the line
+// bookkeeping; only one EOL can be ungotten in a row.
+void
+TokenStream::ungetChar(int32_t c)
+{
+    if (c == EOF)
+        return;
+
+    MOZ_ASSERT(!userbuf.atStart());
+    userbuf.ungetRawChar();
+
+    if (c != '\n') {
+        MOZ_ASSERT(userbuf.peekRawChar() == c);
+        return;
+    }
+
+    // getChar() normalizes EOLs, so the raw character just stepped over may
+    // be any raw EOL character, not necessarily '\n'.
+#ifdef DEBUG
+    int32_t rawEOL = userbuf.peekRawChar();
+    MOZ_ASSERT(TokenBuf::isRawEOLChar(rawEOL));
+#endif
+
+    // If it's a \r\n sequence, also unget the \r.
+    if (!userbuf.atStart())
+        userbuf.matchRawCharBackwards('\r');
+
+    // Restore the previous line's state; prevLinebase is invalidated so that
+    // a second consecutive EOL unget trips the assertion.
+    MOZ_ASSERT(prevLinebase != size_t(-1)); // we should never get more than one EOL char
+    linebase = prevLinebase;
+    prevLinebase = size_t(-1);
+    lineno--;
+}
+
+// Counterpart of getCharIgnoreEOL(): steps back one raw character without
+// touching any line bookkeeping. EOF is ignored.
+void
+TokenStream::ungetCharIgnoreEOL(int32_t c)
+{
+    if (c != EOF) {
+        MOZ_ASSERT(!userbuf.atStart());
+        userbuf.ungetRawChar();
+    }
+}
+
+// Return true iff |n| raw characters can be read from this without reading
+// past EOF or a '\n', copying those characters into |cp| if so. Nothing is
+// consumed: every character read here is put back before returning. Use
+// skipChars(n) to consume them after checking that they had appropriate
+// values. When false is returned, a prefix of |cp| may have been written.
+bool
+TokenStream::peekChars(int n, char16_t* cp)
+{
+    int fetched = 0;
+    while (fetched < n) {
+        const int32_t c = getCharIgnoreEOL();
+        if (c == EOF)
+            break;
+        if (c == '\n') {
+            ungetCharIgnoreEOL(c);
+            break;
+        }
+        cp[fetched++] = char16_t(c);
+    }
+
+    // Put everything back, last character first.
+    for (int k = fetched; k > 0; k--)
+        ungetCharIgnoreEOL(cp[k - 1]);
+
+    return fetched == n;
+}
+
+// Scans forward from offset |start| over at most |max| characters, stopping
+// early at the end of the buffer or just after the first raw EOL character
+// (which is included in the count). Returns |start| plus the number of
+// characters scanned.
+size_t
+TokenStream::TokenBuf::findEOLMax(size_t start, size_t max)
+{
+    const char16_t* cursor = rawCharPtrAt(start);
+
+    size_t scanned = 0;
+    while (cursor < limit_ && scanned < max) {
+        scanned++;
+        if (TokenBuf::isRawEOLChar(*cursor++))
+            break;
+    }
+    return start + scanned;
+}
+
+// Moves the stream forward to buffer offset |position|, reading through
+// getChar() so that line bookkeeping is kept up to date, and discards any
+// lookahead. The current token's begin offset is reset to the new position
+// and its type is marked as undefined memory. If an OOM was recorded while
+// tracking line starts, it is reported as JSMSG_OUT_OF_MEMORY and the result
+// of reportError() is returned; otherwise returns true.
+bool
+TokenStream::advance(size_t position)
+{
+    const char16_t* const target = userbuf.rawCharPtrAt(position);
+    while (userbuf.addressOfNextRawChar() < target)
+        getChar();
+
+    Token& current = tokens[cursor];
+    current.pos.begin = userbuf.offset();
+    MOZ_MAKE_MEM_UNDEFINED(&current.type, sizeof(current.type));
+    lookahead = 0;
+
+    if (!flags.hitOOM)
+        return true;
+
+    return reportError(JSMSG_OUT_OF_MEMORY);
+}
+
+// Captures the scanner state into |pos| so that seek() can return to it
+// later: read position, flags, line bookkeeping, the current token and all
+// pending lookahead tokens.
+void
+TokenStream::tell(Position* pos)
+{
+    pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
+    pos->flags = flags;
+
+    pos->lineno = lineno;
+    pos->linebase = linebase;
+    pos->prevLinebase = prevLinebase;
+
+    pos->currentToken = currentToken();
+    pos->lookahead = lookahead;
+
+    // Lookahead tokens sit in the ring buffer right after the cursor.
+    for (unsigned n = 0; n < lookahead; n++) {
+        const size_t slot = (cursor + 1 + n) & ntokensMask;
+        pos->lookaheadTokens[n] = tokens[slot];
+    }
+}
+
+// Restores the scanner state previously captured by tell(): read position,
+// flags, line bookkeeping, the current token and the lookahead tokens.
+void
+TokenStream::seek(const Position& pos)
+{
+    userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
+    flags = pos.flags;
+
+    lineno = pos.lineno;
+    linebase = pos.linebase;
+    prevLinebase = pos.prevLinebase;
+
+    // The saved current token goes into the cursor slot, the saved lookahead
+    // tokens into the ring-buffer slots following it.
+    tokens[cursor] = pos.currentToken;
+    lookahead = pos.lookahead;
+    for (unsigned n = 0; n < lookahead; n++) {
+        const size_t slot = (cursor + 1 + n) & ntokensMask;
+        tokens[slot] = pos.lookaheadTokens[n];
+    }
+}
+
+// Like seek(pos), but first extends this stream's line-start table with the
+// entries recorded by |other|. Returns false, without seeking, if extending
+// the table fails.
+bool
+TokenStream::seek(const Position& pos, const TokenStream& other)
+{
+    if (srcCoords.fill(other.srcCoords)) {
+        seek(pos);
+        return true;
+    }
+    return false;
+}
+
+// Reports |errorNumber| at |offset| with a severity that depends on the
+// mode: an error in strict mode code, a strict warning when the
+// extra-warnings option is on, and nothing at all (returning true)
+// otherwise.
+bool
+TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
+                                           va_list args)
+{
+    if (strictMode) {
+        // In strict mode code, this is an error, not merely a warning.
+        return reportCompileErrorNumberVA(offset, JSREPORT_ERROR, errorNumber, args);
+    }
+
+    if (!options().extraWarningsOption)
+        return true;
+
+    return reportCompileErrorNumberVA(offset, JSREPORT_WARNING | JSREPORT_STRICT, errorNumber,
+                                      args);
+}
+
+void
+CompileError::throwError(JSContext* cx)
+{
+ if (JSREPORT_IS_WARNING(flags)) {
+ CallWarningReporter(cx, this);
+ return;
+ }
+
+ // If there's a runtime exception type associated with this error
+ // number, set that as the pending exception. For errors occuring at
+ // compile time, this is very likely to be a JSEXN_SYNTAXERR.
+ //
+ // If an exception is thrown but not caught, the JSREPORT_EXCEPTION
+ // flag will be set in report.flags. Proper behavior for an error
+ // reporter is to ignore a report with this flag for all but top-level
+ // compilation errors. The exception will remain pending, and so long
+ // as the non-top-level "load", "eval", or "compile" native function
+ // returns false, the top-level reporter will eventually receive the
+ // uncaught exception report.
+ ErrorToException(cx, this, nullptr, nullptr);
+}
+
+// Central reporting routine used by the report* wrappers in this file.
+//
+// Builds a CompileError for |errorNumber| (formatted with |args|), located at
+// buffer offset |offset| or without location when |offset| is NoOffset, and
+// -- where possible -- attaches a window of the offending source line.
+//
+// |flags| carries the JSREPORT_* severity; a warning is upgraded to an error
+// when the werror option is set.
+//
+// Returns true only if the report was (still) a warning once fully built;
+// returns false for errors and whenever building the report itself fails.
+bool
+TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
+ va_list args)
+{
+ bool warning = JSREPORT_IS_WARNING(flags);
+
+ if (warning && options().werrorOption) {
+ flags &= ~JSREPORT_WARNING;
+ warning = false;
+ }
+
+ // On the main thread, report the error immediately. When compiling off
+ // thread, save the error so that the main thread can report it later.
+ // (addPendingCompileError receives the address of tempErrPtr, so it
+ // presumably repoints it at storage owned by the context -- confirm.)
+ CompileError tempErr;
+ CompileError* tempErrPtr = &tempErr;
+ if (!cx->isJSContext() && !cx->addPendingCompileError(&tempErrPtr))
+ return false;
+ CompileError& err = *tempErrPtr;
+
+ err.flags = flags;
+ err.errorNumber = errorNumber;
+ err.filename = filename;
+ err.isMuted = mutedErrors;
+ if (offset == NoOffset) {
+ err.lineno = 0;
+ err.column = 0;
+ } else {
+ err.lineno = srcCoords.lineNum(offset);
+ err.column = srcCoords.columnIndex(offset);
+ }
+
+ // If we have no location information, try to get one from the caller.
+ // This only applies when there is an offset but no filename, and only on
+ // a JSContext.
+ bool callerFilename = false;
+ if (offset != NoOffset && !err.filename && cx->isJSContext()) {
+ NonBuiltinFrameIter iter(cx->asJSContext(),
+ FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
+ cx->compartment()->principals());
+ if (!iter.done() && iter.filename()) {
+ callerFilename = true;
+ err.filename = iter.filename();
+ err.lineno = iter.computeLine(&err.column);
+ }
+ }
+
+ if (!ExpandErrorArgumentsVA(cx, GetErrorMessage, nullptr, errorNumber,
+ nullptr, ArgumentsAreLatin1, &err, args))
+ {
+ return false;
+ }
+
+ // Given a token, T, that we want to complain about: if T's (starting)
+ // lineno doesn't match TokenStream's lineno, that means we've scanned past
+ // the line that T starts on, which makes it hard to print some or all of
+ // T's (starting) line for context.
+ //
+ // So we don't even try, leaving report.linebuf and friends zeroed. This
+ // means that any error involving a multi-line token (e.g. an unterminated
+ // multi-line string literal) won't have a context printed.
+ //
+ // No context is attached either when the location was taken from the
+ // caller (callerFilename), since it then refers to different source.
+ if (offset != NoOffset && err.lineno == lineno && !callerFilename) {
+ // We show only a portion (a "window") of the line around the erroneous
+ // token -- the first char in the token, plus |windowRadius| chars
+ // before it and |windowRadius - 1| chars after it. This is because
+ // lines can be very long and printing the whole line is (a) not that
+ // helpful, and (b) can waste a lot of memory. See bug 634444.
+ static const size_t windowRadius = 60;
+
+ // The window must start within the current line, no earlier than
+ // windowRadius characters before offset.
+ size_t windowStart = (offset - linebase > windowRadius) ?
+ offset - windowRadius :
+ linebase;
+
+ // The window must start within the portion of the current line
+ // that we actually have in our buffer.
+ if (windowStart < userbuf.startOffset())
+ windowStart = userbuf.startOffset();
+
+ // The window must end within the current line, no later than
+ // windowRadius after offset.
+ size_t windowEnd = userbuf.findEOLMax(offset, windowRadius);
+ size_t windowLength = windowEnd - windowStart;
+ MOZ_ASSERT(windowLength <= windowRadius * 2);
+
+ // Create the windowed strings.
+ // The explicit terminator is appended in addition to the windowLength
+ // characters; windowLength itself does not count it.
+ StringBuffer windowBuf(cx);
+ if (!windowBuf.append(userbuf.rawCharPtrAt(windowStart), windowLength) ||
+ !windowBuf.append('\0'))
+ {
+ return false;
+ }
+
+ // The window into the offending source line, without final \n.
+ UniqueTwoByteChars linebuf(windowBuf.stealChars());
+ if (!linebuf)
+ return false;
+
+ // Ownership of the buffer is released to |err|; the last argument is
+ // the position of the offending token within the window.
+ err.initOwnedLinebuf(linebuf.release(), windowLength, offset - windowStart);
+ }
+
+ // Only a JSContext delivers the report right away.
+ if (cx->isJSContext())
+ err.throwError(cx->asJSContext());
+
+ return warning;
+}
+
+// Varargs front end for reportStrictModeErrorNumberVA(), located at the
+// start of the current token and using the stream's current strict mode
+// state.
+bool
+TokenStream::reportStrictModeError(unsigned errorNumber, ...)
+{
+    va_list ap;
+    va_start(ap, errorNumber);
+    const bool ok = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
+                                                  errorNumber, ap);
+    va_end(ap);
+    return ok;
+}
+
+bool
+TokenStream::reportError(unsigned errorNumber, ...)
+{
+ va_list args;
+ va_start(args, errorNumber);
+ bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
+ args);
+ va_end(args);
+ return result;
+}
+
+bool
+TokenStream::reportErrorNoOffset(unsigned errorNumber, ...)
+{
+ va_list args;
+ va_start(args, errorNumber);
+ bool result = reportCompileErrorNumberVA(NoOffset, JSREPORT_ERROR, errorNumber,
+ args);
+ va_end(args);
+ return result;
+}
+
+// Reports |errorNumber| as a warning (JSREPORT_WARNING) located at the start
+// of the current token. Returns what reportCompileErrorNumberVA() returns.
+bool
+TokenStream::reportWarning(unsigned errorNumber, ...)
+{
+    va_list ap;
+    va_start(ap, errorNumber);
+    const bool ok = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
+                                               errorNumber, ap);
+    va_end(ap);
+    return ok;
+}
+
+// Reports |errorNumber| at |offset| as a strict warning, but only when the
+// extra-warnings option is enabled; otherwise does nothing and returns true.
+bool
+TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
+{
+    if (options().extraWarningsOption) {
+        return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber,
+                                          args);
+    }
+
+    return true;
+}
+
+void
+TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
+{
+ va_list args;
+ va_start(args, errorNumber);
+ unsigned flags = options().throwOnAsmJSValidationFailureOption
+ ? JSREPORT_ERROR
+ : JSREPORT_WARNING;
+ reportCompileErrorNumberVA(offset, flags, errorNumber, args);
+ va_end(args);
+}
+
+// Called right after a '\' has been read: looks ahead for a Unicode escape
+// sequence ("uXXXX" or "u{...}") without consuming anything.
+//
+// On success, stores the code point in |*codePoint| and returns the number
+// of characters the escape occupies after the backslash (5 for "uXXXX").
+// Otherwise returns 0 and leaves |*codePoint| untouched. In both cases the
+// read position is the same on return as on entry.
+uint32_t
+TokenStream::peekUnicodeEscape(uint32_t* codePoint)
+{
+    int32_t c = getCharIgnoreEOL();
+    if (c != 'u') {
+        ungetCharIgnoreEOL(c);
+        return 0;
+    }
+
+    // |c| becomes the first character after the 'u'.
+    c = getCharIgnoreEOL();
+
+    uint32_t length = 0;
+    char16_t rest[3];
+    if (JS7_ISHEX(c) && peekChars(3, rest) &&
+        JS7_ISHEX(rest[0]) && JS7_ISHEX(rest[1]) && JS7_ISHEX(rest[2]))
+    {
+        // Four hex digits: \uXXXX.
+        *codePoint = (JS7_UNHEX(c) << 12) |
+                     (JS7_UNHEX(rest[0]) << 8) |
+                     (JS7_UNHEX(rest[1]) << 4) |
+                     JS7_UNHEX(rest[2]);
+        length = 5;
+    } else if (c == '{') {
+        // Braced form: \u{...}.
+        length = peekExtendedUnicodeEscape(codePoint);
+    }
+
+    // Put back the two characters read here, last one first.
+    ungetCharIgnoreEOL(c);
+    ungetCharIgnoreEOL('u');
+    return length;
+}
+
+// Looks ahead for the remainder of a braced escape, "\u{H...}", whose
+// opening brace has already been read. Nothing is consumed: every character
+// read here is put back before returning.
+//
+// Any number of leading zeros is accepted, followed by at most six further
+// hex digits, and the value must not exceed unicode::NonBMPMax. On success,
+// stores the value in |*codePoint| and returns the escape's length counted
+// from the 'u' through the closing brace; otherwise returns 0.
+uint32_t
+TokenStream::peekExtendedUnicodeEscape(uint32_t* codePoint)
+{
+    // The opening brace character was already read.
+    int32_t c = getCharIgnoreEOL();
+
+    // Skip leading zeros.
+    uint32_t zeroCount = 0;
+    for (; c == '0'; c = getCharIgnoreEOL())
+        zeroCount++;
+
+    // Collect up to six significant hex digits, remembering them so that
+    // they can be put back afterwards.
+    char16_t digits[6];
+    size_t digitCount = 0;
+    uint32_t value = 0;
+    while (JS7_ISHEX(c) && digitCount < 6) {
+        digits[digitCount++] = c;
+        value = (value << 4) | JS7_UNHEX(c);
+        c = getCharIgnoreEOL();
+    }
+
+    // Valid only if the brace closes right here, at least one digit
+    // (possibly just zeros) was present, and the value is in range.
+    uint32_t length = 0;
+    if (c == '}' && (zeroCount > 0 || digitCount > 0) && value <= unicode::NonBMPMax) {
+        *codePoint = value;
+        length = zeroCount + digitCount + 3;    // +3 for 'u', '{' and '}'
+    }
+
+    // Put everything back in reverse reading order.
+    ungetCharIgnoreEOL(c);
+    for (size_t k = digitCount; k > 0; k--)
+        ungetCharIgnoreEOL(digits[k - 1]);
+    for (uint32_t z = zeroCount; z > 0; z--)
+        ungetCharIgnoreEOL('0');
+
+    return length;
+}
+
+// If a Unicode escape follows whose code point can start an identifier,
+// consumes it and returns its length (as reported by peekUnicodeEscape).
+// Otherwise consumes nothing and returns 0. Note that |*codePoint| may have
+// been written even when 0 is returned.
+uint32_t
+TokenStream::matchUnicodeEscapeIdStart(uint32_t* codePoint)
+{
+    const uint32_t escapeLength = peekUnicodeEscape(codePoint);
+    if (escapeLength == 0 || !unicode::IsIdentifierStart(*codePoint))
+        return 0;
+
+    skipChars(escapeLength);
+    return escapeLength;
+}
+
+// If a Unicode escape follows whose code point can continue an identifier,
+// consumes it and returns true. Otherwise consumes nothing and returns
+// false. Note that |*codePoint| may have been written even when false is
+// returned.
+bool
+TokenStream::matchUnicodeEscapeIdent(uint32_t* codePoint)
+{
+    const uint32_t escapeLength = peekUnicodeEscape(codePoint);
+    if (escapeLength == 0 || !unicode::IsIdentifierPart(*codePoint))
+        return false;
+
+    skipChars(escapeLength);
+    return true;
+}
+
+// Prefix comparison helper: returns true iff the characters at |p| start
+// with the NUL-terminated string |q|. An empty |q| matches anything. |p|
+// must have at least as many readable characters as |q| is long.
+static bool
+CharsMatch(const char16_t* p, const char* q) {
+    for (; *q; ++p, ++q) {
+        if (*p != *q)
+            return false;
+    }
+    return true;
+}
+
+// Looks for the debugging directives that may follow a comment introducer,
+// i.e. "//# sourceURL" and "//# sourceMappingURL" (the "//@" spelling is
+// deprecated). Returns false as soon as one of the two lookups fails.
+bool
+TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)
+{
+    // To avoid a crashing bug in IE, several JavaScript transpilers wrap
+    // single line comments containing a source mapping URL inside a
+    // multiline comment. To avoid potentially expensive lookahead and
+    // backtracking, we only check for this case if we encounter a '#'
+    // character.
+
+    return getDisplayURL(isMultiline, shouldWarnDeprecated) &&
+           getSourceMappingURL(isMultiline, shouldWarnDeprecated);
+}
+
+// Shared implementation of the directive-comment lookups.
+//
+// If the next |directiveLength| characters equal |directive|, consumes them
+// and reads the value that follows -- everything up to the next whitespace,
+// NUL, EOF or, inside a multi-line comment, the closing "*/" -- into a newly
+// allocated, NUL-terminated buffer stored in |*destination|. If the
+// directive does not match, nothing is consumed and |*destination| is left
+// alone.
+//
+// |directiveLength| must not exceed 18, the size of the local peek buffer.
+// |errorMsgPragma| is the directive name passed along with the deprecation
+// warning that is issued when |shouldWarnDeprecated| is set.
+//
+// Returns false if the deprecation warning call returns false or on
+// allocation failure; true otherwise, including when the directive is
+// absent or has no value.
+bool
+TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
+ const char* directive, int directiveLength,
+ const char* errorMsgPragma,
+ UniqueTwoByteChars* destination)
+{
+ MOZ_ASSERT(directiveLength <= 18);
+ char16_t peeked[18];
+ int32_t c;
+
+ if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
+ if (shouldWarnDeprecated &&
+ !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
+ return false;
+
+ skipChars(directiveLength);
+ // tokenbuf is reused as scratch space for the directive's value.
+ tokenbuf.clear();
+
+ while ((c = peekChar()) && c != EOF && !unicode::IsSpaceOrBOM2(c)) {
+ getChar();
+ // Debugging directives can occur in both single- and multi-line
+ // comments. If we're currently inside a multi-line comment, we also
+ // need to recognize multi-line comment terminators.
+ if (isMultiline && c == '*' && peekChar() == '/') {
+ // Put the '*' back so that the "*/" terminator is left in the input.
+ ungetChar('*');
+ break;
+ }
+ if (!tokenbuf.append(c))
+ return false;
+ }
+
+ if (tokenbuf.empty()) {
+ // The directive's URL was missing, but this is not quite an
+ // exception that we should stop and drop everything for.
+ return true;
+ }
+
+ size_t length = tokenbuf.length();
+
+ // One extra element for the terminating NUL written below.
+ *destination = cx->make_pod_array<char16_t>(length + 1);
+ if (!*destination)
+ return false;
+
+ PodCopy(destination->get(), tokenbuf.begin(), length);
+ (*destination)[length] = '\0';
+ }
+
+ return true;
+}
+
+// Matches comments of the form "//# sourceURL=<url>" or
+// "/\* //# sourceURL=<url> *\/" and stores the URL in displayURL_.
+bool
+TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)
+{
+    // Note that while these are labeled "sourceURL" in the source text,
+    // internally we refer to it as a "displayURL" to distinguish what the
+    // developer would like to refer to the source as from the source's
+    // actual URL.
+    const char* const directive = " sourceURL=";
+    const int directiveLength = 11;
+
+    return getDirective(isMultiline, shouldWarnDeprecated, directive, directiveLength,
+                        "sourceURL", &displayURL_);
+}
+
+// Matches comments of the form "//# sourceMappingURL=<url>" or
+// "/\* //# sourceMappingURL=<url> *\/" and stores the URL in sourceMapURL_.
+bool
+TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
+{
+    const char* const directive = " sourceMappingURL=";
+    const int directiveLength = 18;
+
+    return getDirective(isMultiline, shouldWarnDeprecated, directive, directiveLength,
+                        "sourceMappingURL", &sourceMapURL_);
+}
+
+// Advances the cursor to the next slot of the token ring buffer and returns
+// that slot, with its begin position set to the current buffer offset plus
+// |adjust| (callers pass a negative value when the token's first character
+// has already been consumed).
+MOZ_ALWAYS_INLINE Token*
+TokenStream::newToken(ptrdiff_t adjust)
+{
+    cursor = (cursor + 1) & ntokensMask;
+
+    Token& fresh = tokens[cursor];
+    fresh.pos.begin = userbuf.offset() + adjust;
+
+    // NOTE: pos.end is not set until the very end of getTokenInternal().
+    MOZ_MAKE_MEM_UNDEFINED(&fresh.pos.end, sizeof(fresh.pos.end));
+
+    return &fresh;
+}
+
+// Atomizes the characters currently held in |cb| by passing its begin
+// pointer and length to AtomizeChars, and returns the result unchanged.
+MOZ_ALWAYS_INLINE JSAtom*
+TokenStream::atomize(ExclusiveContext* cx, CharBuffer& cb)
+{
+ return AtomizeChars(cx, cb.begin(), cb.length());
+}
+
+#ifdef DEBUG
+// Debug-only sanity check for a token: its kind must be a real token kind
+// and its end must not precede its beginning.
+static bool
+IsTokenSane(Token* tp)
+{
+    // Nb: TOK_EOL should never be used in an actual Token; it should only be
+    // returned as a TokenKind from peekTokenSameLine().
+    const bool kindIsValid = tp->type >= 0 && tp->type < TOK_LIMIT && tp->type != TOK_EOL;
+
+    // The position is only inspected once the kind has been validated.
+    return kindIsValid && tp->pos.end >= tp->pos.begin;
+}
+#endif
+
+// Re-reads the identifier starting at |identStart| into tokenbuf, replacing
+// each Unicode escape that is a valid identifier part by the code point it
+// denotes. The read position is the same on return as on entry, whatever the
+// outcome. Returns false if appending to tokenbuf fails.
+bool
+TokenStream::putIdentInTokenbuf(const char16_t* identStart)
+{
+    // Remember where scanning has to resume, then rewind to the identifier.
+    const char16_t* const resumeAt = userbuf.addressOfNextRawChar();
+    userbuf.setAddressOfNextRawChar(identStart);
+
+    tokenbuf.clear();
+
+    bool ok = true;
+    while (true) {
+        int32_t c = getCharIgnoreEOL();
+        if (!unicode::IsIdentifierPart(char16_t(c))) {
+            // Only an escape denoting an identifier part can continue the
+            // identifier; anything else ends it.
+            uint32_t escaped;
+            if (c != '\\' || !matchUnicodeEscapeIdent(&escaped))
+                break;
+            c = escaped;
+        }
+        if (!tokenbuf.append(c)) {
+            ok = false;
+            break;
+        }
+    }
+
+    userbuf.setAddressOfNextRawChar(resumeAt);
+    return ok;
+}
+
+// Decides how the keyword table entry |kw| is to be treated:
+//  - 'await' while it is not currently a keyword: it is a plain name
+//    (|*ttp| is set to TOK_NAME if |ttp| is non-null) and true is returned;
+//  - a reserved word: JSMSG_RESERVED_ID is reported as an error;
+//  - a strict-mode reserved word: JSMSG_RESERVED_ID is reported through
+//    reportStrictModeError();
+//  - anything else is a working keyword whose kind is stored in |*ttp|.
+// In the two reporting cases the result of the report call is returned.
+//
+// NOTE(review): |ttp| is null-checked in the 'await' case but dereferenced
+// unconditionally for working keywords -- confirm whether callers may pass
+// nullptr and, if so, what should happen for a working keyword.
+bool
+TokenStream::checkForKeyword(const KeywordInfo* kw, TokenKind* ttp)
+{
+    const TokenKind kind = kw->tokentype;
+
+    if (kind == TOK_AWAIT && !awaitIsKeyword) {
+        if (ttp)
+            *ttp = TOK_NAME;
+        return true;
+    }
+
+    switch (kind) {
+      case TOK_RESERVED:
+        return reportError(JSMSG_RESERVED_ID, kw->chars);
+
+      case TOK_STRICT_RESERVED:
+        return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
+
+      default:
+        break;
+    }
+
+    // Working keyword.
+    *ttp = kind;
+    return true;
+}
+
+// Atom flavour of checkForKeyword(): an atom that is not in the keyword
+// table needs no further treatment (true is returned and |*ttp| is left
+// alone); otherwise the decision is delegated to the KeywordInfo overload.
+bool
+TokenStream::checkForKeyword(JSAtom* atom, TokenKind* ttp)
+{
+    if (const KeywordInfo* kw = FindKeyword(atom))
+        return checkForKeyword(kw, ttp);
+
+    return true;
+}
+
+// Classification of a token's first character, used as the element domain of
+// the firstCharKinds[] lookup table. Values up to OneChar_Max double as
+// TokenKind values; the remaining categories are numbered from TOK_LIMIT
+// upwards so that the two ranges cannot collide.
+enum FirstCharKind {
+ // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
+ // token that cannot also be a prefix of a longer token. E.g. ';' has the
+ // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
+ // that begin with '+'.
+ //
+ // The few token kinds satisfying these properties cover roughly 35--45%
+ // of the tokens seen in practice.
+ //
+ // We represent the 'OneChar' kind with any positive value less than
+ // TOK_LIMIT. This representation lets us associate each one-char token
+ // char16_t with a TokenKind and thus avoid a subsequent char16_t-to-TokenKind
+ // conversion.
+ //
+ // NOTE(review): the range declared below starts at 0, not at 1 as "positive"
+ // above suggests; presumably no one-char token has TokenKind 0 -- confirm.
+ OneChar_Min = 0,
+ OneChar_Max = TOK_LIMIT - 1,
+
+ Space = TOK_LIMIT, // first non-OneChar category
+ Ident,
+ Dec,
+ String,
+ EOL,
+ BasePrefix,
+ Other,
+
+ LastCharKind = Other // largest value; checked against the table's element size
+};
+
+// Lookup table giving the FirstCharKind of each character code 0..127
+// (128 entries, ten per row).
+//
+// OneChar: 40, 41, 44, 58, 59, 63, 91, 93, 123, 125, 126:
+// '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
+// Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
+// String: 34, 39, 96: '"', '\'', '`' (the backquote via the Templat alias)
+// Dec: 49..57: '1'..'9'
+// BasePrefix: 48: '0'
+// Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
+// EOL: 10, 13: '\n', '\r'
+// Other: everything else, including 43 '+', 46 '.' and 61 '=', which have no
+// category of their own in FirstCharKind.
+//
+#define T_COMMA TOK_COMMA
+#define T_COLON TOK_COLON
+#define T_BITNOT TOK_BITNOT
+#define Templat String
+#define _______ Other
+static const uint8_t firstCharKinds[] = {
+/* 0 1 2 3 4 5 6 7 8 9 */
+/* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
+/* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
+/* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
+/* 40+ */ TOK_LP, TOK_RP, _______, _______, T_COMMA,_______, _______, _______,BasePrefix, Dec,
+/* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, T_COLON,TOK_SEMI,
+/* 60+ */ _______, _______, _______,TOK_HOOK, _______, Ident, Ident, Ident, Ident, Ident,
+/* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
+/* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
+/* 90+ */ Ident, TOK_LB, _______, TOK_RB, _______, Ident, Templat, Ident, Ident, Ident,
+/* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
+/* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
+/* 120+ */ Ident, Ident, Ident, TOK_LC, _______, TOK_RC,T_BITNOT, _______
+};
+#undef T_COMMA
+#undef T_COLON
+#undef T_BITNOT
+#undef Templat
+#undef _______
+
+// Every FirstCharKind value must be representable in a table element.
+static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
+ "Elements of firstCharKinds[] are too small");
+/* Scans the next token from userbuf and stores its kind in |*ttp|.
+ *
+ * |modifier| selects context-dependent scanning: TemplateTail resumes the
+ * template string in progress, Operand lets '/' begin a regexp literal, and
+ * KeywordIsName suppresses keyword recognition for identifiers.
+ *
+ * Control flow is goto-driven. |retry| restarts scanning after skipped
+ * whitespace, line terminators and comments; every completed token leaves
+ * via |out| (returns true) and every failure via |error| (sets
+ * flags.hadError, marks |*ttp| undefined and returns false). If flags.hitOOM
+ * is set on either exit, the result of reportError(JSMSG_OUT_OF_MEMORY) is
+ * returned instead.
+ */
+bool
+TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
+{
+ int c;  // most recently read character, or EOF
+ uint32_t qc;  // out-param for the Unicode-escape matchers; not otherwise read here
+ Token* tp;  // the token being filled in
+ FirstCharKind c1kind;  // classification of the token's first character
+ const char16_t* numStart;  // start of a numeric literal's digits within userbuf
+ bool hasExp;  // numeric literal has an exponent part
+ DecimalPoint decimalPoint;  // whether the numeric literal contains a '.'
+ const char16_t* identStart;  // start of the identifier within userbuf
+ bool hadUnicodeEscape;  // identifier contains at least one Unicode escape
+
+ // Check if in the middle of a template string. Have to get this out of
+ // the way first.
+ if (MOZ_UNLIKELY(modifier == TemplateTail)) {
+ if (!getStringOrTemplateToken('`', &tp))
+ goto error;
+ goto out;
+ }
+
+ retry:  // re-entered after skipping whitespace, line terminators and comments
+ if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
+ tp = newToken(0);
+ tp->type = TOK_EOF;
+ flags.isEOF = true;
+ goto out;
+ }
+
+ c = userbuf.getRawChar();
+ MOZ_ASSERT(c != EOF);
+
+ // Chars not in the range 0..127 are rare. Getting them out of the way
+ // early allows subsequent checking to be faster.
+ if (MOZ_UNLIKELY(c >= 128)) {
+ if (unicode::IsSpaceOrBOM2(c)) {  // skipped, like ASCII whitespace
+ if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {  // these two also end the line
+ updateLineInfoForEOL();
+ updateFlagsForEOL();
+ }
+
+ goto retry;
+ }
+
+ tp = newToken(-1);
+
+ static_assert('$' < 128,
+ "IdentifierStart contains '$', but as !IsUnicodeIDStart('$'), "
+ "ensure that '$' is never handled here");
+ static_assert('_' < 128,
+ "IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
+ "ensure that '_' is never handled here");
+ if (unicode::IsUnicodeIDStart(c)) {
+ identStart = userbuf.addressOfNextRawChar() - 1;
+ hadUnicodeEscape = false;
+ goto identifier;
+ }
+
+ goto badchar;
+ }
+
+ // Get the token kind, based on the first char. The ordering of c1kind
+ // comparison is based on the frequency of tokens in real code -- Parsemark
+ // (which represents typical JS code on the web) and the Unreal demo (which
+ // represents asm.js code).
+ //
+ // Parsemark Unreal
+ // OneChar 32.9% 39.7%
+ // Space 25.0% 0.6%
+ // Ident 19.2% 36.4%
+ // Dec 7.2% 5.1%
+ // String 7.9% 0.0%
+ // EOL 1.7% 0.0%
+ // BasePrefix 0.4% 4.9%
+ // Other 5.7% 13.3%
+ //
+ // The ordering is based mostly on Parsemark frequencies, with Unreal
+ // frequencies used to break close categories (e.g. |Dec| and |String|).
+ // |Other| is biggish, but no other token kind is common enough for it to
+ // be worth adding extra values to FirstCharKind.
+ //
+ c1kind = FirstCharKind(firstCharKinds[c]);
+
+ // Look for an unambiguous single-char token.
+ // For these, the table entry is itself the TokenKind value.
+ if (c1kind <= OneChar_Max) {
+ tp = newToken(-1);
+ tp->type = TokenKind(c1kind);
+ goto out;
+ }
+
+ // Skip over non-EOL whitespace chars.
+ //
+ if (c1kind == Space)
+ goto retry;
+
+ // Look for an identifier.
+ // The |identifier| label is also entered from the non-ASCII and '\\' paths.
+ if (c1kind == Ident) {
+ tp = newToken(-1);
+ identStart = userbuf.addressOfNextRawChar() - 1;
+ hadUnicodeEscape = false;
+
+ identifier:
+ for (;;) {
+ c = getCharIgnoreEOL();
+ if (c == EOF)
+ break;
+ if (!unicode::IsIdentifierPart(char16_t(c))) {
+ if (c != '\\' || !matchUnicodeEscapeIdent(&qc))  // only a matched Unicode escape may continue the identifier
+ break;
+ hadUnicodeEscape = true;
+ }
+ }
+ ungetCharIgnoreEOL(c);  // put back the character that ended the identifier
+
+ // Identifiers containing no Unicode escapes can be processed directly
+ // from userbuf. The rest must use the escapes converted via tokenbuf
+ // before atomizing.
+ const char16_t* chars;
+ size_t length;
+ if (hadUnicodeEscape) {
+ if (!putIdentInTokenbuf(identStart))
+ goto error;
+
+ chars = tokenbuf.begin();
+ length = tokenbuf.length();
+ } else {
+ chars = identStart;
+ length = userbuf.addressOfNextRawChar() - identStart;
+ }
+
+ // Represent keywords as keyword tokens unless told otherwise.
+ if (modifier != KeywordIsName) {
+ if (const KeywordInfo* kw = FindKeyword(chars, length)) {
+ // That said, keywords can't contain escapes. (Contexts where
+ // keywords are treated as names, that also sometimes treat
+ // keywords as keywords, must manually check this requirement.)
+ // There are two exceptions
+ // 1) StrictReservedWords: These keywords need to be treated as
+ // names in non-strict mode.
+ // 2) yield is also treated as a name if it contains an escape
+ // sequence. The parser must handle this case separately.
+ if (hadUnicodeEscape && !(
+ (kw->tokentype == TOK_STRICT_RESERVED && !strictMode()) ||
+ kw->tokentype == TOK_YIELD))
+ {
+ reportError(JSMSG_ESCAPED_KEYWORD);
+ goto error;
+ }
+
+ tp->type = TOK_NAME;
+ if (!checkForKeyword(kw, &tp->type))
+ goto error;
+ if (tp->type != TOK_NAME && !hadUnicodeEscape)  // unescaped keyword token is done; everything else falls through and becomes TOK_NAME
+ goto out;
+ }
+ }
+
+ JSAtom* atom = AtomizeChars(cx, chars, length);
+ if (!atom)
+ goto error;
+ tp->type = TOK_NAME;
+ tp->setName(atom->asPropertyName());
+ goto out;
+ }
+
+ // Look for a decimal number.
+ // |decimal| and |decimal_dot| are also entered via goto from the '0' and '.' paths.
+ if (c1kind == Dec) {
+ tp = newToken(-1);
+ numStart = userbuf.addressOfNextRawChar() - 1;
+
+ decimal:
+ decimalPoint = NoDecimal;
+ hasExp = false;
+ while (JS7_ISDEC(c))  // integer part
+ c = getCharIgnoreEOL();
+
+ if (c == '.') {
+ decimalPoint = HasDecimal;
+ decimal_dot:
+ do {  // fraction digits
+ c = getCharIgnoreEOL();
+ } while (JS7_ISDEC(c));
+ }
+ if (c == 'e' || c == 'E') {  // exponent: optional sign, then at least one digit
+ hasExp = true;
+ c = getCharIgnoreEOL();
+ if (c == '+' || c == '-')
+ c = getCharIgnoreEOL();
+ if (!JS7_ISDEC(c)) {
+ ungetCharIgnoreEOL(c);
+ reportError(JSMSG_MISSING_EXPONENT);
+ goto error;
+ }
+ do {
+ c = getCharIgnoreEOL();
+ } while (JS7_ISDEC(c));
+ }
+ ungetCharIgnoreEOL(c);
+
+ if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {  // e.g. "3in" is an error, not two tokens
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+
+ // Unlike identifiers and strings, numbers cannot contain escaped
+ // chars, so we don't need to use tokenbuf. Instead we can just
+ // convert the char16_t characters in userbuf to the numeric value.
+ double dval;
+ if (!((decimalPoint == HasDecimal) || hasExp)) {
+ if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
+ goto error;
+ } else {
+ const char16_t* dummy;
+ if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
+ goto error;
+ }
+ tp->type = TOK_NUMBER;
+ tp->setNumber(dval, decimalPoint);
+ goto out;
+ }
+
+ // Look for a string or a template string.
+ // |c| is the opening delimiter and is passed on as the terminator to look for.
+ if (c1kind == String) {
+ if (!getStringOrTemplateToken(c, &tp))
+ goto error;
+ goto out;
+ }
+
+ // Skip over EOL chars, updating line state along the way.
+ //
+ if (c1kind == EOL) {
+ // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
+ if (c == '\r' && userbuf.hasRawChars())
+ userbuf.matchRawChar('\n');
+ updateLineInfoForEOL();
+ updateFlagsForEOL();
+ goto retry;
+ }
+
+ // Look for a hexadecimal, octal, or binary number.
+ // '0' is the only character the table classifies as BasePrefix.
+ if (c1kind == BasePrefix) {
+ tp = newToken(-1);
+ int radix;
+ c = getCharIgnoreEOL();
+ if (c == 'x' || c == 'X') {
+ radix = 16;
+ c = getCharIgnoreEOL();
+ if (!JS7_ISHEX(c)) {
+ ungetCharIgnoreEOL(c);
+ reportError(JSMSG_MISSING_HEXDIGITS);
+ goto error;
+ }
+ numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0x'
+ while (JS7_ISHEX(c))
+ c = getCharIgnoreEOL();
+ } else if (c == 'b' || c == 'B') {
+ radix = 2;
+ c = getCharIgnoreEOL();
+ if (c != '0' && c != '1') {
+ ungetCharIgnoreEOL(c);
+ reportError(JSMSG_MISSING_BINARY_DIGITS);
+ goto error;
+ }
+ numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0b'
+ while (c == '0' || c == '1')
+ c = getCharIgnoreEOL();
+ } else if (c == 'o' || c == 'O') {
+ radix = 8;
+ c = getCharIgnoreEOL();
+ if (c < '0' || c > '7') {
+ ungetCharIgnoreEOL(c);
+ reportError(JSMSG_MISSING_OCTAL_DIGITS);
+ goto error;
+ }
+ numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0o'
+ while ('0' <= c && c <= '7')
+ c = getCharIgnoreEOL();
+ } else if (JS7_ISDEC(c)) {  // leading-zero octal without an 'o' prefix
+ radix = 8;
+ numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0'
+ while (JS7_ISDEC(c)) {
+ // Octal integer literals are not permitted in strict mode code.
+ if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
+ goto error;
+
+ // Outside strict mode, we permit 08 and 09 as decimal numbers,
+ // which makes our behaviour a superset of the ECMA numeric
+ // grammar. We might not always be so permissive, so we warn
+ // about it.
+ if (c >= '8') {  // '8' or '9': not an octal digit
+ if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
+ goto error;
+ }
+ goto decimal; // use the decimal scanner for the rest of the number
+ }
+ c = getCharIgnoreEOL();
+ }
+ } else {
+ // '0' not followed by a radix prefix (x, b or o, in either case) or a digit; scan as a decimal number.
+ numStart = userbuf.addressOfNextRawChar() - 1;
+ goto decimal;
+ }
+ ungetCharIgnoreEOL(c);
+
+ if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+
+ double dval;
+ const char16_t* dummy;
+ if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
+ goto error;
+ tp->type = TOK_NUMBER;
+ tp->setNumber(dval, NoDecimal);
+ goto out;
+ }
+
+ // This handles everything else.
+ // That is: multi-character operators, '.', comments, regexps and '\\'-initiated identifiers.
+ MOZ_ASSERT(c1kind == Other);
+ tp = newToken(-1);
+ switch (c) {
+ case '.':
+ c = getCharIgnoreEOL();
+ if (JS7_ISDEC(c)) {  // ".5"-style number with no integer part
+ numStart = userbuf.addressOfNextRawChar() - 2;  // back up over the digit and the '.'
+ decimalPoint = HasDecimal;
+ hasExp = false;
+ goto decimal_dot;
+ }
+ if (c == '.') {
+ if (matchChar('.')) {
+ tp->type = TOK_TRIPLEDOT;
+ goto out;
+ }
+ }
+ ungetCharIgnoreEOL(c);
+ tp->type = TOK_DOT;
+ goto out;
+
+ case '=':
+ if (matchChar('='))
+ tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
+ else if (matchChar('>'))
+ tp->type = TOK_ARROW;
+ else
+ tp->type = TOK_ASSIGN;
+ goto out;
+
+ case '+':
+ if (matchChar('+'))
+ tp->type = TOK_INC;
+ else
+ tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
+ goto out;
+
+ case '\\': {  // only valid here as a Unicode escape that starts an identifier
+ uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc);
+ if (escapeLength > 0) {
+ identStart = userbuf.addressOfNextRawChar() - escapeLength - 1;
+ hadUnicodeEscape = true;
+ goto identifier;
+ }
+ goto badchar;
+ }
+
+ case '|':
+ if (matchChar('|'))
+ tp->type = TOK_OR;
+ else
+ tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
+ goto out;
+
+ case '^':
+ tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
+ goto out;
+
+ case '&':
+ if (matchChar('&'))
+ tp->type = TOK_AND;
+ else
+ tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
+ goto out;
+
+ case '!':
+ if (matchChar('='))
+ tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
+ else
+ tp->type = TOK_NOT;
+ goto out;
+
+ case '<':
+ // NB: treat HTML begin-comment as comment-till-end-of-line.
+ if (matchChar('!')) {
+ if (matchChar('-')) {
+ if (matchChar('-'))
+ goto skipline;
+ ungetChar('-');
+ }
+ ungetChar('!');
+ }
+ if (matchChar('<')) {
+ tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
+ } else {
+ tp->type = matchChar('=') ? TOK_LE : TOK_LT;
+ }
+ goto out;
+
+ case '>':
+ if (matchChar('>')) {
+ if (matchChar('>'))
+ tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
+ else
+ tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
+ } else {
+ tp->type = matchChar('=') ? TOK_GE : TOK_GT;
+ }
+ goto out;
+
+ case '*':
+ if (matchChar('*'))
+ tp->type = matchChar('=') ? TOK_POWASSIGN : TOK_POW;
+ else
+ tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
+ goto out;
+
+ case '/':
+ // Look for a single-line comment.
+ if (matchChar('/')) {
+ c = peekChar();
+ if (c == '@' || c == '#') {  // possible comment directive; the '@' form requests a warning
+ bool shouldWarn = getChar() == '@';
+ if (!getDirectives(false, shouldWarn))
+ goto error;
+ }
+
+ skipline:  // also reached for "<!--" and "-->" comments
+ while ((c = getChar()) != EOF && c != '\n')
+ continue;
+ ungetChar(c);
+ cursor = (cursor - 1) & ntokensMask;  // NOTE(review): appears to give back the slot taken by newToken(), as a comment yields no token -- confirm
+ goto retry;
+ }
+
+ // Look for a multi-line comment.
+ if (matchChar('*')) {
+ unsigned linenoBefore = lineno;  // to detect whether the comment spans lines
+ while ((c = getChar()) != EOF &&
+ !(c == '*' && matchChar('/'))) {
+ if (c == '@' || c == '#') {
+ bool shouldWarn = c == '@';
+ if (!getDirectives(true, shouldWarn))
+ goto error;
+ }
+ }
+ if (c == EOF) {
+ reportError(JSMSG_UNTERMINATED_COMMENT);
+ goto error;
+ }
+ if (linenoBefore != lineno)
+ updateFlagsForEOL();
+ cursor = (cursor - 1) & ntokensMask;  // NOTE(review): same slot adjustment as for single-line comments -- confirm
+ goto retry;
+ }
+
+ // Look for a regexp.
+ if (modifier == Operand) {
+ tokenbuf.clear();
+
+ bool inCharClass = false;  // true while inside [...]
+ for (;;) {
+ c = getChar();
+ if (c == '\\') {  // keep the backslash, then take the following char without interpreting it
+ if (!tokenbuf.append(c))
+ goto error;
+ c = getChar();
+ } else if (c == '[') {
+ inCharClass = true;
+ } else if (c == ']') {
+ inCharClass = false;
+ } else if (c == '/' && !inCharClass) {
+ // For compat with IE, allow unescaped / in char classes.
+ break;
+ }
+ if (c == '\n' || c == EOF) {
+ ungetChar(c);
+ reportError(JSMSG_UNTERMINATED_REGEXP);
+ goto error;
+ }
+ if (!tokenbuf.append(c))
+ goto error;
+ }
+
+ RegExpFlag reflags = NoFlags;
+ unsigned length = tokenbuf.length() + 1;  // only used to adjust pos.begin for the bad-flag error below
+ while (true) {  // accept each of g, i, m, y, u at most once
+ c = peekChar();
+ if (c == 'g' && !(reflags & GlobalFlag))
+ reflags = RegExpFlag(reflags | GlobalFlag);
+ else if (c == 'i' && !(reflags & IgnoreCaseFlag))
+ reflags = RegExpFlag(reflags | IgnoreCaseFlag);
+ else if (c == 'm' && !(reflags & MultilineFlag))
+ reflags = RegExpFlag(reflags | MultilineFlag);
+ else if (c == 'y' && !(reflags & StickyFlag))
+ reflags = RegExpFlag(reflags | StickyFlag);
+ else if (c == 'u' && !(reflags & UnicodeFlag))
+ reflags = RegExpFlag(reflags | UnicodeFlag);
+ else
+ break;
+ getChar();
+ length++;
+ }
+
+ c = peekChar();
+ if (JS7_ISLET(c)) {  // leftover flag character: unknown or repeated
+ char buf[2] = { '\0', '\0' };
+ tp->pos.begin += length + 1;
+ buf[0] = char(c);
+ reportError(JSMSG_BAD_REGEXP_FLAG, buf);
+ (void) getChar();
+ goto error;
+ }
+ tp->type = TOK_REGEXP;
+ tp->setRegExpFlags(reflags);
+ goto out;
+ }
+
+ tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
+ goto out;
+
+ case '%':
+ tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
+ goto out;
+
+ case '-':
+ if (matchChar('-')) {
+ if (peekChar() == '>' && !flags.isDirtyLine)  // "-->" while the line is not yet dirty is a comment to end of line
+ goto skipline;
+ tp->type = TOK_DEC;
+ } else {
+ tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
+ }
+ goto out;
+
+ badchar:
+ default:
+ reportError(JSMSG_ILLEGAL_CHARACTER);
+ goto error;
+ }
+
+ MOZ_CRASH("should have jumped to |out| or |error|");
+
+ out:  // success exit: finalize the token position and publish its kind
+ if (flags.hitOOM)
+ return reportError(JSMSG_OUT_OF_MEMORY);
+
+ flags.isDirtyLine = true;
+ tp->pos.end = userbuf.offset();
+#ifdef DEBUG
+ // Save the modifier used to get this token, so that if an ungetToken()
+ // occurs and then the token is re-gotten (or peeked, etc.), we can assert
+ // that both gets have used the same modifiers.
+ tp->modifier = modifier;
+ tp->modifierException = NoException;
+#endif
+ MOZ_ASSERT(IsTokenSane(tp));
+ *ttp = tp->type;
+ return true;
+
+ error:  // failure exit: record the error and mark the token kind as undefined
+ if (flags.hitOOM)
+ return reportError(JSMSG_OUT_OF_MEMORY);
+
+ flags.isDirtyLine = true;
+ tp->pos.end = userbuf.offset();
+ MOZ_MAKE_MEM_UNDEFINED(&tp->type, sizeof(tp->type));
+ flags.hadError = true;
+#ifdef DEBUG
+ // Poisoning userbuf on error establishes an invariant: once an erroneous
+ // token has been seen, userbuf will not be consulted again. This is true
+ // because the parser will deal with the illegal token by aborting parsing
+ // immediately.
+ userbuf.poison();
+#endif
+ MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
+ return false;
+}
+
+// Parses the braced part of a "\u{...}" escape, starting at the '{'.
+//
+// Consumes the '{', one or more hexadecimal digits, and the closing '}'. On
+// success stores the accumulated value in |*cp| and returns true. Returns
+// false, leaving |*cp| untouched, if the braces are empty, if EOF or a
+// non-hex character is met before '}', or if the value grows beyond
+// unicode::NonBMPMax.
+bool
+TokenStream::getBracedUnicode(uint32_t* cp)
+{
+    consumeKnownChar('{');
+
+    uint32_t value = 0;
+    bool sawDigit = false;
+    for (;;) {
+        const int32_t unit = getCharIgnoreEOL();
+        if (unit == '}')
+            break;
+
+        if (unit == EOF || !JS7_ISHEX(unit))
+            return false;
+
+        // The range check runs after every digit, so |value| never exceeds
+        // NonBMPMax when it is shifted again.
+        value = (value << 4) | JS7_UNHEX(unit);
+        if (value > unicode::NonBMPMax)
+            return false;
+        sawDigit = true;
+    }
+
+    // "\u{}" is not a valid escape.
+    if (!sawDigit)
+        return false;
+
+    *cp = value;
+    return true;
+}
+
+// Scans the rest of a string literal or template-string chunk and builds its
+// token. Callers in this file invoke it after the opening delimiter has been
+// read, or to resume a template (the TemplateTail modifier).
+//
+// |untilChar| is the closing delimiter to look for; '`' selects template
+// mode. Escape sequences are decoded into tokenbuf as they are read. In
+// template mode scanning also stops at "${", giving TOK_TEMPLATE_HEAD instead
+// of TOK_NO_SUBS_TEMPLATE; raw line terminators are allowed there, with '\r'
+// and "\r\n" recorded as '\n'. Outside template mode a raw line terminator is
+// an unterminated-string error.
+//
+// On success stores the token, with its atom set, in |*tp| and returns true.
+// Returns false on failure; |*tp| is assigned the newly allocated token
+// before any failure can occur.
+bool
+TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
+{
+    int c;
+    int nc = -1;  // character after a '$' in template mode; stays -1 otherwise
+
+    bool parsingTemplate = (untilChar == '`');
+
+    *tp = newToken(-1);
+    tokenbuf.clear();
+
+    // We need to detect any of these chars: " or ', \n (or its
+    // equivalents), \\, EOF. Because we detect EOL sequences here and
+    // put them back immediately, we can use getCharIgnoreEOL().
+    while ((c = getCharIgnoreEOL()) != untilChar) {
+        if (c == EOF) {
+            ungetCharIgnoreEOL(c);
+            reportError(JSMSG_UNTERMINATED_STRING);
+            return false;
+        }
+
+        if (c == '\\') {
+            switch (c = getChar()) {
+              case 'b': c = '\b'; break;
+              case 'f': c = '\f'; break;
+              case 'n': c = '\n'; break;
+              case 'r': c = '\r'; break;
+              case 't': c = '\t'; break;
+              case 'v': c = '\v'; break;
+
+              case '\n':
+                // ES5 7.8.4: an escaped line terminator represents
+                // no character.
+                continue;
+
+              // Unicode character specification.
+              case 'u': {
+                if (peekChar() == '{') {
+                    uint32_t code;
+                    if (!getBracedUnicode(&code)) {
+                        reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
+                        return false;
+                    }
+
+                    MOZ_ASSERT(code <= unicode::NonBMPMax);
+                    if (code < unicode::NonBMPMin) {
+                        c = code;
+                    } else {
+                        // Non-BMP code point: append the lead surrogate now;
+                        // the trail surrogate is appended at the bottom of
+                        // the loop like any other character.
+                        if (!tokenbuf.append(unicode::LeadSurrogate(code)))
+                            return false;
+                        c = unicode::TrailSurrogate(code);
+                    }
+                    break;
+                }
+
+                // Otherwise exactly four hex digits must follow.
+                char16_t cp[4];
+                if (peekChars(4, cp) &&
+                    JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3]))
+                {
+                    c = JS7_UNHEX(cp[0]);
+                    c = (c << 4) + JS7_UNHEX(cp[1]);
+                    c = (c << 4) + JS7_UNHEX(cp[2]);
+                    c = (c << 4) + JS7_UNHEX(cp[3]);
+                    skipChars(4);
+                } else {
+                    reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
+                    return false;
+                }
+                break;
+              }
+
+              // Hexadecimal character specification.
+              case 'x': {
+                char16_t cp[2];
+                if (peekChars(2, cp) && JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
+                    c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
+                    skipChars(2);
+                } else {
+                    reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
+                    return false;
+                }
+                break;
+              }
+
+              default:
+                // Octal character specification.
+                if (JS7_ISOCT(c)) {
+                    int32_t val = JS7_UNOCT(c);
+
+                    c = peekChar();
+
+                    // Strict mode code allows only \0, then a non-digit.
+                    if (val != 0 || JS7_ISDEC(c)) {
+                        // Templates reject octal escapes outright.
+                        if (parsingTemplate) {
+                            reportError(JSMSG_DEPRECATED_OCTAL);
+                            return false;
+                        }
+                        if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
+                            return false;
+                        flags.sawOctalEscape = true;
+                    }
+
+                    // Take up to two more octal digits, but only while the
+                    // value stays within 0xFF.
+                    if (JS7_ISOCT(c)) {
+                        val = 8 * val + JS7_UNOCT(c);
+                        getChar();
+                        c = peekChar();
+                        if (JS7_ISOCT(c)) {
+                            int32_t save = val;
+                            val = 8 * val + JS7_UNOCT(c);
+                            if (val <= 0xFF)
+                                getChar();
+                            else
+                                val = save;
+                        }
+                    }
+
+                    c = char16_t(val);
+                }
+                // Any other escaped character stands for itself.
+                break;
+            }
+        } else if (TokenBuf::isRawEOLChar(c)) {
+            if (!parsingTemplate) {
+                ungetCharIgnoreEOL(c);
+                reportError(JSMSG_UNTERMINATED_STRING);
+                return false;
+            }
+            if (c == '\r') {
+                c = '\n';
+                // Treat "\r\n" as a single line terminator. Check that input
+                // remains before peeking, as the EOL handling in
+                // getTokenInternal() does, so peekRawChar() is never called
+                // when '\r' is the last character of the source.
+                if (userbuf.hasRawChars() && userbuf.peekRawChar() == '\n')
+                    skipCharsIgnoreEOL(1);
+            }
+            updateLineInfoForEOL();
+            updateFlagsForEOL();
+        } else if (parsingTemplate && c == '$') {
+            // "${" ends this chunk of the template; a lone '$' is ordinary
+            // text.
+            if ((nc = getCharIgnoreEOL()) == '{')
+                break;
+            ungetCharIgnoreEOL(nc);
+        }
+
+        if (!tokenbuf.append(c)) {
+            ReportOutOfMemory(cx);
+            return false;
+        }
+    }
+
+    JSAtom* atom = atomize(cx, tokenbuf);
+    if (!atom)
+        return false;
+
+    if (!parsingTemplate) {
+        (*tp)->type = TOK_STRING;
+    } else {
+        // The loop was left either at "${" or at the closing backquote.
+        if (c == '$' && nc == '{')
+            (*tp)->type = TOK_TEMPLATE_HEAD;
+        else
+            (*tp)->type = TOK_NO_SUBS_TEMPLATE;
+    }
+
+    (*tp)->setAtom(atom);
+    return true;
+}
+
+// Reads one line from |file| into |buf|, which has room for |size| bytes.
+//
+// Reading stops after a '\n' (which is kept), after a '\r' that is not
+// followed by '\n' (the '\r' is kept and the following character is pushed
+// back onto |file|), at end of input, or once size - 1 characters have been
+// stored. The stored text is always NUL-terminated.
+//
+// Returns the number of characters stored before the terminator, or -1 if
+// |size| is less than 1.
+JS_FRIEND_API(int)
+js_fgets(char* buf, int size, FILE* file)
+{
+    const int capacity = size - 1;  // leave room for the trailing '\0'
+    if (capacity < 0)
+        return -1;
+
+    int len = 0;
+    bool sawCR = false;
+    while (len < capacity) {
+        int ch = fast_getc(file);
+        if (ch == EOF)
+            break;
+
+        buf[len] = ch;
+        if (ch == '\n') {
+            // Any '\n' ends the line and is kept; there is room for '\0'.
+            len++;
+            break;
+        }
+        if (sawCR) {
+            // The previous '\r' ended the line: return |ch| to the stream
+            // and let the terminator below overwrite it in |buf|.
+            ungetc(ch, file);
+            break;
+        }
+        sawCR = (ch == '\r');
+        len++;
+    }
+
+    buf[len] = '\0';
+    return len;
+}
+
+// Returns the description string that FOR_EACH_TOKEN_KIND pairs with |tt|.
+// TOK_LIMIT is not a valid argument (it asserts), and it, like any value
+// with no case, yields "<bad TokenKind>".
+const char*
+frontend::TokenKindToDesc(TokenKind tt)
+{
+    const char* description = "<bad TokenKind>";
+
+    switch (tt) {
+#define DESCRIBE_KIND(name, desc) \
+      case TOK_##name:            \
+        description = desc;       \
+        break;
+      FOR_EACH_TOKEN_KIND(DESCRIBE_KIND)
+#undef DESCRIBE_KIND
+      case TOK_LIMIT:
+        MOZ_ASSERT_UNREACHABLE("TOK_LIMIT should not be passed.");
+        break;
+    }
+
+    return description;
+}
+
+#ifdef DEBUG
+// Debug-only helper: returns the enumerator spelling of |tt| as a string,
+// i.e. "TOK_" followed by the kind's name. TOK_LIMIT and any value with no
+// case yield "<bad TokenKind>".
+const char*
+TokenKindToString(TokenKind tt)
+{
+    const char* spelling = "<bad TokenKind>";
+
+    switch (tt) {
+#define SPELL_KIND(name, desc)    \
+      case TOK_##name:            \
+        spelling = "TOK_" #name;  \
+        break;
+      FOR_EACH_TOKEN_KIND(SPELL_KIND)
+#undef SPELL_KIND
+      case TOK_LIMIT:
+        break;
+    }
+
+    return spelling;
+}
+#endif