diff options
Diffstat (limited to 'js/src/frontend/TokenStream.h')
-rw-r--r-- | js/src/frontend/TokenStream.h | 263 |
1 files changed, 134 insertions, 129 deletions
diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h index 5d6b4b795..2744fd144 100644 --- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -26,14 +26,13 @@ #include "js/UniquePtr.h" #include "js/Vector.h" #include "vm/RegExpObject.h" +#include "vm/String.h" struct KeywordInfo; namespace js { namespace frontend { -class AutoAwaitIsKeyword; - struct TokenPos { uint32_t begin; // Offset of the token's first char. uint32_t end; // Offset of 1 past the token's last char. @@ -80,6 +79,20 @@ struct TokenPos { enum DecimalPoint { NoDecimal = false, HasDecimal = true }; +enum class InvalidEscapeType { + // No invalid character escapes. + None, + // A malformed \x escape. + Hexadecimal, + // A malformed \u escape. + Unicode, + // An otherwise well-formed \u escape which represents a + // codepoint > 10FFFF. + UnicodeOverflow, + // An octal escape in a template token. + Octal +}; + class TokenStream; struct Token @@ -106,9 +119,6 @@ struct Token // TOK_DIV. Operand, - // Treat keywords as names by returning TOK_NAME. - KeywordIsName, - // Treat subsequent characters as the tail of a template literal, after // a template substitution, beginning with a "}", continuing with zero // or more template literal characters, and ending with either "${" or @@ -150,10 +160,6 @@ struct Token // If a semicolon is inserted automatically, the next token is already // gotten with None, but we expect Operand. OperandIsNone, - - // If name of method definition is `get` or `set`, the next token is - // already gotten with KeywordIsName, but we expect None. - NoneIsKeywordIsName, }; friend class TokenStream; @@ -210,11 +216,6 @@ struct Token return u.name->JSAtom::asPropertyName(); // poor-man's type verification } - bool nameContainsEscape() const { - PropertyName* n = name(); - return pos.begin + n->length() != pos.end; - } - JSAtom* atom() const { MOZ_ASSERT(type == TOK_STRING || type == TOK_TEMPLATE_HEAD || @@ -240,10 +241,22 @@ struct Token }; class CompileError : public JSErrorReport { -public: + public: void throwError(JSContext* cx); }; +extern const char* +ReservedWordToCharZ(PropertyName* str); + +extern MOZ_MUST_USE bool +IsFutureReservedWord(JSLinearString* str); + +extern MOZ_MUST_USE bool +IsReservedWordLiteral(JSLinearString* str); + +extern MOZ_MUST_USE bool +IsStrictReservedWord(JSLinearString* str); + // Ideally, tokenizing would be entirely independent of context. But the // strict mode flag, which is in SharedContext, affects tokenizing, and // TokenStream needs to see it. @@ -330,25 +343,26 @@ class MOZ_STACK_CLASS TokenStream JSVersion versionNumber() const { return VersionNumber(options().version); } JSVersion versionWithFlags() const { return options().version; } + private: + PropertyName* reservedWordToPropertyName(TokenKind tt) const; + + public: PropertyName* currentName() const { - if (isCurrentTokenType(TOK_YIELD)) - return cx->names().yield; - MOZ_ASSERT(isCurrentTokenType(TOK_NAME)); - return currentToken().name(); + if (isCurrentTokenType(TOK_NAME)) { + return currentToken().name(); + } + + MOZ_ASSERT(TokenKindIsPossibleIdentifierName(currentToken().type)); + return reservedWordToPropertyName(currentToken().type); } PropertyName* nextName() const { - if (nextToken().type == TOK_YIELD) - return cx->names().yield; - MOZ_ASSERT(nextToken().type == TOK_NAME); - return nextToken().name(); - } + if (nextToken().type != TOK_NAME) { + return nextToken().name(); + } - bool nextNameContainsEscape() const { - if (nextToken().type == TOK_YIELD) - return false; - MOZ_ASSERT(nextToken().type == TOK_NAME); - return nextToken().nameContainsEscape(); + MOZ_ASSERT(TokenKindIsPossibleIdentifierName(nextToken().type)); + return reservedWordToPropertyName(nextToken().type); } bool isCurrentTokenAssignment() const { @@ -361,22 +375,47 @@ class MOZ_STACK_CLASS TokenStream bool hadError() const { return flags.hadError; } void clearSawOctalEscape() { flags.sawOctalEscape = false; } + bool hasInvalidTemplateEscape() const { + return invalidTemplateEscapeType != InvalidEscapeType::None; + } + void clearInvalidTemplateEscape() { + invalidTemplateEscapeType = InvalidEscapeType::None; + } + + // If there is an invalid escape in a template, report it and return false, + // otherwise return true. + bool checkForInvalidTemplateEscapeError() { + if (invalidTemplateEscapeType == InvalidEscapeType::None) + return true; + + reportInvalidEscapeError(invalidTemplateEscapeOffset, invalidTemplateEscapeType); + return false; + } + // TokenStream-specific error reporters. bool reportError(unsigned errorNumber, ...); bool reportErrorNoOffset(unsigned errorNumber, ...); - bool reportWarning(unsigned errorNumber, ...); + + // Report the given error at the current offset. + void error(unsigned errorNumber, ...); + + // Report the given error at the given offset. + void errorAt(uint32_t offset, unsigned errorNumber, ...); + + // Warn at the current offset. + MOZ_MUST_USE bool warning(unsigned errorNumber, ...); static const uint32_t NoOffset = UINT32_MAX; // General-purpose error reporters. You should avoid calling these - // directly, and instead use the more succinct alternatives (e.g. - // reportError()) in TokenStream, Parser, and BytecodeEmitter. - bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, - va_list args); - bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, - va_list args); - bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, - va_list args); + // directly, and instead use the more succinct alternatives (error(), + // warning(), &c.) in TokenStream, Parser, and BytecodeEmitter. + bool reportCompileErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset, unsigned flags, + unsigned errorNumber, va_list args); + bool reportStrictModeErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset, + bool strictMode, unsigned errorNumber, va_list args); + bool reportExtraWarningErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset, + unsigned errorNumber, va_list args); // asm.js reporter void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...); @@ -415,6 +454,33 @@ class MOZ_STACK_CLASS TokenStream bool reportStrictModeError(unsigned errorNumber, ...); bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); } + void setInvalidTemplateEscape(uint32_t offset, InvalidEscapeType type) { + MOZ_ASSERT(type != InvalidEscapeType::None); + if (invalidTemplateEscapeType != InvalidEscapeType::None) + return; + invalidTemplateEscapeOffset = offset; + invalidTemplateEscapeType = type; + } + void reportInvalidEscapeError(uint32_t offset, InvalidEscapeType type) { + switch (type) { + case InvalidEscapeType::None: + MOZ_ASSERT_UNREACHABLE("unexpected InvalidEscapeType"); + return; + case InvalidEscapeType::Hexadecimal: + errorAt(offset, JSMSG_MALFORMED_ESCAPE, "hexadecimal"); + return; + case InvalidEscapeType::Unicode: + errorAt(offset, JSMSG_MALFORMED_ESCAPE, "Unicode"); + return; + case InvalidEscapeType::UnicodeOverflow: + errorAt(offset, JSMSG_UNICODE_OVERFLOW, "escape sequence"); + return; + case InvalidEscapeType::Octal: + errorAt(offset, JSMSG_DEPRECATED_OCTAL); + return; + } + } + static JSAtom* atomize(ExclusiveContext* cx, CharBuffer& cb); MOZ_MUST_USE bool putIdentInTokenbuf(const char16_t* identStart); @@ -432,21 +498,19 @@ class MOZ_STACK_CLASS TokenStream {} }; - bool awaitIsKeyword = false; - friend class AutoAwaitIsKeyword; + uint32_t invalidTemplateEscapeOffset = 0; + InvalidEscapeType invalidTemplateEscapeType = InvalidEscapeType::None; public: typedef Token::Modifier Modifier; static constexpr Modifier None = Token::None; static constexpr Modifier Operand = Token::Operand; - static constexpr Modifier KeywordIsName = Token::KeywordIsName; static constexpr Modifier TemplateTail = Token::TemplateTail; typedef Token::ModifierException ModifierException; static constexpr ModifierException NoException = Token::NoException; static constexpr ModifierException NoneIsOperand = Token::NoneIsOperand; static constexpr ModifierException OperandIsNone = Token::OperandIsNone; - static constexpr ModifierException NoneIsKeywordIsName = Token::NoneIsKeywordIsName; void addModifierException(ModifierException modifierException) { #ifdef DEBUG @@ -475,10 +539,6 @@ class MOZ_STACK_CLASS TokenStream MOZ_ASSERT(next.type != TOK_DIV && next.type != TOK_REGEXP, "next token requires contextual specifier to be parsed unambiguously"); break; - case NoneIsKeywordIsName: - MOZ_ASSERT(next.modifier == KeywordIsName); - MOZ_ASSERT(next.type != TOK_NAME); - break; default: MOZ_CRASH("unexpected modifier exception"); } @@ -505,18 +565,17 @@ class MOZ_STACK_CLASS TokenStream return; } - if (lookaheadToken.modifierException == NoneIsKeywordIsName) { - // getToken() permissibly following getToken(KeywordIsName). - if (modifier == None && lookaheadToken.modifier == KeywordIsName) - return; - } - MOZ_ASSERT_UNREACHABLE("this token was previously looked up with a " "different modifier, potentially making " "tokenization non-deterministic"); #endif } + const Token& nextToken() const { + MOZ_ASSERT(hasLookahead()); + return tokens[(cursor + 1) & ntokensMask]; + } + // Advance to the next token. If the token stream encountered an error, // return false. Otherwise return true and store the token kind in |*ttp|. MOZ_MUST_USE bool getToken(TokenKind* ttp, Modifier modifier = None) { @@ -570,6 +629,14 @@ class MOZ_STACK_CLASS TokenStream return true; } + MOZ_MUST_USE bool peekOffset(uint32_t* offset, Modifier modifier = None) { + TokenPos pos; + if (!peekTokenPos(&pos, modifier)) + return false; + *offset = pos.begin; + return true; + } + // This is like peekToken(), with one exception: if there is an EOL // between the end of the current token and the start of the next token, it // return true and store TOK_EOL in |*ttp|. In that case, no token with @@ -637,36 +704,6 @@ class MOZ_STACK_CLASS TokenStream MOZ_ALWAYS_TRUE(matched); } - // Like matchToken(..., TOK_NAME) but further matching the name token only - // if it has the given characters, without containing escape sequences. - // If the name token has the given characters yet *does* contain an escape, - // a syntax error will be reported. - // - // This latter behavior makes this method unsuitable for use in any context - // where ASI might occur. In such places, an escaped "contextual keyword" - // on a new line is the start of an ExpressionStatement, not a continuation - // of a StatementListItem (or ImportDeclaration or ExportDeclaration, in - // modules). - MOZ_MUST_USE bool matchContextualKeyword(bool* matchedp, Handle<PropertyName*> keyword, - Modifier modifier = None) - { - TokenKind token; - if (!getToken(&token, modifier)) - return false; - if (token == TOK_NAME && currentToken().name() == keyword) { - if (currentToken().nameContainsEscape()) { - reportError(JSMSG_ESCAPED_KEYWORD); - return false; - } - - *matchedp = true; - } else { - *matchedp = false; - ungetToken(); - } - return true; - } - MOZ_MUST_USE bool nextTokenEndsExpr(bool* endsExpr) { TokenKind tt; if (!peekToken(&tt)) @@ -732,19 +769,6 @@ class MOZ_STACK_CLASS TokenStream return sourceMapURL_.get(); } - // If |atom| is not a keyword in this version, return true with *ttp - // unchanged. - // - // If it is a reserved word in this version and strictness mode, and thus - // can't be present in correct code, report a SyntaxError and return false. - // - // If it is a keyword, like "if", return true with the keyword's TokenKind - // in *ttp. - MOZ_MUST_USE bool checkForKeyword(JSAtom* atom, TokenKind* ttp); - - // Same semantics as above, but for the provided keyword. - MOZ_MUST_USE bool checkForKeyword(const KeywordInfo* kw, TokenKind* ttp); - // This class maps a userbuf offset (which is 0-indexed) to a line number // (which is 1-indexed) and a column index (which is 0-indexed). class SourceCoords @@ -940,7 +964,6 @@ class MOZ_STACK_CLASS TokenStream MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier); - MOZ_MUST_USE bool getBracedUnicode(uint32_t* code); MOZ_MUST_USE bool getStringOrTemplateToken(int untilChar, Token** tp); int32_t getChar(); @@ -957,7 +980,7 @@ class MOZ_STACK_CLASS TokenStream MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated); MOZ_MUST_USE bool getDirective(bool isMultiline, bool shouldWarnDeprecated, - const char* directive, int directiveLength, + const char* directive, uint8_t directiveLength, const char* errorMsgPragma, UniquePtr<char16_t[], JS::FreePolicy>* destination); MOZ_MUST_USE bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated); @@ -975,29 +998,30 @@ class MOZ_STACK_CLASS TokenStream MOZ_ASSERT(c == expect); } - int32_t peekChar() { - int32_t c = getChar(); - ungetChar(c); - return c; + MOZ_MUST_USE bool peekChar(int32_t* c) { + *c = getChar(); + ungetChar(*c); + return true; } - void skipChars(int n) { - while (--n >= 0) - getChar(); + void skipChars(uint8_t n) { + while (n-- > 0) { + MOZ_ASSERT(userbuf.hasRawChars()); + mozilla::DebugOnly<int32_t> c = getCharIgnoreEOL(); + MOZ_ASSERT(c != '\n'); + } } - void skipCharsIgnoreEOL(int n) { - while (--n >= 0) + void skipCharsIgnoreEOL(uint8_t n) { + while (n-- > 0) { + MOZ_ASSERT(userbuf.hasRawChars()); getCharIgnoreEOL(); + } } void updateLineInfoForEOL(); void updateFlagsForEOL(); - const Token& nextToken() const { - MOZ_ASSERT(hasLookahead()); - return tokens[(cursor + 1) & ntokensMask]; - } bool hasLookahead() const { return lookahead > 0; } @@ -1022,25 +1046,6 @@ class MOZ_STACK_CLASS TokenStream StrictModeGetter* strictModeGetter; // used to test for strict mode }; -class MOZ_STACK_CLASS AutoAwaitIsKeyword -{ -private: - TokenStream* ts_; - bool oldAwaitIsKeyword_; - -public: - AutoAwaitIsKeyword(TokenStream* ts, bool awaitIsKeyword) { - ts_ = ts; - oldAwaitIsKeyword_ = ts_->awaitIsKeyword; - ts_->awaitIsKeyword = awaitIsKeyword; - } - - ~AutoAwaitIsKeyword() { - ts_->awaitIsKeyword = oldAwaitIsKeyword_; - ts_ = nullptr; - } -}; - extern const char* TokenKindToDesc(TokenKind tt); |