summaryrefslogtreecommitdiffstats
path: root/js/src/frontend
diff options
context:
space:
mode:
authorwolfbeast <mcwerewolf@gmail.com>2018-03-18 17:57:11 +0100
committerwolfbeast <mcwerewolf@gmail.com>2018-03-18 17:58:37 +0100
commitb2331d76221b67bffb5cb5a2344c8b150a305dc7 (patch)
tree5d9cae4cdfc502b568f436fd66d77e0b16476395 /js/src/frontend
parent122938a4398ae8db07060dca3561ff46f93b5925 (diff)
parent427e7346629c76a1676a3f92320c3d2575d01b85 (diff)
downloadUXP-b2331d76221b67bffb5cb5a2344c8b150a305dc7.tar
UXP-b2331d76221b67bffb5cb5a2344c8b150a305dc7.tar.gz
UXP-b2331d76221b67bffb5cb5a2344c8b150a305dc7.tar.lz
UXP-b2331d76221b67bffb5cb5a2344c8b150a305dc7.tar.xz
UXP-b2331d76221b67bffb5cb5a2344c8b150a305dc7.zip
Correctly tokenize valid JS names when using code points outside of BMP range.
This resolves #72. Merged remote-tracking branch 'janek/js_variable_unicode_1'
Diffstat (limited to 'js/src/frontend')
-rw-r--r--js/src/frontend/TokenStream.cpp149
-rw-r--r--js/src/frontend/TokenStream.h1
2 files changed, 142 insertions, 8 deletions
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
index c166ed414..179a7c244 100644
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -118,13 +118,57 @@ IsIdentifier(const CharT* chars, size_t length)
return true;
}
+static uint32_t
+GetSingleCodePoint(const char16_t** p, const char16_t* end)
+{
+ uint32_t codePoint;
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(**p)) && *p + 1 < end) {
+ char16_t lead = **p;
+ char16_t maybeTrail = *(*p + 1);
+ if (unicode::IsTrailSurrogate(maybeTrail)) {
+ *p += 2;
+ return unicode::UTF16Decode(lead, maybeTrail);
+ }
+ }
+
+ codePoint = **p;
+ (*p)++;
+ return codePoint;
+}
+
+static bool
+IsIdentifierMaybeNonBMP(const char16_t* chars, size_t length)
+{
+ if (IsIdentifier(chars, length))
+ return true;
+
+ if (length == 0)
+ return false;
+
+ const char16_t* p = chars;
+ const char16_t* end = chars + length;
+ uint32_t codePoint;
+
+ codePoint = GetSingleCodePoint(&p, end);
+ if (!unicode::IsIdentifierStart(codePoint))
+ return false;
+
+ while (p < end) {
+ codePoint = GetSingleCodePoint(&p, end);
+ if (!unicode::IsIdentifierPart(codePoint))
+ return false;
+ }
+
+ return true;
+}
+
bool
frontend::IsIdentifier(JSLinearString* str)
{
JS::AutoCheckCannotGC nogc;
return str->hasLatin1Chars()
? ::IsIdentifier(str->latin1Chars(nogc), str->length())
- : ::IsIdentifier(str->twoByteChars(nogc), str->length());
+ : ::IsIdentifierMaybeNonBMP(str->twoByteChars(nogc), str->length());
}
bool
@@ -993,6 +1037,21 @@ IsTokenSane(Token* tp)
#endif
bool
+TokenStream::matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint)
+{
+ int32_t maybeTrail = getCharIgnoreEOL();
+ if (!unicode::IsTrailSurrogate(maybeTrail)) {
+ ungetCharIgnoreEOL(maybeTrail);
+ return false;
+ }
+
+ if (trail)
+ *trail = maybeTrail;
+ *codePoint = unicode::UTF16Decode(lead, maybeTrail);
+ return true;
+}
+
+bool
TokenStream::putIdentInTokenbuf(const char16_t* identStart)
{
int32_t c;
@@ -1003,11 +1062,39 @@ TokenStream::putIdentInTokenbuf(const char16_t* identStart)
tokenbuf.clear();
for (;;) {
c = getCharIgnoreEOL();
+
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ char16_t trail;
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, &trail, &codePoint)) {
+ if (!unicode::IsIdentifierPart(codePoint))
+ break;
+
+ if (!tokenbuf.append(c) || !tokenbuf.append(trail)) {
+ userbuf.setAddressOfNextRawChar(tmp);
+ return false;
+ }
+ continue;
+ }
+ }
+
if (!unicode::IsIdentifierPart(char16_t(c))) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
+
+ if (MOZ_UNLIKELY(unicode::IsSupplementary(qc))) {
+ char16_t lead, trail;
+ unicode::UTF16Encode(qc, &lead, &trail);
+ if (!tokenbuf.append(lead) || !tokenbuf.append(trail)) {
+ userbuf.setAddressOfNextRawChar(tmp);
+ return false;
+ }
+ continue;
+ }
+
c = qc;
}
+
if (!tokenbuf.append(c)) {
userbuf.setAddressOfNextRawChar(tmp);
return false;
@@ -1168,12 +1255,23 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
static_assert('_' < 128,
"IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
"ensure that '_' is never handled here");
- if (unicode::IsUnicodeIDStart(c)) {
+ if (unicode::IsUnicodeIDStart(char16_t(c))) {
identStart = userbuf.addressOfNextRawChar() - 1;
hadUnicodeEscape = false;
goto identifier;
}
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+ unicode::IsUnicodeIDStart(codePoint))
+ {
+ identStart = userbuf.addressOfNextRawChar() - 2;
+ hadUnicodeEscape = false;
+ goto identifier;
+ }
+ }
+
goto badchar;
}
@@ -1224,6 +1322,17 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
c = getCharIgnoreEOL();
if (c == EOF)
break;
+
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, nullptr, &codePoint)) {
+ if (!unicode::IsIdentifierPart(codePoint))
+ break;
+
+ continue;
+ }
+ }
+
if (!unicode::IsIdentifierPart(char16_t(c))) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
@@ -1318,9 +1427,21 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
}
ungetCharIgnoreEOL(c);
- if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
- reportError(JSMSG_IDSTART_AFTER_NUMBER);
- goto error;
+ if (c != EOF) {
+ if (unicode::IsIdentifierStart(char16_t(c))) {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+ unicode::IsIdentifierStart(codePoint))
+ {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+ }
}
// Unlike identifiers and strings, numbers cannot contain escaped
@@ -1425,9 +1546,21 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
}
ungetCharIgnoreEOL(c);
- if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
- reportError(JSMSG_IDSTART_AFTER_NUMBER);
- goto error;
+ if (c != EOF) {
+ if (unicode::IsIdentifierStart(char16_t(c))) {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+ unicode::IsIdentifierStart(codePoint))
+ {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+ }
}
double dval;
diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h
index 29dcead62..5d6b4b795 100644
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -952,6 +952,7 @@ class MOZ_STACK_CLASS TokenStream
uint32_t peekExtendedUnicodeEscape(uint32_t* codePoint);
uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint);
bool matchUnicodeEscapeIdent(uint32_t* codePoint);
+ bool matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint);
bool peekChars(int n, char16_t* cp);
MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);