From 427e7346629c76a1676a3f92320c3d2575d01b85 Mon Sep 17 00:00:00 2001
From: janekptacijarabaci <janekptacijarabaci@seznam.cz>
Date: Sun, 18 Mar 2018 14:55:49 +0100
Subject: Correctly tokenize valid JS names when using Unicode mathematical
 alphanumeric symbols as variable name

Issue https://github.com/MoonchildProductions/Pale-Moon/issues/1647
---
 js/src/frontend/TokenStream.cpp | 149 +++++++++++++++++++++++++++++++++++++---
 js/src/frontend/TokenStream.h   |   1 +
 2 files changed, 142 insertions(+), 8 deletions(-)

(limited to 'js/src/frontend')

diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
index c166ed414..179a7c244 100644
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -118,13 +118,57 @@ IsIdentifier(const CharT* chars, size_t length)
     return true;
 }
 
+static uint32_t
+GetSingleCodePoint(const char16_t** p, const char16_t* end)
+{
+    uint32_t codePoint;
+    if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(**p)) && *p + 1 < end) {
+        char16_t lead = **p;
+        char16_t maybeTrail = *(*p + 1);
+        if (unicode::IsTrailSurrogate(maybeTrail)) {
+            *p += 2;
+            return unicode::UTF16Decode(lead, maybeTrail);
+        }
+    }
+
+    codePoint = **p;
+    (*p)++;
+    return codePoint;
+}
+
+static bool
+IsIdentifierMaybeNonBMP(const char16_t* chars, size_t length)
+{
+    if (IsIdentifier(chars, length))
+        return true;
+
+    if (length == 0)
+        return false;
+
+    const char16_t* p = chars;
+    const char16_t* end = chars + length;
+    uint32_t codePoint;
+
+    codePoint = GetSingleCodePoint(&p, end);
+    if (!unicode::IsIdentifierStart(codePoint))
+        return false;
+
+    while (p < end) {
+        codePoint = GetSingleCodePoint(&p, end);
+        if (!unicode::IsIdentifierPart(codePoint))
+            return false;
+    }
+
+    return true;
+}
+
 bool
 frontend::IsIdentifier(JSLinearString* str)
 {
     JS::AutoCheckCannotGC nogc;
     return str->hasLatin1Chars()
            ? ::IsIdentifier(str->latin1Chars(nogc), str->length())
-           : ::IsIdentifier(str->twoByteChars(nogc), str->length());
+           : ::IsIdentifierMaybeNonBMP(str->twoByteChars(nogc), str->length());
 }
 
 bool
@@ -992,6 +1036,21 @@ IsTokenSane(Token* tp)
 }
 #endif
 
+bool
+TokenStream::matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint)
+{
+    int32_t maybeTrail = getCharIgnoreEOL();
+    if (!unicode::IsTrailSurrogate(maybeTrail)) {
+        ungetCharIgnoreEOL(maybeTrail);
+        return false;
+    }
+
+    if (trail)
+        *trail = maybeTrail;
+    *codePoint = unicode::UTF16Decode(lead, maybeTrail);
+    return true;
+}
+
 bool
 TokenStream::putIdentInTokenbuf(const char16_t* identStart)
 {
@@ -1003,11 +1062,39 @@ TokenStream::putIdentInTokenbuf(const char16_t* identStart)
     tokenbuf.clear();
     for (;;) {
         c = getCharIgnoreEOL();
+
+        if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+            char16_t trail;
+            uint32_t codePoint;
+            if (matchTrailForLeadSurrogate(c, &trail, &codePoint)) {
+                if (!unicode::IsIdentifierPart(codePoint))
+                    break;
+
+                if (!tokenbuf.append(c) || !tokenbuf.append(trail)) {
+                    userbuf.setAddressOfNextRawChar(tmp);
+                    return false;
+                }
+                continue;
+            }
+        }
+
         if (!unicode::IsIdentifierPart(char16_t(c))) {
             if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                 break;
+
+            if (MOZ_UNLIKELY(unicode::IsSupplementary(qc))) {
+                char16_t lead, trail;
+                unicode::UTF16Encode(qc, &lead, &trail);
+                if (!tokenbuf.append(lead) || !tokenbuf.append(trail)) {
+                    userbuf.setAddressOfNextRawChar(tmp);
+                    return false;
+                }
+                continue;
+            }
+
             c = qc;
         }
+
         if (!tokenbuf.append(c)) {
             userbuf.setAddressOfNextRawChar(tmp);
             return false;
@@ -1168,12 +1255,23 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
         static_assert('_' < 128,
                       "IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
                       "ensure that '_' is never handled here");
-        if (unicode::IsUnicodeIDStart(c)) {
+        if (unicode::IsUnicodeIDStart(char16_t(c))) {
             identStart = userbuf.addressOfNextRawChar() - 1;
             hadUnicodeEscape = false;
             goto identifier;
         }
 
+        if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+            uint32_t codePoint;
+            if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+                unicode::IsUnicodeIDStart(codePoint))
+            {
+                identStart = userbuf.addressOfNextRawChar() - 2;
+                hadUnicodeEscape = false;
+                goto identifier;
+            }
+        }
+
         goto badchar;
     }
 
@@ -1224,6 +1322,17 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
             c = getCharIgnoreEOL();
             if (c == EOF)
                 break;
+
+            if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+                uint32_t codePoint;
+                if (matchTrailForLeadSurrogate(c, nullptr, &codePoint)) {
+                    if (!unicode::IsIdentifierPart(codePoint))
+                        break;
+
+                    continue;
+                }
+            }
+
             if (!unicode::IsIdentifierPart(char16_t(c))) {
                 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                     break;
@@ -1318,9 +1427,21 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
         }
         ungetCharIgnoreEOL(c);
 
-        if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
-            reportError(JSMSG_IDSTART_AFTER_NUMBER);
-            goto error;
+        if (c != EOF) {
+            if (unicode::IsIdentifierStart(char16_t(c))) {
+                reportError(JSMSG_IDSTART_AFTER_NUMBER);
+                goto error;
+            }
+
+            if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+                uint32_t codePoint;
+                if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+                    unicode::IsIdentifierStart(codePoint))
+                {
+                    reportError(JSMSG_IDSTART_AFTER_NUMBER);
+                    goto error;
+                }
+            }
         }
 
         // Unlike identifiers and strings, numbers cannot contain escaped
@@ -1425,9 +1546,21 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
         }
         ungetCharIgnoreEOL(c);
 
-        if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
-            reportError(JSMSG_IDSTART_AFTER_NUMBER);
-            goto error;
+        if (c != EOF) {
+            if (unicode::IsIdentifierStart(char16_t(c))) {
+                reportError(JSMSG_IDSTART_AFTER_NUMBER);
+                goto error;
+            }
+
+            if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+                uint32_t codePoint;
+                if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+                    unicode::IsIdentifierStart(codePoint))
+                {
+                    reportError(JSMSG_IDSTART_AFTER_NUMBER);
+                    goto error;
+                }
+            }
         }
 
         double dval;
diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h
index 29dcead62..5d6b4b795 100644
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -952,6 +952,7 @@ class MOZ_STACK_CLASS TokenStream
     uint32_t peekExtendedUnicodeEscape(uint32_t* codePoint);
     uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint);
     bool matchUnicodeEscapeIdent(uint32_t* codePoint);
+    bool matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint);
     bool peekChars(int n, char16_t* cp);
 
     MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
-- 
cgit v1.2.3