Merge branch 'master' of https://github.com/MoonchildProductions/UXP into html_input_datetime_1

author: janekptacijarabaci <janekptacijarabaci@seznam.cz> 2018-03-30 09:44:21 +0200
committer: janekptacijarabaci <janekptacijarabaci@seznam.cz> 2018-03-30 09:44:21 +0200
commit: a1a007a4856fa50d6d811c2268f881e3666f4c67 (patch)
tree: 24b082c1bfb5777f1770c82a534bf765160bc1b8 /js/src/irregexp
parent: eddd0de2ae80e176011f41a5400e81522d53f4f3 (diff)
parent: 59bf4204a84f7638d3f89a29bc7c04e5dc401369 (diff)
download: UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.tar
UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.tar.gz
UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.tar.lz
UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.tar.xz
UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.zip
5 files changed, 296 insertions, 117 deletions
diff --git a/js/src/irregexp/RegExpCharacters-inl.h b/js/src/irregexp/RegExpCharacters-inl.h
new file mode 100644
index 000000000..d001819fc
--- /dev/null
+++ b/js/src/irregexp/RegExpCharacters-inl.h
@@ -0,0 +1,40 @@
+/* Generated by make_unicode.py DO NOT MODIFY */
+/* Unicode version: 9.0.0 */
+#ifndef V8_JSREGEXPCHARACTERS_INL_H_
+#define V8_JSREGEXPCHARACTERS_INL_H_
+
+namespace js {
+
+namespace irregexp {
+
+static inline bool
+RangeContainsLatin1Equivalents(CharacterRange range, bool unicode)
+{
+    if (unicode) {
+        // "LATIN SMALL LETTER LONG S" case folds to "LATIN SMALL LETTER S".
+        if (range.Contains(0x017F))
+            return true;
+        // "LATIN CAPITAL LETTER SHARP S" case folds to "LATIN SMALL LETTER SHARP S".
+        if (range.Contains(0x1E9E))
+            return true;
+        // "KELVIN SIGN" case folds to "LATIN SMALL LETTER K".
+        if (range.Contains(0x212A))
+            return true;
+        // "ANGSTROM SIGN" case folds to "LATIN SMALL LETTER A WITH RING ABOVE".
+        if (range.Contains(0x212B))
+            return true;
+    }
+
+    // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
+    // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
+    if (range.Contains(0x039C) || range.Contains(0x03BC))
+        return true;
+    // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER Y WITH DIAERESIS".
+    if (range.Contains(0x0178))
+        return true;
+    return false;
+}
+
+} } // namespace js::irregexp
+
+#endif // V8_JSREGEXPCHARACTERS_INL_H_
diff --git a/js/src/irregexp/RegExpCharacters.cpp b/js/src/irregexp/RegExpCharacters.cpp
new file mode 100644
index 000000000..096c02760
--- /dev/null
+++ b/js/src/irregexp/RegExpCharacters.cpp
@@ -0,0 +1,135 @@
+/* Generated by make_unicode.py DO NOT MODIFY */
+/* Unicode version: 9.0.0 */
+#include "irregexp/RegExpCharacters.h"
+
+#include "mozilla/Assertions.h"
+
+char16_t
+js::irregexp::ConvertNonLatin1ToLatin1(char16_t c, bool unicode)
+{
+    MOZ_ASSERT(c > 0xFF, "Character mustn't be Latin1");
+    if (unicode) {
+        // "LATIN SMALL LETTER LONG S" case folds to "LATIN SMALL LETTER S".
+        if (c == 0x017F)
+            return 0x73;
+        // "LATIN CAPITAL LETTER SHARP S" case folds to "LATIN SMALL LETTER SHARP S".
+        if (c == 0x1E9E)
+            return 0xDF;
+        // "KELVIN SIGN" case folds to "LATIN SMALL LETTER K".
+        if (c == 0x212A)
+            return 0x6B;
+        // "ANGSTROM SIGN" case folds to "LATIN SMALL LETTER A WITH RING ABOVE".
+        if (c == 0x212B)
+            return 0xE5;
+    }
+
+    // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
+    // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
+    if (c == 0x039C || c == 0x03BC)
+        return 0xB5;
+    // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER Y WITH DIAERESIS".
+    if (c == 0x0178)
+        return 0xFF;
+    return 0;
+}
+
+const int js::irregexp::kSpaceRanges[] = {
+    0x0009, 0x000D + 1, // CHARACTER TABULATION..CARRIAGE RETURN (CR)
+    0x0020, 0x0020 + 1, // SPACE
+    0x00A0, 0x00A0 + 1, // NO-BREAK SPACE
+    0x1680, 0x1680 + 1, // OGHAM SPACE MARK
+    0x2000, 0x200A + 1, // EN QUAD..HAIR SPACE
+    0x2028, 0x2029 + 1, // LINE SEPARATOR..PARAGRAPH SEPARATOR
+    0x202F, 0x202F + 1, // NARROW NO-BREAK SPACE
+    0x205F, 0x205F + 1, // MEDIUM MATHEMATICAL SPACE
+    0x3000, 0x3000 + 1, // IDEOGRAPHIC SPACE
+    0xFEFF, 0xFEFF + 1, // ZERO WIDTH NO-BREAK SPACE
+    0xFFFF + 1
+};
+const int js::irregexp::kSpaceRangeCount = 21;
+
+const int js::irregexp::kSpaceAndSurrogateRanges[] = {
+    0x0009, 0x000D + 1, // CHARACTER TABULATION..CARRIAGE RETURN (CR)
+    0x0020, 0x0020 + 1, // SPACE
+    0x00A0, 0x00A0 + 1, // NO-BREAK SPACE
+    0x1680, 0x1680 + 1, // OGHAM SPACE MARK
+    0x2000, 0x200A + 1, // EN QUAD..HAIR SPACE
+    0x2028, 0x2029 + 1, // LINE SEPARATOR..PARAGRAPH SEPARATOR
+    0x202F, 0x202F + 1, // NARROW NO-BREAK SPACE
+    0x205F, 0x205F + 1, // MEDIUM MATHEMATICAL SPACE
+    0x3000, 0x3000 + 1, // IDEOGRAPHIC SPACE
+    0xD800, 0xDFFF + 1, // <Lead Surrogate Min>..<Trail Surrogate Max>
+    0xFEFF, 0xFEFF + 1, // ZERO WIDTH NO-BREAK SPACE
+    0xFFFF + 1
+};
+const int js::irregexp::kSpaceAndSurrogateRangeCount = 23;
+
+const int js::irregexp::kWordRanges[] = {
+    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
+    0x0041, 0x005A + 1, // LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+    0x005F, 0x005F + 1, // LOW LINE
+    0x0061, 0x007A + 1, // LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+    0xFFFF + 1
+};
+const int js::irregexp::kWordRangeCount = 9;
+
+const int js::irregexp::kIgnoreCaseWordRanges[] = {
+    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
+    0x0041, 0x005A + 1, // LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+    0x005F, 0x005F + 1, // LOW LINE
+    0x0061, 0x007A + 1, // LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+    0x017F, 0x017F + 1, // LATIN SMALL LETTER LONG S
+    0x212A, 0x212A + 1, // KELVIN SIGN
+    0xFFFF + 1
+};
+const int js::irregexp::kIgnoreCaseWordRangeCount = 13;
+
+const int js::irregexp::kWordAndSurrogateRanges[] = {
+    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
+    0x0041, 0x005A + 1, // LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+    0x005F, 0x005F + 1, // LOW LINE
+    0x0061, 0x007A + 1, // LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+    0xD800, 0xDFFF + 1, // <Lead Surrogate Min>..<Trail Surrogate Max>
+    0xFFFF + 1
+};
+const int js::irregexp::kWordAndSurrogateRangeCount = 11;
+
+const int js::irregexp::kNegatedIgnoreCaseWordAndSurrogateRanges[] = {
+    0x0000, 0x002F + 1, // NULL..SOLIDUS
+    0x003A, 0x0040 + 1, // COLON..COMMERCIAL AT
+    0x005B, 0x005E + 1, // LEFT SQUARE BRACKET..CIRCUMFLEX ACCENT
+    0x0060, 0x0060 + 1, // GRAVE ACCENT
+    0x007B, 0x017E + 1, // LEFT CURLY BRACKET..LATIN SMALL LETTER Z WITH CARON
+    0x0180, 0x2129 + 1, // LATIN SMALL LETTER B WITH STROKE..TURNED GREEK SMALL LETTER IOTA
+    0x212B, 0xD7FF + 1, // ANGSTROM SIGN..<Unused>
+    0xE000, 0xFFFF + 1, // Private Use..<Unused>
+    0xFFFF + 1
+};
+const int js::irregexp::kNegatedIgnoreCaseWordAndSurrogateRangeCount = 17;
+
+const int js::irregexp::kDigitRanges[] = {
+    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
+    0xFFFF + 1
+};
+const int js::irregexp::kDigitRangeCount = 3;
+
+const int js::irregexp::kDigitAndSurrogateRanges[] = {
+    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
+    0xD800, 0xDFFF + 1, // <Lead Surrogate Min>..<Trail Surrogate Max>
+    0xFFFF + 1
+};
+const int js::irregexp::kDigitAndSurrogateRangeCount = 5;
+
+const int js::irregexp::kSurrogateRanges[] = {
+    0xD800, 0xDFFF + 1, // <Lead Surrogate Min>..<Trail Surrogate Max>
+    0xFFFF + 1
+};
+const int js::irregexp::kSurrogateRangeCount = 3;
+
+const int js::irregexp::kLineTerminatorRanges[] = {
+    0x000A, 0x000A + 1, // LINE FEED (LF)
+    0x000D, 0x000D + 1, // CARRIAGE RETURN (CR)
+    0x2028, 0x2029 + 1, // LINE SEPARATOR..PARAGRAPH SEPARATOR
+    0xFFFF + 1
+};
+const int js::irregexp::kLineTerminatorRangeCount = 7;
diff --git a/js/src/irregexp/RegExpCharacters.h b/js/src/irregexp/RegExpCharacters.h
new file mode 100644
index 000000000..0d3cf096f
--- /dev/null
+++ b/js/src/irregexp/RegExpCharacters.h
@@ -0,0 +1,90 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99: */
+
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_JSREGEXPCHARACTERS_H_
+#define V8_JSREGEXPCHARACTERS_H_
+
+namespace js {
+
+namespace irregexp {
+
+char16_t
+ConvertNonLatin1ToLatin1(char16_t c, bool unicode);
+
+// -------------------------------------------------------------------
+// CharacterRange
+
+// Each range is a [from, to) pair: "from" is inclusive and "to" is exclusive.
+
+// This covers \s as defined in ES2016, 21.2.2.12 CharacterClassEscape,
+// which includes WhiteSpace (11.2) and LineTerminator (11.3) values.
+extern const int kSpaceRanges[];
+extern const int kSpaceRangeCount;
+
+// Characters in \s and additionally all surrogate characters.
+extern const int kSpaceAndSurrogateRanges[];
+extern const int kSpaceAndSurrogateRangeCount;
+
+// This covers \w as defined in ES2016, 21.2.2.12 CharacterClassEscape.
+extern const int kWordRanges[];
+extern const int kWordRangeCount;
+
+// Characters which case-fold to characters in \w.
+extern const int kIgnoreCaseWordRanges[];
+extern const int kIgnoreCaseWordRangeCount;
+
+// Characters in \w and additionally all surrogate characters.
+extern const int kWordAndSurrogateRanges[];
+extern const int kWordAndSurrogateRangeCount;
+
+// All characters excluding those which case-fold to \w and excluding all
+// surrogate characters.
+extern const int kNegatedIgnoreCaseWordAndSurrogateRanges[];
+extern const int kNegatedIgnoreCaseWordAndSurrogateRangeCount;
+
+// This covers \d as defined in ES2016, 21.2.2.12 CharacterClassEscape.
+extern const int kDigitRanges[];
+extern const int kDigitRangeCount;
+
+// Characters in \d and additionally all surrogate characters.
+extern const int kDigitAndSurrogateRanges[];
+extern const int kDigitAndSurrogateRangeCount;
+
+// The range of all surrogate characters.
+extern const int kSurrogateRanges[];
+extern const int kSurrogateRangeCount;
+
+// Line terminators as defined in ES2016, 11.3 LineTerminator.
+extern const int kLineTerminatorRanges[];
+extern const int kLineTerminatorRangeCount;
+
+} } // namespace js::irregexp
+
+#endif // V8_JSREGEXPCHARACTERS_H_
diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp
index 2e19065fd..4d691a5dc 100644
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -31,10 +31,14 @@
 #include "irregexp/RegExpEngine.h"
 
 #include "irregexp/NativeRegExpMacroAssembler.h"
+#include "irregexp/RegExpCharacters.h" 
 #include "irregexp/RegExpMacroAssembler.h"
 #include "jit/ExecutableAllocator.h"
 #include "jit/JitCommon.h"
 
+// Generated inline helper: RangeContainsLatin1Equivalents().
+#include "irregexp/RegExpCharacters-inl.h"
+
 using namespace js;
 using namespace js::irregexp;
 
@@ -61,61 +65,6 @@ RegExpNode::RegExpNode(LifoAlloc* alloc)
     bm_info_[0] = bm_info_[1] = nullptr;
 }
 
-// -------------------------------------------------------------------
-// CharacterRange
-
-// The '2' variant has inclusive from and exclusive to.
-// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
-// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
-static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
-    0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
-    0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
-    0xFEFF, 0xFF00, 0x10000 };
-static const int kSpaceRangeCount = ArrayLength(kSpaceRanges);
-
-static const int kSpaceAndSurrogateRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
-    0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
-    0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
-    unicode::LeadSurrogateMin, unicode::TrailSurrogateMax + 1,
-    0xFEFF, 0xFF00, 0x10000 };
-static const int kSpaceAndSurrogateRangeCount = ArrayLength(kSpaceAndSurrogateRanges);
-static const int kWordRanges[] = {
-    '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
-static const int kWordRangeCount = ArrayLength(kWordRanges);
-static const int kIgnoreCaseWordRanges[] = {
-    '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1,
-    0x017F, 0x017F + 1, 0x212A, 0x212A + 1,
-    0x10000 };
-static const int kIgnoreCaseWordCount = ArrayLength(kIgnoreCaseWordRanges);
-static const int kWordAndSurrogateRanges[] = {
-    '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1,
-    unicode::LeadSurrogateMin, unicode::TrailSurrogateMax + 1,
-    0x10000 };
-static const int kWordAndSurrogateRangeCount = ArrayLength(kWordAndSurrogateRanges);
-static const int kNegatedIgnoreCaseWordAndSurrogateRanges[] = {
-    0, '0', '9' + 1, 'A',
-    'Z' + 1, '_', '_' + 1, 'a',
-    'z' + 1, 0x017F,
-    0x017F + 1, 0x212A,
-    0x212A + 1, unicode::LeadSurrogateMin,
-    unicode::TrailSurrogateMax + 1, 0x10000,
-    0x10000 };
-static const int kNegatedIgnoreCaseWordAndSurrogateRangeCount =
-    ArrayLength(kNegatedIgnoreCaseWordAndSurrogateRanges);
-static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
-static const int kDigitRangeCount = ArrayLength(kDigitRanges);
-static const int kDigitAndSurrogateRanges[] = {
-    '0', '9' + 1,
-    unicode::LeadSurrogateMin, unicode::TrailSurrogateMax + 1,
-    0x10000 };
-static const int kDigitAndSurrogateRangeCount = ArrayLength(kDigitAndSurrogateRanges);
-static const int kSurrogateRanges[] = {
-    unicode::LeadSurrogateMin, unicode::TrailSurrogateMax + 1,
-    0x10000 };
-static const int kSurrogateRangeCount = ArrayLength(kSurrogateRanges);
-static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
-    0x2028, 0x202A, 0x10000 };
-static const int kLineTerminatorRangeCount = ArrayLength(kLineTerminatorRanges);
 static const int kMaxOneByteCharCode = 0xff;
 static const int kMaxUtf16CodeUnit = 0xffff;
 
@@ -213,7 +162,7 @@ CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
         break;
       case 'w':
         if (ignore_case)
-            AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordCount, ranges);
+            AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, ranges);
         else
             AddClassEscape(alloc, type, ranges);
         break;
@@ -233,33 +182,6 @@ CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
     }
 }
 
-#define FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(macro)      \
-    /* LATIN CAPITAL LETTER Y WITH DIAERESIS */         \
-    macro(0x0178, 0x00FF)                               \
-    /* LATIN SMALL LETTER LONG S */                     \
-    macro(0x017F, 0x0073)                               \
-    /* LATIN CAPITAL LETTER SHARP S */                  \
-    macro(0x1E9E, 0x00DF)                               \
-    /* KELVIN SIGN */                                   \
-    macro(0x212A, 0x006B)                               \
-    /* ANGSTROM SIGN */                                 \
-    macro(0x212B, 0x00E5)
-
-// We need to check for the following characters: 0x39c 0x3bc 0x178.
-static inline bool
-RangeContainsLatin1Equivalents(CharacterRange range, bool unicode)
-{
-    /* TODO(dcarney): this could be a lot more efficient. */
-    if (unicode) {
-#define CHECK_RANGE(C, F) \
-        if (range.Contains(C)) return true;
-FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(CHECK_RANGE)
-#undef CHECK_RANGE
-    }
-
-    return range.Contains(0x39c) || range.Contains(0x3bc) || range.Contains(0x178);
-}
-
 static bool
 RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode)
 {
@@ -336,7 +258,7 @@ GetCaseIndependentLetters(char16_t character,
     // step 3.g.
     // The standard requires that non-ASCII characters cannot have ASCII
     // character codes in their equivalence class, even though this
-    // situation occurs multiple times in the unicode tables.
+    // situation occurs multiple times in the Unicode tables.
     static const unsigned kMaxAsciiCharCode = 127;
     if (upper <= kMaxAsciiCharCode) {
         if (character > kMaxAsciiCharCode) {
@@ -365,31 +287,6 @@ GetCaseIndependentLetters(char16_t character,
                                      choices, ArrayLength(choices), letters);
 }
 
-static char16_t
-ConvertNonLatin1ToLatin1(char16_t c, bool unicode)
-{
-    MOZ_ASSERT(c > kMaxOneByteCharCode);
-    if (unicode) {
-        switch (c) {
-#define CONVERT(C, F) case C: return F;
-FOR_EACH_NON_ASCII_TO_ASCII_FOLDING(CONVERT)
-#undef CONVERT
-        }
-    }
-
-    switch (c) {
-      // This are equivalent characters in unicode.
-      case 0x39c:
-      case 0x3bc:
-        return 0xb5;
-      // This is an uppercase of a Latin-1 character
-      // outside of Latin-1.
-      case 0x178:
-        return 0xff;
-    }
-    return 0;
-}
-
 void
 CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges)
 {
@@ -2358,7 +2255,10 @@ void
 BoyerMoorePositionInfo::SetInterval(const Interval& interval)
 {
     s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
-    w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
+    if (unicode_ignore_case_)
+        w_ = AddRange(w_, kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, interval);
+    else
+        w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
     d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
     surrogate_ =
         AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
@@ -2395,11 +2295,12 @@ BoyerMoorePositionInfo::SetAll()
 BoyerMooreLookahead::BoyerMooreLookahead(LifoAlloc* alloc, size_t length, RegExpCompiler* compiler)
   : length_(length), compiler_(compiler), bitmaps_(*alloc)
 {
+    bool unicode_ignore_case = compiler->unicode() && compiler->ignore_case();
     max_char_ = MaximumCharacter(compiler->ascii());
 
     bitmaps_.reserve(length);
     for (size_t i = 0; i < length; i++)
-        bitmaps_.append(alloc->newInfallible<BoyerMoorePositionInfo>(alloc));
+        bitmaps_.append(alloc->newInfallible<BoyerMoorePositionInfo>(alloc, unicode_ignore_case));
 }
 
 // Find the longest range of lookahead that has the fewest number of different
@@ -3065,15 +2966,22 @@ EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace*
 // Check for [0-9A-Z_a-z].
 static void
 EmitWordCheck(RegExpMacroAssembler* assembler,
-              jit::Label* word, jit::Label* non_word, bool fall_through_on_word)
+              jit::Label* word, jit::Label* non_word, bool fall_through_on_word,
+              bool unicode_ignore_case)
 {
-    if (assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W',
+    if (!unicode_ignore_case &&
+        assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W',
                                               fall_through_on_word ? non_word : word))
     {
         // Optimized implementation available.
         return;
     }
 
+    if (unicode_ignore_case) {
+        assembler->CheckCharacter(0x017F, word);
+        assembler->CheckCharacter(0x212A, word);
+    }
+
     assembler->CheckCharacterGT('z', non_word);
     assembler->CheckCharacterLT('0', non_word);
     assembler->CheckCharacterGT('a' - 1, word);
@@ -3122,7 +3030,8 @@ AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace)
             assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
         }
         // Fall through on non-word.
-        EmitWordCheck(assembler, &before_word, &before_non_word, false);
+        EmitWordCheck(assembler, &before_word, &before_non_word, false,
+                      compiler->unicode() && compiler->ignore_case());
         // Next character is not a word character.
         assembler->Bind(&before_non_word);
         jit::Label ok;
@@ -3162,7 +3071,8 @@ AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler,
     // We already checked that we are not at the start of input so it must be
     // OK to load the previous character.
     assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
-    EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
+    EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord,
+                  compiler->unicode() && compiler->ignore_case());
 
     assembler->Bind(&fall_through);
     on_success()->Emit(compiler, &new_trace);
diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h
index 78c784aaf..1a8fd4b22 100644
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -1195,13 +1195,14 @@ AddRange(ContainedInLattice a,
 class BoyerMoorePositionInfo
 {
   public:
-    explicit BoyerMoorePositionInfo(LifoAlloc* alloc)
+    explicit BoyerMoorePositionInfo(LifoAlloc* alloc, bool unicode_ignore_case)
       : map_(*alloc),
         map_count_(0),
         w_(kNotYet),
         s_(kNotYet),
         d_(kNotYet),
-        surrogate_(kNotYet)
+        surrogate_(kNotYet),
+        unicode_ignore_case_(unicode_ignore_case)
     {
         map_.reserve(kMapSize);
         for (int i = 0; i < kMapSize; i++)
@@ -1228,6 +1229,9 @@ class BoyerMoorePositionInfo
     ContainedInLattice s_;  // The \s character class.
     ContainedInLattice d_;  // The \d character class.
     ContainedInLattice surrogate_;  // Surrogate UTF-16 code units.
+
+    // True if the RegExp has both the unicode and ignoreCase flags set.
+    bool unicode_ignore_case_;
 };
 
 typedef InfallibleVector<BoyerMoorePositionInfo*, 1> BoyerMoorePositionInfoVector;
author	janekptacijarabaci <janekptacijarabaci@seznam.cz>	2018-03-30 09:44:21 +0200
committer	janekptacijarabaci <janekptacijarabaci@seznam.cz>	2018-03-30 09:44:21 +0200
commit	a1a007a4856fa50d6d811c2268f881e3666f4c67 (patch)
tree	24b082c1bfb5777f1770c82a534bf765160bc1b8 /js/src/irregexp
parent	eddd0de2ae80e176011f41a5400e81522d53f4f3 (diff)
parent	59bf4204a84f7638d3f89a29bc7c04e5dc401369 (diff)
download	UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.tar UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.tar.gz UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.tar.lz UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.tar.xz UXP-a1a007a4856fa50d6d811c2268f881e3666f4c67.zip