1 files changed, 14 insertions, 100 deletions
diff --git a/js/src/regexp/regexp-compiler-tonode.cc b/js/src/regexp/regexp-compiler-tonode.cc
index fc734ac7c..257030589 100644
--- a/js/src/regexp/regexp-compiler-tonode.cc
+++ b/js/src/regexp/regexp-compiler-tonode.cc
@@ -1137,39 +1137,6 @@ Vector<const int> CharacterRange::GetWordBounds() {
   return Vector<const int>(kWordRanges, kWordRangeCount - 1);
 }
 
-#ifdef V8_INTL_SUPPORT
-struct IgnoreSet {
-  IgnoreSet() : set(BuildIgnoreSet()) {}
-  const icu::UnicodeSet set;
-};
-
-struct SpecialAddSet {
-  SpecialAddSet() : set(BuildSpecialAddSet()) {}
-  const icu::UnicodeSet set;
-};
-
-icu::UnicodeSet BuildAsciiAToZSet() {
-  icu::UnicodeSet set('a', 'z');
-  set.add('A', 'Z');
-  set.freeze();
-  return set;
-}
-
-struct AsciiAToZSet {
-  AsciiAToZSet() : set(BuildAsciiAToZSet()) {}
-  const icu::UnicodeSet set;
-};
-
-static base::LazyInstance<IgnoreSet>::type ignore_set =
-    LAZY_INSTANCE_INITIALIZER;
-
-static base::LazyInstance<SpecialAddSet>::type special_add_set =
-    LAZY_INSTANCE_INITIALIZER;
-
-static base::LazyInstance<AsciiAToZSet>::type ascii_a_to_z_set =
-    LAZY_INSTANCE_INITIALIZER;
-#endif  // V8_INTL_SUPPORT
-
 // static
 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
                                         ZoneList<CharacterRange>* ranges,
@@ -1192,75 +1159,22 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
     others.add(from, to);
   }
 
-  // Set of characters already added to ranges that do not need to be added
-  // again.
+  // Compute the set of additional characters that should be added,
+  // using UnicodeSet::closeOver. ECMA-262 defines slightly different
+  // case-folding rules than Unicode, so some characters that are
+  // added by closeOver do not match anything other than themselves in
+  // JS. For example, 'ſ' (U+017F LATIN SMALL LETTER LONG S) is
+  // case-insensitively equal to 's' and 'S' according to Unicode,
+  // but does not match any other character in JS. To handle this
+  // case, such characters are listed in the IgnoreSet and filtered
+  // out here. We filter twice: once before calling closeOver (to
+  // prevent 'ſ' from adding 's'), and once after calling closeOver
+  // (to prevent 's' from adding 'ſ'). See regexp/special-case.h for
+  // more information.
   icu::UnicodeSet already_added(others);
-
-  // Set of characters in ranges that are in the 52 ASCII characters [a-zA-Z].
-  icu::UnicodeSet in_ascii_a_to_z(others);
-  in_ascii_a_to_z.retainAll(ascii_a_to_z_set.Pointer()->set);
-
-  // Remove all chars in [a-zA-Z] from others.
-  others.removeAll(in_ascii_a_to_z);
-
-  // Set of characters in ranges that are overlapping with special add set.
-  icu::UnicodeSet in_special_add(others);
-  in_special_add.retainAll(special_add_set.Pointer()->set);
-
-  others.removeAll(in_special_add);
-
-  // Ignore all chars in ignore set.
-  others.removeAll(ignore_set.Pointer()->set);
-
-  // For most of the chars in ranges that is still in others, find the case
-  // equivlant set by calling closeOver(USET_CASE_INSENSITIVE).
+  others.removeAll(RegExpCaseFolding::IgnoreSet());
   others.closeOver(USET_CASE_INSENSITIVE);
-
-  // Because closeOver(USET_CASE_INSENSITIVE) may add ASCII [a-zA-Z] to others,
-  // but ECMA262 "i" mode won't consider that, remove them from others.
-  // Ex: U+017F add 'S' and 's' to others.
-  others.removeAll(ascii_a_to_z_set.Pointer()->set);
-
-  // Special handling for in_ascii_a_to_z.
-  for (int32_t i = 0; i < in_ascii_a_to_z.getRangeCount(); i++) {
-    UChar32 start = in_ascii_a_to_z.getRangeStart(i);
-    UChar32 end = in_ascii_a_to_z.getRangeEnd(i);
-    // Check if it is uppercase A-Z by checking bit 6.
-    if (start & 0x0020) {
-      // Add the lowercases
-      others.add(start & 0x005F, end & 0x005F);
-    } else {
-      // Add the uppercases
-      others.add(start | 0x0020, end | 0x0020);
-    }
-  }
-
-  // Special handling for chars in "Special Add" set.
-  for (int32_t i = 0; i < in_special_add.getRangeCount(); i++) {
-    UChar32 end = in_special_add.getRangeEnd(i);
-    for (UChar32 ch = in_special_add.getRangeStart(i); ch <= end; ch++) {
-      // Add the uppercase of this character if itself is not an uppercase
-      // character.
-      // Note: The if condiction cannot be u_islower(ch) because ch could be
-      // neither uppercase nor lowercase but Mn.
-      if (!u_isupper(ch)) {
-        others.add(u_toupper(ch));
-      }
-      icu::UnicodeSet candidates(ch, ch);
-      candidates.closeOver(USET_CASE_INSENSITIVE);
-      for (int32_t j = 0; j < candidates.getRangeCount(); j++) {
-        UChar32 end2 = candidates.getRangeEnd(j);
-        for (UChar32 ch2 = candidates.getRangeStart(j); ch2 <= end2; ch2++) {
-          // Add character that is not uppercase to others.
-          if (!u_isupper(ch2)) {
-            others.add(ch2);
-          }
-        }
-      }
-    }
-  }
-
-  // Remove all characters which already in the ranges.
+  others.removeAll(RegExpCaseFolding::IgnoreSet());
   others.removeAll(already_added);
 
   // Add others to the ranges