From 519775b8d9d823b8cee786bc668e050110a8aa67 Mon Sep 17 00:00:00 2001 From: wolfbeast Date: Mon, 12 Mar 2018 11:09:28 +0100 Subject: Revert "JS - RegExp - match updated spec for `/\b/iu` and `/\B/iu`" This reverts commit 93f8e06bb8d8656e868679d584c7c8771ff8e42f. --- js/src/irregexp/RegExpEngine.cpp | 25 ++++++---------------- js/src/irregexp/RegExpEngine.h | 8 ++----- .../RegExp/unicode-ignoreCase-word-boundary.js | 25 ---------------------- 3 files changed, 8 insertions(+), 50 deletions(-) delete mode 100644 js/src/tests/ecma_6/RegExp/unicode-ignoreCase-word-boundary.js (limited to 'js/src') diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp index 7116ff1e5..2e19065fd 100644 --- a/js/src/irregexp/RegExpEngine.cpp +++ b/js/src/irregexp/RegExpEngine.cpp @@ -2358,10 +2358,7 @@ void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); - if (unicode_ignore_case_) - w_ = AddRange(w_, kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, interval); - else - w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); + w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); surrogate_ = AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); @@ -2398,12 +2395,11 @@ BoyerMoorePositionInfo::SetAll() BoyerMooreLookahead::BoyerMooreLookahead(LifoAlloc* alloc, size_t length, RegExpCompiler* compiler) : length_(length), compiler_(compiler), bitmaps_(*alloc) { - bool unicode_ignore_case = compiler->unicode() && compiler->ignore_case(); max_char_ = MaximumCharacter(compiler->ascii()); bitmaps_.reserve(length); for (size_t i = 0; i < length; i++) - bitmaps_.append(alloc->newInfallible(alloc, unicode_ignore_case)); + bitmaps_.append(alloc->newInfallible(alloc)); } // Find the longest range of lookahead that has the fewest number of different @@ -3069,22 +3065,15 @@ EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace* // Check for [0-9A-Z_a-z]. static void EmitWordCheck(RegExpMacroAssembler* assembler, - jit::Label* word, jit::Label* non_word, bool fall_through_on_word, - bool unicode_ignore_case) + jit::Label* word, jit::Label* non_word, bool fall_through_on_word) { - if (!unicode_ignore_case && - assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W', + if (assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W', fall_through_on_word ? non_word : word)) { // Optimized implementation available. return; } - if (unicode_ignore_case) { - assembler->CheckCharacter(0x017F, word); - assembler->CheckCharacter(0x212A, word); - } - assembler->CheckCharacterGT('z', non_word); assembler->CheckCharacterLT('0', non_word); assembler->CheckCharacterGT('a' - 1, word); @@ -3133,8 +3122,7 @@ AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); } // Fall through on non-word. - EmitWordCheck(assembler, &before_word, &before_non_word, false, - compiler->unicode() && compiler->ignore_case()); + EmitWordCheck(assembler, &before_word, &before_non_word, false); // Next character is not a word character. assembler->Bind(&before_non_word); jit::Label ok; @@ -3174,8 +3162,7 @@ AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler, // We already checked that we are not at the start of input so it must be // OK to load the previous character. assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); - EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord, - compiler->unicode() && compiler->ignore_case()); + EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); assembler->Bind(&fall_through); on_success()->Emit(compiler, &new_trace); diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h index 1a8fd4b22..78c784aaf 100644 --- a/js/src/irregexp/RegExpEngine.h +++ b/js/src/irregexp/RegExpEngine.h @@ -1195,14 +1195,13 @@ AddRange(ContainedInLattice a, class BoyerMoorePositionInfo { public: - explicit BoyerMoorePositionInfo(LifoAlloc* alloc, bool unicode_ignore_case) + explicit BoyerMoorePositionInfo(LifoAlloc* alloc) : map_(*alloc), map_count_(0), w_(kNotYet), s_(kNotYet), d_(kNotYet), - surrogate_(kNotYet), - unicode_ignore_case_(unicode_ignore_case) + surrogate_(kNotYet) { map_.reserve(kMapSize); for (int i = 0; i < kMapSize; i++) @@ -1229,9 +1228,6 @@ class BoyerMoorePositionInfo ContainedInLattice s_; // The \s character class. ContainedInLattice d_; // The \d character class. ContainedInLattice surrogate_; // Surrogate UTF-16 code units. - - // True if the RegExp has unicode and ignoreCase flags. - bool unicode_ignore_case_; }; typedef InfallibleVector BoyerMoorePositionInfoVector; diff --git a/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-word-boundary.js b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-word-boundary.js deleted file mode 100644 index c1a04bd3d..000000000 --- a/js/src/tests/ecma_6/RegExp/unicode-ignoreCase-word-boundary.js +++ /dev/null @@ -1,25 +0,0 @@ -var BUGNUMBER = 1338373; -var summary = "Word boundary should match U+017F and U+212A in unicode+ignoreCase."; - -assertEq(/\b/iu.test('\u017F'), true); -assertEq(/\b/i.test('\u017F'), false); -assertEq(/\b/u.test('\u017F'), false); -assertEq(/\b/.test('\u017F'), false); - -assertEq(/\b/iu.test('\u212A'), true); -assertEq(/\b/i.test('\u212A'), false); -assertEq(/\b/u.test('\u212A'), false); -assertEq(/\b/.test('\u212A'), false); - -assertEq(/\B/iu.test('\u017F'), false); -assertEq(/\B/i.test('\u017F'), true); -assertEq(/\B/u.test('\u017F'), true); -assertEq(/\B/.test('\u017F'), true); - -assertEq(/\B/iu.test('\u212A'), false); -assertEq(/\B/i.test('\u212A'), true); -assertEq(/\B/u.test('\u212A'), true); -assertEq(/\B/.test('\u212A'), true); - -if (typeof reportCompare === "function") - reportCompare(true, true); -- cgit v1.2.3