diff options
author | wolfbeast <mcwerewolf@wolfbeast.com> | 2019-11-18 12:20:44 +0100 |
---|---|---|
committer | wolfbeast <mcwerewolf@wolfbeast.com> | 2019-11-18 12:20:44 +0100 |
commit | f31b04a303607cd82757e7c4f60bb536658c8a30 (patch) | |
tree | 98b720782be5bc8bd77202bb4c1dc69e4927c03e /js/src/irregexp | |
parent | 36c81a978695ec3ba68af8475422bcab49ef470d (diff) | |
download | UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.tar UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.tar.gz UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.tar.lz UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.tar.xz UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.zip |
Issue #1284 - Implement /s (dotAll) for regular expressions.
Resolves #1284.
Diffstat (limited to 'js/src/irregexp')
-rw-r--r-- | js/src/irregexp/RegExpParser.cpp | 52 | ||||
-rw-r--r-- | js/src/irregexp/RegExpParser.h | 1 |
2 files changed, 49 insertions, 4 deletions
diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index 9ef9fe3e2..28abdb0b4 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -1384,7 +1384,7 @@ UnicodeEverythingAtom(LifoAlloc* alloc) { RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc); - // everything except \x0a, \x0d, \u2028 and \u2029 + // Everything except \x0a, \x0d, \u2028 and \u2029 CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); ranges->append(CharacterRange::Range(0x0, 0x09)); @@ -1414,6 +1414,38 @@ UnicodeEverythingAtom(LifoAlloc* alloc) return builder->ToRegExp(); } +static inline RegExpTree* +UnicodeDotAllAtom(LifoAlloc* alloc) +{ + RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc); + + // Full range excluding surrogates because /s was specified + + CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); + ranges->append(CharacterRange::Range(0x0, unicode::LeadSurrogateMin - 1)); + ranges->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1, unicode::UTF16Max)); + builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false)); + + builder->NewAlternative(); + + builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax)); + builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin, + unicode::TrailSurrogateMax)); + + builder->NewAlternative(); + + builder->AddAssertion(alloc->newInfallible<RegExpAssertion>( + RegExpAssertion::NOT_AFTER_LEAD_SURROGATE)); + builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax)); + + builder->NewAlternative(); + + builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax)); + builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax)); + + return builder->ToRegExp(); +} + RegExpTree* UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool ignore_case) { @@ -1541,13 +1573,25 @@ RegExpParser<CharT>::ParseDisjunction() } case '.': { Advance(); - // everything except \x0a, \x0d, \u2028 and \u2029 + if (unicode_) { - builder->AddAtom(UnicodeEverythingAtom(alloc)); + if (dotall_) { + // Everything + builder->AddAtom(UnicodeDotAllAtom(alloc)); + } else { + // Everything except \x0a, \x0d, \u2028 and \u2029 + builder->AddAtom(UnicodeEverythingAtom(alloc)); + } break; } CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); - CharacterRange::AddClassEscape(alloc, '.', ranges); + if (dotall_) { + // Everything + CharacterRange::AddClassEscape(alloc, '*', ranges); + } else { + // Everything except \x0a, \x0d, \u2028 and \u2029 + CharacterRange::AddClassEscape(alloc, '.', ranges); + } RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false); builder->AddAtom(atom); break; diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h index 2f02625b5..36c24cd7c 100644 --- a/js/src/irregexp/RegExpParser.h +++ b/js/src/irregexp/RegExpParser.h @@ -313,6 +313,7 @@ class RegExpParser bool multiline_; bool unicode_; bool ignore_case_; + bool dotall_; bool simple_; bool contains_anchor_; bool is_scanned_for_captures_; |