summaryrefslogtreecommitdiffstats
path: root/js/src/irregexp
diff options
context:
space:
mode:
authorwolfbeast <mcwerewolf@wolfbeast.com>2019-11-18 12:20:44 +0100
committerwolfbeast <mcwerewolf@wolfbeast.com>2019-11-18 12:20:44 +0100
commitf31b04a303607cd82757e7c4f60bb536658c8a30 (patch)
tree98b720782be5bc8bd77202bb4c1dc69e4927c03e /js/src/irregexp
parent36c81a978695ec3ba68af8475422bcab49ef470d (diff)
downloadUXP-f31b04a303607cd82757e7c4f60bb536658c8a30.tar
UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.tar.gz
UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.tar.lz
UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.tar.xz
UXP-f31b04a303607cd82757e7c4f60bb536658c8a30.zip
Issue #1284 - Implement /s (dotAll) for regular expressions.
Resolves #1284.
Diffstat (limited to 'js/src/irregexp')
-rw-r--r--js/src/irregexp/RegExpParser.cpp52
-rw-r--r--js/src/irregexp/RegExpParser.h1
2 files changed, 49 insertions, 4 deletions
diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp
index 9ef9fe3e2..28abdb0b4 100644
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -1384,7 +1384,7 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
{
RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
- // everything except \x0a, \x0d, \u2028 and \u2029
+ // Everything except \x0a, \x0d, \u2028 and \u2029
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
ranges->append(CharacterRange::Range(0x0, 0x09));
@@ -1414,6 +1414,38 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
return builder->ToRegExp();
}
+static inline RegExpTree*
+UnicodeDotAllAtom(LifoAlloc* alloc)
+{
+ RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+
+ // Full range excluding surrogates because /s was specified
+
+ CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+ ranges->append(CharacterRange::Range(0x0, unicode::LeadSurrogateMin - 1));
+ ranges->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1, unicode::UTF16Max));
+ builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));
+
+ builder->NewAlternative();
+
+ builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
+ builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
+ unicode::TrailSurrogateMax));
+
+ builder->NewAlternative();
+
+ builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
+ RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
+ builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
+
+ builder->NewAlternative();
+
+ builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
+ builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
+
+ return builder->ToRegExp();
+}
+
RegExpTree*
UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool ignore_case)
{
@@ -1541,13 +1573,25 @@ RegExpParser<CharT>::ParseDisjunction()
}
case '.': {
Advance();
- // everything except \x0a, \x0d, \u2028 and \u2029
+
if (unicode_) {
- builder->AddAtom(UnicodeEverythingAtom(alloc));
+ if (dotall_) {
+ // Everything
+ builder->AddAtom(UnicodeDotAllAtom(alloc));
+ } else {
+ // Everything except \x0a, \x0d, \u2028 and \u2029
+ builder->AddAtom(UnicodeEverythingAtom(alloc));
+ }
break;
}
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
- CharacterRange::AddClassEscape(alloc, '.', ranges);
+ if (dotall_) {
+ // Everything
+ CharacterRange::AddClassEscape(alloc, '*', ranges);
+ } else {
+ // Everything except \x0a, \x0d, \u2028 and \u2029
+ CharacterRange::AddClassEscape(alloc, '.', ranges);
+ }
RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
builder->AddAtom(atom);
break;
diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h
index 2f02625b5..36c24cd7c 100644
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -313,6 +313,7 @@ class RegExpParser
bool multiline_;
bool unicode_;
bool ignore_case_;
+ bool dotall_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;