// Patch 185a9a750878ed1d9705fbd162dbfe9bf2e4ea0c
// Author: wolfbeast, Tue, 26 Nov 2019 13:37:09 +0100
// Subject: Issue #1302 - Add self-hosted implementation for string regex
//          .matchAll. This resolves #1302.
//
// Files touched under js/src/builtin (165 insertions in total):
//   RegExp.cpp            |   1 +
//   RegExp.js             | 132 +
//   SelfHostingDefines.h  |   5 +
//   String.js             |  27 +
//
// RegExp.cpp (index b7853d533..55e0c8578): js::regexp_methods[] gains, right
// after the "RegExpMatch" entry and before the "RegExpReplace" entry:
//
//     JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0),
//
// RegExp.js (index 1a2276594..25827743e): the three functions below are
// appended after |_SetCanonicalName(RegExpSpecies, "get [Symbol.species]");|.

// String.prototype.matchAll proposal.
//
// RegExp.prototype [ @@matchAll ] ( string )
//
// Clones the receiver through its species constructor, copies the receiver's
// lastIndex onto the clone and returns a RegExpStringIterator that walks every
// match of the clone in ToString(string).
//
// Throws a TypeError when the receiver is not an object, or when its flags
// string does not contain "g".
function RegExpMatchAll(string) {
    // Step 1.
    var regexpObj = this;

    // Step 2.
    if (!IsObject(regexpObj)) {
        var actualType = regexpObj === null ? "null" : typeof regexpObj;
        ThrowTypeError(JSMSG_NOT_NONNULL_OBJECT, actualType);
    }

    // Step 3.
    var input = ToString(string);

    // Step 4.
    var Ctor = SpeciesConstructor(regexpObj, GetBuiltinConstructor("RegExp"));

    // Step 5.
    var flagString = ToString(regexpObj.flags);

    // Step 2.b.iii; located here because it needs |flagString|.
    // NOTE(review): the ecma262 PR referenced below appears to place this
    // check in String.prototype.matchAll rather than in @@matchAll itself;
    // as written, a direct call on a non-global regexp throws too. Confirm
    // that this is intended.
    if (!callFunction(std_String_includes, flagString, "g"))
        ThrowTypeError(JSMSG_BAD_REGEXP_FLAG, "- matchAll requires g");

    // Step 6.
    var matcher = new Ctor(regexpObj, flagString);

    // Steps 7-8.
    matcher.lastIndex = ToLength(regexpObj.lastIndex);

    // Steps 9-12.
    // Note, always global because non-global throws as per
    // https://github.com/tc39/ecma262/pull/1716
    var flagBits = REGEXP_GLOBAL_FLAG;
    if (callFunction(std_String_includes, flagString, "u"))
        flagBits |= REGEXP_UNICODE_FLAG;

    // Step 13.
    return CreateRegExpStringIterator(matcher, input, flagBits);
}

// String.prototype.matchAll proposal.
//
// CreateRegExpStringIterator ( R, S, global, fullUnicode )
//
// Allocates a RegExpStringIterator and fills its reserved slots. The spec's
// |global| and |fullUnicode| booleans travel together in |flags| as a bitmask
// of REGEXP_GLOBAL_FLAG and REGEXP_UNICODE_FLAG.
function CreateRegExpStringIterator(regexp, string, flags) {
    // Steps 1-3 (argument sanity checks).
    assert(typeof string === "string", "|string| is a string value");
    assert(typeof flags === "number", "|flags| is a number value");

    // Steps 4-9.
    var iter = NewRegExpStringIterator();
    UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_REGEXP_SLOT, regexp);
    UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_STRING_SLOT, string);
    // |flags | 0| is how the value is stored; RegExpStringIteratorNext reads
    // this slot back with UnsafeGetInt32FromReservedSlot.
    UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_FLAGS_SLOT, flags | 0);
    UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_DONE_SLOT, false);

    // Step 10.
    return iter;
}

// String.prototype.matchAll proposal.
//
// %RegExpStringIteratorPrototype%.next ( )
//
// Returns the next { value, done } record. |value| is the match result from
// RegExpExec, or undefined once the iterator is exhausted.
function RegExpStringIteratorNext() {
    // Steps 1-3. Anything that is not a RegExpStringIterator is forwarded to
    // CallRegExpStringIteratorMethodIfWrapped.
    var iter;
    if (!IsObject(this) || (iter = GuardToRegExpStringIterator(this)) === null) {
        return callFunction(CallRegExpStringIteratorMethodIfWrapped, this,
                            "RegExpStringIteratorNext");
    }

    // Step 4.
    if (UnsafeGetReservedSlot(iter, REGEXP_STRING_ITERATOR_DONE_SLOT))
        return { value: undefined, done: true };

    // Step 5.
    var matcher = UnsafeGetObjectFromReservedSlot(iter, REGEXP_STRING_ITERATOR_REGEXP_SLOT);

    // Step 6.
    var input = UnsafeGetStringFromReservedSlot(iter, REGEXP_STRING_ITERATOR_STRING_SLOT);

    // Steps 7-8.
    var flagBits = UnsafeGetInt32FromReservedSlot(iter, REGEXP_STRING_ITERATOR_FLAGS_SLOT);
    var isGlobal = (flagBits & REGEXP_GLOBAL_FLAG) !== 0;
    var isFullUnicode = (flagBits & REGEXP_UNICODE_FLAG) !== 0;

    // Step 9.
    var match = RegExpExec(matcher, input, false);

    // Step 10.
    if (match === null) {
        // Step 10.a.
        UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_DONE_SLOT, true);

        // Step 10.b.
        return { value: undefined, done: true };
    }

    if (!isGlobal) {
        // Step 11.b.i. Iterators created by RegExpMatchAll always carry
        // REGEXP_GLOBAL_FLAG, so they never take this branch.
        UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_DONE_SLOT, true);
    } else if (ToString(match[0]).length === 0) {
        // Steps 11.a.i-ii. An empty match would be found again at the same
        // position, so step lastIndex past it.

        // Step 11.a.ii.1.
        var currentIndex = ToLength(matcher.lastIndex);

        // Steps 11.a.ii.2-3.
        matcher.lastIndex = isFullUnicode
                            ? AdvanceStringIndex(input, currentIndex)
                            : currentIndex + 1;
    }

    // Steps 11.a.iii and 11.b.ii.
    return { value: match, done: false };
}

// SelfHostingDefines.h (index 6512810ca..117ac7ffd): reserved-slot layout of
// the RegExpStringIterator object, added after REGEXP_DOTALL_FLAG (0x20) and
// before MODULE_OBJECT_ENVIRONMENT_SLOT:
//
//     #define REGEXP_STRING_ITERATOR_REGEXP_SLOT 0
//     #define REGEXP_STRING_ITERATOR_STRING_SLOT 1
//     #define REGEXP_STRING_ITERATOR_FLAGS_SLOT 2
//     #define REGEXP_STRING_ITERATOR_DONE_SLOT 3
//
// String.js (index f830b1aa2..d07ec6127): the function that follows is
// inserted right after String_generic_match.

// String.prototype.matchAll proposal.
//
// String.prototype.matchAll ( regexp )
//
// When |regexp| is neither undefined nor null and exposes a method under
// std_matchAll, that method is called with |regexp| as receiver and the
// original |this| value as argument. Otherwise |this| is converted to a
// string, a regexp is created from |regexp| with the "g" flag, and that
// regexp's std_matchAll method is called with the string.
function String_matchAll(regexp) {
    // Step 1.
    RequireObjectCoercible(this);

    // Step 2.
    var hasPattern = regexp !== undefined && regexp !== null;
    if (hasPattern) {
        // Step 2.a.
        var customMatcher = GetMethod(regexp, std_matchAll);

        // Step 2.b. Note that |this| is passed through without being
        // converted to a string first.
        if (customMatcher !== undefined) {
            return callContentFunction(customMatcher, regexp, this);
        }
    }

    // Step 3.
    var str = ToString(this);

    // Step 4.
    var rx = RegExpCreate(regexp, "g");

    // Step 5.
    var matchAllMethod = GetMethod(rx, std_matchAll);
    return callContentFunction(matchAllMethod, rx, str);
}

// Patch context: in String.js the function above is followed by the doc
// comment of "A helper function implementing the logic for both
// String.prototype.padStart and String.prototype.padEnd as described in ES7
// Draft March 29, 2016"; that helper is not part of this patch.
//
// -- cgit v1.2.3