From 185a9a750878ed1d9705fbd162dbfe9bf2e4ea0c Mon Sep 17 00:00:00 2001 From: wolfbeast Date: Tue, 26 Nov 2019 13:37:09 +0100 Subject: Issue #1302 - Add self-hosted implementation for string regex .matchAll This resolves #1302. --- js/public/Class.h | 2 +- js/src/builtin/RegExp.cpp | 1 + js/src/builtin/RegExp.js | 132 ++++++++++++++++++++++++++++++++++++ js/src/builtin/SelfHostingDefines.h | 5 ++ js/src/builtin/String.js | 27 ++++++++ js/src/jsapi.h | 3 +- js/src/jsiter.cpp | 57 ++++++++++++++++ js/src/jsiter.h | 6 ++ js/src/jsstr.cpp | 1 + js/src/vm/CommonPropertyNames.h | 1 + js/src/vm/GlobalObject.cpp | 79 +++++++-------------- js/src/vm/GlobalObject.h | 8 +++ js/src/vm/SelfHosting.cpp | 23 +++++++ 13 files changed, 287 insertions(+), 58 deletions(-) (limited to 'js') diff --git a/js/public/Class.h b/js/public/Class.h index f7533654b..67c0cbca8 100644 --- a/js/public/Class.h +++ b/js/public/Class.h @@ -779,7 +779,7 @@ struct JSClass { // application. #define JSCLASS_GLOBAL_APPLICATION_SLOTS 5 #define JSCLASS_GLOBAL_SLOT_COUNT \ - (JSCLASS_GLOBAL_APPLICATION_SLOTS + JSProto_LIMIT * 2 + 39) + (JSCLASS_GLOBAL_APPLICATION_SLOTS + JSProto_LIMIT * 2 + 40) #define JSCLASS_GLOBAL_FLAGS_WITH_SLOTS(n) \ (JSCLASS_IS_GLOBAL | JSCLASS_HAS_RESERVED_SLOTS(JSCLASS_GLOBAL_SLOT_COUNT + (n))) #define JSCLASS_GLOBAL_FLAGS \ diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp index b7853d533..55e0c8578 100644 --- a/js/src/builtin/RegExp.cpp +++ b/js/src/builtin/RegExp.cpp @@ -795,6 +795,7 @@ const JSFunctionSpec js::regexp_methods[] = { JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1,0), JS_SELF_HOSTED_FN("test", "RegExpTest" , 1,0), JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1,0), + JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0), JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2,0), JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1,0), JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2,0), diff --git a/js/src/builtin/RegExp.js b/js/src/builtin/RegExp.js index 
1a2276594..25827743e 100644 --- a/js/src/builtin/RegExp.js +++ b/js/src/builtin/RegExp.js @@ -1031,3 +1031,135 @@ function RegExpSpecies() { return this; } _SetCanonicalName(RegExpSpecies, "get [Symbol.species]"); + +// String.prototype.matchAll proposal. +// +// RegExp.prototype [ @@matchAll ] ( string ) +function RegExpMatchAll(string) { + // Step 1. + var rx = this; + + // Step 2. + if (!IsObject(rx)) + ThrowTypeError(JSMSG_NOT_NONNULL_OBJECT, rx === null ? "null" : typeof rx); + + // Step 3. + var str = ToString(string); + + // Step 4. + var C = SpeciesConstructor(rx, GetBuiltinConstructor("RegExp")); + + // Step 5. + var flags = ToString(rx.flags); + + // Step 2.b.iii; located here because it needs |flags|. + if (!callFunction(std_String_includes, flags, "g")) { + ThrowTypeError(JSMSG_BAD_REGEXP_FLAG, "- matchAll requires g"); + } + + // Step 6. + var matcher = new C(rx, flags); + + // Steps 7-8. + matcher.lastIndex = ToLength(rx.lastIndex); + + // Steps 9-12. + // Note, always global because non-global throws as per + // https://github.com/tc39/ecma262/pull/1716 + var flags = REGEXP_GLOBAL_FLAG | + (callFunction(std_String_includes, flags, "u") ? REGEXP_UNICODE_FLAG : 0); + + // Step 13. + return CreateRegExpStringIterator(matcher, str, flags); +} + +// String.prototype.matchAll proposal. +// +// CreateRegExpStringIterator ( R, S, global, fullUnicode ) +function CreateRegExpStringIterator(regexp, string, flags) { + // Step 1. + assert(typeof string === "string", "|string| is a string value"); + + // Steps 2-3. + assert(typeof flags === "number", "|flags| is a number value"); + + // Steps 4-9. 
+ var iterator = NewRegExpStringIterator(); + UnsafeSetReservedSlot(iterator, REGEXP_STRING_ITERATOR_REGEXP_SLOT, regexp); + UnsafeSetReservedSlot(iterator, REGEXP_STRING_ITERATOR_STRING_SLOT, string); + UnsafeSetReservedSlot(iterator, REGEXP_STRING_ITERATOR_FLAGS_SLOT, flags | 0); + UnsafeSetReservedSlot(iterator, REGEXP_STRING_ITERATOR_DONE_SLOT, false); + + // Step 10. + return iterator; +} + +// String.prototype.matchAll proposal. +// +// %RegExpStringIteratorPrototype%.next ( ) +function RegExpStringIteratorNext() { + // Steps 1-3. + var obj; + if (!IsObject(this) || (obj = GuardToRegExpStringIterator(this)) === null) { + return callFunction(CallRegExpStringIteratorMethodIfWrapped, this, + "RegExpStringIteratorNext"); + } + + var result = { value: undefined, done: false }; + + // Step 4. + var done = UnsafeGetReservedSlot(obj, REGEXP_STRING_ITERATOR_DONE_SLOT); + if (done) { + result.done = true; + return result; + } + + // Step 5. + var regexp = UnsafeGetObjectFromReservedSlot(obj, REGEXP_STRING_ITERATOR_REGEXP_SLOT); + + // Step 6. + var string = UnsafeGetStringFromReservedSlot(obj, REGEXP_STRING_ITERATOR_STRING_SLOT); + + // Steps 7-8. + var flags = UnsafeGetInt32FromReservedSlot(obj, REGEXP_STRING_ITERATOR_FLAGS_SLOT); + var global = !!(flags & REGEXP_GLOBAL_FLAG); + var fullUnicode = !!(flags & REGEXP_UNICODE_FLAG); + + // Step 9. + var match = RegExpExec(regexp, string, false); + + // Step 10. + if (match === null) { + // Step 10.a. + UnsafeSetReservedSlot(obj, REGEXP_STRING_ITERATOR_DONE_SLOT, true); + + // Step 10.b. + result.done = true; + return result; + } + + // Step 11.a. + if (global) { + // Step 11.a.i. + var matchStr = ToString(match[0]); + + // Step 11.a.ii. + if (matchStr.length === 0) { + // Step 11.a.ii.1. + var thisIndex = ToLength(regexp.lastIndex); + + // Step 11.a.ii.2. + var nextIndex = fullUnicode ? AdvanceStringIndex(string, thisIndex) : thisIndex + 1; + + // Step 11.a.ii.3. 
+ regexp.lastIndex = nextIndex; + } + } else { + // Step 11.b.i. + UnsafeSetReservedSlot(obj, REGEXP_STRING_ITERATOR_DONE_SLOT, true); + } + + // Steps 11.a.iii and 11.b.ii. + result.value = match; + return result; +} diff --git a/js/src/builtin/SelfHostingDefines.h b/js/src/builtin/SelfHostingDefines.h index 6512810ca..117ac7ffd 100644 --- a/js/src/builtin/SelfHostingDefines.h +++ b/js/src/builtin/SelfHostingDefines.h @@ -92,6 +92,11 @@ #define REGEXP_UNICODE_FLAG 0x10 #define REGEXP_DOTALL_FLAG 0x20 +#define REGEXP_STRING_ITERATOR_REGEXP_SLOT 0 +#define REGEXP_STRING_ITERATOR_STRING_SLOT 1 +#define REGEXP_STRING_ITERATOR_FLAGS_SLOT 2 +#define REGEXP_STRING_ITERATOR_DONE_SLOT 3 + #define MODULE_OBJECT_ENVIRONMENT_SLOT 2 #define MODULE_STATE_FAILED 0 diff --git a/js/src/builtin/String.js b/js/src/builtin/String.js index f830b1aa2..d07ec6127 100644 --- a/js/src/builtin/String.js +++ b/js/src/builtin/String.js @@ -63,6 +63,33 @@ function String_generic_match(thisValue, regexp) { return callFunction(String_match, thisValue, regexp); } +// String.prototype.matchAll proposal. +// +// String.prototype.matchAll ( regexp ) +function String_matchAll(regexp) { + // Step 1. + RequireObjectCoercible(this); + + // Step 2. + if (regexp !== undefined && regexp !== null) { + // Step 2.a. + var matcher = GetMethod(regexp, std_matchAll); + + // Step 2.b. + if (matcher !== undefined) + return callContentFunction(matcher, regexp, this); + } + + // Step 3. + var string = ToString(this); + + // Step 4. + var rx = RegExpCreate(regexp, "g"); + + // Step 5. 
+ return callContentFunction(GetMethod(rx, std_matchAll), rx, string); +} + /** * A helper function implementing the logic for both String.prototype.padStart * and String.prototype.padEnd as described in ES7 Draft March 29, 2016 diff --git a/js/src/jsapi.h b/js/src/jsapi.h index 1a69b1513..4dad68e6e 100644 --- a/js/src/jsapi.h +++ b/js/src/jsapi.h @@ -5087,7 +5087,8 @@ GetSymbolDescription(HandleSymbol symbol); macro(split) \ macro(toPrimitive) \ macro(toStringTag) \ - macro(unscopables) + macro(unscopables) \ + macro(matchAll) enum class SymbolCode : uint32_t { // There is one SymbolCode for each well-known symbol. diff --git a/js/src/jsiter.cpp b/js/src/jsiter.cpp index 3e222ca6f..c4da86bdb 100644 --- a/js/src/jsiter.cpp +++ b/js/src/jsiter.cpp @@ -24,6 +24,7 @@ #include "jstypes.h" #include "jsutil.h" +#include "builtin/SelfHostingDefines.h" #include "ds/Sort.h" #include "gc/Marking.h" #include "js/Proxy.h" @@ -1135,6 +1136,38 @@ static const JSFunctionSpec string_iterator_methods[] = { JS_FS_END }; +static const Class RegExpStringIteratorPrototypeClass = { + "RegExp String Iterator", + 0 +}; + +enum { + RegExpStringIteratorSlotRegExp, + RegExpStringIteratorSlotString, + RegExpStringIteratorSlotFlags, + RegExpStringIteratorSlotDone, + RegExpStringIteratorSlotCount +}; + +static_assert(RegExpStringIteratorSlotRegExp == REGEXP_STRING_ITERATOR_REGEXP_SLOT, + "RegExpStringIteratorSlotRegExp must match self-hosting define for regexp slot."); +static_assert(RegExpStringIteratorSlotString == REGEXP_STRING_ITERATOR_STRING_SLOT, + "RegExpStringIteratorSlotString must match self-hosting define for string slot."); +static_assert(RegExpStringIteratorSlotFlags == REGEXP_STRING_ITERATOR_FLAGS_SLOT, + "RegExpStringIteratorSlotFlags must match self-hosting define for flags slot."); +static_assert(RegExpStringIteratorSlotDone == REGEXP_STRING_ITERATOR_DONE_SLOT, + "RegExpStringIteratorSlotDone must match self-hosting define for done slot."); + +const Class 
RegExpStringIteratorObject::class_ = { + "RegExp String Iterator", + JSCLASS_HAS_RESERVED_SLOTS(RegExpStringIteratorSlotCount) +}; + +static const JSFunctionSpec regexp_string_iterator_methods[] = { + JS_SELF_HOSTED_FN("next", "RegExpStringIteratorNext", 0, 0), + JS_FS_END +}; + JSObject* js::ValueToIterator(JSContext* cx, unsigned flags, HandleValue vp) { @@ -1541,6 +1574,30 @@ GlobalObject::initStringIteratorProto(JSContext* cx, Handle<GlobalObject*> globa return true; } +/* static */ bool +GlobalObject::initRegExpStringIteratorProto(JSContext* cx, Handle<GlobalObject*> global) +{ + if (global->getReservedSlot(REGEXP_STRING_ITERATOR_PROTO).isObject()) + return true; + + RootedObject iteratorProto(cx, GlobalObject::getOrCreateIteratorPrototype(cx, global)); + if (!iteratorProto) + return false; + + const Class* cls = &RegExpStringIteratorPrototypeClass; + RootedObject proto(cx, GlobalObject::createBlankPrototypeInheriting(cx, global, cls, + iteratorProto)); + if (!proto || + !DefinePropertiesAndFunctions(cx, proto, nullptr, regexp_string_iterator_methods) || + !DefineToStringTag(cx, proto, cx->names().RegExpStringIterator)) + { + return false; + } + + global->setReservedSlot(REGEXP_STRING_ITERATOR_PROTO, ObjectValue(*proto)); + return true; +} + JSObject* js::InitLegacyIteratorClass(JSContext* cx, HandleObject obj) { diff --git a/js/src/jsiter.h b/js/src/jsiter.h index a3035ddd0..52eb045c5 100644 --- a/js/src/jsiter.h +++ b/js/src/jsiter.h @@ -151,6 +151,12 @@ class StringIteratorObject : public JSObject static const Class class_; }; +class RegExpStringIteratorObject : public JSObject +{ + public: + static const Class class_; +}; + bool GetIterator(JSContext* cx, HandleObject obj, unsigned flags, MutableHandleObject objp); diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp index 74f61b87d..3964ab84e 100644 --- a/js/src/jsstr.cpp +++ b/js/src/jsstr.cpp @@ -2584,6 +2584,7 @@ static const JSFunctionSpec string_methods[] = { /* Perl-ish methods (search is actually Python-esque).
*/ JS_SELF_HOSTED_FN("match", "String_match", 1,0), + JS_SELF_HOSTED_FN("matchAll", "String_matchAll", 1,0), JS_SELF_HOSTED_FN("search", "String_search", 1,0), JS_SELF_HOSTED_FN("replace", "String_replace", 2,0), JS_SELF_HOSTED_FN("split", "String_split", 2,0), diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h index 4ae49d577..99cb02e58 100644 --- a/js/src/vm/CommonPropertyNames.h +++ b/js/src/vm/CommonPropertyNames.h @@ -282,6 +282,7 @@ macro(RegExpFlagsGetter, RegExpFlagsGetter, "RegExpFlagsGetter") \ macro(RegExpMatcher, RegExpMatcher, "RegExpMatcher") \ macro(RegExpSearcher, RegExpSearcher, "RegExpSearcher") \ + macro(RegExpStringIterator, RegExpStringIterator, "RegExp String Iterator") \ macro(RegExpTester, RegExpTester, "RegExpTester") \ macro(RegExp_prototype_Exec, RegExp_prototype_Exec, "RegExp_prototype_Exec") \ macro(Reify, Reify, "Reify") \ diff --git a/js/src/vm/GlobalObject.cpp b/js/src/vm/GlobalObject.cpp index 85707e1c6..013208f66 100644 --- a/js/src/vm/GlobalObject.cpp +++ b/js/src/vm/GlobalObject.cpp @@ -468,62 +468,29 @@ GlobalObject::initSelfHostingBuiltins(JSContext* cx, Handle<GlobalObject*> globa return false; } - RootedValue std_isConcatSpreadable(cx); - std_isConcatSpreadable.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::isConcatSpreadable)); - if (!JS_DefineProperty(cx, global, "std_isConcatSpreadable", std_isConcatSpreadable, - JSPROP_PERMANENT | JSPROP_READONLY)) - { - return false; - } - - // Define a top-level property 'std_iterator' with the name of the method - // used by for-of loops to create an iterator.
- RootedValue std_iterator(cx); - std_iterator.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::iterator)); - if (!JS_DefineProperty(cx, global, "std_iterator", std_iterator, - JSPROP_PERMANENT | JSPROP_READONLY)) - { - return false; - } - - RootedValue std_match(cx); - std_match.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::match)); - if (!JS_DefineProperty(cx, global, "std_match", std_match, - JSPROP_PERMANENT | JSPROP_READONLY)) - { - return false; - } - - RootedValue std_replace(cx); - std_replace.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::replace)); - if (!JS_DefineProperty(cx, global, "std_replace", std_replace, - JSPROP_PERMANENT | JSPROP_READONLY)) - { - return false; - } - - RootedValue std_search(cx); - std_search.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::search)); - if (!JS_DefineProperty(cx, global, "std_search", std_search, - JSPROP_PERMANENT | JSPROP_READONLY)) - { - return false; - } - - RootedValue std_species(cx); - std_species.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::species)); - if (!JS_DefineProperty(cx, global, "std_species", std_species, - JSPROP_PERMANENT | JSPROP_READONLY)) - { - return false; - } - - RootedValue std_split(cx); - std_split.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::split)); - if (!JS_DefineProperty(cx, global, "std_split", std_split, - JSPROP_PERMANENT | JSPROP_READONLY)) - { - return false; + struct SymbolAndName { + JS::SymbolCode code; + const char* name; + }; + + SymbolAndName wellKnownSymbols[] = { + {JS::SymbolCode::isConcatSpreadable, "std_isConcatSpreadable"}, + {JS::SymbolCode::iterator, "std_iterator"}, + {JS::SymbolCode::match, "std_match"}, + {JS::SymbolCode::matchAll, "std_matchAll"}, + {JS::SymbolCode::replace, "std_replace"}, + {JS::SymbolCode::search, "std_search"}, + {JS::SymbolCode::species, "std_species"}, + {JS::SymbolCode::split, "std_split"}, + }; + + RootedValue symVal(cx); + for (const auto& sym : wellKnownSymbols) { + 
symVal.setSymbol(cx->wellKnownSymbols().get(sym.code)); + if (!JS_DefineProperty(cx, global, sym.name, symVal, + JSPROP_PERMANENT | JSPROP_READONLY)) { + return false; + } } return InitBareBuiltinCtor(cx, global, JSProto_Array) && diff --git a/js/src/vm/GlobalObject.h b/js/src/vm/GlobalObject.h index 5aacfc5dc..9179abbb7 100644 --- a/js/src/vm/GlobalObject.h +++ b/js/src/vm/GlobalObject.h @@ -93,6 +93,7 @@ class GlobalObject : public NativeObject ITERATOR_PROTO, ARRAY_ITERATOR_PROTO, STRING_ITERATOR_PROTO, + REGEXP_STRING_ITERATOR_PROTO, LEGACY_GENERATOR_OBJECT_PROTO, STAR_GENERATOR_OBJECT_PROTO, STAR_GENERATOR_FUNCTION_PROTO, @@ -582,6 +583,12 @@ class GlobalObject : public NativeObject initStringIteratorProto)); } + static NativeObject* + getOrCreateRegExpStringIteratorPrototype(JSContext* cx, Handle<GlobalObject*> global) { + return MaybeNativeObject(getOrCreateObject(cx, global, REGEXP_STRING_ITERATOR_PROTO, + initRegExpStringIteratorProto)); + } + static NativeObject* getOrCreateLegacyGeneratorObjectPrototype(JSContext* cx, Handle<GlobalObject*> global) { return MaybeNativeObject(getOrCreateObject(cx, global, LEGACY_GENERATOR_OBJECT_PROTO, @@ -767,6 +774,7 @@ class GlobalObject : public NativeObject static bool initIteratorProto(JSContext* cx, Handle<GlobalObject*> global); static bool initArrayIteratorProto(JSContext* cx, Handle<GlobalObject*> global); static bool initStringIteratorProto(JSContext* cx, Handle<GlobalObject*> global); + static bool initRegExpStringIteratorProto(JSContext* cx, Handle<GlobalObject*> global); // Implemented in vm/GeneratorObject.cpp.
static bool initLegacyGeneratorProto(JSContext* cx, Handle<GlobalObject*> global); diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp index 833410465..ffd707b14 100644 --- a/js/src/vm/SelfHosting.cpp +++ b/js/src/vm/SelfHosting.cpp @@ -856,6 +856,24 @@ intrinsic_NewStringIterator(JSContext* cx, unsigned argc, Value* vp) return true; } +static bool +intrinsic_NewRegExpStringIterator(JSContext* cx, unsigned argc, Value* vp) +{ + CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 0); + + RootedObject proto(cx, GlobalObject::getOrCreateRegExpStringIteratorPrototype(cx, cx->global())); + if (!proto) + return false; + + JSObject* obj = NewObjectWithGivenProto(cx, &RegExpStringIteratorObject::class_, proto); + if (!obj) + return false; + + args.rval().setObject(*obj); + return true; +} + static bool intrinsic_SetCanonicalName(JSContext* cx, unsigned argc, Value* vp) { @@ -2288,6 +2306,8 @@ static const JSFunctionSpec intrinsic_functions[] = { JS_INLINABLE_FN("GuardToStringIterator", intrinsic_GuardToBuiltin<StringIteratorObject>, 1,0, IntrinsicGuardToStringIterator), + JS_FN("GuardToRegExpStringIterator", + intrinsic_GuardToBuiltin<RegExpStringIteratorObject>, 1,0), JS_FN("_CreateMapIterationResultPair", intrinsic_CreateMapIterationResultPair, 0, 0), JS_INLINABLE_FN("_GetNextMapEntryForIterator", intrinsic_GetNextMapEntryForIterator, 2,0, @@ -2305,6 +2325,9 @@ static const JSFunctionSpec intrinsic_functions[] = { JS_FN("NewStringIterator", intrinsic_NewStringIterator, 0,0), JS_FN("CallStringIteratorMethodIfWrapped", CallNonGenericSelfhostedMethod<Is<StringIteratorObject>>, 2,0), + JS_FN("NewRegExpStringIterator", intrinsic_NewRegExpStringIterator, 0,0), + JS_FN("CallRegExpStringIteratorMethodIfWrapped", + CallNonGenericSelfhostedMethod<Is<RegExpStringIteratorObject>>, 2,0), JS_FN("IsStarGeneratorObject", intrinsic_IsInstanceOfBuiltin<StarGeneratorObject>, 1,0), -- cgit v1.2.3