summaryrefslogtreecommitdiffstats
path: root/js/src/vm/RegExpObject.h
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/vm/RegExpObject.h')
-rw-r--r--js/src/vm/RegExpObject.h561
1 files changed, 561 insertions, 0 deletions
diff --git a/js/src/vm/RegExpObject.h b/js/src/vm/RegExpObject.h
new file mode 100644
index 000000000..d6dde1668
--- /dev/null
+++ b/js/src/vm/RegExpObject.h
@@ -0,0 +1,561 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef vm_RegExpObject_h
+#define vm_RegExpObject_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/MemoryReporting.h"
+
+#include "jscntxt.h"
+
+#include "builtin/SelfHostingDefines.h"
+#include "gc/Marking.h"
+#include "gc/Zone.h"
+#include "proxy/Proxy.h"
+#include "vm/ArrayObject.h"
+#include "vm/Shape.h"
+
+/*
+ * JavaScript Regular Expressions
+ *
+ * There are several engine concepts associated with a single logical regexp:
+ *
+ * RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp"
+ *
+ * RegExpShared - The compiled representation of the regexp.
+ *
+ * RegExpCompartment - Owns all RegExpShared instances in a compartment.
+ *
+ * To save memory, a RegExpShared is not created for a RegExpObject until it is
+ * needed for execution. When a RegExpShared needs to be created, it is first
+ * looked up in a per-compartment table, keyed on source and flags, to allow
+ * reuse between objects. Lastly, on GC, every RegExpShared that is not active
+ * on the call stack is discarded. Because of this last point, any code using a
+ * RegExpShared (viz., by executing a regexp) must indicate that the
+ * RegExpShared is active by holding it in a RegExpGuard.
+ */
+namespace js {
+
+struct MatchPair;
+class MatchPairs;
+class RegExpShared;
+class RegExpStatics;
+
+namespace frontend { class TokenStream; }
+
+enum RegExpFlag // Bit flags; each value is asserted below to equal its REGEXP_*_FLAG counterpart.
+{
+ IgnoreCaseFlag = 0x01,
+ GlobalFlag = 0x02,
+ MultilineFlag = 0x04,
+ StickyFlag = 0x08,
+ UnicodeFlag = 0x10,
+
+ NoFlags = 0x00, // No bit set.
+ AllFlags = 0x1f // Bitwise OR of the five single-bit flags above.
+};
+
+static_assert(IgnoreCaseFlag == REGEXP_IGNORECASE_FLAG &&
+ GlobalFlag == REGEXP_GLOBAL_FLAG &&
+ MultilineFlag == REGEXP_MULTILINE_FLAG &&
+ StickyFlag == REGEXP_STICKY_FLAG &&
+ UnicodeFlag == REGEXP_UNICODE_FLAG,
+ "Flag values should be in sync with self-hosted JS"); // NOTE(review): REGEXP_*_FLAG presumably come from builtin/SelfHostingDefines.h - confirm.
+
+enum RegExpRunStatus // Return type of RegExpShared::execute().
+{
+ RegExpRunStatus_Error,
+ RegExpRunStatus_Success,
+ RegExpRunStatus_Success_NotFound // NOTE(review): presumably "ran without error but found no match" - confirm.
+};
+
+extern RegExpObject*
+RegExpAlloc(ExclusiveContext* cx, HandleObject proto = nullptr);
+
+// |regexp| is under-typed because this function's used in the JIT.
+extern JSObject*
+CloneRegExpObject(JSContext* cx, JSObject* regexp);
+
+extern JSObject*
+CreateRegExpPrototype(JSContext* cx, JSProtoKey key);
+
+/*
+ * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
+ * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
+ * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
+ * table so that they can be reused when compiling the same regex string.
+ *
+ * During a GC, RegExpShared instances are marked and swept like GC things.
+ * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
+ * than explicitly tracing them, so that the RegExpShared and any jitcode can
+ * be reclaimed quicker. However, the RegExpShareds are traced through by
+ * objects when we are preserving jitcode in their zone, to avoid the same
+ * recompilation inefficiencies as normal Ion and baseline compilation.
+ */
+class RegExpShared
+{
+ public:
+ enum CompilationMode { // Together with the latin1 bit, picks one of the four compilationArray slots.
+ Normal,
+ MatchOnly
+ };
+
+ enum ForceByteCodeEnum { // ForceByteCode makes compiled() ignore jitCode and require byteCode.
+ DontForceByteCode,
+ ForceByteCode
+ };
+
+ private:
+ friend class RegExpCompartment;
+ friend class RegExpStatics;
+
+ typedef frontend::TokenStream TokenStream;
+
+ struct RegExpCompilation // The jitCode and/or byteCode held in one compilationArray slot.
+ {
+ HeapPtr<jit::JitCode*> jitCode;
+ uint8_t* byteCode; // Starts out null; handed to js_free() by the destructor.
+
+ RegExpCompilation() : byteCode(nullptr) {}
+ ~RegExpCompilation() { js_free(byteCode); }
+
+ bool compiled(ForceByteCodeEnum force = DontForceByteCode) const { // jitCode only counts when bytecode is not forced.
+ return byteCode || (force == DontForceByteCode && jitCode);
+ }
+ };
+
+ /* Source to the RegExp, for lazy compilation. */
+ HeapPtr<JSAtom*> source;
+
+ RegExpFlag flags;
+ size_t parenCount; // Read through getParenCount(); pairCount() adds one to it.
+ bool canStringMatch;
+ bool marked_; // Read by marked(), reset by clearMarked().
+
+ RegExpCompilation compilationArray[4]; // Indexed by CompilationIndex(mode, latin1).
+
+ static int CompilationIndex(CompilationMode mode, bool latin1) { // Normal -> 0 (latin1) / 1; MatchOnly -> 2 (latin1) / 3.
+ switch (mode) {
+ case Normal: return latin1 ? 0 : 1;
+ case MatchOnly: return latin1 ? 2 : 3;
+ }
+ MOZ_CRASH(); // Only reached if |mode| is neither enumerator.
+ }
+
+ // Tables referenced by JIT code; addTable() appends to this vector.
+ Vector<uint8_t*, 0, SystemAllocPolicy> tables;
+
+ /* Internal functions. */
+ bool compile(JSContext* cx, HandleLinearString input,
+ CompilationMode mode, ForceByteCodeEnum force);
+ bool compile(JSContext* cx, HandleAtom pattern, HandleLinearString input,
+ CompilationMode mode, ForceByteCodeEnum force);
+
+ bool compileIfNecessary(JSContext* cx, HandleLinearString input,
+ CompilationMode mode, ForceByteCodeEnum force);
+
+ const RegExpCompilation& compilation(CompilationMode mode, bool latin1) const { // Slot for (mode, latin1).
+ return compilationArray[CompilationIndex(mode, latin1)];
+ }
+
+ RegExpCompilation& compilation(CompilationMode mode, bool latin1) { // Mutable counterpart of the accessor above.
+ return compilationArray[CompilationIndex(mode, latin1)];
+ }
+
+ public:
+ RegExpShared(JSAtom* source, RegExpFlag flags);
+ ~RegExpShared();
+
+ // Execute this RegExp on input starting from searchIndex, filling in
+ // matches if specified and otherwise only determining if there is a match.
+ RegExpRunStatus execute(JSContext* cx, HandleLinearString input, size_t searchIndex,
+ MatchPairs* matches, size_t* endIndex);
+
+ // Register a table with this RegExpShared, and take ownership. Returns the result of tables.append().
+ bool addTable(uint8_t* table) {
+ return tables.append(table);
+ }
+
+ /* Accessors */
+
+ size_t getParenCount() const { // Asserts that at least one compilation exists.
+ MOZ_ASSERT(isCompiled());
+ return parenCount;
+ }
+
+ /* Accounts for the "0" (whole match) pair. */
+ size_t pairCount() const { return getParenCount() + 1; }
+
+ JSAtom* getSource() const { return source; }
+ RegExpFlag getFlags() const { return flags; }
+ bool ignoreCase() const { return flags & IgnoreCaseFlag; }
+ bool global() const { return flags & GlobalFlag; }
+ bool multiline() const { return flags & MultilineFlag; }
+ bool sticky() const { return flags & StickyFlag; }
+ bool unicode() const { return flags & UnicodeFlag; }
+
+ bool isCompiled(CompilationMode mode, bool latin1, // Whether the slot for (mode, latin1) is compiled.
+ ForceByteCodeEnum force = DontForceByteCode) const {
+ return compilation(mode, latin1).compiled(force);
+ }
+ bool isCompiled() const { // True if any of the four slots holds bytecode or JIT code.
+ return isCompiled(Normal, true) || isCompiled(Normal, false)
+ || isCompiled(MatchOnly, true) || isCompiled(MatchOnly, false);
+ }
+
+ void trace(JSTracer* trc);
+ bool needsSweep(JSRuntime* rt);
+ void discardJitCode();
+
+ bool marked() const { return marked_; }
+ void clearMarked() { marked_ = false; }
+
+ bool isMarkedGray() const;
+ void unmarkGray();
+
+ static size_t offsetOfSource() { // Byte offset of |source| within RegExpShared.
+ return offsetof(RegExpShared, source);
+ }
+
+ static size_t offsetOfFlags() { // Byte offset of |flags| within RegExpShared.
+ return offsetof(RegExpShared, flags);
+ }
+
+ static size_t offsetOfParenCount() { // Byte offset of |parenCount| within RegExpShared.
+ return offsetof(RegExpShared, parenCount);
+ }
+
+ static size_t offsetOfLatin1JitCode(CompilationMode mode) { // Byte offset of jitCode in the Latin-1 slot for |mode|.
+ return offsetof(RegExpShared, compilationArray)
+ + (CompilationIndex(mode, true) * sizeof(RegExpCompilation))
+ + offsetof(RegExpCompilation, jitCode);
+ }
+ static size_t offsetOfTwoByteJitCode(CompilationMode mode) { // Byte offset of jitCode in the non-Latin-1 slot for |mode|.
+ return offsetof(RegExpShared, compilationArray)
+ + (CompilationIndex(mode, false) * sizeof(RegExpCompilation))
+ + offsetof(RegExpCompilation, jitCode);
+ }
+
+ size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf);
+
+#ifdef DEBUG
+ bool dumpBytecode(JSContext* cx, bool match_only, HandleLinearString input);
+#endif
+};
+
+/*
+ * Extend the lifetime of a given RegExpShared to at least the lifetime of
+ * the guard object. See Regular Expression comment at the top.
+ *
+ * The guard derives from JS::CustomAutoRooter; its trace() forwards to the
+ * guarded RegExpShared's trace() whenever a RegExpShared is set. Guards are
+ * non-copyable. init() may only be called on an uninitialized guard
+ * (asserted); release() clears the pointer and is also run by the destructor.
+ *
+ * NOTE(review): trace() presumably overrides a virtual declared by
+ * CustomAutoRooter; if so, consider marking it |override| - confirm against
+ * the base class.
+ */
+class RegExpGuard : public JS::CustomAutoRooter
+{
+ RegExpShared* re_; // Guarded RegExpShared, or nullptr while uninitialized or released.
+
+ RegExpGuard(const RegExpGuard&) = delete;
+ void operator=(const RegExpGuard&) = delete;
+
+ public:
+ explicit RegExpGuard(ExclusiveContext* cx) // Creates an empty guard; call init() later.
+ : CustomAutoRooter(cx), re_(nullptr)
+ {}
+
+ RegExpGuard(ExclusiveContext* cx, RegExpShared& re) // Creates a guard that immediately holds |re|.
+ : CustomAutoRooter(cx), re_(nullptr)
+ {
+ init(re);
+ }
+
+ ~RegExpGuard() {
+ release();
+ }
+
+ public:
+ void init(RegExpShared& re) { // Must not already be initialized (asserted).
+ MOZ_ASSERT(!initialized());
+ re_ = &re;
+ }
+
+ void release() { // Only forgets the pointer; nothing is freed here.
+ re_ = nullptr;
+ }
+
+ virtual void trace(JSTracer* trc) { // No-op while the guard is empty.
+ if (re_)
+ re_->trace(trc);
+ }
+
+ bool initialized() const { return !!re_; }
+ RegExpShared* re() const { MOZ_ASSERT(initialized()); return re_; }
+ RegExpShared* operator->() { return re(); }
+ RegExpShared& operator*() { return *re(); }
+};
+
+class RegExpCompartment
+{
+ struct Key { // Hash policy and lookup type for Set: a (source atom, flags) pair.
+ JSAtom* atom;
+ uint16_t flag; // Initialized from a RegExpFlag value by the constructors below.
+
+ Key() {} // Leaves atom and flag uninitialized.
+ Key(JSAtom* atom, RegExpFlag flag)
+ : atom(atom), flag(flag)
+ { }
+ MOZ_IMPLICIT Key(RegExpShared* shared) // Builds the key from the shared's source and flags.
+ : atom(shared->getSource()), flag(shared->getFlags())
+ { }
+
+ typedef Key Lookup;
+ static HashNumber hash(const Lookup& l) { // Default atom-pointer hash XORed with the flag bits shifted left by one.
+ return DefaultHasher<JSAtom*>::hash(l.atom) ^ (l.flag << 1);
+ }
+ static bool match(Key l, Key r) { // Equal when both the atom pointer and the flags are equal.
+ return l.atom == r.atom && l.flag == r.flag;
+ }
+ };
+
+ /*
+ * The set of all RegExpShareds in the compartment. On every GC, every
+ * RegExpShared that was not marked is deleted and removed from the set.
+ */
+ typedef HashSet<RegExpShared*, Key, RuntimeAllocPolicy> Set;
+ Set set_;
+
+ /*
+ * This is the template object that the result of re.exec() is based on,
+ * if there is a result. This is used in CreateRegExpMatchResult to set
+ * the input/index properties faster.
+ */
+ ReadBarriered<ArrayObject*> matchResultTemplateObject_;
+
+ /*
+ * The shape of a RegExp.prototype object that satisfies all of the
+ * following:
+ * * RegExp.prototype.flags getter is not modified
+ * * RegExp.prototype.global getter is not modified
+ * * RegExp.prototype.ignoreCase getter is not modified
+ * * RegExp.prototype.multiline getter is not modified
+ * * RegExp.prototype.sticky getter is not modified
+ * * RegExp.prototype.unicode getter is not modified
+ * * RegExp.prototype.exec is an own data property
+ * * RegExp.prototype[@@match] is an own data property
+ * * RegExp.prototype[@@search] is an own data property
+ */
+ ReadBarriered<Shape*> optimizableRegExpPrototypeShape_;
+
+ /*
+ * The shape of a RegExp instance that satisfies all of the following:
+ * * lastProperty is lastIndex
+ * * prototype is RegExp.prototype
+ */
+ ReadBarriered<Shape*> optimizableRegExpInstanceShape_;
+
+ ArrayObject* createMatchResultTemplateObject(JSContext* cx);
+
+ public:
+ explicit RegExpCompartment(JSRuntime* rt);
+ ~RegExpCompartment();
+
+ bool init(JSContext* cx);
+ void sweep(JSRuntime* rt);
+
+ bool empty() { return set_.empty(); }
+
+ bool get(JSContext* cx, JSAtom* source, RegExpFlag flags, RegExpGuard* g);
+
+ /* Like 'get', but compile 'maybeOpt' (if non-null). */
+ bool get(JSContext* cx, HandleAtom source, JSString* maybeOpt, RegExpGuard* g);
+
+ /* Get or create template object used to base the result of .exec() on. */
+ ArrayObject* getOrCreateMatchResultTemplateObject(JSContext* cx) {
+ if (matchResultTemplateObject_) // Already set: hand it back without creating anything.
+ return matchResultTemplateObject_;
+ return createMatchResultTemplateObject(cx);
+ }
+
+ Shape* getOptimizableRegExpPrototypeShape() {
+ return optimizableRegExpPrototypeShape_;
+ }
+ void setOptimizableRegExpPrototypeShape(Shape* shape) {
+ optimizableRegExpPrototypeShape_ = shape;
+ }
+ Shape* getOptimizableRegExpInstanceShape() {
+ return optimizableRegExpInstanceShape_;
+ }
+ void setOptimizableRegExpInstanceShape(Shape* shape) {
+ optimizableRegExpInstanceShape_ = shape;
+ }
+
+ static size_t offsetOfOptimizableRegExpPrototypeShape() { // Byte offset of the member within RegExpCompartment.
+ return offsetof(RegExpCompartment, optimizableRegExpPrototypeShape_);
+ }
+ static size_t offsetOfOptimizableRegExpInstanceShape() { // Byte offset of the member within RegExpCompartment.
+ return offsetof(RegExpCompartment, optimizableRegExpInstanceShape_);
+ }
+
+ size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
+};
+
+class RegExpObject : public NativeObject
+{
+ static const unsigned LAST_INDEX_SLOT = 0; // Slot read/written by getLastIndex(), setLastIndex() and zeroLastIndex().
+ static const unsigned SOURCE_SLOT = 1; // Slot holding the source atom as a string value.
+ static const unsigned FLAGS_SLOT = 2; // Slot holding the RegExpFlag bits as an Int32 value.
+
+ static_assert(RegExpObject::FLAGS_SLOT == REGEXP_FLAGS_SLOT,
+ "FLAGS_SLOT values should be in sync with self-hosted JS");
+
+ public:
+ static const unsigned RESERVED_SLOTS = 3;
+ static const unsigned PRIVATE_SLOT = 3; // Passed to NativeObject::getPrivate() by maybeShared().
+
+ static const Class class_;
+
+ // The maximum number of pairs a MatchResult can have, without having to
+ // allocate a bigger MatchResult.
+ static const size_t MaxPairCount = 14;
+
+ static RegExpObject*
+ create(ExclusiveContext* cx, const char16_t* chars, size_t length, RegExpFlag flags,
+ frontend::TokenStream* ts, LifoAlloc& alloc);
+
+ static RegExpObject*
+ create(ExclusiveContext* cx, HandleAtom atom, RegExpFlag flags,
+ frontend::TokenStream* ts, LifoAlloc& alloc);
+
+ /*
+ * Compute the initial shape to associate with fresh RegExp objects,
+ * encoding their initial properties. Return the shape after
+ * changing |obj|'s last property to it.
+ */
+ static Shape*
+ assignInitialShape(ExclusiveContext* cx, Handle<RegExpObject*> obj);
+
+ /* Accessors. */
+
+ static unsigned lastIndexSlot() { return LAST_INDEX_SLOT; }
+
+ static bool isInitialShape(RegExpObject* rx) { // True iff the last property has a slot and that slot is LAST_INDEX_SLOT.
+ Shape* shape = rx->lastProperty();
+ if (!shape->hasSlot())
+ return false;
+ if (shape->maybeSlot() != LAST_INDEX_SLOT)
+ return false;
+ return true;
+ }
+
+ const Value& getLastIndex() const { return getSlot(LAST_INDEX_SLOT); }
+
+ void setLastIndex(double d) { // Stores |d| through NumberValue().
+ setSlot(LAST_INDEX_SLOT, NumberValue(d));
+ }
+
+ void zeroLastIndex(ExclusiveContext* cx) { // Stores Int32 0; asserts that lastIndex is writable.
+ MOZ_ASSERT(lookupPure(cx->names().lastIndex)->writable(),
+ "can't infallibly zero a non-writable lastIndex on a "
+ "RegExp that's been exposed to script");
+ setSlot(LAST_INDEX_SLOT, Int32Value(0));
+ }
+
+ JSFlatString* toString(JSContext* cx) const;
+
+ JSAtom* getSource() const { return &getSlot(SOURCE_SLOT).toString()->asAtom(); }
+
+ void setSource(JSAtom* source) {
+ setSlot(SOURCE_SLOT, StringValue(source));
+ }
+
+ /* Flags. */
+
+ static unsigned flagsSlot() { return FLAGS_SLOT; }
+
+ RegExpFlag getFlags() const { // NOTE(review): reads with getFixedSlot() while setFlags() writes with setSlot(); presumably equivalent for this slot - confirm.
+ return RegExpFlag(getFixedSlot(FLAGS_SLOT).toInt32());
+ }
+ void setFlags(RegExpFlag flags) {
+ setSlot(FLAGS_SLOT, Int32Value(flags));
+ }
+
+ bool ignoreCase() const { return getFlags() & IgnoreCaseFlag; }
+ bool global() const { return getFlags() & GlobalFlag; }
+ bool multiline() const { return getFlags() & MultilineFlag; }
+ bool sticky() const { return getFlags() & StickyFlag; }
+ bool unicode() const { return getFlags() & UnicodeFlag; }
+
+ static bool isOriginalFlagGetter(JSNative native, RegExpFlag* mask);
+
+ bool getShared(JSContext* cx, RegExpGuard* g);
+
+ void setShared(RegExpShared& shared) { // Asserts that no RegExpShared is currently attached.
+ MOZ_ASSERT(!maybeShared());
+ NativeObject::setPrivate(&shared);
+ }
+
+ static void trace(JSTracer* trc, JSObject* obj);
+
+ void initIgnoringLastIndex(HandleAtom source, RegExpFlag flags);
+
+ // NOTE: This method is *only* safe to call on RegExps that haven't been
+ // exposed to script, because it requires that the "lastIndex"
+ // property be writable.
+ void initAndZeroLastIndex(HandleAtom source, RegExpFlag flags, ExclusiveContext* cx);
+
+#ifdef DEBUG
+ bool dumpBytecode(JSContext* cx, bool match_only, HandleLinearString input);
+#endif
+
+ private:
+ /*
+ * Precondition: the syntax for |source| has already been validated.
+ * Side effect: sets the private field.
+ */
+ bool createShared(JSContext* cx, RegExpGuard* g);
+ RegExpShared* maybeShared() const { // The private pointer viewed as a RegExpShared*; setShared() asserts it is null before attaching one.
+ return static_cast<RegExpShared*>(NativeObject::getPrivate(PRIVATE_SLOT));
+ }
+
+ /* Call setShared in preference to setPrivate. */
+ void setPrivate(void* priv) = delete;
+};
+
+/*
+ * Parse regexp flags. Report an error and return false if an invalid
+ * sequence of flags is encountered (repeat/invalid flag).
+ *
+ * N.B. flagStr must be rooted.
+ */
+bool
+ParseRegExpFlags(JSContext* cx, JSString* flagStr, RegExpFlag* flagsOut);
+
+/*
+ * Obtain a RegExpShared for |obj| through |g|. The caller must already know
+ * that GetBuiltinClass(obj) is ESClass::RegExp. Anything that is not itself a
+ * RegExpObject is routed through Proxy::regexp_toShared; a real RegExpObject
+ * is asked directly via getShared().
+ */
+inline bool
+RegExpToShared(JSContext* cx, HandleObject obj, RegExpGuard* g)
+{
+ if (!obj->is<RegExpObject>())
+ return Proxy::regexp_toShared(cx, obj, g);
+
+ return obj->as<RegExpObject>().getShared(cx, g);
+}
+
+template<XDRMode mode>
+bool
+XDRScriptRegExpObject(XDRState<mode>* xdr, MutableHandle<RegExpObject*> objp);
+
+extern JSObject*
+CloneScriptRegExpObject(JSContext* cx, RegExpObject& re);
+
+/* Escape all slashes and newlines in the given string. */
+extern JSAtom*
+EscapeRegExpPattern(JSContext* cx, HandleAtom src);
+
+template <typename CharT>
+extern bool
+HasRegExpMetaChars(const CharT* chars, size_t length);
+
+extern bool
+StringHasRegExpMetaChars(JSLinearString* str);
+
+} /* namespace js */
+
+#endif /* vm_RegExpObject_h */