Issue #1677 - Part 3: Create shim definitions for V8-specific code in new regexp implementation

author: Matt A. Tobin <email@mattatobin.com> 2020-11-09 20:20:18 -0500
committer: Matt A. Tobin <email@mattatobin.com> 2020-11-09 20:20:18 -0500
commit: cb0208163f5b44651831f217db0d0d4e97dd2463 (patch)
tree: f9489df88dad982d544d3799fc6728d9211e6d55 /js
parent: 19499014a36f2f27ac81d11495fdc5bdbd28ae25 (diff)
download: UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.tar
UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.tar.gz
UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.tar.lz
UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.tar.xz
UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.zip
14 files changed, 1748 insertions, 0 deletions
diff --git a/js/src/regexp/property-sequences.h b/js/src/regexp/property-sequences.h
index cbf2ea1fc..ed39e2379 100644
--- a/js/src/regexp/property-sequences.h
+++ b/js/src/regexp/property-sequences.h
@@ -7,6 +7,7 @@
 
 #ifdef V8_INTL_SUPPORT
 
+#include "regexp/regexp-shim.h"
 
 namespace v8 {
 namespace internal {
diff --git a/js/src/regexp/regexp-ast.h b/js/src/regexp/regexp-ast.h
index 540fdce80..fe6913e1d 100644
--- a/js/src/regexp/regexp-ast.h
+++ b/js/src/regexp/regexp-ast.h
@@ -5,6 +5,7 @@
 #ifndef V8_REGEXP_REGEXP_AST_H_
 #define V8_REGEXP_REGEXP_AST_H_
 
+#include "regexp/regexp-shim.h"
 
 namespace v8 {
 namespace internal {
diff --git a/js/src/regexp/regexp-bytecode-peephole.h b/js/src/regexp/regexp-bytecode-peephole.h
index 96349f52c..31d5a2d48 100644
--- a/js/src/regexp/regexp-bytecode-peephole.h
+++ b/js/src/regexp/regexp-bytecode-peephole.h
@@ -5,6 +5,7 @@
 #ifndef V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
 #define V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
 
+#include "regexp/regexp-shim.h"
 
 namespace v8 {
 namespace internal {
diff --git a/js/src/regexp/regexp-bytecodes.h b/js/src/regexp/regexp-bytecodes.h
index 35f7a30da..24d6925db 100644
--- a/js/src/regexp/regexp-bytecodes.h
+++ b/js/src/regexp/regexp-bytecodes.h
@@ -5,6 +5,7 @@
 #ifndef V8_REGEXP_REGEXP_BYTECODES_H_
 #define V8_REGEXP_REGEXP_BYTECODES_H_
 
+#include "regexp/regexp-shim.h"
 
 namespace v8 {
 namespace internal {
diff --git a/js/src/regexp/regexp-dotprinter.h b/js/src/regexp/regexp-dotprinter.h
index b3b268f53..e5781184c 100644
--- a/js/src/regexp/regexp-dotprinter.h
+++ b/js/src/regexp/regexp-dotprinter.h
@@ -5,6 +5,7 @@
 #ifndef V8_REGEXP_REGEXP_DOTPRINTER_H_
 #define V8_REGEXP_REGEXP_DOTPRINTER_H_
 
+#include "regexp/regexp-shim.h"
 
 namespace v8 {
 namespace internal {
diff --git a/js/src/regexp/regexp-macro-assembler-arch.h b/js/src/regexp/regexp-macro-assembler-arch.h
new file mode 100644
index 000000000..60b5c94de
--- /dev/null
+++ b/js/src/regexp/regexp-macro-assembler-arch.h
@@ -0,0 +1,16 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file implements the NativeRegExpMacroAssembler interface for
+// SpiderMonkey. It provides the same interface as each of V8's
+// architecture-specific implementations.
+
+#ifndef RegexpMacroAssemblerArch_h
+#define RegexpMacroAssemblerArch_h
+
+#include "regexp/regexp-shim.h"
+
+#endif // RegexpMacroAssemblerArch_h
diff --git a/js/src/regexp/regexp-macro-assembler.h b/js/src/regexp/regexp-macro-assembler.h
index 67d97d122..dd059a43d 100644
--- a/js/src/regexp/regexp-macro-assembler.h
+++ b/js/src/regexp/regexp-macro-assembler.h
@@ -6,6 +6,7 @@
 #define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
 
 #include "regexp/regexp-ast.h"
+#include "regexp/regexp-shim.h"
 #include "regexp/regexp.h"
 
 namespace v8 {
diff --git a/js/src/regexp/regexp-shim.cc b/js/src/regexp/regexp-shim.cc
new file mode 100644
index 000000000..5e1da7a60
--- /dev/null
+++ b/js/src/regexp/regexp-shim.cc
@@ -0,0 +1,72 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Copyright 2019 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "regexp/regexp-shim.h"
+
+namespace v8 {
+namespace internal {
+
+void PrintF(const char* format, ...) {
+  va_list arguments;
+  va_start(arguments, format);
+  vprintf(format, arguments);
+  va_end(arguments);
+}
+
+void PrintF(FILE* out, const char* format, ...) {
+  va_list arguments;
+  va_start(arguments, format);
+  vfprintf(out, format, arguments);
+  va_end(arguments);
+}
+
+// Origin:
+// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/ostreams.cc#L120-L169
+// (This is a hand-simplified version.)
+// Writes the given character to the output escaping everything outside
+// of printable ASCII range.
+std::ostream& operator<<(std::ostream& os, const AsUC16& c) {
+  uint16_t v = c.value;
+  bool isPrint = 0x20 < v && v <= 0x7e;
+  char buf[10];
+  const char* format = isPrint ? "%c" : (v <= 0xFF) ? "\\x%02x" : "\\u%04x";
+  snprintf(buf, sizeof(buf), format, v);
+  return os << buf;
+}
+std::ostream& operator<<(std::ostream& os, const AsUC32& c) {
+  int32_t v = c.value;
+  if (v <= String::kMaxUtf16CodeUnit) {
+    return os << AsUC16(v);
+  }
+  char buf[13];
+  snprintf(buf, sizeof(buf), "\\u{%06x}", v);
+  return os << buf;
+}
+
+DisallowJavascriptExecution::DisallowJavascriptExecution(Isolate* isolate)
+    : nojs_(isolate->cx()) {}
+
+// TODO: Map flags to jitoptions
+bool FLAG_correctness_fuzzer_suppressions = false;
+bool FLAG_enable_regexp_unaligned_accesses = false;
+bool FLAG_harmony_regexp_sequence = false;
+bool FLAG_regexp_interpret_all = false;
+bool FLAG_regexp_mode_modifiers = false;
+bool FLAG_regexp_optimization = false;
+bool FLAG_regexp_peephole_optimization = false;
+bool FLAG_regexp_possessive_quantifier = false;
+bool FLAG_regexp_tier_up = false;
+bool FLAG_trace_regexp_assembler = false;
+bool FLAG_trace_regexp_bytecodes = false;
+bool FLAG_trace_regexp_parser = false;
+bool FLAG_trace_regexp_peephole_optimization = false;
+
+}  // namespace internal
+}  // namespace v8
diff --git a/js/src/regexp/regexp-shim.h b/js/src/regexp/regexp-shim.h
new file mode 100644
index 000000000..0497ee552
--- /dev/null
+++ b/js/src/regexp/regexp-shim.h
@@ -0,0 +1,999 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Copyright 2019 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef RegexpShim_h
+#define RegexpShim_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Types.h"
+
+#include <algorithm>
+
+#include "jit/Label.h"
+#include "js/Value.h"
+#include "regexp/util/flags.h"
+#include "regexp/util/vector.h"
+#include "regexp/util/zone.h"
+#include "vm/NativeObject.h"
+
+// Forward declaration of classes
+namespace v8 {
+namespace internal {
+
+class Heap;
+class Isolate;
+class RegExpMatchInfo;
+class RegExpStack;
+
+}  // namespace internal
+}  // namespace v8
+
+#define V8_WARN_UNUSED_RESULT MOZ_MUST_USE
+#define V8_EXPORT_PRIVATE MOZ_EXPORT
+#define V8_FALLTHROUGH MOZ_FALLTHROUGH
+
+#define FATAL(x) MOZ_CRASH(x)
+#define UNREACHABLE() MOZ_CRASH("unreachable code")
+#define UNIMPLEMENTED() MOZ_CRASH("unimplemented code")
+#define STATIC_ASSERT(exp) static_assert(exp, #exp)
+
+#define DCHECK MOZ_ASSERT
+#define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs))
+#define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs))
+#define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs))
+#define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs))
+#define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs))
+#define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs))
+#define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr)
+#define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr)
+#define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs)
+#define CHECK MOZ_RELEASE_ASSERT
+
+template <class T>
+static constexpr inline T Min(T t1, T t2) {
+  return t1 < t2 ? t1 : t2;
+}
+
+template <class T>
+static constexpr inline T Max(T t1, T t2) {
+  return t1 > t2 ? t1 : t2;
+}
+#define MemCopy memcpy
+
+// Origin:
+// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319
+// ptrdiff_t is 't' according to the standard, but MSVC uses 'I'.
+#ifdef _MSC_VER
+#  define V8PRIxPTRDIFF "Ix"
+#  define V8PRIdPTRDIFF "Id"
+#  define V8PRIuPTRDIFF "Iu"
+#else
+#  define V8PRIxPTRDIFF "tx"
+#  define V8PRIdPTRDIFF "td"
+#  define V8PRIuPTRDIFF "tu"
+#endif
+
+// Origin:
+// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L27-L38
+// The arraysize(arr) macro returns the # of elements in an array arr.
+// The expression is a compile-time constant, and therefore can be
+// used in defining new arrays, for example.  If you use arraysize on
+// a pointer by mistake, you will get a compile-time error.
+#define arraysize(array) (sizeof(ArraySizeHelper(array)))
+
+// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only
+// use its type.
+template <typename T, size_t N>
+char (&ArraySizeHelper(T (&array)[N]))[N];
+
+// Explicitly declare the assignment operator as deleted.
+#define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete
+
+// Explicitly declare the copy constructor and assignment operator as deleted.
+// This also deletes the implicit move constructor and implicit move assignment
+// operator, but still allows to manually define them.
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&) = delete;      \
+  DISALLOW_ASSIGN(TypeName)
+
+// Explicitly declare all implicit constructors as deleted, namely the
+// default constructor, copy constructor and operator= functions.
+// This is especially useful for classes containing only static methods.
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+  TypeName() = delete;                           \
+  DISALLOW_COPY_AND_ASSIGN(TypeName)
+
+namespace v8 {
+
+// Origin:
+// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367
+template <typename T, typename U>
+constexpr inline bool IsAligned(T value, U alignment) {
+  return (value & (alignment - 1)) == 0;
+}
+
+using byte = uint8_t;
+using Address = uintptr_t;
+static const Address kNullAddress = 0;
+
+namespace base {
+
+// Origin:
+// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258
+// The USE(x, ...) template is used to silence C++ compiler warnings
+// issued for (yet) unused variables (typically parameters).
+// The arguments are guaranteed to be evaluated from left to right.
+struct Use {
+  template <typename T>
+  Use(T&&) {}  // NOLINT(runtime/explicit)
+};
+#define USE(...)                                                   \
+  do {                                                             \
+    ::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \
+    (void)unused_tmp_array_for_use_macro;                          \
+  } while (false)
+
+// Origin:
+// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39
+// saturated_cast<> is analogous to static_cast<> for numeric types, except
+// that the specified numeric conversion will saturate rather than overflow or
+// underflow.
+template <typename Dst, typename Src>
+inline Dst saturated_cast(Src value);
+
+// This is the only specialization that is needed for regexp code.
+// Instead of pulling in dozens of lines of template goo
+// to derive it, I used the implementation from uint8_clamped in
+// ArrayBufferObject.h.
+template <>
+inline uint8_t saturated_cast<uint8_t, int>(int x) {
+  return (x >= 0) ? ((x < 255) ? uint8_t(x) : 255) : 0;
+}
+
+namespace bits {
+
+inline uint64_t CountTrailingZeros(uint64_t value) {
+  return mozilla::CountTrailingZeroes64(value);
+}
+
+}  // namespace bits
+}  // namespace base
+
+namespace unibrow {
+
+using uchar = unsigned int;
+
+// Origin:
+// https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150
+class Latin1 {
+ public:
+  static const uint16_t kMaxChar = 0xff;
+
+  // Convert the character to Latin-1 case equivalent if possible.
+  static inline uint16_t TryConvertToLatin1(uint16_t c) {
+    // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
+    // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
+    if (c == 0x039C || c == 0x03BC) {
+      return 0xB5;
+    }
+    // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER
+    // Y WITH DIAERESIS".
+    if (c == 0x0178) {
+      return 0xFF;
+    }
+    return c;
+  }
+};
+
+// Origin:
+// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131
+class Utf16 {
+ public:
+  static inline bool IsLeadSurrogate(int code) {
+    return js::unicode::IsLeadSurrogate(code);
+  }
+  static inline bool IsTrailSurrogate(int code) {
+    return js::unicode::IsTrailSurrogate(code);
+  }
+  static inline uint16_t LeadSurrogate(uint32_t char_code) {
+    return js::unicode::LeadSurrogate(char_code);
+  }
+  static inline uint16_t TrailSurrogate(uint32_t char_code) {
+    return js::unicode::TrailSurrogate(char_code);
+  }
+  static inline uint32_t CombineSurrogatePair(char16_t lead, char16_t trail) {
+    return js::unicode::UTF16Decode(lead, trail);
+  }
+  static const uchar kMaxNonSurrogateCharCode = 0xffff;
+};
+
+// A cache used in case conversion.  It caches the value for characters
+// that either have no mapping or map to a single character independent
+// of context.  Characters that map to more than one character or that
+// map differently depending on context are always looked up.
+// Origin:
+// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88
+template <class T, int size = 256>
+class Mapping {
+ public:
+  inline Mapping() = default;
+  int get(uchar c, uchar n, uchar* result);
+
+ private:
+  friend class Test;
+  int CalculateValue(uchar c, uchar n, uchar* result);
+  struct CacheEntry {
+    inline CacheEntry() : code_point_(kNoChar), offset_(0) {}
+    inline CacheEntry(uchar code_point, signed offset)
+        : code_point_(code_point), offset_(offset) {}
+    uchar code_point_;
+    signed offset_;
+    static const int kNoChar = (1 << 21) - 1;
+  };
+  static const int kSize = size;
+  static const int kMask = kSize - 1;
+  CacheEntry entries_[kSize];
+};
+
+// Origin:
+// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252
+struct Ecma262Canonicalize {
+  static const int kMaxWidth = 1;
+  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
+};
+struct Ecma262UnCanonicalize {
+  static const int kMaxWidth = 1;
+  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
+};
+struct CanonicalizationRange {
+  static const int kMaxWidth = 1;
+  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
+};
+
+}  // namespace unibrow
+
+namespace internal {
+
+#define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y)
+void PRINTF_FORMAT(1, 2) PrintF(const char* format, ...);
+void PRINTF_FORMAT(2, 3) PrintF(FILE* out, const char* format, ...);
+
+// Superclass for classes only using static method functions.
+// The subclass of AllStatic cannot be instantiated at all.
+class AllStatic {
+#ifdef DEBUG
+ public:
+  AllStatic() = delete;
+#endif
+};
+
+// Superclass for classes managed with new and delete.
+// In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc)
+// Compare:
+// https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96
+class Malloced {
+ public:
+  static void* operator new(size_t size) {
+    js::AutoEnterOOMUnsafeRegion oomUnsafe;
+    void* result = js_malloc(size);
+    if (!result) {
+      oomUnsafe.crash("Irregexp Malloced shim");
+    }
+    return result;
+  }
+  static void operator delete(void* p) { js_free(p); }
+};
+
+constexpr int32_t KB = 1024;
+constexpr int32_t MB = 1024 * 1024;
+
+#define kMaxInt JSVAL_INT_MAX
+#define kMinInt JSVAL_INT_MIN
+constexpr int kSystemPointerSize = sizeof(void*);
+
+// The largest integer n such that n and n + 1 are both exactly
+// representable as a Number value.  ES6 section 20.1.2.6
+constexpr double kMaxSafeInteger = 9007199254740991.0;  // 2^53-1
+
+// Latin1/UTF-16 constants
+// Code-point values in Unicode 4.0 are 21 bits wide.
+// Code units in UTF-16 are 16 bits wide.
+using uc16 = uint16_t;
+using uc32 = int32_t;
+
+constexpr int kBitsPerByte = 8;
+constexpr int kBitsPerByteLog2 = 3;
+constexpr int kUInt32Size = sizeof(uint32_t);
+constexpr int kInt64Size = sizeof(int64_t);
+constexpr int kUC16Size = sizeof(uc16);
+
+inline constexpr bool IsDecimalDigit(uc32 c) { return c >= '0' && c <= '9'; }
+inline bool is_uint24(int val) { return (val & 0x00ffffff) == val; }
+
+inline bool IsIdentifierStart(uc32 c) {
+  return js::unicode::IsIdentifierStart(uint32_t(c));
+}
+inline bool IsIdentifierPart(uc32 c) {
+  return js::unicode::IsIdentifierPart(uint32_t(c));
+}
+
+// Wrappers to disambiguate uint16_t and uc16.
+struct AsUC16 {
+  explicit AsUC16(uint16_t v) : value(v) {}
+  uint16_t value;
+};
+
+struct AsUC32 {
+  explicit AsUC32(int32_t v) : value(v) {}
+  int32_t value;
+};
+
+class StdoutStream : public std::ostream {};
+
+std::ostream& operator<<(std::ostream& os, const AsUC16& c);
+std::ostream& operator<<(std::ostream& os, const AsUC32& c);
+
+// Reuse existing Maybe implementation
+using mozilla::Maybe;
+
+template <typename T>
+Maybe<T> Just(const T& value) {
+  return mozilla::Some(value);
+}
+
+template <typename T>
+mozilla::Nothing Nothing() {
+  return mozilla::Nothing();
+}
+
+// Origin:
+// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L600-L642
+// Compare 8bit/16bit chars to 8bit/16bit chars.
+// Used indirectly by regexp-interpreter.cc
+template <typename lchar, typename rchar>
+inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs,
+                                size_t chars) {
+  const lchar* limit = lhs + chars;
+  if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) {
+    // memcmp compares byte-by-byte, yielding wrong results for two-byte
+    // strings on little-endian systems.
+    return memcmp(lhs, rhs, chars);
+  }
+  while (lhs < limit) {
+    int r = static_cast<int>(*lhs) - static_cast<int>(*rhs);
+    if (r != 0) return r;
+    ++lhs;
+    ++rhs;
+  }
+  return 0;
+}
+template <typename lchar, typename rchar>
+inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) {
+  DCHECK_LE(sizeof(lchar), 2);
+  DCHECK_LE(sizeof(rchar), 2);
+  if (sizeof(lchar) == 1) {
+    if (sizeof(rchar) == 1) {
+      return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
+                                  reinterpret_cast<const uint8_t*>(rhs), chars);
+    } else {
+      return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
+                                  reinterpret_cast<const uint16_t*>(rhs),
+                                  chars);
+    }
+  } else {
+    if (sizeof(rchar) == 1) {
+      return CompareCharsUnsigned(reinterpret_cast<const uint16_t*>(lhs),
+                                  reinterpret_cast<const uint8_t*>(rhs), chars);
+    } else {
+      return CompareCharsUnsigned(reinterpret_cast<const uint16_t*>(lhs),
+                                  reinterpret_cast<const uint16_t*>(rhs),
+                                  chars);
+    }
+  }
+}
+
+// Origin:
+// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48
+// Returns the value (0 .. 15) of a hexadecimal character c.
+// If c is not a legal hexadecimal character, returns a value < 0.
+// Used in regexp-parser.cc
+inline int HexValue(uc32 c) {
+  c -= '0';
+  if (static_cast<unsigned>(c) <= 9) return c;
+  c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
+  if (static_cast<unsigned>(c) <= 5) return c + 10;
+  return -1;
+}
+
+// V8::Object ~= JS::Value
+class Object {
+ public:
+  // The default object constructor in V8 stores a nullptr,
+  // which has its low bit clear and is interpreted as Smi(0).
+  constexpr Object() : value_(JS::Int32Value(0)) {}
+
+  // Conversions to/from SpiderMonkey types
+  constexpr Object(JS::Value value) : value_(value) {}
+  operator JS::Value() const { return value_; }
+
+  // Used in regexp-macro-assembler.cc and regexp-interpreter.cc to
+  // check the return value of isolate->stack_guard()->HandleInterrupts()
+  // In V8, this will be either an exception object or undefined.
+  // In SM, we store the exception in the context, so we can use our normal
+  // idiom: return false iff we are throwing an exception.
+  inline bool IsException(Isolate*) const { return !value_.toBoolean(); }
+
+  // SpiderMonkey tries to avoid leaking the internal representation of its
+  // objects. V8 is not so strict. These functions are used when calling /
+  // being called by native code: objects are converted to Addresses for the
+  // call, then cast back to objects on the other side.
+  // We might be able to upstream a patch that eliminates the need for these.
+  Object(Address bits);
+  Address ptr() const;
+
+ protected:
+  JS::Value value_;
+};
+
+class Smi : public Object {
+ public:
+  static Smi FromInt(int32_t value) {
+    Smi smi;
+    smi.value_ = JS::Int32Value(value);
+    return smi;
+  }
+  static inline int32_t ToInt(const Object object) {
+    return JS::Value(object).toInt32();
+  }
+};
+
+// V8::HeapObject ~= JSObject
+class HeapObject : public Object {
+public:
+  // Only used for bookkeeping of total code generated in regexp-compiler.
+  // We may be able to refactor this away.
+  int Size() const;
+};
+
+// A fixed-size array with Objects (aka Values) as element types
+// Implemented as a wrapper around a regular native object with dense elements.
+class FixedArray : public HeapObject {
+ public:
+  inline void set(uint32_t index, Object value) {
+    JS::Value(*this).toObject().as<js::NativeObject>().setDenseElement(index,
+                                                                       value);
+  }
+};
+
+// A fixed-size array of bytes.
+// TODO: figure out the best implementation for this. Uint8Array might work,
+// but it's not currently visible outside of TypedArrayObject.cpp.
+class ByteArray : public HeapObject {
+ public:
+  uint8_t get(uint32_t index);
+  void set(uint32_t index, uint8_t val);
+  uint32_t length();
+  byte* GetDataStartAddress();
+  byte* GetDataEndAddress();
+
+  static ByteArray cast(Object object);
+};
+
+// Like Handles in SM, V8 handles are references to marked pointers.
+// Unlike SM, where Rooted pointers are created individually on the
+// stack, the target of a V8 handle lives in a HandleScope.
+// HandleScopes are created on the stack and register themselves with
+// the isolate (~= JSContext). Whenever a Handle is created, the
+// outermost HandleScope is retrieved from the isolate, and a new root
+// is created in that HandleScope. The Handle remains valid for the
+// lifetime of the HandleScope.
+class HandleScope {
+ public:
+  HandleScope(Isolate* isolate);
+};
+
+// Origin:
+// https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171
+template <typename T>
+class Handle {
+ public:
+  Handle();
+  Handle(T object, Isolate* isolate);
+
+  // Constructor for handling automatic up casting.
+  template <typename S, typename = typename std::enable_if<
+                            std::is_convertible<S*, T*>::value>::type>
+  /*inline*/ Handle(Handle<S> handle);
+
+  template <typename S>
+  /*inline*/ static const Handle<T> cast(Handle<S> that);
+
+  T* operator->() const;
+  T operator*() const;
+
+  bool is_null() const;
+
+  Address address();
+
+ private:
+  template <typename>
+  friend class Handle;
+  template <typename>
+  friend class MaybeHandle;
+};
+
+// A Handle can be converted into a MaybeHandle. Converting a MaybeHandle
+// into a Handle requires checking that it does not point to nullptr.  This
+// ensures nullptr checks before use.
+//
+// Also note that Handles do not provide default equality comparison or hashing
+// operators on purpose. Such operators would be misleading, because intended
+// semantics is ambiguous between Handle location and object identity.
+// Origin:
+// https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78
+template <typename T>
+class MaybeHandle final {
+ public:
+  MaybeHandle() = default;
+
+  // Constructor for handling automatic up casting from Handle.
+  // Ex. Handle<JSArray> can be passed when MaybeHandle<Object> is expected.
+  template <typename S, typename = typename std::enable_if<
+                            std::is_convertible<S*, T*>::value>::type>
+  MaybeHandle(Handle<S> handle);
+
+  /*inline*/ Handle<T> ToHandleChecked() const;
+
+  // Convert to a Handle with a type that can be upcasted to.
+  template <typename S>
+  /*inline*/ bool ToHandle(Handle<S>* out) const;
+};
+
+// From v8/src/handles/handles-inl.h
+
+template <typename T>
+inline Handle<T> handle(T object, Isolate* isolate) {
+  return Handle<T>(object, isolate);
+}
+
+// RAII Guard classes
+
+class DisallowHeapAllocation {
+ public:
+  DisallowHeapAllocation() {}
+  operator const JS::AutoAssertNoGC&() const { return no_gc_; }
+
+ private:
+  const JS::AutoAssertNoGC no_gc_;
+};
+
+// This is used inside DisallowHeapAllocation regions to enable
+// allocation just before throwing an exception, to allocate the
+// exception object. Specifically, it only ever guards:
+// - isolate->stack_guard()->HandleInterrupts()
+// - isolate->StackOverflow()
+// Those cases don't allocate in SpiderMonkey, so this can be a no-op.
+class AllowHeapAllocation {
+ public:
+  // Empty constructor to avoid unused_variable warnings
+  AllowHeapAllocation() {}
+};
+
+class DisallowJavascriptExecution {
+ public:
+  DisallowJavascriptExecution(Isolate* isolate);
+
+ private:
+  js::AutoAssertNoContentJS nojs_;
+};
+
+// Origin:
+// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474
+class String : public HeapObject {
+ private:
+  JSString* str() const { return value_.toString(); }
+
+ public:
+  operator JSString*() const { return str(); }
+
+  // Max char codes.
+  static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
+  static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
+  static const int kMaxUtf16CodeUnit = 0xffff;
+  static const uc32 kMaxCodePoint = 0x10ffff;
+
+  MOZ_ALWAYS_INLINE int length() const { return str()->length(); }
+  uint16_t Get(uint32_t index);
+  bool IsFlat() { return str()->isLinear(); };
+
+  // These are only used in V8 functions that I want to rewrite.
+  // TODO: Rewrite those functions and delete this
+  bool IsConsString();
+  bool IsExternalString();
+  bool IsExternalOneByteString();
+  bool IsExternalTwoByteString();
+  bool IsSeqString();
+  bool IsSeqOneByteString();
+  bool IsSeqTwoByteString();
+  bool IsSlicedString();
+  bool IsThinString();
+
+  // Origin:
+  // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152
+  class FlatContent {
+   public:
+    FlatContent(JSLinearString* string, const DisallowHeapAllocation& no_gc)
+        : string_(string), no_gc_(no_gc) {}
+    inline bool IsOneByte() const { return string_->hasLatin1Chars(); }
+    inline bool IsTwoByte() const { return !string_->hasLatin1Chars(); }
+
+    Vector<const uint8_t> ToOneByteVector() const {
+      MOZ_ASSERT(IsOneByte());
+      return Vector<const uint8_t>(string_->latin1Chars(no_gc_),
+                                   string_->length());
+    }
+    Vector<const uc16> ToUC16Vector() const;
+    // TODO: twoByteChars returns char16_t*, but uc16 is uint16_t, which is
+    // not compatible :( :( :(
+    // {
+    //   MOZ_ASSERT(IsTwoByte());
+    //   return Vector<const uc16>(string_->twoByteChars(no_gc_),
+    //   string_->length());
+    // }
+   private:
+    const JSLinearString* string_;
+    const JS::AutoAssertNoGC& no_gc_;
+  };
+  FlatContent GetFlatContent(const DisallowHeapAllocation& no_gc) {
+    MOZ_ASSERT(IsFlat());
+    return FlatContent(&str()->asLinear(), no_gc);
+  }
+
+  static Handle<String> Flatten(Isolate* isolate, Handle<String> string);
+
+  inline static String cast(Object object) {
+    String s;
+    s.value_ = JS::StringValue(JS::Value(object).toString());
+    return s;
+  }
+
+  inline static bool IsOneByteRepresentationUnderneath(String string) {
+    return string.str()->hasLatin1Chars();
+  }
+  inline bool IsOneByteRepresentation() const {
+    return str()->hasLatin1Chars();
+  }
+
+  std::unique_ptr<char[]> ToCString();
+
+  template <typename Char>
+  Vector<const Char> GetCharVector(const DisallowHeapAllocation& no_gc);
+};
+
+// A flat string reader provides random access to the contents of a
+// string independent of the character width of the string.  The handle
+// must be valid as long as the reader is being used.
+// Origin:
+// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L807-L825
+class MOZ_STACK_CLASS FlatStringReader {
+ public:
+  FlatStringReader(JSAtom* string) : string_(string) {}
+  int length() { return string_->length(); }
+
+  inline char16_t Get(size_t index) {
+    return string_->latin1OrTwoByteChar(index);
+  }
+
+ private:
+  JSAtom* string_;
+  JS::AutoCheckCannotGC nogc;
+};
+
+//////////////////////////////////////////////////
+// TODO: Refactor NativeRegExpMacroAssembler and delete all of these:
+class ConsString : public String {
+ public:
+  String first();
+  String second();
+
+  static ConsString cast(Object object);
+};
+class ExternalOneByteString : public String {
+ public:
+  const uint8_t* GetChars();
+  static ExternalOneByteString cast(Object object);
+};
+class ExternalTwoByteString : public String {
+ public:
+  const uc16* GetChars();
+  static ExternalTwoByteString cast(Object object);
+};
+class SeqOneByteString : public String {
+ public:
+  uint8_t* GetChars(const DisallowHeapAllocation& no_gc);
+  static SeqOneByteString cast(Object object);
+};
+class SeqTwoByteString : public String {
+ public:
+  uc16* GetChars(const DisallowHeapAllocation& no_gc);
+  static SeqTwoByteString cast(Object object);
+
+  static constexpr size_t kMaxCharsSize = JSString::MAX_LENGTH * 2;
+};
+class SlicedString : public String {
+ public:
+  String parent();
+  int offset();
+  static SlicedString cast(Object object);
+};
+class ThinString : public String {
+ public:
+  String actual();
+  static ThinString cast(Object object);
+};
+class StringShape {
+ public:
+  explicit StringShape(const String s);
+  bool IsCons();
+  bool IsSliced();
+  bool IsThin();
+};
+// End of "TODO: Delete all of these"
+//////////////////////////////////////////////////
+
+class JSRegExp : public HeapObject {
+ public:
+  // ******************************************************
+  // Methods that are called from inside the implementation
+  // ******************************************************
+  void TierUpTick();
+  bool MarkedForTierUp() const;
+
+  Object Code(bool is_latin1) const;
+  Object Bytecode(bool is_latin1) const;
+
+  uint32_t BacktrackLimit() const;
+
+  static JSRegExp cast(Object object);
+
+  // ******************************
+  // Static constants
+  // ******************************
+
+  // Meaning of Type:
+  // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
+  // ATOM: A simple string to match against using an indexOf operation.
+  // IRREGEXP: Compiled with Irregexp.
+  enum Type { NOT_COMPILED, ATOM, IRREGEXP };
+
+  // Maximum number of captures allowed.
+  static constexpr int kMaxCaptures = 1 << 16;
+
+  // **************************************************
+  // JSRegExp::Flags
+  // **************************************************
+
+  struct FlagShiftBit {
+    static constexpr int kGlobal = 0;
+    static constexpr int kIgnoreCase = 1;
+    static constexpr int kMultiline = 2;
+    static constexpr int kSticky = 3;
+    static constexpr int kUnicode = 4;
+    static constexpr int kDotAll = 5;
+    static constexpr int kInvalid = 6;
+  };
+  enum Flag : uint8_t {
+    kNone = 0,
+    kGlobal = 1 << FlagShiftBit::kGlobal,
+    kIgnoreCase = 1 << FlagShiftBit::kIgnoreCase,
+    kMultiline = 1 << FlagShiftBit::kMultiline,
+    kSticky = 1 << FlagShiftBit::kSticky,
+    kUnicode = 1 << FlagShiftBit::kUnicode,
+    kDotAll = 1 << FlagShiftBit::kDotAll,
+    kInvalid = 1 << FlagShiftBit::kInvalid,  // Not included in FlagCount.
+  };
+  using Flags = base::Flags<Flag>;
+  static constexpr int kFlagCount = 6;
+
+  static constexpr int kNoBacktrackLimit = 0;
+};
+
+class Histogram {
+ public:
+  inline void AddSample(int sample) {}
+};
+
+class Counters {
+ public:
+  Histogram* regexp_backtracks() { return &regexp_backtracks_; }
+
+ private:
+  Histogram regexp_backtracks_;
+};
+
+#define PROFILE(isolate, call) \
+  do {                         \
+  } while (false);
+
+enum class AllocationType : uint8_t {
+  kYoung,  // Allocate in the nursery
+  kOld,    // Allocate in the tenured heap
+};
+
+using StackGuard = Isolate;
+using Factory = Isolate;
+
+class Isolate {
+ public:
+  //********** Isolate code **********//
+  RegExpStack* regexp_stack() const { return regexp_stack_; }
+  bool has_pending_exception() { return cx()->isExceptionPending(); }
+  void StackOverflow() { js::ReportOverRecursed(cx()); }
+
+  unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize();
+  unibrow::Mapping<unibrow::Ecma262Canonicalize>*
+  regexp_macro_assembler_canonicalize();
+  unibrow::Mapping<unibrow::CanonicalizationRange>* jsregexp_canonrange();
+
+  // An empty stub for telemetry we don't support
+  void IncreaseTotalRegexpCodeGenerated(int size) {}
+
+  Counters* counters() { return &counters_; }
+
+  //********** Factory code **********//
+  inline Factory* factory() { return this; }
+
+  Handle<ByteArray> NewByteArray(
+      int length, AllocationType allocation = AllocationType::kYoung);
+  MOZ_MUST_USE MaybeHandle<String> NewStringFromOneByte(
+      const Vector<const uint8_t>& str,
+      AllocationType allocation = AllocationType::kYoung);
+
+  // Allocates a fixed array initialized with undefined values.
+  Handle<FixedArray> NewFixedArray(
+      int length, AllocationType allocation = AllocationType::kYoung);
+
+  template <typename Char>
+  Handle<String> InternalizeString(const Vector<const Char>& str);
+
+  //********** Stack guard code **********//
+  inline StackGuard* stack_guard() { return this; }
+  Object HandleInterrupts() {
+    return Object(JS::BooleanValue(cx()->handleInterrupt()));
+  }
+
+  JSContext* cx() const { return cx_; }
+
+ private:
+  JSContext* cx_;
+  RegExpStack* regexp_stack_;
+  Counters counters_;
+};
+
+// Origin:
+// https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931
+class StackLimitCheck {
+ public:
+  StackLimitCheck(Isolate* isolate) : cx_(isolate->cx()) {}
+
+  // Use this to check for stack-overflows in C++ code.
+  bool HasOverflowed() { return !CheckRecursionLimitDontReport(cx_); }
+
+  // Use this to check for interrupt request in C++ code.
+  bool InterruptRequested() { return cx_->hasAnyPendingInterrupt(); }
+
+  // Use this to check for stack-overflow when entering runtime from JS code.
+  bool JsHasOverflowed() {
+    return !CheckRecursionLimitConservativeDontReport(cx_);
+  }
+
+ private:
+  JSContext* cx_;
+};
+
+class Code {
+ public:
+  bool operator!=(Code& other) const;
+
+  Address raw_instruction_start();
+  Address raw_instruction_end();
+  Address address();
+
+  static Code cast(Object object);
+};
+
+// GeneratedCode provides an interface for calling into jit code.
+// It will probably require additional work to hook this up to the
+// arm simulator.
+// Origin:
+// https://github.com/v8/v8/blob/abfbe7687edb5b2dffe0b33b24e0a41bb86a8214/src/execution/simulator.h#L96-L164
+template <typename Return, typename... Args>
+class GeneratedCode {
+ public:
+  using Signature = Return(Args...);
+
+  static GeneratedCode FromCode(Code code);  // TODO: implement
+  Return Call(Args... args) { return fn_ptr_(args...); }
+
+ private:
+  friend class GeneratedCode<Return(Args...)>;
+  Isolate* isolate_;
+  Signature* fn_ptr_;
+  GeneratedCode(Isolate* isolate, Signature* fn_ptr)
+      : isolate_(isolate), fn_ptr_(fn_ptr) {}
+};
+
+// Allow to use {GeneratedCode<ret(arg1, arg2)>} instead of
+// {GeneratedCode<ret, arg1, arg2>}.
+template <typename Return, typename... Args>
+class GeneratedCode<Return(Args...)> : public GeneratedCode<Return, Args...> {
+ public:
+  // Automatically convert from {GeneratedCode<ret, arg1, arg2>} to
+  // {GeneratedCode<ret(arg1, arg2)>}.
+  GeneratedCode(GeneratedCode<Return, Args...> other)
+      : GeneratedCode<Return, Args...>(other.isolate_, other.fn_ptr_) {}
+};
+
+enum class MessageTemplate { kStackOverflow };
+
+class MessageFormatter {
+ public:
+  static const char* TemplateString(MessageTemplate index) {
+    switch (index) {
+      case MessageTemplate::kStackOverflow:
+        return "too much recursion";
+    }
+  }
+};
+
+// Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h
+class Label {
+ public:
+  Label() : inner_(js::jit::Label()) {}
+
+  operator js::jit::Label*() { return &inner_; }
+
+  void Unuse() { inner_.reset(); }
+
+  bool is_linked() { return inner_.used(); }
+  bool is_bound() { return inner_.bound(); }
+  bool is_unused() { return !inner_.used() && !inner_.bound(); }
+
+  int pos() { return inner_.offset(); }
+  void link_to(int pos) { inner_.use(pos); }
+  void bind_to(int pos) { inner_.bind(pos); }
+
+ private:
+  js::jit::Label inner_;
+};
+
+// TODO: Map flags to jitoptions
+extern bool FLAG_correctness_fuzzer_suppressions;
+extern bool FLAG_enable_regexp_unaligned_accesses;
+extern bool FLAG_harmony_regexp_sequence;
+extern bool FLAG_regexp_interpret_all;
+extern bool FLAG_regexp_mode_modifiers;
+extern bool FLAG_regexp_optimization;
+extern bool FLAG_regexp_peephole_optimization;
+extern bool FLAG_regexp_possessive_quantifier;
+extern bool FLAG_regexp_tier_up;
+extern bool FLAG_trace_regexp_assembler;
+extern bool FLAG_trace_regexp_bytecodes;
+extern bool FLAG_trace_regexp_parser;
+extern bool FLAG_trace_regexp_peephole_optimization;
+
+}  // namespace internal
+}  // namespace v8
+
+#endif  // RegexpShim_h
diff --git a/js/src/regexp/regexp-stack.h b/js/src/regexp/regexp-stack.h
index 2c2cf2630..812195ad1 100644
--- a/js/src/regexp/regexp-stack.h
+++ b/js/src/regexp/regexp-stack.h
@@ -5,6 +5,7 @@
 #ifndef V8_REGEXP_REGEXP_STACK_H_
 #define V8_REGEXP_REGEXP_STACK_H_
 
+#include "regexp/regexp-shim.h"
 
 namespace v8 {
 namespace internal {
diff --git a/js/src/regexp/regexp.h b/js/src/regexp/regexp.h
index 1ee10da5c..cce58da38 100644
--- a/js/src/regexp/regexp.h
+++ b/js/src/regexp/regexp.h
@@ -5,6 +5,7 @@
 #ifndef V8_REGEXP_REGEXP_H_
 #define V8_REGEXP_REGEXP_H_
 
+#include "regexp/regexp-shim.h"
 
 namespace v8 {
 namespace internal {
diff --git a/js/src/regexp/util/flags.h b/js/src/regexp/util/flags.h
new file mode 100644
index 000000000..1fa421fc0
--- /dev/null
+++ b/js/src/regexp/util/flags.h
@@ -0,0 +1,93 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_UTIL_FLAGS_H_
+#define V8_UTIL_FLAGS_H_
+
+// Origin: https://github.com/v8/v8/blob/1bafcc6b999b23ea1d394f5d267a08183e3c4e19/src/base/flags.h#L15-L90
+
+namespace v8 {
+namespace base {
+
+// The Flags class provides a type-safe way of storing OR-combinations of enum
+// values. The Flags<T, S> class is a template class, where T is an enum type,
+// and S is the underlying storage type (usually int).
+//
+// The traditional C++ approach for storing OR-combinations of enum values is to
+// use an int or unsigned int variable. The inconvenience with this approach is
+// that there's no type checking at all; any enum value can be OR'd with any
+// other enum value and passed on to a function that takes an int or unsigned
+// int.
+template <typename T, typename S = int>
+class Flags final {
+ public:
+  using flag_type = T;
+  using mask_type = S;
+
+  constexpr Flags() : mask_(0) {}
+  constexpr Flags(flag_type flag)
+      : mask_(static_cast<S>(flag)) {}
+  constexpr explicit Flags(mask_type mask) : mask_(static_cast<S>(mask)) {}
+
+  constexpr bool operator==(flag_type flag) const {
+    return mask_ == static_cast<S>(flag);
+  }
+  constexpr bool operator!=(flag_type flag) const {
+    return mask_ != static_cast<S>(flag);
+  }
+
+  Flags& operator&=(const Flags& flags) {
+    mask_ &= flags.mask_;
+    return *this;
+  }
+  Flags& operator|=(const Flags& flags) {
+    mask_ |= flags.mask_;
+    return *this;
+  }
+  Flags& operator^=(const Flags& flags) {
+    mask_ ^= flags.mask_;
+    return *this;
+  }
+
+  constexpr Flags operator&(const Flags& flags) const {
+    return Flags(mask_ & flags.mask_);
+  }
+  constexpr Flags operator|(const Flags& flags) const {
+    return Flags(mask_ | flags.mask_);
+  }
+  constexpr Flags operator^(const Flags& flags) const {
+    return Flags(mask_ ^ flags.mask_);
+  }
+
+  Flags& operator&=(flag_type flag) { return operator&=(Flags(flag)); }
+  Flags& operator|=(flag_type flag) { return operator|=(Flags(flag)); }
+  Flags& operator^=(flag_type flag) { return operator^=(Flags(flag)); }
+
+  constexpr Flags operator&(flag_type flag) const {
+    return operator&(Flags(flag));
+  }
+  constexpr Flags operator|(flag_type flag) const {
+    return operator|(Flags(flag));
+  }
+  constexpr Flags operator^(flag_type flag) const {
+    return operator^(Flags(flag));
+  }
+
+  constexpr Flags operator~() const { return Flags(~mask_); }
+
+  constexpr operator mask_type() const { return mask_; }
+  constexpr bool operator!() const { return !mask_; }
+
+  Flags without(flag_type flag) { return *this & (~Flags(flag)); }
+
+  friend size_t hash_value(const Flags& flags) { return flags.mask_; }
+
+ private:
+  mask_type mask_;
+};
+
+} // namespace base
+} // namespace v8
+
+#endif  // V8_UTIL_FLAG_H_
diff --git a/js/src/regexp/util/vector.h b/js/src/regexp/util/vector.h
new file mode 100644
index 000000000..2419447d6
--- /dev/null
+++ b/js/src/regexp/util/vector.h
@@ -0,0 +1,204 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_UTIL_VECTOR_H_
+#define V8_UTIL_VECTOR_H_
+
+#include <algorithm>
+#include <cstring>
+#include <iterator>
+#include <memory>
+
+#include "js/Utility.h"
+
+namespace v8 {
+namespace internal {
+
+//////////////////////////////////////////////////
+
+// Adapted from: https://github.com/v8/v8/blob/5f69bbc233c2d1baf149faf869a7901603929914/src/utils/allocation.h#L36-L58
+
+template <typename T>
+T* NewArray(size_t size) {
+  static_assert(std::is_pod<T>::value, "");
+  js::AutoEnterOOMUnsafeRegion oomUnsafe;
+  T* result = static_cast<T*>(js_malloc(size * sizeof(T)));
+  if (!result) {
+    oomUnsafe.crash("Irregexp NewArray");
+  }
+  return result;
+}
+
+template <typename T>
+void DeleteArray(T* array) {
+  js_free(array);
+}
+
+//////////////////////////////////////////////////
+
+// A non-resizable vector containing a pointer and a length.
+// The Vector may or may not own the pointer, depending on context.
+// Origin:
+// https://github.com/v8/v8/blob/5f69bbc233c2d1baf149faf869a7901603929914/src/utils/vector.h#L20-L134
+
+template <typename T>
+class Vector {
+ public:
+  constexpr Vector() : start_(nullptr), length_(0) {}
+
+  constexpr Vector(T* data, size_t length) : start_(data), length_(length) {
+    MOZ_ASSERT_IF(length != 0, data != nullptr);
+  }
+
+  static Vector<T> New(size_t length) {
+    return Vector<T>(NewArray<T>(length), length);
+  }
+
+  // Returns a vector using the same backing storage as this one,
+  // spanning from and including 'from', to but not including 'to'.
+  Vector<T> SubVector(size_t from, size_t to) const {
+    MOZ_ASSERT(from < to);
+    MOZ_ASSERT(to < length_);
+    return Vector<T>(begin() + from, to - from);
+  }
+
+  // Returns the length of the vector. Only use this if you really need an
+  // integer return value. Use {size()} otherwise.
+  int length() const {
+    MOZ_ASSERT(length_ <= std::numeric_limits<int>::max());
+    return static_cast<int>(length_);
+  }
+
+  // Returns the length of the vector as a size_t.
+  constexpr size_t size() const { return length_; }
+
+  // Returns whether or not the vector is empty.
+  constexpr bool empty() const { return length_ == 0; }
+
+  // Access individual vector elements - checks bounds in debug mode.
+  T& operator[](size_t index) const {
+    MOZ_ASSERT(index < length_);
+    return start_[index];
+  }
+
+  const T& at(size_t index) const { return operator[](index); }
+
+  T& first() { return start_[0]; }
+
+  T& last() {
+    MOZ_ASSERT(length_ > 0);
+    return start_[length_ - 1];
+  }
+
+  // Returns a pointer to the start of the data in the vector.
+  constexpr T* begin() const { return start_; }
+
+  // Returns a pointer past the end of the data in the vector.
+  constexpr T* end() const { return start_ + length_; }
+
+  // Returns a clone of this vector with a new backing store.
+  Vector<T> Clone() const {
+    T* result = NewArray<T>(length_);
+    for (size_t i = 0; i < length_; i++) result[i] = start_[i];
+    return Vector<T>(result, length_);
+  }
+
+  void Truncate(size_t length) {
+    MOZ_ASSERT(length <= length_);
+    length_ = length;
+  }
+
+  // Releases the array underlying this vector. Once disposed the
+  // vector is empty.
+  void Dispose() {
+    DeleteArray(start_);
+    start_ = nullptr;
+    length_ = 0;
+  }
+
+  Vector<T> operator+(size_t offset) {
+    MOZ_ASSERT(offset <= length_);
+    return Vector<T>(start_ + offset, length_ - offset);
+  }
+
+  Vector<T> operator+=(size_t offset) {
+    MOZ_ASSERT(offset <= length_);
+    start_ += offset;
+    length_ -= offset;
+    return *this;
+  }
+
+  // Implicit conversion from Vector<T> to Vector<const T>.
+  inline operator Vector<const T>() const {
+    return Vector<const T>::cast(*this);
+  }
+
+  template <typename S>
+  static constexpr Vector<T> cast(Vector<S> input) {
+    return Vector<T>(reinterpret_cast<T*>(input.begin()),
+                     input.length() * sizeof(S) / sizeof(T));
+  }
+
+  bool operator==(const Vector<const T> other) const {
+    if (length_ != other.length_) return false;
+    if (start_ == other.start_) return true;
+    for (size_t i = 0; i < length_; ++i) {
+      if (start_[i] != other.start_[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+ private:
+  T* start_;
+  size_t length_;
+};
+
+// The resulting vector does not contain a null-termination byte. If you want
+// the null byte, use ArrayVector("foo").
+inline Vector<const char> CStrVector(const char* data) {
+  return Vector<const char>(data, strlen(data));
+}
+
+}  // namespace internal
+
+namespace base {
+
+// SmallVector uses inline storage first, and reallocates when full.
+// It is basically equivalent to js::Vector, and is implemented
+// as a thin wrapper.
+// V8's implementation: https://github.com/v8/v8/blob/master/src/base/small-vector.h
+template <typename T, size_t kSize>
+class SmallVector {
+public:
+  inline bool empty() const { return inner_.empty(); }
+  inline const T& back() const { return inner_.back(); }
+  inline void pop_back() { inner_.popBack(); };
+  template <typename... Args>
+  inline void emplace_back(Args&&... args) {
+    js::AutoEnterOOMUnsafeRegion oomUnsafe;
+    if (!inner_.emplaceBack(args...)) {
+      oomUnsafe.crash("Irregexp SmallVector emplace_back");
+    }
+  };
+  inline size_t size() const { return inner_.length(); }
+  inline const T& at(size_t index) const { return inner_[index]; }
+
+  void resize_no_init(size_t new_size) {
+    js::AutoEnterOOMUnsafeRegion oomUnsafe;
+    if (!inner_.resizeUninitialized(new_size)) {
+      oomUnsafe.crash("Irregexp SmallVector resize");
+    }
+  }
+private:
+  js::Vector<T, kSize, js::SystemAllocPolicy> inner_;
+};
+
+
+} // namespace base
+
+}  // namespace v8
+
+#endif  // V8_UTIL_VECTOR_H_
diff --git a/js/src/regexp/util/zone.h b/js/src/regexp/util/zone.h
new file mode 100644
index 000000000..32487f06a
--- /dev/null
+++ b/js/src/regexp/util/zone.h
@@ -0,0 +1,356 @@
+// Copyright 2019 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_UTIL_ZONE_H_
+#define V8_UTIL_ZONE_H_
+
+#include <list>
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+#include "ds/LifoAlloc.h"
+#include "regexp/util/vector.h"
+
+namespace v8 {
+namespace internal {
+
+// V8::Zone ~= LifoAlloc
+class Zone {
+ public:
+  Zone(size_t defaultChunkSize) : lifoAlloc_(defaultChunkSize) {
+    lifoAlloc_.setAsInfallibleByDefault();
+  }
+
+  void* New(size_t size) { return lifoAlloc_.alloc(size); }
+
+  void DeleteAll() { lifoAlloc_.freeAll(); }
+
+  // Returns true if the total memory allocated exceeds a threshold.
+  static const size_t kExcessLimit = 256 * 1024 * 1024;
+  bool excess_allocation() const {
+    return lifoAlloc_.peakSizeOfExcludingThis() > kExcessLimit;
+  }
+private:
+  js::LifoAlloc lifoAlloc_;
+};
+
+// Superclass for classes allocated in a Zone.
+// Origin: https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/zone/zone.h#L138-L155
+class ZoneObject {
+ public:
+  // Allocate a new ZoneObject of 'size' bytes in the Zone.
+  void* operator new(size_t size, Zone* zone) { return zone->New(size); }
+
+  // Ideally, the delete operator should be private instead of
+  // public, but unfortunately the compiler sometimes synthesizes
+  // (unused) destructors for classes derived from ZoneObject, which
+  // require the operator to be visible. MSVC requires the delete
+  // operator to be public.
+
+  // ZoneObjects should never be deleted individually; use
+  // Zone::DeleteAll() to delete all zone objects in one go.
+  void operator delete(void*, size_t) { MOZ_CRASH("unreachable"); }
+  void operator delete(void* pointer, Zone* zone) { MOZ_CRASH("unreachable"); }
+};
+
+// ZoneLists are growable lists with constant-time access to the
+// elements. The list itself and all its elements are allocated in the
+// Zone. ZoneLists cannot be deleted individually; you can delete all
+// objects in the Zone by calling Zone::DeleteAll().
+// Used throughout irregexp.
+// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone.h#L173-L318
+// Inlines: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-list-inl.h#L17-L155
+template <typename T>
+class ZoneList final {
+ public:
+  // Construct a new ZoneList with the given capacity; the length is
+  // always zero. The capacity must be non-negative.
+  ZoneList(int capacity, Zone* zone) { Initialize(capacity, zone); }
+  // Construct a new ZoneList from a std::initializer_list
+  ZoneList(std::initializer_list<T> list, Zone* zone) {
+    Initialize(static_cast<int>(list.size()), zone);
+    for (auto& i : list) Add(i, zone);
+  }
+  // Construct a new ZoneList by copying the elements of the given ZoneList.
+  ZoneList(const ZoneList<T>& other, Zone* zone) {
+    Initialize(other.length(), zone);
+    AddAll(other, zone);
+  }
+
+  void* operator new(size_t size, Zone* zone) { return zone->New(size); }
+
+  // Returns a reference to the element at index i. This reference is not safe
+  // to use after operations that can change the list's backing store
+  // (e.g. Add).
+  inline T& operator[](int i) const {
+    MOZ_ASSERT(0 < i);
+    MOZ_ASSERT(static_cast<unsigned>(i) < static_cast<unsigned>(length_));
+    return data_[i];
+  }
+  inline T& at(int i) const { return operator[](i); }
+  inline T& last() const { return at(length_ - 1); }
+  inline T& first() const { return at(0); }
+
+  using iterator = T*;
+  inline iterator begin() const { return &data_[0]; }
+  inline iterator end() const { return &data_[length_]; }
+
+  inline bool is_empty() const { return length_ == 0; }
+  inline int length() const { return length_; }
+  inline int capacity() const { return capacity_; }
+
+  Vector<T> ToVector() const { return Vector<T>(data_, length_); }
+  Vector<T> ToVector(int start, int length) const {
+    return Vector<T>(data_ + start, std::min(length_ - start, length));
+  }
+
+  Vector<const T> ToConstVector() const {
+    return Vector<const T>(data_, length_);
+  }
+
+  inline void Initialize(int capacity, Zone* zone) {
+    MOZ_ASSERT(capacity >= 0);
+    data_ = (capacity > 0) ? NewData(capacity, zone) : nullptr;
+    capacity_ = capacity;
+    length_ = 0;
+  }
+
+  // Adds a copy of the given 'element' to the end of the list,
+  // expanding the list if necessary.
+  void Add(const T& element, Zone* zone) {
+    if (length_ < capacity_) {
+      data_[length_++] = element;
+    } else {
+      ZoneList<T>::ResizeAdd(element, zone);
+    }
+  }
+  // Add all the elements from the argument list to this list.
+  void AddAll(const ZoneList<T>& other, Zone* zone) {
+    AddAll(other.ToVector(), zone);
+  }
+  // Add all the elements from the vector to this list.
+  void AddAll(const Vector<T>& other, Zone* zone) {
+    int result_length = length_ + other.length();
+    if (capacity_ < result_length) {
+      Resize(result_length, zone);
+    }
+    if (std::is_fundamental<T>()) {
+      memcpy(data_ + length_, other.begin(), sizeof(*data_) * other.length());
+    } else {
+      for (int i = 0; i < other.length(); i++) {
+	data_[length_ + i] = other.at(i);
+      }
+    }
+    length_ = result_length;
+  }
+
+  // Overwrites the element at the specific index.
+  void Set(int index, const T& element) {
+    MOZ_ASSERT(index >= 0 && index <= length_);
+    data_[index] = element;
+  }
+
+  // Removes the i'th element without deleting it even if T is a
+  // pointer type; moves all elements above i "down". Returns the
+  // removed element.  This function's complexity is linear in the
+  // size of the list.
+  T Remove(int i) {
+    T element = at(i);
+    length_--;
+    while (i < length_) {
+      data_[i] = data_[i + 1];
+      i++;
+    }
+    return element;
+  }
+
+  // Removes the last element without deleting it even if T is a
+  // pointer type. Returns the removed element.
+  inline T RemoveLast() { return Remove(length_ - 1); }
+
+  // Clears the list by freeing the storage memory. If you want to keep the
+  // memory, use Rewind(0) instead. Be aware, that even if T is a
+  // pointer type, clearing the list doesn't delete the entries.
+  inline void Clear() {
+    data_ = nullptr;
+    capacity_ = 0;
+    length_ = 0;
+  }
+
+  // Drops all but the first 'pos' elements from the list.
+  inline void Rewind(int pos) {
+    MOZ_ASSERT(0 <= pos && pos <= length_);
+    length_ = pos;
+  }
+
+  inline bool Contains(const T& elm) const {
+    for (int i = 0; i < length_; i++) {
+      if (data_[i] == elm) return true;
+    }
+    return false;
+  }
+
+  template <typename CompareFunction>
+  void StableSort(CompareFunction cmp, size_t s, size_t l) {
+    std::stable_sort(begin() + s, begin() + s + l,
+		     [cmp](const T& a, const T& b) { return cmp(&a, &b) < 0; });
+  }
+
+  void operator delete(void* pointer) { MOZ_CRASH("unreachable"); }
+  void operator delete(void* pointer, Zone* zone) { MOZ_CRASH("unreachable"); }
+
+ private:
+  T* data_;
+  int capacity_;
+  int length_;
+
+  inline T* NewData(int n, Zone* zone) {
+    return static_cast<T*>(zone->New(n * sizeof(T)));
+  }
+
+  // Increase the capacity of a full list, and add an element.
+  // List must be full already.
+  void ResizeAdd(const T& element, Zone* zone) {
+    MOZ_ASSERT(length_ >= capacity_);
+    // Grow the list capacity by 100%, but make sure to let it grow
+    // even when the capacity is zero (possible initial case).
+    int new_capacity = 1 + 2 * capacity_;
+    // Since the element reference could be an element of the list, copy
+    // it out of the old backing storage before resizing.
+    T temp = element;
+    Resize(new_capacity, zone);
+    data_[length_++] = temp;
+  }
+
+  // Resize the list.
+  void Resize(int new_capacity, Zone* zone) {
+    MOZ_ASSERT(length_ <= new_capacity);
+    T* new_data = NewData(new_capacity, zone);
+    if (length_ > 0) {
+      memcpy(new_data, data_, length_ * sizeof(T));
+    }
+    data_ = new_data;
+    capacity_ = new_capacity;
+  }
+
+  ZoneList& operator=(const ZoneList&) = delete;
+  ZoneList() = delete;
+  ZoneList(const ZoneList&) = delete;
+};
+
+// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-allocator.h#L14-L77
+template <typename T>
+class ZoneAllocator {
+public:
+  using pointer = T*;
+  using const_pointer = const T*;
+  using reference = T&;
+  using const_reference = const T&;
+  using value_type = T;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  template <class O>
+  struct rebind {
+    using other = ZoneAllocator<O>;
+  };
+
+  explicit ZoneAllocator(Zone* zone) : zone_(zone) {}
+  template <typename U>
+  ZoneAllocator(const ZoneAllocator<U>& other)
+      : ZoneAllocator<T>(other.zone_) {}
+  template <typename U>
+  friend class ZoneAllocator;
+
+  T* allocate(size_t n) { return static_cast<T*>(zone_->New(n * sizeof(T))); }
+  void deallocate(T* p, size_t) {}  // noop for zones
+
+  bool operator==(ZoneAllocator const& other) const {
+    return zone_ == other.zone_;
+  }
+  bool operator!=(ZoneAllocator const& other) const {
+    return zone_ != other.zone_;
+  }
+
+private:
+  Zone* zone_;
+};
+
+// Zone wrappers for std containers:
+// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-containers.h#L25-L169
+
+// A wrapper subclass for std::vector to make it easy to construct one
+// that uses a zone allocator.
+// Used throughout irregexp
+template <typename T>
+class ZoneVector : public std::vector<T, ZoneAllocator<T>> {
+public:
+  ZoneVector(Zone* zone)
+    : std::vector<T, ZoneAllocator<T>>(ZoneAllocator<T>(zone)) {}
+
+  // Constructs a new vector and fills it with the contents of the range
+  // [first, last).
+  template <class Iter>
+  ZoneVector(Iter first, Iter last, Zone* zone)
+      : std::vector<T, ZoneAllocator<T>>(first, last, ZoneAllocator<T>(zone)) {}
+};
+
+// A wrapper subclass for std::list to make it easy to construct one
+// that uses a zone allocator.
+// Used in regexp-bytecode-peephole.cc
+template <typename T>
+class ZoneLinkedList : public std::list<T, ZoneAllocator<T>> {
+ public:
+  // Constructs an empty list.
+  explicit ZoneLinkedList(Zone* zone)
+      : std::list<T, ZoneAllocator<T>>(ZoneAllocator<T>(zone)) {}
+};
+
+// A wrapper subclass for std::set to make it easy to construct one that uses
+// a zone allocator.
+// Used in regexp-parser.cc
+template <typename K, typename Compare = std::less<K>>
+class ZoneSet : public std::set<K, Compare, ZoneAllocator<K>> {
+ public:
+  // Constructs an empty set.
+  explicit ZoneSet(Zone* zone)
+      : std::set<K, Compare, ZoneAllocator<K>>(Compare(),
+                                               ZoneAllocator<K>(zone)) {}
+};
+
+// A wrapper subclass for std::map to make it easy to construct one that uses
+// a zone allocator.
+// Used in regexp-bytecode-peephole.cc
+template <typename K, typename V, typename Compare = std::less<K>>
+class ZoneMap
+    : public std::map<K, V, Compare, ZoneAllocator<std::pair<const K, V>>> {
+ public:
+  // Constructs an empty map.
+  explicit ZoneMap(Zone* zone)
+      : std::map<K, V, Compare, ZoneAllocator<std::pair<const K, V>>>(
+            Compare(), ZoneAllocator<std::pair<const K, V>>(zone)) {}
+};
+
+// A wrapper subclass for std::unordered_map to make it easy to construct one
+// that uses a zone allocator.
+// Used in regexp-bytecode-peephole.cc
+template <typename K, typename V, typename Hash = std::hash<K>,
+          typename KeyEqual = std::equal_to<K>>
+class ZoneUnorderedMap
+    : public std::unordered_map<K, V, Hash, KeyEqual,
+                                ZoneAllocator<std::pair<const K, V>>> {
+ public:
+  // Constructs an empty map.
+  explicit ZoneUnorderedMap(Zone* zone, size_t bucket_count = 100)
+      : std::unordered_map<K, V, Hash, KeyEqual,
+                           ZoneAllocator<std::pair<const K, V>>>(
+            bucket_count, Hash(), KeyEqual(),
+            ZoneAllocator<std::pair<const K, V>>(zone)) {}
+};
+
+} // namespace internal
+} // namespace v8
+
+#endif  // V8_UTIL_FLAG_H_
author	Matt A. Tobin <email@mattatobin.com>	2020-11-09 20:20:18 -0500
committer	Matt A. Tobin <email@mattatobin.com>	2020-11-09 20:20:18 -0500
commit	cb0208163f5b44651831f217db0d0d4e97dd2463 (patch)
tree	f9489df88dad982d544d3799fc6728d9211e6d55 /js
parent	19499014a36f2f27ac81d11495fdc5bdbd28ae25 (diff)
download	UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.tar UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.tar.gz UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.tar.lz UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.tar.xz UXP-cb0208163f5b44651831f217db0d0d4e97dd2463.zip