summaryrefslogtreecommitdiffstats
path: root/js/src/irregexp/RegExpInterpreter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/irregexp/RegExpInterpreter.cpp')
-rw-r--r--js/src/irregexp/RegExpInterpreter.cpp501
1 files changed, 501 insertions, 0 deletions
diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp
new file mode 100644
index 000000000..7fd2d983a
--- /dev/null
+++ b/js/src/irregexp/RegExpInterpreter.cpp
@@ -0,0 +1,501 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99: */
+
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// A simple interpreter for the Irregexp byte code.
+
+#include "irregexp/RegExpBytecode.h"
+#include "irregexp/RegExpMacroAssembler.h"
+#include "vm/MatchPairs.h"
+
+using namespace js;
+using namespace js::irregexp;
+
+static const size_t kBitsPerByte = 8;
+static const size_t kBitsPerByteLog2 = 3;
+
+class MOZ_STACK_CLASS RegExpStackCursor
+{
+ public:
+ explicit RegExpStackCursor(JSContext* cx)
+ : cx(cx), cursor(nullptr)
+ {}
+
+ bool init() {
+ if (!stack.init()) {
+ ReportOutOfMemory(cx);
+ return false;
+ }
+ cursor = base();
+ return true;
+ }
+
+ bool push(int32_t value) {
+ *cursor++ = value;
+ if (cursor >= stack.limit()) {
+ int32_t pos = position();
+ if (!stack.grow()) {
+ ReportOverRecursed(cx);
+ return false;
+ }
+ setPosition(pos);
+ }
+ return true;
+ }
+
+ int32_t pop() {
+ MOZ_ASSERT(cursor > base());
+ return *--cursor;
+ }
+
+ int32_t peek() {
+ MOZ_ASSERT(cursor > base());
+ return *(cursor - 1);
+ }
+
+ int32_t position() {
+ MOZ_ASSERT(cursor >= base());
+ return cursor - base();
+ }
+
+ void setPosition(int32_t position) {
+ cursor = base() + position;
+ MOZ_ASSERT(cursor < stack.limit());
+ }
+
+ private:
+ JSContext* cx;
+ RegExpStack stack;
+
+ int32_t* cursor;
+
+ int32_t* base() { return (int32_t*) stack.base(); }
+};
+
+static int32_t
+Load32Aligned(const uint8_t* pc)
+{
+ MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 3) == 0);
+ return *reinterpret_cast<const int32_t*>(pc);
+}
+
+static int32_t
+Load16Aligned(const uint8_t* pc)
+{
+ MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 1) == 0);
+ return *reinterpret_cast<const uint16_t*>(pc);
+}
+
+#define BYTECODE(name) case BC_##name:
+
+template <typename CharT>
+RegExpRunStatus
+irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t current,
+ size_t length, MatchPairs* matches, size_t* endIndex)
+{
+ const uint8_t* pc = byteCode;
+
+ uint32_t current_char = current ? chars[current - 1] : '\n';
+
+ RegExpStackCursor stack(cx);
+
+ if (!stack.init())
+ return RegExpRunStatus_Error;
+
+ int32_t numRegisters = Load32Aligned(pc);
+ pc += 4;
+
+ Vector<int32_t, 0, SystemAllocPolicy> registers;
+ if (!registers.growByUninitialized(numRegisters))
+ return RegExpRunStatus_Error;
+ for (size_t i = 0; i < (size_t) numRegisters; i++)
+ registers[i] = -1;
+
+ while (true) {
+ int32_t insn = Load32Aligned(pc);
+ switch (insn & BYTECODE_MASK) {
+ BYTECODE(BREAK)
+ MOZ_CRASH("Bad bytecode: BREAK");
+ BYTECODE(PUSH_CP)
+ if (!stack.push(current))
+ return RegExpRunStatus_Error;
+ pc += BC_PUSH_CP_LENGTH;
+ break;
+ BYTECODE(PUSH_BT)
+ if (!stack.push(Load32Aligned(pc + 4)))
+ return RegExpRunStatus_Error;
+ pc += BC_PUSH_BT_LENGTH;
+ break;
+ BYTECODE(PUSH_REGISTER)
+ if (!stack.push(registers[insn >> BYTECODE_SHIFT]))
+ return RegExpRunStatus_Error;
+ pc += BC_PUSH_REGISTER_LENGTH;
+ break;
+ BYTECODE(SET_REGISTER)
+ registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
+ pc += BC_SET_REGISTER_LENGTH;
+ break;
+ BYTECODE(ADVANCE_REGISTER)
+ registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
+ pc += BC_ADVANCE_REGISTER_LENGTH;
+ break;
+ BYTECODE(SET_REGISTER_TO_CP)
+ registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
+ pc += BC_SET_REGISTER_TO_CP_LENGTH;
+ break;
+ BYTECODE(SET_CP_TO_REGISTER)
+ current = registers[insn >> BYTECODE_SHIFT];
+ pc += BC_SET_CP_TO_REGISTER_LENGTH;
+ break;
+ BYTECODE(SET_REGISTER_TO_SP)
+ registers[insn >> BYTECODE_SHIFT] = stack.position();
+ pc += BC_SET_REGISTER_TO_SP_LENGTH;
+ break;
+ BYTECODE(SET_SP_TO_REGISTER)
+ stack.setPosition(registers[insn >> BYTECODE_SHIFT]);
+ pc += BC_SET_SP_TO_REGISTER_LENGTH;
+ break;
+ BYTECODE(POP_CP)
+ current = stack.pop();
+ pc += BC_POP_CP_LENGTH;
+ break;
+ BYTECODE(POP_BT)
+ if (!CheckForInterrupt(cx))
+ return RegExpRunStatus_Error;
+ pc = byteCode + stack.pop();
+ break;
+ BYTECODE(POP_REGISTER)
+ registers[insn >> BYTECODE_SHIFT] = stack.pop();
+ pc += BC_POP_REGISTER_LENGTH;
+ break;
+ BYTECODE(FAIL)
+ return RegExpRunStatus_Success_NotFound;
+ BYTECODE(SUCCEED)
+ if (matches)
+ memcpy(matches->pairsRaw(), registers.begin(), matches->length() * 2 * sizeof(int32_t));
+ else if (endIndex)
+ *endIndex = registers[1];
+ return RegExpRunStatus_Success;
+ BYTECODE(ADVANCE_CP)
+ current += insn >> BYTECODE_SHIFT;
+ pc += BC_ADVANCE_CP_LENGTH;
+ break;
+ BYTECODE(GOTO)
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ BYTECODE(ADVANCE_CP_AND_GOTO)
+ current += insn >> BYTECODE_SHIFT;
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ BYTECODE(CHECK_GREEDY)
+ if ((int32_t)current == stack.peek()) {
+ stack.pop();
+ pc = byteCode + Load32Aligned(pc + 4);
+ } else {
+ pc += BC_CHECK_GREEDY_LENGTH;
+ }
+ break;
+ BYTECODE(LOAD_CURRENT_CHAR) {
+ size_t pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos >= length) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ } else {
+ current_char = chars[pos];
+ pc += BC_LOAD_CURRENT_CHAR_LENGTH;
+ }
+ break;
+ }
+ BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ current_char = chars[pos];
+ pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
+ break;
+ }
+ BYTECODE(LOAD_2_CURRENT_CHARS) {
+ size_t pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos + 2 > length) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ } else {
+ CharT next = chars[pos + 1];
+ current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(CharT))));
+ pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
+ }
+ break;
+ }
+ BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ char16_t next = chars[pos + 1];
+ current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(char16_t))));
+ pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
+ break;
+ }
+ BYTECODE(LOAD_4_CURRENT_CHARS)
+ MOZ_CRASH("ASCII handling implemented");
+ BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED)
+ MOZ_CRASH("ASCII handling implemented");
+ BYTECODE(CHECK_4_CHARS) {
+ uint32_t c = Load32Aligned(pc + 4);
+ if (c == current_char)
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_CHECK_4_CHARS_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ if (c == current_char)
+ pc = byteCode + Load32Aligned(pc + 4);
+ else
+ pc += BC_CHECK_CHAR_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_NOT_4_CHARS) {
+ uint32_t c = Load32Aligned(pc + 4);
+ if (c != current_char)
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_CHECK_NOT_4_CHARS_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_NOT_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ if (c != current_char)
+ pc = byteCode + Load32Aligned(pc + 4);
+ else
+ pc += BC_CHECK_NOT_CHAR_LENGTH;
+ break;
+ }
+ BYTECODE(AND_CHECK_4_CHARS) {
+ uint32_t c = Load32Aligned(pc + 4);
+ if (c == (current_char & Load32Aligned(pc + 8)))
+ pc = byteCode + Load32Aligned(pc + 12);
+ else
+ pc += BC_AND_CHECK_4_CHARS_LENGTH;
+ break;
+ }
+ BYTECODE(AND_CHECK_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ if (c == (current_char & Load32Aligned(pc + 4)))
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_AND_CHECK_CHAR_LENGTH;
+ break;
+ }
+ BYTECODE(AND_CHECK_NOT_4_CHARS) {
+ uint32_t c = Load32Aligned(pc + 4);
+ if (c != (current_char & Load32Aligned(pc + 8)))
+ pc = byteCode + Load32Aligned(pc + 12);
+ else
+ pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
+ break;
+ }
+ BYTECODE(AND_CHECK_NOT_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ if (c != (current_char & Load32Aligned(pc + 4)))
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
+ break;
+ }
+ BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ uint32_t minus = Load16Aligned(pc + 4);
+ uint32_t mask = Load16Aligned(pc + 6);
+ if (c != ((current_char - minus) & mask))
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_CHAR_IN_RANGE) {
+ uint32_t from = Load16Aligned(pc + 4);
+ uint32_t to = Load16Aligned(pc + 6);
+ if (from <= current_char && current_char <= to)
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
+ uint32_t from = Load16Aligned(pc + 4);
+ uint32_t to = Load16Aligned(pc + 6);
+ if (from > current_char || current_char > to)
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_BIT_IN_TABLE) {
+ int mask = RegExpMacroAssembler::kTableMask;
+ uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
+ int bit = (current_char & (kBitsPerByte - 1));
+ if ((b & (1 << bit)) != 0)
+ pc = byteCode + Load32Aligned(pc + 4);
+ else
+ pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_LT) {
+ uint32_t limit = (insn >> BYTECODE_SHIFT);
+ if (current_char < limit)
+ pc = byteCode + Load32Aligned(pc + 4);
+ else
+ pc += BC_CHECK_LT_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_GT) {
+ uint32_t limit = (insn >> BYTECODE_SHIFT);
+ if (current_char > limit)
+ pc = byteCode + Load32Aligned(pc + 4);
+ else
+ pc += BC_CHECK_GT_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_REGISTER_LT)
+ if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4))
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_CHECK_REGISTER_LT_LENGTH;
+ break;
+ BYTECODE(CHECK_REGISTER_GE)
+ if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4))
+ pc = byteCode + Load32Aligned(pc + 8);
+ else
+ pc += BC_CHECK_REGISTER_GE_LENGTH;
+ break;
+ BYTECODE(CHECK_REGISTER_EQ_POS)
+ if (registers[insn >> BYTECODE_SHIFT] == (int32_t) current)
+ pc = byteCode + Load32Aligned(pc + 4);
+ else
+ pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
+ break;
+ BYTECODE(CHECK_NOT_REGS_EQUAL)
+ if (registers[insn >> BYTECODE_SHIFT] == registers[Load32Aligned(pc + 4)])
+ pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
+ else
+ pc = byteCode + Load32Aligned(pc + 8);
+ break;
+ BYTECODE(CHECK_NOT_BACK_REF) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from < 0 || len <= 0) {
+ pc += BC_CHECK_NOT_BACK_REF_LENGTH;
+ break;
+ }
+ if (current + len > length) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ } else {
+ int i;
+ for (i = 0; i < len; i++) {
+ if (chars[from + i] != chars[current + i]) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ }
+ }
+ if (i < len) break;
+ current += len;
+ }
+ pc += BC_CHECK_NOT_BACK_REF_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from < 0 || len <= 0) {
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
+ break;
+ }
+ if (current + len > length) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ }
+ if (CaseInsensitiveCompareStrings(chars + from, chars + current, len * sizeof(CharT))) {
+ current += len;
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
+ } else {
+ pc = byteCode + Load32Aligned(pc + 4);
+ }
+ break;
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from < 0 || len <= 0) {
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH;
+ break;
+ }
+ if (current + len > length) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ }
+ if (CaseInsensitiveCompareUCStrings(chars + from, chars + current,
+ len * sizeof(CharT)))
+ {
+ current += len;
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH;
+ } else {
+ pc = byteCode + Load32Aligned(pc + 4);
+ }
+ break;
+ }
+ BYTECODE(CHECK_AT_START)
+ if (current == 0)
+ pc = byteCode + Load32Aligned(pc + 4);
+ else
+ pc += BC_CHECK_AT_START_LENGTH;
+ break;
+ BYTECODE(CHECK_NOT_AT_START)
+ if (current == 0)
+ pc += BC_CHECK_NOT_AT_START_LENGTH;
+ else
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ BYTECODE(SET_CURRENT_POSITION_FROM_END) {
+ size_t by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
+ if (length - current > by) {
+ current = length - by;
+ current_char = chars[current - 1];
+ }
+ pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
+ break;
+ }
+ default:
+ MOZ_CRASH("Bad bytecode");
+ }
+ }
+}
+
+template RegExpRunStatus
+irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const Latin1Char* chars, size_t current,
+ size_t length, MatchPairs* matches, size_t* endIndex);
+
+template RegExpRunStatus
+irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const char16_t* chars, size_t current,
+ size_t length, MatchPairs* matches, size_t* endIndex);