diff options
Diffstat (limited to 'js/src/irregexp/RegExpInterpreter.cpp')
-rw-r--r-- | js/src/irregexp/RegExpInterpreter.cpp | 501 |
1 files changed, 501 insertions, 0 deletions
diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp new file mode 100644 index 000000000..7fd2d983a --- /dev/null +++ b/js/src/irregexp/RegExpInterpreter.cpp @@ -0,0 +1,501 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: */ + +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A simple interpreter for the Irregexp byte code. + +#include "irregexp/RegExpBytecode.h" +#include "irregexp/RegExpMacroAssembler.h" +#include "vm/MatchPairs.h" + +using namespace js; +using namespace js::irregexp; + +static const size_t kBitsPerByte = 8; +static const size_t kBitsPerByteLog2 = 3; + +class MOZ_STACK_CLASS RegExpStackCursor +{ + public: + explicit RegExpStackCursor(JSContext* cx) + : cx(cx), cursor(nullptr) + {} + + bool init() { + if (!stack.init()) { + ReportOutOfMemory(cx); + return false; + } + cursor = base(); + return true; + } + + bool push(int32_t value) { + *cursor++ = value; + if (cursor >= stack.limit()) { + int32_t pos = position(); + if (!stack.grow()) { + ReportOverRecursed(cx); + return false; + } + setPosition(pos); + } + return true; + } + + int32_t pop() { + MOZ_ASSERT(cursor > base()); + return *--cursor; + } + + int32_t peek() { + MOZ_ASSERT(cursor > base()); + return *(cursor - 1); + } + + int32_t position() { + MOZ_ASSERT(cursor >= base()); + return cursor - base(); + } + + void setPosition(int32_t position) { + cursor = base() + position; + MOZ_ASSERT(cursor < stack.limit()); + } + + private: + JSContext* cx; + RegExpStack stack; + + int32_t* cursor; + + int32_t* base() { return (int32_t*) stack.base(); } +}; + +static int32_t +Load32Aligned(const uint8_t* pc) +{ + MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 3) == 0); + return *reinterpret_cast<const int32_t*>(pc); +} + +static int32_t +Load16Aligned(const uint8_t* pc) +{ + MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 1) == 0); + return *reinterpret_cast<const uint16_t*>(pc); +} + +#define BYTECODE(name) case BC_##name: + +template <typename CharT> +RegExpRunStatus +irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t current, + size_t length, MatchPairs* matches, size_t* endIndex) +{ + const uint8_t* pc = byteCode; + + uint32_t current_char = current ? chars[current - 1] : '\n'; + + RegExpStackCursor stack(cx); + + if (!stack.init()) + return RegExpRunStatus_Error; + + int32_t numRegisters = Load32Aligned(pc); + pc += 4; + + Vector<int32_t, 0, SystemAllocPolicy> registers; + if (!registers.growByUninitialized(numRegisters)) + return RegExpRunStatus_Error; + for (size_t i = 0; i < (size_t) numRegisters; i++) + registers[i] = -1; + + while (true) { + int32_t insn = Load32Aligned(pc); + switch (insn & BYTECODE_MASK) { + BYTECODE(BREAK) + MOZ_CRASH("Bad bytecode: BREAK"); + BYTECODE(PUSH_CP) + if (!stack.push(current)) + return RegExpRunStatus_Error; + pc += BC_PUSH_CP_LENGTH; + break; + BYTECODE(PUSH_BT) + if (!stack.push(Load32Aligned(pc + 4))) + return RegExpRunStatus_Error; + pc += BC_PUSH_BT_LENGTH; + break; + BYTECODE(PUSH_REGISTER) + if (!stack.push(registers[insn >> BYTECODE_SHIFT])) + return RegExpRunStatus_Error; + pc += BC_PUSH_REGISTER_LENGTH; + break; + BYTECODE(SET_REGISTER) + registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); + pc += BC_SET_REGISTER_LENGTH; + break; + BYTECODE(ADVANCE_REGISTER) + registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); + pc += BC_ADVANCE_REGISTER_LENGTH; + break; + BYTECODE(SET_REGISTER_TO_CP) + registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); + pc += BC_SET_REGISTER_TO_CP_LENGTH; + break; + BYTECODE(SET_CP_TO_REGISTER) + current = registers[insn >> BYTECODE_SHIFT]; + pc += BC_SET_CP_TO_REGISTER_LENGTH; + break; + BYTECODE(SET_REGISTER_TO_SP) + registers[insn >> BYTECODE_SHIFT] = stack.position(); + pc += BC_SET_REGISTER_TO_SP_LENGTH; + break; + BYTECODE(SET_SP_TO_REGISTER) + stack.setPosition(registers[insn >> BYTECODE_SHIFT]); + pc += BC_SET_SP_TO_REGISTER_LENGTH; + break; + BYTECODE(POP_CP) + current = stack.pop(); + pc += BC_POP_CP_LENGTH; + break; + BYTECODE(POP_BT) + if (!CheckForInterrupt(cx)) + return RegExpRunStatus_Error; + pc = byteCode + stack.pop(); + break; + BYTECODE(POP_REGISTER) + registers[insn >> BYTECODE_SHIFT] = stack.pop(); + pc += BC_POP_REGISTER_LENGTH; + break; + BYTECODE(FAIL) + return RegExpRunStatus_Success_NotFound; + BYTECODE(SUCCEED) + if (matches) + memcpy(matches->pairsRaw(), registers.begin(), matches->length() * 2 * sizeof(int32_t)); + else if (endIndex) + *endIndex = registers[1]; + return RegExpRunStatus_Success; + BYTECODE(ADVANCE_CP) + current += insn >> BYTECODE_SHIFT; + pc += BC_ADVANCE_CP_LENGTH; + break; + BYTECODE(GOTO) + pc = byteCode + Load32Aligned(pc + 4); + break; + BYTECODE(ADVANCE_CP_AND_GOTO) + current += insn >> BYTECODE_SHIFT; + pc = byteCode + Load32Aligned(pc + 4); + break; + BYTECODE(CHECK_GREEDY) + if ((int32_t)current == stack.peek()) { + stack.pop(); + pc = byteCode + Load32Aligned(pc + 4); + } else { + pc += BC_CHECK_GREEDY_LENGTH; + } + break; + BYTECODE(LOAD_CURRENT_CHAR) { + size_t pos = current + (insn >> BYTECODE_SHIFT); + if (pos >= length) { + pc = byteCode + Load32Aligned(pc + 4); + } else { + current_char = chars[pos]; + pc += BC_LOAD_CURRENT_CHAR_LENGTH; + } + break; + } + BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { + int pos = current + (insn >> BYTECODE_SHIFT); + current_char = chars[pos]; + pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH; + break; + } + BYTECODE(LOAD_2_CURRENT_CHARS) { + size_t pos = current + (insn >> BYTECODE_SHIFT); + if (pos + 2 > length) { + pc = byteCode + Load32Aligned(pc + 4); + } else { + CharT next = chars[pos + 1]; + current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(CharT)))); + pc += BC_LOAD_2_CURRENT_CHARS_LENGTH; + } + break; + } + BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { + int pos = current + (insn >> BYTECODE_SHIFT); + char16_t next = chars[pos + 1]; + current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(char16_t)))); + pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH; + break; + } + BYTECODE(LOAD_4_CURRENT_CHARS) + MOZ_CRASH("ASCII handling implemented"); + BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) + MOZ_CRASH("ASCII handling implemented"); + BYTECODE(CHECK_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c == current_char) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_CHECK_4_CHARS_LENGTH; + break; + } + BYTECODE(CHECK_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c == current_char) + pc = byteCode + Load32Aligned(pc + 4); + else + pc += BC_CHECK_CHAR_LENGTH; + break; + } + BYTECODE(CHECK_NOT_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c != current_char) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_CHECK_NOT_4_CHARS_LENGTH; + break; + } + BYTECODE(CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c != current_char) + pc = byteCode + Load32Aligned(pc + 4); + else + pc += BC_CHECK_NOT_CHAR_LENGTH; + break; + } + BYTECODE(AND_CHECK_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c == (current_char & Load32Aligned(pc + 8))) + pc = byteCode + Load32Aligned(pc + 12); + else + pc += BC_AND_CHECK_4_CHARS_LENGTH; + break; + } + BYTECODE(AND_CHECK_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c == (current_char & Load32Aligned(pc + 4))) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_AND_CHECK_CHAR_LENGTH; + break; + } + BYTECODE(AND_CHECK_NOT_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c != (current_char & Load32Aligned(pc + 8))) + pc = byteCode + Load32Aligned(pc + 12); + else + pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH; + break; + } + BYTECODE(AND_CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c != (current_char & Load32Aligned(pc + 4))) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_AND_CHECK_NOT_CHAR_LENGTH; + break; + } + BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + uint32_t minus = Load16Aligned(pc + 4); + uint32_t mask = Load16Aligned(pc + 6); + if (c != ((current_char - minus) & mask)) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH; + break; + } + BYTECODE(CHECK_CHAR_IN_RANGE) { + uint32_t from = Load16Aligned(pc + 4); + uint32_t to = Load16Aligned(pc + 6); + if (from <= current_char && current_char <= to) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_CHECK_CHAR_IN_RANGE_LENGTH; + break; + } + BYTECODE(CHECK_CHAR_NOT_IN_RANGE) { + uint32_t from = Load16Aligned(pc + 4); + uint32_t to = Load16Aligned(pc + 6); + if (from > current_char || current_char > to) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH; + break; + } + BYTECODE(CHECK_BIT_IN_TABLE) { + int mask = RegExpMacroAssembler::kTableMask; + uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)]; + int bit = (current_char & (kBitsPerByte - 1)); + if ((b & (1 << bit)) != 0) + pc = byteCode + Load32Aligned(pc + 4); + else + pc += BC_CHECK_BIT_IN_TABLE_LENGTH; + break; + } + BYTECODE(CHECK_LT) { + uint32_t limit = (insn >> BYTECODE_SHIFT); + if (current_char < limit) + pc = byteCode + Load32Aligned(pc + 4); + else + pc += BC_CHECK_LT_LENGTH; + break; + } + BYTECODE(CHECK_GT) { + uint32_t limit = (insn >> BYTECODE_SHIFT); + if (current_char > limit) + pc = byteCode + Load32Aligned(pc + 4); + else + pc += BC_CHECK_GT_LENGTH; + break; + } + BYTECODE(CHECK_REGISTER_LT) + if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_CHECK_REGISTER_LT_LENGTH; + break; + BYTECODE(CHECK_REGISTER_GE) + if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) + pc = byteCode + Load32Aligned(pc + 8); + else + pc += BC_CHECK_REGISTER_GE_LENGTH; + break; + BYTECODE(CHECK_REGISTER_EQ_POS) + if (registers[insn >> BYTECODE_SHIFT] == (int32_t) current) + pc = byteCode + Load32Aligned(pc + 4); + else + pc += BC_CHECK_REGISTER_EQ_POS_LENGTH; + break; + BYTECODE(CHECK_NOT_REGS_EQUAL) + if (registers[insn >> BYTECODE_SHIFT] == registers[Load32Aligned(pc + 4)]) + pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH; + else + pc = byteCode + Load32Aligned(pc + 8); + break; + BYTECODE(CHECK_NOT_BACK_REF) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_LENGTH; + break; + } + if (current + len > length) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } else { + int i; + for (i = 0; i < len; i++) { + if (chars[from + i] != chars[current + i]) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } + } + if (i < len) break; + current += len; + } + pc += BC_CHECK_NOT_BACK_REF_LENGTH; + break; + } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; + break; + } + if (current + len > length) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } + if (CaseInsensitiveCompareStrings(chars + from, chars + current, len * sizeof(CharT))) { + current += len; + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; + } else { + pc = byteCode + Load32Aligned(pc + 4); + } + break; + } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH; + break; + } + if (current + len > length) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } + if (CaseInsensitiveCompareUCStrings(chars + from, chars + current, + len * sizeof(CharT))) + { + current += len; + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH; + } else { + pc = byteCode + Load32Aligned(pc + 4); + } + break; + } + BYTECODE(CHECK_AT_START) + if (current == 0) + pc = byteCode + Load32Aligned(pc + 4); + else + pc += BC_CHECK_AT_START_LENGTH; + break; + BYTECODE(CHECK_NOT_AT_START) + if (current == 0) + pc += BC_CHECK_NOT_AT_START_LENGTH; + else + pc = byteCode + Load32Aligned(pc + 4); + break; + BYTECODE(SET_CURRENT_POSITION_FROM_END) { + size_t by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT; + if (length - current > by) { + current = length - by; + current_char = chars[current - 1]; + } + pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; + break; + } + default: + MOZ_CRASH("Bad bytecode"); + } + } +} + +template RegExpRunStatus +irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const Latin1Char* chars, size_t current, + size_t length, MatchPairs* matches, size_t* endIndex); + +template RegExpRunStatus +irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const char16_t* chars, size_t current, + size_t length, MatchPairs* matches, size_t* endIndex); |