diff options
Diffstat (limited to 'js/src/regexp/regexp-macro-assembler-arch.h')
-rw-r--r-- | js/src/regexp/regexp-macro-assembler-arch.h | 279 |
1 files changed, 277 insertions, 2 deletions
diff --git a/js/src/regexp/regexp-macro-assembler-arch.h b/js/src/regexp/regexp-macro-assembler-arch.h index 60b5c94de..1baa5ddd5 100644 --- a/js/src/regexp/regexp-macro-assembler-arch.h +++ b/js/src/regexp/regexp-macro-assembler-arch.h @@ -4,6 +4,10 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// Copyright 2020 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + // This file implements the NativeRegExpMacroAssembler interface for // SpiderMonkey. It provides the same interface as each of V8's // architecture-specific implementations. @@ -11,6 +15,277 @@ #ifndef RegexpMacroAssemblerArch_h #define RegexpMacroAssemblerArch_h -#include "regexp/regexp-shim.h" +#include "jit/MacroAssembler.h" +#include "regexp/regexp-macro-assembler.h" + +namespace v8 { +namespace internal { + +struct FrameData { + // Character position at the start of the input, stored as a + // negative offset from the end of the string (input_end_pointer_). + size_t inputStart; + + // The backtrack_stack_pointer_ register points to the top of the stack. + // This points to the bottom of the backtrack stack. + void* backtrackStackBase; + + // Copy of the input MatchPairs. + int32_t* matches; // pointer to capture array + int32_t numMatches; // size of capture array +}; + +class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler { + public: + SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate, + js::jit::StackMacroAssembler& masm, Zone* zone, + Mode mode, uint32_t num_capture_registers); + virtual ~SMRegExpMacroAssembler() {} // Nothing to do here + + virtual int stack_limit_slack(); + virtual IrregexpImplementation Implementation(); + + virtual bool Succeed(); + virtual void Fail(); + + virtual void AdvanceCurrentPosition(int by); + virtual void PopCurrentPosition(); + virtual void PushCurrentPosition(); + virtual void SetCurrentPositionFromEnd(int by); + + virtual void Backtrack(); + virtual void Bind(Label* label); + virtual void GoTo(Label* label); + virtual void PushBacktrack(Label* label); + + virtual void CheckCharacter(uint32_t c, Label* on_equal); + virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); + virtual void CheckCharacterGT(uc16 limit, Label* on_greater); + virtual void CheckCharacterLT(uc16 limit, Label* on_less); + virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, + Label* on_equal); + virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask, + Label* on_not_equal); + virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 mask, + Label* on_not_equal); + virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); + virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range); + virtual void CheckCharacterNotInRange(uc16 from, uc16 to, + Label* on_not_in_range); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckPosition(int cp_offset, Label* on_outside_input); + virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set); + virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); + virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, + Label* on_no_match); + + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); + + virtual void AdvanceRegister(int reg, int by); + virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); + virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); + virtual void IfRegisterEqPos(int reg, Label* if_eq); + virtual void PopRegister(int register_index); + virtual void PushRegister(int register_index, + StackCheckFlag check_stack_limit); + virtual void ReadCurrentPositionFromRegister(int reg); + virtual void WriteCurrentPositionToRegister(int reg, int cp_offset); + virtual void ReadStackPointerFromRegister(int reg); + virtual void WriteStackPointerToRegister(int reg); + virtual void SetRegister(int register_index, int to); + virtual void ClearRegisters(int reg_from, int reg_to); + + virtual Handle<HeapObject> GetCode(Handle<String> source); + + private: + size_t frameSize_ = 0; + + void createStackFrame(); + void initFrameAndRegs(); + void successHandler(); + void exitHandler(); + void backtrackHandler(); + void stackOverflowHandler(); + + // Push a register on the backtrack stack. + void Push(js::jit::Register value); + + // Pop a value from the backtrack stack. + void Pop(js::jit::Register target); + + void CheckAtStartImpl(int cp_offset, Label* on_cond, + js::jit::Assembler::Condition cond); + void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond, + js::jit::Assembler::Condition cond); + void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond, + bool negate); + void CheckCharacterInRangeImpl(uc16 from, uc16 to, Label* on_cond, + js::jit::Assembler::Condition cond); + void CheckNotBackReferenceImpl(int start_reg, bool read_backward, + Label* on_no_match, bool ignore_case); + + void LoadCurrentCharacterUnchecked(int cp_offset, int characters); + + void JumpOrBacktrack(Label* to); + + // MacroAssembler methods that take a Label can be called with a + // null label, which means that we should backtrack if we would jump + // to that label. This is a helper to avoid writing out the same + // logic a dozen times. + inline js::jit::Label* LabelOrBacktrack(Label* to) { + return to ? to->inner() : &backtrack_label_; + } + + void CheckBacktrackStackLimit(); + + static bool GrowBacktrackStack(RegExpStack* regexp_stack); + + static uint32_t CaseInsensitiveCompareStrings(const char16_t* substring1, + const char16_t* substring2, + size_t byteLength); + static uint32_t CaseInsensitiveCompareUCStrings(const char16_t* substring1, + const char16_t* substring2, + size_t byteLength); + + inline int char_size() { return static_cast<int>(mode_); } + inline js::jit::Scale factor() { + return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne; + } + + js::jit::Address inputStart() { + return js::jit::Address(masm_.getStackPointer(), + offsetof(FrameData, inputStart)); + } + js::jit::Address backtrackStackBase() { + return js::jit::Address(masm_.getStackPointer(), + offsetof(FrameData, backtrackStackBase)); + } + js::jit::Address matches() { + return js::jit::Address(masm_.getStackPointer(), + offsetof(FrameData, matches)); + } + js::jit::Address numMatches() { + return js::jit::Address(masm_.getStackPointer(), + offsetof(FrameData, numMatches)); + } + + // The stack-pointer-relative location of a regexp register. + js::jit::Address register_location(int register_index) { + return js::jit::Address(masm_.getStackPointer(), + register_offset(register_index)); + } + + int32_t register_offset(int register_index) { + MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister); + if (num_registers_ <= register_index) { + num_registers_ = register_index + 1; + } + static_assert(alignof(uintptr_t) <= alignof(FrameData)); + return sizeof(FrameData) + register_index * sizeof(uintptr_t*); + } + + JSContext* cx_; + js::jit::StackMacroAssembler& masm_; + + /* + * This assembler uses the following registers: + * + * - current_character_: + * Contains the character (or characters) currently being examined. + * Must be loaded using LoadCurrentCharacter before using any of the + * dispatch methods. After a matching pass for a global regexp, + * temporarily stores the index of capture start. + * - current_position_: + * Current position in input *as negative byte offset from end of string*. + * - input_end_pointer_: + * Points to byte after last character in the input. current_position_ is + * relative to this. + * - backtrack_stack_pointer_: + * Points to tip of the (heap-allocated) backtrack stack. The stack grows + * downward (like the native stack). + * - temp0_, temp1_, temp2_: + * Scratch registers. + * + * The native stack pointer is used to access arguments (InputOutputData), + * local variables (FrameData), and irregexp's internal virtual registers + * (see register_location). + */ + + js::jit::Register current_character_; + js::jit::Register current_position_; + js::jit::Register input_end_pointer_; + js::jit::Register backtrack_stack_pointer_; + js::jit::Register temp0_, temp1_, temp2_; + + js::jit::Label entry_label_; + js::jit::Label start_label_; + js::jit::Label backtrack_label_; + js::jit::Label success_label_; + js::jit::Label exit_label_; + js::jit::Label stack_overflow_label_; + js::jit::Label exit_with_exception_label_; + + // When we generate the code to push a backtrack label's address + // onto the backtrack stack, we don't know its final address. We + // have to patch it after linking. This is slightly delicate, as the + // Label itself (which is allocated on the stack) may not exist by + // the time we link. The approach is as follows: + // + // 1. When we push a label on the backtrack stack (PushBacktrack), + // we bind the label's patchOffset_ field to the offset within + // the code that should be overwritten. This works because each + // label is only pushed by a single instruction. + // + // 2. When we bind a label (Bind), we check to see if it has a + // bound patchOffset_. If it does, we create a LabelPatch mapping + // its patch offset to the offset of the label itself. + // + // 3. While linking the code, we walk the list of label patches + // and patch the code accordingly. + class LabelPatch { + public: + LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) + : patchOffset_(patchOffset), labelOffset_(labelOffset) {} + + js::jit::CodeOffset patchOffset_; + size_t labelOffset_ = 0; + }; + + js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_; + void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) { + js::AutoEnterOOMUnsafeRegion oomUnsafe; + if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) { + oomUnsafe.crash("Irregexp label patch"); + } + } + + Mode mode_; + int num_registers_; + int num_capture_registers_; + js::jit::LiveGeneralRegisterSet savedRegisters_; + + public: + using TableVector = + js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>; + TableVector& tables() { return tables_; } + + private: + TableVector tables_; + void AddTable(PseudoHandle<ByteArrayData> table) { + js::AutoEnterOOMUnsafeRegion oomUnsafe; + if (!tables_.append(std::move(table))) { + oomUnsafe.crash("Irregexp table append"); + } + } +}; + +} // namespace internal +} // namespace v8 -#endif // RegexpMacroAssemblerArch_h +#endif // RegexpMacroAssemblerArch_h |