From c732425e1a76ffbff74b42d777e3e00bc89be56e Mon Sep 17 00:00:00 2001
From: "Matt A. Tobin"
Date: Mon, 9 Nov 2020 20:44:26 -0500
Subject: Issue #1677 - Part 6: Implement NativeRegExpMacroAssembler for new regexp import based on irregexp/NativeRegExpMacroAssembler.cpp

---
 js/src/regexp/RegExpTypes.h                    |   51 +
 js/src/regexp/moz.build                        |    1 +
 js/src/regexp/regexp-macro-assembler-arch.h    |  279 +++++-
 js/src/regexp/regexp-native-macro-assembler.cc | 1215 ++++++++++++++++++++++++
 js/src/regexp/regexp-shim.cc                   |    7 +
 js/src/regexp/regexp-shim.h                    |   32 +-
 6 files changed, 1572 insertions(+), 13 deletions(-)
 create mode 100644 js/src/regexp/RegExpTypes.h
 create mode 100644 js/src/regexp/regexp-native-macro-assembler.cc

(limited to 'js')

NOTE(review): the line breaks and indentation of the hunks below were
reconstructed from a whitespace-collapsed copy of this patch. Confirm the
whitespace of context lines (e.g. the moz.build entries) against the tree
before applying.

diff --git a/js/src/regexp/RegExpTypes.h b/js/src/regexp/RegExpTypes.h
new file mode 100644
index 000000000..e260b5bb6
--- /dev/null
+++ b/js/src/regexp/RegExpTypes.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file forward-defines Irregexp classes that need to be visible
+// to the rest of Spidermonkey and re-exports them into js::irregexp.
+
+#ifndef regexp_RegExpTypes_h
+#define regexp_RegExpTypes_h
+
+namespace js {
+class MatchPairs;
+}
+
+namespace v8 {
+namespace internal {
+
+struct InputOutputData {
+  const void* inputStart;
+  const void* inputEnd;
+
+  // Index into inputStart (in chars) at which to begin matching.
+  size_t startIndex;
+
+  js::MatchPairs* matches;
+
+  template <typename CharT>
+  InputOutputData(const CharT* inputStart, const CharT* inputEnd,
+                  size_t startIndex, js::MatchPairs* matches)
+      : inputStart(inputStart),
+        inputEnd(inputEnd),
+        startIndex(startIndex),
+        matches(matches)
+  {}
+};
+
+}  // namespace internal
+}  // namespace v8
+
+
+namespace js {
+namespace irregexp {
+
+using InputOutputData = v8::internal::InputOutputData;
+
+}  // namespace irregexp
+}  // namespace js
+
+#endif  // regexp_RegExpTypes_h
diff --git a/js/src/regexp/moz.build b/js/src/regexp/moz.build
index 1ef280b7f..4caa4589c 100644
--- a/js/src/regexp/moz.build
+++ b/js/src/regexp/moz.build
@@ -22,6 +22,7 @@ SOURCES += [
     'regexp-interpreter.cc',
     'regexp-macro-assembler-tracer.cc',
     'regexp-macro-assembler.cc',
+    'regexp-native-macro-assembler.cc',
     'regexp-parser.cc',
     'regexp-shim.cc',
     'regexp-stack.cc',
diff --git a/js/src/regexp/regexp-macro-assembler-arch.h b/js/src/regexp/regexp-macro-assembler-arch.h
index 60b5c94de..1baa5ddd5 100644
--- a/js/src/regexp/regexp-macro-assembler-arch.h
+++ b/js/src/regexp/regexp-macro-assembler-arch.h
@@ -4,6 +4,10 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
 // This file implements the NativeRegExpMacroAssembler interface for
 // SpiderMonkey. It provides the same interface as each of V8's
 // architecture-specific implementations.
@@ -11,6 +15,277 @@
 #ifndef RegexpMacroAssemblerArch_h
 #define RegexpMacroAssemblerArch_h
 
-#include "regexp/regexp-shim.h"
+#include "jit/MacroAssembler.h"
+#include "regexp/regexp-macro-assembler.h"
+
+namespace v8 {
+namespace internal {
+
+struct FrameData {
+  // Character position at the start of the input, stored as a
+  // negative offset from the end of the string (input_end_pointer_).
+  size_t inputStart;
+
+  // The backtrack_stack_pointer_ register points to the top of the stack.
+  // This points to the bottom of the backtrack stack.
+  void* backtrackStackBase;
+
+  // Copy of the input MatchPairs.
+  int32_t* matches;    // pointer to capture array
+  int32_t numMatches;  // size of capture array
+};
+
+class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
+ public:
+  SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate,
+                         js::jit::StackMacroAssembler& masm, Zone* zone,
+                         Mode mode, uint32_t num_capture_registers);
+  virtual ~SMRegExpMacroAssembler() {}  // Nothing to do here
+
+  virtual int stack_limit_slack();
+  virtual IrregexpImplementation Implementation();
+
+  virtual bool Succeed();
+  virtual void Fail();
+
+  virtual void AdvanceCurrentPosition(int by);
+  virtual void PopCurrentPosition();
+  virtual void PushCurrentPosition();
+  virtual void SetCurrentPositionFromEnd(int by);
+
+  virtual void Backtrack();
+  virtual void Bind(Label* label);
+  virtual void GoTo(Label* label);
+  virtual void PushBacktrack(Label* label);
+
+  virtual void CheckCharacter(uint32_t c, Label* on_equal);
+  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
+  virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
+  virtual void CheckCharacterLT(uc16 limit, Label* on_less);
+  virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
+                                      Label* on_equal);
+  virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,
+                                         Label* on_not_equal);
+  virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 mask,
+                                              Label* on_not_equal);
+  virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
+  virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range);
+  virtual void CheckCharacterNotInRange(uc16 from, uc16 to,
+                                        Label* on_not_in_range);
+  virtual void CheckAtStart(int cp_offset, Label* on_at_start);
+  virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+  virtual void CheckPosition(int cp_offset, Label* on_outside_input);
+  virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
+  virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match);
+  virtual void CheckNotBackReference(int start_reg, bool read_backward,
+                                     Label* on_no_match);
+  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+                                               bool read_backward,
+                                               Label* on_no_match);
+
+  virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+                                        bool check_bounds, int characters,
+                                        int eats_at_least);
+
+  virtual void AdvanceRegister(int reg, int by);
+  virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
+  virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
+  virtual void IfRegisterEqPos(int reg, Label* if_eq);
+  virtual void PopRegister(int register_index);
+  virtual void PushRegister(int register_index,
+                            StackCheckFlag check_stack_limit);
+  virtual void ReadCurrentPositionFromRegister(int reg);
+  virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
+  virtual void ReadStackPointerFromRegister(int reg);
+  virtual void WriteStackPointerToRegister(int reg);
+  virtual void SetRegister(int register_index, int to);
+  virtual void ClearRegisters(int reg_from, int reg_to);
+
+  virtual Handle<HeapObject> GetCode(Handle<String> source);
+
+ private:
+  size_t frameSize_ = 0;
+
+  void createStackFrame();
+  void initFrameAndRegs();
+  void successHandler();
+  void exitHandler();
+  void backtrackHandler();
+  void stackOverflowHandler();
+
+  // Push a register on the backtrack stack.
+  void Push(js::jit::Register value);
+
+  // Pop a value from the backtrack stack.
+  void Pop(js::jit::Register target);
+
+  void CheckAtStartImpl(int cp_offset, Label* on_cond,
+                        js::jit::Assembler::Condition cond);
+  void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond,
+                          js::jit::Assembler::Condition cond);
+  void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond,
+                                  bool negate);
+  void CheckCharacterInRangeImpl(uc16 from, uc16 to, Label* on_cond,
+                                 js::jit::Assembler::Condition cond);
+  void CheckNotBackReferenceImpl(int start_reg, bool read_backward,
+                                 Label* on_no_match, bool ignore_case);
+
+  void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
+
+  void JumpOrBacktrack(Label* to);
+
+  // MacroAssembler methods that take a Label can be called with a
+  // null label, which means that we should backtrack if we would jump
+  // to that label. This is a helper to avoid writing out the same
+  // logic a dozen times.
+  inline js::jit::Label* LabelOrBacktrack(Label* to) {
+    return to ? to->inner() : &backtrack_label_;
+  }
+
+  void CheckBacktrackStackLimit();
+
+  static bool GrowBacktrackStack(RegExpStack* regexp_stack);
+
+  static uint32_t CaseInsensitiveCompareStrings(const char16_t* substring1,
+                                                const char16_t* substring2,
+                                                size_t byteLength);
+  static uint32_t CaseInsensitiveCompareUCStrings(const char16_t* substring1,
+                                                  const char16_t* substring2,
+                                                  size_t byteLength);
+
+  inline int char_size() { return static_cast<int>(mode_); }
+  inline js::jit::Scale factor() {
+    return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne;
+  }
+
+  js::jit::Address inputStart() {
+    return js::jit::Address(masm_.getStackPointer(),
+                            offsetof(FrameData, inputStart));
+  }
+  js::jit::Address backtrackStackBase() {
+    return js::jit::Address(masm_.getStackPointer(),
+                            offsetof(FrameData, backtrackStackBase));
+  }
+  js::jit::Address matches() {
+    return js::jit::Address(masm_.getStackPointer(),
+                            offsetof(FrameData, matches));
+  }
+  js::jit::Address numMatches() {
+    return js::jit::Address(masm_.getStackPointer(),
+                            offsetof(FrameData, numMatches));
+  }
+
+  // The stack-pointer-relative location of a regexp register.
+  js::jit::Address register_location(int register_index) {
+    return js::jit::Address(masm_.getStackPointer(),
+                            register_offset(register_index));
+  }
+
+  int32_t register_offset(int register_index) {
+    MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister);
+    if (num_registers_ <= register_index) {
+      num_registers_ = register_index + 1;
+    }
+    static_assert(alignof(uintptr_t) <= alignof(FrameData));
+    return sizeof(FrameData) + register_index * sizeof(uintptr_t*);
+  }
+
+  JSContext* cx_;
+  js::jit::StackMacroAssembler& masm_;
+
+  /*
+   * This assembler uses the following registers:
+   *
+   * - current_character_:
+   *     Contains the character (or characters) currently being examined.
+   *     Must be loaded using LoadCurrentCharacter before using any of the
+   *     dispatch methods. After a matching pass for a global regexp,
+   *     temporarily stores the index of capture start.
+   * - current_position_:
+   *     Current position in input *as negative byte offset from end of string*.
+   * - input_end_pointer_:
+   *     Points to byte after last character in the input. current_position_ is
+   *     relative to this.
+   * - backtrack_stack_pointer_:
+   *     Points to tip of the (heap-allocated) backtrack stack. The stack grows
+   *     downward (like the native stack).
+   * - temp0_, temp1_, temp2_:
+   *     Scratch registers.
+   *
+   * The native stack pointer is used to access arguments (InputOutputData),
+   * local variables (FrameData), and irregexp's internal virtual registers
+   * (see register_location).
+   */
+
+  js::jit::Register current_character_;
+  js::jit::Register current_position_;
+  js::jit::Register input_end_pointer_;
+  js::jit::Register backtrack_stack_pointer_;
+  js::jit::Register temp0_, temp1_, temp2_;
+
+  js::jit::Label entry_label_;
+  js::jit::Label start_label_;
+  js::jit::Label backtrack_label_;
+  js::jit::Label success_label_;
+  js::jit::Label exit_label_;
+  js::jit::Label stack_overflow_label_;
+  js::jit::Label exit_with_exception_label_;
+
+  // When we generate the code to push a backtrack label's address
+  // onto the backtrack stack, we don't know its final address. We
+  // have to patch it after linking. This is slightly delicate, as the
+  // Label itself (which is allocated on the stack) may not exist by
+  // the time we link. The approach is as follows:
+  //
+  // 1. When we push a label on the backtrack stack (PushBacktrack),
+  //    we bind the label's patchOffset_ field to the offset within
+  //    the code that should be overwritten. This works because each
+  //    label is only pushed by a single instruction.
+  //
+  // 2. When we bind a label (Bind), we check to see if it has a
+  //    bound patchOffset_. If it does, we create a LabelPatch mapping
+  //    its patch offset to the offset of the label itself.
+  //
+  // 3. While linking the code, we walk the list of label patches
+  //    and patch the code accordingly.
+  class LabelPatch {
+   public:
+    LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset)
+        : patchOffset_(patchOffset), labelOffset_(labelOffset) {}
+
+    js::jit::CodeOffset patchOffset_;
+    size_t labelOffset_ = 0;
+  };
+
+  js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_;
+  void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) {
+    js::AutoEnterOOMUnsafeRegion oomUnsafe;
+    if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) {
+      oomUnsafe.crash("Irregexp label patch");
+    }
+  }
+
+  Mode mode_;
+  int num_registers_;
+  int num_capture_registers_;
+  js::jit::LiveGeneralRegisterSet savedRegisters_;
+
+ public:
+  using TableVector =
+      js::Vector<PseudoHandle<ByteArray>, 4, js::SystemAllocPolicy>;
+  TableVector& tables() { return tables_; }
+
+ private:
+  TableVector tables_;
+  void AddTable(PseudoHandle<ByteArray> table) {
+    js::AutoEnterOOMUnsafeRegion oomUnsafe;
+    if (!tables_.append(std::move(table))) {
+      oomUnsafe.crash("Irregexp table append");
+    }
+  }
+};
+
+}  // namespace internal
+}  // namespace v8
 
-#endif // RegexpMacroAssemblerArch_h
+#endif  // RegexpMacroAssemblerArch_h
diff --git a/js/src/regexp/regexp-native-macro-assembler.cc b/js/src/regexp/regexp-native-macro-assembler.cc
new file mode 100644
index 000000000..15182ad71
--- /dev/null
+++ b/js/src/regexp/regexp-native-macro-assembler.cc
@@ -0,0 +1,1215 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+ +#include "jit/Linker.h" +#include "regexp/regexp-macro-assembler-arch.h" +#include "regexp/regexp-stack.h" +#include "vm/MatchPairs.h" + +#include "jit/MacroAssembler-inl.h" + +namespace v8 { +namespace internal { + +using js::MatchPairs; +using js::jit::AbsoluteAddress; +using js::jit::Address; +using js::jit::AllocatableGeneralRegisterSet; +using js::jit::Assembler; +using js::jit::BaseIndex; +using js::jit::CodeLocationLabel; +using js::jit::GeneralRegisterBackwardIterator; +using js::jit::GeneralRegisterForwardIterator; +using js::jit::GeneralRegisterSet; +using js::jit::Imm32; +using js::jit::ImmPtr; +using js::jit::ImmWord; +using js::jit::JitCode; +using js::jit::Linker; +using js::jit::LiveGeneralRegisterSet; +using js::jit::Register; +using js::jit::Registers; +using js::jit::StackMacroAssembler; + +SMRegExpMacroAssembler::SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate, + StackMacroAssembler& masm, + Zone* zone, Mode mode, + uint32_t num_capture_registers) + : NativeRegExpMacroAssembler(isolate, zone), + cx_(cx), + masm_(masm), + mode_(mode), + num_registers_(num_capture_registers), + num_capture_registers_(num_capture_registers) { + // Each capture has a start and an end register + MOZ_ASSERT(num_capture_registers_ % 2 == 0); + + AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + + temp0_ = regs.takeAny(); + temp1_ = regs.takeAny(); + temp2_ = regs.takeAny(); + input_end_pointer_ = regs.takeAny(); + current_character_ = regs.takeAny(); + current_position_ = regs.takeAny(); + backtrack_stack_pointer_ = regs.takeAny(); + savedRegisters_ = js::jit::SavedNonVolatileRegisters(regs); + + masm_.jump(&entry_label_); // We'll generate the entry code later + masm_.bind(&start_label_); // and continue from here. 
+} + +int SMRegExpMacroAssembler::stack_limit_slack() { + return RegExpStack::kStackLimitSlack; +} + +void SMRegExpMacroAssembler::AdvanceCurrentPosition(int by) { + if (by != 0) { + masm_.addPtr(Imm32(by * char_size()), current_position_); + } +} + +void SMRegExpMacroAssembler::AdvanceRegister(int reg, int by) { + MOZ_ASSERT(reg >= 0 && reg < num_registers_); + if (by != 0) { + masm_.addPtr(Imm32(by), register_location(reg)); + } +} + +void SMRegExpMacroAssembler::Backtrack() { + // Check for an interrupt. We have to restart from the beginning if we + // are interrupted, so we only check for urgent interrupts. + js::jit::Label noInterrupt; + masm_.branchTest32( + Assembler::Zero, AbsoluteAddress(cx_->addressOfInterruptBits()), + Imm32(uint32_t(js::InterruptReason::CallbackUrgent)), &noInterrupt); + masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_); + masm_.jump(&exit_label_); + masm_.bind(&noInterrupt); + + // Pop code location from backtrack stack and jump to location. + Pop(temp0_); + masm_.jump(temp0_); +} + +void SMRegExpMacroAssembler::Bind(Label* label) { + masm_.bind(label->inner()); + if (label->patchOffset_.bound()) { + AddLabelPatch(label->patchOffset_, label->pos()); + } +} + +// Check if current_position + cp_offset is the input start +void SMRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond, + Assembler::Condition cond) { + Address addr(current_position_, cp_offset * char_size()); + masm_.computeEffectiveAddress(addr, temp0_); + + masm_.branchPtr(cond, inputStart(), temp0_, + LabelOrBacktrack(on_cond)); +} + +void SMRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) { + CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal); +} + +void SMRegExpMacroAssembler::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual); +} + +void SMRegExpMacroAssembler::CheckCharacterImpl(Imm32 c, Label* on_cond, + Assembler::Condition cond) { + 
masm_.branch32(cond, current_character_, c, LabelOrBacktrack(on_cond)); +} + +void SMRegExpMacroAssembler::CheckCharacter(uint32_t c, Label* on_equal) { + CheckCharacterImpl(Imm32(c), on_equal, Assembler::Equal); +} + +void SMRegExpMacroAssembler::CheckNotCharacter(uint32_t c, + Label* on_not_equal) { + CheckCharacterImpl(Imm32(c), on_not_equal, Assembler::NotEqual); +} + +void SMRegExpMacroAssembler::CheckCharacterGT(uc16 c, Label* on_greater) { + CheckCharacterImpl(Imm32(c), on_greater, Assembler::GreaterThan); +} + +void SMRegExpMacroAssembler::CheckCharacterLT(uc16 c, Label* on_less) { + CheckCharacterImpl(Imm32(c), on_less, Assembler::LessThan); +} + +// Bitwise-and the current character with mask and then check for a +// match with c. +void SMRegExpMacroAssembler::CheckCharacterAfterAndImpl(uint32_t c, + uint32_t mask, + Label* on_cond, + bool is_not) { + if (c == 0) { + Assembler::Condition cond = is_not ? Assembler::NonZero : Assembler::Zero; + masm_.branchTest32(cond, current_character_, Imm32(mask), + LabelOrBacktrack(on_cond)); + } else { + Assembler::Condition cond = is_not ? Assembler::NotEqual : Assembler::Equal; + masm_.move32(Imm32(mask), temp0_); + masm_.and32(current_character_, temp0_); + masm_.branch32(cond, temp0_, Imm32(c), LabelOrBacktrack(on_cond)); + } +} + +void SMRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c, + uint32_t mask, + Label* on_equal) { + CheckCharacterAfterAndImpl(c, mask, on_equal, /*is_not =*/false); +} + +void SMRegExpMacroAssembler::CheckNotCharacterAfterAnd(uint32_t c, + uint32_t mask, + Label* on_not_equal) { + CheckCharacterAfterAndImpl(c, mask, on_not_equal, /*is_not =*/true); +} + + +// Subtract minus from the current character, then bitwise-and the +// result with mask, then check for a match with c. 
+void SMRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd( + uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) { + masm_.computeEffectiveAddress(Address(current_character_, -minus), temp0_); + if (c == 0) { + masm_.branchTest32(Assembler::NonZero, temp0_, Imm32(mask), + LabelOrBacktrack(on_not_equal)); + } else { + masm_.and32(Imm32(mask), temp0_); + masm_.branch32(Assembler::NotEqual, temp0_, Imm32(c), + LabelOrBacktrack(on_not_equal)); + } +} + +// If the current position matches the position stored on top of the backtrack +// stack, pops the backtrack stack and branches to the given label. +void SMRegExpMacroAssembler::CheckGreedyLoop(Label* on_equal) { + js::jit::Label fallthrough; + masm_.branchPtr(Assembler::NotEqual, Address(backtrack_stack_pointer_, 0), + current_position_, &fallthrough); + masm_.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_); // Pop. + JumpOrBacktrack(on_equal); + masm_.bind(&fallthrough); +} + +void SMRegExpMacroAssembler::CheckCharacterInRangeImpl( + uc16 from, uc16 to, Label* on_cond, Assembler::Condition cond) { + // x is in [from,to] if unsigned(x - from) <= to - from + masm_.computeEffectiveAddress(Address(current_character_, -from), temp0_); + masm_.branch32(cond, temp0_, Imm32(to - from), LabelOrBacktrack(on_cond)); +} + +void SMRegExpMacroAssembler::CheckCharacterInRange(uc16 from, uc16 to, + Label* on_in_range) { + CheckCharacterInRangeImpl(from, to, on_in_range, Assembler::BelowOrEqual); +} + +void SMRegExpMacroAssembler::CheckCharacterNotInRange(uc16 from, uc16 to, + Label* on_not_in_range) { + CheckCharacterInRangeImpl(from, to, on_not_in_range, Assembler::Above); +} + +void SMRegExpMacroAssembler::CheckBitInTable(Handle table, + Label* on_bit_set) { + // Claim ownership of the ByteArray from the current HandleScope. + // ByteArrays are allocated on the C++ heap and are (eventually) + // owned by the RegExpShared. 
+ PseudoHandle rawTable = table->takeOwnership(isolate()); + + masm_.movePtr(ImmPtr(rawTable->data()), temp0_); + + masm_.move32(Imm32(kTableMask), temp1_); + masm_.and32(current_character_, temp1_); + + masm_.load8ZeroExtend(BaseIndex(temp0_, temp1_, js::jit::TimesOne), temp0_); + masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, + LabelOrBacktrack(on_bit_set)); + + // Transfer ownership of |rawTable| to the |tables_| vector. + AddTable(std::move(rawTable)); +} + +void SMRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg, + bool read_backward, + Label* on_no_match, + bool ignore_case) { + js::jit::Label fallthrough; + + // Captures are stored as a sequential pair of registers. + // Find the length of the back-referenced capture and load the + // capture's start index into current_character_. + masm_.loadPtr(register_location(start_reg), // index of start + current_character_); + masm_.loadPtr(register_location(start_reg + 1), temp0_); // index of end + masm_.subPtr(current_character_, temp0_); // length of capture + + // Capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. + masm_.branchPtr(Assembler::Equal, temp0_, ImmWord(0), &fallthrough); + + // Check that there are sufficient characters left in the input. + if (read_backward) { + // If start + len > current, there isn't enough room for a + // lookbehind backreference. + masm_.loadPtr(inputStart(), temp1_); + masm_.addPtr(temp0_, temp1_); + masm_.branchPtr(Assembler::GreaterThan, temp1_, current_position_, + LabelOrBacktrack(on_no_match)); + } else { + // current_position_ is the negative offset from the end. + // If current + len > 0, there isn't enough room for a backreference. 
+ masm_.movePtr(current_position_, temp1_); + masm_.addPtr(temp0_, temp1_); + masm_.branchPtr(Assembler::GreaterThan, temp1_, ImmWord(0), + LabelOrBacktrack(on_no_match)); + } + + if (mode_ == UC16 && ignore_case) { + // We call a helper function for case-insensitive non-latin1 strings. + + // Save volatile regs. temp1_ and temp2_ don't need to be saved. + LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); + volatileRegs.takeUnchecked(temp1_); + volatileRegs.takeUnchecked(temp2_); + masm_.PushRegsInMask(volatileRegs); + + // Parameters are + // Address captured - Address of captured substring's start. + // Address current - Address of current character position. + // size_t byte_length - length of capture (in bytes) + + // Compute |captured| + masm_.addPtr(input_end_pointer_, current_character_); + + // Compute |current| + masm_.addPtr(input_end_pointer_, current_position_); + if (read_backward) { + // Offset by length when matching backwards. + masm_.subPtr(temp0_, current_position_); + } + + masm_.setupUnalignedABICall(temp1_); + masm_.passABIArg(current_character_); + masm_.passABIArg(current_position_); + masm_.passABIArg(temp0_); + + bool unicode = true; // TODO: Fix V8 bug + if (unicode) { + uint32_t (*fun)(const char16_t*, const char16_t*, size_t) = + CaseInsensitiveCompareUCStrings; + masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } else { + uint32_t (*fun)(const char16_t*, const char16_t*, size_t) = + CaseInsensitiveCompareStrings; + masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } + masm_.storeCallInt32Result(temp1_); + masm_.PopRegsInMask(volatileRegs); + masm_.branchTest32(Assembler::Zero, temp1_, temp1_, + LabelOrBacktrack(on_no_match)); + + // On success, advance position by length of capture + if (read_backward) { + masm_.subPtr(temp0_, current_position_); + } else { + masm_.addPtr(temp0_, current_position_); + } + + masm_.bind(&fallthrough); + return; + } + + // We will be modifying current_position_. 
Save it in case the match fails. + masm_.push(current_position_); + + // Compute start of capture string + masm_.addPtr(input_end_pointer_, current_character_); + + // Compute start of match string + masm_.addPtr(input_end_pointer_, current_position_); + if (read_backward) { + // Offset by length when matching backwards. + masm_.subPtr(temp0_, current_position_); + } + + // Compute end of match string + masm_.addPtr(current_position_, temp0_); + + js::jit::Label success; + js::jit::Label fail; + js::jit::Label loop; + masm_.bind(&loop); + + // Load next character from each string. + if (mode_ == LATIN1) { + masm_.load8ZeroExtend(Address(current_character_, 0), temp1_); + masm_.load8ZeroExtend(Address(current_position_, 0), temp2_); + } else { + masm_.load16ZeroExtend(Address(current_character_, 0), temp1_); + masm_.load16ZeroExtend(Address(current_position_, 0), temp2_); + } + + if (ignore_case) { + MOZ_ASSERT(mode_ == LATIN1); + // Try exact match. + js::jit::Label loop_increment; + masm_.branch32(Assembler::Equal, temp1_, temp2_, &loop_increment); + + // Mismatch. Try case-insensitive match. + // Force the match character to lower case (by setting bit 0x20) + // then check to see if it is a letter. + js::jit::Label convert_capture; + masm_.or32(Imm32(0x20), temp1_); + + // Check if it is in [a,z]. + masm_.computeEffectiveAddress(Address(temp1_, -'a'), temp2_); + masm_.branch32(Assembler::BelowOrEqual, temp2_, Imm32('z' - 'a'), + &convert_capture); + // Check for values in range [224,254]. + // Exclude 247 (U+00F7 DIVISION SIGN). + masm_.sub32(Imm32(224 - 'a'), temp2_); + masm_.branch32(Assembler::Above, temp2_, Imm32(254 - 224), &fail); + masm_.branch32(Assembler::Equal, temp2_, Imm32(247 - 224), &fail); + + // Match character is lower case. Convert capture character + // to lower case and compare. 
+ masm_.bind(&convert_capture); + masm_.load8ZeroExtend(Address(current_character_, 0), temp2_); + masm_.or32(Imm32(0x20), temp2_); + masm_.branch32(Assembler::NotEqual, temp1_, temp2_, &fail); + + masm_.bind(&loop_increment); + } else { + // Fail if characters do not match. + masm_.branch32(Assembler::NotEqual, temp1_, temp2_, &fail); + } + + // Increment pointers into match and capture strings. + masm_.addPtr(Imm32(char_size()), current_character_); + masm_.addPtr(Imm32(char_size()), current_position_); + + // Loop if we have not reached the end of the match string. + masm_.branchPtr(Assembler::Below, current_position_, temp0_, &loop); + masm_.jump(&success); + + // If we fail, restore current_position_ and branch. + masm_.bind(&fail); + masm_.pop(current_position_); + JumpOrBacktrack(on_no_match); + + masm_.bind(&success); + + // current_position_ is a pointer. Convert it back to an offset. + masm_.subPtr(input_end_pointer_, current_position_); + if (read_backward) { + // Subtract match length if we matched backward + masm_.addPtr(register_location(start_reg), current_position_); + masm_.subPtr(register_location(start_reg + 1), current_position_); + } + + // Drop saved value of current_position_ + masm_.addToStackPtr(Imm32(sizeof(uintptr_t))); + + masm_.bind(&fallthrough); +} + +// Branch if a back-reference does not match a previous capture. +void SMRegExpMacroAssembler::CheckNotBackReference(int start_reg, + bool read_backward, + Label* on_no_match) { + CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, + /*ignore_case = */ false); +} + +void SMRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase( + int start_reg, bool read_backward, Label* on_no_match) { + CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, + /*ignore_case = */ true); +} + +// Checks whether the given offset from the current position is +// inside the input string. 
+void SMRegExpMacroAssembler::CheckPosition(int cp_offset, + Label* on_outside_input) { + // Note: current_position_ is a (negative) byte offset relative to + // the end of the input string. + if (cp_offset >= 0) { + // end + current + offset >= end + // <=> current + offset >= 0 + // <=> current >= -offset + masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_, + ImmWord(-cp_offset * char_size()), + LabelOrBacktrack(on_outside_input)); + } else { + // Compute offset position + masm_.computeEffectiveAddress( + Address(current_position_, cp_offset * char_size()), temp0_); + + // Compare to start of input. + masm_.branchPtr(Assembler::GreaterThanOrEqual, inputStart(), temp0_, + LabelOrBacktrack(on_outside_input)); + } +} + +// This function attempts to generate special case code for character classes. +// Returns true if a special case is generated. +// Otherwise returns false and generates no code. +bool SMRegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type, + Label* on_no_match) { + js::jit::Label* no_match = LabelOrBacktrack(on_no_match); + + // Note: throughout this function, range checks (c in [min, max]) + // are implemented by an unsigned (c - min) <= (max - min) check. + switch (type) { + case 's': { + // Match space-characters + if (mode_ != LATIN1) { + return false; + } + js::jit::Label success; + // One byte space characters are ' ', '\t'..'\r', and '\u00a0' (NBSP). + + // Check ' ' + masm_.branch32(Assembler::Equal, current_character_, Imm32(' '), + &success); + + // Check '\t'..'\r' + masm_.computeEffectiveAddress(Address(current_character_, -'\t'), + temp0_); + masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('\r' - '\t'), + &success); + + // Check \u00a0. + masm_.branch32(Assembler::NotEqual, temp0_, Imm32(0x00a0 - '\t'), + no_match); + + masm_.bind(&success); + return true; + } + case 'S': + // The emitted code for generic character classes is good enough. 
+ return false; + case 'd': + // Match latin1 digits ('0'-'9') + masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_); + masm_.branch32(Assembler::Above, temp0_, Imm32('9' - '0'), no_match); + return true; + case 'D': + // Match anything except latin1 digits ('0'-'9') + masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_); + masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('9' - '0'), + no_match); + return true; + case '.': + // Match non-newlines. This excludes '\n' (0x0a), '\r' (0x0d), + // U+2028 LINE SEPARATOR, and U+2029 PARAGRAPH SEPARATOR. + // See https://tc39.es/ecma262/#prod-LineTerminator + + // To test for 0x0a and 0x0d efficiently, we XOR the input with 1. + // This converts 0x0a to 0x0b, and 0x0d to 0x0c, allowing us to + // test for the contiguous range 0x0b..0x0c. + masm_.move32(current_character_, temp0_); + masm_.xor32(Imm32(0x01), temp0_); + masm_.sub32(Imm32(0x0b), temp0_); + masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b), + no_match); + + if (mode_ == UC16) { + // Compare original value to 0x2028 and 0x2029, using the already + // computed (current_char ^ 0x01 - 0x0b). I.e., check for + // 0x201d (0x2028 - 0x0b) or 0x201e. + masm_.sub32(Imm32(0x2028 - 0x0b), temp0_); + masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x2029 - 0x2028), + no_match); + } + return true; + case 'w': + // \w matches the set of 63 characters defined in Runtime Semantics: + // WordCharacters. We use a static lookup table, which is defined in + // regexp-macro-assembler.cc. + // Note: if both Unicode and IgnoreCase are true, \w matches a + // larger set of characters. That case is handled elsewhere. 
+ if (mode_ != LATIN1) { + masm_.branch32(Assembler::Above, current_character_, Imm32('z'), + no_match); + } + static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar); + masm_.movePtr(ImmPtr(word_character_map), temp0_); + masm_.load8ZeroExtend( + BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_); + masm_.branchTest32(Assembler::Zero, temp0_, temp0_, no_match); + return true; + case 'W': { + // See 'w' above. + js::jit::Label done; + if (mode_ != LATIN1) { + masm_.branch32(Assembler::Above, current_character_, Imm32('z'), &done); + } + static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar); + masm_.movePtr(ImmPtr(word_character_map), temp0_); + masm_.load8ZeroExtend( + BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_); + masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, no_match); + if (mode_ != LATIN1) { + masm_.bind(&done); + } + return true; + } + //////////////////////////////////////////////////////////////////////// + // Non-standard classes (with no syntactic shorthand) used internally // + //////////////////////////////////////////////////////////////////////// + case '*': + // Match any character + return true; + case 'n': + // Match newlines. The opposite of '.'. See '.' above. + masm_.move32(current_character_, temp0_); + masm_.xor32(Imm32(0x01), temp0_); + masm_.sub32(Imm32(0x0b), temp0_); + if (mode_ == LATIN1) { + masm_.branch32(Assembler::Above, temp0_, Imm32(0x0c - 0x0b), no_match); + } else { + MOZ_ASSERT(mode_ == UC16); + js::jit::Label done; + masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b), + &done); + + // Compare original value to 0x2028 and 0x2029, using the already + // computed (current_char ^ 0x01 - 0x0b). I.e., check for + // 0x201d (0x2028 - 0x0b) or 0x201e. 
+ masm_.sub32(Imm32(0x2028 - 0x0b), temp0_); + masm_.branch32(Assembler::Above, temp0_, Imm32(0x2029 - 0x2028), + no_match); + masm_.bind(&done); + } + return true; + + // No custom implementation + default: + return false; + } +} + +void SMRegExpMacroAssembler::Fail() { + masm_.movePtr(ImmWord(js::RegExpRunStatus_Success_NotFound), temp0_); + masm_.jump(&exit_label_); +} + +void SMRegExpMacroAssembler::GoTo(Label* to) { + masm_.jump(LabelOrBacktrack(to)); +} + +void SMRegExpMacroAssembler::IfRegisterGE(int reg, int comparand, + Label* if_ge) { + masm_.branchPtr(Assembler::GreaterThanOrEqual, register_location(reg), + ImmWord(comparand), LabelOrBacktrack(if_ge)); +} + +void SMRegExpMacroAssembler::IfRegisterLT(int reg, int comparand, + Label* if_lt) { + masm_.branchPtr(Assembler::LessThan, register_location(reg), + ImmWord(comparand), LabelOrBacktrack(if_lt)); +} + +void SMRegExpMacroAssembler::IfRegisterEqPos(int reg, Label* if_eq) { + masm_.branchPtr(Assembler::Equal, register_location(reg), current_position_, + LabelOrBacktrack(if_eq)); +} + +// This is a word-for-word identical copy of the V8 code, which is +// duplicated in at least nine different places in V8 (one per +// supported architecture) with no differences outside of comments and +// formatting. It should be hoisted into the superclass. Once that is +// done upstream, this version can be deleted. +void SMRegExpMacroAssembler::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + MOZ_ASSERT(eats_at_least >= characters); + MOZ_ASSERT(cp_offset < (1 << 30)); // Be sane! 
(And ensure negation works) + + if (check_bounds) { + if (cp_offset >= 0) { + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } + } + LoadCurrentCharacterUnchecked(cp_offset, characters); +} + +// Load the character (or characters) at the specified offset from the +// current position. Zero-extend to 32 bits. +void SMRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, + int characters) { + BaseIndex address(input_end_pointer_, current_position_, js::jit::TimesOne, + cp_offset * char_size()); + if (mode_ == LATIN1) { + if (characters == 4) { + masm_.load32(address, current_character_); + } else if (characters == 2) { + masm_.load16ZeroExtend(address, current_character_); + } else { + MOZ_ASSERT(characters == 1); + masm_.load8ZeroExtend(address, current_character_); + } + } else { + MOZ_ASSERT(mode_ == UC16); + if (characters == 2) { + masm_.load32(address, current_character_); + } else { + MOZ_ASSERT(characters == 1); + masm_.load16ZeroExtend(address, current_character_); + } + } +} + +void SMRegExpMacroAssembler::PopCurrentPosition() { Pop(current_position_); } + +void SMRegExpMacroAssembler::PopRegister(int register_index) { + Pop(temp0_); + masm_.storePtr(temp0_, register_location(register_index)); +} + +void SMRegExpMacroAssembler::PushBacktrack(Label* label) { + MOZ_ASSERT(!label->is_bound()); + MOZ_ASSERT(!label->patchOffset_.bound()); + label->patchOffset_ = masm_.movWithPatch(ImmPtr(nullptr), temp0_); + MOZ_ASSERT(label->patchOffset_.bound()); + + Push(temp0_); + + CheckBacktrackStackLimit(); +} + +void SMRegExpMacroAssembler::PushCurrentPosition() { Push(current_position_); } + +void SMRegExpMacroAssembler::PushRegister(int register_index, + StackCheckFlag check_stack_limit) { + masm_.loadPtr(register_location(register_index), temp0_); + Push(temp0_); + if (check_stack_limit) { + CheckBacktrackStackLimit(); + } +} + +void 
SMRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg) { + masm_.loadPtr(register_location(reg), current_position_); +} + +void SMRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg, + int cp_offset) { + if (cp_offset == 0) { + masm_.storePtr(current_position_, register_location(reg)); + } else { + Address addr(current_position_, cp_offset * char_size()); + masm_.computeEffectiveAddress(addr, temp0_); + masm_.storePtr(temp0_, register_location(reg)); + } +} + +// Note: The backtrack stack pointer is stored in a register as an +// offset from the stack top, not as a bare pointer, so that it is not +// corrupted if the backtrack stack grows (and therefore moves). +void SMRegExpMacroAssembler::ReadStackPointerFromRegister(int reg) { + masm_.loadPtr(register_location(reg), backtrack_stack_pointer_); + masm_.addPtr(backtrackStackBase(), backtrack_stack_pointer_); +} +void SMRegExpMacroAssembler::WriteStackPointerToRegister(int reg) { + masm_.movePtr(backtrack_stack_pointer_, temp0_); + masm_.subPtr(backtrackStackBase(), temp0_); + masm_.storePtr(temp0_, register_location(reg)); +} + +// When matching a regexp that is anchored at the end, this operation +// is used to try skipping the beginning of long strings. If the +// maximum length of a match is less than the length of the string, we +// can skip the initial len - max_len bytes. +void SMRegExpMacroAssembler::SetCurrentPositionFromEnd(int by) { + js::jit::Label after_position; + masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_, + ImmWord(-by * char_size()), &after_position); + masm_.movePtr(ImmWord(-by * char_size()), current_position_); + + // On RegExp code entry (where this operation is used), the character before + // the current position is expected to be already loaded. + // We have advanced the position, so it's safe to read backwards. 
+ LoadCurrentCharacterUnchecked(-1, 1); + masm_.bind(&after_position); +} + +void SMRegExpMacroAssembler::SetRegister(int register_index, int to) { + MOZ_ASSERT(register_index >= num_capture_registers_); + masm_.storePtr(ImmWord(to), register_location(register_index)); +} + +// Returns true if a regexp match can be restarted (aka the regexp is global). +// The return value is not used anywhere, but we implement it to be safe. +bool SMRegExpMacroAssembler::Succeed() { + masm_.jump(&success_label_); + return global(); +} + +// Capture registers are initialized to input[-1] +void SMRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to) { + MOZ_ASSERT(reg_from <= reg_to); + masm_.loadPtr(inputStart(), temp0_); + masm_.subPtr(Imm32(char_size()), temp0_); + for (int reg = reg_from; reg <= reg_to; reg++) { + masm_.storePtr(temp0_, register_location(reg)); + } +} + +void SMRegExpMacroAssembler::Push(Register source) { + MOZ_ASSERT(source != backtrack_stack_pointer_); + + masm_.subPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_); + masm_.storePtr(source, Address(backtrack_stack_pointer_, 0)); +} + +void SMRegExpMacroAssembler::Pop(Register target) { + MOZ_ASSERT(target != backtrack_stack_pointer_); + + masm_.loadPtr(Address(backtrack_stack_pointer_, 0), target); + masm_.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_); +} + +void SMRegExpMacroAssembler::JumpOrBacktrack(Label* to) { + if (to) { + masm_.jump(to->inner()); + } else { + Backtrack(); + } +} + +// Generate a quick inline test for backtrack stack overflow. +// If the test fails, call an OOL handler to try growing the stack. 
+void SMRegExpMacroAssembler::CheckBacktrackStackLimit() { + js::jit::Label no_stack_overflow; + masm_.branchPtr( + Assembler::BelowOrEqual, + AbsoluteAddress(isolate()->regexp_stack()->limit_address_address()), + backtrack_stack_pointer_, &no_stack_overflow); + + masm_.call(&stack_overflow_label_); + + // Exit with an exception if the call failed + masm_.branchTest32(Assembler::Zero, temp0_, temp0_, + &exit_with_exception_label_); + + masm_.bind(&no_stack_overflow); +} + +// This is used to sneak an OOM through the V8 layer. +static Handle DummyCode() { + return Handle::fromHandleValue(JS::UndefinedHandleValue); +} + +// Finalize code. This is called last, so that we know how many +// registers we need. +Handle SMRegExpMacroAssembler::GetCode(Handle source) { + if (!cx_->realm()->ensureJitRealmExists(cx_)) { + return DummyCode(); + } + + masm_.bind(&entry_label_); + + createStackFrame(); + initFrameAndRegs(); + + masm_.jump(&start_label_); + + successHandler(); + exitHandler(); + backtrackHandler(); + stackOverflowHandler(); + + Linker linker(masm_); + JitCode* code = linker.newCode(cx_, js::jit::CodeKind::RegExp); + if (!code) { + return DummyCode(); + } + + for (LabelPatch& lp : labelPatches_) { + Assembler::PatchDataWithValueCheck(CodeLocationLabel(code, lp.patchOffset_), + ImmPtr(code->raw() + lp.labelOffset_), + ImmPtr(nullptr)); + } + + return Handle(JS::PrivateGCThingValue(code), isolate()); +} + +/* + * The stack will have the following structure: + * sp-> - FrameData + * - inputStart + * - backtrack stack base + * - matches + * - numMatches + * - Registers + * - Capture positions + * - Scratch registers + * --- frame alignment --- + * - Saved register area + * - Return address + */ +void SMRegExpMacroAssembler::createStackFrame() { +#ifdef JS_CODEGEN_ARM64 + // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for + // addressing. 
The register we use for PSP may however also be used by + // calling code, and it is nonvolatile, so save it. Do this as a special + // case first because the generic save/restore code needs the PSP to be + // initialized already. + MOZ_ASSERT(js::jit::PseudoStackPointer64.Is(masm_.GetStackPointer64())); + masm_.Str(js::jit::PseudoStackPointer64, + vixl::MemOperand(js::jit::sp, -16, vixl::PreIndex)); + + // Initialize the PSP from the SP. + masm_.initPseudoStackPtr(); +#endif + + // Push non-volatile registers which might be modified by jitcode. + size_t pushedNonVolatileRegisters = 0; + for (GeneralRegisterForwardIterator iter(savedRegisters_); iter.more(); + ++iter) { + masm_.Push(*iter); + pushedNonVolatileRegisters++; + } + + // The pointer to InputOutputData is passed as the first argument. + // On x86 we have to load it off the stack into temp0_. + // On other platforms it is already in a register. +#ifdef JS_CODEGEN_X86 + Address ioDataAddr(masm_.getStackPointer(), + (pushedNonVolatileRegisters + 1) * sizeof(void*)); + masm_.loadPtr(ioDataAddr, temp0_); +#else + if (js::jit::IntArgReg0 != temp0_) { + masm_.movePtr(js::jit::IntArgReg0, temp0_); + } +#endif + + // Start a new stack frame. + size_t frameBytes = sizeof(FrameData) + num_registers_ * sizeof(void*); + frameSize_ = js::jit::StackDecrementForCall(js::jit::ABIStackAlignment, + masm_.framePushed(), frameBytes); + masm_.reserveStack(frameSize_); + masm_.checkStackAlignment(); + + // Check if we have space on the stack. Use the *NoInterrupt stack limit to + // avoid failing repeatedly when the regex code is called from Ion JIT code. + // (See bug 1208819) + js::jit::Label stack_ok; + AbsoluteAddress limit_addr(cx_->addressOfJitStackLimitNoInterrupt()); + masm_.branchStackPtrRhs(Assembler::Below, limit_addr, &stack_ok); + + // There is not enough space on the stack. Exit with an exception. 
+ masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_); + masm_.jump(&exit_label_); + + masm_.bind(&stack_ok); +} + +void SMRegExpMacroAssembler::initFrameAndRegs() { + // At this point, an uninitialized stack frame has been created, + // and the address of the InputOutputData is in temp0_. + Register ioDataReg = temp0_; + + Register matchesReg = temp1_; + masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, matches)), + matchesReg); + + // Initialize output registers + masm_.loadPtr(Address(matchesReg, MatchPairs::offsetOfPairs()), temp2_); + masm_.storePtr(temp2_, matches()); + masm_.load32(Address(matchesReg, MatchPairs::offsetOfPairCount()), temp2_); + masm_.store32(temp2_, numMatches()); + +#ifdef DEBUG + // Bounds-check numMatches. + js::jit::Label enoughRegisters; + masm_.branchPtr(Assembler::GreaterThanOrEqual, temp2_, + ImmWord(num_capture_registers_ / 2), &enoughRegisters); + masm_.assumeUnreachable("Not enough output pairs for RegExp"); + masm_.bind(&enoughRegisters); +#endif + + // Load input start pointer. + masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputStart)), + current_position_); + + // Load input end pointer + masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputEnd)), + input_end_pointer_); + + // Set up input position to be negative offset from string end. + masm_.subPtr(input_end_pointer_, current_position_); + + // Store inputStart + masm_.storePtr(current_position_, inputStart()); + + // Load start index + Register startIndexReg = temp1_; + masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, startIndex)), + startIndexReg); + masm_.computeEffectiveAddress( + BaseIndex(current_position_, startIndexReg, factor()), current_position_); + + // Initialize current_character_. + // Load newline if index is at start, or previous character otherwise. 
+ js::jit::Label start_regexp; + js::jit::Label load_previous_character; + masm_.branchPtr(Assembler::NotEqual, startIndexReg, ImmWord(0), + &load_previous_character); + masm_.movePtr(ImmWord('\n'), current_character_); + masm_.jump(&start_regexp); + + masm_.bind(&load_previous_character); + LoadCurrentCharacterUnchecked(-1, 1); + masm_.bind(&start_regexp); + + // Initialize captured registers with inputStart - 1 + MOZ_ASSERT(num_capture_registers_ > 0); + Register inputStartMinusOneReg = temp2_; + masm_.loadPtr(inputStart(), inputStartMinusOneReg); + masm_.subPtr(Imm32(char_size()), inputStartMinusOneReg); + if (num_capture_registers_ > 8) { + masm_.movePtr(ImmWord(register_offset(0)), temp1_); + js::jit::Label init_loop; + masm_.bind(&init_loop); + masm_.storePtr(inputStartMinusOneReg, BaseIndex(masm_.getStackPointer(), + temp1_, js::jit::TimesOne)); + masm_.addPtr(ImmWord(sizeof(void*)), temp1_); + masm_.branchPtr(Assembler::LessThan, temp1_, + ImmWord(register_offset(num_capture_registers_)), + &init_loop); + } else { + // Unroll the loop + for (int i = 0; i < num_capture_registers_; i++) { + masm_.storePtr(inputStartMinusOneReg, register_location(i)); + } + } + + // Initialize backtrack stack pointer + masm_.loadPtr(AbsoluteAddress(isolate()->top_of_regexp_stack()), + backtrack_stack_pointer_); + masm_.storePtr(backtrack_stack_pointer_, backtrackStackBase()); +} + +void SMRegExpMacroAssembler::successHandler() { + MOZ_ASSERT(success_label_.used()); + masm_.bind(&success_label_); + + // Copy captures to the MatchPairs pointed to by the InputOutputData. + // Captures are stored as positions, which are negative byte offsets + // from the end of the string. We must convert them to actual + // indices. 
+ // + // Index: [ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ][END] + // Pos (1-byte): [-6 ][-5 ][-4 ][-3 ][-2 ][-1 ][ 0 ] // IS = -6 + // Pos (2-byte): [-12][-10][-8 ][-6 ][-4 ][-2 ][ 0 ] // IS = -12 + // + // To convert a position to an index, we subtract InputStart, and + // divide the result by char_size. + Register matchesReg = temp1_; + masm_.loadPtr(matches(), matchesReg); + + Register inputStartReg = temp2_; + masm_.loadPtr(inputStart(), inputStartReg); + + for (int i = 0; i < num_capture_registers_; i++) { + masm_.loadPtr(register_location(i), temp0_); + masm_.subPtr(inputStartReg, temp0_); + if (mode_ == UC16) { + masm_.rshiftPtrArithmetic(Imm32(1), temp0_); + } + masm_.store32(temp0_, Address(matchesReg, i * sizeof(int32_t))); + } + + masm_.movePtr(ImmWord(js::RegExpRunStatus_Success), temp0_); + // This falls through to the exit handler. +} + +void SMRegExpMacroAssembler::exitHandler() { + masm_.bind(&exit_label_); + + if (temp0_ != js::jit::ReturnReg) { + masm_.movePtr(temp0_, js::jit::ReturnReg); + } + + masm_.freeStack(frameSize_); + + // Restore registers which were saved on entry + for (GeneralRegisterBackwardIterator iter(savedRegisters_); iter.more(); + ++iter) { + masm_.Pop(*iter); + } + +#ifdef JS_CODEGEN_ARM64 + // Now restore the value that was in the PSP register on entry, and return. + + // Obtain the correct SP from the PSP. + masm_.Mov(js::jit::sp, js::jit::PseudoStackPointer64); + + // Restore the saved value of the PSP register, this value is whatever the + // caller had saved in it, not any actual SP value, and it must not be + // overwritten subsequently. + masm_.Ldr(js::jit::PseudoStackPointer64, + vixl::MemOperand(js::jit::sp, 16, vixl::PostIndex)); + + // Perform a plain Ret(), as abiret() will move SP <- PSP and that is wrong. 
+ masm_.Ret(vixl::lr); +#else + masm_.abiret(); +#endif + + if (exit_with_exception_label_.used()) { + masm_.bind(&exit_with_exception_label_); + + // Exit with an error result to signal thrown exception + masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_); + masm_.jump(&exit_label_); + } +} + +void SMRegExpMacroAssembler::backtrackHandler() { + if (!backtrack_label_.used()) { + return; + } + masm_.bind(&backtrack_label_); + Backtrack(); +} + +void SMRegExpMacroAssembler::stackOverflowHandler() { + if (!stack_overflow_label_.used()) { + return; + } + + // Called if the backtrack-stack limit has been hit. + // NOTE: depending on architecture, the call may have + // changed the stack pointer. We adjust for that below. + masm_.bind(&stack_overflow_label_); + + // Load argument + masm_.movePtr(ImmPtr(isolate()->regexp_stack()), temp1_); + + // Save registers before calling C function + LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); + +#ifdef JS_USE_LINK_REGISTER + masm.pushReturnAddress(); +#endif + + // Adjust for the return address on the stack. + size_t frameOffset = sizeof(void*); + + volatileRegs.takeUnchecked(temp0_); + volatileRegs.takeUnchecked(temp1_); + masm_.PushRegsInMask(volatileRegs); + + masm_.setupUnalignedABICall(temp0_); + masm_.passABIArg(temp1_); + masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, GrowBacktrackStack)); + masm_.storeCallBoolResult(temp0_); + + masm_.PopRegsInMask(volatileRegs); + + // If GrowBacktrackStack returned false, we have failed to grow the + // stack, and must exit with a stack-overflow exception. Do this in + // the caller so that the stack is adjusted by our return instruction. + js::jit::Label overflow_return; + masm_.branchTest32(Assembler::Zero, temp0_, temp0_, &overflow_return); + + // Otherwise, store the new backtrack stack base and recompute the new + // top of the stack. 
+ Address bsbAddress(masm_.getStackPointer(), + offsetof(FrameData, backtrackStackBase) + frameOffset); + masm_.subPtr(bsbAddress, backtrack_stack_pointer_); + + masm_.loadPtr(AbsoluteAddress(isolate()->top_of_regexp_stack()), temp1_); + masm_.storePtr(temp1_, bsbAddress); + masm_.addPtr(temp1_, backtrack_stack_pointer_); + + // Resume execution in calling code. + masm_.bind(&overflow_return); + masm_.ret(); +} + +// This is only used by tracing code. +// The return value doesn't matter. +RegExpMacroAssembler::IrregexpImplementation +SMRegExpMacroAssembler::Implementation() { + return kBytecodeImplementation; +} + +/*static */ +uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareStrings( + const char16_t* substring1, const char16_t* substring2, size_t byteLength) { + js::AutoUnsafeCallWithABI unsafe; + + MOZ_ASSERT(byteLength % sizeof(char16_t) == 0); + size_t length = byteLength / sizeof(char16_t); + + for (size_t i = 0; i < length; i++) { + char16_t c1 = substring1[i]; + char16_t c2 = substring2[i]; + if (c1 != c2) { + c1 = js::unicode::ToUpperCase(c1); + c2 = js::unicode::ToUpperCase(c2); + if (c1 != c2) { + return 0; + } + } + } + + return 1; +} + +/*static */ +uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareUCStrings( + const char16_t* substring1, const char16_t* substring2, size_t byteLength) { + js::AutoUnsafeCallWithABI unsafe; + + MOZ_ASSERT(byteLength % sizeof(char16_t) == 0); + size_t length = byteLength / sizeof(char16_t); + + for (size_t i = 0; i < length; i++) { + char16_t c1 = substring1[i]; + char16_t c2 = substring2[i]; + if (c1 != c2) { + c1 = js::unicode::FoldCase(c1); + c2 = js::unicode::FoldCase(c2); + if (c1 != c2) { + return 0; + } + } + } + + return 1; +} + +/* static */ +bool SMRegExpMacroAssembler::GrowBacktrackStack(RegExpStack* regexp_stack) { + js::AutoUnsafeCallWithABI unsafe; + size_t size = regexp_stack->stack_capacity(); + return !!regexp_stack->EnsureCapacity(size * 2); +} + +} // namespace internal +} // namespace v8 
diff --git a/js/src/regexp/regexp-shim.cc b/js/src/regexp/regexp-shim.cc index 773c17ead..3f3fa40eb 100644 --- a/js/src/regexp/regexp-shim.cc +++ b/js/src/regexp/regexp-shim.cc @@ -11,6 +11,7 @@ #include #include "regexp/regexp-shim.h" +#include "regexp/regexp-stack.h" namespace v8 { namespace internal { @@ -73,6 +74,8 @@ Handle::Handle(T object, Isolate* isolate) : location_(isolate->getHandleLocation(JS::Value(object))) {} template Handle::Handle(ByteArray b, Isolate* isolate); +template Handle::Handle(JS::Value v, Isolate* isolate); +template Handle::Handle(JSRegExp re, Isolate* isolate); template Handle::Handle(String s, Isolate* isolate); template @@ -150,6 +153,10 @@ std::unique_ptr String::ToCString() { return std::unique_ptr(); } +byte* Isolate::top_of_regexp_stack() const { + return reinterpret_cast(regexpStack_->memory_top_address_address()); +} + Handle Isolate::NewByteArray(int length, AllocationType alloc) { MOZ_RELEASE_ASSERT(length >= 0); diff --git a/js/src/regexp/regexp-shim.h b/js/src/regexp/regexp-shim.h index 462e396f4..942fcf733 100644 --- a/js/src/regexp/regexp-shim.h +++ b/js/src/regexp/regexp-shim.h @@ -22,7 +22,9 @@ #include #include "jit/Label.h" +#include "jit/shared/Assembler-shared.h" #include "js/Value.h" +#include "regexp/RegExpTypes.h" #include "regexp/util/flags.h" #include "regexp/util/vector.h" #include "regexp/util/zone.h" @@ -571,10 +573,8 @@ class ByteArray : public HeapObject { ByteArrayData* inner() const { return static_cast(value_.toPrivate()); } - PseudoHandle takeOwnership(Isolate* isolate); - - friend class SMRegExpMacroAssembler; public: + PseudoHandle takeOwnership(Isolate* isolate); byte get(uint32_t index) { MOZ_ASSERT(index < length()); return inner()->data()[index]; @@ -674,15 +674,19 @@ class MOZ_NONHEAP_CLASS Handle { }; inline ObjectRef operator->() const { return ObjectRef{**this}; } + static Handle fromHandleValue(JS::HandleValue handle) { + return Handle(handle.address()); + } + private: - Handle(JS::Value* 
location) : location_(location) {} + Handle(const JS::Value* location) : location_(location) {} template friend class Handle; template friend class MaybeHandle; - JS::Value* location_; + const JS::Value* location_; }; // A Handle can be converted into a MaybeHandle. Converting a MaybeHandle @@ -985,9 +989,13 @@ using Factory = Isolate; class Isolate { public: //********** Isolate code **********// - RegExpStack* regexp_stack() const { return regexp_stack_; } - bool has_pending_exception() { return cx()->isExceptionPending(); } - void StackOverflow() { js::ReportOverRecursed(cx()); } + RegExpStack* regexp_stack() const { return regexpStack_; } + byte* top_of_regexp_stack() const; + + // This is called from inside no-GC code. Instead of suppressing GC + // to allocate the error, we return false from Execute and call + // ReportOverRecursed in the caller. + void StackOverflow() {} #ifndef V8_INTL_SUPPORT unibrow::Mapping* jsregexp_uncanonicalize() { @@ -1066,7 +1074,7 @@ private: friend class HandleScope; JSContext* cx_; - RegExpStack* regexp_stack_; + RegExpStack* regexpStack_; Counters counters_; }; @@ -1101,7 +1109,6 @@ class Code : public HeapObject { c.value_ = JS::PrivateGCThingValue(JS::Value(object).toGCThing()); return c; } -private: js::jit::JitCode* inner() { return value_.toGCThing()->as(); } @@ -1124,7 +1131,7 @@ class Label { public: Label() : inner_(js::jit::Label()) {} - operator js::jit::Label*() { return &inner_; } + js::jit::Label* inner() { return &inner_; } void Unuse() { inner_.reset(); } @@ -1138,6 +1145,9 @@ class Label { private: js::jit::Label inner_; + js::jit::CodeOffset patchOffset_; + + friend class SMRegExpMacroAssembler; }; // TODO: Map flags to jitoptions -- cgit v1.2.3