summaryrefslogtreecommitdiffstats
path: root/js/src/jit/x86-shared/MacroAssembler-x86-shared.h
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/jit/x86-shared/MacroAssembler-x86-shared.h')
-rw-r--r--js/src/jit/x86-shared/MacroAssembler-x86-shared.h1411
1 files changed, 1411 insertions, 0 deletions
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h
new file mode 100644
index 000000000..8a0e154f1
--- /dev/null
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h
@@ -0,0 +1,1411 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_x86_shared_MacroAssembler_x86_shared_h
+#define jit_x86_shared_MacroAssembler_x86_shared_h
+
+#include "mozilla/Casting.h"
+
+#if defined(JS_CODEGEN_X86)
+# include "jit/x86/Assembler-x86.h"
+#elif defined(JS_CODEGEN_X64)
+# include "jit/x64/Assembler-x64.h"
+#endif
+
+// Debug-only checks for instructions that operate on byte registers.
+// In DEBUG builds CHECK_BYTEREG / CHECK_BYTEREGS assert that each register
+// argument is a member of Registers::SingleByteRegs; in all other builds both
+// macros expand to a no-op expression.
+#ifdef DEBUG
+ #define CHECK_BYTEREG(reg) \
+ JS_BEGIN_MACRO \
+ AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); \
+ MOZ_ASSERT(byteRegs.has(reg)); \
+ JS_END_MACRO
+ #define CHECK_BYTEREGS(r1, r2) \
+ JS_BEGIN_MACRO \
+ AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); \
+ MOZ_ASSERT(byteRegs.has(r1)); \
+ MOZ_ASSERT(byteRegs.has(r2)); \
+ JS_END_MACRO
+#else
+ #define CHECK_BYTEREG(reg) (void)0
+ #define CHECK_BYTEREGS(r1, r2) (void)0
+#endif
+
+namespace js {
+namespace jit {
+
+class MacroAssembler;
+
+class MacroAssemblerX86Shared : public Assembler
+{
+ private:
+ // Perform a downcast. Should be removed by Bug 996602.
+ MacroAssembler& asMasm();
+ const MacroAssembler& asMasm() const;
+
+ public:
+ typedef Vector<CodeOffset, 0, SystemAllocPolicy> UsesVector;
+
+ protected:
+
+ // For Double, Float and SimdData, make the move ctors explicit so that MSVC
+ // knows what to use instead of copying these data structures.
+ template<class T>
+ struct Constant {
+     // Raw value type held by this entry.
+     typedef T Pod;
+
+     T value;
+     // Code offsets recorded for this constant.
+     // NOTE(review): presumably the sites that reference it -- confirm.
+     UsesVector uses;
+
+     // Entries are created from a value and start with no recorded uses.
+     explicit Constant(const T& v) : value(v) {}
+
+     // Move-only: the value is copied, the list of uses is moved.
+     Constant(Constant&& rhs) : value(rhs.value), uses(mozilla::Move(rhs.uses)) {}
+     explicit Constant(const Constant&) = delete;
+ };
+
+ // Containers use SystemAllocPolicy since wasm releases memory after each
+ // function is compiled, and these need to live until after all functions
+ // are compiled.
+ using Double = Constant<uint64_t>;
+ Vector<Double, 0, SystemAllocPolicy> doubles_;
+ typedef HashMap<uint64_t, size_t, DefaultHasher<uint64_t>, SystemAllocPolicy> DoubleMap;
+ DoubleMap doubleMap_;
+
+ using Float = Constant<uint32_t>;
+ Vector<Float, 0, SystemAllocPolicy> floats_;
+ typedef HashMap<uint32_t, size_t, DefaultHasher<uint32_t>, SystemAllocPolicy> FloatMap;
+ FloatMap floatMap_;
+
+ // Constant entry specialised for SimdConstant; move-only like its base.
+ struct SimdData : public Constant<SimdConstant> {
+     explicit SimdData(SimdConstant c) : Constant<SimdConstant>(c) {}
+     SimdData(SimdData&& rhs) : Constant<SimdConstant>(mozilla::Move(rhs)) {}
+     explicit SimdData(const SimdData&) = delete;
+
+     // Forwards to the type() of the stored SimdConstant.
+     SimdConstant::Type type() const {
+         return value.type();
+     }
+ };
+
+ Vector<SimdData, 0, SystemAllocPolicy> simds_;
+ typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy> SimdMap;
+ SimdMap simdMap_;
+
+ // Accessors for the constant containers declared above; all are defined
+ // out of line. getConstant is the generic form, parameterised on the entry
+ // type T (keyed by T::Pod), a map and the vector holding the entries.
+ // NOTE(review): presumably these return the existing entry for a value or
+ // create a new one, and may return nullptr on allocation failure -- confirm
+ // against the definitions.
+ template<class T, class Map>
+ T* getConstant(const typename T::Pod& value, Map& map, Vector<T, 0, SystemAllocPolicy>& vec);
+
+ Float* getFloat(wasm::RawF32 f);
+ Double* getDouble(wasm::RawF64 d);
+ SimdData* getSimdData(const SimdConstant& v);
+
+ public:
+ using Assembler::call;
+
+ // Default constructor; all members are default-initialised.
+ MacroAssemblerX86Shared() {}
+
+ // Defined out of line.
+ // NOTE(review): presumably merges |other|'s state into this assembler and
+ // returns false on failure -- confirm against the definition.
+ bool asmMergeWith(const MacroAssemblerX86Shared& other);
+
+ // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second):
+ // the maximum when isMax is true, the minimum otherwise.
+ // Checks for NaN if canBeNaN is true. Both are defined out of line.
+ void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax);
+ void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax);
+
+ // Emits the vucomisd that sets the flags for |cond|. Conditions carrying
+ // DoubleConditionBitInvert pass the operands as (lhs, rhs); every other
+ // condition passes them as (rhs, lhs).
+ void compareDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
+     if (cond & DoubleConditionBitInvert) {
+         vucomisd(lhs, rhs);
+         return;
+     }
+     vucomisd(rhs, lhs);
+ }
+
+ // Single-precision counterpart of compareDouble: emits vucomiss with the
+ // operands as (lhs, rhs) when |cond| carries DoubleConditionBitInvert and
+ // as (rhs, lhs) otherwise.
+ void compareFloat(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
+     if (cond & DoubleConditionBitInvert) {
+         vucomiss(lhs, rhs);
+         return;
+     }
+     vucomiss(rhs, lhs);
+ }
+
+ // Defined out of line.
+ // NOTE(review): presumably these branch to |label| when |reg| holds -0.0,
+ // using |scratch| as a temporary; the meaning of |maybeNonZero| is not
+ // visible here -- confirm against the definitions.
+ void branchNegativeZero(FloatRegister reg, Register scratch, Label* label, bool maybeNonZero = true);
+ void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, Label* label);
+
+ // 32-bit moves. The immediate-to-register form goes through the ImmWord
+ // overload of mov; the other forms emit movl directly.
+ void move32(Imm32 imm, Register dest) {
+     // The ImmWord version of mov to register has special optimizations.
+     // Going through uint32_t guarantees the value is zero-extended.
+     const uint32_t zeroExtended = uint32_t(imm.value);
+     mov(ImmWord(zeroExtended), dest);
+ }
+
+ void move32(Imm32 imm, const Operand& dest) {
+     movl(imm, dest);
+ }
+
+ void move32(Register src, Register dest) {
+     movl(src, dest);
+ }
+
+ void move32(Register src, const Operand& dest) {
+     movl(src, dest);
+ }
+ // 32-bit bit tests: each overload emits testl with |lhs| as the tested
+ // operand and |rhs| / |imm| as the mask.
+ void test32(Register lhs, Register rhs) {
+     testl(rhs, lhs);
+ }
+ void test32(const Address& addr, Imm32 imm) {
+     testl(imm, Operand(addr));
+ }
+ // Takes the operand by const reference, like every other Operand parameter
+ // in this class, rather than copying it for each call.
+ void test32(const Operand& lhs, Imm32 imm) {
+     testl(imm, lhs);
+ }
+ void test32(Register lhs, Imm32 rhs) {
+     testl(rhs, lhs);
+ }
+ // 32-bit comparisons: every overload emits cmpl(rhs, lhs). The Address
+ // forms wrap the address in an Operand first.
+ void cmp32(Register lhs, Imm32 rhs) {
+     cmpl(rhs, lhs);
+ }
+
+ void cmp32(Register lhs, Register rhs) {
+     cmpl(rhs, lhs);
+ }
+
+ void cmp32(const Address& lhs, Register rhs) {
+     cmpl(rhs, Operand(lhs));
+ }
+
+ void cmp32(const Address& lhs, Imm32 rhs) {
+     cmpl(rhs, Operand(lhs));
+ }
+
+ void cmp32(const Operand& lhs, Imm32 rhs) {
+     cmpl(rhs, lhs);
+ }
+
+ void cmp32(const Operand& lhs, Register rhs) {
+     cmpl(rhs, lhs);
+ }
+
+ void cmp32(Register lhs, const Operand& rhs) {
+     cmpl(rhs, lhs);
+ }
+ // Emits cmplWithPatch(rhs, lhs) and hands back the CodeOffset it returns.
+ CodeOffset cmp32WithPatch(Register lhs, Imm32 rhs) {
+     const CodeOffset patchAt = cmplWithPatch(rhs, lhs);
+     return patchAt;
+ }
+ // Locked 32-bit increment of the value at |addr|.
+ void atomic_inc32(const Operand& addr) {
+     lock_incl(addr);
+ }
+
+ // Locked 32-bit decrement of the value at |addr|.
+ void atomic_dec32(const Operand& addr) {
+     lock_decl(addr);
+ }
+
+ // Atomic 8-bit fetch-and-add with a register addend: copies |src| into
+ // |output| if they differ, emits a locked xaddb against |mem|, then
+ // sign-extends the low byte of |output| to 32 bits. Both |src| and |output|
+ // must be byte-addressable registers.
+ template <typename T>
+ void atomicFetchAdd8SignExtend(Register src, const T& mem, Register temp, Register output) {
+     CHECK_BYTEREGS(src, output);
+     // No temporary is used here; assert that none was supplied, as the
+     // sibling atomicFetchAdd*/atomicFetchSub* variants do.
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output)
+         movl(src, output);
+     lock_xaddb(output, Operand(mem));
+     movsbl(output, output);
+ }
+
+ // Remaining 8-bit fetch-and-add variants. Each places the addend in
+ // |output|, emits a locked xaddb against |mem|, and then widens the low
+ // byte of |output| to 32 bits (zero- or sign-extended as named). No
+ // temporary is used, so |temp| must be InvalidReg.
+ template <typename T>
+ void atomicFetchAdd8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
+     CHECK_BYTEREGS(src, output);
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     const Operand target(mem);
+     lock_xaddb(output, target);
+     movzbl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchAdd8SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
+     CHECK_BYTEREG(output);
+     MOZ_ASSERT(temp == InvalidReg);
+     movb(src, output);
+     const Operand target(mem);
+     lock_xaddb(output, target);
+     movsbl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchAdd8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
+     CHECK_BYTEREG(output);
+     MOZ_ASSERT(temp == InvalidReg);
+     movb(src, output);
+     const Operand target(mem);
+     lock_xaddb(output, target);
+     movzbl(output, output);
+ }
+
+ // 16-bit fetch-and-add variants. Each places the addend in |output|, emits
+ // a locked xaddw against |mem|, and then widens the low 16 bits of |output|
+ // to 32 bits (sign- or zero-extended as named). |temp| must be InvalidReg.
+ template <typename T>
+ void atomicFetchAdd16SignExtend(Register src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     const Operand target(mem);
+     lock_xaddw(output, target);
+     movswl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchAdd16ZeroExtend(Register src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     const Operand target(mem);
+     lock_xaddw(output, target);
+     movzwl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchAdd16SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     movl(src, output);
+     const Operand target(mem);
+     lock_xaddw(output, target);
+     movswl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchAdd16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     movl(src, output);
+     const Operand target(mem);
+     lock_xaddw(output, target);
+     movzwl(output, output);
+ }
+
+ // 32-bit fetch-and-add: the addend is placed in |output| and a locked xaddl
+ // is emitted against |mem|. No widening is needed. |temp| must be
+ // InvalidReg.
+ template <typename T>
+ void atomicFetchAdd32(Register src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     const Operand target(mem);
+     lock_xaddl(output, target);
+ }
+
+ template <typename T>
+ void atomicFetchAdd32(Imm32 src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     movl(src, output);
+     const Operand target(mem);
+     lock_xaddl(output, target);
+ }
+
+ // 8-bit fetch-and-subtract variants, implemented as fetch-and-add of the
+ // negated operand: the register forms negate |output| with negl, the
+ // immediate forms load the negated immediate. A locked xaddb against |mem|
+ // follows, and the low byte of |output| is then widened to 32 bits.
+ // |temp| must be InvalidReg.
+ template <typename T>
+ void atomicFetchSub8SignExtend(Register src, const T& mem, Register temp, Register output) {
+     CHECK_BYTEREGS(src, output);
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     negl(output);
+     const Operand target(mem);
+     lock_xaddb(output, target);
+     movsbl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchSub8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
+     CHECK_BYTEREGS(src, output);
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     negl(output);
+     const Operand target(mem);
+     lock_xaddb(output, target);
+     movzbl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchSub8SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
+     CHECK_BYTEREG(output);
+     MOZ_ASSERT(temp == InvalidReg);
+     const Imm32 negated(-src.value);
+     movb(negated, output);
+     const Operand target(mem);
+     lock_xaddb(output, target);
+     movsbl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchSub8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
+     CHECK_BYTEREG(output);
+     MOZ_ASSERT(temp == InvalidReg);
+     const Imm32 negated(-src.value);
+     movb(negated, output);
+     const Operand target(mem);
+     lock_xaddb(output, target);
+     movzbl(output, output);
+ }
+
+ // 16-bit fetch-and-subtract variants, implemented as fetch-and-add of the
+ // negated operand via a locked xaddw against |mem|; the low 16 bits of
+ // |output| are then widened to 32 bits. |temp| must be InvalidReg.
+ template <typename T>
+ void atomicFetchSub16SignExtend(Register src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     negl(output);
+     const Operand target(mem);
+     lock_xaddw(output, target);
+     movswl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchSub16ZeroExtend(Register src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     negl(output);
+     const Operand target(mem);
+     lock_xaddw(output, target);
+     movzwl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchSub16SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     const Imm32 negated(-src.value);
+     movl(negated, output);
+     const Operand target(mem);
+     lock_xaddw(output, target);
+     movswl(output, output);
+ }
+
+ template <typename T>
+ void atomicFetchSub16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     const Imm32 negated(-src.value);
+     movl(negated, output);
+     const Operand target(mem);
+     lock_xaddw(output, target);
+     movzwl(output, output);
+ }
+
+ // 32-bit fetch-and-subtract with a register operand: |src| is copied to
+ // |output| if they differ, negated, and added with a locked xaddl against
+ // |mem|. |temp| must be InvalidReg.
+ template <typename T>
+ void atomicFetchSub32(Register src, const T& mem, Register temp, Register output) {
+     MOZ_ASSERT(temp == InvalidReg);
+     if (src != output) {
+         movl(src, output);
+     }
+     negl(output);
+     const Operand target(mem);
+     lock_xaddl(output, target);
+ }
+
+ // 32-bit fetch-and-subtract with an immediate operand: the negated
+ // immediate is loaded into |output| and added with a locked xaddl against
+ // |mem|.
+ template <typename T>
+ void atomicFetchSub32(Imm32 src, const T& mem, Register temp, Register output) {
+     // No temporary is used here; assert that none was supplied, as the
+     // sibling atomicFetchAdd*/atomicFetchSub* variants do.
+     MOZ_ASSERT(temp == InvalidReg);
+     movl(Imm32(-src.value), output);
+     lock_xaddl(output, Operand(mem));
+ }
+
+ // Shared body of the atomicFetch{And,Or,Xor}* methods below; expects the
+ // enclosing function to have |src|, |mem|, |temp| and |output| in scope.
+ // requires output == eax
+ //
+ // Emitted sequence: LOAD the current memory value into eax, then loop:
+ // copy eax to |temp|, apply OP(src, temp), and try to publish |temp| with
+ // LOCK_CMPXCHG. The loop branches back on NonZero, i.e. while the
+ // compare-exchange reports that memory no longer matched eax.
+#define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \
+ MOZ_ASSERT(output == eax); \
+ LOAD(Operand(mem), eax); \
+ Label again; \
+ bind(&again); \
+ movl(eax, temp); \
+ OP(src, temp); \
+ LOCK_CMPXCHG(temp, Operand(mem)); \
+ j(NonZero, &again);
+
+ // Atomic fetch-and-AND family, built on ATOMIC_BITOP_BODY with andl as the
+ // operation. |output| must be eax. The 8- and 16-bit forms widen eax to
+ // 32 bits afterwards (sign- or zero-extended as named).
+ template <typename S, typename T>
+ void atomicFetchAnd8SignExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
+ // Debug-only: the byte-sized cmpxchg above needs |temp| to be a byte register.
+ CHECK_BYTEREG(temp);
+ movsbl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchAnd8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
+ // Debug-only: the byte-sized cmpxchg above needs |temp| to be a byte register.
+ CHECK_BYTEREG(temp);
+ movzbl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchAnd16SignExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
+ movswl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchAnd16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
+ movzwl(eax, eax);
+ }
+ // The 32-bit form needs no widening.
+ template <typename S, typename T>
+ void atomicFetchAnd32(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchgl)
+ }
+
+ // Atomic fetch-and-OR family, built on ATOMIC_BITOP_BODY with orl as the
+ // operation. |output| must be eax. The 8- and 16-bit forms widen eax to
+ // 32 bits afterwards (sign- or zero-extended as named).
+ template <typename S, typename T>
+ void atomicFetchOr8SignExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
+ // Debug-only: the byte-sized cmpxchg above needs |temp| to be a byte register.
+ CHECK_BYTEREG(temp);
+ movsbl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchOr8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
+ // Debug-only: the byte-sized cmpxchg above needs |temp| to be a byte register.
+ CHECK_BYTEREG(temp);
+ movzbl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchOr16SignExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
+ movswl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchOr16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
+ movzwl(eax, eax);
+ }
+ // The 32-bit form needs no widening.
+ template <typename S, typename T>
+ void atomicFetchOr32(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchgl)
+ }
+
+ // Atomic fetch-and-XOR family, built on ATOMIC_BITOP_BODY with xorl as the
+ // operation. |output| must be eax. The 8- and 16-bit forms widen eax to
+ // 32 bits afterwards (sign- or zero-extended as named).
+ template <typename S, typename T>
+ void atomicFetchXor8SignExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
+ // Debug-only: the byte-sized cmpxchg above needs |temp| to be a byte register.
+ CHECK_BYTEREG(temp);
+ movsbl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchXor8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
+ // Debug-only: the byte-sized cmpxchg above needs |temp| to be a byte register.
+ CHECK_BYTEREG(temp);
+ movzbl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchXor16SignExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
+ movswl(eax, eax);
+ }
+ template <typename S, typename T>
+ void atomicFetchXor16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
+ movzwl(eax, eax);
+ }
+ // The 32-bit form needs no widening.
+ template <typename S, typename T>
+ void atomicFetchXor32(const S& src, const T& mem, Register temp, Register output) {
+ ATOMIC_BITOP_BODY(movl, xorl, lock_cmpxchgl)
+ }
+
+#undef ATOMIC_BITOP_BODY
+
+ // S is Register or Imm32; T is Address or BaseIndex.
+
+ // Locked add of |src| to the 8-, 16- or 32-bit value at |mem|; no result
+ // is produced.
+ template <typename S, typename T>
+ void atomicAdd8(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_addb(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicAdd16(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_addw(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicAdd32(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_addl(src, target);
+ }
+ // Locked subtract of |src| from the 8-, 16- or 32-bit value at |mem|; no
+ // result is produced.
+ template <typename S, typename T>
+ void atomicSub8(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_subb(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicSub16(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_subw(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicSub32(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_subl(src, target);
+ }
+ // Locked bitwise AND of |src| into the 8-, 16- or 32-bit value at |mem|;
+ // no result is produced.
+ template <typename S, typename T>
+ void atomicAnd8(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_andb(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicAnd16(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_andw(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicAnd32(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_andl(src, target);
+ }
+ // Locked bitwise OR of |src| into the 8-, 16- or 32-bit value at |mem|;
+ // no result is produced.
+ template <typename S, typename T>
+ void atomicOr8(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_orb(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicOr16(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_orw(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicOr32(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_orl(src, target);
+ }
+ // Locked bitwise XOR of |src| into the 8-, 16- or 32-bit value at |mem|;
+ // no result is produced.
+ template <typename S, typename T>
+ void atomicXor8(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_xorb(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicXor16(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_xorw(src, target);
+ }
+
+ template <typename S, typename T>
+ void atomicXor32(const S& src, const T& mem) {
+     const Operand target(mem);
+     lock_xorl(src, target);
+ }
+
+ // Emits a store-load barrier: mfence when SSE2 is available, otherwise a
+ // locked add of zero to the word at the top of the stack.
+ void storeLoadFence() {
+     // This implementation follows Linux.
+     if (!HasSSE2()) {
+         lock_addl(Imm32(0), Operand(Address(esp, 0)));
+         return;
+     }
+     masm.mfence();
+ }
+
+ // Compares the 16-bit registers with cmpw(rhs, lhs) and jumps to |label|
+ // when |cond| holds.
+ void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
+     cmpw(rhs, lhs);
+     j(cond, label);
+ }
+
+ // Tests the 16-bit registers with testw(rhs, lhs) and jumps to |label|
+ // when |cond| holds.
+ void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
+     testw(rhs, lhs);
+     j(cond, label);
+ }
+
+ // Unconditional jumps; every overload forwards to jmp. Register and
+ // Address targets are wrapped in an Operand first.
+ void jump(Label* label) {
+     jmp(label);
+ }
+
+ void jump(JitCode* code) {
+     jmp(code);
+ }
+
+ void jump(RepatchLabel* label) {
+     jmp(label);
+ }
+
+ void jump(Register reg) {
+     const Operand target(reg);
+     jmp(target);
+ }
+
+ void jump(const Address& addr) {
+     const Operand target(addr);
+     jmp(target);
+ }
+
+ void jump(wasm::TrapDesc target) {
+     jmp(target);
+ }
+
+ // int32 -> double conversions. Every form zeroes |dest| first and then
+ // emits vcvtsi2sd into it.
+ void convertInt32ToDouble(Register src, FloatRegister dest) {
+     // vcvtsi2sd and friends write only part of their output register,
+     // which causes slowdowns on out-of-order processors. Explicitly break
+     // dependencies with vxorpd (and vxorps elsewhere), which are handled
+     // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
+     // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
+     // document.
+     zeroDouble(dest);
+     vcvtsi2sd(src, dest, dest);
+ }
+
+ void convertInt32ToDouble(const Address& src, FloatRegister dest) {
+     // Same sequence as the Operand overload.
+     zeroDouble(dest);
+     vcvtsi2sd(Operand(src), dest, dest);
+ }
+
+ void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
+     // Same sequence as the Operand overload.
+     zeroDouble(dest);
+     vcvtsi2sd(Operand(src), dest, dest);
+ }
+
+ void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
+     // Zero the output register first to break the dependency on its
+     // previous contents, as explained in the Register overload.
+     zeroDouble(dest);
+     vcvtsi2sd(src, dest, dest);
+ }
+ // int32 -> float32 conversions. Every form zeroes |dest| first, so the
+ // partial register write of vcvtsi2ss does not depend on the previous
+ // contents of |dest|, and then emits vcvtsi2ss into it.
+ void convertInt32ToFloat32(Register src, FloatRegister dest) {
+     zeroFloat32(dest);
+     vcvtsi2ss(src, dest, dest);
+ }
+
+ void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
+     // Same sequence as the Operand overload.
+     zeroFloat32(dest);
+     vcvtsi2ss(Operand(src), dest, dest);
+ }
+
+ void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
+     zeroFloat32(dest);
+     vcvtsi2ss(src, dest, dest);
+ }
+ // Compares |reg| against a zeroed scratch double with vucomisd and returns
+ // the condition to branch on: NonZero when testing for truthiness, Zero
+ // when testing for falsiness.
+ Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
+     ScratchDoubleScope zero(asMasm());
+     zeroDouble(zero);
+     vucomisd(reg, zero);
+     if (truthy) {
+         return NonZero;
+     }
+     return Zero;
+ }
+
+ // Class which ensures that registers used in byte ops are compatible with
+ // such instructions, even if the original register passed in wasn't. This
+ // only applies to x86, as on x64 all registers are valid single byte regs.
+ // This doesn't lead to great code but helps to simplify code generation.
+ //
+ // Note that this can currently only be used in cases where the register is
+ // read from by the guarded instruction, not written to.
+ class AutoEnsureByteRegister {
+     MacroAssemblerX86Shared* masm;
+     // Register the caller asked for.
+     Register original_;
+     // Register to use in the byte instruction: |original_| itself when it
+     // is a single-byte register, otherwise a borrowed one.
+     Register substitute_;
+
+   public:
+     // If |reg| is not a single-byte register, picks a single-byte register
+     // that |address| does not use, saves it with a push, and copies |reg|
+     // into it. The push is why |address| must not be stack-pointer based.
+     template <typename T>
+     AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address, Register reg)
+       : masm(masm), original_(reg)
+     {
+         AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
+         if (singleByteRegs.has(reg)) {
+             substitute_ = reg;
+         } else {
+             MOZ_ASSERT(address.base != StackPointer);
+             do {
+                 substitute_ = singleByteRegs.takeAny();
+             } while (Operand(address).containsReg(substitute_));
+
+             masm->push(substitute_);
+             masm->mov(reg, substitute_);
+         }
+     }
+
+     // Restores the borrowed register, if one was taken.
+     ~AutoEnsureByteRegister() {
+         if (original_ != substitute_)
+             masm->pop(substitute_);
+     }
+
+     // The destructor emits a pop that must pair with exactly one push, so
+     // copying this guard would unbalance the emitted stack operations.
+     AutoEnsureByteRegister(const AutoEnsureByteRegister&) = delete;
+     AutoEnsureByteRegister& operator=(const AutoEnsureByteRegister&) = delete;
+
+     // The register to pass to the guarded byte instruction.
+     Register reg() {
+         return substitute_;
+     }
+ };
+
+ // Byte loads widened to 32 bits: movzbl for the zero-extending forms,
+ // movsbl for the sign-extending ones. Address and BaseIndex sources are
+ // wrapped in an Operand first.
+ void load8ZeroExtend(const Operand& src, Register dest) {
+     movzbl(src, dest);
+ }
+
+ void load8ZeroExtend(const Address& src, Register dest) {
+     const Operand from(src);
+     movzbl(from, dest);
+ }
+
+ void load8ZeroExtend(const BaseIndex& src, Register dest) {
+     const Operand from(src);
+     movzbl(from, dest);
+ }
+
+ void load8SignExtend(const Operand& src, Register dest) {
+     movsbl(src, dest);
+ }
+
+ void load8SignExtend(const Address& src, Register dest) {
+     const Operand from(src);
+     movsbl(from, dest);
+ }
+
+ void load8SignExtend(const BaseIndex& src, Register dest) {
+     const Operand from(src);
+     movsbl(from, dest);
+ }
+ // Stores the immediate |src| as a byte to |dest|.
+ template <typename T>
+ void store8(Imm32 src, const T& dest) {
+     movb(src, Operand(dest));
+ }
+
+ // Stores the low byte of |src| to |dest|. The guard substitutes a
+ // byte-addressable register when |src| is not one and restores it once
+ // the store has been emitted.
+ template <typename T>
+ void store8(Register src, const T& dest) {
+     AutoEnsureByteRegister byteSrc(this, dest, src);
+     movb(byteSrc.reg(), Operand(dest));
+ }
+ // 8-bit compare-and-exchange. |output| must be eax and |newval| a byte
+ // register. |oldval| is copied to |output| if they differ, a locked
+ // cmpxchgb is emitted against |mem|, and the low byte of |output| is then
+ // widened to 32 bits (zero- or sign-extended as named).
+ template <typename T>
+ void compareExchange8ZeroExtend(const T& mem, Register oldval, Register newval, Register output) {
+     MOZ_ASSERT(output == eax);
+     CHECK_BYTEREG(newval);
+     if (oldval != output) {
+         movl(oldval, output);
+     }
+     const Operand target(mem);
+     lock_cmpxchgb(newval, target);
+     movzbl(output, output);
+ }
+
+ template <typename T>
+ void compareExchange8SignExtend(const T& mem, Register oldval, Register newval, Register output) {
+     MOZ_ASSERT(output == eax);
+     CHECK_BYTEREG(newval);
+     if (oldval != output) {
+         movl(oldval, output);
+     }
+     const Operand target(mem);
+     lock_cmpxchgb(newval, target);
+     movsbl(output, output);
+ }
+ // 8-bit atomic exchange. |value| is copied to |output| if they differ,
+ // xchgb swaps |output| with the byte at |mem|, and the low byte of
+ // |output| is then widened to 32 bits (zero- or sign-extended as named).
+ template <typename T>
+ void atomicExchange8ZeroExtend(const T& mem, Register value, Register output) {
+     if (value != output) {
+         movl(value, output);
+     }
+     const Operand target(mem);
+     xchgb(output, target);
+     movzbl(output, output);
+ }
+
+ template <typename T>
+ void atomicExchange8SignExtend(const T& mem, Register value, Register output) {
+     if (value != output) {
+         movl(value, output);
+     }
+     const Operand target(mem);
+     xchgb(output, target);
+     movsbl(output, output);
+ }
+ // 16-bit loads zero-extended to 32 bits with movzwl. Address and
+ // BaseIndex sources are wrapped in an Operand first.
+ void load16ZeroExtend(const Operand& src, Register dest) {
+     movzwl(src, dest);
+ }
+
+ void load16ZeroExtend(const Address& src, Register dest) {
+     const Operand from(src);
+     movzwl(from, dest);
+ }
+
+ void load16ZeroExtend(const BaseIndex& src, Register dest) {
+     const Operand from(src);
+     movzwl(from, dest);
+ }
+ // Stores the low 16 bits of |src| to |dest| with movw.
+ template <typename S, typename T>
+ void store16(const S& src, const T& dest) {
+     const Operand target(dest);
+     movw(src, target);
+ }
+ // 16-bit compare-and-exchange. |output| must be eax. |oldval| is copied to
+ // |output| if they differ, a locked cmpxchgw is emitted against |mem|, and
+ // the low 16 bits of |output| are then widened to 32 bits (zero- or
+ // sign-extended as named).
+ template <typename T>
+ void compareExchange16ZeroExtend(const T& mem, Register oldval, Register newval, Register output) {
+     MOZ_ASSERT(output == eax);
+     if (oldval != output) {
+         movl(oldval, output);
+     }
+     const Operand target(mem);
+     lock_cmpxchgw(newval, target);
+     movzwl(output, output);
+ }
+
+ template <typename T>
+ void compareExchange16SignExtend(const T& mem, Register oldval, Register newval, Register output) {
+     MOZ_ASSERT(output == eax);
+     if (oldval != output) {
+         movl(oldval, output);
+     }
+     const Operand target(mem);
+     lock_cmpxchgw(newval, target);
+     movswl(output, output);
+ }
+ // 16-bit atomic exchange. |value| is copied to |output| if they differ,
+ // xchgw swaps |output| with the 16-bit value at |mem|, and the low 16 bits
+ // of |output| are then widened to 32 bits (zero- or sign-extended as
+ // named).
+ template <typename T>
+ void atomicExchange16ZeroExtend(const T& mem, Register value, Register output) {
+     if (value != output) {
+         movl(value, output);
+     }
+     const Operand target(mem);
+     xchgw(output, target);
+     movzwl(output, output);
+ }
+
+ template <typename T>
+ void atomicExchange16SignExtend(const T& mem, Register value, Register output) {
+     if (value != output) {
+         movl(value, output);
+     }
+     const Operand target(mem);
+     xchgw(output, target);
+     movswl(output, output);
+ }
+ // 16-bit loads sign-extended to 32 bits with movswl. Address and
+ // BaseIndex sources are wrapped in an Operand first.
+ void load16SignExtend(const Operand& src, Register dest) {
+     movswl(src, dest);
+ }
+
+ void load16SignExtend(const Address& src, Register dest) {
+     const Operand from(src);
+     movswl(from, dest);
+ }
+
+ void load16SignExtend(const BaseIndex& src, Register dest) {
+     const Operand from(src);
+     movswl(from, dest);
+ }
+ // 32-bit loads with movl. Address and BaseIndex sources are wrapped in an
+ // Operand first.
+ void load32(const Address& address, Register dest) {
+     const Operand from(address);
+     movl(from, dest);
+ }
+
+ void load32(const BaseIndex& src, Register dest) {
+     const Operand from(src);
+     movl(from, dest);
+ }
+
+ void load32(const Operand& src, Register dest) {
+     movl(src, dest);
+ }
+ // Stores the 32-bit |src| to |dest| with movl.
+ template <typename S, typename T>
+ void store32(const S& src, const T& dest) {
+     const Operand target(dest);
+     movl(src, target);
+ }
+ // 32-bit compare-and-exchange. |output| must be eax. |oldval| is copied to
+ // |output| if they differ and a locked cmpxchgl is emitted against |mem|.
+ template <typename T>
+ void compareExchange32(const T& mem, Register oldval, Register newval, Register output) {
+     MOZ_ASSERT(output == eax);
+     if (oldval != output) {
+         movl(oldval, output);
+     }
+     const Operand target(mem);
+     lock_cmpxchgl(newval, target);
+ }
+ // 32-bit atomic exchange. |value| is copied to |output| if they differ and
+ // xchgl then swaps |output| with the 32-bit value at |mem|.
+ template <typename T>
+ void atomicExchange32(const T& mem, Register value, Register output) {
+     if (value != output) {
+         movl(value, output);
+     }
+     const Operand target(mem);
+     xchgl(output, target);
+ }
+ // On this platform this is simply store32.
+ // NOTE(review): presumably the separate name exists for platforms whose
+ // store32 needs a second scratch register -- confirm.
+ template <typename S, typename T>
+ void store32_NoSecondScratch(const S& src, const T& dest) {
+     store32(src, dest);
+ }
+ // Loads a double from memory into |dest| with vmovsd.
+ void loadDouble(const Address& src, FloatRegister dest) {
+     vmovsd(src, dest);
+ }
+
+ void loadDouble(const BaseIndex& src, FloatRegister dest) {
+     vmovsd(src, dest);
+ }
+
+ // Dispatches on the operand kind to the Address or BaseIndex overload.
+ // Any other kind of operand is a fatal error.
+ void loadDouble(const Operand& src, FloatRegister dest) {
+     if (src.kind() == Operand::MEM_REG_DISP) {
+         loadDouble(src.toAddress(), dest);
+         return;
+     }
+     if (src.kind() == Operand::MEM_SCALE) {
+         loadDouble(src.toBaseIndex(), dest);
+         return;
+     }
+     MOZ_CRASH("unexpected operand kind");
+ }
+ // Register-to-register double move.
+ void moveDouble(FloatRegister src, FloatRegister dest) {
+     // vmovapd is used instead of vmovsd to avoid dependencies.
+     vmovapd(src, dest);
+ }
+
+ // Clears |reg| by XORing it with itself (vxorpd).
+ void zeroDouble(FloatRegister reg) {
+     vxorpd(reg, reg, reg);
+ }
+
+ // Clears |reg| by XORing it with itself (vxorps).
+ void zeroFloat32(FloatRegister reg) {
+     vxorps(reg, reg, reg);
+ }
+ // float32 -> double conversion of |src| into |dest| with vcvtss2sd.
+ void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
+     vcvtss2sd(src, dest, dest);
+ }
+
+ // double -> float32 conversion of |src| into |dest| with vcvtsd2ss.
+ void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
+     vcvtsd2ss(src, dest, dest);
+ }
+
+ // Converts the four float32 lanes of |src| to int32 lanes in |dest| using
+ // the truncating conversion vcvttps2dq.
+ void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
+ // Note that if the conversion failed (because the converted
+ // result is larger than the maximum signed int32, or less than the
+ // least signed int32, or NaN), this will return the undefined integer
+ // value (0x80000000).
+ vcvttps2dq(src, dest);
+ }
+ // Converts the four int32 lanes of |src| to float32 lanes in |dest| with
+ // vcvtdq2ps.
+ void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
+     vcvtdq2ps(src, dest);
+ }
+
+ // 128-bit bitwise operations; |dest| is both an input and the result.
+ void bitwiseAndSimd128(const Operand& src, FloatRegister dest) {
+     // TODO Using the "ps" variant for all types incurs a domain crossing
+     // penalty for integer types and double.
+     vandps(src, dest, dest);
+ }
+
+ void bitwiseAndNotSimd128(const Operand& src, FloatRegister dest) {
+     vandnps(src, dest, dest);
+ }
+
+ void bitwiseOrSimd128(const Operand& src, FloatRegister dest) {
+     vorps(src, dest, dest);
+ }
+
+ void bitwiseXorSimd128(const Operand& src, FloatRegister dest) {
+     vxorps(src, dest, dest);
+ }
+
+ // Clears |dest| by XORing it with itself, using the float-domain
+ // instruction (vxorps).
+ void zeroSimd128Float(FloatRegister dest) {
+     vxorps(dest, dest, dest);
+ }
+
+ // Clears |dest| by XORing it with itself, using the integer-domain
+ // instruction (vpxor).
+ void zeroSimd128Int(FloatRegister dest) {
+     vpxor(dest, dest, dest);
+ }
+
+ // Type-parameterised load/store helpers, declared inline and defined
+ // elsewhere.
+ // NOTE(review): presumably T selects the element type and the definitions
+ // dispatch to the matching typed load/store -- confirm.
+ template <class T, class Reg> inline void loadScalar(const Operand& src, Reg dest);
+ template <class T, class Reg> inline void storeScalar(Reg src, const Address& dest);
+ template <class T> inline void loadAlignedVector(const Address& src, FloatRegister dest);
+ template <class T> inline void storeAlignedVector(FloatRegister src, const Address& dest);
+
+ // Loads one int32 lane from memory into |dest| with vmovd.
+ void loadInt32x1(const Address& src, FloatRegister dest) {
+     const Operand from(src);
+     vmovd(from, dest);
+ }
+
+ void loadInt32x1(const BaseIndex& src, FloatRegister dest) {
+     const Operand from(src);
+     vmovd(from, dest);
+ }
+
+ // Loads two int32 lanes (64 bits) from memory into |dest| with vmovq.
+ void loadInt32x2(const Address& src, FloatRegister dest) {
+     const Operand from(src);
+     vmovq(from, dest);
+ }
+
+ void loadInt32x2(const BaseIndex& src, FloatRegister dest) {
+     const Operand from(src);
+     vmovq(from, dest);
+ }
+ // Loads three int32 lanes: the first two with a 64-bit vmovq into |dest|,
+ // the third with vmovd into a scratch register, which vmovlhps then merges
+ // into the high half of |dest|.
+ void loadInt32x3(const BaseIndex& src, FloatRegister dest) {
+     // Address of the third lane, two int32s past the start.
+     BaseIndex thirdLane(src);
+     thirdLane.offset += 2 * sizeof(int32_t);
+
+     ScratchSimd128Scope high(asMasm());
+     vmovq(Operand(src), dest);
+     vmovd(Operand(thirdLane), high);
+     vmovlhps(high, dest, dest);
+ }
+
+ void loadInt32x3(const Address& src, FloatRegister dest) {
+     // Address of the third lane, two int32s past the start.
+     Address thirdLane(src);
+     thirdLane.offset += 2 * sizeof(int32_t);
+
+     ScratchSimd128Scope high(asMasm());
+     vmovq(Operand(src), dest);
+     vmovd(Operand(thirdLane), high);
+     vmovlhps(high, dest, dest);
+ }
+
+ // Aligned 128-bit integer load, store and register move, all emitted as
+ // vmovdqa.
+ void loadAlignedSimd128Int(const Address& src, FloatRegister dest) {
+     const Operand from(src);
+     vmovdqa(from, dest);
+ }
+
+ void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
+     vmovdqa(src, dest);
+ }
+
+ void storeAlignedSimd128Int(FloatRegister src, const Address& dest) {
+     const Operand to(dest);
+     vmovdqa(src, to);
+ }
+
+ void moveSimd128Int(FloatRegister src, FloatRegister dest) {
+     vmovdqa(src, dest);
+ }
+ // Returns the register to use as the first input of an operation whose
+ // result goes to |dest|. With AVX |src| can be used directly; without it
+ // |src| is first copied into |dest|, which is then returned.
+ FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
+     if (!HasAVX()) {
+         moveSimd128Int(src, dest);
+         return dest;
+     }
+     return src;
+ }
+
+ // Operand flavour of the above: with AVX and a register operand that
+ // register is returned as-is; otherwise the operand is loaded into |dest|
+ // with an aligned load and |dest| is returned.
+ FloatRegister reusedInputAlignedInt32x4(const Operand& src, FloatRegister dest) {
+     const bool usableInPlace = HasAVX() && src.kind() == Operand::FPREG;
+     if (!usableInPlace) {
+         loadAlignedSimd128Int(src, dest);
+         return dest;
+     }
+     return FloatRegister::FromCode(src.fpu());
+ }
+ // Unaligned 128-bit integer loads, emitted as vmovdqu.
+ void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
+     const Operand from(src);
+     vmovdqu(from, dest);
+ }
+
+ void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) {
+     const Operand from(src);
+     vmovdqu(from, dest);
+ }
+
+ void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
+     vmovdqu(src, dest);
+ }
+
+ // Stores the first int32 lane of |src| to memory with vmovd.
+ void storeInt32x1(FloatRegister src, const Address& dest) {
+     const Operand to(dest);
+     vmovd(src, to);
+ }
+
+ void storeInt32x1(FloatRegister src, const BaseIndex& dest) {
+     const Operand to(dest);
+     vmovd(src, to);
+ }
+
+ // Stores the first two int32 lanes (64 bits) of |src| to memory with
+ // vmovq.
+ void storeInt32x2(FloatRegister src, const Address& dest) {
+     const Operand to(dest);
+     vmovq(src, to);
+ }
+
+ void storeInt32x2(FloatRegister src, const BaseIndex& dest) {
+     const Operand to(dest);
+     vmovq(src, to);
+ }
+ // Stores three int32 lanes: the first two with a 64-bit vmovq, then the
+ // high half of |src| is moved into a scratch register with vmovhlps and
+ // its first lane is stored with vmovd two int32s past the start.
+ void storeInt32x3(FloatRegister src, const Address& dest) {
+     // Address of the third lane.
+     Address thirdLane(dest);
+     thirdLane.offset += 2 * sizeof(int32_t);
+
+     vmovq(src, Operand(dest));
+     ScratchSimd128Scope high(asMasm());
+     vmovhlps(src, high, high);
+     vmovd(high, Operand(thirdLane));
+ }
+
+ void storeInt32x3(FloatRegister src, const BaseIndex& dest) {
+     // Address of the third lane.
+     BaseIndex thirdLane(dest);
+     thirdLane.offset += 2 * sizeof(int32_t);
+
+     vmovq(src, Operand(dest));
+     ScratchSimd128Scope high(asMasm());
+     vmovhlps(src, high, high);
+     vmovd(high, Operand(thirdLane));
+ }
+
+ // Unaligned 128-bit integer stores, emitted as vmovdqu.
+ void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
+     const Operand to(dest);
+     vmovdqu(src, to);
+ }
+
+ void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) {
+     const Operand to(dest);
+     vmovdqu(src, to);
+ }
+
+ void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
+     vmovdqu(src, dest);
+ }
+ // Packed integer comparisons and arithmetic. In every method |dest| is
+ // both the first input and the result; |src| is the second input.
+
+ // Lane-wise int32 equality (vpcmpeqd).
+ void packedEqualInt32x4(const Operand& src, FloatRegister dest) {
+     vpcmpeqd(src, dest, dest);
+ }
+
+ // Lane-wise signed int32 greater-than (vpcmpgtd).
+ void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
+     vpcmpgtd(src, dest, dest);
+ }
+
+ // 8-bit lanes: vpaddb / vpsubb.
+ void packedAddInt8(const Operand& src, FloatRegister dest) {
+     vpaddb(src, dest, dest);
+ }
+
+ void packedSubInt8(const Operand& src, FloatRegister dest) {
+     vpsubb(src, dest, dest);
+ }
+
+ // 16-bit lanes: vpaddw / vpsubw.
+ void packedAddInt16(const Operand& src, FloatRegister dest) {
+     vpaddw(src, dest, dest);
+ }
+
+ void packedSubInt16(const Operand& src, FloatRegister dest) {
+     vpsubw(src, dest, dest);
+ }
+
+ // 32-bit lanes: vpaddd / vpsubd.
+ void packedAddInt32(const Operand& src, FloatRegister dest) {
+     vpaddd(src, dest, dest);
+ }
+
+ void packedSubInt32(const Operand& src, FloatRegister dest) {
+     vpsubd(src, dest, dest);
+ }
+ // Approximate lane-wise reciprocal of |src| into |dest| (vrcpps).
+ void packedRcpApproximationFloat32x4(const Operand& src, FloatRegister dest) {
+     // The result is only an approximation; a fix-up may be needed if the
+     // spec requires a given precision for this operation.
+     // TODO See also bug 1068028.
+     vrcpps(src, dest);
+ }
+
+ // Approximate lane-wise reciprocal square root of |src| into |dest|
+ // (vrsqrtps).
+ void packedRcpSqrtApproximationFloat32x4(const Operand& src, FloatRegister dest) {
+     // TODO The same precision caveat as for the reciprocal approximation
+     // applies here. See also bug 1068028.
+     vrsqrtps(src, dest);
+ }
+
+ // Lane-wise square root of |src| into |dest| (vsqrtps).
+ void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) {
+     vsqrtps(src, dest);
+ }
+
+ void packedLeftShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
+ vpsllw(src, dest, dest);
+ }
+ void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
+ vpsllw(count, dest, dest);
+ }
+ void packedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
+ vpsraw(src, dest, dest);
+ }
+ void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
+ vpsraw(count, dest, dest);
+ }
+ // Unsigned right shift of the 16-bit lanes of dest, in place, by the count
+ // supplied in the vector register src. Emits vpsrlw (the logical form).
+ void packedUnsignedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
+ vpsrlw(src, dest, dest);
+ }
+ // Unsigned right shift of the 16-bit lanes of dest, in place, by the
+ // immediate count. Emits vpsrlw (the logical form).
+ void packedUnsignedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
+ vpsrlw(count, dest, dest);
+ }
+
+ // Shifts the 32-bit lanes of dest left, in place, by the count supplied in
+ // the vector register src (emits vpslld).
+ void packedLeftShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
+ vpslld(src, dest, dest);
+ }
+ // Shifts the 32-bit lanes of dest left, in place, by the immediate count
+ // (emits vpslld).
+ void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
+ vpslld(count, dest, dest);
+ }
+ // Shifts the 32-bit lanes of dest right, in place, by the count supplied in
+ // the vector register src. Emits vpsrad (the arithmetic form), in contrast
+ // to the Unsigned variant which emits vpsrld.
+ void packedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
+ vpsrad(src, dest, dest);
+ }
+ // Shifts the 32-bit lanes of dest right, in place, by the immediate count.
+ // Emits vpsrad (the arithmetic form).
+ void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
+ vpsrad(count, dest, dest);
+ }
+ // Unsigned right shift of the 32-bit lanes of dest, in place, by the count
+ // supplied in the vector register src. Emits vpsrld (the logical form).
+ void packedUnsignedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
+ vpsrld(src, dest, dest);
+ }
+ // Unsigned right shift of the 32-bit lanes of dest, in place, by the
+ // immediate count. Emits vpsrld (the logical form).
+ void packedUnsignedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
+ vpsrld(count, dest, dest);
+ }
+
+ // Loads three consecutive floats starting at src into dest without reading
+ // a fourth element from memory. Clobbers the SIMD scratch register.
+ // NOTE(review): the fourth lane of dest is presumably zero because vmovss
+ // from memory clears the upper lanes of the scratch register -- confirm.
+ void loadFloat32x3(const Address& src, FloatRegister dest) {
+ // srcZ addresses the third float, two floats past src.
+ Address srcZ(src);
+ srcZ.offset += 2 * sizeof(float);
+ // Load the first two floats (8 bytes) into the low half of dest.
+ vmovsd(src, dest);
+ ScratchSimd128Scope scratch(asMasm());
+ // Load the third float into the scratch register, then move the low half
+ // of the scratch register into the high half of dest.
+ vmovss(srcZ, scratch);
+ vmovlhps(scratch, dest, dest);
+ }
+ // BaseIndex overload: loads three consecutive floats starting at src into
+ // dest without reading a fourth element from memory. Clobbers the SIMD
+ // scratch register.
+ // NOTE(review): the fourth lane of dest is presumably zero because vmovss
+ // from memory clears the upper lanes of the scratch register -- confirm.
+ void loadFloat32x3(const BaseIndex& src, FloatRegister dest) {
+ // srcZ addresses the third float, two floats past src.
+ BaseIndex srcZ(src);
+ srcZ.offset += 2 * sizeof(float);
+ // Load the first two floats (8 bytes) into the low half of dest.
+ vmovsd(src, dest);
+ ScratchSimd128Scope scratch(asMasm());
+ // Load the third float into the scratch register, then move the low half
+ // of the scratch register into the high half of dest.
+ vmovss(srcZ, scratch);
+ vmovlhps(scratch, dest, dest);
+ }
+
+ // Loads a 128-bit float vector from the aligned address src into dest by
+ // emitting vmovaps.
+ void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
+ vmovaps(Operand(src), dest);
+ }
+ // Operand overload: moves a 128-bit float vector from src into dest by
+ // emitting vmovaps.
+ void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
+ vmovaps(src, dest);
+ }
+
+ // Stores the 128-bit float vector in src to the aligned address dest by
+ // emitting vmovaps.
+ void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
+ vmovaps(src, Operand(dest));
+ }
+ // Register-to-register copy of a 128-bit float vector (emits vmovaps).
+ void moveSimd128Float(FloatRegister src, FloatRegister dest) {
+ vmovaps(src, dest);
+ }
+ // Returns the register that a following instruction should use as its
+ // reused input. When HasAVX() is true no code is emitted and src itself is
+ // returned; otherwise src is first copied into dest and dest is returned.
+ FloatRegister reusedInputFloat32x4(FloatRegister src, FloatRegister dest) {
+ if (!HasAVX()) {
+ // No AVX: copy the input into the output register up front.
+ moveSimd128Float(src, dest);
+ return dest;
+ }
+ return src;
+ }
+ // Operand flavour of reusedInputFloat32x4. If HasAVX() is true and src is a
+ // float register operand, that register is returned without emitting any
+ // code; in every other case src is loaded into dest with
+ // loadAlignedSimd128Float and dest is returned.
+ FloatRegister reusedInputAlignedFloat32x4(const Operand& src, FloatRegister dest) {
+ const bool usableInPlace = HasAVX() && src.kind() == Operand::FPREG;
+ if (!usableInPlace) {
+ loadAlignedSimd128Float(src, dest);
+ return dest;
+ }
+ return FloatRegister::FromCode(src.fpu());
+ }
+ // Loads a 128-bit float vector from the address src into dest by emitting
+ // vmovups (the unaligned float-vector move).
+ void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) {
+ vmovups(Operand(src), dest);
+ }
+ // Loads a 128-bit float vector from the base+index address src into dest.
+ // Emits vmovups, matching the Address and Operand overloads and the
+ // storeUnalignedSimd128Float family; vmovdqu is the integer-vector form and
+ // belongs to the Simd128Int helpers.
+ void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) {
+ vmovups(Operand(src), dest);
+ }
+ // Operand overload: loads a 128-bit float vector from src into dest by
+ // emitting vmovups.
+ void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) {
+ vmovups(src, dest);
+ }
+ // Stores the 128-bit float vector in src to the address dest by emitting
+ // vmovups (the unaligned float-vector move).
+ void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) {
+ vmovups(src, Operand(dest));
+ }
+ // BaseIndex overload: stores the 128-bit float vector in src to dest by
+ // emitting vmovups.
+ void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) {
+ vmovups(src, Operand(dest));
+ }
+ // Operand overload: stores the 128-bit float vector in src to dest by
+ // emitting vmovups.
+ void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) {
+ vmovups(src, dest);
+ }
+ // Packed float32 add: emits vaddps, accumulating src into dest.
+ void packedAddFloat32(const Operand& src, FloatRegister dest) {
+ vaddps(src, dest, dest);
+ }
+ // Packed float32 subtract: emits vsubps with dest as both an input and the
+ // output, and src as the other input.
+ void packedSubFloat32(const Operand& src, FloatRegister dest) {
+ vsubps(src, dest, dest);
+ }
+ // Packed float32 multiply: emits vmulps, multiplying dest by src in place.
+ void packedMulFloat32(const Operand& src, FloatRegister dest) {
+ vmulps(src, dest, dest);
+ }
+ // Packed float32 divide: emits vdivps with dest as both an input and the
+ // output, and src as the other input.
+ void packedDivFloat32(const Operand& src, FloatRegister dest) {
+ vdivps(src, dest, dest);
+ }
+
+ // Packs four lane selectors, each in the range 0..3, into an 8-bit shuffle
+ // immediate: x occupies bits 0-1, y bits 2-3, z bits 4-5 and w bits 6-7.
+ // With the default arguments every lane selects itself.
+ static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
+ uint32_t z = 2, uint32_t w = 3)
+ {
+ MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
+ const uint32_t mask = x | (y << 2) | (z << 4) | (w << 6);
+ MOZ_ASSERT(mask < 256);
+ return mask;
+ }
+
+ // Emits vpshufd to shuffle the 32-bit lanes of src into dest according to
+ // mask (see ComputeShuffleMask for the encoding).
+ void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
+ vpshufd(mask, src, dest);
+ }
+ // Copies the low 32 bits of the vector register src into the general
+ // purpose register dest (emits vmovd).
+ void moveLowInt32(FloatRegister src, Register dest) {
+ vmovd(src, dest);
+ }
+
+ // Moves the high pair of floats of src into the low pair of dest by
+ // emitting vmovhlps; dest is also passed as the second input operand.
+ void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
+ vmovhlps(src, dest, dest);
+ }
+ // Shuffles the float32 lanes of src into dest according to mask, using the
+ // same register for both vshufps inputs.
+ void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
+ // The shuffle instruction on x86 is such that it moves 2 words from
+ // the dest and 2 words from the src operands. To simplify things, just
+ // clobber the output with the input and apply the instruction
+ // afterwards.
+ // Note: this is useAtStart-safe because src isn't read afterwards.
+ FloatRegister srcCopy = reusedInputFloat32x4(src, dest);
+ vshufps(mask, srcCopy, srcCopy, dest);
+ }
+ // Emits vshufps with src and dest as the two inputs and dest as the output,
+ // so the result mixes lanes from both according to mask.
+ void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) {
+ // Note this uses vshufps, which is a cross-domain penalty on CPU where it
+ // applies, but that's the way clang and gcc do it.
+ vshufps(mask, src, dest, dest);
+ }
+
+ // Moves the 32 bits in the general purpose register src into dest (vmovd),
+ // then converts that float32 to a double in place (vcvtss2sd).
+ void moveFloatAsDouble(Register src, FloatRegister dest) {
+ vmovd(src, dest);
+ vcvtss2sd(dest, dest, dest);
+ }
+ // Loads a float32 from src into dest (vmovss), then converts it to a double
+ // in place (vcvtss2sd).
+ void loadFloatAsDouble(const Address& src, FloatRegister dest) {
+ vmovss(src, dest);
+ vcvtss2sd(dest, dest, dest);
+ }
+ // BaseIndex overload: loads a float32 from src into dest (vmovss), then
+ // converts it to a double in place (vcvtss2sd).
+ void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
+ vmovss(src, dest);
+ vcvtss2sd(dest, dest, dest);
+ }
+ // Operand overload: loads a float32 via loadFloat32(Operand), which only
+ // accepts memory operands, then converts it to a double in place.
+ void loadFloatAsDouble(const Operand& src, FloatRegister dest) {
+ loadFloat32(src, dest);
+ vcvtss2sd(dest, dest, dest);
+ }
+ // Loads a float32 from the address src into dest (emits vmovss).
+ void loadFloat32(const Address& src, FloatRegister dest) {
+ vmovss(src, dest);
+ }
+ // Loads a float32 from the base+index address src into dest (emits vmovss).
+ void loadFloat32(const BaseIndex& src, FloatRegister dest) {
+ vmovss(src, dest);
+ }
+ // Loads a float32 from the memory operand src into dest by dispatching to
+ // the Address or BaseIndex overload. Only MEM_REG_DISP and MEM_SCALE
+ // operands are accepted; any other operand kind crashes.
+ void loadFloat32(const Operand& src, FloatRegister dest) {
+ switch (src.kind()) {
+ case Operand::MEM_REG_DISP:
+ loadFloat32(src.toAddress(), dest);
+ return;
+ case Operand::MEM_SCALE:
+ loadFloat32(src.toBaseIndex(), dest);
+ return;
+ default:
+ break;
+ }
+ MOZ_CRASH("unexpected operand kind");
+ }
+ // Register-to-register copy of a float32 value. The whole vector register
+ // is copied, not just the low lane.
+ void moveFloat32(FloatRegister src, FloatRegister dest) {
+ // Use vmovaps instead of vmovss to avoid dependencies.
+ vmovaps(src, dest);
+ }
+
+ // Checks whether a double is representable as a 32-bit integer. If so, the
+ // integer is written to the output register. Otherwise, control jumps to
+ // |fail|. This function overwrites the scratch float register.
+ // When negativeZeroCheck is true, branchNegativeZero is emitted first so
+ // that -0.0 also goes to |fail|.
+ void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true)
+ {
+ // Check for -0.0
+ if (negativeZeroCheck)
+ branchNegativeZero(src, dest, fail);
+
+ // Round-trip check: truncate to an integer, convert that integer back to
+ // a double, and compare the result with the original value.
+ ScratchDoubleScope scratch(asMasm());
+ vcvttsd2si(src, dest);
+ convertInt32ToDouble(dest, scratch);
+ vucomisd(scratch, src);
+ // Parity is set by an unordered comparison, i.e. src is NaN.
+ j(Assembler::Parity, fail);
+ // A mismatch means the conversion lost information.
+ j(Assembler::NotEqual, fail);
+ }
+
+ // Checks whether a float32 is representable as a 32-bit integer. If so, the
+ // integer is written to the output register. Otherwise, control jumps to
+ // |fail|. This function overwrites the scratch float register.
+ // When negativeZeroCheck is true, branchNegativeZeroFloat32 is emitted first
+ // so that -0.0 also goes to |fail|.
+ void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true)
+ {
+ // Check for -0.0
+ if (negativeZeroCheck)
+ branchNegativeZeroFloat32(src, dest, fail);
+
+ // Round-trip check: truncate to an integer, convert that integer back to
+ // a float32, and compare the result with the original value.
+ ScratchFloat32Scope scratch(asMasm());
+ vcvttss2si(src, dest);
+ convertInt32ToFloat32(dest, scratch);
+ vucomiss(scratch, src);
+ // Parity is set by an unordered comparison, i.e. src is NaN.
+ j(Assembler::Parity, fail);
+ // A mismatch means the conversion lost information.
+ j(Assembler::NotEqual, fail);
+ }
+
+ // Declaration only; the inline definition is not in this part of the file.
+ // NOTE(review): presumably clamps the integer in reg to the range [0, 255]
+ // in place -- confirm against the definition.
+ inline void clampIntToUint8(Register reg);
+
+ // Tries to materialize the double constant d in dest without a memory load.
+ // Returns true if code was emitted, which only happens when all bits of d
+ // are zero; returns false, emitting nothing, for every other constant.
+ bool maybeInlineDouble(wasm::RawF64 d, FloatRegister dest) {
+ // Several other common constants could be built from vpcmpeqw (all ones)
+ // followed by vpsllq and vpsrlq to get zeros at the ends, as described
+ // in "13.4 Generating constants" of "2. Optimizing subroutines in
+ // assembly language" by Agner Fog, and as previously implemented here.
+ // However, with x86 and x64 both using constant pool loads for double
+ // constants, this is probably only worthwhile in cases where a load is
+ // likely to be delayed.
+ if (d.bits() != 0)
+ return false;
+
+ // Loading zero with xor is specially optimized in hardware.
+ zeroDouble(dest);
+ return true;
+ }
+
+ // Tries to materialize the float32 constant f in dest without a memory
+ // load. Returns true if code was emitted, which only happens when all bits
+ // of f are zero; returns false, emitting nothing, otherwise.
+ bool maybeInlineFloat(wasm::RawF32 f, FloatRegister dest) {
+ if (f.bits() != 0)
+ return false;
+
+ // As for doubles, zeroing a register is the one case handled inline.
+ zeroFloat32(dest);
+ return true;
+ }
+
+ // Tries to materialize the integer SIMD constant v in dest without a memory
+ // load. Two constants are recognised: the splat of 0, emitted with
+ // zeroSimd128Int, and the splat of -1, emitted by comparing dest with
+ // itself using vpcmpeqw. Returns false, emitting nothing, for any other
+ // constant.
+ bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
+ static const SimdConstant allZeros = SimdConstant::SplatX4(0);
+ static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
+ if (v == allZeros)
+ zeroSimd128Int(dest);
+ else if (v == allOnes)
+ vpcmpeqw(Operand(dest), dest, dest);
+ else
+ return false;
+ return true;
+ }
+ // Tries to materialize the float SIMD constant v in dest without a memory
+ // load. Only the splat of 0.f is recognised; returns false, emitting
+ // nothing, for any other constant.
+ bool maybeInlineSimd128Float(const SimdConstant& v, const FloatRegister& dest) {
+ static const SimdConstant positiveZeros = SimdConstant::SplatX4(0.f);
+ // operator== here does a memcmp, so a constant containing -0 in any lane
+ // does not match and will not get inlined.
+ if (!(v == positiveZeros))
+ return false;
+ zeroSimd128Float(dest);
+ return true;
+ }
+
+ // Converts the bool held in the low byte of source to an int32 in dest by
+ // zero-extending that byte (emits movzbl).
+ void convertBoolToInt32(Register source, Register dest) {
+ // Note that C++ bool is only 1 byte, so zero extend it to clear the
+ // higher-order bits.
+ movzbl(source, dest);
+ }
+
+ // Materializes the current condition flags as 0 or 1 in dest: 1 if cond
+ // holds, 0 otherwise. ifNaN selects what happens when the parity flag is
+ // set (the unordered outcome of a floating-point compare):
+ // NaN_HandledByCond leaves the result to cond alone, NaN_IsTrue forces 1
+ // and NaN_IsFalse forces 0.
+ void emitSet(Assembler::Condition cond, Register dest,
+ Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
+ if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
+ // If the register we're defining is a single byte register,
+ // take advantage of the setCC instruction
+ setCC(cond, dest);
+ movzbl(dest, dest);
+
+ if (ifNaN != Assembler::NaN_HandledByCond) {
+ // On parity, overwrite the setCC result with the value requested
+ // for NaN; otherwise skip the overwrite.
+ Label noNaN;
+ j(Assembler::NoParity, &noNaN);
+ mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
+ bind(&noNaN);
+ }
+ } else {
+ // dest has no byte form, so build the result with branches instead.
+ Label end;
+ Label ifFalse;
+
+ if (ifNaN == Assembler::NaN_IsFalse)
+ j(Assembler::Parity, &ifFalse);
+ // Note a subtlety here: FLAGS is live at this point, and the
+ // mov interface doesn't guarantee to preserve FLAGS. Use
+ // movl instead of mov, because the movl instruction
+ // preserves FLAGS.
+ movl(Imm32(1), dest);
+ j(cond, &end);
+ if (ifNaN == Assembler::NaN_IsTrue)
+ j(Assembler::Parity, &end);
+ bind(&ifFalse);
+ // No later instruction in this function reads FLAGS, so the generic
+ // mov is used here.
+ mov(ImmWord(0), dest);
+
+ bind(&end);
+ }
+ }
+
+ // Emits a JMP to |label| that can later be toggled to a CMP (see
+ // ToggleToJmp() and ToggleToCmp()). Returns the code offset at which the
+ // jump starts, captured before the jump is emitted.
+ CodeOffset toggledJump(Label* label) {
+ const CodeOffset jumpStart(size());
+ jump(label);
+ return jumpStart;
+ }
+
+ // Computes the address denoted by |address| into dest by emitting lea.
+ // T may be any type from which an Operand can be constructed.
+ template <typename T>
+ void computeEffectiveAddress(const T& address, Register dest) {
+ lea(Operand(address), dest);
+ }
+
+ // Intentionally a no-op on x86/x64; emits no code.
+ void checkStackAlignment() {
+ // Exists for ARM compatibility.
+ }
+
+ // Returns the current code offset (size()) as a CodeOffset, without
+ // emitting any code.
+ CodeOffset labelForPatch() {
+ return CodeOffset(size());
+ }
+
+ // Emits a plain ret.
+ void abiret() {
+ ret();
+ }
+
+ // Declaration only; the definition is not in this part of the file.
+ // NOTE(review): presumably emits a compare-and-exchange on the element of
+ // type arrayType at |mem| (expected oldval, replacement newval) and puts
+ // the previous value in |output| -- confirm against the definition.
+ template<typename T>
+ void compareExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register oldval, Register newval,
+ Register temp, AnyRegister output);
+
+ // Declaration only; the definition is not in this part of the file.
+ // NOTE(review): presumably emits an atomic exchange of |value| with the
+ // element of type arrayType at |mem| and puts the previous value in
+ // |output| -- confirm against the definition.
+ template<typename T>
+ void atomicExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register value,
+ Register temp, AnyRegister output);
+
+ protected:
+ // Declaration only; the definition is not in this part of the file.
+ // NOTE(review): the bool result presumably reports success -- confirm.
+ bool buildOOLFakeExitFrame(void* fakeReturnAddr);
+};
+
+// Specialization for float elements: forwards to loadAlignedSimd128Float so
+// that the float-vector move (movaps) is used. All other element types go
+// through the generic template, which uses the integer-vector move (movdqa).
+template <>
+inline void
+MacroAssemblerX86Shared::loadAlignedVector<float>(const Address& src, FloatRegister dest)
+{
+ loadAlignedSimd128Float(src, dest);
+}
+
+// Generic aligned vector load, used for every element type T without its own
+// specialization: forwards to loadAlignedSimd128Int.
+template <typename T>
+inline void
+MacroAssemblerX86Shared::loadAlignedVector(const Address& src, FloatRegister dest)
+{
+ loadAlignedSimd128Int(src, dest);
+}
+
+// Specialization for float elements: forwards to storeAlignedSimd128Float so
+// that the float-vector move (movaps) is used. All other element types go
+// through the generic template, which uses the integer-vector move (movdqa).
+template <>
+inline void
+MacroAssemblerX86Shared::storeAlignedVector<float>(FloatRegister src, const Address& dest)
+{
+ storeAlignedSimd128Float(src, dest);
+}
+
+// Generic aligned vector store, used for every element type T without its
+// own specialization: forwards to storeAlignedSimd128Int.
+template <typename T>
+inline void
+MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src, const Address& dest)
+{
+ storeAlignedSimd128Int(src, dest);
+}
+
+// Loads an 8-bit element from src into dest via load8ZeroExtend.
+// NOTE(review): the element type is signed (int8_t) but the load
+// zero-extends; confirm that callers do not rely on the sign bit.
+template <> inline void
+MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src, Register dest) {
+ load8ZeroExtend(src, dest);
+}
+// Loads a 16-bit element from src into dest via load16ZeroExtend.
+// NOTE(review): the element type is signed (int16_t) but the load
+// zero-extends; confirm that callers do not rely on the sign bit.
+template <> inline void
+MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src, Register dest) {
+ load16ZeroExtend(src, dest);
+}
+// Loads a 32-bit element from src into dest via load32.
+template <> inline void
+MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src, Register dest) {
+ load32(src, dest);
+}
+// Loads a float32 element from src into the float register dest via
+// loadFloat32(Operand), which only accepts memory operands.
+template <> inline void
+MacroAssemblerX86Shared::loadScalar<float>(const Operand& src, FloatRegister dest) {
+ loadFloat32(src, dest);
+}
+
+// Stores an 8-bit element from src to dest via store8.
+template <> inline void
+MacroAssemblerX86Shared::storeScalar<int8_t>(Register src, const Address& dest) {
+ store8(src, dest);
+}
+// Stores a 16-bit element from src to dest via store16.
+template <> inline void
+MacroAssemblerX86Shared::storeScalar<int16_t>(Register src, const Address& dest) {
+ store16(src, dest);
+}
+// Stores a 32-bit element from src to dest via store32.
+template <> inline void
+MacroAssemblerX86Shared::storeScalar<int32_t>(Register src, const Address& dest) {
+ store32(src, dest);
+}
+// Stores a float32 element from the float register src to dest by emitting
+// vmovss directly.
+template <> inline void
+MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src, const Address& dest) {
+ vmovss(src, dest);
+}
+
+} // namespace jit
+} // namespace js
+
+#undef CHECK_BYTEREG
+#undef CHECK_BYTEREGS
+
+#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */