From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Fri, 2 Feb 2018 04:16:08 -0500 Subject: Add m-esr52 at 52.6.0 --- js/src/jit/x86-shared/Architecture-x86-shared.cpp | 97 + js/src/jit/x86-shared/Architecture-x86-shared.h | 463 ++ js/src/jit/x86-shared/Assembler-x86-shared.cpp | 350 ++ js/src/jit/x86-shared/Assembler-x86-shared.h | 3652 +++++++++++++ .../jit/x86-shared/AssemblerBuffer-x86-shared.cpp | 25 + js/src/jit/x86-shared/AssemblerBuffer-x86-shared.h | 205 + .../jit/x86-shared/AtomicOperations-x86-shared.h | 602 +++ js/src/jit/x86-shared/BaseAssembler-x86-shared.h | 5393 ++++++++++++++++++++ .../jit/x86-shared/BaselineCompiler-x86-shared.cpp | 15 + .../jit/x86-shared/BaselineCompiler-x86-shared.h | 24 + js/src/jit/x86-shared/BaselineIC-x86-shared.cpp | 44 + js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp | 4727 +++++++++++++++++ js/src/jit/x86-shared/CodeGenerator-x86-shared.h | 357 ++ js/src/jit/x86-shared/Constants-x86-shared.h | 228 + js/src/jit/x86-shared/Disassembler-x86-shared.cpp | 568 +++ js/src/jit/x86-shared/Encoding-x86-shared.h | 413 ++ js/src/jit/x86-shared/LIR-x86-shared.h | 421 ++ js/src/jit/x86-shared/Lowering-x86-shared.cpp | 1019 ++++ js/src/jit/x86-shared/Lowering-x86-shared.h | 81 + .../jit/x86-shared/MacroAssembler-x86-shared-inl.h | 1284 +++++ .../jit/x86-shared/MacroAssembler-x86-shared.cpp | 855 ++++ js/src/jit/x86-shared/MacroAssembler-x86-shared.h | 1411 +++++ js/src/jit/x86-shared/MoveEmitter-x86-shared.cpp | 581 +++ js/src/jit/x86-shared/MoveEmitter-x86-shared.h | 74 + js/src/jit/x86-shared/Patching-x86-shared.h | 124 + 25 files changed, 23013 insertions(+) create mode 100644 js/src/jit/x86-shared/Architecture-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/Architecture-x86-shared.h create mode 100644 js/src/jit/x86-shared/Assembler-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/Assembler-x86-shared.h create mode 100644 
js/src/jit/x86-shared/AssemblerBuffer-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/AssemblerBuffer-x86-shared.h create mode 100644 js/src/jit/x86-shared/AtomicOperations-x86-shared.h create mode 100644 js/src/jit/x86-shared/BaseAssembler-x86-shared.h create mode 100644 js/src/jit/x86-shared/BaselineCompiler-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/BaselineCompiler-x86-shared.h create mode 100644 js/src/jit/x86-shared/BaselineIC-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/CodeGenerator-x86-shared.h create mode 100644 js/src/jit/x86-shared/Constants-x86-shared.h create mode 100644 js/src/jit/x86-shared/Disassembler-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/Encoding-x86-shared.h create mode 100644 js/src/jit/x86-shared/LIR-x86-shared.h create mode 100644 js/src/jit/x86-shared/Lowering-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/Lowering-x86-shared.h create mode 100644 js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h create mode 100644 js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/MacroAssembler-x86-shared.h create mode 100644 js/src/jit/x86-shared/MoveEmitter-x86-shared.cpp create mode 100644 js/src/jit/x86-shared/MoveEmitter-x86-shared.h create mode 100644 js/src/jit/x86-shared/Patching-x86-shared.h (limited to 'js/src/jit/x86-shared') diff --git a/js/src/jit/x86-shared/Architecture-x86-shared.cpp b/js/src/jit/x86-shared/Architecture-x86-shared.cpp new file mode 100644 index 000000000..5069d8ac9 --- /dev/null +++ b/js/src/jit/x86-shared/Architecture-x86-shared.cpp @@ -0,0 +1,97 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/Architecture-x86-shared.h" +#if !defined(JS_CODEGEN_X86) && !defined(JS_CODEGEN_X64) +# error "Wrong architecture. Only x86 and x64 should build this file!" +#endif + +#include "jit/RegisterSets.h" + +const char* +js::jit::FloatRegister::name() const { + static const char* const names[] = { + +#ifdef JS_CODEGEN_X64 +#define FLOAT_REGS_(TYPE) \ + "%xmm0" TYPE, "%xmm1" TYPE, "%xmm2" TYPE, "%xmm3" TYPE, \ + "%xmm4" TYPE, "%xmm5" TYPE, "%xmm6" TYPE, "%xmm7" TYPE, \ + "%xmm8" TYPE, "%xmm9" TYPE, "%xmm10" TYPE, "%xmm11" TYPE, \ + "%xmm12" TYPE, "%xmm13" TYPE, "%xmm14" TYPE, "%xmm15" TYPE +#else +#define FLOAT_REGS_(TYPE) \ + "%xmm0" TYPE, "%xmm1" TYPE, "%xmm2" TYPE, "%xmm3" TYPE, \ + "%xmm4" TYPE, "%xmm5" TYPE, "%xmm6" TYPE, "%xmm7" TYPE +#endif + + // These should be enumerated in the same order as in + // FloatRegisters::ContentType. + FLOAT_REGS_(".s"), + FLOAT_REGS_(".d"), + FLOAT_REGS_(".i4"), + FLOAT_REGS_(".s4") +#undef FLOAT_REGS_ + + }; + MOZ_ASSERT(size_t(code()) < mozilla::ArrayLength(names)); + return names[size_t(code())]; +} + +js::jit::FloatRegisterSet +js::jit::FloatRegister::ReduceSetForPush(const FloatRegisterSet& s) +{ + SetType bits = s.bits(); + + // Ignore all SIMD register, if not supported. + if (!JitSupportsSimd()) + bits &= Codes::AllPhysMask * Codes::SpreadScalar; + + // Exclude registers which are already pushed with a larger type. High bits + // are associated with larger register types. Thus we keep the set of + // registers which are not included in larger type. 
+ bits &= ~(bits >> (1 * Codes::TotalPhys)); + bits &= ~(bits >> (2 * Codes::TotalPhys)); + bits &= ~(bits >> (3 * Codes::TotalPhys)); + + return FloatRegisterSet(bits); +} + +uint32_t +js::jit::FloatRegister::GetPushSizeInBytes(const FloatRegisterSet& s) +{ + SetType all = s.bits(); + SetType set128b = + (all >> (uint32_t(Codes::Simd128) * Codes::TotalPhys)) & Codes::AllPhysMask; + SetType doubleSet = + (all >> (uint32_t(Codes::Double) * Codes::TotalPhys)) & Codes::AllPhysMask; + SetType singleSet = + (all >> (uint32_t(Codes::Single) * Codes::TotalPhys)) & Codes::AllPhysMask; + + // PushRegsInMask pushes the largest register first, and thus avoids pushing + // aliased registers. So we have to filter out the physical registers which + // are already pushed as part of larger registers. + SetType set64b = doubleSet & ~set128b; + SetType set32b = singleSet & ~set64b & ~set128b; + + static_assert(Codes::AllPhysMask <= 0xffff, "We can safely use CountPopulation32"); + uint32_t count32b = mozilla::CountPopulation32(set32b); + +#if defined(JS_CODEGEN_X64) + // If we have an odd number of 32 bits values, then we increase the size to + // keep the stack aligned on 8 bytes. Note: Keep in sync with + // PushRegsInMask, and PopRegsInMaskIgnore. 
+ count32b += count32b & 1; +#endif + + return mozilla::CountPopulation32(set128b) * (4 * sizeof(int32_t)) + + mozilla::CountPopulation32(set64b) * sizeof(double) + + count32b * sizeof(float); +} +uint32_t +js::jit::FloatRegister::getRegisterDumpOffsetInBytes() +{ + return uint32_t(encoding()) * sizeof(FloatRegisters::RegisterContent); +} diff --git a/js/src/jit/x86-shared/Architecture-x86-shared.h b/js/src/jit/x86-shared/Architecture-x86-shared.h new file mode 100644 index 000000000..a4e4fa5f4 --- /dev/null +++ b/js/src/jit/x86-shared/Architecture-x86-shared.h @@ -0,0 +1,463 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Architecture_x86_h +#define jit_x86_shared_Architecture_x86_h + +#if !defined(JS_CODEGEN_X86) && !defined(JS_CODEGEN_X64) +# error "Unsupported architecture!" +#endif + +#include "mozilla/MathAlgorithms.h" + +#include + +#include "jit/x86-shared/Constants-x86-shared.h" + +namespace js { +namespace jit { + +// Does this architecture support SIMD conversions between Uint32x4 and Float32x4? +static constexpr bool SupportsUint32x4FloatConversions = false; + +// Does this architecture support comparisons of unsigned integer vectors? +static constexpr bool SupportsUint8x16Compares = false; +static constexpr bool SupportsUint16x8Compares = false; +static constexpr bool SupportsUint32x4Compares = false; + +#if defined(JS_CODEGEN_X86) +// In bytes: slots needed for potential memory->memory move spills. +// +8 for cycles +// +4 for gpr spills +// +8 for double spills +static const uint32_t ION_FRAME_SLACK_SIZE = 20; + +#elif defined(JS_CODEGEN_X64) +// In bytes: slots needed for potential memory->memory move spills. 
+// +8 for cycles +// +8 for gpr spills +// +8 for double spills +static const uint32_t ION_FRAME_SLACK_SIZE = 24; +#endif + +#if defined(JS_CODEGEN_X86) +// These offsets are specific to nunboxing, and capture offsets into the +// components of a js::Value. +static const int32_t NUNBOX32_TYPE_OFFSET = 4; +static const int32_t NUNBOX32_PAYLOAD_OFFSET = 0; + +// Size of each bailout table entry. On x86 this is a 5-byte relative call. +static const uint32_t BAILOUT_TABLE_ENTRY_SIZE = 5; +#endif + +#if defined(JS_CODEGEN_X64) && defined(_WIN64) +static const uint32_t ShadowStackSpace = 32; +#else +static const uint32_t ShadowStackSpace = 0; +#endif + +static const uint32_t JumpImmediateRange = INT32_MAX; + +class Registers { + public: + typedef uint8_t Code; + typedef X86Encoding::RegisterID Encoding; + + // Content spilled during bailouts. + union RegisterContent { + uintptr_t r; + }; + +#if defined(JS_CODEGEN_X86) + typedef uint8_t SetType; + + static const char* GetName(Code code) { + return X86Encoding::GPRegName(Encoding(code)); + } + + static const uint32_t Total = 8; + static const uint32_t TotalPhys = 8; + static const uint32_t Allocatable = 7; + +#elif defined(JS_CODEGEN_X64) + typedef uint16_t SetType; + + static const char* GetName(Code code) { + static const char * const Names[] = { "rax", "rcx", "rdx", "rbx", + "rsp", "rbp", "rsi", "rdi", + "r8", "r9", "r10", "r11", + "r12", "r13", "r14", "r15" }; + return Names[code]; + } + + static const uint32_t Total = 16; + static const uint32_t TotalPhys = 16; + static const uint32_t Allocatable = 14; +#endif + + static uint32_t SetSize(SetType x) { + static_assert(sizeof(SetType) <= 4, "SetType must be, at most, 32 bits"); + return mozilla::CountPopulation32(x); + } + static uint32_t FirstBit(SetType x) { + return mozilla::CountTrailingZeroes32(x); + } + static uint32_t LastBit(SetType x) { + return 31 - mozilla::CountLeadingZeroes32(x); + } + + static Code FromName(const char* name) { + for (size_t i = 0; i < 
Total; i++) { + if (strcmp(GetName(Code(i)), name) == 0) + return Code(i); + } + return Invalid; + } + + static const Encoding StackPointer = X86Encoding::rsp; + static const Encoding Invalid = X86Encoding::invalid_reg; + + static const SetType AllMask = (1 << Total) - 1; + +#if defined(JS_CODEGEN_X86) + static const SetType ArgRegMask = 0; + + static const SetType VolatileMask = + (1 << X86Encoding::rax) | + (1 << X86Encoding::rcx) | + (1 << X86Encoding::rdx); + + static const SetType WrapperMask = + VolatileMask | + (1 << X86Encoding::rbx); + + static const SetType SingleByteRegs = + (1 << X86Encoding::rax) | + (1 << X86Encoding::rcx) | + (1 << X86Encoding::rdx) | + (1 << X86Encoding::rbx); + + static const SetType NonAllocatableMask = + (1 << X86Encoding::rsp); + + // Registers returned from a JS -> JS call. + static const SetType JSCallMask = + (1 << X86Encoding::rcx) | + (1 << X86Encoding::rdx); + + // Registers returned from a JS -> C call. + static const SetType CallMask = + (1 << X86Encoding::rax); + +#elif defined(JS_CODEGEN_X64) + static const SetType ArgRegMask = +# if !defined(_WIN64) + (1 << X86Encoding::rdi) | + (1 << X86Encoding::rsi) | +# endif + (1 << X86Encoding::rdx) | + (1 << X86Encoding::rcx) | + (1 << X86Encoding::r8) | + (1 << X86Encoding::r9); + + static const SetType VolatileMask = + (1 << X86Encoding::rax) | + ArgRegMask | + (1 << X86Encoding::r10) | + (1 << X86Encoding::r11); + + static const SetType WrapperMask = VolatileMask; + + static const SetType SingleByteRegs = AllMask & ~(1 << X86Encoding::rsp); + + static const SetType NonAllocatableMask = + (1 << X86Encoding::rsp) | + (1 << X86Encoding::r11); // This is ScratchReg. + + // Registers returned from a JS -> JS call. + static const SetType JSCallMask = + (1 << X86Encoding::rcx); + + // Registers returned from a JS -> C call. 
+ static const SetType CallMask = + (1 << X86Encoding::rax); + +#endif + + static const SetType NonVolatileMask = + AllMask & ~VolatileMask & ~(1 << X86Encoding::rsp); + + static const SetType AllocatableMask = AllMask & ~NonAllocatableMask; + + // Registers that can be allocated without being saved, generally. + static const SetType TempMask = VolatileMask & ~NonAllocatableMask; +}; + +typedef Registers::SetType PackedRegisterMask; + +class FloatRegisters { + public: + typedef X86Encoding::XMMRegisterID Encoding; + + enum ContentType { + Single, // 32-bit float. + Double, // 64-bit double. + Simd128, // 128-bit SIMD type (int32x4, bool16x8, etc). + NumTypes + }; + + // Content spilled during bailouts. + union RegisterContent { + float s; + double d; + int32_t i4[4]; + float s4[4]; + }; + + static const char* GetName(Encoding code) { + return X86Encoding::XMMRegName(code); + } + + static Encoding FromName(const char* name) { + for (size_t i = 0; i < Total; i++) { + if (strcmp(GetName(Encoding(i)), name) == 0) + return Encoding(i); + } + return Invalid; + } + + static const Encoding Invalid = X86Encoding::invalid_xmm; + +#if defined(JS_CODEGEN_X86) + static const uint32_t Total = 8 * NumTypes; + static const uint32_t TotalPhys = 8; + static const uint32_t Allocatable = 7; + typedef uint32_t SetType; + +#elif defined(JS_CODEGEN_X64) + static const uint32_t Total = 16 * NumTypes; + static const uint32_t TotalPhys = 16; + static const uint32_t Allocatable = 15; + typedef uint64_t SetType; + +#endif + + static_assert(sizeof(SetType) * 8 >= Total, + "SetType should be large enough to enumerate all registers."); + + // Magic values which are used to duplicate a mask of physical register for + // a specific type of register. A multiplication is used to copy and shift + // the bits of the physical register mask. 
+ static const SetType SpreadSingle = SetType(1) << (uint32_t(Single) * TotalPhys); + static const SetType SpreadDouble = SetType(1) << (uint32_t(Double) * TotalPhys); + static const SetType SpreadSimd128 = SetType(1) << (uint32_t(Simd128) * TotalPhys); + static const SetType SpreadScalar = SpreadSingle | SpreadDouble; + static const SetType SpreadVector = SpreadSimd128; + static const SetType Spread = SpreadScalar | SpreadVector; + + static const SetType AllPhysMask = ((1 << TotalPhys) - 1); + static const SetType AllMask = AllPhysMask * Spread; + static const SetType AllDoubleMask = AllPhysMask * SpreadDouble; + static const SetType AllSingleMask = AllPhysMask * SpreadSingle; + +#if defined(JS_CODEGEN_X86) + static const SetType NonAllocatableMask = + Spread * (1 << X86Encoding::xmm7); // This is ScratchDoubleReg. + +#elif defined(JS_CODEGEN_X64) + static const SetType NonAllocatableMask = + Spread * (1 << X86Encoding::xmm15); // This is ScratchDoubleReg. +#endif + +#if defined(JS_CODEGEN_X64) && defined(_WIN64) + static const SetType VolatileMask = + ( (1 << X86Encoding::xmm0) | + (1 << X86Encoding::xmm1) | + (1 << X86Encoding::xmm2) | + (1 << X86Encoding::xmm3) | + (1 << X86Encoding::xmm4) | + (1 << X86Encoding::xmm5) + ) * SpreadScalar + | AllPhysMask * SpreadVector; + +#else + static const SetType VolatileMask = + AllMask; +#endif + + static const SetType NonVolatileMask = AllMask & ~VolatileMask; + static const SetType WrapperMask = VolatileMask; + static const SetType AllocatableMask = AllMask & ~NonAllocatableMask; +}; + +template +class TypedRegisterSet; + +struct FloatRegister { + typedef FloatRegisters Codes; + typedef size_t Code; + typedef Codes::Encoding Encoding; + typedef Codes::SetType SetType; + static uint32_t SetSize(SetType x) { + // Count the number of non-aliased registers, for the moment. + // + // Copy the set bits of each typed register to the low part of the of + // the Set, and count the number of registers. 
This is made to avoid + // registers which are allocated twice with different types (such as in + // AllMask). + x |= x >> (2 * Codes::TotalPhys); + x |= x >> Codes::TotalPhys; + x &= Codes::AllPhysMask; + static_assert(Codes::AllPhysMask <= 0xffff, "We can safely use CountPopulation32"); + return mozilla::CountPopulation32(x); + } + +#if defined(JS_CODEGEN_X86) + static uint32_t FirstBit(SetType x) { + static_assert(sizeof(SetType) == 4, "SetType must be 32 bits"); + return mozilla::CountTrailingZeroes32(x); + } + static uint32_t LastBit(SetType x) { + return 31 - mozilla::CountLeadingZeroes32(x); + } + +#elif defined(JS_CODEGEN_X64) + static uint32_t FirstBit(SetType x) { + static_assert(sizeof(SetType) == 8, "SetType must be 64 bits"); + return mozilla::CountTrailingZeroes64(x); + } + static uint32_t LastBit(SetType x) { + return 63 - mozilla::CountLeadingZeroes64(x); + } +#endif + + private: + // Note: These fields are using one extra bit to make the invalid enumerated + // values fit, and thus prevent a warning. + Codes::Encoding reg_ : 5; + Codes::ContentType type_ : 3; + bool isInvalid_ : 1; + + // Constants used for exporting/importing the float register code. 
+#if defined(JS_CODEGEN_X86) + static const size_t RegSize = 3; +#elif defined(JS_CODEGEN_X64) + static const size_t RegSize = 4; +#endif + static const size_t RegMask = (1 << RegSize) - 1; + + public: + constexpr FloatRegister() + : reg_(Codes::Encoding(0)), type_(Codes::Single), isInvalid_(true) + { } + constexpr FloatRegister(uint32_t r, Codes::ContentType k) + : reg_(Codes::Encoding(r)), type_(k), isInvalid_(false) + { } + constexpr FloatRegister(Codes::Encoding r, Codes::ContentType k) + : reg_(r), type_(k), isInvalid_(false) + { } + + static FloatRegister FromCode(uint32_t i) { + MOZ_ASSERT(i < Codes::Total); + return FloatRegister(i & RegMask, Codes::ContentType(i >> RegSize)); + } + + bool isSingle() const { MOZ_ASSERT(!isInvalid()); return type_ == Codes::Single; } + bool isDouble() const { MOZ_ASSERT(!isInvalid()); return type_ == Codes::Double; } + bool isSimd128() const { MOZ_ASSERT(!isInvalid()); return type_ == Codes::Simd128; } + bool isInvalid() const { return isInvalid_; } + + FloatRegister asSingle() const { MOZ_ASSERT(!isInvalid()); return FloatRegister(reg_, Codes::Single); } + FloatRegister asDouble() const { MOZ_ASSERT(!isInvalid()); return FloatRegister(reg_, Codes::Double); } + FloatRegister asSimd128() const { MOZ_ASSERT(!isInvalid()); return FloatRegister(reg_, Codes::Simd128); } + + uint32_t size() const { + MOZ_ASSERT(!isInvalid()); + if (isSingle()) + return sizeof(float); + if (isDouble()) + return sizeof(double); + MOZ_ASSERT(isSimd128()); + return 4 * sizeof(int32_t); + } + + Code code() const { + MOZ_ASSERT(!isInvalid()); + MOZ_ASSERT(uint32_t(reg_) < Codes::TotalPhys); + // :TODO: ARM is doing the same thing, but we should avoid this, except + // that the RegisterSets depends on this. 
+ return Code(reg_ | (type_ << RegSize)); + } + Encoding encoding() const { + MOZ_ASSERT(!isInvalid()); + MOZ_ASSERT(uint32_t(reg_) < Codes::TotalPhys); + return reg_; + } + // defined in Assembler-x86-shared.cpp + const char* name() const; + bool volatile_() const { + return !!((SetType(1) << code()) & FloatRegisters::VolatileMask); + } + bool operator !=(FloatRegister other) const { + return other.reg_ != reg_ || other.type_ != type_; + } + bool operator ==(FloatRegister other) const { + return other.reg_ == reg_ && other.type_ == type_; + } + bool aliases(FloatRegister other) const { + return other.reg_ == reg_; + } + // Check if two floating point registers have the same type. + bool equiv(FloatRegister other) const { + return other.type_ == type_; + } + + uint32_t numAliased() const { + return Codes::NumTypes; + } + uint32_t numAlignedAliased() const { + return numAliased(); + } + + // N.B. FloatRegister is an explicit outparam here because msvc-2010 + // miscompiled it on win64 when the value was simply returned + void aliased(uint32_t aliasIdx, FloatRegister* ret) const { + MOZ_ASSERT(aliasIdx < Codes::NumTypes); + *ret = FloatRegister(reg_, Codes::ContentType((aliasIdx + type_) % Codes::NumTypes)); + } + void alignedAliased(uint32_t aliasIdx, FloatRegister* ret) const { + aliased(aliasIdx, ret); + } + + SetType alignedOrDominatedAliasedSet() const { + return Codes::Spread << reg_; + } + + static TypedRegisterSet ReduceSetForPush(const TypedRegisterSet& s); + static uint32_t GetPushSizeInBytes(const TypedRegisterSet& s); + uint32_t getRegisterDumpOffsetInBytes(); +}; + +// Arm/D32 has double registers that can NOT be treated as float32 +// and this requires some dances in lowering. +inline bool +hasUnaliasedDouble() +{ + return false; +} + +// On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32 +// to a double as a temporary, you need a temporary double register. 
+inline bool +hasMultiAlias() +{ + return false; +} + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Architecture_x86_h */ diff --git a/js/src/jit/x86-shared/Assembler-x86-shared.cpp b/js/src/jit/x86-shared/Assembler-x86-shared.cpp new file mode 100644 index 000000000..8d761c138 --- /dev/null +++ b/js/src/jit/x86-shared/Assembler-x86-shared.cpp @@ -0,0 +1,350 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gc/Marking.h" +#include "jit/Disassembler.h" +#include "jit/JitCompartment.h" +#if defined(JS_CODEGEN_X86) +# include "jit/x86/MacroAssembler-x86.h" +#elif defined(JS_CODEGEN_X64) +# include "jit/x64/MacroAssembler-x64.h" +#else +# error "Wrong architecture. Only x86 and x64 should build this file!" 
+#endif + +#ifdef _MSC_VER +# include // for __cpuid +# if defined(_M_X64) && (_MSC_FULL_VER >= 160040219) +# include // for _xgetbv +# endif +#endif + +using namespace js; +using namespace js::jit; + +void +AssemblerX86Shared::copyJumpRelocationTable(uint8_t* dest) +{ + if (jumpRelocations_.length()) + memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length()); +} + +void +AssemblerX86Shared::copyDataRelocationTable(uint8_t* dest) +{ + if (dataRelocations_.length()) + memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length()); +} + +void +AssemblerX86Shared::copyPreBarrierTable(uint8_t* dest) +{ + if (preBarriers_.length()) + memcpy(dest, preBarriers_.buffer(), preBarriers_.length()); +} + +static void +TraceDataRelocations(JSTracer* trc, uint8_t* buffer, CompactBufferReader& reader) +{ + while (reader.more()) { + size_t offset = reader.readUnsigned(); + void* ptr = X86Encoding::GetPointer(buffer + offset); + +#ifdef JS_PUNBOX64 + // All pointers on x64 will have the top bits cleared. If those bits + // are not cleared, this must be a Value. + uintptr_t word = reinterpret_cast(ptr); + if (word >> JSVAL_TAG_SHIFT) { + Value v = Value::fromRawBits(word); + TraceManuallyBarrieredEdge(trc, &v, "jit-masm-value"); + if (word != v.asRawBits()) { + // Only update the code if the Value changed, because the code + // is not writable if we're not moving objects. + X86Encoding::SetPointer(buffer + offset, v.bitsAsPunboxPointer()); + } + continue; + } +#endif + + // No barrier needed since these are constants. 
+ gc::Cell* cellPtr = reinterpret_cast(ptr); + TraceManuallyBarrieredGenericPointerEdge(trc, &cellPtr, "jit-masm-ptr"); + if (cellPtr != ptr) + X86Encoding::SetPointer(buffer + offset, cellPtr); + } +} + + +void +AssemblerX86Shared::TraceDataRelocations(JSTracer* trc, JitCode* code, CompactBufferReader& reader) +{ + ::TraceDataRelocations(trc, code->raw(), reader); +} + +void +AssemblerX86Shared::trace(JSTracer* trc) +{ + for (size_t i = 0; i < jumps_.length(); i++) { + RelativePatch& rp = jumps_[i]; + if (rp.kind == Relocation::JITCODE) { + JitCode* code = JitCode::FromExecutable((uint8_t*)rp.target); + TraceManuallyBarrieredEdge(trc, &code, "masmrel32"); + MOZ_ASSERT(code == JitCode::FromExecutable((uint8_t*)rp.target)); + } + } + if (dataRelocations_.length()) { + CompactBufferReader reader(dataRelocations_); + ::TraceDataRelocations(trc, masm.data(), reader); + } +} + +void +AssemblerX86Shared::executableCopy(void* buffer) +{ + masm.executableCopy(buffer); + + // Crash diagnostics for bug 1124397. Check the code buffer has not been + // poisoned with 0xE5 bytes. 
+ static const size_t MinPoisoned = 16; + const uint8_t* bytes = (const uint8_t*)buffer; + size_t len = size(); + + for (size_t i = 0; i < len; i += MinPoisoned) { + if (bytes[i] != 0xE5) + continue; + + size_t startOffset = i; + while (startOffset > 0 && bytes[startOffset - 1] == 0xE5) + startOffset--; + + size_t endOffset = i; + while (endOffset + 1 < len && bytes[endOffset + 1] == 0xE5) + endOffset++; + + if (endOffset - startOffset < MinPoisoned) + continue; + + volatile uintptr_t dump[5]; + blackbox = dump; + blackbox[0] = uintptr_t(0xABCD4321); + blackbox[1] = uintptr_t(len); + blackbox[2] = uintptr_t(startOffset); + blackbox[3] = uintptr_t(endOffset); + blackbox[4] = uintptr_t(0xFFFF8888); + MOZ_CRASH("Corrupt code buffer"); + } +} + +void +AssemblerX86Shared::processCodeLabels(uint8_t* rawCode) +{ + for (size_t i = 0; i < codeLabels_.length(); i++) { + CodeLabel label = codeLabels_[i]; + Bind(rawCode, label.patchAt(), rawCode + label.target()->offset()); + } +} + +AssemblerX86Shared::Condition +AssemblerX86Shared::InvertCondition(Condition cond) +{ + switch (cond) { + case Zero: + return NonZero; + case NonZero: + return Zero; + case LessThan: + return GreaterThanOrEqual; + case LessThanOrEqual: + return GreaterThan; + case GreaterThan: + return LessThanOrEqual; + case GreaterThanOrEqual: + return LessThan; + case Above: + return BelowOrEqual; + case AboveOrEqual: + return Below; + case Below: + return AboveOrEqual; + case BelowOrEqual: + return Above; + default: + MOZ_CRASH("unexpected condition"); + } +} + +AssemblerX86Shared::Condition +AssemblerX86Shared::UnsignedCondition(Condition cond) +{ + switch (cond) { + case Zero: + case NonZero: + return cond; + case LessThan: + case Below: + return Below; + case LessThanOrEqual: + case BelowOrEqual: + return BelowOrEqual; + case GreaterThan: + case Above: + return Above; + case AboveOrEqual: + case GreaterThanOrEqual: + return AboveOrEqual; + default: + MOZ_CRASH("unexpected condition"); + } +} + 
+AssemblerX86Shared::Condition +AssemblerX86Shared::ConditionWithoutEqual(Condition cond) +{ + switch (cond) { + case LessThan: + case LessThanOrEqual: + return LessThan; + case Below: + case BelowOrEqual: + return Below; + case GreaterThan: + case GreaterThanOrEqual: + return GreaterThan; + case Above: + case AboveOrEqual: + return Above; + default: + MOZ_CRASH("unexpected condition"); + } +} + +void +AssemblerX86Shared::verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, + const Disassembler::HeapAccess& heapAccess) +{ +#ifdef DEBUG + if (masm.oom()) + return; + Disassembler::VerifyHeapAccess(masm.data() + begin, masm.data() + end, heapAccess); +#endif +} + +CPUInfo::SSEVersion CPUInfo::maxSSEVersion = UnknownSSE; +CPUInfo::SSEVersion CPUInfo::maxEnabledSSEVersion = UnknownSSE; +bool CPUInfo::avxPresent = false; +bool CPUInfo::avxEnabled = false; +bool CPUInfo::popcntPresent = false; +bool CPUInfo::needAmdBugWorkaround = false; + +static uintptr_t +ReadXGETBV() +{ + // We use a variety of low-level mechanisms to get at the xgetbv + // instruction, including spelling out the xgetbv instruction as bytes, + // because older compilers and assemblers may not recognize the instruction + // by name. + size_t xcr0EAX = 0; +#if defined(_XCR_XFEATURE_ENABLED_MASK) + xcr0EAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); +#elif defined(__GNUC__) + // xgetbv returns its results in %eax and %edx, and for our purposes here, + // we're only interested in the %eax value. 
+ asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0EAX) : "c"(0) : "%edx"); +#elif defined(_MSC_VER) && defined(_M_IX86) + __asm { + xor ecx, ecx + _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 + mov xcr0EAX, eax + } +#endif + return xcr0EAX; +} + +void +CPUInfo::SetSSEVersion() +{ + int flagsEAX = 0; + int flagsECX = 0; + int flagsEDX = 0; + +#ifdef _MSC_VER + int cpuinfo[4]; + __cpuid(cpuinfo, 1); + flagsEAX = cpuinfo[0]; + flagsECX = cpuinfo[2]; + flagsEDX = cpuinfo[3]; +#elif defined(__GNUC__) +# ifdef JS_CODEGEN_X64 + asm ( + "movl $0x1, %%eax;" + "cpuid;" + : "=a" (flagsEAX), "=c" (flagsECX), "=d" (flagsEDX) + : + : "%ebx" + ); +# else + // On x86, preserve ebx. The compiler needs it for PIC mode. + // Some older processors don't fill the ecx register with cpuid, so clobber + // it before calling cpuid, so that there's no risk of picking random bits + // indicating SSE3/SSE4 are present. + asm ( + "xor %%ecx, %%ecx;" + "movl $0x1, %%eax;" + "pushl %%ebx;" + "cpuid;" + "popl %%ebx;" + : "=a" (flagsEAX), "=c" (flagsECX), "=d" (flagsEDX) + : + : + ); +# endif +#else +# error "Unsupported compiler" +#endif + + static const int SSEBit = 1 << 25; + static const int SSE2Bit = 1 << 26; + static const int SSE3Bit = 1 << 0; + static const int SSSE3Bit = 1 << 9; + static const int SSE41Bit = 1 << 19; + static const int SSE42Bit = 1 << 20; + + if (flagsECX & SSE42Bit) maxSSEVersion = SSE4_2; + else if (flagsECX & SSE41Bit) maxSSEVersion = SSE4_1; + else if (flagsECX & SSSE3Bit) maxSSEVersion = SSSE3; + else if (flagsECX & SSE3Bit) maxSSEVersion = SSE3; + else if (flagsEDX & SSE2Bit) maxSSEVersion = SSE2; + else if (flagsEDX & SSEBit) maxSSEVersion = SSE; + else maxSSEVersion = NoSSE; + + if (maxEnabledSSEVersion != UnknownSSE) + maxSSEVersion = Min(maxSSEVersion, maxEnabledSSEVersion); + + static const int AVXBit = 1 << 28; + static const int XSAVEBit = 1 << 27; + avxPresent = (flagsECX & AVXBit) && (flagsECX & XSAVEBit) && avxEnabled; + + // If the hardware supports AVX, check 
whether the OS supports it too. + if (avxPresent) { + size_t xcr0EAX = ReadXGETBV(); + static const int xcr0SSEBit = 1 << 1; + static const int xcr0AVXBit = 1 << 2; + avxPresent = (xcr0EAX & xcr0SSEBit) && (xcr0EAX & xcr0AVXBit); + } + + static const int POPCNTBit = 1 << 23; + + popcntPresent = (flagsECX & POPCNTBit); + + // Check if we need to work around an AMD CPU bug (see bug 1281759). + // We check for family 20 models 0-2. Intel doesn't use family 20 at + // this point, so this should only match AMD CPUs. + unsigned family = ((flagsEAX >> 20) & 0xff) + ((flagsEAX >> 8) & 0xf); + unsigned model = (((flagsEAX >> 16) & 0xf) << 4) + ((flagsEAX >> 4) & 0xf); + needAmdBugWorkaround = (family == 20 && model <= 2); +} + +volatile uintptr_t* blackbox = nullptr; diff --git a/js/src/jit/x86-shared/Assembler-x86-shared.h b/js/src/jit/x86-shared/Assembler-x86-shared.h new file mode 100644 index 000000000..510ce9a99 --- /dev/null +++ b/js/src/jit/x86-shared/Assembler-x86-shared.h @@ -0,0 +1,3652 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Assembler_x86_shared_h +#define jit_x86_shared_Assembler_x86_shared_h + +#include + +#include "jit/shared/Assembler-shared.h" + +#if defined(JS_CODEGEN_X86) +# include "jit/x86/BaseAssembler-x86.h" +#elif defined(JS_CODEGEN_X64) +# include "jit/x64/BaseAssembler-x64.h" +#else +# error "Unknown architecture!" 
+#endif + +namespace js { +namespace jit { + +struct ScratchFloat32Scope : public AutoFloatRegisterScope +{ + explicit ScratchFloat32Scope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchFloat32Reg) + { } +}; + +struct ScratchDoubleScope : public AutoFloatRegisterScope +{ + explicit ScratchDoubleScope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchDoubleReg) + { } +}; + +struct ScratchSimd128Scope : public AutoFloatRegisterScope +{ + explicit ScratchSimd128Scope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchSimd128Reg) + { } +}; + +class Operand +{ + public: + enum Kind { + REG, + MEM_REG_DISP, + FPREG, + MEM_SCALE, + MEM_ADDRESS32 + }; + + private: + Kind kind_ : 4; + // Used as a Register::Encoding and a FloatRegister::Encoding. + uint32_t base_ : 5; + Scale scale_ : 3; + // We don't use all 8 bits, of course, but GCC complains if the size of + // this field is smaller than the size of Register::Encoding. + Register::Encoding index_ : 8; + int32_t disp_; + + public: + explicit Operand(Register reg) + : kind_(REG), + base_(reg.encoding()), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(0) + { } + explicit Operand(FloatRegister reg) + : kind_(FPREG), + base_(reg.encoding()), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(0) + { } + explicit Operand(const Address& address) + : kind_(MEM_REG_DISP), + base_(address.base.encoding()), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(address.offset) + { } + explicit Operand(const BaseIndex& address) + : kind_(MEM_SCALE), + base_(address.base.encoding()), + scale_(address.scale), + index_(address.index.encoding()), + disp_(address.offset) + { } + Operand(Register base, Register index, Scale scale, int32_t disp = 0) + : kind_(MEM_SCALE), + base_(base.encoding()), + scale_(scale), + index_(index.encoding()), + disp_(disp) + { } + Operand(Register reg, int32_t disp) + : kind_(MEM_REG_DISP), + base_(reg.encoding()), + scale_(TimesOne), + 
index_(Registers::Invalid), + disp_(disp) + { } + explicit Operand(AbsoluteAddress address) + : kind_(MEM_ADDRESS32), + base_(Registers::Invalid), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(X86Encoding::AddressImmediate(address.addr)) + { } + explicit Operand(PatchedAbsoluteAddress address) + : kind_(MEM_ADDRESS32), + base_(Registers::Invalid), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(X86Encoding::AddressImmediate(address.addr)) + { } + + Address toAddress() const { + MOZ_ASSERT(kind() == MEM_REG_DISP); + return Address(Register::FromCode(base()), disp()); + } + + BaseIndex toBaseIndex() const { + MOZ_ASSERT(kind() == MEM_SCALE); + return BaseIndex(Register::FromCode(base()), Register::FromCode(index()), scale(), disp()); + } + + Kind kind() const { + return kind_; + } + Register::Encoding reg() const { + MOZ_ASSERT(kind() == REG); + return Register::Encoding(base_); + } + Register::Encoding base() const { + MOZ_ASSERT(kind() == MEM_REG_DISP || kind() == MEM_SCALE); + return Register::Encoding(base_); + } + Register::Encoding index() const { + MOZ_ASSERT(kind() == MEM_SCALE); + return index_; + } + Scale scale() const { + MOZ_ASSERT(kind() == MEM_SCALE); + return scale_; + } + FloatRegister::Encoding fpu() const { + MOZ_ASSERT(kind() == FPREG); + return FloatRegister::Encoding(base_); + } + int32_t disp() const { + MOZ_ASSERT(kind() == MEM_REG_DISP || kind() == MEM_SCALE); + return disp_; + } + void* address() const { + MOZ_ASSERT(kind() == MEM_ADDRESS32); + return reinterpret_cast(disp_); + } + + bool containsReg(Register r) const { + switch (kind()) { + case REG: return r.encoding() == reg(); + case MEM_REG_DISP: return r.encoding() == base(); + case MEM_SCALE: return r.encoding() == base() || r.encoding() == index(); + default: return false; + } + } +}; + +inline Imm32 +Imm64::firstHalf() const +{ + return low(); +} + +inline Imm32 +Imm64::secondHalf() const +{ + return hi(); +} + +class CPUInfo +{ + public: + // As the SSE's were 
introduced in order, the presence of a later SSE implies + // the presence of an earlier SSE. For example, SSE4_2 support implies SSE2 support. + enum SSEVersion { + UnknownSSE = 0, + NoSSE = 1, + SSE = 2, + SSE2 = 3, + SSE3 = 4, + SSSE3 = 5, + SSE4_1 = 6, + SSE4_2 = 7 + }; + + static SSEVersion GetSSEVersion() { + if (maxSSEVersion == UnknownSSE) + SetSSEVersion(); + + MOZ_ASSERT(maxSSEVersion != UnknownSSE); + MOZ_ASSERT_IF(maxEnabledSSEVersion != UnknownSSE, maxSSEVersion <= maxEnabledSSEVersion); + return maxSSEVersion; + } + + static bool IsAVXPresent() { + if (MOZ_UNLIKELY(maxSSEVersion == UnknownSSE)) + SetSSEVersion(); + + MOZ_ASSERT_IF(!avxEnabled, !avxPresent); + return avxPresent; + } + + private: + static SSEVersion maxSSEVersion; + static SSEVersion maxEnabledSSEVersion; + static bool avxPresent; + static bool avxEnabled; + static bool popcntPresent; + static bool needAmdBugWorkaround; + + static void SetSSEVersion(); + + public: + static bool IsSSE2Present() { +#ifdef JS_CODEGEN_X64 + return true; +#else + return GetSSEVersion() >= SSE2; +#endif + } + static bool IsSSE3Present() { return GetSSEVersion() >= SSE3; } + static bool IsSSSE3Present() { return GetSSEVersion() >= SSSE3; } + static bool IsSSE41Present() { return GetSSEVersion() >= SSE4_1; } + static bool IsSSE42Present() { return GetSSEVersion() >= SSE4_2; } + static bool IsPOPCNTPresent() { return popcntPresent; } + static bool NeedAmdBugWorkaround() { return needAmdBugWorkaround; } + + static void SetSSE3Disabled() { maxEnabledSSEVersion = SSE2; avxEnabled = false; } + static void SetSSE4Disabled() { maxEnabledSSEVersion = SSSE3; avxEnabled = false; } + static void SetAVXEnabled() { avxEnabled = true; } +}; + +class AssemblerX86Shared : public AssemblerShared +{ + protected: + struct RelativePatch { + int32_t offset; + void* target; + Relocation::Kind kind; + + RelativePatch(int32_t offset, void* target, Relocation::Kind kind) + : offset(offset), + target(target), + kind(kind) + { } + }; + + 
Vector jumps_; + CompactBufferWriter jumpRelocations_; + CompactBufferWriter dataRelocations_; + CompactBufferWriter preBarriers_; + + void writeDataRelocation(ImmGCPtr ptr) { + if (ptr.value) { + if (gc::IsInsideNursery(ptr.value)) + embedsNurseryPointers_ = true; + dataRelocations_.writeUnsigned(masm.currentOffset()); + } + } + void writePrebarrierOffset(CodeOffset label) { + preBarriers_.writeUnsigned(label.offset()); + } + + protected: + X86Encoding::BaseAssemblerSpecific masm; + + typedef X86Encoding::JmpSrc JmpSrc; + typedef X86Encoding::JmpDst JmpDst; + + public: + AssemblerX86Shared() + { + if (!HasAVX()) + masm.disableVEX(); + } + + enum Condition { + Equal = X86Encoding::ConditionE, + NotEqual = X86Encoding::ConditionNE, + Above = X86Encoding::ConditionA, + AboveOrEqual = X86Encoding::ConditionAE, + Below = X86Encoding::ConditionB, + BelowOrEqual = X86Encoding::ConditionBE, + GreaterThan = X86Encoding::ConditionG, + GreaterThanOrEqual = X86Encoding::ConditionGE, + LessThan = X86Encoding::ConditionL, + LessThanOrEqual = X86Encoding::ConditionLE, + Overflow = X86Encoding::ConditionO, + CarrySet = X86Encoding::ConditionC, + CarryClear = X86Encoding::ConditionNC, + Signed = X86Encoding::ConditionS, + NotSigned = X86Encoding::ConditionNS, + Zero = X86Encoding::ConditionE, + NonZero = X86Encoding::ConditionNE, + Parity = X86Encoding::ConditionP, + NoParity = X86Encoding::ConditionNP + }; + + // If this bit is set, the vucomisd operands have to be inverted. + static const int DoubleConditionBitInvert = 0x10; + + // Bit set when a DoubleCondition does not map to a single x86 condition. + // The macro assembler has to special-case these conditions. + static const int DoubleConditionBitSpecial = 0x20; + static const int DoubleConditionBits = DoubleConditionBitInvert | DoubleConditionBitSpecial; + + enum DoubleCondition { + // These conditions will only evaluate to true if the comparison is ordered - i.e. neither operand is NaN. 
+ DoubleOrdered = NoParity, + DoubleEqual = Equal | DoubleConditionBitSpecial, + DoubleNotEqual = NotEqual, + DoubleGreaterThan = Above, + DoubleGreaterThanOrEqual = AboveOrEqual, + DoubleLessThan = Above | DoubleConditionBitInvert, + DoubleLessThanOrEqual = AboveOrEqual | DoubleConditionBitInvert, + // If either operand is NaN, these conditions always evaluate to true. + DoubleUnordered = Parity, + DoubleEqualOrUnordered = Equal, + DoubleNotEqualOrUnordered = NotEqual | DoubleConditionBitSpecial, + DoubleGreaterThanOrUnordered = Below | DoubleConditionBitInvert, + DoubleGreaterThanOrEqualOrUnordered = BelowOrEqual | DoubleConditionBitInvert, + DoubleLessThanOrUnordered = Below, + DoubleLessThanOrEqualOrUnordered = BelowOrEqual + }; + + enum NaNCond { + NaN_HandledByCond, + NaN_IsTrue, + NaN_IsFalse + }; + + // If the primary condition returned by ConditionFromDoubleCondition doesn't + // handle NaNs properly, return NaN_IsFalse if the comparison should be + // overridden to return false on NaN, NaN_IsTrue if it should be overridden + // to return true on NaN, or NaN_HandledByCond if no secondary check is + // needed. + static inline NaNCond NaNCondFromDoubleCondition(DoubleCondition cond) { + switch (cond) { + case DoubleOrdered: + case DoubleNotEqual: + case DoubleGreaterThan: + case DoubleGreaterThanOrEqual: + case DoubleLessThan: + case DoubleLessThanOrEqual: + case DoubleUnordered: + case DoubleEqualOrUnordered: + case DoubleGreaterThanOrUnordered: + case DoubleGreaterThanOrEqualOrUnordered: + case DoubleLessThanOrUnordered: + case DoubleLessThanOrEqualOrUnordered: + return NaN_HandledByCond; + case DoubleEqual: + return NaN_IsFalse; + case DoubleNotEqualOrUnordered: + return NaN_IsTrue; + } + + MOZ_CRASH("Unknown double condition"); + } + + static void StaticAsserts() { + // DoubleConditionBits should not interfere with x86 condition codes. 
+ JS_STATIC_ASSERT(!((Equal | NotEqual | Above | AboveOrEqual | Below | + BelowOrEqual | Parity | NoParity) & DoubleConditionBits)); + } + + static Condition InvertCondition(Condition cond); + static Condition UnsignedCondition(Condition cond); + static Condition ConditionWithoutEqual(Condition cond); + + // Return the primary condition to test. Some primary conditions may not + // handle NaNs properly and may therefore require a secondary condition. + // Use NaNCondFromDoubleCondition to determine what else is needed. + static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) { + return static_cast(cond & ~DoubleConditionBits); + } + + static void TraceDataRelocations(JSTracer* trc, JitCode* code, CompactBufferReader& reader); + + // MacroAssemblers hold onto gcthings, so they are traced by the GC. + void trace(JSTracer* trc); + + bool oom() const { + return AssemblerShared::oom() || + masm.oom() || + jumpRelocations_.oom() || + dataRelocations_.oom() || + preBarriers_.oom(); + } + + void setPrinter(Sprinter* sp) { + masm.setPrinter(sp); + } + + static const Register getStackPointer() { + return StackPointer; + } + + void executableCopy(void* buffer); + bool asmMergeWith(const AssemblerX86Shared& other) { + MOZ_ASSERT(other.jumps_.length() == 0); + if (!AssemblerShared::asmMergeWith(masm.size(), other)) + return false; + return masm.appendBuffer(other.masm); + } + void processCodeLabels(uint8_t* rawCode); + void copyJumpRelocationTable(uint8_t* dest); + void copyDataRelocationTable(uint8_t* dest); + void copyPreBarrierTable(uint8_t* dest); + + // Size of the instruction stream, in bytes. + size_t size() const { + return masm.size(); + } + // Size of the jump relocation table, in bytes. 
+ size_t jumpRelocationTableBytes() const { + return jumpRelocations_.length(); + } + size_t dataRelocationTableBytes() const { + return dataRelocations_.length(); + } + size_t preBarrierTableBytes() const { + return preBarriers_.length(); + } + // Size of the data table, in bytes. + size_t bytesNeeded() const { + return size() + + jumpRelocationTableBytes() + + dataRelocationTableBytes() + + preBarrierTableBytes(); + } + + public: + void haltingAlign(int alignment) { + masm.haltingAlign(alignment); + } + void nopAlign(int alignment) { + masm.nopAlign(alignment); + } + void writeCodePointer(CodeOffset* label) { + // A CodeOffset only has one use, bake in the "end of list" value. + masm.jumpTablePointer(LabelBase::INVALID_OFFSET); + label->bind(masm.size()); + } + void cmovz(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.cmovz_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.cmovz_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.cmovz_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movl(Imm32 imm32, Register dest) { + masm.movl_i32r(imm32.value, dest.encoding()); + } + void movl(Register src, Register dest) { + masm.movl_rr(src.encoding(), dest.encoding()); + } + void movl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.movl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.movl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.movl_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + 
masm.movl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.movl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.movl_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movl(Imm32 imm32, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.movl_i32r(imm32.value, dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.movl_i32m(imm32.value, dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movl_i32m(imm32.value, dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.movl_i32m(imm32.value, dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void xchgl(Register src, Register dest) { + masm.xchgl_rr(src.encoding(), dest.encoding()); + } + + // Eventually vmovapd should be overloaded to support loads and + // stores too. 
+ void vmovapd(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovapd_rr(src.encoding(), dest.encoding()); + } + + void vmovaps(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovaps_rr(src.encoding(), dest.encoding()); + } + void vmovaps(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovaps_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovaps_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + case Operand::FPREG: + masm.vmovaps_rr(src.fpu(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovaps(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovaps_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovaps_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovups(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovups_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovups_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovups(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovups_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovups_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + // vmovsd is only provided in load/store form since the + // register-to-register form 
has different semantics (it doesn't clobber + // the whole output register) and isn't needed currently. + void vmovsd(const Address& src, FloatRegister dest) { + masm.vmovsd_mr(src.offset, src.base.encoding(), dest.encoding()); + } + void vmovsd(const BaseIndex& src, FloatRegister dest) { + masm.vmovsd_mr(src.offset, src.base.encoding(), src.index.encoding(), src.scale, dest.encoding()); + } + void vmovsd(FloatRegister src, const Address& dest) { + masm.vmovsd_rm(src.encoding(), dest.offset, dest.base.encoding()); + } + void vmovsd(FloatRegister src, const BaseIndex& dest) { + masm.vmovsd_rm(src.encoding(), dest.offset, dest.base.encoding(), dest.index.encoding(), dest.scale); + } + // Although vmovss is not only provided in load/store form (for the same + // reasons as vmovsd above), the register to register form should be only + // used in contexts where we care about not clearing the higher lanes of + // the FloatRegister. + void vmovss(const Address& src, FloatRegister dest) { + masm.vmovss_mr(src.offset, src.base.encoding(), dest.encoding()); + } + void vmovss(const BaseIndex& src, FloatRegister dest) { + masm.vmovss_mr(src.offset, src.base.encoding(), src.index.encoding(), src.scale, dest.encoding()); + } + void vmovss(FloatRegister src, const Address& dest) { + masm.vmovss_rm(src.encoding(), dest.offset, dest.base.encoding()); + } + void vmovss(FloatRegister src, const BaseIndex& dest) { + masm.vmovss_rm(src.encoding(), dest.offset, dest.base.encoding(), dest.index.encoding(), dest.scale); + } + void vmovss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + masm.vmovss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmovdqu(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovdqu_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovdqu_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + 
break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovdqu(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovdqu_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovdqu_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovdqa(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vmovdqa_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmovdqa_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovdqa_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovdqa(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovdqa_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovdqa_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovdqa(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovdqa_rr(src.encoding(), dest.encoding()); + } + void vcvtss2sd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtss2sd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vcvtsd2ss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtsd2ss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void movzbl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movzbl_mr(src.disp(), src.base(), dest.encoding()); + break; + case 
Operand::MEM_SCALE: + masm.movzbl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movsbl(Register src, Register dest) { + masm.movsbl_rr(src.encoding(), dest.encoding()); + } + void movsbl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movsbl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movsbl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movb(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movb_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movb_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movb(Imm32 src, Register dest) { + masm.movb_ir(src.value & 255, dest.encoding()); + } + void movb(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movb_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movb_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movb(Imm32 src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movb_im(src.value, dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movb_im(src.value, dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movzwl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.movzwl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.movzwl_mr(src.disp(), src.base(), dest.encoding()); + 
break; + case Operand::MEM_SCALE: + masm.movzwl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movzwl(Register src, Register dest) { + masm.movzwl_rr(src.encoding(), dest.encoding()); + } + void movw(const Operand& src, Register dest) { + masm.prefix_16_for_32(); + movl(src, dest); + } + void movw(Imm32 src, Register dest) { + masm.prefix_16_for_32(); + movl(src, dest); + } + void movw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movw(Imm32 src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movw_im(src.value, dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movw_im(src.value, dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movswl(Register src, Register dest) { + masm.movswl_rr(src.encoding(), dest.encoding()); + } + void movswl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movswl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movswl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void leal(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.leal_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.leal_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + protected: + void 
jSrc(Condition cond, Label* label) { + if (label->bound()) { + // The jump can be immediately encoded to the correct destination. + masm.jCC_i(static_cast(cond), JmpDst(label->offset())); + } else { + // Thread the jump list through the unpatched jump targets. + JmpSrc j = masm.jCC(static_cast(cond)); + JmpSrc prev = JmpSrc(label->use(j.offset())); + masm.setNextJump(j, prev); + } + } + void jmpSrc(Label* label) { + if (label->bound()) { + // The jump can be immediately encoded to the correct destination. + masm.jmp_i(JmpDst(label->offset())); + } else { + // Thread the jump list through the unpatched jump targets. + JmpSrc j = masm.jmp(); + JmpSrc prev = JmpSrc(label->use(j.offset())); + masm.setNextJump(j, prev); + } + } + + // Comparison of EAX against the address given by a Label. + JmpSrc cmpSrc(Label* label) { + JmpSrc j = masm.cmp_eax(); + if (label->bound()) { + // The jump can be immediately patched to the correct destination. + masm.linkJump(j, JmpDst(label->offset())); + } else { + // Thread the jump list through the unpatched jump targets. + JmpSrc prev = JmpSrc(label->use(j.offset())); + masm.setNextJump(j, prev); + } + return j; + } + + JmpSrc jSrc(Condition cond, RepatchLabel* label) { + JmpSrc j = masm.jCC(static_cast(cond)); + if (label->bound()) { + // The jump can be immediately patched to the correct destination. + masm.linkJump(j, JmpDst(label->offset())); + } else { + label->use(j.offset()); + } + return j; + } + JmpSrc jmpSrc(RepatchLabel* label) { + JmpSrc j = masm.jmp(); + if (label->bound()) { + // The jump can be immediately patched to the correct destination. + masm.linkJump(j, JmpDst(label->offset())); + } else { + // Thread the jump list through the unpatched jump targets. 
+ label->use(j.offset()); + } + return j; + } + + public: + void nop() { masm.nop(); } + void nop(size_t n) { masm.insert_nop(n); } + void j(Condition cond, Label* label) { jSrc(cond, label); } + void jmp(Label* label) { jmpSrc(label); } + void j(Condition cond, RepatchLabel* label) { jSrc(cond, label); } + void jmp(RepatchLabel* label) { jmpSrc(label); } + + void j(Condition cond, wasm::TrapDesc target) { + Label l; + j(cond, &l); + bindLater(&l, target); + } + void jmp(wasm::TrapDesc target) { + Label l; + jmp(&l); + bindLater(&l, target); + } + + void jmp(const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.jmp_m(op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.jmp_m(op.disp(), op.base(), op.index(), op.scale()); + break; + case Operand::REG: + masm.jmp_r(op.reg()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpEAX(Label* label) { cmpSrc(label); } + void bind(Label* label) { + JmpDst dst(masm.label()); + if (label->used()) { + bool more; + JmpSrc jmp(label->offset()); + do { + JmpSrc next; + more = masm.nextJump(jmp, &next); + masm.linkJump(jmp, dst); + jmp = next; + } while (more); + } + label->bind(dst.offset()); + } + void bindLater(Label* label, wasm::TrapDesc target) { + if (label->used()) { + JmpSrc jmp(label->offset()); + do { + append(wasm::TrapSite(target, jmp.offset())); + } while (masm.nextJump(jmp, &jmp)); + } + label->reset(); + } + void bind(RepatchLabel* label) { + JmpDst dst(masm.label()); + if (label->used()) { + JmpSrc jmp(label->offset()); + masm.linkJump(jmp, dst); + } + label->bind(dst.offset()); + } + void use(CodeOffset* label) { + label->bind(currentOffset()); + } + uint32_t currentOffset() { + return masm.label().offset(); + } + + // Re-routes pending jumps to a new label. 
+ void retarget(Label* label, Label* target) { + if (!label->used()) + return; + bool more; + JmpSrc jmp(label->offset()); + do { + JmpSrc next; + more = masm.nextJump(jmp, &next); + if (target->bound()) { + // The jump can be immediately patched to the correct destination. + masm.linkJump(jmp, JmpDst(target->offset())); + } else { + // Thread the jump list through the unpatched jump targets. + JmpSrc prev(target->use(jmp.offset())); + masm.setNextJump(jmp, prev); + } + jmp = JmpSrc(next.offset()); + } while (more); + label->reset(); + } + + static void Bind(uint8_t* raw, CodeOffset* label, const void* address) { + if (label->bound()) { + intptr_t offset = label->offset(); + X86Encoding::SetPointer(raw + offset, address); + } + } + + // See Bind and X86Encoding::setPointer. + size_t labelToPatchOffset(CodeOffset label) { + return label.offset() - sizeof(void*); + } + + void ret() { + masm.ret(); + } + void retn(Imm32 n) { + // Remove the size of the return address which is included in the frame. 
+ masm.ret_i(n.value - sizeof(void*)); + } + CodeOffset call(Label* label) { + if (label->bound()) { + masm.linkJump(masm.call(), JmpDst(label->offset())); + } else { + JmpSrc j = masm.call(); + JmpSrc prev = JmpSrc(label->use(j.offset())); + masm.setNextJump(j, prev); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset call(Register reg) { + masm.call_r(reg.encoding()); + return CodeOffset(masm.currentOffset()); + } + void call(const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.call_r(op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.call_m(op.disp(), op.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + CodeOffset callWithPatch() { + return CodeOffset(masm.call().offset()); + } + + struct AutoPrepareForPatching : X86Encoding::AutoUnprotectAssemblerBufferRegion { + explicit AutoPrepareForPatching(AssemblerX86Shared& masm) + : X86Encoding::AutoUnprotectAssemblerBufferRegion(masm.masm, 0, masm.size()) + {} + }; + + void patchCall(uint32_t callerOffset, uint32_t calleeOffset) { + // The caller uses AutoUnprotectBuffer. + unsigned char* code = masm.data(); + X86Encoding::SetRel32(code + callerOffset, code + calleeOffset); + } + CodeOffset farJumpWithPatch() { + return CodeOffset(masm.jmp().offset()); + } + void patchFarJump(CodeOffset farJump, uint32_t targetOffset) { + // The caller uses AutoUnprotectBuffer. 
+ unsigned char* code = masm.data(); + X86Encoding::SetRel32(code + farJump.offset(), code + targetOffset); + } + static void repatchFarJump(uint8_t* code, uint32_t farJumpOffset, uint32_t targetOffset) { + X86Encoding::SetRel32(code + farJumpOffset, code + targetOffset); + } + + CodeOffset twoByteNop() { + return CodeOffset(masm.twoByteNop().offset()); + } + static void patchTwoByteNopToJump(uint8_t* jump, uint8_t* target) { + X86Encoding::BaseAssembler::patchTwoByteNopToJump(jump, target); + } + static void patchJumpToTwoByteNop(uint8_t* jump) { + X86Encoding::BaseAssembler::patchJumpToTwoByteNop(jump); + } + + void breakpoint() { + masm.int3(); + } + + static bool HasSSE2() { return CPUInfo::IsSSE2Present(); } + static bool HasSSE3() { return CPUInfo::IsSSE3Present(); } + static bool HasSSSE3() { return CPUInfo::IsSSSE3Present(); } + static bool HasSSE41() { return CPUInfo::IsSSE41Present(); } + static bool HasPOPCNT() { return CPUInfo::IsPOPCNTPresent(); } + static bool SupportsFloatingPoint() { return CPUInfo::IsSSE2Present(); } + static bool SupportsUnalignedAccesses() { return true; } + static bool SupportsSimd() { return CPUInfo::IsSSE2Present(); } + static bool HasAVX() { return CPUInfo::IsAVXPresent(); } + + void cmpl(Register rhs, Register lhs) { + masm.cmpl_rr(rhs.encoding(), lhs.encoding()); + } + void cmpl(const Operand& rhs, Register lhs) { + switch (rhs.kind()) { + case Operand::REG: + masm.cmpl_rr(rhs.reg(), lhs.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.cmpl_mr(rhs.disp(), rhs.base(), lhs.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.cmpl_mr(rhs.address(), lhs.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpl(Register rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.cmpl_rr(rhs.encoding(), lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.cmpl_rm(rhs.encoding(), lhs.disp(), lhs.base()); + break; + case Operand::MEM_ADDRESS32: + 
masm.cmpl_rm(rhs.encoding(), lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpl(Imm32 rhs, Register lhs) { + masm.cmpl_ir(rhs.value, lhs.encoding()); + } + void cmpl(Imm32 rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.cmpl_ir(rhs.value, lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.cmpl_im(rhs.value, lhs.disp(), lhs.base()); + break; + case Operand::MEM_SCALE: + masm.cmpl_im(rhs.value, lhs.disp(), lhs.base(), lhs.index(), lhs.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.cmpl_im(rhs.value, lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + CodeOffset cmplWithPatch(Imm32 rhs, Register lhs) { + masm.cmpl_i32r(rhs.value, lhs.encoding()); + return CodeOffset(masm.currentOffset()); + } + void cmpw(Register rhs, Register lhs) { + masm.cmpw_rr(rhs.encoding(), lhs.encoding()); + } + void setCC(Condition cond, Register r) { + masm.setCC_r(static_cast(cond), r.encoding()); + } + void testb(Register rhs, Register lhs) { + MOZ_ASSERT(AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(rhs)); + MOZ_ASSERT(AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(lhs)); + masm.testb_rr(rhs.encoding(), lhs.encoding()); + } + void testw(Register rhs, Register lhs) { + masm.testw_rr(lhs.encoding(), rhs.encoding()); + } + void testl(Register rhs, Register lhs) { + masm.testl_rr(lhs.encoding(), rhs.encoding()); + } + void testl(Imm32 rhs, Register lhs) { + masm.testl_ir(rhs.value, lhs.encoding()); + } + void testl(Imm32 rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.testl_ir(rhs.value, lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.testl_i32m(rhs.value, lhs.disp(), lhs.base()); + break; + case Operand::MEM_ADDRESS32: + masm.testl_i32m(rhs.value, lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void addl(Imm32 imm, Register dest) { + 
masm.addl_ir(imm.value, dest.encoding()); + } + CodeOffset addlWithPatch(Imm32 imm, Register dest) { + masm.addl_i32r(imm.value, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + void addl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.addl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.addl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_ADDRESS32: + masm.addl_im(imm.value, op.address()); + break; + case Operand::MEM_SCALE: + masm.addl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void addw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.addw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.addw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_ADDRESS32: + masm.addw_im(imm.value, op.address()); + break; + case Operand::MEM_SCALE: + masm.addw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subl(Imm32 imm, Register dest) { + masm.subl_ir(imm.value, dest.encoding()); + } + void subl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.subl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.subl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.subl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.subw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.subw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.subw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } 
+ } + void addl(Register src, Register dest) { + masm.addl_rr(src.encoding(), dest.encoding()); + } + void addl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.addl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.addl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.addl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void addw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.addw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.addw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.addw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subl(Register src, Register dest) { + masm.subl_rr(src.encoding(), dest.encoding()); + } + void subl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.subl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.subl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.subl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.subl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.subl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.subw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.subw_rm(src.encoding(), dest.disp(), 
dest.base()); + break; + case Operand::MEM_SCALE: + masm.subw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orl(Register reg, Register dest) { + masm.orl_rr(reg.encoding(), dest.encoding()); + } + void orl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.orl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.orl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.orl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.orw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.orw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.orw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orl(Imm32 imm, Register reg) { + masm.orl_ir(imm.value, reg.encoding()); + } + void orl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.orl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.orl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.orl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.orw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.orw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.orw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + 
void xorl(Register src, Register dest) { + masm.xorl_rr(src.encoding(), dest.encoding()); + } + void xorl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.xorl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.xorl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.xorl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.xorw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.xorw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.xorw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorl(Imm32 imm, Register reg) { + masm.xorl_ir(imm.value, reg.encoding()); + } + void xorl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.xorl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.xorl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.xorl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.xorw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.xorw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.xorw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andl(Register src, Register dest) { + masm.andl_rr(src.encoding(), dest.encoding()); + } + void andl(Register src, const Operand& dest) { + switch (dest.kind()) { + case 
Operand::REG: + masm.andl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.andl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.andl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.andw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.andw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.andw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andl(Imm32 imm, Register dest) { + masm.andl_ir(imm.value, dest.encoding()); + } + void andl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.andl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.andl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.andl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.andw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.andw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.andw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void addl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.addl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.addl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orl(const Operand& src, Register dest) { + switch 
(src.kind()) { + case Operand::REG: + masm.orl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.orl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.xorl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.xorl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.andl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.andl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void bsrl(const Register& src, const Register& dest) { + masm.bsrl_rr(src.encoding(), dest.encoding()); + } + void bsfl(const Register& src, const Register& dest) { + masm.bsfl_rr(src.encoding(), dest.encoding()); + } + void popcntl(const Register& src, const Register& dest) { + masm.popcntl_rr(src.encoding(), dest.encoding()); + } + void imull(Register multiplier) { + masm.imull_r(multiplier.encoding()); + } + void umull(Register multiplier) { + masm.mull_r(multiplier.encoding()); + } + void imull(Imm32 imm, Register dest) { + masm.imull_ir(imm.value, dest.encoding(), dest.encoding()); + } + void imull(Register src, Register dest) { + masm.imull_rr(src.encoding(), dest.encoding()); + } + void imull(Imm32 imm, Register src, Register dest) { + masm.imull_ir(imm.value, src.encoding(), dest.encoding()); + } + void imull(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.imull_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.imull_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void negl(const Operand& 
src) { + switch (src.kind()) { + case Operand::REG: + masm.negl_r(src.reg()); + break; + case Operand::MEM_REG_DISP: + masm.negl_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void negl(Register reg) { + masm.negl_r(reg.encoding()); + } + void notl(const Operand& src) { + switch (src.kind()) { + case Operand::REG: + masm.notl_r(src.reg()); + break; + case Operand::MEM_REG_DISP: + masm.notl_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void notl(Register reg) { + masm.notl_r(reg.encoding()); + } + void shrl(const Imm32 imm, Register dest) { + masm.shrl_ir(imm.value, dest.encoding()); + } + void shll(const Imm32 imm, Register dest) { + masm.shll_ir(imm.value, dest.encoding()); + } + void sarl(const Imm32 imm, Register dest) { + masm.sarl_ir(imm.value, dest.encoding()); + } + void shrl_cl(Register dest) { + masm.shrl_CLr(dest.encoding()); + } + void shll_cl(Register dest) { + masm.shll_CLr(dest.encoding()); + } + void sarl_cl(Register dest) { + masm.sarl_CLr(dest.encoding()); + } + void shrdl_cl(Register src, Register dest) { + masm.shrdl_CLr(src.encoding(), dest.encoding()); + } + void shldl_cl(Register src, Register dest) { + masm.shldl_CLr(src.encoding(), dest.encoding()); + } + + void roll(const Imm32 imm, Register dest) { + masm.roll_ir(imm.value, dest.encoding()); + } + void roll_cl(Register dest) { + masm.roll_CLr(dest.encoding()); + } + void rorl(const Imm32 imm, Register dest) { + masm.rorl_ir(imm.value, dest.encoding()); + } + void rorl_cl(Register dest) { + masm.rorl_CLr(dest.encoding()); + } + + void incl(const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.incl_m32(op.disp(), op.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_incl(const Operand& op) { + masm.prefix_lock(); + incl(op); + } + + void decl(const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + 
masm.decl_m32(op.disp(), op.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_decl(const Operand& op) { + masm.prefix_lock(); + decl(op); + } + + void addb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.addb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.addb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void addb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.addb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.addb_rm(src.encoding(), op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void subb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.subb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.subb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void subb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.subb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.subb_rm(src.encoding(), op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void andb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.andb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.andb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void andb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + 
masm.andb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.andb_rm(src.encoding(), op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void orb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.orb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.orb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void orb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.orb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.orb_rm(src.encoding(), op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void xorb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.xorb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.xorb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void xorb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.xorb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.xorb_rm(src.encoding(), op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + template + void lock_addb(T src, const Operand& op) { + masm.prefix_lock(); + addb(src, op); + } + template + void lock_subb(T src, const Operand& op) { + masm.prefix_lock(); + subb(src, op); + } + template + void lock_andb(T src, const Operand& op) { + masm.prefix_lock(); + andb(src, op); + } + template + void lock_orb(T src, const Operand& op) { + masm.prefix_lock(); + orb(src, op); + } + 
template + void lock_xorb(T src, const Operand& op) { + masm.prefix_lock(); + xorb(src, op); + } + + template + void lock_addw(T src, const Operand& op) { + masm.prefix_lock(); + addw(src, op); + } + template + void lock_subw(T src, const Operand& op) { + masm.prefix_lock(); + subw(src, op); + } + template + void lock_andw(T src, const Operand& op) { + masm.prefix_lock(); + andw(src, op); + } + template + void lock_orw(T src, const Operand& op) { + masm.prefix_lock(); + orw(src, op); + } + template + void lock_xorw(T src, const Operand& op) { + masm.prefix_lock(); + xorw(src, op); + } + + // Note, lock_addl(imm, op) is used for a memory barrier on non-SSE2 systems, + // among other things. Do not optimize, replace by XADDL, or similar. + template + void lock_addl(T src, const Operand& op) { + masm.prefix_lock(); + addl(src, op); + } + template + void lock_subl(T src, const Operand& op) { + masm.prefix_lock(); + subl(src, op); + } + template + void lock_andl(T src, const Operand& op) { + masm.prefix_lock(); + andl(src, op); + } + template + void lock_orl(T src, const Operand& op) { + masm.prefix_lock(); + orl(src, op); + } + template + void lock_xorl(T src, const Operand& op) { + masm.prefix_lock(); + xorl(src, op); + } + + void lock_cmpxchgb(Register src, const Operand& mem) { + masm.prefix_lock(); + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.cmpxchgb(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.cmpxchgb(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_cmpxchgw(Register src, const Operand& mem) { + masm.prefix_lock(); + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.cmpxchgw(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.cmpxchgw(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + 
void lock_cmpxchgl(Register src, const Operand& mem) { + masm.prefix_lock(); + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.cmpxchgl(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.cmpxchgl(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void xchgb(Register src, const Operand& mem) { + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.xchgb_rm(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.xchgb_rm(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xchgw(Register src, const Operand& mem) { + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.xchgw_rm(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.xchgw_rm(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xchgl(Register src, const Operand& mem) { + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.xchgl_rm(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.xchgl_rm(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void lock_xaddb(Register srcdest, const Operand& mem) { + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.lock_xaddb_rm(srcdest.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.lock_xaddb_rm(srcdest.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_xaddw(Register srcdest, const Operand& mem) { + masm.prefix_16_for_32(); + lock_xaddl(srcdest, mem); + } + void lock_xaddl(Register srcdest, const Operand& mem) { + switch (mem.kind()) { + case 
Operand::MEM_REG_DISP: + masm.lock_xaddl_rm(srcdest.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.lock_xaddl_rm(srcdest.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void push(const Imm32 imm) { + masm.push_i(imm.value); + } + + void push(const Operand& src) { + switch (src.kind()) { + case Operand::REG: + masm.push_r(src.reg()); + break; + case Operand::MEM_REG_DISP: + masm.push_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void push(Register src) { + masm.push_r(src.encoding()); + } + void push(const Address& src) { + masm.push_m(src.offset, src.base.encoding()); + } + + void pop(const Operand& src) { + switch (src.kind()) { + case Operand::REG: + masm.pop_r(src.reg()); + break; + case Operand::MEM_REG_DISP: + masm.pop_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void pop(Register src) { + masm.pop_r(src.encoding()); + } + void pop(const Address& src) { + masm.pop_m(src.offset, src.base.encoding()); + } + + void pushFlags() { + masm.push_flags(); + } + void popFlags() { + masm.pop_flags(); + } + +#ifdef JS_CODEGEN_X86 + void pushAllRegs() { + masm.pusha(); + } + void popAllRegs() { + masm.popa(); + } +#endif + + // Zero-extend byte to 32-bit integer. 
+ void movzbl(Register src, Register dest) { + masm.movzbl_rr(src.encoding(), dest.encoding()); + } + + void cdq() { + masm.cdq(); + } + void idiv(Register divisor) { + masm.idivl_r(divisor.encoding()); + } + void udiv(Register divisor) { + masm.divl_r(divisor.encoding()); + } + + void vpinsrb(unsigned lane, Register src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpinsrb_irr(lane, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpinsrw(unsigned lane, Register src1, FloatRegister src0, FloatRegister dest) { + masm.vpinsrw_irr(lane, src1.encoding(), src0.encoding(), dest.encoding()); + } + + void vpinsrd(unsigned lane, Register src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpinsrd_irr(lane, src1.encoding(), src0.encoding(), dest.encoding()); + } + + void vpextrb(unsigned lane, FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpextrb_irr(lane, src.encoding(), dest.encoding()); + } + void vpextrw(unsigned lane, FloatRegister src, Register dest) { + masm.vpextrw_irr(lane, src.encoding(), dest.encoding()); + } + void vpextrd(unsigned lane, FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpextrd_irr(lane, src.encoding(), dest.encoding()); + } + void vpsrldq(Imm32 shift, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrldq_ir(shift.value, src0.encoding(), dest.encoding()); + } + void vpsllq(Imm32 shift, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsllq_ir(shift.value, src0.encoding(), dest.encoding()); + } + void vpsrlq(Imm32 shift, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrlq_ir(shift.value, src0.encoding(), dest.encoding()); + } + void vpslld(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpslld_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpslld(Imm32 count, FloatRegister src0, 
FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpslld_ir(count.value, src0.encoding(), dest.encoding()); + } + void vpsrad(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrad_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsrad(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrad_ir(count.value, src0.encoding(), dest.encoding()); + } + void vpsrld(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrld_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsrld(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrld_ir(count.value, src0.encoding(), dest.encoding()); + } + + void vpsllw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsllw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsllw(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsllw_ir(count.value, src0.encoding(), dest.encoding()); + } + void vpsraw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsraw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsraw(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsraw_ir(count.value, src0.encoding(), dest.encoding()); + } + void vpsrlw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrlw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsrlw(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrlw_ir(count.value, src0.encoding(), dest.encoding()); + } + + void vcvtsi2sd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::REG: + masm.vcvtsi2sd_rr(src1.reg(), 
src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vcvtsi2sd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vcvtsi2sd_mr(src1.disp(), src1.base(), src1.index(), src1.scale(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vcvttsd2si(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvttsd2si_rr(src.encoding(), dest.encoding()); + } + void vcvttss2si(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvttss2si_rr(src.encoding(), dest.encoding()); + } + void vcvtsi2ss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::REG: + masm.vcvtsi2ss_rr(src1.reg(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vcvtsi2ss_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vcvtsi2ss_mr(src1.disp(), src1.base(), src1.index(), src1.scale(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vcvtsi2ss(Register src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtsi2ss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vcvtsi2sd(Register src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtsi2sd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vcvttps2dq(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvttps2dq_rr(src.encoding(), dest.encoding()); + } + void vcvtdq2ps(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtdq2ps_rr(src.encoding(), dest.encoding()); + } + void vmovmskpd(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovmskpd_rr(src.encoding(), dest.encoding()); + } + void vmovmskps(FloatRegister 
src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovmskps_rr(src.encoding(), dest.encoding()); + } + void vptest(FloatRegister rhs, FloatRegister lhs) { + MOZ_ASSERT(HasSSE41()); + masm.vptest_rr(rhs.encoding(), lhs.encoding()); + } + void vucomisd(FloatRegister rhs, FloatRegister lhs) { + MOZ_ASSERT(HasSSE2()); + masm.vucomisd_rr(rhs.encoding(), lhs.encoding()); + } + void vucomiss(FloatRegister rhs, FloatRegister lhs) { + MOZ_ASSERT(HasSSE2()); + masm.vucomiss_rr(rhs.encoding(), lhs.encoding()); + } + + void vpcmpeqb(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpeqb_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpeqb_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpeqb_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpcmpgtb(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpgtb_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpgtb_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpgtb_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpcmpeqw(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpeqw_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpeqw_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpeqw_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + 
MOZ_CRASH("unexpected operand kind"); + } + } + void vpcmpgtw(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpgtw_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpgtw_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpgtw_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpcmpeqd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpeqd_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpeqd_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpeqd_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpcmpgtd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpgtd_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpgtd_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpgtd_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vcmpps(uint8_t order, Operand src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + // :TODO: (Bug 1132894) See LIRGeneratorX86Shared::lowerForFPU + // FIXME: This logic belongs in the MacroAssembler. 
+ if (!HasAVX() && !src0.aliases(dest)) { + if (src1.kind() == Operand::FPREG && + dest.aliases(FloatRegister::FromCode(src1.fpu()))) + { + vmovdqa(src1, ScratchSimd128Reg); + src1 = Operand(ScratchSimd128Reg); + } + vmovdqa(src0, dest); + src0 = dest; + } + switch (src1.kind()) { + case Operand::FPREG: + masm.vcmpps_rr(order, src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vcmpps_mr(order, src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vcmpps_mr(order, src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vcmpeqps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_EQ, src1, src0, dest); + } + void vcmpltps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_LT, src1, src0, dest); + } + void vcmpleps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_LE, src1, src0, dest); + } + void vcmpunordps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_UNORD, src1, src0, dest); + } + void vcmpneqps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_NEQ, src1, src0, dest); + } + void vcmpordps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_ORD, src1, src0, dest); + } + void vrcpps(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vrcpps_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vrcpps_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vrcpps_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsqrtps(const Operand& 
src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vsqrtps_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vsqrtps_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vsqrtps_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vrsqrtps(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vrsqrtps_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vrsqrtps_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vrsqrtps_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovd(Register src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovd_rr(src.encoding(), dest.encoding()); + } + void vmovd(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovd_rr(src.encoding(), dest.encoding()); + } + void vmovd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovd_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovd_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovd(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovd_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovd_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovq_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovq(const Operand& src, FloatRegister 
dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovq_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovq_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovq(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovq_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovq_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddb_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubb_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddsb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + 
masm.vpaddsb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddsb_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddsb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddusb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddusb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddusb_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddusb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubsb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubsb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubsb_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubsb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubusb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubusb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubusb_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubusb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + 
MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddsw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddsw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddsw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddsw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddusw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddusw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddusw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddusw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubsw(const Operand& src1, 
FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubsw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubsw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubsw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubusw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubusw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubusw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubusw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddd_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubd_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } 
+ } + void vpmuludq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpmuludq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpmuludq(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmuludq_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmuludq_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmullw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmullw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmullw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmulld(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmulld_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmulld_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpmulld_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vaddps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vaddps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vaddps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vaddps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); 
+ } + } + void vsubps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vsubps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vsubps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vsubps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmulps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmulps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmulps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vdivps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vdivps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vdivps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vdivps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmaxps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmaxps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmaxps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmaxps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + 
MOZ_CRASH("unexpected operand kind"); + } + } + void vminps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vminps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vminps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vminps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vandps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vandps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vandps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vandps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vandnps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + // Negates bits of dest and then applies AND + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vandnps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vandnps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vandnps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vorps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vorps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vorps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + 
masm.vorps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vxorps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vxorps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vxorps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vxorps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpand(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpand_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpand(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpand_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpand_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpand_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpor(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpor_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpor(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpor_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpor_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpor_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void 
vpxor(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpxor_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpxor(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpxor_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpxor_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpxor_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpandn(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpandn_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpandn(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpandn_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpandn_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpandn_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpshufd(uint32_t mask, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpshufd_irr(mask, src.encoding(), dest.encoding()); + } + void vpshufd(uint32_t mask, const Operand& src1, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpshufd_irr(mask, src1.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpshufd_imr(mask, src1.disp(), src1.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpshufd_imr(mask, src1.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void 
vpshuflw(uint32_t mask, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpshuflw_irr(mask, src.encoding(), dest.encoding()); + } + void vpshufhw(uint32_t mask, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpshufhw_irr(mask, src.encoding(), dest.encoding()); + } + void vpshufb(FloatRegister mask, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSSE3()); + masm.vpshufb_rr(mask.encoding(), src.encoding(), dest.encoding()); + } + void vmovddup(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + masm.vmovddup_rr(src.encoding(), dest.encoding()); + } + void vmovhlps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovhlps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmovlhps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovlhps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vunpcklps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vunpcklps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vunpcklps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vunpcklps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vunpcklps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vunpcklps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vunpckhps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vunpckhps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vunpckhps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + 
case Operand::FPREG: + masm.vunpckhps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vunpckhps_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vunpckhps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vshufps(uint32_t mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vshufps_irr(mask, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vshufps(uint32_t mask, const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vshufps_irr(mask, src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vshufps_imr(mask, src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vshufps_imr(mask, src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vaddsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vaddsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vaddss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vaddss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vaddsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vaddsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vaddsd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vaddsd_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void 
vaddss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vaddss_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vaddss_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vaddss_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsubsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vsubsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vsubss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vsubss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vsubsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vsubsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vsubsd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsubss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vsubss_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vsubss_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmulsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmulsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmulsd_rr(src1.fpu(), 
src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmulsd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmulss_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmulss_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmulss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vdivsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vdivsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vdivss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vdivss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vdivsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vdivsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vdivsd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vdivss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vdivss_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vdivss_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vxorpd(FloatRegister src1, FloatRegister src0,
FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vxorpd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vxorps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vxorps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vorpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vorpd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vorps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vorps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vandpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vandpd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vandps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vandps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vsqrtsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vsqrtsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vsqrtss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vsqrtss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vroundsd(X86Encoding::RoundingMode mode, FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vroundsd_irr(mode, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vroundss(X86Encoding::RoundingMode mode, FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vroundss_irr(mode, src1.encoding(), src0.encoding(), dest.encoding()); + } + // Build the 8-bit vinsertps immediate: zeroMask occupies bits 0-3, + // destLane bits 4-5 and sourceLane bits 6-7. + unsigned vinsertpsMask(unsigned sourceLane, unsigned destLane, unsigned zeroMask = 0) + { + // Note that the sourceLane bits are ignored in the case of a source + // memory operand, and the source is the given 32-bits
memory location. + MOZ_ASSERT(zeroMask < 16); + // Each lane selector is only two bits wide. Without these checks an + // oversized destLane would spill into the sourceLane field and still + // pass the |ret < 256| assertion below. + MOZ_ASSERT(destLane < 4); + MOZ_ASSERT(sourceLane < 4); + unsigned ret = zeroMask ; + ret |= destLane << 4; + ret |= sourceLane << 6; + MOZ_ASSERT(ret < 256); + return ret; + } + void vinsertps(uint32_t mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vinsertps_irr(mask, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vinsertps(uint32_t mask, const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vinsertps_irr(mask, src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vinsertps_imr(mask, src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + // Pack four per-lane selectors into a 4-bit blend immediate: x is bit 0, + // y bit 1, z bit 2 and w bit 3. + unsigned blendpsMask(bool x, bool y, bool z, bool w) { + return (x << 0) | (y << 1) | (z << 2) | (w << 3); + } + void vblendps(unsigned mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vblendps_irr(mask, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vblendps(unsigned mask, const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vblendps_irr(mask, src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vblendps_imr(mask, src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vblendvps(FloatRegister mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vblendvps_rr(mask.encoding(), src1.encoding(), src0.encoding(), dest.encoding()); + } + void vblendvps(FloatRegister mask, const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vblendvps_rr(mask.encoding(), src1.fpu(),
src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vblendvps_mr(mask.encoding(), src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovsldup(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + masm.vmovsldup_rr(src.encoding(), dest.encoding()); + } + void vmovsldup(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + switch (src.kind()) { + case Operand::FPREG: + masm.vmovsldup_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmovsldup_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovshdup(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + masm.vmovshdup_rr(src.encoding(), dest.encoding()); + } + void vmovshdup(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + switch (src.kind()) { + case Operand::FPREG: + masm.vmovshdup_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmovshdup_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vminsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vminsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vminsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vminsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vminsd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vminss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vminss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmaxsd(FloatRegister
src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmaxsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmaxsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmaxsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmaxsd_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmaxss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmaxss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void fisttp(const Operand& dest) { + MOZ_ASSERT(HasSSE3()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fisttp_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fistp(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fistp_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fnstcw(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fnstcw_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fldcw(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fldcw_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fnstsw(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fnstsw_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fld(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fld_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fld32(const Operand& dest) { + switch
(dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fld32_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fstp(const Operand& src) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.fstp_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fstp32(const Operand& src) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.fstp32_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + // Defined for compatibility with ARM's assembler + uint32_t actualIndex(uint32_t x) { + return x; + } + + void flushBuffer() { + } + + // Patching. + + // Size in bytes of the call written by PatchWrite_NearCall: one opcode + // byte followed by a 32-bit displacement. + static size_t PatchWrite_NearCallSize() { + return 5; + } + // Read the pointer-sized word that ends at |instPtr|. + static uintptr_t GetPointer(uint8_t* instPtr) { + uintptr_t* ptr = ((uintptr_t*) instPtr) - 1; + return *ptr; + } + // Write a relative call (opcode 0xE8 plus a 32-bit displacement measured + // from the end of the instruction) at the start location |startLabel|. + // Note that this DOES NOT patch data that comes before |startLabel|. + static void PatchWrite_NearCall(CodeLocationLabel startLabel, CodeLocationLabel target) { + uint8_t* start = startLabel.raw(); + *start = 0xE8; + ptrdiff_t offset = target - startLabel - PatchWrite_NearCallSize(); + MOZ_ASSERT(int32_t(offset) == offset); + *((int32_t*) (start + 1)) = offset; + } + + // Overwrite the 32-bit value that ends at |dataLabel|. + static void PatchWrite_Imm32(CodeLocationLabel dataLabel, Imm32 toWrite) { + *((int32_t*) dataLabel.raw() - 1) = toWrite.value; + } + + static void PatchDataWithValueCheck(CodeLocationLabel data, PatchedImmPtr newData, + PatchedImmPtr expectedData) { + // The pointer given is a pointer to *after* the data.
+ uintptr_t* ptr = ((uintptr_t*) data.raw()) - 1; + MOZ_ASSERT(*ptr == (uintptr_t)expectedData.value); + *ptr = (uintptr_t)newData.value; + } + static void PatchDataWithValueCheck(CodeLocationLabel data, ImmPtr newData, ImmPtr expectedData) { + PatchDataWithValueCheck(data, PatchedImmPtr(newData.value), PatchedImmPtr(expectedData.value)); + } + + static void PatchInstructionImmediate(uint8_t* code, PatchedImmPtr imm) { + MOZ_CRASH("Unused."); + } + + static uint32_t NopSize() { + return 1; + } + static uint8_t* NextInstruction(uint8_t* cur, uint32_t* count) { + MOZ_CRASH("nextInstruction NYI on x86"); + } + + // Toggle a jmp or cmp emitted by toggledJump(). + // The first opcode byte is swapped in place between 0x3D and 0xE9. + static void ToggleToJmp(CodeLocationLabel inst) { + uint8_t* ptr = (uint8_t*)inst.raw(); + MOZ_ASSERT(*ptr == 0x3D); + *ptr = 0xE9; + } + static void ToggleToCmp(CodeLocationLabel inst) { + uint8_t* ptr = (uint8_t*)inst.raw(); + MOZ_ASSERT(*ptr == 0xE9); + *ptr = 0x3D; + } + // Rewrite the first opcode byte as CALL (0xE8) when |enabled|, else as + // CMP (0x3D). + static void ToggleCall(CodeLocationLabel inst, bool enabled) { + uint8_t* ptr = (uint8_t*)inst.raw(); + MOZ_ASSERT(*ptr == 0x3D || // CMP + *ptr == 0xE8); // CALL + *ptr = enabled ? 0xE8 : 0x3D; + } + + MOZ_COLD void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, + const Disassembler::HeapAccess& heapAccess); +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Assembler_x86_shared_h */ diff --git a/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.cpp b/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.cpp new file mode 100644 index 000000000..6dec02a31 --- /dev/null +++ b/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.cpp @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "jit/x86-shared/AssemblerBuffer-x86-shared.h" + +#include "mozilla/Sprintf.h" + +#include "jsopcode.h" + +void js::jit::GenericAssembler::spew(const char* fmt, va_list va) +{ + // Format into a fixed-size scratch buffer first. The formatted text may + // itself contain '%' characters, so it is only ever handed on through an + // explicit "%s" format, never as a format string. + char text[200]; + + const int written = VsprintfLiteral(text, fmt, va); + if (written < 0) + return; + + if (printer) + printer->printf("%s\n", text); + js::jit::JitSpew(js::jit::JitSpew_Codegen, "%s", text); +} diff --git a/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.h b/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.h new file mode 100644 index 000000000..8cb557784 --- /dev/null +++ b/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.h @@ -0,0 +1,205 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC.
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef jit_x86_shared_AssemblerBuffer_x86_shared_h +#define jit_x86_shared_AssemblerBuffer_x86_shared_h + +#include +#include + +#include "ds/PageProtectingVector.h" +#include "jit/ExecutableAllocator.h" +#include "jit/JitSpewer.h" + +// Spew formatting helpers. +#define PRETTYHEX(x) (((x)<0)?"-":""),(((x)<0)?-(x):(x)) + +#define MEM_o "%s0x%x" +#define MEM_os MEM_o "(,%s,%d)" +#define MEM_ob MEM_o "(%s)" +#define MEM_obs MEM_o "(%s,%s,%d)" + +#define MEM_o32 "%s0x%04x" +#define MEM_o32s MEM_o32 "(,%s,%d)" +#define MEM_o32b MEM_o32 "(%s)" +#define MEM_o32bs MEM_o32 "(%s,%s,%d)" +#define MEM_o32r ".Lfrom%d(%%rip)" + +#define ADDR_o(offset) PRETTYHEX(offset) +#define ADDR_os(offset, index, scale) ADDR_o(offset), GPRegName((index)), (1<<(scale)) +#define ADDR_ob(offset, base) ADDR_o(offset), GPRegName((base)) +#define ADDR_obs(offset, base, index, scale) ADDR_ob(offset, base), GPRegName((index)), (1<<(scale)) + +#define ADDR_o32(offset) ADDR_o(offset) +#define ADDR_o32s(offset, index, scale) ADDR_os(offset, index, scale) +#define ADDR_o32b(offset, base) ADDR_ob(offset, base) +#define ADDR_o32bs(offset, base, index, scale) ADDR_obs(offset, base, index, scale) +#define ADDR_o32r(offset) (offset) + +namespace js { + + class Sprinter; + +namespace jit { + + class AssemblerBuffer + { + template + MOZ_ALWAYS_INLINE void sizedAppendUnchecked(T value) + { + m_buffer.infallibleAppend(reinterpret_cast(&value), size); + } + + template + 
MOZ_ALWAYS_INLINE void sizedAppend(T value) + { + if (MOZ_UNLIKELY(!m_buffer.append(reinterpret_cast(&value), size))) + oomDetected(); + } + + public: + AssemblerBuffer() + : m_oom(false) + { + // Provide memory protection once the buffer starts to get big. + m_buffer.setLowerBoundForProtection(32 * 1024); + } + + void ensureSpace(size_t space) + { + if (MOZ_UNLIKELY(!m_buffer.reserve(m_buffer.length() + space))) + oomDetected(); + } + + bool isAligned(size_t alignment) const + { + return !(m_buffer.length() & (alignment - 1)); + } + + MOZ_ALWAYS_INLINE void putByteUnchecked(int value) { sizedAppendUnchecked<1>(value); } + MOZ_ALWAYS_INLINE void putShortUnchecked(int value) { sizedAppendUnchecked<2>(value); } + MOZ_ALWAYS_INLINE void putIntUnchecked(int value) { sizedAppendUnchecked<4>(value); } + MOZ_ALWAYS_INLINE void putInt64Unchecked(int64_t value) { sizedAppendUnchecked<8>(value); } + + MOZ_ALWAYS_INLINE void putByte(int value) { sizedAppend<1>(value); } + MOZ_ALWAYS_INLINE void putShort(int value) { sizedAppend<2>(value); } + MOZ_ALWAYS_INLINE void putInt(int value) { sizedAppend<4>(value); } + MOZ_ALWAYS_INLINE void putInt64(int64_t value) { sizedAppend<8>(value); } + + MOZ_MUST_USE bool append(const unsigned char* values, size_t size) + { + if (MOZ_UNLIKELY(!m_buffer.append(values, size))) { + oomDetected(); + return false; + } + return true; + } + + unsigned char* data() + { + return m_buffer.begin(); + } + + size_t size() const + { + return m_buffer.length(); + } + + bool oom() const + { + return m_oom; + } + + const unsigned char* buffer() const { + MOZ_ASSERT(!m_oom); + return m_buffer.begin(); + } + + void unprotectDataRegion(size_t firstByteOffset, size_t lastByteOffset) { + m_buffer.unprotectRegion(firstByteOffset, lastByteOffset); + } + void reprotectDataRegion(size_t firstByteOffset, size_t lastByteOffset) { + m_buffer.reprotectRegion(firstByteOffset, lastByteOffset); + } + + protected: + /* + * OOM handling: This class can OOM in the ensureSpace() 
method trying + * to allocate a new buffer. In response to an OOM, we need to avoid + * crashing and report the error. We also want to make it so that + * users of this class need to check for OOM only at certain points + * and not after every operation. + * + * Our strategy for handling an OOM is to set m_oom, and then clear (but + * not free) m_buffer, preserving the current buffer. This way, the user + * can continue assembling into the buffer, deferring OOM checking + * until the user wants to read code out of the buffer. + * + * See also the |buffer| method. + */ + void oomDetected() { + m_oom = true; + m_buffer.clear(); + } + + PageProtectingVector m_buffer; + bool m_oom; + }; + + class GenericAssembler + { + Sprinter* printer; + + public: + + GenericAssembler() + : printer(NULL) + {} + + void setPrinter(Sprinter* sp) { + printer = sp; + } + + void spew(const char* fmt, ...) MOZ_FORMAT_PRINTF(2, 3) + { + if (MOZ_UNLIKELY(printer || JitSpewEnabled(JitSpew_Codegen))) { + va_list va; + va_start(va, fmt); + spew(fmt, va); + va_end(va); + } + } + + MOZ_COLD void spew(const char* fmt, va_list va); + }; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_AssemblerBuffer_x86_shared_h */ diff --git a/js/src/jit/x86-shared/AtomicOperations-x86-shared.h b/js/src/jit/x86-shared/AtomicOperations-x86-shared.h new file mode 100644 index 000000000..b34aba7ef --- /dev/null +++ b/js/src/jit/x86-shared/AtomicOperations-x86-shared.h @@ -0,0 +1,602 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* For overall documentation, see jit/AtomicOperations.h */ + +#ifndef jit_shared_AtomicOperations_x86_shared_h +#define jit_shared_AtomicOperations_x86_shared_h + +#include "mozilla/Assertions.h" +#include "mozilla/Types.h" + +// Lock-freedom on x86 and x64: +// +// On x86 and x64 there are atomic instructions for 8-byte accesses: +// +// Loads and stores: +// - Loads and stores are single-copy atomic for up to 8 bytes +// starting with the Pentium; the store requires a post-fence for +// sequential consistency +// +// CompareExchange: +// - On x64 CMPXCHGQ can always be used +// - On x86 CMPXCHG8B can be used starting with the first Pentium +// +// Exchange: +// - On x64 XCHGQ can always be used +// - On x86 one has to use a CompareExchange loop +// +// Observe also that the JIT will not be enabled unless we have SSE2, +// which was introduced with the Pentium 4. Ergo the JIT will be able +// to use atomic instructions for up to 8 bytes on all x86 platforms +// for the primitives we care about. +// +// However, C++ compilers and libraries may not provide access to +// those 8-byte instructions directly. Clang in 32-bit mode does not +// provide 8-byte atomic primitives at all (even with eg -arch i686 +// specified). On Windows 32-bit, MSVC does not provide +// _InterlockedExchange64 since it does not map directly to an +// instruction. +// +// There are thus sundry workarounds below to handle known corner +// cases. + +#if defined(__clang__) || defined(__GNUC__) + +// The default implementation tactic for gcc/clang is to use the newer +// __atomic intrinsics added for use in C++11 <atomic>. Where that +// isn't available, we use GCC's older __sync functions instead. +// +// ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS is kept as a backward +// compatible option for older compilers: enable this to use GCC's old +// __sync functions instead of the newer __atomic functions.
This +// will be required for GCC 4.6.x and earlier, and probably for Clang +// 3.1, should we need to use those versions. + +// #define ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + +// Lock-free 8-byte atomics are assumed on x86 but must be disabled in +// corner cases, see comments below and in isLockfree8(). + +# define LOCKFREE8 + +// This pertains to Clang compiling with -m32, in this case the 64-bit +// __atomic builtins are not available (observed on various Mac OS X +// versions with Apple Clang and on Linux with Clang 3.5). +// +// For now just punt: disable lock-free 8-byte data. The JIT will +// call isLockfree8() to determine what to do and will stay in sync. +// (Bug 1146817 tracks the work to improve on this.) + +# if defined(__clang__) && defined(__i386) +# undef LOCKFREE8 +# endif + +inline bool +js::jit::AtomicOperations::isLockfree8() +{ +# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + MOZ_ASSERT(__atomic_always_lock_free(sizeof(int8_t), 0)); + MOZ_ASSERT(__atomic_always_lock_free(sizeof(int16_t), 0)); + MOZ_ASSERT(__atomic_always_lock_free(sizeof(int32_t), 0)); +# endif +# ifdef LOCKFREE8 +# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + MOZ_ASSERT(__atomic_always_lock_free(sizeof(int64_t), 0)); +# endif + return true; +# else + return false; +# endif +} + +inline void +js::jit::AtomicOperations::fenceSeqCst() +{ +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + __sync_synchronize(); +# else + __atomic_thread_fence(__ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::loadSeqCst(T* addr) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + // Inhibit compiler reordering with a volatile load. The x86 does + // not reorder loads with respect to subsequent loads or stores + // and no ordering barrier is required here. See more elaborate + // comments in storeSeqCst.
+ T v = *static_cast<T volatile*>(addr); +# else + T v; + __atomic_load(addr, &v, __ATOMIC_SEQ_CST); +# endif + return v; +} + +# ifndef LOCKFREE8 +template<> +inline int64_t +js::jit::AtomicOperations::loadSeqCst(int64_t* addr) +{ + MOZ_CRASH(); +} + +template<> +inline uint64_t +js::jit::AtomicOperations::loadSeqCst(uint64_t* addr) +{ + MOZ_CRASH(); +} +# endif // LOCKFREE8 + +template<typename T> +inline void +js::jit::AtomicOperations::storeSeqCst(T* addr, T val) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + // Inhibit compiler reordering with a volatile store. The x86 may + // reorder a store with respect to a subsequent load from a + // different location, hence there is an ordering barrier here to + // prevent that. + // + // By way of background, look to eg + // http://bartoszmilewski.com/2008/11/05/who-ordered-memory-fences-on-an-x86/ + // + // Consider: + // + // uint8_t x = 0, y = 0; // to start + // + // thread1: + // sx: AtomicOperations::store(&x, 1); + // gy: uint8_t obs1 = AtomicOperations::loadSeqCst(&y); + // + // thread2: + // sy: AtomicOperations::store(&y, 1); + // gx: uint8_t obs2 = AtomicOperations::loadSeqCst(&x); + // + // Sequential consistency requires a total global ordering of + // operations: sx-gy-sy-gx, sx-sy-gx-gy, sx-sy-gy-gx, sy-gx-sx-gy, + // sy-sx-gy-gx, or sy-sx-gx-gy. In every ordering at least one of + // sx-before-gx or sy-before-gy happens, so *at least one* of + // obs1/obs2 is 1. + // + // If AtomicOperations::{load,store}SeqCst were just volatile + // {load,store}, x86 could reorder gx/gy before each thread's + // prior store. That would permit gx-gy-sx-sy: both loads would be + // 0! Thus after a volatile store we must synchronize to ensure + // the store happens before the load.
+ *static_cast<T volatile*>(addr) = val; + __sync_synchronize(); +# else + __atomic_store(addr, &val, __ATOMIC_SEQ_CST); +# endif +} + +# ifndef LOCKFREE8 +template<> +inline void +js::jit::AtomicOperations::storeSeqCst(int64_t* addr, int64_t val) +{ + MOZ_CRASH(); +} + +template<> +inline void +js::jit::AtomicOperations::storeSeqCst(uint64_t* addr, uint64_t val) +{ + MOZ_CRASH(); +} +# endif // LOCKFREE8 + +template<typename T> +inline T +js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + T v; + do { + // Here I assume the compiler will not hoist the load. It + // shouldn't, because the CAS could affect *addr. + v = *addr; + } while (!__sync_bool_compare_and_swap(addr, v, val)); + return v; +# else + T v; + __atomic_exchange(addr, &val, &v, __ATOMIC_SEQ_CST); + return v; +# endif +} + +# ifndef LOCKFREE8 +template<> +inline int64_t +js::jit::AtomicOperations::exchangeSeqCst(int64_t* addr, int64_t val) +{ + MOZ_CRASH(); +} + +template<> +inline uint64_t +js::jit::AtomicOperations::exchangeSeqCst(uint64_t* addr, uint64_t val) +{ + MOZ_CRASH(); +} +# endif // LOCKFREE8 + +template<typename T> +inline T +js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_val_compare_and_swap(addr, oldval, newval); +# else + __atomic_compare_exchange(addr, &oldval, &newval, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return oldval; +# endif +} + +# ifndef LOCKFREE8 +template<> +inline int64_t +js::jit::AtomicOperations::compareExchangeSeqCst(int64_t* addr, int64_t oldval, int64_t newval) +{ + MOZ_CRASH(); +} + +template<> +inline uint64_t +js::jit::AtomicOperations::compareExchangeSeqCst(uint64_t* addr, uint64_t oldval, uint64_t newval) +{ + MOZ_CRASH(); +} +# endif // LOCKFREE8 + +template<typename T> +inline T +js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val) +{ +
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_add(addr, val); +# else + return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_sub(addr, val); +# else + return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_and(addr, val); +# else + return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_or(addr, val); +# else + return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_xor(addr, val); +# else + return __atomic_fetch_xor(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::loadSafeWhenRacy(T* addr) +{ + return *addr; // FIXME (1208663): not yet safe +} + +template<typename T> +inline void +js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val) +{ + *addr = val; // FIXME (1208663): not yet safe +} + +inline void +js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src, size_t nbytes) +{ + ::memcpy(dest, src, nbytes); // FIXME (1208663): not yet safe +} + +inline
void +js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest, const void* src, size_t nbytes) +{ + ::memmove(dest, src, nbytes); // FIXME (1208663): not yet safe +} + +// NOTE(review): the template parameter lists on acquire/release were missing +// from this listing; <size_t nbytes> is assumed from the class declaration -- +// confirm. Neither body uses the parameter or |addr|; both operate on the +// single |spinlock| word. +template<size_t nbytes> +inline void +js::jit::RegionLock::acquire(void* addr) +{ +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + while (!__sync_bool_compare_and_swap(&spinlock, 0, 1)) + continue; +# else + uint32_t zero = 0; + uint32_t one = 1; + while (!__atomic_compare_exchange(&spinlock, &zero, &one, false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE)) { + zero = 0; + continue; + } +# endif +} + +template<size_t nbytes> +inline void +js::jit::RegionLock::release(void* addr) +{ + MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + __sync_sub_and_fetch(&spinlock, 1); // Should turn into LOCK XADD +# else + uint32_t zero = 0; + __atomic_store(&spinlock, &zero, __ATOMIC_SEQ_CST); +# endif +} + +# undef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS +# undef LOCKFREE8 + +#elif defined(_MSC_VER) + +// On 32-bit CPUs there is no 64-bit XCHG instruction, one must +// instead use a loop with CMPXCHG8B. Since MSVC provides +// _InterlockedExchange64 only if it maps directly to XCHG, the +// workaround must be manual. + +# define HAVE_EXCHANGE64 + +# if !_WIN64 +# undef HAVE_EXCHANGE64 +# endif + +// Below, _ReadWriteBarrier is a compiler directive, preventing +// reordering of instructions and reuse of memory values across it. + +inline bool +js::jit::AtomicOperations::isLockfree8() +{ + // See general comments at the start of this file.
+ // + // The MSDN docs suggest very strongly that if code is compiled for + // Pentium or better the 64-bit primitives will be lock-free, see + // eg the "Remarks" section of the page for _InterlockedCompareExchange64, + // currently here: + // https://msdn.microsoft.com/en-us/library/ttk2z1ws%28v=vs.85%29.aspx + // + // But I've found no way to assert that at compile time or run time, + // there appears to be no WinAPI is_lock_free() test. + return true; +} + +inline void +js::jit::AtomicOperations::fenceSeqCst() +{ + _ReadWriteBarrier(); +# if JS_BITS_PER_WORD == 32 + // If configured for SSE2+ we can use the MFENCE instruction, available + // through the _mm_mfence intrinsic. But for non-SSE2 systems we have + // to do something else. Linux uses "lock add [esp], 0", so why not? + __asm lock add [esp], 0; +# else + _mm_mfence(); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::loadSeqCst(T* addr) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); + _ReadWriteBarrier(); + T v = *addr; + _ReadWriteBarrier(); + return v; +} + +template<typename T> +inline void +js::jit::AtomicOperations::storeSeqCst(T* addr, T val) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); + _ReadWriteBarrier(); + *addr = val; + fenceSeqCst(); +} + +// Defines exchangeSeqCst for type T on top of the exchange intrinsic |xchgop|, +// which operates on the intrinsic's own operand type U. +# define MSC_EXCHANGEOP(T, U, xchgop) \ + template<> inline T \ + js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val) { \ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \ + return (T)xchgop((U volatile*)addr, (U)val); \ + } + +// Defines exchangeSeqCst for type T as a compare-exchange loop. The +// compare-exchange intrinsic returns the value it found at |addr|, not a +// success flag, so the swap took effect only when that value equals |oldval|. +# define MSC_EXCHANGEOP_CAS(T, U, cmpxchg) \ + template<> inline T \ + js::jit::AtomicOperations::exchangeSeqCst(T* addr, T newval) { \ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \ + T oldval; \ + do { \ + _ReadWriteBarrier(); \ + oldval = *addr; \ + } while (cmpxchg((U volatile*)addr, (U)newval, (U)oldval) != (U)oldval); \ + return oldval; \ + } + +MSC_EXCHANGEOP(int8_t, char, _InterlockedExchange8) +MSC_EXCHANGEOP(uint8_t, char, _InterlockedExchange8) +MSC_EXCHANGEOP(int16_t, short, _InterlockedExchange16)
+MSC_EXCHANGEOP(uint16_t, short, _InterlockedExchange16) +MSC_EXCHANGEOP(int32_t, long, _InterlockedExchange) +MSC_EXCHANGEOP(uint32_t, long, _InterlockedExchange) +# ifdef HAVE_EXCHANGE64 +MSC_EXCHANGEOP(int64_t, __int64, _InterlockedExchange64) +MSC_EXCHANGEOP(uint64_t, __int64, _InterlockedExchange64) +# else +MSC_EXCHANGEOP_CAS(int64_t, __int64, _InterlockedCompareExchange64) +MSC_EXCHANGEOP_CAS(uint64_t, __int64, _InterlockedCompareExchange64) +# endif + +# undef MSC_EXCHANGEOP +# undef MSC_EXCHANGEOP_CAS + +# define MSC_CAS(T, U, cmpxchg) \ + template<> inline T \ + js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval) { \ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \ + return (T)cmpxchg((U volatile*)addr, (U)newval, (U)oldval); \ + } + +MSC_CAS(int8_t, char, _InterlockedCompareExchange8) +MSC_CAS(uint8_t, char, _InterlockedCompareExchange8) +MSC_CAS(int16_t, short, _InterlockedCompareExchange16) +MSC_CAS(uint16_t, short, _InterlockedCompareExchange16) +MSC_CAS(int32_t, long, _InterlockedCompareExchange) +MSC_CAS(uint32_t, long, _InterlockedCompareExchange) +MSC_CAS(int64_t, __int64, _InterlockedCompareExchange64) +MSC_CAS(uint64_t, __int64, _InterlockedCompareExchange64) + +# undef MSC_CAS + +# define MSC_FETCHADDOP(T, U, xadd) \ + template<> inline T \ + js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)xadd((U volatile*)addr, (U)val); \ + } \ + template<> inline T \ + js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)xadd((U volatile*)addr, -(U)val); \ + } + +MSC_FETCHADDOP(int8_t, char, _InterlockedExchangeAdd8) +MSC_FETCHADDOP(uint8_t, char, _InterlockedExchangeAdd8) +MSC_FETCHADDOP(int16_t, short, _InterlockedExchangeAdd16) +MSC_FETCHADDOP(uint16_t, short, _InterlockedExchangeAdd16) +MSC_FETCHADDOP(int32_t, long, 
_InterlockedExchangeAdd) +MSC_FETCHADDOP(uint32_t, long, _InterlockedExchangeAdd) + +# undef MSC_FETCHADDOP + +# define MSC_FETCHBITOP(T, U, andop, orop, xorop) \ + template<> inline T \ + js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)andop((U volatile*)addr, (U)val); \ + } \ + template<> inline T \ + js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)orop((U volatile*)addr, (U)val); \ + } \ + template<> inline T \ + js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)xorop((U volatile*)addr, (U)val); \ + } + +MSC_FETCHBITOP(int8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8) +MSC_FETCHBITOP(uint8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8) +MSC_FETCHBITOP(int16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16) +MSC_FETCHBITOP(uint16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16) +MSC_FETCHBITOP(int32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor) +MSC_FETCHBITOP(uint32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor) + +# undef MSC_FETCHBITOP + +template +inline T +js::jit::AtomicOperations::loadSafeWhenRacy(T* addr) +{ + return *addr; // FIXME (1208663): not yet safe +} + +template +inline void +js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val) +{ + *addr = val; // FIXME (1208663): not yet safe +} + +inline void +js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src, size_t nbytes) +{ + ::memcpy(dest, src, nbytes); // FIXME (1208663): not yet safe +} + +inline void +js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest, const void* src, size_t nbytes) +{ + ::memmove(dest, src, nbytes); // FIXME (1208663): not yet safe +} + +template +inline void 
+js::jit::RegionLock::acquire(void* addr) +{ + while (_InterlockedCompareExchange((long*)&spinlock, /*newval=*/1, /*oldval=*/0) == 1) + continue; +} + +template +inline void +js::jit::RegionLock::release(void* addr) +{ + MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock"); + _InterlockedExchange((long*)&spinlock, 0); +} + +# undef HAVE_EXCHANGE64 + +#elif defined(ENABLE_SHARED_ARRAY_BUFFER) + +# error "Either disable JS shared memory at compile time, use GCC, Clang, or MSVC, or add code here" + +#endif // platform + +#endif // jit_shared_AtomicOperations_x86_shared_h diff --git a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h new file mode 100644 index 000000000..844fd5c0e --- /dev/null +++ b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h @@ -0,0 +1,5393 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef jit_x86_shared_BaseAssembler_x86_shared_h +#define jit_x86_shared_BaseAssembler_x86_shared_h + +#include "mozilla/IntegerPrintfMacros.h" + +#include "jit/x86-shared/AssemblerBuffer-x86-shared.h" +#include "jit/x86-shared/Encoding-x86-shared.h" +#include "jit/x86-shared/Patching-x86-shared.h" + +extern volatile uintptr_t* blackbox; + +namespace js { +namespace jit { + +namespace X86Encoding { + +class BaseAssembler; + +class AutoUnprotectAssemblerBufferRegion +{ + BaseAssembler* assembler; + size_t firstByteOffset; + size_t lastByteOffset; + + public: + AutoUnprotectAssemblerBufferRegion(BaseAssembler& holder, int32_t offset, size_t size); + ~AutoUnprotectAssemblerBufferRegion(); +}; + +class BaseAssembler : public GenericAssembler { +public: + BaseAssembler() + : useVEX_(true) + { } + + void disableVEX() { useVEX_ = false; } + + size_t size() const { return m_formatter.size(); } + const unsigned char* buffer() const { return m_formatter.buffer(); } + unsigned char* data() { return m_formatter.data(); } + bool oom() const { return m_formatter.oom(); } + + void nop() + { + spew("nop"); + m_formatter.oneByteOp(OP_NOP); + } + + void comment(const char* msg) + { + spew("; %s", msg); + } + + MOZ_MUST_USE JmpSrc + twoByteNop() + { + spew("nop (2 byte)"); + JmpSrc r(m_formatter.size()); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_NOP); + return r; + } + + static void patchTwoByteNopToJump(uint8_t* jump, 
uint8_t* target) + { + // Note: the offset is relative to the address of the instruction after + // the jump which is two bytes. + ptrdiff_t rel8 = target - jump - 2; + MOZ_RELEASE_ASSERT(rel8 >= INT8_MIN && rel8 <= INT8_MAX); + MOZ_RELEASE_ASSERT(jump[0] == PRE_OPERAND_SIZE); + MOZ_RELEASE_ASSERT(jump[1] == OP_NOP); + jump[0] = OP_JMP_rel8; + jump[1] = rel8; + } + + static void patchJumpToTwoByteNop(uint8_t* jump) + { + // See twoByteNop. + MOZ_RELEASE_ASSERT(jump[0] == OP_JMP_rel8); + jump[0] = PRE_OPERAND_SIZE; + jump[1] = OP_NOP; + } + + /* + * The nop multibytes sequences are directly taken from the Intel's + * architecture software developer manual. + * They are defined for sequences of sizes from 1 to 9 included. + */ + void nop_one() + { + m_formatter.oneByteOp(OP_NOP); + } + + void nop_two() + { + m_formatter.oneByteOp(OP_NOP_66); + m_formatter.oneByteOp(OP_NOP); + } + + void nop_three() + { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_00); + } + + void nop_four() + { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_40); + m_formatter.oneByteOp(OP_NOP_00); + } + + void nop_five() + { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_44); + m_formatter.oneByteOp(OP_NOP_00); + m_formatter.oneByteOp(OP_NOP_00); + } + + void nop_six() + { + m_formatter.oneByteOp(OP_NOP_66); + nop_five(); + } + + void nop_seven() + { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_80); + for (int i = 0; i < 4; ++i) + m_formatter.oneByteOp(OP_NOP_00); + } + + void nop_eight() + { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_84); + for (int i = 0; i < 5; ++i) + m_formatter.oneByteOp(OP_NOP_00); + } + + void nop_nine() + { + m_formatter.oneByteOp(OP_NOP_66); + nop_eight(); + } + + void insert_nop(int size) + 
{ + switch (size) { + case 1: + nop_one(); + break; + case 2: + nop_two(); + break; + case 3: + nop_three(); + break; + case 4: + nop_four(); + break; + case 5: + nop_five(); + break; + case 6: + nop_six(); + break; + case 7: + nop_seven(); + break; + case 8: + nop_eight(); + break; + case 9: + nop_nine(); + break; + case 10: + nop_three(); + nop_seven(); + break; + case 11: + nop_four(); + nop_seven(); + break; + case 12: + nop_six(); + nop_six(); + break; + case 13: + nop_six(); + nop_seven(); + break; + case 14: + nop_seven(); + nop_seven(); + break; + case 15: + nop_one(); + nop_seven(); + nop_seven(); + break; + default: + MOZ_CRASH("Unhandled alignment"); + } + } + + // Stack operations: + + void push_r(RegisterID reg) + { + spew("push %s", GPRegName(reg)); + m_formatter.oneByteOp(OP_PUSH_EAX, reg); + } + + void pop_r(RegisterID reg) + { + spew("pop %s", GPRegName(reg)); + m_formatter.oneByteOp(OP_POP_EAX, reg); + } + + void push_i(int32_t imm) + { + spew("push $%s0x%x", PRETTYHEX(imm)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_PUSH_Ib); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_PUSH_Iz); + m_formatter.immediate32(imm); + } + } + + void push_i32(int32_t imm) + { + spew("push $%s0x%04x", PRETTYHEX(imm)); + m_formatter.oneByteOp(OP_PUSH_Iz); + m_formatter.immediate32(imm); + } + + void push_m(int32_t offset, RegisterID base) + { + spew("push " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_PUSH); + } + + void pop_m(int32_t offset, RegisterID base) + { + spew("pop " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1A_Ev, offset, base, GROUP1A_OP_POP); + } + + void push_flags() + { + spew("pushf"); + m_formatter.oneByteOp(OP_PUSHFLAGS); + } + + void pop_flags() + { + spew("popf"); + m_formatter.oneByteOp(OP_POPFLAGS); + } + + // Arithmetic operations: + + void addl_rr(RegisterID src, RegisterID dst) + { + spew("addl %s, %s", GPReg32Name(src), 
GPReg32Name(dst)); + m_formatter.oneByteOp(OP_ADD_GvEv, src, dst); + } + + void addw_rr(RegisterID src, RegisterID dst) + { + spew("addw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_ADD_GvEv, src, dst); + } + + void addl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("addl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_ADD_GvEv, offset, base, dst); + } + + void addl_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("addl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, src); + } + + void addl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("addl %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, index, scale, src); + } + + void addl_ir(int32_t imm, RegisterID dst) + { + spew("addl $%d, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + m_formatter.oneByteOp(OP_ADD_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD); + m_formatter.immediate32(imm); + } + } + + void addw_ir(int32_t imm, RegisterID dst) + { + spew("addw $%d, %s", int16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD); + m_formatter.immediate16(imm); + } + + void addl_i32r(int32_t imm, RegisterID dst) + { + // 32-bit immediate always, for patching. 
+ spew("addl $0x%04x, %s", imm, GPReg32Name(dst)); + if (dst == rax) + m_formatter.oneByteOp(OP_ADD_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD); + m_formatter.immediate32(imm); + } + + void addl_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("addl $%d, " MEM_ob, imm, ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_ADD); + m_formatter.immediate32(imm); + } + } + + void addl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("addl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_ADD); + m_formatter.immediate32(imm); + } + } + + void addl_im(int32_t imm, const void* addr) + { + spew("addl $%d, %p", imm, addr); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_ADD); + m_formatter.immediate32(imm); + } + } + void addw_im(int32_t imm, const void* addr) + { + spew("addw $%d, %p", int16_t(imm), addr); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_ADD); + m_formatter.immediate16(imm); + } + } + + void addw_im(int32_t imm, int32_t offset, RegisterID base) { + spew("addw $%d, " MEM_ob, int16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_ADD); + 
m_formatter.immediate16(imm); + } + + void addw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("addw $%d, " MEM_obs, int16_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_ADD); + m_formatter.immediate16(imm); + } + + void addw_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("addw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, src); + } + + void addw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("addw %s, " MEM_obs, GPReg16Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, index, scale, src); + } + + void addb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("addb $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_ADD); + m_formatter.immediate8(imm); + } + + void addb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("addb $%d, " MEM_obs, int8_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, GROUP1_OP_ADD); + m_formatter.immediate8(imm); + } + + void addb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("addb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_ADD_EbGb, offset, base, src); + } + + void addb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("addb %s, " MEM_obs, GPReg8Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_ADD_EbGb, offset, base, index, scale, src); + } + + void subb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("subb $%d, " MEM_ob, 
int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_SUB); + m_formatter.immediate8(imm); + } + + void subb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("subb $%d, " MEM_obs, int8_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, GROUP1_OP_SUB); + m_formatter.immediate8(imm); + } + + void subb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("subb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_SUB_EbGb, offset, base, src); + } + + void subb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("subb %s, " MEM_obs, GPReg8Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_SUB_EbGb, offset, base, index, scale, src); + } + + void andb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("andb $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_AND); + m_formatter.immediate8(imm); + } + + void andb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("andb $%d, " MEM_obs, int8_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, GROUP1_OP_AND); + m_formatter.immediate8(imm); + } + + void andb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("andb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_AND_EbGb, offset, base, src); + } + + void andb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("andb %s, " MEM_obs, GPReg8Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_AND_EbGb, offset, base, index, scale, src); + } + + void orb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("orb $%d, " MEM_ob, int8_t(imm), 
ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_OR); + m_formatter.immediate8(imm); + } + + void orb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("orb $%d, " MEM_obs, int8_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, GROUP1_OP_OR); + m_formatter.immediate8(imm); + } + + void orb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("orb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_OR_EbGb, offset, base, src); + } + + void orb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("orb %s, " MEM_obs, GPReg8Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_OR_EbGb, offset, base, index, scale, src); + } + + void xorb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("xorb $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_XOR); + m_formatter.immediate8(imm); + } + + void xorb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xorb $%d, " MEM_obs, int8_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, GROUP1_OP_XOR); + m_formatter.immediate8(imm); + } + + void xorb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("xorb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_XOR_EbGb, offset, base, src); + } + + void xorb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xorb %s, " MEM_obs, GPReg8Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_XOR_EbGb, offset, base, index, scale, src); + } + + void lock_xaddb_rm(RegisterID srcdest, int32_t offset, RegisterID base) + { + spew("lock xaddb %s, " MEM_ob, GPReg8Name(srcdest), 
ADDR_ob(offset, base)); + m_formatter.oneByteOp(PRE_LOCK); + m_formatter.twoByteOp8(OP2_XADD_EbGb, offset, base, srcdest); + } + + void lock_xaddb_rm(RegisterID srcdest, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("lock xaddb %s, " MEM_obs, GPReg8Name(srcdest), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(PRE_LOCK); + m_formatter.twoByteOp8(OP2_XADD_EbGb, offset, base, index, scale, srcdest); + } + + void lock_xaddl_rm(RegisterID srcdest, int32_t offset, RegisterID base) + { + spew("lock xaddl %s, " MEM_ob, GPReg32Name(srcdest), ADDR_ob(offset, base)); + m_formatter.oneByteOp(PRE_LOCK); + m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, srcdest); + } + + void lock_xaddl_rm(RegisterID srcdest, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("lock xaddl %s, " MEM_obs, GPReg32Name(srcdest), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(PRE_LOCK); + m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, index, scale, srcdest); + } + + void vpaddb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, src1, src0, dst); + } + void vpaddb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, offset, base, src0, dst); + } + void vpaddb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, address, src0, dst); + } + + void vpaddsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, src1, src0, dst); + } + void vpaddsb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, offset, base, src0, dst); + } + void vpaddsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, address, 
src0, dst); + } + + void vpaddusb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, src1, src0, dst); + } + void vpaddusb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, offset, base, src0, dst); + } + void vpaddusb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, address, src0, dst); + } + + void vpaddw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, src1, src0, dst); + } + void vpaddw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, offset, base, src0, dst); + } + void vpaddw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, address, src0, dst); + } + + void vpaddsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, src1, src0, dst); + } + void vpaddsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, offset, base, src0, dst); + } + void vpaddsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, address, src0, dst); + } + + void vpaddusw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, src1, src0, dst); + } + void vpaddusw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, offset, base, src0, dst); + } + void vpaddusw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, 
address, src0, dst); + } + + void vpaddd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, src1, src0, dst); + } + void vpaddd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, offset, base, src0, dst); + } + void vpaddd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, address, src0, dst); + } + + void vpsubb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, src1, src0, dst); + } + void vpsubb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, offset, base, src0, dst); + } + void vpsubb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, address, src0, dst); + } + + void vpsubsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, src1, src0, dst); + } + void vpsubsb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, offset, base, src0, dst); + } + void vpsubsb_mr(const void* subress, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, subress, src0, dst); + } + + void vpsubusb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, src1, src0, dst); + } + void vpsubusb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, offset, base, src0, dst); + } + void vpsubusb_mr(const void* subress, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, subress, src0, 
dst); + } + + void vpsubw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, src1, src0, dst); + } + void vpsubw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, offset, base, src0, dst); + } + void vpsubw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, address, src0, dst); + } + + void vpsubsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, src1, src0, dst); + } + void vpsubsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, offset, base, src0, dst); + } + void vpsubsw_mr(const void* subress, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, subress, src0, dst); + } + + void vpsubusw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, src1, src0, dst); + } + void vpsubusw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, offset, base, src0, dst); + } + void vpsubusw_mr(const void* subress, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, subress, src0, dst); + } + + void vpsubd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, src1, src0, dst); + } + void vpsubd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, offset, base, src0, dst); + } + void vpsubd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, address, src0, dst); + } + + 
void vpmuludq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, src1, src0, dst); + } + void vpmuludq_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, offset, base, src0, dst); + } + + void vpmullw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpmullw", VEX_PD, OP2_PMULLW_VdqWdq, src1, src0, dst); + } + void vpmullw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpmullw", VEX_PD, OP2_PMULLW_VdqWdq, offset, base, src0, dst); + } + + void vpmulld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, src1, src0, dst); + } + void vpmulld_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, offset, base, src0, dst); + } + void vpmulld_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address, src0, dst); + } + + void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst); + } + void vaddps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, offset, base, src0, dst); + } + void vaddps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, address, src0, dst); + } + + void vsubps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, src1, src0, dst); + } + void vsubps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsubps", VEX_PS, 
OP2_SUBPS_VpsWps, offset, base, src0, dst); + } + void vsubps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, address, src0, dst); + } + + void vmulps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, src1, src0, dst); + } + void vmulps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, offset, base, src0, dst); + } + void vmulps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, address, src0, dst); + } + + void vdivps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, src1, src0, dst); + } + void vdivps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, offset, base, src0, dst); + } + void vdivps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, address, src0, dst); + } + + void vmaxps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, src1, src0, dst); + } + void vmaxps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, offset, base, src0, dst); + } + void vmaxps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, address, src0, dst); + } + + void vminps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, src1, src0, dst); + } + void vminps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, offset, base, src0, dst); 
+ } + void vminps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, address, src0, dst); + } + + void andl_rr(RegisterID src, RegisterID dst) + { + spew("andl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_AND_GvEv, src, dst); + } + + void andw_rr(RegisterID src, RegisterID dst) + { + spew("andw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_AND_GvEv, src, dst); + } + + void andl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("andl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_AND_GvEv, offset, base, dst); + } + + void andl_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("andl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_AND_EvGv, offset, base, src); + } + + void andw_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("andw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_AND_EvGv, offset, base, src); + } + + void andl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("andl %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_AND_EvGv, offset, base, index, scale, src); + } + + void andw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("andw %s, " MEM_obs, GPReg16Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_AND_EvGv, offset, base, index, scale, src); + } + + void andl_ir(int32_t imm, RegisterID dst) + { + spew("andl $0x%x, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + 
m_formatter.oneByteOp(OP_AND_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_AND); + m_formatter.immediate32(imm); + } + } + + void andw_ir(int32_t imm, RegisterID dst) + { + spew("andw $0x%x, %s", int16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + m_formatter.oneByteOp(OP_AND_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_AND); + m_formatter.immediate16(imm); + } + } + + void andl_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("andl $0x%x, " MEM_ob, imm, ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_AND); + m_formatter.immediate32(imm); + } + } + + void andw_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("andw $0x%x, " MEM_ob, int16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_AND); + m_formatter.immediate16(imm); + } + } + + void andl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("andl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_AND); + m_formatter.immediate32(imm); + } + } + + void andw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("andw $%d, " MEM_obs, 
int16_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_AND); + m_formatter.immediate16(imm); + } + } + + void fld_m(int32_t offset, RegisterID base) + { + spew("fld " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FLD); + } + void fld32_m(int32_t offset, RegisterID base) + { + spew("fld " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FLD); + } + void faddp() + { + spew("addp "); + m_formatter.oneByteOp(OP_FPU6_ADDP); + m_formatter.oneByteOp(OP_ADDP_ST0_ST1); + } + void fisttp_m(int32_t offset, RegisterID base) + { + spew("fisttp " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FISTTP); + } + void fistp_m(int32_t offset, RegisterID base) + { + spew("fistp " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FILD, offset, base, FPU6_OP_FISTP); + } + void fstp_m(int32_t offset, RegisterID base) + { + spew("fstp " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FSTP); + } + void fstp32_m(int32_t offset, RegisterID base) + { + spew("fstp32 " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FSTP); + } + void fnstcw_m(int32_t offset, RegisterID base) + { + spew("fnstcw " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FISTP); + } + void fldcw_m(int32_t offset, RegisterID base) + { + spew("fldcw " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FLDCW); + } + void fnstsw_m(int32_t offset, RegisterID base) + { + spew("fnstsw " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6, offset, base, 
FPU6_OP_FISTP); + } + + void negl_r(RegisterID dst) + { + spew("negl %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP3_Ev, dst, GROUP3_OP_NEG); + } + + void negl_m(int32_t offset, RegisterID base) + { + spew("negl " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP3_Ev, offset, base, GROUP3_OP_NEG); + } + + void notl_r(RegisterID dst) + { + spew("notl %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP3_Ev, dst, GROUP3_OP_NOT); + } + + void notl_m(int32_t offset, RegisterID base) + { + spew("notl " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP3_Ev, offset, base, GROUP3_OP_NOT); + } + + void orl_rr(RegisterID src, RegisterID dst) + { + spew("orl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_OR_GvEv, src, dst); + } + + void orw_rr(RegisterID src, RegisterID dst) + { + spew("orw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_OR_GvEv, src, dst); + } + + void orl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("orl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_OR_GvEv, offset, base, dst); + } + + void orl_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("orl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_OR_EvGv, offset, base, src); + } + + void orw_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("orw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_OR_EvGv, offset, base, src); + } + + void orl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("orl %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_OR_EvGv, offset, base, index, scale, src); + } + + void orw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("orw 
%s, " MEM_obs, GPReg16Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_OR_EvGv, offset, base, index, scale, src); + } + + void orl_ir(int32_t imm, RegisterID dst) + { + spew("orl $0x%x, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + m_formatter.oneByteOp(OP_OR_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_OR); + m_formatter.immediate32(imm); + } + } + + void orw_ir(int32_t imm, RegisterID dst) + { + spew("orw $0x%x, %s", int16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + m_formatter.oneByteOp(OP_OR_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_OR); + m_formatter.immediate16(imm); + } + } + + void orl_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("orl $0x%x, " MEM_ob, imm, ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_OR); + m_formatter.immediate32(imm); + } + } + + void orw_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("orw $0x%x, " MEM_ob, int16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_OR); + m_formatter.immediate16(imm); + } + } + + void orl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("orl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, 
scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_OR); + m_formatter.immediate32(imm); + } + } + + void orw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("orw $%d, " MEM_obs, int16_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_OR); + m_formatter.immediate16(imm); + } + } + + void subl_rr(RegisterID src, RegisterID dst) + { + spew("subl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_SUB_GvEv, src, dst); + } + + void subw_rr(RegisterID src, RegisterID dst) + { + spew("subw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_SUB_GvEv, src, dst); + } + + void subl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("subl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_SUB_GvEv, offset, base, dst); + } + + void subl_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("subl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, src); + } + + void subw_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("subw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, src); + } + + void subl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("subl %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale)); + 
m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, index, scale, src); + } + + void subw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("subw %s, " MEM_obs, GPReg16Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, index, scale, src); + } + + void subl_ir(int32_t imm, RegisterID dst) + { + spew("subl $%d, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + m_formatter.oneByteOp(OP_SUB_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SUB); + m_formatter.immediate32(imm); + } + } + + void subw_ir(int32_t imm, RegisterID dst) + { + spew("subw $%d, %s", int16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + m_formatter.oneByteOp(OP_SUB_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SUB); + m_formatter.immediate16(imm); + } + } + + void subl_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("subl $%d, " MEM_ob, imm, ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_SUB); + m_formatter.immediate32(imm); + } + } + + void subw_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("subw $%d, " MEM_ob, int16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_SUB); + 
m_formatter.immediate16(imm); + } + } + + void subl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("subl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_SUB); + m_formatter.immediate32(imm); + } + } + + void subw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("subw $%d, " MEM_obs, int16_t(imm), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_SUB); + m_formatter.immediate16(imm); + } + } + + void xorl_rr(RegisterID src, RegisterID dst) + { + spew("xorl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_XOR_GvEv, src, dst); + } + + void xorw_rr(RegisterID src, RegisterID dst) + { + spew("xorw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XOR_GvEv, src, dst); + } + + void xorl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("xorl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_XOR_GvEv, offset, base, dst); + } + + void xorl_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("xorl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, src); + } + + void xorw_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("xorw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, 
src); + } + + void xorl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xorl %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, index, scale, src); + } + + void xorw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xorw %s, " MEM_obs, GPReg16Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, index, scale, src); + } + + void xorl_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("xorl $0x%x, " MEM_ob, imm, ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_XOR); + m_formatter.immediate32(imm); + } + } + + void xorw_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("xorw $0x%x, " MEM_ob, int16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_XOR); + m_formatter.immediate16(imm); + } + } + + void xorl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xorl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_XOR); + m_formatter.immediate32(imm); + } + } + + void xorw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xorw $%d, " MEM_obs, int16_t(imm), ADDR_obs(offset, 
base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_XOR); + m_formatter.immediate16(imm); + } + } + + void xorl_ir(int32_t imm, RegisterID dst) + { + spew("xorl $%d, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + m_formatter.oneByteOp(OP_XOR_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_XOR); + m_formatter.immediate32(imm); + } + } + + void xorw_ir(int32_t imm, RegisterID dst) + { + spew("xorw $%d, %s", int16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) + m_formatter.oneByteOp(OP_XOR_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_XOR); + m_formatter.immediate16(imm); + } + } + + void sarl_ir(int32_t imm, RegisterID dst) + { + MOZ_ASSERT(imm < 32); + spew("sarl $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SAR); + else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SAR); + m_formatter.immediate8u(imm); + } + } + + void sarl_CLr(RegisterID dst) + { + spew("sarl %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SAR); + } + + void shrl_ir(int32_t imm, RegisterID dst) + { + MOZ_ASSERT(imm < 32); + spew("shrl $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SHR); + else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SHR); + m_formatter.immediate8u(imm); + } + } + + void shrl_CLr(RegisterID dst) + { + 
spew("shrl %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SHR); + } + + void shrdl_CLr(RegisterID src, RegisterID dst) + { + spew("shrdl %%cl, %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_SHRD_GvEv, dst, src); + } + + void shldl_CLr(RegisterID src, RegisterID dst) + { + spew("shldl %%cl, %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_SHLD_GvEv, dst, src); + } + + void shll_ir(int32_t imm, RegisterID dst) + { + MOZ_ASSERT(imm < 32); + spew("shll $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SHL); + else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SHL); + m_formatter.immediate8u(imm); + } + } + + void shll_CLr(RegisterID dst) + { + spew("shll %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SHL); + } + + void roll_ir(int32_t imm, RegisterID dst) + { + MOZ_ASSERT(imm < 32); + spew("roll $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_ROL); + else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_ROL); + m_formatter.immediate8u(imm); + } + } + void roll_CLr(RegisterID dst) + { + spew("roll %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_ROL); + } + + void rorl_ir(int32_t imm, RegisterID dst) + { + MOZ_ASSERT(imm < 32); + spew("rorl $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_ROR); + else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_ROR); + m_formatter.immediate8u(imm); + } + } + void rorl_CLr(RegisterID dst) + { + spew("rorl %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_ROR); + } + + void bsrl_rr(RegisterID src, RegisterID dst) + { + spew("bsrl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_BSR_GvEv, src, dst); + } + + void 
bsfl_rr(RegisterID src, RegisterID dst) + { + spew("bsfl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_BSF_GvEv, src, dst); + } + + void popcntl_rr(RegisterID src, RegisterID dst) + { + spew("popcntl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(VEX_SS); + m_formatter.twoByteOp(OP2_POPCNT_GvEv, src, dst); + } + + void imull_rr(RegisterID src, RegisterID dst) + { + spew("imull %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_IMUL_GvEv, src, dst); + } + + void imull_r(RegisterID multiplier) + { + spew("imull %s", GPReg32Name(multiplier)); + m_formatter.oneByteOp(OP_GROUP3_Ev, multiplier, GROUP3_OP_IMUL); + } + + void imull_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("imull " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_IMUL_GvEv, offset, base, dst); + } + + void imull_ir(int32_t value, RegisterID src, RegisterID dst) + { + spew("imull $%d, %s, %s", value, GPReg32Name(src), GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(value)) { + m_formatter.oneByteOp(OP_IMUL_GvEvIb, src, dst); + m_formatter.immediate8s(value); + } else { + m_formatter.oneByteOp(OP_IMUL_GvEvIz, src, dst); + m_formatter.immediate32(value); + } + } + + void mull_r(RegisterID multiplier) + { + spew("mull %s", GPReg32Name(multiplier)); + m_formatter.oneByteOp(OP_GROUP3_Ev, multiplier, GROUP3_OP_MUL); + } + + void idivl_r(RegisterID divisor) + { + spew("idivl %s", GPReg32Name(divisor)); + m_formatter.oneByteOp(OP_GROUP3_Ev, divisor, GROUP3_OP_IDIV); + } + + void divl_r(RegisterID divisor) + { + spew("div %s", GPReg32Name(divisor)); + m_formatter.oneByteOp(OP_GROUP3_Ev, divisor, GROUP3_OP_DIV); + } + + void prefix_lock() + { + spew("lock"); + m_formatter.oneByteOp(PRE_LOCK); + } + + void prefix_16_for_32() + { + m_formatter.prefix(PRE_OPERAND_SIZE); + } + + void incl_m32(int32_t offset, RegisterID base) + { + spew("incl " MEM_ob, ADDR_ob(offset, base)); + 
m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_INC); + } + + void decl_m32(int32_t offset, RegisterID base) + { + spew("decl " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_DEC); + } + + // Note that CMPXCHG performs comparison against REG = %al/%ax/%eax/%rax. + // If %REG == [%base+offset], then %src -> [%base+offset]. + // Otherwise, [%base+offset] -> %REG. + // For the 8-bit operations src must also be an 8-bit register. + + void cmpxchgb(RegisterID src, int32_t offset, RegisterID base) + { + spew("cmpxchgb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.twoByteOp8(OP2_CMPXCHG_GvEb, offset, base, src); + } + void cmpxchgb(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("cmpxchgb %s, " MEM_obs, GPReg8Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.twoByteOp8(OP2_CMPXCHG_GvEb, offset, base, index, scale, src); + } + void cmpxchgw(RegisterID src, int32_t offset, RegisterID base) + { + spew("cmpxchgw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, src); + } + void cmpxchgw(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("cmpxchgw %s, " MEM_obs, GPReg16Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, index, scale, src); + } + void cmpxchgl(RegisterID src, int32_t offset, RegisterID base) + { + spew("cmpxchgl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, src); + } + void cmpxchgl(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("cmpxchgl %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, index, scale, src); + } + + 
+ // Comparisons: + + void cmpl_rr(RegisterID rhs, RegisterID lhs) + { + spew("cmpl %s, %s", GPReg32Name(rhs), GPReg32Name(lhs)); + m_formatter.oneByteOp(OP_CMP_GvEv, rhs, lhs); + } + + void cmpl_rm(RegisterID rhs, int32_t offset, RegisterID base) + { + spew("cmpl %s, " MEM_ob, GPReg32Name(rhs), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_CMP_EvGv, offset, base, rhs); + } + + void cmpl_mr(int32_t offset, RegisterID base, RegisterID lhs) + { + spew("cmpl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(lhs)); + m_formatter.oneByteOp(OP_CMP_GvEv, offset, base, lhs); + } + + void cmpl_mr(const void* address, RegisterID lhs) + { + spew("cmpl %p, %s", address, GPReg32Name(lhs)); + m_formatter.oneByteOp(OP_CMP_GvEv, address, lhs); + } + + void cmpl_ir(int32_t rhs, RegisterID lhs) + { + if (rhs == 0) { + testl_rr(lhs, lhs); + return; + } + + spew("cmpl $0x%x, %s", rhs, GPReg32Name(lhs)); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, lhs, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + if (lhs == rax) + m_formatter.oneByteOp(OP_CMP_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, lhs, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + } + + void cmpl_i32r(int32_t rhs, RegisterID lhs) + { + spew("cmpl $0x%04x, %s", rhs, GPReg32Name(lhs)); + if (lhs == rax) + m_formatter.oneByteOp(OP_CMP_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP1_EvIz, lhs, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + + void cmpl_im(int32_t rhs, int32_t offset, RegisterID base) + { + spew("cmpl $0x%x, " MEM_ob, rhs, ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + } + + void cmpb_im(int32_t rhs, int32_t offset, RegisterID base) + { + spew("cmpb $0x%x, " MEM_ob, rhs, ADDR_ob(offset, base)); + 
m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_CMP); + m_formatter.immediate8(rhs); + } + + void cmpb_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("cmpb $0x%x, " MEM_obs, rhs, ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, GROUP1_OP_CMP); + m_formatter.immediate8(rhs); + } + + void cmpl_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("cmpl $0x%x, " MEM_o32b, rhs, ADDR_o32b(offset, base)); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + } + + MOZ_MUST_USE JmpSrc + cmpl_im_disp32(int32_t rhs, int32_t offset, RegisterID base) + { + spew("cmpl $0x%x, " MEM_o32b, rhs, ADDR_o32b(offset, base)); + JmpSrc r; + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp_disp32(OP_GROUP1_EvIb, offset, base, GROUP1_OP_CMP); + r = JmpSrc(m_formatter.size()); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp_disp32(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP); + r = JmpSrc(m_formatter.size()); + m_formatter.immediate32(rhs); + } + return r; + } + + MOZ_MUST_USE JmpSrc + cmpl_im_disp32(int32_t rhs, const void* addr) + { + spew("cmpl $0x%x, %p", rhs, addr); + JmpSrc r; + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp_disp32(OP_GROUP1_EvIb, addr, GROUP1_OP_CMP); + r = JmpSrc(m_formatter.size()); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp_disp32(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP); + r = JmpSrc(m_formatter.size()); + m_formatter.immediate32(rhs); + } + return r; + } + + void cmpl_i32m(int32_t rhs, int32_t offset, RegisterID base) + { + spew("cmpl $0x%04x, " MEM_ob, rhs, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, 
base, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + + void cmpl_i32m(int32_t rhs, const void* addr) + { + spew("cmpl $0x%04x, %p", rhs, addr); + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + + void cmpl_rm(RegisterID rhs, const void* addr) + { + spew("cmpl %s, %p", GPReg32Name(rhs), addr); + m_formatter.oneByteOp(OP_CMP_EvGv, addr, rhs); + } + + void cmpl_rm_disp32(RegisterID rhs, const void* addr) + { + spew("cmpl %s, %p", GPReg32Name(rhs), addr); + m_formatter.oneByteOp_disp32(OP_CMP_EvGv, addr, rhs); + } + + void cmpl_im(int32_t rhs, const void* addr) + { + spew("cmpl $0x%x, %p", rhs, addr); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + } + + void cmpw_rr(RegisterID rhs, RegisterID lhs) + { + spew("cmpw %s, %s", GPReg16Name(rhs), GPReg16Name(lhs)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_CMP_GvEv, rhs, lhs); + } + + void cmpw_rm(RegisterID rhs, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("cmpw %s, " MEM_obs, GPReg16Name(rhs), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_CMP_EvGv, offset, base, index, scale, rhs); + } + + void cmpw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("cmpw $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, GROUP1_OP_CMP); + m_formatter.immediate8s(imm); + } else { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, GROUP1_OP_CMP); + m_formatter.immediate16(imm); + } + } + + void testl_rr(RegisterID rhs, RegisterID lhs) + { + 
spew("testl %s, %s", GPReg32Name(rhs), GPReg32Name(lhs)); + m_formatter.oneByteOp(OP_TEST_EvGv, lhs, rhs); + } + + void testb_rr(RegisterID rhs, RegisterID lhs) + { + spew("testb %s, %s", GPReg8Name(rhs), GPReg8Name(lhs)); + m_formatter.oneByteOp(OP_TEST_EbGb, lhs, rhs); + } + + void testl_ir(int32_t rhs, RegisterID lhs) + { + // If the mask fits in an 8-bit immediate, we can use testb with an + // 8-bit subreg. + if (CAN_ZERO_EXTEND_8_32(rhs) && HasSubregL(lhs)) { + testb_ir(rhs, lhs); + return; + } + // If the mask is a subset of 0xff00, we can use testb with an h reg, if + // one happens to be available. + if (CAN_ZERO_EXTEND_8H_32(rhs) && HasSubregH(lhs)) { + testb_ir_norex(rhs >> 8, GetSubregH(lhs)); + return; + } + spew("testl $0x%x, %s", rhs, GPReg32Name(lhs)); + if (lhs == rax) + m_formatter.oneByteOp(OP_TEST_EAXIv); + else + m_formatter.oneByteOp(OP_GROUP3_EvIz, lhs, GROUP3_OP_TEST); + m_formatter.immediate32(rhs); + } + + void testl_i32m(int32_t rhs, int32_t offset, RegisterID base) + { + spew("testl $0x%x, " MEM_ob, rhs, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP3_EvIz, offset, base, GROUP3_OP_TEST); + m_formatter.immediate32(rhs); + } + + void testl_i32m(int32_t rhs, const void* addr) + { + spew("testl $0x%x, %p", rhs, addr); + m_formatter.oneByteOp(OP_GROUP3_EvIz, addr, GROUP3_OP_TEST); + m_formatter.immediate32(rhs); + } + + void testb_im(int32_t rhs, int32_t offset, RegisterID base) + { + spew("testb $0x%x, " MEM_ob, rhs, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP3_EbIb, offset, base, GROUP3_OP_TEST); + m_formatter.immediate8(rhs); + } + + void testb_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("testb $0x%x, " MEM_obs, rhs, ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP3_EbIb, offset, base, index, scale, GROUP3_OP_TEST); + m_formatter.immediate8(rhs); + } + + void testl_i32m(int32_t rhs, int32_t offset, RegisterID base, RegisterID index, int scale) + { + 
spew("testl $0x%x, " MEM_obs, rhs, ADDR_obs(offset, base, index, scale)); /* plain %x like the other testl_i32m overloads; %4x space-padded the value after the 0x prefix */ + m_formatter.oneByteOp(OP_GROUP3_EvIz, offset, base, index, scale, GROUP3_OP_TEST); + m_formatter.immediate32(rhs); + } + + void testw_rr(RegisterID rhs, RegisterID lhs) + { + spew("testw %s, %s", GPReg16Name(rhs), GPReg16Name(lhs)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_TEST_EvGv, lhs, rhs); + } + + void testb_ir(int32_t rhs, RegisterID lhs) + { + spew("testb $0x%x, %s", rhs, GPReg8Name(lhs)); + if (lhs == rax) + m_formatter.oneByteOp8(OP_TEST_EAXIb); + else + m_formatter.oneByteOp8(OP_GROUP3_EbIb, lhs, GROUP3_OP_TEST); + m_formatter.immediate8(rhs); + } + + // Like testb_ir, but never emits a REX prefix. This may be used to + // reference ah..bh. + void testb_ir_norex(int32_t rhs, HRegisterID lhs) + { + spew("testb $0x%x, %s", rhs, HRegName8(lhs)); + m_formatter.oneByteOp8_norex(OP_GROUP3_EbIb, lhs, GROUP3_OP_TEST); + m_formatter.immediate8(rhs); + } + + void setCC_r(Condition cond, RegisterID lhs) + { + spew("set%s %s", CCName(cond), GPReg8Name(lhs)); + m_formatter.twoByteOp8(setccOpcode(cond), lhs, (GroupOpcodeID)0); + } + + void sete_r(RegisterID dst) + { + setCC_r(ConditionE, dst); + } + + void setz_r(RegisterID dst) + { + sete_r(dst); + } + + void setne_r(RegisterID dst) + { + setCC_r(ConditionNE, dst); + } + + void setnz_r(RegisterID dst) + { + setne_r(dst); + } + + // Various move ops: + + void cdq() + { + spew("cdq "); + m_formatter.oneByteOp(OP_CDQ); + } + + void xchgb_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("xchgb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_XCHG_GbEb, offset, base, src); + } + void xchgb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xchgb %s, " MEM_obs, GPReg8Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_XCHG_GbEb, offset, base, index, scale, src); + } + + void xchgw_rm(RegisterID src, int32_t 
offset, RegisterID base) + { + spew("xchgw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, src); + } + void xchgw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xchgw %s, " MEM_obs, GPReg16Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, index, scale, src); + } + + void xchgl_rr(RegisterID src, RegisterID dst) + { + spew("xchgl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_XCHG_GvEv, src, dst); + } + void xchgl_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("xchgl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, src); + } + void xchgl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("xchgl %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, index, scale, src); + } + + void cmovz_rr(RegisterID src, RegisterID dst) /* register-to-register cmovz emitted via twoByteOp(OP2_CMOVZ_GvEv) */ + { + spew("cmovz %s, %s", GPReg32Name(src), GPReg32Name(dst)); /* no PRE_OPERAND_SIZE prefix is emitted here, so name src as a 32-bit register like dst */ + m_formatter.twoByteOp(OP2_CMOVZ_GvEv, src, dst); + } + void cmovz_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("cmovz " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_CMOVZ_GvEv, offset, base, dst); + } + void cmovz_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + spew("cmovz " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_CMOVZ_GvEv, offset, base, index, scale, dst); + } + + void movl_rr(RegisterID src, RegisterID dst) + { + spew("movl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEv, src, dst); + } + + void movw_rm(RegisterID src, int32_t offset, RegisterID base) 
+ { + spew("movw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, src); + } + + void movw_rm_disp32(RegisterID src, int32_t offset, RegisterID base) + { + spew("movw %s, " MEM_o32b, GPReg16Name(src), ADDR_o32b(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp_disp32(OP_MOV_EvGv, offset, base, src); + } + + void movw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("movw %s, " MEM_obs, GPReg16Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, index, scale, src); + } + + void movw_rm(RegisterID src, const void* addr) + { + spew("movw %s, %p", GPReg16Name(src), addr); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp_disp32(OP_MOV_EvGv, addr, src); + } + + void movl_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("movl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, src); + } + + void movl_rm_disp32(RegisterID src, int32_t offset, RegisterID base) + { + spew("movl %s, " MEM_o32b, GPReg32Name(src), ADDR_o32b(offset, base)); + m_formatter.oneByteOp_disp32(OP_MOV_EvGv, offset, base, src); + } + + void movl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("movl %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, index, scale, src); + } + + void movl_mEAX(const void* addr) + { +#ifdef JS_CODEGEN_X64 + if (IsAddressImmediate(addr)) { + movl_mr(addr, rax); + return; + } +#endif + +#ifdef JS_CODEGEN_X64 + spew("movabs %p, %%eax", addr); +#else + spew("movl %p, %%eax", addr); +#endif + m_formatter.oneByteOp(OP_MOV_EAXOv); +#ifdef JS_CODEGEN_X64 + m_formatter.immediate64(reinterpret_cast(addr)); +#else + 
m_formatter.immediate32(reinterpret_cast(addr)); +#endif + } + + void movl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEv, offset, base, dst); + } + + void movl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movl " MEM_o32b ", %s", ADDR_o32b(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp_disp32(OP_MOV_GvEv, offset, base, dst); + } + + void movl_mr(const void* base, RegisterID index, int scale, RegisterID dst) + { + int32_t disp = AddressImmediate(base); + + spew("movl " MEM_os ", %s", ADDR_os(disp, index, scale), GPReg32Name(dst)); + m_formatter.oneByteOp_disp32(OP_MOV_GvEv, disp, index, scale, dst); + } + + void movl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + spew("movl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEv, offset, base, index, scale, dst); + } + + void movl_mr(const void* addr, RegisterID dst) + { + if (dst == rax +#ifdef JS_CODEGEN_X64 + && !IsAddressImmediate(addr) +#endif + ) + { + movl_mEAX(addr); + return; + } + + spew("movl %p, %s", addr, GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEv, addr, dst); + } + + void movl_i32r(int32_t imm, RegisterID dst) + { + spew("movl $0x%x, %s", imm, GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_EAXIv, dst); + m_formatter.immediate32(imm); + } + + void movb_ir(int32_t imm, RegisterID reg) + { + spew("movb $0x%x, %s", imm, GPReg8Name(reg)); + m_formatter.oneByteOp8(OP_MOV_EbIb, reg); + m_formatter.immediate8(imm); + } + + void movb_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("movb $0x%x, " MEM_ob, imm, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP11_EvIb, offset, base, GROUP11_MOV); + m_formatter.immediate8(imm); + } + + void movb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("movb 
$0x%x, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP11_EvIb, offset, base, index, scale, GROUP11_MOV); + m_formatter.immediate8(imm); + } + + void movb_im(int32_t imm, const void* addr) + { + spew("movb $%d, %p", imm, addr); + m_formatter.oneByteOp_disp32(OP_GROUP11_EvIb, addr, GROUP11_MOV); + m_formatter.immediate8(imm); + } + + void movw_im(int32_t imm, int32_t offset, RegisterID base) + { + spew("movw $0x%x, " MEM_ob, imm, ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, GROUP11_MOV); + m_formatter.immediate16(imm); + } + + void movw_im(int32_t imm, const void* addr) + { + spew("movw $%d, %p", imm, addr); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp_disp32(OP_GROUP11_EvIz, addr, GROUP11_MOV); + m_formatter.immediate16(imm); + } + + void movl_i32m(int32_t imm, int32_t offset, RegisterID base) + { + spew("movl $0x%x, " MEM_ob, imm, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, GROUP11_MOV); + m_formatter.immediate32(imm); + } + + void movw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("movw $0x%x, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, index, scale, GROUP11_MOV); + m_formatter.immediate16(imm); + } + + void movl_i32m(int32_t imm, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("movl $0x%x, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, index, scale, GROUP11_MOV); + m_formatter.immediate32(imm); + } + + void movl_EAXm(const void* addr) + { +#ifdef JS_CODEGEN_X64 + if (IsAddressImmediate(addr)) { + movl_rm(rax, addr); + return; + } +#endif + + spew("movl %%eax, %p", addr); + m_formatter.oneByteOp(OP_MOV_OvEAX); +#ifdef JS_CODEGEN_X64 + 
m_formatter.immediate64(reinterpret_cast(addr)); +#else + m_formatter.immediate32(reinterpret_cast(addr)); +#endif + } + + void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base) + { + // vmovq_rm can be encoded either as a true vmovq or as a vmovd with a + // REX prefix modifying it to be 64-bit. We choose the vmovq encoding + // because it's smaller (when it doesn't need a REX prefix for other + // reasons) and because it works on 32-bit x86 too. + twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, invalid_xmm, src); + } + + void vmovq_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd_disp32("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, invalid_xmm, src); + } + + void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, index, scale, invalid_xmm, src); + } + + void vmovq_rm(XMMRegisterID src, const void* addr) + { + twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, addr, invalid_xmm, src); + } + + void vmovq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + // vmovq_mr can be encoded either as a true vmovq or as a vmovd with a + // REX prefix modifying it to be 64-bit. We choose the vmovq encoding + // because it's smaller (when it doesn't need a REX prefix for other + // reasons) and because it works on 32-bit x86 too. 
+ twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, invalid_xmm, dst); + } + + void vmovq_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd_disp32("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, invalid_xmm, dst); + } + + void vmovq_mr(int32_t offset, RegisterID base, RegisterID index, int32_t scale, XMMRegisterID dst) + { + twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, index, scale, invalid_xmm, dst); + } + + void vmovq_mr(const void* addr, XMMRegisterID dst) + { + twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, addr, invalid_xmm, dst); + } + + void movl_rm(RegisterID src, const void* addr) + { + if (src == rax +#ifdef JS_CODEGEN_X64 + && !IsAddressImmediate(addr) +#endif + ) { + movl_EAXm(addr); + return; + } + + spew("movl %s, %p", GPReg32Name(src), addr); + m_formatter.oneByteOp(OP_MOV_EvGv, addr, src); + } + + void movl_i32m(int32_t imm, const void* addr) + { + spew("movl $%d, %p", imm, addr); + m_formatter.oneByteOp(OP_GROUP11_EvIz, addr, GROUP11_MOV); + m_formatter.immediate32(imm); + } + + void movb_rm(RegisterID src, int32_t offset, RegisterID base) + { + spew("movb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_MOV_EbGv, offset, base, src); + } + + void movb_rm_disp32(RegisterID src, int32_t offset, RegisterID base) + { + spew("movb %s, " MEM_o32b, GPReg8Name(src), ADDR_o32b(offset, base)); + m_formatter.oneByteOp8_disp32(OP_MOV_EbGv, offset, base, src); + } + + void movb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + spew("movb %s, " MEM_obs, GPReg8Name(src), ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_MOV_EbGv, offset, base, index, scale, src); + } + + void movb_rm(RegisterID src, const void* addr) + { + spew("movb %s, %p", GPReg8Name(src), addr); + m_formatter.oneByteOp8(OP_MOV_EbGv, addr, src); + } + + void movb_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movb " MEM_ob ", %s", 
ADDR_ob(offset, base), GPReg8Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEb, offset, base, dst); + } + + void movb_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + spew("movb " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg8Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEb, offset, base, index, scale, dst); + } + + void movzbl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movzbl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEb, offset, base, dst); + } + + void movzbl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movzbl " MEM_o32b ", %s", ADDR_o32b(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp_disp32(OP2_MOVZX_GvEb, offset, base, dst); + } + + void movzbl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + spew("movzbl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEb, offset, base, index, scale, dst); + } + + void movzbl_mr(const void* addr, RegisterID dst) + { + spew("movzbl %p, %s", addr, GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEb, addr, dst); + } + + void movsbl_rr(RegisterID src, RegisterID dst) + { + spew("movsbl %s, %s", GPReg8Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp8_movx(OP2_MOVSX_GvEb, src, dst); + } + + void movsbl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movsbl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEb, offset, base, dst); + } + + void movsbl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movsbl " MEM_o32b ", %s", ADDR_o32b(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp_disp32(OP2_MOVSX_GvEb, offset, base, dst); + } + + void movsbl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + spew("movsbl " MEM_obs ", %s", ADDR_obs(offset, 
base, index, scale), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEb, offset, base, index, scale, dst); + } + + void movsbl_mr(const void* addr, RegisterID dst) + { + spew("movsbl %p, %s", addr, GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEb, addr, dst); + } + + void movzwl_rr(RegisterID src, RegisterID dst) + { + spew("movzwl %s, %s", GPReg16Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEw, src, dst); + } + + void movzwl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movzwl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEw, offset, base, dst); + } + + void movzwl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movzwl " MEM_o32b ", %s", ADDR_o32b(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp_disp32(OP2_MOVZX_GvEw, offset, base, dst); + } + + void movzwl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + spew("movzwl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEw, offset, base, index, scale, dst); + } + + void movzwl_mr(const void* addr, RegisterID dst) + { + spew("movzwl %p, %s", addr, GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEw, addr, dst); + } + + void movswl_rr(RegisterID src, RegisterID dst) + { + spew("movswl %s, %s", GPReg16Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEw, src, dst); + } + + void movswl_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movswl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEw, offset, base, dst); + } + + void movswl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) + { + spew("movswl " MEM_o32b ", %s", ADDR_o32b(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp_disp32(OP2_MOVSX_GvEw, offset, base, dst); + } + + void movswl_mr(int32_t offset, RegisterID base, RegisterID index, 
int scale, RegisterID dst) + { + spew("movswl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEw, offset, base, index, scale, dst); + } + + void movswl_mr(const void* addr, RegisterID dst) + { + spew("movswl %p, %s", addr, GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEw, addr, dst); + } + + void movzbl_rr(RegisterID src, RegisterID dst) + { + spew("movzbl %s, %s", GPReg8Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp8_movx(OP2_MOVZX_GvEb, src, dst); + } + + void leal_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst) + { + spew("leal " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_LEA, offset, base, index, scale, dst); + } + + void leal_mr(int32_t offset, RegisterID base, RegisterID dst) + { + spew("leal " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_LEA, offset, base, dst); + } + + // Flow control: + + MOZ_MUST_USE JmpSrc + call() + { + m_formatter.oneByteOp(OP_CALL_rel32); + JmpSrc r = m_formatter.immediateRel32(); + spew("call .Lfrom%d", r.offset()); + return r; + } + + void call_r(RegisterID dst) + { + m_formatter.oneByteOp(OP_GROUP5_Ev, dst, GROUP5_OP_CALLN); + spew("call *%s", GPRegName(dst)); + } + + void call_m(int32_t offset, RegisterID base) + { + spew("call *" MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_CALLN); + } + + // Comparison of EAX against a 32-bit immediate. The immediate is patched + // in as if it were a jump target. The intention is to toggle the first + // byte of the instruction between a CMP and a JMP to produce a pseudo-NOP. 
+ MOZ_MUST_USE JmpSrc + cmp_eax() + { + m_formatter.oneByteOp(OP_CMP_EAXIv); + JmpSrc r = m_formatter.immediateRel32(); + spew("cmpl %%eax, .Lfrom%d", r.offset()); + return r; + } + + void jmp_i(JmpDst dst) + { + int32_t diff = dst.offset() - m_formatter.size(); + spew("jmp .Llabel%d", dst.offset()); + + // The jump immediate is an offset from the end of the jump instruction. + // A jump instruction is either 1 byte opcode and 1 byte offset, or 1 + // byte opcode and 4 bytes offset. + if (CAN_SIGN_EXTEND_8_32(diff - 2)) { + m_formatter.oneByteOp(OP_JMP_rel8); + m_formatter.immediate8s(diff - 2); + } else { + m_formatter.oneByteOp(OP_JMP_rel32); + m_formatter.immediate32(diff - 5); + } + } + MOZ_MUST_USE JmpSrc + jmp() + { + m_formatter.oneByteOp(OP_JMP_rel32); + JmpSrc r = m_formatter.immediateRel32(); + spew("jmp .Lfrom%d", r.offset()); + return r; + } + + void jmp_r(RegisterID dst) + { + spew("jmp *%s", GPRegName(dst)); + m_formatter.oneByteOp(OP_GROUP5_Ev, dst, GROUP5_OP_JMPN); + } + + void jmp_m(int32_t offset, RegisterID base) + { + spew("jmp *" MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_JMPN); + } + + void jmp_m(int32_t offset, RegisterID base, RegisterID index, int scale) { + spew("jmp *" MEM_obs, ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, index, scale, GROUP5_OP_JMPN); + } + + void jCC_i(Condition cond, JmpDst dst) + { + int32_t diff = dst.offset() - m_formatter.size(); + spew("j%s .Llabel%d", CCName(cond), dst.offset()); + + // The jump immediate is an offset from the end of the jump instruction. + // A conditional jump instruction is either 1 byte opcode and 1 byte + // offset, or 2 bytes opcode and 4 bytes offset. 
+ if (CAN_SIGN_EXTEND_8_32(diff - 2)) { + m_formatter.oneByteOp(jccRel8(cond)); + m_formatter.immediate8s(diff - 2); + } else { + m_formatter.twoByteOp(jccRel32(cond)); + m_formatter.immediate32(diff - 6); + } + } + + MOZ_MUST_USE JmpSrc + jCC(Condition cond) + { + m_formatter.twoByteOp(jccRel32(cond)); + JmpSrc r = m_formatter.immediateRel32(); + spew("j%s .Lfrom%d", CCName(cond), r.offset()); + return r; + } + + // SSE operations: + + void vpcmpeqb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, src1, src0, dst); + } + void vpcmpeqb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, offset, base, src0, dst); + } + void vpcmpeqb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, address, src0, dst); + } + + void vpcmpgtb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, src1, src0, dst); + } + void vpcmpgtb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, offset, base, src0, dst); + } + void vpcmpgtb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, address, src0, dst); + } + + void vpcmpeqw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, src1, src0, dst); + } + void vpcmpeqw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, offset, base, src0, dst); + } + void vpcmpeqw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, address, src0, dst); + } + + void vpcmpgtw_rr(XMMRegisterID src1, 
XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, src1, src0, dst); + } + void vpcmpgtw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, offset, base, src0, dst); + } + void vpcmpgtw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, address, src0, dst); + } + + void vpcmpeqd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, src1, src0, dst); + } + void vpcmpeqd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, offset, base, src0, dst); + } + void vpcmpeqd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, address, src0, dst); + } + + void vpcmpgtd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, src1, src0, dst); + } + void vpcmpgtd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, offset, base, src0, dst); + } + void vpcmpgtd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, address, src0, dst); + } + + void vcmpps_rr(uint8_t order, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, src1, src0, dst); + } + void vcmpps_mr(uint8_t order, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, offset, base, src0, dst); + } + void vcmpps_mr(uint8_t order, const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpImmSimd("vcmpps", VEX_PS, 
OP2_CMPPS_VpsWps, order, address, src0, dst); + } + + void vrcpps_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, src, invalid_xmm, dst); + } + void vrcpps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, offset, base, invalid_xmm, dst); + } + void vrcpps_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, address, invalid_xmm, dst); + } + + void vrsqrtps_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, src, invalid_xmm, dst); + } + void vrsqrtps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, offset, base, invalid_xmm, dst); + } + void vrsqrtps_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, address, invalid_xmm, dst); + } + + void vsqrtps_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, src, invalid_xmm, dst); + } + void vsqrtps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, offset, base, invalid_xmm, dst); + } + void vsqrtps_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, address, invalid_xmm, dst); + } + + void vaddsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, src1, src0, dst); + } + + void vaddss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, src1, src0, dst); + } + + void vaddsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, offset, base, src0, dst); + } + + void vaddss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + 
twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, offset, base, src0, dst); + } + + void vaddsd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, address, src0, dst); + } + void vaddss_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, address, src0, dst); + } + + void vcvtss2sd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vcvtss2sd", VEX_SS, OP2_CVTSS2SD_VsdEd, src1, src0, dst); + } + + void vcvtsd2ss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vcvtsd2ss", VEX_SD, OP2_CVTSD2SS_VsdEd, src1, src0, dst); + } + + void vcvtsi2ss_rr(RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpInt32Simd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, src1, src0, dst); + } + + void vcvtsi2sd_rr(RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpInt32Simd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, src1, src0, dst); + } + + void vcvttps2dq_rr(XMMRegisterID src, XMMRegisterID dst) + { + twoByteOpSimd("vcvttps2dq", VEX_SS, OP2_CVTTPS2DQ_VdqWps, src, invalid_xmm, dst); + } + + void vcvtdq2ps_rr(XMMRegisterID src, XMMRegisterID dst) + { + twoByteOpSimd("vcvtdq2ps", VEX_PS, OP2_CVTDQ2PS_VpsWdq, src, invalid_xmm, dst); + } + + void vcvtsi2sd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, offset, base, src0, dst); + } + + void vcvtsi2sd_mr(int32_t offset, RegisterID base, RegisterID index, int scale, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, offset, base, index, scale, src0, dst); + } + + void vcvtsi2ss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, offset, base, src0, dst); + } + + void vcvtsi2ss_mr(int32_t offset, 
RegisterID base, RegisterID index, int scale, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, offset, base, index, scale, src0, dst); + } + + void vcvttsd2si_rr(XMMRegisterID src, RegisterID dst) + { + twoByteOpSimdInt32("vcvttsd2si", VEX_SD, OP2_CVTTSD2SI_GdWsd, src, dst); + } + + void vcvttss2si_rr(XMMRegisterID src, RegisterID dst) + { + twoByteOpSimdInt32("vcvttss2si", VEX_SS, OP2_CVTTSD2SI_GdWsd, src, dst); + } + + void vunpcklps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, src1, src0, dst); + } + void vunpcklps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, offset, base, src0, dst); + } + void vunpcklps_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, addr, src0, dst); + } + + void vunpckhps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, src1, src0, dst); + } + void vunpckhps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, offset, base, src0, dst); + } + void vunpckhps_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, addr, src0, dst); + } + + void vpand_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, src1, src0, dst); + } + void vpand_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, offset, base, src0, dst); + } + void vpand_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, address, src0, dst); + } + void 
vpor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, src1, src0, dst); + } + void vpor_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, offset, base, src0, dst); + } + void vpor_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, address, src0, dst); + } + void vpxor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, src1, src0, dst); + } + void vpxor_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, offset, base, src0, dst); + } + void vpxor_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, address, src0, dst); + } + void vpandn_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, src1, src0, dst); + } + void vpandn_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, offset, base, src0, dst); + } + void vpandn_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, address, src0, dst); + } + + void vpshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) + { + twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, src, invalid_xmm, dst); + } + void vpshufd_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, offset, base, invalid_xmm, dst); + } + void vpshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst) + { + twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, address, invalid_xmm, dst); + } + + void 
vpshuflw_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) + { + twoByteOpImmSimd("vpshuflw", VEX_SD, OP2_PSHUFLW_VdqWdqIb, mask, src, invalid_xmm, dst); + } + + void vpshufhw_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) + { + twoByteOpImmSimd("vpshufhw", VEX_SS, OP2_PSHUFHW_VdqWdqIb, mask, src, invalid_xmm, dst); + } + + void vpshufb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + threeByteOpSimd("vpshufb", VEX_PD, OP3_PSHUFB_VdqWdq, ESCAPE_38, src1, src0, dst); + } + + void vshufps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, src1, src0, dst); + } + void vshufps_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, offset, base, src0, dst); + } + void vshufps_imr(uint32_t mask, const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, address, src0, dst); + } + + void vmovddup_rr(XMMRegisterID src, XMMRegisterID dst) + { + twoByteOpSimd("vmovddup", VEX_SD, OP2_MOVDDUP_VqWq, src, invalid_xmm, dst); + } + + void vmovhlps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmovhlps", VEX_PS, OP2_MOVHLPS_VqUq, src1, src0, dst); + } + + void vmovlhps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmovlhps", VEX_PS, OP2_MOVLHPS_VqUq, src1, src0, dst); + } + + void vpsrldq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) + { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpsrldq", OP2_PSRLDQ_Vd, ShiftID::vpsrldq, count, src, dst); + } + + void vpsllq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) + { + MOZ_ASSERT(count < 64); + shiftOpImmSimd("vpsllq", OP2_PSRLDQ_Vd, ShiftID::vpsllx, count, src, dst); + } + + void vpsrlq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) 
+ { + MOZ_ASSERT(count < 64); + shiftOpImmSimd("vpsrlq", OP2_PSRLDQ_Vd, ShiftID::vpsrlx, count, src, dst); + } + + void vpslld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpslld", VEX_PD, OP2_PSLLD_VdqWdq, src1, src0, dst); + } + + void vpslld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) + { + MOZ_ASSERT(count < 32); + shiftOpImmSimd("vpslld", OP2_PSLLD_UdqIb, ShiftID::vpsllx, count, src, dst); + } + + void vpsrad_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsrad", VEX_PD, OP2_PSRAD_VdqWdq, src1, src0, dst); + } + + void vpsrad_ir(int32_t count, XMMRegisterID src, XMMRegisterID dst) + { + MOZ_ASSERT(count < 32); + shiftOpImmSimd("vpsrad", OP2_PSRAD_UdqIb, ShiftID::vpsrad, count, src, dst); + } + + void vpsrld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsrld", VEX_PD, OP2_PSRLD_VdqWdq, src1, src0, dst); + } + + void vpsrld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) + { + MOZ_ASSERT(count < 32); + shiftOpImmSimd("vpsrld", OP2_PSRLD_UdqIb, ShiftID::vpsrlx, count, src, dst); + } + + void vpsllw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsllw", VEX_PD, OP2_PSLLW_VdqWdq, src1, src0, dst); + } + + void vpsllw_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) + { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpsllw", OP2_PSLLW_UdqIb, ShiftID::vpsllx, count, src, dst); + } + + void vpsraw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsraw", VEX_PD, OP2_PSRAW_VdqWdq, src1, src0, dst); + } + + void vpsraw_ir(int32_t count, XMMRegisterID src, XMMRegisterID dst) + { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpsraw", OP2_PSRAW_UdqIb, ShiftID::vpsrad, count, src, dst); + } + + void vpsrlw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vpsrlw", VEX_PD, OP2_PSRLW_VdqWdq, src1, src0, dst); + } + + void 
vpsrlw_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) + { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpsrlw", OP2_PSRLW_UdqIb, ShiftID::vpsrlx, count, src, dst); + } + + void vmovmskpd_rr(XMMRegisterID src, RegisterID dst) + { + twoByteOpSimdInt32("vmovmskpd", VEX_PD, OP2_MOVMSKPD_EdVd, src, dst); + } + + void vmovmskps_rr(XMMRegisterID src, RegisterID dst) + { + twoByteOpSimdInt32("vmovmskps", VEX_PS, OP2_MOVMSKPD_EdVd, src, dst); + } + + void vptest_rr(XMMRegisterID rhs, XMMRegisterID lhs) { + threeByteOpSimd("vptest", VEX_PD, OP3_PTEST_VdVd, ESCAPE_38, rhs, invalid_xmm, lhs); + } + + void vmovd_rr(XMMRegisterID src, RegisterID dst) + { + twoByteOpSimdInt32("vmovd", VEX_PD, OP2_MOVD_EdVd, (XMMRegisterID)dst, (RegisterID)src); + } + + void vmovd_rr(RegisterID src, XMMRegisterID dst) + { + twoByteOpInt32Simd("vmovd", VEX_PD, OP2_MOVD_VdEd, src, invalid_xmm, dst); + } + + void vmovd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, invalid_xmm, dst); + } + + void vmovd_mr(int32_t offset, RegisterID base, RegisterID index, int32_t scale, XMMRegisterID dst) + { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, index, scale, invalid_xmm, dst); + } + + void vmovd_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd_disp32("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, invalid_xmm, dst); + } + + void vmovd_mr(const void* address, XMMRegisterID dst) + { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, address, invalid_xmm, dst); + } + + void vmovd_rm(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, invalid_xmm, src); + } + + void vmovd_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, index, scale, invalid_xmm, src); + } + + void vmovd_rm_disp32(XMMRegisterID src, int32_t offset, 
RegisterID base) + { + twoByteOpSimd_disp32("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, invalid_xmm, src); + } + + void vmovd_rm(XMMRegisterID src, const void* address) + { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, address, invalid_xmm, src); + } + + void vmovsd_rm(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm, src); + } + + void vmovsd_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd_disp32("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm, src); + } + + void vmovss_rm(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm, src); + } + + void vmovss_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd_disp32("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm, src); + } + + void vmovss_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, invalid_xmm, dst); + } + + void vmovss_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd_disp32("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, invalid_xmm, dst); + } + + void vmovsd_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, index, scale, invalid_xmm, src); + } + + void vmovss_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base, index, scale, invalid_xmm, src); + } + + void vmovss_mr(int32_t offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) + { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, index, scale, invalid_xmm, dst); + } + + void vmovsd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + 
twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, invalid_xmm, dst); + } + + void vmovsd_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd_disp32("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, invalid_xmm, dst); + } + + void vmovsd_mr(int32_t offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) + { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, index, scale, invalid_xmm, dst); + } + + // Note that the register-to-register form of vmovsd does not write to the + // entire output register. For general-purpose register-to-register moves, + // use vmovapd instead. + void vmovsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, src1, src0, dst); + } + + // The register-to-register form of vmovss has the same problem as vmovsd + // above. Prefer vmovaps for register-to-register moves. + void vmovss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, src1, src0, dst); + } + + void vmovsd_mr(const void* address, XMMRegisterID dst) + { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, address, invalid_xmm, dst); + } + + void vmovss_mr(const void* address, XMMRegisterID dst) + { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, address, invalid_xmm, dst); + } + + void vmovups_mr(const void* address, XMMRegisterID dst) + { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, address, invalid_xmm, dst); + } + + void vmovdqu_mr(const void* address, XMMRegisterID dst) + { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, address, invalid_xmm, dst); + } + + void vmovsd_rm(XMMRegisterID src, const void* address) + { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, address, invalid_xmm, src); + } + + void vmovss_rm(XMMRegisterID src, const void* address) + { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, address, invalid_xmm, src); + } + + void 
vmovdqa_rm(XMMRegisterID src, const void* address) + { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, address, invalid_xmm, src); + } + + void vmovaps_rm(XMMRegisterID src, const void* address) + { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, address, invalid_xmm, src); + } + + void vmovdqu_rm(XMMRegisterID src, const void* address) + { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, address, invalid_xmm, src); + } + + void vmovups_rm(XMMRegisterID src, const void* address) + { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, address, invalid_xmm, src); + } + + void vmovaps_rr(XMMRegisterID src, XMMRegisterID dst) + { +#ifdef JS_CODEGEN_X64 + // There are two opcodes that can encode this instruction. If we have + // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the + // opcode which swaps the operands, as that way we can get a two-byte + // VEX in that case. + if (src >= xmm8 && dst < xmm8) { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, dst, invalid_xmm, src); + return; + } +#endif + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, src, invalid_xmm, dst); + } + void vmovaps_rm(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, offset, base, invalid_xmm, src); + } + void vmovaps_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, offset, base, index, scale, invalid_xmm, src); + } + void vmovaps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, offset, base, invalid_xmm, dst); + } + void vmovaps_mr(int32_t offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) + { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, offset, base, index, scale, invalid_xmm, dst); + } + + void vmovups_rm(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, 
offset, base, invalid_xmm, src); + } + void vmovups_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd_disp32("vmovups", VEX_PS, OP2_MOVPS_WpsVps, offset, base, invalid_xmm, src); + } + void vmovups_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, offset, base, index, scale, invalid_xmm, src); + } + void vmovups_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, invalid_xmm, dst); + } + void vmovups_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd_disp32("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, invalid_xmm, dst); + } + void vmovups_mr(int32_t offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) + { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, index, scale, invalid_xmm, dst); + } + + void vmovapd_rr(XMMRegisterID src, XMMRegisterID dst) + { +#ifdef JS_CODEGEN_X64 + // There are two opcodes that can encode this instruction. If we have + // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the + // opcode which swaps the operands, as that way we can get a two-byte + // VEX in that case. 
+ if (src >= xmm8 && dst < xmm8) { + twoByteOpSimd("vmovapd", VEX_PD, OP2_MOVAPS_WsdVsd, dst, invalid_xmm, src); + return; + } +#endif + twoByteOpSimd("vmovapd", VEX_PD, OP2_MOVAPD_VsdWsd, src, invalid_xmm, dst); + } + + void vmovdqu_rm(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base, invalid_xmm, src); + } + + void vmovdqu_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd_disp32("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base, invalid_xmm, src); + } + + void vmovdqu_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base, index, scale, invalid_xmm, src); + } + + void vmovdqu_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, invalid_xmm, dst); + } + + void vmovdqu_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd_disp32("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, invalid_xmm, dst); + } + + void vmovdqu_mr(int32_t offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) + { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, index, scale, invalid_xmm, dst); + } + + void vmovdqa_rr(XMMRegisterID src, XMMRegisterID dst) + { +#ifdef JS_CODEGEN_X64 + // There are two opcodes that can encode this instruction. If we have + // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the + // opcode which swaps the operands, as that way we can get a two-byte + // VEX in that case. 
+ if (src >= xmm8 && dst < xmm8) { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, dst, invalid_xmm, src); + return; + } +#endif + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, src, invalid_xmm, dst); + } + + void vmovdqa_rm(XMMRegisterID src, int32_t offset, RegisterID base) + { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, offset, base, invalid_xmm, src); + } + + void vmovdqa_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale) + { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, offset, base, index, scale, invalid_xmm, src); + } + + void vmovdqa_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, offset, base, invalid_xmm, dst); + } + + void vmovdqa_mr(int32_t offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) + { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, offset, base, index, scale, invalid_xmm, dst); + } + + void vmulsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmulsd", VEX_SD, OP2_MULSD_VsdWsd, src1, src0, dst); + } + + void vmulss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmulss", VEX_SS, OP2_MULSD_VsdWsd, src1, src0, dst); + } + + void vmulsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmulsd", VEX_SD, OP2_MULSD_VsdWsd, offset, base, src0, dst); + } + + void vmulss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmulss", VEX_SS, OP2_MULSD_VsdWsd, offset, base, src0, dst); + } + + void vpinsrw_irr(uint32_t whichWord, RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + MOZ_ASSERT(whichWord < 8); + twoByteOpImmInt32Simd("vpinsrw", VEX_PD, OP2_PINSRW, whichWord, src1, src0, dst); + } + + void vpextrw_irr(uint32_t whichWord, XMMRegisterID src, RegisterID dst) + { + MOZ_ASSERT(whichWord < 8); + twoByteOpImmSimdInt32("vpextrw", 
VEX_PD, OP2_PEXTRW_GdUdIb, whichWord, src, dst); + } + + void vsubsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsubsd", VEX_SD, OP2_SUBSD_VsdWsd, src1, src0, dst); + } + + void vsubss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsubss", VEX_SS, OP2_SUBSD_VsdWsd, src1, src0, dst); + } + + void vsubsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsubsd", VEX_SD, OP2_SUBSD_VsdWsd, offset, base, src0, dst); + } + + void vsubss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsubss", VEX_SS, OP2_SUBSD_VsdWsd, offset, base, src0, dst); + } + + void vucomiss_rr(XMMRegisterID rhs, XMMRegisterID lhs) + { + twoByteOpSimdFlags("vucomiss", VEX_PS, OP2_UCOMISD_VsdWsd, rhs, lhs); + } + + void vucomisd_rr(XMMRegisterID rhs, XMMRegisterID lhs) + { + twoByteOpSimdFlags("vucomisd", VEX_PD, OP2_UCOMISD_VsdWsd, rhs, lhs); + } + + void vucomisd_mr(int32_t offset, RegisterID base, XMMRegisterID lhs) + { + twoByteOpSimdFlags("vucomisd", VEX_PD, OP2_UCOMISD_VsdWsd, offset, base, lhs); + } + + void vdivsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vdivsd", VEX_SD, OP2_DIVSD_VsdWsd, src1, src0, dst); + } + + void vdivss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vdivss", VEX_SS, OP2_DIVSD_VsdWsd, src1, src0, dst); + } + + void vdivsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vdivsd", VEX_SD, OP2_DIVSD_VsdWsd, offset, base, src0, dst); + } + + void vdivss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vdivss", VEX_SS, OP2_DIVSD_VsdWsd, offset, base, src0, dst); + } + + void vxorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vxorpd", VEX_PD, OP2_XORPD_VpdWpd, src1, src0, dst); + } + + void 
vorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vorpd", VEX_PD, OP2_ORPD_VpdWpd, src1, src0, dst); + } + + void vandpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vandpd", VEX_PD, OP2_ANDPD_VpdWpd, src1, src0, dst); + } + + void vandps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, src1, src0, dst); + } + + void vandps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, offset, base, src0, dst); + } + + void vandps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, address, src0, dst); + } + + void vandnps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, src1, src0, dst); + } + + void vandnps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, offset, base, src0, dst); + } + + void vandnps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, address, src0, dst); + } + + void vorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, src1, src0, dst); + } + + void vorps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, offset, base, src0, dst); + } + + void vorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, address, src0, dst); + } + + void vxorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, src1, src0, dst); + } + + void vxorps_mr(int32_t offset, RegisterID base, 
XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, offset, base, src0, dst); + } + + void vxorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, address, src0, dst); + } + + void vsqrtsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsqrtsd", VEX_SD, OP2_SQRTSD_VsdWsd, src1, src0, dst); + } + + void vsqrtss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vsqrtss", VEX_SS, OP2_SQRTSS_VssWss, src1, src0, dst); + } + + void vroundsd_irr(RoundingMode mode, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + threeByteOpImmSimd("vroundsd", VEX_PD, OP3_ROUNDSD_VsdWsd, ESCAPE_3A, mode, src1, src0, dst); + } + + void vroundss_irr(RoundingMode mode, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + threeByteOpImmSimd("vroundss", VEX_PD, OP3_ROUNDSS_VsdWsd, ESCAPE_3A, mode, src1, src0, dst); + } + + void vinsertps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_3A, mask, src1, src0, dst); + } + void vinsertps_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_3A, mask, offset, base, src0, dst); + } + + void vpinsrb_irr(unsigned lane, RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + MOZ_ASSERT(lane < 16); + threeByteOpImmInt32Simd("vpinsrb", VEX_PD, OP3_PINSRB_VdqEdIb, ESCAPE_3A, lane, src1, src0, dst); + } + + void vpinsrd_irr(unsigned lane, RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + MOZ_ASSERT(lane < 4); + threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEdIb, ESCAPE_3A, lane, src1, src0, dst); + } + + void vpextrb_irr(unsigned lane, XMMRegisterID src, RegisterID dst) + { + MOZ_ASSERT(lane < 16); + 
threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EdVdqIb, ESCAPE_3A, lane, (XMMRegisterID)dst, (RegisterID)src); + } + + void vpextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst) + { + MOZ_ASSERT(lane < 4); + threeByteOpImmSimdInt32("vpextrd", VEX_PD, OP3_PEXTRD_EdVdqIb, ESCAPE_3A, lane, (XMMRegisterID)dst, (RegisterID)src); + } + + void vblendps_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + MOZ_ASSERT(imm < 16); + // Despite being a "ps" instruction, vblendps is encoded with the "pd" prefix. + threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, src1, src0, dst); + } + + void vblendps_imr(unsigned imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + MOZ_ASSERT(imm < 16); + // Despite being a "ps" instruction, vblendps is encoded with the "pd" prefix. +threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, offset, base, src0, dst); + } + + void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + vblendvOpSimd(mask, src1, src0, dst); + } + void vblendvps_mr(XMMRegisterID mask, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) { + vblendvOpSimd(mask, offset, base, src0, dst); + } + + void vmovsldup_rr(XMMRegisterID src, XMMRegisterID dst) + { + twoByteOpSimd("vmovsldup", VEX_SS, OP2_MOVSLDUP_VpsWps, src, invalid_xmm, dst); + } + void vmovsldup_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd("vmovsldup", VEX_SS, OP2_MOVSLDUP_VpsWps, offset, base, invalid_xmm, dst); + } + + void vmovshdup_rr(XMMRegisterID src, XMMRegisterID dst) + { + twoByteOpSimd("vmovshdup", VEX_SS, OP2_MOVSHDUP_VpsWps, src, invalid_xmm, dst); + } + void vmovshdup_mr(int32_t offset, RegisterID base, XMMRegisterID dst) + { + twoByteOpSimd("vmovshdup", VEX_SS, OP2_MOVSHDUP_VpsWps, offset, base, invalid_xmm, dst); + } + + void vminsd_rr(XMMRegisterID src1, XMMRegisterID src0, 
XMMRegisterID dst) + { + twoByteOpSimd("vminsd", VEX_SD, OP2_MINSD_VsdWsd, src1, src0, dst); + } + void vminsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vminsd", VEX_SD, OP2_MINSD_VsdWsd, offset, base, src0, dst); + } + + void vminss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vminss", VEX_SS, OP2_MINSS_VssWss, src1, src0, dst); + } + + void vmaxsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmaxsd", VEX_SD, OP2_MAXSD_VsdWsd, src1, src0, dst); + } + void vmaxsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmaxsd", VEX_SD, OP2_MAXSD_VsdWsd, offset, base, src0, dst); + } + + void vmaxss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + twoByteOpSimd("vmaxss", VEX_SS, OP2_MAXSS_VssWss, src1, src0, dst); + } + + // Misc instructions: + + void int3() + { + spew("int3"); + m_formatter.oneByteOp(OP_INT3); + } + + void ud2() + { + spew("ud2"); + m_formatter.twoByteOp(OP2_UD2); + } + + void ret() + { + spew("ret"); + m_formatter.oneByteOp(OP_RET); + } + + void ret_i(int32_t imm) + { + spew("ret $%d", imm); + m_formatter.oneByteOp(OP_RET_Iz); + m_formatter.immediate16u(imm); + } + + void mfence() { + spew("mfence"); + m_formatter.twoByteOp(OP_FENCE, (RegisterID)0, 6); + } + + // Assembler admin methods: + + JmpDst label() + { + JmpDst r = JmpDst(m_formatter.size()); + spew(".set .Llabel%d, .", r.offset()); + return r; + } + + size_t currentOffset() const { + return m_formatter.size(); + } + + static JmpDst labelFor(JmpSrc jump, intptr_t offset = 0) + { + return JmpDst(jump.offset() + offset); + } + + void haltingAlign(int alignment) + { + spew(".balign %d, 0x%x # hlt", alignment, OP_HLT); + while (!m_formatter.isAligned(alignment)) + m_formatter.oneByteOp(OP_HLT); + } + + void nopAlign(int alignment) + { + spew(".balign %d", alignment); + + int remainder = m_formatter.size() % 
alignment; + if (remainder > 0) + insert_nop(alignment - remainder); + } + + void jumpTablePointer(uintptr_t ptr) + { +#ifdef JS_CODEGEN_X64 + spew(".quad 0x%" PRIxPTR, ptr); +#else + spew(".int 0x%" PRIxPTR, ptr); +#endif + m_formatter.jumpTablePointer(ptr); + } + + void doubleConstant(double d) + { + spew(".double %.16g", d); + m_formatter.doubleConstant(d); + } + void floatConstant(float f) + { + spew(".float %.16g", f); + m_formatter.floatConstant(f); + } + + void simd128Constant(const void* data) + { + const uint32_t* dw = reinterpret_cast<const uint32_t*>(data); + spew(".int 0x%08x,0x%08x,0x%08x,0x%08x", dw[0], dw[1], dw[2], dw[3]); + MOZ_ASSERT(m_formatter.isAligned(16)); + m_formatter.simd128Constant(data); + } + + void int32Constant(int32_t i) + { + spew(".int %d", i); + m_formatter.int32Constant(i); + } + void int64Constant(int64_t i) + { + spew(".quad %lld", (long long)i); + m_formatter.int64Constant(i); + } + + // Linking & patching: + + void assertValidJmpSrc(JmpSrc src) + { + // The target offset is stored at offset - 4. + MOZ_RELEASE_ASSERT(src.offset() > int32_t(sizeof(int32_t))); + MOZ_RELEASE_ASSERT(size_t(src.offset()) <= size()); + } + + bool nextJump(const JmpSrc& from, JmpSrc* next) + { + // Sanity check - if the assembler has OOM'd, it will start overwriting + // its internal buffer and thus our links could be garbage. + if (oom()) + return false; + + assertValidJmpSrc(from); + + const unsigned char* code = m_formatter.data(); + int32_t offset = GetInt32(code + from.offset()); + if (offset == -1) + return false; + + if (MOZ_UNLIKELY(size_t(offset) >= size())) { +#ifdef NIGHTLY_BUILD + // Stash some data on the stack so we can retrieve it from minidumps, + // see bug 1124397.
+ int32_t startOffset = from.offset() - 1; + while (startOffset >= 0 && code[startOffset] == 0xe5) + startOffset--; + int32_t endOffset = from.offset() - 1; + while (endOffset < int32_t(size()) && code[endOffset] == 0xe5) + endOffset++; + volatile uintptr_t dump[10]; + blackbox = dump; + blackbox[0] = uintptr_t(0xABCD1234); + blackbox[1] = uintptr_t(offset); + blackbox[2] = uintptr_t(size()); + blackbox[3] = uintptr_t(from.offset()); + blackbox[4] = uintptr_t(code[from.offset() - 5]); + blackbox[5] = uintptr_t(code[from.offset() - 4]); + blackbox[6] = uintptr_t(code[from.offset() - 3]); + blackbox[7] = uintptr_t(startOffset); + blackbox[8] = uintptr_t(endOffset); + blackbox[9] = uintptr_t(0xFFFF7777); +#endif + MOZ_CRASH("nextJump bogus offset"); + } + + *next = JmpSrc(offset); + return true; + } + void setNextJump(const JmpSrc& from, const JmpSrc& to) + { + // Sanity check - if the assembler has OOM'd, it will start overwriting + // its internal buffer and thus our links could be garbage. + if (oom()) + return; + + assertValidJmpSrc(from); + MOZ_RELEASE_ASSERT(to.offset() == -1 || size_t(to.offset()) <= size()); + + unsigned char* code = m_formatter.data(); + AutoUnprotectAssemblerBufferRegion unprotect(*this, from.offset() - 4, 4); + SetInt32(code + from.offset(), to.offset()); + } + + void linkJump(JmpSrc from, JmpDst to) + { + MOZ_ASSERT(from.offset() != -1); + MOZ_ASSERT(to.offset() != -1); + + // Sanity check - if the assembler has OOM'd, it will start overwriting + // its internal buffer and thus our links could be garbage. 
+ if (oom()) + return; + + assertValidJmpSrc(from); + MOZ_RELEASE_ASSERT(size_t(to.offset()) <= size()); + + spew(".set .Lfrom%d, .Llabel%d", from.offset(), to.offset()); + unsigned char* code = m_formatter.data(); + AutoUnprotectAssemblerBufferRegion unprotect(*this, from.offset() - 4, 4); + SetRel32(code + from.offset(), code + to.offset()); + } + + void executableCopy(void* buffer) + { + memcpy(buffer, m_formatter.buffer(), size()); + } + MOZ_MUST_USE bool appendBuffer(const BaseAssembler& other) + { + return m_formatter.append(other.m_formatter.buffer(), other.size()); + } + + void unprotectDataRegion(size_t firstByteOffset, size_t lastByteOffset) { + m_formatter.unprotectDataRegion(firstByteOffset, lastByteOffset); + } + void reprotectDataRegion(size_t firstByteOffset, size_t lastByteOffset) { + m_formatter.reprotectDataRegion(firstByteOffset, lastByteOffset); + } + + protected: + static bool CAN_SIGN_EXTEND_8_32(int32_t value) { return value == (int32_t)(int8_t)value; } + static bool CAN_SIGN_EXTEND_16_32(int32_t value) { return value == (int32_t)(int16_t)value; } + static bool CAN_ZERO_EXTEND_8_32(int32_t value) { return value == (int32_t)(uint8_t)value; } + static bool CAN_ZERO_EXTEND_8H_32(int32_t value) { return value == (value & 0xff00); } + static bool CAN_ZERO_EXTEND_16_32(int32_t value) { return value == (int32_t)(uint16_t)value; } + static bool CAN_ZERO_EXTEND_32_64(int32_t value) { return value >= 0; } + + // Methods for encoding SIMD instructions via either legacy SSE encoding or + // VEX encoding. + + bool useLegacySSEEncoding(XMMRegisterID src0, XMMRegisterID dst) + { + // If we don't have AVX or it's disabled, use the legacy SSE encoding. 
+ if (!useVEX_) { + MOZ_ASSERT(src0 == invalid_xmm || src0 == dst, + "Legacy SSE (pre-AVX) encoding requires the output register to be " + "the same as the src0 input register"); + return true; + } + + // If src0 is the same as the output register, we might as well use + // the legacy SSE encoding, since it is smaller. However, this is only + // beneficial as long as we're not using ymm registers anywhere. + return src0 == dst; + } + + bool useLegacySSEEncodingForVblendv(XMMRegisterID mask, XMMRegisterID src0, XMMRegisterID dst) + { + // Similar to useLegacySSEEncoding, but for vblendv the Legacy SSE + // encoding also requires the mask to be in xmm0. + + if (!useVEX_) { + MOZ_ASSERT(src0 == dst, + "Legacy SSE (pre-AVX) encoding requires the output register to be " + "the same as the src0 input register"); + MOZ_ASSERT(mask == xmm0, + "Legacy SSE (pre-AVX) encoding for blendv requires the mask to be " + "in xmm0"); + return true; + } + + return src0 == dst && mask == xmm0; + } + + bool useLegacySSEEncodingForOtherOutput() + { + return !useVEX_; + } + + const char* legacySSEOpName(const char* name) + { + MOZ_ASSERT(name[0] == 'v'); + return name + 1; + } + + void twoByteOpSimd(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(dst), XMMRegName(rm)); + else + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, %s", name, XMMRegName(dst), XMMRegName(rm)); + else + spew("%-11s%s, %s", name, XMMRegName(rm), XMMRegName(dst)); + } else { + spew("%-11s%s, %s, %s", name, XMMRegName(rm), XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, 
opcode, (RegisterID)rm, src0, dst); + } + + void twoByteOpImmSimd(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + uint32_t imm, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); + m_formatter.immediate8u(imm); + return; + } + + if (src0 == invalid_xmm) + spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(rm), XMMRegName(dst)); + else + spew("%-11s$0x%x, %s, %s, %s", name, imm, XMMRegName(rm), XMMRegName(src0), XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpSimd(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_ob, legacySSEOpName(name), + XMMRegName(dst), ADDR_ob(offset, base)); + } else { + spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), + ADDR_ob(offset, base), XMMRegName(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, " MEM_ob, name, XMMRegName(dst), ADDR_ob(offset, base)); + else + spew("%-11s" MEM_ob ", %s", name, ADDR_ob(offset, base), XMMRegName(dst)); + } else { + spew("%-11s" MEM_ob ", %s, %s", name, + ADDR_ob(offset, base), XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst); + } + + void twoByteOpSimd_disp32(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) + 
spew("%-11s%s, " MEM_o32b, legacySSEOpName(name), XMMRegName(dst), ADDR_o32b(offset, base)); + else + spew("%-11s" MEM_o32b ", %s", legacySSEOpName(name), ADDR_o32b(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp_disp32(opcode, offset, base, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, " MEM_o32b, name, XMMRegName(dst), ADDR_o32b(offset, base)); + else + spew("%-11s" MEM_o32b ", %s", name, ADDR_o32b(offset, base), XMMRegName(dst)); + } else { + spew("%-11s" MEM_o32b ", %s, %s", name, + ADDR_o32b(offset, base), XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex_disp32(ty, opcode, offset, base, src0, dst); + } + + void twoByteOpImmSimd(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + uint32_t imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, + ADDR_ob(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), + XMMRegName(src0), XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpSimd(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_obs, legacySSEOpName(name), + XMMRegName(dst), ADDR_obs(offset, base, index, scale)); + } else { + spew("%-11s" MEM_obs ", %s", legacySSEOpName(name), + ADDR_obs(offset, base, index, scale), XMMRegName(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, 
base, index, scale, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_obs, name, XMMRegName(dst), + ADDR_obs(offset, base, index, scale)); + } else { + spew("%-11s" MEM_obs ", %s", name, ADDR_obs(offset, base, index, scale), + XMMRegName(dst)); + } + } else { + spew("%-11s" MEM_obs ", %s, %s", name, ADDR_obs(offset, base, index, scale), + XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, offset, base, index, scale, src0, dst); + } + + void twoByteOpSimd(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, %p", legacySSEOpName(name), XMMRegName(dst), address); + else + spew("%-11s%p, %s", legacySSEOpName(name), address, XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, address, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, %p", name, XMMRegName(dst), address); + else + spew("%-11s%p, %s", name, address, XMMRegName(dst)); + } else { + spew("%-11s%p, %s, %s", name, address, XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, address, src0, dst); + } + + void twoByteOpImmSimd(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + uint32_t imm, const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, %p, %s", legacySSEOpName(name), imm, address, XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, address, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %p, %s, %s", name, imm, address, XMMRegName(src0), XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, address, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpInt32Simd(const char* name, 
VexOperandType ty, TwoByteOpcodeID opcode, + RegisterID rm, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(dst), GPReg32Name(rm)); + else + spew("%-11s%s, %s", legacySSEOpName(name), GPReg32Name(rm), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, rm, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, %s", name, XMMRegName(dst), GPReg32Name(rm)); + else + spew("%-11s%s, %s", name, GPReg32Name(rm), XMMRegName(dst)); + } else { + spew("%-11s%s, %s, %s", name, GPReg32Name(rm), XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, rm, src0, dst); + } + + void twoByteOpSimdInt32(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + XMMRegisterID rm, RegisterID dst) + { + if (useLegacySSEEncodingForOtherOutput()) { + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, %s", legacySSEOpName(name), GPReg32Name(dst), XMMRegName(rm)); + else if (opcode == OP2_MOVD_EdVd) + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName((XMMRegisterID)dst), GPReg32Name((RegisterID)rm)); + else + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); + return; + } + + if (IsXMMReversedOperands(opcode)) + spew("%-11s%s, %s", name, GPReg32Name(dst), XMMRegName(rm)); + else if (opcode == OP2_MOVD_EdVd) + spew("%-11s%s, %s", name, XMMRegName((XMMRegisterID)dst), GPReg32Name((RegisterID)rm)); + else + spew("%-11s%s, %s", name, XMMRegName(rm), GPReg32Name(dst)); + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm, dst); + } + + void twoByteOpImmSimdInt32(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + uint32_t imm, XMMRegisterID rm, RegisterID dst) + { + if (useLegacySSEEncodingForOtherOutput()) { + 
spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(rm), GPReg32Name(dst)); + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpImmInt32Simd(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + uint32_t imm, RegisterID rm, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncodingForOtherOutput()) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, GPReg32Name(rm), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, rm, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %s, %s", name, imm, GPReg32Name(rm), XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, rm, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpSimdFlags(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + XMMRegisterID rm, XMMRegisterID reg) + { + if (useLegacySSEEncodingForOtherOutput()) { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), XMMRegName(reg)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, reg); + return; + } + + spew("%-11s%s, %s", name, XMMRegName(rm), XMMRegName(reg)); + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm, (XMMRegisterID)reg); + } + + void twoByteOpSimdFlags(const char* name, VexOperandType ty, TwoByteOpcodeID opcode, + int32_t offset, RegisterID base, XMMRegisterID reg) + { + if (useLegacySSEEncodingForOtherOutput()) { + spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), + ADDR_ob(offset, base), XMMRegName(reg)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, reg); + return; + } + + spew("%-11s" MEM_ob ", %s", name, + ADDR_ob(offset, base), XMMRegName(reg)); + 
m_formatter.twoByteOpVex(ty, opcode, offset, base, invalid_xmm, (XMMRegisterID)reg); + } + + void threeByteOpSimd(const char* name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, + XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst); + return; + } + + spew("%-11s%s, %s, %s", name, XMMRegName(rm), XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst); + } + + void threeByteOpImmSimd(const char* name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, + uint32_t imm, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %s, %s, %s", name, imm, XMMRegName(rm), XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpSimd(const char* name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, + int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), + ADDR_ob(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, dst); + return; + } + + spew("%-11s" MEM_ob ", %s, %s", name, + ADDR_ob(offset, base), XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst); + } + + void 
threeByteOpImmSimd(const char* name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, + uint32_t imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, + ADDR_ob(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), + XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpSimd(const char* name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, + const void* address, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s%p, %s", legacySSEOpName(name), address, XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, address, dst); + return; + } + + spew("%-11s%p, %s, %s", name, address, XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, address, src0, dst); + } + + void threeByteOpImmInt32Simd(const char* name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, uint32_t imm, + RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, GPReg32Name(src1), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, src1, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %s, %s, %s", name, imm, GPReg32Name(src1), XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, src1, src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmInt32Simd(const char* name, 
VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, uint32_t imm, + int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, ADDR_ob(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmSimdInt32(const char* name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, uint32_t imm, + XMMRegisterID src, RegisterID dst) + { + if (useLegacySSEEncodingForOtherOutput()) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(src), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, (RegisterID)src, dst); + m_formatter.immediate8u(imm); + return; + } + + if (opcode == OP3_PEXTRD_EdVdqIb) + spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName((XMMRegisterID)dst), GPReg32Name((RegisterID)src)); + else + spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(src), GPReg32Name(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)src, invalid_xmm, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmSimdInt32(const char* name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, uint32_t imm, + int32_t offset, RegisterID base, RegisterID dst) + { + if (useLegacySSEEncodingForOtherOutput()) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " 
MEM_ob ", %s", name, imm, ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, invalid_xmm, dst); + m_formatter.immediate8u(imm); + } + + // Blendv is a three-byte op, but the VEX encoding has a different opcode + // than the SSE encoding, so we handle it specially. + void vblendvOpSimd(XMMRegisterID mask, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncodingForVblendv(mask, src0, dst)) { + spew("blendvps %s, %s", XMMRegName(rm), XMMRegName(dst)); + // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix. + m_formatter.legacySSEPrefix(VEX_PD); + m_formatter.threeByteOp(OP3_BLENDVPS_VdqWdq, ESCAPE_3A, (RegisterID)rm, dst); + return; + } + + spew("vblendvps %s, %s, %s, %s", + XMMRegName(mask), XMMRegName(rm), XMMRegName(src0), XMMRegName(dst)); + // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix. + m_formatter.vblendvOpVex(VEX_PD, OP3_VBLENDVPS_VdqWdq, ESCAPE_3A, + mask, (RegisterID)rm, src0, dst); + } + + void vblendvOpSimd(XMMRegisterID mask, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncodingForVblendv(mask, src0, dst)) { + spew("blendvps " MEM_ob ", %s", ADDR_ob(offset, base), XMMRegName(dst)); + // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix. + m_formatter.legacySSEPrefix(VEX_PD); + m_formatter.threeByteOp(OP3_BLENDVPS_VdqWdq, ESCAPE_3A, offset, base, dst); + return; + } + + spew("vblendvps %s, " MEM_ob ", %s, %s", + XMMRegName(mask), ADDR_ob(offset, base), XMMRegName(src0), XMMRegName(dst)); + // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix. 
+ m_formatter.vblendvOpVex(VEX_PD, OP3_VBLENDVPS_VdqWdq, ESCAPE_3A, + mask, offset, base, src0, dst); + } + + void shiftOpImmSimd(const char* name, TwoByteOpcodeID opcode, ShiftID shiftKind, + uint32_t imm, XMMRegisterID src, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src, dst)) { + spew("%-11s$%d, %s", legacySSEOpName(name), imm, XMMRegName(dst)); + m_formatter.legacySSEPrefix(VEX_PD); + m_formatter.twoByteOp(opcode, (RegisterID)dst, (int)shiftKind); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$%d, %s, %s", name, imm, XMMRegName(src), XMMRegName(dst)); + m_formatter.twoByteOpVex(VEX_PD, opcode, (RegisterID)dst, src, (int)shiftKind); + m_formatter.immediate8u(imm); + } + + class X86InstructionFormatter { + + public: + // Legacy prefix bytes: + // + // These are emitted prior to the instruction. + + void prefix(OneByteOpcodeID pre) + { + m_buffer.putByte(pre); + } + + void legacySSEPrefix(VexOperandType ty) + { + switch (ty) { + case VEX_PS: break; + case VEX_PD: prefix(PRE_SSE_66); break; + case VEX_SS: prefix(PRE_SSE_F3); break; + case VEX_SD: prefix(PRE_SSE_F2); break; + } + } + + // Word-sized operands / no operand instruction formatters. + // + // In addition to the opcode, the following operand permutations are supported: + // * None - instruction takes no operands. + // * One register - the low three bits of the RegisterID are added into the opcode. + // * Two registers - encode a register form ModRm (for all ModRm formats, the reg field is passed first, and a GroupOpcodeID may be passed in its place). + // * Three argument ModRM - a register, and a register and an offset describing a memory operand. + // * Five argument ModRM - a register, and a base register, an index, scale, and offset describing a memory operand. + // + // For 32-bit x86 targets, the address operand may also be provided as a + // void*. On 64-bit targets REX prefixes will be planted as necessary, + // where high numbered registers are used.
+ // + // The twoByteOp methods plant two-byte Intel instructions sequences + // (first opcode byte 0x0F). + + void oneByteOp(OneByteOpcodeID opcode) + { + m_buffer.ensureSpace(MaxInstructionSize); + m_buffer.putByteUnchecked(opcode); + } + + void oneByteOp(OneByteOpcodeID opcode, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(0, 0, reg); + m_buffer.putByteUnchecked(opcode + (reg & 7)); + } + + void oneByteOp(OneByteOpcodeID opcode, RegisterID rm, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, rm); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void oneByteOp(OneByteOpcodeID opcode, int32_t offset, RegisterID base, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void oneByteOp_disp32(OneByteOpcodeID opcode, int32_t offset, RegisterID base, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, base, reg); + } + + void oneByteOp(OneByteOpcodeID opcode, int32_t offset, RegisterID base, RegisterID index, int scale, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, index, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void oneByteOp_disp32(OneByteOpcodeID opcode, int32_t offset, RegisterID index, int scale, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, index, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, index, scale, reg); + } + + void oneByteOp(OneByteOpcodeID opcode, const void* address, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(address, reg); + } + + void oneByteOp_disp32(OneByteOpcodeID opcode, const void* 
address, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(address, reg); + } +#ifdef JS_CODEGEN_X64 + void oneByteRipOp(OneByteOpcodeID opcode, int ripOffset, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } + + void oneByteRipOp64(OneByteOpcodeID opcode, int ripOffset, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } + + void twoByteRipOp(TwoByteOpcodeID opcode, int ripOffset, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } + + void twoByteRipOpVex(VexOperandType ty, TwoByteOpcodeID opcode, int ripOffset, + XMMRegisterID src0, XMMRegisterID reg) + { + int r = (reg >> 3), x = 0, b = 0; + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } +#endif + + void twoByteOp(TwoByteOpcodeID opcode) + { + m_buffer.ensureSpace(MaxInstructionSize); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + } + + void twoByteOp(TwoByteOpcodeID opcode, RegisterID rm, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, + RegisterID rm, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = (rm >> 
3); + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + } + + void twoByteOp(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, + int32_t offset, RegisterID base, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = (base >> 3); + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(offset, base, reg); + } + + void twoByteOp_disp32(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, base, reg); + } + + void twoByteOpVex_disp32(VexOperandType ty, TwoByteOpcodeID opcode, + int32_t offset, RegisterID base, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = (base >> 3); + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM_disp32(offset, base, reg); + } + + void twoByteOp(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, RegisterID index, int scale, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, index, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, + int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = (index >> 3), b = (base >> 3); + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, 
w, v, l, opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void twoByteOp(TwoByteOpcodeID opcode, const void* address, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(address, reg); + } + + void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, + const void* address, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = 0; + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(address, reg); + } + + void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, RegisterID rm, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape, + RegisterID rm, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = (rm >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: m = 2; break; + case ESCAPE_3A: m = 3; break; + default: MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + } + + void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, int32_t offset, RegisterID base, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape, + int32_t offset, RegisterID base, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = (base >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + 
case ESCAPE_38: m = 2; break; + case ESCAPE_3A: m = 3; break; + default: MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(offset, base, reg); + } + + void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, const void* address, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + memoryModRM(address, reg); + } + + void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape, + const void* address, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = 0; + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: m = 2; break; + case ESCAPE_3A: m = 3; break; + default: MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(address, reg); + } + + void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape, + XMMRegisterID mask, RegisterID rm, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = (rm >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: m = 2; break; + case ESCAPE_3A: m = 3; break; + default: MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + immediate8u(mask << 4); + } + + void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape, + XMMRegisterID mask, int32_t offset, RegisterID base, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = (base >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: m = 2; break; + case ESCAPE_3A: m = 3; break; + default: MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(offset, base, reg); + immediate8u(mask << 4); + } + +#ifdef JS_CODEGEN_X64 + // Quad-word-sized operands: + // 
+ // Used to format 64-bit operations, planting a REX.w prefix. When + // planting d64 or f64 instructions, not requiring a REX.w prefix, the + // normal (non-'64'-postfixed) formatters should be used. + + void oneByteOp64(OneByteOpcodeID opcode) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(0, 0, 0); + m_buffer.putByteUnchecked(opcode); + } + + void oneByteOp64(OneByteOpcodeID opcode, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(0, 0, reg); + m_buffer.putByteUnchecked(opcode + (reg & 7)); + } + + void oneByteOp64(OneByteOpcodeID opcode, RegisterID rm, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, rm); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void oneByteOp64(OneByteOpcodeID opcode, int32_t offset, RegisterID base, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void oneByteOp64_disp32(OneByteOpcodeID opcode, int32_t offset, RegisterID base, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, base, reg); + } + + void oneByteOp64(OneByteOpcodeID opcode, int32_t offset, RegisterID base, RegisterID index, int scale, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, index, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void oneByteOp64(OneByteOpcodeID opcode, const void* address, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM(address, reg); + } + + void twoByteOp64(TwoByteOpcodeID opcode, RegisterID rm, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void
twoByteOp64(TwoByteOpcodeID opcode, int offset, RegisterID base, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void twoByteOp64(TwoByteOpcodeID opcode, int offset, RegisterID base, RegisterID index, int scale, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, index, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void twoByteOp64(TwoByteOpcodeID opcode, const void* address, int reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(address, reg); + } + + void twoByteOpVex64(VexOperandType ty, TwoByteOpcodeID opcode, + RegisterID rm, XMMRegisterID src0, XMMRegisterID reg) + { + int r = (reg >> 3), x = 0, b = (rm >> 3); + int m = 1; // 0x0F + int w = 1, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + } +#endif + + // Byte-operands: + // + // These methods format byte operations. Byte operations differ from + // the normal formatters in the circumstances under which they will + // decide to emit REX prefixes. These should be used where any register + // operand signifies a byte register. + // + // The distinction is due to the handling of register numbers in the + // range 4..7 on x86-64. These register numbers may either represent + // the second byte of the first four registers (ah..bh) or the first + // byte of the second four registers (spl..dil). + // + // Address operands should still be checked using regRequiresRex(), + // while byteRegRequiresRex() is provided to check byte register + // operands.
+ + void oneByteOp8(OneByteOpcodeID opcode) + { + m_buffer.ensureSpace(MaxInstructionSize); + m_buffer.putByteUnchecked(opcode); + } + + void oneByteOp8(OneByteOpcodeID opcode, RegisterID r) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(r), 0, 0, r); + m_buffer.putByteUnchecked(opcode + (r & 7)); + } + + void oneByteOp8(OneByteOpcodeID opcode, RegisterID rm, GroupOpcodeID groupOp) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(rm), 0, 0, rm); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, groupOp); + } + + // Like oneByteOp8, but never emits a REX prefix. + void oneByteOp8_norex(OneByteOpcodeID opcode, HRegisterID rm, GroupOpcodeID groupOp) + { + MOZ_ASSERT(!regRequiresRex(RegisterID(rm))); + m_buffer.ensureSpace(MaxInstructionSize); + m_buffer.putByteUnchecked(opcode); + registerModRM(RegisterID(rm), groupOp); + } + + void oneByteOp8(OneByteOpcodeID opcode, int32_t offset, RegisterID base, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg), reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void oneByteOp8_disp32(OneByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg), reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, base, reg); + } + + void oneByteOp8(OneByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID index, int scale, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg), reg, index, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void oneByteOp8(OneByteOpcodeID opcode, const void* address, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg), reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + 
memoryModRM_disp32(address, reg); + } + + void twoByteOp8(TwoByteOpcodeID opcode, RegisterID rm, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg)|byteRegRequiresRex(rm), reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void twoByteOp8(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg)|regRequiresRex(base), reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void twoByteOp8(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, RegisterID index, + int scale, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg)|regRequiresRex(base)|regRequiresRex(index), + reg, index, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + // Like twoByteOp8 but doesn't add a REX prefix if the destination reg + // is in esp..edi. This may be used when the destination is not an 8-bit + // register (as in a movzbl instruction), so it doesn't need a REX + // prefix to disambiguate it from ah..bh. 
+ void twoByteOp8_movx(TwoByteOpcodeID opcode, RegisterID rm, RegisterID reg) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(regRequiresRex(reg)|byteRegRequiresRex(rm), reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void twoByteOp8(TwoByteOpcodeID opcode, RegisterID rm, GroupOpcodeID groupOp) + { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(rm), 0, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, groupOp); + } + + // Immediates: + // + // An immediate should be appended where appropriate after an op has + // been emitted. The writes are unchecked since the opcode formatters + // above will have ensured space. + + // A signed 8-bit immediate. + MOZ_ALWAYS_INLINE void immediate8s(int32_t imm) + { + MOZ_ASSERT(CAN_SIGN_EXTEND_8_32(imm)); + m_buffer.putByteUnchecked(imm); + } + + // An unsigned 8-bit immediate. + MOZ_ALWAYS_INLINE void immediate8u(uint32_t imm) + { + MOZ_ASSERT(CAN_ZERO_EXTEND_8_32(imm)); + m_buffer.putByteUnchecked(int32_t(imm)); + } + + // An 8-bit immediate which is either signed or unsigned, for use in + // instructions which actually only operate on 8 bits. + MOZ_ALWAYS_INLINE void immediate8(int32_t imm) + { + m_buffer.putByteUnchecked(imm); + } + + // A signed 16-bit immediate. + MOZ_ALWAYS_INLINE void immediate16s(int32_t imm) + { + MOZ_ASSERT(CAN_SIGN_EXTEND_16_32(imm)); + m_buffer.putShortUnchecked(imm); + } + + // An unsigned 16-bit immediate. + MOZ_ALWAYS_INLINE void immediate16u(int32_t imm) + { + MOZ_ASSERT(CAN_ZERO_EXTEND_16_32(imm)); + m_buffer.putShortUnchecked(imm); + } + + // A 16-bit immediate which is either signed or unsigned, for use in + // instructions which actually only operate on 16 bits.
+ MOZ_ALWAYS_INLINE void immediate16(int32_t imm) + { + m_buffer.putShortUnchecked(imm); + } + + MOZ_ALWAYS_INLINE void immediate32(int32_t imm) + { + m_buffer.putIntUnchecked(imm); + } + + MOZ_ALWAYS_INLINE void immediate64(int64_t imm) + { + m_buffer.putInt64Unchecked(imm); + } + + MOZ_ALWAYS_INLINE MOZ_MUST_USE JmpSrc + immediateRel32() + { + m_buffer.putIntUnchecked(0); + return JmpSrc(m_buffer.size()); + } + + // Data: + + void jumpTablePointer(uintptr_t ptr) + { + m_buffer.ensureSpace(sizeof(uintptr_t)); +#ifdef JS_CODEGEN_X64 + m_buffer.putInt64Unchecked(ptr); +#else + m_buffer.putIntUnchecked(ptr); +#endif + } + + void doubleConstant(double d) + { + m_buffer.ensureSpace(sizeof(double)); + m_buffer.putInt64Unchecked(mozilla::BitwiseCast<uint64_t>(d)); + } + + void floatConstant(float f) + { + m_buffer.ensureSpace(sizeof(float)); + m_buffer.putIntUnchecked(mozilla::BitwiseCast<uint32_t>(f)); + } + + void simd128Constant(const void* data) + { + const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data); + m_buffer.ensureSpace(16); + for (size_t i = 0; i < 16; ++i) + m_buffer.putByteUnchecked(bytes[i]); + } + + void int64Constant(int64_t i) + { + m_buffer.ensureSpace(sizeof(int64_t)); + m_buffer.putInt64Unchecked(i); + } + + void int32Constant(int32_t i) + { + m_buffer.ensureSpace(sizeof(int32_t)); + m_buffer.putIntUnchecked(i); + } + + // Administrative methods: + + size_t size() const { return m_buffer.size(); } + const unsigned char* buffer() const { return m_buffer.buffer(); } + bool oom() const { return m_buffer.oom(); } + bool isAligned(int alignment) const { return m_buffer.isAligned(alignment); } + unsigned char* data() { return m_buffer.data(); } + + MOZ_MUST_USE bool append(const unsigned char* values, size_t size) + { + return m_buffer.append(values, size); + } + + void unprotectDataRegion(size_t firstByteOffset, size_t lastByteOffset) { + m_buffer.unprotectDataRegion(firstByteOffset, lastByteOffset); + } + void reprotectDataRegion(size_t firstByteOffset, size_t lastByteOffset) { +
m_buffer.reprotectDataRegion(firstByteOffset, lastByteOffset); + } + + private: + + // Internals; ModRm and REX formatters. + + // Byte operand registers spl & above require a REX prefix, which precludes + // use of the h registers in the same instruction. + static bool byteRegRequiresRex(RegisterID reg) + { +#ifdef JS_CODEGEN_X64 + return reg >= rsp; +#else + return false; +#endif + } + + // For non-byte sizes, registers r8 & above always require a REX prefix. + static bool regRequiresRex(RegisterID reg) + { +#ifdef JS_CODEGEN_X64 + return reg >= r8; +#else + return false; +#endif + } + +#ifdef JS_CODEGEN_X64 + // Format a REX prefix byte. + void emitRex(bool w, int r, int x, int b) + { + m_buffer.putByteUnchecked(PRE_REX | ((int)w << 3) | ((r>>3)<<2) | ((x>>3)<<1) | (b>>3)); + } + + // Used to plant a REX byte with REX.w set (for 64-bit operations). + void emitRexW(int r, int x, int b) + { + emitRex(true, r, x, b); + } + + // Used for operations with byte operands - use byteRegRequiresRex() to + // check register operands, regRequiresRex() to check other registers + // (i.e. address base & index). + // + // NB: WebKit's use of emitRexIf() is limited such that the + // regRequiresRex() checks are not needed. SpiderMonkey extends + // oneByteOp8 and twoByteOp8 functionality such that r, x, and b + // can all be used. + void emitRexIf(bool condition, int r, int x, int b) + { + if (condition || + regRequiresRex(RegisterID(r)) || + regRequiresRex(RegisterID(x)) || + regRequiresRex(RegisterID(b))) + { + emitRex(false, r, x, b); + } + } + + // Used for word sized operations, will plant a REX prefix if necessary + // (if any register is r8 or above). + void emitRexIfNeeded(int r, int x, int b) + { + emitRexIf(false, r, x, b); + } +#else + // No REX prefix bytes on 32-bit x86.
+ void emitRexIf(bool condition, int, int, int) + { + MOZ_ASSERT(!condition, "32-bit x86 should never use a REX prefix"); + } + void emitRexIfNeeded(int, int, int) {} +#endif + + void putModRm(ModRmMode mode, RegisterID rm, int reg) + { + m_buffer.putByteUnchecked((mode << 6) | ((reg & 7) << 3) | (rm & 7)); + } + + void putModRmSib(ModRmMode mode, RegisterID base, RegisterID index, int scale, int reg) + { + MOZ_ASSERT(mode != ModRmRegister); + + putModRm(mode, hasSib, reg); + m_buffer.putByteUnchecked((scale << 6) | ((index & 7) << 3) | (base & 7)); + } + + void registerModRM(RegisterID rm, int reg) + { + putModRm(ModRmRegister, rm, reg); + } + + void memoryModRM(int32_t offset, RegisterID base, int reg) + { + // A base of esp or r12 would be interpreted as a sib, so force a + // sib with no index & put the base in there. +#ifdef JS_CODEGEN_X64 + if ((base == hasSib) || (base == hasSib2)) +#else + if (base == hasSib) +#endif + { + if (!offset) // No need to check if the base is noBase, since we know it is hasSib! + putModRmSib(ModRmMemoryNoDisp, base, noIndex, 0, reg); + else if (CAN_SIGN_EXTEND_8_32(offset)) { + putModRmSib(ModRmMemoryDisp8, base, noIndex, 0, reg); + m_buffer.putByteUnchecked(offset); + } else { + putModRmSib(ModRmMemoryDisp32, base, noIndex, 0, reg); + m_buffer.putIntUnchecked(offset); + } + } else { +#ifdef JS_CODEGEN_X64 + if (!offset && (base != noBase) && (base != noBase2)) +#else + if (!offset && (base != noBase)) +#endif + putModRm(ModRmMemoryNoDisp, base, reg); + else if (CAN_SIGN_EXTEND_8_32(offset)) { + putModRm(ModRmMemoryDisp8, base, reg); + m_buffer.putByteUnchecked(offset); + } else { + putModRm(ModRmMemoryDisp32, base, reg); + m_buffer.putIntUnchecked(offset); + } + } + } + + void memoryModRM_disp32(int32_t offset, RegisterID base, int reg) + { + // A base of esp or r12 would be interpreted as a sib, so force a + // sib with no index & put the base in there. 
+#ifdef JS_CODEGEN_X64 + if ((base == hasSib) || (base == hasSib2)) +#else + if (base == hasSib) +#endif + { + putModRmSib(ModRmMemoryDisp32, base, noIndex, 0, reg); + m_buffer.putIntUnchecked(offset); + } else { + putModRm(ModRmMemoryDisp32, base, reg); + m_buffer.putIntUnchecked(offset); + } + } + + void memoryModRM(int32_t offset, RegisterID base, RegisterID index, int scale, int reg) + { + MOZ_ASSERT(index != noIndex); + +#ifdef JS_CODEGEN_X64 + if (!offset && (base != noBase) && (base != noBase2)) +#else + if (!offset && (base != noBase)) +#endif + putModRmSib(ModRmMemoryNoDisp, base, index, scale, reg); + else if (CAN_SIGN_EXTEND_8_32(offset)) { + putModRmSib(ModRmMemoryDisp8, base, index, scale, reg); + m_buffer.putByteUnchecked(offset); + } else { + putModRmSib(ModRmMemoryDisp32, base, index, scale, reg); + m_buffer.putIntUnchecked(offset); + } + } + + void memoryModRM_disp32(int32_t offset, RegisterID index, int scale, int reg) + { + MOZ_ASSERT(index != noIndex); + + // NB: the base-less memoryModRM overloads generate different code + // than the base-full memoryModRM overloads in the base == noBase + // case. The base-less overloads assume that the desired effective + // address is: + // + // reg := [scaled index] + disp32 + // + // which means the mod needs to be ModRmMemoryNoDisp. The base-full + // overloads pass ModRmMemoryDisp32 in all cases and thus, when + // base == noBase (== ebp), the effective address is: + // + // reg := [scaled index] + disp32 + [ebp] + // + // See Intel developer manual, Vol 2, 2.1.5, Table 2-3. + putModRmSib(ModRmMemoryNoDisp, noBase, index, scale, reg); + m_buffer.putIntUnchecked(offset); + } + + void memoryModRM_disp32(const void* address, int reg) + { + int32_t disp = AddressImmediate(address); + +#ifdef JS_CODEGEN_X64 + // On x86-64, non-RIP-relative absolute mode requires a SIB. + putModRmSib(ModRmMemoryNoDisp, noBase, noIndex, 0, reg); +#else + // noBase + ModRmMemoryNoDisp means noBase + ModRmMemoryDisp32!
+ putModRm(ModRmMemoryNoDisp, noBase, reg); +#endif + m_buffer.putIntUnchecked(disp); + } + + void memoryModRM(const void* address, int reg) + { + memoryModRM_disp32(address, reg); + } + + void threeOpVex(VexOperandType p, int r, int x, int b, int m, int w, int v, int l, + int opcode) + { + m_buffer.ensureSpace(MaxInstructionSize); + + if (v == invalid_xmm) + v = XMMRegisterID(0); + + if (x == 0 && b == 0 && m == 1 && w == 0) { + // Two byte VEX. + m_buffer.putByteUnchecked(PRE_VEX_C5); + m_buffer.putByteUnchecked(((r << 7) | (v << 3) | (l << 2) | p) ^ 0xf8); + } else { + // Three byte VEX. + m_buffer.putByteUnchecked(PRE_VEX_C4); + m_buffer.putByteUnchecked(((r << 7) | (x << 6) | (b << 5) | m) ^ 0xe0); + m_buffer.putByteUnchecked(((w << 7) | (v << 3) | (l << 2) | p) ^ 0x78); + } + + m_buffer.putByteUnchecked(opcode); + } + + AssemblerBuffer m_buffer; + } m_formatter; + + bool useVEX_; +}; + +MOZ_ALWAYS_INLINE +AutoUnprotectAssemblerBufferRegion::AutoUnprotectAssemblerBufferRegion(BaseAssembler& holder, + int32_t offset, size_t size) +{ + assembler = &holder; + MOZ_ASSERT(offset >= 0); + firstByteOffset = size_t(offset); + lastByteOffset = firstByteOffset + (size - 1); + assembler->unprotectDataRegion(firstByteOffset, lastByteOffset); +} + +MOZ_ALWAYS_INLINE +AutoUnprotectAssemblerBufferRegion::~AutoUnprotectAssemblerBufferRegion() +{ + assembler->reprotectDataRegion(firstByteOffset, lastByteOffset); +} + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_BaseAssembler_x86_shared_h */ diff --git a/js/src/jit/x86-shared/BaselineCompiler-x86-shared.cpp b/js/src/jit/x86-shared/BaselineCompiler-x86-shared.cpp new file mode 100644 index 000000000..327015df8 --- /dev/null +++ b/js/src/jit/x86-shared/BaselineCompiler-x86-shared.cpp @@ -0,0 +1,15 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla 
Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/BaselineCompiler-x86-shared.h" + +using namespace js; +using namespace js::jit; + +BaselineCompilerX86Shared::BaselineCompilerX86Shared(JSContext* cx, TempAllocator& alloc, JSScript* script) + : BaselineCompilerShared(cx, alloc, script) +{ +} diff --git a/js/src/jit/x86-shared/BaselineCompiler-x86-shared.h b/js/src/jit/x86-shared/BaselineCompiler-x86-shared.h new file mode 100644 index 000000000..65b702d54 --- /dev/null +++ b/js/src/jit/x86-shared/BaselineCompiler-x86-shared.h @@ -0,0 +1,24 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_BaselineCompiler_x86_shared_h +#define jit_x86_shared_BaselineCompiler_x86_shared_h + +#include "jit/shared/BaselineCompiler-shared.h" + +namespace js { +namespace jit { + +class BaselineCompilerX86Shared : public BaselineCompilerShared +{ + protected: + BaselineCompilerX86Shared(JSContext* cx, TempAllocator& alloc, JSScript* script); +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_BaselineCompiler_x86_shared_h */ diff --git a/js/src/jit/x86-shared/BaselineIC-x86-shared.cpp b/js/src/jit/x86-shared/BaselineIC-x86-shared.cpp new file mode 100644 index 000000000..4e25f87bf --- /dev/null +++ b/js/src/jit/x86-shared/BaselineIC-x86-shared.cpp @@ -0,0 +1,44 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/BaselineIC.h" +#include "jit/SharedICHelpers.h" + +#include "jit/MacroAssembler-inl.h" + +using namespace js; +using namespace js::jit; + +bool +ICCompare_Double::Compiler::generateStubCode(MacroAssembler& masm) +{ + Label failure, notNaN; + masm.ensureDouble(R0, FloatReg0, &failure); + masm.ensureDouble(R1, FloatReg1, &failure); + + Register dest = R0.scratchReg(); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(op); + masm.mov(ImmWord(0), dest); + masm.compareDouble(cond, FloatReg0, FloatReg1); + masm.setCC(Assembler::ConditionFromDoubleCondition(cond), dest); + + // Check for NaN, if needed. + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (nanCond != Assembler::NaN_HandledByCond) { + masm.j(Assembler::NoParity, &notNaN); + masm.mov(ImmWord(nanCond == Assembler::NaN_IsTrue), dest); + masm.bind(&notNaN); + } + + masm.tagValue(JSVAL_TYPE_BOOLEAN, dest, R0); + EmitReturnFromIC(masm); + + // Failure case - jump to next stub + masm.bind(&failure); + EmitStubGuardFailure(masm); + return true; +} diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp new file mode 100644 index 000000000..9cf03aede --- /dev/null +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -0,0 +1,4727 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "jit/x86-shared/CodeGenerator-x86-shared.h" + +#include "mozilla/DebugOnly.h" +#include "mozilla/MathAlgorithms.h" + +#include "jsmath.h" + +#include "jit/JitCompartment.h" +#include "jit/JitFrames.h" +#include "jit/Linker.h" +#include "jit/RangeAnalysis.h" +#include "vm/TraceLogging.h" + +#include "jit/MacroAssembler-inl.h" +#include "jit/shared/CodeGenerator-shared-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::Abs; +using mozilla::BitwiseCast; +using mozilla::DebugOnly; +using mozilla::FloatingPoint; +using mozilla::FloorLog2; +using mozilla::NegativeInfinity; +using mozilla::SpecificNaN; + +using JS::GenericNaN; + +namespace js { +namespace jit { + +CodeGeneratorX86Shared::CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm) + : CodeGeneratorShared(gen, graph, masm) +{ +} + +#ifdef JS_PUNBOX64 +Operand +CodeGeneratorX86Shared::ToOperandOrRegister64(const LInt64Allocation input) +{ + return ToOperand(input.value()); +} +#else +Register64 +CodeGeneratorX86Shared::ToOperandOrRegister64(const LInt64Allocation input) +{ + return ToRegister64(input); +} +#endif + +void +OutOfLineBailout::accept(CodeGeneratorX86Shared* codegen) +{ + codegen->visitOutOfLineBailout(this); +} + +void +CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond, MBasicBlock* mirTrue, + MBasicBlock* mirFalse, Assembler::NaNCond ifNaN) +{ + if (ifNaN == Assembler::NaN_IsFalse) + jumpToBlock(mirFalse, Assembler::Parity); + else if (ifNaN == Assembler::NaN_IsTrue) + jumpToBlock(mirTrue, Assembler::Parity); + + if (isNextBlock(mirFalse->lir())) { + jumpToBlock(mirTrue, cond); + } else { + jumpToBlock(mirFalse, Assembler::InvertCondition(cond)); + jumpToBlock(mirTrue); + } +} + +void +CodeGeneratorX86Shared::visitDouble(LDouble* ins) +{ + const LDefinition* out = ins->getDef(0); + masm.loadConstantDouble(ins->getDouble(), ToFloatRegister(out)); +} + +void +CodeGeneratorX86Shared::visitFloat32(LFloat32* ins) +{ + const 
LDefinition* out = ins->getDef(0); + masm.loadConstantFloat32(ins->getFloat(), ToFloatRegister(out)); +} + +void +CodeGeneratorX86Shared::visitTestIAndBranch(LTestIAndBranch* test) +{ + Register input = ToRegister(test->input()); + masm.test32(input, input); + emitBranch(Assembler::NonZero, test->ifTrue(), test->ifFalse()); +} + +void +CodeGeneratorX86Shared::visitTestDAndBranch(LTestDAndBranch* test) +{ + const LAllocation* opd = test->input(); + + // vucomisd flags: + // Z P C + // --------- + // NaN 1 1 1 + // > 0 0 0 + // < 0 0 1 + // = 1 0 0 + // + // NaN is falsey, so comparing against 0 and then using the Z flag is + // enough to determine which branch to take. + ScratchDoubleScope scratch(masm); + masm.zeroDouble(scratch); + masm.vucomisd(scratch, ToFloatRegister(opd)); + emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); +} + +void +CodeGeneratorX86Shared::visitTestFAndBranch(LTestFAndBranch* test) +{ + const LAllocation* opd = test->input(); + // vucomiss flags are the same as doubles; see comment above + { + ScratchFloat32Scope scratch(masm); + masm.zeroFloat32(scratch); + masm.vucomiss(scratch, ToFloatRegister(opd)); + } + emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); +} + +void +CodeGeneratorX86Shared::visitBitAndAndBranch(LBitAndAndBranch* baab) +{ + if (baab->right()->isConstant()) + masm.test32(ToRegister(baab->left()), Imm32(ToInt32(baab->right()))); + else + masm.test32(ToRegister(baab->left()), ToRegister(baab->right())); + emitBranch(Assembler::NonZero, baab->ifTrue(), baab->ifFalse()); +} + +void +CodeGeneratorX86Shared::emitCompare(MCompare::CompareType type, const LAllocation* left, const LAllocation* right) +{ +#ifdef JS_CODEGEN_X64 + if (type == MCompare::Compare_Object) { + masm.cmpPtr(ToRegister(left), ToOperand(right)); + return; + } +#endif + + if (right->isConstant()) + masm.cmp32(ToRegister(left), Imm32(ToInt32(right))); + else + masm.cmp32(ToRegister(left), ToOperand(right)); +} + +void 
+CodeGeneratorX86Shared::visitCompare(LCompare* comp) +{ + MCompare* mir = comp->mir(); + emitCompare(mir->compareType(), comp->left(), comp->right()); + masm.emitSet(JSOpToCondition(mir->compareType(), comp->jsop()), ToRegister(comp->output())); +} + +void +CodeGeneratorX86Shared::visitCompareAndBranch(LCompareAndBranch* comp) +{ + MCompare* mir = comp->cmpMir(); + emitCompare(mir->compareType(), comp->left(), comp->right()); + Assembler::Condition cond = JSOpToCondition(mir->compareType(), comp->jsop()); + emitBranch(cond, comp->ifTrue(), comp->ifFalse()); +} + +void +CodeGeneratorX86Shared::visitCompareD(LCompareD* comp) +{ + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->mir()->operandsAreNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + masm.compareDouble(cond, lhs, rhs); + masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond); +} + +void +CodeGeneratorX86Shared::visitCompareF(LCompareF* comp) +{ + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->mir()->operandsAreNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + masm.compareFloat(cond, lhs, rhs); + masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond); +} + +void +CodeGeneratorX86Shared::visitNotI(LNotI* ins) +{ + masm.cmp32(ToRegister(ins->input()), Imm32(0)); + masm.emitSet(Assembler::Equal, ToRegister(ins->output())); +} + +void +CodeGeneratorX86Shared::visitNotD(LNotD* ins) +{ + FloatRegister opd = ToFloatRegister(ins->input()); + + // Not returns true if 
the input is a NaN. We don't have to worry about + // it if we know the input is never NaN though. + Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; + if (ins->mir()->operandIsNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + ScratchDoubleScope scratch(masm); + masm.zeroDouble(scratch); + masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch); + masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); +} + +void +CodeGeneratorX86Shared::visitNotF(LNotF* ins) +{ + FloatRegister opd = ToFloatRegister(ins->input()); + + // Not returns true if the input is a NaN. We don't have to worry about + // it if we know the input is never NaN though. + Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; + if (ins->mir()->operandIsNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + ScratchFloat32Scope scratch(masm); + masm.zeroFloat32(scratch); + masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch); + masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); +} + +void +CodeGeneratorX86Shared::visitCompareDAndBranch(LCompareDAndBranch* comp) +{ + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->cmpMir()->operandsAreNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + masm.compareDouble(cond, lhs, rhs); + emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond); +} + +void +CodeGeneratorX86Shared::visitCompareFAndBranch(LCompareFAndBranch* comp) +{ + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if 
(comp->cmpMir()->operandsAreNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + masm.compareFloat(cond, lhs, rhs); + emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond); +} + +void +CodeGeneratorX86Shared::visitWasmStackArg(LWasmStackArg* ins) +{ + const MWasmStackArg* mir = ins->mir(); + Address dst(StackPointer, mir->spOffset()); + if (ins->arg()->isConstant()) { + masm.storePtr(ImmWord(ToInt32(ins->arg())), dst); + } else if (ins->arg()->isGeneralReg()) { + masm.storePtr(ToRegister(ins->arg()), dst); + } else { + switch (mir->input()->type()) { + case MIRType::Double: + masm.storeDouble(ToFloatRegister(ins->arg()), dst); + return; + case MIRType::Float32: + masm.storeFloat32(ToFloatRegister(ins->arg()), dst); + return; + // StackPointer is SIMD-aligned and ABIArgGenerator guarantees + // stack offsets are SIMD-aligned. + case MIRType::Int32x4: + case MIRType::Bool32x4: + masm.storeAlignedSimd128Int(ToFloatRegister(ins->arg()), dst); + return; + case MIRType::Float32x4: + masm.storeAlignedSimd128Float(ToFloatRegister(ins->arg()), dst); + return; + default: break; + } + MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("unexpected mir type in WasmStackArg"); + } +} + +void +CodeGeneratorX86Shared::visitWasmStackArgI64(LWasmStackArgI64* ins) +{ + const MWasmStackArg* mir = ins->mir(); + Address dst(StackPointer, mir->spOffset()); + if (IsConstant(ins->arg())) + masm.store64(Imm64(ToInt64(ins->arg())), dst); + else + masm.store64(ToRegister64(ins->arg()), dst); +} + +void +CodeGeneratorX86Shared::visitWasmSelect(LWasmSelect* ins) +{ + MIRType mirType = ins->mir()->type(); + + Register cond = ToRegister(ins->condExpr()); + Operand falseExpr = ToOperand(ins->falseExpr()); + + masm.test32(cond, cond); + + if (mirType == MIRType::Int32) { + Register out = ToRegister(ins->output()); + MOZ_ASSERT(ToRegister(ins->trueExpr()) == out, "true expr input is reused for output"); + masm.cmovz(falseExpr, out); + return; + } + + 
FloatRegister out = ToFloatRegister(ins->output()); + MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out, "true expr input is reused for output"); + + Label done; + masm.j(Assembler::NonZero, &done); + + if (mirType == MIRType::Float32) { + if (falseExpr.kind() == Operand::FPREG) + masm.moveFloat32(ToFloatRegister(ins->falseExpr()), out); + else + masm.loadFloat32(falseExpr, out); + } else if (mirType == MIRType::Double) { + if (falseExpr.kind() == Operand::FPREG) + masm.moveDouble(ToFloatRegister(ins->falseExpr()), out); + else + masm.loadDouble(falseExpr, out); + } else { + MOZ_CRASH("unhandled type in visitWasmSelect!"); + } + + masm.bind(&done); + return; +} + +void +CodeGeneratorX86Shared::visitWasmReinterpret(LWasmReinterpret* lir) +{ + MOZ_ASSERT(gen->compilingWasm()); + MWasmReinterpret* ins = lir->mir(); + + MIRType to = ins->type(); +#ifdef DEBUG + MIRType from = ins->input()->type(); +#endif + + switch (to) { + case MIRType::Int32: + MOZ_ASSERT(from == MIRType::Float32); + masm.vmovd(ToFloatRegister(lir->input()), ToRegister(lir->output())); + break; + case MIRType::Float32: + MOZ_ASSERT(from == MIRType::Int32); + masm.vmovd(ToRegister(lir->input()), ToFloatRegister(lir->output())); + break; + case MIRType::Double: + case MIRType::Int64: + MOZ_CRASH("not handled by this LIR opcode"); + default: + MOZ_CRASH("unexpected WasmReinterpret"); + } +} + +void +CodeGeneratorX86Shared::visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds* ool) +{ + switch (ool->viewType()) { + case Scalar::Int64: + case Scalar::Float32x4: + case Scalar::Int8x16: + case Scalar::Int16x8: + case Scalar::Int32x4: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + case Scalar::Float32: + masm.loadConstantFloat32(float(GenericNaN()), ool->dest().fpu()); + break; + case Scalar::Float64: + masm.loadConstantDouble(GenericNaN(), ool->dest().fpu()); + break; + case Scalar::Int8: + case Scalar::Uint8: + case Scalar::Int16: + case 
Scalar::Uint16: + case Scalar::Int32: + case Scalar::Uint32: + case Scalar::Uint8Clamped: + Register destReg = ool->dest().gpr(); + masm.mov(ImmWord(0), destReg); + break; + } + masm.jmp(ool->rejoin()); +} + +void +CodeGeneratorX86Shared::visitWasmAddOffset(LWasmAddOffset* lir) +{ + MWasmAddOffset* mir = lir->mir(); + Register base = ToRegister(lir->base()); + Register out = ToRegister(lir->output()); + + if (base != out) + masm.move32(base, out); + masm.add32(Imm32(mir->offset()), out); + + masm.j(Assembler::CarrySet, trap(mir, wasm::Trap::OutOfBounds)); +} + +void +CodeGeneratorX86Shared::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) +{ + FloatRegister input = ToFloatRegister(lir->input()); + Register output = ToRegister(lir->output()); + + MWasmTruncateToInt32* mir = lir->mir(); + MIRType inputType = mir->input()->type(); + + MOZ_ASSERT(inputType == MIRType::Double || inputType == MIRType::Float32); + + auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input); + addOutOfLineCode(ool, mir); + + Label* oolEntry = ool->entry(); + if (mir->isUnsigned()) { + if (inputType == MIRType::Double) + masm.wasmTruncateDoubleToUInt32(input, output, oolEntry); + else if (inputType == MIRType::Float32) + masm.wasmTruncateFloat32ToUInt32(input, output, oolEntry); + else + MOZ_CRASH("unexpected type"); + return; + } + + if (inputType == MIRType::Double) + masm.wasmTruncateDoubleToInt32(input, output, oolEntry); + else if (inputType == MIRType::Float32) + masm.wasmTruncateFloat32ToInt32(input, output, oolEntry); + else + MOZ_CRASH("unexpected type"); + + masm.bind(ool->rejoin()); +} + +bool +CodeGeneratorX86Shared::generateOutOfLineCode() +{ + if (!CodeGeneratorShared::generateOutOfLineCode()) + return false; + + if (deoptLabel_.used()) { + // All non-table-based bailouts will go here. + masm.bind(&deoptLabel_); + + // Push the frame size, so the handler can recover the IonScript. 
+ masm.push(Imm32(frameSize())); + + JitCode* handler = gen->jitRuntime()->getGenericBailoutHandler(); + masm.jmp(ImmPtr(handler->raw()), Relocation::JITCODE); + } + + return !masm.oom(); +} + +class BailoutJump { + Assembler::Condition cond_; + + public: + explicit BailoutJump(Assembler::Condition cond) : cond_(cond) + { } +#ifdef JS_CODEGEN_X86 + void operator()(MacroAssembler& masm, uint8_t* code) const { + masm.j(cond_, ImmPtr(code), Relocation::HARDCODED); + } +#endif + void operator()(MacroAssembler& masm, Label* label) const { + masm.j(cond_, label); + } +}; + +class BailoutLabel { + Label* label_; + + public: + explicit BailoutLabel(Label* label) : label_(label) + { } +#ifdef JS_CODEGEN_X86 + void operator()(MacroAssembler& masm, uint8_t* code) const { + masm.retarget(label_, ImmPtr(code), Relocation::HARDCODED); + } +#endif + void operator()(MacroAssembler& masm, Label* label) const { + masm.retarget(label_, label); + } +}; + +template void +CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot) +{ + encode(snapshot); + + // Though the assembler doesn't track all frame pushes, at least make sure + // the known value makes sense. We can't use bailout tables if the stack + // isn't properly aligned to the static frame size. + MOZ_ASSERT_IF(frameClass_ != FrameSizeClass::None() && deoptTable_, + frameClass_.frameSize() == masm.framePushed()); + +#ifdef JS_CODEGEN_X86 + // On x64, bailout tables are pointless, because 16 extra bytes are + // reserved per external jump, whereas it takes only 10 bytes to encode a + // non-table based bailout. + if (assignBailoutId(snapshot)) { + binder(masm, deoptTable_->raw() + snapshot->bailoutId() * BAILOUT_TABLE_ENTRY_SIZE); + return; + } +#endif + + // We could not use a jump table, either because all bailout IDs were + // reserved, or a jump table is not optimal for this frame size or + // platform. Whatever, we will generate a lazy bailout.
+ // + // All bailout code is associated with the bytecodeSite of the block we are + // bailing out from. + InlineScriptTree* tree = snapshot->mir()->block()->trackedTree(); + OutOfLineBailout* ool = new(alloc()) OutOfLineBailout(snapshot); + addOutOfLineCode(ool, new(alloc()) BytecodeSite(tree, tree->script()->code())); + + binder(masm, ool->entry()); +} + +void +CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition, LSnapshot* snapshot) +{ + bailout(BailoutJump(condition), snapshot); +} + +void +CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition, LSnapshot* snapshot) +{ + MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) == Assembler::NaN_HandledByCond); + bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot); +} + +void +CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot) +{ + MOZ_ASSERT(label->used() && !label->bound()); + bailout(BailoutLabel(label), snapshot); +} + +void +CodeGeneratorX86Shared::bailout(LSnapshot* snapshot) +{ + Label label; + masm.jump(&label); + bailoutFrom(&label, snapshot); +} + +void +CodeGeneratorX86Shared::visitOutOfLineBailout(OutOfLineBailout* ool) +{ + masm.push(Imm32(ool->snapshot()->snapshotOffset())); + masm.jmp(&deoptLabel_); +} + +void +CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD* ins) +{ + FloatRegister first = ToFloatRegister(ins->first()); + FloatRegister second = ToFloatRegister(ins->second()); +#ifdef DEBUG + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(first == output); +#endif + + bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN(); + + if (ins->mir()->isMax()) + masm.maxDouble(second, first, handleNaN); + else + masm.minDouble(second, first, handleNaN); +} + +void +CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF* ins) +{ + FloatRegister first = ToFloatRegister(ins->first()); + FloatRegister second = ToFloatRegister(ins->second()); +#ifdef DEBUG + FloatRegister output = ToFloatRegister(ins->output()); 
+ MOZ_ASSERT(first == output); +#endif + + bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN(); + + if (ins->mir()->isMax()) + masm.maxFloat32(second, first, handleNaN); + else + masm.minFloat32(second, first, handleNaN); +} + +void +CodeGeneratorX86Shared::visitAbsD(LAbsD* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); + // Load a value which is all ones except for the sign bit. + ScratchDoubleScope scratch(masm); + masm.loadConstantDouble(SpecificNaN(0, FloatingPoint::kSignificandBits), scratch); + masm.vandpd(scratch, input, input); +} + +void +CodeGeneratorX86Shared::visitAbsF(LAbsF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); + // Same trick as visitAbsD above. + ScratchFloat32Scope scratch(masm); + masm.loadConstantFloat32(SpecificNaN(0, FloatingPoint::kSignificandBits), scratch); + masm.vandps(scratch, input, input); +} + +void +CodeGeneratorX86Shared::visitClzI(LClzI* ins) +{ + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + bool knownNotZero = ins->mir()->operandIsNeverZero(); + + masm.clz32(input, output, knownNotZero); +} + +void +CodeGeneratorX86Shared::visitCtzI(LCtzI* ins) +{ + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + bool knownNotZero = ins->mir()->operandIsNeverZero(); + + masm.ctz32(input, output, knownNotZero); +} + +void +CodeGeneratorX86Shared::visitPopcntI(LPopcntI* ins) +{ + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + Register temp = ins->temp()->isBogusTemp() ? 
InvalidReg : ToRegister(ins->temp()); + + masm.popcnt32(input, output, temp); +} + +void +CodeGeneratorX86Shared::visitSqrtD(LSqrtD* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + masm.vsqrtsd(input, output, output); +} + +void +CodeGeneratorX86Shared::visitSqrtF(LSqrtF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + masm.vsqrtss(input, output, output); +} + +void +CodeGeneratorX86Shared::visitPowHalfD(LPowHalfD* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchDoubleScope scratch(masm); + + Label done, sqrt; + + if (!ins->mir()->operandIsNeverNegativeInfinity()) { + // Branch if not -Infinity. + masm.loadConstantDouble(NegativeInfinity(), scratch); + + Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered; + if (ins->mir()->operandIsNeverNaN()) + cond = Assembler::DoubleNotEqual; + masm.branchDouble(cond, input, scratch, &sqrt); + + // Math.pow(-Infinity, 0.5) == Infinity. + masm.zeroDouble(output); + masm.subDouble(scratch, output); + masm.jump(&done); + + masm.bind(&sqrt); + } + + if (!ins->mir()->operandIsNeverNegativeZero()) { + // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5). Adding 0 converts any -0 to 0. 
+ masm.zeroDouble(scratch); + masm.addDouble(input, scratch); + masm.vsqrtsd(scratch, output, output); + } else { + masm.vsqrtsd(input, output, output); + } + + masm.bind(&done); +} + +class OutOfLineUndoALUOperation : public OutOfLineCodeBase +{ + LInstruction* ins_; + + public: + explicit OutOfLineUndoALUOperation(LInstruction* ins) + : ins_(ins) + { } + + virtual void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitOutOfLineUndoALUOperation(this); + } + LInstruction* ins() const { + return ins_; + } +}; + +void +CodeGeneratorX86Shared::visitAddI(LAddI* ins) +{ + if (ins->rhs()->isConstant()) + masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); + else + masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); + + if (ins->snapshot()) { + if (ins->recoversInput()) { + OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins); + addOutOfLineCode(ool, ins->mir()); + masm.j(Assembler::Overflow, ool->entry()); + } else { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + } +} + +void +CodeGeneratorX86Shared::visitAddI64(LAddI64* lir) +{ + const LInt64Allocation lhs = lir->getInt64Operand(LAddI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LAddI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (IsConstant(rhs)) { + masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + return; + } + + masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); +} + +void +CodeGeneratorX86Shared::visitSubI(LSubI* ins) +{ + if (ins->rhs()->isConstant()) + masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); + else + masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); + + if (ins->snapshot()) { + if (ins->recoversInput()) { + OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins); + addOutOfLineCode(ool, ins->mir()); + masm.j(Assembler::Overflow, ool->entry()); + } else { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + } +} + +void 
+CodeGeneratorX86Shared::visitSubI64(LSubI64* lir) +{ + const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (IsConstant(rhs)) { + masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + return; + } + + masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); +} + +void +CodeGeneratorX86Shared::visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation* ool) +{ + LInstruction* ins = ool->ins(); + Register reg = ToRegister(ins->getDef(0)); + + DebugOnly lhs = ins->getOperand(0); + LAllocation* rhs = ins->getOperand(1); + + MOZ_ASSERT(reg == ToRegister(lhs)); + MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs)); + + // Undo the effect of the ALU operation, which was performed on the output + // register and overflowed. Writing to the output register clobbered an + // input reg, and the original value of the input needs to be recovered + // to satisfy the constraint imposed by any RECOVERED_INPUT operands to + // the bailout snapshot. 
+ + if (rhs->isConstant()) { + Imm32 constant(ToInt32(rhs)); + if (ins->isAddI()) + masm.subl(constant, reg); + else + masm.addl(constant, reg); + } else { + if (ins->isAddI()) + masm.subl(ToOperand(rhs), reg); + else + masm.addl(ToOperand(rhs), reg); + } + + bailout(ool->ins()->snapshot()); +} + +class MulNegativeZeroCheck : public OutOfLineCodeBase +{ + LMulI* ins_; + + public: + explicit MulNegativeZeroCheck(LMulI* ins) + : ins_(ins) + { } + + virtual void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitMulNegativeZeroCheck(this); + } + LMulI* ins() const { + return ins_; + } +}; + +void +CodeGeneratorX86Shared::visitMulI(LMulI* ins) +{ + const LAllocation* lhs = ins->lhs(); + const LAllocation* rhs = ins->rhs(); + MMul* mul = ins->mir(); + MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow()); + + if (rhs->isConstant()) { + // Bailout on -0.0 + int32_t constant = ToInt32(rhs); + if (mul->canBeNegativeZero() && constant <= 0) { + Assembler::Condition bailoutCond = (constant == 0) ? 
Assembler::Signed : Assembler::Equal; + masm.test32(ToRegister(lhs), ToRegister(lhs)); + bailoutIf(bailoutCond, ins->snapshot()); + } + + switch (constant) { + case -1: + masm.negl(ToOperand(lhs)); + break; + case 0: + masm.xorl(ToOperand(lhs), ToRegister(lhs)); + return; // escape overflow check; + case 1: + // nop + return; // escape overflow check; + case 2: + masm.addl(ToOperand(lhs), ToRegister(lhs)); + break; + default: + if (!mul->canOverflow() && constant > 0) { + // Use shift if cannot overflow and constant is power of 2 + int32_t shift = FloorLog2(constant); + if ((1 << shift) == constant) { + masm.shll(Imm32(shift), ToRegister(lhs)); + return; + } + } + masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs)); + } + + // Bailout on overflow + if (mul->canOverflow()) + bailoutIf(Assembler::Overflow, ins->snapshot()); + } else { + masm.imull(ToOperand(rhs), ToRegister(lhs)); + + // Bailout on overflow + if (mul->canOverflow()) + bailoutIf(Assembler::Overflow, ins->snapshot()); + + if (mul->canBeNegativeZero()) { + // Jump to an OOL path if the result is 0. + MulNegativeZeroCheck* ool = new(alloc()) MulNegativeZeroCheck(ins); + addOutOfLineCode(ool, mul); + + masm.test32(ToRegister(lhs), ToRegister(lhs)); + masm.j(Assembler::Zero, ool->entry()); + masm.bind(ool->rejoin()); + } + } +} + +void +CodeGeneratorX86Shared::visitMulI64(LMulI64* lir) +{ + const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs); + + MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir)); + + if (IsConstant(rhs)) { + int64_t constant = ToInt64(rhs); + switch (constant) { + case -1: + masm.neg64(ToRegister64(lhs)); + return; + case 0: + masm.xor64(ToRegister64(lhs), ToRegister64(lhs)); + return; + case 1: + // nop + return; + case 2: + masm.add64(ToRegister64(lhs), ToRegister64(lhs)); + return; + default: + if (constant > 0) { + // Use shift if constant is power of 2. 
+ int32_t shift = mozilla::FloorLog2(constant); + if (int64_t(1) << shift == constant) { + masm.lshift64(Imm32(shift), ToRegister64(lhs)); + return; + } + } + Register temp = ToTempRegisterOrInvalid(lir->temp()); + masm.mul64(Imm64(constant), ToRegister64(lhs), temp); + } + } else { + Register temp = ToTempRegisterOrInvalid(lir->temp()); + masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp); + } +} + +class ReturnZero : public OutOfLineCodeBase +{ + Register reg_; + + public: + explicit ReturnZero(Register reg) + : reg_(reg) + { } + + virtual void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitReturnZero(this); + } + Register reg() const { + return reg_; + } +}; + +void +CodeGeneratorX86Shared::visitReturnZero(ReturnZero* ool) +{ + masm.mov(ImmWord(0), ool->reg()); + masm.jmp(ool->rejoin()); +} + +void +CodeGeneratorX86Shared::visitUDivOrMod(LUDivOrMod* ins) +{ + Register lhs = ToRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + Register output = ToRegister(ins->output()); + + MOZ_ASSERT_IF(lhs != rhs, rhs != eax); + MOZ_ASSERT(rhs != edx); + MOZ_ASSERT_IF(output == eax, ToRegister(ins->remainder()) == edx); + + ReturnZero* ool = nullptr; + + // Put the lhs in eax. + if (lhs != eax) + masm.mov(lhs, eax); + + // Prevent divide by zero. + if (ins->canBeDivideByZero()) { + masm.test32(rhs, rhs); + if (ins->mir()->isTruncated()) { + if (ins->trapOnError()) { + masm.j(Assembler::Zero, trap(ins, wasm::Trap::IntegerDivideByZero)); + } else { + ool = new(alloc()) ReturnZero(output); + masm.j(Assembler::Zero, ool->entry()); + } + } else { + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit. + masm.mov(ImmWord(0), edx); + masm.udiv(rhs); + + // If the remainder is > 0, bailout since this must be a double. 
+ if (ins->mir()->isDiv() && !ins->mir()->toDiv()->canTruncateRemainder()) { + Register remainder = ToRegister(ins->remainder()); + masm.test32(remainder, remainder); + bailoutIf(Assembler::NonZero, ins->snapshot()); + } + + // Unsigned div or mod can return a value that's not a signed int32. + // If our users aren't expecting that, bail. + if (!ins->mir()->isTruncated()) { + masm.test32(output, output); + bailoutIf(Assembler::Signed, ins->snapshot()); + } + + if (ool) { + addOutOfLineCode(ool, ins->mir()); + masm.bind(ool->rejoin()); + } +} + +void +CodeGeneratorX86Shared::visitUDivOrModConstant(LUDivOrModConstant *ins) { + Register lhs = ToRegister(ins->numerator()); + Register output = ToRegister(ins->output()); + uint32_t d = ins->denominator(); + + // This emits the division answer into edx or the modulus answer into eax. + MOZ_ASSERT(output == eax || output == edx); + MOZ_ASSERT(lhs != eax && lhs != edx); + bool isDiv = (output == edx); + + if (d == 0) { + if (ins->mir()->isTruncated()) { + if (ins->trapOnError()) + masm.jump(trap(ins, wasm::Trap::IntegerDivideByZero)); + else + masm.xorl(output, output); + } else { + bailout(ins->snapshot()); + } + return; + } + + // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI). + MOZ_ASSERT((d & (d - 1)) != 0); + + ReciprocalMulConstants rmc = computeDivisionConstants(d, /* maxLog = */ 32); + + // We first compute (M * n) >> 32, where M = rmc.multiplier. + masm.movl(Imm32(rmc.multiplier), eax); + masm.umull(lhs); + if (rmc.multiplier > UINT32_MAX) { + // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that + // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d, contradicting + // the proof of correctness in computeDivisionConstants. + MOZ_ASSERT(rmc.shiftAmount > 0); + MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33)); + + // We actually computed edx = ((uint32_t(M) * n) >> 32) instead. 
Since + // (M * n) >> (32 + shift) is the same as (edx + n) >> shift, we can + // correct for the overflow. This case is a bit trickier than the signed + // case, though, as the (edx + n) addition itself can overflow; however, + // note that (edx + n) >> shift == (((n - edx) >> 1) + edx) >> (shift - 1), + // which is overflow-free. See Hacker's Delight, section 10-8 for details. + + // Compute (n - edx) >> 1 into eax. + masm.movl(lhs, eax); + masm.subl(edx, eax); + masm.shrl(Imm32(1), eax); + + // Finish the computation. + masm.addl(eax, edx); + masm.shrl(Imm32(rmc.shiftAmount - 1), edx); + } else { + masm.shrl(Imm32(rmc.shiftAmount), edx); + } + + // We now have the truncated division value in edx. If we're + // computing a modulus or checking whether the division resulted + // in an integer, we need to multiply the obtained value by d and + // finish the computation/check. + if (!isDiv) { + masm.imull(Imm32(d), edx, edx); + masm.movl(lhs, eax); + masm.subl(edx, eax); + + // The final result of the modulus op, just computed above by the + // sub instruction, can be a number in the range [2^31, 2^32). If + // this is the case and the modulus is not truncated, we must bail + // out. + if (!ins->mir()->isTruncated()) + bailoutIf(Assembler::Signed, ins->snapshot()); + } else if (!ins->mir()->isTruncated()) { + masm.imull(Imm32(d), edx, eax); + masm.cmpl(lhs, eax); + bailoutIf(Assembler::NotEqual, ins->snapshot()); + } +} + +void +CodeGeneratorX86Shared::visitMulNegativeZeroCheck(MulNegativeZeroCheck* ool) +{ + LMulI* ins = ool->ins(); + Register result = ToRegister(ins->output()); + Operand lhsCopy = ToOperand(ins->lhsCopy()); + Operand rhs = ToOperand(ins->rhs()); + MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG, lhsCopy.reg() != result.code()); + + // Result is -0 if lhs or rhs is negative. 
+ masm.movl(lhsCopy, result); + masm.orl(rhs, result); + bailoutIf(Assembler::Signed, ins->snapshot()); + + masm.mov(ImmWord(0), result); + masm.jmp(ool->rejoin()); +} + +void +CodeGeneratorX86Shared::visitDivPowTwoI(LDivPowTwoI* ins) +{ + Register lhs = ToRegister(ins->numerator()); + DebugOnly output = ToRegister(ins->output()); + + int32_t shift = ins->shift(); + bool negativeDivisor = ins->negativeDivisor(); + MDiv* mir = ins->mir(); + + // We use defineReuseInput so these should always be the same, which is + // convenient since all of our instructions here are two-address. + MOZ_ASSERT(lhs == output); + + if (!mir->isTruncated() && negativeDivisor) { + // 0 divided by a negative number must return a double. + masm.test32(lhs, lhs); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + + if (shift) { + if (!mir->isTruncated()) { + // If the remainder is != 0, bailout since this must be a double. + masm.test32(lhs, Imm32(UINT32_MAX >> (32 - shift))); + bailoutIf(Assembler::NonZero, ins->snapshot()); + } + + if (mir->isUnsigned()) { + masm.shrl(Imm32(shift), lhs); + } else { + // Adjust the value so that shifting produces a correctly + // rounded result when the numerator is negative. See 10-1 + // "Signed Division by a Known Power of 2" in Henry + // S. Warren, Jr.'s Hacker's Delight. + if (mir->canBeNegativeDividend()) { + Register lhsCopy = ToRegister(ins->numeratorCopy()); + MOZ_ASSERT(lhsCopy != lhs); + if (shift > 1) + masm.sarl(Imm32(31), lhs); + masm.shrl(Imm32(32 - shift), lhs); + masm.addl(lhsCopy, lhs); + } + masm.sarl(Imm32(shift), lhs); + + if (negativeDivisor) + masm.negl(lhs); + } + return; + } + + if (negativeDivisor) { + // INT32_MIN / -1 overflows. 
+ masm.negl(lhs); + if (!mir->isTruncated()) + bailoutIf(Assembler::Overflow, ins->snapshot()); + else if (mir->trapOnError()) + masm.j(Assembler::Overflow, trap(mir, wasm::Trap::IntegerOverflow)); + } else if (mir->isUnsigned() && !mir->isTruncated()) { + // Unsigned division by 1 can overflow if output is not + // truncated. + masm.test32(lhs, lhs); + bailoutIf(Assembler::Signed, ins->snapshot()); + } +} + +void +CodeGeneratorX86Shared::visitDivOrModConstantI(LDivOrModConstantI* ins) { + Register lhs = ToRegister(ins->numerator()); + Register output = ToRegister(ins->output()); + int32_t d = ins->denominator(); + + // This emits the division answer into edx or the modulus answer into eax. + MOZ_ASSERT(output == eax || output == edx); + MOZ_ASSERT(lhs != eax && lhs != edx); + bool isDiv = (output == edx); + + // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI + // and LModPowTwoI). + MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0); + + // We will first divide by Abs(d), and negate the answer if d is negative. + // If desired, this can be avoided by generalizing computeDivisionConstants. + ReciprocalMulConstants rmc = computeDivisionConstants(Abs(d), /* maxLog = */ 31); + + // We first compute (M * n) >> 32, where M = rmc.multiplier. + masm.movl(Imm32(rmc.multiplier), eax); + masm.imull(lhs); + if (rmc.multiplier > INT32_MAX) { + MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32)); + + // We actually computed edx = ((int32_t(M) * n) >> 32) instead. Since + // (M * n) >> 32 is the same as (edx + n), we can correct for the overflow. + // (edx + n) can't overflow, as n and edx have opposite signs because int32_t(M) + // is negative. + masm.addl(lhs, edx); + } + // (M * n) >> (32 + shift) is the truncated division answer if n is non-negative, + // as proved in the comments of computeDivisionConstants. We must add 1 later if n is + // negative to get the right answer in all cases. 
+ masm.sarl(Imm32(rmc.shiftAmount), edx); + + // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be + // computed with just a sign-extending shift of 31 bits. + if (ins->canBeNegativeDividend()) { + masm.movl(lhs, eax); + masm.sarl(Imm32(31), eax); + masm.subl(eax, edx); + } + + // After this, edx contains the correct truncated division result. + if (d < 0) + masm.negl(edx); + + if (!isDiv) { + masm.imull(Imm32(-d), edx, eax); + masm.addl(lhs, eax); + } + + if (!ins->mir()->isTruncated()) { + if (isDiv) { + // This is a division op. Multiply the obtained value by d to check if + // the correct answer is an integer. This cannot overflow, since |d| > 1. + masm.imull(Imm32(d), edx, eax); + masm.cmp32(lhs, eax); + bailoutIf(Assembler::NotEqual, ins->snapshot()); + + // If lhs is zero and the divisor is negative, the answer should have + // been -0. + if (d < 0) { + masm.test32(lhs, lhs); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } else if (ins->canBeNegativeDividend()) { + // This is a mod op. If the computed value is zero and lhs + // is negative, the answer should have been -0. + Label done; + + masm.cmp32(lhs, Imm32(0)); + masm.j(Assembler::GreaterThanOrEqual, &done); + + masm.test32(eax, eax); + bailoutIf(Assembler::Zero, ins->snapshot()); + + masm.bind(&done); + } + } +} + +void +CodeGeneratorX86Shared::visitDivI(LDivI* ins) +{ + Register remainder = ToRegister(ins->remainder()); + Register lhs = ToRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + Register output = ToRegister(ins->output()); + + MDiv* mir = ins->mir(); + + MOZ_ASSERT_IF(lhs != rhs, rhs != eax); + MOZ_ASSERT(rhs != edx); + MOZ_ASSERT(remainder == edx); + MOZ_ASSERT(output == eax); + + Label done; + ReturnZero* ool = nullptr; + + // Put the lhs in eax, for either the negative overflow case or the regular + // divide case. + if (lhs != eax) + masm.mov(lhs, eax); + + // Handle divide by zero. 
+ if (mir->canBeDivideByZero()) { + masm.test32(rhs, rhs); + if (mir->trapOnError()) { + masm.j(Assembler::Zero, trap(mir, wasm::Trap::IntegerDivideByZero)); + } else if (mir->canTruncateInfinities()) { + // Truncated division by zero is zero (Infinity|0 == 0) + if (!ool) + ool = new(alloc()) ReturnZero(output); + masm.j(Assembler::Zero, ool->entry()); + } else { + MOZ_ASSERT(mir->fallible()); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + // Handle an integer overflow exception from -2147483648 / -1. + if (mir->canBeNegativeOverflow()) { + Label notmin; + masm.cmp32(lhs, Imm32(INT32_MIN)); + masm.j(Assembler::NotEqual, &notmin); + masm.cmp32(rhs, Imm32(-1)); + if (mir->trapOnError()) { + masm.j(Assembler::Equal, trap(mir, wasm::Trap::IntegerOverflow)); + } else if (mir->canTruncateOverflow()) { + // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the + // output register (lhs == eax). + masm.j(Assembler::Equal, &done); + } else { + MOZ_ASSERT(mir->fallible()); + bailoutIf(Assembler::Equal, ins->snapshot()); + } + masm.bind(&notmin); + } + + // Handle negative 0. + if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) { + Label nonzero; + masm.test32(lhs, lhs); + masm.j(Assembler::NonZero, &nonzero); + masm.cmp32(rhs, Imm32(0)); + bailoutIf(Assembler::LessThan, ins->snapshot()); + masm.bind(&nonzero); + } + + // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit. + if (lhs != eax) + masm.mov(lhs, eax); + masm.cdq(); + masm.idiv(rhs); + + if (!mir->canTruncateRemainder()) { + // If the remainder is > 0, bailout since this must be a double.
+        masm.test32(remainder, remainder);
+        bailoutIf(Assembler::NonZero, ins->snapshot());
+    }
+
+    masm.bind(&done);
+
+    if (ool) {
+        addOutOfLineCode(ool, mir);
+        masm.bind(ool->rejoin());
+    }
+}
+
+// Emit an int32 modulus by the power of two (1 << ins->shift()). The result is
+// computed in place in the lhs register: a plain mask for non-negative
+// dividends, and negate / mask / negate for negative ones.
+void
+CodeGeneratorX86Shared::visitModPowTwoI(LModPowTwoI* ins)
+{
+    Register lhs = ToRegister(ins->getOperand(0));
+    int32_t shift = ins->shift();
+
+    Label negative;
+
+    if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) {
+        // Switch based on sign of the lhs.
+        // Positive numbers are just a bitmask
+        masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
+    }
+
+    masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
+
+    if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) {
+        Label done;
+        masm.jump(&done);
+
+        // Negative numbers need a negate, bitmask, negate
+        masm.bind(&negative);
+
+        // Unlike in the visitModI case, we are not computing the mod by means of a
+        // division. Therefore, the divisor = -1 case isn't problematic (the andl
+        // always returns 0, which is what we expect).
+        //
+        // The negl instruction overflows if lhs == INT32_MIN, but this is also not
+        // a problem: shift is at most 31, and so the andl also always returns 0.
+        masm.negl(lhs);
+        masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
+        masm.negl(lhs);
+
+        // Since a%b has the same sign as a (the dividend), and a is negative in
+        // this branch, an answer of 0 means the correct result is actually -0.
+        // Bail out.
+        if (!ins->mir()->isTruncated())
+            bailoutIf(Assembler::Zero, ins->snapshot());
+        masm.bind(&done);
+    }
+}
+
+// Out-of-line path used by visitModI when the dividend compares equal to
+// INT32_MIN. It carries the LModI, the divisor register and an extra |done|
+// label that visitModI binds after the main path.
+class ModOverflowCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared>
+{
+    Label done_;
+    LModI* ins_;
+    Register rhs_;
+
+  public:
+    explicit ModOverflowCheck(LModI* ins, Register rhs)
+      : ins_(ins), rhs_(rhs)
+    { }
+
+    virtual void accept(CodeGeneratorX86Shared* codegen) {
+        codegen->visitModOverflowCheck(this);
+    }
+    Label* done() {
+        return &done_;
+    }
+    LModI* ins() const {
+        return ins_;
+    }
+    Register rhs() const {
+        return rhs_;
+    }
+};
+
+// Out-of-line code for ModOverflowCheck: compare the divisor against -1.
+// Truncated mods rejoin when the divisor is not -1 and otherwise store 0 in edx
+// and jump to done(); non-truncated mods bail out when the divisor is -1 and
+// rejoin otherwise.
+void
+CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool)
+{
+    masm.cmp32(ool->rhs(), Imm32(-1));
+    if (ool->ins()->mir()->isTruncated()) {
+        masm.j(Assembler::NotEqual, ool->rejoin());
+        masm.mov(ImmWord(0), edx);
+        masm.jmp(ool->done());
+    } else {
+        bailoutIf(Assembler::Equal, ool->ins()->snapshot());
+        masm.jmp(ool->rejoin());
+    }
+}
+
+// Emit an int32 modulus using idiv. The dividend is moved to eax and the
+// remainder is produced in edx (both asserted below). Division by zero, the
+// INT32_MIN % -1 overflow and a -0 result are handled by a trap, an
+// out-of-line path or a bailout, depending on the MIR flags.
+void
+CodeGeneratorX86Shared::visitModI(LModI* ins)
+{
+    Register remainder = ToRegister(ins->remainder());
+    Register lhs = ToRegister(ins->lhs());
+    Register rhs = ToRegister(ins->rhs());
+
+    // Required to use idiv.
+    MOZ_ASSERT_IF(lhs != rhs, rhs != eax);
+    MOZ_ASSERT(rhs != edx);
+    MOZ_ASSERT(remainder == edx);
+    MOZ_ASSERT(ToRegister(ins->getTemp(0)) == eax);
+
+    Label done;
+    ReturnZero* ool = nullptr;
+    ModOverflowCheck* overflow = nullptr;
+
+    // Set up eax in preparation for doing a div.
+    if (lhs != eax)
+        masm.mov(lhs, eax);
+
+    MMod* mir = ins->mir();
+
+    // Prevent divide by zero.
+    if (mir->canBeDivideByZero()) {
+        masm.test32(rhs, rhs);
+        if (mir->isTruncated()) {
+            if (mir->trapOnError()) {
+                masm.j(Assembler::Zero, trap(mir, wasm::Trap::IntegerDivideByZero));
+            } else {
+                if (!ool)
+                    ool = new(alloc()) ReturnZero(edx);
+                masm.j(Assembler::Zero, ool->entry());
+            }
+        } else {
+            bailoutIf(Assembler::Zero, ins->snapshot());
+        }
+    }
+
+    Label negative;
+
+    // Switch based on sign of the lhs.
+    if (mir->canBeNegativeDividend())
+        masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
+
+    // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive.
+    {
+        // Check if rhs is a power-of-two.
+        if (mir->canBePowerOfTwoDivisor()) {
+            MOZ_ASSERT(rhs != remainder);
+
+            // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if
+            // y is any negative number other than INT32_MIN, both y and
+            // y-1 will have the sign bit set so these are never optimized
+            // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX
+            // and because lhs >= 0 at this point, lhs & INT32_MAX returns
+            // the correct value.
+            Label notPowerOfTwo;
+            masm.mov(rhs, remainder);
+            masm.subl(Imm32(1), remainder);
+            masm.branchTest32(Assembler::NonZero, remainder, rhs, &notPowerOfTwo);
+            {
+                masm.andl(lhs, remainder);
+                masm.jmp(&done);
+            }
+            masm.bind(&notPowerOfTwo);
+        }
+
+        // Since lhs >= 0, the sign-extension will be 0
+        masm.mov(ImmWord(0), edx);
+        masm.idiv(rhs);
+    }
+
+    // Otherwise, we have to beware of two special cases:
+    if (mir->canBeNegativeDividend()) {
+        masm.jump(&done);
+
+        masm.bind(&negative);
+
+        // Prevent an integer overflow exception from -2147483648 % -1
+        masm.cmp32(lhs, Imm32(INT32_MIN));
+        overflow = new(alloc()) ModOverflowCheck(ins, rhs);
+        masm.j(Assembler::Equal, overflow->entry());
+        masm.bind(overflow->rejoin());
+        masm.cdq();
+        masm.idiv(rhs);
+
+        if (!mir->isTruncated()) {
+            // A remainder of 0 means that the rval must be -0, which is a double.
+            masm.test32(remainder, remainder);
+            bailoutIf(Assembler::Zero, ins->snapshot());
+        }
+    }
+
+    masm.bind(&done);
+
+    if (overflow) {
+        addOutOfLineCode(overflow, mir);
+        masm.bind(overflow->done());
+    }
+
+    if (ool) {
+        addOutOfLineCode(ool, mir);
+        masm.bind(ool->rejoin());
+    }
+}
+
+// Emit a 32-bit bitwise NOT applied in place to the (non-constant) operand.
+void
+CodeGeneratorX86Shared::visitBitNotI(LBitNotI* ins)
+{
+    const LAllocation* input = ins->getOperand(0);
+    MOZ_ASSERT(!input->isConstant());
+
+    masm.notl(ToOperand(input));
+}
+
+// Emit a 32-bit OR / XOR / AND selected by ins->bitop(). A constant rhs is
+// encoded as an immediate; otherwise lhs must be a register.
+void
+CodeGeneratorX86Shared::visitBitOpI(LBitOpI* ins)
+{
+    const LAllocation* lhs = ins->getOperand(0);
+    const LAllocation* rhs = ins->getOperand(1);
+
+    switch (ins->bitop()) {
+      case JSOP_BITOR:
+        if (rhs->isConstant())
+            masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs));
+        else
+            masm.orl(ToOperand(rhs), ToRegister(lhs));
+        break;
+      case JSOP_BITXOR:
+        if (rhs->isConstant())
+            masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs));
+        else
+            masm.xorl(ToOperand(rhs), ToRegister(lhs));
+        break;
+      case JSOP_BITAND:
+        if (rhs->isConstant())
+            masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs));
+        else
+            masm.andl(ToOperand(rhs), ToRegister(lhs));
+        break;
+      default:
+        MOZ_CRASH("unexpected binary opcode");
+    }
+}
+
+// 64-bit counterpart of visitBitOpI. The output register pair is asserted to
+// be the same as the lhs.
+void
+CodeGeneratorX86Shared::visitBitOpI64(LBitOpI64* lir)
+{
+    const LInt64Allocation lhs = lir->getInt64Operand(LBitOpI64::Lhs);
+    const LInt64Allocation rhs = lir->getInt64Operand(LBitOpI64::Rhs);
+
+    MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+    switch (lir->bitop()) {
+      case JSOP_BITOR:
+        if (IsConstant(rhs))
+            masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+        else
+            masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+        break;
+      case JSOP_BITXOR:
+        if (IsConstant(rhs))
+            masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+        else
+            masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+        break;
+      case JSOP_BITAND:
+        if (IsConstant(rhs))
+            masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+        else
+            masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
+        break;
+      default:
+        MOZ_CRASH("unexpected binary opcode");
+    }
+}
+
+// Emit a 32-bit shift of lhs in place. Constant shift counts are masked to
+// five bits and a zero count emits no shift; variable counts must be in ecx.
+// A fallible unsigned right shift bails out when the result has its sign bit
+// set.
+void
+CodeGeneratorX86Shared::visitShiftI(LShiftI* ins)
+{
+    Register lhs = ToRegister(ins->lhs());
+    const LAllocation* rhs = ins->rhs();
+
+    if (rhs->isConstant()) {
+        int32_t shift = ToInt32(rhs) & 0x1F;
+        switch (ins->bitop()) {
+          case JSOP_LSH:
+            if (shift)
+                masm.shll(Imm32(shift), lhs);
+            break;
+          case JSOP_RSH:
+            if (shift)
+                masm.sarl(Imm32(shift), lhs);
+            break;
+          case JSOP_URSH:
+            if (shift) {
+                masm.shrl(Imm32(shift), lhs);
+            } else if (ins->mir()->toUrsh()->fallible()) {
+                // x >>> 0 can overflow.
+                masm.test32(lhs, lhs);
+                bailoutIf(Assembler::Signed, ins->snapshot());
+            }
+            break;
+          default:
+            MOZ_CRASH("Unexpected shift op");
+        }
+    } else {
+        MOZ_ASSERT(ToRegister(rhs) == ecx);
+        switch (ins->bitop()) {
+          case JSOP_LSH:
+            masm.shll_cl(lhs);
+            break;
+          case JSOP_RSH:
+            masm.sarl_cl(lhs);
+            break;
+          case JSOP_URSH:
+            masm.shrl_cl(lhs);
+            if (ins->mir()->toUrsh()->fallible()) {
+                // x >>> 0 can overflow.
+                masm.test32(lhs, lhs);
+                bailoutIf(Assembler::Signed, ins->snapshot());
+            }
+            break;
+          default:
+            MOZ_CRASH("Unexpected shift op");
+        }
+    }
+}
+
+// Emit a 64-bit shift of lhs in place. Constant counts are masked to six bits
+// and a zero count emits nothing; variable counts must be in ecx.
+void
+CodeGeneratorX86Shared::visitShiftI64(LShiftI64* lir)
+{
+    const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs);
+    LAllocation* rhs = lir->getOperand(LShiftI64::Rhs);
+
+    MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+    if (rhs->isConstant()) {
+        int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
+        switch (lir->bitop()) {
+          case JSOP_LSH:
+            if (shift)
+                masm.lshift64(Imm32(shift), ToRegister64(lhs));
+            break;
+          case JSOP_RSH:
+            if (shift)
+                masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs));
+            break;
+          case JSOP_URSH:
+            if (shift)
+                masm.rshift64(Imm32(shift), ToRegister64(lhs));
+            break;
+          default:
+            MOZ_CRASH("Unexpected shift op");
+        }
+        return;
+    }
+
+    MOZ_ASSERT(ToRegister(rhs) == ecx);
+    switch (lir->bitop()) {
+      case JSOP_LSH:
+        masm.lshift64(ecx, ToRegister64(lhs));
+        break;
+      case JSOP_RSH:
+        masm.rshift64Arithmetic(ecx, ToRegister64(lhs));
+        break;
+      case JSOP_URSH:
+        masm.rshift64(ecx, ToRegister64(lhs));
+        break;
+      default:
+        MOZ_CRASH("Unexpected shift op");
+    }
+}
+
+// Emit an unsigned right shift whose result is converted to a double. The
+// shift is done in place in lhs, which is asserted to equal the temp.
+void
+CodeGeneratorX86Shared::visitUrshD(LUrshD* ins)
+{
+    Register lhs = ToRegister(ins->lhs());
+    MOZ_ASSERT(ToRegister(ins->temp()) == lhs);
+
+    const LAllocation* rhs = ins->rhs();
+    FloatRegister out = ToFloatRegister(ins->output());
+
+    if (rhs->isConstant()) {
+        int32_t shift = ToInt32(rhs) & 0x1F;
+        if (shift)
+            masm.shrl(Imm32(shift), lhs);
+    } else {
+        MOZ_ASSERT(ToRegister(rhs) == ecx);
+        masm.shrl_cl(lhs);
+    }
+
+    masm.convertUInt32ToDouble(lhs, out);
+}
+
+// Convert an allocation to an assembler Operand: a general register, a float
+// register, or otherwise a stack-pointer-relative slot.
+Operand
+CodeGeneratorX86Shared::ToOperand(const LAllocation& a)
+{
+    if (a.isGeneralReg())
+        return Operand(a.toGeneralReg()->reg());
+    if (a.isFloatReg())
+        return Operand(a.toFloatReg()->reg());
+    return Operand(masm.getStackPointer(), ToStackOffset(&a));
+}
+
+// Pointer overload; forwards to the reference overload.
+Operand
+CodeGeneratorX86Shared::ToOperand(const LAllocation* a)
+{
+    return ToOperand(*a);
+}
+
+// Definition overload; converts the definition's output allocation.
+Operand
+CodeGeneratorX86Shared::ToOperand(const LDefinition* def)
+{
+    return ToOperand(def->output());
+}
+
+// Convert an allocation to a MoveOperand: a general register, a float
+// register, or otherwise a StackPointer-relative slot.
+MoveOperand
+CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const
+{
+    if (a.isGeneralReg())
+        return MoveOperand(ToRegister(a));
+    if (a.isFloatReg())
+        return MoveOperand(ToFloatRegister(a));
+    return MoveOperand(StackPointer, ToStackOffset(a));
+}
+
+// Out-of-line code that holds the jump table for an MTableSwitch; jumpLabel()
+// is the CodeLabel that emitTableSwitchDispatch uses to load the table address.
+class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorX86Shared>
+{
+    MTableSwitch* mir_;
+    CodeLabel jumpLabel_;
+
+    void accept(CodeGeneratorX86Shared* codegen) {
+        codegen->visitOutOfLineTableSwitch(this);
+    }
+
+  public:
+    explicit OutOfLineTableSwitch(MTableSwitch* mir)
+      : mir_(mir)
+    {}
+
+    MTableSwitch* mir() const {
+        return mir_;
+    }
+
+    CodeLabel* jumpLabel() {
+        return &jumpLabel_;
+    }
+};
+
+// Emit the pointer-aligned jump table: one code pointer per switch case, each
+// registered as a CodeLabel bound to the case block's label offset.
+void
+CodeGeneratorX86Shared::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool)
+{
+    MTableSwitch* mir = ool->mir();
+
+    masm.haltingAlign(sizeof(void*));
+    masm.use(ool->jumpLabel()->target());
+    masm.addCodeLabel(*ool->jumpLabel());
+
+    for (size_t i = 0; i < mir->numCases(); i++) {
+        LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir();
+        Label* caseheader = caseblock->label();
+        uint32_t caseoffset = caseheader->offset();
+
+        // The entries of the jump table need to be absolute addresses and thus
+        // must be patched after codegen is finished.
+        CodeLabel cl;
+        masm.writeCodePointer(cl.patchAt());
+        cl.target()->bind(caseoffset);
+        masm.addCodeLabel(cl);
+    }
+}
+
+void
+CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base)
+{
+    Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
+
+    // Lower value with low value
+    if (mir->low() != 0)
+        masm.subl(Imm32(mir->low()), index);
+
+    // Jump to default case if input is out of range
+    int32_t cases = mir->numCases();
+    masm.cmp32(index, Imm32(cases));
+    masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase);
+
+    // To fill in the CodeLabels for the case entries, we need to first
+    // generate the case entries (we don't yet know their offsets in the
+    // instruction stream).
+    OutOfLineTableSwitch* ool = new(alloc()) OutOfLineTableSwitch(mir);
+    addOutOfLineCode(ool, mir);
+
+    // Compute the position where a pointer to the right case stands.
+    masm.mov(ool->jumpLabel()->patchAt(), base);
+    Operand pointer = Operand(base, index, ScalePointer);
+
+    // Jump to the right case
+    masm.jmp(pointer);
+}
+
+// Emit the scalar double-precision add / sub / mul / div selected by
+// math->jsop().
+void
+CodeGeneratorX86Shared::visitMathD(LMathD* math)
+{
+    FloatRegister lhs = ToFloatRegister(math->lhs());
+    Operand rhs = ToOperand(math->rhs());
+    FloatRegister output = ToFloatRegister(math->output());
+
+    switch (math->jsop()) {
+      case JSOP_ADD:
+        masm.vaddsd(rhs, lhs, output);
+        break;
+      case JSOP_SUB:
+        masm.vsubsd(rhs, lhs, output);
+        break;
+      case JSOP_MUL:
+        masm.vmulsd(rhs, lhs, output);
+        break;
+      case JSOP_DIV:
+        masm.vdivsd(rhs, lhs, output);
+        break;
+      default:
+        MOZ_CRASH("unexpected opcode");
+    }
+}
+
+// Emit the scalar single-precision add / sub / mul / div selected by
+// math->jsop().
+void
+CodeGeneratorX86Shared::visitMathF(LMathF* math)
+{
+    FloatRegister lhs = ToFloatRegister(math->lhs());
+    Operand rhs = ToOperand(math->rhs());
+    FloatRegister output = ToFloatRegister(math->output());
+
+    switch (math->jsop()) {
+      case JSOP_ADD:
+        masm.vaddss(rhs, lhs, output);
+        break;
+      case JSOP_SUB:
+        masm.vsubss(rhs, lhs, output);
+        break;
+      case JSOP_MUL:
+        masm.vmulss(rhs, lhs, output);
+        break;
+      case JSOP_DIV:
+        masm.vdivss(rhs, lhs, output);
+        break;
+      default:
+        MOZ_CRASH("unexpected opcode");
+    }
+}
+
+// Emit floor of a double into an int32 register, bailing out on negative zero
+// and whenever the truncating conversion bails. With SSE4.1 the value is
+// rounded down with vroundsd first; otherwise negative inputs are truncated
+// and corrected by subtracting 1 when they were not integer-valued.
+void
+CodeGeneratorX86Shared::visitFloor(LFloor* lir)
+{
+    FloatRegister input = ToFloatRegister(lir->input());
+    Register output = ToRegister(lir->output());
+
+    Label bailout;
+
+    if (AssemblerX86Shared::HasSSE41()) {
+        // Bail on negative-zero.
+        masm.branchNegativeZero(input, output, &bailout);
+        bailoutFrom(&bailout, lir->snapshot());
+
+        // Round toward -Infinity.
+        {
+            ScratchDoubleScope scratch(masm);
+            masm.vroundsd(X86Encoding::RoundDown, input, scratch, scratch);
+            bailoutCvttsd2si(scratch, output, lir->snapshot());
+        }
+    } else {
+        Label negative, end;
+
+        // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
+        {
+            ScratchDoubleScope scratch(masm);
+            masm.zeroDouble(scratch);
+            masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
+        }
+
+        // Bail on negative-zero.
+        masm.branchNegativeZero(input, output, &bailout);
+        bailoutFrom(&bailout, lir->snapshot());
+
+        // Input is non-negative, so truncation correctly rounds.
+        bailoutCvttsd2si(input, output, lir->snapshot());
+
+        masm.jump(&end);
+
+        // Input is negative, but isn't -0.
+        // Negative values go on a comparatively expensive path, since no
+        // native rounding mode matches JS semantics. Still better than callVM.
+        masm.bind(&negative);
+        {
+            // Truncate and round toward zero.
+            // This is off-by-one for everything but integer-valued inputs.
+            bailoutCvttsd2si(input, output, lir->snapshot());
+
+            // Test whether the input double was integer-valued.
+            {
+                ScratchDoubleScope scratch(masm);
+                masm.convertInt32ToDouble(output, scratch);
+                masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
+            }
+
+            // Input is not integer-valued, so we rounded off-by-one in the
+            // wrong direction. Correct by subtraction.
+            masm.subl(Imm32(1), output);
+            // Cannot overflow: output was already checked against INT_MIN.
+        }
+
+        masm.bind(&end);
+    }
+}
+
+// Float32 counterpart of visitFloor: same structure, using the single-precision
+// emitters and scratch register.
+void
+CodeGeneratorX86Shared::visitFloorF(LFloorF* lir)
+{
+    FloatRegister input = ToFloatRegister(lir->input());
+    Register output = ToRegister(lir->output());
+
+    Label bailout;
+
+    if (AssemblerX86Shared::HasSSE41()) {
+        // Bail on negative-zero.
+        masm.branchNegativeZeroFloat32(input, output, &bailout);
+        bailoutFrom(&bailout, lir->snapshot());
+
+        // Round toward -Infinity.
+        {
+            ScratchFloat32Scope scratch(masm);
+            masm.vroundss(X86Encoding::RoundDown, input, scratch, scratch);
+            bailoutCvttss2si(scratch, output, lir->snapshot());
+        }
+    } else {
+        Label negative, end;
+
+        // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
+        {
+            ScratchFloat32Scope scratch(masm);
+            masm.zeroFloat32(scratch);
+            masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
+        }
+
+        // Bail on negative-zero.
+        masm.branchNegativeZeroFloat32(input, output, &bailout);
+        bailoutFrom(&bailout, lir->snapshot());
+
+        // Input is non-negative, so truncation correctly rounds.
+        bailoutCvttss2si(input, output, lir->snapshot());
+
+        masm.jump(&end);
+
+        // Input is negative, but isn't -0.
+        // Negative values go on a comparatively expensive path, since no
+        // native rounding mode matches JS semantics. Still better than callVM.
+        masm.bind(&negative);
+        {
+            // Truncate and round toward zero.
+            // This is off-by-one for everything but integer-valued inputs.
+            bailoutCvttss2si(input, output, lir->snapshot());
+
+            // Test whether the input float was integer-valued.
+            {
+                ScratchFloat32Scope scratch(masm);
+                masm.convertInt32ToFloat32(output, scratch);
+                masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
+            }
+
+            // Input is not integer-valued, so we rounded off-by-one in the
+            // wrong direction. Correct by subtraction.
+            masm.subl(Imm32(1), output);
+            // Cannot overflow: output was already checked against INT_MIN.
+        }
+
+        masm.bind(&end);
+    }
+}
+
+// Emit ceil of a double into an int32 register. Inputs in ]-1; -0] bail out;
+// with SSE4.1 the value is rounded up with vroundsd before the truncating
+// conversion.
+void
+CodeGeneratorX86Shared::visitCeil(LCeil* lir)
+{
+    FloatRegister input = ToFloatRegister(lir->input());
+    ScratchDoubleScope scratch(masm);
+    Register output = ToRegister(lir->output());
+
+    Label bailout, lessThanMinusOne;
+
+    // Bail on ]-1; -0] range
+    masm.loadConstantDouble(-1, scratch);
+    masm.branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, input,
+                      scratch, &lessThanMinusOne);
+
+    // Test for remaining values with the sign bit set, i.e. ]-1; -0]
+    masm.vmovmskpd(input, output);
+    masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
+    bailoutFrom(&bailout, lir->snapshot());
+
+    if (AssemblerX86Shared::HasSSE41()) {
+        // x <= -1 or x > -0
+        masm.bind(&lessThanMinusOne);
+        // Round toward +Infinity.
+        masm.vroundsd(X86Encoding::RoundUp, input, scratch, scratch);
+        bailoutCvttsd2si(scratch, output, lir->snapshot());
+        return;
+    }
+
+    // No SSE4.1
+    Label end;
+
+    // x >= 0 and x is not -0.0, we can truncate (resp. truncate and add 1) for
+    // integer (resp. non-integer) values.
+    // Will also work for values >= INT_MAX + 1, as the truncate
+    // operation will return INT_MIN and there'll be a bailout.
+    bailoutCvttsd2si(input, output, lir->snapshot());
+    masm.convertInt32ToDouble(output, scratch);
+    masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
+
+    // Input is not integer-valued, add 1 to obtain the ceiling value
+    masm.addl(Imm32(1), output);
+    // if input > INT_MAX, output == INT_MAX so adding 1 will overflow.
+    bailoutIf(Assembler::Overflow, lir->snapshot());
+    masm.jump(&end);
+
+    // x <= -1, truncation is the way to go.
+    masm.bind(&lessThanMinusOne);
+    bailoutCvttsd2si(input, output, lir->snapshot());
+
+    masm.bind(&end);
+}
+
+// Float32 counterpart of visitCeil: same structure, using the single-precision
+// emitters and scratch register.
+void
+CodeGeneratorX86Shared::visitCeilF(LCeilF* lir)
+{
+    FloatRegister input = ToFloatRegister(lir->input());
+    ScratchFloat32Scope scratch(masm);
+    Register output = ToRegister(lir->output());
+
+    Label bailout, lessThanMinusOne;
+
+    // Bail on ]-1; -0] range
+    masm.loadConstantFloat32(-1.f, scratch);
+    masm.branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, input,
+                     scratch, &lessThanMinusOne);
+
+    // Test for remaining values with the sign bit set, i.e. ]-1; -0]
+    masm.vmovmskps(input, output);
+    masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
+    bailoutFrom(&bailout, lir->snapshot());
+
+    if (AssemblerX86Shared::HasSSE41()) {
+        // x <= -1 or x > -0
+        masm.bind(&lessThanMinusOne);
+        // Round toward +Infinity.
+        masm.vroundss(X86Encoding::RoundUp, input, scratch, scratch);
+        bailoutCvttss2si(scratch, output, lir->snapshot());
+        return;
+    }
+
+    // No SSE4.1
+    Label end;
+
+    // x >= 0 and x is not -0.0, we can truncate (resp. truncate and add 1) for
+    // integer (resp. non-integer) values.
+    // Will also work for values >= INT_MAX + 1, as the truncate
+    // operation will return INT_MIN and there'll be a bailout.
+    bailoutCvttss2si(input, output, lir->snapshot());
+    masm.convertInt32ToFloat32(output, scratch);
+    masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
+
+    // Input is not integer-valued, add 1 to obtain the ceiling value
+    masm.addl(Imm32(1), output);
+    // if input > INT_MAX, output == INT_MAX so adding 1 will overflow.
+    bailoutIf(Assembler::Overflow, lir->snapshot());
+    masm.jump(&end);
+
+    // x <= -1, truncation is the way to go.
+    masm.bind(&lessThanMinusOne);
+    bailoutCvttss2si(input, output, lir->snapshot());
+
+    masm.bind(&end);
+}
+
+// Emit a rounding of a double into an int32 register. Positive inputs add the
+// biggest double below 0.5 and truncate; +0 yields 0; -0 and results that
+// would be -0 bail out; other negative inputs add a bias and round toward
+// -Infinity (with vroundsd when SSE4.1 is available).
+void
+CodeGeneratorX86Shared::visitRound(LRound* lir)
+{
+    FloatRegister input = ToFloatRegister(lir->input());
+    FloatRegister temp = ToFloatRegister(lir->temp());
+    ScratchDoubleScope scratch(masm);
+    Register output = ToRegister(lir->output());
+
+    Label negativeOrZero, negative, end, bailout;
+
+    // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
+    masm.zeroDouble(scratch);
+    masm.loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
+    masm.branchDouble(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero);
+
+    // Input is positive. Add the biggest double less than 0.5 and
+    // truncate, rounding down (because if the input is the biggest double less
+    // than 0.5, adding 0.5 would undesirably round up to 1). Note that we have
+    // to add the input to the temp register because we're not allowed to
+    // modify the input register.
+    masm.addDouble(input, temp);
+    bailoutCvttsd2si(temp, output, lir->snapshot());
+
+    masm.jump(&end);
+
+    // Input is negative, +0 or -0.
+    masm.bind(&negativeOrZero);
+    // Branch on negative input.
+    masm.j(Assembler::NotEqual, &negative);
+
+    // Bail on negative-zero.
+    masm.branchNegativeZero(input, output, &bailout, /* maybeNonZero = */ false);
+    bailoutFrom(&bailout, lir->snapshot());
+
+    // Input is +0
+    masm.xor32(output, output);
+    masm.jump(&end);
+
+    // Input is negative.
+    masm.bind(&negative);
+
+    // Inputs in ]-0.5; 0] need to be added 0.5, other negative inputs need to
+    // be added the biggest double less than 0.5.
+    Label loadJoin;
+    masm.loadConstantDouble(-0.5, scratch);
+    masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &loadJoin);
+    masm.loadConstantDouble(0.5, temp);
+    masm.bind(&loadJoin);
+
+    if (AssemblerX86Shared::HasSSE41()) {
+        // Add 0.5 and round toward -Infinity. The result is stored in the temp
+        // register (currently contains 0.5).
+        masm.addDouble(input, temp);
+        masm.vroundsd(X86Encoding::RoundDown, temp, scratch, scratch);
+
+        // Truncate.
+        bailoutCvttsd2si(scratch, output, lir->snapshot());
+
+        // If the result is positive zero, then the actual result is -0. Bail.
+        // Otherwise, the truncation will have produced the correct negative integer.
+        masm.test32(output, output);
+        bailoutIf(Assembler::Zero, lir->snapshot());
+    } else {
+        masm.addDouble(input, temp);
+
+        // Round toward -Infinity without the benefit of ROUNDSD.
+        {
+            // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0.
+            masm.compareDouble(Assembler::DoubleGreaterThanOrEqual, temp, scratch);
+            bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot());
+
+            // Truncate and round toward zero.
+            // This is off-by-one for everything but integer-valued inputs.
+            bailoutCvttsd2si(temp, output, lir->snapshot());
+
+            // Test whether the truncated double was integer-valued.
+            masm.convertInt32ToDouble(output, scratch);
+            masm.branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
+
+            // Input is not integer-valued, so we rounded off-by-one in the
+            // wrong direction. Correct by subtraction.
+            masm.subl(Imm32(1), output);
+            // Cannot overflow: output was already checked against INT_MIN.
+        }
+    }
+
+    masm.bind(&end);
+}
+
+// Float32 counterpart of visitRound, using the single-precision emitters and
+// scratch register.
+void
+CodeGeneratorX86Shared::visitRoundF(LRoundF* lir)
+{
+    FloatRegister input = ToFloatRegister(lir->input());
+    FloatRegister temp = ToFloatRegister(lir->temp());
+    ScratchFloat32Scope scratch(masm);
+    Register output = ToRegister(lir->output());
+
+    Label negativeOrZero, negative, end, bailout;
+
+    // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
+    masm.zeroFloat32(scratch);
+    masm.loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
+    masm.branchFloat(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero);
+
+    // Input is non-negative. Add the biggest float less than 0.5 and truncate,
+    // rounding down (because if the input is the biggest float less than 0.5,
+    // adding 0.5 would undesirably round up to 1). Note that we have to add
+    // the input to the temp register because we're not allowed to modify the
+    // input register.
+    masm.addFloat32(input, temp);
+
+    bailoutCvttss2si(temp, output, lir->snapshot());
+
+    masm.jump(&end);
+
+    // Input is negative, +0 or -0.
+    masm.bind(&negativeOrZero);
+    // Branch on negative input.
+    masm.j(Assembler::NotEqual, &negative);
+
+    // Bail on negative-zero.
+    masm.branchNegativeZeroFloat32(input, output, &bailout);
+    bailoutFrom(&bailout, lir->snapshot());
+
+    // Input is +0.
+    masm.xor32(output, output);
+    masm.jump(&end);
+
+    // Input is negative.
+    masm.bind(&negative);
+
+    // Inputs in ]-0.5; 0] need to be added 0.5, other negative inputs need to
+    // be added the biggest float less than 0.5.
+    Label loadJoin;
+    masm.loadConstantFloat32(-0.5f, scratch);
+    masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &loadJoin);
+    masm.loadConstantFloat32(0.5f, temp);
+    masm.bind(&loadJoin);
+
+    if (AssemblerX86Shared::HasSSE41()) {
+        // Add 0.5 and round toward -Infinity. The result is stored in the temp
+        // register (currently contains 0.5).
+        masm.addFloat32(input, temp);
+        masm.vroundss(X86Encoding::RoundDown, temp, scratch, scratch);
+
+        // Truncate.
+        bailoutCvttss2si(scratch, output, lir->snapshot());
+
+        // If the result is positive zero, then the actual result is -0. Bail.
+        // Otherwise, the truncation will have produced the correct negative integer.
+        masm.test32(output, output);
+        bailoutIf(Assembler::Zero, lir->snapshot());
+    } else {
+        masm.addFloat32(input, temp);
+        // Round toward -Infinity without the benefit of ROUNDSS.
+        {
+            // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0.
+            masm.compareFloat(Assembler::DoubleGreaterThanOrEqual, temp, scratch);
+            bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot());
+
+            // Truncate and round toward zero.
+            // This is off-by-one for everything but integer-valued inputs.
+            bailoutCvttss2si(temp, output, lir->snapshot());
+
+            // Test whether the truncated float was integer-valued.
+            masm.convertInt32ToFloat32(output, scratch);
+            masm.branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
+
+            // Input is not integer-valued, so we rounded off-by-one in the
+            // wrong direction. Correct by subtraction.
+            masm.subl(Imm32(1), output);
+            // Cannot overflow: output was already checked against INT_MIN.
+        }
+    }
+
+    masm.bind(&end);
+}
+
+// Bail out unless the object's shape pointer equals the shape recorded in the
+// MIR node.
+void
+CodeGeneratorX86Shared::visitGuardShape(LGuardShape* guard)
+{
+    Register obj = ToRegister(guard->input());
+    masm.cmpPtr(Operand(obj, ShapedObject::offsetOfShape()), ImmGCPtr(guard->mir()->shape()));
+
+    bailoutIf(Assembler::NotEqual, guard->snapshot());
+}
+
+// Compare the object's group pointer with the group recorded in the MIR node
+// and bail out on equality or on inequality, as selected by bailOnEquality().
+void
+CodeGeneratorX86Shared::visitGuardObjectGroup(LGuardObjectGroup* guard)
+{
+    Register obj = ToRegister(guard->input());
+
+    masm.cmpPtr(Operand(obj, JSObject::offsetOfGroup()), ImmGCPtr(guard->mir()->group()));
+
+    Assembler::Condition cond =
+        guard->mir()->bailOnEquality() ? Assembler::Equal : Assembler::NotEqual;
+    bailoutIf(cond, guard->snapshot());
+}
+
+// Load the object's group into tmp and bail out unless the group's class
+// pointer equals the class recorded in the MIR node.
+void
+CodeGeneratorX86Shared::visitGuardClass(LGuardClass* guard)
+{
+    Register obj = ToRegister(guard->input());
+    Register tmp = ToRegister(guard->tempInt());
+
+    masm.loadPtr(Address(obj, JSObject::offsetOfGroup()), tmp);
+    masm.cmpPtr(Operand(tmp, ObjectGroup::offsetOfClasp()), ImmPtr(guard->mir()->getClass()));
+    bailoutIf(Assembler::NotEqual, guard->snapshot());
+}
+
+// Emit a single leal computing base + index * scale + displacement into the
+// output register.
+void
+CodeGeneratorX86Shared::visitEffectiveAddress(LEffectiveAddress* ins)
+{
+    const MEffectiveAddress* mir = ins->mir();
+    Register base = ToRegister(ins->base());
+    Register index = ToRegister(ins->index());
+    Register output = ToRegister(ins->output());
+    masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output);
+}
+
+void
+CodeGeneratorX86Shared::generateInvalidateEpilogue()
+{
+    // Ensure that there is enough space in the buffer for the OsiPoint
+    // patching to occur. Otherwise, we could overwrite the invalidation
+    // epilogue.
+    for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize())
+        masm.nop();
+
+    masm.bind(&invalidate_);
+
+    // Push the Ion script onto the stack (when we determine what that pointer is).
+    invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
+    JitCode* thunk = gen->jitRuntime()->getInvalidationThunk();
+
+    masm.call(thunk);
+
+    // We should never reach this point in JIT code -- the invalidation thunk should
+    // pop the invalidated JS frame and return directly to its caller.
+    masm.assumeUnreachable("Should have returned directly to its caller instead of here.");
+}
+
+// Negate an int32 in place; input and output are asserted to be the same
+// register.
+void
+CodeGeneratorX86Shared::visitNegI(LNegI* ins)
+{
+    Register input = ToRegister(ins->input());
+    MOZ_ASSERT(input == ToRegister(ins->output()));
+
+    masm.neg32(input);
+}
+
+// Negate a double in place; input and output are asserted to be the same
+// register.
+void
+CodeGeneratorX86Shared::visitNegD(LNegD* ins)
+{
+    FloatRegister input = ToFloatRegister(ins->input());
+    MOZ_ASSERT(input == ToFloatRegister(ins->output()));
+
+    masm.negateDouble(input);
+}
+
+// Negate a float32 in place; input and output are asserted to be the same
+// register.
+void
+CodeGeneratorX86Shared::visitNegF(LNegF* ins)
+{
+    FloatRegister input = ToFloatRegister(ins->input());
+    MOZ_ASSERT(input == ToFloatRegister(ins->output()));
+
+    masm.negateFloat(input);
+}
+
+// Load the instruction's integer SIMD constant into its output register.
+void
+CodeGeneratorX86Shared::visitSimd128Int(LSimd128Int* ins)
+{
+    const LDefinition* out = ins->getDef(0);
+    masm.loadConstantSimd128Int(ins->getValue(), ToFloatRegister(out));
+}
+
+// Load the instruction's float SIMD constant into its output register.
+void
+CodeGeneratorX86Shared::visitSimd128Float(LSimd128Float* ins)
+{
+    const LDefinition* out = ins->getDef(0);
+    masm.loadConstantSimd128Float(ins->getValue(), ToFloatRegister(out));
+}
+
+// Convert four int32 lanes to four float32 lanes.
+void
+CodeGeneratorX86Shared::visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins)
+{
+    FloatRegister in = ToFloatRegister(ins->input());
+    FloatRegister out = ToFloatRegister(ins->output());
+    masm.convertInt32x4ToFloat32x4(in, out);
+}
+
+// Convert four float32 lanes to four int32 lanes. If any result lane equals
+// INT32_MIN (the InvalidResult constant below), control goes to the
+// out-of-line check in visitOutOfLineSimdFloatToIntCheck.
+void
+CodeGeneratorX86Shared::visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins)
+{
+    FloatRegister in = ToFloatRegister(ins->input());
+    FloatRegister out = ToFloatRegister(ins->output());
+    Register temp = ToRegister(ins->temp());
+
+    masm.convertFloat32x4ToInt32x4(in, out);
+
+    auto* ool = new(alloc()) OutOfLineSimdFloatToIntCheck(temp, in, ins, ins->mir()->trapOffset());
+    addOutOfLineCode(ool, ins->mir());
+
+    static const SimdConstant InvalidResult = SimdConstant::SplatX4(int32_t(-2147483648));
+
+    ScratchSimd128Scope scratch(masm);
+    masm.loadConstantSimd128Int(InvalidResult, scratch);
+    masm.packedEqualInt32x4(Operand(out), scratch);
+    // TODO (bug 1156228): If we have SSE4.1, we can use PTEST here instead of
+    // the two following instructions.
+    masm.vmovmskps(scratch, temp);
+    masm.cmp32(temp, Imm32(0));
+    masm.j(Assembler::NotEqual, ool->entry());
+
+    masm.bind(ool->rejoin());
+}
+
+// Out-of-line range check for visitFloat32x4ToInt32x4: compare the input lanes
+// against the Int32MinX4 and Int32MaxX4 constants and rejoin when both checks
+// pass; otherwise trap (wasm) or bail out.
+void
+CodeGeneratorX86Shared::visitOutOfLineSimdFloatToIntCheck(OutOfLineSimdFloatToIntCheck *ool)
+{
+    static const SimdConstant Int32MaxX4 = SimdConstant::SplatX4(2147483647.f);
+    static const SimdConstant Int32MinX4 = SimdConstant::SplatX4(-2147483648.f);
+
+    Label onConversionError;
+
+    FloatRegister input = ool->input();
+    Register temp = ool->temp();
+
+    ScratchSimd128Scope scratch(masm);
+    masm.loadConstantSimd128Float(Int32MinX4, scratch);
+    masm.vcmpleps(Operand(input), scratch, scratch);
+    masm.vmovmskps(scratch, temp);
+    // All four lanes must pass the lower-bound comparison.
+    masm.cmp32(temp, Imm32(15));
+    masm.j(Assembler::NotEqual, &onConversionError);
+
+    masm.loadConstantSimd128Float(Int32MaxX4, scratch);
+    masm.vcmpleps(Operand(input), scratch, scratch);
+    masm.vmovmskps(scratch, temp);
+    // No lane may pass the upper-bound comparison.
+    masm.cmp32(temp, Imm32(0));
+    masm.j(Assembler::NotEqual, &onConversionError);
+
+    masm.jump(ool->rejoin());
+
+    if (gen->compilingWasm()) {
+        masm.bindLater(&onConversionError, trap(ool, wasm::Trap::ImpreciseSimdConversion));
+    } else {
+        masm.bind(&onConversionError);
+        bailout(ool->ins()->snapshot());
+    }
+}
+
+// Convert Float32x4 to Uint32x4.
+//
+// If any input lane value is out of range or NaN, bail out.
+void
+CodeGeneratorX86Shared::visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins)
+{
+    // |temp| is a general-purpose temp used for the final lane-mask test;
+    // |tempF| is a SIMD temp used to build the non-A-lane mask.
+    const MSimdConvert* mir = ins->mir();
+    FloatRegister in = ToFloatRegister(ins->input());
+    FloatRegister out = ToFloatRegister(ins->output());
+    Register temp = ToRegister(ins->tempR());
+    FloatRegister tempF = ToFloatRegister(ins->tempF());
+
+    // Classify lane values into 4 disjoint classes:
+    //
+    //   N-lanes:      in <= -1.0
+    //   A-lanes:      -1.0 < in <= 0x0.ffffffp31
+    //   B-lanes:      0x1.0p31 <= in <= 0x0.ffffffp32
+    //   V-lanes:      0x1.0p32 <= in, or isnan(in)
+    //
+    // We need to bail out to throw a RangeError if we see any N-lanes or
+    // V-lanes.
+    //
+    // For A-lanes and B-lanes, we make two float -> int32 conversions:
+    //
+    //   A = cvttps2dq(in)
+    //   B = cvttps2dq(in - 0x1.0p31f)
+    //
+    // Note that the subtraction for the B computation is exact for B-lanes.
+    // There is no rounding, so B is the low 31 bits of the correctly converted
+    // result.
+    //
+    // The cvttps2dq instruction produces 0x80000000 when the input is NaN or
+    // out of range for a signed int32_t. This conveniently provides the missing
+    // high bit for B, so the desired result is A for A-lanes and A|B for
+    // B-lanes.
+
+    ScratchSimd128Scope scratch(masm);
+
+    // TODO: If the majority of lanes are A-lanes, it could be faster to compute
+    // A first, use vmovmskps to check for any non-A-lanes and handle them in
+    // ool code. OTOH, if we're wrong about the lane distribution, that would be
+    // slower.
+
+    // Compute B in |scratch|.
+    static const float Adjust = 0x80000000; // 0x1.0p31f for the benefit of MSVC.
+    static const SimdConstant Bias = SimdConstant::SplatX4(-Adjust);
+    masm.loadConstantSimd128Float(Bias, scratch);
+    masm.packedAddFloat32(Operand(in), scratch);
+    masm.convertFloat32x4ToInt32x4(scratch, scratch);
+
+    // Compute A in |out|. This is the last time we use |in| and the first time
+    // we use |out|, so we can tolerate if they are the same register.
+    masm.convertFloat32x4ToInt32x4(in, out);
+
+    // We can identify A-lanes by the sign bits in A: Any A-lanes will be
+    // positive in A, and N, B, and V-lanes will be 0x80000000 in A. Compute a
+    // mask of non-A-lanes into |tempF|.
+    masm.zeroSimd128Float(tempF);
+    masm.packedGreaterThanInt32x4(Operand(out), tempF);
+
+    // Clear the A-lanes in B.
+    masm.bitwiseAndSimd128(Operand(tempF), scratch);
+
+    // Compute the final result: A for A-lanes, A|B for B-lanes.
+    masm.bitwiseOrSimd128(Operand(scratch), out);
+
+    // We still need to filter out the V-lanes. They would show up as 0x80000000
+    // in both A and B. Since we cleared the valid A-lanes in B, the V-lanes are
+    // the remaining negative lanes in B.
+    masm.vmovmskps(scratch, temp);
+    masm.cmp32(temp, Imm32(0));
+
+    // A non-zero mask is the failure case: wasm compilation jumps to an
+    // ImpreciseSimdConversion trap, every other compilation bails out through
+    // the instruction's snapshot.
+    if (gen->compilingWasm())
+        masm.j(Assembler::NotEqual, trap(mir, wasm::Trap::ImpreciseSimdConversion));
+    else
+        bailoutIf(Assembler::NotEqual, ins->snapshot());
+}
+
+// Build an Int32x4 or Bool32x4 vector in the output register from the four
+// general-purpose register operands of |ins|.
+void
+CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4* ins)
+{
+    MOZ_ASSERT(ins->mir()->type() == MIRType::Int32x4 || ins->mir()->type() == MIRType::Bool32x4);
+
+    FloatRegister output = ToFloatRegister(ins->output());
+    if (AssemblerX86Shared::HasSSE41()) {
+        // SSE4.1 path: vmovd places operand 0 in |output|, then vpinsrd
+        // inserts operands 1..3 at lane index |i|.
+        masm.vmovd(ToRegister(ins->getOperand(0)), output);
+        for (size_t i = 1; i < 4; ++i) {
+            Register r = ToRegister(ins->getOperand(i));
+            masm.vpinsrd(i, r, output, output);
+        }
+        return;
+    }
+
+    // Fallback without SSE4.1: store the four operands to consecutive 32-bit
+    // slots of a freshly reserved Simd128DataSize stack area, load that area
+    // into |output| as one vector, then release the stack space.
+    masm.reserveStack(Simd128DataSize);
+    for (size_t i = 0; i < 4; ++i) {
+        Register r = ToRegister(ins->getOperand(i));
+        masm.store32(r, Address(StackPointer, i * sizeof(int32_t)));
+    }
+    masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
+    masm.freeStack(Simd128DataSize);
+}
+
+// Build a Float32x4 vector in the output register from the four float
+// register operands of |ins|, using one temp register.
+void
+CodeGeneratorX86Shared::visitSimdValueFloat32x4(LSimdValueFloat32x4* ins)
+{
+    MOZ_ASSERT(ins->mir()->type() == MIRType::Float32x4);
+
+    FloatRegister r0 = ToFloatRegister(ins->getOperand(0));
+    FloatRegister r1 = ToFloatRegister(ins->getOperand(1));
+    FloatRegister r2 = ToFloatRegister(ins->getOperand(2));
+    FloatRegister r3 = ToFloatRegister(ins->getOperand(3));
+    FloatRegister tmp = ToFloatRegister(ins->getTemp(0));
+    FloatRegister output = ToFloatRegister(ins->output());
+
+    // NOTE(review): reusedInputFloat32x4 is defined elsewhere; presumably it
+    // returns a register usable as the reused (destructive) input, copying
+    // into the second argument when required -- confirm in the MacroAssembler.
+    FloatRegister r0Copy = masm.reusedInputFloat32x4(r0, output);
+    FloatRegister r1Copy = masm.reusedInputFloat32x4(r1, tmp);
+
+    // Three unpack-low steps: r1 with r3 into |tmp|, r0 with r2 into |output|,
+    // then |output| with |tmp| into |output|.
+    // NOTE(review): presumably this interleaving yields lanes (r0, r1, r2, r3);
+    // verify against the vunpcklps operand order.
+    masm.vunpcklps(r3, r1Copy, tmp);
+    masm.vunpcklps(r2, r0Copy, output);
+    masm.vunpcklps(tmp, output, output);
+}
+
+// Splat a general-purpose register value across a 16-lane SIMD vector.
+void
+CodeGeneratorX86Shared::visitSimdSplatX16(LSimdSplatX16* ins)
+{
+    MOZ_ASSERT(SimdTypeToLength(ins->mir()->type()) == 16);
+    Register input = ToRegister(ins->getOperand(0));
+    FloatRegister output = ToFloatRegister(ins->output());
+    masm.vmovd(input, output);
+    if (AssemblerX86Shared::HasSSSE3()) {
+        // SSSE3 path: shuffle |output| with a zeroed control vector held in
+        // ScratchSimd128Reg.
+        // NOTE(review): presumably an all-zero pshufb control selects byte 0
+        // for every lane -- confirm against the instruction reference.
+        masm.zeroSimd128Int(ScratchSimd128Reg);
+        masm.vpshufb(ScratchSimd128Reg, output, output);
+    } else {
+        // Use two shifts to duplicate the low 8 bits into the low 16 bits.
+        masm.vpsllw(Imm32(8), output, output);
+        masm.vmovdqa(output, ScratchSimd128Reg);
+        masm.vpsrlw(Imm32(8), ScratchSimd128Reg, ScratchSimd128Reg);
+        masm.vpor(ScratchSimd128Reg, output, output);
+        // Then do an X8 splat.
+        masm.vpshuflw(0, output, output);
+        masm.vpshufd(0, output, output);
+    }
+}
+
+// Splat a general-purpose register value across an 8-lane SIMD vector using
+// vmovd followed by vpshuflw and vpshufd with an immediate of 0.
+void
+CodeGeneratorX86Shared::visitSimdSplatX8(LSimdSplatX8* ins)
+{
+    MOZ_ASSERT(SimdTypeToLength(ins->mir()->type()) == 8);
+    Register input = ToRegister(ins->getOperand(0));
+    FloatRegister output = ToFloatRegister(ins->output());
+    masm.vmovd(input, output);
+    masm.vpshuflw(0, output, output);
+    masm.vpshufd(0, output, output);
+}
+
+// Splat a scalar across a 4-lane SIMD vector. The operand is a float register
+// when the MIR type is Float32x4 and a general-purpose register otherwise.
+void
+CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4* ins)
+{
+    FloatRegister output = ToFloatRegister(ins->output());
+
+    MSimdSplat* mir = ins->mir();
+    MOZ_ASSERT(IsSimdType(mir->type()));
+    JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
+
+    if (mir->type() == MIRType::Float32x4) {
+        // Float path: shuffle the (possibly copied) input with itself using
+        // shuffle immediate 0.
+        FloatRegister r = ToFloatRegister(ins->getOperand(0));
+        FloatRegister rCopy = masm.reusedInputFloat32x4(r, output);
+        masm.vshufps(0, rCopy, rCopy, output);
+    } else {
+        // Integer path: move the GPR into |output|, then vpshufd with
+        // immediate 0.
+        Register r = ToRegister(ins->getOperand(0));
+        masm.vmovd(r, output);
+        masm.vpshufd(0, output, output);
+    }
+}
+
+// Reinterpret a SIMD value as another SIMD type. This emits at most one
+// register-to-register move.
+void
+CodeGeneratorX86Shared::visitSimdReinterpretCast(LSimdReinterpretCast* ins)
+{
+    FloatRegister input = ToFloatRegister(ins->input());
+    FloatRegister output = ToFloatRegister(ins->output());
+
+    // Nothing to emit when the input and output registers alias.
+    if (input.aliases(output))
+        return;
+
+    // Pick the move by the result type: vmovdqa for integer SIMD types,
+    // vmovaps for everything else.
+    if (IsIntegerSimdType(ins->mir()->type()))
+        masm.vmovdqa(input, output);
+    else
+        masm.vmovaps(input, output);
+}
+
+// Extract an integer lane from the 32x4 vector register |input| and place it in
+// |output|.
+void +CodeGeneratorX86Shared::emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane) +{ + if (lane == 0) { + // The value we want to extract is in the low double-word + masm.moveLowInt32(input, output); + } else if (AssemblerX86Shared::HasSSE41()) { + masm.vpextrd(lane, input, output); + } else { + uint32_t mask = MacroAssembler::ComputeShuffleMask(lane); + masm.shuffleInt32(mask, input, ScratchSimd128Reg); + masm.moveLowInt32(ScratchSimd128Reg, output); + } +} + +// Extract an integer lane from the 16x8 vector register |input|, sign- or +// zero-extend to 32 bits and place the result in |output|. +void +CodeGeneratorX86Shared::emitSimdExtractLane16x8(FloatRegister input, Register output, + unsigned lane, SimdSign signedness) +{ + // Unlike pextrd and pextrb, this is available in SSE2. + masm.vpextrw(lane, input, output); + + if (signedness == SimdSign::Signed) + masm.movswl(output, output); +} + +// Extract an integer lane from the 8x16 vector register |input|, sign- or +// zero-extend to 32 bits and place the result in |output|. +void +CodeGeneratorX86Shared::emitSimdExtractLane8x16(FloatRegister input, Register output, + unsigned lane, SimdSign signedness) +{ + if (AssemblerX86Shared::HasSSE41()) { + masm.vpextrb(lane, input, output); + // vpextrb clears the high bits, so no further extension required. + if (signedness == SimdSign::Unsigned) + signedness = SimdSign::NotApplicable; + } else { + // Extract the relevant 16 bits containing our lane, then shift the + // right 8 bits into place. + emitSimdExtractLane16x8(input, output, lane / 2, SimdSign::Unsigned); + if (lane % 2) { + masm.shrl(Imm32(8), output); + // The shrl handles the zero-extension. Don't repeat it. + if (signedness == SimdSign::Unsigned) + signedness = SimdSign::NotApplicable; + } + } + + // We have the right low 8 bits in |output|, but we may need to fix the high + // bits. Note that this requires |output| to be one of the %eax-%edx + // registers. 
+ switch (signedness) { + case SimdSign::Signed: + masm.movsbl(output, output); + break; + case SimdSign::Unsigned: + masm.movzbl(output, output); + break; + case SimdSign::NotApplicable: + // No adjustment needed. + break; + } +} + +void +CodeGeneratorX86Shared::visitSimdExtractElementB(LSimdExtractElementB* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + MSimdExtractElement* mir = ins->mir(); + unsigned length = SimdTypeToLength(mir->specialization()); + + switch (length) { + case 4: + emitSimdExtractLane32x4(input, output, mir->lane()); + break; + case 8: + // Get a lane, don't bother fixing the high bits since we'll mask below. + emitSimdExtractLane16x8(input, output, mir->lane(), SimdSign::NotApplicable); + break; + case 16: + emitSimdExtractLane8x16(input, output, mir->lane(), SimdSign::NotApplicable); + break; + default: + MOZ_CRASH("Unhandled SIMD length"); + } + + // We need to generate a 0/1 value. We have 0/-1 and possibly dirty high bits. 
+ masm.and32(Imm32(1), output); +} + +void +CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + MSimdExtractElement* mir = ins->mir(); + unsigned length = SimdTypeToLength(mir->specialization()); + + switch (length) { + case 4: + emitSimdExtractLane32x4(input, output, mir->lane()); + break; + case 8: + emitSimdExtractLane16x8(input, output, mir->lane(), mir->signedness()); + break; + case 16: + emitSimdExtractLane8x16(input, output, mir->lane(), mir->signedness()); + break; + default: + MOZ_CRASH("Unhandled SIMD length"); + } +} + +void +CodeGeneratorX86Shared::visitSimdExtractElementU2D(LSimdExtractElementU2D* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + Register temp = ToRegister(ins->temp()); + MSimdExtractElement* mir = ins->mir(); + MOZ_ASSERT(mir->specialization() == MIRType::Int32x4); + emitSimdExtractLane32x4(input, temp, mir->lane()); + masm.convertUInt32ToDouble(temp, output); +} + +void +CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + + unsigned lane = ins->mir()->lane(); + if (lane == 0) { + // The value we want to extract is in the low double-word + if (input != output) + masm.moveFloat32(input, output); + } else if (lane == 2) { + masm.moveHighPairToLowPairFloat32(input, output); + } else { + uint32_t mask = MacroAssembler::ComputeShuffleMask(lane); + masm.shuffleFloat32(mask, input, output); + } + // NaNs contained within SIMD values are not enforced to be canonical, so + // when we extract an element into a "regular" scalar JS value, we have to + // canonicalize. In wasm code, we can skip this, as wasm only has to + // canonicalize NaNs at FFI boundaries. 
+ if (!gen->compilingWasm()) + masm.canonicalizeFloat(output); +} + +void +CodeGeneratorX86Shared::visitSimdInsertElementI(LSimdInsertElementI* ins) +{ + FloatRegister vector = ToFloatRegister(ins->vector()); + Register value = ToRegister(ins->value()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(vector == output); // defineReuseInput(0) + + unsigned lane = ins->lane(); + unsigned length = ins->length(); + + if (length == 8) { + // Available in SSE 2. + masm.vpinsrw(lane, value, vector, output); + return; + } + + // Note that, contrarily to float32x4, we cannot use vmovd if the inserted + // value goes into the first component, as vmovd clears out the higher lanes + // of the output. + if (AssemblerX86Shared::HasSSE41()) { + // TODO: Teach Lowering that we don't need defineReuseInput if we have AVX. + switch (length) { + case 4: + masm.vpinsrd(lane, value, vector, output); + return; + case 16: + masm.vpinsrb(lane, value, vector, output); + return; + } + } + + masm.reserveStack(Simd128DataSize); + masm.storeAlignedSimd128Int(vector, Address(StackPointer, 0)); + switch (length) { + case 4: + masm.store32(value, Address(StackPointer, lane * sizeof(int32_t))); + break; + case 16: + // Note that this requires `value` to be in one the registers where the + // low 8 bits are addressible (%eax - %edx on x86, all of them on x86-64). 
+ masm.store8(value, Address(StackPointer, lane * sizeof(int8_t))); + break; + default: + MOZ_CRASH("Unsupported SIMD length"); + } + masm.loadAlignedSimd128Int(Address(StackPointer, 0), output); + masm.freeStack(Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdInsertElementF(LSimdInsertElementF* ins) +{ + FloatRegister vector = ToFloatRegister(ins->vector()); + FloatRegister value = ToFloatRegister(ins->value()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(vector == output); // defineReuseInput(0) + + if (ins->lane() == 0) { + // As both operands are registers, vmovss doesn't modify the upper bits + // of the destination operand. + if (value != output) + masm.vmovss(value, vector, output); + return; + } + + if (AssemblerX86Shared::HasSSE41()) { + // The input value is in the low float32 of the 'value' FloatRegister. + masm.vinsertps(masm.vinsertpsMask(0, ins->lane()), value, output, output); + return; + } + + unsigned component = unsigned(ins->lane()); + masm.reserveStack(Simd128DataSize); + masm.storeAlignedSimd128Float(vector, Address(StackPointer, 0)); + masm.storeFloat32(value, Address(StackPointer, component * sizeof(int32_t))); + masm.loadAlignedSimd128Float(Address(StackPointer, 0), output); + masm.freeStack(Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdAllTrue(LSimdAllTrue* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + + masm.vmovmskps(input, output); + masm.cmp32(output, Imm32(0xf)); + masm.emitSet(Assembler::Zero, output); +} + +void +CodeGeneratorX86Shared::visitSimdAnyTrue(LSimdAnyTrue* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + + masm.vmovmskps(input, output); + masm.cmp32(output, Imm32(0x0)); + masm.emitSet(Assembler::NonZero, output); +} + +template void +CodeGeneratorX86Shared::visitSimdGeneralShuffle(LSimdGeneralShuffleBase* ins, Reg tempRegister) +{ + 
MSimdGeneralShuffle* mir = ins->mir(); + unsigned numVectors = mir->numVectors(); + + Register laneTemp = ToRegister(ins->temp()); + + // This won't generate fast code, but it's fine because we expect users + // to have used constant indices (and thus MSimdGeneralShuffle to be fold + // into MSimdSwizzle/MSimdShuffle, which are fast). + + // We need stack space for the numVectors inputs and for the output vector. + unsigned stackSpace = Simd128DataSize * (numVectors + 1); + masm.reserveStack(stackSpace); + + for (unsigned i = 0; i < numVectors; i++) { + masm.storeAlignedVector(ToFloatRegister(ins->vector(i)), + Address(StackPointer, Simd128DataSize * (1 + i))); + } + + Label bail; + const Scale laneScale = ScaleFromElemWidth(sizeof(T)); + + for (size_t i = 0; i < mir->numLanes(); i++) { + Operand lane = ToOperand(ins->lane(i)); + + masm.cmp32(lane, Imm32(numVectors * mir->numLanes() - 1)); + masm.j(Assembler::Above, &bail); + + if (lane.kind() == Operand::REG) { + masm.loadScalar(Operand(StackPointer, ToRegister(ins->lane(i)), laneScale, Simd128DataSize), + tempRegister); + } else { + masm.load32(lane, laneTemp); + masm.loadScalar(Operand(StackPointer, laneTemp, laneScale, Simd128DataSize), tempRegister); + } + + masm.storeScalar(tempRegister, Address(StackPointer, i * sizeof(T))); + } + + FloatRegister output = ToFloatRegister(ins->output()); + masm.loadAlignedVector(Address(StackPointer, 0), output); + + Label join; + masm.jump(&join); + + { + masm.bind(&bail); + masm.freeStack(stackSpace); + bailout(ins->snapshot()); + } + + masm.bind(&join); + masm.setFramePushed(masm.framePushed() + stackSpace); + masm.freeStack(stackSpace); +} + +void +CodeGeneratorX86Shared::visitSimdGeneralShuffleI(LSimdGeneralShuffleI* ins) +{ + switch (ins->mir()->type()) { + case MIRType::Int8x16: + return visitSimdGeneralShuffle(ins, ToRegister(ins->temp())); + case MIRType::Int16x8: + return visitSimdGeneralShuffle(ins, ToRegister(ins->temp())); + case MIRType::Int32x4: + return 
visitSimdGeneralShuffle(ins, ToRegister(ins->temp())); + default: + MOZ_CRASH("unsupported type for general shuffle"); + } +} +void +CodeGeneratorX86Shared::visitSimdGeneralShuffleF(LSimdGeneralShuffleF* ins) +{ + ScratchFloat32Scope scratch(masm); + visitSimdGeneralShuffle(ins, scratch); +} + +void +CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + const unsigned numLanes = ins->numLanes(); + + switch (numLanes) { + case 4: { + uint32_t x = ins->lane(0); + uint32_t y = ins->lane(1); + uint32_t z = ins->lane(2); + uint32_t w = ins->lane(3); + + uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w); + masm.shuffleInt32(mask, input, output); + return; + } + } + + // In the general case, use pshufb if it is available. Convert to a + // byte-wise swizzle. + const unsigned bytesPerLane = 16 / numLanes; + int8_t bLane[16]; + for (unsigned i = 0; i < numLanes; i++) { + for (unsigned b = 0; b < bytesPerLane; b++) { + bLane[i * bytesPerLane + b] = ins->lane(i) * bytesPerLane + b; + } + } + + if (AssemblerX86Shared::HasSSSE3()) { + ScratchSimd128Scope scratch(masm); + masm.loadConstantSimd128Int(SimdConstant::CreateX16(bLane), scratch); + FloatRegister inputCopy = masm.reusedInputInt32x4(input, output); + masm.vpshufb(scratch, inputCopy, output); + return; + } + + // Worst-case fallback for pre-SSSE3 machines. Bounce through memory. 
+ Register temp = ToRegister(ins->getTemp(0)); + masm.reserveStack(2 * Simd128DataSize); + masm.storeAlignedSimd128Int(input, Address(StackPointer, Simd128DataSize)); + for (unsigned i = 0; i < 16; i++) { + masm.load8ZeroExtend(Address(StackPointer, Simd128DataSize + bLane[i]), temp); + masm.store8(temp, Address(StackPointer, i)); + } + masm.loadAlignedSimd128Int(Address(StackPointer, 0), output); + masm.freeStack(2 * Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(ins->numLanes() == 4); + + uint32_t x = ins->lane(0); + uint32_t y = ins->lane(1); + uint32_t z = ins->lane(2); + uint32_t w = ins->lane(3); + + if (AssemblerX86Shared::HasSSE3()) { + if (ins->lanesMatch(0, 0, 2, 2)) { + masm.vmovsldup(input, output); + return; + } + if (ins->lanesMatch(1, 1, 3, 3)) { + masm.vmovshdup(input, output); + return; + } + } + + // TODO Here and below, arch specific lowering could identify this pattern + // and use defineReuseInput to avoid this move (bug 1084404) + if (ins->lanesMatch(2, 3, 2, 3)) { + FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); + masm.vmovhlps(input, inputCopy, output); + return; + } + + if (ins->lanesMatch(0, 1, 0, 1)) { + if (AssemblerX86Shared::HasSSE3() && !AssemblerX86Shared::HasAVX()) { + masm.vmovddup(input, output); + return; + } + FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); + masm.vmovlhps(input, inputCopy, output); + return; + } + + if (ins->lanesMatch(0, 0, 1, 1)) { + FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); + masm.vunpcklps(input, inputCopy, output); + return; + } + + if (ins->lanesMatch(2, 2, 3, 3)) { + FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); + masm.vunpckhps(input, inputCopy, output); + return; + } + + uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w); + 
masm.shuffleFloat32(mask, input, output); +} + +void +CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + FloatRegister rhs = ToFloatRegister(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + const unsigned numLanes = ins->numLanes(); + const unsigned bytesPerLane = 16 / numLanes; + + // Convert the shuffle to a byte-wise shuffle. + uint8_t bLane[16]; + for (unsigned i = 0; i < numLanes; i++) { + for (unsigned b = 0; b < bytesPerLane; b++) { + bLane[i * bytesPerLane + b] = ins->lane(i) * bytesPerLane + b; + } + } + + // Use pshufb if it is available. + if (AssemblerX86Shared::HasSSSE3()) { + FloatRegister scratch1 = ToFloatRegister(ins->temp()); + ScratchSimd128Scope scratch2(masm); + + // Use pshufb instructions to gather the lanes from each source vector. + // A negative index creates a zero lane, so the two vectors can be combined. + + // Set scratch2 = lanes from lhs. + int8_t idx[16]; + for (unsigned i = 0; i < 16; i++) + idx[i] = bLane[i] < 16 ? bLane[i] : -1; + masm.loadConstantSimd128Int(SimdConstant::CreateX16(idx), scratch1); + FloatRegister lhsCopy = masm.reusedInputInt32x4(lhs, scratch2); + masm.vpshufb(scratch1, lhsCopy, scratch2); + + // Set output = lanes from rhs. + for (unsigned i = 0; i < 16; i++) + idx[i] = bLane[i] >= 16 ? bLane[i] - 16 : -1; + masm.loadConstantSimd128Int(SimdConstant::CreateX16(idx), scratch1); + FloatRegister rhsCopy = masm.reusedInputInt32x4(rhs, output); + masm.vpshufb(scratch1, rhsCopy, output); + + // Combine. + masm.vpor(scratch2, output, output); + return; + } + + // Worst-case fallback for pre-SSE3 machines. Bounce through memory. 
+ Register temp = ToRegister(ins->getTemp(0)); + masm.reserveStack(3 * Simd128DataSize); + masm.storeAlignedSimd128Int(lhs, Address(StackPointer, Simd128DataSize)); + masm.storeAlignedSimd128Int(rhs, Address(StackPointer, 2 * Simd128DataSize)); + for (unsigned i = 0; i < 16; i++) { + masm.load8ZeroExtend(Address(StackPointer, Simd128DataSize + bLane[i]), temp); + masm.store8(temp, Address(StackPointer, i)); + } + masm.loadAlignedSimd128Int(Address(StackPointer, 0), output); + masm.freeStack(3 * Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdShuffleX4(LSimdShuffleX4* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister out = ToFloatRegister(ins->output()); + + uint32_t x = ins->lane(0); + uint32_t y = ins->lane(1); + uint32_t z = ins->lane(2); + uint32_t w = ins->lane(3); + + // Check that lanes come from LHS in majority: + unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4); + MOZ_ASSERT(numLanesFromLHS >= 2); + + // When reading this method, remember that vshufps takes the two first + // inputs of the destination operand (right operand) and the two last + // inputs of the source operand (left operand). + // + // Legend for explanations: + // - L: LHS + // - R: RHS + // - T: temporary + + uint32_t mask; + + // If all lanes came from a single vector, we should have constructed a + // MSimdSwizzle instead. + MOZ_ASSERT(numLanesFromLHS < 4); + + // If all values stay in their lane, this is a blend. + if (AssemblerX86Shared::HasSSE41()) { + if (x % 4 == 0 && y % 4 == 1 && z % 4 == 2 && w % 4 == 3) { + masm.vblendps(masm.blendpsMask(x >= 4, y >= 4, z >= 4, w >= 4), rhs, lhs, out); + return; + } + } + + // One element of the second, all other elements of the first + if (numLanesFromLHS == 3) { + unsigned firstMask = -1, secondMask = -1; + + // register-register vmovss preserves the high lanes. 
+ if (ins->lanesMatch(4, 1, 2, 3) && rhs.kind() == Operand::FPREG) { + masm.vmovss(FloatRegister::FromCode(rhs.fpu()), lhs, out); + return; + } + + // SSE4.1 vinsertps can handle any single element. + unsigned numLanesUnchanged = (x == 0) + (y == 1) + (z == 2) + (w == 3); + if (AssemblerX86Shared::HasSSE41() && numLanesUnchanged == 3) { + unsigned srcLane; + unsigned dstLane; + if (x >= 4) { + srcLane = x - 4; + dstLane = 0; + } else if (y >= 4) { + srcLane = y - 4; + dstLane = 1; + } else if (z >= 4) { + srcLane = z - 4; + dstLane = 2; + } else { + MOZ_ASSERT(w >= 4); + srcLane = w - 4; + dstLane = 3; + } + masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, lhs, out); + return; + } + + FloatRegister rhsCopy = ToFloatRegister(ins->temp()); + + if (x < 4 && y < 4) { + if (w >= 4) { + w %= 4; + // T = (Rw Rw Lz Lz) = vshufps(firstMask, lhs, rhs, rhsCopy) + firstMask = MacroAssembler::ComputeShuffleMask(w, w, z, z); + // (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = vshufps(secondMask, T, lhs, out) + secondMask = MacroAssembler::ComputeShuffleMask(x, y, 2, 0); + } else { + MOZ_ASSERT(z >= 4); + z %= 4; + // T = (Rz Rz Lw Lw) = vshufps(firstMask, lhs, rhs, rhsCopy) + firstMask = MacroAssembler::ComputeShuffleMask(z, z, w, w); + // (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = vshufps(secondMask, T, lhs, out) + secondMask = MacroAssembler::ComputeShuffleMask(x, y, 0, 2); + } + + masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy); + masm.vshufps(secondMask, rhsCopy, lhs, out); + return; + } + + MOZ_ASSERT(z < 4 && w < 4); + + if (y >= 4) { + y %= 4; + // T = (Ry Ry Lx Lx) = vshufps(firstMask, lhs, rhs, rhsCopy) + firstMask = MacroAssembler::ComputeShuffleMask(y, y, x, x); + // (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = vshufps(secondMask, lhs, T, out) + secondMask = MacroAssembler::ComputeShuffleMask(2, 0, z, w); + } else { + MOZ_ASSERT(x >= 4); + x %= 4; + // T = (Rx Rx Ly Ly) = vshufps(firstMask, lhs, rhs, rhsCopy) + firstMask = MacroAssembler::ComputeShuffleMask(x, x, y, y); + // (Rx Ly Lz Lw) = (Tx Tz 
Lz Lw) = vshufps(secondMask, lhs, T, out) + secondMask = MacroAssembler::ComputeShuffleMask(0, 2, z, w); + } + + masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy); + if (AssemblerX86Shared::HasAVX()) { + masm.vshufps(secondMask, lhs, rhsCopy, out); + } else { + masm.vshufps(secondMask, lhs, rhsCopy, rhsCopy); + masm.moveSimd128Float(rhsCopy, out); + } + return; + } + + // Two elements from one vector, two other elements from the other + MOZ_ASSERT(numLanesFromLHS == 2); + + // TODO Here and below, symmetric case would be more handy to avoid a move, + // but can't be reached because operands would get swapped (bug 1084404). + if (ins->lanesMatch(2, 3, 6, 7)) { + ScratchSimd128Scope scratch(masm); + if (AssemblerX86Shared::HasAVX()) { + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); + masm.vmovhlps(lhs, rhsCopy, out); + } else { + masm.loadAlignedSimd128Float(rhs, scratch); + masm.vmovhlps(lhs, scratch, scratch); + masm.moveSimd128Float(scratch, out); + } + return; + } + + if (ins->lanesMatch(0, 1, 4, 5)) { + FloatRegister rhsCopy; + ScratchSimd128Scope scratch(masm); + if (rhs.kind() == Operand::FPREG) { + // No need to make an actual copy, since the operand is already + // in a register, and it won't be clobbered by the vmovlhps. 
+ rhsCopy = FloatRegister::FromCode(rhs.fpu()); + } else { + masm.loadAlignedSimd128Float(rhs, scratch); + rhsCopy = scratch; + } + masm.vmovlhps(rhsCopy, lhs, out); + return; + } + + if (ins->lanesMatch(0, 4, 1, 5)) { + masm.vunpcklps(rhs, lhs, out); + return; + } + + // TODO swapped case would be better (bug 1084404) + if (ins->lanesMatch(4, 0, 5, 1)) { + ScratchSimd128Scope scratch(masm); + if (AssemblerX86Shared::HasAVX()) { + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); + masm.vunpcklps(lhs, rhsCopy, out); + } else { + masm.loadAlignedSimd128Float(rhs, scratch); + masm.vunpcklps(lhs, scratch, scratch); + masm.moveSimd128Float(scratch, out); + } + return; + } + + if (ins->lanesMatch(2, 6, 3, 7)) { + masm.vunpckhps(rhs, lhs, out); + return; + } + + // TODO swapped case would be better (bug 1084404) + if (ins->lanesMatch(6, 2, 7, 3)) { + ScratchSimd128Scope scratch(masm); + if (AssemblerX86Shared::HasAVX()) { + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); + masm.vunpckhps(lhs, rhsCopy, out); + } else { + masm.loadAlignedSimd128Float(rhs, scratch); + masm.vunpckhps(lhs, scratch, scratch); + masm.moveSimd128Float(scratch, out); + } + return; + } + + // In one vshufps + if (x < 4 && y < 4) { + mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4); + masm.vshufps(mask, rhs, lhs, out); + return; + } + + // At creation, we should have explicitly swapped in this case. 
+ MOZ_ASSERT(!(z >= 4 && w >= 4)); + + // In two vshufps, for the most generic case: + uint32_t firstMask[4], secondMask[4]; + unsigned i = 0, j = 2, k = 0; + +#define COMPUTE_MASK(lane) \ + if (lane >= 4) { \ + firstMask[j] = lane % 4; \ + secondMask[k++] = j++; \ + } else { \ + firstMask[i] = lane; \ + secondMask[k++] = i++; \ + } + + COMPUTE_MASK(x) + COMPUTE_MASK(y) + COMPUTE_MASK(z) + COMPUTE_MASK(w) +#undef COMPUTE_MASK + + MOZ_ASSERT(i == 2 && j == 4 && k == 4); + + mask = MacroAssembler::ComputeShuffleMask(firstMask[0], firstMask[1], + firstMask[2], firstMask[3]); + masm.vshufps(mask, rhs, lhs, lhs); + + mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1], + secondMask[2], secondMask[3]); + masm.vshufps(mask, lhs, lhs, lhs); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryCompIx16(LSimdBinaryCompIx16* ins) +{ + static const SimdConstant allOnes = SimdConstant::SplatX16(-1); + + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryComp::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryComp::greaterThan: + masm.vpcmpgtb(rhs, lhs, output); + return; + case MSimdBinaryComp::equal: + masm.vpcmpeqb(rhs, lhs, output); + return; + case MSimdBinaryComp::lessThan: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + + // src := src > lhs (i.e. lhs < rhs) + // Improve by doing custom lowering (rhs is tied to the output register) + masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch); + masm.moveSimd128Int(scratch, output); + return; + case MSimdBinaryComp::notEqual: + // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we + // should invert the comparison by, e.g. 
swapping the arms of a select + // if that's what it's used in. + masm.loadConstantSimd128Int(allOnes, scratch); + masm.vpcmpeqb(rhs, lhs, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + case MSimdBinaryComp::greaterThanOrEqual: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch); + masm.loadConstantSimd128Int(allOnes, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + case MSimdBinaryComp::lessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. + masm.loadConstantSimd128Int(allOnes, scratch); + masm.vpcmpgtb(rhs, lhs, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryCompIx8(LSimdBinaryCompIx8* ins) +{ + static const SimdConstant allOnes = SimdConstant::SplatX8(-1); + + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryComp::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryComp::greaterThan: + masm.vpcmpgtw(rhs, lhs, output); + return; + case MSimdBinaryComp::equal: + masm.vpcmpeqw(rhs, lhs, output); + return; + case MSimdBinaryComp::lessThan: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + + // src := src > lhs (i.e. 
lhs < rhs) + // Improve by doing custom lowering (rhs is tied to the output register) + masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch); + masm.moveSimd128Int(scratch, output); + return; + case MSimdBinaryComp::notEqual: + // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we + // should invert the comparison by, e.g. swapping the arms of a select + // if that's what it's used in. + masm.loadConstantSimd128Int(allOnes, scratch); + masm.vpcmpeqw(rhs, lhs, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + case MSimdBinaryComp::greaterThanOrEqual: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch); + masm.loadConstantSimd128Int(allOnes, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + case MSimdBinaryComp::lessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. 
+ masm.loadConstantSimd128Int(allOnes, scratch); + masm.vpcmpgtw(rhs, lhs, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins) +{ + static const SimdConstant allOnes = SimdConstant::SplatX4(-1); + + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryComp::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryComp::greaterThan: + masm.packedGreaterThanInt32x4(rhs, lhs); + return; + case MSimdBinaryComp::equal: + masm.packedEqualInt32x4(rhs, lhs); + return; + case MSimdBinaryComp::lessThan: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + + // src := src > lhs (i.e. lhs < rhs) + // Improve by doing custom lowering (rhs is tied to the output register) + masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch); + masm.moveSimd128Int(scratch, lhs); + return; + case MSimdBinaryComp::notEqual: + // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we + // should invert the comparison by, e.g. swapping the arms of a select + // if that's what it's used in. 
+ masm.loadConstantSimd128Int(allOnes, scratch); + masm.packedEqualInt32x4(rhs, lhs); + masm.bitwiseXorSimd128(Operand(scratch), lhs); + return; + case MSimdBinaryComp::greaterThanOrEqual: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch); + masm.loadConstantSimd128Int(allOnes, lhs); + masm.bitwiseXorSimd128(Operand(scratch), lhs); + return; + case MSimdBinaryComp::lessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. + masm.loadConstantSimd128Int(allOnes, scratch); + masm.packedGreaterThanInt32x4(rhs, lhs); + masm.bitwiseXorSimd128(Operand(scratch), lhs); + return; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryCompFx4(LSimdBinaryCompFx4* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdBinaryComp::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryComp::equal: + masm.vcmpeqps(rhs, lhs, output); + return; + case MSimdBinaryComp::lessThan: + masm.vcmpltps(rhs, lhs, output); + return; + case MSimdBinaryComp::lessThanOrEqual: + masm.vcmpleps(rhs, lhs, output); + return; + case MSimdBinaryComp::notEqual: + masm.vcmpneqps(rhs, lhs, output); + return; + case MSimdBinaryComp::greaterThanOrEqual: + case MSimdBinaryComp::greaterThan: + // We reverse these before register allocation so that we don't have to + // copy into and out of temporaries after codegen. 
+ MOZ_CRASH("lowering should have reversed this"); + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryArithIx16(LSimdBinaryArithIx16* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdBinaryArith::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryArith::Op_add: + masm.vpaddb(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_sub: + masm.vpsubb(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_mul: + // 8x16 mul is a valid operation, but not supported in SSE or AVX. + // The operation is synthesized from 16x8 multiplies by + // MSimdBinaryArith::AddLegalized(). + break; + case MSimdBinaryArith::Op_div: + case MSimdBinaryArith::Op_max: + case MSimdBinaryArith::Op_min: + case MSimdBinaryArith::Op_minNum: + case MSimdBinaryArith::Op_maxNum: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryArithIx8(LSimdBinaryArithIx8* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdBinaryArith::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryArith::Op_add: + masm.vpaddw(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_sub: + masm.vpsubw(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_mul: + masm.vpmullw(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_div: + case MSimdBinaryArith::Op_max: + case MSimdBinaryArith::Op_min: + case MSimdBinaryArith::Op_minNum: + case MSimdBinaryArith::Op_maxNum: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchSimd128Scope 
scratch(masm); + + MSimdBinaryArith::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryArith::Op_add: + masm.vpaddd(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_sub: + masm.vpsubd(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_mul: { + if (AssemblerX86Shared::HasSSE41()) { + masm.vpmulld(rhs, lhs, output); + return; + } + + masm.loadAlignedSimd128Int(rhs, scratch); + masm.vpmuludq(lhs, scratch, scratch); + // scratch contains (Rx, _, Rz, _) where R is the resulting vector. + + FloatRegister temp = ToFloatRegister(ins->temp()); + masm.vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), lhs, lhs); + masm.vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), rhs, temp); + masm.vpmuludq(temp, lhs, lhs); + // lhs contains (Ry, _, Rw, _) where R is the resulting vector. + + masm.vshufps(MacroAssembler::ComputeShuffleMask(0, 2, 0, 2), scratch, lhs, lhs); + // lhs contains (Ry, Rw, Rx, Rz) + masm.vshufps(MacroAssembler::ComputeShuffleMask(2, 0, 3, 1), lhs, lhs, lhs); + return; + } + case MSimdBinaryArith::Op_div: + // x86 doesn't have SIMD i32 div. + break; + case MSimdBinaryArith::Op_max: + // we can do max with a single instruction only if we have SSE4.1 + // using the PMAXSD instruction. + break; + case MSimdBinaryArith::Op_min: + // we can do max with a single instruction only if we have SSE4.1 + // using the PMINSD instruction. 
+ break; + case MSimdBinaryArith::Op_minNum: + case MSimdBinaryArith::Op_maxNum: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryArith::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryArith::Op_add: + masm.vaddps(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_sub: + masm.vsubps(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_mul: + masm.vmulps(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_div: + masm.vdivps(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_max: { + FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, scratch); + masm.vcmpunordps(rhs, lhsCopy, scratch); + + FloatRegister tmp = ToFloatRegister(ins->temp()); + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, tmp); + masm.vmaxps(Operand(lhs), rhsCopy, tmp); + masm.vmaxps(rhs, lhs, output); + + masm.vandps(tmp, output, output); + masm.vorps(scratch, output, output); // or in the all-ones NaNs + return; + } + case MSimdBinaryArith::Op_min: { + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); + masm.vminps(Operand(lhs), rhsCopy, scratch); + masm.vminps(rhs, lhs, output); + masm.vorps(scratch, output, output); // NaN or'd with arbitrary bits is NaN + return; + } + case MSimdBinaryArith::Op_minNum: { + FloatRegister tmp = ToFloatRegister(ins->temp()); + masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp); + + FloatRegister mask = scratch; + FloatRegister tmpCopy = masm.reusedInputFloat32x4(tmp, scratch); + masm.vpcmpeqd(Operand(lhs), tmpCopy, mask); + masm.vandps(tmp, mask, mask); + + FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp); + masm.vminps(rhs, lhsCopy, tmp); + masm.vorps(mask, tmp, 
tmp); + + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask); + masm.vcmpneqps(rhs, rhsCopy, mask); + + if (AssemblerX86Shared::HasAVX()) { + masm.vblendvps(mask, lhs, tmp, output); + } else { + // Emulate vblendvps. + // With SSE.4.1 we could use blendvps, however it's awkward since + // it requires the mask to be in xmm0. + if (lhs != output) + masm.moveSimd128Float(lhs, output); + masm.vandps(Operand(mask), output, output); + masm.vandnps(Operand(tmp), mask, mask); + masm.vorps(Operand(mask), output, output); + } + return; + } + case MSimdBinaryArith::Op_maxNum: { + FloatRegister mask = scratch; + masm.loadConstantSimd128Int(SimdConstant::SplatX4(0), mask); + masm.vpcmpeqd(Operand(lhs), mask, mask); + + FloatRegister tmp = ToFloatRegister(ins->temp()); + masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp); + masm.vandps(tmp, mask, mask); + + FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp); + masm.vmaxps(rhs, lhsCopy, tmp); + masm.vandnps(Operand(tmp), mask, mask); + + // Ensure tmp always contains the temporary result + mask = tmp; + tmp = scratch; + + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask); + masm.vcmpneqps(rhs, rhsCopy, mask); + + if (AssemblerX86Shared::HasAVX()) { + masm.vblendvps(mask, lhs, tmp, output); + } else { + // Emulate vblendvps. + // With SSE.4.1 we could use blendvps, however it's awkward since + // it requires the mask to be in xmm0. 
+ if (lhs != output) + masm.moveSimd128Float(lhs, output); + masm.vandps(Operand(mask), output, output); + masm.vandnps(Operand(tmp), mask, mask); + masm.vorps(Operand(mask), output, output); + } + return; + } + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinarySaturating(LSimdBinarySaturating* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + SimdSign sign = ins->signedness(); + MOZ_ASSERT(sign != SimdSign::NotApplicable); + + switch (ins->type()) { + case MIRType::Int8x16: + switch (ins->operation()) { + case MSimdBinarySaturating::add: + if (sign == SimdSign::Signed) + masm.vpaddsb(rhs, lhs, output); + else + masm.vpaddusb(rhs, lhs, output); + return; + case MSimdBinarySaturating::sub: + if (sign == SimdSign::Signed) + masm.vpsubsb(rhs, lhs, output); + else + masm.vpsubusb(rhs, lhs, output); + return; + } + break; + + case MIRType::Int16x8: + switch (ins->operation()) { + case MSimdBinarySaturating::add: + if (sign == SimdSign::Signed) + masm.vpaddsw(rhs, lhs, output); + else + masm.vpaddusw(rhs, lhs, output); + return; + case MSimdBinarySaturating::sub: + if (sign == SimdSign::Signed) + masm.vpsubsw(rhs, lhs, output); + else + masm.vpsubusw(rhs, lhs, output); + return; + } + break; + + default: + break; + } + MOZ_CRASH("unsupported type for SIMD saturating arithmetic"); +} + +void +CodeGeneratorX86Shared::visitSimdUnaryArithIx16(LSimdUnaryArithIx16* ins) +{ + Operand in = ToOperand(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + + static const SimdConstant allOnes = SimdConstant::SplatX16(-1); + + switch (ins->operation()) { + case MSimdUnaryArith::neg: + masm.zeroSimd128Int(out); + masm.packedSubInt8(in, out); + return; + case MSimdUnaryArith::not_: + masm.loadConstantSimd128Int(allOnes, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::abs: + case 
MSimdUnaryArith::reciprocalApproximation: + case MSimdUnaryArith::reciprocalSqrtApproximation: + case MSimdUnaryArith::sqrt: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdUnaryArithIx8(LSimdUnaryArithIx8* ins) +{ + Operand in = ToOperand(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + + static const SimdConstant allOnes = SimdConstant::SplatX8(-1); + + switch (ins->operation()) { + case MSimdUnaryArith::neg: + masm.zeroSimd128Int(out); + masm.packedSubInt16(in, out); + return; + case MSimdUnaryArith::not_: + masm.loadConstantSimd128Int(allOnes, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::abs: + case MSimdUnaryArith::reciprocalApproximation: + case MSimdUnaryArith::reciprocalSqrtApproximation: + case MSimdUnaryArith::sqrt: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4* ins) +{ + Operand in = ToOperand(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + + static const SimdConstant allOnes = SimdConstant::SplatX4(-1); + + switch (ins->operation()) { + case MSimdUnaryArith::neg: + masm.zeroSimd128Int(out); + masm.packedSubInt32(in, out); + return; + case MSimdUnaryArith::not_: + masm.loadConstantSimd128Int(allOnes, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::abs: + case MSimdUnaryArith::reciprocalApproximation: + case MSimdUnaryArith::reciprocalSqrtApproximation: + case MSimdUnaryArith::sqrt: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdUnaryArithFx4(LSimdUnaryArithFx4* ins) +{ + Operand in = ToOperand(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + + // All ones but the sign bit + float signMask = SpecificNaN(0, FloatingPoint::kSignificandBits); + static const SimdConstant signMasks = SimdConstant::SplatX4(signMask); + + // All ones including the sign bit + float ones = 
SpecificNaN(1, FloatingPoint::kSignificandBits); + static const SimdConstant allOnes = SimdConstant::SplatX4(ones); + + // All zeros but the sign bit + static const SimdConstant minusZero = SimdConstant::SplatX4(-0.f); + + switch (ins->operation()) { + case MSimdUnaryArith::abs: + masm.loadConstantSimd128Float(signMasks, out); + masm.bitwiseAndSimd128(in, out); + return; + case MSimdUnaryArith::neg: + masm.loadConstantSimd128Float(minusZero, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::not_: + masm.loadConstantSimd128Float(allOnes, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::reciprocalApproximation: + masm.packedRcpApproximationFloat32x4(in, out); + return; + case MSimdUnaryArith::reciprocalSqrtApproximation: + masm.packedRcpSqrtApproximationFloat32x4(in, out); + return; + case MSimdUnaryArith::sqrt: + masm.packedSqrtFloat32x4(in, out); + return; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryBitwise(LSimdBinaryBitwise* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdBinaryBitwise::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryBitwise::and_: + if (ins->type() == MIRType::Float32x4) + masm.vandps(rhs, lhs, output); + else + masm.vpand(rhs, lhs, output); + return; + case MSimdBinaryBitwise::or_: + if (ins->type() == MIRType::Float32x4) + masm.vorps(rhs, lhs, output); + else + masm.vpor(rhs, lhs, output); + return; + case MSimdBinaryBitwise::xor_: + if (ins->type() == MIRType::Float32x4) + masm.vxorps(rhs, lhs, output); + else + masm.vpxor(rhs, lhs, output); + return; + } + MOZ_CRASH("unexpected SIMD bitwise op"); +} + +void +CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins) +{ + FloatRegister out = ToFloatRegister(ins->output()); + MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0); + + // The shift 
amount is masked to the number of bits in a lane. + uint32_t shiftmask = (128u / SimdTypeToLength(ins->type())) - 1; + + // Note that SSE doesn't have instructions for shifting 8x16 vectors. + // These shifts are synthesized by the MSimdShift::AddLegalized() function. + const LAllocation* val = ins->value(); + if (val->isConstant()) { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + Imm32 count(uint32_t(ToInt32(val)) & shiftmask); + switch (ins->type()) { + case MIRType::Int16x8: + switch (ins->operation()) { + case MSimdShift::lsh: + masm.packedLeftShiftByScalarInt16x8(count, out); + return; + case MSimdShift::rsh: + masm.packedRightShiftByScalarInt16x8(count, out); + return; + case MSimdShift::ursh: + masm.packedUnsignedRightShiftByScalarInt16x8(count, out); + return; + } + break; + case MIRType::Int32x4: + switch (ins->operation()) { + case MSimdShift::lsh: + masm.packedLeftShiftByScalarInt32x4(count, out); + return; + case MSimdShift::rsh: + masm.packedRightShiftByScalarInt32x4(count, out); + return; + case MSimdShift::ursh: + masm.packedUnsignedRightShiftByScalarInt32x4(count, out); + return; + } + break; + default: + MOZ_CRASH("unsupported type for SIMD shifts"); + } + MOZ_CRASH("unexpected SIMD bitwise op"); + } + + // Truncate val to 5 bits. We should have a temp register for that. 
+ MOZ_ASSERT(val->isRegister()); + Register count = ToRegister(ins->temp()); + masm.mov(ToRegister(val), count); + masm.andl(Imm32(shiftmask), count); + ScratchFloat32Scope scratch(masm); + masm.vmovd(count, scratch); + + switch (ins->type()) { + case MIRType::Int16x8: + switch (ins->operation()) { + case MSimdShift::lsh: + masm.packedLeftShiftByScalarInt16x8(scratch, out); + return; + case MSimdShift::rsh: + masm.packedRightShiftByScalarInt16x8(scratch, out); + return; + case MSimdShift::ursh: + masm.packedUnsignedRightShiftByScalarInt16x8(scratch, out); + return; + } + break; + case MIRType::Int32x4: + switch (ins->operation()) { + case MSimdShift::lsh: + masm.packedLeftShiftByScalarInt32x4(scratch, out); + return; + case MSimdShift::rsh: + masm.packedRightShiftByScalarInt32x4(scratch, out); + return; + case MSimdShift::ursh: + masm.packedUnsignedRightShiftByScalarInt32x4(scratch, out); + return; + } + break; + default: + MOZ_CRASH("unsupported type for SIMD shifts"); + } + MOZ_CRASH("unexpected SIMD bitwise op"); +} + +void +CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect* ins) +{ + FloatRegister mask = ToFloatRegister(ins->mask()); + FloatRegister onTrue = ToFloatRegister(ins->lhs()); + FloatRegister onFalse = ToFloatRegister(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + FloatRegister temp = ToFloatRegister(ins->temp()); + + if (onTrue != output) + masm.vmovaps(onTrue, output); + if (mask != temp) + masm.vmovaps(mask, temp); + + MSimdSelect* mir = ins->mir(); + unsigned lanes = SimdTypeToLength(mir->type()); + + if (AssemblerX86Shared::HasAVX() && lanes == 4) { + // TBD: Use vpblendvb for lanes > 4, HasAVX. + masm.vblendvps(mask, onTrue, onFalse, output); + return; + } + + // SSE4.1 has plain blendvps which can do this, but it is awkward + // to use because it requires the mask to be in xmm0. 
+ + masm.bitwiseAndSimd128(Operand(temp), output); + masm.bitwiseAndNotSimd128(Operand(onFalse), temp); + masm.bitwiseOrSimd128(Operand(temp), output); +} + +void +CodeGeneratorX86Shared::visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir) +{ + Register elements = ToRegister(lir->elements()); + AnyRegister output = ToAnyRegister(lir->output()); + Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); + + Register oldval = ToRegister(lir->oldval()); + Register newval = ToRegister(lir->newval()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + int width = Scalar::byteSize(arrayType); + + if (lir->index()->isConstant()) { + Address dest(elements, ToInt32(lir->index()) * width); + masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); + masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output); + } +} + +void +CodeGeneratorX86Shared::visitAtomicExchangeTypedArrayElement(LAtomicExchangeTypedArrayElement* lir) +{ + Register elements = ToRegister(lir->elements()); + AnyRegister output = ToAnyRegister(lir->output()); + Register temp = lir->temp()->isBogusTemp() ? 
InvalidReg : ToRegister(lir->temp()); + + Register value = ToRegister(lir->value()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + int width = Scalar::byteSize(arrayType); + + if (lir->index()->isConstant()) { + Address dest(elements, ToInt32(lir->index()) * width); + masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); + masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output); + } +} + +template +void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, + const T& mem, Register temp1, Register temp2, AnyRegister output) +{ + switch (arrayType) { + case Scalar::Int8: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd8SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub8SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd8SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr8SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor8SignExtend(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Uint8: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd8ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub8ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd8ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr8ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor8ZeroExtend(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case 
Scalar::Int16: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd16SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub16SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd16SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr16SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor16SignExtend(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Uint16: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd16ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub16ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd16ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr16ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor16ZeroExtend(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Int32: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd32(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub32(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd32(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr32(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor32(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Uint32: + // At the moment, the code in MCallOptimize.cpp requires the output + // type to be double for uint32 arrays. See bug 1077305. 
+ MOZ_ASSERT(output.isFloat()); + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd32(value, mem, InvalidReg, temp1); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub32(value, mem, InvalidReg, temp1); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd32(value, mem, temp2, temp1); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr32(value, mem, temp2, temp1); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor32(value, mem, temp2, temp1); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + masm.convertUInt32ToDouble(temp1, output.fpu()); + break; + default: + MOZ_CRASH("Invalid typed array type"); + } +} + +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Imm32& value, const Address& mem, + Register temp1, Register temp2, AnyRegister output); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Imm32& value, const BaseIndex& mem, + Register temp1, Register temp2, AnyRegister output); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Register& value, const Address& mem, + Register temp1, Register temp2, AnyRegister output); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Register& value, const BaseIndex& mem, + Register temp1, Register temp2, AnyRegister output); + +// Binary operation for effect, result discarded. 
+template +void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, + const T& mem) +{ + switch (arrayType) { + case Scalar::Int8: + case Scalar::Uint8: + switch (op) { + case AtomicFetchAddOp: + masm.atomicAdd8(value, mem); + break; + case AtomicFetchSubOp: + masm.atomicSub8(value, mem); + break; + case AtomicFetchAndOp: + masm.atomicAnd8(value, mem); + break; + case AtomicFetchOrOp: + masm.atomicOr8(value, mem); + break; + case AtomicFetchXorOp: + masm.atomicXor8(value, mem); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Int16: + case Scalar::Uint16: + switch (op) { + case AtomicFetchAddOp: + masm.atomicAdd16(value, mem); + break; + case AtomicFetchSubOp: + masm.atomicSub16(value, mem); + break; + case AtomicFetchAndOp: + masm.atomicAnd16(value, mem); + break; + case AtomicFetchOrOp: + masm.atomicOr16(value, mem); + break; + case AtomicFetchXorOp: + masm.atomicXor16(value, mem); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Int32: + case Scalar::Uint32: + switch (op) { + case AtomicFetchAddOp: + masm.atomicAdd32(value, mem); + break; + case AtomicFetchSubOp: + masm.atomicSub32(value, mem); + break; + case AtomicFetchAndOp: + masm.atomicAnd32(value, mem); + break; + case AtomicFetchOrOp: + masm.atomicOr32(value, mem); + break; + case AtomicFetchXorOp: + masm.atomicXor32(value, mem); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + default: + MOZ_CRASH("Invalid typed array type"); + } +} + +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Imm32& value, const Address& mem); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Imm32& value, const BaseIndex& mem); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, 
Scalar::Type arrayType, + const Register& value, const Address& mem); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Register& value, const BaseIndex& mem); + + +template +static inline void +AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op, + Scalar::Type arrayType, const LAllocation* value, const T& mem, + Register temp1, Register temp2, AnyRegister output) +{ + if (value->isConstant()) + cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, temp1, temp2, output); + else + cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, temp1, temp2, output); +} + +void +CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir) +{ + MOZ_ASSERT(lir->mir()->hasUses()); + + AnyRegister output = ToAnyRegister(lir->output()); + Register elements = ToRegister(lir->elements()); + Register temp1 = lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1()); + Register temp2 = lir->temp2()->isBogusTemp() ? 
InvalidReg : ToRegister(lir->temp2()); + const LAllocation* value = lir->value(); + + Scalar::Type arrayType = lir->mir()->arrayType(); + int width = Scalar::byteSize(arrayType); + + if (lir->index()->isConstant()) { + Address mem(elements, ToInt32(lir->index()) * width); + AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output); + } else { + BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); + AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output); + } +} + +template +static inline void +AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op, + Scalar::Type arrayType, const LAllocation* value, const T& mem) +{ + if (value->isConstant()) + cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem); + else + cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem); +} + +void +CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir) +{ + MOZ_ASSERT(!lir->mir()->hasUses()); + + Register elements = ToRegister(lir->elements()); + const LAllocation* value = lir->value(); + Scalar::Type arrayType = lir->mir()->arrayType(); + int width = Scalar::byteSize(arrayType); + + if (lir->index()->isConstant()) { + Address mem(elements, ToInt32(lir->index()) * width); + AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem); + } else { + BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); + AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem); + } +} + +void +CodeGeneratorX86Shared::visitMemoryBarrier(LMemoryBarrier* ins) +{ + if (ins->type() & MembarStoreLoad) + masm.storeLoadFence(); +} + +void +CodeGeneratorX86Shared::setReturnDoubleRegs(LiveRegisterSet* regs) +{ + MOZ_ASSERT(ReturnFloat32Reg.encoding() == X86Encoding::xmm0); + MOZ_ASSERT(ReturnDoubleReg.encoding() == X86Encoding::xmm0); + 
MOZ_ASSERT(ReturnSimd128Reg.encoding() == X86Encoding::xmm0); + regs->add(ReturnFloat32Reg); + regs->add(ReturnDoubleReg); + regs->add(ReturnSimd128Reg); +} + +void +CodeGeneratorX86Shared::visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool) +{ + FloatRegister input = ool->input(); + MIRType fromType = ool->fromType(); + MIRType toType = ool->toType(); + Label* oolRejoin = ool->rejoin(); + bool isUnsigned = ool->isUnsigned(); + wasm::TrapOffset off = ool->trapOffset(); + + if (fromType == MIRType::Float32) { + if (toType == MIRType::Int32) + masm.outOfLineWasmTruncateFloat32ToInt32(input, isUnsigned, off, oolRejoin); + else if (toType == MIRType::Int64) + masm.outOfLineWasmTruncateFloat32ToInt64(input, isUnsigned, off, oolRejoin); + else + MOZ_CRASH("unexpected type"); + } else if (fromType == MIRType::Double) { + if (toType == MIRType::Int32) + masm.outOfLineWasmTruncateDoubleToInt32(input, isUnsigned, off, oolRejoin); + else if (toType == MIRType::Int64) + masm.outOfLineWasmTruncateDoubleToInt64(input, isUnsigned, off, oolRejoin); + else + MOZ_CRASH("unexpected type"); + } else { + MOZ_CRASH("unexpected type"); + } +} + +void +CodeGeneratorX86Shared::canonicalizeIfDeterministic(Scalar::Type type, const LAllocation* value) +{ +#ifdef JS_MORE_DETERMINISTIC + switch (type) { + case Scalar::Float32: { + FloatRegister in = ToFloatRegister(value); + masm.canonicalizeFloatIfDeterministic(in); + break; + } + case Scalar::Float64: { + FloatRegister in = ToFloatRegister(value); + masm.canonicalizeDoubleIfDeterministic(in); + break; + } + case Scalar::Float32x4: { + FloatRegister in = ToFloatRegister(value); + MOZ_ASSERT(in.isSimd128()); + FloatRegister scratch = in != xmm0.asSimd128() ? xmm0 : xmm1; + masm.push(scratch); + masm.canonicalizeFloat32x4(in, scratch); + masm.pop(scratch); + break; + } + default: { + // Other types don't need canonicalization. 
+ break; + } + } +#endif // JS_MORE_DETERMINISTIC +} + +void +CodeGeneratorX86Shared::visitCopySignF(LCopySignF* lir) +{ + FloatRegister lhs = ToFloatRegister(lir->getOperand(0)); + FloatRegister rhs = ToFloatRegister(lir->getOperand(1)); + + FloatRegister out = ToFloatRegister(lir->output()); + + if (lhs == rhs) { + if (lhs != out) + masm.moveFloat32(lhs, out); + return; + } + + ScratchFloat32Scope scratch(masm); + + float clearSignMask = BitwiseCast(INT32_MAX); + masm.loadConstantFloat32(clearSignMask, scratch); + masm.vandps(scratch, lhs, out); + + float keepSignMask = BitwiseCast(INT32_MIN); + masm.loadConstantFloat32(keepSignMask, scratch); + masm.vandps(rhs, scratch, scratch); + + masm.vorps(scratch, out, out); +} + +void +CodeGeneratorX86Shared::visitCopySignD(LCopySignD* lir) +{ + FloatRegister lhs = ToFloatRegister(lir->getOperand(0)); + FloatRegister rhs = ToFloatRegister(lir->getOperand(1)); + + FloatRegister out = ToFloatRegister(lir->output()); + + if (lhs == rhs) { + if (lhs != out) + masm.moveDouble(lhs, out); + return; + } + + ScratchDoubleScope scratch(masm); + + double clearSignMask = BitwiseCast(INT64_MAX); + masm.loadConstantDouble(clearSignMask, scratch); + masm.vandpd(scratch, lhs, out); + + double keepSignMask = BitwiseCast(INT64_MIN); + masm.loadConstantDouble(keepSignMask, scratch); + masm.vandpd(rhs, scratch, scratch); + + masm.vorpd(scratch, out, out); +} + +void +CodeGeneratorX86Shared::visitRotateI64(LRotateI64* lir) +{ + MRotate* mir = lir->mir(); + LAllocation* count = lir->count(); + + Register64 input = ToRegister64(lir->input()); + Register64 output = ToOutRegister64(lir); + Register temp = ToTempRegisterOrInvalid(lir->temp()); + + MOZ_ASSERT(input == output); + + if (count->isConstant()) { + int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F); + if (!c) + return; + if (mir->isLeftRotate()) + masm.rotateLeft64(Imm32(c), input, output, temp); + else + masm.rotateRight64(Imm32(c), input, output, temp); + } else { + if 
(mir->isLeftRotate()) + masm.rotateLeft64(ToRegister(count), input, output, temp); + else + masm.rotateRight64(ToRegister(count), input, output, temp); + } +} + +void +CodeGeneratorX86Shared::visitPopcntI64(LPopcntI64* lir) +{ + Register64 input = ToRegister64(lir->getInt64Operand(0)); + Register64 output = ToOutRegister64(lir); + Register temp = InvalidReg; + if (!AssemblerX86Shared::HasPOPCNT()) + temp = ToRegister(lir->getTemp(0)); + + masm.popcnt64(input, output, temp); +} + +} // namespace jit +} // namespace js diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h new file mode 100644 index 000000000..d7abb1db7 --- /dev/null +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h @@ -0,0 +1,357 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_CodeGenerator_x86_shared_h +#define jit_x86_shared_CodeGenerator_x86_shared_h + +#include "jit/shared/CodeGenerator-shared.h" + +namespace js { +namespace jit { + +class OutOfLineBailout; +class OutOfLineUndoALUOperation; +class OutOfLineLoadTypedArrayOutOfBounds; +class MulNegativeZeroCheck; +class ModOverflowCheck; +class ReturnZero; +class OutOfLineTableSwitch; + +class CodeGeneratorX86Shared : public CodeGeneratorShared +{ + friend class MoveResolverX86; + + CodeGeneratorX86Shared* thisFromCtor() { + return this; + } + + template + void bailout(const T& t, LSnapshot* snapshot); + + protected: + // Load a NaN or zero into a register for an out of bounds AsmJS or static + // typed array load. 
+ class OutOfLineLoadTypedArrayOutOfBounds : public OutOfLineCodeBase + { + AnyRegister dest_; + Scalar::Type viewType_; + public: + OutOfLineLoadTypedArrayOutOfBounds(AnyRegister dest, Scalar::Type viewType) + : dest_(dest), viewType_(viewType) + {} + + AnyRegister dest() const { return dest_; } + Scalar::Type viewType() const { return viewType_; } + void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitOutOfLineLoadTypedArrayOutOfBounds(this); + } + }; + + // Additional bounds check for vector Float to Int conversion, when the + // undefined pattern is seen. Might imply a bailout. + class OutOfLineSimdFloatToIntCheck : public OutOfLineCodeBase + { + Register temp_; + FloatRegister input_; + LInstruction* ins_; + wasm::TrapOffset trapOffset_; + + public: + OutOfLineSimdFloatToIntCheck(Register temp, FloatRegister input, LInstruction *ins, + wasm::TrapOffset trapOffset) + : temp_(temp), input_(input), ins_(ins), trapOffset_(trapOffset) + {} + + Register temp() const { return temp_; } + FloatRegister input() const { return input_; } + LInstruction* ins() const { return ins_; } + wasm::TrapOffset trapOffset() const { return trapOffset_; } + + void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitOutOfLineSimdFloatToIntCheck(this); + } + }; + + public: + NonAssertingLabel deoptLabel_; + + Operand ToOperand(const LAllocation& a); + Operand ToOperand(const LAllocation* a); + Operand ToOperand(const LDefinition* def); + +#ifdef JS_PUNBOX64 + Operand ToOperandOrRegister64(const LInt64Allocation input); +#else + Register64 ToOperandOrRegister64(const LInt64Allocation input); +#endif + + MoveOperand toMoveOperand(LAllocation a) const; + + void bailoutIf(Assembler::Condition condition, LSnapshot* snapshot); + void bailoutIf(Assembler::DoubleCondition condition, LSnapshot* snapshot); + void bailoutFrom(Label* label, LSnapshot* snapshot); + void bailout(LSnapshot* snapshot); + + template + void bailoutCmpPtr(Assembler::Condition c, T1 lhs, T2 rhs, LSnapshot* 
snapshot) { + masm.cmpPtr(lhs, rhs); + bailoutIf(c, snapshot); + } + void bailoutTestPtr(Assembler::Condition c, Register lhs, Register rhs, LSnapshot* snapshot) { + masm.testPtr(lhs, rhs); + bailoutIf(c, snapshot); + } + template + void bailoutCmp32(Assembler::Condition c, T1 lhs, T2 rhs, LSnapshot* snapshot) { + masm.cmp32(lhs, rhs); + bailoutIf(c, snapshot); + } + template + void bailoutTest32(Assembler::Condition c, T1 lhs, T2 rhs, LSnapshot* snapshot) { + masm.test32(lhs, rhs); + bailoutIf(c, snapshot); + } + void bailoutIfFalseBool(Register reg, LSnapshot* snapshot) { + masm.test32(reg, Imm32(0xFF)); + bailoutIf(Assembler::Zero, snapshot); + } + void bailoutCvttsd2si(FloatRegister src, Register dest, LSnapshot* snapshot) { + // vcvttsd2si returns 0x80000000 on failure. Test for it by + // subtracting 1 and testing overflow. The other possibility is to test + // equality for INT_MIN after a comparison, but 1 costs fewer bytes to + // materialize. + masm.vcvttsd2si(src, dest); + masm.cmp32(dest, Imm32(1)); + bailoutIf(Assembler::Overflow, snapshot); + } + void bailoutCvttss2si(FloatRegister src, Register dest, LSnapshot* snapshot) { + // Same trick as explained in the above comment. + masm.vcvttss2si(src, dest); + masm.cmp32(dest, Imm32(1)); + bailoutIf(Assembler::Overflow, snapshot); + } + + protected: + bool generateOutOfLineCode(); + + void emitCompare(MCompare::CompareType type, const LAllocation* left, const LAllocation* right); + + // Emits a branch that directs control flow to the true block if |cond| is + // true, and the false block if |cond| is false. 
+ void emitBranch(Assembler::Condition cond, MBasicBlock* ifTrue, MBasicBlock* ifFalse, + Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond); + void emitBranch(Assembler::DoubleCondition cond, MBasicBlock* ifTrue, MBasicBlock* ifFalse); + + void testNullEmitBranch(Assembler::Condition cond, const ValueOperand& value, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) + { + cond = masm.testNull(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + void testUndefinedEmitBranch(Assembler::Condition cond, const ValueOperand& value, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) + { + cond = masm.testUndefined(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + void testObjectEmitBranch(Assembler::Condition cond, const ValueOperand& value, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) + { + cond = masm.testObject(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + + void testZeroEmitBranch(Assembler::Condition cond, Register reg, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) + { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + masm.cmpPtr(reg, ImmWord(0)); + emitBranch(cond, ifTrue, ifFalse); + } + + void emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base); + + void emitSimdExtractLane8x16(FloatRegister input, Register output, unsigned lane, + SimdSign signedness); + void emitSimdExtractLane16x8(FloatRegister input, Register output, unsigned lane, + SimdSign signedness); + void emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane); + + public: + CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm); + + public: + // Instruction visitors. 
+ virtual void visitDouble(LDouble* ins); + virtual void visitFloat32(LFloat32* ins); + virtual void visitMinMaxD(LMinMaxD* ins); + virtual void visitMinMaxF(LMinMaxF* ins); + virtual void visitAbsD(LAbsD* ins); + virtual void visitAbsF(LAbsF* ins); + virtual void visitClzI(LClzI* ins); + virtual void visitCtzI(LCtzI* ins); + virtual void visitPopcntI(LPopcntI* ins); + virtual void visitPopcntI64(LPopcntI64* lir); + virtual void visitSqrtD(LSqrtD* ins); + virtual void visitSqrtF(LSqrtF* ins); + virtual void visitPowHalfD(LPowHalfD* ins); + virtual void visitAddI(LAddI* ins); + virtual void visitAddI64(LAddI64* ins); + virtual void visitSubI(LSubI* ins); + virtual void visitSubI64(LSubI64* ins); + virtual void visitMulI(LMulI* ins); + virtual void visitMulI64(LMulI64* ins); + virtual void visitDivI(LDivI* ins); + virtual void visitDivPowTwoI(LDivPowTwoI* ins); + virtual void visitDivOrModConstantI(LDivOrModConstantI* ins); + virtual void visitModI(LModI* ins); + virtual void visitModPowTwoI(LModPowTwoI* ins); + virtual void visitBitNotI(LBitNotI* ins); + virtual void visitBitOpI(LBitOpI* ins); + virtual void visitBitOpI64(LBitOpI64* ins); + virtual void visitShiftI(LShiftI* ins); + virtual void visitShiftI64(LShiftI64* ins); + virtual void visitUrshD(LUrshD* ins); + virtual void visitTestIAndBranch(LTestIAndBranch* test); + virtual void visitTestDAndBranch(LTestDAndBranch* test); + virtual void visitTestFAndBranch(LTestFAndBranch* test); + virtual void visitCompare(LCompare* comp); + virtual void visitCompareAndBranch(LCompareAndBranch* comp); + virtual void visitCompareD(LCompareD* comp); + virtual void visitCompareDAndBranch(LCompareDAndBranch* comp); + virtual void visitCompareF(LCompareF* comp); + virtual void visitCompareFAndBranch(LCompareFAndBranch* comp); + virtual void visitBitAndAndBranch(LBitAndAndBranch* baab); + virtual void visitNotI(LNotI* comp); + virtual void visitNotD(LNotD* comp); + virtual void visitNotF(LNotF* comp); + virtual void 
visitMathD(LMathD* math); + virtual void visitMathF(LMathF* math); + virtual void visitFloor(LFloor* lir); + virtual void visitFloorF(LFloorF* lir); + virtual void visitCeil(LCeil* lir); + virtual void visitCeilF(LCeilF* lir); + virtual void visitRound(LRound* lir); + virtual void visitRoundF(LRoundF* lir); + virtual void visitGuardShape(LGuardShape* guard); + virtual void visitGuardObjectGroup(LGuardObjectGroup* guard); + virtual void visitGuardClass(LGuardClass* guard); + virtual void visitEffectiveAddress(LEffectiveAddress* ins); + virtual void visitUDivOrMod(LUDivOrMod* ins); + virtual void visitUDivOrModConstant(LUDivOrModConstant *ins); + virtual void visitWasmStackArg(LWasmStackArg* ins); + virtual void visitWasmStackArgI64(LWasmStackArgI64* ins); + virtual void visitWasmSelect(LWasmSelect* ins); + virtual void visitWasmReinterpret(LWasmReinterpret* lir); + virtual void visitMemoryBarrier(LMemoryBarrier* ins); + virtual void visitWasmAddOffset(LWasmAddOffset* lir); + virtual void visitWasmTruncateToInt32(LWasmTruncateToInt32* lir); + virtual void visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir); + virtual void visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir); + virtual void visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir); + virtual void visitAtomicExchangeTypedArrayElement(LAtomicExchangeTypedArrayElement* lir); + virtual void visitCopySignD(LCopySignD* lir); + virtual void visitCopySignF(LCopySignF* lir); + virtual void visitRotateI64(LRotateI64* lir); + + void visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds* ool); + + void visitNegI(LNegI* lir); + void visitNegD(LNegD* lir); + void visitNegF(LNegF* lir); + + void visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool); + + // SIMD operators + void visitSimdValueInt32x4(LSimdValueInt32x4* lir); + void visitSimdValueFloat32x4(LSimdValueFloat32x4* lir); + void 
visitSimdSplatX16(LSimdSplatX16* lir); + void visitSimdSplatX8(LSimdSplatX8* lir); + void visitSimdSplatX4(LSimdSplatX4* lir); + void visitSimd128Int(LSimd128Int* ins); + void visitSimd128Float(LSimd128Float* ins); + void visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins); + void visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins); + void visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins); + void visitSimdReinterpretCast(LSimdReinterpretCast* lir); + void visitSimdExtractElementB(LSimdExtractElementB* lir); + void visitSimdExtractElementI(LSimdExtractElementI* lir); + void visitSimdExtractElementU2D(LSimdExtractElementU2D* lir); + void visitSimdExtractElementF(LSimdExtractElementF* lir); + void visitSimdInsertElementI(LSimdInsertElementI* lir); + void visitSimdInsertElementF(LSimdInsertElementF* lir); + void visitSimdSwizzleI(LSimdSwizzleI* lir); + void visitSimdSwizzleF(LSimdSwizzleF* lir); + void visitSimdShuffleX4(LSimdShuffleX4* lir); + void visitSimdShuffle(LSimdShuffle* lir); + void visitSimdUnaryArithIx16(LSimdUnaryArithIx16* lir); + void visitSimdUnaryArithIx8(LSimdUnaryArithIx8* lir); + void visitSimdUnaryArithIx4(LSimdUnaryArithIx4* lir); + void visitSimdUnaryArithFx4(LSimdUnaryArithFx4* lir); + void visitSimdBinaryCompIx16(LSimdBinaryCompIx16* lir); + void visitSimdBinaryCompIx8(LSimdBinaryCompIx8* lir); + void visitSimdBinaryCompIx4(LSimdBinaryCompIx4* lir); + void visitSimdBinaryCompFx4(LSimdBinaryCompFx4* lir); + void visitSimdBinaryArithIx16(LSimdBinaryArithIx16* lir); + void visitSimdBinaryArithIx8(LSimdBinaryArithIx8* lir); + void visitSimdBinaryArithIx4(LSimdBinaryArithIx4* lir); + void visitSimdBinaryArithFx4(LSimdBinaryArithFx4* lir); + void visitSimdBinarySaturating(LSimdBinarySaturating* lir); + void visitSimdBinaryBitwise(LSimdBinaryBitwise* lir); + void visitSimdShift(LSimdShift* lir); + void visitSimdSelect(LSimdSelect* ins); + void visitSimdAllTrue(LSimdAllTrue* ins); + void visitSimdAnyTrue(LSimdAnyTrue* ins); + + template void 
visitSimdGeneralShuffle(LSimdGeneralShuffleBase* lir, Reg temp); + void visitSimdGeneralShuffleI(LSimdGeneralShuffleI* lir); + void visitSimdGeneralShuffleF(LSimdGeneralShuffleF* lir); + + // Out of line visitors. + void visitOutOfLineBailout(OutOfLineBailout* ool); + void visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation* ool); + void visitMulNegativeZeroCheck(MulNegativeZeroCheck* ool); + void visitModOverflowCheck(ModOverflowCheck* ool); + void visitReturnZero(ReturnZero* ool); + void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool); + void visitOutOfLineSimdFloatToIntCheck(OutOfLineSimdFloatToIntCheck* ool); + void generateInvalidateEpilogue(); + + // Generating a result. + template + void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, + const T& mem, Register temp1, Register temp2, AnyRegister output); + + // Generating no result. + template + void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, const T& mem); + + void setReturnDoubleRegs(LiveRegisterSet* regs); + + void canonicalizeIfDeterministic(Scalar::Type type, const LAllocation* value); +}; + +// An out-of-line bailout thunk. 
+class OutOfLineBailout : public OutOfLineCodeBase +{ + LSnapshot* snapshot_; + + public: + explicit OutOfLineBailout(LSnapshot* snapshot) + : snapshot_(snapshot) + { } + + void accept(CodeGeneratorX86Shared* codegen); + + LSnapshot* snapshot() const { + return snapshot_; + } +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_CodeGenerator_x86_shared_h */ diff --git a/js/src/jit/x86-shared/Constants-x86-shared.h b/js/src/jit/x86-shared/Constants-x86-shared.h new file mode 100644 index 000000000..7f0ba0744 --- /dev/null +++ b/js/src/jit/x86-shared/Constants-x86-shared.h @@ -0,0 +1,228 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Constants_x86_shared_h +#define jit_x86_shared_Constants_x86_shared_h + +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" + +#include +#include + +namespace js { +namespace jit { + +namespace X86Encoding { + +enum RegisterID : uint8_t { + rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi +#ifdef JS_CODEGEN_X64 + ,r8, r9, r10, r11, r12, r13, r14, r15 +#endif + ,invalid_reg +}; + +enum HRegisterID { + ah = rsp, + ch = rbp, + dh = rsi, + bh = rdi +}; + +enum XMMRegisterID { + xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 +#ifdef JS_CODEGEN_X64 + ,xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 +#endif + ,invalid_xmm +}; + +inline const char* XMMRegName(XMMRegisterID reg) +{ + static const char* const names[] = { + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7" +#ifdef JS_CODEGEN_X64 + ,"%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" +#endif + }; + MOZ_ASSERT(size_t(reg) < mozilla::ArrayLength(names)); + return names[reg]; +} + +#ifdef JS_CODEGEN_X64 +inline 
const char* GPReg64Name(RegisterID reg) +{ + static const char* const names[] = { + "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi" +#ifdef JS_CODEGEN_X64 + ,"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" +#endif + }; + MOZ_ASSERT(size_t(reg) < mozilla::ArrayLength(names)); + return names[reg]; +} +#endif + +inline const char* GPReg32Name(RegisterID reg) +{ + static const char* const names[] = { + "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" +#ifdef JS_CODEGEN_X64 + ,"%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" +#endif + }; + MOZ_ASSERT(size_t(reg) < mozilla::ArrayLength(names)); + return names[reg]; +} + +inline const char* GPReg16Name(RegisterID reg) +{ + static const char* const names[] = { + "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" +#ifdef JS_CODEGEN_X64 + ,"%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" +#endif + }; + MOZ_ASSERT(size_t(reg) < mozilla::ArrayLength(names)); + return names[reg]; +} + +inline const char* GPReg8Name(RegisterID reg) +{ + static const char* const names[] = { + "%al", "%cl", "%dl", "%bl" +#ifdef JS_CODEGEN_X64 + ,"%spl", "%bpl", "%sil", "%dil", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" +#endif + }; + MOZ_ASSERT(size_t(reg) < mozilla::ArrayLength(names)); + return names[reg]; +} + +inline const char* GPRegName(RegisterID reg) +{ +#ifdef JS_CODEGEN_X64 + return GPReg64Name(reg); +#else + return GPReg32Name(reg); +#endif +} + +inline bool HasSubregL(RegisterID reg) +{ +#ifdef JS_CODEGEN_X64 + // In 64-bit mode, all registers have an 8-bit lo subreg. + return true; +#else + // In 32-bit mode, only the first four registers do. + return reg <= rbx; +#endif +} + +inline bool HasSubregH(RegisterID reg) +{ + // The first four registers always have h registers. However, note that + // on x64, h registers may not be used in instructions using REX + // prefixes. 
Also note that this may depend on what other registers are + // used! + return reg <= rbx; +} + +inline HRegisterID GetSubregH(RegisterID reg) +{ + MOZ_ASSERT(HasSubregH(reg)); + return HRegisterID(reg + 4); +} + +inline const char* HRegName8(HRegisterID reg) +{ + static const char* const names[] = { + "%ah", "%ch", "%dh", "%bh" + }; + size_t index = reg - GetSubregH(rax); + MOZ_ASSERT(index < mozilla::ArrayLength(names)); + return names[index]; +} + +enum Condition { + ConditionO, + ConditionNO, + ConditionB, + ConditionAE, + ConditionE, + ConditionNE, + ConditionBE, + ConditionA, + ConditionS, + ConditionNS, + ConditionP, + ConditionNP, + ConditionL, + ConditionGE, + ConditionLE, + ConditionG, + + ConditionC = ConditionB, + ConditionNC = ConditionAE +}; + +inline const char* CCName(Condition cc) +{ + static const char* const names[] = { + "o ", "no", "b ", "ae", "e ", "ne", "be", "a ", + "s ", "ns", "p ", "np", "l ", "ge", "le", "g " + }; + MOZ_ASSERT(size_t(cc) < mozilla::ArrayLength(names)); + return names[cc]; +} + +// Conditions for CMP instructions (CMPSS, CMPSD, CMPPS, CMPPD, etc). +enum ConditionCmp { + ConditionCmp_EQ = 0x0, + ConditionCmp_LT = 0x1, + ConditionCmp_LE = 0x2, + ConditionCmp_UNORD = 0x3, + ConditionCmp_NEQ = 0x4, + ConditionCmp_NLT = 0x5, + ConditionCmp_NLE = 0x6, + ConditionCmp_ORD = 0x7, +}; + +// Rounding modes for ROUNDSD. +enum RoundingMode { + RoundToNearest = 0x0, + RoundDown = 0x1, + RoundUp = 0x2, + RoundToZero = 0x3 +}; + +// Test whether the given address will fit in an address immediate field. +// This is always true on x86, but on x64 it's only true for addreses which +// fit in the 32-bit immediate field. +inline bool IsAddressImmediate(const void* address) +{ + intptr_t value = reinterpret_cast(address); + int32_t immediate = static_cast(value); + return value == immediate; +} + +// Convert the given address to a 32-bit immediate field value. 
This is a +// no-op on x86, but on x64 it asserts that the address is actually a valid +// address immediate. +inline int32_t AddressImmediate(const void* address) +{ + MOZ_ASSERT(IsAddressImmediate(address)); + return static_cast(reinterpret_cast(address)); +} + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Constants_x86_shared_h */ diff --git a/js/src/jit/x86-shared/Disassembler-x86-shared.cpp b/js/src/jit/x86-shared/Disassembler-x86-shared.cpp new file mode 100644 index 000000000..e033cfa5c --- /dev/null +++ b/js/src/jit/x86-shared/Disassembler-x86-shared.cpp @@ -0,0 +1,568 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/Disassembler.h" + +#include "jit/x86-shared/Encoding-x86-shared.h" + +using namespace js; +using namespace js::jit; +using namespace js::jit::X86Encoding; +using namespace js::jit::Disassembler; + +MOZ_COLD static bool REX_W(uint8_t rex) { return (rex >> 3) & 0x1; } +MOZ_COLD static bool REX_R(uint8_t rex) { return (rex >> 2) & 0x1; } +MOZ_COLD static bool REX_X(uint8_t rex) { return (rex >> 1) & 0x1; } +MOZ_COLD static bool REX_B(uint8_t rex) { return (rex >> 0) & 0x1; } + +MOZ_COLD static uint8_t +MakeREXFlags(bool w, bool r, bool x, bool b) +{ + uint8_t rex = (w << 3) | (r << 2) | (x << 1) | (b << 0); + MOZ_RELEASE_ASSERT(REX_W(rex) == w); + MOZ_RELEASE_ASSERT(REX_R(rex) == r); + MOZ_RELEASE_ASSERT(REX_X(rex) == x); + MOZ_RELEASE_ASSERT(REX_B(rex) == b); + return rex; +} + +MOZ_COLD static ModRmMode +ModRM_Mode(uint8_t modrm) +{ + return ModRmMode((modrm >> 6) & 0x3); +} + +MOZ_COLD static uint8_t +ModRM_Reg(uint8_t modrm) +{ + return (modrm >> 3) & 0x7; +} + +MOZ_COLD static uint8_t +ModRM_RM(uint8_t 
modrm) +{ + return (modrm >> 0) & 0x7; +} + +MOZ_COLD static bool +ModRM_hasSIB(uint8_t modrm) +{ + return ModRM_Mode(modrm) != ModRmRegister && ModRM_RM(modrm) == hasSib; +} +MOZ_COLD static bool +ModRM_hasDisp8(uint8_t modrm) +{ + return ModRM_Mode(modrm) == ModRmMemoryDisp8; +} +MOZ_COLD static bool +ModRM_hasRIP(uint8_t modrm) +{ +#ifdef JS_CODEGEN_X64 + return ModRM_Mode(modrm) == ModRmMemoryNoDisp && ModRM_RM(modrm) == noBase; +#else + return false; +#endif +} +MOZ_COLD static bool +ModRM_hasDisp32(uint8_t modrm) +{ + return ModRM_Mode(modrm) == ModRmMemoryDisp32 || + ModRM_hasRIP(modrm); +} + +MOZ_COLD static uint8_t +SIB_SS(uint8_t sib) +{ + return (sib >> 6) & 0x3; +} + +MOZ_COLD static uint8_t +SIB_Index(uint8_t sib) +{ + return (sib >> 3) & 0x7; +} + +MOZ_COLD static uint8_t +SIB_Base(uint8_t sib) +{ + return (sib >> 0) & 0x7; +} + +MOZ_COLD static bool +SIB_hasRIP(uint8_t sib) +{ + return SIB_Base(sib) == noBase && SIB_Index(sib) == noIndex; +} + +MOZ_COLD static bool +HasRIP(uint8_t modrm, uint8_t sib, uint8_t rex) +{ + return ModRM_hasRIP(modrm) && SIB_hasRIP(sib); +} + +MOZ_COLD static bool +HasDisp8(uint8_t modrm) +{ + return ModRM_hasDisp8(modrm); +} + +MOZ_COLD static bool +HasDisp32(uint8_t modrm, uint8_t sib) +{ + return ModRM_hasDisp32(modrm) || + (SIB_Base(sib) == noBase && + SIB_Index(sib) == noIndex && + ModRM_Mode(modrm) == ModRmMemoryNoDisp); +} + +MOZ_COLD static uint32_t +Reg(uint8_t modrm, uint8_t sib, uint8_t rex) +{ + return ModRM_Reg(modrm) | (REX_R(rex) << 3); +} + +MOZ_COLD static bool +HasBase(uint8_t modrm, uint8_t sib) +{ + return !ModRM_hasSIB(modrm) || + SIB_Base(sib) != noBase || + SIB_Index(sib) != noIndex || + ModRM_Mode(modrm) != ModRmMemoryNoDisp; +} + +MOZ_COLD static RegisterID +DecodeBase(uint8_t modrm, uint8_t sib, uint8_t rex) +{ + return HasBase(modrm, sib) + ? RegisterID((ModRM_hasSIB(modrm) ? 
SIB_Base(sib) : ModRM_RM(modrm)) | (REX_B(rex) << 3)) + : invalid_reg; +} + +MOZ_COLD static RegisterID +DecodeIndex(uint8_t modrm, uint8_t sib, uint8_t rex) +{ + RegisterID index = RegisterID(SIB_Index(sib) | (REX_X(rex) << 3)); + return ModRM_hasSIB(modrm) && index != noIndex ? index : invalid_reg; +} + +MOZ_COLD static uint32_t +DecodeScale(uint8_t modrm, uint8_t sib, uint8_t rex) +{ + return ModRM_hasSIB(modrm) ? SIB_SS(sib) : 0; +} + +#define PackOpcode(op0, op1, op2) ((op0) | ((op1) << 8) | ((op2) << 16)) +#define Pack2ByteOpcode(op1) PackOpcode(OP_2BYTE_ESCAPE, op1, 0) +#define Pack3ByteOpcode(op1, op2) PackOpcode(OP_2BYTE_ESCAPE, op1, op2) + +uint8_t* +js::jit::Disassembler::DisassembleHeapAccess(uint8_t* ptr, HeapAccess* access) +{ + VexOperandType type = VEX_PS; + uint32_t opcode = OP_HLT; + uint8_t modrm = 0; + uint8_t sib = 0; + uint8_t rex = 0; + int32_t disp = 0; + int32_t imm = 0; + bool haveImm = false; + int opsize = 4; + + // Legacy prefixes + switch (*ptr) { + case PRE_LOCK: + case PRE_PREDICT_BRANCH_NOT_TAKEN: // (obsolete), aka %cs + case 0x3E: // aka predict-branch-taken (obsolete) + case 0x36: // %ss + case 0x26: // %es + case 0x64: // %fs + case 0x65: // %gs + case 0x67: // address-size override + MOZ_CRASH("Unable to disassemble instruction"); + case PRE_SSE_F2: // aka REPNZ/REPNE + type = VEX_SD; + ptr++; + break; + case PRE_SSE_F3: // aka REP/REPE/REPZ + type = VEX_SS; + ptr++; + break; + case PRE_SSE_66: // aka PRE_OPERAND_SIZE + type = VEX_PD; + opsize = 2; + ptr++; + break; + default: + break; + } + + // REX and VEX prefixes + { + int x = 0, b = 0, m = 1, w = 0; + int r, l, p; + switch (*ptr) { +#ifdef JS_CODEGEN_X64 + case PRE_REX | 0x0: case PRE_REX | 0x1: case PRE_REX | 0x2: case PRE_REX | 0x3: + case PRE_REX | 0x4: case PRE_REX | 0x5: case PRE_REX | 0x6: case PRE_REX | 0x7: + case PRE_REX | 0x8: case PRE_REX | 0x9: case PRE_REX | 0xa: case PRE_REX | 0xb: + case PRE_REX | 0xc: case PRE_REX | 0xd: case PRE_REX | 0xe: case PRE_REX | 
0xf: + rex = *ptr++ & 0xf; + goto rex_done; +#endif + case PRE_VEX_C4: { + if (type != VEX_PS) + MOZ_CRASH("Unable to disassemble instruction"); + ++ptr; + uint8_t c4a = *ptr++ ^ 0xe0; + uint8_t c4b = *ptr++ ^ 0x78; + r = (c4a >> 7) & 0x1; + x = (c4a >> 6) & 0x1; + b = (c4a >> 5) & 0x1; + m = (c4a >> 0) & 0x1f; + w = (c4b >> 7) & 0x1; + l = (c4b >> 2) & 0x1; + p = (c4b >> 0) & 0x3; + break; + } + case PRE_VEX_C5: { + if (type != VEX_PS) + MOZ_CRASH("Unable to disassemble instruction"); + ++ptr; + uint8_t c5 = *ptr++ ^ 0xf8; + r = (c5 >> 7) & 0x1; + l = (c5 >> 2) & 0x1; + p = (c5 >> 0) & 0x3; + break; + } + default: + goto rex_done; + } + type = VexOperandType(p); + rex = MakeREXFlags(w, r, x, b); + switch (m) { + case 0x1: + opcode = Pack2ByteOpcode(*ptr++); + goto opcode_done; + case 0x2: + opcode = Pack3ByteOpcode(ESCAPE_38, *ptr++); + goto opcode_done; + case 0x3: + opcode = Pack3ByteOpcode(ESCAPE_3A, *ptr++); + goto opcode_done; + default: + MOZ_CRASH("Unable to disassemble instruction"); + } + if (l != 0) // 256-bit SIMD + MOZ_CRASH("Unable to disassemble instruction"); + } + rex_done:; + if (REX_W(rex)) + opsize = 8; + + // Opcode. 
+ opcode = *ptr++; + switch (opcode) { +#ifdef JS_CODEGEN_X64 + case OP_PUSH_EAX + 0: case OP_PUSH_EAX + 1: case OP_PUSH_EAX + 2: case OP_PUSH_EAX + 3: + case OP_PUSH_EAX + 4: case OP_PUSH_EAX + 5: case OP_PUSH_EAX + 6: case OP_PUSH_EAX + 7: + case OP_POP_EAX + 0: case OP_POP_EAX + 1: case OP_POP_EAX + 2: case OP_POP_EAX + 3: + case OP_POP_EAX + 4: case OP_POP_EAX + 5: case OP_POP_EAX + 6: case OP_POP_EAX + 7: + case OP_PUSH_Iz: + case OP_PUSH_Ib: + opsize = 8; + break; +#endif + case OP_2BYTE_ESCAPE: + opcode |= *ptr << 8; + switch (*ptr++) { + case ESCAPE_38: + case ESCAPE_3A: + opcode |= *ptr++ << 16; + break; + default: + break; + } + break; + default: + break; + } + opcode_done:; + + // ModR/M + modrm = *ptr++; + + // SIB + if (ModRM_hasSIB(modrm)) + sib = *ptr++; + + // Address Displacement + if (HasDisp8(modrm)) { + disp = int8_t(*ptr++); + } else if (HasDisp32(modrm, sib)) { + memcpy(&disp, ptr, sizeof(int32_t)); + ptr += sizeof(int32_t); + } + + // Immediate operand + switch (opcode) { + case OP_PUSH_Ib: + case OP_IMUL_GvEvIb: + case OP_GROUP1_EbIb: + case OP_GROUP1_EvIb: + case OP_TEST_EAXIb: + case OP_GROUP2_EvIb: + case OP_GROUP11_EvIb: + case OP_GROUP3_EbIb: + case Pack2ByteOpcode(OP2_PSHUFD_VdqWdqIb): + case Pack2ByteOpcode(OP2_PSLLD_UdqIb): // aka OP2_PSRAD_UdqIb, aka OP2_PSRLD_UdqIb + case Pack2ByteOpcode(OP2_PEXTRW_GdUdIb): + case Pack2ByteOpcode(OP2_SHUFPS_VpsWpsIb): + case Pack3ByteOpcode(ESCAPE_3A, OP3_PEXTRD_EdVdqIb): + case Pack3ByteOpcode(ESCAPE_3A, OP3_BLENDPS_VpsWpsIb): + case Pack3ByteOpcode(ESCAPE_3A, OP3_PINSRD_VdqEdIb): + // 8-bit signed immediate + imm = int8_t(*ptr++); + haveImm = true; + break; + case OP_RET_Iz: + // 16-bit unsigned immediate + memcpy(&imm, ptr, sizeof(int16_t)); + ptr += sizeof(int16_t); + haveImm = true; + break; + case OP_ADD_EAXIv: + case OP_OR_EAXIv: + case OP_AND_EAXIv: + case OP_SUB_EAXIv: + case OP_XOR_EAXIv: + case OP_CMP_EAXIv: + case OP_PUSH_Iz: + case OP_IMUL_GvEvIz: + case OP_GROUP1_EvIz: + case 
OP_TEST_EAXIv: + case OP_MOV_EAXIv: + case OP_GROUP3_EvIz: + // 32-bit signed immediate + memcpy(&imm, ptr, sizeof(int32_t)); + ptr += sizeof(int32_t); + haveImm = true; + break; + case OP_GROUP11_EvIz: + // opsize-sized signed immediate + memcpy(&imm, ptr, opsize); + imm = (imm << (32 - opsize * 8)) >> (32 - opsize * 8); + ptr += opsize; + haveImm = true; + break; + default: + break; + } + + // Interpret the opcode. + if (HasRIP(modrm, sib, rex)) + MOZ_CRASH("Unable to disassemble instruction"); + + size_t memSize = 0; + OtherOperand otherOperand(imm); + HeapAccess::Kind kind = HeapAccess::Unknown; + RegisterID gpr(RegisterID(Reg(modrm, sib, rex))); + XMMRegisterID xmm(XMMRegisterID(Reg(modrm, sib, rex))); + ComplexAddress addr(disp, + DecodeBase(modrm, sib, rex), + DecodeIndex(modrm, sib, rex), + DecodeScale(modrm, sib, rex)); + switch (opcode) { + case OP_GROUP11_EvIb: + if (gpr != RegisterID(GROUP11_MOV)) + MOZ_CRASH("Unable to disassemble instruction"); + MOZ_RELEASE_ASSERT(haveImm); + memSize = 1; + kind = HeapAccess::Store; + break; + case OP_GROUP11_EvIz: + if (gpr != RegisterID(GROUP11_MOV)) + MOZ_CRASH("Unable to disassemble instruction"); + MOZ_RELEASE_ASSERT(haveImm); + memSize = opsize; + kind = HeapAccess::Store; + break; + case OP_MOV_GvEv: + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(gpr); + memSize = opsize; + kind = HeapAccess::Load; + break; + case OP_MOV_GvEb: + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(gpr); + memSize = 1; + kind = HeapAccess::Load; + break; + case OP_MOV_EvGv: + if (!haveImm) + otherOperand = OtherOperand(gpr); + memSize = opsize; + kind = HeapAccess::Store; + break; + case OP_MOV_EbGv: + if (!haveImm) + otherOperand = OtherOperand(gpr); + memSize = 1; + kind = HeapAccess::Store; + break; + case Pack2ByteOpcode(OP2_MOVZX_GvEb): + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(gpr); + memSize = 1; + kind = HeapAccess::Load; + break; + case Pack2ByteOpcode(OP2_MOVZX_GvEw): + 
MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(gpr); + memSize = 2; + kind = HeapAccess::Load; + break; + case Pack2ByteOpcode(OP2_MOVSX_GvEb): + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(gpr); + memSize = 1; + kind = opsize == 8 ? HeapAccess::LoadSext64 : HeapAccess::LoadSext32; + break; + case Pack2ByteOpcode(OP2_MOVSX_GvEw): + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(gpr); + memSize = 2; + kind = opsize == 8 ? HeapAccess::LoadSext64 : HeapAccess::LoadSext32; + break; +#ifdef JS_CODEGEN_X64 + case OP_MOVSXD_GvEv: + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(gpr); + memSize = 4; + kind = HeapAccess::LoadSext64; + break; +#endif // JS_CODEGEN_X64 + case Pack2ByteOpcode(OP2_MOVDQ_VdqWdq): // aka OP2_MOVDQ_VsdWsd + case Pack2ByteOpcode(OP2_MOVAPS_VsdWsd): + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + memSize = 16; + kind = HeapAccess::Load; + break; + case Pack2ByteOpcode(OP2_MOVSD_VsdWsd): // aka OP2_MOVPS_VpsWps + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + switch (type) { + case VEX_SS: memSize = 4; break; + case VEX_SD: memSize = 8; break; + case VEX_PS: + case VEX_PD: memSize = 16; break; + default: MOZ_CRASH("Unexpected VEX type"); + } + kind = HeapAccess::Load; + break; + case Pack2ByteOpcode(OP2_MOVDQ_WdqVdq): + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + memSize = 16; + kind = HeapAccess::Store; + break; + case Pack2ByteOpcode(OP2_MOVSD_WsdVsd): // aka OP2_MOVPS_WpsVps + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + switch (type) { + case VEX_SS: memSize = 4; break; + case VEX_SD: memSize = 8; break; + case VEX_PS: + case VEX_PD: memSize = 16; break; + default: MOZ_CRASH("Unexpected VEX type"); + } + kind = HeapAccess::Store; + break; + case Pack2ByteOpcode(OP2_MOVD_VdEd): + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + switch (type) { + case VEX_PD: memSize = 4; break; + 
default: MOZ_CRASH("Unexpected VEX type"); + } + kind = HeapAccess::Load; + break; + case Pack2ByteOpcode(OP2_MOVQ_WdVd): + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + switch (type) { + case VEX_PD: memSize = 8; break; + default: MOZ_CRASH("Unexpected VEX type"); + } + kind = HeapAccess::Store; + break; + case Pack2ByteOpcode(OP2_MOVD_EdVd): // aka OP2_MOVQ_VdWd + MOZ_RELEASE_ASSERT(!haveImm); + otherOperand = OtherOperand(xmm); + switch (type) { + case VEX_SS: memSize = 8; kind = HeapAccess::Load; break; + case VEX_PD: memSize = 4; kind = HeapAccess::Store; break; + default: MOZ_CRASH("Unexpected VEX type"); + } + break; + default: + MOZ_CRASH("Unable to disassemble instruction"); + } + + *access = HeapAccess(kind, memSize, addr, otherOperand); + return ptr; +} + +#ifdef DEBUG +void +js::jit::Disassembler::DumpHeapAccess(const HeapAccess& access) +{ + switch (access.kind()) { + case HeapAccess::Store: fprintf(stderr, "store"); break; + case HeapAccess::Load: fprintf(stderr, "load"); break; + case HeapAccess::LoadSext32: fprintf(stderr, "loadSext32"); break; + case HeapAccess::LoadSext64: fprintf(stderr, "loadSext64"); break; + default: fprintf(stderr, "unknown"); break; + } + fprintf(stderr, "%u ", unsigned(access.size())); + + switch (access.otherOperand().kind()) { + case OtherOperand::Imm: + fprintf(stderr, "imm %d", access.otherOperand().imm()); + break; + case OtherOperand::GPR: + fprintf(stderr, "gpr %s", X86Encoding::GPRegName(access.otherOperand().gpr())); + break; + case OtherOperand::FPR: + fprintf(stderr, "fpr %s", X86Encoding::XMMRegName(access.otherOperand().fpr())); + break; + default: fprintf(stderr, "unknown"); + } + + fprintf(stderr, " @ "); + + if (access.address().isPCRelative()) { + fprintf(stderr, MEM_o32r " ", ADDR_o32r(access.address().disp())); + } else if (access.address().hasIndex()) { + if (access.address().hasBase()) { + fprintf(stderr, MEM_obs " ", + ADDR_obs(access.address().disp(), access.address().base(), + 
access.address().index(), access.address().scale())); + } else { + fprintf(stderr, MEM_os " ", + ADDR_os(access.address().disp(), + access.address().index(), access.address().scale())); + } + } else if (access.address().hasBase()) { + fprintf(stderr, MEM_ob " ", ADDR_ob(access.address().disp(), access.address().base())); + } else { + fprintf(stderr, MEM_o " ", ADDR_o(access.address().disp())); + } + + fprintf(stderr, "\n"); +} +#endif diff --git a/js/src/jit/x86-shared/Encoding-x86-shared.h b/js/src/jit/x86-shared/Encoding-x86-shared.h new file mode 100644 index 000000000..5190164de --- /dev/null +++ b/js/src/jit/x86-shared/Encoding-x86-shared.h @@ -0,0 +1,413 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Encoding_x86_shared_h +#define jit_x86_shared_Encoding_x86_shared_h + +#include "jit/x86-shared/Constants-x86-shared.h" + +namespace js { +namespace jit { + +namespace X86Encoding { + +static const size_t MaxInstructionSize = 16; + +// These enumerated values are following the Intel documentation Volume 2C [1], +// Appendix A.2 and Appendix A.3. +// +// Operand size/types as listed in the Appendix A.2. Tables of the instructions +// and their operands can be found in the Appendix A.3. 
+// +// E = reg/mem +// G = reg (reg field of ModR/M) +// U = xmm (R/M field of ModR/M) +// V = xmm (reg field of ModR/M) +// W = xmm/mem64 +// I = immediate +// O = offset +// +// b = byte (8-bit) +// w = word (16-bit) +// v = register size +// d = double (32-bit) +// dq = double-quad (128-bit) (xmm) +// ss = scalar float 32 (xmm) +// ps = packed float 32 (xmm) +// sd = scalar double (xmm) +// pd = packed double (xmm) +// z = 16/32/64-bit +// vqp = (*) +// +// (*) Some website [2] provides a convenient list of all instructions, but be +// aware that they do not follow the Intel documentation naming, as the +// following enumeration does. Do not use these names as a reference for adding +// new instructions. +// +// [1] http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-manual-325462.html +// [2] http://ref.x86asm.net/geek.html +// +// OPn_NAME_DstSrc +enum OneByteOpcodeID { + OP_NOP_00 = 0x00, + OP_ADD_EbGb = 0x00, + OP_ADD_EvGv = 0x01, + OP_ADD_GvEv = 0x03, + OP_ADD_EAXIv = 0x05, + OP_OR_EbGb = 0x08, + OP_OR_EvGv = 0x09, + OP_OR_GvEv = 0x0B, + OP_OR_EAXIv = 0x0D, + OP_2BYTE_ESCAPE = 0x0F, + OP_NOP_0F = 0x0F, + OP_ADC_GvEv = 0x13, + OP_SBB_GvEv = 0x1B, + OP_NOP_1F = 0x1F, + OP_AND_EbGb = 0x20, + OP_AND_EvGv = 0x21, + OP_AND_GvEv = 0x23, + OP_AND_EAXIv = 0x25, + OP_SUB_EbGb = 0x28, + OP_SUB_EvGv = 0x29, + OP_SUB_GvEv = 0x2B, + OP_SUB_EAXIv = 0x2D, + PRE_PREDICT_BRANCH_NOT_TAKEN = 0x2E, + OP_XOR_EbGb = 0x30, + OP_XOR_EvGv = 0x31, + OP_XOR_GvEv = 0x33, + OP_XOR_EAXIv = 0x35, + OP_CMP_EvGv = 0x39, + OP_CMP_GvEv = 0x3B, + OP_CMP_EAXIv = 0x3D, +#ifdef JS_CODEGEN_X64 + PRE_REX = 0x40, +#endif + OP_NOP_40 = 0x40, + OP_NOP_44 = 0x44, + OP_PUSH_EAX = 0x50, + OP_POP_EAX = 0x58, +#ifdef JS_CODEGEN_X86 + OP_PUSHA = 0x60, + OP_POPA = 0x61, +#endif +#ifdef JS_CODEGEN_X64 + OP_MOVSXD_GvEv = 0x63, +#endif + PRE_OPERAND_SIZE = 0x66, + PRE_SSE_66 = 0x66, + OP_NOP_66 = 0x66, + OP_PUSH_Iz = 0x68, + OP_IMUL_GvEvIz = 0x69, + 
OP_PUSH_Ib = 0x6a, + OP_IMUL_GvEvIb = 0x6b, + OP_JCC_rel8 = 0x70, + OP_GROUP1_EbIb = 0x80, + OP_NOP_80 = 0x80, + OP_GROUP1_EvIz = 0x81, + OP_GROUP1_EvIb = 0x83, + OP_TEST_EbGb = 0x84, + OP_NOP_84 = 0x84, + OP_TEST_EvGv = 0x85, + OP_XCHG_GbEb = 0x86, + OP_XCHG_GvEv = 0x87, + OP_MOV_EbGv = 0x88, + OP_MOV_EvGv = 0x89, + OP_MOV_GvEb = 0x8A, + OP_MOV_GvEv = 0x8B, + OP_LEA = 0x8D, + OP_GROUP1A_Ev = 0x8F, + OP_NOP = 0x90, + OP_PUSHFLAGS = 0x9C, + OP_POPFLAGS = 0x9D, + OP_CDQ = 0x99, + OP_MOV_EAXOv = 0xA1, + OP_MOV_OvEAX = 0xA3, + OP_TEST_EAXIb = 0xA8, + OP_TEST_EAXIv = 0xA9, + OP_MOV_EbIb = 0xB0, + OP_MOV_EAXIv = 0xB8, + OP_GROUP2_EvIb = 0xC1, + OP_ADDP_ST0_ST1 = 0xC1, + OP_RET_Iz = 0xC2, + PRE_VEX_C4 = 0xC4, + PRE_VEX_C5 = 0xC5, + OP_RET = 0xC3, + OP_GROUP11_EvIb = 0xC6, + OP_GROUP11_EvIz = 0xC7, + OP_INT3 = 0xCC, + OP_GROUP2_Ev1 = 0xD1, + OP_GROUP2_EvCL = 0xD3, + OP_FPU6 = 0xDD, + OP_FPU6_F32 = 0xD9, + OP_FPU6_ADDP = 0xDE, + OP_FILD = 0xDF, + OP_CALL_rel32 = 0xE8, + OP_JMP_rel32 = 0xE9, + OP_JMP_rel8 = 0xEB, + PRE_LOCK = 0xF0, + PRE_SSE_F2 = 0xF2, + PRE_SSE_F3 = 0xF3, + OP_HLT = 0xF4, + OP_GROUP3_EbIb = 0xF6, + OP_GROUP3_Ev = 0xF7, + OP_GROUP3_EvIz = 0xF7, // OP_GROUP3_Ev has an immediate, when instruction is a test. 
+ OP_GROUP5_Ev = 0xFF +}; + +enum class ShiftID { + vpsrlx = 2, + vpsrldq = 3, + vpsrad = 4, + vpsllx = 6 +}; + +enum TwoByteOpcodeID { + OP2_UD2 = 0x0B, + OP2_MOVSD_VsdWsd = 0x10, + OP2_MOVPS_VpsWps = 0x10, + OP2_MOVSD_WsdVsd = 0x11, + OP2_MOVPS_WpsVps = 0x11, + OP2_MOVDDUP_VqWq = 0x12, + OP2_MOVHLPS_VqUq = 0x12, + OP2_MOVSLDUP_VpsWps = 0x12, + OP2_UNPCKLPS_VsdWsd = 0x14, + OP2_UNPCKHPS_VsdWsd = 0x15, + OP2_MOVLHPS_VqUq = 0x16, + OP2_MOVSHDUP_VpsWps = 0x16, + OP2_MOVAPD_VsdWsd = 0x28, + OP2_MOVAPS_VsdWsd = 0x28, + OP2_MOVAPS_WsdVsd = 0x29, + OP2_CVTSI2SD_VsdEd = 0x2A, + OP2_CVTTSD2SI_GdWsd = 0x2C, + OP2_UCOMISD_VsdWsd = 0x2E, + OP2_CMOVZ_GvEv = 0x44, + OP2_MOVMSKPD_EdVd = 0x50, + OP2_ANDPS_VpsWps = 0x54, + OP2_ANDNPS_VpsWps = 0x55, + OP2_ORPS_VpsWps = 0x56, + OP2_XORPS_VpsWps = 0x57, + OP2_ADDSD_VsdWsd = 0x58, + OP2_ADDPS_VpsWps = 0x58, + OP2_MULSD_VsdWsd = 0x59, + OP2_MULPS_VpsWps = 0x59, + OP2_CVTSS2SD_VsdEd = 0x5A, + OP2_CVTSD2SS_VsdEd = 0x5A, + OP2_CVTTPS2DQ_VdqWps = 0x5B, + OP2_CVTDQ2PS_VpsWdq = 0x5B, + OP2_SUBSD_VsdWsd = 0x5C, + OP2_SUBPS_VpsWps = 0x5C, + OP2_MINSD_VsdWsd = 0x5D, + OP2_MINSS_VssWss = 0x5D, + OP2_MINPS_VpsWps = 0x5D, + OP2_DIVSD_VsdWsd = 0x5E, + OP2_DIVPS_VpsWps = 0x5E, + OP2_MAXSD_VsdWsd = 0x5F, + OP2_MAXSS_VssWss = 0x5F, + OP2_MAXPS_VpsWps = 0x5F, + OP2_SQRTSD_VsdWsd = 0x51, + OP2_SQRTSS_VssWss = 0x51, + OP2_SQRTPS_VpsWps = 0x51, + OP2_RSQRTPS_VpsWps = 0x52, + OP2_RCPPS_VpsWps = 0x53, + OP2_ANDPD_VpdWpd = 0x54, + OP2_ORPD_VpdWpd = 0x56, + OP2_XORPD_VpdWpd = 0x57, + OP2_PUNPCKLDQ = 0x62, + OP2_PCMPGTB_VdqWdq = 0x64, + OP2_PCMPGTW_VdqWdq = 0x65, + OP2_PCMPGTD_VdqWdq = 0x66, + OP2_MOVD_VdEd = 0x6E, + OP2_MOVDQ_VsdWsd = 0x6F, + OP2_MOVDQ_VdqWdq = 0x6F, + OP2_PSHUFD_VdqWdqIb = 0x70, + OP2_PSHUFLW_VdqWdqIb = 0x70, + OP2_PSHUFHW_VdqWdqIb = 0x70, + OP2_PSLLW_UdqIb = 0x71, + OP2_PSRAW_UdqIb = 0x71, + OP2_PSRLW_UdqIb = 0x71, + OP2_PSLLD_UdqIb = 0x72, + OP2_PSRAD_UdqIb = 0x72, + OP2_PSRLD_UdqIb = 0x72, + OP2_PSRLDQ_Vd = 0x73, + OP2_PCMPEQB_VdqWdq = 
0x74, + OP2_PCMPEQW_VdqWdq = 0x75, + OP2_PCMPEQD_VdqWdq = 0x76, + OP2_HADDPD = 0x7C, + OP2_MOVD_EdVd = 0x7E, + OP2_MOVQ_VdWd = 0x7E, + OP2_MOVDQ_WdqVdq = 0x7F, + OP2_JCC_rel32 = 0x80, + OP_SETCC = 0x90, + OP2_SHLD = 0xA4, + OP2_SHLD_GvEv = 0xA5, + OP2_SHRD = 0xAC, + OP2_SHRD_GvEv = 0xAD, + OP_FENCE = 0xAE, + OP2_IMUL_GvEv = 0xAF, + OP2_CMPXCHG_GvEb = 0xB0, + OP2_CMPXCHG_GvEw = 0xB1, + OP2_POPCNT_GvEv = 0xB8, + OP2_BSF_GvEv = 0xBC, + OP2_BSR_GvEv = 0xBD, + OP2_MOVSX_GvEb = 0xBE, + OP2_MOVSX_GvEw = 0xBF, + OP2_MOVZX_GvEb = 0xB6, + OP2_MOVZX_GvEw = 0xB7, + OP2_XADD_EbGb = 0xC0, + OP2_XADD_EvGv = 0xC1, + OP2_CMPPS_VpsWps = 0xC2, + OP2_PINSRW = 0xC4, + OP2_PEXTRW_GdUdIb = 0xC5, + OP2_SHUFPS_VpsWpsIb = 0xC6, + OP2_PSRLW_VdqWdq = 0xD1, + OP2_PSRLD_VdqWdq = 0xD2, + OP2_PMULLW_VdqWdq = 0xD5, + OP2_MOVQ_WdVd = 0xD6, + OP2_PSUBUSB_VdqWdq = 0xD8, + OP2_PSUBUSW_VdqWdq = 0xD9, + OP2_PANDDQ_VdqWdq = 0xDB, + OP2_PADDUSB_VdqWdq = 0xDC, + OP2_PADDUSW_VdqWdq = 0xDD, + OP2_PANDNDQ_VdqWdq = 0xDF, + OP2_PSRAW_VdqWdq = 0xE1, + OP2_PSRAD_VdqWdq = 0xE2, + OP2_PSUBSB_VdqWdq = 0xE8, + OP2_PSUBSW_VdqWdq = 0xE9, + OP2_PORDQ_VdqWdq = 0xEB, + OP2_PADDSB_VdqWdq = 0xEC, + OP2_PADDSW_VdqWdq = 0xED, + OP2_PXORDQ_VdqWdq = 0xEF, + OP2_PSLLW_VdqWdq = 0xF1, + OP2_PSLLD_VdqWdq = 0xF2, + OP2_PMULUDQ_VdqWdq = 0xF4, + OP2_PSUBB_VdqWdq = 0xF8, + OP2_PSUBW_VdqWdq = 0xF9, + OP2_PSUBD_VdqWdq = 0xFA, + OP2_PADDB_VdqWdq = 0xFC, + OP2_PADDW_VdqWdq = 0xFD, + OP2_PADDD_VdqWdq = 0xFE +}; + +enum ThreeByteOpcodeID { + OP3_PSHUFB_VdqWdq = 0x00, + OP3_ROUNDSS_VsdWsd = 0x0A, + OP3_ROUNDSD_VsdWsd = 0x0B, + OP3_BLENDVPS_VdqWdq = 0x14, + OP3_PEXTRB_EdVdqIb = 0x14, + OP3_PEXTRD_EdVdqIb = 0x16, + OP3_BLENDPS_VpsWpsIb = 0x0C, + OP3_PTEST_VdVd = 0x17, + OP3_PINSRB_VdqEdIb = 0x20, + OP3_INSERTPS_VpsUps = 0x21, + OP3_PINSRD_VdqEdIb = 0x22, + OP3_PMULLD_VdqWdq = 0x40, + OP3_VBLENDVPS_VdqWdq = 0x4A +}; + +// Test whether the given opcode should be printed with its operands reversed. 
+inline bool IsXMMReversedOperands(TwoByteOpcodeID opcode) +{ + switch (opcode) { + case OP2_MOVSD_WsdVsd: // also OP2_MOVPS_WpsVps + case OP2_MOVAPS_WsdVsd: + case OP2_MOVDQ_WdqVdq: + case OP3_PEXTRD_EdVdqIb: + return true; + default: + break; + } + return false; +} + +enum ThreeByteEscape { + ESCAPE_38 = 0x38, + ESCAPE_3A = 0x3A +}; + +enum VexOperandType { + VEX_PS = 0, + VEX_PD = 1, + VEX_SS = 2, + VEX_SD = 3 +}; + +inline OneByteOpcodeID jccRel8(Condition cond) +{ + return OneByteOpcodeID(OP_JCC_rel8 + cond); +} +inline TwoByteOpcodeID jccRel32(Condition cond) +{ + return TwoByteOpcodeID(OP2_JCC_rel32 + cond); +} +inline TwoByteOpcodeID setccOpcode(Condition cond) +{ + return TwoByteOpcodeID(OP_SETCC + cond); +} + +enum GroupOpcodeID { + GROUP1_OP_ADD = 0, + GROUP1_OP_OR = 1, + GROUP1_OP_ADC = 2, + GROUP1_OP_SBB = 3, + GROUP1_OP_AND = 4, + GROUP1_OP_SUB = 5, + GROUP1_OP_XOR = 6, + GROUP1_OP_CMP = 7, + + GROUP1A_OP_POP = 0, + + GROUP2_OP_ROL = 0, + GROUP2_OP_ROR = 1, + GROUP2_OP_SHL = 4, + GROUP2_OP_SHR = 5, + GROUP2_OP_SAR = 7, + + GROUP3_OP_TEST = 0, + GROUP3_OP_NOT = 2, + GROUP3_OP_NEG = 3, + GROUP3_OP_MUL = 4, + GROUP3_OP_IMUL = 5, + GROUP3_OP_DIV = 6, + GROUP3_OP_IDIV = 7, + + GROUP5_OP_INC = 0, + GROUP5_OP_DEC = 1, + GROUP5_OP_CALLN = 2, + GROUP5_OP_JMPN = 4, + GROUP5_OP_PUSH = 6, + + FILD_OP_64 = 5, + + FPU6_OP_FLD = 0, + FPU6_OP_FISTTP = 1, + FPU6_OP_FSTP = 3, + FPU6_OP_FLDCW = 5, + FPU6_OP_FISTP = 7, + + GROUP11_MOV = 0 +}; + +static const RegisterID noBase = rbp; +static const RegisterID hasSib = rsp; +static const RegisterID noIndex = rsp; +#ifdef JS_CODEGEN_X64 +static const RegisterID noBase2 = r13; +static const RegisterID hasSib2 = r12; +#endif + +enum ModRmMode { + ModRmMemoryNoDisp, + ModRmMemoryDisp8, + ModRmMemoryDisp32, + ModRmRegister +}; + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Encoding_x86_shared_h */ diff --git a/js/src/jit/x86-shared/LIR-x86-shared.h 
b/js/src/jit/x86-shared/LIR-x86-shared.h new file mode 100644 index 000000000..7408b8fc2 --- /dev/null +++ b/js/src/jit/x86-shared/LIR-x86-shared.h @@ -0,0 +1,421 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_LIR_x86_shared_h +#define jit_x86_shared_LIR_x86_shared_h + +namespace js { +namespace jit { + +class LDivI : public LBinaryMath<1> +{ + public: + LIR_HEADER(DivI) + + LDivI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp) { + setOperand(0, lhs); + setOperand(1, rhs); + setTemp(0, temp); + } + + const char* extraName() const { + if (mir()->isTruncated()) { + if (mir()->canBeNegativeZero()) { + return mir()->canBeNegativeOverflow() + ? "Truncate_NegativeZero_NegativeOverflow" + : "Truncate_NegativeZero"; + } + return mir()->canBeNegativeOverflow() ? "Truncate_NegativeOverflow" : "Truncate"; + } + if (mir()->canBeNegativeZero()) + return mir()->canBeNegativeOverflow() ? "NegativeZero_NegativeOverflow" : "NegativeZero"; + return mir()->canBeNegativeOverflow() ? "NegativeOverflow" : nullptr; + } + + const LDefinition* remainder() { + return getTemp(0); + } + MDiv* mir() const { + return mir_->toDiv(); + } +}; + +// Signed division by a power-of-two constant. 
+class LDivPowTwoI : public LBinaryMath<0> +{ + const int32_t shift_; + const bool negativeDivisor_; + + public: + LIR_HEADER(DivPowTwoI) + + LDivPowTwoI(const LAllocation& lhs, const LAllocation& lhsCopy, int32_t shift, bool negativeDivisor) + : shift_(shift), negativeDivisor_(negativeDivisor) + { + setOperand(0, lhs); + setOperand(1, lhsCopy); + } + + const LAllocation* numerator() { + return getOperand(0); + } + const LAllocation* numeratorCopy() { + return getOperand(1); + } + int32_t shift() const { + return shift_; + } + bool negativeDivisor() const { + return negativeDivisor_; + } + MDiv* mir() const { + return mir_->toDiv(); + } +}; + +class LDivOrModConstantI : public LInstructionHelper<1, 1, 1> +{ + const int32_t denominator_; + + public: + LIR_HEADER(DivOrModConstantI) + + LDivOrModConstantI(const LAllocation& lhs, int32_t denominator, const LDefinition& temp) + : denominator_(denominator) + { + setOperand(0, lhs); + setTemp(0, temp); + } + + const LAllocation* numerator() { + return getOperand(0); + } + int32_t denominator() const { + return denominator_; + } + MBinaryArithInstruction* mir() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + return static_cast(mir_); + } + bool canBeNegativeDividend() const { + if (mir_->isMod()) + return mir_->toMod()->canBeNegativeDividend(); + return mir_->toDiv()->canBeNegativeDividend(); + } +}; + +class LModI : public LBinaryMath<1> +{ + public: + LIR_HEADER(ModI) + + LModI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp) { + setOperand(0, lhs); + setOperand(1, rhs); + setTemp(0, temp); + } + + const char* extraName() const { + return mir()->isTruncated() ? "Truncated" : nullptr; + } + + const LDefinition* remainder() { + return getDef(0); + } + MMod* mir() const { + return mir_->toMod(); + } +}; + +// This class performs a simple x86 'div', yielding either a quotient or remainder depending on +// whether this instruction is defined to output eax (quotient) or edx (remainder). 
+class LUDivOrMod : public LBinaryMath<1> +{ + public: + LIR_HEADER(UDivOrMod); + + LUDivOrMod(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp) { + setOperand(0, lhs); + setOperand(1, rhs); + setTemp(0, temp); + } + + const LDefinition* remainder() { + return getTemp(0); + } + + const char* extraName() const { + return mir()->isTruncated() ? "Truncated" : nullptr; + } + + MBinaryArithInstruction* mir() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + return static_cast(mir_); + } + + bool canBeDivideByZero() const { + if (mir_->isMod()) + return mir_->toMod()->canBeDivideByZero(); + return mir_->toDiv()->canBeDivideByZero(); + } + + bool trapOnError() const { + if (mir_->isMod()) + return mir_->toMod()->trapOnError(); + return mir_->toDiv()->trapOnError(); + } + + wasm::TrapOffset trapOffset() const { + if (mir_->isMod()) + return mir_->toMod()->trapOffset(); + return mir_->toDiv()->trapOffset(); + } +}; + +class LUDivOrModConstant : public LInstructionHelper<1, 1, 1> +{ + const uint32_t denominator_; + + public: + LIR_HEADER(UDivOrModConstant) + + LUDivOrModConstant(const LAllocation &lhs, uint32_t denominator, const LDefinition& temp) + : denominator_(denominator) + { + setOperand(0, lhs); + setTemp(0, temp); + } + + const LAllocation *numerator() { + return getOperand(0); + } + uint32_t denominator() const { + return denominator_; + } + MBinaryArithInstruction *mir() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + return static_cast(mir_); + } + bool canBeNegativeDividend() const { + if (mir_->isMod()) + return mir_->toMod()->canBeNegativeDividend(); + return mir_->toDiv()->canBeNegativeDividend(); + } + bool trapOnError() const { + if (mir_->isMod()) + return mir_->toMod()->trapOnError(); + return mir_->toDiv()->trapOnError(); + } + wasm::TrapOffset trapOffset() const { + if (mir_->isMod()) + return mir_->toMod()->trapOffset(); + return mir_->toDiv()->trapOffset(); + } +}; + +class LModPowTwoI : public 
LInstructionHelper<1,1,0> +{ + const int32_t shift_; + + public: + LIR_HEADER(ModPowTwoI) + + LModPowTwoI(const LAllocation& lhs, int32_t shift) + : shift_(shift) + { + setOperand(0, lhs); + } + + int32_t shift() const { + return shift_; + } + const LDefinition* remainder() { + return getDef(0); + } + MMod* mir() const { + return mir_->toMod(); + } +}; + +// Takes a tableswitch with an integer to decide +class LTableSwitch : public LInstructionHelper<0, 1, 2> +{ + public: + LIR_HEADER(TableSwitch) + + LTableSwitch(const LAllocation& in, const LDefinition& inputCopy, + const LDefinition& jumpTablePointer, MTableSwitch* ins) + { + setOperand(0, in); + setTemp(0, inputCopy); + setTemp(1, jumpTablePointer); + setMir(ins); + } + + MTableSwitch* mir() const { + return mir_->toTableSwitch(); + } + + const LAllocation* index() { + return getOperand(0); + } + const LDefinition* tempInt() { + return getTemp(0); + } + const LDefinition* tempPointer() { + return getTemp(1); + } +}; + +// Takes a tableswitch with a value to decide +class LTableSwitchV : public LInstructionHelper<0, BOX_PIECES, 3> +{ + public: + LIR_HEADER(TableSwitchV) + + LTableSwitchV(const LBoxAllocation& input, const LDefinition& inputCopy, + const LDefinition& floatCopy, const LDefinition& jumpTablePointer, + MTableSwitch* ins) + { + setBoxOperand(InputValue, input); + setTemp(0, inputCopy); + setTemp(1, floatCopy); + setTemp(2, jumpTablePointer); + setMir(ins); + } + + MTableSwitch* mir() const { + return mir_->toTableSwitch(); + } + + static const size_t InputValue = 0; + + const LDefinition* tempInt() { + return getTemp(0); + } + const LDefinition* tempFloat() { + return getTemp(1); + } + const LDefinition* tempPointer() { + return getTemp(2); + } +}; + +class LGuardShape : public LInstructionHelper<0, 1, 0> +{ + public: + LIR_HEADER(GuardShape) + + explicit LGuardShape(const LAllocation& in) { + setOperand(0, in); + } + const MGuardShape* mir() const { + return mir_->toGuardShape(); + } +}; + +class 
LGuardObjectGroup : public LInstructionHelper<0, 1, 0> +{ + public: + LIR_HEADER(GuardObjectGroup) + + explicit LGuardObjectGroup(const LAllocation& in) { + setOperand(0, in); + } + const MGuardObjectGroup* mir() const { + return mir_->toGuardObjectGroup(); + } +}; + +class LMulI : public LBinaryMath<0, 1> +{ + public: + LIR_HEADER(MulI) + + LMulI(const LAllocation& lhs, const LAllocation& rhs, const LAllocation& lhsCopy) { + setOperand(0, lhs); + setOperand(1, rhs); + setOperand(2, lhsCopy); + } + + const char* extraName() const { + return (mir()->mode() == MMul::Integer) + ? "Integer" + : (mir()->canBeNegativeZero() ? "CanBeNegativeZero" : nullptr); + } + + MMul* mir() const { + return mir_->toMul(); + } + const LAllocation* lhsCopy() { + return this->getOperand(2); + } +}; + +// Constructs an int32x4 SIMD value. +class LSimdValueInt32x4 : public LInstructionHelper<1, 4, 0> +{ + public: + LIR_HEADER(SimdValueInt32x4) + LSimdValueInt32x4(const LAllocation& x, const LAllocation& y, + const LAllocation& z, const LAllocation& w) + { + setOperand(0, x); + setOperand(1, y); + setOperand(2, z); + setOperand(3, w); + } + + MSimdValueX4* mir() const { + return mir_->toSimdValueX4(); + } +}; + +// Constructs a float32x4 SIMD value, optimized for x86 family +class LSimdValueFloat32x4 : public LInstructionHelper<1, 4, 1> +{ + public: + LIR_HEADER(SimdValueFloat32x4) + LSimdValueFloat32x4(const LAllocation& x, const LAllocation& y, + const LAllocation& z, const LAllocation& w, + const LDefinition& copyY) + { + setOperand(0, x); + setOperand(1, y); + setOperand(2, z); + setOperand(3, w); + + setTemp(0, copyY); + } + + MSimdValueX4* mir() const { + return mir_->toSimdValueX4(); + } +}; + +class LInt64ToFloatingPoint : public LInstructionHelper<1, INT64_PIECES, 1> +{ + public: + LIR_HEADER(Int64ToFloatingPoint); + + explicit LInt64ToFloatingPoint(const LInt64Allocation& in, const LDefinition& temp) { + setInt64Operand(0, in); + setTemp(0, temp); + } + + MInt64ToFloatingPoint* 
mir() const { + return mir_->toInt64ToFloatingPoint(); + } + + const LDefinition* temp() { + return getTemp(0); + } +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_LIR_x86_shared_h */ diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp new file mode 100644 index 000000000..8e820070a --- /dev/null +++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp @@ -0,0 +1,1019 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/Lowering-x86-shared.h" + +#include "mozilla/MathAlgorithms.h" + +#include "jit/MIR.h" + +#include "jit/shared/Lowering-shared-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::Abs; +using mozilla::FloorLog2; +using mozilla::Swap; + +LTableSwitch* +LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy, + MTableSwitch* tableswitch) +{ + return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch); +} + +LTableSwitchV* +LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch) +{ + return new(alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), + temp(), tempDouble(), temp(), tableswitch); +} + +void +LIRGeneratorX86Shared::visitGuardShape(MGuardShape* ins) +{ + MOZ_ASSERT(ins->object()->type() == MIRType::Object); + + LGuardShape* guard = new(alloc()) LGuardShape(useRegisterAtStart(ins->object())); + assignSnapshot(guard, ins->bailoutKind()); + add(guard, ins); + redefine(ins, ins->object()); +} + +void +LIRGeneratorX86Shared::visitGuardObjectGroup(MGuardObjectGroup* ins) +{ + MOZ_ASSERT(ins->object()->type() == MIRType::Object); + + LGuardObjectGroup* guard = new(alloc()) 
LGuardObjectGroup(useRegisterAtStart(ins->object())); + assignSnapshot(guard, ins->bailoutKind()); + add(guard, ins); + redefine(ins, ins->object()); +} + +void +LIRGeneratorX86Shared::visitPowHalf(MPowHalf* ins) +{ + MDefinition* input = ins->input(); + MOZ_ASSERT(input->type() == MIRType::Double); + LPowHalfD* lir = new(alloc()) LPowHalfD(useRegisterAtStart(input)); + define(lir, ins); +} + +void +LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs) +{ + ins->setOperand(0, useRegisterAtStart(lhs)); + + // shift operator should be constant or in register ecx + // x86 can't shift a non-ecx register + if (rhs->isConstant()) + ins->setOperand(1, useOrConstantAtStart(rhs)); + else + ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx)); + + defineReuseInput(ins, mir, 0); +} + +template +void +LIRGeneratorX86Shared::lowerForShiftInt64(LInstructionHelper* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs) +{ + ins->setInt64Operand(0, useInt64RegisterAtStart(lhs)); +#if defined(JS_NUNBOX32) + if (mir->isRotate()) + ins->setTemp(0, temp()); +#endif + + static_assert(LShiftI64::Rhs == INT64_PIECES, "Assume Rhs is located at INT64_PIECES."); + static_assert(LRotateI64::Count == INT64_PIECES, "Assume Count is located at INT64_PIECES."); + + // shift operator should be constant or in register ecx + // x86 can't shift a non-ecx register + if (rhs->isConstant()) { + ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs)); + } else { + // The operands are int64, but we only care about the lower 32 bits of + // the RHS. On 32-bit, the code below will load that part in ecx and + // will discard the upper half. 
+ ensureDefined(rhs); + LUse use(ecx); + use.setVirtualRegister(rhs->virtualRegister()); + ins->setOperand(INT64_PIECES, use); + } + + defineInt64ReuseInput(ins, mir, 0); +} + +template void LIRGeneratorX86Shared::lowerForShiftInt64( + LInstructionHelper* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); +template void LIRGeneratorX86Shared::lowerForShiftInt64( + LInstructionHelper* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); + +void +LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, + MDefinition* input) +{ + ins->setOperand(0, useRegisterAtStart(input)); + defineReuseInput(ins, mir, 0); +} + +void +LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs) +{ + ins->setOperand(0, useRegisterAtStart(lhs)); + ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs)); + defineReuseInput(ins, mir, 0); +} + +template +void +LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs) +{ + // Without AVX, we'll need to use the x86 encodings where one of the + // inputs must be the same location as the output. + if (!Assembler::HasAVX()) { + ins->setOperand(0, useRegisterAtStart(lhs)); + ins->setOperand(1, lhs != rhs ? 
use(rhs) : useAtStart(rhs)); + defineReuseInput(ins, mir, 0); + } else { + ins->setOperand(0, useRegisterAtStart(lhs)); + ins->setOperand(1, useAtStart(rhs)); + define(ins, mir); + } +} + +template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); +template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); + +void +LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir, MDefinition* lhs, MDefinition* rhs) +{ + lowerForALU(ins, mir, lhs, rhs); +} + +void +LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir, MDefinition* lhs, MDefinition* rhs) +{ + // Swap the operands around to fit the instructions that x86 actually has. + // We do this here, before register allocation, so that we don't need + // temporaries and copying afterwards. + switch (mir->operation()) { + case MSimdBinaryComp::greaterThan: + case MSimdBinaryComp::greaterThanOrEqual: + mir->reverse(); + Swap(lhs, rhs); + break; + default: + break; + } + + lowerForFPU(ins, mir, lhs, rhs); +} + +void +LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir, + MDefinition* lhs, MDefinition* rhs) +{ + baab->setOperand(0, useRegisterAtStart(lhs)); + baab->setOperand(1, useRegisterOrConstantAtStart(rhs)); + add(baab, mir); +} + +void +LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs) +{ + // Note: If we need a negative zero check, lhs is used twice. + LAllocation lhsCopy = mul->canBeNegativeZero() ? 
use(lhs) : LAllocation(); + LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy); + if (mul->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineReuseInput(lir, mul, 0); +} + +void +LIRGeneratorX86Shared::lowerDivI(MDiv* div) +{ + if (div->isUnsigned()) { + lowerUDiv(div); + return; + } + + // Division instructions are slow. Division by constant denominators can be + // rewritten to use other instructions. + if (div->rhs()->isConstant()) { + int32_t rhs = div->rhs()->toConstant()->toInt32(); + + // Division by powers of two can be done by shifting, and division by + // other numbers can be done by a reciprocal multiplication technique. + int32_t shift = FloorLog2(Abs(rhs)); + if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) { + LAllocation lhs = useRegisterAtStart(div->lhs()); + LDivPowTwoI* lir; + if (!div->canBeNegativeDividend()) { + // Numerator is unsigned, so does not need adjusting. + lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0); + } else { + // Numerator is signed, and needs adjusting, and an extra + // lhs copy register is needed. 
+ lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0); + } + if (div->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineReuseInput(lir, div, 0); + return; + } + if (rhs != 0) { + LDivOrModConstantI* lir; + lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax)); + if (div->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineFixed(lir, div, LAllocation(AnyRegister(edx))); + return; + } + } + + LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()), + tempFixed(edx)); + if (div->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineFixed(lir, div, LAllocation(AnyRegister(eax))); +} + +void +LIRGeneratorX86Shared::lowerModI(MMod* mod) +{ + if (mod->isUnsigned()) { + lowerUMod(mod); + return; + } + + if (mod->rhs()->isConstant()) { + int32_t rhs = mod->rhs()->toConstant()->toInt32(); + int32_t shift = FloorLog2(Abs(rhs)); + if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) { + LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift); + if (mod->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineReuseInput(lir, mod, 0); + return; + } + if (rhs != 0) { + LDivOrModConstantI* lir; + lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx)); + if (mod->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineFixed(lir, mod, LAllocation(AnyRegister(eax))); + return; + } + } + + LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()), + useRegister(mod->rhs()), + tempFixed(eax)); + if (mod->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineFixed(lir, mod, LAllocation(AnyRegister(edx))); +} + +void +LIRGeneratorX86Shared::visitWasmSelect(MWasmSelect* ins) +{ + if (ins->type() == MIRType::Int64) { + auto* lir = new(alloc()) LWasmSelectI64(useInt64RegisterAtStart(ins->trueExpr()), + useInt64(ins->falseExpr()), + useRegister(ins->condExpr())); + + defineInt64ReuseInput(lir, 
ins, LWasmSelectI64::TrueExprIndex); + return; + } + + auto* lir = new(alloc()) LWasmSelect(useRegisterAtStart(ins->trueExpr()), + use(ins->falseExpr()), + useRegister(ins->condExpr())); + + defineReuseInput(lir, ins, LWasmSelect::TrueExprIndex); +} + +void +LIRGeneratorX86Shared::visitAsmJSNeg(MAsmJSNeg* ins) +{ + switch (ins->type()) { + case MIRType::Int32: + defineReuseInput(new(alloc()) LNegI(useRegisterAtStart(ins->input())), ins, 0); + break; + case MIRType::Float32: + defineReuseInput(new(alloc()) LNegF(useRegisterAtStart(ins->input())), ins, 0); + break; + case MIRType::Double: + defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0); + break; + default: + MOZ_CRASH(); + } +} + +void +LIRGeneratorX86Shared::lowerWasmLoad(MWasmLoad* ins) +{ + MOZ_ASSERT(ins->type() != MIRType::Int64); + + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + auto* lir = new(alloc()) LWasmLoad(useRegisterOrZeroAtStart(base)); + define(lir, ins); +} + +void +LIRGeneratorX86Shared::lowerUDiv(MDiv* div) +{ + if (div->rhs()->isConstant()) { + uint32_t rhs = div->rhs()->toConstant()->toInt32(); + int32_t shift = FloorLog2(rhs); + + LAllocation lhs = useRegisterAtStart(div->lhs()); + if (rhs != 0 && uint32_t(1) << shift == rhs) { + LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false); + if (div->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineReuseInput(lir, div, 0); + } else { + LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()), + rhs, tempFixed(eax)); + if (div->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineFixed(lir, div, LAllocation(AnyRegister(edx))); + } + return; + } + + LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()), + useRegister(div->rhs()), + tempFixed(edx)); + if (div->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineFixed(lir, div, LAllocation(AnyRegister(eax))); +} + +void 
+LIRGeneratorX86Shared::lowerUMod(MMod* mod) +{ + if (mod->rhs()->isConstant()) { + uint32_t rhs = mod->rhs()->toConstant()->toInt32(); + int32_t shift = FloorLog2(rhs); + + if (rhs != 0 && uint32_t(1) << shift == rhs) { + LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift); + if (mod->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineReuseInput(lir, mod, 0); + } else { + LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()), + rhs, tempFixed(edx)); + if (mod->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineFixed(lir, mod, LAllocation(AnyRegister(eax))); + } + return; + } + + LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()), + useRegister(mod->rhs()), + tempFixed(eax)); + if (mod->fallible()) + assignSnapshot(lir, Bailout_DoubleOutput); + defineFixed(lir, mod, LAllocation(AnyRegister(edx))); +} + +void +LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) +{ + MDefinition* lhs = mir->lhs(); + MDefinition* rhs = mir->rhs(); + + MOZ_ASSERT(lhs->type() == MIRType::Int32); + MOZ_ASSERT(rhs->type() == MIRType::Int32); + MOZ_ASSERT(mir->type() == MIRType::Double); + +#ifdef JS_CODEGEN_X64 + MOZ_ASSERT(ecx == rcx); +#endif + + LUse lhsUse = useRegisterAtStart(lhs); + LAllocation rhsAlloc = rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx); + + LUrshD* lir = new(alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0)); + define(lir, mir); +} + +void +LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins) +{ + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Double); + + LDefinition maybeTemp = Assembler::HasSSE3() ? 
LDefinition::BogusTemp() : tempDouble(); + define(new(alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins); +} + +void +LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins) +{ + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Float32); + + LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32(); + define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins); +} + +void +LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins, + bool useI386ByteRegisters) +{ + MOZ_ASSERT(ins->arrayType() != Scalar::Float32); + MOZ_ASSERT(ins->arrayType() != Scalar::Float64); + + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::Int32); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = useRegisterOrConstant(ins->index()); + + // If the target is a floating register then we need a temp at the + // lower level; that temp must be eax. + // + // Otherwise the target (if used) is an integer register, which + // must be eax. If the target is not used the machine code will + // still clobber eax, so just pretend it's used. + // + // oldval must be in a register. + // + // newval must be in a register. If the source is a byte array + // then newval must be a register that has a byte size: on x86 + // this must be ebx, ecx, or edx (eax is taken for the output). + // + // Bug #1077036 describes some further optimization opportunities. 
+ + bool fixedOutput = false; + LDefinition tempDef = LDefinition::BogusTemp(); + LAllocation newval; + if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) { + tempDef = tempFixed(eax); + newval = useRegister(ins->newval()); + } else { + fixedOutput = true; + if (useI386ByteRegisters && ins->isByteArray()) + newval = useFixed(ins->newval(), ebx); + else + newval = useRegister(ins->newval()); + } + + const LAllocation oldval = useRegister(ins->oldval()); + + LCompareExchangeTypedArrayElement* lir = + new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef); + + if (fixedOutput) + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + else + define(lir, ins); +} + +void +LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins, + bool useI386ByteRegisters) +{ + MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32); + + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::Int32); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = useRegisterOrConstant(ins->index()); + const LAllocation value = useRegister(ins->value()); + + // The underlying instruction is XCHG, which can operate on any + // register. + // + // If the target is a floating register (for Uint32) then we need + // a temp into which to exchange. + // + // If the source is a byte array then we need a register that has + // a byte size; in this case -- on x86 only -- pin the output to + // an appropriate register and use that as a temp in the back-end. + + LDefinition tempDef = LDefinition::BogusTemp(); + if (ins->arrayType() == Scalar::Uint32) { + // This restriction is bug 1077305. 
+ MOZ_ASSERT(ins->type() == MIRType::Double); + tempDef = temp(); + } + + LAtomicExchangeTypedArrayElement* lir = + new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef); + + if (useI386ByteRegisters && ins->isByteArray()) + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + else + define(lir, ins); +} + +void +LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins, + bool useI386ByteRegisters) +{ + MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped); + MOZ_ASSERT(ins->arrayType() != Scalar::Float32); + MOZ_ASSERT(ins->arrayType() != Scalar::Float64); + + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::Int32); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = useRegisterOrConstant(ins->index()); + + // Case 1: the result of the operation is not used. + // + // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND, + // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case. + + if (!ins->hasUses()) { + LAllocation value; + if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant()) + value = useFixed(ins->value(), ebx); + else + value = useRegisterOrConstant(ins->value()); + + LAtomicTypedArrayElementBinopForEffect* lir = + new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value); + + add(lir, ins); + return; + } + + // Case 2: the result of the operation is used. + // + // For ADD and SUB we'll use XADD: + // + // movl src, output + // lock xaddl output, mem + // + // For the 8-bit variants XADD needs a byte register for the output. 
+ // + // For AND/OR/XOR we need to use a CMPXCHG loop: + // + // movl *mem, eax + // L: mov eax, temp + // andl src, temp + // lock cmpxchg temp, mem ; reads eax also + // jnz L + // ; result in eax + // + // Note the placement of L, cmpxchg will update eax with *mem if + // *mem does not have the expected value, so reloading it at the + // top of the loop would be redundant. + // + // If the array is not a uint32 array then: + // - eax should be the output (one result of the cmpxchg) + // - there is a temp, which must have a byte register if + // the array has 1-byte elements elements + // + // If the array is a uint32 array then: + // - eax is the first temp + // - we also need a second temp + // + // There are optimization opportunities: + // - better register allocation in the x86 8-bit case, Bug #1077036. + + bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp); + bool fixedOutput = true; + bool reuseInput = false; + LDefinition tempDef1 = LDefinition::BogusTemp(); + LDefinition tempDef2 = LDefinition::BogusTemp(); + LAllocation value; + + if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) { + value = useRegisterOrConstant(ins->value()); + fixedOutput = false; + if (bitOp) { + tempDef1 = tempFixed(eax); + tempDef2 = temp(); + } else { + tempDef1 = temp(); + } + } else if (useI386ByteRegisters && ins->isByteArray()) { + if (ins->value()->isConstant()) + value = useRegisterOrConstant(ins->value()); + else + value = useFixed(ins->value(), ebx); + if (bitOp) + tempDef1 = tempFixed(ecx); + } else if (bitOp) { + value = useRegisterOrConstant(ins->value()); + tempDef1 = temp(); + } else if (ins->value()->isConstant()) { + fixedOutput = false; + value = useRegisterOrConstant(ins->value()); + } else { + fixedOutput = false; + reuseInput = true; + value = useRegisterAtStart(ins->value()); + } + + LAtomicTypedArrayElementBinop* lir = + new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, 
tempDef1, tempDef2); + + if (fixedOutput) + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + else if (reuseInput) + defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp); + else + define(lir, ins); +} + +void +LIRGeneratorX86Shared::visitSimdInsertElement(MSimdInsertElement* ins) +{ + MOZ_ASSERT(IsSimdType(ins->type())); + + LUse vec = useRegisterAtStart(ins->vector()); + LUse val = useRegister(ins->value()); + switch (ins->type()) { + case MIRType::Int8x16: + case MIRType::Bool8x16: + // When SSE 4.1 is not available, we need to go via the stack. + // This requires the value to be inserted to be in %eax-%edx. + // Pick %ebx since other instructions use %eax or %ecx hard-wired. +#if defined(JS_CODEGEN_X86) + if (!AssemblerX86Shared::HasSSE41()) + val = useFixed(ins->value(), ebx); +#endif + defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0); + break; + case MIRType::Int16x8: + case MIRType::Int32x4: + case MIRType::Bool16x8: + case MIRType::Bool32x4: + defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0); + break; + case MIRType::Float32x4: + defineReuseInput(new(alloc()) LSimdInsertElementF(vec, val), ins, 0); + break; + default: + MOZ_CRASH("Unknown SIMD kind when generating constant"); + } +} + +void +LIRGeneratorX86Shared::visitSimdExtractElement(MSimdExtractElement* ins) +{ + MOZ_ASSERT(IsSimdType(ins->input()->type())); + MOZ_ASSERT(!IsSimdType(ins->type())); + + switch (ins->input()->type()) { + case MIRType::Int8x16: + case MIRType::Int16x8: + case MIRType::Int32x4: { + MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable); + LUse use = useRegisterAtStart(ins->input()); + if (ins->type() == MIRType::Double) { + // Extract an Uint32 lane into a double. 
+ MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned); + define(new (alloc()) LSimdExtractElementU2D(use, temp()), ins); + } else { + auto* lir = new (alloc()) LSimdExtractElementI(use); +#if defined(JS_CODEGEN_X86) + // On x86 (32-bit), we may need to use movsbl or movzbl instructions + // to sign or zero extend the extracted lane to 32 bits. The 8-bit + // version of these instructions require a source register that is + // %al, %bl, %cl, or %dl. + // Fix it to %ebx since we can't express that constraint better. + if (ins->input()->type() == MIRType::Int8x16) { + defineFixed(lir, ins, LAllocation(AnyRegister(ebx))); + return; + } +#endif + define(lir, ins); + } + break; + } + case MIRType::Float32x4: { + MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable); + LUse use = useRegisterAtStart(ins->input()); + define(new(alloc()) LSimdExtractElementF(use), ins); + break; + } + case MIRType::Bool8x16: + case MIRType::Bool16x8: + case MIRType::Bool32x4: { + MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable); + LUse use = useRegisterAtStart(ins->input()); + define(new(alloc()) LSimdExtractElementB(use), ins); + break; + } + default: + MOZ_CRASH("Unknown SIMD kind when extracting element"); + } +} + +void +LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins) +{ + MOZ_ASSERT(IsSimdType(ins->lhs()->type())); + MOZ_ASSERT(IsSimdType(ins->rhs()->type())); + MOZ_ASSERT(IsSimdType(ins->type())); + + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + + if (ins->isCommutative()) + ReorderCommutative(&lhs, &rhs, ins); + + switch (ins->type()) { + case MIRType::Int8x16: { + LSimdBinaryArithIx16* lir = new (alloc()) LSimdBinaryArithIx16(); + lir->setTemp(0, LDefinition::BogusTemp()); + lowerForFPU(lir, ins, lhs, rhs); + return; + } + + case MIRType::Int16x8: { + LSimdBinaryArithIx8* lir = new (alloc()) LSimdBinaryArithIx8(); + lir->setTemp(0, LDefinition::BogusTemp()); + lowerForFPU(lir, ins, lhs, rhs); + return; + } + + case MIRType::Int32x4: 
{ + LSimdBinaryArithIx4* lir = new (alloc()) LSimdBinaryArithIx4(); + bool needsTemp = + ins->operation() == MSimdBinaryArith::Op_mul && !MacroAssembler::HasSSE41(); + lir->setTemp(0, needsTemp ? temp(LDefinition::SIMD128INT) : LDefinition::BogusTemp()); + lowerForFPU(lir, ins, lhs, rhs); + return; + } + + case MIRType::Float32x4: { + LSimdBinaryArithFx4* lir = new (alloc()) LSimdBinaryArithFx4(); + + bool needsTemp = ins->operation() == MSimdBinaryArith::Op_max || + ins->operation() == MSimdBinaryArith::Op_minNum || + ins->operation() == MSimdBinaryArith::Op_maxNum; + lir->setTemp(0, + needsTemp ? temp(LDefinition::SIMD128FLOAT) : LDefinition::BogusTemp()); + lowerForFPU(lir, ins, lhs, rhs); + return; + } + + default: + MOZ_CRASH("unknown simd type on binary arith operation"); + } +} + +void +LIRGeneratorX86Shared::visitSimdBinarySaturating(MSimdBinarySaturating* ins) +{ + MOZ_ASSERT(IsSimdType(ins->lhs()->type())); + MOZ_ASSERT(IsSimdType(ins->rhs()->type())); + MOZ_ASSERT(IsSimdType(ins->type())); + + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + + if (ins->isCommutative()) + ReorderCommutative(&lhs, &rhs, ins); + + LSimdBinarySaturating* lir = new (alloc()) LSimdBinarySaturating(); + lowerForFPU(lir, ins, lhs, rhs); +} + +void +LIRGeneratorX86Shared::visitSimdSelect(MSimdSelect* ins) +{ + MOZ_ASSERT(IsSimdType(ins->type())); + + LSimdSelect* lins = new(alloc()) LSimdSelect; + MDefinition* r0 = ins->getOperand(0); + MDefinition* r1 = ins->getOperand(1); + MDefinition* r2 = ins->getOperand(2); + + lins->setOperand(0, useRegister(r0)); + lins->setOperand(1, useRegister(r1)); + lins->setOperand(2, useRegister(r2)); + lins->setTemp(0, temp(LDefinition::SIMD128FLOAT)); + + define(lins, ins); +} + +void +LIRGeneratorX86Shared::visitSimdSplat(MSimdSplat* ins) +{ + LAllocation x = useRegisterAtStart(ins->getOperand(0)); + + switch (ins->type()) { + case MIRType::Int8x16: + define(new (alloc()) LSimdSplatX16(x), ins); + break; + case MIRType::Int16x8: 
+ define(new (alloc()) LSimdSplatX8(x), ins); + break; + case MIRType::Int32x4: + case MIRType::Float32x4: + case MIRType::Bool8x16: + case MIRType::Bool16x8: + case MIRType::Bool32x4: + // Use the SplatX4 instruction for all boolean splats. Since the input + // value is a 32-bit int that is either 0 or -1, the X4 splat gives + // the right result for all boolean geometries. + // For floats, (Non-AVX) codegen actually wants the input and the output + // to be in the same register, but we can't currently use + // defineReuseInput because they have different types (scalar vs + // vector), so a spill slot for one may not be suitable for the other. + define(new (alloc()) LSimdSplatX4(x), ins); + break; + default: + MOZ_CRASH("Unknown SIMD kind"); + } +} + +void +LIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4* ins) +{ + switch (ins->type()) { + case MIRType::Float32x4: { + // Ideally, x would be used at start and reused for the output, however + // register allocation currently doesn't permit us to tie together two + // virtual registers with different types. + LAllocation x = useRegister(ins->getOperand(0)); + LAllocation y = useRegister(ins->getOperand(1)); + LAllocation z = useRegister(ins->getOperand(2)); + LAllocation w = useRegister(ins->getOperand(3)); + LDefinition t = temp(LDefinition::SIMD128FLOAT); + define(new (alloc()) LSimdValueFloat32x4(x, y, z, w, t), ins); + break; + } + case MIRType::Bool32x4: + case MIRType::Int32x4: { + // No defineReuseInput => useAtStart for everyone. 
+ LAllocation x = useRegisterAtStart(ins->getOperand(0)); + LAllocation y = useRegisterAtStart(ins->getOperand(1)); + LAllocation z = useRegisterAtStart(ins->getOperand(2)); + LAllocation w = useRegisterAtStart(ins->getOperand(3)); + define(new(alloc()) LSimdValueInt32x4(x, y, z, w), ins); + break; + } + default: + MOZ_CRASH("Unknown SIMD kind"); + } +} + +void +LIRGeneratorX86Shared::visitSimdSwizzle(MSimdSwizzle* ins) +{ + MOZ_ASSERT(IsSimdType(ins->input()->type())); + MOZ_ASSERT(IsSimdType(ins->type())); + + if (IsIntegerSimdType(ins->input()->type())) { + LUse use = useRegisterAtStart(ins->input()); + LSimdSwizzleI* lir = new (alloc()) LSimdSwizzleI(use); + define(lir, ins); + // We need a GPR temp register for pre-SSSE3 codegen (no vpshufb). + if (Assembler::HasSSSE3()) { + lir->setTemp(0, LDefinition::BogusTemp()); + } else { + // The temp must be a GPR usable with 8-bit loads and stores. +#if defined(JS_CODEGEN_X86) + lir->setTemp(0, tempFixed(ebx)); +#else + lir->setTemp(0, temp()); +#endif + } + } else if (ins->input()->type() == MIRType::Float32x4) { + LUse use = useRegisterAtStart(ins->input()); + LSimdSwizzleF* lir = new (alloc()) LSimdSwizzleF(use); + define(lir, ins); + lir->setTemp(0, LDefinition::BogusTemp()); + } else { + MOZ_CRASH("Unknown SIMD kind when getting lane"); + } +} + +void +LIRGeneratorX86Shared::visitSimdShuffle(MSimdShuffle* ins) +{ + MOZ_ASSERT(IsSimdType(ins->lhs()->type())); + MOZ_ASSERT(IsSimdType(ins->rhs()->type())); + MOZ_ASSERT(IsSimdType(ins->type())); + if (ins->type() == MIRType::Int32x4 || ins->type() == MIRType::Float32x4) { + bool zFromLHS = ins->lane(2) < 4; + bool wFromLHS = ins->lane(3) < 4; + uint32_t lanesFromLHS = (ins->lane(0) < 4) + (ins->lane(1) < 4) + zFromLHS + wFromLHS; + + LSimdShuffleX4* lir = new (alloc()) LSimdShuffleX4(); + lowerForFPU(lir, ins, ins->lhs(), ins->rhs()); + + // See codegen for requirements details. + LDefinition temp = + (lanesFromLHS == 3) ? 
tempCopy(ins->rhs(), 1) : LDefinition::BogusTemp(); + lir->setTemp(0, temp); + } else { + MOZ_ASSERT(ins->type() == MIRType::Int8x16 || ins->type() == MIRType::Int16x8); + LSimdShuffle* lir = new (alloc()) LSimdShuffle(); + lir->setOperand(0, useRegister(ins->lhs())); + lir->setOperand(1, useRegister(ins->rhs())); + define(lir, ins); + // We need a GPR temp register for pre-SSSE3 codegen, and an SSE temp + // when using pshufb. + if (Assembler::HasSSSE3()) { + lir->setTemp(0, temp(LDefinition::SIMD128INT)); + } else { + // The temp must be a GPR usable with 8-bit loads and stores. +#if defined(JS_CODEGEN_X86) + lir->setTemp(0, tempFixed(ebx)); +#else + lir->setTemp(0, temp()); +#endif + } + } +} + +void +LIRGeneratorX86Shared::visitSimdGeneralShuffle(MSimdGeneralShuffle* ins) +{ + MOZ_ASSERT(IsSimdType(ins->type())); + + LSimdGeneralShuffleBase* lir; + if (IsIntegerSimdType(ins->type())) { +#if defined(JS_CODEGEN_X86) + // The temp register must be usable with 8-bit load and store + // instructions, so one of %eax-%edx. + LDefinition t; + if (ins->type() == MIRType::Int8x16) + t = tempFixed(ebx); + else + t = temp(); +#else + LDefinition t = temp(); +#endif + lir = new (alloc()) LSimdGeneralShuffleI(t); + } else if (ins->type() == MIRType::Float32x4) { + lir = new (alloc()) LSimdGeneralShuffleF(temp()); + } else { + MOZ_CRASH("Unknown SIMD kind when doing a shuffle"); + } + + if (!lir->init(alloc(), ins->numVectors() + ins->numLanes())) + return; + + for (unsigned i = 0; i < ins->numVectors(); i++) { + MOZ_ASSERT(IsSimdType(ins->vector(i)->type())); + lir->setOperand(i, useRegister(ins->vector(i))); + } + + for (unsigned i = 0; i < ins->numLanes(); i++) { + MOZ_ASSERT(ins->lane(i)->type() == MIRType::Int32); + // Note that there can be up to 16 lane arguments, so we can't assume + // that they all get an allocated register. 
+ lir->setOperand(i + ins->numVectors(), use(ins->lane(i))); + } + + assignSnapshot(lir, Bailout_BoundsCheck); + define(lir, ins); +} + +void +LIRGeneratorX86Shared::visitCopySign(MCopySign* ins) +{ + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + + MOZ_ASSERT(IsFloatingPointType(lhs->type())); + MOZ_ASSERT(lhs->type() == rhs->type()); + MOZ_ASSERT(lhs->type() == ins->type()); + + LInstructionHelper<1, 2, 2>* lir; + if (lhs->type() == MIRType::Double) + lir = new(alloc()) LCopySignD(); + else + lir = new(alloc()) LCopySignF(); + + // As lowerForFPU, but we want rhs to be in a FP register too. + lir->setOperand(0, useRegisterAtStart(lhs)); + lir->setOperand(1, lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs)); + if (!Assembler::HasAVX()) + defineReuseInput(lir, ins, 0); + else + define(lir, ins); +} diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.h b/js/src/jit/x86-shared/Lowering-x86-shared.h new file mode 100644 index 000000000..275cee301 --- /dev/null +++ b/js/src/jit/x86-shared/Lowering-x86-shared.h @@ -0,0 +1,81 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_x86_shared_Lowering_x86_shared_h +#define jit_x86_shared_Lowering_x86_shared_h + +#include "jit/shared/Lowering-shared.h" + +namespace js { +namespace jit { + +class LIRGeneratorX86Shared : public LIRGeneratorShared +{ + protected: + LIRGeneratorX86Shared(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph) + : LIRGeneratorShared(gen, graph, lirGraph) + {} + + LTableSwitch* newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy, + MTableSwitch* ins); + LTableSwitchV* newLTableSwitchV(MTableSwitch* ins); + + void visitGuardShape(MGuardShape* ins); + void visitGuardObjectGroup(MGuardObjectGroup* ins); + void visitPowHalf(MPowHalf* ins); + void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs, + MDefinition* rhs); + void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input); + void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs, + MDefinition* rhs); + + template + void lowerForShiftInt64(LInstructionHelper* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); + + template + void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, MDefinition* lhs, + MDefinition* rhs); + void lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir, + MDefinition* lhs, MDefinition* rhs); + void lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir, + MDefinition* lhs, MDefinition* rhs); + void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir, + MDefinition* lhs, MDefinition* rhs); + void visitAsmJSNeg(MAsmJSNeg* ins); + void lowerWasmLoad(MWasmLoad* ins); + void visitWasmSelect(MWasmSelect* ins); + void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs); + void lowerDivI(MDiv* div); + void lowerModI(MMod* mod); + void lowerUDiv(MDiv* div); + void lowerUMod(MMod* mod); + void lowerUrshD(MUrsh* mir); + void lowerTruncateDToInt32(MTruncateToInt32* ins); + void lowerTruncateFToInt32(MTruncateToInt32* 
ins); + void visitSimdInsertElement(MSimdInsertElement* ins); + void visitSimdExtractElement(MSimdExtractElement* ins); + void visitSimdBinaryArith(MSimdBinaryArith* ins); + void visitSimdBinarySaturating(MSimdBinarySaturating* ins); + void visitSimdSelect(MSimdSelect* ins); + void visitSimdSplat(MSimdSplat* ins); + void visitSimdSwizzle(MSimdSwizzle* ins); + void visitSimdShuffle(MSimdShuffle* ins); + void visitSimdGeneralShuffle(MSimdGeneralShuffle* ins); + void visitSimdValueX4(MSimdValueX4* ins); + void lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins, + bool useI386ByteRegisters); + void lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins, + bool useI386ByteRegisters); + void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins, + bool useI386ByteRegisters); + void visitCopySign(MCopySign* ins); +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Lowering_x86_shared_h */ diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h new file mode 100644 index 000000000..33bfd46db --- /dev/null +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h @@ -0,0 +1,1284 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_x86_shared_MacroAssembler_x86_shared_inl_h +#define jit_x86_shared_MacroAssembler_x86_shared_inl_h + +#include "jit/x86-shared/MacroAssembler-x86-shared.h" + +namespace js { +namespace jit { + +//{{{ check_macroassembler_style +// =============================================================== +// Move instructions + +void +MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) +{ + vmovd(src, dest); +} + +void +MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) +{ + vmovd(src, dest); +} + +void +MacroAssembler::move8SignExtend(Register src, Register dest) +{ + movsbl(src, dest); +} + +void +MacroAssembler::move16SignExtend(Register src, Register dest) +{ + movswl(src, dest); +} + +// =============================================================== +// Logical instructions + +void +MacroAssembler::not32(Register reg) +{ + notl(reg); +} + +void +MacroAssembler::and32(Register src, Register dest) +{ + andl(src, dest); +} + +void +MacroAssembler::and32(Imm32 imm, Register dest) +{ + andl(imm, dest); +} + +void +MacroAssembler::and32(Imm32 imm, const Address& dest) +{ + andl(imm, Operand(dest)); +} + +void +MacroAssembler::and32(const Address& src, Register dest) +{ + andl(Operand(src), dest); +} + +void +MacroAssembler::or32(Register src, Register dest) +{ + orl(src, dest); +} + +void +MacroAssembler::or32(Imm32 imm, Register dest) +{ + orl(imm, dest); +} + +void +MacroAssembler::or32(Imm32 imm, const Address& dest) +{ + orl(imm, Operand(dest)); +} + +void +MacroAssembler::xor32(Register src, Register dest) +{ + xorl(src, dest); +} + +void +MacroAssembler::xor32(Imm32 imm, Register dest) +{ + xorl(imm, dest); +} + +void +MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) +{ + // On very recent chips (Haswell and newer?) there is actually an + // LZCNT instruction that does all of this. + + bsrl(src, dest); + if (!knownNotZero) { + // If the source is zero then bsrl leaves garbage in the destination. 
+ Label nonzero; + j(Assembler::NonZero, &nonzero); + movl(Imm32(0x3F), dest); + bind(&nonzero); + } + xorl(Imm32(0x1F), dest); +} + +void +MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) +{ + bsfl(src, dest); + if (!knownNotZero) { + Label nonzero; + j(Assembler::NonZero, &nonzero); + movl(Imm32(32), dest); + bind(&nonzero); + } +} + +void +MacroAssembler::popcnt32(Register input, Register output, Register tmp) +{ + if (AssemblerX86Shared::HasPOPCNT()) { + popcntl(input, output); + return; + } + + MOZ_ASSERT(tmp != InvalidReg); + + // Equivalent to mozilla::CountPopulation32() + + movl(input, tmp); + if (input != output) + movl(input, output); + shrl(Imm32(1), output); + andl(Imm32(0x55555555), output); + subl(output, tmp); + movl(tmp, output); + andl(Imm32(0x33333333), output); + shrl(Imm32(2), tmp); + andl(Imm32(0x33333333), tmp); + addl(output, tmp); + movl(tmp, output); + shrl(Imm32(4), output); + addl(tmp, output); + andl(Imm32(0xF0F0F0F), output); + imull(Imm32(0x1010101), output, output); + shrl(Imm32(24), output); +} + +// =============================================================== +// Arithmetic instructions + +void +MacroAssembler::add32(Register src, Register dest) +{ + addl(src, dest); +} + +void +MacroAssembler::add32(Imm32 imm, Register dest) +{ + addl(imm, dest); +} + +void +MacroAssembler::add32(Imm32 imm, const Address& dest) +{ + addl(imm, Operand(dest)); +} + +void +MacroAssembler::add32(Imm32 imm, const AbsoluteAddress& dest) +{ + addl(imm, Operand(dest)); +} + +void +MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) +{ + vaddss(src, dest, dest); +} + +void +MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) +{ + vaddsd(src, dest, dest); +} + +void +MacroAssembler::sub32(Register src, Register dest) +{ + subl(src, dest); +} + +void +MacroAssembler::sub32(Imm32 imm, Register dest) +{ + subl(imm, dest); +} + +void +MacroAssembler::sub32(const Address& src, Register dest) +{ + 
subl(Operand(src), dest);
+}
+
+void
+MacroAssembler::subDouble(FloatRegister src, FloatRegister dest)
+{
+    vsubsd(src, dest, dest);
+}
+
+void
+MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest)
+{
+    vsubss(src, dest, dest);
+}
+
+void
+MacroAssembler::mul32(Register rhs, Register srcDest)
+{
+    MOZ_ASSERT(srcDest == eax);
+    imull(rhs, srcDest); // Clobbers edx
+}
+
+void
+MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest)
+{
+    vmulss(src, dest, dest);
+}
+
+void
+MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest)
+{
+    vmulsd(src, dest, dest);
+}
+
+// Divide eax by rhs and leave the quotient in srcDest, which must be eax.
+// edx is overwritten while the dividend is being set up.
+void
+MacroAssembler::quotient32(Register rhs, Register srcDest, bool isUnsigned)
+{
+    MOZ_ASSERT(srcDest == eax);
+
+    // Extend eax into (edx:eax), since idiv/udiv take a 64-bit dividend:
+    // zero edx for the unsigned case, sign extend with cdq for the signed one.
+    if (isUnsigned) {
+        mov(ImmWord(0), edx);
+        udiv(rhs);
+    } else {
+        cdq();
+        idiv(rhs);
+    }
+}
+
+// Same division as quotient32, but the value left in edx (the remainder) is
+// copied into srcDest, which must be eax.
+void
+MacroAssembler::remainder32(Register rhs, Register srcDest, bool isUnsigned)
+{
+    MOZ_ASSERT(srcDest == eax);
+
+    // Extend eax into (edx:eax), since idiv/udiv take a 64-bit dividend:
+    // zero edx for the unsigned case, sign extend with cdq for the signed one.
+    if (isUnsigned) {
+        mov(ImmWord(0), edx);
+        udiv(rhs);
+    } else {
+        cdq();
+        idiv(rhs);
+    }
+    mov(edx, eax);
+}
+
+void
+MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest)
+{
+    vdivss(src, dest, dest);
+}
+
+void
+MacroAssembler::divDouble(FloatRegister src, FloatRegister dest)
+{
+    vdivsd(src, dest, dest);
+}
+
+void
+MacroAssembler::neg32(Register reg)
+{
+    negl(reg);
+}
+
+void
+MacroAssembler::negateFloat(FloatRegister reg)
+{
+    // Build the sign-bit mask in the scratch register without a memory load:
+    // compare the register with itself, then shift left.
+    ScratchFloat32Scope scratch(*this);
+    vpcmpeqw(Operand(scratch), scratch, scratch);
+    vpsllq(Imm32(31), scratch, scratch);
+
+    // XOR the float in a float register with -0.0.
+    vxorps(scratch, reg, reg); // s ^ 0x80000000
+}
+
+void
+MacroAssembler::negateDouble(FloatRegister reg)
+{
+    // From MacroAssemblerX86Shared::maybeInlineDouble
+    ScratchDoubleScope scratch(*this);
+    vpcmpeqw(Operand(scratch), scratch, scratch);
+    vpsllq(Imm32(63), scratch, scratch);
+
+    // XOR the float in a float register with -0.0.
+    vxorpd(scratch, reg, reg); // s ^ 0x8000000000000000
+}
+
+void
+MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest)
+{
+    // AND src with a constant built from a zero sign bit and a fully set
+    // significand.
+    // NOTE(review): presumably this constant has every bit set except the
+    // sign bit, so the AND only clears the sign -- confirm against
+    // mozilla/FloatingPoint.h.
+    ScratchFloat32Scope scratch(*this);
+    loadConstantFloat32(mozilla::SpecificNaN<float>(0, mozilla::FloatingPoint<float>::kSignificandBits), scratch);
+    vandps(scratch, src, dest);
+}
+
+void
+MacroAssembler::absDouble(FloatRegister src, FloatRegister dest)
+{
+    // Same masking scheme as absFloat32, on the double-precision layout.
+    ScratchDoubleScope scratch(*this);
+    loadConstantDouble(mozilla::SpecificNaN<double>(0, mozilla::FloatingPoint<double>::kSignificandBits), scratch);
+    vandpd(scratch, src, dest);
+}
+
+void
+MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest)
+{
+    vsqrtss(src, src, dest);
+}
+
+void
+MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest)
+{
+    vsqrtsd(src, src, dest);
+}
+
+void
+MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest, bool handleNaN)
+{
+    minMaxFloat32(srcDest, other, handleNaN, false);
+}
+
+void
+MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest, bool handleNaN)
+{
+    minMaxDouble(srcDest, other, handleNaN, false);
+}
+
+void
+MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest, bool handleNaN)
+{
+    minMaxFloat32(srcDest, other, handleNaN, true);
+}
+
+void
+MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest, bool handleNaN)
+{
+    minMaxDouble(srcDest, other, handleNaN, true);
+}
+
+// ===============================================================
+// Rotation instructions
+void
+MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest)
+{
+    MOZ_ASSERT(input == dest, "defineReuseInput");
+    // Only the low five bits of the count are used; a resulting count of zero
+    // emits no instruction.
+    count.value &= 0x1f;
+    if (count.value)
+        roll(count, input);
+}
+
+void
+MacroAssembler::rotateLeft(Register count, Register input, Register dest)
+{
+    MOZ_ASSERT(input == dest, "defineReuseInput");
+    MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
+    roll_cl(input);
+}
+
+void
+MacroAssembler::rotateRight(Imm32 count, Register input, Register dest)
+{
+    MOZ_ASSERT(input == dest, "defineReuseInput");
+    // Only the low five bits of the count are used; a resulting count of zero
+    // emits no instruction.
+    count.value &= 0x1f;
+    if (count.value)
+        rorl(count, input);
+}
+
+void
+MacroAssembler::rotateRight(Register count, Register input, Register dest)
+{
+    MOZ_ASSERT(input == dest, "defineReuseInput");
+    MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
+    rorl_cl(input);
+}
+
+// ===============================================================
+// Shift instructions
+
+void
+MacroAssembler::lshift32(Register shift, Register srcDest)
+{
+    MOZ_ASSERT(shift == ecx);
+    shll_cl(srcDest);
+}
+
+void
+MacroAssembler::rshift32(Register shift, Register srcDest)
+{
+    MOZ_ASSERT(shift == ecx);
+    shrl_cl(srcDest);
+}
+
+void
+MacroAssembler::rshift32Arithmetic(Register shift, Register srcDest)
+{
+    MOZ_ASSERT(shift == ecx);
+    sarl_cl(srcDest);
+}
+
+void
+MacroAssembler::lshift32(Imm32 shift, Register srcDest)
+{
+    shll(shift, srcDest);
+}
+
+void
+MacroAssembler::rshift32(Imm32 shift, Register srcDest)
+{
+    shrl(shift, srcDest);
+}
+
+void
+MacroAssembler::rshift32Arithmetic(Imm32 shift, Register srcDest)
+{
+    sarl(shift, srcDest);
+}
+
+// ===============================================================
+// Condition functions
+
+template <typename T1, typename T2>
+void
+MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest)
+{
+    cmp32(lhs, rhs);
+    emitSet(cond, dest);
+}
+
+// ===============================================================
+// Branch instructions
+
+template <class L>
+void
+MacroAssembler::branch32(Condition cond, Register lhs, Register rhs, L label)
+{
+    cmp32(lhs, rhs);
+    j(cond, label);
+}
+
+template <class L>
+void
+MacroAssembler::branch32(Condition cond, Register lhs, Imm32 rhs, L label)
+{
+    cmp32(lhs, rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs, Label* label)
+{
+    cmp32(Operand(lhs), rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 rhs, Label* label)
+{
+    cmp32(Operand(lhs), rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Register rhs, Label* label)
+{
+    cmp32(Operand(lhs), rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs, Label* label)
+{
+    cmp32(Operand(lhs), rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branch32(Condition cond, const Operand& lhs, Register rhs, Label* label)
+{
+    cmp32(lhs, rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branch32(Condition cond, const Operand& lhs, Imm32 rhs, Label* label)
+{
+    cmp32(lhs, rhs);
+    j(cond, label);
+}
+
+template <class L>
+void
+MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs, L label)
+{
+    cmpPtr(lhs, rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs, Label* label)
+{
+    branchPtrImpl(cond, lhs, rhs, label);
+}
+
+void
+MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs, Label* label)
+{
+    branchPtrImpl(cond, lhs, rhs, label);
+}
+
+void
+MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs, Label* label)
+{
+    branchPtrImpl(cond, lhs, rhs, label);
+}
+
+void
+MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs, Label* label)
+{
+    branchPtrImpl(cond, lhs, rhs, label);
+}
+
+template <class L>
+void
+MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs, L label)
+{
+    branchPtrImpl(cond, lhs, rhs, label);
+}
+
+void
+MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs, Label* label)
+{
+    branchPtrImpl(cond, lhs, rhs, label);
+}
+
+void
+MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs, Label* label)
+{
+    branchPtrImpl(cond, lhs, rhs, label);
+}
+
+void
+MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs, Label* label)
+{
+    branchPtrImpl(cond, lhs, rhs, label);
+}
+
+// Shared body of the branchPtr overloads: compare lhs (wrapped in an Operand)
+// with rhs as pointer-sized values, then jump to label on cond.
+template <typename T, typename S, typename L>
+void
+MacroAssembler::branchPtrImpl(Condition cond, const T& lhs, const S& rhs, L label)
+{
+    cmpPtr(Operand(lhs), rhs);
+    j(cond, label);
+}
+
+template <typename T>
+CodeOffsetJump
+MacroAssembler::branchPtrWithPatch(Condition cond, Register lhs, T rhs, RepatchLabel* label)
+{
+    cmpPtr(lhs, rhs);
+    return jumpWithPatch(label, cond);
+}
+
+template <typename T>
+CodeOffsetJump
+MacroAssembler::branchPtrWithPatch(Condition cond, Address lhs, T rhs, RepatchLabel* label)
+{
+    cmpPtr(lhs, rhs);
+    return jumpWithPatch(label, cond);
+}
+
+// Compare two float32 values and branch on cond. The two conditions handled
+// explicitly below need a second jump on the parity flag, which is how an
+// unordered result is detected here.
+void
+MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs,
+                            Label* label)
+{
+    compareFloat(cond, lhs, rhs);
+
+    if (cond == DoubleEqual) {
+        // Skip the Equal jump when the comparison was unordered.
+        Label unordered;
+        j(Parity, &unordered);
+        j(Equal, label);
+        bind(&unordered);
+        return;
+    }
+
+    if (cond == DoubleNotEqualOrUnordered) {
+        j(NotEqual, label);
+        j(Parity, label);
+        return;
+    }
+
+    MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
+    j(ConditionFromDoubleCondition(cond), label);
+}
+
+// Double-precision counterpart of branchFloat, with the same handling of
+// DoubleEqual and DoubleNotEqualOrUnordered.
+void
+MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs,
+                             Label* label)
+{
+    compareDouble(cond, lhs, rhs);
+
+    if (cond == DoubleEqual) {
+        // Skip the Equal jump when the comparison was unordered.
+        Label unordered;
+        j(Parity, &unordered);
+        j(Equal, label);
+        bind(&unordered);
+        return;
+    }
+    if (cond == DoubleNotEqualOrUnordered) {
+        j(NotEqual, label);
+        j(Parity, label);
+        return;
+    }
+
+    MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
+    j(ConditionFromDoubleCondition(cond), label);
+}
+
+template <typename T, typename L>
+void
+MacroAssembler::branchAdd32(Condition cond, T src, Register dest, L label)
+{
+    addl(src, dest);
+    j(cond, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchSub32(Condition cond, T src, Register dest, Label* label)
+{
+    subl(src, dest);
+    j(cond, label);
+}
+
+void
+MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs, Label* label)
+{
+    subPtr(rhs, lhs);
+    j(cond, label);
+}
+
+template <class L>
+void
+MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs, L label)
+{
+    MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || cond == NotSigned);
+    test32(lhs, rhs);
+    j(cond, label);
+}
+
+template <class L>
+void
+MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs, L label)
+{
+    MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || cond == NotSigned);
+    test32(lhs, rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs, Label* label)
+{
+    MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || cond == NotSigned);
+    test32(Operand(lhs), rhs);
+    j(cond, label);
+}
+
+template <class L>
+void
+MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs, L label)
+{
+    testPtr(lhs, rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs, Label* label)
+{
+    testPtr(lhs, rhs);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestPtr(Condition cond, const Address& lhs, Imm32 rhs, Label* label)
+{
+    testPtr(Operand(lhs), rhs);
+    j(cond, label);
+}
+
+// The branchTest<Type> families below all share one shape: each public
+// overload forwards to a branchTest<Type>Impl template, which calls the
+// matching test<Type>() helper and jumps on the condition it returns.
+
+void
+MacroAssembler::branchTestUndefined(Condition cond, Register tag, Label* label)
+{
+    branchTestUndefinedImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestUndefined(Condition cond, const Address& address, Label* label)
+{
+    branchTestUndefinedImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestUndefined(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestUndefinedImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestUndefined(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestUndefinedImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testUndefined(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestInt32(Condition cond, Register tag, Label* label)
+{
+    branchTestInt32Impl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestInt32(Condition cond, const Address& address, Label* label)
+{
+    branchTestInt32Impl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestInt32Impl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestInt32Impl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestInt32Impl(Condition cond, const T& t, Label* label)
+{
+    cond = testInt32(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestInt32Truthy(bool truthy, const ValueOperand& value, Label* label)
+{
+    Condition cond = testInt32Truthy(truthy, value);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestDouble(Condition cond, Register tag, Label* label)
+{
+    branchTestDoubleImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestDouble(Condition cond, const Address& address, Label* label)
+{
+    branchTestDoubleImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestDoubleImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestDoubleImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testDouble(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg, Label* label)
+{
+    Condition cond = testDoubleTruthy(truthy, reg);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestNumber(Condition cond, Register tag, Label* label)
+{
+    branchTestNumberImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestNumberImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestNumberImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testNumber(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestBoolean(Condition cond, Register tag, Label* label)
+{
+    branchTestBooleanImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestBoolean(Condition cond, const Address& address, Label* label)
+{
+    branchTestBooleanImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestBooleanImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestBoolean(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestBooleanImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestBooleanImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testBoolean(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestString(Condition cond, Register tag, Label* label)
+{
+    branchTestStringImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestString(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestStringImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestString(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestStringImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestStringImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testString(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestStringTruthy(bool truthy, const ValueOperand& value, Label* label)
+{
+    Condition cond = testStringTruthy(truthy, value);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestSymbol(Condition cond, Register tag, Label* label)
+{
+    branchTestSymbolImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestSymbolImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestSymbolImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testSymbol(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestNull(Condition cond, Register tag, Label* label)
+{
+    branchTestNullImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestNull(Condition cond, const Address& address, Label* label)
+{
+    branchTestNullImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestNullImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestNullImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestNullImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testNull(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestObject(Condition cond, Register tag, Label* label)
+{
+    branchTestObjectImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestObject(Condition cond, const Address& address, Label* label)
+{
+    branchTestObjectImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestObjectImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestObjectImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestObjectImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testObject(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestGCThing(Condition cond, const Address& address, Label* label)
+{
+    branchTestGCThingImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestGCThingImpl(cond, address, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestGCThingImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testGCThing(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestPrimitive(Condition cond, Register tag, Label* label)
+{
+    branchTestPrimitiveImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestPrimitive(Condition cond, const ValueOperand& value, Label* label)
+{
+    branchTestPrimitiveImpl(cond, value, label);
+}
+
+template <typename T>
+void
+MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t, Label* label)
+{
+    cond = testPrimitive(cond, t);
+    j(cond, label);
+}
+
+void
+MacroAssembler::branchTestMagic(Condition cond, Register tag, Label* label)
+{
+    branchTestMagicImpl(cond, tag, label);
+}
+
+void
+MacroAssembler::branchTestMagic(Condition cond, const Address& address, Label* label)
+{
+    branchTestMagicImpl(cond, address, label);
+}
+
+void
+MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address, Label* label)
+{
+    branchTestMagicImpl(cond, address, label);
+}
+
+template <class L>
+void
+MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value, L label)
+{
+    branchTestMagicImpl(cond, value, label);
+}
+
+template <typename T, class L>
+void
+MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label)
+{
+    cond = testMagic(cond, t);
+    j(cond, label);
+}
+
+// ========================================================================
+// Canonicalization primitives.
+void
+MacroAssembler::canonicalizeFloat32x4(FloatRegister reg, FloatRegister scratch)
+{
+    ScratchSimd128Scope scratch2(*this);
+
+    MOZ_ASSERT(scratch.asSimd128() != scratch2.asSimd128());
+    MOZ_ASSERT(reg.asSimd128() != scratch2.asSimd128());
+    MOZ_ASSERT(reg.asSimd128() != scratch.asSimd128());
+
+    // mask is the result of an ordered compare of reg with itself.
+    FloatRegister mask = scratch;
+    vcmpordps(Operand(reg), reg, mask);
+
+    FloatRegister ifFalse = scratch2;
+    float nanf = float(JS::GenericNaN());
+    loadConstantSimd128Float(SimdConstant::SplatX4(nanf), ifFalse);
+
+    // Select between reg and the splatted generic NaN using mask.
+    bitwiseAndSimd128(Operand(mask), reg);
+    bitwiseAndNotSimd128(Operand(ifFalse), mask);
+    bitwiseOrSimd128(Operand(mask), reg);
+}
+
+// ========================================================================
+// Memory access primitives.
+void
+MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& dest)
+{
+    vmovsd(src, dest);
+}
+void
+MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const BaseIndex& dest)
+{
+    vmovsd(src, dest);
+}
+void
+MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Operand& dest)
+{
+    // Only the two memory operand kinds are accepted; anything else crashes.
+    switch (dest.kind()) {
+      case Operand::MEM_REG_DISP:
+        storeUncanonicalizedDouble(src, dest.toAddress());
+        break;
+      case Operand::MEM_SCALE:
+        storeUncanonicalizedDouble(src, dest.toBaseIndex());
+        break;
+      default:
+        MOZ_CRASH("unexpected operand kind");
+    }
+}
+
+template void MacroAssembler::storeDouble(FloatRegister src, const Operand& dest);
+
+void
+MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const Address& dest)
+{
+    vmovss(src, dest);
+}
+void
+MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const BaseIndex& dest)
+{
+    vmovss(src, dest);
+}
+void
+MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const Operand& dest)
+{
+    // Only the two memory operand kinds are accepted; anything else crashes.
+    switch (dest.kind()) {
+      case Operand::MEM_REG_DISP:
+        storeUncanonicalizedFloat32(src, dest.toAddress());
+        break;
+      case Operand::MEM_SCALE:
+        storeUncanonicalizedFloat32(src, dest.toBaseIndex());
+        break;
+      default:
+        MOZ_CRASH("unexpected operand kind");
+    }
+}
+
+template void MacroAssembler::storeFloat32(FloatRegister src, const Operand& dest);
+
+// Store three float32 lanes: one 64-bit store at dest, then one 32-bit store
+// at dest + 8 of the value vmovhlps places in the scratch register.
+void
+MacroAssembler::storeFloat32x3(FloatRegister src, const Address& dest)
+{
+    Address destZ(dest);
+    destZ.offset += 2 * sizeof(int32_t);
+    storeDouble(src, dest);
+    ScratchSimd128Scope scratch(*this);
+    vmovhlps(src, scratch, scratch);
+    storeFloat32(scratch, destZ);
+}
+void
+MacroAssembler::storeFloat32x3(FloatRegister src, const BaseIndex& dest)
+{
+    BaseIndex destZ(dest);
+    destZ.offset += 2 * sizeof(int32_t);
+    storeDouble(src, dest);
+    ScratchSimd128Scope scratch(*this);
+    vmovhlps(src, scratch, scratch);
+    storeFloat32(scratch, destZ);
+}
+
+void
+MacroAssembler::memoryBarrier(MemoryBarrierBits barrier)
+{
+    // Only the store-load bit emits a fence here; other bits emit nothing.
+    if (barrier & MembarStoreLoad)
+        storeLoadFence();
+}
+
+// ========================================================================
+// Truncate floating point.
+
+void
+MacroAssembler::truncateFloat32ToInt64(Address src, Address dest, Register temp)
+{
+    if (Assembler::HasSSE3()) {
+        fld32(Operand(src));
+        fisttp(Operand(dest));
+        return;
+    }
+
+    // Two stack words are reserved below, so esp-relative operands must be
+    // rebased by the same amount.
+    if (src.base == esp)
+        src.offset += 2 * sizeof(int32_t);
+    if (dest.base == esp)
+        dest.offset += 2 * sizeof(int32_t);
+
+    reserveStack(2 * sizeof(int32_t));
+
+    // Set conversion to truncation.
+    fnstcw(Operand(esp, 0));
+    load32(Operand(esp, 0), temp);
+    andl(Imm32(~0xFF00), temp);
+    orl(Imm32(0xCFF), temp);
+    store32(temp, Address(esp, sizeof(int32_t)));
+    fldcw(Operand(esp, sizeof(int32_t)));
+
+    // Load float32 on fp stack, convert and load regular stack.
+    fld32(Operand(src));
+    fistp(Operand(dest));
+
+    // Reset the conversion flag.
+    fldcw(Operand(esp, 0));
+
+    freeStack(2 * sizeof(int32_t));
+}
+void
+MacroAssembler::truncateDoubleToInt64(Address src, Address dest, Register temp)
+{
+    if (Assembler::HasSSE3()) {
+        fld(Operand(src));
+        fisttp(Operand(dest));
+        return;
+    }
+
+    // Two stack words are reserved below, so esp-relative operands must be
+    // rebased by the same amount.
+    if (src.base == esp)
+        src.offset += 2*sizeof(int32_t);
+    if (dest.base == esp)
+        dest.offset += 2*sizeof(int32_t);
+
+    reserveStack(2*sizeof(int32_t));
+
+    // Set conversion to truncation.
+    fnstcw(Operand(esp, 0));
+    load32(Operand(esp, 0), temp);
+    andl(Imm32(~0xFF00), temp);
+    orl(Imm32(0xCFF), temp);
+    store32(temp, Address(esp, 1*sizeof(int32_t)));
+    fldcw(Operand(esp, 1*sizeof(int32_t)));
+
+    // Load double on fp stack, convert and load regular stack.
+    fld(Operand(src));
+    fistp(Operand(dest));
+
+    // Reset the conversion flag.
+    fldcw(Operand(esp, 0));
+
+    freeStack(2*sizeof(int32_t));
+}
+
+// ===============================================================
+// Clamping functions.
+
+void
+MacroAssembler::clampIntToUint8(Register reg)
+{
+    // If no bit above the low eight is set, reg is already in [0, 255].
+    Label inRange;
+    branchTest32(Assembler::Zero, reg, Imm32(0xffffff00), &inRange);
+    {
+        // sar 31 gives 0 for a non-negative value and -1 for a negative one;
+        // not flips that to -1 / 0; and 255 then yields 255 / 0.
+        sarl(Imm32(31), reg);
+        notl(reg);
+        andl(Imm32(255), reg);
+    }
+    bind(&inRange);
+}
+
+//}}} check_macroassembler_style
+// ===============================================================
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_x86_shared_MacroAssembler_x86_shared_inl_h */
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
new file mode 100644
index 000000000..7d86e8edf
--- /dev/null
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
@@ -0,0 +1,855 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/x86-shared/MacroAssembler-x86-shared.h"
+
+#include "jit/JitFrames.h"
+#include "jit/MacroAssembler.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+// Note: this function clobbers the input register.
+void
+MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output)
+{
+    ScratchDoubleScope scratch(*this);
+    MOZ_ASSERT(input != scratch);
+    Label positive, done;
+
+    // <= 0 or NaN --> 0
+    zeroDouble(scratch);
+    branchDouble(DoubleGreaterThan, input, scratch, &positive);
+    {
+        move32(Imm32(0), output);
+        jump(&done);
+    }
+
+    bind(&positive);
+
+    // Add 0.5 and truncate.
+    loadConstantDouble(0.5, scratch);
+    addDouble(scratch, input);
+
+    Label outOfRange;
+
+    // Truncate to int32 and ensure the result <= 255. This relies on the
+    // processor setting output to a value > 255 for doubles outside the int32
+    // range (for instance 0x80000000).
+    vcvttsd2si(input, output);
+    branch32(Assembler::Above, output, Imm32(255), &outOfRange);
+    {
+        // Check if we had a tie.
+        convertInt32ToDouble(output, scratch);
+        branchDouble(DoubleNotEqual, input, scratch, &done);
+
+        // It was a tie. Mask out the ones bit to get an even value.
+        // See also js_TypedArray_uint8_clamp_double.
+        and32(Imm32(~1), output);
+        jump(&done);
+    }
+
+    // > 255 --> 255
+    bind(&outOfRange);
+    {
+        move32(Imm32(255), output);
+    }
+
+    bind(&done);
+}
+
+void
+MacroAssembler::alignFrameForICArguments(AfterICSaveLive& aic)
+{
+    // Exists for MIPS compatibility.
+}
+
+void
+MacroAssembler::restoreFrameAlignmentForICArguments(AfterICSaveLive& aic)
+{
+    // Exists for MIPS compatibility.
+}
+
+bool
+MacroAssemblerX86Shared::buildOOLFakeExitFrame(void* fakeReturnAddr)
+{
+    uint32_t descriptor = MakeFrameDescriptor(asMasm().framePushed(), JitFrame_IonJS,
+                                              ExitFrameLayout::Size());
+    asMasm().Push(Imm32(descriptor));
+    asMasm().Push(ImmPtr(fakeReturnAddr));
+    return true;
+}
+
+void
+MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg,
+                                            Register scratch,
+                                            Label* label,
+                                            bool maybeNonZero)
+{
+    // Determines whether the low double contained in the XMM register reg
+    // is equal to -0.0.
+
+#if defined(JS_CODEGEN_X86)
+    Label nonZero;
+
+    // if not already compared to zero
+    if (maybeNonZero) {
+        ScratchDoubleScope scratchDouble(asMasm());
+
+        // Compare to zero. Lets through {0, -0}.
+        zeroDouble(scratchDouble);
+
+        // If reg is non-zero, jump to nonZero.
+        asMasm().branchDouble(DoubleNotEqual, reg, scratchDouble, &nonZero);
+    }
+    // Input register is either zero or negative zero. Retrieve sign of input.
+    vmovmskpd(reg, scratch);
+
+    // If scratch is 1 or 3, input is negative zero.
+    // If scratch is 0 or 2, input is a normal zero.
+    asMasm().branchTest32(NonZero, scratch, Imm32(1), label);
+
+    bind(&nonZero);
+#elif defined(JS_CODEGEN_X64)
+    vmovq(reg, scratch);
+    cmpq(Imm32(1), scratch);
+    j(Overflow, label);
+#endif
+}
+
+void
+MacroAssemblerX86Shared::branchNegativeZeroFloat32(FloatRegister reg,
+                                                   Register scratch,
+                                                   Label* label)
+{
+    vmovd(reg, scratch);
+    cmp32(scratch, Imm32(1));
+    j(Overflow, label);
+}
+
+MacroAssembler&
+MacroAssemblerX86Shared::asMasm()
+{
+    return *static_cast<MacroAssembler*>(this);
+}
+
+const MacroAssembler&
+MacroAssemblerX86Shared::asMasm() const
+{
+    return *static_cast<const MacroAssembler*>(this);
+}
+
+// Emit a compare-exchange on a typed-array element, choosing the width and
+// extension from arrayType. For Uint32 the result goes through temp and is
+// converted to a double in output.
+template<typename T>
+void
+MacroAssemblerX86Shared::compareExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem,
+                                                        Register oldval, Register newval,
+                                                        Register temp, AnyRegister output)
+{
+    switch (arrayType) {
+      case Scalar::Int8:
+        compareExchange8SignExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Uint8:
+        compareExchange8ZeroExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Int16:
+        compareExchange16SignExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Uint16:
+        compareExchange16ZeroExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Int32:
+        compareExchange32(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Uint32:
+        // At the moment, the code in MCallOptimize.cpp requires the output
+        // type to be double for uint32 arrays.  See bug 1077305.
+        MOZ_ASSERT(output.isFloat());
+        compareExchange32(mem, oldval, newval, temp);
+        asMasm().convertUInt32ToDouble(temp, output.fpu());
+        break;
+      default:
+        MOZ_CRASH("Invalid typed array type");
+    }
+}
+
+template void
+MacroAssemblerX86Shared::compareExchangeToTypedIntArray(Scalar::Type arrayType, const Address& mem,
+                                                        Register oldval, Register newval, Register temp,
+                                                        AnyRegister output);
+template void
+MacroAssemblerX86Shared::compareExchangeToTypedIntArray(Scalar::Type arrayType, const BaseIndex& mem,
+                                                        Register oldval, Register newval, Register temp,
+                                                        AnyRegister output);
+
+// Emit an atomic exchange on a typed-array element, choosing the width and
+// extension from arrayType. For Uint32 the result goes through temp and is
+// converted to a double in output.
+template<typename T>
+void
+MacroAssemblerX86Shared::atomicExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem,
+                                                       Register value, Register temp, AnyRegister output)
+{
+    switch (arrayType) {
+      case Scalar::Int8:
+        atomicExchange8SignExtend(mem, value, output.gpr());
+        break;
+      case Scalar::Uint8:
+        atomicExchange8ZeroExtend(mem, value, output.gpr());
+        break;
+      case Scalar::Int16:
+        atomicExchange16SignExtend(mem, value, output.gpr());
+        break;
+      case Scalar::Uint16:
+        atomicExchange16ZeroExtend(mem, value, output.gpr());
+        break;
+      case Scalar::Int32:
+        atomicExchange32(mem, value, output.gpr());
+        break;
+      case Scalar::Uint32:
+        // At the moment, the code in MCallOptimize.cpp requires the output
+        // type to be double for uint32 arrays.  See bug 1077305.
+        MOZ_ASSERT(output.isFloat());
+        atomicExchange32(mem, value, temp);
+        asMasm().convertUInt32ToDouble(temp, output.fpu());
+        break;
+      default:
+        MOZ_CRASH("Invalid typed array type");
+    }
+}
+
+template void
+MacroAssemblerX86Shared::atomicExchangeToTypedIntArray(Scalar::Type arrayType, const Address& mem,
+                                                       Register value, Register temp, AnyRegister output);
+template void
+MacroAssemblerX86Shared::atomicExchangeToTypedIntArray(Scalar::Type arrayType, const BaseIndex& mem,
+                                                       Register value, Register temp, AnyRegister output);
+
+// Return the pooled entry for |value|, appending a new one to |vec| (and
+// recording its index in |map|) on first use. On allocation failure
+// enoughMemory_ is cleared and nullptr is returned.
+// NOTE(review): the Vector template arguments here and in MergeConstants were
+// reconstructed after being lost in extraction -- confirm against the class
+// declaration.
+template<class T, class Map>
+T*
+MacroAssemblerX86Shared::getConstant(const typename T::Pod& value, Map& map,
+                                     Vector<T, 0, SystemAllocPolicy>& vec)
+{
+    typedef typename Map::AddPtr AddPtr;
+    if (!map.initialized()) {
+        enoughMemory_ &= map.init();
+        if (!enoughMemory_)
+            return nullptr;
+    }
+    size_t index;
+    if (AddPtr p = map.lookupForAdd(value)) {
+        index = p->value();
+    } else {
+        index = vec.length();
+        enoughMemory_ &= vec.append(T(value));
+        if (!enoughMemory_)
+            return nullptr;
+        enoughMemory_ &= map.add(p, value, index);
+        if (!enoughMemory_)
+            return nullptr;
+    }
+    return &vec[index];
+}
+
+MacroAssemblerX86Shared::Float*
+MacroAssemblerX86Shared::getFloat(wasm::RawF32 f)
+{
+    return getConstant<Float>(f.bits(), floatMap_, floats_);
+}
+
+MacroAssemblerX86Shared::Double*
+MacroAssemblerX86Shared::getDouble(wasm::RawF64 d)
+{
+    return getConstant<Double>(d.bits(), doubleMap_, doubles_);
+}
+
+MacroAssemblerX86Shared::SimdData*
+MacroAssemblerX86Shared::getSimdData(const SimdConstant& v)
+{
+    return getConstant<SimdData>(v, simdMap_, simds_);
+}
+
+// Fold the constants in |other| into |map| / |vec|, adding |delta| to every
+// recorded use offset. Returns false on allocation failure.
+template<class T, class Map>
+static bool
+MergeConstants(size_t delta, const Vector<T, 0, SystemAllocPolicy>& other,
+               Map& map, Vector<T, 0, SystemAllocPolicy>& vec)
+{
+    typedef typename Map::AddPtr AddPtr;
+    if (!map.initialized() && !map.init())
+        return false;
+
+    for (const T& c : other) {
+        size_t index;
+        if (AddPtr p = map.lookupForAdd(c.value)) {
+            index = p->value();
+        } else {
+            index = vec.length();
+            if (!vec.append(T(c.value)) || !map.add(p, c.value, index))
+                return false;
+        }
+        MacroAssemblerX86Shared::UsesVector& uses = vec[index].uses;
+        for (CodeOffset use : c.uses) {
+            use.offsetBy(delta);
+            if (!uses.append(use))
+                return false;
+        }
+    }
+
+    return true;
+}
+
+bool
+MacroAssemblerX86Shared::asmMergeWith(const MacroAssemblerX86Shared& other)
+{
+    // Capture the size before merging: it is the delta by which the other
+    // assembler's constant uses are rebased.
+    size_t sizeBefore = masm.size();
+    if (!Assembler::asmMergeWith(other))
+        return false;
+    if (!MergeConstants<Double>(sizeBefore, other.doubles_, doubleMap_, doubles_))
+        return false;
+    if (!MergeConstants<Float>(sizeBefore, other.floats_, floatMap_, floats_))
+        return false;
+    if (!MergeConstants<SimdData>(sizeBefore, other.simds_, simdMap_, simds_))
+        return false;
+    return true;
+}
+
+void
+MacroAssemblerX86Shared::minMaxDouble(FloatRegister first, FloatRegister second, bool canBeNaN,
+                                      bool isMax)
+{
+    Label done, nan, minMaxInst;
+
+    // Do a vucomisd to catch equality and NaNs, which both require special
+    // handling. If the operands are ordered and inequal, we branch straight to
+    // the min/max instruction. If we wanted, we could also branch for less-than
+    // or greater-than here instead of using min/max, however these conditions
+    // will sometimes be hard on the branch predictor.
+    vucomisd(second, first);
+    j(Assembler::NotEqual, &minMaxInst);
+    if (canBeNaN)
+        j(Assembler::Parity, &nan);
+
+    // Ordered and equal. The operands are bit-identical unless they are zero
+    // and negative zero. These instructions merge the sign bits in that
+    // case, and are no-ops otherwise.
+    if (isMax)
+        vandpd(second, first, first);
+    else
+        vorpd(second, first, first);
+    jump(&done);
+
+    // x86's min/max are not symmetric; if either operand is a NaN, they return
+    // the read-only operand. We need to return a NaN if either operand is a
+    // NaN, so we explicitly check for a NaN in the read-write operand.
+    if (canBeNaN) {
+        bind(&nan);
+        vucomisd(first, first);
+        j(Assembler::Parity, &done);
+    }
+
+    // When the values are inequal, or second is NaN, x86's min and max will
+    // return the value we need.
+    bind(&minMaxInst);
+    if (isMax)
+        vmaxsd(second, first, first);
+    else
+        vminsd(second, first, first);
+
+    bind(&done);
+}
+
+void
+MacroAssemblerX86Shared::minMaxFloat32(FloatRegister first, FloatRegister second, bool canBeNaN,
+                                       bool isMax)
+{
+    Label done, nan, minMaxInst;
+
+    // Do a vucomiss to catch equality and NaNs, which both require special
+    // handling. If the operands are ordered and inequal, we branch straight to
+    // the min/max instruction. If we wanted, we could also branch for less-than
+    // or greater-than here instead of using min/max, however these conditions
+    // will sometimes be hard on the branch predictor.
+    vucomiss(second, first);
+    j(Assembler::NotEqual, &minMaxInst);
+    if (canBeNaN)
+        j(Assembler::Parity, &nan);
+
+    // Ordered and equal. The operands are bit-identical unless they are zero
+    // and negative zero. These instructions merge the sign bits in that
+    // case, and are no-ops otherwise.
+    if (isMax)
+        vandps(second, first, first);
+    else
+        vorps(second, first, first);
+    jump(&done);
+
+    // x86's min/max are not symmetric; if either operand is a NaN, they return
+    // the read-only operand. We need to return a NaN if either operand is a
+    // NaN, so we explicitly check for a NaN in the read-write operand.
+    if (canBeNaN) {
+        bind(&nan);
+        vucomiss(first, first);
+        j(Assembler::Parity, &done);
+    }
+
+    // When the values are inequal, or second is NaN, x86's min and max will
+    // return the value we need.
+    bind(&minMaxInst);
+    if (isMax)
+        vmaxss(second, first, first);
+    else
+        vminss(second, first, first);
+
+    bind(&done);
+}
+
+//{{{ check_macroassembler_style
+// ===============================================================
+// MacroAssembler high-level usage.
+
+void
+MacroAssembler::flush()
+{
+}
+
+void
+MacroAssembler::comment(const char* msg)
+{
+    masm.comment(msg);
+}
+
+// ===============================================================
+// Stack manipulation functions.
+ +void +MacroAssembler::PushRegsInMask(LiveRegisterSet set) +{ + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + unsigned numFpu = fpuSet.size(); + int32_t diffF = fpuSet.getPushSizeInBytes(); + int32_t diffG = set.gprs().size() * sizeof(intptr_t); + + // On x86, always use push to push the integer registers, as it's fast + // on modern hardware and it's a small instruction. + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + Push(*iter); + } + MOZ_ASSERT(diffG == 0); + + reserveStack(diffF); + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + diffF -= reg.size(); + numFpu -= 1; + Address spillAddress(StackPointer, diffF); + if (reg.isDouble()) + storeDouble(reg, spillAddress); + else if (reg.isSingle()) + storeFloat32(reg, spillAddress); + else if (reg.isSimd128()) + storeUnalignedSimd128Float(reg, spillAddress); + else + MOZ_CRASH("Unknown register type."); + } + MOZ_ASSERT(numFpu == 0); + // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with + // GetPushBytesInSize. 
+ diffF -= diffF % sizeof(uintptr_t); + MOZ_ASSERT(diffF == 0); +} + +void +MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore) +{ + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + unsigned numFpu = fpuSet.size(); + int32_t diffG = set.gprs().size() * sizeof(intptr_t); + int32_t diffF = fpuSet.getPushSizeInBytes(); + const int32_t reservedG = diffG; + const int32_t reservedF = diffF; + + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + diffF -= reg.size(); + numFpu -= 1; + if (ignore.has(reg)) + continue; + + Address spillAddress(StackPointer, diffF); + if (reg.isDouble()) + loadDouble(spillAddress, reg); + else if (reg.isSingle()) + loadFloat32(spillAddress, reg); + else if (reg.isSimd128()) + loadUnalignedSimd128Float(spillAddress, reg); + else + MOZ_CRASH("Unknown register type."); + } + freeStack(reservedF); + MOZ_ASSERT(numFpu == 0); + // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with + // GetPushBytesInSize. + diffF -= diffF % sizeof(uintptr_t); + MOZ_ASSERT(diffF == 0); + + // On x86, use pop to pop the integer registers, if we're not going to + // ignore any slots, as it's fast on modern hardware and it's a small + // instruction. 
+ if (ignore.emptyGeneral()) { + for (GeneralRegisterForwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + Pop(*iter); + } + } else { + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + if (!ignore.has(*iter)) + loadPtr(Address(StackPointer, diffG), *iter); + } + freeStack(reservedG); + } + MOZ_ASSERT(diffG == 0); +} + +void +MacroAssembler::Push(const Operand op) +{ + push(op); + adjustFrame(sizeof(intptr_t)); +} + +void +MacroAssembler::Push(Register reg) +{ + push(reg); + adjustFrame(sizeof(intptr_t)); +} + +void +MacroAssembler::Push(const Imm32 imm) +{ + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void +MacroAssembler::Push(const ImmWord imm) +{ + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void +MacroAssembler::Push(const ImmPtr imm) +{ + Push(ImmWord(uintptr_t(imm.value))); +} + +void +MacroAssembler::Push(const ImmGCPtr ptr) +{ + push(ptr); + adjustFrame(sizeof(intptr_t)); +} + +void +MacroAssembler::Push(FloatRegister t) +{ + push(t); + adjustFrame(sizeof(double)); +} + +void +MacroAssembler::Pop(const Operand op) +{ + pop(op); + implicitPop(sizeof(intptr_t)); +} + +void +MacroAssembler::Pop(Register reg) +{ + pop(reg); + implicitPop(sizeof(intptr_t)); +} + +void +MacroAssembler::Pop(FloatRegister reg) +{ + pop(reg); + implicitPop(sizeof(double)); +} + +void +MacroAssembler::Pop(const ValueOperand& val) +{ + popValue(val); + implicitPop(sizeof(Value)); +} + +// =============================================================== +// Simple call functions. 
+ +CodeOffset +MacroAssembler::call(Register reg) +{ + return Assembler::call(reg); +} + +CodeOffset +MacroAssembler::call(Label* label) +{ + return Assembler::call(label); +} + +void +MacroAssembler::call(const Address& addr) +{ + Assembler::call(Operand(addr.base, addr.offset)); +} + +void +MacroAssembler::call(wasm::SymbolicAddress target) +{ + mov(target, eax); + Assembler::call(eax); +} + +void +MacroAssembler::call(ImmWord target) +{ + Assembler::call(target); +} + +void +MacroAssembler::call(ImmPtr target) +{ + Assembler::call(target); +} + +void +MacroAssembler::call(JitCode* target) +{ + Assembler::call(target); +} + +CodeOffset +MacroAssembler::callWithPatch() +{ + return Assembler::callWithPatch(); +} +void +MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) +{ + Assembler::patchCall(callerOffset, calleeOffset); +} + +void +MacroAssembler::callAndPushReturnAddress(Register reg) +{ + call(reg); +} + +void +MacroAssembler::callAndPushReturnAddress(Label* label) +{ + call(label); +} + +// =============================================================== +// Patchable near/far jumps. + +CodeOffset +MacroAssembler::farJumpWithPatch() +{ + return Assembler::farJumpWithPatch(); +} + +void +MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) +{ + Assembler::patchFarJump(farJump, targetOffset); +} + +void +MacroAssembler::repatchFarJump(uint8_t* code, uint32_t farJumpOffset, uint32_t targetOffset) +{ + Assembler::repatchFarJump(code, farJumpOffset, targetOffset); +} + +CodeOffset +MacroAssembler::nopPatchableToNearJump() +{ + return Assembler::twoByteNop(); +} + +void +MacroAssembler::patchNopToNearJump(uint8_t* jump, uint8_t* target) +{ + Assembler::patchTwoByteNopToJump(jump, target); +} + +void +MacroAssembler::patchNearJumpToNop(uint8_t* jump) +{ + Assembler::patchJumpToTwoByteNop(jump); +} + +// =============================================================== +// Jit Frames. 
+ +uint32_t +MacroAssembler::pushFakeReturnAddress(Register scratch) +{ + CodeLabel cl; + + mov(cl.patchAt(), scratch); + Push(scratch); + use(cl.target()); + uint32_t retAddr = currentOffset(); + + addCodeLabel(cl); + return retAddr; +} + +// wasm specific methods, used in both the wasm baseline compiler and ion. + +// RAII class that generates the jumps to traps when it's destructed, to +// prevent some code duplication in the outOfLineWasmTruncateXtoY methods. +struct MOZ_RAII AutoHandleWasmTruncateToIntErrors +{ + MacroAssembler& masm; + Label inputIsNaN; + Label fail; + wasm::TrapOffset off; + + explicit AutoHandleWasmTruncateToIntErrors(MacroAssembler& masm, wasm::TrapOffset off) + : masm(masm), off(off) + { } + + ~AutoHandleWasmTruncateToIntErrors() { + // Handle errors. + masm.bind(&fail); + masm.jump(wasm::TrapDesc(off, wasm::Trap::IntegerOverflow, masm.framePushed())); + + masm.bind(&inputIsNaN); + masm.jump(wasm::TrapDesc(off, wasm::Trap::InvalidConversionToInteger, masm.framePushed())); + } +}; + +void +MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input, Register output, Label* oolEntry) +{ + vcvttsd2si(input, output); + cmp32(output, Imm32(1)); + j(Assembler::Overflow, oolEntry); +} + +void +MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input, Register output, Label* oolEntry) +{ + vcvttss2si(input, output); + cmp32(output, Imm32(1)); + j(Assembler::Overflow, oolEntry); +} + +void +MacroAssembler::outOfLineWasmTruncateDoubleToInt32(FloatRegister input, bool isUnsigned, + wasm::TrapOffset off, Label* rejoin) +{ + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // Handle special values (not needed for unsigned values). + if (isUnsigned) + return; + + // We've used vcvttsd2si. The only valid double values that can + // truncate to INT32_MIN are in ]INT32_MIN - 1; INT32_MIN]. 
+ loadConstantDouble(double(INT32_MIN) - 1.0, ScratchDoubleReg); + branchDouble(Assembler::DoubleLessThanOrEqual, input, ScratchDoubleReg, &traps.fail); + + loadConstantDouble(double(INT32_MIN), ScratchDoubleReg); + branchDouble(Assembler::DoubleGreaterThan, input, ScratchDoubleReg, &traps.fail); + jump(rejoin); +} + +void +MacroAssembler::outOfLineWasmTruncateFloat32ToInt32(FloatRegister input, bool isUnsigned, + wasm::TrapOffset off, Label* rejoin) +{ + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // Handle special values (not needed for unsigned values). + if (isUnsigned) + return; + + // We've used vcvttss2si. Check that the input wasn't + // float(INT32_MIN), which is the only legimitate input that + // would truncate to INT32_MIN. + loadConstantFloat32(float(INT32_MIN), ScratchFloat32Reg); + branchFloat(Assembler::DoubleNotEqual, input, ScratchFloat32Reg, &traps.fail); + jump(rejoin); +} + +void +MacroAssembler::outOfLineWasmTruncateDoubleToInt64(FloatRegister input, bool isUnsigned, + wasm::TrapOffset off, Label* rejoin) +{ + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // Handle special values. + if (isUnsigned) { + loadConstantDouble(-0.0, ScratchDoubleReg); + branchDouble(Assembler::DoubleGreaterThan, input, ScratchDoubleReg, &traps.fail); + loadConstantDouble(-1.0, ScratchDoubleReg); + branchDouble(Assembler::DoubleLessThanOrEqual, input, ScratchDoubleReg, &traps.fail); + jump(rejoin); + return; + } + + // We've used vcvtsd2sq. The only legit value whose i64 + // truncation is INT64_MIN is double(INT64_MIN): exponent is so + // high that the highest resolution around is much more than 1. 
+ loadConstantDouble(double(int64_t(INT64_MIN)), ScratchDoubleReg); + branchDouble(Assembler::DoubleNotEqual, input, ScratchDoubleReg, &traps.fail); + jump(rejoin); +} + +void +MacroAssembler::outOfLineWasmTruncateFloat32ToInt64(FloatRegister input, bool isUnsigned, + wasm::TrapOffset off, Label* rejoin) +{ + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // Handle special values. + if (isUnsigned) { + loadConstantFloat32(-0.0f, ScratchFloat32Reg); + branchFloat(Assembler::DoubleGreaterThan, input, ScratchFloat32Reg, &traps.fail); + loadConstantFloat32(-1.0f, ScratchFloat32Reg); + branchFloat(Assembler::DoubleLessThanOrEqual, input, ScratchFloat32Reg, &traps.fail); + jump(rejoin); + return; + } + + // We've used vcvtss2sq. See comment in outOfLineWasmTruncateDoubleToInt64. + loadConstantFloat32(float(int64_t(INT64_MIN)), ScratchFloat32Reg); + branchFloat(Assembler::DoubleNotEqual, input, ScratchFloat32Reg, &traps.fail); + jump(rejoin); +} + +//}}} check_macroassembler_style diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h new file mode 100644 index 000000000..8a0e154f1 --- /dev/null +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h @@ -0,0 +1,1411 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_x86_shared_MacroAssembler_x86_shared_h +#define jit_x86_shared_MacroAssembler_x86_shared_h + +#include "mozilla/Casting.h" + +#if defined(JS_CODEGEN_X86) +# include "jit/x86/Assembler-x86.h" +#elif defined(JS_CODEGEN_X64) +# include "jit/x64/Assembler-x64.h" +#endif + +#ifdef DEBUG + #define CHECK_BYTEREG(reg) \ + JS_BEGIN_MACRO \ + AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); \ + MOZ_ASSERT(byteRegs.has(reg)); \ + JS_END_MACRO + #define CHECK_BYTEREGS(r1, r2) \ + JS_BEGIN_MACRO \ + AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); \ + MOZ_ASSERT(byteRegs.has(r1)); \ + MOZ_ASSERT(byteRegs.has(r2)); \ + JS_END_MACRO +#else + #define CHECK_BYTEREG(reg) (void)0 + #define CHECK_BYTEREGS(r1, r2) (void)0 +#endif + +namespace js { +namespace jit { + +class MacroAssembler; + +class MacroAssemblerX86Shared : public Assembler +{ + private: + // Perform a downcast. Should be removed by Bug 996602. + MacroAssembler& asMasm(); + const MacroAssembler& asMasm() const; + + public: + typedef Vector UsesVector; + + protected: + + // For Double, Float and SimdData, make the move ctors explicit so that MSVC + // knows what to use instead of copying these data structures. + template + struct Constant { + typedef T Pod; + + T value; + UsesVector uses; + + explicit Constant(const T& value) : value(value) {} + Constant(Constant&& other) : value(other.value), uses(mozilla::Move(other.uses)) {} + explicit Constant(const Constant&) = delete; + }; + + // Containers use SystemAllocPolicy since wasm releases memory after each + // function is compiled, and these need to live until after all functions + // are compiled. 
+ using Double = Constant; + Vector doubles_; + typedef HashMap, SystemAllocPolicy> DoubleMap; + DoubleMap doubleMap_; + + using Float = Constant; + Vector floats_; + typedef HashMap, SystemAllocPolicy> FloatMap; + FloatMap floatMap_; + + struct SimdData : public Constant { + explicit SimdData(SimdConstant d) : Constant(d) {} + SimdData(SimdData&& d) : Constant(mozilla::Move(d)) {} + explicit SimdData(const SimdData&) = delete; + SimdConstant::Type type() const { return value.type(); } + }; + + Vector simds_; + typedef HashMap SimdMap; + SimdMap simdMap_; + + template + T* getConstant(const typename T::Pod& value, Map& map, Vector& vec); + + Float* getFloat(wasm::RawF32 f); + Double* getDouble(wasm::RawF64 d); + SimdData* getSimdData(const SimdConstant& v); + + public: + using Assembler::call; + + MacroAssemblerX86Shared() + { } + + bool asmMergeWith(const MacroAssemblerX86Shared& other); + + // Evaluate srcDest = minmax{Float32,Double}(srcDest, second). + // Checks for NaN if canBeNaN is true. + void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax); + void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax); + + void compareDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) { + if (cond & DoubleConditionBitInvert) + vucomisd(lhs, rhs); + else + vucomisd(rhs, lhs); + } + + void compareFloat(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) { + if (cond & DoubleConditionBitInvert) + vucomiss(lhs, rhs); + else + vucomiss(rhs, lhs); + } + + void branchNegativeZero(FloatRegister reg, Register scratch, Label* label, bool maybeNonZero = true); + void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, Label* label); + + void move32(Imm32 imm, Register dest) { + // Use the ImmWord version of mov to register, which has special + // optimizations. Casting to uint32_t here ensures that the value + // is zero-extended. 
+ mov(ImmWord(uint32_t(imm.value)), dest); + } + void move32(Imm32 imm, const Operand& dest) { + movl(imm, dest); + } + void move32(Register src, Register dest) { + movl(src, dest); + } + void move32(Register src, const Operand& dest) { + movl(src, dest); + } + void test32(Register lhs, Register rhs) { + testl(rhs, lhs); + } + void test32(const Address& addr, Imm32 imm) { + testl(imm, Operand(addr)); + } + void test32(const Operand lhs, Imm32 imm) { + testl(imm, lhs); + } + void test32(Register lhs, Imm32 rhs) { + testl(rhs, lhs); + } + void cmp32(Register lhs, Imm32 rhs) { + cmpl(rhs, lhs); + } + void cmp32(Register lhs, Register rhs) { + cmpl(rhs, lhs); + } + void cmp32(const Address& lhs, Register rhs) { + cmp32(Operand(lhs), rhs); + } + void cmp32(const Address& lhs, Imm32 rhs) { + cmp32(Operand(lhs), rhs); + } + void cmp32(const Operand& lhs, Imm32 rhs) { + cmpl(rhs, lhs); + } + void cmp32(const Operand& lhs, Register rhs) { + cmpl(rhs, lhs); + } + void cmp32(Register lhs, const Operand& rhs) { + cmpl(rhs, lhs); + } + CodeOffset cmp32WithPatch(Register lhs, Imm32 rhs) { + return cmplWithPatch(rhs, lhs); + } + void atomic_inc32(const Operand& addr) { + lock_incl(addr); + } + void atomic_dec32(const Operand& addr) { + lock_decl(addr); + } + + template + void atomicFetchAdd8SignExtend(Register src, const T& mem, Register temp, Register output) { + CHECK_BYTEREGS(src, output); + if (src != output) + movl(src, output); + lock_xaddb(output, Operand(mem)); + movsbl(output, output); + } + + template + void atomicFetchAdd8ZeroExtend(Register src, const T& mem, Register temp, Register output) { + CHECK_BYTEREGS(src, output); + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + lock_xaddb(output, Operand(mem)); + movzbl(output, output); + } + + template + void atomicFetchAdd8SignExtend(Imm32 src, const T& mem, Register temp, Register output) { + CHECK_BYTEREG(output); + MOZ_ASSERT(temp == InvalidReg); + movb(src, output); + lock_xaddb(output, 
Operand(mem)); + movsbl(output, output); + } + + template + void atomicFetchAdd8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) { + CHECK_BYTEREG(output); + MOZ_ASSERT(temp == InvalidReg); + movb(src, output); + lock_xaddb(output, Operand(mem)); + movzbl(output, output); + } + + template + void atomicFetchAdd16SignExtend(Register src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + lock_xaddw(output, Operand(mem)); + movswl(output, output); + } + + template + void atomicFetchAdd16ZeroExtend(Register src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + lock_xaddw(output, Operand(mem)); + movzwl(output, output); + } + + template + void atomicFetchAdd16SignExtend(Imm32 src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + movl(src, output); + lock_xaddw(output, Operand(mem)); + movswl(output, output); + } + + template + void atomicFetchAdd16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + movl(src, output); + lock_xaddw(output, Operand(mem)); + movzwl(output, output); + } + + template + void atomicFetchAdd32(Register src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + lock_xaddl(output, Operand(mem)); + } + + template + void atomicFetchAdd32(Imm32 src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + movl(src, output); + lock_xaddl(output, Operand(mem)); + } + + template + void atomicFetchSub8SignExtend(Register src, const T& mem, Register temp, Register output) { + CHECK_BYTEREGS(src, output); + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + negl(output); + lock_xaddb(output, Operand(mem)); + movsbl(output, output); + } + + template + void 
atomicFetchSub8ZeroExtend(Register src, const T& mem, Register temp, Register output) { + CHECK_BYTEREGS(src, output); + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + negl(output); + lock_xaddb(output, Operand(mem)); + movzbl(output, output); + } + + template + void atomicFetchSub8SignExtend(Imm32 src, const T& mem, Register temp, Register output) { + CHECK_BYTEREG(output); + MOZ_ASSERT(temp == InvalidReg); + movb(Imm32(-src.value), output); + lock_xaddb(output, Operand(mem)); + movsbl(output, output); + } + + template + void atomicFetchSub8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) { + CHECK_BYTEREG(output); + MOZ_ASSERT(temp == InvalidReg); + movb(Imm32(-src.value), output); + lock_xaddb(output, Operand(mem)); + movzbl(output, output); + } + + template + void atomicFetchSub16SignExtend(Register src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + negl(output); + lock_xaddw(output, Operand(mem)); + movswl(output, output); + } + + template + void atomicFetchSub16ZeroExtend(Register src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + negl(output); + lock_xaddw(output, Operand(mem)); + movzwl(output, output); + } + + template + void atomicFetchSub16SignExtend(Imm32 src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + movl(Imm32(-src.value), output); + lock_xaddw(output, Operand(mem)); + movswl(output, output); + } + + template + void atomicFetchSub16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + movl(Imm32(-src.value), output); + lock_xaddw(output, Operand(mem)); + movzwl(output, output); + } + + template + void atomicFetchSub32(Register src, const T& mem, Register temp, Register output) { + MOZ_ASSERT(temp == InvalidReg); + if (src != output) + movl(src, output); + 
negl(output); + lock_xaddl(output, Operand(mem)); + } + + template + void atomicFetchSub32(Imm32 src, const T& mem, Register temp, Register output) { + movl(Imm32(-src.value), output); + lock_xaddl(output, Operand(mem)); + } + + // requires output == eax +#define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \ + MOZ_ASSERT(output == eax); \ + LOAD(Operand(mem), eax); \ + Label again; \ + bind(&again); \ + movl(eax, temp); \ + OP(src, temp); \ + LOCK_CMPXCHG(temp, Operand(mem)); \ + j(NonZero, &again); + + template + void atomicFetchAnd8SignExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb) + CHECK_BYTEREG(temp); + movsbl(eax, eax); + } + template + void atomicFetchAnd8ZeroExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb) + CHECK_BYTEREG(temp); + movzbl(eax, eax); + } + template + void atomicFetchAnd16SignExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw) + movswl(eax, eax); + } + template + void atomicFetchAnd16ZeroExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw) + movzwl(eax, eax); + } + template + void atomicFetchAnd32(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchgl) + } + + template + void atomicFetchOr8SignExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb) + CHECK_BYTEREG(temp); + movsbl(eax, eax); + } + template + void atomicFetchOr8ZeroExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb) + CHECK_BYTEREG(temp); + movzbl(eax, eax); + } + template + void atomicFetchOr16SignExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw) + movswl(eax, eax); + } + template + 
void atomicFetchOr16ZeroExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw) + movzwl(eax, eax); + } + template + void atomicFetchOr32(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchgl) + } + + template + void atomicFetchXor8SignExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb) + CHECK_BYTEREG(temp); + movsbl(eax, eax); + } + template + void atomicFetchXor8ZeroExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb) + CHECK_BYTEREG(temp); + movzbl(eax, eax); + } + template + void atomicFetchXor16SignExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw) + movswl(eax, eax); + } + template + void atomicFetchXor16ZeroExtend(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw) + movzwl(eax, eax); + } + template + void atomicFetchXor32(const S& src, const T& mem, Register temp, Register output) { + ATOMIC_BITOP_BODY(movl, xorl, lock_cmpxchgl) + } + +#undef ATOMIC_BITOP_BODY + + // S is Register or Imm32; T is Address or BaseIndex. 
+ + template + void atomicAdd8(const S& src, const T& mem) { + lock_addb(src, Operand(mem)); + } + template + void atomicAdd16(const S& src, const T& mem) { + lock_addw(src, Operand(mem)); + } + template + void atomicAdd32(const S& src, const T& mem) { + lock_addl(src, Operand(mem)); + } + template + void atomicSub8(const S& src, const T& mem) { + lock_subb(src, Operand(mem)); + } + template + void atomicSub16(const S& src, const T& mem) { + lock_subw(src, Operand(mem)); + } + template + void atomicSub32(const S& src, const T& mem) { + lock_subl(src, Operand(mem)); + } + template + void atomicAnd8(const S& src, const T& mem) { + lock_andb(src, Operand(mem)); + } + template + void atomicAnd16(const S& src, const T& mem) { + lock_andw(src, Operand(mem)); + } + template + void atomicAnd32(const S& src, const T& mem) { + lock_andl(src, Operand(mem)); + } + template + void atomicOr8(const S& src, const T& mem) { + lock_orb(src, Operand(mem)); + } + template + void atomicOr16(const S& src, const T& mem) { + lock_orw(src, Operand(mem)); + } + template + void atomicOr32(const S& src, const T& mem) { + lock_orl(src, Operand(mem)); + } + template + void atomicXor8(const S& src, const T& mem) { + lock_xorb(src, Operand(mem)); + } + template + void atomicXor16(const S& src, const T& mem) { + lock_xorw(src, Operand(mem)); + } + template + void atomicXor32(const S& src, const T& mem) { + lock_xorl(src, Operand(mem)); + } + + void storeLoadFence() { + // This implementation follows Linux. 
+ if (HasSSE2()) + masm.mfence(); + else + lock_addl(Imm32(0), Operand(Address(esp, 0))); + } + + void branch16(Condition cond, Register lhs, Register rhs, Label* label) { + cmpw(rhs, lhs); + j(cond, label); + } + void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) { + testw(rhs, lhs); + j(cond, label); + } + + void jump(Label* label) { + jmp(label); + } + void jump(JitCode* code) { + jmp(code); + } + void jump(RepatchLabel* label) { + jmp(label); + } + void jump(Register reg) { + jmp(Operand(reg)); + } + void jump(const Address& addr) { + jmp(Operand(addr)); + } + void jump(wasm::TrapDesc target) { + jmp(target); + } + + void convertInt32ToDouble(Register src, FloatRegister dest) { + // vcvtsi2sd and friends write only part of their output register, which + // causes slowdowns on out-of-order processors. Explicitly break + // dependencies with vxorpd (and vxorps elsewhere), which are handled + // specially in modern CPUs, for this purpose. See sections 8.14, 9.8, + // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture + // document. 
+ zeroDouble(dest); + vcvtsi2sd(src, dest, dest); + } + void convertInt32ToDouble(const Address& src, FloatRegister dest) { + convertInt32ToDouble(Operand(src), dest); + } + void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) { + convertInt32ToDouble(Operand(src), dest); + } + void convertInt32ToDouble(const Operand& src, FloatRegister dest) { + // Clear the output register first to break dependencies; see above; + zeroDouble(dest); + vcvtsi2sd(Operand(src), dest, dest); + } + void convertInt32ToFloat32(Register src, FloatRegister dest) { + // Clear the output register first to break dependencies; see above; + zeroFloat32(dest); + vcvtsi2ss(src, dest, dest); + } + void convertInt32ToFloat32(const Address& src, FloatRegister dest) { + convertInt32ToFloat32(Operand(src), dest); + } + void convertInt32ToFloat32(const Operand& src, FloatRegister dest) { + // Clear the output register first to break dependencies; see above; + zeroFloat32(dest); + vcvtsi2ss(src, dest, dest); + } + Condition testDoubleTruthy(bool truthy, FloatRegister reg) { + ScratchDoubleScope scratch(asMasm()); + zeroDouble(scratch); + vucomisd(reg, scratch); + return truthy ? NonZero : Zero; + } + + // Class which ensures that registers used in byte ops are compatible with + // such instructions, even if the original register passed in wasn't. This + // only applies to x86, as on x64 all registers are valid single byte regs. + // This doesn't lead to great code but helps to simplify code generation. + // + // Note that this can currently only be used in cases where the register is + // read from by the guarded instruction, not written to. 
+ class AutoEnsureByteRegister { + MacroAssemblerX86Shared* masm; + Register original_; + Register substitute_; + + public: + template + AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address, Register reg) + : masm(masm), original_(reg) + { + AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs); + if (singleByteRegs.has(reg)) { + substitute_ = reg; + } else { + MOZ_ASSERT(address.base != StackPointer); + do { + substitute_ = singleByteRegs.takeAny(); + } while (Operand(address).containsReg(substitute_)); + + masm->push(substitute_); + masm->mov(reg, substitute_); + } + } + + ~AutoEnsureByteRegister() { + if (original_ != substitute_) + masm->pop(substitute_); + } + + Register reg() { + return substitute_; + } + }; + + void load8ZeroExtend(const Operand& src, Register dest) { + movzbl(src, dest); + } + void load8ZeroExtend(const Address& src, Register dest) { + movzbl(Operand(src), dest); + } + void load8ZeroExtend(const BaseIndex& src, Register dest) { + movzbl(Operand(src), dest); + } + void load8SignExtend(const Operand& src, Register dest) { + movsbl(src, dest); + } + void load8SignExtend(const Address& src, Register dest) { + movsbl(Operand(src), dest); + } + void load8SignExtend(const BaseIndex& src, Register dest) { + movsbl(Operand(src), dest); + } + template + void store8(Imm32 src, const T& dest) { + movb(src, Operand(dest)); + } + template + void store8(Register src, const T& dest) { + AutoEnsureByteRegister ensure(this, dest, src); + movb(ensure.reg(), Operand(dest)); + } + template + void compareExchange8ZeroExtend(const T& mem, Register oldval, Register newval, Register output) { + MOZ_ASSERT(output == eax); + CHECK_BYTEREG(newval); + if (oldval != output) + movl(oldval, output); + lock_cmpxchgb(newval, Operand(mem)); + movzbl(output, output); + } + template + void compareExchange8SignExtend(const T& mem, Register oldval, Register newval, Register output) { + MOZ_ASSERT(output == eax); + CHECK_BYTEREG(newval); + if (oldval 
!= output) + movl(oldval, output); + lock_cmpxchgb(newval, Operand(mem)); + movsbl(output, output); + } + template + void atomicExchange8ZeroExtend(const T& mem, Register value, Register output) { + if (value != output) + movl(value, output); + xchgb(output, Operand(mem)); + movzbl(output, output); + } + template + void atomicExchange8SignExtend(const T& mem, Register value, Register output) { + if (value != output) + movl(value, output); + xchgb(output, Operand(mem)); + movsbl(output, output); + } + void load16ZeroExtend(const Operand& src, Register dest) { + movzwl(src, dest); + } + void load16ZeroExtend(const Address& src, Register dest) { + movzwl(Operand(src), dest); + } + void load16ZeroExtend(const BaseIndex& src, Register dest) { + movzwl(Operand(src), dest); + } + template + void store16(const S& src, const T& dest) { + movw(src, Operand(dest)); + } + template + void compareExchange16ZeroExtend(const T& mem, Register oldval, Register newval, Register output) { + MOZ_ASSERT(output == eax); + if (oldval != output) + movl(oldval, output); + lock_cmpxchgw(newval, Operand(mem)); + movzwl(output, output); + } + template + void compareExchange16SignExtend(const T& mem, Register oldval, Register newval, Register output) { + MOZ_ASSERT(output == eax); + if (oldval != output) + movl(oldval, output); + lock_cmpxchgw(newval, Operand(mem)); + movswl(output, output); + } + template + void atomicExchange16ZeroExtend(const T& mem, Register value, Register output) { + if (value != output) + movl(value, output); + xchgw(output, Operand(mem)); + movzwl(output, output); + } + template + void atomicExchange16SignExtend(const T& mem, Register value, Register output) { + if (value != output) + movl(value, output); + xchgw(output, Operand(mem)); + movswl(output, output); + } + void load16SignExtend(const Operand& src, Register dest) { + movswl(src, dest); + } + void load16SignExtend(const Address& src, Register dest) { + movswl(Operand(src), dest); + } + void 
load16SignExtend(const BaseIndex& src, Register dest) { + movswl(Operand(src), dest); + } + void load32(const Address& address, Register dest) { + movl(Operand(address), dest); + } + void load32(const BaseIndex& src, Register dest) { + movl(Operand(src), dest); + } + void load32(const Operand& src, Register dest) { + movl(src, dest); + } + template <typename S, typename T> + void store32(const S& src, const T& dest) { + movl(src, Operand(dest)); + } + // 32-bit compare-and-exchange on |mem|. |oldval| is first copied into + // |output|, which must be eax; no extension is needed at this width. + template <typename T> + void compareExchange32(const T& mem, Register oldval, Register newval, Register output) { + MOZ_ASSERT(output == eax); + if (oldval != output) + movl(oldval, output); + lock_cmpxchgl(newval, Operand(mem)); + } + // Copies |value| into |output| and exchanges |output| with the 32-bit + // word at |mem| via xchgl. + template <typename T> + void atomicExchange32(const T& mem, Register value, Register output) { + if (value != output) + movl(value, output); + xchgl(output, Operand(mem)); + } + template <typename S, typename T> + void store32_NoSecondScratch(const S& src, const T& dest) { + store32(src, dest); + } + void loadDouble(const Address& src, FloatRegister dest) { + vmovsd(src, dest); + } + void loadDouble(const BaseIndex& src, FloatRegister dest) { + vmovsd(src, dest); + } + void loadDouble(const Operand& src, FloatRegister dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + loadDouble(src.toAddress(), dest); + break; + case Operand::MEM_SCALE: + loadDouble(src.toBaseIndex(), dest); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void moveDouble(FloatRegister src, FloatRegister dest) { + // Use vmovapd instead of vmovsd to avoid dependencies.
+ vmovapd(src, dest); + } + void zeroDouble(FloatRegister reg) { + vxorpd(reg, reg, reg); + } + void zeroFloat32(FloatRegister reg) { + vxorps(reg, reg, reg); + } + void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) { + vcvtss2sd(src, dest, dest); + } + void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) { + vcvtsd2ss(src, dest, dest); + } + + void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) { + // Note that if the conversion failed (because the converted + // result is larger than the maximum signed int32, or less than the + // least signed int32, or NaN), this will return the undefined integer + // value (0x8000000). + vcvttps2dq(src, dest); + } + void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) { + vcvtdq2ps(src, dest); + } + + void bitwiseAndSimd128(const Operand& src, FloatRegister dest) { + // TODO Using the "ps" variant for all types incurs a domain crossing + // penalty for integer types and double. + vandps(src, dest, dest); + } + void bitwiseAndNotSimd128(const Operand& src, FloatRegister dest) { + vandnps(src, dest, dest); + } + void bitwiseOrSimd128(const Operand& src, FloatRegister dest) { + vorps(src, dest, dest); + } + void bitwiseXorSimd128(const Operand& src, FloatRegister dest) { + vxorps(src, dest, dest); + } + void zeroSimd128Float(FloatRegister dest) { + vxorps(dest, dest, dest); + } + void zeroSimd128Int(FloatRegister dest) { + vpxor(dest, dest, dest); + } + + template inline void loadScalar(const Operand& src, Reg dest); + template inline void storeScalar(Reg src, const Address& dest); + template inline void loadAlignedVector(const Address& src, FloatRegister dest); + template inline void storeAlignedVector(FloatRegister src, const Address& dest); + + void loadInt32x1(const Address& src, FloatRegister dest) { + vmovd(Operand(src), dest); + } + void loadInt32x1(const BaseIndex& src, FloatRegister dest) { + vmovd(Operand(src), dest); + } + void loadInt32x2(const Address& 
src, FloatRegister dest) { + vmovq(Operand(src), dest); + } + void loadInt32x2(const BaseIndex& src, FloatRegister dest) { + vmovq(Operand(src), dest); + } + void loadInt32x3(const BaseIndex& src, FloatRegister dest) { + BaseIndex srcZ(src); + srcZ.offset += 2 * sizeof(int32_t); + + ScratchSimd128Scope scratch(asMasm()); + vmovq(Operand(src), dest); + vmovd(Operand(srcZ), scratch); + vmovlhps(scratch, dest, dest); + } + void loadInt32x3(const Address& src, FloatRegister dest) { + Address srcZ(src); + srcZ.offset += 2 * sizeof(int32_t); + + ScratchSimd128Scope scratch(asMasm()); + vmovq(Operand(src), dest); + vmovd(Operand(srcZ), scratch); + vmovlhps(scratch, dest, dest); + } + + void loadAlignedSimd128Int(const Address& src, FloatRegister dest) { + vmovdqa(Operand(src), dest); + } + void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) { + vmovdqa(src, dest); + } + void storeAlignedSimd128Int(FloatRegister src, const Address& dest) { + vmovdqa(src, Operand(dest)); + } + void moveSimd128Int(FloatRegister src, FloatRegister dest) { + vmovdqa(src, dest); + } + FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) { + if (HasAVX()) + return src; + moveSimd128Int(src, dest); + return dest; + } + FloatRegister reusedInputAlignedInt32x4(const Operand& src, FloatRegister dest) { + if (HasAVX() && src.kind() == Operand::FPREG) + return FloatRegister::FromCode(src.fpu()); + loadAlignedSimd128Int(src, dest); + return dest; + } + void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) { + vmovdqu(Operand(src), dest); + } + void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) { + vmovdqu(Operand(src), dest); + } + void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) { + vmovdqu(src, dest); + } + + void storeInt32x1(FloatRegister src, const Address& dest) { + vmovd(src, Operand(dest)); + } + void storeInt32x1(FloatRegister src, const BaseIndex& dest) { + vmovd(src, Operand(dest)); + } + void 
storeInt32x2(FloatRegister src, const Address& dest) { + vmovq(src, Operand(dest)); + } + void storeInt32x2(FloatRegister src, const BaseIndex& dest) { + vmovq(src, Operand(dest)); + } + void storeInt32x3(FloatRegister src, const Address& dest) { + Address destZ(dest); + destZ.offset += 2 * sizeof(int32_t); + vmovq(src, Operand(dest)); + ScratchSimd128Scope scratch(asMasm()); + vmovhlps(src, scratch, scratch); + vmovd(scratch, Operand(destZ)); + } + void storeInt32x3(FloatRegister src, const BaseIndex& dest) { + BaseIndex destZ(dest); + destZ.offset += 2 * sizeof(int32_t); + vmovq(src, Operand(dest)); + ScratchSimd128Scope scratch(asMasm()); + vmovhlps(src, scratch, scratch); + vmovd(scratch, Operand(destZ)); + } + + void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) { + vmovdqu(src, Operand(dest)); + } + void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) { + vmovdqu(src, Operand(dest)); + } + void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) { + vmovdqu(src, dest); + } + void packedEqualInt32x4(const Operand& src, FloatRegister dest) { + vpcmpeqd(src, dest, dest); + } + void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) { + vpcmpgtd(src, dest, dest); + } + void packedAddInt8(const Operand& src, FloatRegister dest) { + vpaddb(src, dest, dest); + } + void packedSubInt8(const Operand& src, FloatRegister dest) { + vpsubb(src, dest, dest); + } + void packedAddInt16(const Operand& src, FloatRegister dest) { + vpaddw(src, dest, dest); + } + void packedSubInt16(const Operand& src, FloatRegister dest) { + vpsubw(src, dest, dest); + } + void packedAddInt32(const Operand& src, FloatRegister dest) { + vpaddd(src, dest, dest); + } + void packedSubInt32(const Operand& src, FloatRegister dest) { + vpsubd(src, dest, dest); + } + void packedRcpApproximationFloat32x4(const Operand& src, FloatRegister dest) { + // This function is an approximation of the result, this might need + // fix up if the spec 
requires a given precision for this operation. + // TODO See also bug 1068028. + vrcpps(src, dest); + } + void packedRcpSqrtApproximationFloat32x4(const Operand& src, FloatRegister dest) { + // TODO See comment above. See also bug 1068028. + vrsqrtps(src, dest); + } + void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) { + vsqrtps(src, dest); + } + + void packedLeftShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) { + vpsllw(src, dest, dest); + } + void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) { + vpsllw(count, dest, dest); + } + void packedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) { + vpsraw(src, dest, dest); + } + void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) { + vpsraw(count, dest, dest); + } + void packedUnsignedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) { + vpsrlw(src, dest, dest); + } + void packedUnsignedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) { + vpsrlw(count, dest, dest); + } + + void packedLeftShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) { + vpslld(src, dest, dest); + } + void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) { + vpslld(count, dest, dest); + } + void packedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) { + vpsrad(src, dest, dest); + } + void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) { + vpsrad(count, dest, dest); + } + void packedUnsignedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) { + vpsrld(src, dest, dest); + } + void packedUnsignedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) { + vpsrld(count, dest, dest); + } + + void loadFloat32x3(const Address& src, FloatRegister dest) { + Address srcZ(src); + srcZ.offset += 2 * sizeof(float); + vmovsd(src, dest); + ScratchSimd128Scope scratch(asMasm()); + vmovss(srcZ, scratch); + vmovlhps(scratch, dest, dest); + } + void loadFloat32x3(const BaseIndex& src, 
FloatRegister dest) { + BaseIndex srcZ(src); + srcZ.offset += 2 * sizeof(float); + vmovsd(src, dest); + ScratchSimd128Scope scratch(asMasm()); + vmovss(srcZ, scratch); + vmovlhps(scratch, dest, dest); + } + + void loadAlignedSimd128Float(const Address& src, FloatRegister dest) { + vmovaps(Operand(src), dest); + } + void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) { + vmovaps(src, dest); + } + + void storeAlignedSimd128Float(FloatRegister src, const Address& dest) { + vmovaps(src, Operand(dest)); + } + void moveSimd128Float(FloatRegister src, FloatRegister dest) { + vmovaps(src, dest); + } + FloatRegister reusedInputFloat32x4(FloatRegister src, FloatRegister dest) { + if (HasAVX()) + return src; + moveSimd128Float(src, dest); + return dest; + } + FloatRegister reusedInputAlignedFloat32x4(const Operand& src, FloatRegister dest) { + if (HasAVX() && src.kind() == Operand::FPREG) + return FloatRegister::FromCode(src.fpu()); + loadAlignedSimd128Float(src, dest); + return dest; + } + void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) { + vmovups(Operand(src), dest); + } + void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) { + // Use the float-domain vmovups, like the Address and Operand overloads, + // rather than the integer-domain vmovdqu. + vmovups(Operand(src), dest); + } + void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) { + vmovups(src, dest); + } + void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) { + vmovups(src, Operand(dest)); + } + void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) { + vmovups(src, Operand(dest)); + } + void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) { + vmovups(src, dest); + } + void packedAddFloat32(const Operand& src, FloatRegister dest) { + vaddps(src, dest, dest); + } + void packedSubFloat32(const Operand& src, FloatRegister dest) { + vsubps(src, dest, dest); + } + void packedMulFloat32(const Operand& src, FloatRegister dest) { + vmulps(src, dest, dest); + } + void packedDivFloat32(const Operand& src,
FloatRegister dest) { + vdivps(src, dest, dest); + } + + static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1, + uint32_t z = 2, uint32_t w = 3) + { + MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4); + uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0); + MOZ_ASSERT(r < 256); + return r; + } + + void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) { + vpshufd(mask, src, dest); + } + void moveLowInt32(FloatRegister src, Register dest) { + vmovd(src, dest); + } + + void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) { + vmovhlps(src, dest, dest); + } + void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) { + // The shuffle instruction on x86 is such that it moves 2 words from + // the dest and 2 words from the src operands. To simplify things, just + // clobber the output with the input and apply the instruction + // afterwards. + // Note: this is useAtStart-safe because src isn't read afterwards. + FloatRegister srcCopy = reusedInputFloat32x4(src, dest); + vshufps(mask, srcCopy, srcCopy, dest); + } + void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) { + // Note this uses vshufps, which is a cross-domain penalty on CPU where it + // applies, but that's the way clang and gcc do it. 
+ vshufps(mask, src, dest, dest); + } + + void moveFloatAsDouble(Register src, FloatRegister dest) { + vmovd(src, dest); + vcvtss2sd(dest, dest, dest); + } + void loadFloatAsDouble(const Address& src, FloatRegister dest) { + vmovss(src, dest); + vcvtss2sd(dest, dest, dest); + } + void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) { + vmovss(src, dest); + vcvtss2sd(dest, dest, dest); + } + void loadFloatAsDouble(const Operand& src, FloatRegister dest) { + loadFloat32(src, dest); + vcvtss2sd(dest, dest, dest); + } + void loadFloat32(const Address& src, FloatRegister dest) { + vmovss(src, dest); + } + void loadFloat32(const BaseIndex& src, FloatRegister dest) { + vmovss(src, dest); + } + void loadFloat32(const Operand& src, FloatRegister dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + loadFloat32(src.toAddress(), dest); + break; + case Operand::MEM_SCALE: + loadFloat32(src.toBaseIndex(), dest); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void moveFloat32(FloatRegister src, FloatRegister dest) { + // Use vmovaps instead of vmovss to avoid dependencies. + vmovaps(src, dest); + } + + // Checks whether a double is representable as a 32-bit integer. If so, the + // integer is written to the output register. Otherwise, a bailout is taken to + // the given snapshot. This function overwrites the scratch float register. + void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail, + bool negativeZeroCheck = true) + { + // Check for -0.0 + if (negativeZeroCheck) + branchNegativeZero(src, dest, fail); + + ScratchDoubleScope scratch(asMasm()); + vcvttsd2si(src, dest); + convertInt32ToDouble(dest, scratch); + vucomisd(scratch, src); + j(Assembler::Parity, fail); + j(Assembler::NotEqual, fail); + } + + // Checks whether a float32 is representable as a 32-bit integer. If so, the + // integer is written to the output register. Otherwise, a bailout is taken to + // the given snapshot. 
This function overwrites the scratch float register. + void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail, + bool negativeZeroCheck = true) + { + // Check for -0.0 + if (negativeZeroCheck) + branchNegativeZeroFloat32(src, dest, fail); + + ScratchFloat32Scope scratch(asMasm()); + vcvttss2si(src, dest); + convertInt32ToFloat32(dest, scratch); + vucomiss(scratch, src); + j(Assembler::Parity, fail); + j(Assembler::NotEqual, fail); + } + + inline void clampIntToUint8(Register reg); + + bool maybeInlineDouble(wasm::RawF64 d, FloatRegister dest) { + // Loading zero with xor is specially optimized in hardware. + if (d.bits() == 0) { + zeroDouble(dest); + return true; + } + + // It is also possible to load several common constants using vpcmpeqw + // to get all ones and then vpsllq and vpsrlq to get zeros at the ends, + // as described in "13.4 Generating constants" of + // "2. Optimizing subroutines in assembly language" by Agner Fog, and as + // previously implemented here. However, with x86 and x64 both using + // constant pool loads for double constants, this is probably only + // worthwhile in cases where a load is likely to be delayed. 
+ + return false; + } + + bool maybeInlineFloat(wasm::RawF32 f, FloatRegister dest) { + // See comment above + if (f.bits() == 0) { + zeroFloat32(dest); + return true; + } + return false; + } + + bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) { + static const SimdConstant zero = SimdConstant::SplatX4(0); + static const SimdConstant minusOne = SimdConstant::SplatX4(-1); + if (v == zero) { + zeroSimd128Int(dest); + return true; + } + if (v == minusOne) { + vpcmpeqw(Operand(dest), dest, dest); + return true; + } + return false; + } + bool maybeInlineSimd128Float(const SimdConstant& v, const FloatRegister& dest) { + static const SimdConstant zero = SimdConstant::SplatX4(0.f); + if (v == zero) { + // This won't get inlined if the SimdConstant v contains -0 in any + // lane, as operator== here does a memcmp. + zeroSimd128Float(dest); + return true; + } + return false; + } + + void convertBoolToInt32(Register source, Register dest) { + // Note that C++ bool is only 1 byte, so zero extend it to clear the + // higher-order bits. + movzbl(source, dest); + } + + void emitSet(Assembler::Condition cond, Register dest, + Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) { + if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) { + // If the register we're defining is a single byte register, + // take advantage of the setCC instruction + setCC(cond, dest); + movzbl(dest, dest); + + if (ifNaN != Assembler::NaN_HandledByCond) { + Label noNaN; + j(Assembler::NoParity, &noNaN); + mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest); + bind(&noNaN); + } + } else { + Label end; + Label ifFalse; + + if (ifNaN == Assembler::NaN_IsFalse) + j(Assembler::Parity, &ifFalse); + // Note a subtlety here: FLAGS is live at this point, and the + // mov interface doesn't guarantee to preserve FLAGS. Use + // movl instead of mov, because the movl instruction + // preserves FLAGS. 
+ movl(Imm32(1), dest); + j(cond, &end); + if (ifNaN == Assembler::NaN_IsTrue) + j(Assembler::Parity, &end); + bind(&ifFalse); + mov(ImmWord(0), dest); + + bind(&end); + } + } + + // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp(). + CodeOffset toggledJump(Label* label) { + CodeOffset offset(size()); + jump(label); + return offset; + } + + // Materializes the address denoted by |address| into |dest| with lea. + template <typename T> + void computeEffectiveAddress(const T& address, Register dest) { + lea(Operand(address), dest); + } + + void checkStackAlignment() { + // Exists for ARM compatibility. + } + + CodeOffset labelForPatch() { + return CodeOffset(size()); + } + + void abiret() { + ret(); + } + + template <typename T> + void compareExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register oldval, Register newval, + Register temp, AnyRegister output); + + template <typename T> + void atomicExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register value, + Register temp, AnyRegister output); + + protected: + bool buildOOLFakeExitFrame(void* fakeReturnAddr); +}; + +// Specialize for float to use movaps. Use movdqa for everything else.
+template <> +inline void +MacroAssemblerX86Shared::loadAlignedVector<float>(const Address& src, FloatRegister dest) +{ + loadAlignedSimd128Float(src, dest); +} + +template <typename T> +inline void +MacroAssemblerX86Shared::loadAlignedVector(const Address& src, FloatRegister dest) +{ + loadAlignedSimd128Int(src, dest); +} + +// Specialize for float to use movaps. Use movdqa for everything else. +template <> +inline void +MacroAssemblerX86Shared::storeAlignedVector<float>(FloatRegister src, const Address& dest) +{ + storeAlignedSimd128Float(src, dest); +} + +template <typename T> +inline void +MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src, const Address& dest) +{ + storeAlignedSimd128Int(src, dest); +} + +// NOTE(review): the explicit template arguments of the loadScalar/storeScalar +// specializations below are reconstructed from the access width of each body; +// the signedness of the integer types is assumed -- confirm against upstream. +template <> inline void +MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src, Register dest) { + load8ZeroExtend(src, dest); +} +template <> inline void +MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src, Register dest) { + load16ZeroExtend(src, dest); +} +template <> inline void +MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src, Register dest) { + load32(src, dest); +} +template <> inline void +MacroAssemblerX86Shared::loadScalar<float>(const Operand& src, FloatRegister dest) { + loadFloat32(src, dest); +} + +template <> inline void +MacroAssemblerX86Shared::storeScalar<int8_t>(Register src, const Address& dest) { + store8(src, dest); +} +template <> inline void +MacroAssemblerX86Shared::storeScalar<int16_t>(Register src, const Address& dest) { + store16(src, dest); +} +template <> inline void +MacroAssemblerX86Shared::storeScalar<int32_t>(Register src, const Address& dest) { + store32(src, dest); +} +template <> inline void +MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src, const Address& dest) { + vmovss(src, dest); +} + +} // namespace jit +} // namespace js + +#undef CHECK_BYTEREG +#undef CHECK_BYTEREGS + +#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */ diff --git a/js/src/jit/x86-shared/MoveEmitter-x86-shared.cpp b/js/src/jit/x86-shared/MoveEmitter-x86-shared.cpp new file mode 100644 index 000000000..1ca4a1e1c --- /dev/null +++ b/js/src/jit/x86-shared/MoveEmitter-x86-shared.cpp @@ -0,0 +1,581 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/MoveEmitter-x86-shared.h" + +#include "jit/MacroAssembler-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::Maybe; + +MoveEmitterX86::MoveEmitterX86(MacroAssembler& masm) + : inCycle_(false), + masm(masm), + pushedAtCycle_(-1) +{ + pushedAtStart_ = masm.framePushed(); +} + +// Examine the cycle in moves starting at position i. Determine if it's a +// simple cycle consisting of all register-to-register moves in a single class, +// and whether it can be implemented entirely by swaps. +size_t +MoveEmitterX86::characterizeCycle(const MoveResolver& moves, size_t i, + bool* allGeneralRegs, bool* allFloatRegs) +{ + size_t swapCount = 0; + + for (size_t j = i; ; j++) { + const MoveOp& move = moves.getMove(j); + + // If it isn't a cycle of registers of the same kind, we won't be able + // to optimize it. + if (!move.to().isGeneralReg()) + *allGeneralRegs = false; + if (!move.to().isFloatReg()) + *allFloatRegs = false; + if (!*allGeneralRegs && !*allFloatRegs) + return -1; + + // Stop iterating when we see the last one. + if (j != i && move.isCycleEnd()) + break; + + // Check that this move is actually part of the cycle. This is + // over-conservative when there are multiple reads from the same source, + // but that's expected to be rare. + if (move.from() != moves.getMove(j + 1).to()) { + *allGeneralRegs = false; + *allFloatRegs = false; + return -1; + } + + swapCount++; + } + + // Check that the last move cycles back to the first move. + const MoveOp& move = moves.getMove(i + swapCount); + if (move.from() != moves.getMove(i).to()) { + *allGeneralRegs = false; + *allFloatRegs = false; + return -1; + } + + return swapCount; +} + +// If we can emit optimized code for the cycle in moves starting at position i, +// do so, and return true. 
+bool +MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver& moves, size_t i, + bool allGeneralRegs, bool allFloatRegs, size_t swapCount) +{ + if (allGeneralRegs && swapCount <= 2) { + // Use x86's swap-integer-registers instruction if we only have a few + // swaps. (x86 also has a swap between registers and memory but it's + // slow.) + for (size_t k = 0; k < swapCount; k++) + masm.xchg(moves.getMove(i + k).to().reg(), moves.getMove(i + k + 1).to().reg()); + return true; + } + + if (allFloatRegs && swapCount == 1) { + // There's no xchg for xmm registers, but if we only need a single swap, + // it's cheap to do an XOR swap. + FloatRegister a = moves.getMove(i).to().floatReg(); + FloatRegister b = moves.getMove(i + 1).to().floatReg(); + masm.vxorpd(a, b, b); + masm.vxorpd(b, a, a); + masm.vxorpd(a, b, b); + return true; + } + + return false; +} + +// Emits code for every move in |moves|, in order. Cycles are resolved with +// register swaps when maybeEmitOptimizedCycle() succeeds, otherwise through +// breakCycle()/completeCycle(). +void +MoveEmitterX86::emit(const MoveResolver& moves) +{ +#if defined(JS_CODEGEN_X86) && defined(DEBUG) + // Clobber any scratch register we have, to make regalloc bugs more visible. + if (scratchRegister_.isSome()) + masm.mov(ImmWord(0xdeadbeef), scratchRegister_.value()); +#endif + + for (size_t i = 0; i < moves.numMoves(); i++) { +#if defined(JS_CODEGEN_X86) && defined(DEBUG) + if (!scratchRegister_.isSome()) { + Maybe<Register> reg = findScratchRegister(moves, i); + if (reg.isSome()) + masm.mov(ImmWord(0xdeadbeef), reg.value()); + } +#endif + + const MoveOp& move = moves.getMove(i); + const MoveOperand& from = move.from(); + const MoveOperand& to = move.to(); + + if (move.isCycleEnd()) { + MOZ_ASSERT(inCycle_); + completeCycle(to, move.type()); + inCycle_ = false; + continue; + } + + if (move.isCycleBegin()) { + MOZ_ASSERT(!inCycle_); + + // Characterize the cycle. + bool allGeneralRegs = true, allFloatRegs = true; + size_t swapCount = characterizeCycle(moves, i, &allGeneralRegs, &allFloatRegs); + + // Attempt to optimize it to avoid using the stack.
+ if (maybeEmitOptimizedCycle(moves, i, allGeneralRegs, allFloatRegs, swapCount)) { + i += swapCount; + continue; + } + + // Otherwise use the stack. + breakCycle(to, move.endCycleType()); + inCycle_ = true; + } + + // A normal move which is not part of a cycle. + switch (move.type()) { + case MoveOp::FLOAT32: + emitFloat32Move(from, to); + break; + case MoveOp::DOUBLE: + emitDoubleMove(from, to); + break; + case MoveOp::INT32: + emitInt32Move(from, to, moves, i); + break; + case MoveOp::GENERAL: + emitGeneralMove(from, to, moves, i); + break; + case MoveOp::SIMD128INT: + emitSimd128IntMove(from, to); + break; + case MoveOp::SIMD128FLOAT: + emitSimd128FloatMove(from, to); + break; + default: + MOZ_CRASH("Unexpected move type"); + } + } +} + +MoveEmitterX86::~MoveEmitterX86() +{ + assertDone(); +} + +Address +MoveEmitterX86::cycleSlot() +{ + if (pushedAtCycle_ == -1) { + // Reserve stack for cycle resolution + masm.reserveStack(Simd128DataSize); + pushedAtCycle_ = masm.framePushed(); + } + + return Address(StackPointer, masm.framePushed() - pushedAtCycle_); +} + +Address +MoveEmitterX86::toAddress(const MoveOperand& operand) const +{ + if (operand.base() != StackPointer) + return Address(operand.base(), operand.disp()); + + MOZ_ASSERT(operand.disp() >= 0); + + // Otherwise, the stack offset may need to be adjusted. + return Address(StackPointer, operand.disp() + (masm.framePushed() - pushedAtStart_)); +} + +// Warning, do not use the resulting operand with pop instructions, since they +// compute the effective destination address after altering the stack pointer. +// Use toPopOperand if an Operand is needed for a pop. 
+Operand +MoveEmitterX86::toOperand(const MoveOperand& operand) const +{ + if (operand.isMemoryOrEffectiveAddress()) + return Operand(toAddress(operand)); + if (operand.isGeneralReg()) + return Operand(operand.reg()); + + MOZ_ASSERT(operand.isFloatReg()); + return Operand(operand.floatReg()); +} + +// This is the same as toOperand except that it computes an Operand suitable for +// use in a pop. +Operand +MoveEmitterX86::toPopOperand(const MoveOperand& operand) const +{ + if (operand.isMemory()) { + if (operand.base() != StackPointer) + return Operand(operand.base(), operand.disp()); + + MOZ_ASSERT(operand.disp() >= 0); + + // Otherwise, the stack offset may need to be adjusted. + // Note the adjustment by the stack slot here, to offset for the fact that pop + // computes its effective address after incrementing the stack pointer. + return Operand(StackPointer, + operand.disp() + (masm.framePushed() - sizeof(void*) - pushedAtStart_)); + } + if (operand.isGeneralReg()) + return Operand(operand.reg()); + + MOZ_ASSERT(operand.isFloatReg()); + return Operand(operand.floatReg()); +} + +void +MoveEmitterX86::breakCycle(const MoveOperand& to, MoveOp::Type type) +{ + // There is some pattern: + // (A -> B) + // (B -> A) + // + // This case handles (A -> B), which we reach first. We save B, then allow + // the original move to continue. 
+ switch (type) { + case MoveOp::SIMD128INT: + if (to.isMemory()) { + ScratchSimd128Scope scratch(masm); + masm.loadAlignedSimd128Int(toAddress(to), scratch); + masm.storeAlignedSimd128Int(scratch, cycleSlot()); + } else { + masm.storeAlignedSimd128Int(to.floatReg(), cycleSlot()); + } + break; + case MoveOp::SIMD128FLOAT: + if (to.isMemory()) { + ScratchSimd128Scope scratch(masm); + masm.loadAlignedSimd128Float(toAddress(to), scratch); + masm.storeAlignedSimd128Float(scratch, cycleSlot()); + } else { + masm.storeAlignedSimd128Float(to.floatReg(), cycleSlot()); + } + break; + case MoveOp::FLOAT32: + if (to.isMemory()) { + ScratchFloat32Scope scratch(masm); + masm.loadFloat32(toAddress(to), scratch); + masm.storeFloat32(scratch, cycleSlot()); + } else { + masm.storeFloat32(to.floatReg(), cycleSlot()); + } + break; + case MoveOp::DOUBLE: + if (to.isMemory()) { + ScratchDoubleScope scratch(masm); + masm.loadDouble(toAddress(to), scratch); + masm.storeDouble(scratch, cycleSlot()); + } else { + masm.storeDouble(to.floatReg(), cycleSlot()); + } + break; + case MoveOp::INT32: +#ifdef JS_CODEGEN_X64 + // x64 can't pop to a 32-bit destination, so don't push. + if (to.isMemory()) { + masm.load32(toAddress(to), ScratchReg); + masm.store32(ScratchReg, cycleSlot()); + } else { + masm.store32(to.reg(), cycleSlot()); + } + break; +#endif + case MoveOp::GENERAL: + masm.Push(toOperand(to)); + break; + default: + MOZ_CRASH("Unexpected move type"); + } +} + +void +MoveEmitterX86::completeCycle(const MoveOperand& to, MoveOp::Type type) +{ + // There is some pattern: + // (A -> B) + // (B -> A) + // + // This case handles (B -> A), which we reach last. We emit a move from the + // saved value of B, to A. 
+ switch (type) { + case MoveOp::SIMD128INT: + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= Simd128DataSize); + if (to.isMemory()) { + ScratchSimd128Scope scratch(masm); + masm.loadAlignedSimd128Int(cycleSlot(), scratch); + masm.storeAlignedSimd128Int(scratch, toAddress(to)); + } else { + masm.loadAlignedSimd128Int(cycleSlot(), to.floatReg()); + } + break; + case MoveOp::SIMD128FLOAT: + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= Simd128DataSize); + if (to.isMemory()) { + ScratchSimd128Scope scratch(masm); + masm.loadAlignedSimd128Float(cycleSlot(), scratch); + masm.storeAlignedSimd128Float(scratch, toAddress(to)); + } else { + masm.loadAlignedSimd128Float(cycleSlot(), to.floatReg()); + } + break; + case MoveOp::FLOAT32: + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(float)); + if (to.isMemory()) { + ScratchFloat32Scope scratch(masm); + masm.loadFloat32(cycleSlot(), scratch); + masm.storeFloat32(scratch, toAddress(to)); + } else { + masm.loadFloat32(cycleSlot(), to.floatReg()); + } + break; + case MoveOp::DOUBLE: + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(double)); + if (to.isMemory()) { + ScratchDoubleScope scratch(masm); + masm.loadDouble(cycleSlot(), scratch); + masm.storeDouble(scratch, toAddress(to)); + } else { + masm.loadDouble(cycleSlot(), to.floatReg()); + } + break; + case MoveOp::INT32: +#ifdef JS_CODEGEN_X64 + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(int32_t)); + // x64 can't pop to a 32-bit destination. 
+ if (to.isMemory()) { + masm.load32(cycleSlot(), ScratchReg); + masm.store32(ScratchReg, toAddress(to)); + } else { + masm.load32(cycleSlot(), to.reg()); + } + break; +#endif + case MoveOp::GENERAL: + MOZ_ASSERT(masm.framePushed() - pushedAtStart_ >= sizeof(intptr_t)); + masm.Pop(toPopOperand(to)); + break; + default: + MOZ_CRASH("Unexpected move type"); + } +} + +// Emits a 32-bit integer move. A memory-to-memory move goes through a scratch +// register when findScratchRegister() yields one, otherwise through the stack. +void +MoveEmitterX86::emitInt32Move(const MoveOperand& from, const MoveOperand& to, + const MoveResolver& moves, size_t i) +{ + if (from.isGeneralReg()) { + masm.move32(from.reg(), toOperand(to)); + } else if (to.isGeneralReg()) { + MOZ_ASSERT(from.isMemory()); + masm.load32(toAddress(from), to.reg()); + } else { + // Memory to memory gpr move. + MOZ_ASSERT(from.isMemory()); + Maybe<Register> reg = findScratchRegister(moves, i); + if (reg.isSome()) { + masm.load32(toAddress(from), reg.value()); + masm.move32(reg.value(), toOperand(to)); + } else { + // No scratch register available; bounce it off the stack. + masm.Push(toOperand(from)); + masm.Pop(toPopOperand(to)); + } + } +} + +// Emits a pointer-sized move. |from| may also be an effective address, in +// which case the address itself (not the memory it names) is moved. +void +MoveEmitterX86::emitGeneralMove(const MoveOperand& from, const MoveOperand& to, + const MoveResolver& moves, size_t i) +{ + if (from.isGeneralReg()) { + masm.mov(from.reg(), toOperand(to)); + } else if (to.isGeneralReg()) { + MOZ_ASSERT(from.isMemoryOrEffectiveAddress()); + if (from.isMemory()) + masm.loadPtr(toAddress(from), to.reg()); + else + masm.lea(toOperand(from), to.reg()); + } else if (from.isMemory()) { + // Memory to memory gpr move. + Maybe<Register> reg = findScratchRegister(moves, i); + if (reg.isSome()) { + masm.loadPtr(toAddress(from), reg.value()); + masm.mov(reg.value(), toOperand(to)); + } else { + // No scratch register available; bounce it off the stack. + masm.Push(toOperand(from)); + masm.Pop(toPopOperand(to)); + } + } else { + // Effective address to memory move.
+ MOZ_ASSERT(from.isEffectiveAddress()); + Maybe reg = findScratchRegister(moves, i); + if (reg.isSome()) { + masm.lea(toOperand(from), reg.value()); + masm.mov(reg.value(), toOperand(to)); + } else { + // This is tricky without a scratch reg. We can't do an lea. Bounce the + // base register off the stack, then add the offset in place. Note that + // this clobbers FLAGS! + masm.Push(from.base()); + masm.Pop(toPopOperand(to)); + MOZ_ASSERT(to.isMemoryOrEffectiveAddress()); + masm.addPtr(Imm32(from.disp()), toAddress(to)); + } + } +} + +void +MoveEmitterX86::emitFloat32Move(const MoveOperand& from, const MoveOperand& to) +{ + MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isSingle()); + MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isSingle()); + + if (from.isFloatReg()) { + if (to.isFloatReg()) + masm.moveFloat32(from.floatReg(), to.floatReg()); + else + masm.storeFloat32(from.floatReg(), toAddress(to)); + } else if (to.isFloatReg()) { + masm.loadFloat32(toAddress(from), to.floatReg()); + } else { + // Memory to memory move. + MOZ_ASSERT(from.isMemory()); + ScratchFloat32Scope scratch(masm); + masm.loadFloat32(toAddress(from), scratch); + masm.storeFloat32(scratch, toAddress(to)); + } +} + +void +MoveEmitterX86::emitDoubleMove(const MoveOperand& from, const MoveOperand& to) +{ + MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isDouble()); + MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isDouble()); + + if (from.isFloatReg()) { + if (to.isFloatReg()) + masm.moveDouble(from.floatReg(), to.floatReg()); + else + masm.storeDouble(from.floatReg(), toAddress(to)); + } else if (to.isFloatReg()) { + masm.loadDouble(toAddress(from), to.floatReg()); + } else { + // Memory to memory move. 
+ MOZ_ASSERT(from.isMemory()); + ScratchDoubleScope scratch(masm); + masm.loadDouble(toAddress(from), scratch); + masm.storeDouble(scratch, toAddress(to)); + } +} + +void +MoveEmitterX86::emitSimd128IntMove(const MoveOperand& from, const MoveOperand& to) +{ + MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isSimd128()); + MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isSimd128()); + + if (from.isFloatReg()) { + if (to.isFloatReg()) + masm.moveSimd128Int(from.floatReg(), to.floatReg()); + else + masm.storeAlignedSimd128Int(from.floatReg(), toAddress(to)); + } else if (to.isFloatReg()) { + masm.loadAlignedSimd128Int(toAddress(from), to.floatReg()); + } else { + // Memory to memory move. + MOZ_ASSERT(from.isMemory()); + ScratchSimd128Scope scratch(masm); + masm.loadAlignedSimd128Int(toAddress(from), scratch); + masm.storeAlignedSimd128Int(scratch, toAddress(to)); + } +} + +void +MoveEmitterX86::emitSimd128FloatMove(const MoveOperand& from, const MoveOperand& to) +{ + MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isSimd128()); + MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isSimd128()); + + if (from.isFloatReg()) { + if (to.isFloatReg()) + masm.moveSimd128Float(from.floatReg(), to.floatReg()); + else + masm.storeAlignedSimd128Float(from.floatReg(), toAddress(to)); + } else if (to.isFloatReg()) { + masm.loadAlignedSimd128Float(toAddress(from), to.floatReg()); + } else { + // Memory to memory move. 
+ MOZ_ASSERT(from.isMemory()); + ScratchSimd128Scope scratch(masm); + masm.loadAlignedSimd128Float(toAddress(from), scratch); + masm.storeAlignedSimd128Float(scratch, toAddress(to)); + } +} + +void +MoveEmitterX86::assertDone() +{ + MOZ_ASSERT(!inCycle_); +} + +void +MoveEmitterX86::finish() +{ + assertDone(); + + masm.freeStack(masm.framePushed() - pushedAtStart_); +} + +Maybe +MoveEmitterX86::findScratchRegister(const MoveResolver& moves, size_t initial) +{ +#ifdef JS_CODEGEN_X86 + if (scratchRegister_.isSome()) + return scratchRegister_; + + // All registers are either in use by this move group or are live + // afterwards. Look through the remaining moves for a register which is + // clobbered before it is used, and is thus dead at this point. + AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + for (size_t i = initial; i < moves.numMoves(); i++) { + const MoveOp& move = moves.getMove(i); + if (move.from().isGeneralReg()) + regs.takeUnchecked(move.from().reg()); + else if (move.from().isMemoryOrEffectiveAddress()) + regs.takeUnchecked(move.from().base()); + if (move.to().isGeneralReg()) { + if (i != initial && !move.isCycleBegin() && regs.has(move.to().reg())) + return mozilla::Some(move.to().reg()); + regs.takeUnchecked(move.to().reg()); + } else if (move.to().isMemoryOrEffectiveAddress()) { + regs.takeUnchecked(move.to().base()); + } + } + + return mozilla::Nothing(); +#else + return mozilla::Some(ScratchReg); +#endif +} diff --git a/js/src/jit/x86-shared/MoveEmitter-x86-shared.h b/js/src/jit/x86-shared/MoveEmitter-x86-shared.h new file mode 100644 index 000000000..6602206f2 --- /dev/null +++ b/js/src/jit/x86-shared/MoveEmitter-x86-shared.h @@ -0,0 +1,74 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_MoveEmitter_x86_shared_h +#define jit_MoveEmitter_x86_shared_h + +#include "jit/MacroAssembler.h" +#include "jit/MoveResolver.h" + +namespace js { +namespace jit { + +class MoveEmitterX86 +{ + bool inCycle_; + MacroAssembler& masm; + + // Original stack push value. + uint32_t pushedAtStart_; + + // This is a store stack offset for the cycle-break spill slot, snapshotting + // codegen->framePushed_ at the time it is allocated. -1 if not allocated. + int32_t pushedAtCycle_; + +#ifdef JS_CODEGEN_X86 + // Optional scratch register for performing moves. + mozilla::Maybe scratchRegister_; +#endif + + void assertDone(); + Address cycleSlot(); + Address toAddress(const MoveOperand& operand) const; + Operand toOperand(const MoveOperand& operand) const; + Operand toPopOperand(const MoveOperand& operand) const; + + size_t characterizeCycle(const MoveResolver& moves, size_t i, + bool* allGeneralRegs, bool* allFloatRegs); + bool maybeEmitOptimizedCycle(const MoveResolver& moves, size_t i, + bool allGeneralRegs, bool allFloatRegs, size_t swapCount); + void emitInt32Move(const MoveOperand& from, const MoveOperand& to, + const MoveResolver& moves, size_t i); + void emitGeneralMove(const MoveOperand& from, const MoveOperand& to, + const MoveResolver& moves, size_t i); + void emitFloat32Move(const MoveOperand& from, const MoveOperand& to); + void emitDoubleMove(const MoveOperand& from, const MoveOperand& to); + void emitSimd128FloatMove(const MoveOperand& from, const MoveOperand& to); + void emitSimd128IntMove(const MoveOperand& from, const MoveOperand& to); + void breakCycle(const MoveOperand& to, MoveOp::Type type); + void completeCycle(const MoveOperand& to, MoveOp::Type type); + + public: + explicit MoveEmitterX86(MacroAssembler& masm); + ~MoveEmitterX86(); + void emit(const MoveResolver& moves); + void finish(); + + void setScratchRegister(Register 
reg) { +#ifdef JS_CODEGEN_X86 + scratchRegister_.emplace(reg); +#endif + } + + mozilla::Maybe findScratchRegister(const MoveResolver& moves, size_t i); +}; + +typedef MoveEmitterX86 MoveEmitter; + +} // namespace jit +} // namespace js + +#endif /* jit_MoveEmitter_x86_shared_h */ diff --git a/js/src/jit/x86-shared/Patching-x86-shared.h b/js/src/jit/x86-shared/Patching-x86-shared.h new file mode 100644 index 000000000..b73492870 --- /dev/null +++ b/js/src/jit/x86-shared/Patching-x86-shared.h @@ -0,0 +1,124 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Patching_x86_shared_h +#define jit_x86_shared_Patching_x86_shared_h + +namespace js { +namespace jit { + +namespace X86Encoding { + +inline void* +GetPointer(const void* where) +{ + void* res; + memcpy(&res, (const char*)where - sizeof(void*), sizeof(void*)); + return res; +} + +inline void +SetPointer(void* where, const void* value) +{ + memcpy((char*)where - sizeof(void*), &value, sizeof(void*)); +} + +inline int32_t +GetInt32(const void* where) +{ + int32_t res; + memcpy(&res, (const char*)where - sizeof(int32_t), sizeof(int32_t)); + return res; +} + +inline void +SetInt32(void* where, int32_t value) +{ + memcpy((char*)where - sizeof(int32_t), &value, sizeof(int32_t)); +} + +inline void +SetRel32(void* from, void* to) +{ + intptr_t offset = reinterpret_cast(to) - reinterpret_cast(from); + MOZ_ASSERT(offset == static_cast(offset), + "offset is too great for a 32-bit relocation"); + if (offset != static_cast(offset)) + MOZ_CRASH("offset is too great for a 32-bit relocation"); + + SetInt32(from, offset); +} + +inline void* +GetRel32Target(void* where) +{ + int32_t rel = GetInt32(where); + return (char*)where + rel; 
+} + +class JmpSrc { + public: + JmpSrc() + : offset_(-1) + { + } + + explicit JmpSrc(int32_t offset) + : offset_(offset) + { + } + + int32_t offset() const { + return offset_; + } + + bool isSet() const { + return offset_ != -1; + } + + private: + int offset_; +}; + +class JmpDst { + public: + JmpDst() + : offset_(-1) + , used_(false) + { + } + + bool isUsed() const { return used_; } + void used() { used_ = true; } + bool isValid() const { return offset_ != -1; } + + explicit JmpDst(int32_t offset) + : offset_(offset) + , used_(false) + { + MOZ_ASSERT(offset_ == offset); + } + int32_t offset() const { + return offset_; + } + private: + int32_t offset_ : 31; + bool used_ : 1; +}; + +inline bool +CanRelinkJump(void* from, void* to) +{ + intptr_t offset = static_cast(to) - static_cast(from); + return (offset == static_cast(offset)); +} + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Patching_x86_shared_h */ -- cgit v1.2.3