Diffstat (limited to 'js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp')
-rw-r--r-- | js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp | 4727
1 file changed, 4727 insertions, 0 deletions
diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp new file mode 100644 index 000000000..9cf03aede --- /dev/null +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -0,0 +1,4727 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/CodeGenerator-x86-shared.h" + +#include "mozilla/DebugOnly.h" +#include "mozilla/MathAlgorithms.h" + +#include "jsmath.h" + +#include "jit/JitCompartment.h" +#include "jit/JitFrames.h" +#include "jit/Linker.h" +#include "jit/RangeAnalysis.h" +#include "vm/TraceLogging.h" + +#include "jit/MacroAssembler-inl.h" +#include "jit/shared/CodeGenerator-shared-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::Abs; +using mozilla::BitwiseCast; +using mozilla::DebugOnly; +using mozilla::FloatingPoint; +using mozilla::FloorLog2; +using mozilla::NegativeInfinity; +using mozilla::SpecificNaN; + +using JS::GenericNaN; + +namespace js { +namespace jit { + +CodeGeneratorX86Shared::CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm) + : CodeGeneratorShared(gen, graph, masm) +{ +} + +#ifdef JS_PUNBOX64 +Operand +CodeGeneratorX86Shared::ToOperandOrRegister64(const LInt64Allocation input) +{ + return ToOperand(input.value()); +} +#else +Register64 +CodeGeneratorX86Shared::ToOperandOrRegister64(const LInt64Allocation input) +{ + return ToRegister64(input); +} +#endif + +void +OutOfLineBailout::accept(CodeGeneratorX86Shared* codegen) +{ + codegen->visitOutOfLineBailout(this); +} + +void +CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond, MBasicBlock* mirTrue, + MBasicBlock* mirFalse, Assembler::NaNCond ifNaN) +{ + if (ifNaN == Assembler::NaN_IsFalse) + jumpToBlock(mirFalse, Assembler::Parity); + else if (ifNaN == Assembler::NaN_IsTrue) + jumpToBlock(mirTrue, Assembler::Parity); + + if (isNextBlock(mirFalse->lir())) { + jumpToBlock(mirTrue, cond); + } else { + jumpToBlock(mirFalse, Assembler::InvertCondition(cond)); + jumpToBlock(mirTrue); + } +} + +void +CodeGeneratorX86Shared::visitDouble(LDouble* ins) +{ + const LDefinition* out = ins->getDef(0); + masm.loadConstantDouble(ins->getDouble(), ToFloatRegister(out)); +} + +void +CodeGeneratorX86Shared::visitFloat32(LFloat32* ins) +{ + const LDefinition* out = ins->getDef(0); + masm.loadConstantFloat32(ins->getFloat(), ToFloatRegister(out)); +} + +void +CodeGeneratorX86Shared::visitTestIAndBranch(LTestIAndBranch* test) +{ + Register input = ToRegister(test->input()); + masm.test32(input, input); + emitBranch(Assembler::NonZero, test->ifTrue(), test->ifFalse()); +} + +void +CodeGeneratorX86Shared::visitTestDAndBranch(LTestDAndBranch* test) +{ + const LAllocation* opd = test->input(); + + // vucomisd flags: + // Z P C + // --------- + // NaN 1 1 1 + // > 0 0 0 + // < 0 0 1 + // = 1 0 0 + // + // NaN is falsey, so comparing against 0 and then using the Z flag is + // enough to determine which branch to take. 
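To see why testing the Z flag alone is enough, here is a standalone model of the flag table above (illustration only; the struct and helper names are invented for this sketch and are not SpiderMonkey or x86 API):

#include <cassert>
#include <cmath>

struct UComisdFlags { bool zf, pf, cf; };

// Model of vucomisd: compare lhs against rhs and report the flag triple
// from the table above.
static UComisdFlags ucomisd(double lhs, double rhs) {
    if (std::isnan(lhs) || std::isnan(rhs))
        return {true, true, true};        // unordered: ZF=PF=CF=1
    if (lhs > rhs) return {false, false, false};
    if (lhs < rhs) return {false, false, true};
    return {true, false, false};          // equal: only ZF set
}

int main() {
    for (double d : {0.0, -0.0, 1.5, -2.0, std::nan("")}) {
        bool branchTrue = !ucomisd(d, 0.0).zf;        // Assembler::NotEqual
        bool jsTruthy = d != 0.0 && !std::isnan(d);   // ToBoolean on a double
        assert(branchTrue == jsTruthy);
    }
    return 0;
}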
+ ScratchDoubleScope scratch(masm); + masm.zeroDouble(scratch); + masm.vucomisd(scratch, ToFloatRegister(opd)); + emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); +} + +void +CodeGeneratorX86Shared::visitTestFAndBranch(LTestFAndBranch* test) +{ + const LAllocation* opd = test->input(); + // vucomiss flags are the same as doubles; see comment above + { + ScratchFloat32Scope scratch(masm); + masm.zeroFloat32(scratch); + masm.vucomiss(scratch, ToFloatRegister(opd)); + } + emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); +} + +void +CodeGeneratorX86Shared::visitBitAndAndBranch(LBitAndAndBranch* baab) +{ + if (baab->right()->isConstant()) + masm.test32(ToRegister(baab->left()), Imm32(ToInt32(baab->right()))); + else + masm.test32(ToRegister(baab->left()), ToRegister(baab->right())); + emitBranch(Assembler::NonZero, baab->ifTrue(), baab->ifFalse()); +} + +void +CodeGeneratorX86Shared::emitCompare(MCompare::CompareType type, const LAllocation* left, const LAllocation* right) +{ +#ifdef JS_CODEGEN_X64 + if (type == MCompare::Compare_Object) { + masm.cmpPtr(ToRegister(left), ToOperand(right)); + return; + } +#endif + + if (right->isConstant()) + masm.cmp32(ToRegister(left), Imm32(ToInt32(right))); + else + masm.cmp32(ToRegister(left), ToOperand(right)); +} + +void +CodeGeneratorX86Shared::visitCompare(LCompare* comp) +{ + MCompare* mir = comp->mir(); + emitCompare(mir->compareType(), comp->left(), comp->right()); + masm.emitSet(JSOpToCondition(mir->compareType(), comp->jsop()), ToRegister(comp->output())); +} + +void +CodeGeneratorX86Shared::visitCompareAndBranch(LCompareAndBranch* comp) +{ + MCompare* mir = comp->cmpMir(); + emitCompare(mir->compareType(), comp->left(), comp->right()); + Assembler::Condition cond = JSOpToCondition(mir->compareType(), comp->jsop()); + emitBranch(cond, comp->ifTrue(), comp->ifFalse()); +} + +void +CodeGeneratorX86Shared::visitCompareD(LCompareD* comp) +{ + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->mir()->operandsAreNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + masm.compareDouble(cond, lhs, rhs); + masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond); +} + +void +CodeGeneratorX86Shared::visitCompareF(LCompareF* comp) +{ + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->mir()->operandsAreNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + masm.compareFloat(cond, lhs, rhs); + masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond); +} + +void +CodeGeneratorX86Shared::visitNotI(LNotI* ins) +{ + masm.cmp32(ToRegister(ins->input()), Imm32(0)); + masm.emitSet(Assembler::Equal, ToRegister(ins->output())); +} + +void +CodeGeneratorX86Shared::visitNotD(LNotD* ins) +{ + FloatRegister opd = ToFloatRegister(ins->input()); + + // Not returns true if the input is a NaN. We don't have to worry about + // it if we know the input is never NaN though. 
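The condition being set up here, equal to zero or unordered with NaN treated as true, is just ToBoolean negation on a double; a minimal standalone check of that claim, not part of the patch:

#include <cassert>
#include <cmath>

// JS !x on a double is true exactly for +0, -0 and NaN.
static bool jsNot(double x) { return x == 0.0 || std::isnan(x); }

int main() {
    assert(jsNot(0.0) && jsNot(-0.0) && jsNot(std::nan("")));
    assert(!jsNot(1.0) && !jsNot(-3.5));
    return 0;
}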
+ Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; + if (ins->mir()->operandIsNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + ScratchDoubleScope scratch(masm); + masm.zeroDouble(scratch); + masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch); + masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); +} + +void +CodeGeneratorX86Shared::visitNotF(LNotF* ins) +{ + FloatRegister opd = ToFloatRegister(ins->input()); + + // Not returns true if the input is a NaN. We don't have to worry about + // it if we know the input is never NaN though. + Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; + if (ins->mir()->operandIsNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + ScratchFloat32Scope scratch(masm); + masm.zeroFloat32(scratch); + masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch); + masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); +} + +void +CodeGeneratorX86Shared::visitCompareDAndBranch(LCompareDAndBranch* comp) +{ + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->cmpMir()->operandsAreNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + masm.compareDouble(cond, lhs, rhs); + emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond); +} + +void +CodeGeneratorX86Shared::visitCompareFAndBranch(LCompareFAndBranch* comp) +{ + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->cmpMir()->operandsAreNeverNaN()) + nanCond = Assembler::NaN_HandledByCond; + + masm.compareFloat(cond, lhs, rhs); + emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond); +} + +void +CodeGeneratorX86Shared::visitWasmStackArg(LWasmStackArg* ins) +{ + const MWasmStackArg* mir = ins->mir(); + Address dst(StackPointer, mir->spOffset()); + if (ins->arg()->isConstant()) { + masm.storePtr(ImmWord(ToInt32(ins->arg())), dst); + } else if (ins->arg()->isGeneralReg()) { + masm.storePtr(ToRegister(ins->arg()), dst); + } else { + switch (mir->input()->type()) { + case MIRType::Double: + masm.storeDouble(ToFloatRegister(ins->arg()), dst); + return; + case MIRType::Float32: + masm.storeFloat32(ToFloatRegister(ins->arg()), dst); + return; + // StackPointer is SIMD-aligned and ABIArgGenerator guarantees + // stack offsets are SIMD-aligned. 
+ case MIRType::Int32x4: + case MIRType::Bool32x4: + masm.storeAlignedSimd128Int(ToFloatRegister(ins->arg()), dst); + return; + case MIRType::Float32x4: + masm.storeAlignedSimd128Float(ToFloatRegister(ins->arg()), dst); + return; + default: break; + } + MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("unexpected mir type in WasmStackArg"); + } +} + +void +CodeGeneratorX86Shared::visitWasmStackArgI64(LWasmStackArgI64* ins) +{ + const MWasmStackArg* mir = ins->mir(); + Address dst(StackPointer, mir->spOffset()); + if (IsConstant(ins->arg())) + masm.store64(Imm64(ToInt64(ins->arg())), dst); + else + masm.store64(ToRegister64(ins->arg()), dst); +} + +void +CodeGeneratorX86Shared::visitWasmSelect(LWasmSelect* ins) +{ + MIRType mirType = ins->mir()->type(); + + Register cond = ToRegister(ins->condExpr()); + Operand falseExpr = ToOperand(ins->falseExpr()); + + masm.test32(cond, cond); + + if (mirType == MIRType::Int32) { + Register out = ToRegister(ins->output()); + MOZ_ASSERT(ToRegister(ins->trueExpr()) == out, "true expr input is reused for output"); + masm.cmovz(falseExpr, out); + return; + } + + FloatRegister out = ToFloatRegister(ins->output()); + MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out, "true expr input is reused for output"); + + Label done; + masm.j(Assembler::NonZero, &done); + + if (mirType == MIRType::Float32) { + if (falseExpr.kind() == Operand::FPREG) + masm.moveFloat32(ToFloatRegister(ins->falseExpr()), out); + else + masm.loadFloat32(falseExpr, out); + } else if (mirType == MIRType::Double) { + if (falseExpr.kind() == Operand::FPREG) + masm.moveDouble(ToFloatRegister(ins->falseExpr()), out); + else + masm.loadDouble(falseExpr, out); + } else { + MOZ_CRASH("unhandled type in visitWasmSelect!"); + } + + masm.bind(&done); + return; +} + +void +CodeGeneratorX86Shared::visitWasmReinterpret(LWasmReinterpret* lir) +{ + MOZ_ASSERT(gen->compilingWasm()); + MWasmReinterpret* ins = lir->mir(); + + MIRType to = ins->type(); +#ifdef DEBUG + MIRType from = ins->input()->type(); +#endif + + switch (to) { + case MIRType::Int32: + MOZ_ASSERT(from == MIRType::Float32); + masm.vmovd(ToFloatRegister(lir->input()), ToRegister(lir->output())); + break; + case MIRType::Float32: + MOZ_ASSERT(from == MIRType::Int32); + masm.vmovd(ToRegister(lir->input()), ToFloatRegister(lir->output())); + break; + case MIRType::Double: + case MIRType::Int64: + MOZ_CRASH("not handled by this LIR opcode"); + default: + MOZ_CRASH("unexpected WasmReinterpret"); + } +} + +void +CodeGeneratorX86Shared::visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds* ool) +{ + switch (ool->viewType()) { + case Scalar::Int64: + case Scalar::Float32x4: + case Scalar::Int8x16: + case Scalar::Int16x8: + case Scalar::Int32x4: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + case Scalar::Float32: + masm.loadConstantFloat32(float(GenericNaN()), ool->dest().fpu()); + break; + case Scalar::Float64: + masm.loadConstantDouble(GenericNaN(), ool->dest().fpu()); + break; + case Scalar::Int8: + case Scalar::Uint8: + case Scalar::Int16: + case Scalar::Uint16: + case Scalar::Int32: + case Scalar::Uint32: + case Scalar::Uint8Clamped: + Register destReg = ool->dest().gpr(); + masm.mov(ImmWord(0), destReg); + break; + } + masm.jmp(ool->rejoin()); +} + +void +CodeGeneratorX86Shared::visitWasmAddOffset(LWasmAddOffset* lir) +{ + MWasmAddOffset* mir = lir->mir(); + Register base = ToRegister(lir->base()); + Register out = ToRegister(lir->output()); + + if (base != out) + masm.move32(base, out); + 
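The offset addition that follows traps on wrap-around via the carry flag; a portable model of that check (a sketch only, the helper name is hypothetical):

#include <cassert>
#include <cstdint>

// The carry flag is set exactly when the 32-bit sum wraps, which is what
// the add32 + CarrySet trap below relies on.
static bool add32Carries(uint32_t base, uint32_t offset, uint32_t* out) {
    *out = base + offset;    // wraps modulo 2^32, like add32
    return *out < base;      // true iff a carry out of bit 31 occurred
}

int main() {
    uint32_t sum;
    assert(!add32Carries(100, 24, &sum) && sum == 124);
    assert(add32Carries(0xFFFFFFF0u, 0x20u, &sum));   // sum exceeds 2^32 - 1
    return 0;
}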
masm.add32(Imm32(mir->offset()), out); + + masm.j(Assembler::CarrySet, trap(mir, wasm::Trap::OutOfBounds)); +} + +void +CodeGeneratorX86Shared::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) +{ + FloatRegister input = ToFloatRegister(lir->input()); + Register output = ToRegister(lir->output()); + + MWasmTruncateToInt32* mir = lir->mir(); + MIRType inputType = mir->input()->type(); + + MOZ_ASSERT(inputType == MIRType::Double || inputType == MIRType::Float32); + + auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input); + addOutOfLineCode(ool, mir); + + Label* oolEntry = ool->entry(); + if (mir->isUnsigned()) { + if (inputType == MIRType::Double) + masm.wasmTruncateDoubleToUInt32(input, output, oolEntry); + else if (inputType == MIRType::Float32) + masm.wasmTruncateFloat32ToUInt32(input, output, oolEntry); + else + MOZ_CRASH("unexpected type"); + return; + } + + if (inputType == MIRType::Double) + masm.wasmTruncateDoubleToInt32(input, output, oolEntry); + else if (inputType == MIRType::Float32) + masm.wasmTruncateFloat32ToInt32(input, output, oolEntry); + else + MOZ_CRASH("unexpected type"); + + masm.bind(ool->rejoin()); +} + +bool +CodeGeneratorX86Shared::generateOutOfLineCode() +{ + if (!CodeGeneratorShared::generateOutOfLineCode()) + return false; + + if (deoptLabel_.used()) { + // All non-table-based bailouts will go here. + masm.bind(&deoptLabel_); + + // Push the frame size, so the handler can recover the IonScript. + masm.push(Imm32(frameSize())); + + JitCode* handler = gen->jitRuntime()->getGenericBailoutHandler(); + masm.jmp(ImmPtr(handler->raw()), Relocation::JITCODE); + } + + return !masm.oom(); +} + +class BailoutJump { + Assembler::Condition cond_; + + public: + explicit BailoutJump(Assembler::Condition cond) : cond_(cond) + { } +#ifdef JS_CODEGEN_X86 + void operator()(MacroAssembler& masm, uint8_t* code) const { + masm.j(cond_, ImmPtr(code), Relocation::HARDCODED); + } +#endif + void operator()(MacroAssembler& masm, Label* label) const { + masm.j(cond_, label); + } +}; + +class BailoutLabel { + Label* label_; + + public: + explicit BailoutLabel(Label* label) : label_(label) + { } +#ifdef JS_CODEGEN_X86 + void operator()(MacroAssembler& masm, uint8_t* code) const { + masm.retarget(label_, ImmPtr(code), Relocation::HARDCODED); + } +#endif + void operator()(MacroAssembler& masm, Label* label) const { + masm.retarget(label_, label); + } +}; + +template <typename T> void +CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot) +{ + encode(snapshot); + + // Though the assembler doesn't track all frame pushes, at least make sure + // the known value makes sense. We can't use bailout tables if the stack + // isn't properly aligned to the static frame size. + MOZ_ASSERT_IF(frameClass_ != FrameSizeClass::None() && deoptTable_, + frameClass_.frameSize() == masm.framePushed()); + +#ifdef JS_CODEGEN_X86 + // On x64, bailout tables are pointless, because 16 extra bytes are + // reserved per external jump, whereas it takes only 10 bytes to encode a + // a non-table based bailout. + if (assignBailoutId(snapshot)) { + binder(masm, deoptTable_->raw() + snapshot->bailoutId() * BAILOUT_TABLE_ENTRY_SIZE); + return; + } +#endif + + // We could not use a jump table, either because all bailout IDs were + // reserved, or a jump table is not optimal for this frame size or + // platform. Whatever, we will generate a lazy bailout. + // + // All bailout code is associated with the bytecodeSite of the block we are + // bailing out from. 
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree(); + OutOfLineBailout* ool = new(alloc()) OutOfLineBailout(snapshot); + addOutOfLineCode(ool, new(alloc()) BytecodeSite(tree, tree->script()->code())); + + binder(masm, ool->entry()); +} + +void +CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition, LSnapshot* snapshot) +{ + bailout(BailoutJump(condition), snapshot); +} + +void +CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition, LSnapshot* snapshot) +{ + MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) == Assembler::NaN_HandledByCond); + bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot); +} + +void +CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot) +{ + MOZ_ASSERT(label->used() && !label->bound()); + bailout(BailoutLabel(label), snapshot); +} + +void +CodeGeneratorX86Shared::bailout(LSnapshot* snapshot) +{ + Label label; + masm.jump(&label); + bailoutFrom(&label, snapshot); +} + +void +CodeGeneratorX86Shared::visitOutOfLineBailout(OutOfLineBailout* ool) +{ + masm.push(Imm32(ool->snapshot()->snapshotOffset())); + masm.jmp(&deoptLabel_); +} + +void +CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD* ins) +{ + FloatRegister first = ToFloatRegister(ins->first()); + FloatRegister second = ToFloatRegister(ins->second()); +#ifdef DEBUG + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(first == output); +#endif + + bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN(); + + if (ins->mir()->isMax()) + masm.maxDouble(second, first, handleNaN); + else + masm.minDouble(second, first, handleNaN); +} + +void +CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF* ins) +{ + FloatRegister first = ToFloatRegister(ins->first()); + FloatRegister second = ToFloatRegister(ins->second()); +#ifdef DEBUG + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(first == output); +#endif + + bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN(); + + if (ins->mir()->isMax()) + masm.maxFloat32(second, first, handleNaN); + else + masm.minFloat32(second, first, handleNaN); +} + +void +CodeGeneratorX86Shared::visitAbsD(LAbsD* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); + // Load a value which is all ones except for the sign bit. + ScratchDoubleScope scratch(masm); + masm.loadConstantDouble(SpecificNaN<double>(0, FloatingPoint<double>::kSignificandBits), scratch); + masm.vandpd(scratch, input, input); +} + +void +CodeGeneratorX86Shared::visitAbsF(LAbsF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); + // Same trick as visitAbsD above. 
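The AND-with-constant sequence in visitAbsD/visitAbsF is the classic sign-bit mask; a standalone sketch of the same idea, not the jitted code:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Clearing the IEEE-754 sign bit is exactly fabs; the SpecificNaN constant
// above is simply that all-ones-except-sign mask viewed as a double.
static double absViaMask(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof bits);
    bits &= 0x7FFFFFFFFFFFFFFFull;   // keep everything except the sign bit
    double result;
    std::memcpy(&result, &bits, sizeof result);
    return result;
}

int main() {
    assert(absViaMask(-3.5) == 3.5);
    assert(absViaMask(2.25) == 2.25);
    assert(!std::signbit(absViaMask(-0.0)));   // -0 becomes +0
    return 0;
}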
+ ScratchFloat32Scope scratch(masm); + masm.loadConstantFloat32(SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits), scratch); + masm.vandps(scratch, input, input); +} + +void +CodeGeneratorX86Shared::visitClzI(LClzI* ins) +{ + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + bool knownNotZero = ins->mir()->operandIsNeverZero(); + + masm.clz32(input, output, knownNotZero); +} + +void +CodeGeneratorX86Shared::visitCtzI(LCtzI* ins) +{ + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + bool knownNotZero = ins->mir()->operandIsNeverZero(); + + masm.ctz32(input, output, knownNotZero); +} + +void +CodeGeneratorX86Shared::visitPopcntI(LPopcntI* ins) +{ + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + Register temp = ins->temp()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp()); + + masm.popcnt32(input, output, temp); +} + +void +CodeGeneratorX86Shared::visitSqrtD(LSqrtD* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + masm.vsqrtsd(input, output, output); +} + +void +CodeGeneratorX86Shared::visitSqrtF(LSqrtF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + masm.vsqrtss(input, output, output); +} + +void +CodeGeneratorX86Shared::visitPowHalfD(LPowHalfD* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchDoubleScope scratch(masm); + + Label done, sqrt; + + if (!ins->mir()->operandIsNeverNegativeInfinity()) { + // Branch if not -Infinity. + masm.loadConstantDouble(NegativeInfinity<double>(), scratch); + + Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered; + if (ins->mir()->operandIsNeverNaN()) + cond = Assembler::DoubleNotEqual; + masm.branchDouble(cond, input, scratch, &sqrt); + + // Math.pow(-Infinity, 0.5) == Infinity. + masm.zeroDouble(output); + masm.subDouble(scratch, output); + masm.jump(&done); + + masm.bind(&sqrt); + } + + if (!ins->mir()->operandIsNeverNegativeZero()) { + // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5). Adding 0 converts any -0 to 0. 
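A quick standalone check of the identity relied on above (illustration, not SpiderMonkey code):

#include <cassert>
#include <cmath>

// Under the default rounding mode, -0.0 + 0.0 is +0.0, while sqrt on its
// own would preserve the sign of -0.0 and give the wrong sign for
// Math.pow(-0, 0.5).
int main() {
    assert(std::signbit(std::sqrt(-0.0)));       // sqrt(-0) is -0
    double fixedUp = -0.0 + 0.0;                 // IEEE-754: the sum is +0
    assert(!std::signbit(fixedUp));
    assert(!std::signbit(std::sqrt(fixedUp)));   // so the final result is +0
    return 0;
}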
+ masm.zeroDouble(scratch); + masm.addDouble(input, scratch); + masm.vsqrtsd(scratch, output, output); + } else { + masm.vsqrtsd(input, output, output); + } + + masm.bind(&done); +} + +class OutOfLineUndoALUOperation : public OutOfLineCodeBase<CodeGeneratorX86Shared> +{ + LInstruction* ins_; + + public: + explicit OutOfLineUndoALUOperation(LInstruction* ins) + : ins_(ins) + { } + + virtual void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitOutOfLineUndoALUOperation(this); + } + LInstruction* ins() const { + return ins_; + } +}; + +void +CodeGeneratorX86Shared::visitAddI(LAddI* ins) +{ + if (ins->rhs()->isConstant()) + masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); + else + masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); + + if (ins->snapshot()) { + if (ins->recoversInput()) { + OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins); + addOutOfLineCode(ool, ins->mir()); + masm.j(Assembler::Overflow, ool->entry()); + } else { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + } +} + +void +CodeGeneratorX86Shared::visitAddI64(LAddI64* lir) +{ + const LInt64Allocation lhs = lir->getInt64Operand(LAddI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LAddI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (IsConstant(rhs)) { + masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + return; + } + + masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); +} + +void +CodeGeneratorX86Shared::visitSubI(LSubI* ins) +{ + if (ins->rhs()->isConstant()) + masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); + else + masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); + + if (ins->snapshot()) { + if (ins->recoversInput()) { + OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins); + addOutOfLineCode(ool, ins->mir()); + masm.j(Assembler::Overflow, ool->entry()); + } else { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + } +} + +void +CodeGeneratorX86Shared::visitSubI64(LSubI64* lir) +{ + const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (IsConstant(rhs)) { + masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + return; + } + + masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); +} + +void +CodeGeneratorX86Shared::visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation* ool) +{ + LInstruction* ins = ool->ins(); + Register reg = ToRegister(ins->getDef(0)); + + DebugOnly<LAllocation*> lhs = ins->getOperand(0); + LAllocation* rhs = ins->getOperand(1); + + MOZ_ASSERT(reg == ToRegister(lhs)); + MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs)); + + // Undo the effect of the ALU operation, which was performed on the output + // register and overflowed. Writing to the output register clobbered an + // input reg, and the original value of the input needs to be recovered + // to satisfy the constraint imposed by any RECOVERED_INPUT operands to + // the bailout snapshot. 
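The recovery below relies on two's-complement wrap-around being invertible; a minimal sketch of that fact:

#include <cassert>
#include <cstdint>

// addl/subl wrap modulo 2^32, so subtracting the same rhs always restores
// the original lhs bit pattern, overflow flag or not.
int main() {
    uint32_t lhs = 0x7FFFFFFFu;   // INT32_MAX, as raw bits
    uint32_t rhs = 5;
    uint32_t out = lhs + rhs;     // the add that set the Overflow flag
    assert(int32_t(out) < 0);     // signed overflow happened
    assert(out - rhs == lhs);     // the out-of-line subl recovers the input
    return 0;
}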
+ + if (rhs->isConstant()) { + Imm32 constant(ToInt32(rhs)); + if (ins->isAddI()) + masm.subl(constant, reg); + else + masm.addl(constant, reg); + } else { + if (ins->isAddI()) + masm.subl(ToOperand(rhs), reg); + else + masm.addl(ToOperand(rhs), reg); + } + + bailout(ool->ins()->snapshot()); +} + +class MulNegativeZeroCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared> +{ + LMulI* ins_; + + public: + explicit MulNegativeZeroCheck(LMulI* ins) + : ins_(ins) + { } + + virtual void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitMulNegativeZeroCheck(this); + } + LMulI* ins() const { + return ins_; + } +}; + +void +CodeGeneratorX86Shared::visitMulI(LMulI* ins) +{ + const LAllocation* lhs = ins->lhs(); + const LAllocation* rhs = ins->rhs(); + MMul* mul = ins->mir(); + MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow()); + + if (rhs->isConstant()) { + // Bailout on -0.0 + int32_t constant = ToInt32(rhs); + if (mul->canBeNegativeZero() && constant <= 0) { + Assembler::Condition bailoutCond = (constant == 0) ? Assembler::Signed : Assembler::Equal; + masm.test32(ToRegister(lhs), ToRegister(lhs)); + bailoutIf(bailoutCond, ins->snapshot()); + } + + switch (constant) { + case -1: + masm.negl(ToOperand(lhs)); + break; + case 0: + masm.xorl(ToOperand(lhs), ToRegister(lhs)); + return; // escape overflow check; + case 1: + // nop + return; // escape overflow check; + case 2: + masm.addl(ToOperand(lhs), ToRegister(lhs)); + break; + default: + if (!mul->canOverflow() && constant > 0) { + // Use shift if cannot overflow and constant is power of 2 + int32_t shift = FloorLog2(constant); + if ((1 << shift) == constant) { + masm.shll(Imm32(shift), ToRegister(lhs)); + return; + } + } + masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs)); + } + + // Bailout on overflow + if (mul->canOverflow()) + bailoutIf(Assembler::Overflow, ins->snapshot()); + } else { + masm.imull(ToOperand(rhs), ToRegister(lhs)); + + // Bailout on overflow + if (mul->canOverflow()) + bailoutIf(Assembler::Overflow, ins->snapshot()); + + if (mul->canBeNegativeZero()) { + // Jump to an OOL path if the result is 0. + MulNegativeZeroCheck* ool = new(alloc()) MulNegativeZeroCheck(ins); + addOutOfLineCode(ool, mul); + + masm.test32(ToRegister(lhs), ToRegister(lhs)); + masm.j(Assembler::Zero, ool->entry()); + masm.bind(ool->rejoin()); + } + } +} + +void +CodeGeneratorX86Shared::visitMulI64(LMulI64* lir) +{ + const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs); + + MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir)); + + if (IsConstant(rhs)) { + int64_t constant = ToInt64(rhs); + switch (constant) { + case -1: + masm.neg64(ToRegister64(lhs)); + return; + case 0: + masm.xor64(ToRegister64(lhs), ToRegister64(lhs)); + return; + case 1: + // nop + return; + case 2: + masm.add64(ToRegister64(lhs), ToRegister64(lhs)); + return; + default: + if (constant > 0) { + // Use shift if constant is power of 2. 
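A standalone check of the shift replacement performed right after this comment (the helper is invented for the sketch):

#include <cassert>
#include <cstdint>

// When the constant is a power of two 2^k, the wrapped 64-bit product
// equals a left shift by k.
static bool isPow2(int64_t c) { return c > 0 && (c & (c - 1)) == 0; }

int main() {
    int64_t constant = 1024;                              // 2^10
    assert(isPow2(constant));
    int shift = 0;
    while ((int64_t(1) << shift) < constant) shift++;     // FloorLog2 of a power of two
    uint64_t x = 0x0123456789ABCDEFull;
    assert(x * uint64_t(constant) == (x << shift));
    return 0;
}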
+ int32_t shift = mozilla::FloorLog2(constant); + if (int64_t(1) << shift == constant) { + masm.lshift64(Imm32(shift), ToRegister64(lhs)); + return; + } + } + Register temp = ToTempRegisterOrInvalid(lir->temp()); + masm.mul64(Imm64(constant), ToRegister64(lhs), temp); + } + } else { + Register temp = ToTempRegisterOrInvalid(lir->temp()); + masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp); + } +} + +class ReturnZero : public OutOfLineCodeBase<CodeGeneratorX86Shared> +{ + Register reg_; + + public: + explicit ReturnZero(Register reg) + : reg_(reg) + { } + + virtual void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitReturnZero(this); + } + Register reg() const { + return reg_; + } +}; + +void +CodeGeneratorX86Shared::visitReturnZero(ReturnZero* ool) +{ + masm.mov(ImmWord(0), ool->reg()); + masm.jmp(ool->rejoin()); +} + +void +CodeGeneratorX86Shared::visitUDivOrMod(LUDivOrMod* ins) +{ + Register lhs = ToRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + Register output = ToRegister(ins->output()); + + MOZ_ASSERT_IF(lhs != rhs, rhs != eax); + MOZ_ASSERT(rhs != edx); + MOZ_ASSERT_IF(output == eax, ToRegister(ins->remainder()) == edx); + + ReturnZero* ool = nullptr; + + // Put the lhs in eax. + if (lhs != eax) + masm.mov(lhs, eax); + + // Prevent divide by zero. + if (ins->canBeDivideByZero()) { + masm.test32(rhs, rhs); + if (ins->mir()->isTruncated()) { + if (ins->trapOnError()) { + masm.j(Assembler::Zero, trap(ins, wasm::Trap::IntegerDivideByZero)); + } else { + ool = new(alloc()) ReturnZero(output); + masm.j(Assembler::Zero, ool->entry()); + } + } else { + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit. + masm.mov(ImmWord(0), edx); + masm.udiv(rhs); + + // If the remainder is > 0, bailout since this must be a double. + if (ins->mir()->isDiv() && !ins->mir()->toDiv()->canTruncateRemainder()) { + Register remainder = ToRegister(ins->remainder()); + masm.test32(remainder, remainder); + bailoutIf(Assembler::NonZero, ins->snapshot()); + } + + // Unsigned div or mod can return a value that's not a signed int32. + // If our users aren't expecting that, bail. + if (!ins->mir()->isTruncated()) { + masm.test32(output, output); + bailoutIf(Assembler::Signed, ins->snapshot()); + } + + if (ool) { + addOutOfLineCode(ool, ins->mir()); + masm.bind(ool->rejoin()); + } +} + +void +CodeGeneratorX86Shared::visitUDivOrModConstant(LUDivOrModConstant *ins) { + Register lhs = ToRegister(ins->numerator()); + Register output = ToRegister(ins->output()); + uint32_t d = ins->denominator(); + + // This emits the division answer into edx or the modulus answer into eax. + MOZ_ASSERT(output == eax || output == edx); + MOZ_ASSERT(lhs != eax && lhs != edx); + bool isDiv = (output == edx); + + if (d == 0) { + if (ins->mir()->isTruncated()) { + if (ins->trapOnError()) + masm.jump(trap(ins, wasm::Trap::IntegerDivideByZero)); + else + masm.xorl(output, output); + } else { + bailout(ins->snapshot()); + } + return; + } + + // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI). + MOZ_ASSERT((d & (d - 1)) != 0); + + ReciprocalMulConstants rmc = computeDivisionConstants(d, /* maxLog = */ 32); + + // We first compute (M * n) >> 32, where M = rmc.multiplier. 
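Before the assembly that follows, it may help to see the arithmetic in isolation. This is a hedged sketch of the same multiply-and-shift idea, assuming a compiler with __int128; it does not reproduce the exact multiplier/shift pair computeDivisionConstants emits, nor the fix-up used when the multiplier does not fit in 32 bits:

#include <cassert>
#include <cstdint>

// With p = 32 + ceil(log2(d)) and M = ceil(2^p / d), the product
// (M * n) >> p equals n / d for every 32-bit n, so division becomes a
// multiply and a shift.
using u128 = unsigned __int128;

int main() {
    const uint32_t d = 7;                              // example divisor
    int p = 32;
    while ((uint64_t(1) << (p - 32)) < d) p++;         // p = 32 + ceil(log2(d))
    u128 M = ((u128(1) << p) + d - 1) / d;             // ceil(2^p / d), may exceed 2^32
    for (uint64_t n : {0ull, 1ull, 6ull, 7ull, 1000000007ull, 0xFFFFFFFFull}) {
        uint64_t q = uint64_t((M * n) >> p);
        assert(q == n / d);
    }
    return 0;
}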
+ masm.movl(Imm32(rmc.multiplier), eax); + masm.umull(lhs); + if (rmc.multiplier > UINT32_MAX) { + // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that + // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d, contradicting + // the proof of correctness in computeDivisionConstants. + MOZ_ASSERT(rmc.shiftAmount > 0); + MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33)); + + // We actually computed edx = ((uint32_t(M) * n) >> 32) instead. Since + // (M * n) >> (32 + shift) is the same as (edx + n) >> shift, we can + // correct for the overflow. This case is a bit trickier than the signed + // case, though, as the (edx + n) addition itself can overflow; however, + // note that (edx + n) >> shift == (((n - edx) >> 1) + edx) >> (shift - 1), + // which is overflow-free. See Hacker's Delight, section 10-8 for details. + + // Compute (n - edx) >> 1 into eax. + masm.movl(lhs, eax); + masm.subl(edx, eax); + masm.shrl(Imm32(1), eax); + + // Finish the computation. + masm.addl(eax, edx); + masm.shrl(Imm32(rmc.shiftAmount - 1), edx); + } else { + masm.shrl(Imm32(rmc.shiftAmount), edx); + } + + // We now have the truncated division value in edx. If we're + // computing a modulus or checking whether the division resulted + // in an integer, we need to multiply the obtained value by d and + // finish the computation/check. + if (!isDiv) { + masm.imull(Imm32(d), edx, edx); + masm.movl(lhs, eax); + masm.subl(edx, eax); + + // The final result of the modulus op, just computed above by the + // sub instruction, can be a number in the range [2^31, 2^32). If + // this is the case and the modulus is not truncated, we must bail + // out. + if (!ins->mir()->isTruncated()) + bailoutIf(Assembler::Signed, ins->snapshot()); + } else if (!ins->mir()->isTruncated()) { + masm.imull(Imm32(d), edx, eax); + masm.cmpl(lhs, eax); + bailoutIf(Assembler::NotEqual, ins->snapshot()); + } +} + +void +CodeGeneratorX86Shared::visitMulNegativeZeroCheck(MulNegativeZeroCheck* ool) +{ + LMulI* ins = ool->ins(); + Register result = ToRegister(ins->output()); + Operand lhsCopy = ToOperand(ins->lhsCopy()); + Operand rhs = ToOperand(ins->rhs()); + MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG, lhsCopy.reg() != result.code()); + + // Result is -0 if lhs or rhs is negative. + masm.movl(lhsCopy, result); + masm.orl(rhs, result); + bailoutIf(Assembler::Signed, ins->snapshot()); + + masm.mov(ImmWord(0), result); + masm.jmp(ool->rejoin()); +} + +void +CodeGeneratorX86Shared::visitDivPowTwoI(LDivPowTwoI* ins) +{ + Register lhs = ToRegister(ins->numerator()); + DebugOnly<Register> output = ToRegister(ins->output()); + + int32_t shift = ins->shift(); + bool negativeDivisor = ins->negativeDivisor(); + MDiv* mir = ins->mir(); + + // We use defineReuseInput so these should always be the same, which is + // convenient since all of our instructions here are two-address. + MOZ_ASSERT(lhs == output); + + if (!mir->isTruncated() && negativeDivisor) { + // 0 divided by a negative number must return a double. + masm.test32(lhs, lhs); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + + if (shift) { + if (!mir->isTruncated()) { + // If the remainder is != 0, bailout since this must be a double. + masm.test32(lhs, Imm32(UINT32_MAX >> (32 - shift))); + bailoutIf(Assembler::NonZero, ins->snapshot()); + } + + if (mir->isUnsigned()) { + masm.shrl(Imm32(shift), lhs); + } else { + // Adjust the value so that shifting produces a correctly + // rounded result when the numerator is negative. 
See 10-1 + // "Signed Division by a Known Power of 2" in Henry + // S. Warren, Jr.'s Hacker's Delight. + if (mir->canBeNegativeDividend()) { + Register lhsCopy = ToRegister(ins->numeratorCopy()); + MOZ_ASSERT(lhsCopy != lhs); + if (shift > 1) + masm.sarl(Imm32(31), lhs); + masm.shrl(Imm32(32 - shift), lhs); + masm.addl(lhsCopy, lhs); + } + masm.sarl(Imm32(shift), lhs); + + if (negativeDivisor) + masm.negl(lhs); + } + return; + } + + if (negativeDivisor) { + // INT32_MIN / -1 overflows. + masm.negl(lhs); + if (!mir->isTruncated()) + bailoutIf(Assembler::Overflow, ins->snapshot()); + else if (mir->trapOnError()) + masm.j(Assembler::Overflow, trap(mir, wasm::Trap::IntegerOverflow)); + } else if (mir->isUnsigned() && !mir->isTruncated()) { + // Unsigned division by 1 can overflow if output is not + // truncated. + masm.test32(lhs, lhs); + bailoutIf(Assembler::Signed, ins->snapshot()); + } +} + +void +CodeGeneratorX86Shared::visitDivOrModConstantI(LDivOrModConstantI* ins) { + Register lhs = ToRegister(ins->numerator()); + Register output = ToRegister(ins->output()); + int32_t d = ins->denominator(); + + // This emits the division answer into edx or the modulus answer into eax. + MOZ_ASSERT(output == eax || output == edx); + MOZ_ASSERT(lhs != eax && lhs != edx); + bool isDiv = (output == edx); + + // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI + // and LModPowTwoI). + MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0); + + // We will first divide by Abs(d), and negate the answer if d is negative. + // If desired, this can be avoided by generalizing computeDivisionConstants. + ReciprocalMulConstants rmc = computeDivisionConstants(Abs(d), /* maxLog = */ 31); + + // We first compute (M * n) >> 32, where M = rmc.multiplier. + masm.movl(Imm32(rmc.multiplier), eax); + masm.imull(lhs); + if (rmc.multiplier > INT32_MAX) { + MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32)); + + // We actually computed edx = ((int32_t(M) * n) >> 32) instead. Since + // (M * n) >> 32 is the same as (edx + n), we can correct for the overflow. + // (edx + n) can't overflow, as n and edx have opposite signs because int32_t(M) + // is negative. + masm.addl(lhs, edx); + } + // (M * n) >> (32 + shift) is the truncated division answer if n is non-negative, + // as proved in the comments of computeDivisionConstants. We must add 1 later if n is + // negative to get the right answer in all cases. + masm.sarl(Imm32(rmc.shiftAmount), edx); + + // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be + // computed with just a sign-extending shift of 31 bits. + if (ins->canBeNegativeDividend()) { + masm.movl(lhs, eax); + masm.sarl(Imm32(31), eax); + masm.subl(eax, edx); + } + + // After this, edx contains the correct truncated division result. + if (d < 0) + masm.negl(edx); + + if (!isDiv) { + masm.imull(Imm32(-d), edx, eax); + masm.addl(lhs, eax); + } + + if (!ins->mir()->isTruncated()) { + if (isDiv) { + // This is a division op. Multiply the obtained value by d to check if + // the correct answer is an integer. This cannot overflow, since |d| > 1. + masm.imull(Imm32(d), edx, eax); + masm.cmp32(lhs, eax); + bailoutIf(Assembler::NotEqual, ins->snapshot()); + + // If lhs is zero and the divisor is negative, the answer should have + // been -0. + if (d < 0) { + masm.test32(lhs, lhs); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } else if (ins->canBeNegativeDividend()) { + // This is a mod op. If the computed value is zero and lhs + // is negative, the answer should have been -0. 
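A one-assert illustration of the -0 case being guarded against here (not part of the patch):

#include <cassert>
#include <cmath>

// JS % takes the sign of the dividend, so a negative dividend with
// remainder 0 produces -0, which an Int32 result cannot represent.
int main() {
    double r = std::fmod(-10.0, 5.0);        // JS: -10 % 5
    assert(r == 0.0 && std::signbit(r));     // the value is -0, not +0
    return 0;
}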
+ Label done; + + masm.cmp32(lhs, Imm32(0)); + masm.j(Assembler::GreaterThanOrEqual, &done); + + masm.test32(eax, eax); + bailoutIf(Assembler::Zero, ins->snapshot()); + + masm.bind(&done); + } + } +} + +void +CodeGeneratorX86Shared::visitDivI(LDivI* ins) +{ + Register remainder = ToRegister(ins->remainder()); + Register lhs = ToRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + Register output = ToRegister(ins->output()); + + MDiv* mir = ins->mir(); + + MOZ_ASSERT_IF(lhs != rhs, rhs != eax); + MOZ_ASSERT(rhs != edx); + MOZ_ASSERT(remainder == edx); + MOZ_ASSERT(output == eax); + + Label done; + ReturnZero* ool = nullptr; + + // Put the lhs in eax, for either the negative overflow case or the regular + // divide case. + if (lhs != eax) + masm.mov(lhs, eax); + + // Handle divide by zero. + if (mir->canBeDivideByZero()) { + masm.test32(rhs, rhs); + if (mir->trapOnError()) { + masm.j(Assembler::Zero, trap(mir, wasm::Trap::IntegerDivideByZero)); + } else if (mir->canTruncateInfinities()) { + // Truncated division by zero is zero (Infinity|0 == 0) + if (!ool) + ool = new(alloc()) ReturnZero(output); + masm.j(Assembler::Zero, ool->entry()); + } else { + MOZ_ASSERT(mir->fallible()); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + // Handle an integer overflow exception from -2147483648 / -1. + if (mir->canBeNegativeOverflow()) { + Label notmin; + masm.cmp32(lhs, Imm32(INT32_MIN)); + masm.j(Assembler::NotEqual, ¬min); + masm.cmp32(rhs, Imm32(-1)); + if (mir->trapOnError()) { + masm.j(Assembler::Equal, trap(mir, wasm::Trap::IntegerOverflow)); + } else if (mir->canTruncateOverflow()) { + // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the + // output register (lhs == eax). + masm.j(Assembler::Equal, &done); + } else { + MOZ_ASSERT(mir->fallible()); + bailoutIf(Assembler::Equal, ins->snapshot()); + } + masm.bind(¬min); + } + + // Handle negative 0. + if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) { + Label nonzero; + masm.test32(lhs, lhs); + masm.j(Assembler::NonZero, &nonzero); + masm.cmp32(rhs, Imm32(0)); + bailoutIf(Assembler::LessThan, ins->snapshot()); + masm.bind(&nonzero); + } + + // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit. + if (lhs != eax) + masm.mov(lhs, eax); + masm.cdq(); + masm.idiv(rhs); + + if (!mir->canTruncateRemainder()) { + // If the remainder is > 0, bailout since this must be a double. + masm.test32(remainder, remainder); + bailoutIf(Assembler::NonZero, ins->snapshot()); + } + + masm.bind(&done); + + if (ool) { + addOutOfLineCode(ool, mir); + masm.bind(ool->rejoin()); + } +} + +void +CodeGeneratorX86Shared::visitModPowTwoI(LModPowTwoI* ins) +{ + Register lhs = ToRegister(ins->getOperand(0)); + int32_t shift = ins->shift(); + + Label negative; + + if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) { + // Switch based on sign of the lhs. + // Positive numbers are just a bitmask + masm.branchTest32(Assembler::Signed, lhs, lhs, &negative); + } + + masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs); + + if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) { + Label done; + masm.jump(&done); + + // Negative numbers need a negate, bitmask, negate + masm.bind(&negative); + + // Unlike in the visitModI case, we are not computing the mod by means of a + // division. Therefore, the divisor = -1 case isn't problematic (the andl + // always returns 0, which is what we expect). 
+ // + // The negl instruction overflows if lhs == INT32_MIN, but this is also not + // a problem: shift is at most 31, and so the andl also always returns 0. + masm.negl(lhs); + masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs); + masm.negl(lhs); + + // Since a%b has the same sign as b, and a is negative in this branch, + // an answer of 0 means the correct result is actually -0. Bail out. + if (!ins->mir()->isTruncated()) + bailoutIf(Assembler::Zero, ins->snapshot()); + masm.bind(&done); + } +} + +class ModOverflowCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared> +{ + Label done_; + LModI* ins_; + Register rhs_; + + public: + explicit ModOverflowCheck(LModI* ins, Register rhs) + : ins_(ins), rhs_(rhs) + { } + + virtual void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitModOverflowCheck(this); + } + Label* done() { + return &done_; + } + LModI* ins() const { + return ins_; + } + Register rhs() const { + return rhs_; + } +}; + +void +CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool) +{ + masm.cmp32(ool->rhs(), Imm32(-1)); + if (ool->ins()->mir()->isTruncated()) { + masm.j(Assembler::NotEqual, ool->rejoin()); + masm.mov(ImmWord(0), edx); + masm.jmp(ool->done()); + } else { + bailoutIf(Assembler::Equal, ool->ins()->snapshot()); + masm.jmp(ool->rejoin()); + } +} + +void +CodeGeneratorX86Shared::visitModI(LModI* ins) +{ + Register remainder = ToRegister(ins->remainder()); + Register lhs = ToRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + + // Required to use idiv. + MOZ_ASSERT_IF(lhs != rhs, rhs != eax); + MOZ_ASSERT(rhs != edx); + MOZ_ASSERT(remainder == edx); + MOZ_ASSERT(ToRegister(ins->getTemp(0)) == eax); + + Label done; + ReturnZero* ool = nullptr; + ModOverflowCheck* overflow = nullptr; + + // Set up eax in preparation for doing a div. + if (lhs != eax) + masm.mov(lhs, eax); + + MMod* mir = ins->mir(); + + // Prevent divide by zero. + if (mir->canBeDivideByZero()) { + masm.test32(rhs, rhs); + if (mir->isTruncated()) { + if (mir->trapOnError()) { + masm.j(Assembler::Zero, trap(mir, wasm::Trap::IntegerDivideByZero)); + } else { + if (!ool) + ool = new(alloc()) ReturnZero(edx); + masm.j(Assembler::Zero, ool->entry()); + } + } else { + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + Label negative; + + // Switch based on sign of the lhs. + if (mir->canBeNegativeDividend()) + masm.branchTest32(Assembler::Signed, lhs, lhs, &negative); + + // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive. + { + // Check if rhs is a power-of-two. + if (mir->canBePowerOfTwoDivisor()) { + MOZ_ASSERT(rhs != remainder); + + // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if + // y is any negative number other than INT32_MIN, both y and + // y-1 will have the sign bit set so these are never optimized + // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX + // and because lhs >= 0 at this point, lhs & INT32_MAX returns + // the correct value. 
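The subtract-one-and-test sequence that follows implements two small identities; a standalone sketch:

#include <cassert>
#include <cstdint>

// (y & (y - 1)) == 0 detects a power of two, and for lhs >= 0 and y = 2^k,
// lhs & (y - 1) is exactly lhs % y.
int main() {
    const int32_t lhs = 123456789;           // any non-negative dividend
    for (int32_t y : {1, 2, 8, 1 << 20}) {
        assert((y & (y - 1)) == 0);          // power-of-two test
        assert((lhs & (y - 1)) == lhs % y);  // mask equals remainder
    }
    int32_t notPow2 = 12;
    assert((notPow2 & (notPow2 - 1)) != 0);  // non-powers of two fail the test
    return 0;
}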
+ Label notPowerOfTwo; + masm.mov(rhs, remainder); + masm.subl(Imm32(1), remainder); + masm.branchTest32(Assembler::NonZero, remainder, rhs, ¬PowerOfTwo); + { + masm.andl(lhs, remainder); + masm.jmp(&done); + } + masm.bind(¬PowerOfTwo); + } + + // Since lhs >= 0, the sign-extension will be 0 + masm.mov(ImmWord(0), edx); + masm.idiv(rhs); + } + + // Otherwise, we have to beware of two special cases: + if (mir->canBeNegativeDividend()) { + masm.jump(&done); + + masm.bind(&negative); + + // Prevent an integer overflow exception from -2147483648 % -1 + Label notmin; + masm.cmp32(lhs, Imm32(INT32_MIN)); + overflow = new(alloc()) ModOverflowCheck(ins, rhs); + masm.j(Assembler::Equal, overflow->entry()); + masm.bind(overflow->rejoin()); + masm.cdq(); + masm.idiv(rhs); + + if (!mir->isTruncated()) { + // A remainder of 0 means that the rval must be -0, which is a double. + masm.test32(remainder, remainder); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + masm.bind(&done); + + if (overflow) { + addOutOfLineCode(overflow, mir); + masm.bind(overflow->done()); + } + + if (ool) { + addOutOfLineCode(ool, mir); + masm.bind(ool->rejoin()); + } +} + +void +CodeGeneratorX86Shared::visitBitNotI(LBitNotI* ins) +{ + const LAllocation* input = ins->getOperand(0); + MOZ_ASSERT(!input->isConstant()); + + masm.notl(ToOperand(input)); +} + +void +CodeGeneratorX86Shared::visitBitOpI(LBitOpI* ins) +{ + const LAllocation* lhs = ins->getOperand(0); + const LAllocation* rhs = ins->getOperand(1); + + switch (ins->bitop()) { + case JSOP_BITOR: + if (rhs->isConstant()) + masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs)); + else + masm.orl(ToOperand(rhs), ToRegister(lhs)); + break; + case JSOP_BITXOR: + if (rhs->isConstant()) + masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs)); + else + masm.xorl(ToOperand(rhs), ToRegister(lhs)); + break; + case JSOP_BITAND: + if (rhs->isConstant()) + masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs)); + else + masm.andl(ToOperand(rhs), ToRegister(lhs)); + break; + default: + MOZ_CRASH("unexpected binary opcode"); + } +} + +void +CodeGeneratorX86Shared::visitBitOpI64(LBitOpI64* lir) +{ + const LInt64Allocation lhs = lir->getInt64Operand(LBitOpI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LBitOpI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + switch (lir->bitop()) { + case JSOP_BITOR: + if (IsConstant(rhs)) + masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + else + masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); + break; + case JSOP_BITXOR: + if (IsConstant(rhs)) + masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + else + masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); + break; + case JSOP_BITAND: + if (IsConstant(rhs)) + masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + else + masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); + break; + default: + MOZ_CRASH("unexpected binary opcode"); + } +} + +void +CodeGeneratorX86Shared::visitShiftI(LShiftI* ins) +{ + Register lhs = ToRegister(ins->lhs()); + const LAllocation* rhs = ins->rhs(); + + if (rhs->isConstant()) { + int32_t shift = ToInt32(rhs) & 0x1F; + switch (ins->bitop()) { + case JSOP_LSH: + if (shift) + masm.shll(Imm32(shift), lhs); + break; + case JSOP_RSH: + if (shift) + masm.sarl(Imm32(shift), lhs); + break; + case JSOP_URSH: + if (shift) { + masm.shrl(Imm32(shift), lhs); + } else if (ins->mir()->toUrsh()->fallible()) { + // x >>> 0 can overflow. 
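A brief standalone illustration of that comment:

#include <cassert>
#include <cstdint>

// The result of >>> is an unsigned 32-bit value, so any input with the
// sign bit set maps to a number above INT32_MAX, which an Int32-typed
// result cannot hold. That is what the test32 + Signed bailout detects.
int main() {
    int32_t x = -1;                          // JS: (-1) >>> 0
    uint32_t shifted = uint32_t(x) >> 0;     // 4294967295
    assert(shifted > uint32_t(INT32_MAX));   // does not fit in int32
    assert(x < 0);                           // the sign test catches exactly this
    return 0;
}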
+ masm.test32(lhs, lhs); + bailoutIf(Assembler::Signed, ins->snapshot()); + } + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } else { + MOZ_ASSERT(ToRegister(rhs) == ecx); + switch (ins->bitop()) { + case JSOP_LSH: + masm.shll_cl(lhs); + break; + case JSOP_RSH: + masm.sarl_cl(lhs); + break; + case JSOP_URSH: + masm.shrl_cl(lhs); + if (ins->mir()->toUrsh()->fallible()) { + // x >>> 0 can overflow. + masm.test32(lhs, lhs); + bailoutIf(Assembler::Signed, ins->snapshot()); + } + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } +} + +void +CodeGeneratorX86Shared::visitShiftI64(LShiftI64* lir) +{ + const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs); + LAllocation* rhs = lir->getOperand(LShiftI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (rhs->isConstant()) { + int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F); + switch (lir->bitop()) { + case JSOP_LSH: + if (shift) + masm.lshift64(Imm32(shift), ToRegister64(lhs)); + break; + case JSOP_RSH: + if (shift) + masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs)); + break; + case JSOP_URSH: + if (shift) + masm.rshift64(Imm32(shift), ToRegister64(lhs)); + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + return; + } + + MOZ_ASSERT(ToRegister(rhs) == ecx); + switch (lir->bitop()) { + case JSOP_LSH: + masm.lshift64(ecx, ToRegister64(lhs)); + break; + case JSOP_RSH: + masm.rshift64Arithmetic(ecx, ToRegister64(lhs)); + break; + case JSOP_URSH: + masm.rshift64(ecx, ToRegister64(lhs)); + break; + default: + MOZ_CRASH("Unexpected shift op"); + } +} + +void +CodeGeneratorX86Shared::visitUrshD(LUrshD* ins) +{ + Register lhs = ToRegister(ins->lhs()); + MOZ_ASSERT(ToRegister(ins->temp()) == lhs); + + const LAllocation* rhs = ins->rhs(); + FloatRegister out = ToFloatRegister(ins->output()); + + if (rhs->isConstant()) { + int32_t shift = ToInt32(rhs) & 0x1F; + if (shift) + masm.shrl(Imm32(shift), lhs); + } else { + MOZ_ASSERT(ToRegister(rhs) == ecx); + masm.shrl_cl(lhs); + } + + masm.convertUInt32ToDouble(lhs, out); +} + +Operand +CodeGeneratorX86Shared::ToOperand(const LAllocation& a) +{ + if (a.isGeneralReg()) + return Operand(a.toGeneralReg()->reg()); + if (a.isFloatReg()) + return Operand(a.toFloatReg()->reg()); + return Operand(masm.getStackPointer(), ToStackOffset(&a)); +} + +Operand +CodeGeneratorX86Shared::ToOperand(const LAllocation* a) +{ + return ToOperand(*a); +} + +Operand +CodeGeneratorX86Shared::ToOperand(const LDefinition* def) +{ + return ToOperand(def->output()); +} + +MoveOperand +CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const +{ + if (a.isGeneralReg()) + return MoveOperand(ToRegister(a)); + if (a.isFloatReg()) + return MoveOperand(ToFloatRegister(a)); + return MoveOperand(StackPointer, ToStackOffset(a)); +} + +class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorX86Shared> +{ + MTableSwitch* mir_; + CodeLabel jumpLabel_; + + void accept(CodeGeneratorX86Shared* codegen) { + codegen->visitOutOfLineTableSwitch(this); + } + + public: + explicit OutOfLineTableSwitch(MTableSwitch* mir) + : mir_(mir) + {} + + MTableSwitch* mir() const { + return mir_; + } + + CodeLabel* jumpLabel() { + return &jumpLabel_; + } +}; + +void +CodeGeneratorX86Shared::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool) +{ + MTableSwitch* mir = ool->mir(); + + masm.haltingAlign(sizeof(void*)); + masm.use(ool->jumpLabel()->target()); + masm.addCodeLabel(*ool->jumpLabel()); + + for (size_t i = 0; i < mir->numCases(); i++) { + LBlock* caseblock = 
skipTrivialBlocks(mir->getCase(i))->lir(); + Label* caseheader = caseblock->label(); + uint32_t caseoffset = caseheader->offset(); + + // The entries of the jump table need to be absolute addresses and thus + // must be patched after codegen is finished. + CodeLabel cl; + masm.writeCodePointer(cl.patchAt()); + cl.target()->bind(caseoffset); + masm.addCodeLabel(cl); + } +} + +void +CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base) +{ + Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label(); + + // Lower value with low value + if (mir->low() != 0) + masm.subl(Imm32(mir->low()), index); + + // Jump to default case if input is out of range + int32_t cases = mir->numCases(); + masm.cmp32(index, Imm32(cases)); + masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase); + + // To fill in the CodeLabels for the case entries, we need to first + // generate the case entries (we don't yet know their offsets in the + // instruction stream). + OutOfLineTableSwitch* ool = new(alloc()) OutOfLineTableSwitch(mir); + addOutOfLineCode(ool, mir); + + // Compute the position where a pointer to the right case stands. + masm.mov(ool->jumpLabel()->patchAt(), base); + Operand pointer = Operand(base, index, ScalePointer); + + // Jump to the right case + masm.jmp(pointer); +} + +void +CodeGeneratorX86Shared::visitMathD(LMathD* math) +{ + FloatRegister lhs = ToFloatRegister(math->lhs()); + Operand rhs = ToOperand(math->rhs()); + FloatRegister output = ToFloatRegister(math->output()); + + switch (math->jsop()) { + case JSOP_ADD: + masm.vaddsd(rhs, lhs, output); + break; + case JSOP_SUB: + masm.vsubsd(rhs, lhs, output); + break; + case JSOP_MUL: + masm.vmulsd(rhs, lhs, output); + break; + case JSOP_DIV: + masm.vdivsd(rhs, lhs, output); + break; + default: + MOZ_CRASH("unexpected opcode"); + } +} + +void +CodeGeneratorX86Shared::visitMathF(LMathF* math) +{ + FloatRegister lhs = ToFloatRegister(math->lhs()); + Operand rhs = ToOperand(math->rhs()); + FloatRegister output = ToFloatRegister(math->output()); + + switch (math->jsop()) { + case JSOP_ADD: + masm.vaddss(rhs, lhs, output); + break; + case JSOP_SUB: + masm.vsubss(rhs, lhs, output); + break; + case JSOP_MUL: + masm.vmulss(rhs, lhs, output); + break; + case JSOP_DIV: + masm.vdivss(rhs, lhs, output); + break; + default: + MOZ_CRASH("unexpected opcode"); + } +} + +void +CodeGeneratorX86Shared::visitFloor(LFloor* lir) +{ + FloatRegister input = ToFloatRegister(lir->input()); + Register output = ToRegister(lir->output()); + + Label bailout; + + if (AssemblerX86Shared::HasSSE41()) { + // Bail on negative-zero. + masm.branchNegativeZero(input, output, &bailout); + bailoutFrom(&bailout, lir->snapshot()); + + // Round toward -Infinity. + { + ScratchDoubleScope scratch(masm); + masm.vroundsd(X86Encoding::RoundDown, input, scratch, scratch); + bailoutCvttsd2si(scratch, output, lir->snapshot()); + } + } else { + Label negative, end; + + // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. + { + ScratchDoubleScope scratch(masm); + masm.zeroDouble(scratch); + masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative); + } + + // Bail on negative-zero. + masm.branchNegativeZero(input, output, &bailout); + bailoutFrom(&bailout, lir->snapshot()); + + // Input is non-negative, so truncation correctly rounds. + bailoutCvttsd2si(input, output, lir->snapshot()); + + masm.jump(&end); + + // Input is negative, but isn't -0. 
+ // Negative values go on a comparatively expensive path, since no + // native rounding mode matches JS semantics. Still better than callVM. + masm.bind(&negative); + { + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + bailoutCvttsd2si(input, output, lir->snapshot()); + + // Test whether the input double was integer-valued. + { + ScratchDoubleScope scratch(masm); + masm.convertInt32ToDouble(output, scratch); + masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end); + } + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + masm.subl(Imm32(1), output); + // Cannot overflow: output was already checked against INT_MIN. + } + + masm.bind(&end); + } +} + +void +CodeGeneratorX86Shared::visitFloorF(LFloorF* lir) +{ + FloatRegister input = ToFloatRegister(lir->input()); + Register output = ToRegister(lir->output()); + + Label bailout; + + if (AssemblerX86Shared::HasSSE41()) { + // Bail on negative-zero. + masm.branchNegativeZeroFloat32(input, output, &bailout); + bailoutFrom(&bailout, lir->snapshot()); + + // Round toward -Infinity. + { + ScratchFloat32Scope scratch(masm); + masm.vroundss(X86Encoding::RoundDown, input, scratch, scratch); + bailoutCvttss2si(scratch, output, lir->snapshot()); + } + } else { + Label negative, end; + + // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. + { + ScratchFloat32Scope scratch(masm); + masm.zeroFloat32(scratch); + masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative); + } + + // Bail on negative-zero. + masm.branchNegativeZeroFloat32(input, output, &bailout); + bailoutFrom(&bailout, lir->snapshot()); + + // Input is non-negative, so truncation correctly rounds. + bailoutCvttss2si(input, output, lir->snapshot()); + + masm.jump(&end); + + // Input is negative, but isn't -0. + // Negative values go on a comparatively expensive path, since no + // native rounding mode matches JS semantics. Still better than callVM. + masm.bind(&negative); + { + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + bailoutCvttss2si(input, output, lir->snapshot()); + + // Test whether the input double was integer-valued. + { + ScratchFloat32Scope scratch(masm); + masm.convertInt32ToFloat32(output, scratch); + masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end); + } + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + masm.subl(Imm32(1), output); + // Cannot overflow: output was already checked against INT_MIN. + } + + masm.bind(&end); + } +} + +void +CodeGeneratorX86Shared::visitCeil(LCeil* lir) +{ + FloatRegister input = ToFloatRegister(lir->input()); + ScratchDoubleScope scratch(masm); + Register output = ToRegister(lir->output()); + + Label bailout, lessThanMinusOne; + + // Bail on ]-1; -0] range + masm.loadConstantDouble(-1, scratch); + masm.branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, input, + scratch, &lessThanMinusOne); + + // Test for remaining values with the sign bit set, i.e. ]-1; -0] + masm.vmovmskpd(input, output); + masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout); + bailoutFrom(&bailout, lir->snapshot()); + + if (AssemblerX86Shared::HasSSE41()) { + // x <= -1 or x > -0 + masm.bind(&lessThanMinusOne); + // Round toward +Infinity. 
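The truncate-then-adjust fallback in visitFloor/visitFloorF above, condensed into a portable sketch; the helper is hypothetical, and the preconditions the jitted code handles via bailouts (NaN, -0, out-of-range inputs) are assumed here:

#include <cassert>
#include <cmath>

// Truncate toward zero, then correct non-integral negatives by one.
static int32_t floorViaTruncate(double x) {
    int32_t t = int32_t(x);           // cvttsd2si: truncation toward zero
    if (x < 0 && double(t) != x)      // truncation rounded the wrong way
        t -= 1;
    return t;
}

int main() {
    assert(floorViaTruncate(3.7) == 3);
    assert(floorViaTruncate(-3.0) == -3);
    assert(floorViaTruncate(-3.2) == -4);
    assert(floorViaTruncate(0.0) == 0);
    return 0;
}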
+ masm.vroundsd(X86Encoding::RoundUp, input, scratch, scratch); + bailoutCvttsd2si(scratch, output, lir->snapshot()); + return; + } + + // No SSE4.1 + Label end; + + // x >= 0 and x is not -0.0, we can truncate (resp. truncate and add 1) for + // integer (resp. non-integer) values. + // Will also work for values >= INT_MAX + 1, as the truncate + // operation will return INT_MIN and there'll be a bailout. + bailoutCvttsd2si(input, output, lir->snapshot()); + masm.convertInt32ToDouble(output, scratch); + masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end); + + // Input is not integer-valued, add 1 to obtain the ceiling value + masm.addl(Imm32(1), output); + // if input > INT_MAX, output == INT_MAX so adding 1 will overflow. + bailoutIf(Assembler::Overflow, lir->snapshot()); + masm.jump(&end); + + // x <= -1, truncation is the way to go. + masm.bind(&lessThanMinusOne); + bailoutCvttsd2si(input, output, lir->snapshot()); + + masm.bind(&end); +} + +void +CodeGeneratorX86Shared::visitCeilF(LCeilF* lir) +{ + FloatRegister input = ToFloatRegister(lir->input()); + ScratchFloat32Scope scratch(masm); + Register output = ToRegister(lir->output()); + + Label bailout, lessThanMinusOne; + + // Bail on ]-1; -0] range + masm.loadConstantFloat32(-1.f, scratch); + masm.branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, input, + scratch, &lessThanMinusOne); + + // Test for remaining values with the sign bit set, i.e. ]-1; -0] + masm.vmovmskps(input, output); + masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout); + bailoutFrom(&bailout, lir->snapshot()); + + if (AssemblerX86Shared::HasSSE41()) { + // x <= -1 or x > -0 + masm.bind(&lessThanMinusOne); + // Round toward +Infinity. + masm.vroundss(X86Encoding::RoundUp, input, scratch, scratch); + bailoutCvttss2si(scratch, output, lir->snapshot()); + return; + } + + // No SSE4.1 + Label end; + + // x >= 0 and x is not -0.0, we can truncate (resp. truncate and add 1) for + // integer (resp. non-integer) values. + // Will also work for values >= INT_MAX + 1, as the truncate + // operation will return INT_MIN and there'll be a bailout. + bailoutCvttss2si(input, output, lir->snapshot()); + masm.convertInt32ToFloat32(output, scratch); + masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end); + + // Input is not integer-valued, add 1 to obtain the ceiling value + masm.addl(Imm32(1), output); + // if input > INT_MAX, output == INT_MAX so adding 1 will overflow. + bailoutIf(Assembler::Overflow, lir->snapshot()); + masm.jump(&end); + + // x <= -1, truncation is the way to go. + masm.bind(&lessThanMinusOne); + bailoutCvttss2si(input, output, lir->snapshot()); + + masm.bind(&end); +} + +void +CodeGeneratorX86Shared::visitRound(LRound* lir) +{ + FloatRegister input = ToFloatRegister(lir->input()); + FloatRegister temp = ToFloatRegister(lir->temp()); + ScratchDoubleScope scratch(masm); + Register output = ToRegister(lir->output()); + + Label negativeOrZero, negative, end, bailout; + + // Branch to a slow path for non-positive inputs. Doesn't catch NaN. + masm.zeroDouble(scratch); + masm.loadConstantDouble(GetBiggestNumberLessThan(0.5), temp); + masm.branchDouble(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero); + + // Input is positive. Add the biggest double less than 0.5 and + // truncate, rounding down (because if the input is the biggest double less + // than 0.5, adding 0.5 would undesirably round up to 1). 
Note that we have + // to add the input to the temp register because we're not allowed to + // modify the input register. + masm.addDouble(input, temp); + bailoutCvttsd2si(temp, output, lir->snapshot()); + + masm.jump(&end); + + // Input is negative, +0 or -0. + masm.bind(&negativeOrZero); + // Branch on negative input. + masm.j(Assembler::NotEqual, &negative); + + // Bail on negative-zero. + masm.branchNegativeZero(input, output, &bailout, /* maybeNonZero = */ false); + bailoutFrom(&bailout, lir->snapshot()); + + // Input is +0 + masm.xor32(output, output); + masm.jump(&end); + + // Input is negative. + masm.bind(&negative); + + // Inputs in ]-0.5; 0] need to be added 0.5, other negative inputs need to + // be added the biggest double less than 0.5. + Label loadJoin; + masm.loadConstantDouble(-0.5, scratch); + masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &loadJoin); + masm.loadConstantDouble(0.5, temp); + masm.bind(&loadJoin); + + if (AssemblerX86Shared::HasSSE41()) { + // Add 0.5 and round toward -Infinity. The result is stored in the temp + // register (currently contains 0.5). + masm.addDouble(input, temp); + masm.vroundsd(X86Encoding::RoundDown, temp, scratch, scratch); + + // Truncate. + bailoutCvttsd2si(scratch, output, lir->snapshot()); + + // If the result is positive zero, then the actual result is -0. Bail. + // Otherwise, the truncation will have produced the correct negative integer. + masm.test32(output, output); + bailoutIf(Assembler::Zero, lir->snapshot()); + } else { + masm.addDouble(input, temp); + + // Round toward -Infinity without the benefit of ROUNDSD. + { + // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0. + masm.compareDouble(Assembler::DoubleGreaterThanOrEqual, temp, scratch); + bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot()); + + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + bailoutCvttsd2si(temp, output, lir->snapshot()); + + // Test whether the truncated double was integer-valued. + masm.convertInt32ToDouble(output, scratch); + masm.branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + masm.subl(Imm32(1), output); + // Cannot overflow: output was already checked against INT_MIN. + } + } + + masm.bind(&end); +} + +void +CodeGeneratorX86Shared::visitRoundF(LRoundF* lir) +{ + FloatRegister input = ToFloatRegister(lir->input()); + FloatRegister temp = ToFloatRegister(lir->temp()); + ScratchFloat32Scope scratch(masm); + Register output = ToRegister(lir->output()); + + Label negativeOrZero, negative, end, bailout; + + // Branch to a slow path for non-positive inputs. Doesn't catch NaN. + masm.zeroFloat32(scratch); + masm.loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp); + masm.branchFloat(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero); + + // Input is non-negative. Add the biggest float less than 0.5 and truncate, + // rounding down (because if the input is the biggest float less than 0.5, + // adding 0.5 would undesirably round up to 1). Note that we have to add + // the input to the temp register because we're not allowed to modify the + // input register. + masm.addFloat32(input, temp); + + bailoutCvttss2si(temp, output, lir->snapshot()); + + masm.jump(&end); + + // Input is negative, +0 or -0. + masm.bind(&negativeOrZero); + // Branch on negative input. 
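+    // The bare j() below consumes the flags still set by the comparison
+    // above: at this point input <= 0 and is not NaN, so NotEqual (ZF
+    // clear) can only mean a strictly negative input, and the fall-through
+    // handles +0/-0.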
+ masm.j(Assembler::NotEqual, &negative); + + // Bail on negative-zero. + masm.branchNegativeZeroFloat32(input, output, &bailout); + bailoutFrom(&bailout, lir->snapshot()); + + // Input is +0. + masm.xor32(output, output); + masm.jump(&end); + + // Input is negative. + masm.bind(&negative); + + // Inputs in ]-0.5; 0] need to be added 0.5, other negative inputs need to + // be added the biggest double less than 0.5. + Label loadJoin; + masm.loadConstantFloat32(-0.5f, scratch); + masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &loadJoin); + masm.loadConstantFloat32(0.5f, temp); + masm.bind(&loadJoin); + + if (AssemblerX86Shared::HasSSE41()) { + // Add 0.5 and round toward -Infinity. The result is stored in the temp + // register (currently contains 0.5). + masm.addFloat32(input, temp); + masm.vroundss(X86Encoding::RoundDown, temp, scratch, scratch); + + // Truncate. + bailoutCvttss2si(scratch, output, lir->snapshot()); + + // If the result is positive zero, then the actual result is -0. Bail. + // Otherwise, the truncation will have produced the correct negative integer. + masm.test32(output, output); + bailoutIf(Assembler::Zero, lir->snapshot()); + } else { + masm.addFloat32(input, temp); + // Round toward -Infinity without the benefit of ROUNDSS. + { + // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0. + masm.compareFloat(Assembler::DoubleGreaterThanOrEqual, temp, scratch); + bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot()); + + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + bailoutCvttss2si(temp, output, lir->snapshot()); + + // Test whether the truncated double was integer-valued. + masm.convertInt32ToFloat32(output, scratch); + masm.branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + masm.subl(Imm32(1), output); + // Cannot overflow: output was already checked against INT_MIN. + } + } + + masm.bind(&end); +} + +void +CodeGeneratorX86Shared::visitGuardShape(LGuardShape* guard) +{ + Register obj = ToRegister(guard->input()); + masm.cmpPtr(Operand(obj, ShapedObject::offsetOfShape()), ImmGCPtr(guard->mir()->shape())); + + bailoutIf(Assembler::NotEqual, guard->snapshot()); +} + +void +CodeGeneratorX86Shared::visitGuardObjectGroup(LGuardObjectGroup* guard) +{ + Register obj = ToRegister(guard->input()); + + masm.cmpPtr(Operand(obj, JSObject::offsetOfGroup()), ImmGCPtr(guard->mir()->group())); + + Assembler::Condition cond = + guard->mir()->bailOnEquality() ? 
Assembler::Equal : Assembler::NotEqual; + bailoutIf(cond, guard->snapshot()); +} + +void +CodeGeneratorX86Shared::visitGuardClass(LGuardClass* guard) +{ + Register obj = ToRegister(guard->input()); + Register tmp = ToRegister(guard->tempInt()); + + masm.loadPtr(Address(obj, JSObject::offsetOfGroup()), tmp); + masm.cmpPtr(Operand(tmp, ObjectGroup::offsetOfClasp()), ImmPtr(guard->mir()->getClass())); + bailoutIf(Assembler::NotEqual, guard->snapshot()); +} + +void +CodeGeneratorX86Shared::visitEffectiveAddress(LEffectiveAddress* ins) +{ + const MEffectiveAddress* mir = ins->mir(); + Register base = ToRegister(ins->base()); + Register index = ToRegister(ins->index()); + Register output = ToRegister(ins->output()); + masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output); +} + +void +CodeGeneratorX86Shared::generateInvalidateEpilogue() +{ + // Ensure that there is enough space in the buffer for the OsiPoint + // patching to occur. Otherwise, we could overwrite the invalidation + // epilogue. + for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) + masm.nop(); + + masm.bind(&invalidate_); + + // Push the Ion script onto the stack (when we determine what that pointer is). + invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1))); + JitCode* thunk = gen->jitRuntime()->getInvalidationThunk(); + + masm.call(thunk); + + // We should never reach this point in JIT code -- the invalidation thunk should + // pop the invalidated JS frame and return directly to its caller. + masm.assumeUnreachable("Should have returned directly to its caller instead of here."); +} + +void +CodeGeneratorX86Shared::visitNegI(LNegI* ins) +{ + Register input = ToRegister(ins->input()); + MOZ_ASSERT(input == ToRegister(ins->output())); + + masm.neg32(input); +} + +void +CodeGeneratorX86Shared::visitNegD(LNegD* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); + + masm.negateDouble(input); +} + +void +CodeGeneratorX86Shared::visitNegF(LNegF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); + + masm.negateFloat(input); +} + +void +CodeGeneratorX86Shared::visitSimd128Int(LSimd128Int* ins) +{ + const LDefinition* out = ins->getDef(0); + masm.loadConstantSimd128Int(ins->getValue(), ToFloatRegister(out)); +} + +void +CodeGeneratorX86Shared::visitSimd128Float(LSimd128Float* ins) +{ + const LDefinition* out = ins->getDef(0); + masm.loadConstantSimd128Float(ins->getValue(), ToFloatRegister(out)); +} + +void +CodeGeneratorX86Shared::visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins) +{ + FloatRegister in = ToFloatRegister(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + masm.convertInt32x4ToFloat32x4(in, out); +} + +void +CodeGeneratorX86Shared::visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins) +{ + FloatRegister in = ToFloatRegister(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + Register temp = ToRegister(ins->temp()); + + masm.convertFloat32x4ToInt32x4(in, out); + + auto* ool = new(alloc()) OutOfLineSimdFloatToIntCheck(temp, in, ins, ins->mir()->trapOffset()); + addOutOfLineCode(ool, ins->mir()); + + static const SimdConstant InvalidResult = SimdConstant::SplatX4(int32_t(-2147483648)); + + ScratchSimd128Scope scratch(masm); + masm.loadConstantSimd128Int(InvalidResult, scratch); + masm.packedEqualInt32x4(Operand(out), scratch); + // TODO (bug 1156228): If we have SSE4.1, we can use PTEST here instead of + // the 
two following instructions. + masm.vmovmskps(scratch, temp); + masm.cmp32(temp, Imm32(0)); + masm.j(Assembler::NotEqual, ool->entry()); + + masm.bind(ool->rejoin()); +} + +void +CodeGeneratorX86Shared::visitOutOfLineSimdFloatToIntCheck(OutOfLineSimdFloatToIntCheck *ool) +{ + static const SimdConstant Int32MaxX4 = SimdConstant::SplatX4(2147483647.f); + static const SimdConstant Int32MinX4 = SimdConstant::SplatX4(-2147483648.f); + + Label onConversionError; + + FloatRegister input = ool->input(); + Register temp = ool->temp(); + + ScratchSimd128Scope scratch(masm); + masm.loadConstantSimd128Float(Int32MinX4, scratch); + masm.vcmpleps(Operand(input), scratch, scratch); + masm.vmovmskps(scratch, temp); + masm.cmp32(temp, Imm32(15)); + masm.j(Assembler::NotEqual, &onConversionError); + + masm.loadConstantSimd128Float(Int32MaxX4, scratch); + masm.vcmpleps(Operand(input), scratch, scratch); + masm.vmovmskps(scratch, temp); + masm.cmp32(temp, Imm32(0)); + masm.j(Assembler::NotEqual, &onConversionError); + + masm.jump(ool->rejoin()); + + if (gen->compilingWasm()) { + masm.bindLater(&onConversionError, trap(ool, wasm::Trap::ImpreciseSimdConversion)); + } else { + masm.bind(&onConversionError); + bailout(ool->ins()->snapshot()); + } +} + +// Convert Float32x4 to Uint32x4. +// +// If any input lane value is out of range or NaN, bail out. +void +CodeGeneratorX86Shared::visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins) +{ + const MSimdConvert* mir = ins->mir(); + FloatRegister in = ToFloatRegister(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + Register temp = ToRegister(ins->tempR()); + FloatRegister tempF = ToFloatRegister(ins->tempF()); + + // Classify lane values into 4 disjoint classes: + // + // N-lanes: in <= -1.0 + // A-lanes: -1.0 < in <= 0x0.ffffffp31 + // B-lanes: 0x1.0p31 <= in <= 0x0.ffffffp32 + // V-lanes: 0x1.0p32 <= in, or isnan(in) + // + // We need to bail out to throw a RangeError if we see any N-lanes or + // V-lanes. + // + // For A-lanes and B-lanes, we make two float -> int32 conversions: + // + // A = cvttps2dq(in) + // B = cvttps2dq(in - 0x1.0p31f) + // + // Note that the subtraction for the B computation is exact for B-lanes. + // There is no rounding, so B is the low 31 bits of the correctly converted + // result. + // + // The cvttps2dq instruction produces 0x80000000 when the input is NaN or + // out of range for a signed int32_t. This conveniently provides the missing + // high bit for B, so the desired result is A for A-lanes and A|B for + // B-lanes. + + ScratchSimd128Scope scratch(masm); + + // TODO: If the majority of lanes are A-lanes, it could be faster to compute + // A first, use vmovmskps to check for any non-A-lanes and handle them in + // ool code. OTOH, we we're wrong about the lane distribution, that would be + // slower. + + // Compute B in |scratch|. + static const float Adjust = 0x80000000; // 0x1.0p31f for the benefit of MSVC. + static const SimdConstant Bias = SimdConstant::SplatX4(-Adjust); + masm.loadConstantSimd128Float(Bias, scratch); + masm.packedAddFloat32(Operand(in), scratch); + masm.convertFloat32x4ToInt32x4(scratch, scratch); + + // Compute A in |out|. This is the last time we use |in| and the first time + // we use |out|, so we can tolerate if they are the same register. + masm.convertFloat32x4ToInt32x4(in, out); + + // We can identify A-lanes by the sign bits in A: Any A-lanes will be + // positive in A, and N, B, and V-lanes will be 0x80000000 in A. Compute a + // mask of non-A-lanes into |tempF|. 
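+    // Per-lane sketch of the combination below (illustrative values, not
+    // emitted code); e.g. for an input lane of 3e9f (a B-lane):
+    //   A = cvttps2dq(in)             -> 0x80000000  (out of signed range)
+    //   B = cvttps2dq(in - 0x1.0p31f) -> 0x32D05E00  (low 31 bits, exact)
+    //   A | B                         -> 0xB2D05E00  == 3000000000
+    // A-lanes keep A alone (their B term is cleared using the non-A-lane
+    // mask built just below), while N- and V-lanes keep a sign bit in B
+    // that the final vmovmskps test catches.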
+ masm.zeroSimd128Float(tempF); + masm.packedGreaterThanInt32x4(Operand(out), tempF); + + // Clear the A-lanes in B. + masm.bitwiseAndSimd128(Operand(tempF), scratch); + + // Compute the final result: A for A-lanes, A|B for B-lanes. + masm.bitwiseOrSimd128(Operand(scratch), out); + + // We still need to filter out the V-lanes. They would show up as 0x80000000 + // in both A and B. Since we cleared the valid A-lanes in B, the V-lanes are + // the remaining negative lanes in B. + masm.vmovmskps(scratch, temp); + masm.cmp32(temp, Imm32(0)); + + if (gen->compilingWasm()) + masm.j(Assembler::NotEqual, trap(mir, wasm::Trap::ImpreciseSimdConversion)); + else + bailoutIf(Assembler::NotEqual, ins->snapshot()); +} + +void +CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4* ins) +{ + MOZ_ASSERT(ins->mir()->type() == MIRType::Int32x4 || ins->mir()->type() == MIRType::Bool32x4); + + FloatRegister output = ToFloatRegister(ins->output()); + if (AssemblerX86Shared::HasSSE41()) { + masm.vmovd(ToRegister(ins->getOperand(0)), output); + for (size_t i = 1; i < 4; ++i) { + Register r = ToRegister(ins->getOperand(i)); + masm.vpinsrd(i, r, output, output); + } + return; + } + + masm.reserveStack(Simd128DataSize); + for (size_t i = 0; i < 4; ++i) { + Register r = ToRegister(ins->getOperand(i)); + masm.store32(r, Address(StackPointer, i * sizeof(int32_t))); + } + masm.loadAlignedSimd128Int(Address(StackPointer, 0), output); + masm.freeStack(Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdValueFloat32x4(LSimdValueFloat32x4* ins) +{ + MOZ_ASSERT(ins->mir()->type() == MIRType::Float32x4); + + FloatRegister r0 = ToFloatRegister(ins->getOperand(0)); + FloatRegister r1 = ToFloatRegister(ins->getOperand(1)); + FloatRegister r2 = ToFloatRegister(ins->getOperand(2)); + FloatRegister r3 = ToFloatRegister(ins->getOperand(3)); + FloatRegister tmp = ToFloatRegister(ins->getTemp(0)); + FloatRegister output = ToFloatRegister(ins->output()); + + FloatRegister r0Copy = masm.reusedInputFloat32x4(r0, output); + FloatRegister r1Copy = masm.reusedInputFloat32x4(r1, tmp); + + masm.vunpcklps(r3, r1Copy, tmp); + masm.vunpcklps(r2, r0Copy, output); + masm.vunpcklps(tmp, output, output); +} + +void +CodeGeneratorX86Shared::visitSimdSplatX16(LSimdSplatX16* ins) +{ + MOZ_ASSERT(SimdTypeToLength(ins->mir()->type()) == 16); + Register input = ToRegister(ins->getOperand(0)); + FloatRegister output = ToFloatRegister(ins->output()); + masm.vmovd(input, output); + if (AssemblerX86Shared::HasSSSE3()) { + masm.zeroSimd128Int(ScratchSimd128Reg); + masm.vpshufb(ScratchSimd128Reg, output, output); + } else { + // Use two shifts to duplicate the low 8 bits into the low 16 bits. + masm.vpsllw(Imm32(8), output, output); + masm.vmovdqa(output, ScratchSimd128Reg); + masm.vpsrlw(Imm32(8), ScratchSimd128Reg, ScratchSimd128Reg); + masm.vpor(ScratchSimd128Reg, output, output); + // Then do an X8 splat. 
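+        // For example (a sketch of the fallback above, not extra code): to
+        // splat the byte 0xAB, the low 16-bit word goes
+        //   vmovd: 0x00AB,  psllw 8: 0xAB00,
+        //   psrlw 8 on the copy: 0x00AB,  por: 0xABAB,
+        // and the X8 splat below then copies 0xABAB into all eight words.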
+ masm.vpshuflw(0, output, output); + masm.vpshufd(0, output, output); + } +} + +void +CodeGeneratorX86Shared::visitSimdSplatX8(LSimdSplatX8* ins) +{ + MOZ_ASSERT(SimdTypeToLength(ins->mir()->type()) == 8); + Register input = ToRegister(ins->getOperand(0)); + FloatRegister output = ToFloatRegister(ins->output()); + masm.vmovd(input, output); + masm.vpshuflw(0, output, output); + masm.vpshufd(0, output, output); +} + +void +CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4* ins) +{ + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdSplat* mir = ins->mir(); + MOZ_ASSERT(IsSimdType(mir->type())); + JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t)); + + if (mir->type() == MIRType::Float32x4) { + FloatRegister r = ToFloatRegister(ins->getOperand(0)); + FloatRegister rCopy = masm.reusedInputFloat32x4(r, output); + masm.vshufps(0, rCopy, rCopy, output); + } else { + Register r = ToRegister(ins->getOperand(0)); + masm.vmovd(r, output); + masm.vpshufd(0, output, output); + } +} + +void +CodeGeneratorX86Shared::visitSimdReinterpretCast(LSimdReinterpretCast* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + + if (input.aliases(output)) + return; + + if (IsIntegerSimdType(ins->mir()->type())) + masm.vmovdqa(input, output); + else + masm.vmovaps(input, output); +} + +// Extract an integer lane from the 32x4 vector register |input| and place it in +// |output|. +void +CodeGeneratorX86Shared::emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane) +{ + if (lane == 0) { + // The value we want to extract is in the low double-word + masm.moveLowInt32(input, output); + } else if (AssemblerX86Shared::HasSSE41()) { + masm.vpextrd(lane, input, output); + } else { + uint32_t mask = MacroAssembler::ComputeShuffleMask(lane); + masm.shuffleInt32(mask, input, ScratchSimd128Reg); + masm.moveLowInt32(ScratchSimd128Reg, output); + } +} + +// Extract an integer lane from the 16x8 vector register |input|, sign- or +// zero-extend to 32 bits and place the result in |output|. +void +CodeGeneratorX86Shared::emitSimdExtractLane16x8(FloatRegister input, Register output, + unsigned lane, SimdSign signedness) +{ + // Unlike pextrd and pextrb, this is available in SSE2. + masm.vpextrw(lane, input, output); + + if (signedness == SimdSign::Signed) + masm.movswl(output, output); +} + +// Extract an integer lane from the 8x16 vector register |input|, sign- or +// zero-extend to 32 bits and place the result in |output|. +void +CodeGeneratorX86Shared::emitSimdExtractLane8x16(FloatRegister input, Register output, + unsigned lane, SimdSign signedness) +{ + if (AssemblerX86Shared::HasSSE41()) { + masm.vpextrb(lane, input, output); + // vpextrb clears the high bits, so no further extension required. + if (signedness == SimdSign::Unsigned) + signedness = SimdSign::NotApplicable; + } else { + // Extract the relevant 16 bits containing our lane, then shift the + // right 8 bits into place. + emitSimdExtractLane16x8(input, output, lane / 2, SimdSign::Unsigned); + if (lane % 2) { + masm.shrl(Imm32(8), output); + // The shrl handles the zero-extension. Don't repeat it. + if (signedness == SimdSign::Unsigned) + signedness = SimdSign::NotApplicable; + } + } + + // We have the right low 8 bits in |output|, but we may need to fix the high + // bits. Note that this requires |output| to be one of the %eax-%edx + // registers. 
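+    // For example (illustrative): extracting signed lane 5 without SSE4.1
+    // does vpextrw on 16-bit lane 2 (bytes 4 and 5), shrl by 8 to drop
+    // byte 4 and zero the high bits, then movsbl below to sign-extend.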
+ switch (signedness) { + case SimdSign::Signed: + masm.movsbl(output, output); + break; + case SimdSign::Unsigned: + masm.movzbl(output, output); + break; + case SimdSign::NotApplicable: + // No adjustment needed. + break; + } +} + +void +CodeGeneratorX86Shared::visitSimdExtractElementB(LSimdExtractElementB* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + MSimdExtractElement* mir = ins->mir(); + unsigned length = SimdTypeToLength(mir->specialization()); + + switch (length) { + case 4: + emitSimdExtractLane32x4(input, output, mir->lane()); + break; + case 8: + // Get a lane, don't bother fixing the high bits since we'll mask below. + emitSimdExtractLane16x8(input, output, mir->lane(), SimdSign::NotApplicable); + break; + case 16: + emitSimdExtractLane8x16(input, output, mir->lane(), SimdSign::NotApplicable); + break; + default: + MOZ_CRASH("Unhandled SIMD length"); + } + + // We need to generate a 0/1 value. We have 0/-1 and possibly dirty high bits. + masm.and32(Imm32(1), output); +} + +void +CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + MSimdExtractElement* mir = ins->mir(); + unsigned length = SimdTypeToLength(mir->specialization()); + + switch (length) { + case 4: + emitSimdExtractLane32x4(input, output, mir->lane()); + break; + case 8: + emitSimdExtractLane16x8(input, output, mir->lane(), mir->signedness()); + break; + case 16: + emitSimdExtractLane8x16(input, output, mir->lane(), mir->signedness()); + break; + default: + MOZ_CRASH("Unhandled SIMD length"); + } +} + +void +CodeGeneratorX86Shared::visitSimdExtractElementU2D(LSimdExtractElementU2D* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + Register temp = ToRegister(ins->temp()); + MSimdExtractElement* mir = ins->mir(); + MOZ_ASSERT(mir->specialization() == MIRType::Int32x4); + emitSimdExtractLane32x4(input, temp, mir->lane()); + masm.convertUInt32ToDouble(temp, output); +} + +void +CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + + unsigned lane = ins->mir()->lane(); + if (lane == 0) { + // The value we want to extract is in the low double-word + if (input != output) + masm.moveFloat32(input, output); + } else if (lane == 2) { + masm.moveHighPairToLowPairFloat32(input, output); + } else { + uint32_t mask = MacroAssembler::ComputeShuffleMask(lane); + masm.shuffleFloat32(mask, input, output); + } + // NaNs contained within SIMD values are not enforced to be canonical, so + // when we extract an element into a "regular" scalar JS value, we have to + // canonicalize. In wasm code, we can skip this, as wasm only has to + // canonicalize NaNs at FFI boundaries. + if (!gen->compilingWasm()) + masm.canonicalizeFloat(output); +} + +void +CodeGeneratorX86Shared::visitSimdInsertElementI(LSimdInsertElementI* ins) +{ + FloatRegister vector = ToFloatRegister(ins->vector()); + Register value = ToRegister(ins->value()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(vector == output); // defineReuseInput(0) + + unsigned lane = ins->lane(); + unsigned length = ins->length(); + + if (length == 8) { + // Available in SSE 2. 
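+        // (vpinsrw is baseline SSE2; the 32x4 and 8x16 cases below need
+        // SSE4.1's vpinsrd/vpinsrb and otherwise spill the vector, patch
+        // the lane in memory, and reload -- see the stack path below.)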
+ masm.vpinsrw(lane, value, vector, output); + return; + } + + // Note that, contrarily to float32x4, we cannot use vmovd if the inserted + // value goes into the first component, as vmovd clears out the higher lanes + // of the output. + if (AssemblerX86Shared::HasSSE41()) { + // TODO: Teach Lowering that we don't need defineReuseInput if we have AVX. + switch (length) { + case 4: + masm.vpinsrd(lane, value, vector, output); + return; + case 16: + masm.vpinsrb(lane, value, vector, output); + return; + } + } + + masm.reserveStack(Simd128DataSize); + masm.storeAlignedSimd128Int(vector, Address(StackPointer, 0)); + switch (length) { + case 4: + masm.store32(value, Address(StackPointer, lane * sizeof(int32_t))); + break; + case 16: + // Note that this requires `value` to be in one the registers where the + // low 8 bits are addressible (%eax - %edx on x86, all of them on x86-64). + masm.store8(value, Address(StackPointer, lane * sizeof(int8_t))); + break; + default: + MOZ_CRASH("Unsupported SIMD length"); + } + masm.loadAlignedSimd128Int(Address(StackPointer, 0), output); + masm.freeStack(Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdInsertElementF(LSimdInsertElementF* ins) +{ + FloatRegister vector = ToFloatRegister(ins->vector()); + FloatRegister value = ToFloatRegister(ins->value()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(vector == output); // defineReuseInput(0) + + if (ins->lane() == 0) { + // As both operands are registers, vmovss doesn't modify the upper bits + // of the destination operand. + if (value != output) + masm.vmovss(value, vector, output); + return; + } + + if (AssemblerX86Shared::HasSSE41()) { + // The input value is in the low float32 of the 'value' FloatRegister. + masm.vinsertps(masm.vinsertpsMask(0, ins->lane()), value, output, output); + return; + } + + unsigned component = unsigned(ins->lane()); + masm.reserveStack(Simd128DataSize); + masm.storeAlignedSimd128Float(vector, Address(StackPointer, 0)); + masm.storeFloat32(value, Address(StackPointer, component * sizeof(int32_t))); + masm.loadAlignedSimd128Float(Address(StackPointer, 0), output); + masm.freeStack(Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdAllTrue(LSimdAllTrue* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + + masm.vmovmskps(input, output); + masm.cmp32(output, Imm32(0xf)); + masm.emitSet(Assembler::Zero, output); +} + +void +CodeGeneratorX86Shared::visitSimdAnyTrue(LSimdAnyTrue* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + + masm.vmovmskps(input, output); + masm.cmp32(output, Imm32(0x0)); + masm.emitSet(Assembler::NonZero, output); +} + +template <class T, class Reg> void +CodeGeneratorX86Shared::visitSimdGeneralShuffle(LSimdGeneralShuffleBase* ins, Reg tempRegister) +{ + MSimdGeneralShuffle* mir = ins->mir(); + unsigned numVectors = mir->numVectors(); + + Register laneTemp = ToRegister(ins->temp()); + + // This won't generate fast code, but it's fine because we expect users + // to have used constant indices (and thus MSimdGeneralShuffle to be fold + // into MSimdSwizzle/MSimdShuffle, which are fast). + + // We need stack space for the numVectors inputs and for the output vector. 
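+    // Stack layout while this runs (a sketch; Simd128DataSize == 16):
+    //   [sp + 0,  sp + 16)    space where the result is assembled
+    //   [sp + 16*(1+i), ...)  input vector i
+    // so the lane loads below index from offset Simd128DataSize upward and
+    // the finished vector is reloaded from offset 0.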
+ unsigned stackSpace = Simd128DataSize * (numVectors + 1); + masm.reserveStack(stackSpace); + + for (unsigned i = 0; i < numVectors; i++) { + masm.storeAlignedVector<T>(ToFloatRegister(ins->vector(i)), + Address(StackPointer, Simd128DataSize * (1 + i))); + } + + Label bail; + const Scale laneScale = ScaleFromElemWidth(sizeof(T)); + + for (size_t i = 0; i < mir->numLanes(); i++) { + Operand lane = ToOperand(ins->lane(i)); + + masm.cmp32(lane, Imm32(numVectors * mir->numLanes() - 1)); + masm.j(Assembler::Above, &bail); + + if (lane.kind() == Operand::REG) { + masm.loadScalar<T>(Operand(StackPointer, ToRegister(ins->lane(i)), laneScale, Simd128DataSize), + tempRegister); + } else { + masm.load32(lane, laneTemp); + masm.loadScalar<T>(Operand(StackPointer, laneTemp, laneScale, Simd128DataSize), tempRegister); + } + + masm.storeScalar<T>(tempRegister, Address(StackPointer, i * sizeof(T))); + } + + FloatRegister output = ToFloatRegister(ins->output()); + masm.loadAlignedVector<T>(Address(StackPointer, 0), output); + + Label join; + masm.jump(&join); + + { + masm.bind(&bail); + masm.freeStack(stackSpace); + bailout(ins->snapshot()); + } + + masm.bind(&join); + masm.setFramePushed(masm.framePushed() + stackSpace); + masm.freeStack(stackSpace); +} + +void +CodeGeneratorX86Shared::visitSimdGeneralShuffleI(LSimdGeneralShuffleI* ins) +{ + switch (ins->mir()->type()) { + case MIRType::Int8x16: + return visitSimdGeneralShuffle<int8_t, Register>(ins, ToRegister(ins->temp())); + case MIRType::Int16x8: + return visitSimdGeneralShuffle<int16_t, Register>(ins, ToRegister(ins->temp())); + case MIRType::Int32x4: + return visitSimdGeneralShuffle<int32_t, Register>(ins, ToRegister(ins->temp())); + default: + MOZ_CRASH("unsupported type for general shuffle"); + } +} +void +CodeGeneratorX86Shared::visitSimdGeneralShuffleF(LSimdGeneralShuffleF* ins) +{ + ScratchFloat32Scope scratch(masm); + visitSimdGeneralShuffle<float, FloatRegister>(ins, scratch); +} + +void +CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + const unsigned numLanes = ins->numLanes(); + + switch (numLanes) { + case 4: { + uint32_t x = ins->lane(0); + uint32_t y = ins->lane(1); + uint32_t z = ins->lane(2); + uint32_t w = ins->lane(3); + + uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w); + masm.shuffleInt32(mask, input, output); + return; + } + } + + // In the general case, use pshufb if it is available. Convert to a + // byte-wise swizzle. + const unsigned bytesPerLane = 16 / numLanes; + int8_t bLane[16]; + for (unsigned i = 0; i < numLanes; i++) { + for (unsigned b = 0; b < bytesPerLane; b++) { + bLane[i * bytesPerLane + b] = ins->lane(i) * bytesPerLane + b; + } + } + + if (AssemblerX86Shared::HasSSSE3()) { + ScratchSimd128Scope scratch(masm); + masm.loadConstantSimd128Int(SimdConstant::CreateX16(bLane), scratch); + FloatRegister inputCopy = masm.reusedInputInt32x4(input, output); + masm.vpshufb(scratch, inputCopy, output); + return; + } + + // Worst-case fallback for pre-SSSE3 machines. Bounce through memory. 
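+    // For example (illustrative): an Int16x8 swizzle with lanes
+    // (1,0,3,2,5,4,7,6) expands above to the byte indices
+    //   bLane = { 2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13 }
+    // which either feeds vpshufb (above) or drives this byte-by-byte copy
+    // through the stack.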
+ Register temp = ToRegister(ins->getTemp(0)); + masm.reserveStack(2 * Simd128DataSize); + masm.storeAlignedSimd128Int(input, Address(StackPointer, Simd128DataSize)); + for (unsigned i = 0; i < 16; i++) { + masm.load8ZeroExtend(Address(StackPointer, Simd128DataSize + bLane[i]), temp); + masm.store8(temp, Address(StackPointer, i)); + } + masm.loadAlignedSimd128Int(Address(StackPointer, 0), output); + masm.freeStack(2 * Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF* ins) +{ + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(ins->numLanes() == 4); + + uint32_t x = ins->lane(0); + uint32_t y = ins->lane(1); + uint32_t z = ins->lane(2); + uint32_t w = ins->lane(3); + + if (AssemblerX86Shared::HasSSE3()) { + if (ins->lanesMatch(0, 0, 2, 2)) { + masm.vmovsldup(input, output); + return; + } + if (ins->lanesMatch(1, 1, 3, 3)) { + masm.vmovshdup(input, output); + return; + } + } + + // TODO Here and below, arch specific lowering could identify this pattern + // and use defineReuseInput to avoid this move (bug 1084404) + if (ins->lanesMatch(2, 3, 2, 3)) { + FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); + masm.vmovhlps(input, inputCopy, output); + return; + } + + if (ins->lanesMatch(0, 1, 0, 1)) { + if (AssemblerX86Shared::HasSSE3() && !AssemblerX86Shared::HasAVX()) { + masm.vmovddup(input, output); + return; + } + FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); + masm.vmovlhps(input, inputCopy, output); + return; + } + + if (ins->lanesMatch(0, 0, 1, 1)) { + FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); + masm.vunpcklps(input, inputCopy, output); + return; + } + + if (ins->lanesMatch(2, 2, 3, 3)) { + FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); + masm.vunpckhps(input, inputCopy, output); + return; + } + + uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w); + masm.shuffleFloat32(mask, input, output); +} + +void +CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + FloatRegister rhs = ToFloatRegister(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + const unsigned numLanes = ins->numLanes(); + const unsigned bytesPerLane = 16 / numLanes; + + // Convert the shuffle to a byte-wise shuffle. + uint8_t bLane[16]; + for (unsigned i = 0; i < numLanes; i++) { + for (unsigned b = 0; b < bytesPerLane; b++) { + bLane[i * bytesPerLane + b] = ins->lane(i) * bytesPerLane + b; + } + } + + // Use pshufb if it is available. + if (AssemblerX86Shared::HasSSSE3()) { + FloatRegister scratch1 = ToFloatRegister(ins->temp()); + ScratchSimd128Scope scratch2(masm); + + // Use pshufb instructions to gather the lanes from each source vector. + // A negative index creates a zero lane, so the two vectors can be combined. + + // Set scratch2 = lanes from lhs. + int8_t idx[16]; + for (unsigned i = 0; i < 16; i++) + idx[i] = bLane[i] < 16 ? bLane[i] : -1; + masm.loadConstantSimd128Int(SimdConstant::CreateX16(idx), scratch1); + FloatRegister lhsCopy = masm.reusedInputInt32x4(lhs, scratch2); + masm.vpshufb(scratch1, lhsCopy, scratch2); + + // Set output = lanes from rhs. + for (unsigned i = 0; i < 16; i++) + idx[i] = bLane[i] >= 16 ? bLane[i] - 16 : -1; + masm.loadConstantSimd128Int(SimdConstant::CreateX16(idx), scratch1); + FloatRegister rhsCopy = masm.reusedInputInt32x4(rhs, output); + masm.vpshufb(scratch1, rhsCopy, output); + + // Combine. 
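+        // Per-byte sketch of this combine (illustrative, not extra code):
+        //   scratch2[i] = bLane[i] < 16  ? lhs[bLane[i]]      : 0
+        //   output[i]   = bLane[i] >= 16 ? rhs[bLane[i] - 16] : 0
+        //   output[i]  |= scratch2[i]
+        // pshufb zeroes bytes whose index is negative, so each result byte
+        // comes from exactly one of the two gathers and the vpor merges them.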
+ masm.vpor(scratch2, output, output); + return; + } + + // Worst-case fallback for pre-SSE3 machines. Bounce through memory. + Register temp = ToRegister(ins->getTemp(0)); + masm.reserveStack(3 * Simd128DataSize); + masm.storeAlignedSimd128Int(lhs, Address(StackPointer, Simd128DataSize)); + masm.storeAlignedSimd128Int(rhs, Address(StackPointer, 2 * Simd128DataSize)); + for (unsigned i = 0; i < 16; i++) { + masm.load8ZeroExtend(Address(StackPointer, Simd128DataSize + bLane[i]), temp); + masm.store8(temp, Address(StackPointer, i)); + } + masm.loadAlignedSimd128Int(Address(StackPointer, 0), output); + masm.freeStack(3 * Simd128DataSize); +} + +void +CodeGeneratorX86Shared::visitSimdShuffleX4(LSimdShuffleX4* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister out = ToFloatRegister(ins->output()); + + uint32_t x = ins->lane(0); + uint32_t y = ins->lane(1); + uint32_t z = ins->lane(2); + uint32_t w = ins->lane(3); + + // Check that lanes come from LHS in majority: + unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4); + MOZ_ASSERT(numLanesFromLHS >= 2); + + // When reading this method, remember that vshufps takes the two first + // inputs of the destination operand (right operand) and the two last + // inputs of the source operand (left operand). + // + // Legend for explanations: + // - L: LHS + // - R: RHS + // - T: temporary + + uint32_t mask; + + // If all lanes came from a single vector, we should have constructed a + // MSimdSwizzle instead. + MOZ_ASSERT(numLanesFromLHS < 4); + + // If all values stay in their lane, this is a blend. + if (AssemblerX86Shared::HasSSE41()) { + if (x % 4 == 0 && y % 4 == 1 && z % 4 == 2 && w % 4 == 3) { + masm.vblendps(masm.blendpsMask(x >= 4, y >= 4, z >= 4, w >= 4), rhs, lhs, out); + return; + } + } + + // One element of the second, all other elements of the first + if (numLanesFromLHS == 3) { + unsigned firstMask = -1, secondMask = -1; + + // register-register vmovss preserves the high lanes. + if (ins->lanesMatch(4, 1, 2, 3) && rhs.kind() == Operand::FPREG) { + masm.vmovss(FloatRegister::FromCode(rhs.fpu()), lhs, out); + return; + } + + // SSE4.1 vinsertps can handle any single element. 
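+        // For example (illustrative): the shuffle (0, 1, 6, 3) leaves lanes
+        // 0, 1 and 3 of lhs in place and inserts lane 2 of rhs (6 - 4) into
+        // lane 2 of the result, which vinsertps encodes in one immediate.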
+ unsigned numLanesUnchanged = (x == 0) + (y == 1) + (z == 2) + (w == 3); + if (AssemblerX86Shared::HasSSE41() && numLanesUnchanged == 3) { + unsigned srcLane; + unsigned dstLane; + if (x >= 4) { + srcLane = x - 4; + dstLane = 0; + } else if (y >= 4) { + srcLane = y - 4; + dstLane = 1; + } else if (z >= 4) { + srcLane = z - 4; + dstLane = 2; + } else { + MOZ_ASSERT(w >= 4); + srcLane = w - 4; + dstLane = 3; + } + masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, lhs, out); + return; + } + + FloatRegister rhsCopy = ToFloatRegister(ins->temp()); + + if (x < 4 && y < 4) { + if (w >= 4) { + w %= 4; + // T = (Rw Rw Lz Lz) = vshufps(firstMask, lhs, rhs, rhsCopy) + firstMask = MacroAssembler::ComputeShuffleMask(w, w, z, z); + // (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = vshufps(secondMask, T, lhs, out) + secondMask = MacroAssembler::ComputeShuffleMask(x, y, 2, 0); + } else { + MOZ_ASSERT(z >= 4); + z %= 4; + // T = (Rz Rz Lw Lw) = vshufps(firstMask, lhs, rhs, rhsCopy) + firstMask = MacroAssembler::ComputeShuffleMask(z, z, w, w); + // (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = vshufps(secondMask, T, lhs, out) + secondMask = MacroAssembler::ComputeShuffleMask(x, y, 0, 2); + } + + masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy); + masm.vshufps(secondMask, rhsCopy, lhs, out); + return; + } + + MOZ_ASSERT(z < 4 && w < 4); + + if (y >= 4) { + y %= 4; + // T = (Ry Ry Lx Lx) = vshufps(firstMask, lhs, rhs, rhsCopy) + firstMask = MacroAssembler::ComputeShuffleMask(y, y, x, x); + // (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = vshufps(secondMask, lhs, T, out) + secondMask = MacroAssembler::ComputeShuffleMask(2, 0, z, w); + } else { + MOZ_ASSERT(x >= 4); + x %= 4; + // T = (Rx Rx Ly Ly) = vshufps(firstMask, lhs, rhs, rhsCopy) + firstMask = MacroAssembler::ComputeShuffleMask(x, x, y, y); + // (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = vshufps(secondMask, lhs, T, out) + secondMask = MacroAssembler::ComputeShuffleMask(0, 2, z, w); + } + + masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy); + if (AssemblerX86Shared::HasAVX()) { + masm.vshufps(secondMask, lhs, rhsCopy, out); + } else { + masm.vshufps(secondMask, lhs, rhsCopy, rhsCopy); + masm.moveSimd128Float(rhsCopy, out); + } + return; + } + + // Two elements from one vector, two other elements from the other + MOZ_ASSERT(numLanesFromLHS == 2); + + // TODO Here and below, symmetric case would be more handy to avoid a move, + // but can't be reached because operands would get swapped (bug 1084404). + if (ins->lanesMatch(2, 3, 6, 7)) { + ScratchSimd128Scope scratch(masm); + if (AssemblerX86Shared::HasAVX()) { + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); + masm.vmovhlps(lhs, rhsCopy, out); + } else { + masm.loadAlignedSimd128Float(rhs, scratch); + masm.vmovhlps(lhs, scratch, scratch); + masm.moveSimd128Float(scratch, out); + } + return; + } + + if (ins->lanesMatch(0, 1, 4, 5)) { + FloatRegister rhsCopy; + ScratchSimd128Scope scratch(masm); + if (rhs.kind() == Operand::FPREG) { + // No need to make an actual copy, since the operand is already + // in a register, and it won't be clobbered by the vmovlhps. 
+ rhsCopy = FloatRegister::FromCode(rhs.fpu()); + } else { + masm.loadAlignedSimd128Float(rhs, scratch); + rhsCopy = scratch; + } + masm.vmovlhps(rhsCopy, lhs, out); + return; + } + + if (ins->lanesMatch(0, 4, 1, 5)) { + masm.vunpcklps(rhs, lhs, out); + return; + } + + // TODO swapped case would be better (bug 1084404) + if (ins->lanesMatch(4, 0, 5, 1)) { + ScratchSimd128Scope scratch(masm); + if (AssemblerX86Shared::HasAVX()) { + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); + masm.vunpcklps(lhs, rhsCopy, out); + } else { + masm.loadAlignedSimd128Float(rhs, scratch); + masm.vunpcklps(lhs, scratch, scratch); + masm.moveSimd128Float(scratch, out); + } + return; + } + + if (ins->lanesMatch(2, 6, 3, 7)) { + masm.vunpckhps(rhs, lhs, out); + return; + } + + // TODO swapped case would be better (bug 1084404) + if (ins->lanesMatch(6, 2, 7, 3)) { + ScratchSimd128Scope scratch(masm); + if (AssemblerX86Shared::HasAVX()) { + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); + masm.vunpckhps(lhs, rhsCopy, out); + } else { + masm.loadAlignedSimd128Float(rhs, scratch); + masm.vunpckhps(lhs, scratch, scratch); + masm.moveSimd128Float(scratch, out); + } + return; + } + + // In one vshufps + if (x < 4 && y < 4) { + mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4); + masm.vshufps(mask, rhs, lhs, out); + return; + } + + // At creation, we should have explicitly swapped in this case. + MOZ_ASSERT(!(z >= 4 && w >= 4)); + + // In two vshufps, for the most generic case: + uint32_t firstMask[4], secondMask[4]; + unsigned i = 0, j = 2, k = 0; + +#define COMPUTE_MASK(lane) \ + if (lane >= 4) { \ + firstMask[j] = lane % 4; \ + secondMask[k++] = j++; \ + } else { \ + firstMask[i] = lane; \ + secondMask[k++] = i++; \ + } + + COMPUTE_MASK(x) + COMPUTE_MASK(y) + COMPUTE_MASK(z) + COMPUTE_MASK(w) +#undef COMPUTE_MASK + + MOZ_ASSERT(i == 2 && j == 4 && k == 4); + + mask = MacroAssembler::ComputeShuffleMask(firstMask[0], firstMask[1], + firstMask[2], firstMask[3]); + masm.vshufps(mask, rhs, lhs, lhs); + + mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1], + secondMask[2], secondMask[3]); + masm.vshufps(mask, lhs, lhs, lhs); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryCompIx16(LSimdBinaryCompIx16* ins) +{ + static const SimdConstant allOnes = SimdConstant::SplatX16(-1); + + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryComp::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryComp::greaterThan: + masm.vpcmpgtb(rhs, lhs, output); + return; + case MSimdBinaryComp::equal: + masm.vpcmpeqb(rhs, lhs, output); + return; + case MSimdBinaryComp::lessThan: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + + // src := src > lhs (i.e. lhs < rhs) + // Improve by doing custom lowering (rhs is tied to the output register) + masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch); + masm.moveSimd128Int(scratch, output); + return; + case MSimdBinaryComp::notEqual: + // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we + // should invert the comparison by, e.g. swapping the arms of a select + // if that's what it's used in. 
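+        // As emitted below, the predicates SSE lacks are synthesized from
+        // pcmpeq/pcmpgt plus an xor with all-ones (per-lane sketch):
+        //   notEqual           = ~(lhs == rhs)
+        //   greaterThanOrEqual = ~(rhs >  lhs)
+        //   lessThanOrEqual    = ~(lhs >  rhs)
+        // (lessThan just swaps the pcmpgt operands and needs no inversion.)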
+ masm.loadConstantSimd128Int(allOnes, scratch); + masm.vpcmpeqb(rhs, lhs, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + case MSimdBinaryComp::greaterThanOrEqual: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch); + masm.loadConstantSimd128Int(allOnes, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + case MSimdBinaryComp::lessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. + masm.loadConstantSimd128Int(allOnes, scratch); + masm.vpcmpgtb(rhs, lhs, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryCompIx8(LSimdBinaryCompIx8* ins) +{ + static const SimdConstant allOnes = SimdConstant::SplatX8(-1); + + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryComp::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryComp::greaterThan: + masm.vpcmpgtw(rhs, lhs, output); + return; + case MSimdBinaryComp::equal: + masm.vpcmpeqw(rhs, lhs, output); + return; + case MSimdBinaryComp::lessThan: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + + // src := src > lhs (i.e. lhs < rhs) + // Improve by doing custom lowering (rhs is tied to the output register) + masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch); + masm.moveSimd128Int(scratch, output); + return; + case MSimdBinaryComp::notEqual: + // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we + // should invert the comparison by, e.g. swapping the arms of a select + // if that's what it's used in. + masm.loadConstantSimd128Int(allOnes, scratch); + masm.vpcmpeqw(rhs, lhs, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + case MSimdBinaryComp::greaterThanOrEqual: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch); + masm.loadConstantSimd128Int(allOnes, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + case MSimdBinaryComp::lessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. 
+ masm.loadConstantSimd128Int(allOnes, scratch); + masm.vpcmpgtw(rhs, lhs, output); + masm.bitwiseXorSimd128(Operand(scratch), output); + return; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins) +{ + static const SimdConstant allOnes = SimdConstant::SplatX4(-1); + + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryComp::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryComp::greaterThan: + masm.packedGreaterThanInt32x4(rhs, lhs); + return; + case MSimdBinaryComp::equal: + masm.packedEqualInt32x4(rhs, lhs); + return; + case MSimdBinaryComp::lessThan: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + + // src := src > lhs (i.e. lhs < rhs) + // Improve by doing custom lowering (rhs is tied to the output register) + masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch); + masm.moveSimd128Int(scratch, lhs); + return; + case MSimdBinaryComp::notEqual: + // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we + // should invert the comparison by, e.g. swapping the arms of a select + // if that's what it's used in. + masm.loadConstantSimd128Int(allOnes, scratch); + masm.packedEqualInt32x4(rhs, lhs); + masm.bitwiseXorSimd128(Operand(scratch), lhs); + return; + case MSimdBinaryComp::greaterThanOrEqual: + // src := rhs + if (rhs.kind() == Operand::FPREG) + masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch); + else + masm.loadAlignedSimd128Int(rhs, scratch); + masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch); + masm.loadConstantSimd128Int(allOnes, lhs); + masm.bitwiseXorSimd128(Operand(scratch), lhs); + return; + case MSimdBinaryComp::lessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. + masm.loadConstantSimd128Int(allOnes, scratch); + masm.packedGreaterThanInt32x4(rhs, lhs); + masm.bitwiseXorSimd128(Operand(scratch), lhs); + return; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryCompFx4(LSimdBinaryCompFx4* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdBinaryComp::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryComp::equal: + masm.vcmpeqps(rhs, lhs, output); + return; + case MSimdBinaryComp::lessThan: + masm.vcmpltps(rhs, lhs, output); + return; + case MSimdBinaryComp::lessThanOrEqual: + masm.vcmpleps(rhs, lhs, output); + return; + case MSimdBinaryComp::notEqual: + masm.vcmpneqps(rhs, lhs, output); + return; + case MSimdBinaryComp::greaterThanOrEqual: + case MSimdBinaryComp::greaterThan: + // We reverse these before register allocation so that we don't have to + // copy into and out of temporaries after codegen. 
+ MOZ_CRASH("lowering should have reversed this"); + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryArithIx16(LSimdBinaryArithIx16* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdBinaryArith::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryArith::Op_add: + masm.vpaddb(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_sub: + masm.vpsubb(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_mul: + // 8x16 mul is a valid operation, but not supported in SSE or AVX. + // The operation is synthesized from 16x8 multiplies by + // MSimdBinaryArith::AddLegalized(). + break; + case MSimdBinaryArith::Op_div: + case MSimdBinaryArith::Op_max: + case MSimdBinaryArith::Op_min: + case MSimdBinaryArith::Op_minNum: + case MSimdBinaryArith::Op_maxNum: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryArithIx8(LSimdBinaryArithIx8* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdBinaryArith::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryArith::Op_add: + masm.vpaddw(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_sub: + masm.vpsubw(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_mul: + masm.vpmullw(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_div: + case MSimdBinaryArith::Op_max: + case MSimdBinaryArith::Op_min: + case MSimdBinaryArith::Op_minNum: + case MSimdBinaryArith::Op_maxNum: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryArith::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryArith::Op_add: + masm.vpaddd(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_sub: + masm.vpsubd(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_mul: { + if (AssemblerX86Shared::HasSSE41()) { + masm.vpmulld(rhs, lhs, output); + return; + } + + masm.loadAlignedSimd128Int(rhs, scratch); + masm.vpmuludq(lhs, scratch, scratch); + // scratch contains (Rx, _, Rz, _) where R is the resulting vector. + + FloatRegister temp = ToFloatRegister(ins->temp()); + masm.vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), lhs, lhs); + masm.vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), rhs, temp); + masm.vpmuludq(temp, lhs, lhs); + // lhs contains (Ry, _, Rw, _) where R is the resulting vector. + + masm.vshufps(MacroAssembler::ComputeShuffleMask(0, 2, 0, 2), scratch, lhs, lhs); + // lhs contains (Ry, Rw, Rx, Rz) + masm.vshufps(MacroAssembler::ComputeShuffleMask(2, 0, 3, 1), lhs, lhs, lhs); + return; + } + case MSimdBinaryArith::Op_div: + // x86 doesn't have SIMD i32 div. + break; + case MSimdBinaryArith::Op_max: + // we can do max with a single instruction only if we have SSE4.1 + // using the PMAXSD instruction. + break; + case MSimdBinaryArith::Op_min: + // we can do max with a single instruction only if we have SSE4.1 + // using the PMINSD instruction. 
+ break; + case MSimdBinaryArith::Op_minNum: + case MSimdBinaryArith::Op_maxNum: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchSimd128Scope scratch(masm); + + MSimdBinaryArith::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryArith::Op_add: + masm.vaddps(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_sub: + masm.vsubps(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_mul: + masm.vmulps(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_div: + masm.vdivps(rhs, lhs, output); + return; + case MSimdBinaryArith::Op_max: { + FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, scratch); + masm.vcmpunordps(rhs, lhsCopy, scratch); + + FloatRegister tmp = ToFloatRegister(ins->temp()); + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, tmp); + masm.vmaxps(Operand(lhs), rhsCopy, tmp); + masm.vmaxps(rhs, lhs, output); + + masm.vandps(tmp, output, output); + masm.vorps(scratch, output, output); // or in the all-ones NaNs + return; + } + case MSimdBinaryArith::Op_min: { + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); + masm.vminps(Operand(lhs), rhsCopy, scratch); + masm.vminps(rhs, lhs, output); + masm.vorps(scratch, output, output); // NaN or'd with arbitrary bits is NaN + return; + } + case MSimdBinaryArith::Op_minNum: { + FloatRegister tmp = ToFloatRegister(ins->temp()); + masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp); + + FloatRegister mask = scratch; + FloatRegister tmpCopy = masm.reusedInputFloat32x4(tmp, scratch); + masm.vpcmpeqd(Operand(lhs), tmpCopy, mask); + masm.vandps(tmp, mask, mask); + + FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp); + masm.vminps(rhs, lhsCopy, tmp); + masm.vorps(mask, tmp, tmp); + + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask); + masm.vcmpneqps(rhs, rhsCopy, mask); + + if (AssemblerX86Shared::HasAVX()) { + masm.vblendvps(mask, lhs, tmp, output); + } else { + // Emulate vblendvps. + // With SSE.4.1 we could use blendvps, however it's awkward since + // it requires the mask to be in xmm0. + if (lhs != output) + masm.moveSimd128Float(lhs, output); + masm.vandps(Operand(mask), output, output); + masm.vandnps(Operand(tmp), mask, mask); + masm.vorps(Operand(mask), output, output); + } + return; + } + case MSimdBinaryArith::Op_maxNum: { + FloatRegister mask = scratch; + masm.loadConstantSimd128Int(SimdConstant::SplatX4(0), mask); + masm.vpcmpeqd(Operand(lhs), mask, mask); + + FloatRegister tmp = ToFloatRegister(ins->temp()); + masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp); + masm.vandps(tmp, mask, mask); + + FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp); + masm.vmaxps(rhs, lhsCopy, tmp); + masm.vandnps(Operand(tmp), mask, mask); + + // Ensure tmp always contains the temporary result + mask = tmp; + tmp = scratch; + + FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask); + masm.vcmpneqps(rhs, rhsCopy, mask); + + if (AssemblerX86Shared::HasAVX()) { + masm.vblendvps(mask, lhs, tmp, output); + } else { + // Emulate vblendvps. + // With SSE.4.1 we could use blendvps, however it's awkward since + // it requires the mask to be in xmm0. 
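+            // Per-lane sketch of the emulation below (illustrative):
+            //   output = (lhs & mask) | (tmp & ~mask)
+            // i.e. lanes whose mask is all-ones take lhs and the rest take
+            // tmp, the same selection the vblendvps path above performs.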
+ if (lhs != output) + masm.moveSimd128Float(lhs, output); + masm.vandps(Operand(mask), output, output); + masm.vandnps(Operand(tmp), mask, mask); + masm.vorps(Operand(mask), output, output); + } + return; + } + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinarySaturating(LSimdBinarySaturating* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + SimdSign sign = ins->signedness(); + MOZ_ASSERT(sign != SimdSign::NotApplicable); + + switch (ins->type()) { + case MIRType::Int8x16: + switch (ins->operation()) { + case MSimdBinarySaturating::add: + if (sign == SimdSign::Signed) + masm.vpaddsb(rhs, lhs, output); + else + masm.vpaddusb(rhs, lhs, output); + return; + case MSimdBinarySaturating::sub: + if (sign == SimdSign::Signed) + masm.vpsubsb(rhs, lhs, output); + else + masm.vpsubusb(rhs, lhs, output); + return; + } + break; + + case MIRType::Int16x8: + switch (ins->operation()) { + case MSimdBinarySaturating::add: + if (sign == SimdSign::Signed) + masm.vpaddsw(rhs, lhs, output); + else + masm.vpaddusw(rhs, lhs, output); + return; + case MSimdBinarySaturating::sub: + if (sign == SimdSign::Signed) + masm.vpsubsw(rhs, lhs, output); + else + masm.vpsubusw(rhs, lhs, output); + return; + } + break; + + default: + break; + } + MOZ_CRASH("unsupported type for SIMD saturating arithmetic"); +} + +void +CodeGeneratorX86Shared::visitSimdUnaryArithIx16(LSimdUnaryArithIx16* ins) +{ + Operand in = ToOperand(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + + static const SimdConstant allOnes = SimdConstant::SplatX16(-1); + + switch (ins->operation()) { + case MSimdUnaryArith::neg: + masm.zeroSimd128Int(out); + masm.packedSubInt8(in, out); + return; + case MSimdUnaryArith::not_: + masm.loadConstantSimd128Int(allOnes, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::abs: + case MSimdUnaryArith::reciprocalApproximation: + case MSimdUnaryArith::reciprocalSqrtApproximation: + case MSimdUnaryArith::sqrt: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdUnaryArithIx8(LSimdUnaryArithIx8* ins) +{ + Operand in = ToOperand(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + + static const SimdConstant allOnes = SimdConstant::SplatX8(-1); + + switch (ins->operation()) { + case MSimdUnaryArith::neg: + masm.zeroSimd128Int(out); + masm.packedSubInt16(in, out); + return; + case MSimdUnaryArith::not_: + masm.loadConstantSimd128Int(allOnes, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::abs: + case MSimdUnaryArith::reciprocalApproximation: + case MSimdUnaryArith::reciprocalSqrtApproximation: + case MSimdUnaryArith::sqrt: + break; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4* ins) +{ + Operand in = ToOperand(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + + static const SimdConstant allOnes = SimdConstant::SplatX4(-1); + + switch (ins->operation()) { + case MSimdUnaryArith::neg: + masm.zeroSimd128Int(out); + masm.packedSubInt32(in, out); + return; + case MSimdUnaryArith::not_: + masm.loadConstantSimd128Int(allOnes, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::abs: + case MSimdUnaryArith::reciprocalApproximation: + case MSimdUnaryArith::reciprocalSqrtApproximation: + case MSimdUnaryArith::sqrt: + break; + } + 
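    // Note: in the integer cases above, neg is synthesized as 0 - x (zero the
    // output, then packed subtract) and not_ as x ^ ~0 (xor with an all-ones
    // constant), since baseline SSE2 has no packed-integer negate or bitwise
    // NOT instruction.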
MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdUnaryArithFx4(LSimdUnaryArithFx4* ins) +{ + Operand in = ToOperand(ins->input()); + FloatRegister out = ToFloatRegister(ins->output()); + + // All ones but the sign bit + float signMask = SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits); + static const SimdConstant signMasks = SimdConstant::SplatX4(signMask); + + // All ones including the sign bit + float ones = SpecificNaN<float>(1, FloatingPoint<float>::kSignificandBits); + static const SimdConstant allOnes = SimdConstant::SplatX4(ones); + + // All zeros but the sign bit + static const SimdConstant minusZero = SimdConstant::SplatX4(-0.f); + + switch (ins->operation()) { + case MSimdUnaryArith::abs: + masm.loadConstantSimd128Float(signMasks, out); + masm.bitwiseAndSimd128(in, out); + return; + case MSimdUnaryArith::neg: + masm.loadConstantSimd128Float(minusZero, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::not_: + masm.loadConstantSimd128Float(allOnes, out); + masm.bitwiseXorSimd128(in, out); + return; + case MSimdUnaryArith::reciprocalApproximation: + masm.packedRcpApproximationFloat32x4(in, out); + return; + case MSimdUnaryArith::reciprocalSqrtApproximation: + masm.packedRcpSqrtApproximationFloat32x4(in, out); + return; + case MSimdUnaryArith::sqrt: + masm.packedSqrtFloat32x4(in, out); + return; + } + MOZ_CRASH("unexpected SIMD op"); +} + +void +CodeGeneratorX86Shared::visitSimdBinaryBitwise(LSimdBinaryBitwise* ins) +{ + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Operand rhs = ToOperand(ins->rhs()); + FloatRegister output = ToFloatRegister(ins->output()); + + MSimdBinaryBitwise::Operation op = ins->operation(); + switch (op) { + case MSimdBinaryBitwise::and_: + if (ins->type() == MIRType::Float32x4) + masm.vandps(rhs, lhs, output); + else + masm.vpand(rhs, lhs, output); + return; + case MSimdBinaryBitwise::or_: + if (ins->type() == MIRType::Float32x4) + masm.vorps(rhs, lhs, output); + else + masm.vpor(rhs, lhs, output); + return; + case MSimdBinaryBitwise::xor_: + if (ins->type() == MIRType::Float32x4) + masm.vxorps(rhs, lhs, output); + else + masm.vpxor(rhs, lhs, output); + return; + } + MOZ_CRASH("unexpected SIMD bitwise op"); +} + +void +CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins) +{ + FloatRegister out = ToFloatRegister(ins->output()); + MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0); + + // The shift amount is masked to the number of bits in a lane. + uint32_t shiftmask = (128u / SimdTypeToLength(ins->type())) - 1; + + // Note that SSE doesn't have instructions for shifting 8x16 vectors. + // These shifts are synthesized by the MSimdShift::AddLegalized() function. 
+    const LAllocation* val = ins->value();
+    if (val->isConstant()) {
+        MOZ_ASSERT(ins->temp()->isBogusTemp());
+        Imm32 count(uint32_t(ToInt32(val)) & shiftmask);
+        switch (ins->type()) {
+          case MIRType::Int16x8:
+            switch (ins->operation()) {
+              case MSimdShift::lsh:
+                masm.packedLeftShiftByScalarInt16x8(count, out);
+                return;
+              case MSimdShift::rsh:
+                masm.packedRightShiftByScalarInt16x8(count, out);
+                return;
+              case MSimdShift::ursh:
+                masm.packedUnsignedRightShiftByScalarInt16x8(count, out);
+                return;
+            }
+            break;
+          case MIRType::Int32x4:
+            switch (ins->operation()) {
+              case MSimdShift::lsh:
+                masm.packedLeftShiftByScalarInt32x4(count, out);
+                return;
+              case MSimdShift::rsh:
+                masm.packedRightShiftByScalarInt32x4(count, out);
+                return;
+              case MSimdShift::ursh:
+                masm.packedUnsignedRightShiftByScalarInt32x4(count, out);
+                return;
+            }
+            break;
+          default:
+            MOZ_CRASH("unsupported type for SIMD shifts");
+        }
+        MOZ_CRASH("unexpected SIMD shift op");
+    }
+
+    // Mask the shift count to the number of bits in a lane (shiftmask).
+    // We should have a temp register for that.
+    MOZ_ASSERT(val->isRegister());
+    Register count = ToRegister(ins->temp());
+    masm.mov(ToRegister(val), count);
+    masm.andl(Imm32(shiftmask), count);
+    ScratchFloat32Scope scratch(masm);
+    masm.vmovd(count, scratch);
+
+    switch (ins->type()) {
+      case MIRType::Int16x8:
+        switch (ins->operation()) {
+          case MSimdShift::lsh:
+            masm.packedLeftShiftByScalarInt16x8(scratch, out);
+            return;
+          case MSimdShift::rsh:
+            masm.packedRightShiftByScalarInt16x8(scratch, out);
+            return;
+          case MSimdShift::ursh:
+            masm.packedUnsignedRightShiftByScalarInt16x8(scratch, out);
+            return;
+        }
+        break;
+      case MIRType::Int32x4:
+        switch (ins->operation()) {
+          case MSimdShift::lsh:
+            masm.packedLeftShiftByScalarInt32x4(scratch, out);
+            return;
+          case MSimdShift::rsh:
+            masm.packedRightShiftByScalarInt32x4(scratch, out);
+            return;
+          case MSimdShift::ursh:
+            masm.packedUnsignedRightShiftByScalarInt32x4(scratch, out);
+            return;
+        }
+        break;
+      default:
+        MOZ_CRASH("unsupported type for SIMD shifts");
+    }
+    MOZ_CRASH("unexpected SIMD shift op");
+}
+
+void
+CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect* ins)
+{
+    FloatRegister mask = ToFloatRegister(ins->mask());
+    FloatRegister onTrue = ToFloatRegister(ins->lhs());
+    FloatRegister onFalse = ToFloatRegister(ins->rhs());
+    FloatRegister output = ToFloatRegister(ins->output());
+    FloatRegister temp = ToFloatRegister(ins->temp());
+
+    if (onTrue != output)
+        masm.vmovaps(onTrue, output);
+    if (mask != temp)
+        masm.vmovaps(mask, temp);
+
+    MSimdSelect* mir = ins->mir();
+    unsigned lanes = SimdTypeToLength(mir->type());
+
+    if (AssemblerX86Shared::HasAVX() && lanes == 4) {
+        // TBD: Use vpblendvb for lanes > 4, HasAVX.
+        masm.vblendvps(mask, onTrue, onFalse, output);
+        return;
+    }
+
+    // SSE4.1 has plain blendvps which can do this, but it is awkward
+    // to use because it requires the mask to be in xmm0.
+
+    masm.bitwiseAndSimd128(Operand(temp), output);
+    masm.bitwiseAndNotSimd128(Operand(onFalse), temp);
+    masm.bitwiseOrSimd128(Operand(temp), output);
+}
+
+void
+CodeGeneratorX86Shared::visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir)
+{
+    Register elements = ToRegister(lir->elements());
+    AnyRegister output = ToAnyRegister(lir->output());
+    Register temp = lir->temp()->isBogusTemp() ?
InvalidReg : ToRegister(lir->temp()); + + Register oldval = ToRegister(lir->oldval()); + Register newval = ToRegister(lir->newval()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + int width = Scalar::byteSize(arrayType); + + if (lir->index()->isConstant()) { + Address dest(elements, ToInt32(lir->index()) * width); + masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); + masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output); + } +} + +void +CodeGeneratorX86Shared::visitAtomicExchangeTypedArrayElement(LAtomicExchangeTypedArrayElement* lir) +{ + Register elements = ToRegister(lir->elements()); + AnyRegister output = ToAnyRegister(lir->output()); + Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); + + Register value = ToRegister(lir->value()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + int width = Scalar::byteSize(arrayType); + + if (lir->index()->isConstant()) { + Address dest(elements, ToInt32(lir->index()) * width); + masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); + masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output); + } +} + +template<typename S, typename T> +void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, + const T& mem, Register temp1, Register temp2, AnyRegister output) +{ + switch (arrayType) { + case Scalar::Int8: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd8SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub8SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd8SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr8SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor8SignExtend(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Uint8: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd8ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub8ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd8ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr8ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor8ZeroExtend(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Int16: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd16SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub16SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd16SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr16SignExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor16SignExtend(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Uint16: + switch (op) { + case AtomicFetchAddOp: + 
masm.atomicFetchAdd16ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub16ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd16ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr16ZeroExtend(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor16ZeroExtend(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Int32: + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd32(value, mem, temp1, output.gpr()); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub32(value, mem, temp1, output.gpr()); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd32(value, mem, temp1, output.gpr()); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr32(value, mem, temp1, output.gpr()); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor32(value, mem, temp1, output.gpr()); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Uint32: + // At the moment, the code in MCallOptimize.cpp requires the output + // type to be double for uint32 arrays. See bug 1077305. + MOZ_ASSERT(output.isFloat()); + switch (op) { + case AtomicFetchAddOp: + masm.atomicFetchAdd32(value, mem, InvalidReg, temp1); + break; + case AtomicFetchSubOp: + masm.atomicFetchSub32(value, mem, InvalidReg, temp1); + break; + case AtomicFetchAndOp: + masm.atomicFetchAnd32(value, mem, temp2, temp1); + break; + case AtomicFetchOrOp: + masm.atomicFetchOr32(value, mem, temp2, temp1); + break; + case AtomicFetchXorOp: + masm.atomicFetchXor32(value, mem, temp2, temp1); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + masm.convertUInt32ToDouble(temp1, output.fpu()); + break; + default: + MOZ_CRASH("Invalid typed array type"); + } +} + +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Imm32& value, const Address& mem, + Register temp1, Register temp2, AnyRegister output); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Imm32& value, const BaseIndex& mem, + Register temp1, Register temp2, AnyRegister output); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Register& value, const Address& mem, + Register temp1, Register temp2, AnyRegister output); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Register& value, const BaseIndex& mem, + Register temp1, Register temp2, AnyRegister output); + +// Binary operation for effect, result discarded. 
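For the result-returning forms above, each element update behaves like an atomic read-modify-write on the element type, with the previous value widened into the output register: sign-extended for Int8/Int16, zero-extended for Uint8/Uint16, and converted to double for Uint32 as noted in the comment. A minimal scalar model of the 8-bit fetch-add cases, written against std::atomic purely for illustration (FetchAddInt8 and FetchAddUint8 are illustrative names, not SpiderMonkey functions):

    #include <atomic>
    #include <cstdint>

    // Models atomicFetchAdd8SignExtend: return the previous element value,
    // sign-extended to 32 bits, leaving the wrapped sum in memory.
    int32_t FetchAddInt8(std::atomic<int8_t>* elem, int8_t value)
    {
        int8_t old = elem->fetch_add(value);
        return int32_t(old);
    }

    // Models atomicFetchAdd8ZeroExtend: same, but zero-extend the old value.
    uint32_t FetchAddUint8(std::atomic<uint8_t>* elem, uint8_t value)
    {
        return uint32_t(elem->fetch_add(value));
    }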
+template<typename S, typename T> +void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, + const T& mem) +{ + switch (arrayType) { + case Scalar::Int8: + case Scalar::Uint8: + switch (op) { + case AtomicFetchAddOp: + masm.atomicAdd8(value, mem); + break; + case AtomicFetchSubOp: + masm.atomicSub8(value, mem); + break; + case AtomicFetchAndOp: + masm.atomicAnd8(value, mem); + break; + case AtomicFetchOrOp: + masm.atomicOr8(value, mem); + break; + case AtomicFetchXorOp: + masm.atomicXor8(value, mem); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Int16: + case Scalar::Uint16: + switch (op) { + case AtomicFetchAddOp: + masm.atomicAdd16(value, mem); + break; + case AtomicFetchSubOp: + masm.atomicSub16(value, mem); + break; + case AtomicFetchAndOp: + masm.atomicAnd16(value, mem); + break; + case AtomicFetchOrOp: + masm.atomicOr16(value, mem); + break; + case AtomicFetchXorOp: + masm.atomicXor16(value, mem); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + case Scalar::Int32: + case Scalar::Uint32: + switch (op) { + case AtomicFetchAddOp: + masm.atomicAdd32(value, mem); + break; + case AtomicFetchSubOp: + masm.atomicSub32(value, mem); + break; + case AtomicFetchAndOp: + masm.atomicAnd32(value, mem); + break; + case AtomicFetchOrOp: + masm.atomicOr32(value, mem); + break; + case AtomicFetchXorOp: + masm.atomicXor32(value, mem); + break; + default: + MOZ_CRASH("Invalid typed array atomic operation"); + } + break; + default: + MOZ_CRASH("Invalid typed array type"); + } +} + +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Imm32& value, const Address& mem); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Imm32& value, const BaseIndex& mem); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Register& value, const Address& mem); +template void +CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, + const Register& value, const BaseIndex& mem); + + +template <typename T> +static inline void +AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op, + Scalar::Type arrayType, const LAllocation* value, const T& mem, + Register temp1, Register temp2, AnyRegister output) +{ + if (value->isConstant()) + cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, temp1, temp2, output); + else + cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, temp1, temp2, output); +} + +void +CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir) +{ + MOZ_ASSERT(lir->mir()->hasUses()); + + AnyRegister output = ToAnyRegister(lir->output()); + Register elements = ToRegister(lir->elements()); + Register temp1 = lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1()); + Register temp2 = lir->temp2()->isBogusTemp() ? 
InvalidReg : ToRegister(lir->temp2()); + const LAllocation* value = lir->value(); + + Scalar::Type arrayType = lir->mir()->arrayType(); + int width = Scalar::byteSize(arrayType); + + if (lir->index()->isConstant()) { + Address mem(elements, ToInt32(lir->index()) * width); + AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output); + } else { + BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); + AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output); + } +} + +template <typename T> +static inline void +AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op, + Scalar::Type arrayType, const LAllocation* value, const T& mem) +{ + if (value->isConstant()) + cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem); + else + cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem); +} + +void +CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir) +{ + MOZ_ASSERT(!lir->mir()->hasUses()); + + Register elements = ToRegister(lir->elements()); + const LAllocation* value = lir->value(); + Scalar::Type arrayType = lir->mir()->arrayType(); + int width = Scalar::byteSize(arrayType); + + if (lir->index()->isConstant()) { + Address mem(elements, ToInt32(lir->index()) * width); + AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem); + } else { + BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); + AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem); + } +} + +void +CodeGeneratorX86Shared::visitMemoryBarrier(LMemoryBarrier* ins) +{ + if (ins->type() & MembarStoreLoad) + masm.storeLoadFence(); +} + +void +CodeGeneratorX86Shared::setReturnDoubleRegs(LiveRegisterSet* regs) +{ + MOZ_ASSERT(ReturnFloat32Reg.encoding() == X86Encoding::xmm0); + MOZ_ASSERT(ReturnDoubleReg.encoding() == X86Encoding::xmm0); + MOZ_ASSERT(ReturnSimd128Reg.encoding() == X86Encoding::xmm0); + regs->add(ReturnFloat32Reg); + regs->add(ReturnDoubleReg); + regs->add(ReturnSimd128Reg); +} + +void +CodeGeneratorX86Shared::visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool) +{ + FloatRegister input = ool->input(); + MIRType fromType = ool->fromType(); + MIRType toType = ool->toType(); + Label* oolRejoin = ool->rejoin(); + bool isUnsigned = ool->isUnsigned(); + wasm::TrapOffset off = ool->trapOffset(); + + if (fromType == MIRType::Float32) { + if (toType == MIRType::Int32) + masm.outOfLineWasmTruncateFloat32ToInt32(input, isUnsigned, off, oolRejoin); + else if (toType == MIRType::Int64) + masm.outOfLineWasmTruncateFloat32ToInt64(input, isUnsigned, off, oolRejoin); + else + MOZ_CRASH("unexpected type"); + } else if (fromType == MIRType::Double) { + if (toType == MIRType::Int32) + masm.outOfLineWasmTruncateDoubleToInt32(input, isUnsigned, off, oolRejoin); + else if (toType == MIRType::Int64) + masm.outOfLineWasmTruncateDoubleToInt64(input, isUnsigned, off, oolRejoin); + else + MOZ_CRASH("unexpected type"); + } else { + MOZ_CRASH("unexpected type"); + } +} + +void +CodeGeneratorX86Shared::canonicalizeIfDeterministic(Scalar::Type type, const LAllocation* value) +{ +#ifdef JS_MORE_DETERMINISTIC + switch (type) { + case Scalar::Float32: { + FloatRegister in = ToFloatRegister(value); + masm.canonicalizeFloatIfDeterministic(in); + break; + } + case Scalar::Float64: { + FloatRegister in = ToFloatRegister(value); + masm.canonicalizeDoubleIfDeterministic(in); 
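        // In JS_MORE_DETERMINISTIC builds this collapses every NaN to a
        // single canonical encoding, so scripts cannot observe
        // platform-dependent NaN payload bits.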
+ break; + } + case Scalar::Float32x4: { + FloatRegister in = ToFloatRegister(value); + MOZ_ASSERT(in.isSimd128()); + FloatRegister scratch = in != xmm0.asSimd128() ? xmm0 : xmm1; + masm.push(scratch); + masm.canonicalizeFloat32x4(in, scratch); + masm.pop(scratch); + break; + } + default: { + // Other types don't need canonicalization. + break; + } + } +#endif // JS_MORE_DETERMINISTIC +} + +void +CodeGeneratorX86Shared::visitCopySignF(LCopySignF* lir) +{ + FloatRegister lhs = ToFloatRegister(lir->getOperand(0)); + FloatRegister rhs = ToFloatRegister(lir->getOperand(1)); + + FloatRegister out = ToFloatRegister(lir->output()); + + if (lhs == rhs) { + if (lhs != out) + masm.moveFloat32(lhs, out); + return; + } + + ScratchFloat32Scope scratch(masm); + + float clearSignMask = BitwiseCast<float>(INT32_MAX); + masm.loadConstantFloat32(clearSignMask, scratch); + masm.vandps(scratch, lhs, out); + + float keepSignMask = BitwiseCast<float>(INT32_MIN); + masm.loadConstantFloat32(keepSignMask, scratch); + masm.vandps(rhs, scratch, scratch); + + masm.vorps(scratch, out, out); +} + +void +CodeGeneratorX86Shared::visitCopySignD(LCopySignD* lir) +{ + FloatRegister lhs = ToFloatRegister(lir->getOperand(0)); + FloatRegister rhs = ToFloatRegister(lir->getOperand(1)); + + FloatRegister out = ToFloatRegister(lir->output()); + + if (lhs == rhs) { + if (lhs != out) + masm.moveDouble(lhs, out); + return; + } + + ScratchDoubleScope scratch(masm); + + double clearSignMask = BitwiseCast<double>(INT64_MAX); + masm.loadConstantDouble(clearSignMask, scratch); + masm.vandpd(scratch, lhs, out); + + double keepSignMask = BitwiseCast<double>(INT64_MIN); + masm.loadConstantDouble(keepSignMask, scratch); + masm.vandpd(rhs, scratch, scratch); + + masm.vorpd(scratch, out, out); +} + +void +CodeGeneratorX86Shared::visitRotateI64(LRotateI64* lir) +{ + MRotate* mir = lir->mir(); + LAllocation* count = lir->count(); + + Register64 input = ToRegister64(lir->input()); + Register64 output = ToOutRegister64(lir); + Register temp = ToTempRegisterOrInvalid(lir->temp()); + + MOZ_ASSERT(input == output); + + if (count->isConstant()) { + int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F); + if (!c) + return; + if (mir->isLeftRotate()) + masm.rotateLeft64(Imm32(c), input, output, temp); + else + masm.rotateRight64(Imm32(c), input, output, temp); + } else { + if (mir->isLeftRotate()) + masm.rotateLeft64(ToRegister(count), input, output, temp); + else + masm.rotateRight64(ToRegister(count), input, output, temp); + } +} + +void +CodeGeneratorX86Shared::visitPopcntI64(LPopcntI64* lir) +{ + Register64 input = ToRegister64(lir->getInt64Operand(0)); + Register64 output = ToOutRegister64(lir); + Register temp = InvalidReg; + if (!AssemblerX86Shared::HasPOPCNT()) + temp = ToRegister(lir->getTemp(0)); + + masm.popcnt64(input, output, temp); +} + +} // namespace jit +} // namespace js |
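The mask constants in visitCopySignF and visitCopySignD implement the usual bit-level identity copysign(x, y) = (x & ~signbit) | (y & signbit). A scalar sketch of the same identity for the float case (CopySignBits is an illustrative name; the library equivalent is std::copysign):

    #include <cstdint>
    #include <cstring>

    // Keep x's magnitude bits and take y's sign bit.
    // 0x7fffffff matches clearSignMask (INT32_MAX); 0x80000000 matches
    // keepSignMask (the INT32_MIN bit pattern).
    float CopySignBits(float x, float y)
    {
        uint32_t xb, yb;
        std::memcpy(&xb, &x, sizeof xb);
        std::memcpy(&yb, &y, sizeof yb);
        uint32_t r = (xb & 0x7fffffffu) | (yb & 0x80000000u);
        std::memcpy(&x, &r, sizeof x);
        return x;
    }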