1 files changed, 1019 insertions, 0 deletions
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
new file mode 100644
index 000000000..8e820070a
--- /dev/null
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -0,0 +1,1019 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/x86-shared/Lowering-x86-shared.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/MIR.h"
+
+#include "jit/shared/Lowering-shared-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::Abs;
+using mozilla::FloorLog2;
+using mozilla::Swap;
+
+LTableSwitch*
+LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
+                                       MTableSwitch* tableswitch)
+{
+    // Assemble the LTableSwitch from the caller-supplied input allocation and
+    // input copy, plus one temp that is allocated here.
+    LDefinition scratch = temp();
+    LTableSwitch* lir = new(alloc()) LTableSwitch(in, inputCopy, scratch, tableswitch);
+    return lir;
+}
+
+// Creates the LTableSwitchV for |tableswitch|. Operand 0 of the MIR node is
+// consumed through useBox(), and the node is handed three temporaries: a
+// temp(), a tempDouble() and a second temp(), in that argument order.
+LTableSwitchV*
+LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch)
+{
+    return new(alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)),
+                                      temp(), tempDouble(), temp(), tableswitch);
+}
+
+void
+LIRGeneratorX86Shared::visitGuardShape(MGuardShape* ins)
+{
+    // Lower a shape guard: the LGuardShape reads the object register at the
+    // start of the instruction and gets a snapshot for ins->bailoutKind().
+    // The MIR node is then redefined as its object operand instead of
+    // receiving a definition of its own.
+    MDefinition* obj = ins->object();
+    MOZ_ASSERT(obj->type() == MIRType::Object);
+
+    LGuardShape* lir = new(alloc()) LGuardShape(useRegisterAtStart(obj));
+    assignSnapshot(lir, ins->bailoutKind());
+    add(lir, ins);
+    redefine(ins, obj);
+}
+
+void
+LIRGeneratorX86Shared::visitGuardObjectGroup(MGuardObjectGroup* ins)
+{
+    // Lower an object-group guard: the LGuardObjectGroup reads the object
+    // register at the start of the instruction and gets a snapshot for
+    // ins->bailoutKind(). The MIR node is then redefined as its object
+    // operand instead of receiving a definition of its own.
+    MDefinition* obj = ins->object();
+    MOZ_ASSERT(obj->type() == MIRType::Object);
+
+    LGuardObjectGroup* lir = new(alloc()) LGuardObjectGroup(useRegisterAtStart(obj));
+    assignSnapshot(lir, ins->bailoutKind());
+    add(lir, ins);
+    redefine(ins, obj);
+}
+
+void
+LIRGeneratorX86Shared::visitPowHalf(MPowHalf* ins)
+{
+    // MPowHalf is lowered to an LPowHalfD that reads its double input
+    // register at the start of the instruction.
+    MOZ_ASSERT(ins->input()->type() == MIRType::Double);
+    define(new(alloc()) LPowHalfD(useRegisterAtStart(ins->input())), ins);
+}
+
+void
+LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+                                     MDefinition* lhs, MDefinition* rhs)
+{
+    // The value being shifted is read at the start of the instruction and its
+    // register is reused for the result.
+    ins->setOperand(0, useRegisterAtStart(lhs));
+
+    // The shift count is either a constant or pinned to ecx. When both
+    // operands are the same definition, the count is also used at start.
+    LAllocation count;
+    if (rhs->isConstant())
+        count = useOrConstantAtStart(rhs);
+    else if (lhs == rhs)
+        count = useFixedAtStart(rhs, ecx);
+    else
+        count = useFixed(rhs, ecx);
+    ins->setOperand(1, count);
+
+    defineReuseInput(ins, mir, 0);
+}
+
+// Lowers a 64-bit shift or rotate |mir| onto |ins|. The int64 |lhs| is used
+// at start and reused as the output; |rhs| becomes the operand at index
+// INT64_PIECES. Temps is the number of temporaries of the LIR instruction;
+// the two explicit instantiations after the definition cover 0 and 1.
+template<size_t Temps>
+void
+LIRGeneratorX86Shared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+                                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+#if defined(JS_NUNBOX32)
+    // Rotates are given one temp in JS_NUNBOX32 builds.
+    if (mir->isRotate())
+        ins->setTemp(0, temp());
+#endif
+
+    static_assert(LShiftI64::Rhs == INT64_PIECES, "Assume Rhs is located at INT64_PIECES.");
+    static_assert(LRotateI64::Count == INT64_PIECES, "Assume Count is located at INT64_PIECES.");
+
+    // The shift count must be a constant or live in ecx: x86 variable
+    // shifts and rotates take their count from cl.
+    if (rhs->isConstant()) {
+        ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
+    } else {
+        // The operands are int64, but we only care about the lower 32 bits of
+        // the RHS. On 32-bit, the code below will load that part in ecx and
+        // will discard the upper half.
+        ensureDefined(rhs);
+        // Build the ecx-fixed use by hand and attach rhs's virtual register.
+        LUse use(ecx);
+        use.setVirtualRegister(rhs->virtualRegister());
+        ins->setOperand(INT64_PIECES, use);
+    }
+
+    defineInt64ReuseInput(ins, mir, 0);
+}
+
+// Explicit instantiations for LIR instructions with zero and one temp.
+template void LIRGeneratorX86Shared::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 0>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorX86Shared::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 1>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+
+void
+LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+                                   MDefinition* input)
+{
+    // Unary ALU op: the sole operand is read at the start of the instruction
+    // and the output is defined as reusing operand 0.
+    const LUse operand = useRegisterAtStart(input);
+    ins->setOperand(0, operand);
+    defineReuseInput(ins, mir, 0);
+}
+
+void
+LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+                                   MDefinition* lhs, MDefinition* rhs)
+{
+    // Binary ALU op: the output reuses operand 0, which is read at start.
+    ins->setOperand(0, useRegisterAtStart(lhs));
+
+    // The second operand is only used at start when it is the same
+    // definition as the first.
+    if (lhs == rhs)
+        ins->setOperand(1, useOrConstantAtStart(rhs));
+    else
+        ins->setOperand(1, useOrConstant(rhs));
+
+    defineReuseInput(ins, mir, 0);
+}
+
+template<size_t Temps>
+void
+LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    // In either encoding the lhs is read at the start of the instruction.
+    ins->setOperand(0, useRegisterAtStart(lhs));
+
+    if (Assembler::HasAVX()) {
+        // With AVX the output gets its own definition.
+        ins->setOperand(1, useAtStart(rhs));
+        define(ins, mir);
+        return;
+    }
+
+    // Without AVX, we have to use the x86 encodings where one of the inputs
+    // is the same location as the output, so the output reuses operand 0.
+    if (lhs == rhs)
+        ins->setOperand(1, useAtStart(rhs));
+    else
+        ins->setOperand(1, use(rhs));
+    defineReuseInput(ins, mir, 0);
+}
+
+template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+                                                 MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, MDefinition* mir,
+                                                 MDefinition* lhs, MDefinition* rhs);
+
+// Lowers an Ix4 SIMD comparison by delegating to the two-operand
+// lowerForALU() with the operands in their original order.
+void
+LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    lowerForALU(ins, mir, lhs, rhs);
+}
+
+void
+LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    // Greater-than style comparisons are reversed, with their operands
+    // swapped, to fit the instructions that x86 actually has. Doing this
+    // here, before register allocation, avoids temporaries and copies later.
+    const auto op = mir->operation();
+    if (op == MSimdBinaryComp::greaterThan || op == MSimdBinaryComp::greaterThanOrEqual) {
+        mir->reverse();
+        Swap(lhs, rhs);
+    }
+
+    lowerForFPU(ins, mir, lhs, rhs);
+}
+
+void
+LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
+                                               MDefinition* lhs, MDefinition* rhs)
+{
+    // Both operands are read at the start of the instruction; the right-hand
+    // side may be a constant. The instruction is added without a definition.
+    const LUse left = useRegisterAtStart(lhs);
+    const LAllocation right = useRegisterOrConstantAtStart(rhs);
+    baab->setOperand(0, left);
+    baab->setOperand(1, right);
+    add(baab, mir);
+}
+
+void
+LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
+{
+    // When a negative zero check is needed, lhs is used a second time
+    // through this extra allocation; otherwise it stays default-constructed.
+    LAllocation lhsCopy;
+    if (mul->canBeNegativeZero())
+        lhsCopy = use(lhs);
+
+    LMulI* mulLir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
+    if (mul->fallible())
+        assignSnapshot(mulLir, Bailout_DoubleOutput);
+
+    // The product reuses the register of operand 0.
+    defineReuseInput(mulLir, mul, 0);
+}
+
+// Lowers a 32-bit integer division. Unsigned divisions are forwarded to
+// lowerUDiv(). A non-zero constant divisor selects LDivPowTwoI (when its
+// absolute value is a power of two) or LDivOrModConstantI; everything else,
+// including a constant zero divisor, falls back to LDivI. Every fallible
+// form gets a Bailout_DoubleOutput snapshot.
+void
+LIRGeneratorX86Shared::lowerDivI(MDiv* div)
+{
+    if (div->isUnsigned()) {
+        lowerUDiv(div);
+        return;
+    }
+
+    // Division instructions are slow. Division by constant denominators can be
+    // rewritten to use other instructions.
+    if (div->rhs()->isConstant()) {
+        int32_t rhs = div->rhs()->toConstant()->toInt32();
+
+        // Division by powers of two can be done by shifting, and division by
+        // other numbers can be done by a reciprocal multiplication technique.
+        // |shift| is only relied upon once rhs != 0 has been checked below.
+        int32_t shift = FloorLog2(Abs(rhs));
+        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
+            LAllocation lhs = useRegisterAtStart(div->lhs());
+            LDivPowTwoI* lir;
+            if (!div->canBeNegativeDividend()) {
+                // Numerator is known to be non-negative, so does not need
+                // adjusting; the same use serves as both operands.
+                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
+            } else {
+                // Numerator is signed, and needs adjusting, and an extra
+                // lhs copy register is needed.
+                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
+            }
+            if (div->fallible())
+                assignSnapshot(lir, Bailout_DoubleOutput);
+            defineReuseInput(lir, div, 0);
+            return;
+        }
+        if (rhs != 0) {
+            // Constant, non-power-of-two divisor: eax is taken as a temp and
+            // the result is defined in edx.
+            LDivOrModConstantI* lir;
+            lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
+            if (div->fallible())
+                assignSnapshot(lir, Bailout_DoubleOutput);
+            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
+            return;
+        }
+    }
+
+    // General case: edx is taken as a temp and the result is defined in eax.
+    LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
+                                    tempFixed(edx));
+    if (div->fallible())
+        assignSnapshot(lir, Bailout_DoubleOutput);
+    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
+}
+
+// Lowers a 32-bit integer modulus. Unsigned moduli are forwarded to
+// lowerUMod(). A non-zero constant divisor selects LModPowTwoI (when its
+// absolute value is a power of two) or LDivOrModConstantI; everything else,
+// including a constant zero divisor, falls back to LModI. Every fallible
+// form gets a Bailout_DoubleOutput snapshot.
+void
+LIRGeneratorX86Shared::lowerModI(MMod* mod)
+{
+    if (mod->isUnsigned()) {
+        lowerUMod(mod);
+        return;
+    }
+
+    if (mod->rhs()->isConstant()) {
+        int32_t rhs = mod->rhs()->toConstant()->toInt32();
+        // |shift| is only relied upon once rhs != 0 has been checked below.
+        int32_t shift = FloorLog2(Abs(rhs));
+        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
+            // Power-of-two divisor: the result reuses the lhs register.
+            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
+            if (mod->fallible())
+                assignSnapshot(lir, Bailout_DoubleOutput);
+            defineReuseInput(lir, mod, 0);
+            return;
+        }
+        if (rhs != 0) {
+            // Constant, non-power-of-two divisor: edx is taken as a temp and
+            // the result is defined in eax.
+            LDivOrModConstantI* lir;
+            lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
+            if (mod->fallible())
+                assignSnapshot(lir, Bailout_DoubleOutput);
+            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
+            return;
+        }
+    }
+
+    // General case: eax is taken as a temp and the result is defined in edx.
+    LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()),
+                                    useRegister(mod->rhs()),
+                                    tempFixed(eax));
+    if (mod->fallible())
+        assignSnapshot(lir, Bailout_DoubleOutput);
+    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
+}
+
+void
+LIRGeneratorX86Shared::visitWasmSelect(MWasmSelect* ins)
+{
+    // In both forms the true expression is used at start, the output reuses
+    // the operand at TrueExprIndex, and the condition is in a register.
+    if (ins->type() != MIRType::Int64) {
+        auto* select = new(alloc()) LWasmSelect(useRegisterAtStart(ins->trueExpr()),
+                                                use(ins->falseExpr()),
+                                                useRegister(ins->condExpr()));
+        defineReuseInput(select, ins, LWasmSelect::TrueExprIndex);
+        return;
+    }
+
+    // 64-bit selects go through the int64 flavours of the operand helpers.
+    auto* select64 = new(alloc()) LWasmSelectI64(useInt64RegisterAtStart(ins->trueExpr()),
+                                                 useInt64(ins->falseExpr()),
+                                                 useRegister(ins->condExpr()));
+    defineInt64ReuseInput(select64, ins, LWasmSelectI64::TrueExprIndex);
+}
+
+void
+LIRGeneratorX86Shared::visitAsmJSNeg(MAsmJSNeg* ins)
+{
+    // Pick the negation instruction from the result type. In every case the
+    // input is read at start and its register is reused for the output. Any
+    // other type crashes.
+    switch (ins->type()) {
+      case MIRType::Int32: {
+        LNegI* neg = new(alloc()) LNegI(useRegisterAtStart(ins->input()));
+        defineReuseInput(neg, ins, 0);
+        return;
+      }
+      case MIRType::Float32: {
+        LNegF* neg = new(alloc()) LNegF(useRegisterAtStart(ins->input()));
+        defineReuseInput(neg, ins, 0);
+        return;
+      }
+      case MIRType::Double: {
+        LNegD* neg = new(alloc()) LNegD(useRegisterAtStart(ins->input()));
+        defineReuseInput(neg, ins, 0);
+        return;
+      }
+      default:
+        MOZ_CRASH();
+    }
+}
+
+void
+LIRGeneratorX86Shared::lowerWasmLoad(MWasmLoad* ins)
+{
+    // Int64 loads are not handled here; the base must be an int32.
+    MOZ_ASSERT(ins->type() != MIRType::Int64);
+    MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
+
+    // The base is consumed through useRegisterOrZeroAtStart().
+    define(new(alloc()) LWasmLoad(useRegisterOrZeroAtStart(ins->base())), ins);
+}
+
+// Lowers an unsigned 32-bit division. A constant divisor that is a non-zero
+// power of two selects LDivPowTwoI; any other constant divisor (zero
+// included) selects LUDivOrModConstant; a non-constant divisor selects
+// LUDivOrMod. Every fallible form gets a Bailout_DoubleOutput snapshot.
+void
+LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
+{
+    if (div->rhs()->isConstant()) {
+        // The int32 constant is reinterpreted as an unsigned value.
+        uint32_t rhs = div->rhs()->toConstant()->toInt32();
+        int32_t shift = FloorLog2(rhs);
+
+        // NOTE(review): |lhs| is only consumed by the power-of-two branch; the
+        // other branch issues its own useRegister() on the same operand.
+        // Confirm the extra use created here is harmless in that case.
+        LAllocation lhs = useRegisterAtStart(div->lhs());
+        if (rhs != 0 && uint32_t(1) << shift == rhs) {
+            // Power-of-two divisor: the result reuses the lhs register.
+            LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false);
+            if (div->fallible())
+                assignSnapshot(lir, Bailout_DoubleOutput);
+            defineReuseInput(lir, div, 0);
+        } else {
+            // eax is taken as a temp and the result is defined in edx.
+            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()),
+                                                                      rhs, tempFixed(eax));
+            if (div->fallible())
+                assignSnapshot(lir, Bailout_DoubleOutput);
+            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
+        }
+        return;
+    }
+
+    // General case: edx is taken as a temp and the result is defined in eax.
+    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()),
+                                              useRegister(div->rhs()),
+                                              tempFixed(edx));
+    if (div->fallible())
+        assignSnapshot(lir, Bailout_DoubleOutput);
+    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
+}
+
+// Lowers an unsigned 32-bit modulus. A constant divisor that is a non-zero
+// power of two selects LModPowTwoI; any other constant divisor (zero
+// included) selects LUDivOrModConstant; a non-constant divisor selects
+// LUDivOrMod. Every fallible form gets a Bailout_DoubleOutput snapshot.
+void
+LIRGeneratorX86Shared::lowerUMod(MMod* mod)
+{
+    if (mod->rhs()->isConstant()) {
+        // The int32 constant is reinterpreted as an unsigned value.
+        uint32_t rhs = mod->rhs()->toConstant()->toInt32();
+        int32_t shift = FloorLog2(rhs);
+
+        if (rhs != 0 && uint32_t(1) << shift == rhs) {
+            // Power-of-two divisor: the result reuses the lhs register.
+            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
+            if (mod->fallible())
+                assignSnapshot(lir, Bailout_DoubleOutput);
+            defineReuseInput(lir, mod, 0);
+        } else {
+            // edx is taken as a temp and the result is defined in eax.
+            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()),
+                                                                      rhs, tempFixed(edx));
+            if (mod->fallible())
+                assignSnapshot(lir, Bailout_DoubleOutput);
+            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
+        }
+        return;
+    }
+
+    // General case: eax is taken as a temp and the result is defined in edx.
+    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()),
+                                              useRegister(mod->rhs()),
+                                              tempFixed(eax));
+    if (mod->fallible())
+        assignSnapshot(lir, Bailout_DoubleOutput);
+    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
+}
+
+void
+LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir)
+{
+    // Lowers an MUrsh whose operands are int32 and whose result is a double.
+    MDefinition* value = mir->lhs();
+    MDefinition* count = mir->rhs();
+
+    MOZ_ASSERT(value->type() == MIRType::Int32);
+    MOZ_ASSERT(count->type() == MIRType::Int32);
+    MOZ_ASSERT(mir->type() == MIRType::Double);
+
+#ifdef JS_CODEGEN_X64
+    MOZ_ASSERT(ecx == rcx);
+#endif
+
+    LUse valueUse = useRegisterAtStart(value);
+
+    // A constant count is used directly; otherwise it is pinned to ecx.
+    LAllocation countAlloc;
+    if (count->isConstant())
+        countAlloc = useOrConstant(count);
+    else
+        countAlloc = useFixed(count, ecx);
+
+    // The temp is created with tempCopy() on the shifted value, operand 0.
+    LUrshD* lir = new(alloc()) LUrshD(valueUse, countAlloc, tempCopy(value, 0));
+    define(lir, mir);
+}
+
+void
+LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins)
+{
+    MOZ_ASSERT(ins->input()->type() == MIRType::Double);
+
+    // A double temp is only allocated when SSE3 is unavailable; otherwise
+    // the instruction gets a bogus temp.
+    LDefinition scratch = LDefinition::BogusTemp();
+    if (!Assembler::HasSSE3())
+        scratch = tempDouble();
+
+    define(new(alloc()) LTruncateDToInt32(useRegister(ins->input()), scratch), ins);
+}
+
+void
+LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins)
+{
+    MOZ_ASSERT(ins->input()->type() == MIRType::Float32);
+
+    // A float32 temp is only allocated when SSE3 is unavailable; otherwise
+    // the instruction gets a bogus temp.
+    LDefinition scratch = LDefinition::BogusTemp();
+    if (!Assembler::HasSSE3())
+        scratch = tempFloat32();
+
+    define(new(alloc()) LTruncateFToInt32(useRegister(ins->input()), scratch), ins);
+}
+
+// Lowers a typed-array compare-exchange. Float32 and Float64 arrays are not
+// accepted. When |useI386ByteRegisters| is set and the array is a byte array,
+// newval is pinned to ebx. The output is fixed to eax unless the array is
+// Uint32 with a floating-point result type, in which case eax is taken as a
+// temp instead and the output is unconstrained.
+void
+LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins,
+                                                             bool useI386ByteRegisters)
+{
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
+
+    const LUse elements = useRegister(ins->elements());
+    const LAllocation index = useRegisterOrConstant(ins->index());
+
+    // If the target is a floating register then we need a temp at the
+    // lower level; that temp must be eax.
+    //
+    // Otherwise the target (if used) is an integer register, which
+    // must be eax.  If the target is not used the machine code will
+    // still clobber eax, so just pretend it's used.
+    //
+    // oldval must be in a register.
+    //
+    // newval must be in a register.  If the source is a byte array
+    // then newval must be a register that has a byte size: on x86
+    // this must be ebx, ecx, or edx (eax is taken for the output).
+    //
+    // Bug #1077036 describes some further optimization opportunities.
+
+    bool fixedOutput = false;
+    LDefinition tempDef = LDefinition::BogusTemp();
+    LAllocation newval;
+    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+        // Floating-point result: eax becomes the temp.
+        tempDef = tempFixed(eax);
+        newval = useRegister(ins->newval());
+    } else {
+        // Integer result: the output itself is fixed to eax below.
+        fixedOutput = true;
+        if (useI386ByteRegisters && ins->isByteArray())
+            newval = useFixed(ins->newval(), ebx);
+        else
+            newval = useRegister(ins->newval());
+    }
+
+    const LAllocation oldval = useRegister(ins->oldval());
+
+    LCompareExchangeTypedArrayElement* lir =
+        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);
+
+    if (fixedOutput)
+        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+    else
+        define(lir, ins);
+}
+
+// Lowers a typed-array atomic exchange. For Uint32 arrays (whose result type
+// is asserted to be Double) one extra temp is allocated. When
+// |useI386ByteRegisters| is set and the array is a byte array, the output is
+// fixed to eax; otherwise the output is unconstrained.
+void
+LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins,
+                                                            bool useI386ByteRegisters)
+{
+    MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
+
+    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
+
+    const LUse elements = useRegister(ins->elements());
+    const LAllocation index = useRegisterOrConstant(ins->index());
+    const LAllocation value = useRegister(ins->value());
+
+    // The underlying instruction is XCHG, which can operate on any
+    // register.
+    //
+    // If the target is a floating register (for Uint32) then we need
+    // a temp into which to exchange.
+    //
+    // If the source is a byte array then we need a register that has
+    // a byte size; in this case -- on x86 only -- pin the output to
+    // an appropriate register and use that as a temp in the back-end.
+
+    LDefinition tempDef = LDefinition::BogusTemp();
+    if (ins->arrayType() == Scalar::Uint32) {
+        // This restriction is bug 1077305.
+        MOZ_ASSERT(ins->type() == MIRType::Double);
+        tempDef = temp();
+    }
+
+    LAtomicExchangeTypedArrayElement* lir =
+        new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
+
+    // Byte arrays under |useI386ByteRegisters| get their output pinned to eax.
+    if (useI386ByteRegisters && ins->isByteArray())
+        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+    else
+        define(lir, ins);
+}
+
+// Lowers an atomic read-modify-write on a typed-array element. Uint8Clamped,
+// Float32 and Float64 arrays are not accepted. If the result is unused an
+// LAtomicTypedArrayElementBinopForEffect is emitted; otherwise an
+// LAtomicTypedArrayElementBinop is emitted whose operand, temp and output
+// constraints depend on the array type, the operation, and
+// |useI386ByteRegisters| (see the comments in the body).
+void
+LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
+                                                         bool useI386ByteRegisters)
+{
+    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
+
+    const LUse elements = useRegister(ins->elements());
+    const LAllocation index = useRegisterOrConstant(ins->index());
+
+    // Case 1: the result of the operation is not used.
+    //
+    // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
+    // LOCK OR, or LOCK XOR.  We can do this even for the Uint32 case.
+
+    if (!ins->hasUses()) {
+        // A non-constant value for a byte array is pinned to ebx when
+        // |useI386ByteRegisters| is set.
+        LAllocation value;
+        if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
+            value = useFixed(ins->value(), ebx);
+        else
+            value = useRegisterOrConstant(ins->value());
+
+        LAtomicTypedArrayElementBinopForEffect* lir =
+            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);
+
+        add(lir, ins);
+        return;
+    }
+
+    // Case 2: the result of the operation is used.
+    //
+    // For ADD and SUB we'll use XADD:
+    //
+    //    movl       src, output
+    //    lock xaddl output, mem
+    //
+    // For the 8-bit variants XADD needs a byte register for the output.
+    //
+    // For AND/OR/XOR we need to use a CMPXCHG loop:
+    //
+    //    movl          *mem, eax
+    // L: mov           eax, temp
+    //    andl          src, temp
+    //    lock cmpxchg  temp, mem  ; reads eax also
+    //    jnz           L
+    //    ; result in eax
+    //
+    // Note the placement of L, cmpxchg will update eax with *mem if
+    // *mem does not have the expected value, so reloading it at the
+    // top of the loop would be redundant.
+    //
+    // If the array is not a uint32 array then:
+    //  - eax should be the output (one result of the cmpxchg)
+    //  - there is a temp, which must have a byte register if
+    //    the array has 1-byte elements
+    //
+    // If the array is a uint32 array then:
+    //  - eax is the first temp
+    //  - we also need a second temp
+    //
+    // There are optimization opportunities:
+    //  - better register allocation in the x86 8-bit case, Bug #1077036.
+
+    // Anything other than fetch-add / fetch-sub counts as a bit operation.
+    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
+    bool fixedOutput = true;
+    bool reuseInput = false;
+    LDefinition tempDef1 = LDefinition::BogusTemp();
+    LDefinition tempDef2 = LDefinition::BogusTemp();
+    LAllocation value;
+
+    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+        // Uint32 array with a floating-point result: the output is
+        // unconstrained. Bit ops take eax plus a second temp; add/sub take
+        // a single temp.
+        value = useRegisterOrConstant(ins->value());
+        fixedOutput = false;
+        if (bitOp) {
+            tempDef1 = tempFixed(eax);
+            tempDef2 = temp();
+        } else {
+            tempDef1 = temp();
+        }
+    } else if (useI386ByteRegisters && ins->isByteArray()) {
+        // Byte array with byte-register constraints: a non-constant value is
+        // pinned to ebx, bit ops use ecx as their temp, and the output stays
+        // fixed to eax.
+        if (ins->value()->isConstant())
+            value = useRegisterOrConstant(ins->value());
+        else
+            value = useFixed(ins->value(), ebx);
+        if (bitOp)
+            tempDef1 = tempFixed(ecx);
+    } else if (bitOp) {
+        // Other bit ops: one temp, output fixed to eax.
+        value = useRegisterOrConstant(ins->value());
+        tempDef1 = temp();
+    } else if (ins->value()->isConstant()) {
+        // Add/sub of a constant: no temp, unconstrained output.
+        fixedOutput = false;
+        value = useRegisterOrConstant(ins->value());
+    } else {
+        // Add/sub of a register value: the value is used at start and its
+        // register is reused as the output.
+        fixedOutput = false;
+        reuseInput = true;
+        value = useRegisterAtStart(ins->value());
+    }
+
+    LAtomicTypedArrayElementBinop* lir =
+        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
+
+    if (fixedOutput)
+        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+    else if (reuseInput)
+        defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
+    else
+        define(lir, ins);
+}
+
+// Lowers insertion of a scalar into one lane of a SIMD vector. The vector is
+// used at start and its register is reused for the output. Integer and
+// boolean vectors use LSimdInsertElementI; Float32x4 uses
+// LSimdInsertElementF.
+void
+LIRGeneratorX86Shared::visitSimdInsertElement(MSimdInsertElement* ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    LUse vec = useRegisterAtStart(ins->vector());
+    LUse val = useRegister(ins->value());
+    switch (ins->type()) {
+      case MIRType::Int8x16:
+      case MIRType::Bool8x16:
+        // When SSE 4.1 is not available, we need to go via the stack.
+        // This requires the value to be inserted to be in %eax-%edx.
+        // Pick %ebx since other instructions use %eax or %ecx hard-wired.
+#if defined(JS_CODEGEN_X86)
+        if (!AssemblerX86Shared::HasSSE41())
+            val = useFixed(ins->value(), ebx);
+#endif
+        defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0);
+        break;
+      case MIRType::Int16x8:
+      case MIRType::Int32x4:
+      case MIRType::Bool16x8:
+      case MIRType::Bool32x4:
+        defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0);
+        break;
+      case MIRType::Float32x4:
+        defineReuseInput(new(alloc()) LSimdInsertElementF(vec, val), ins, 0);
+        break;
+      default:
+        // The message names this operation so a crash report points at the
+        // insert-element lowering.
+        MOZ_CRASH("Unknown SIMD kind when inserting element");
+    }
+}
+
+// Lowers extraction of a single lane from a SIMD vector. The LIR instruction
+// is picked from the input vector type and, for integer vectors, from the
+// result type. The input vector is always used at start.
+void
+LIRGeneratorX86Shared::visitSimdExtractElement(MSimdExtractElement* ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->input()->type()));
+    MOZ_ASSERT(!IsSimdType(ins->type()));
+
+    switch (ins->input()->type()) {
+      case MIRType::Int8x16:
+      case MIRType::Int16x8:
+      case MIRType::Int32x4: {
+        MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable);
+        LUse use = useRegisterAtStart(ins->input());
+        if (ins->type() == MIRType::Double) {
+            // Extract an Uint32 lane into a double.
+            MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned);
+            define(new (alloc()) LSimdExtractElementU2D(use, temp()), ins);
+        } else {
+            auto* lir = new (alloc()) LSimdExtractElementI(use);
+#if defined(JS_CODEGEN_X86)
+            // On x86 (32-bit), we may need to use movsbl or movzbl instructions
+            // to sign or zero extend the extracted lane to 32 bits. The 8-bit
+            // version of these instructions require a source register that is
+            // %al, %bl, %cl, or %dl.
+            // Fix it to %ebx since we can't express that constraint better.
+            if (ins->input()->type() == MIRType::Int8x16) {
+                defineFixed(lir, ins, LAllocation(AnyRegister(ebx)));
+                return;
+            }
+#endif
+            define(lir, ins);
+        }
+        break;
+      }
+      case MIRType::Float32x4: {
+        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
+        LUse use = useRegisterAtStart(ins->input());
+        define(new(alloc()) LSimdExtractElementF(use), ins);
+        break;
+      }
+      case MIRType::Bool8x16:
+      case MIRType::Bool16x8:
+      case MIRType::Bool32x4: {
+        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
+        LUse use = useRegisterAtStart(ins->input());
+        define(new(alloc()) LSimdExtractElementB(use), ins);
+        break;
+      }
+      default:
+        MOZ_CRASH("Unknown SIMD kind when extracting element");
+    }
+}
+
+// Lowers a SIMD binary arithmetic operation through lowerForFPU(), choosing
+// the LIR instruction from the result type. Commutative operations first go
+// through ReorderCommutative(). Only Int32x4 multiply without SSE4.1 and
+// Float32x4 max/minNum/maxNum receive a real temp; every other case is given
+// a bogus temp.
+void
+LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
+    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    MDefinition* lhs = ins->lhs();
+    MDefinition* rhs = ins->rhs();
+
+    if (ins->isCommutative())
+        ReorderCommutative(&lhs, &rhs, ins);
+
+    switch (ins->type()) {
+      case MIRType::Int8x16: {
+          LSimdBinaryArithIx16* lir = new (alloc()) LSimdBinaryArithIx16();
+          lir->setTemp(0, LDefinition::BogusTemp());
+          lowerForFPU(lir, ins, lhs, rhs);
+          return;
+      }
+
+      case MIRType::Int16x8: {
+          LSimdBinaryArithIx8* lir = new (alloc()) LSimdBinaryArithIx8();
+          lir->setTemp(0, LDefinition::BogusTemp());
+          lowerForFPU(lir, ins, lhs, rhs);
+          return;
+      }
+
+      case MIRType::Int32x4: {
+          LSimdBinaryArithIx4* lir = new (alloc()) LSimdBinaryArithIx4();
+          // Multiplication needs a SIMD integer temp when SSE4.1 is missing.
+          bool needsTemp =
+              ins->operation() == MSimdBinaryArith::Op_mul && !MacroAssembler::HasSSE41();
+          lir->setTemp(0, needsTemp ? temp(LDefinition::SIMD128INT) : LDefinition::BogusTemp());
+          lowerForFPU(lir, ins, lhs, rhs);
+          return;
+      }
+
+      case MIRType::Float32x4: {
+          LSimdBinaryArithFx4* lir = new (alloc()) LSimdBinaryArithFx4();
+
+          // max, minNum and maxNum need a SIMD float temp.
+          bool needsTemp = ins->operation() == MSimdBinaryArith::Op_max ||
+              ins->operation() == MSimdBinaryArith::Op_minNum ||
+              ins->operation() == MSimdBinaryArith::Op_maxNum;
+          lir->setTemp(0,
+                       needsTemp ? temp(LDefinition::SIMD128FLOAT) : LDefinition::BogusTemp());
+          lowerForFPU(lir, ins, lhs, rhs);
+          return;
+      }
+
+      default:
+        MOZ_CRASH("unknown simd type on binary arith operation");
+    }
+}
+
+void
+LIRGeneratorX86Shared::visitSimdBinarySaturating(MSimdBinarySaturating* ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
+    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    MDefinition* left = ins->lhs();
+    MDefinition* right = ins->rhs();
+
+    // Commutative operations may have their operands reordered first.
+    if (ins->isCommutative())
+        ReorderCommutative(&left, &right, ins);
+
+    // Operand and output constraints are chosen by lowerForFPU().
+    lowerForFPU(new (alloc()) LSimdBinarySaturating(), ins, left, right);
+}
+
+void
+LIRGeneratorX86Shared::visitSimdSelect(MSimdSelect* ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    LSimdSelect* select = new(alloc()) LSimdSelect;
+
+    // All three MIR operands are passed in registers, in operand order.
+    for (size_t i = 0; i < 3; i++)
+        select->setOperand(i, useRegister(ins->getOperand(i)));
+
+    // One SIMD float temp; the output gets its own definition.
+    select->setTemp(0, temp(LDefinition::SIMD128FLOAT));
+    define(select, ins);
+}
+
+void
+LIRGeneratorX86Shared::visitSimdSplat(MSimdSplat* ins)
+{
+    // The scalar being splatted is read at the start of the instruction.
+    LAllocation scalar = useRegisterAtStart(ins->getOperand(0));
+
+    switch (ins->type()) {
+      case MIRType::Int8x16:
+        define(new (alloc()) LSimdSplatX16(scalar), ins);
+        return;
+      case MIRType::Int16x8:
+        define(new (alloc()) LSimdSplatX8(scalar), ins);
+        return;
+      case MIRType::Int32x4:
+      case MIRType::Float32x4:
+      case MIRType::Bool8x16:
+      case MIRType::Bool16x8:
+      case MIRType::Bool32x4:
+        // Every boolean splat goes through SplatX4: the input is a 32-bit
+        // int that is either 0 or -1, so the X4 splat gives the right result
+        // for all boolean geometries.
+        // For floats, (non-AVX) codegen actually wants the input and the
+        // output in the same register, but defineReuseInput can't currently
+        // be used because the two have different types (scalar vs vector),
+        // so a spill slot for one may not be suitable for the other.
+        define(new (alloc()) LSimdSplatX4(scalar), ins);
+        return;
+      default:
+        MOZ_CRASH("Unknown SIMD kind");
+    }
+}
+
+void
+LIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4* ins)
+{
+    LAllocation lanes[4];
+
+    switch (ins->type()) {
+      case MIRType::Float32x4: {
+        // Ideally, the first lane would be used at start and reused for the
+        // output, however register allocation currently doesn't permit us to
+        // tie together two virtual registers with different types.
+        for (size_t i = 0; i < 4; i++)
+            lanes[i] = useRegister(ins->getOperand(i));
+        LDefinition scratch = temp(LDefinition::SIMD128FLOAT);
+        define(new (alloc()) LSimdValueFloat32x4(lanes[0], lanes[1], lanes[2], lanes[3],
+                                                 scratch),
+               ins);
+        return;
+      }
+      case MIRType::Bool32x4:
+      case MIRType::Int32x4: {
+        // No defineReuseInput => useAtStart for everyone.
+        for (size_t i = 0; i < 4; i++)
+            lanes[i] = useRegisterAtStart(ins->getOperand(i));
+        define(new(alloc()) LSimdValueInt32x4(lanes[0], lanes[1], lanes[2], lanes[3]), ins);
+        return;
+      }
+      default:
+        MOZ_CRASH("Unknown SIMD kind");
+    }
+}
+
+// Lower an MSimdSwizzle. Integer vectors go to LSimdSwizzleI and Float32x4
+// vectors go to LSimdSwizzleF; both nodes have a single temp slot that is
+// filled with a bogus temp whenever no scratch register is required.
+void
+LIRGeneratorX86Shared::visitSimdSwizzle(MSimdSwizzle* ins)
+{
+    const MIRType inputType = ins->input()->type();
+    MOZ_ASSERT(IsSimdType(inputType));
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    if (IsIntegerSimdType(inputType)) {
+        LUse vector = useRegisterAtStart(ins->input());
+        LSimdSwizzleI* swizzle = new (alloc()) LSimdSwizzleI(vector);
+        define(swizzle, ins);
+
+        // Only the pre-SSSE3 codegen (no vpshufb) needs a GPR temp.
+        LDefinition scratch = LDefinition::BogusTemp();
+        if (!Assembler::HasSSSE3()) {
+            // The temp must be a GPR usable with 8-bit loads and stores.
+#if defined(JS_CODEGEN_X86)
+            scratch = tempFixed(ebx);
+#else
+            scratch = temp();
+#endif
+        }
+        swizzle->setTemp(0, scratch);
+        return;
+    }
+
+    if (inputType == MIRType::Float32x4) {
+        LUse vector = useRegisterAtStart(ins->input());
+        LSimdSwizzleF* swizzle = new (alloc()) LSimdSwizzleF(vector);
+        define(swizzle, ins);
+        swizzle->setTemp(0, LDefinition::BogusTemp());
+        return;
+    }
+
+    MOZ_CRASH("Unknown SIMD kind when getting lane");
+}
+
+// Lower an MSimdShuffle of two vectors. Four-lane results (Int32x4 and
+// Float32x4) use LSimdShuffleX4; Int8x16 and Int16x8 results use the
+// generic LSimdShuffle node.
+void
+LIRGeneratorX86Shared::visitSimdShuffle(MSimdShuffle* ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
+    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    const bool isFourLane =
+        ins->type() == MIRType::Int32x4 || ins->type() == MIRType::Float32x4;
+
+    if (!isFourLane) {
+        MOZ_ASSERT(ins->type() == MIRType::Int8x16 || ins->type() == MIRType::Int16x8);
+        LSimdShuffle* shuffle = new (alloc()) LSimdShuffle();
+        shuffle->setOperand(0, useRegister(ins->lhs()));
+        shuffle->setOperand(1, useRegister(ins->rhs()));
+        define(shuffle, ins);
+
+        // The pshufb path wants an SSE temp, while the pre-SSSE3 codegen
+        // wants a GPR temp.
+        LDefinition scratch;
+        if (Assembler::HasSSSE3()) {
+            scratch = temp(LDefinition::SIMD128INT);
+        } else {
+            // The temp must be a GPR usable with 8-bit loads and stores.
+#if defined(JS_CODEGEN_X86)
+            scratch = tempFixed(ebx);
+#else
+            scratch = temp();
+#endif
+        }
+        shuffle->setTemp(0, scratch);
+        return;
+    }
+
+    // Lane indices below 4 select from the left-hand vector; count how many
+    // of the four output lanes do so.
+    uint32_t lanesFromLHS = 0;
+    for (unsigned lane = 0; lane < 4; lane++) {
+        if (ins->lane(lane) < 4)
+            lanesFromLHS++;
+    }
+
+    LSimdShuffleX4* shuffle = new (alloc()) LSimdShuffleX4();
+    lowerForFPU(shuffle, ins, ins->lhs(), ins->rhs());
+
+    // See codegen for requirements details.
+    LDefinition rhsCopy = LDefinition::BogusTemp();
+    if (lanesFromLHS == 3)
+        rhsCopy = tempCopy(ins->rhs(), 1);
+    shuffle->setTemp(0, rhsCopy);
+}
+
+// Lower an MSimdGeneralShuffle. The LIR node takes the input vectors as its
+// first operands, followed by one operand per lane index, and gets a
+// Bailout_BoundsCheck snapshot.
+void
+LIRGeneratorX86Shared::visitSimdGeneralShuffle(MSimdGeneralShuffle* ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    LSimdGeneralShuffleBase* shuffle;
+    if (IsIntegerSimdType(ins->type())) {
+#if defined(JS_CODEGEN_X86)
+        // The temp register must be usable with 8-bit load and store
+        // instructions, so one of %eax-%edx.
+        LDefinition scratch = (ins->type() == MIRType::Int8x16) ? tempFixed(ebx) : temp();
+#else
+        LDefinition scratch = temp();
+#endif
+        shuffle = new (alloc()) LSimdGeneralShuffleI(scratch);
+    } else if (ins->type() == MIRType::Float32x4) {
+        shuffle = new (alloc()) LSimdGeneralShuffleF(temp());
+    } else {
+        MOZ_CRASH("Unknown SIMD kind when doing a shuffle");
+    }
+
+    const unsigned vectorCount = ins->numVectors();
+    const unsigned laneCount = ins->numLanes();
+
+    // Give up on this instruction if operand storage can't be set up.
+    if (!shuffle->init(alloc(), vectorCount + laneCount))
+        return;
+
+    // Operands [0, vectorCount): the vectors being shuffled.
+    for (unsigned v = 0; v < vectorCount; v++) {
+        MOZ_ASSERT(IsSimdType(ins->vector(v)->type()));
+        shuffle->setOperand(v, useRegister(ins->vector(v)));
+    }
+
+    // Operands [vectorCount, vectorCount + laneCount): the lane selectors.
+    for (unsigned l = 0; l < laneCount; l++) {
+        MOZ_ASSERT(ins->lane(l)->type() == MIRType::Int32);
+        // There can be as many as 16 lane arguments, so it isn't safe to
+        // assume each one ends up with an allocated register.
+        shuffle->setOperand(vectorCount + l, use(ins->lane(l)));
+    }
+
+    assignSnapshot(shuffle, Bailout_BoundsCheck);
+    define(shuffle, ins);
+}
+
+// Lower an MCopySign to LCopySignD (double operands) or LCopySignF
+// (otherwise). Both operands and the result share one floating-point type.
+void
+LIRGeneratorX86Shared::visitCopySign(MCopySign* ins)
+{
+    MDefinition* lhs = ins->lhs();
+    MDefinition* rhs = ins->rhs();
+
+    MOZ_ASSERT(IsFloatingPointType(lhs->type()));
+    MOZ_ASSERT(lhs->type() == rhs->type());
+    MOZ_ASSERT(lhs->type() == ins->type());
+
+    LInstructionHelper<1, 2, 2>* copySign;
+    if (lhs->type() == MIRType::Double) {
+        copySign = new(alloc()) LCopySignD();
+    } else {
+        copySign = new(alloc()) LCopySignF();
+    }
+
+    // Same idea as lowerForFPU, except that rhs must live in a FP register
+    // as well.
+    copySign->setOperand(0, useRegisterAtStart(lhs));
+    if (lhs == rhs) {
+        // Both operands are the same definition, so rhs is used at start
+        // just like lhs.
+        copySign->setOperand(1, useRegisterAtStart(rhs));
+    } else {
+        copySign->setOperand(1, useRegister(rhs));
+    }
+
+    // With AVX the output is a plain definition; without it the output
+    // reuses operand 0.
+    if (Assembler::HasAVX()) {
+        define(copySign, ins);
+    } else {
+        defineReuseInput(copySign, ins, 0);
+    }
+}