Diffstat (limited to 'js/src/jit/x86-shared/Lowering-x86-shared.cpp')
-rw-r--r--  js/src/jit/x86-shared/Lowering-x86-shared.cpp  1019
1 file changed, 1019 insertions, 0 deletions
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
new file mode 100644
index 000000000..8e820070a
--- /dev/null
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -0,0 +1,1019 @@

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/Lowering-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

#include "jit/MIR.h"

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::FloorLog2;
using mozilla::Swap;

LTableSwitch*
LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
                                       MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
}

LTableSwitchV*
LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)),
                                      temp(), tempDouble(), temp(), tableswitch);
}

void
LIRGeneratorX86Shared::visitGuardShape(MGuardShape* ins)
{
    MOZ_ASSERT(ins->object()->type() == MIRType::Object);

    LGuardShape* guard = new(alloc()) LGuardShape(useRegisterAtStart(ins->object()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->object());
}

void
LIRGeneratorX86Shared::visitGuardObjectGroup(MGuardObjectGroup* ins)
{
    MOZ_ASSERT(ins->object()->type() == MIRType::Object);

    LGuardObjectGroup* guard = new(alloc()) LGuardObjectGroup(useRegisterAtStart(ins->object()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->object());
}

void
LIRGeneratorX86Shared::visitPowHalf(MPowHalf* ins)
{
    MDefinition* input = ins->input();
    MOZ_ASSERT(input->type() == MIRType::Double);
    LPowHalfD* lir = new(alloc()) LPowHalfD(useRegisterAtStart(input));
    define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                     MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));

    // The shift amount must be a constant or in register ecx;
    // x86 can't shift by any other register.
    if (rhs->isConstant())
        ins->setOperand(1, useOrConstantAtStart(rhs));
    else
        ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));

    defineReuseInput(ins, mir, 0);
}
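A quick aside on the ecx constraint above: the variable-count forms of x86 shl/shr/sar take their count only in cl, and the hardware masks the count to the low five bits for 32-bit operands. A minimal sketch of the resulting semantics (illustration only, not part of this patch):

#include <cstdint>

// What a lowered 32-bit shift computes: the count ends up in cl and
// the CPU masks it modulo 32, so shifting by 33 behaves like shifting by 1.
uint32_t ShiftLeft32(uint32_t value, uint32_t count)
{
    return value << (count & 31);
}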
template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
{
    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
#if defined(JS_NUNBOX32)
    if (mir->isRotate())
        ins->setTemp(0, temp());
#endif

    static_assert(LShiftI64::Rhs == INT64_PIECES, "Assume Rhs is located at INT64_PIECES.");
    static_assert(LRotateI64::Count == INT64_PIECES, "Assume Count is located at INT64_PIECES.");

    // The shift amount must be a constant or in register ecx;
    // x86 can't shift by any other register.
    if (rhs->isConstant()) {
        ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
    } else {
        // The operands are int64, but we only care about the lower 32 bits of
        // the RHS. On 32-bit, the code below will load that part into ecx and
        // discard the upper half.
        ensureDefined(rhs);
        LUse use(ecx);
        use.setVirtualRegister(rhs->virtualRegister());
        ins->setOperand(INT64_PIECES, use);
    }

    defineInt64ReuseInput(ins, mir, 0);
}

template void LIRGeneratorX86Shared::lowerForShiftInt64(
    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 0>* ins, MDefinition* mir,
    MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForShiftInt64(
    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 1>* ins, MDefinition* mir,
    MDefinition* lhs, MDefinition* rhs);

void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                                   MDefinition* input)
{
    ins->setOperand(0, useRegisterAtStart(input));
    defineReuseInput(ins, mir, 0);
}

void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
    defineReuseInput(ins, mir, 0);
}

template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    // Without AVX, we'll need to use the x86 encodings where one of the
    // inputs must be the same location as the output.
    if (!Assembler::HasAVX()) {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
        defineReuseInput(ins, mir, 0);
    } else {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, useAtStart(rhs));
        define(ins, mir);
    }
}

template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);
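For context on the non-AVX branch of lowerForFPU: legacy SSE arithmetic is destructive, overwriting its first source, while AVX's VEX encoding adds a separate destination, which is why only the non-AVX path has to reuse the input register. A sketch with intrinsics (illustration only; compiler flags decide which encoding is emitted):

#include <xmmintrin.h>

// Compiled for plain SSE this becomes "addps lhs, rhs", which clobbers
// the lhs register -- hence defineReuseInput above. Compiled with AVX
// it becomes "vaddps dst, lhs, rhs" with an independent destination.
__m128 AddFx4(__m128 lhs, __m128 rhs)
{
    return _mm_add_ps(lhs, rhs);
}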
void
LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    lowerForALU(ins, mir, lhs, rhs);
}

void
LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    // Swap the operands around to fit the instructions that x86 actually has.
    // We do this here, before register allocation, so that we don't need
    // temporaries and copying afterwards.
    switch (mir->operation()) {
      case MSimdBinaryComp::greaterThan:
      case MSimdBinaryComp::greaterThanOrEqual:
        mir->reverse();
        Swap(lhs, rhs);
        break;
      default:
        break;
    }

    lowerForFPU(ins, mir, lhs, rhs);
}

void
LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                               MDefinition* lhs, MDefinition* rhs)
{
    baab->setOperand(0, useRegisterAtStart(lhs));
    baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
    add(baab, mir);
}

void
LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
{
    // Note: If we need a negative zero check, lhs is used twice.
    LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
    LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
    if (mul->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineReuseInput(lir, mul, 0);
}

void
LIRGeneratorX86Shared::lowerDivI(MDiv* div)
{
    if (div->isUnsigned()) {
        lowerUDiv(div);
        return;
    }

    // Division instructions are slow. Division by constant denominators can be
    // rewritten to use other instructions.
    if (div->rhs()->isConstant()) {
        int32_t rhs = div->rhs()->toConstant()->toInt32();

        // Division by powers of two can be done by shifting, and division by
        // other numbers can be done by a reciprocal multiplication technique.
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LAllocation lhs = useRegisterAtStart(div->lhs());
            LDivPowTwoI* lir;
            if (!div->canBeNegativeDividend()) {
                // The numerator is unsigned, so it does not need adjusting.
                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
            } else {
                // The numerator is signed and needs adjusting, which requires
                // an extra copy of lhs in a register.
                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
            }
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
            return;
        }
    }

    LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
                                    tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}
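The adjustment mentioned in the signed LDivPowTwoI path can be sketched in scalar code: an arithmetic shift alone rounds toward negative infinity, so a bias of 2^shift - 1 is added to negative numerators to get truncation toward zero. This illustrates the technique, not the exact emitted code; a negative divisor additionally negates the result:

#include <cstdint>

// Signed division by 2^shift with C-style truncation toward zero.
int32_t DivByPowTwo(int32_t x, uint32_t shift)
{
    // For negative x, add 2^shift - 1 before shifting; for x >= 0 the
    // bias is zero because x >> 31 is all zero bits.
    int32_t bias = (x >> 31) & ((int32_t(1) << shift) - 1);
    return (x + bias) >> shift;
}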
void
LIRGeneratorX86Shared::lowerModI(MMod* mod)
{
    if (mod->isUnsigned()) {
        lowerUMod(mod);
        return;
    }

    if (mod->rhs()->isConstant()) {
        int32_t rhs = mod->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
            return;
        }
    }

    LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()),
                                    useRegister(mod->rhs()),
                                    tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}
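Similarly, the LModPowTwoI case reduces to a mask plus a sign fix-up, since a C-style remainder takes the sign of the dividend. A scalar sketch of the idea (illustration only):

#include <cstdint>

// x % 2^shift with the sign of x, matching JS/C semantics.
int32_t ModByPowTwo(int32_t x, uint32_t shift)
{
    uint32_t mask = (uint32_t(1) << shift) - 1;
    uint32_t ux = uint32_t(x);
    if (x < 0) {
        // Negate, mask, negate back so the remainder keeps x's sign.
        return -int32_t((0u - ux) & mask);
    }
    return int32_t(ux & mask);
}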
void
LIRGeneratorX86Shared::visitWasmSelect(MWasmSelect* ins)
{
    if (ins->type() == MIRType::Int64) {
        auto* lir = new(alloc()) LWasmSelectI64(useInt64RegisterAtStart(ins->trueExpr()),
                                                useInt64(ins->falseExpr()),
                                                useRegister(ins->condExpr()));

        defineInt64ReuseInput(lir, ins, LWasmSelectI64::TrueExprIndex);
        return;
    }

    auto* lir = new(alloc()) LWasmSelect(useRegisterAtStart(ins->trueExpr()),
                                         use(ins->falseExpr()),
                                         useRegister(ins->condExpr()));

    defineReuseInput(lir, ins, LWasmSelect::TrueExprIndex);
}

void
LIRGeneratorX86Shared::visitAsmJSNeg(MAsmJSNeg* ins)
{
    switch (ins->type()) {
      case MIRType::Int32:
        defineReuseInput(new(alloc()) LNegI(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType::Float32:
        defineReuseInput(new(alloc()) LNegF(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType::Double:
        defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0);
        break;
      default:
        MOZ_CRASH();
    }
}

void
LIRGeneratorX86Shared::lowerWasmLoad(MWasmLoad* ins)
{
    MOZ_ASSERT(ins->type() != MIRType::Int64);

    MDefinition* base = ins->base();
    MOZ_ASSERT(base->type() == MIRType::Int32);

    auto* lir = new(alloc()) LWasmLoad(useRegisterOrZeroAtStart(base));
    define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
{
    if (div->rhs()->isConstant()) {
        uint32_t rhs = div->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(rhs);

        LAllocation lhs = useRegisterAtStart(div->lhs());
        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false);
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()),
                                                                      rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()),
                                              useRegister(div->rhs()),
                                              tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void
LIRGeneratorX86Shared::lowerUMod(MMod* mod)
{
    if (mod->rhs()->isConstant()) {
        uint32_t rhs = mod->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(rhs);

        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()),
                                                                      rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()),
                                              useRegister(mod->rhs()),
                                              tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}
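The LUDivOrModConstant nodes above stand for the reciprocal-multiplication technique: dividing by a constant becomes a widening multiply by a precomputed "magic" constant followed by a shift, using the high half of the product that imul leaves in edx (which is why the lowering pins eax and edx). For example, for an unsigned divide by 5, using the standard constant from the Hacker's Delight construction (illustration only):

#include <cstdint>

// floor(x / 5) for every uint32_t x: multiply by ceil(2^34 / 5) and
// keep bits [63:34] of the 64-bit product.
uint32_t DivBy5(uint32_t x)
{
    return uint32_t((uint64_t(x) * 0xCCCCCCCDu) >> 34);
}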
void
LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir)
{
    MDefinition* lhs = mir->lhs();
    MDefinition* rhs = mir->rhs();

    MOZ_ASSERT(lhs->type() == MIRType::Int32);
    MOZ_ASSERT(rhs->type() == MIRType::Int32);
    MOZ_ASSERT(mir->type() == MIRType::Double);

#ifdef JS_CODEGEN_X64
    MOZ_ASSERT(ecx == rcx);
#endif

    LUse lhsUse = useRegisterAtStart(lhs);
    LAllocation rhsAlloc = rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx);

    LUrshD* lir = new(alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
    define(lir, mir);
}

void
LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType::Double);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
    define(new(alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType::Float32);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
    define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins,
                                                             bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // If the target is a floating register then we need a temp at the
    // lower level; that temp must be eax.
    //
    // Otherwise the target (if used) is an integer register, which
    // must be eax. If the target is not used the machine code will
    // still clobber eax, so just pretend it's used.
    //
    // oldval must be in a register.
    //
    // newval must be in a register. If the source is a byte array
    // then newval must be a register that has a byte size: on x86
    // this must be ebx, ecx, or edx (eax is taken for the output).
    //
    // Bug #1077036 describes some further optimization opportunities.

    bool fixedOutput = false;
    LDefinition tempDef = LDefinition::BogusTemp();
    LAllocation newval;
    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        tempDef = tempFixed(eax);
        newval = useRegister(ins->newval());
    } else {
        fixedOutput = true;
        if (useI386ByteRegisters && ins->isByteArray())
            newval = useFixed(ins->newval(), ebx);
        else
            newval = useRegister(ins->newval());
    }

    const LAllocation oldval = useRegister(ins->oldval());

    LCompareExchangeTypedArrayElement* lir =
        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins,
                                                            bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());
    const LAllocation value = useRegister(ins->value());

    // The underlying instruction is XCHG, which can operate on any
    // register.
    //
    // If the target is a floating register (for Uint32) then we need
    // a temp into which to exchange.
    //
    // If the source is a byte array then we need a register that has
    // a byte size; in this case -- on x86 only -- pin the output to
    // an appropriate register and use that as a temp in the back-end.

    LDefinition tempDef = LDefinition::BogusTemp();
    if (ins->arrayType() == Scalar::Uint32) {
        // This restriction is bug 1077305.
        MOZ_ASSERT(ins->type() == MIRType::Double);
        tempDef = temp();
    }

    LAtomicExchangeTypedArrayElement* lir =
        new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

    if (useI386ByteRegisters && ins->isByteArray())
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}
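The operation lowered by lowerCompareExchangeTypedArrayElement has the shape of std::atomic's compare-exchange, and x86 CMPXCHG hard-wires both the expected value and the returned old value in eax, which is what the fixed-register choices above encode. A behavioral sketch (illustration only; this helper is not SpiderMonkey API):

#include <atomic>
#include <cstdint>

// Returns the element's previous value, as Atomics.compareExchange does.
// On x86 this compiles to LOCK CMPXCHG with `expected` living in eax.
uint32_t CompareExchange(std::atomic<uint32_t>& cell, uint32_t expected, uint32_t replacement)
{
    cell.compare_exchange_strong(expected, replacement);
    return expected; // updated to the observed value on failure
}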
void
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
                                                         bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // Case 1: the result of the operation is not used.
    //
    // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
    // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.

    if (!ins->hasUses()) {
        LAllocation value;
        if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
            value = useFixed(ins->value(), ebx);
        else
            value = useRegisterOrConstant(ins->value());

        LAtomicTypedArrayElementBinopForEffect* lir =
            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);

        add(lir, ins);
        return;
    }

    // Case 2: the result of the operation is used.
    //
    // For ADD and SUB we'll use XADD:
    //
    //    movl       src, output
    //    lock xaddl output, mem
    //
    // For the 8-bit variants XADD needs a byte register for the output.
    //
    // For AND/OR/XOR we need to use a CMPXCHG loop:
    //
    //    movl          *mem, eax
    // L: mov           eax, temp
    //    andl          src, temp
    //    lock cmpxchg  temp, mem  ; reads eax also
    //    jnz           L
    //    ; result in eax
    //
    // Note the placement of L: cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
    // top of the loop would be redundant.
    //
    // If the array is not a uint32 array then:
    //  - eax should be the output (one result of the cmpxchg)
    //  - there is a temp, which must have a byte register if
    //    the array has 1-byte elements
    //
    // If the array is a uint32 array then:
    //  - eax is the first temp
    //  - we also need a second temp
    //
    // There are optimization opportunities:
    //  - better register allocation in the x86 8-bit case, Bug #1077036.

    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
    bool fixedOutput = true;
    bool reuseInput = false;
    LDefinition tempDef1 = LDefinition::BogusTemp();
    LDefinition tempDef2 = LDefinition::BogusTemp();
    LAllocation value;

    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        value = useRegisterOrConstant(ins->value());
        fixedOutput = false;
        if (bitOp) {
            tempDef1 = tempFixed(eax);
            tempDef2 = temp();
        } else {
            tempDef1 = temp();
        }
    } else if (useI386ByteRegisters && ins->isByteArray()) {
        if (ins->value()->isConstant())
            value = useRegisterOrConstant(ins->value());
        else
            value = useFixed(ins->value(), ebx);
        if (bitOp)
            tempDef1 = tempFixed(ecx);
    } else if (bitOp) {
        value = useRegisterOrConstant(ins->value());
        tempDef1 = temp();
    } else if (ins->value()->isConstant()) {
        fixedOutput = false;
        value = useRegisterOrConstant(ins->value());
    } else {
        fixedOutput = false;
        reuseInput = true;
        value = useRegisterAtStart(ins->value());
    }

    LAtomicTypedArrayElementBinop* lir =
        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else if (reuseInput)
        defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
    else
        define(lir, ins);
}
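The CMPXCHG loop described in the Case 2 comment maps directly onto a compare-exchange retry loop; ADD and SUB avoid it because LOCK XADD exists, but AND/OR/XOR have no fetch-op instruction on x86. A sketch in portable C++ (illustration only):

#include <atomic>
#include <cstdint>

// Emulates the "lock cmpxchg" based fetch-and: on failure,
// compare_exchange_weak reloads `old` with the current contents,
// mirroring the comment about not reloading at the top of the loop.
uint32_t AtomicFetchAnd(std::atomic<uint32_t>& cell, uint32_t value)
{
    uint32_t old = cell.load();
    while (!cell.compare_exchange_weak(old, old & value)) {
        // Retry with the freshly observed value already in `old`.
    }
    return old;
}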
void
LIRGeneratorX86Shared::visitSimdInsertElement(MSimdInsertElement* ins)
{
    MOZ_ASSERT(IsSimdType(ins->type()));

    LUse vec = useRegisterAtStart(ins->vector());
    LUse val = useRegister(ins->value());
    switch (ins->type()) {
      case MIRType::Int8x16:
      case MIRType::Bool8x16:
        // When SSE 4.1 is not available, we need to go via the stack.
        // This requires the value being inserted to be in %eax-%edx.
        // Pick %ebx since other instructions use %eax or %ecx hard-wired.
#if defined(JS_CODEGEN_X86)
        if (!AssemblerX86Shared::HasSSE41())
            val = useFixed(ins->value(), ebx);
#endif
        defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0);
        break;
      case MIRType::Int16x8:
      case MIRType::Int32x4:
      case MIRType::Bool16x8:
      case MIRType::Bool32x4:
        defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0);
        break;
      case MIRType::Float32x4:
        defineReuseInput(new(alloc()) LSimdInsertElementF(vec, val), ins, 0);
        break;
      default:
        MOZ_CRASH("Unknown SIMD kind when generating constant");
    }
}

void
LIRGeneratorX86Shared::visitSimdExtractElement(MSimdExtractElement* ins)
{
    MOZ_ASSERT(IsSimdType(ins->input()->type()));
    MOZ_ASSERT(!IsSimdType(ins->type()));

    switch (ins->input()->type()) {
      case MIRType::Int8x16:
      case MIRType::Int16x8:
      case MIRType::Int32x4: {
        MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable);
        LUse use = useRegisterAtStart(ins->input());
        if (ins->type() == MIRType::Double) {
            // Extract a Uint32 lane into a double.
            MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned);
            define(new (alloc()) LSimdExtractElementU2D(use, temp()), ins);
        } else {
            auto* lir = new (alloc()) LSimdExtractElementI(use);
#if defined(JS_CODEGEN_X86)
            // On x86 (32-bit), we may need to use movsbl or movzbl instructions
            // to sign or zero extend the extracted lane to 32 bits. The 8-bit
            // versions of these instructions require a source register that is
            // %al, %bl, %cl, or %dl.
            // Fix it to %ebx since we can't express that constraint better.
            if (ins->input()->type() == MIRType::Int8x16) {
                defineFixed(lir, ins, LAllocation(AnyRegister(ebx)));
                return;
            }
#endif
            define(lir, ins);
        }
        break;
      }
      case MIRType::Float32x4: {
        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
        LUse use = useRegisterAtStart(ins->input());
        define(new(alloc()) LSimdExtractElementF(use), ins);
        break;
      }
      case MIRType::Bool8x16:
      case MIRType::Bool16x8:
      case MIRType::Bool32x4: {
        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
        LUse use = useRegisterAtStart(ins->input());
        define(new(alloc()) LSimdExtractElementB(use), ins);
        break;
      }
      default:
        MOZ_CRASH("Unknown SIMD kind when extracting element");
    }
}
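The movsbl/movzbl distinction in the extract-element path is simply sign versus zero extension of a byte lane, and the 8-bit source operand must be one of the byte-addressable registers (al, bl, cl, dl), hence the fixed %ebx output. In scalar terms (illustration only):

#include <cstdint>

// movsbl: interpret the lane as signed and widen (0xFF -> -1).
int32_t SignExtendLane(uint8_t lane) { return int8_t(lane); }

// movzbl: interpret the lane as unsigned and widen (0xFF -> 255).
int32_t ZeroExtendLane(uint8_t lane) { return lane; }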
void
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    if (ins->isCommutative())
        ReorderCommutative(&lhs, &rhs, ins);

    switch (ins->type()) {
      case MIRType::Int8x16: {
        LSimdBinaryArithIx16* lir = new (alloc()) LSimdBinaryArithIx16();
        lir->setTemp(0, LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
      }

      case MIRType::Int16x8: {
        LSimdBinaryArithIx8* lir = new (alloc()) LSimdBinaryArithIx8();
        lir->setTemp(0, LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
      }

      case MIRType::Int32x4: {
        LSimdBinaryArithIx4* lir = new (alloc()) LSimdBinaryArithIx4();
        bool needsTemp =
            ins->operation() == MSimdBinaryArith::Op_mul && !MacroAssembler::HasSSE41();
        lir->setTemp(0, needsTemp ? temp(LDefinition::SIMD128INT) : LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
      }

      case MIRType::Float32x4: {
        LSimdBinaryArithFx4* lir = new (alloc()) LSimdBinaryArithFx4();

        bool needsTemp = ins->operation() == MSimdBinaryArith::Op_max ||
                         ins->operation() == MSimdBinaryArith::Op_minNum ||
                         ins->operation() == MSimdBinaryArith::Op_maxNum;
        lir->setTemp(0, needsTemp ? temp(LDefinition::SIMD128FLOAT) : LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
      }

      default:
        MOZ_CRASH("unknown simd type on binary arith operation");
    }
}

void
LIRGeneratorX86Shared::visitSimdBinarySaturating(MSimdBinarySaturating* ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    if (ins->isCommutative())
        ReorderCommutative(&lhs, &rhs, ins);

    LSimdBinarySaturating* lir = new (alloc()) LSimdBinarySaturating();
    lowerForFPU(lir, ins, lhs, rhs);
}

void
LIRGeneratorX86Shared::visitSimdSelect(MSimdSelect* ins)
{
    MOZ_ASSERT(IsSimdType(ins->type()));

    LSimdSelect* lins = new(alloc()) LSimdSelect;
    MDefinition* r0 = ins->getOperand(0);
    MDefinition* r1 = ins->getOperand(1);
    MDefinition* r2 = ins->getOperand(2);

    lins->setOperand(0, useRegister(r0));
    lins->setOperand(1, useRegister(r1));
    lins->setOperand(2, useRegister(r2));
    lins->setTemp(0, temp(LDefinition::SIMD128FLOAT));

    define(lins, ins);
}

void
LIRGeneratorX86Shared::visitSimdSplat(MSimdSplat* ins)
{
    LAllocation x = useRegisterAtStart(ins->getOperand(0));

    switch (ins->type()) {
      case MIRType::Int8x16:
        define(new (alloc()) LSimdSplatX16(x), ins);
        break;
      case MIRType::Int16x8:
        define(new (alloc()) LSimdSplatX8(x), ins);
        break;
      case MIRType::Int32x4:
      case MIRType::Float32x4:
      case MIRType::Bool8x16:
      case MIRType::Bool16x8:
      case MIRType::Bool32x4:
        // Use the SplatX4 instruction for all boolean splats. Since the input
        // value is a 32-bit int that is either 0 or -1, the X4 splat gives
        // the right result for all boolean geometries.
        // For floats, (Non-AVX) codegen actually wants the input and the output
        // to be in the same register, but we can't currently use
        // defineReuseInput because they have different types (scalar vs
        // vector), so a spill slot for one may not be suitable for the other.
        define(new (alloc()) LSimdSplatX4(x), ins);
        break;
      default:
        MOZ_CRASH("Unknown SIMD kind");
    }
}
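Why one SplatX4 serves every boolean geometry, as the comment in visitSimdSplat notes: booleans are materialized as 0 or -1, and broadcasting an all-ones (or all-zeroes) 32-bit value produces the same 128-bit pattern whether it is read as sixteen bytes, eight halves, or four words. A sketch (illustration only):

#include <cstdint>

// Broadcasting 0 or 0xFFFFFFFF as four 32-bit lanes yields a vector
// whose every byte is 0x00 or 0xFF, valid for any boolean lane width.
void SplatBool(uint32_t out[4], bool b)
{
    uint32_t v = b ? 0xFFFFFFFFu : 0u;
    for (int i = 0; i < 4; i++)
        out[i] = v;
}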
void
LIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4* ins)
{
    switch (ins->type()) {
      case MIRType::Float32x4: {
        // Ideally, x would be used at start and reused for the output, however
        // register allocation currently doesn't permit us to tie together two
        // virtual registers with different types.
        LAllocation x = useRegister(ins->getOperand(0));
        LAllocation y = useRegister(ins->getOperand(1));
        LAllocation z = useRegister(ins->getOperand(2));
        LAllocation w = useRegister(ins->getOperand(3));
        LDefinition t = temp(LDefinition::SIMD128FLOAT);
        define(new (alloc()) LSimdValueFloat32x4(x, y, z, w, t), ins);
        break;
      }
      case MIRType::Bool32x4:
      case MIRType::Int32x4: {
        // No defineReuseInput => useAtStart for everyone.
        LAllocation x = useRegisterAtStart(ins->getOperand(0));
        LAllocation y = useRegisterAtStart(ins->getOperand(1));
        LAllocation z = useRegisterAtStart(ins->getOperand(2));
        LAllocation w = useRegisterAtStart(ins->getOperand(3));
        define(new(alloc()) LSimdValueInt32x4(x, y, z, w), ins);
        break;
      }
      default:
        MOZ_CRASH("Unknown SIMD kind");
    }
}

void
LIRGeneratorX86Shared::visitSimdSwizzle(MSimdSwizzle* ins)
{
    MOZ_ASSERT(IsSimdType(ins->input()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    if (IsIntegerSimdType(ins->input()->type())) {
        LUse use = useRegisterAtStart(ins->input());
        LSimdSwizzleI* lir = new (alloc()) LSimdSwizzleI(use);
        define(lir, ins);
        // We need a GPR temp register for pre-SSSE3 codegen (no vpshufb).
        if (Assembler::HasSSSE3()) {
            lir->setTemp(0, LDefinition::BogusTemp());
        } else {
            // The temp must be a GPR usable with 8-bit loads and stores.
#if defined(JS_CODEGEN_X86)
            lir->setTemp(0, tempFixed(ebx));
#else
            lir->setTemp(0, temp());
#endif
        }
    } else if (ins->input()->type() == MIRType::Float32x4) {
        LUse use = useRegisterAtStart(ins->input());
        LSimdSwizzleF* lir = new (alloc()) LSimdSwizzleF(use);
        define(lir, ins);
        lir->setTemp(0, LDefinition::BogusTemp());
    } else {
        MOZ_CRASH("Unknown SIMD kind when getting lane");
    }
}

void
LIRGeneratorX86Shared::visitSimdShuffle(MSimdShuffle* ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));
    if (ins->type() == MIRType::Int32x4 || ins->type() == MIRType::Float32x4) {
        bool zFromLHS = ins->lane(2) < 4;
        bool wFromLHS = ins->lane(3) < 4;
        uint32_t lanesFromLHS = (ins->lane(0) < 4) + (ins->lane(1) < 4) + zFromLHS + wFromLHS;

        LSimdShuffleX4* lir = new (alloc()) LSimdShuffleX4();
        lowerForFPU(lir, ins, ins->lhs(), ins->rhs());

        // See codegen for requirements details.
        LDefinition temp =
            (lanesFromLHS == 3) ? tempCopy(ins->rhs(), 1) : LDefinition::BogusTemp();
        lir->setTemp(0, temp);
    } else {
        MOZ_ASSERT(ins->type() == MIRType::Int8x16 || ins->type() == MIRType::Int16x8);
        LSimdShuffle* lir = new (alloc()) LSimdShuffle();
        lir->setOperand(0, useRegister(ins->lhs()));
        lir->setOperand(1, useRegister(ins->rhs()));
        define(lir, ins);
        // We need a GPR temp register for pre-SSSE3 codegen, and an SSE temp
        // when using pshufb.
        if (Assembler::HasSSSE3()) {
            lir->setTemp(0, temp(LDefinition::SIMD128INT));
        } else {
            // The temp must be a GPR usable with 8-bit loads and stores.
#if defined(JS_CODEGEN_X86)
            lir->setTemp(0, tempFixed(ebx));
#else
            lir->setTemp(0, temp());
#endif
        }
    }
}
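The SSSE3 fast path referenced in visitSimdSwizzle and visitSimdShuffle is pshufb, a fully general byte shuffle: each control byte selects a source byte, or zeroes the lane when its high bit is set. Without it, codegen goes byte by byte through memory, which is what the 8-bit-capable GPR temp is for. An intrinsic sketch (illustration only; requires SSSE3):

#include <tmmintrin.h>

// pshufb/vpshufb: byte i of the result is input[control[i] & 15],
// or zero when control[i] has its high bit set.
__m128i SwizzleBytes(__m128i input, __m128i control)
{
    return _mm_shuffle_epi8(input, control);
}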
void
LIRGeneratorX86Shared::visitSimdGeneralShuffle(MSimdGeneralShuffle* ins)
{
    MOZ_ASSERT(IsSimdType(ins->type()));

    LSimdGeneralShuffleBase* lir;
    if (IsIntegerSimdType(ins->type())) {
#if defined(JS_CODEGEN_X86)
        // The temp register must be usable with 8-bit load and store
        // instructions, so one of %eax-%edx.
        LDefinition t;
        if (ins->type() == MIRType::Int8x16)
            t = tempFixed(ebx);
        else
            t = temp();
#else
        LDefinition t = temp();
#endif
        lir = new (alloc()) LSimdGeneralShuffleI(t);
    } else if (ins->type() == MIRType::Float32x4) {
        lir = new (alloc()) LSimdGeneralShuffleF(temp());
    } else {
        MOZ_CRASH("Unknown SIMD kind when doing a shuffle");
    }

    if (!lir->init(alloc(), ins->numVectors() + ins->numLanes()))
        return;

    for (unsigned i = 0; i < ins->numVectors(); i++) {
        MOZ_ASSERT(IsSimdType(ins->vector(i)->type()));
        lir->setOperand(i, useRegister(ins->vector(i)));
    }

    for (unsigned i = 0; i < ins->numLanes(); i++) {
        MOZ_ASSERT(ins->lane(i)->type() == MIRType::Int32);
        // Note that there can be up to 16 lane arguments, so we can't assume
        // that they all get an allocated register.
        lir->setOperand(i + ins->numVectors(), use(ins->lane(i)));
    }

    assignSnapshot(lir, Bailout_BoundsCheck);
    define(lir, ins);
}

void
LIRGeneratorX86Shared::visitCopySign(MCopySign* ins)
{
    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    MOZ_ASSERT(IsFloatingPointType(lhs->type()));
    MOZ_ASSERT(lhs->type() == rhs->type());
    MOZ_ASSERT(lhs->type() == ins->type());

    LInstructionHelper<1, 2, 2>* lir;
    if (lhs->type() == MIRType::Double)
        lir = new(alloc()) LCopySignD();
    else
        lir = new(alloc()) LCopySignF();

    // As in lowerForFPU, but rhs must be in an FP register too.
    lir->setOperand(0, useRegisterAtStart(lhs));
    lir->setOperand(1, lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs));
    if (!Assembler::HasAVX())
        defineReuseInput(lir, ins, 0);
    else
        define(lir, ins);
}
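Finally, the copysign lowering: the SSE codegen this feeds typically combines the operands with bit masks, keeping everything but the sign bit of lhs and OR-ing in the sign bit of rhs. A scalar sketch of that bit manipulation (illustration only):

#include <cstdint>
#include <cstring>

// copysign(lhs, rhs) via bit masks: magnitude of lhs, sign of rhs.
double CopySignBits(double lhs, double rhs)
{
    uint64_t l, r;
    std::memcpy(&l, &lhs, sizeof(l));
    std::memcpy(&r, &rhs, sizeof(r));
    uint64_t signBit = uint64_t(1) << 63;
    uint64_t bits = (l & ~signBit) | (r & signBit);
    double result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}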