/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * vim: set ts=8 sts=4 et sw=4 tw=99: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "jit/LoopUnroller.h" #include "jit/MIRGraph.h" using namespace js; using namespace js::jit; using mozilla::ArrayLength; namespace { struct LoopUnroller { typedef HashMap, SystemAllocPolicy> DefinitionMap; explicit LoopUnroller(MIRGraph& graph) : graph(graph), alloc(graph.alloc()), header(nullptr), backedge(nullptr), unrolledHeader(nullptr), unrolledBackedge(nullptr), oldPreheader(nullptr), newPreheader(nullptr) {} MIRGraph& graph; TempAllocator& alloc; // Header and body of the original loop. MBasicBlock* header; MBasicBlock* backedge; // Header and body of the unrolled loop. MBasicBlock* unrolledHeader; MBasicBlock* unrolledBackedge; // Old and new preheaders. The old preheader starts out associated with the // original loop, but becomes the preheader of the new loop. The new // preheader will be given to the original loop. MBasicBlock* oldPreheader; MBasicBlock* newPreheader; // Map terms in the original loop to terms in the current unrolled iteration. DefinitionMap unrolledDefinitions; MDefinition* getReplacementDefinition(MDefinition* def); MResumePoint* makeReplacementResumePoint(MBasicBlock* block, MResumePoint* rp); bool makeReplacementInstruction(MInstruction* ins); void go(LoopIterationBound* bound); }; } // namespace MDefinition* LoopUnroller::getReplacementDefinition(MDefinition* def) { if (def->block()->id() < header->id()) { // The definition is loop invariant. return def; } DefinitionMap::Ptr p = unrolledDefinitions.lookup(def); if (!p) { // After phi analysis (TypeAnalyzer::replaceRedundantPhi) the resume // point at the start of a block can contain definitions from within // the block itself. MOZ_ASSERT(def->isConstant()); MConstant* constant = MConstant::Copy(alloc, def->toConstant()); oldPreheader->insertBefore(*oldPreheader->begin(), constant); return constant; } return p->value(); } bool LoopUnroller::makeReplacementInstruction(MInstruction* ins) { MDefinitionVector inputs(alloc); for (size_t i = 0; i < ins->numOperands(); i++) { MDefinition* old = ins->getOperand(i); MDefinition* replacement = getReplacementDefinition(old); if (!inputs.append(replacement)) return false; } MInstruction* clone = ins->clone(alloc, inputs); unrolledBackedge->add(clone); if (!unrolledDefinitions.putNew(ins, clone)) return false; if (MResumePoint* old = ins->resumePoint()) { MResumePoint* rp = makeReplacementResumePoint(unrolledBackedge, old); clone->setResumePoint(rp); } return true; } MResumePoint* LoopUnroller::makeReplacementResumePoint(MBasicBlock* block, MResumePoint* rp) { MDefinitionVector inputs(alloc); for (size_t i = 0; i < rp->numOperands(); i++) { MDefinition* old = rp->getOperand(i); MDefinition* replacement = old->isUnused() ? old : getReplacementDefinition(old); if (!inputs.append(replacement)) return nullptr; } MResumePoint* clone = MResumePoint::New(alloc, block, rp, inputs); if (!clone) return nullptr; return clone; } void LoopUnroller::go(LoopIterationBound* bound) { // For now we always unroll loops the same number of times. static const size_t UnrollCount = 10; JitSpew(JitSpew_Unrolling, "Attempting to unroll loop"); header = bound->header; // UCE might have determined this isn't actually a loop. if (!header->isLoopHeader()) return; backedge = header->backedge(); oldPreheader = header->loopPredecessor(); MOZ_ASSERT(oldPreheader->numSuccessors() == 1); // Only unroll loops with two blocks: an initial one ending with the // bound's test, and the body ending with the backedge. MTest* test = bound->test; if (header->lastIns() != test) return; if (test->ifTrue() == backedge) { if (test->ifFalse()->id() <= backedge->id()) return; } else if (test->ifFalse() == backedge) { if (test->ifTrue()->id() <= backedge->id()) return; } else { return; } if (backedge->numPredecessors() != 1 || backedge->numSuccessors() != 1) return; MOZ_ASSERT(backedge->phisEmpty()); MBasicBlock* bodyBlocks[] = { header, backedge }; // All instructions in the header and body must be clonable. for (size_t i = 0; i < ArrayLength(bodyBlocks); i++) { MBasicBlock* block = bodyBlocks[i]; for (MInstructionIterator iter(block->begin()); iter != block->end(); iter++) { MInstruction* ins = *iter; if (ins->canClone()) continue; if (ins->isTest() || ins->isGoto() || ins->isInterruptCheck()) continue; #ifdef JS_JITSPEW JitSpew(JitSpew_Unrolling, "Aborting: can't clone instruction %s", ins->opName()); #endif return; } } // Compute the linear inequality we will use for exiting the unrolled loop: // // iterationBound - iterationCount - UnrollCount >= 0 // LinearSum remainingIterationsInequality(bound->boundSum); if (!remainingIterationsInequality.add(bound->currentSum, -1)) return; if (!remainingIterationsInequality.add(-int32_t(UnrollCount))) return; // Terms in the inequality need to be either loop invariant or phis from // the original header. for (size_t i = 0; i < remainingIterationsInequality.numTerms(); i++) { MDefinition* def = remainingIterationsInequality.term(i).term; if (def->isDiscarded()) return; if (def->block()->id() < header->id()) continue; if (def->block() == header && def->isPhi()) continue; return; } // OK, we've checked everything, now unroll the loop. JitSpew(JitSpew_Unrolling, "Unrolling loop"); // The old preheader will go before the unrolled loop, and the old loop // will need a new empty preheader. const CompileInfo& info = oldPreheader->info(); if (header->trackedPc()) { unrolledHeader = MBasicBlock::New(graph, nullptr, info, oldPreheader, header->trackedSite(), MBasicBlock::LOOP_HEADER); unrolledBackedge = MBasicBlock::New(graph, nullptr, info, unrolledHeader, backedge->trackedSite(), MBasicBlock::NORMAL); newPreheader = MBasicBlock::New(graph, nullptr, info, unrolledHeader, oldPreheader->trackedSite(), MBasicBlock::NORMAL); } else { unrolledHeader = MBasicBlock::New(graph, info, oldPreheader, MBasicBlock::LOOP_HEADER); unrolledBackedge = MBasicBlock::New(graph, info, unrolledHeader, MBasicBlock::NORMAL); newPreheader = MBasicBlock::New(graph, info, unrolledHeader, MBasicBlock::NORMAL); } unrolledHeader->discardAllResumePoints(); unrolledBackedge->discardAllResumePoints(); newPreheader->discardAllResumePoints(); // Insert new blocks at their RPO position, and update block ids. graph.insertBlockAfter(oldPreheader, unrolledHeader); graph.insertBlockAfter(unrolledHeader, unrolledBackedge); graph.insertBlockAfter(unrolledBackedge, newPreheader); graph.renumberBlocksAfter(oldPreheader); // We don't tolerate allocation failure after this point. // TODO: This is a bit drastic, is it possible to improve this? AutoEnterOOMUnsafeRegion oomUnsafe; if (!unrolledDefinitions.init()) oomUnsafe.crash("LoopUnroller::go"); // Add phis to the unrolled loop header which correspond to the phis in the // original loop header. MOZ_ASSERT(header->getPredecessor(0) == oldPreheader); for (MPhiIterator iter(header->phisBegin()); iter != header->phisEnd(); iter++) { MPhi* old = *iter; MOZ_ASSERT(old->numOperands() == 2); MPhi* phi = MPhi::New(alloc); phi->setResultType(old->type()); phi->setResultTypeSet(old->resultTypeSet()); phi->setRange(old->range()); unrolledHeader->addPhi(phi); if (!phi->reserveLength(2)) oomUnsafe.crash("LoopUnroller::go"); // Set the first input for the phi for now. We'll set the second after // finishing the unroll. phi->addInput(old->getOperand(0)); // The old phi will now take the value produced by the unrolled loop. old->replaceOperand(0, phi); if (!unrolledDefinitions.putNew(old, phi)) oomUnsafe.crash("LoopUnroller::go"); } // The loop condition can bail out on e.g. integer overflow, so make a // resume point based on the initial resume point of the original header. MResumePoint* headerResumePoint = header->entryResumePoint(); if (headerResumePoint) { MResumePoint* rp = makeReplacementResumePoint(unrolledHeader, headerResumePoint); if (!rp) oomUnsafe.crash("LoopUnroller::makeReplacementResumePoint"); unrolledHeader->setEntryResumePoint(rp); // Perform an interrupt check at the start of the unrolled loop. unrolledHeader->add(MInterruptCheck::New(alloc)); } // Generate code for the test in the unrolled loop. for (size_t i = 0; i < remainingIterationsInequality.numTerms(); i++) { MDefinition* def = remainingIterationsInequality.term(i).term; MDefinition* replacement = getReplacementDefinition(def); remainingIterationsInequality.replaceTerm(i, replacement); } MCompare* compare = ConvertLinearInequality(alloc, unrolledHeader, remainingIterationsInequality); MTest* unrolledTest = MTest::New(alloc, compare, unrolledBackedge, newPreheader); unrolledHeader->end(unrolledTest); // Make an entry resume point for the unrolled body. The unrolled header // does not have side effects on stack values, even if the original loop // header does, so use the same resume point as for the unrolled header. if (headerResumePoint) { MResumePoint* rp = makeReplacementResumePoint(unrolledBackedge, headerResumePoint); if (!rp) oomUnsafe.crash("LoopUnroller::makeReplacementResumePoint"); unrolledBackedge->setEntryResumePoint(rp); } // Make an entry resume point for the new preheader. There are no // instructions which use this but some other stuff wants one to be here. if (headerResumePoint) { MResumePoint* rp = makeReplacementResumePoint(newPreheader, headerResumePoint); if (!rp) oomUnsafe.crash("LoopUnroller::makeReplacementResumePoint"); newPreheader->setEntryResumePoint(rp); } // Generate the unrolled code. MOZ_ASSERT(UnrollCount > 1); size_t unrollIndex = 0; while (true) { // Clone the contents of the original loop into the unrolled loop body. for (size_t i = 0; i < ArrayLength(bodyBlocks); i++) { MBasicBlock* block = bodyBlocks[i]; for (MInstructionIterator iter(block->begin()); iter != block->end(); iter++) { MInstruction* ins = *iter; if (ins->canClone()) { if (!makeReplacementInstruction(*iter)) oomUnsafe.crash("LoopUnroller::makeReplacementDefinition"); } else { // Control instructions are handled separately. MOZ_ASSERT(ins->isTest() || ins->isGoto() || ins->isInterruptCheck()); } } } // Compute the value of each loop header phi after the execution of // this unrolled iteration. MDefinitionVector phiValues(alloc); MOZ_ASSERT(header->getPredecessor(1) == backedge); for (MPhiIterator iter(header->phisBegin()); iter != header->phisEnd(); iter++) { MPhi* old = *iter; MDefinition* oldInput = old->getOperand(1); if (!phiValues.append(getReplacementDefinition(oldInput))) oomUnsafe.crash("LoopUnroller::go"); } unrolledDefinitions.clear(); if (unrollIndex == UnrollCount - 1) { // We're at the end of the last unrolled iteration, set the // backedge input for the unrolled loop phis. size_t phiIndex = 0; for (MPhiIterator iter(unrolledHeader->phisBegin()); iter != unrolledHeader->phisEnd(); iter++) { MPhi* phi = *iter; phi->addInput(phiValues[phiIndex++]); } MOZ_ASSERT(phiIndex == phiValues.length()); break; } // Update the map for the phis in the next iteration. size_t phiIndex = 0; for (MPhiIterator iter(header->phisBegin()); iter != header->phisEnd(); iter++) { MPhi* old = *iter; if (!unrolledDefinitions.putNew(old, phiValues[phiIndex++])) oomUnsafe.crash("LoopUnroller::go"); } MOZ_ASSERT(phiIndex == phiValues.length()); unrollIndex++; } MGoto* backedgeJump = MGoto::New(alloc, unrolledHeader); unrolledBackedge->end(backedgeJump); // Place the old preheader before the unrolled loop. MOZ_ASSERT(oldPreheader->lastIns()->isGoto()); oldPreheader->discardLastIns(); oldPreheader->end(MGoto::New(alloc, unrolledHeader)); // Place the new preheader before the original loop. newPreheader->end(MGoto::New(alloc, header)); // Cleanup the MIR graph. if (!unrolledHeader->addPredecessorWithoutPhis(unrolledBackedge)) oomUnsafe.crash("LoopUnroller::go"); header->replacePredecessor(oldPreheader, newPreheader); oldPreheader->setSuccessorWithPhis(unrolledHeader, 0); newPreheader->setSuccessorWithPhis(header, 0); unrolledBackedge->setSuccessorWithPhis(unrolledHeader, 1); } bool jit::UnrollLoops(MIRGraph& graph, const LoopIterationBoundVector& bounds) { if (bounds.empty()) return true; for (size_t i = 0; i < bounds.length(); i++) { LoopUnroller unroller(graph); unroller.go(bounds[i]); } // The MIR graph is valid, but now has several new blocks. Update or // recompute previous analysis information for the remaining optimization // passes. ClearDominatorTree(graph); if (!BuildDominatorTree(graph)) return false; return true; }