/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts. BasicBlock *InvokeInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; // Split the landing pad. BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint; InnerResumeDest = OuterResumeDest->splitBasicBlock(SplitPoint, OuterResumeDest->getName() + ".body"); // The number of incoming edges we expect to the inner landing pad. const unsigned PHICapacity = 2; // Create corresponding new PHIs for all the PHIs in the outer landing pad. BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); BasicBlock::iterator I = OuterResumeDest->begin(); for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { PHINode *OuterPHI = cast<PHINode>(I); PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, OuterPHI->getName() + ".lpad-body", InsertPoint); OuterPHI->replaceAllUsesWith(InnerPHI); InnerPHI->addIncoming(OuterPHI, OuterResumeDest); } // Create a PHI for the exception values. InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body", InsertPoint); CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); // All done. return InnerResumeDest; }
/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock*> Preds, BranchInst *BI, Pass *P, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1, e = Preds.size(); i != e; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // Explicitly check the BB index here to handle duplicates in Preds. int Idx = PN->getBasicBlockIndex(Preds[i]); if (Idx >= 0) PN->removeIncomingValue(Idx, false); } } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } }
bool Scalarizer::visitPHINode(PHINode &PHI) { VectorType *VT = dyn_cast<VectorType>(PHI.getType()); if (!VT) return false; unsigned NumElems = VT->getNumElements(); IRBuilder<> Builder(PHI.getParent(), &PHI); ValueVector Res; Res.resize(NumElems); unsigned NumOps = PHI.getNumOperands(); for (unsigned I = 0; I < NumElems; ++I) Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps, PHI.getName() + ".i" + Twine(I)); for (unsigned I = 0; I < NumOps; ++I) { Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I)); BasicBlock *IncomingBlock = PHI.getIncomingBlock(I); for (unsigned J = 0; J < NumElems; ++J) cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock); } gather(&PHI, Res); return true; }
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. /// In particular, it does not preserve LoopSimplify (because it's /// complicated to handle the case where one of the edges being split /// is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; Loop *L = LI ? LI->getLoopFor(BB) : 0; bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); // Move the edges from Preds to point to NewBB instead of BB. // While here, if we need to preserve loop analyses, collect // some information about how this split will affect loops. bool HasLoopExit = false; bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; for (unsigned i = 0; i != NumPreds; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); if (LI) { // If we need to preserve LCSSA, determine if any of // the preds is a loop exit. if (PreserveLCSSA) if (Loop *PL = LI->getLoopFor(Preds[i])) if (!PL->contains(BB)) HasLoopExit = true; // If we need to preserve LoopInfo, note whether any of the // preds crosses an interesting loop boundary. if (L) { if (L->contains(Preds[i])) IsLoopEntry = false; else SplitMakesNewLoopHeader = true; } } } // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; if (L) { if (IsLoopEntry) { // Add the new block to the nearest enclosing loop (and not an // adjacent loop). To find this, examine each of the predecessors and // determine which loops enclose them, and select the most-nested loop // which contains the loop containing the block being split. Loop *InnermostPredLoop = 0; for (unsigned i = 0; i != NumPreds; ++i) if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { // Seek a loop which actually contains the block being split (to // avoid adjacent loops). while (PredLoop && !PredLoop->contains(BB)) PredLoop = PredLoop->getParentLoop(); // Select the most-nested of these loops which contains the block. if (PredLoop && PredLoop->contains(BB) && (!InnermostPredLoop || InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) InnermostPredLoop = PredLoop; } if (InnermostPredLoop) InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) L->moveToHeader(NewBB); } } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } return NewBB; }
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); DEBUG(L->dump()); DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" : dbgs() << "Using prolog remainder.\n"); // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) { DEBUG(dbgs() << "Not in simplify form!\n"); return false; } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. This needs // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. assert(!L->contains(LatchExit) && "one of the loop latch successors should be the exit block!"); // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder) && canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { DEBUG( dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"); return false; } // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). // We calculate the backedge count by using getExitCount on the Latch block, // which is proven to be the only exiting block in this loop. This is same as // calculating getBackedgeTakenCount on the loop (which computes SCEV for all // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) { DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) { DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) { DEBUG(dbgs() << "Count failed constraint on overflow trip count calculation.\n"); return false; } // Loop structure is the following: // // PreHeader // Header // ... // Latch // LatchExit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split LatchExit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... *NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // LatchExit LatchExit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. // To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) DT->changeImmediateDominator(NewExit, PreHeader); else DT->changeImmediateDominator(PrologExit, PreHeader); } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Now the loop blocks are cloned and the other exiting blocks from the // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the // values from the cloned region. Also update the dominator info for // OtherExits and their immediate successors, since we have new edges into // OtherExits. SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks; for (auto *BB : OtherExits) { for (auto &II : *BB) { // Given we preserve LCSSA form, we know that the values used outside the // loop will be used through these phi nodes at the exit blocks that are // transformed below. if (!isa<PHINode>(II)) break; PHINode *Phi = cast<PHINode>(&II); unsigned oldNumOperands = Phi->getNumIncomingValues(); // Add the incoming values from the remainder code to the end of the phi // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap[Phi->getIncomingValue(i)]; // newVal can be a constant or derived from values outside the loop, and // hence need not have a VMap value. if (!newVal) newVal = Phi->getIncomingValue(i); Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } } #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) for (BasicBlock *SuccBB : successors(BB)) { assert(!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!"); } #endif // Update the dominator info because the immediate dominator is no longer the // header of the original Loop. BB has edges both from L and remainder code. // Since the preheader determines which loop is run (L or directly jump to // the remainder code), we set the immediate dominator as the preheader. if (DT) { DT->changeImmediateDominator(BB, PreHeader); // Also update the IDom for immediate successors of BB. If the current // IDom is the header, update the IDom to be the preheader because that is // the nearest common dominator of all predecessors of SuccBB. We need to // check for IDom being the header because successors of exit blocks can // have edges from outside the loop, and we should not incorrectly update // the IDom in that case. for (BasicBlock *SuccBB: successors(BB)) if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { assert(!SuccBB->getSinglePredecessor() && "BB should be the IDom then!"); DT->changeImmediateDominator(SuccBB, PreHeader); } } } } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // LatchExit LatchExit // Rewrite the cloned instruction operands to use the values created when the // clone is created. for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the // parent loop, so the Scalar Evolution pass needs to be run again. if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); // Canonicalize to LoopSimplifyForm both original and remainder loops. We // cannot rely on the LoopUnrollPass to do this because it only does // canonicalization for parent/subloops and not the sibling loops. if (OtherExits.size() > 0) { // Generate dedicated exit blocks for the original loop, to preserve // LoopSimplifyForm. formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); // Generate dedicated exit blocks for the remainder loop if one exists, to // preserve LoopSimplifyForm. if (remainderLoop) formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); } if (remainderLoop && UnrollRemainder) { DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1, /*Force*/false, /*AllowRuntime*/false, /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true, /*PreserveOnlyFirst*/false, /*TripMultiple*/1, /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE, PreserveLCSSA); } NumRuntimeUnrolled++; return true; }
/// Update the PHI nodes in OrigBB to include the values coming from NewBB. /// This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock *> Preds, BranchInst *BI, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = nullptr; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (!PredSet.count(PN->getIncomingBlock(i))) continue; if (!InVal) InVal = PN->getIncomingValue(i); else if (InVal != PN->getIncomingValue(i)) { InVal = nullptr; break; } } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. // NOTE! This loop walks backwards for a reason! First off, this minimizes // the cost of removal if we end up removing a large number of values, and // second off, this ensures that the indices for the incoming values // aren't invalidated when we remove one. for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) if (PredSet.count(PN->getIncomingBlock(i))) PN->removeIncomingValue(i, false); // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); continue; } // If the values coming into the block are not the same, we need a new // PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); // NOTE! This loop walks backwards for a reason! First off, this minimizes // the cost of removal if we end up removing a large number of values, and // second off, this ensures that the indices for the incoming values aren't // invalidated when we remove one. for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { BasicBlock *IncomingBB = PN->getIncomingBlock(i); if (PredSet.count(IncomingBB)) { Value *V = PN->removeIncomingValue(i, false); NewPHI->addIncoming(V, IncomingBB); } } PN->addIncoming(NewPHI, NewBB); } }
/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { PHINode *PN = dyn_cast<PHINode>(Header->begin()); if (!PN) return; // No PHI nodes. // If the header node contains any PHI nodes, check to see if there is more // than one entry from outside the region. If so, we need to sever the // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; // If there is one (or fewer) predecessor from outside the region, we don't // need to do anything special. if (NumPredsOutsideRegion <= 1) return; } // Otherwise, we need to split the header block into two pieces: one // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; Blocks.remove(OldPred); Blocks.insert(NewBB); Header = NewBB; // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName() + ".ce", &NewBB->front()); PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; } } } } }
/// NormalizeLandingPads - Normalize and discover landing pads, noting them /// in the LandingPads set. A landing pad is normal if the only CFG edges /// that end at it are unwind edges from invoke instructions. If we inlined /// through an invoke we could have a normal branch from the previous /// unwind block through to the landing pad for the original invoke. /// Abnormal landing pads are fixed up by redirecting all unwind edges to /// a new basic block which falls through to the original. bool DwarfEHPrepare::NormalizeLandingPads() { bool Changed = false; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (!isa<InvokeInst>(TI)) continue; BasicBlock *LPad = TI->getSuccessor(1); // Skip landing pads that have already been normalized. if (LandingPads.count(LPad)) continue; // Check that only invoke unwind edges end at the landing pad. bool OnlyUnwoundTo = true; for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) { TerminatorInst *PT = (*PI)->getTerminator(); if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) { OnlyUnwoundTo = false; break; } } if (OnlyUnwoundTo) { // Only unwind edges lead to the landing pad. Remember the landing pad. LandingPads.insert(LPad); continue; } // At least one normal edge ends at the landing pad. Redirect the unwind // edges to a new basic block which falls through into this one. // Create the new basic block. BasicBlock *NewBB = BasicBlock::Create(F->getContext(), LPad->getName() + "_unwind_edge"); // Insert it into the function right before the original landing pad. LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); // Redirect unwind edges from the original landing pad to NewBB. for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { TerminatorInst *PT = (*PI++)->getTerminator(); if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad) // Unwind to the new block. PT->setSuccessor(1, NewBB); } // If there are any PHI nodes in LPad, we need to update them so that they // merge incoming values from NewBB instead. for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) { PHINode *PN = cast<PHINode>(II); pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); // Check to see if all of the values coming in via unwind edges are the // same. If so, we don't need to create a new PHI node. Value *InVal = PN->getIncomingValueForBlock(*PB); for (pred_iterator PI = PB; PI != PE; ++PI) { if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { InVal = 0; break; } } if (InVal == 0) { // Different unwind edges have different values. Create a new PHI node // in NewBB. PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind", NewBB); // Add an entry for each unwind edge, using the value from the old PHI. for (pred_iterator PI = PB; PI != PE; ++PI) NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); // Now use this new PHI as the common incoming value for NewBB in PN. InVal = NewPN; } // Revector exactly one entry in the PHI node to come from NewBB // and delete all other entries that come from unwind edges. If // there are both normal and unwind edges from the same predecessor, // this leaves an entry for the normal edge. for (pred_iterator PI = PB; PI != PE; ++PI) PN->removeIncomingValue(*PI); PN->addIncoming(InVal, NewBB); } // Add a fallthrough from NewBB to the original landing pad. BranchInst::Create(LPad, NewBB); // Now update DominatorTree and DominanceFrontier analysis information. if (DT) DT->splitBlock(NewBB); if (DF) DF->splitBlock(NewBB); // Remember the newly constructed landing pad. The original landing pad // LPad is no longer a landing pad now that all unwind edges have been // revectored to NewBB. LandingPads.insert(NewBB); ++NumLandingPadsSplit; Changed = true; } return Changed; }
/* * Renaming uses of V to uses of vSSA_PHI * The rule of renaming is: * - All uses of V in the dominator tree of vSSA_PHI are renamed * - Uses of V in the same basicblock of vSSA_PHI are only renamed if they are not in PHI functions * - Uses of V in the dominance frontier follow the same rules of sigma renaming */ void vSSA::renameUsesToPhi(Value *V, PHINode *phi) { // This vector of Instruction* points to the uses of operand. // This auxiliary vector of pointers is used because the use_iterators are invalidated when we do the renaming SmallVector<Instruction*, 25> usepointers; unsigned i = 0, n = V->getNumUses(); usepointers.resize(n); // This vector contains pointers to all sigmas that have its operand renamed to vSSA_phi // For them, we need to try to create phi functions again SmallVector<PHINode*, 25> sigmasRenamed; BasicBlock *BB_next = phi->getParent(); // Get the dominance frontier of the successor DominanceFrontier::iterator DF_BB = DF_->find(BB_next); for (Value::use_iterator uit = V->use_begin(), uend = V->use_end(); uit != uend; ++uit, ++i) usepointers[i] = dyn_cast<Instruction>(*uit); BasicBlock *BB_parent = phi->getParent(); for (i = 0; i < n; ++i) { // Check if the use is in the dominator tree of vSSA_PHI if (DT_->dominates(BB_parent, usepointers[i]->getParent())) { if (BB_parent != usepointers[i]->getParent()) { usepointers[i]->replaceUsesOfWith(V, phi); // If this use is in a sigma, we need to check whether phis creation are needed again for this sigma if (PHINode *sigma = dyn_cast<PHINode>(usepointers[i])) { if (sigma->getName().startswith(vSSA_SIG)) { sigmasRenamed.push_back(sigma); } } } else if (!isa<PHINode>(usepointers[i])) usepointers[i]->replaceUsesOfWith(V, phi); } // Check if the use is in the dominance frontier of phi else if (DF_BB->second.find(usepointers[i]->getParent()) != DF_BB->second.end()) { // Check if the user is a PHI node (it has to be, but only for precaution) if (PHINode *phiuser = dyn_cast<PHINode>(usepointers[i])) { for (unsigned i = 0, e = phiuser->getNumIncomingValues(); i < e; ++i) { Value *operand = phiuser->getIncomingValue(i); if (operand != V) continue; if (DT_->dominates(BB_next, phiuser->getIncomingBlock(i))) { phiuser->setIncomingValue(i, phi); } } } } } for (unsigned k = 0, f = phi->getNumIncomingValues(); k < f; ++k) { PHINode *p = dyn_cast<PHINode>(phi->getIncomingValue(k)); if (!p || !p->getName().startswith(vSSA_SIG)) continue; Value *V = p->getIncomingValue(0); n = V->getNumUses(); usepointers.resize(n); unsigned i = 0; for (Value::use_iterator uit = V->use_begin(), uend = V->use_end(); uit != uend; ++uit, ++i) usepointers[i] = dyn_cast<Instruction>(*uit); for (i = 0; i < n; ++i) { // Check if the use is in the dominator tree of vSSA_PHI if (DT_->dominates(BB_parent, usepointers[i]->getParent())) { if (BB_parent != usepointers[i]->getParent()) { usepointers[i]->replaceUsesOfWith(V, phi); // If this use is in a sigma, we need to check whether phis creation are needed again for this sigma if (PHINode *sigma = dyn_cast<PHINode>(usepointers[i])) { if (sigma->getName().startswith(vSSA_SIG)) { sigmasRenamed.push_back(sigma); } } } else if (!isa<PHINode>(usepointers[i])) usepointers[i]->replaceUsesOfWith(V, phi); } // Check if the use is in the dominance frontier of phi else if (DF_BB->second.find(usepointers[i]->getParent()) != DF_BB->second.end()) { // Check if the user is a PHI node (it has to be, but only for precaution) if (PHINode *phiuser = dyn_cast<PHINode>(usepointers[i])) { for (unsigned i = 0, e = phiuser->getNumIncomingValues(); i < e; ++i) { Value *operand = phiuser->getIncomingValue(i); if (operand != V) continue; if (DT_->dominates(BB_next, phiuser->getIncomingBlock(i))) { phiuser->setIncomingValue(i, phi); } } } } } } // Try to create phis again for the sigmas whose operand was renamed to vSSA_phi // Also, renaming may need to be done again for (SmallVectorImpl<PHINode*>::iterator vit = sigmasRenamed.begin(), vend = sigmasRenamed.end(); vit != vend; ++vit) { renameUsesToSigma(phi, *vit); SmallVector<PHINode*, 25> vssaphis_created = insertPhisForSigma(phi, *vit); //insertSigmaAsOperandOfPhis(vssaphis_created, *vit); populatePhis(vssaphis_created, (*vit)->getIncomingValue(0)); } // Creation of phis may require the creation of sigmas that were not created previosly, so we do it now createSigmasIfNeeded(phi->getParent()); }
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; MDNode *LoopID = NewLoop->getLoopID(); assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; Optional<MDNode *> NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); if (NewLoopID.hasValue()) { NewLoop->setLoopID(NewLoopID.getValue()); // Do not setLoopAlreadyUnrolled if loop attributes have been defined // explicitly. return NewLoop; } // Add unroll disable metadata to disable future unrolling for this loop. NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } else return nullptr; }
/// Create a clone of the blocks in a loop and connect them together. /// If UnrollProlog is true, loop structure will not be cloned, otherwise a new /// loop will be created including all cloned blocks, and the iterator of it /// switches to count NewIter down to 0. /// static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, LoopInfo *LI) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *NewLoop = 0; Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); if (ParentLoop) ParentLoop->addChildLoop(NewLoop); else LI->addTopLevelLoop(NewLoop); } // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F); NewBlocks.push_back(NewBB); if (NewLoop) NewLoop->addBasicBlockToLoop(NewBB, *LI); else if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (Latch == *BB) { // For the last block, if UnrollProlog is true, create a direct jump to // InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (UnrollProlog) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[I]); if (UnrollProlog) { VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (VMap[InVal]) NewPHI->setIncomingValue(idx, VMap[InVal]); } } if (NewLoop) { // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); } }
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); return NewLoop; } else return nullptr; }
/// \brief This method is called when the specified loop has more than one /// backedge in it. /// /// If this occurs, revector all of these backedges to target a new basic block /// and have that block branch to the loop header. This ensures that loops /// have exactly one backedge. static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, DominatorTree *DT, LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); // Unique backedge insertion currently depends on having a preheader. if (!Preheader) return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName() + ".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in // the backedge block which correspond to any PHI nodes in the header block. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), PN->getName()+".be", BETerminator); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; Value *UniqueValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); if (IBB == Preheader) { PreheaderIdx = i; } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { if (!UniqueValue) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; } } } // Delete all of the incoming values from the old PN except the preheader's assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); if (PreheaderIdx != 0) { PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); } // Nuke all entries except the zero'th. for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) PN->removeIncomingValue(e-i, false); // Finally, add the newly constructed PHI node as the entry for the BEBlock. PN->addIncoming(NewPN, BEBlock); // As an optimization, if all incoming values in the new PhiNode (which is a // subset of the incoming values of the old PHI node) have the same value, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); BEBlock->getInstList().erase(NewPN); } } // Now that all of the PHI nodes have been inserted and adjusted, modify the // backedge blocks to just to the BEBlock instead of the header. for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) if (TI->getSuccessor(Op) == Header) TI->setSuccessor(Op, BEBlock); } //===--- Update all analyses which we must preserve now -----------------===// // Update Loop Information - we know that this block is now in the current // loop and all parent loops. L->addBasicBlockToLoop(BEBlock, *LI); // Update dominator information DT->splitBlock(BEBlock); return BEBlock; }
/// Connect the unrolling epilog code to the original loop. /// The unrolling epilog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Update PHI nodes at the unrolling loop exit and epilog loop exit /// - Create PHI nodes at the unrolling loop exit to combine /// values that exit the unrolling loop code and jump around it. /// - Update PHI operands in the epilog loop by the new PHI nodes /// - Branch around the epilog loop if extra iters (ModVal) is zero. /// static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); // Loop structure should be the following: // // PreHeader // NewPreHeader // Header // ... // Latch // NewExit (PN) // EpilogPreHeader // EpilogHeader // ... // EpilogLatch // Exit (EpilogPN) // Update PHI nodes at NewExit and Exit. for (Instruction &BBI : *NewExit) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // PN should be used in another PHI located in Exit block as // Exit was split by SplitBlockPredecessors into Exit and NewExit // Basicaly it should look like: // NewExit: // PN = PHI [I, Latch] // ... // Exit: // EpilogPN = PHI [PN, EpilogPreHeader] // // There is EpilogPreHeader incoming block instead of NewExit as // NewExit was spilt 1 more time to get EpilogPreHeader. assert(PN->hasOneUse() && "The phi should have 1 use"); PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser()); assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); // Add incoming PreHeader from branch around the Loop PN->addIncoming(UndefValue::get(PN->getType()), PreHeader); Value *V = PN->getIncomingValueForBlock(Latch); Instruction *I = dyn_cast<Instruction>(V); if (I && L->contains(I)) // If value comes from an instruction in the loop add VMap value. V = VMap.lookup(I); // For the instruction out of the loop, constant or undefined value // insert value itself. EpilogPN->addIncoming(V, EpilogLatch); assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && "EpilogPN should have EpilogPreHeader incoming block"); // Change EpilogPreHeader incoming block to NewExit. EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), NewExit); // Now PHIs should look like: // NewExit: // PN = PHI [I, Latch], [undef, PreHeader] // ... // Exit: // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] } // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). // Update corresponding PHI nodes in epilog loop. for (BasicBlock *Succ : successors(Latch)) { // Skip this as we already updated phis in exit blocks. if (!L->contains(Succ)) continue; for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add new PHI nodes to the loop exit block and update epilog // PHIs with the new PHI values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", NewExit->getFirstNonPHI()); // Adding a value to the new PHI node from the unrolling loop preheader. NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); // Adding a value to the new PHI node from the unrolling loop latch. NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch); // Update the existing PHI node operand with the value from the new PHI // node. Corresponding instruction in epilog loop should be PHI. PHINode *VPN = cast<PHINode>(VMap[&BBI]); VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN); } } Instruction *InsertPt = NewExit->getTerminator(); IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(Exit, NewExit); // Split the main loop exit to maintain canonicalization guarantees. SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, PreserveLCSSA); }
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; // Make sure the loop is in canonical form, and there is a single // exit block only. if (!L->isLoopSimplifyForm()) return false; BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop if (!Exit) return false; // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). const SCEV *BECountSC = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) return false; unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) return false; // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) return false; BasicBlock *Latch = L->getLoopLatch(); // Loop structure is the following: // // PreHeader // Header // ... // Latch // Exit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split Exit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... *NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // Exit Exit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. // To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // Exit Exit // Rewrite the cloned instruction operands to use the values created when the // clone is created. for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the // parent loop, so the Scalar Evolution pass needs to be run again. if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); NumRuntimeUnrolled++; return true; }
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and /// DominanceFrontier, but no other analyses. BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); // Move the edges from Preds to point to NewBB instead of BB. for (unsigned i = 0; i != NumPreds; ++i) Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node. Value *InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); // Check to see if we can eliminate this phi node. if (Value *V = PN->hasConstantValue(DT != 0)) { Instruction *I = dyn_cast<Instruction>(V); if (!I || DT == 0 || DT->dominates(I, PN)) { PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); PN->eraseFromParent(); } } } return NewBB; }
/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector /// in the given landing pad. In principle, llvm.eh.exception is /// required to be in the landing pad; in practice, SplitCriticalEdge /// can break that invariant, and then inlining can break it further. /// There's a real need for a reliable solution here, but until that /// happens, we have some fragile workarounds here. static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) { // Look for an exception call in the actual landing pad. EHExceptionInst *exn = findExceptionInBlock(lpad); if (exn) return findSelectorForException(exn); // Okay, if that failed, look for one in an obvious successor. If // we find one, we'll fix the IR by moving things back to the // landing pad. bool dominates = true; // does the lpad dominate the exn call BasicBlock *nonDominated = 0; // if not, the first non-dominated block BasicBlock *lastDominated = 0; // and the block which branched to it BasicBlock *exnBlock = lpad; // We need to protect against lpads that lead into infinite loops. SmallPtrSet<BasicBlock*,4> visited; visited.insert(exnBlock); do { // We're not going to apply this hack to anything more complicated // than a series of unconditional branches, so if the block // doesn't terminate in an unconditional branch, just fail. More // complicated cases can arise when, say, sinking a call into a // split unwind edge and then inlining it; but that can do almost // *anything* to the CFG, including leaving the selector // completely unreachable. The only way to fix that properly is // to (1) prohibit transforms which move the exception or selector // values away from the landing pad, e.g. by producing them with // instructions that are pinned to an edge like a phi, or // producing them with not-really-instructions, and (2) making // transforms which split edges deal with that. BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back()); if (!branch || branch->isConditional()) return 0; BasicBlock *successor = branch->getSuccessor(0); // Fail if we found an infinite loop. if (!visited.insert(successor)) return 0; // If the successor isn't dominated by exnBlock: if (!successor->getSinglePredecessor()) { // We don't want to have to deal with threading the exception // through multiple levels of phi, so give up if we've already // followed a non-dominating edge. if (!dominates) return 0; // Otherwise, remember this as a non-dominating edge. dominates = false; nonDominated = successor; lastDominated = exnBlock; } exnBlock = successor; // Can we stop here? exn = findExceptionInBlock(exnBlock); } while (!exn); // Look for a selector call for the exception we found. EHSelectorInst *selector = findSelectorForException(exn); if (!selector) return 0; // The easy case is when the landing pad still dominates the // exception call, in which case we can just move both calls back to // the landing pad. if (dominates) { selector->moveBefore(lpad->getFirstNonPHI()); exn->moveBefore(selector); return selector; } // Otherwise, we have to split at the first non-dominating block. // The CFG looks basically like this: // lpad: // phis_0 // insnsAndBranches_1 // br label %nonDominated // nonDominated: // phis_2 // insns_3 // %exn = call i8* @llvm.eh.exception() // insnsAndBranches_4 // %selector = call @llvm.eh.selector(i8* %exn, ... // We need to turn this into: // lpad: // phis_0 // %exn0 = call i8* @llvm.eh.exception() // %selector0 = call @llvm.eh.selector(i8* %exn0, ... // insnsAndBranches_1 // br label %split // from lastDominated // nonDominated: // phis_2 (without edge from lastDominated) // %exn1 = call i8* @llvm.eh.exception() // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ... // br label %split // split: // phis_2 (edge from lastDominated, edge from split) // %exn = phi ... // %selector = phi ... // insns_3 // insnsAndBranches_4 assert(nonDominated); assert(lastDominated); // First, make clones of the intrinsics to go in lpad. EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone()); EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone()); lpadSelector->setArgOperand(0, lpadExn); lpadSelector->insertBefore(lpad->getFirstNonPHI()); lpadExn->insertBefore(lpadSelector); // Split the non-dominated block. BasicBlock *split = nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(), nonDominated->getName() + ".lpad-fix"); // Redirect the last dominated branch there. cast<BranchInst>(lastDominated->back()).setSuccessor(0, split); // Move the existing intrinsics to the end of the old block. selector->moveBefore(&nonDominated->back()); exn->moveBefore(selector); Instruction *splitIP = &split->front(); // For all the phis in nonDominated, make a new phi in split to join // that phi with the edge from lastDominated. for (BasicBlock::iterator i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) { PHINode *phi = dyn_cast<PHINode>(i); if (!phi) break; PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(), splitIP); phi->replaceAllUsesWith(splitPhi); splitPhi->addIncoming(phi, nonDominated); splitPhi->addIncoming(phi->removeIncomingValue(lastDominated), lastDominated); } // Make new phis for the exception and selector. PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP); exn->replaceAllUsesWith(exnPhi); selector->setArgOperand(0, exn); // except for this use exnPhi->addIncoming(exn, nonDominated); exnPhi->addIncoming(lpadExn, lastDominated); PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP); selector->replaceAllUsesWith(selectorPhi); selectorPhi->addIncoming(selector, nonDominated); selectorPhi->addIncoming(lpadSelector, lastDominated); return lpadSelector; }
/// Get or create a target for the branch out of rewritten calls to /// llvm.eh.resume. BasicBlock *InvokeInliningInfo::getInnerUnwindDest() { if (InnerUnwindDest) return InnerUnwindDest; // Find and hoist the llvm.eh.exception and llvm.eh.selector calls // in the outer landing pad to immediately following the phis. EHSelectorInst *selector = getOuterSelector(); if (!selector) return 0; // The call to llvm.eh.exception *must* be in the landing pad. Instruction *exn = cast<Instruction>(selector->getArgOperand(0)); assert(exn->getParent() == OuterUnwindDest); // TODO: recognize when we've already done this, so that we don't // get a linear number of these when inlining calls into lots of // invokes with the same landing pad. // Do the hoisting. Instruction *splitPoint = exn->getParent()->getFirstNonPHI(); assert(splitPoint != selector && "selector-on-exception dominance broken!"); if (splitPoint == exn) { selector->removeFromParent(); selector->insertAfter(exn); splitPoint = selector->getNextNode(); } else { exn->moveBefore(splitPoint); selector->moveBefore(splitPoint); } // Split the landing pad. InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint, OuterUnwindDest->getName() + ".body"); // The number of incoming edges we expect to the inner landing pad. const unsigned phiCapacity = 2; // Create corresponding new phis for all the phis in the outer landing pad. BasicBlock::iterator insertPoint = InnerUnwindDest->begin(); BasicBlock::iterator I = OuterUnwindDest->begin(); for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { PHINode *outerPhi = cast<PHINode>(I); PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity, outerPhi->getName() + ".lpad-body", insertPoint); outerPhi->replaceAllUsesWith(innerPhi); innerPhi->addIncoming(outerPhi, OuterUnwindDest); } // Create a phi for the exception value... InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity, "exn.lpad-body", insertPoint); exn->replaceAllUsesWith(InnerExceptionPHI); selector->setArgOperand(0, exn); // restore this use InnerExceptionPHI->addIncoming(exn, OuterUnwindDest); // ...and the selector. InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity, "selector.lpad-body", insertPoint); selector->replaceAllUsesWith(InnerSelectorPHI); InnerSelectorPHI->addIncoming(selector, OuterUnwindDest); // All done. return InnerUnwindDest; }
/// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. // The new PHI node value is added as an operand of a PHI node in either // the loop header or the loop exit block. for (BasicBlock *Succ : successors(Latch)) { for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add a new PHI node to the prolog end block and add the // appropriate incoming values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", PrologExit->getFirstNonPHI()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. if (L->contains(PN)) { NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); } else { NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); } Value *V = PN->getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast<Instruction>(V)) { if (L->contains(I)) { V = VMap.lookup(I); } } // Adding a value to the new PHI node from the last prolog block // that was created. NewPN->addIncoming(V, PrologLatch); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); } else { PN->addIncoming(NewPN, PrologExit); } } } // Make sure that created prolog loop is in simplified form SmallVector<BasicBlock *, 4> PrologExitPreds; Loop *PrologLoop = LI->getLoopFor(PrologLatch); if (PrologLoop) { for (BasicBlock *PredBB : predecessors(PrologExit)) if (PrologLoop->contains(PredBB)) PrologExitPreds.push_back(PredBB); SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, PreserveLCSSA); } // Create a branch around the original loop, which is taken if there are no // iterations remaining to be executed after running the prologue. Instruction *InsertPt = PrologExit->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) // This means %xtraiter is (BECount + 1) and all of the iterations of this // loop were executed by the prologue. Note that if BECount <u (Count - 1) // then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); }
void RegionExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const { for (SetVector<BasicBlock *>::const_iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { BasicBlock *BB = *I; // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { for (User::op_iterator OI = II->op_begin(), OE = II->op_end(); OI != OE; ++OI) if (definedInCaller(Blocks, *OI)) Inputs.insert(*OI); #if LLVM_VERSION_MINOR == 5 for (User *U : II->users()) if (!definedInRegion(Blocks, U)) { #else for (Value::use_iterator UI = II->use_begin(), UE = II->use_end(); UI != UE; ++UI) if (!definedInRegion(Blocks, *UI)) { #endif Outputs.insert(II); break; } } } } /// severSplitPHINodes - If a PHI node has multiple inputs from outside of the /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. void RegionExtractor::severSplitPHINodes(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { PHINode *PN = dyn_cast<PHINode>(Header->begin()); if (!PN) return; // No PHI nodes. // If the header node contains any PHI nodes, check to see if there is more // than one entry from outside the region. If so, we need to sever the // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; // If there is one (or fewer) predecessor from outside the region, we don't // need to do anything special. if (NumPredsOutsideRegion <= 1) return; } // Otherwise, we need to split the header block into two pieces: one // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, Header->getName()+".ce"); // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; Blocks.remove(OldPred); Blocks.insert(NewBB); Header = NewBB; // Okay, update dominator sets. The blocks that dominate the new one are the // blocks that dominate TIBB plus the new block itself. if (DT) DT->splitBlock(NewBB); // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName()+".ce", NewBB->begin()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; } } } } }