/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary /// and adds a mapping from the original loop to the new loop to NewLoops. /// Returns nullptr if no new loop was created and a pointer to the /// original loop OriginalBB was part of otherwise. const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops) { // Figure out which loop New is in. const Loop *OldLoop = LI->getLoopFor(OriginalBB); assert(OldLoop && "Should (at least) be in the loop being unrolled!"); Loop *&NewLoop = NewLoops[OldLoop]; if (!NewLoop) { // Found a new sub-loop. assert(OriginalBB == OldLoop->getHeader() && "Header should be first in RPO"); NewLoop = new Loop(); Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); if (NewLoopParent) NewLoopParent->addChildLoop(NewLoop); else LI->addTopLevelLoop(NewLoop); NewLoop->addBasicBlockToLoop(ClonedBB, *LI); return OldLoop; } else { NewLoop->addBasicBlockToLoop(ClonedBB, *LI); return nullptr; } }
/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p /// Blocks. /// /// Updates LoopInfo and DominatorTree assuming the loop is dominated by block /// \p LoopDomBB. Insert the new blocks before block specified in \p Before. Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl<BasicBlock *> &Blocks) { assert(OrigLoop->getSubLoops().empty() && "Loop to be cloned cannot have inner loop"); Function *F = OrigLoop->getHeader()->getParent(); Loop *ParentLoop = OrigLoop->getParentLoop(); Loop *NewLoop = new Loop(); if (ParentLoop) ParentLoop->addChildLoop(NewLoop); else LI->addTopLevelLoop(NewLoop); BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); assert(OrigPH && "No preheader"); BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); // To rename the loop PHIs. VMap[OrigPH] = NewPH; Blocks.push_back(NewPH); // Update LoopInfo. if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewPH, *LI); // Update DominatorTree. DT->addNewBlock(NewPH, LoopDomBB); for (BasicBlock *BB : OrigLoop->getBlocks()) { BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); VMap[BB] = NewBB; // Update LoopInfo. NewLoop->addBasicBlockToLoop(NewBB, *LI); // Add DominatorTree node. After seeing all blocks, update to correct IDom. DT->addNewBlock(NewBB, NewPH); Blocks.push_back(NewBB); } for (BasicBlock *BB : OrigLoop->getBlocks()) { // Update DominatorTree. BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]), cast<BasicBlock>(VMap[IDomBB])); } // Move them physically from the end of the block list. F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), NewPH); F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), NewLoop->getHeader()->getIterator(), F->end()); return NewLoop; }
/// Swaps the positions of \p InnerLoop and \p OuterLoop in the LoopInfo
/// hierarchy: \p InnerLoop takes over \p OuterLoop's place (below the former
/// parent, or at top level), the previous children of \p InnerLoop are handed
/// to \p OuterLoop, and \p OuterLoop becomes a child of \p InnerLoop.
void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
                                                Loop *OuterLoop) {
  Loop *const FormerParent = OuterLoop->getParentLoop();

  // Detach OuterLoop from its parent (when it has one), then detach
  // InnerLoop from OuterLoop.
  if (FormerParent)
    removeChildLoop(FormerParent, OuterLoop);
  removeChildLoop(OuterLoop, InnerLoop);

  // InnerLoop now occupies the slot OuterLoop used to have.
  if (FormerParent)
    FormerParent->addChildLoop(InnerLoop);
  else
    LI->changeTopLevelLoop(OuterLoop, InnerLoop);

  // Hand every remaining child of InnerLoop over to OuterLoop. The first
  // child is re-fetched on each round because removal changes the list.
  while (!InnerLoop->empty()) {
    Loop *Child = InnerLoop->removeChildLoop(InnerLoop->begin());
    OuterLoop->addChildLoop(Child);
  }

  InnerLoop->addChildLoop(OuterLoop);
}
/// Updates LoopInfo after interchanging \p InnerLoop and \p OuterLoop.
///
/// \p InnerLoop replaces \p OuterLoop in the hierarchy (as a child of
/// \p OuterLoop's former parent, or as a top-level loop), all loops that were
/// nested in \p InnerLoop are moved under \p OuterLoop, and \p OuterLoop is
/// finally nested inside \p InnerLoop.
void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
                                                Loop *OuterLoop) {
  Loop *OuterLoopParent = OuterLoop->getParentLoop();
  if (OuterLoopParent) {
    // Remove the loop from its parent loop.
    removeChildLoop(OuterLoopParent, OuterLoop);
    removeChildLoop(OuterLoop, InnerLoop);
    OuterLoopParent->addChildLoop(InnerLoop);
  } else {
    removeChildLoop(OuterLoop, InnerLoop);
    LI->changeTopLevelLoop(OuterLoop, InnerLoop);
  }

  // Move every sub-loop of InnerLoop to OuterLoop. removeChildLoop() takes
  // the child out of InnerLoop's sub-loop list, so an iterator obtained
  // before the call (including a cached end()) must not be reused; walking
  // the list with ++I while removing skips children and reads stale
  // iterators. Always remove the current first child until none are left.
  while (!InnerLoop->empty())
    OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin()));

  InnerLoop->addChildLoop(OuterLoop);
}
/// \brief If this loop has multiple backedges, try to pull one of them out into
/// a nested loop.
///
/// This is important for code that looks like
/// this:
///
///  Loop:
///     ...
///     br cond, Loop, Next
///     ...
///     br cond2, Loop, Out
///
/// To identify this common case, we look at the PHI nodes in the header of the
/// loop.  PHI nodes with unchanging values on one backedge correspond to values
/// that change in the "outer" loop, but not in the "inner" loop.
///
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
/// Returns nullptr (before any change is made) when \p Preheader is null, when
/// the header's predecessors cannot be split, when no partitioning PHI is
/// found, or when one of the predecessors that would be moved to the outer
/// loop ends in an indirectbr.
///
/// \p SE may be null. \p PP, \p DT and \p LI are dereferenced unconditionally.
/// \p AC is only forwarded to findPHIToPartitionLoops.
///
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
                                DominatorTree *DT, LoopInfo *LI,
                                ScalarEvolution *SE, Pass *PP,
                                AssumptionCache *AC) {
  // Don't try to separate loops without a preheader.
  if (!Preheader)
    return nullptr;

  // The header is not a landing pad; preheader insertion should ensure this.
  BasicBlock *Header = L->getHeader();
  assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
  if (!Header->canSplitPredecessors())
    return nullptr;

  PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
  if (!PN) return nullptr;  // No known way to partition.

  // Pull out all predecessors that have varying values in the loop.  This
  // handles the case when a PHI node has multiple instances of itself as
  // arguments.
  SmallVector<BasicBlock*, 8> OuterLoopPreds;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    if (PN->getIncomingValue(i) != PN ||
        !L->contains(PN->getIncomingBlock(i))) {
      // We can't split indirectbr edges.
      if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
        return nullptr;
      OuterLoopPreds.push_back(PN->getIncomingBlock(i));
    }
  }
  DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");

  // If ScalarEvolution is around and knows anything about values in
  // this loop, tell it to forget them, because we're about to
  // substantially change it.
  if (SE)
    SE->forgetLoop(L);

  bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);

  // Route all outer-loop predecessors through a single new block.
  BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
                                             DT, LI, PreserveLCSSA);

  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
  // code layout too horribly.
  placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);

  // Create the new outer loop.
  Loop *NewOuter = new Loop();

  // Change the parent loop to use the outer loop as its child now.
  if (Loop *Parent = L->getParentLoop())
    Parent->replaceChildLoopWith(L, NewOuter);
  else
    LI->changeTopLevelLoop(L, NewOuter);

  // L is now a subloop of our outer loop.
  NewOuter->addChildLoop(L);

  // Every block currently in L is also recorded as a block of the new outer
  // loop.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I)
    NewOuter->addBlockEntry(*I);

  // Now reset the header in L, which had been moved by
  // SplitBlockPredecessors for the outer loop.
  L->moveToHeader(Header);

  // Determine which blocks should stay in L and which should be moved out to
  // the Outer loop now.
  std::set<BasicBlock*> BlocksInL;
  for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
    BasicBlock *P = *PI;
    // Only predecessors dominated by the header seed the set.
    if (DT->dominates(Header, P))
      addBlockAndPredsToSet(P, Header, BlocksInL);
  }

  // Scan all of the loop children of L, moving them to OuterLoop if they are
  // not part of the inner loop.
  // The index only advances when a sub-loop is kept: removing one shrinks
  // SubLoops, so the next candidate ends up at the same index.
  const std::vector<Loop*> &SubLoops = L->getSubLoops();
  for (size_t I = 0; I != SubLoops.size(); )
    if (BlocksInL.count(SubLoops[I]->getHeader()))
      ++I;   // Loop remains in L
    else
      NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));

  // Now that we know which blocks are in L and which need to be moved to
  // OuterLoop, move any blocks that need it.
  for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
    BasicBlock *BB = L->getBlocks()[i];
    if (!BlocksInL.count(BB)) {
      // Move this block to the parent, updating the exit blocks sets
      L->removeBlockFromLoop(BB);
      // Only retarget the LoopInfo mapping when L was the recorded loop for
      // this block.
      if ((*LI)[BB] == L)
        LI->changeLoopFor(BB, NewOuter);
      // Compensate for the removal so the ++i revisits this index.
      --i;
    }
  }

  return NewOuter;
}
// We generate a loop of either of the following structures:
//
//              BeforeBB                      BeforeBB
//                 |                             |
//                 v                             v
//              GuardBB                      PreHeaderBB
//              /      |                         |   _____
//     __  PreHeaderBB  |                        v  \/    |
//    /  \    /         |                     HeaderBB  latch
// latch  HeaderBB      |                        |\       |
//    \  /    \         /                        | \------/
//     <       \       /                         |
//              \     /                          v
//              ExitBB                         ExitBB
//
// depending on whether or not we know that it is executed at least once. If
// not, GuardBB checks if the loop is executed at least once. If this is the
// case we branch to PreHeaderBB and subsequently to the HeaderBB, which
// contains the loop iv 'polly.indvar', the incremented loop iv
// 'polly.indvar_next' as well as the condition to check if we execute another
// iteration of the loop. After the loop has finished, we branch to ExitBB.
//
// LB and UB must have the same integer type (asserted). Stride is
// zero-extended or bit-cast to that type. UseGuard selects the left-hand
// structure. ExitBB is an output parameter that receives the block split off
// from the current insert point. Annotator may be null. The parameter P is
// not referenced in this function body.
//
// Returns the induction variable PHI 'polly.indvar'. On return the builder's
// insert point is the first non-PHI instruction of HeaderBB.
Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
                         PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
                         DominatorTree &DT, BasicBlock *&ExitBB,
                         ICmpInst::Predicate Predicate,
                         ScopAnnotator *Annotator, bool Parallel,
                         bool UseGuard) {
  Function *F = Builder.GetInsertBlock()->getParent();
  LLVMContext &Context = F->getContext();

  assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
  IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
  assert(LoopIVType && "UB is not integer?");

  BasicBlock *BeforeBB = Builder.GetInsertBlock();
  // The guard block only exists when a guard was requested.
  BasicBlock *GuardBB =
      UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
  BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
  BasicBlock *PreHeaderBB =
      BasicBlock::Create(Context, "polly.loop_preheader", F);

  // Update LoopInfo
  Loop *OuterLoop = LI.getLoopFor(BeforeBB);
  Loop *NewLoop = new Loop();

  if (OuterLoop)
    OuterLoop->addChildLoop(NewLoop);
  else
    LI.addTopLevelLoop(NewLoop);

  // Guard and preheader belong to the surrounding loop, not to the new one.
  if (OuterLoop) {
    if (GuardBB)
      OuterLoop->addBasicBlockToLoop(GuardBB, LI);
    OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
  }

  NewLoop->addBasicBlockToLoop(HeaderBB, LI);

  // Notify the annotator (if present) that we have a new loop, but only
  // after the header block is set.
  if (Annotator)
    Annotator->pushLoop(NewLoop, Parallel);

  // ExitBB
  ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBB->setName("polly.loop_exit");

  // BeforeBB
  if (GuardBB) {
    BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
    DT.addNewBlock(GuardBB, BeforeBB);

    // GuardBB
    Builder.SetInsertPoint(GuardBB);
    Value *LoopGuard;
    LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
    LoopGuard->setName("polly.loop_guard");
    Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
    DT.addNewBlock(PreHeaderBB, GuardBB);
  } else {
    BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
    DT.addNewBlock(PreHeaderBB, BeforeBB);
  }

  // PreHeaderBB
  Builder.SetInsertPoint(PreHeaderBB);
  Builder.CreateBr(HeaderBB);

  // HeaderBB
  DT.addNewBlock(HeaderBB, PreHeaderBB);
  Builder.SetInsertPoint(HeaderBB);
  PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
  IV->addIncoming(LB, PreHeaderBB);
  Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
  Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
  Value *LoopCondition;
  // The loop condition compares the current IV against UB - Stride.
  UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
  LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
  LoopCondition->setName("polly.loop_cond");

  // Create the loop latch and annotate it as such.
  BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
  if (Annotator)
    Annotator->annotateLoopLatch(B, NewLoop, Parallel);

  IV->addIncoming(IncrementedIV, HeaderBB);
  // ExitBB is reached from GuardBB as well when a guard exists, so the guard
  // becomes its immediate dominator in that case.
  if (GuardBB)
    DT.changeImmediateDominator(ExitBB, GuardBB);
  else
    DT.changeImmediateDominator(ExitBB, HeaderBB);

  // The loop body should be added here.
  Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
  return IV;
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was not unrolled.
///
/// The function returns false when the loop has no preheader or no latch,
/// when it is not safe to clone, when the latch is not terminated by a
/// conditional branch, when the address of the header is taken, when there is
/// nothing to do (unknown trip count and Count < 2), or when a run-time trip
/// count is required and UnrollRuntimeLoopProlog fails. If the trip count (and
/// multiple) are not known, loop unrolling will mostly produce more code that
/// is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecessary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// A TripCount of 0 means "unknown". A Count larger than a known TripCount is
/// clamped to TripCount, and Count == TripCount means the loop is completely
/// unrolled.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// PP may be null; ScalarEvolution and the DominatorTree are then not looked
/// up. AC is dereferenced unconditionally once unrolling has happened.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, unsigned TripMultiple,
                      LoopInfo *LI, Pass *PP, LPPassManager *LPM,
                      AssumptionCache *AC) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             " Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          " Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  ScalarEvolution *SE =
      PP ? PP->getAnalysisIfAvailable<ScalarEvolution>() : nullptr;
  if (SE)
    SE->forgetLoop(L);

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  // Report the unrolling decision.
  DebugLoc LoopLoc = L->getStartLoc();
  Function *F = Header->getParent();
  LLVMContext &Ctx = F->getContext();

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                           Twine("completely unrolled loop with ") +
                               Twine(TripCount) + " iterations");
  } else {
    // Emits a remark with the common prefix followed by the given suffix.
    auto EmitDiag = [&](const Twine &T) {
      emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                             "unrolled loop by a factor of " + Twine(Count) +
                                 T);
    };

    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
      EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip));
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      EmitDiag(" with " + Twine(TripMultiple) + " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
      EmitDiag(" with run-time trip count");
    }
    DEBUG(dbgs() << "!\n");
  }

  // ContinueOnTrue tells which successor of the latch branch stays in the
  // loop; the other one (index ContinueOnTrue) is the exit.
  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  // Headers[i] / Latches[i] are the header and latch of unrolled iteration i;
  // index 0 is the original loop body.
  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  // Create Count - 1 additional copies of the loop body.
  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;
    // Maps each original (sub-)loop to the loop its clones go into for this
    // iteration; blocks directly in L stay in L.
    SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
    NewLoops[L] = L;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Tell LI about New.
      if (*BB == Header) {
        assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
        L->addBasicBlockToLoop(New, *LI);
      } else {
        // Figure out which loop New is in.
        const Loop *OldLoop = LI->getLoopFor(*BB);
        assert(OldLoop && "Should (at least) be in the loop being unrolled!");

        Loop *&NewLoop = NewLoops[OldLoop];
        if (!NewLoop) {
          // Found a new sub-loop.
          assert(*BB == OldLoop->getHeader() &&
                 "Header should be first in RPO");

          Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
          assert(NewLoopParent &&
                 "Expected parent loop before sub-loop in RPO");
          NewLoop = new Loop;
          NewLoopParent->addChildLoop(NewLoop);

          // Forget the old loop, since its inputs may have changed.
          if (SE)
            SE->forgetLoop(OldLoop);
        }
        NewLoop->addBasicBlockToLoop(New, *LI);
      }

      if (*BB == Header)
        // Loop over all of the PHI nodes in the block, changing them to use
        // the incoming values from the previous block.
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      // Add phi entries for newly created values to all exit blocks.
      // NOTE: the local iterator named SE below shadows the ScalarEvolution
      // pointer SE for the duration of this loop.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          // NOTE: this map iterator shadows the unroll iteration counter It.
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      // No backedge remains, so the PHI collapses to its preheader value.
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    // The last latch wraps around to the original header (j == 0).
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        // NOTE: SE here is a successor iterator that shadows the
        // ScalarEvolution pointer SE.
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<Loop *, 4> ForgottenLoops;
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      // When Dest was folded away, later entries of Latches that referred to
      // it are redirected to the block it was folded into.
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM,
                                                      ForgottenLoops))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: We could register any cloned assumptions instead of clearing the
  // whole function's cache.
  AC->clear();

  DominatorTree *DT = nullptr;
  if (PP) {
    // FIXME: Reconstruct dom info, because it is not preserved properly.
    // Incrementally updating domtree after loop unrolling would be easy.
    if (DominatorTreeWrapperPass *DTWP =
            PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
      DT = &DTWP->getDomTree();
      DT->recalculate(*L->getHeader()->getParent());
    }

    // Simplify any new induction variables in the partially unrolled loop.
    if (SE && !CompletelyUnroll) {
      SmallVector<WeakVH, 16> DeadInsts;
      simplifyLoopIVs(L, SE, LPM, DeadInsts);

      // Aggressively clean up dead instructions that simplifyLoopIVs already
      // identified. Any remaining should be cleaned up below.
      while (!DeadInsts.empty())
        if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
          RecursivelyDeleteTriviallyDeadInstructions(Inst);
    }
  }
  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      // Advance before a possible erase so the iterator stays usable.
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;

  // Read the parent before L may be deleted from the pass manager queue.
  Loop *OuterL = L->getParentLoop();
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != nullptr)
    LPM->deleteLoopFromQueue(L);

  // If we have a pass and a DominatorTree we should re-simplify impacted loops
  // to ensure subsequent analyses can rely on this form. We want to simplify
  // at least one layer outside of the loop that was unrolled so that any
  // changes to the parent loop exposed by the unrolling are considered.
  if (PP && DT) {
    if (!OuterL && !CompletelyUnroll)
      OuterL = L;
    if (OuterL) {
      DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>();
      const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
      simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC);

      // LCSSA must be performed on the outermost affected loop. The unrolled
      // loop's last loop latch is guaranteed to be in the outermost loop after
      // deleteLoopFromQueue updates LoopInfo.
      Loop *LatchLoop = LI->getLoopFor(Latches.back());
      if (!OuterL->contains(LatchLoop))
        while (OuterL->getParentLoop() != LatchLoop)
          OuterL = OuterL->getParentLoop();

      formLCSSARecursively(*OuterL, *DT, LI, SE);
    }
  }

  return true;
}
/// Create a clone of the blocks in a loop and connect them together.
/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
/// loop will be created including all cloned blocks, and the iterator of it
/// switches to count NewIter down to 0.
///
/// \param L            The loop whose blocks are cloned.
/// \param NewIter      Initial value of the 'prol.iter' counter of the cloned
///                     loop; only used when \p UnrollProlog is false.
/// \param UnrollProlog When true the cloned latch jumps straight to
///                     \p InsertBot and the cloned header PHIs are removed.
/// \param InsertTop    Block whose terminator's successor 0 is redirected to
///                     the cloned header.
/// \param InsertBot    Block the cloned latch exits to.
/// \param NewBlocks    Receives the cloned blocks, in RPO order.
/// \param LoopBlocks   DFS result supplying the RPO order of \p L's blocks.
/// \param VMap         Receives the original-to-clone value mapping.
/// \param LI           LoopInfo to update with the new blocks and loop.
///
static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
                            BasicBlock *InsertTop, BasicBlock *InsertBot,
                            std::vector<BasicBlock *> &NewBlocks,
                            LoopBlocksDFS &LoopBlocks,
                            ValueToValueMapTy &VMap, LoopInfo *LI) {
  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  // NewLoop stays null when only a straight-line prolog is produced.
  Loop *NewLoop = 0;
  Loop *ParentLoop = L->getParentLoop();
  if (!UnrollProlog) {
    NewLoop = new Loop();
    if (ParentLoop)
      ParentLoop->addChildLoop(NewLoop);
    else
      LI->addTopLevelLoop(NewLoop);
  }

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
    NewBlocks.push_back(NewBB);

    // Clones go into the new loop if there is one, otherwise into the parent
    // loop of L (if any).
    if (NewLoop)
      NewLoop->addBasicBlockToLoop(NewBB, *LI);
    else if (ParentLoop)
      ParentLoop->addBasicBlockToLoop(NewBB, *LI);

    VMap[*BB] = NewBB;
    if (Header == *BB) {
      // For the first block, add a CFG connection to this newly
      // created block.
      InsertTop->getTerminator()->setSuccessor(0, NewBB);
    }
    if (Latch == *BB) {
      // For the last block, if UnrollProlog is true, create a direct jump to
      // InsertBot. If not, create a loop back to cloned head.
      // The original latch terminator is dropped from the map because its
      // clone is replaced below.
      VMap.erase((*BB)->getTerminator());
      BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
      BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
      IRBuilder<> Builder(LatchBR);
      if (UnrollProlog) {
        Builder.CreateBr(InsertBot);
      } else {
        // Build the down-counter: prol.iter starts at NewIter, is decremented
        // by one in the latch, and the loop repeats while the result is
        // non-zero.
        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
                                          FirstLoopBB->getFirstNonPHI());
        Value *IdxSub =
            Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                              NewIdx->getName() + ".sub");
        Value *IdxCmp =
            Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
        Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
        NewIdx->addIncoming(NewIter, InsertTop);
        NewIdx->addIncoming(IdxSub, NewBB);
      }
      LatchBR->eraseFromParent();
    }
  }

  // Change the incoming values to the ones defined in the preheader or
  // cloned loop.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *NewPHI = cast<PHINode>(VMap[I]);
    if (UnrollProlog) {
      // Without a backedge the cloned PHI is replaced by its preheader value.
      VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
      cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
    } else {
      // Entry edge now comes from InsertTop instead of the preheader.
      unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
      NewPHI->setIncomingBlock(idx, InsertTop);
      // Backedge now comes from the cloned latch, carrying the cloned value
      // when the incoming value has a mapping.
      BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
      idx = NewPHI->getBasicBlockIndex(Latch);
      Value *InVal = NewPHI->getIncomingValue(idx);
      NewPHI->setIncomingBlock(idx, NewLatch);
      if (VMap[InVal])
        NewPHI->setIncomingValue(idx, VMap[InVal]);
    }
  }
  if (NewLoop) {
    // Add unroll disable metadata to disable future unrolling for this loop.
    SmallVector<Metadata *, 4> MDs;
    // Reserve first location for self reference to the LoopID metadata node.
    MDs.push_back(nullptr);
    MDNode *LoopID = NewLoop->getLoopID();
    if (LoopID) {
      // First remove any existing loop unrolling metadata.
      // Operand 0 is skipped: it is the slot reserved for the self reference.
      for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
        bool IsUnrollMetadata = false;
        MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
        if (MD) {
          const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
          IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
        }
        if (!IsUnrollMetadata)
          MDs.push_back(LoopID->getOperand(i));
      }
    }

    LLVMContext &Context = NewLoop->getHeader()->getContext();
    SmallVector<Metadata *, 1> DisableOperands;
    DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
    MDNode *DisableNode = MDNode::get(Context, DisableOperands);
    MDs.push_back(DisableNode);

    MDNode *NewLoopID = MDNode::get(Context, MDs);
    // Set operand 0 to refer to the loop id itself.
    NewLoopID->replaceOperandWith(0, NewLoopID);
    NewLoop->setLoopID(NewLoopID);
  }
}