/// Append to \p D the dependence chain that starts at \p I.
///
/// A non-PHI instruction ends the chain and is simply appended. A PHI is only
/// followed when it has exactly two incoming values, lives in the header of
/// CurLoop, and receives an instruction from the loop preheader; in that case
/// the PHI is appended and the walk continues with the value that reaches the
/// PHI from its own block (the backedge). A PHI failing any of these checks
/// empties \p D.
void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
                                                        DepChain &D) {
  PHINode *Phi = dyn_cast<PHINode>(I);
  if (!Phi) {
    // Reached the non-PHI end of the chain.
    D.push_back(I);
    return;
  }

  // Only two-input PHIs that sit in the loop header are handled.
  BasicBlock *PhiBlock = Phi->getParent();
  if (Phi->getNumIncomingValues() != 2 || PhiBlock != CurLoop->getHeader()) {
    D.clear();
    return;
  }

  // This is a single block loop with a preheader, so the value arriving from
  // the PHI's own block is the one carried over the backedge.
  Instruction *BEInst =
      dyn_cast<Instruction>(Phi->getIncomingValueForBlock(PhiBlock));
  assert(BEInst && "There should be a value over the backedge");

  // The initial value has to be produced by an instruction as well.
  Value *InitialVal =
      Phi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
  if (!InitialVal || !isa<Instruction>(InitialVal)) {
    D.clear();
    return;
  }

  D.push_back(Phi);
  findDepChainFromPHI(BEInst, D);
}
/// getCanonicalInductionVariable - Check to see if the loop has a canonical /// induction variable: an integer recurrence that starts at 0 and increments /// by one each time through the loop. If so, return the phi node that /// corresponds to it. /// /// The IndVarSimplify pass transforms loops to have a canonical induction /// variable. /// PHINode *Loop::getCanonicalInductionVariable() const { BasicBlock *H = getHeader(); BasicBlock *Incoming = nullptr, *Backedge = nullptr; pred_iterator PI = pred_begin(H); assert(PI != pred_end(H) && "Loop must have at least one backedge!"); Backedge = *PI++; if (PI == pred_end(H)) return nullptr; // dead loop Incoming = *PI++; if (PI != pred_end(H)) return nullptr; // multiple backedges? if (contains(Incoming)) { if (contains(Backedge)) return nullptr; std::swap(Incoming, Backedge); } else if (!contains(Backedge)) return nullptr; // Loop over all of the PHI nodes, looking for a canonical indvar. for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming))) if (CI->isNullValue()) if (Instruction *Inc = dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge))) if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN) if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1))) if (CI->equalsInt(1)) return PN; } return nullptr; }
/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock*> Preds, BranchInst *BI, Pass *P, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1, e = Preds.size(); i != e; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // Explicitly check the BB index here to handle duplicates in Preds. int Idx = PN->getBasicBlockIndex(Preds[i]); if (Idx >= 0) PN->removeIncomingValue(Idx, false); } } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } }
/// Translate this value across the edge PredBB -> CurBB: if this value is a
/// PHI node located in \p CurBB, return the value it receives from \p PredBB;
/// every other value is returned unchanged.
Value *Value::DoPHITranslation(const BasicBlock *CurBB,
                               const BasicBlock *PredBB) {
  if (PHINode *PN = dyn_cast<PHINode>(this))
    if (PN->getParent() == CurBB)
      return PN->getIncomingValueForBlock(PredBB);
  return this;
}
void LoopInterchangeTransform::splitInnerLoopHeader() { // Split the inner loop header out. Here make sure that the reduction PHI's // stay in the innerloop body. BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); if (InnerLoopHasReduction) { // FIXME: Check if the induction PHI will always be the first PHI. BasicBlock *New = InnerLoopHeader->splitBasicBlock( ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split"); if (LI) if (Loop *L = LI->getLoopFor(InnerLoopHeader)) L->addBasicBlockToLoop(New, *LI); // Adjust Reduction PHI's in the block. SmallVector<PHINode *, 8> PHIVec; for (auto I = New->begin(); isa<PHINode>(I); ++I) { PHINode *PHI = dyn_cast<PHINode>(I); Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader); PHI->replaceAllUsesWith(V); PHIVec.push_back((PHI)); } for (auto I = PHIVec.begin(), E = PHIVec.end(); I != E; ++I) { PHINode *P = *I; P->eraseFromParent(); } } else { SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); } DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & " "InnerLoopHeader \n"); }
/// Return the value that the PHI node \p Op receives from block \p BB.
/// \p Op is expected to be a PHINode; this is checked with an assertion.
Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
                                                       BasicBlock *BB) {
  PHINode *PN = dyn_cast<PHINode>(Op);
  assert(PN);
  return PN->getIncomingValueForBlock(BB);
}
/// Evaluate a call to function F, returning true if successful, false if we /// can't evaluate it. ActualArgs contains the formal arguments for the /// function. bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl<Constant*> &ActualArgs) { // Check to see if this function is already executing (recursion). If so, // bail out. TODO: we might want to accept limited recursion. if (is_contained(CallStack, F)) return false; CallStack.push_back(F); // Initialize arguments to the incoming values specified. unsigned ArgNo = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++ArgNo) setVal(&*AI, ActualArgs[ArgNo]); // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, // we can only evaluate any one basic block at most once. This set keeps // track of what we have executed so we can detect recursive cases etc. SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; // CurBB - The current basic block we're evaluating. BasicBlock *CurBB = &F->front(); BasicBlock::iterator CurInst = CurBB->begin(); while (1) { BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); if (!EvaluateBlock(CurInst, NextBB)) return false; if (!NextBB) { // Successfully running until there's no next block means that we found // the return. Fill it the return value and pop the call stack. ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator()); if (RI->getNumOperands()) RetVal = getVal(RI->getOperand(0)); CallStack.pop_back(); return true; } // Okay, we succeeded in evaluating this control flow. See if we have // executed the new block before. If so, we have a looping function, // which we cannot evaluate in reasonable time. if (!ExecutedBlocks.insert(NextBB).second) return false; // looped! // Okay, we have never been in this block before. Check to see if there // are any PHI nodes. 
If so, evaluate them with information about where // we came from. PHINode *PN = nullptr; for (CurInst = NextBB->begin(); (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB))); // Advance to the next block. CurBB = NextBB; } }
/// The function chooses which type of unroll (epilog or prolog) is more /// profitabale. /// Epilog unroll is more profitable when there is PHI that starts from /// constant. In this case epilog will leave PHI start from constant, /// but prolog will convert it to non-constant. /// /// loop: /// PN = PHI [I, Latch], [CI, PreHeader] /// I = foo(PN) /// ... /// /// Epilog unroll case. /// loop: /// PN = PHI [I2, Latch], [CI, PreHeader] /// I1 = foo(PN) /// I2 = foo(I1) /// ... /// Prolog unroll case. /// NewPN = PHI [PrologI, Prolog], [CI, PreHeader] /// loop: /// PN = PHI [I2, Latch], [NewPN, PreHeader] /// I1 = foo(PN) /// I2 = foo(I1) /// ... /// static bool isEpilogProfitable(Loop *L) { BasicBlock *PreHeader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); assert(PreHeader && Header); for (Instruction &BBI : *Header) { PHINode *PN = dyn_cast<PHINode>(&BBI); if (!PN) break; if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader))) return true; } return false; }
/// updatePHINodes - CFG has been changed. /// Before /// - ExitBB's single predecessor was Latch /// - Latch's second successor was Header /// Now /// - ExitBB's single predecessor is Header /// - Latch's one and only successor is Header /// /// Update ExitBB PHINodes' to reflect this change. void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, BasicBlock *Header, PHINode *IV, Instruction *IVIncrement, Loop *LP) { for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end(); BI != BE; ) { PHINode *PN = dyn_cast<PHINode>(BI); ++BI; if (!PN) break; Value *V = PN->getIncomingValueForBlock(Latch); if (PHINode *PHV = dyn_cast<PHINode>(V)) { // PHV is in Latch. PHV has one use is in ExitBB PHINode. And one use // in Header which is new incoming value for PN. Value *NewV = NULL; for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end(); UI != E; ++UI) if (PHINode *U = dyn_cast<PHINode>(*UI)) if (LP->contains(U->getParent())) { NewV = U; break; } // Add incoming value from header only if PN has any use inside the loop. if (NewV) PN->addIncoming(NewV, Header); } else if (Instruction *PHI = dyn_cast<Instruction>(V)) { // If this instruction is IVIncrement then IV is new incoming value // from header otherwise this instruction must be incoming value from // header because loop is in LCSSA form. if (PHI == IVIncrement) PN->addIncoming(IV, Header); else PN->addIncoming(V, Header); } else // Otherwise this is an incoming value from header because loop is in // LCSSA form. PN->addIncoming(V, Header); // Remove incoming value from Latch. PN->removeIncomingValue(Latch); } }
/// UnswitchNontrivialCondition - We determined that the loop is profitable
/// to unswitch when LIC equal Val. Split it into loop versions and test the
/// condition outside of either loop.
///
/// \param LIC the condition being unswitched on.
/// \param Val the constant LIC is compared against.
/// \param L   the loop to version.
///
/// The function returns nothing. The cloned loop is pushed onto
/// LoopProcessWorklist and redoLoop is set so the caller knows the loop
/// changed. The member lists LoopBlocks and NewBlocks are cleared and refilled
/// with the original and the cloned blocks respectively.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
                                               Loop *L) {
  Function *F = loopHeader->getParent();
  DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
        << loopHeader->getName() << " [" << L->getBlocks().size()
        << " blocks] in Function " << F->getName()
        << " when '" << *Val << "' == " << *LIC << "\n");

  // Any cached ScalarEvolution information about L is about to become stale.
  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);

  LoopBlocks.clear();
  NewBlocks.clear();

  // First step, split the preheader and exit blocks, and add these blocks to
  // the LoopBlocks list.
  BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
  LoopBlocks.push_back(NewPreheader);

  // We want the loop to come after the preheader, but before the exit blocks.
  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  // Split all of the edges from inside the loop to their exit blocks. Update
  // the appropriate Phi nodes as we do so.
  SplitExitEdges(L, ExitBlocks);

  // The exit blocks may have been changed due to edge splitting, recompute.
  ExitBlocks.clear();
  L->getUniqueExitBlocks(ExitBlocks);

  // Add exit blocks to the loop blocks.
  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());

  // Next step, clone all of the basic blocks that make up the loop (including
  // the loop preheader and exit blocks), keeping track of the mapping between
  // the instructions and blocks.
  NewBlocks.reserve(LoopBlocks.size());
  ValueToValueMapTy VMap;
  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
    NewBlocks.push_back(NewBB);
    VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
  }

  // Splice the newly inserted blocks into the function right before the
  // original preheader.
  F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
                                NewBlocks[0], F->end());

  // Now we create the new Loop object for the versioned loop.
  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
  Loop *ParentLoop = L->getParentLoop();
  if (ParentLoop) {
    // Make sure to add the cloned preheader and exit blocks to the parent loop
    // as well. (Only the cloned preheader, NewBlocks[0], is added here; the
    // cloned exit blocks are handled in the loop below.)
    ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
  }

  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
    // The new exit block should be in the same loop as the old one.
    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
      ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());

    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
           "Exit block should have been split to have one successor!");
    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);

    // If the successor of the exit block had PHI nodes, add an entry for
    // NewExit. The incoming value is the clone of the original one when the
    // original was cloned, and the original value itself otherwise.
    PHINode *PN;
    for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
      PN = cast<PHINode>(I);
      Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
      ValueToValueMapTy::iterator It = VMap.find(V);
      if (It != VMap.end()) V = It->second;
      PN->addIncoming(V, NewExit);
    }
  }

  // Rewrite the code to refer to itself.
  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
      RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);

  // Rewrite the original preheader to select between versions of the loop.
  BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
         "Preheader splitting did not work correctly!");

  // Emit the new branch that selects between the two versions of this loop.
  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
  LPM->deleteSimpleAnalysisValue(OldBR, L);
  OldBR->eraseFromParent();

  LoopProcessWorklist.push_back(NewLoop);
  redoLoop = true;

  // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody
  // deletes the instruction (for example by simplifying a PHI that feeds into
  // the condition that we're unswitching on), we don't rewrite the second
  // iteration.
  WeakVH LICHandle(LIC);

  // Now we rewrite the original code to know that the condition is true and the
  // new code to know that the condition is false.
  // NOTE(review): the original loop is rewritten with a trailing 'false' and
  // the clone with 'true'; confirm against RewriteLoopBodyWithConditionConstant
  // that this matches the wording of the comment above.
  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);

  // It's possible that simplifying one loop could cause the other to be
  // changed to another value or a constant. If its a constant, don't simplify
  // it.
  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
      LICHandle && !isa<Constant>(LICHandle))
    RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
/// Try to partially inline \p F.
///
/// \p F qualifies only when its entry block ends in a conditional branch and
/// exactly one successor of the entry block is terminated by a return; in any
/// other case 0 is returned before anything is modified. Otherwise a clone of
/// \p F is created, every block of the clone except the entry and the return
/// block is handed to CodeExtractor, the remaining clone is inlined into its
/// call and invoke users, and the clone is erased again.
///
/// \returns the function produced by CodeExtractor::extractCodeRegion(), or 0
/// when \p F is not a candidate.
Function* PartialInliner::unswitchFunction(Function* F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock* entryBlock = F->begin();
  BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return 0;

  // Classify the successors of the entry block: one must end in a return.
  BasicBlock* returnBlock = 0;
  BasicBlock* nonReturnBlock = 0;
  unsigned returnCount = 0;
  for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
       SI != SE; ++SI)
    if (isa<ReturnInst>((*SI)->getTerminator())) {
      returnBlock = *SI;
      returnCount++;
    } else
      nonReturnBlock = *SI;

  if (returnCount != 1)
    return 0;

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function* duplicateFunction = CloneFunction(F, VMap,
                                              /*ModuleLevelChanges=*/false);
  duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  F->getParent()->getFunctionList().push_back(duplicateFunction);
  BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
  BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
  BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);

  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(duplicateFunction);

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block. For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock* preReturn = newReturnBlock;
  newReturnBlock = newReturnBlock->splitBasicBlock(
                                              newReturnBlock->getFirstNonPHI());
  BasicBlock::iterator I = preReturn->begin();
  BasicBlock::iterator Ins = newReturnBlock->begin();
  while (I != preReturn->end()) {
    PHINode* OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi) break;

    // retPhi lives in the new return block and merges the value computed by
    // the old PHI (arriving via preReturn) with the value that used to come
    // straight from the entry block.
    PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(retPhi);
    Ins = newReturnBlock->getFirstNonPHI();

    retPhi->addIncoming(I, preReturn);
    retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
                        newEntryBlock);
    // NOTE(review): removeIncomingValue is called with its default arguments
    // before I is advanced; confirm it cannot erase OldPhi at this point.
    OldPhi->removeIncomingValue(newEntryBlock);

    ++I;
  }
  // The entry block now branches to the split-off return block directly.
  newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock*> toExtract;
  toExtract.push_back(newNonReturnBlock);
  for (Function::iterator FI = duplicateFunction->begin(),
       FE = duplicateFunction->end(); FI != FE; ++FI)
    if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
        &*FI != newNonReturnBlock)
      toExtract.push_back(FI);

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.runOnFunction(*duplicateFunction);

  // Extract the body of the if.
  Function* extractedFunction
    = CodeExtractor(toExtract, &DT).extractCodeRegion();

  InlineFunctionInfo IFI;

  // Inline the top-level if test into all callers. The user list is copied
  // first, and only call and invoke users are inlined.
  std::vector<User*> Users(duplicateFunction->use_begin(),
                           duplicateFunction->use_end());
  for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
       UI != UE; ++UI)
    if (CallInst *CI = dyn_cast<CallInst>(*UI))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
      InlineFunction(II, IFI);

  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  duplicateFunction->replaceAllUsesWith(F);
  duplicateFunction->eraseFromParent();

  ++NumPartialInlined;

  return extractedFunction;
}
/// \brief Peel off the first \p PeelCount iterations of loop \p L.
///
/// Note that this does not peel them off as a single straight-line block.
/// Rather, each iteration is peeled off separately, and needs to check the
/// exit condition.
/// For loops that dynamically execute \p PeelCount iterations or less
/// this provides a benefit, since the peeled off iterations, which account
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
///
/// \returns false, without modifying anything, when canPeel(L) fails; true
/// once the iterations have been peeled.
///
/// \p DT and \p LI are forwarded to the block-splitting helpers. \p SE is
/// dereferenced without a null check when \p L has a parent loop.
/// \p PreserveLCSSA is not referenced in this body.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
                    ScalarEvolution *SE, DominatorTree *DT,
                    bool PreserveLCSSA) {
  if (!canPeel(L))
    return false;

  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  BasicBlock *Header = L->getHeader();
  BasicBlock *PreHeader = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Exit = L->getUniqueExitBlock();

  Function *F = Header->getParent();

  // Set up all the necessary basic blocks. It is convenient to split the
  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
  // body, and a new preheader for the "real" loop.

  // Peeling the first iteration transforms.
  //
  // PreHeader:
  // ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:
  //
  // into
  //
  // InsertTop:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot:
  // NewPreHeader:
  // ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:
  //
  // Each following iteration will split the current bottom anchor in two,
  // and put the new copy of the loop body between these two blocks. That is,
  // after peeling another iteration from the example above, we'll split
  // InsertBot, and get:
  //
  // InsertTop:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot.next:
  // NewPreHeader:
  // ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:

  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
  BasicBlock *InsertBot =
      SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
  BasicBlock *NewPreHeader =
      SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);

  InsertTop->setName(Header->getName() + ".peel.begin");
  InsertBot->setName(Header->getName() + ".peel.next");
  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");

  // LVMap is shared by all peeled iterations; VMap (below) is per iteration.
  ValueToValueMapTy LVMap;

  // If we have branch weight information, we'll want to update it for the
  // newly created branches.
  BranchInst *LatchBR =
      cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
  // Index of the latch branch successor that is the loop header (backedge).
  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);

  // TrueWeight/FalseWeight are only read when extractProfMetadata succeeds.
  uint64_t TrueWeight, FalseWeight;
  uint64_t ExitWeight = 0, BackEdgeWeight = 0;
  if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
    ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
    BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight;
  }

  // For each peeled-off iteration, make a copy of the loop.
  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
    SmallVector<BasicBlock *, 8> NewBlocks;
    ValueToValueMapTy VMap;

    // The exit weight of the previous iteration is the header entry weight
    // of the current iteration. So this is exactly how many dynamic iterations
    // the current peeled-off static iteration uses up.
    // FIXME: due to the way the distribution is constructed, we need a
    // guard here to make sure we don't end up with non-positive weights.
    if (ExitWeight < BackEdgeWeight)
      BackEdgeWeight -= ExitWeight;
    else
      BackEdgeWeight = 1;

    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
                    NewBlocks, LoopBlocks, VMap, LVMap, LI);
    updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
                        PeelCount, ExitWeight);

    // The old bottom anchor becomes the top anchor of the next iteration.
    InsertTop = InsertBot;
    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
    InsertBot->setName(Header->getName() + ".peel.next");

    // Move the cloned blocks so that they sit in front of InsertTop.
    F->getBasicBlockList().splice(InsertTop->getIterator(),
                                  F->getBasicBlockList(),
                                  NewBlocks[0]->getIterator(), F->end());

    // Remap to use values from the current iteration instead of the
    // previous one.
    remapInstructionsInBlocks(NewBlocks, VMap);
  }

  // Now adjust the phi nodes in the loop header to get their initial values
  // from the last peeled-off iteration instead of the preheader.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PHI = cast<PHINode>(I);
    Value *NewVal = PHI->getIncomingValueForBlock(Latch);
    Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
    // Values defined inside the loop are looked up in LVMap; anything else is
    // used unchanged.
    if (LatchInst && L->contains(LatchInst))
      NewVal = LVMap[LatchInst];

    PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
  }

  // Adjust the branch weights on the loop exit.
  if (ExitWeight) {
    MDBuilder MDB(LatchBR->getContext());
    MDNode *WeightNode =
        HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
                  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
  }

  // If the loop is nested, we changed the parent loop, update SE.
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  NumPeeled++;

  return true;
}
/// Peel off the first \p PeelCount iterations of loop \p L.
///
/// Note that this does not peel them off as a single straight-line block.
/// Rather, each iteration is peeled off separately, and needs to check the
/// exit condition.
/// For loops that dynamically execute \p PeelCount iterations or less
/// this provides a benefit, since the peeled off iterations, which account
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
///
/// The caller must pass a non-zero \p PeelCount and a loop for which
/// canPeel(L) holds; both are only checked by assertions. \p DT may be null,
/// in which case the immediate-dominator update for the exit block is skipped.
/// \p SE is dereferenced unconditionally. The function always returns true.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
                    ScalarEvolution *SE, DominatorTree *DT,
                    AssumptionCache *AC, bool PreserveLCSSA) {
  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");

  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  BasicBlock *Header = L->getHeader();
  BasicBlock *PreHeader = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Exit = L->getUniqueExitBlock();

  Function *F = Header->getParent();

  // Set up all the necessary basic blocks. It is convenient to split the
  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
  // body, and a new preheader for the "real" loop.

  // Peeling the first iteration transforms.
  //
  // PreHeader:
  // ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:
  //
  // into
  //
  // InsertTop:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot:
  // NewPreHeader:
  // ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:
  //
  // Each following iteration will split the current bottom anchor in two,
  // and put the new copy of the loop body between these two blocks. That is,
  // after peeling another iteration from the example above, we'll split
  // InsertBot, and get:
  //
  // InsertTop:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot.next:
  // NewPreHeader:
  // ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:

  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
  BasicBlock *InsertBot =
      SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
  BasicBlock *NewPreHeader =
      SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);

  InsertTop->setName(Header->getName() + ".peel.begin");
  InsertBot->setName(Header->getName() + ".peel.next");
  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");

  // LVMap is shared by all peeled iterations; VMap (below) is per iteration.
  ValueToValueMapTy LVMap;

  // If we have branch weight information, we'll want to update it for the
  // newly created branches.
  BranchInst *LatchBR =
      cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
  // Index of the latch branch successor that is the loop header (backedge).
  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);

  // TrueWeight/FalseWeight are only read when extractProfMetadata succeeds.
  uint64_t TrueWeight, FalseWeight;
  uint64_t ExitWeight = 0, CurHeaderWeight = 0;
  if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
    ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
    // The # of times the loop body executes is the sum of the exit block
    // weight and the # of times the backedges are taken.
    CurHeaderWeight = TrueWeight + FalseWeight;
  }

  // For each peeled-off iteration, make a copy of the loop.
  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
    SmallVector<BasicBlock *, 8> NewBlocks;
    ValueToValueMapTy VMap;

    // Subtract the exit weight from the current header weight -- the exit
    // weight is exactly the weight of the previous iteration's header.
    // FIXME: due to the way the distribution is constructed, we need a
    // guard here to make sure we don't end up with non-positive weights.
    if (ExitWeight < CurHeaderWeight)
      CurHeaderWeight -= ExitWeight;
    else
      CurHeaderWeight = 1;

    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
                    NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);

    // Remap to use values from the current iteration instead of the
    // previous one.
    remapInstructionsInBlocks(NewBlocks, VMap);

    if (DT) {
      // Latches of the cloned loops dominate over the loop exit, so idom of the
      // latter is the first cloned loop body, as original PreHeader dominates
      // the original loop body.
      if (Iter == 0)
        DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
#ifdef EXPENSIVE_CHECKS
      assert(DT->verify(DominatorTree::VerificationLevel::Fast));
#endif
    }

    auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
    updateBranchWeights(InsertBot, LatchBRCopy, Iter, PeelCount, ExitWeight);
    // Remove Loop metadata from the latch branch instruction
    // because it is not the Loop's latch branch anymore.
    LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);

    // The old bottom anchor becomes the top anchor of the next iteration.
    InsertTop = InsertBot;
    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
    InsertBot->setName(Header->getName() + ".peel.next");

    // Move the cloned blocks so that they sit in front of InsertTop.
    F->getBasicBlockList().splice(InsertTop->getIterator(),
                                  F->getBasicBlockList(),
                                  NewBlocks[0]->getIterator(), F->end());
  }

  // Now adjust the phi nodes in the loop header to get their initial values
  // from the last peeled-off iteration instead of the preheader.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PHI = cast<PHINode>(I);
    Value *NewVal = PHI->getIncomingValueForBlock(Latch);
    Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
    // Values defined inside the loop are looked up in LVMap; anything else is
    // used unchanged.
    if (LatchInst && L->contains(LatchInst))
      NewVal = LVMap[LatchInst];

    PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
  }

  // Adjust the branch weights on the loop exit.
  if (ExitWeight) {
    // The backedge count is the difference of current header weight and
    // current loop exit weight. If the current header weight is smaller than
    // the current loop exit weight, we mark the loop backedge weight as 1.
    uint64_t BackEdgeWeight = 0;
    if (ExitWeight < CurHeaderWeight)
      BackEdgeWeight = CurHeaderWeight - ExitWeight;
    else
      BackEdgeWeight = 1;
    MDBuilder MDB(LatchBR->getContext());
    MDNode *WeightNode =
        HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
                  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
  }

  // From here on L refers to the parent loop, if there is one; both the SE
  // invalidation and the simplification below are applied to that loop.
  if (Loop *ParentLoop = L->getParentLoop())
    L = ParentLoop;

  // We modified the loop, update SE.
  SE->forgetTopmostLoop(L);

  // FIXME: Incrementally update loop-simplify
  simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);

  NumPeeled++;

  return true;
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// TripCount is generally defined as the number of times the loop header /// executes. UnrollLoop relaxes the definition to permit early exits: here /// TripCount is the iteration on which control exits LatchBlock if no early /// exits were taken. Note that UnrollLoop assumes that the loop counter test /// terminates LatchBlock in order to remove unnecesssary instances of the /// test. In other words, control may exit the loop prior to TripCount /// iterations via an early branch, but control may not exit the loop from the /// LatchBlock's terminator prior to TripCount iterations. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. /// /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI, Pass *PP, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } // Loops with indirectbr cannot be cloned. 
if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); return false; } BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); return false; } if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; // Don't enter the unroll code if there is nothing to do. This way we don't // need to support "partial unrolling by 1". if (TripCount == 0 && Count < 2) return false; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) return false; // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. if (PP) { ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); if (SE) SE->forgetLoop(L); } // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. 
BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } // Report the unrolling decision. DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, Twine("completely unrolled loop with ") + Twine(TripCount) + " iterations"); } else { DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); Twine DiagMsg("unrolled loop by a factor of " + Twine(Count)); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); DiagMsg.concat(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); DiagMsg.concat(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); DiagMsg.concat(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg); } bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { OrigPHINode.push_back(cast<PHINode>(I)); } std::vector<BasicBlock*> Headers; std::vector<BasicBlock*> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each // exit. 
LoopBlocksDFS DFS(L); DFS.perform(LI); // Stash the DFS iterators before adding blocks to the loop. LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; VMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; L->addBasicBlockToLoop(New, LI->getBase()); // Add phi entries for newly created values to all exit blocks. for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; ++SI) { if (L->contains(*SI)) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { Value *Incoming = phi->getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); if (It != LastValueMap.end()) Incoming = It->second; phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. 
if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) Latches.push_back(New); NewBlocks.push_back(New); } // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) ::RemapInstruction(I, LastValueMap); } // Loop over the PHI nodes in the original block, setting incoming values. for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *PN = OrigPHINode[i]; if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } else if (Count > 1) { Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { if (L->contains(InValI)) InVal = LastValueMap[InVal]; } assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); PN->addIncoming(InVal, Latches.back()); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; if (RuntimeTripCount && j != 0) { NeedConditional = false; } // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { Dest = LoopExit; NeedConditional = false; } // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. 
if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { // Remove phi operands at this loop exit if (Dest != LoopExit) { BasicBlock *BB = Latches[i]; for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { if (*SI == Headers[i]) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { Phi->removeIncomingValue(BB, false); } } } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); } } // Merge adjacent basic blocks, if possible. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } DominatorTree *DT = nullptr; if (PP) { // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. if (DominatorTreeWrapperPass *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { DT = &DTWP->getDomTree(); DT->recalculate(*L->getHeader()->getParent()); } // Simplify any new induction variables in the partially unrolled loop. ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); if (SE && !CompletelyUnroll) { SmallVector<WeakVH, 16> DeadInsts; simplifyLoopIVs(L, SE, LPM, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already // identified. Any remaining should be cleaned up below. 
while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { Instruction *Inst = I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); else if (Value *V = SimplifyInstruction(Inst)) if (LI->replacementPreservesLCSSAForm(Inst, V)) { Inst->replaceAllUsesWith(V); (*BB)->getInstList().erase(Inst); } } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; Loop *OuterL = L->getParentLoop(); // Remove the loop from the LoopPassManager if it's completely removed. if (CompletelyUnroll && LPM != nullptr) LPM->deleteLoopFromQueue(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. if (PP && DT) { if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after // deleteLoopFromQueue updates LoopInfo. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); formLCSSARecursively(*OuterL, *DT, SE); } } return true; }
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. /// In particular, it does not preserve LoopSimplify (because it's /// complicated to handle the case where one of the edges being split /// is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; Loop *L = LI ? LI->getLoopFor(BB) : 0; bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); // Move the edges from Preds to point to NewBB instead of BB. // While here, if we need to preserve loop analyses, collect // some information about how this split will affect loops. bool HasLoopExit = false; bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; for (unsigned i = 0; i != NumPreds; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); if (LI) { // If we need to preserve LCSSA, determine if any of // the preds is a loop exit. 
if (PreserveLCSSA) if (Loop *PL = LI->getLoopFor(Preds[i])) if (!PL->contains(BB)) HasLoopExit = true; // If we need to preserve LoopInfo, note whether any of the // preds crosses an interesting loop boundary. if (L) { if (L->contains(Preds[i])) IsLoopEntry = false; else SplitMakesNewLoopHeader = true; } } } // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; if (L) { if (IsLoopEntry) { // Add the new block to the nearest enclosing loop (and not an // adjacent loop). To find this, examine each of the predecessors and // determine which loops enclose them, and select the most-nested loop // which contains the loop containing the block being split. Loop *InnermostPredLoop = 0; for (unsigned i = 0; i != NumPreds; ++i) if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { // Seek a loop which actually contains the block being split (to // avoid adjacent loops). while (PredLoop && !PredLoop->contains(BB)) PredLoop = PredLoop->getParentLoop(); // Select the most-nested of these loops which contains the block. 
if (PredLoop && PredLoop->contains(BB) && (!InnermostPredLoop || InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) InnermostPredLoop = PredLoop; } if (InnermostPredLoop) InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) L->moveToHeader(NewBB); } } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } return NewBB; }
Value* LoopTripCount::insertTripCount(Loop* L, Instruction* InsertPos) { // inspired from Loop::getCanonicalInductionVariable BasicBlock *H = L->getHeader(); BasicBlock* LoopPred = L->getLoopPredecessor(); BasicBlock* startBB = NULL;//which basicblock stores start value int OneStep = 0;// the extra add or plus step for calc Assert(LoopPred, "Require Loop has a Pred"); DEBUG(errs()<<"loop depth:"<<L->getLoopDepth()<<"\n"); /** whats difference on use of predecessor and preheader??*/ //RET_ON_FAIL(self->getLoopLatch()&&self->getLoopPreheader()); //assert(self->getLoopLatch() && self->getLoopPreheader() && "need loop simplify form" ); ret_null_fail(L->getLoopLatch(), "need loop simplify form"); BasicBlock* TE = NULL;//True Exit SmallVector<BasicBlock*,4> Exits; L->getExitingBlocks(Exits); if(Exits.size()==1) TE = Exits.front(); else{ if(std::find(Exits.begin(),Exits.end(),L->getLoopLatch())!=Exits.end()) TE = L->getLoopLatch(); else{ SmallVector<llvm::Loop::Edge,4> ExitEdges; L->getExitEdges(ExitEdges); //stl 用法,先把所有满足条件的元素(出口的结束符是不可到达)移动到数组的末尾,再统一删除 ExitEdges.erase(std::remove_if(ExitEdges.begin(), ExitEdges.end(), [](llvm::Loop::Edge& I){ return isa<UnreachableInst>(I.second->getTerminator()); }), ExitEdges.end()); if(ExitEdges.size()==1) TE = const_cast<BasicBlock*>(ExitEdges.front().first); } } //process true exit ret_null_fail(TE, "need have a true exit"); Instruction* IndOrNext = NULL; Value* END = NULL; //终止块的终止指令:分情况讨论branchinst,switchinst; //跳转指令br bool a1,a2;condition<-->bool if(isa<BranchInst>(TE->getTerminator())){ const BranchInst* EBR = cast<BranchInst>(TE->getTerminator()); Assert(EBR->isConditional(), "end branch is not conditional"); ICmpInst* EC = dyn_cast<ICmpInst>(EBR->getCondition()); if(EC->getPredicate() == EC->ICMP_SGT){ Assert(!L->contains(EBR->getSuccessor(0)), *EBR<<":abnormal exit with great than");//终止块的终止指令---->跳出执行循环外的指令 OneStep += 1; } else if(EC->getPredicate() == EC->ICMP_EQ) Assert(!L->contains(EBR->getSuccessor(0)), *EBR<<":abnormal 
exit with great than"); else if(EC->getPredicate() == EC->ICMP_SLT) { ret_null_fail(!L->contains(EBR->getSuccessor(1)), *EBR<<":abnormal exit with less than"); } else { ret_null_fail(0, *EC<<" unknow combination of end condition"); } IndOrNext = dyn_cast<Instruction>(castoff(EC->getOperand(0)));//去掉类型转化 END = EC->getOperand(1); DEBUG(errs()<<"end value:"<<*EC<<"\n"); }else if(isa<SwitchInst>(TE->getTerminator())){ SwitchInst* ESW = const_cast<SwitchInst*>(cast<SwitchInst>(TE->getTerminator())); IndOrNext = dyn_cast<Instruction>(castoff(ESW->getCondition())); for(auto I = ESW->case_begin(),E = ESW->case_end();I!=E;++I){ if(!L->contains(I.getCaseSuccessor())){ ret_null_fail(!END,""); assert(!END && "shouldn't have two ends"); END = I.getCaseValue(); } } DEBUG(errs()<<"end value:"<<*ESW<<"\n"); }else{ assert(0 && "unknow terminator type"); } ret_null_fail(L->isLoopInvariant(END), "end value should be loop invariant");//至此得END值 Value* start = NULL; Value* ind = NULL; Instruction* next = NULL; bool addfirst = false;//add before icmp ed DISABLE(errs()<<*IndOrNext<<"\n"); if(isa<LoadInst>(IndOrNext)){ //memory depend analysis Value* PSi = IndOrNext->getOperand(0);//point type Step.i int SICount[2] = {0};//store in predecessor count,store in loop body count for( auto I = PSi->use_begin(),E = PSi->use_end();I!=E;++I){ DISABLE(errs()<<**I<<"\n"); StoreInst* SI = dyn_cast<StoreInst>(*I); if(!SI || SI->getOperand(1) != PSi) continue; if(!start&&L->isLoopInvariant(SI->getOperand(0))) { if(SI->getParent() != LoopPred) if(std::find(pred_begin(LoopPred),pred_end(LoopPred),SI->getParent()) == pred_end(LoopPred)) continue; start = SI->getOperand(0); startBB = SI->getParent(); ++SICount[0]; } Instruction* SI0 = dyn_cast<Instruction>(SI->getOperand(0)); if(L->contains(SI) && SI0 && SI0->getOpcode() == Instruction::Add){ next = SI0; ++SICount[1]; } } Assert(SICount[0]==1 && SICount[1]==1, ""); ind = IndOrNext; }else{ if(isa<PHINode>(IndOrNext)){ PHINode* PHI = cast<PHINode>(IndOrNext); 
ind = IndOrNext; if(castoff(PHI->getIncomingValue(0)) == castoff(PHI->getIncomingValue(1)) && PHI->getParent() != H) ind = castoff(PHI->getIncomingValue(0)); addfirst = false; }else if(IndOrNext->getOpcode() == Instruction::Add){ next = IndOrNext; addfirst = true; }else{ Assert(0 ,"unknow how to analysis"); } for(auto I = H->begin();isa<PHINode>(I);++I){ PHINode* P = cast<PHINode>(I); if(ind && P == ind){ //start = P->getIncomingValueForBlock(L->getLoopPredecessor()); start = tryFindStart(P, L, startBB); next = dyn_cast<Instruction>(P->getIncomingValueForBlock(L->getLoopLatch())); }else if(next && P->getIncomingValueForBlock(L->getLoopLatch()) == next){ //start = P->getIncomingValueForBlock(L->getLoopPredecessor()); start = tryFindStart(P, L, startBB); ind = P; } } } Assert(start ,"couldn't find a start value"); //process complex loops later //DEBUG(if(L->getLoopDepth()>1 || !L->getSubLoops().empty()) return NULL); DEBUG(errs()<<"start value:"<<*start<<"\n"); DEBUG(errs()<<"ind value:"<<*ind<<"\n"); DEBUG(errs()<<"next value:"<<*next<<"\n"); //process non add later unsigned next_phi_idx = 0; ConstantInt* Step = NULL,*PrevStep = NULL;/*only used if next is phi node*/ ret_null_fail(next, ""); PHINode* next_phi = dyn_cast<PHINode>(next); do{ if(next_phi) { next = dyn_cast<Instruction>(next_phi->getIncomingValue(next_phi_idx)); ret_null_fail(next, ""); DEBUG(errs()<<"next phi "<<next_phi_idx<<":"<<*next<<"\n"); if(Step&&PrevStep){ Assert(Step->getSExtValue() == PrevStep->getSExtValue(),""); } PrevStep = Step; } Assert(next->getOpcode() == Instruction::Add , "why induction increment is not Add"); Assert(next->getOperand(0) == ind ,"why induction increment is not add it self"); Step = dyn_cast<ConstantInt>(next->getOperand(1)); Assert(Step,""); }while(next_phi && ++next_phi_idx<next_phi->getNumIncomingValues()); //RET_ON_FAIL(Step->equalsInt(1)); //assert(VERBOSE(Step->equalsInt(1),Step) && "why induction increment number is not 1"); Value* RES = NULL; //if there are no 
predecessor, we can insert code into start value basicblock IRBuilder<> Builder(InsertPos); Assert(start->getType()->isIntegerTy() && END->getType()->isIntegerTy() , " why increment is not integer type"); if(start->getType() != END->getType()){ start = Builder.CreateCast(CastInst::getCastOpcode(start, false, END->getType(), false),start,END->getType()); } if(Step->getType() != END->getType()){ //Because Step is a Constant, so it casted is constant Step = dyn_cast<ConstantInt>(Builder.CreateCast(CastInst::getCastOpcode(Step, false, END->getType(), false),Step,END->getType())); AssertRuntime(Step); } if(Step->isMinusOne()) RES = Builder.CreateSub(start,END); else//Step Couldn't be zero RES = Builder.CreateSub(END, start); if(addfirst) OneStep -= 1; if(Step->isMinusOne()) OneStep*=-1; assert(OneStep<=1 && OneStep>=-1); RES = (OneStep==1)?Builder.CreateAdd(RES,Step):(OneStep==-1)?Builder.CreateSub(RES, Step):RES; if(!Step->isMinusOne()&&!Step->isOne()) RES = Builder.CreateSDiv(RES, Step); RES->setName(H->getName()+".tc"); return RES; }
/// Update the PHI nodes in OrigBB to include the values coming from NewBB. /// This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock *> Preds, BranchInst *BI, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = nullptr; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (!PredSet.count(PN->getIncomingBlock(i))) continue; if (!InVal) InVal = PN->getIncomingValue(i); else if (InVal != PN->getIncomingValue(i)) { InVal = nullptr; break; } } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. // NOTE! This loop walks backwards for a reason! First off, this minimizes // the cost of removal if we end up removing a large number of values, and // second off, this ensures that the indices for the incoming values // aren't invalidated when we remove one. for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) if (PredSet.count(PN->getIncomingBlock(i))) PN->removeIncomingValue(i, false); // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); continue; } // If the values coming into the block are not the same, we need a new // PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); // NOTE! This loop walks backwards for a reason! 
First off, this minimizes // the cost of removal if we end up removing a large number of values, and // second off, this ensures that the indices for the incoming values aren't // invalidated when we remove one. for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { BasicBlock *IncomingBB = PN->getIncomingBlock(i); if (PredSet.count(IncomingBB)) { Value *V = PN->removeIncomingValue(i, false); NewPHI->addIncoming(V, IncomingBB); } } PN->addIncoming(NewPHI, NewBB); } }
void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { auto getFirstPHI = [](BasicBlock *BB) { BasicBlock::iterator I = BB->begin(); PHINode *FirstPhi = nullptr; while (I != BB->end()) { PHINode *Phi = dyn_cast<PHINode>(I); if (!Phi) break; if (!FirstPhi) { FirstPhi = Phi; break; } } return FirstPhi; }; // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. BasicBlock *PreReturn = ClonedOI->ReturnBlock; // only split block when necessary: PHINode *FirstPhi = getFirstPHI(PreReturn); unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size(); if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1) return; auto IsTrivialPhi = [](PHINode *PN) -> Value * { Value *CommonValue = PN->getIncomingValue(0); if (all_of(PN->incoming_values(), [&](Value *V) { return V == CommonValue; })) return CommonValue; return nullptr; }; ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock( ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = PreReturn->begin(); Instruction *Ins = &ClonedOI->ReturnBlock->front(); SmallVector<Instruction *, 4> DeadPhis; while (I != PreReturn->end()) { PHINode *OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; PHINode *RetPhi = PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); OldPhi->replaceAllUsesWith(RetPhi); Ins = ClonedOI->ReturnBlock->getFirstNonPHI(); RetPhi->addIncoming(&*I, PreReturn); for (BasicBlock *E : ClonedOI->ReturnBlockPreds) { RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E); OldPhi->removeIncomingValue(E); } // After incoming values splitting, the old phi may become trivial. // Keeping the trivial phi can introduce definition inside the outline // region which is live-out, causing necessary overhead (load, store // arg passing etc). 
if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { OldPhi->replaceAllUsesWith(OldPhiVal); DeadPhis.push_back(OldPhi); } ++I; } for (auto *DP : DeadPhis) DP->eraseFromParent(); for (auto E : ClonedOI->ReturnBlockPreds) { E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); } }
/// Create a clone of the blocks in a loop and connect them together. /// If UnrollProlog is true, loop structure will not be cloned, otherwise a new /// loop will be created including all cloned blocks, and the iterator of it /// switches to count NewIter down to 0. /// static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, LoopInfo *LI) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *NewLoop = 0; Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); if (ParentLoop) ParentLoop->addChildLoop(NewLoop); else LI->addTopLevelLoop(NewLoop); } // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F); NewBlocks.push_back(NewBB); if (NewLoop) NewLoop->addBasicBlockToLoop(NewBB, *LI); else if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (Latch == *BB) { // For the last block, if UnrollProlog is true, create a direct jump to // InsertBot. If not, create a loop back to cloned head. 
VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (UnrollProlog) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[I]); if (UnrollProlog) { VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (VMap[InVal]) NewPHI->setIncomingValue(idx, VMap[InVal]); } } if (NewLoop) { // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. 
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); } }
/// Try to split loop L into two consecutive loops around a loop-invariant
/// value that an in-loop conditional branch compares against an IV-based
/// value. The original loop ("ALoop") is followed by a clone ("BLoop"); each
/// keeps one side of the split branch and drops the other.
///
/// Returns true when the loop was split. Every `return false` below happens
/// before this function itself rewrites any IR (it only resets/sets the
/// SplitCondition member on those paths).
bool LoopIndexSplit::splitLoop() {
  SplitCondition = NULL;
  // Equality exit conditions are not handled here.
  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE ||
      ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
    return false;
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BranchInst *SBR = NULL; // Split Condition Branch
  BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
  // If Exiting block includes loop variant instructions then this
  // loop may not be split safely.
  BasicBlock *ExitingBlock = ExitCondition->getParent();
  if (!cleanBlock(ExitingBlock)) return false;

  LLVMContext &Context = Header->getContext();

  // Search the loop's blocks for the first conditional branch on an integer
  // compare that qualifies as a split condition.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I) {
    BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
    if (!BR || BR->isUnconditional()) continue;
    ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
    if (!CI || CI == ExitCondition
        || CI->getPredicate() == ICmpInst::ICMP_NE
        || CI->getPredicate() == ICmpInst::ICMP_EQ)
      continue;

    // Unable to handle triangle loops at the moment.
    // In a triangle loop, the split condition is in the header and one of
    // the split destinations is the loop latch. If the split condition is EQ
    // then such loops are already handled in processOneIterationLoop().
    if (Header == (*I)
        && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
      continue;

    // If the block does not dominate the latch then this is not a diamond.
    // Such loop may not benefit from index split.
    if (!DT->dominates((*I), Latch))
      continue;

    // If the split condition branch's targets do not have a single
    // predecessor, SplitCondBlock, then it is not possible to remove the
    // inactive branch.
    if (!BR->getSuccessor(0)->getSinglePredecessor()
        || !BR->getSuccessor(1)->getSinglePredecessor())
      return false;

    // If the merge point for BR is not loop latch then skip this condition.
    if (BR->getSuccessor(0) != Latch) {
      DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
      assert (DF0 != DF->end() && "Unable to find dominance frontier");
      if (!DF0->second.count(Latch))
        continue;
    }

    if (BR->getSuccessor(1) != Latch) {
      DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
      assert (DF1 != DF->end() && "Unable to find dominance frontier");
      if (!DF1->second.count(Latch))
        continue;
    }
    SplitCondition = CI;
    SBR = BR;
    break;
  }

  if (!SplitCondition)
    return false;

  // If the predicate sign does not match then skip.
  if (ExitCondition->isSigned() != SplitCondition->isSigned())
    return false;

  // EVOpNum: 1 if operand 1 of the exit condition is IVExitValue, else 0.
  // It is later used as the operand index that receives the new exit value.
  unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
  // SVOpNum: operand index of the split value. NOTE(review): this relies on
  // IVBasedValues.count() yielding 0 or 1 -- confirm the container type.
  unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
  Value *SplitValue = SplitCondition->getOperand(SVOpNum);
  if (!L->isLoopInvariant(SplitValue))
    return false;
  // The other operand of the split condition must be IV-based.
  if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
    return false;

  // Normalize loop conditions so that it is easier to calculate new loop
  // bounds.
  // Inverting the predicate and swapping the two branch targets keeps the
  // control flow unchanged.
  if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
    ExitCondition->setPredicate(ExitCondition->getInversePredicate());
    BasicBlock *T = EBR->getSuccessor(0);
    EBR->setSuccessor(0, EBR->getSuccessor(1));
    EBR->setSuccessor(1, T);
  }

  if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
    SplitCondition->setPredicate(SplitCondition->getInversePredicate());
    BasicBlock *T = SBR->getSuccessor(0);
    SBR->setSuccessor(0, SBR->getSuccessor(1));
    SBR->setSuccessor(1, T);
  }

  //[*] Calculate new loop bounds.
  // AEV becomes the exit-value operand of ALoop's exit condition; BSV becomes
  // the value BLoop's induction variable PHI receives on entry from ALoop.
  Value *AEV = SplitValue;
  Value *BSV = SplitValue;
  bool Sign = SplitCondition->isSigned();
  Instruction *PHTerm = L->getLoopPreheader()->getTerminator();

  if (IVisLT(*ExitCondition)) {
    if (IVisLT(*SplitCondition)) {
      /* Do nothing */
    }
    else if (IVisLE(*SplitCondition)) {
      AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
    } else {
      assert (0 && "Unexpected split condition!");
    }
  }
  else if (IVisLE(*ExitCondition)) {
    if (IVisLT(*SplitCondition)) {
      AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
    }
    else if (IVisLE(*SplitCondition)) {
      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
    } else {
      assert (0 && "Unexpected split condition!");
    }
  } else {
    assert (0 && "Unexpected exit condition!");
  }
  // Combine the computed bounds with the original exit and start values.
  AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
  BSV = getMax(BSV, IVStartValue, Sign, PHTerm);

  // [*] Clone Loop
  // ValueMap maps values of the original loop to their clones in BLoop.
  DenseMap<const Value *, Value *> ValueMap;
  Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
  Loop *ALoop = L;

  // [*] ALoop's exiting edge enters BLoop's header.
  //    ALoop's original exit block becomes BLoop's exit block.
  PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
  BasicBlock *A_ExitingBlock = ExitCondition->getParent();
  BranchInst *A_ExitInsn =
    dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
  assert (A_ExitInsn && "Unable to find suitable loop exit branch");
  BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
  BasicBlock *B_Header = BLoop->getHeader();
  // Whichever successor of the exit branch leaves ALoop is the exit edge;
  // redirect that one to BLoop's header.
  if (ALoop->contains(B_ExitBlock)) {
    B_ExitBlock = A_ExitInsn->getSuccessor(0);
    A_ExitInsn->setSuccessor(0, B_Header);
  } else
    A_ExitInsn->setSuccessor(1, B_Header);

  // [*] Update ALoop's exit value using new exit value.
  ExitCondition->setOperand(EVOpNum, AEV);

  // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from
  //     original loop's preheader. Add incoming PHINode values from
  //     ALoop's exiting block. Update BLoop header's dominator info.

  // Collect inverse map of Header PHINodes.
  DenseMap<Value *, Value *> InverseMap;
  for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
         BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
      InverseMap[PNClone] = PN;
    } else
      break;
  }

  BasicBlock *A_Preheader = ALoop->getLoopPreheader();
  for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
       BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      // Remove incoming value from original preheader.
      PN->removeIncomingValue(A_Preheader);

      // Add incoming value from A_ExitingBlock.
      if (PN == B_IndVar)
        PN->addIncoming(BSV, A_ExitingBlock);
      else {
        PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
        Value *V2 = NULL;
        // If loop header is also loop exiting block then
        // OrigPN is incoming value for B loop header.
        if (A_ExitingBlock == ALoop->getHeader())
          V2 = OrigPN;
        else
          V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
        PN->addIncoming(V2, A_ExitingBlock);
      }
    } else
      break;
  }

  DT->changeImmediateDominator(B_Header, A_ExitingBlock);
  DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);

  // [*] Update BLoop's exit block. Its new predecessor is BLoop's exit
  //     block. Remove incoming PHINode values from ALoop's exiting block.
  //     Add new incoming values from BLoop's incoming exiting value.
  //     Update BLoop exit block's dominator info.
  BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
  for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
       BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
                                                            B_ExitingBlock);
      PN->removeIncomingValue(A_ExitingBlock);
    } else
      break;
  }

  DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
  DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);

  //[*] Split ALoop's exit edge. This creates a new block which
  //    serves two purposes. First, it holds PHINode definitions
  //    to preserve ALoop's LCSSA form. Second, it acts
  //    as a preheader for BLoop.
  BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);

  //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
  //    in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
  for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
      BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
      PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
      newPHI->addIncoming(V1, A_ExitingBlock);
      A_ExitBlock->getInstList().push_front(newPHI);
      PN->removeIncomingValue(A_ExitBlock);
      PN->addIncoming(newPHI, A_ExitBlock);
    } else
      break;
  }

  //[*] Eliminate split condition's inactive branch from ALoop.
  //    ALoop keeps successor 0 of the split branch.
  BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
  BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
  BasicBlock *A_InactiveBranch = NULL;
  BasicBlock *A_ActiveBranch = NULL;
  A_ActiveBranch = A_BR->getSuccessor(0);
  A_InactiveBranch = A_BR->getSuccessor(1);
  A_BR->setUnconditionalDest(A_ActiveBranch);
  removeBlocks(A_InactiveBranch, L, A_ActiveBranch);

  //[*] Eliminate split condition's inactive branch from BLoop.
  //    BLoop keeps successor 1 of the cloned split branch.
  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
  BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
  BasicBlock *B_InactiveBranch = NULL;
  BasicBlock *B_ActiveBranch = NULL;
  B_ActiveBranch = B_BR->getSuccessor(1);
  B_InactiveBranch = B_BR->getSuccessor(0);
  B_BR->setUnconditionalDest(B_ActiveBranch);
  removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);

  // When the header is the exiting block the exit condition is not moved;
  // note that NumIndexSplit is not incremented on this path.
  BasicBlock *A_Header = ALoop->getHeader();
  if (A_ExitingBlock == A_Header)
    return true;

  //[*] Move exit condition into split condition block to avoid
  //    executing dead loop iteration.
  ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
  Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
  ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);

  moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
                    cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
                    ALoop, EVOpNum);

  moveExitCondition(B_SplitCondBlock, B_ActiveBranch, B_ExitBlock, B_ExitCondition,
                    B_SplitCondition, B_IndVar, B_IndVarIncrement,
                    BLoop, EVOpNum);

  NumIndexSplit++;
  return true;
}
/// Partially inline F: if F's entry block ends in a conditional branch where
/// exactly one successor returns directly, clone F, extract everything except
/// the entry and return blocks of the clone into a new function, and inline
/// the remaining "if" wrapper into the clone's call/invoke users.
///
/// \param F The candidate function.
/// \returns The result of CodeExtractor::extractCodeRegion() for the
///          extracted body, or nullptr when F is not a candidate (entry block
///          is not terminated by a conditional branch, or the number of
///          directly-returning successors is not exactly one).
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return nullptr;

  BasicBlock *ReturnBlock = nullptr;
  BasicBlock *NonReturnBlock = nullptr;
  unsigned ReturnCount = 0;
  for (BasicBlock *BB : successors(EntryBlock)) {
    if (isa<ReturnInst>(BB->getTerminator())) {
      ReturnBlock = BB;
      ReturnCount++;
    } else
      NonReturnBlock = BB;
  }

  if (ReturnCount != 1)
    return nullptr;

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function *DuplicateFunction = CloneFunction(F, VMap);
  DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);

  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(DuplicateFunction);

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block.  For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = NewReturnBlock;
  NewReturnBlock = NewReturnBlock->splitBasicBlock(
      NewReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &NewReturnBlock->front();
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    // Step past OldPhi before touching it: removeIncomingValue() below erases
    // the PHI when its last incoming value is removed (DeletePHIIfEmpty
    // defaults to true), which would otherwise leave I pointing at a deleted
    // instruction.
    ++I;

    PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = NewReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(OldPhi, PreReturn);
    RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
                        NewEntryBlock);
    OldPhi->removeIncomingValue(NewEntryBlock);
  }
  // The entry block now branches directly to the second half of the split.
  NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock *> ToExtract;
  ToExtract.push_back(NewNonReturnBlock);
  for (BasicBlock &BB : *DuplicateFunction)
    if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
        &BB != NewNonReturnBlock)
      ToExtract.push_back(&BB);

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*DuplicateFunction);

  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  LoopInfo LI(DT);
  BranchProbabilityInfo BPI(*DuplicateFunction, LI);
  BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);

  // Extract the body of the if.
  Function *ExtractedFunction =
      CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
          .extractCodeRegion();

  // Inline the top-level if test into all callers.
  // Snapshot the user list first; inlining changes the use list.
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  for (User *U : Users)
    if (CallInst *CI = dyn_cast<CallInst>(U))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
      InlineFunction(II, IFI);

  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  DuplicateFunction->replaceAllUsesWith(F);
  DuplicateFunction->eraseFromParent();

  ++NumPartialInlined;

  return ExtractedFunction;
}
/// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. // The new PHI node value is added as an operand of a PHI node in either // the loop header or the loop exit block. for (BasicBlock *Succ : successors(Latch)) { for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add a new PHI node to the prolog end block and add the // appropriate incoming values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", PrologExit->getFirstNonPHI()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. 
if (L->contains(PN)) { NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); } else { NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); } Value *V = PN->getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast<Instruction>(V)) { if (L->contains(I)) { V = VMap.lookup(I); } } // Adding a value to the new PHI node from the last prolog block // that was created. NewPN->addIncoming(V, PrologLatch); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); } else { PN->addIncoming(NewPN, PrologExit); } } } // Make sure that created prolog loop is in simplified form SmallVector<BasicBlock *, 4> PrologExitPreds; Loop *PrologLoop = LI->getLoopFor(PrologLatch); if (PrologLoop) { for (BasicBlock *PredBB : predecessors(PrologExit)) if (PrologLoop->contains(PredBB)) PrologExitPreds.push_back(PredBB); SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, PreserveLCSSA); } // Create a branch around the original loop, which is taken if there are no // iterations remaining to be executed after running the prologue. Instruction *InsertPt = PrologExit->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) // This means %xtraiter is (BECount + 1) and all of the iterations of this // loop were executed by the prologue. Note that if BECount <u (Count - 1) // then (BECount + 1) cannot unsigned-overflow. 
Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); }
bool LoopInterchangeTransform::adjustLoopBranches() { DEBUG(dbgs() << "adjustLoopBranches called\n"); // Adjust the loop preheader BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); BasicBlock *InnerLoopLatchPredecessor = InnerLoopLatch->getUniquePredecessor(); BasicBlock *InnerLoopLatchSuccessor; BasicBlock *OuterLoopLatchSuccessor; BranchInst *OuterLoopLatchBI = dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); BranchInst *InnerLoopLatchBI = dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); BranchInst *OuterLoopHeaderBI = dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); BranchInst *InnerLoopHeaderBI = dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || !InnerLoopHeaderBI) return false; BranchInst *InnerLoopLatchPredecessorBI = dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); BranchInst *OuterLoopPredecessorBI = dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) return false; BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); if (!InnerLoopHeaderSuccessor) return false; // Adjust Loop Preheader and headers unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader) OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader); } NumSucc = OuterLoopHeaderBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if 
(OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) OuterLoopHeaderBI->setSuccessor(i, LoopExit); else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); } // Adjust reduction PHI's now that the incoming block has changed. updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, OuterLoopHeader); BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); InnerLoopHeaderBI->eraseFromParent(); // -------------Adjust loop latches----------- if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); else InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); } // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with // the value and remove this PHI node from inner loop. 
SmallVector<PHINode *, 8> LcssaVec; for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) { PHINode *LcssaPhi = cast<PHINode>(I); LcssaVec.push_back(LcssaPhi); } for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) { PHINode *P = *I; Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); P->replaceAllUsesWith(Incoming); P->eraseFromParent(); } if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); else OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); else InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); } else { OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); } return true; }
/// Create a clone of the blocks in a loop and connect them together. /// This function doesn't create a clone of the loop structure. /// /// There are two value maps that are defined and used. VMap is /// for the values in the current loop instance. LVMap contains /// the values from the last loop instance. We need the LVMap values /// to update the initial values for the current loop instance. /// static void CloneLoopBlocks(Loop *L, bool FirstCopy, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, LoopInfo *LI) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F); NewBlocks.push_back(NewBB); if (Loop *ParentLoop = L->getParentLoop()) ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block InsertTop->getTerminator()->setSuccessor(0, NewBB); // Change the incoming values to the ones defined in the // previously cloned loop. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[I]); if (FirstCopy) { // We replace the first phi node with the value from the preheader VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); NewBB->getInstList().erase(NewPHI); } else { // Update VMap with values from the previous block unsigned idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); if (Instruction *I = dyn_cast<Instruction>(InVal)) if (L->contains(I)) InVal = LVMap[InVal]; NewPHI->setIncomingValue(idx, InVal); NewPHI->setIncomingBlock(idx, InsertTop); } } } if (Latch == *BB) { VMap.erase((*BB)->getTerminator()); NewBB->getTerminator()->eraseFromParent(); BranchInst::Create(InsertBot, NewBB); } } // LastValueMap is updated with the values for the current loop // which are used the next time this function is called. for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) { LVMap[VI->first] = VI->second; } }
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// Note that Count is clamped to the constant trip count when that count is
/// known and smaller than the requested Count.
bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
  assert(L->isLCSSAForm());

  BasicBlock *Header = L->getHeader();
  // NOTE(review): LatchBlock is dereferenced without a null check; presumably
  // callers only pass loops with a single latch -- confirm.
  BasicBlock *LatchBlock = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DOUT << " Can't unroll; loop not terminated by a conditional branch.\n";
    return false;
  }

  // Find trip count
  unsigned TripCount = L->getSmallConstantTripCount();
  // Find trip multiple if count is not available
  unsigned TripMultiple = 1;
  if (TripCount == 0)
    TripMultiple = L->getSmallConstantTripMultiple();

  if (TripCount != 0)
    DOUT << " Trip Count = " << TripCount << "\n";
  if (TripMultiple != 1)
    DOUT << " Trip Multiple = " << TripMultiple << "\n";

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // If we know the trip count, we know the multiple...
  // A TripMultiple of 0 is used from here on as the marker for "exact trip
  // count known"; BreakoutTrip is then the one iteration that must keep its
  // conditional exit.
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  if (CompletelyUnroll) {
    DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
  } else {
    DEBUG(errs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DOUT << " with a breakout at trip " << BreakoutTrip;
    } else if (TripMultiple != 1) {
      DOUT << " with " << TripMultiple << " trips per branch";
    }
    DOUT << "!\n";
  }

  // Snapshot the block list by value: blocks are added to L while cloning.
  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();

  // ContinueOnTrue is true when successor 0 of the latch branch stays inside
  // the loop. Used as an index, it then selects the *other* successor, which
  // is the loop exit.
  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  typedef DenseMap<const Value*, Value*> ValueMapTy;
  ValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    OrigPHINode.push_back(PN);
    // Note: this inner 'I' shadows the block iterator above.
    if (Instruction *I =
                dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
      if (L->contains(I->getParent()))
        LastValueMap[I] = I;
  }

  // Headers[k] / Latches[k] are the header and latch of unrolled iteration k;
  // index 0 is the original loop body.
  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  for (unsigned It = 1; It != Count; ++It) {
    char SuffixBuffer[100];
    // NOTE(review): 'It' is unsigned but is formatted with "%d"; "%u" would
    // match the argument type -- confirm before changing.
    sprintf(SuffixBuffer, ".%d", It);

    std::vector<BasicBlock*> NewBlocks;

    for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
         E = LoopBlocks.end(); BB != E; ++BB) {
      ValueMapTy ValueMap;
      BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer);
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          // For It == 1 the latch value of the original body is used as-is;
          // later copies take the value from the most recent clone.
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI->getParent()))
              InVal = LastValueMap[InValI];
          ValueMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks except
      // the successor of the latch block.  The successor of the exit block will
      // be updated specially after unrolling all the way.
      if (*BB != LatchBlock)
        for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
             UI != UE;) {
          // Advance before touching the PHI: addIncoming adds a new use.
          Instruction *UseInst = cast<Instruction>(*UI);
          ++UI;
          if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) {
            PHINode *phi = cast<PHINode>(UseInst);
            Value *Incoming = phi->getIncomingValueForBlock(*BB);
            phi->addIncoming(Incoming, New);
          }
        }

      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock) {
        Latches.push_back(New);

        // Also, clear out the new latch's back edge so that it doesn't look
        // like a new loop, so that it's amenable to being merged with adjacent
        // blocks later on.
        TerminatorInst *Term = New->getTerminator();
        assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
        assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
        Term->setSuccessor(!ContinueOnTrue, NULL);
      }

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        RemapInstruction(I, LastValueMap);
  }

  // The latch block exits the loop.  If there are any PHI nodes in the
  // successor blocks, update them to use the appropriate values computed as the
  // last iteration of the loop.
  if (Count != 1) {
    // Collect the PHI users first; the second loop edits their incoming lists.
    SmallPtrSet<PHINode*, 8> Users;
    for (Value::use_iterator UI = LatchBlock->use_begin(),
         UE = LatchBlock->use_end(); UI != UE; ++UI)
      if (PHINode *phi = dyn_cast<PHINode>(*UI))
        Users.insert(phi);

    BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
    for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
         SI != SE; ++SI) {
      PHINode *PN = *SI;
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI->getParent()))
          InVal = LastValueMap[InVal];
      }
      PN->addIncoming(InVal, LastIterationBB);
    }
  }

  // Now, if we're doing complete unrolling, loop over the PHI nodes in the
  // original block, setting them to their incoming values.
  if (CompletelyUnroll) {
    BasicBlock *Preheader = L->getLoopPreheader();
    for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
      PHINode *PN = OrigPHINode[i];
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination. j wraps to 0 for the last latch, which then
    // targets the original header (Headers[0]).
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      Term->setUnconditionalDest(Dest);
      // Merge adjacent basic blocks, if possible.
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
        // Dest was merged into Fold; patch the bookkeeping vectors so later
        // iterations of this loop do not refer to the folded-away block.
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
        std::replace(Headers.begin(), Headers.end(), Dest, Fold);
      }
    }
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      // Step the iterator before a possible erase of the current instruction.
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Constant *C = ConstantFoldInstruction(Inst,
                                                     Header->getContext())) {
        Inst->replaceAllUsesWith(C);
        (*BB)->getInstList().erase(Inst);
      }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != NULL)
    LPM->deleteLoopFromQueue(L);

  // If we didn't completely unroll the loop, it should still be in LCSSA form.
  if (!CompletelyUnroll)
    assert(L->isLCSSAForm());

  return true;
}
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. 
if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. 
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); return NewLoop; } else return nullptr; }
/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes and turn unwind
/// instructions into branches to the invoke unwind dest.
///
/// II is the invoke instruction being inlined.  FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
///
/// Every block that gains an edge to the unwind destination also gets a new
/// entry in each PHI node of that destination, carrying the value the PHI
/// received from the original invoke's block.
static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
                                ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();
  // InvokeDestPHIValues[k] is the value the k-th PHI of InvokeDest receives
  // from the invoke's block.
  std::vector<Value*> InvokeDestPHIValues;

  // If there are PHI nodes in the unwind destination block, we need to
  // keep track of which values came into them from this invoke, then remove
  // the entry for this block.
  BasicBlock *InvokeBlock = II->getParent();
  for (BasicBlock::iterator I = InvokeDest->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    // Save the value to use for this edge.
    InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(InvokeBlock));
  }

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function, scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.
  if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB) {
      if (InlinedCodeInfo.ContainsCalls) {
        // Note: this 'E' shadows the function-level end iterator above.
        for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){
          Instruction *I = BBI++;

          // We only need to check for function calls: inlined invoke
          // instructions require no special handling.
          if (!isa<CallInst>(I)) continue;
          CallInst *CI = cast<CallInst>(I);

          // If this call cannot unwind, don't convert it to an invoke.
          if (CI->doesNotThrow())
            continue;

          // Convert this function call into an invoke instruction.
          // First, split the basic block.
          BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");

          // Next, create the new invoke instruction, inserting it at the end
          // of the old basic block.
          // Note: this local 'II' shadows the function parameter of the same
          // name for the rest of this scope.
          SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
          InvokeInst *II =
            InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
                               InvokeArgs.begin(), InvokeArgs.end(),
                               CI->getName(), BB->getTerminator());
          II->setCallingConv(CI->getCallingConv());
          II->setAttributes(CI->getAttributes());

          // Make sure that anything using the call now uses the invoke!
          CI->replaceAllUsesWith(II);

          // Delete the unconditional branch inserted by splitBasicBlock
          BB->getInstList().pop_back();
          Split->getInstList().pop_front();  // Delete the original call

          // Update any PHI nodes in the exceptional block to indicate that
          // there is now a new entry in them.
          unsigned i = 0;
          for (BasicBlock::iterator I = InvokeDest->begin();
               isa<PHINode>(I); ++I, ++i) {
            PHINode *PN = cast<PHINode>(I);
            PN->addIncoming(InvokeDestPHIValues[i], BB);
          }

          // This basic block is now complete, start scanning the next one.
          // (The remainder of the old block now lives in Split, which the
          // outer loop reaches through ++BB -- presumably because
          // splitBasicBlock places it right after BB; confirm.)
          break;
        }
      }

      if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
        // An UnwindInst requires special handling when it gets inlined into an
        // invoke site.  Once this happens, we know that the unwind would cause
        // a control transfer to the invoke exception destination, so we can
        // transform it into a direct branch to the exception destination.
        BranchInst::Create(InvokeDest, UI);

        // Delete the unwind instruction!
        UI->eraseFromParent();

        // Update any PHI nodes in the exceptional block to indicate that
        // there is now a new entry in them.
        unsigned i = 0;
        for (BasicBlock::iterator I = InvokeDest->begin();
             isa<PHINode>(I); ++I, ++i) {
          PHINode *PN = cast<PHINode>(I);
          PN->addIncoming(InvokeDestPHIValues[i], BB);
        }
      }
    }
  }

  // Now that everything is happy, we have one final detail.  The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction.  Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}
/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p /// InsertBot. /// \param IterNumber The serial number of the iteration currently being /// peeled off. /// \param Exit The exit block of the original loop. /// \param[out] NewBlocks A list of the the blocks in the newly created clone /// \param[out] VMap The value map between the loop and the new clone. /// \param LoopBlocks A helper for DFS-traversal of the loop. /// \param LVMap A value-map that maps instructions from the original loop to /// instructions in the last peeled-off iteration. static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Exit, SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, LoopInfo *LI) { BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); BasicBlock *PreHeader = L->getLoopPreheader(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F); NewBlocks.push_back(NewBB); if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; } // Hook-up the control flow for the newly inserted blocks. // The new header is hooked up directly to the "top", which is either // the original loop preheader (for the first iteration) or the previous // iteration's exiting block (for every other iteration) InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header])); // Similarly, for the latch: // The original exiting edge is still hooked up to the loop exit. 
// The backedge now goes to the "bottom", which is either the loop's real // header (for the last peeled iteration) or the copied header of the next // iteration (for every other iteration) BranchInst *LatchBR = cast<BranchInst>(cast<BasicBlock>(VMap[Latch])->getTerminator()); unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); LatchBR->setSuccessor(HeaderIdx, InsertBot); LatchBR->setSuccessor(1 - HeaderIdx, Exit); // The new copy of the loop body starts with a bunch of PHI nodes // that pick an incoming value from either the preheader, or the previous // loop iteration. Since this copy is no longer part of the loop, we // resolve this statically: // For the first iteration, we use the value from the preheader directly. // For any other iteration, we replace the phi with the value generated by // the immediately preceding clone of the loop body (which represents // the previous iteration). for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (IterNumber == 0) { VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); } else { Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); if (LatchInst && L->contains(LatchInst)) VMap[&*I] = LVMap[LatchInst]; else VMap[&*I] = LatchVal; } cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } // Fix up the outgoing values - we need to add a value for the iteration // we've just created. Note that this must happen *after* the incoming // values are adjusted, since the value going out of the latch may also be // a value coming into the header. 
for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) { PHINode *PHI = cast<PHINode>(I); Value *LatchVal = PHI->getIncomingValueForBlock(Latch); Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); if (LatchInst && L->contains(LatchInst)) LatchVal = VMap[LatchVal]; PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch])); } // LastValueMap is updated with the values for the current loop // which are used the next time this function is called. for (const auto &KV : VMap) LVMap[KV.first] = KV.second; }
/// eliminateUnconditionalBranch - Clone the instructions from the destination
/// block into the source block, eliminating the specified unconditional branch.
/// If the destination block defines values used by successors of the dest
/// block, we may need to insert PHI nodes.
///
/// Before cloning, instructions of the destination block that are safe to
/// speculate, do not read memory, and have no operand defined in that block
/// are hoisted into an obvious shared dominator when one is found.
///
void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
  BasicBlock *SourceBlock = Branch->getParent();
  BasicBlock *DestBlock = Branch->getSuccessor(0);
  assert(SourceBlock != DestBlock && "Our predicate is broken!");

  DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName()
        << "]: Eliminating branch: " << *Branch);

  // See if we can avoid duplicating code by moving it up to a dominator of both
  // blocks.
  if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
    DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n");

    // If there are non-phi instructions in DestBlock that have no operands
    // defined in DestBlock, and if the instruction has no side effects, we can
    // move the instruction to DomBlock instead of duplicating it.
    BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
    while (!isa<TerminatorInst>(BBI)) {
      // Advance first: I may be unlinked from DestBlock below.
      Instruction *I = BBI++;

      bool CanHoist = I->isSafeToSpeculativelyExecute() &&
                      !I->mayReadFromMemory();
      if (CanHoist) {
        // An operand defined in DestBlock, or produced by an invoke that sits
        // in DomBlock, blocks the hoist.
        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
          if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
            if (OpI->getParent() == DestBlock ||
                (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
              CanHoist = false;
              break;
            }
        if (CanHoist) {
          // Remove from DestBlock, move right before the term in DomBlock.
          DestBlock->getInstList().remove(I);
          DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
          DEBUG(errs() << "Hoisted: " << *I);
        }
      }
    }
  }

  // Tail duplication can not update SSA properties correctly if the values
  // defined in the duplicated tail are used outside of the tail itself.  For
  // this reason, we spill all values that are used outside of the tail to the
  // stack.
  for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
    if (I->isUsedOutsideOfBlock(DestBlock)) {
      // We found a use outside of the tail.  Create a new stack slot to
      // break this inter-block usage pattern.
      DemoteRegToStack(*I);
    }

  // We are going to have to map operands from the original block B to the new
  // copy of the block B'.  If there are PHI nodes in the DestBlock, these PHI
  // nodes also define part of this mapping.  Loop over these PHI nodes, adding
  // them to our mapping.
  //
  std::map<Value*, Value*> ValueMapping;

  BasicBlock::iterator BI = DestBlock->begin();
  // Remembered for the cleanup sweep at the end of the function.
  bool HadPHINodes = isa<PHINode>(BI);
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
    ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);

  // Clone the non-phi instructions of the dest block into the source block,
  // keeping track of the mapping...
  //
  // The clones are appended after Branch, which is still SourceBlock's
  // terminator at this point; Branch is erased further below.
  for (; BI != DestBlock->end(); ++BI) {
    Instruction *New = BI->clone();
    New->setName(BI->getName());
    SourceBlock->getInstList().push_back(New);
    ValueMapping[BI] = New;
  }

  // Now that we have built the mapping information and cloned all of the
  // instructions (giving us a new terminator, among other things), walk the new
  // instructions, rewriting references of old instructions to use new
  // instructions.
  //
  BI = Branch; ++BI;  // Get an iterator to the first new instruction
  for (; BI != SourceBlock->end(); ++BI)
    for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
      std::map<Value*, Value*>::const_iterator I =
        ValueMapping.find(BI->getOperand(i));
      if (I != ValueMapping.end())
        BI->setOperand(i, I->second);
    }

  // Next we check to see if any of the successors of DestBlock had PHI nodes.
  // If so, we need to add entries to the PHI nodes for SourceBlock now.
  for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
       SI != SE; ++SI) {
    BasicBlock *Succ = *SI;
    for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
      PHINode *PN = cast<PHINode>(PNI);
      // Ok, we have a PHI node.  Figure out what the incoming value was for the
      // DestBlock.
      Value *IV = PN->getIncomingValueForBlock(DestBlock);

      // Remap the value if necessary...
      std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV);
      if (I != ValueMapping.end())
        IV = I->second;
      PN->addIncoming(IV, SourceBlock);
    }
  }

  // Next, remove the old branch instruction, and any PHI node entries that we
  // had.
  BI = Branch; ++BI;  // Get an iterator to the first new instruction
  DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
  SourceBlock->getInstList().erase(Branch);  // Destroy the uncond branch...

  // Final step: now that we have finished everything up, walk the cloned
  // instructions one last time, constant propagating and DCE'ing them, because
  // they may not be needed anymore.
  //
  if (HadPHINodes) {
    while (BI != SourceBlock->end()) {
      // Advance before a possible erase of the current instruction.
      Instruction *Inst = BI++;
      if (isInstructionTriviallyDead(Inst))
        Inst->eraseFromParent();
      else if (Constant *C = ConstantFoldInstruction(Inst)) {
        Inst->replaceAllUsesWith(C);
        Inst->eraseFromParent();
      }
    }
  }

  ++NumEliminated;  // We just killed a branch!
}
/// Connect the unrolling epilog code to the original loop. /// The unrolling epilog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Update PHI nodes at the unrolling loop exit and epilog loop exit /// - Create PHI nodes at the unrolling loop exit to combine /// values that exit the unrolling loop code and jump around it. /// - Update PHI operands in the epilog loop by the new PHI nodes /// - Branch around the epilog loop if extra iters (ModVal) is zero. /// static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); // Loop structure should be the following: // // PreHeader // NewPreHeader // Header // ... // Latch // NewExit (PN) // EpilogPreHeader // EpilogHeader // ... // EpilogLatch // Exit (EpilogPN) // Update PHI nodes at NewExit and Exit. for (Instruction &BBI : *NewExit) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // PN should be used in another PHI located in Exit block as // Exit was split by SplitBlockPredecessors into Exit and NewExit // Basicaly it should look like: // NewExit: // PN = PHI [I, Latch] // ... // Exit: // EpilogPN = PHI [PN, EpilogPreHeader] // // There is EpilogPreHeader incoming block instead of NewExit as // NewExit was spilt 1 more time to get EpilogPreHeader. 
assert(PN->hasOneUse() && "The phi should have 1 use"); PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser()); assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); // Add incoming PreHeader from branch around the Loop PN->addIncoming(UndefValue::get(PN->getType()), PreHeader); Value *V = PN->getIncomingValueForBlock(Latch); Instruction *I = dyn_cast<Instruction>(V); if (I && L->contains(I)) // If value comes from an instruction in the loop add VMap value. V = VMap.lookup(I); // For the instruction out of the loop, constant or undefined value // insert value itself. EpilogPN->addIncoming(V, EpilogLatch); assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && "EpilogPN should have EpilogPreHeader incoming block"); // Change EpilogPreHeader incoming block to NewExit. EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), NewExit); // Now PHIs should look like: // NewExit: // PN = PHI [I, Latch], [undef, PreHeader] // ... // Exit: // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] } // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). // Update corresponding PHI nodes in epilog loop. for (BasicBlock *Succ : successors(Latch)) { // Skip this as we already updated phis in exit blocks. if (!L->contains(Succ)) continue; for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add new PHI nodes to the loop exit block and update epilog // PHIs with the new PHI values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", NewExit->getFirstNonPHI()); // Adding a value to the new PHI node from the unrolling loop preheader. NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); // Adding a value to the new PHI node from the unrolling loop latch. 
NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch); // Update the existing PHI node operand with the value from the new PHI // node. Corresponding instruction in epilog loop should be PHI. PHINode *VPN = cast<PHINode>(VMap[&BBI]); VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN); } } Instruction *InsertPt = NewExit->getTerminator(); IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(Exit, NewExit); // Split the main loop exit to maintain canonicalization guarantees. SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, PreserveLCSSA); }