void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); removeLifetimeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. if (AST) AST->deleteValue(AI); AI->eraseFromParent(); // Remove the alloca from the Allocas list, since it has been processed RemoveFromAllocasList(AllocaNum); ++NumDeadAlloca; continue; } // Calculate the set of read and write-locations for each alloca. This is // analogous to finding the 'uses' and 'definitions' of each variable. Info.AnalyzeAlloca(AI); // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { RewriteSingleStoreAlloca(AI, Info, LBI); // Finally, after the scan, check to see if the store is all that is left. if (Info.UsingBlocks.empty()) { // Record debuginfo for the store and remove the declaration's // debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { if (!DIB) DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); DDI->eraseFromParent(); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); if (AST) AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; continue; } } // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock) { PromoteSingleBlockAlloca(AI, Info, LBI); // Finally, after the scan, check to see if the stores are all that is // left. if (Info.UsingBlocks.empty()) { // Remove the (now dead) stores and alloca. while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->use_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { if (!DIB) DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); } SI->eraseFromParent(); LBI.deleteValue(SI); } if (AST) AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); // The alloca's debuginfo can be removed as well. if (DbgDeclareInst *DDI = Info.DbgDeclare) DDI->eraseFromParent(); ++NumLocalPromoted; continue; } } // If we haven't computed dominator tree levels, do so now. if (DomLevels.empty()) { SmallVector<DomTreeNode*, 32> Worklist; DomTreeNode *Root = DT.getRootNode(); DomLevels[Root] = 0; Worklist.push_back(Root); while (!Worklist.empty()) { DomTreeNode *Node = Worklist.pop_back_val(); unsigned ChildLevel = DomLevels[Node] + 1; for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; ++CI) { DomLevels[*CI] = ChildLevel; Worklist.push_back(*CI); } } } // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { unsigned ID = 0; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) BBNumbers[I] = ID++; } // If we have an AST to keep updated, remember some pointer value that is // stored into the alloca. if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; // Remember the dbg.declare intrinsic describing this alloca, if any. if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; // At this point, we're committed to promoting the alloca using IDF's, and // the standard SSA construction algorithm. Determine which blocks need PHI // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. DetermineInsertionPoint(AI, AllocaNum, Info); } if (Allocas.empty()) return; // All of the allocas must have been trivial! LBI.clear(); // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. // RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); // Remove the allocas themselves from the function. for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in // unreachable basic blocks that were not processed by walking the dominator // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); A->eraseFromParent(); } // Remove alloca's dbg.declare instrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) DDI->eraseFromParent(); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is // iterative, because eliminating one PHI node can cause others to be removed. bool EliminatedAPHI = true; while (EliminatedAPHI) { EliminatedAPHI = false; // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. If it was not, we could add uses to // the values we replace with in a non deterministic order, thus creating // non deterministic def->use chains. for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); EliminatedAPHI = true; continue; } ++I; } } // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. PHINode *SomePHI = I->second; BasicBlock *BB = SomePHI->getParent(); if (&BB->front() != SomePHI) continue; // Only do work here if there the PHI nodes are missing incoming values. We // know that all PHI nodes that were inserted in a block will have the same // number of incoming values, so we can just check any of them. if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) continue; // Get the preds for BB. SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient // access. std::sort(Preds.begin(), Preds.end()); // Now we loop through all BB's which have entries in SomePHI and remove // them from the Preds list. for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { // Do a log(n) search of the Preds list for the entry we want. SmallVector<BasicBlock*, 16>::iterator EntIt = std::lower_bound(Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i)); assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& "PHI node has entry for a block which is not a predecessor!"); // Remove the entry Preds.erase(EntIt); } // At this point, the blocks left in the preds list must have dummy // entries inserted into every PHI nodes for the block. Update all the phi // nodes in this block that we are inserting (there could be phis before // mem2reg runs). unsigned NumBadPreds = SomePHI->getNumIncomingValues(); BasicBlock::iterator BBI = BB->begin(); while ((SomePHI = dyn_cast<PHINode>(BBI++)) && SomePHI->getNumIncomingValues() == NumBadPreds) { Value *UndefVal = UndefValue::get(SomePHI->getType()); for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) SomePHI->addIncoming(UndefVal, Preds[pred]); } } NewPhiNodes.clear(); }
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && !isa<PHINode>(Inst->use_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction*, 16> Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Find all of the blocks that this value is live in. SmallPtrSet<BasicBlock*, 64> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); Users.pop_back(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } } // If we decided we need a spill, do it. // FIXME: Spilling this way is overkill, as it forces all uses of // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { DemoteRegToStack(*Inst, true); ++NumSpilled; } } } // Go through the landing pads and remove any PHIs there. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. SmallPtrSet<PHINode*, 8> PHIsToDemote; for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) PHIsToDemote.insert(cast<PHINode>(PN)); if (PHIsToDemote.empty()) continue; // Demote the PHIs to the stack. for (SmallPtrSet<PHINode*, 8>::iterator I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) DemotePHIToStack(*I); // Move the landingpad instruction back to the top of the landing pad block. LPI->moveBefore(UnwindBlock->begin()); } }
/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and /// an unconditional branch in it. void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { if (SinglePred != DestBB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); MergeBasicBlockIntoOnlyPred(DestBB, this); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } } // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB // to handle the new incoming edges it is about to have. PHINode *PN; for (BasicBlock::iterator BBI = DestBB->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) { // Remove the incoming value for BB, and remember it. Value *InVal = PN->removeIncomingValue(BB, false); // Two options: either the InVal is a phi node defined in BB or it is some // value that dominates BB. PHINode *InValPhi = dyn_cast<PHINode>(InVal); if (InValPhi && InValPhi->getParent() == BB) { // Add all of the input values of the input PHI as inputs of this phi. for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InValPhi->getIncomingValue(i), InValPhi->getIncomingBlock(i)); } else { // Otherwise, add one instance of the dominating value for each edge that // we will be adding. if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) PN->addIncoming(InVal, *PI); } } } // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); if (DT) { BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); DT->changeImmediateDominator(DestBB, NewIDom); DT->eraseNode(BB); } if (PFI) { PFI->replaceAllUses(BB, DestBB); PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); ++NumBlocksElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); }
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The /// effect of this is to copy significantly less code in cases where (for /// example) a function call with constant arguments is inlined, and those /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const TargetData *TD, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) assert(VMap.count(II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; CloneWorklist.push_back(&OldFunc->getEntryBlock()); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]); if (NewBB == 0) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Loop over all of the instructions in the block, fixing up operand // references as we go. This uses VMap to do all the hard work. // BasicBlock::iterator I = NewBB->begin(); unsigned DbgKind = OldFunc->getContext().getMDKindID("dbg"); MDNode *TheCallMD = NULL; if (TheCall && TheCall->hasMetadata()) TheCallMD = TheCall->getMetadata(DbgKind); // Handle PHI nodes specially, as we have to remove references to dead // blocks. if (PHINode *PN = dyn_cast<PHINode>(I)) { // Skip over all PHI nodes, remembering them for later. BasicBlock::const_iterator OldI = BI->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) { if (I->hasMetadata()) { if (TheCallMD) { if (MDNode *IMD = I->getMetadata(DbgKind)) { MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD); I->setMetadata(DbgKind, NewMD); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. I->setMetadata(DbgKind, 0); } } PHIToResolve.push_back(cast<PHINode>(OldI)); } } // FIXME: // FIXME: // FIXME: Unclone all this metadata stuff. // FIXME: // FIXME: // Otherwise, remap the rest of the instructions normally. for (; I != NewBB->end(); ++I) { if (I->hasMetadata()) { if (TheCallMD) { if (MDNode *IMD = I->getMetadata(DbgKind)) { MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD); I->setMetadata(DbgKind, NewMD); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. I->setMetadata(DbgKind, 0); } } RemapInstruction(I, VMap); } } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[OldI] == PN && "VMap mismatch"); VMap[OldI] = NV; PN->eraseFromParent(); ++OldI; } } // NOTE: We cannot eliminate single entry phi nodes here, because of // VMap. Single entry phi nodes can have multiple VMap entries // pointing at them. Thus, deleting one would require scanning the VMap // to update any entries in it that would require that. This would be // really slow. } // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); while (I != NewFunc->end()) { BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } // Note that we can't eliminate uncond branches if the destination has // single-entry PHI nodes. Eliminating the single-entry phi nodes would // require scanning the VMap to update any entries that point to the phi // node. BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { ++I; continue; } // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(I); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } }
/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge /// we spill into a stack location, guaranteeing that there is nothing live /// across the unwind edge. This process also splits all critical edges /// coming out of invoke's. void SjLjEHPass:: splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; SplitCriticalEdge(II, 0, this); SplitCriticalEdge(II, 1, this); assert(!isa<PHINode>(II->getNormalDest()) && !isa<PHINode>(II->getUnwindDest()) && "critical edge splitting left single entry phi nodes?"); } Function *F = Invokes.back()->getParent()->getParent(); // To avoid having to handle incoming arguments specially, we lower each arg // to a copy instruction in the entry block. This ensures that the argument // value itself cannot be live across the entry block. BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsertPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { // This is always a no-op cast because we're casting AI to AI->getType() so // src and destination types are identical. BitCast is the only possibility. CastInst *NC = new BitCastInst( AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); AI->replaceAllUsesWith(NC); // Normally its is forbidden to replace a CastInst's operand because it // could cause the opcode to reflect an illegal conversion. However, we're // replacing it here with the same value it was constructed with to simply // make NC its user. NC->setOperand(0, AI); } // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && !isa<PHINode>(Inst->use_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction*,16> Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Find all of the blocks that this value is live in. std::set<BasicBlock*> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); Users.pop_back(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { NeedsSpill = true; } } // If we decided we need a spill, do it. if (NeedsSpill) { ++NumSpilled; DemoteRegToStack(*Inst, true); } } }
// This function indicates the current limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); PHINode *InnerInductionVar; PHINode *OuterInductionVar; // We currently handle only 1 induction variable inside the loop. We also do // not handle reductions as of now. if (getPHICount(InnerLoopHeader) > 1) return true; if (getPHICount(OuterLoopHeader) > 1) return true; InnerInductionVar = getInductionVariable(InnerLoop, SE); OuterInductionVar = getInductionVariable(OuterLoop, SE); if (!OuterInductionVar || !InnerInductionVar) { DEBUG(dbgs() << "Induction variable not found\n"); return true; } // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { DEBUG(dbgs() << "Loop structure not understood by pass\n"); return true; } // TODO: Loops with LCSSA PHI's are currently not handled. if (isa<PHINode>(OuterLoopLatch->begin())) { DEBUG(dbgs() << "Found and LCSSA PHI in outer loop latch\n"); return true; } if (InnerLoopLatch != InnerLoopHeader && isa<PHINode>(InnerLoopLatch->begin())) { DEBUG(dbgs() << "Found and LCSSA PHI in inner loop latch\n"); return true; } // TODO: Current limitation: Since we split the inner loop latch at the point // were induction variable is incremented (induction.next); We cannot have // more than 1 user of induction.next since it would result in broken code // after split. // e.g. // for(i=0;i<N;i++) { // for(j = 0;j<M;j++) { // A[j+1][i+2] = A[j][i]+k; // } // } bool FoundInduction = false; Instruction *InnerIndexVarInc = nullptr; if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) InnerIndexVarInc = dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); else InnerIndexVarInc = dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); if (!InnerIndexVarInc) return true; // Since we split the inner loop latch on this induction variable. Make sure // we do not have any instruction between the induction variable and branch // instruction. for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend(); I != E && !FoundInduction; ++I) { if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I)) continue; const Instruction &Ins = *I; // We found an instruction. If this is not induction variable then it is not // safe to split this loop latch. if (!Ins.isIdenticalTo(InnerIndexVarInc)) return true; else FoundInduction = true; } // The loop latch ended and we didnt find the induction variable return as // current limitation. if (!FoundInduction) return true; return false; }
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. This will update DominatorTree information if it /// is available, thus calling this pass will not invalidate either of them. /// This returns the new block if the edge was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the /// specified successor will be merged into the same critical edge block. /// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that /// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an /// IndirectBrInst. Splitting these edges will almost always create an invalid /// program because the address of the new block won't be the one that is jumped /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges, bool DontDeleteUselessPhis, bool SplitLandingPads) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. if (DestBB->isLandingPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); NewBI->setDebugLoc(TI->getDebugLoc()); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. { unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { // We no longer enter through TIBB, now we come in through NewBB. // Revector exactly one entry in the PHI node that used to come from // TIBB to come from NewBB. PHINode *PN = cast<PHINode>(I); // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } // If we don't have a pass object, we can't update anything... if (!P) return NewBB; DominatorTreeWrapperPass *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); // If we have nothing to update, just return. if (!DT && !LI) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate // anything, as there are other successors of DestBB. However, if all other // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; // If there is a PHI in the block, loop over predecessors with it, which is // faster than iterating pred_begin/end. if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) != NewBB) OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) { BasicBlock *P = *I; if (P != NewBB) OtherPreds.push_back(P); } } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but // TINode is the immediate dominator for the new node. // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = nullptr; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); while (!OtherPreds.empty() && NewBBDominatesDestBB) { if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); OtherPreds.pop_back(); } OtherPreds.clear(); } // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { if (!DestBBNode) DestBBNode = DT->getNode(DestBB); DT->changeImmediateDominator(DestBBNode, NewBBNode); } } } // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the // header of its loop (adding a branch into a loop elsewhere would // create an irreducible loop). assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) P->addBasicBlockToLoop(NewBB, LI->getBase()); } } // If TIBB is in a loop and DestBB is outside of that loop, we may need // to update LoopSimplify form and LCSSA form. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. if (P->mustPreserveAnalysisID(LCSSAID)) createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); // The only that we can break LoopSimplify form by splitting a critical // edge is if after the split there exists some edge from TIL to DestBB // *and* the only edge into DestBB from outside of TIL is that of // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB // is the new exit block and it has no non-loop predecessors. If the // second isn't true, then DestBB was not in LoopSimplify form prior to // the split as it had a non-loop predecessor. In both of these cases, // the predecessor must be directly in TIL, not in a subloop, or again // LoopSimplify doesn't hold. SmallVector<BasicBlock *, 4> LoopPreds; for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) { BasicBlock *P = *I; if (P == NewBB) continue; // The new block is known. if (LI->getLoopFor(P) != TIL) { // No need to re-simplify, it wasn't to start with. LoopPreds.clear(); break; } LoopPreds.push_back(P); } if (!LoopPreds.empty()) { assert(!DestBB->isLandingPad() && "We don't split edges to landing pads!"); BasicBlock *NewExitBB = SplitBlockPredecessors(DestBB, LoopPreds, "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } } // LCSSA form was updated above for the case where LoopSimplify is // available, which means that all predecessors of loop exit blocks // are within the loop. Without LoopSimplify form, it would be // necessary to insert a new phi. assert((!P->mustPreserveAnalysisID(LCSSAID) || P->mustPreserveAnalysisID(LoopSimplifyID)) && "SplitCriticalEdge doesn't know how to update LCCSA form " "without LoopSimplify!"); } } return NewBB; }
/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { PHINode *PN = dyn_cast<PHINode>(Header->begin()); if (!PN) return; // No PHI nodes. // If the header node contains any PHI nodes, check to see if there is more // than one entry from outside the region. If so, we need to sever the // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; // If there is one (or fewer) predecessor from outside the region, we don't // need to do anything special. if (NumPredsOutsideRegion <= 1) return; } // Otherwise, we need to split the header block into two pieces: one // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; Blocks.remove(OldPred); Blocks.insert(NewBB); Header = NewBB; // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName() + ".ce", &NewBB->front()); PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; } } } } }
BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
bool isLinkListLoop(Loop * pLoop, map<PHINode *, set<Value *> > & LinkListHeaderMapping ) { BasicBlock * pHeader = pLoop->getHeader(); vector<PHINode *> vecToDo; for(BasicBlock::iterator II = pHeader->begin(); II != pHeader->end(); II ++ ) { if(isa<PHINode>(II)) { if(PointerType * pPointerType = dyn_cast<PointerType>(II->getType())) { if(isa<StructType>(pPointerType->getElementType())) { vecToDo.push_back(cast<PHINode>(II)); } } } else { break; } } if(vecToDo.size() == 0) { return false; } vector<PHINode *>::iterator itVecPhiBegin = vecToDo.begin(); vector<PHINode *>::iterator itVecPhiEnd = vecToDo.end(); for(; itVecPhiBegin != itVecPhiEnd; itVecPhiBegin ++ ) { PHINode * pPHI = *itVecPhiBegin; bool bFlag = true; for(unsigned i = 0; i < pPHI->getNumIncomingValues(); i ++ ) { if(pLoop->contains(pPHI->getIncomingBlock(i))) { if(Instruction * pInst = dyn_cast<Instruction>(pPHI->getIncomingValue(i))) { if(!isReachableThroughLinkListDereference(pPHI, pInst, pLoop)) { bFlag = false; break; } } else { bFlag = false; break; } } } if(bFlag) { for(unsigned i = 0; i < pPHI->getNumIncomingValues(); i ++ ) { if(!pLoop->contains(pPHI->getIncomingBlock(i))) { LinkListHeaderMapping[pPHI].insert(pPHI->getIncomingValue(i)); } } } } if(LinkListHeaderMapping.size() > 0) { return true; } else { return false; } }
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA, Loop **ResultLoop) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" : dbgs() << "Using prolog remainder.\n"); // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) { LLVM_DEBUG(dbgs() << "Not in simplify form!\n"); return false; } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); if (!LatchBR || LatchBR->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. LLVM_DEBUG( dbgs() << "Loop latch not terminated by a conditional branch.\n"); return false; } unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); if (L->contains(LatchExit)) { // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. LLVM_DEBUG( dbgs() << "One of the loop latch successors must be the exit block.\n"); return false; } // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder) && canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { LLVM_DEBUG( dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"); return false; } // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). // We calculate the backedge count by using getExitCount on the Latch block, // which is proven to be the only exiting block in this loop. This is same as // calculating getBackedgeTakenCount on the loop (which computes SCEV for all // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) { LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) { LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) { LLVM_DEBUG( dbgs() << "Count failed constraint on overflow trip count calculation.\n"); return false; } // Loop structure is the following: // // PreHeader // Header // ... // Latch // LatchExit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split LatchExit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, nullptr, PreserveLCSSA); // NewExit gets its DebugLoc from LatchExit, which is not part of the // original Loop. // Fix this by setting Loop's DebugLoc to NewExit. auto *NewExitTerminator = NewExit->getTerminator(); NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... *NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // LatchExit LatchExit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. // To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) DT->changeImmediateDominator(NewExit, PreHeader); else DT->changeImmediateDominator(PrologExit, PreHeader); } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Now the loop blocks are cloned and the other exiting blocks from the // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the // values from the cloned region. for (auto *BB : OtherExits) { for (auto &II : *BB) { // Given we preserve LCSSA form, we know that the values used outside the // loop will be used through these phi nodes at the exit blocks that are // transformed below. if (!isa<PHINode>(II)) break; PHINode *Phi = cast<PHINode>(&II); unsigned oldNumOperands = Phi->getNumIncomingValues(); // Add the incoming values from the remainder code to the end of the phi // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap.lookup(Phi->getIncomingValue(i)); // newVal can be a constant or derived from values outside the loop, and // hence need not have a VMap value. Also, since lookup already generated // a default "null" VMap entry for this value, we need to populate that // VMap entry correctly, with the mapped entry being itself. if (!newVal) { newVal = Phi->getIncomingValue(i); VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i); } Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } } #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) for (BasicBlock *SuccBB : successors(BB)) { assert(!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!"); } #endif } // Update the immediate dominator of the exit blocks and blocks that are // reachable from the exit blocks. This is needed because we now have paths // from both the original loop and the remainder code reaching the exit // blocks. While the IDom of these exit blocks were from the original loop, // now the IDom is the preheader (which decides whether the original loop or // remainder code should run). if (DT && !L->getExitingBlock()) { SmallVector<BasicBlock *, 16> ChildrenToUpdate; // NB! We have to examine the dom children of all loop blocks, not just // those which are the IDom of the exit blocks. This is because blocks // reachable from the exit blocks can have their IDom as the nearest common // dominator of the exit blocks. for (auto *BB : L->blocks()) { auto *DomNodeBB = DT->getNode(BB); for (auto *DomChild : DomNodeBB->getChildren()) { auto *DomChildBB = DomChild->getBlock(); if (!L->contains(LI->getLoopFor(DomChildBB))) ChildrenToUpdate.push_back(DomChildBB); } } for (auto *BB : ChildrenToUpdate) DT->changeImmediateDominator(BB, PreHeader); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // LatchExit LatchExit // Rewrite the cloned instruction operands to use the values created when the // clone is created. for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the any // of its parent loops, so the Scalar Evolution pass needs to be run again. SE->forgetTopmostLoop(L); // Verify that the Dom Tree is correct. #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) if (DT) assert(DT->verify(DominatorTree::VerificationLevel::Full)); #endif // Canonicalize to LoopSimplifyForm both original and remainder loops. We // cannot rely on the LoopUnrollPass to do this because it only does // canonicalization for parent/subloops and not the sibling loops. if (OtherExits.size() > 0) { // Generate dedicated exit blocks for the original loop, to preserve // LoopSimplifyForm. formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA); // Generate dedicated exit blocks for the remainder loop if one exists, to // preserve LoopSimplifyForm. if (remainderLoop) formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA); } auto UnrollResult = LoopUnrollResult::Unmodified; if (remainderLoop && UnrollRemainder) { LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollResult = UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, /*Force*/ false, /*AllowRuntime*/ false, /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA); } if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) *ResultLoop = remainderLoop; NumRuntimeUnrolled++; return true; }
/// InsertUniqueBackedgeBlock - This method is called when the specified loop /// has more than one backedge in it. If this occurs, revector all of these /// backedges to target a new basic block and have that block branch to the loop /// header. This ensures that loops have exactly one backedge. /// BasicBlock * LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); // Unique backedge insertion currently depends on having a preheader. if (!Preheader) return 0; // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) return 0; if (P != Preheader) BackedgeBlocks.push_back(P); } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in // the backedge block which correspond to any PHI nodes in the header block. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be", BETerminator); NewPN->reserveOperandSpace(BackedgeBlocks.size()); if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; Value *UniqueValue = 0; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); if (IBB == Preheader) { PreheaderIdx = i; } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { if (UniqueValue == 0) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; } } } // Delete all of the incoming values from the old PN except the preheader's assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); if (PreheaderIdx != 0) { PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); } // Nuke all entries except the zero'th. for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) PN->removeIncomingValue(e-i, false); // Finally, add the newly constructed PHI node as the entry for the BEBlock. PN->addIncoming(NewPN, BEBlock); // As an optimization, if all incoming values in the new PhiNode (which is a // subset of the incoming values of the old PHI node) have the same value, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); if (AA) AA->deleteValue(NewPN); BEBlock->getInstList().erase(NewPN); } } // Now that all of the PHI nodes have been inserted and adjusted, modify the // backedge blocks to just to the BEBlock instead of the header. for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) if (TI->getSuccessor(Op) == Header) TI->setSuccessor(Op, BEBlock); } //===--- Update all analyses which we must preserve now -----------------===// // Update Loop Information - we know that this block is now in the current // loop and all parent loops. L->addBasicBlockToLoop(BEBlock, LI->getBase()); // Update dominator information DT->splitBlock(BEBlock); return BEBlock; }
void RegionExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const { for (SetVector<BasicBlock *>::const_iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { BasicBlock *BB = *I; // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { for (User::op_iterator OI = II->op_begin(), OE = II->op_end(); OI != OE; ++OI) if (definedInCaller(Blocks, *OI)) Inputs.insert(*OI); #if LLVM_VERSION_MINOR == 5 for (User *U : II->users()) if (!definedInRegion(Blocks, U)) { #else for (Value::use_iterator UI = II->use_begin(), UE = II->use_end(); UI != UE; ++UI) if (!definedInRegion(Blocks, *UI)) { #endif Outputs.insert(II); break; } } } } /// severSplitPHINodes - If a PHI node has multiple inputs from outside of the /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. void RegionExtractor::severSplitPHINodes(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { PHINode *PN = dyn_cast<PHINode>(Header->begin()); if (!PN) return; // No PHI nodes. // If the header node contains any PHI nodes, check to see if there is more // than one entry from outside the region. If so, we need to sever the // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; // If there is one (or fewer) predecessor from outside the region, we don't // need to do anything special. if (NumPredsOutsideRegion <= 1) return; } // Otherwise, we need to split the header block into two pieces: one // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, Header->getName()+".ce"); // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; Blocks.remove(OldPred); Blocks.insert(NewBB); Header = NewBB; // Okay, update dominator sets. The blocks that dominate the new one are the // blocks that dominate TIBB plus the new block itself. if (DT) DT->splitBlock(NewBB); // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName()+".ce", NewBB->begin()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; } } } } }
/// This works like CloneAndPruneFunctionInto, except that it does not clone the /// entire function. Instead it starts at an instruction provided by the caller /// and copies (and prunes) only the code reachable from that instruction. void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, CloningDirector *Director) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; if (Director) { TypeMapper = Director->getTypeRemapper(); Materializer = Director->getValueMaterializer(); } #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. if (!StartingInst) for (const Argument &II : OldFunc->args()) assert(VMap.count(&II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, NameSuffix, CodeInfo, Director); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); else { StartingBB = &OldFunc->getEntryBlock(); StartingInst = &StartingBB->front(); } // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (const BasicBlock &BI : *OldFunc) { Value *V = VMap[&BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (!NewBB) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Handle PHI nodes specially, as we have to remove references to dead // blocks. for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process. if (const PHINode *PN = dyn_cast<PHINode>(I)) { if (isa<PHINode>(VMap[PN])) PHIToResolve.push_back(PN); else break; } else { break; } } // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. RemapInstruction(NewBB->getTerminator(), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap[PN->getIncomingBlock(pred)]; if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[&*OldI] == PN && "VMap mismatch"); VMap[&*OldI] = NV; PN->eraseFromParent(); ++OldI; } } } // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the // WeakVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) recursivelySimplifyInstruction(PN); // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || I->getSinglePredecessor() == &*I)) { BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; } // We need to simplify conditional branches and switches with a constant // operand. We try to prune these out when cloning, but if the // simplification required looking through PHI nodes, those are only // available after forming the full basic block. That may leave some here, // and we still want to prune the dead code as early as possible. ConstantFoldTerminator(&*I); BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { ++I; continue; } // We shouldn't be able to get single-entry PHI nodes here, as instsimplify // above should have zapped all of them.. assert(!isa<PHINode>(Dest->begin())); // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) Returns.push_back(RI); }
/// \brief Recursively handle the condition leading to a loop Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) { // Only search through PHI nodes which are inside the loop. If we try this // with PHI nodes that are outside of the loop, we end up inserting new PHI // nodes outside of the loop which depend on values defined inside the loop. // This will break the module with // 'Instruction does not dominate all users!' errors. PHINode *Phi = nullptr; if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) { BasicBlock *Parent = Phi->getParent(); PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front()); Value *Ret = NewPhi; // Handle all non-constant incoming values first for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { Value *Incoming = Phi->getIncomingValue(i); BasicBlock *From = Phi->getIncomingBlock(i); if (isa<ConstantInt>(Incoming)) { NewPhi->addIncoming(Broken, From); continue; } Phi->setIncomingValue(i, BoolFalse); Value *PhiArg = handleLoopCondition(Incoming, Broken, L, Term); NewPhi->addIncoming(PhiArg, From); } BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock(); for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { Value *Incoming = Phi->getIncomingValue(i); if (Incoming != BoolTrue) continue; BasicBlock *From = Phi->getIncomingBlock(i); if (From == IDom) { // We're in the following situation: // IDom/From // | \ // | If-block // | / // Parent // where we want to break out of the loop if the If-block is not taken. // Due to the depth-first traversal, there should be an end.cf // intrinsic in Parent, and we insert an else.break before it. // // Note that the end.cf need not be the first non-phi instruction // of parent, particularly when we're dealing with a multi-level // break, but it should occur within a group of intrinsic calls // at the beginning of the block. CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt()); while (OldEnd && OldEnd->getCalledFunction() != EndCf) OldEnd = dyn_cast<CallInst>(OldEnd->getNextNode()); if (OldEnd && OldEnd->getCalledFunction() == EndCf) { Value *Args[] = { OldEnd->getArgOperand(0), NewPhi }; Ret = CallInst::Create(ElseBreak, Args, "", OldEnd); continue; } } TerminatorInst *Insert = From->getTerminator(); Value *PhiArg = CallInst::Create(Break, Broken, "", Insert); NewPhi->setIncomingValue(i, PhiArg); } eraseIfUnused(Phi); return Ret; } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) { BasicBlock *Parent = Inst->getParent(); Instruction *Insert; if (L->contains(Inst)) { Insert = Parent->getTerminator(); } else { Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime(); } Value *Args[] = { Cond, Broken }; return CallInst::Create(IfBreak, Args, "", Insert); // Insert IfBreak before TERM for constant COND. } else if (isa<ConstantInt>(Cond)) { Value *Args[] = { Cond, Broken }; return CallInst::Create(IfBreak, Args, "", Term); } else { llvm_unreachable("Unhandled loop condition!"); } return nullptr; }
// First thing we need to do is scan the whole function for values that are // live across unwind edges. Each value that is live across an unwind edge // we spill into a stack location, guaranteeing that there is nothing live // across the unwind edge. This process also splits all critical edges // coming out of invoke's. void LowerInvoke:: splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; SplitCriticalEdge(II, 0, this); SplitCriticalEdge(II, 1, this); assert(!isa<PHINode>(II->getNormalDest()) && !isa<PHINode>(II->getUnwindDest()) && "critical edge splitting left single entry phi nodes?"); } Function *F = Invokes.back()->getParent()->getParent(); // To avoid having to handle incoming arguments specially, we lower each arg // to a copy instruction in the entry block. This ensures that the argument // value itself cannot be live across the entry block. BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsertPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have // to handle them differently. We use an extract/insert pair as a // lightweight method to achieve the same goal. if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); Instruction *NI = InsertValueInst::Create(AI, EI, 0); NI->insertAfter(EI); AI->replaceAllUsesWith(NI); // Set the operand of the instructions back to the AllocaInst. EI->setOperand(0, AI); NI->setOperand(0, AI); } else { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. CastInst *NC = new BitCastInst( AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. // Normally it's forbidden to replace a CastInst's operand because it // could cause the opcode to reflect an illegal conversion. However, // we're replacing it here with the same value it was constructed with. // We do this because the above replaceAllUsesWith() clobbered the // operand, but we want this one to remain. NC->setOperand(0, AI); } } // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->user_back())->getParent() == BB && !isa<PHINode>(Inst->user_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction*,16> Users; for (User *U : Inst->users()) { Instruction *UI = cast<Instruction>(U); if (UI->getParent() != BB || isa<PHINode>(UI)) Users.push_back(UI); } // Scan all of the uses and see if the live range is live across an unwind // edge. If we find a use live across an invoke edge, create an alloca // and spill the value. // Find all of the blocks that this value is live in. std::set<BasicBlock*> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); Users.pop_back(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { NeedsSpill = true; } } // If we decided we need a spill, do it. if (NeedsSpill) { ++NumSpilled; DemoteRegToStack(*Inst, true); } } }
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return /// instructions to the predecessor to enable tail call optimizations. The /// case it is currently looking for is: /// bb0: /// %tmp0 = tail call i32 @f0() /// br label %return /// bb1: /// %tmp1 = tail call i32 @f1() /// br label %return /// bb2: /// %tmp2 = tail call i32 @f2() /// br label %return /// return: /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] /// ret i32 %retval /// /// => /// /// bb0: /// %tmp0 = tail call i32 @f0() /// ret i32 %tmp0 /// bb1: /// %tmp1 = tail call i32 @f1() /// ret i32 %tmp1 /// bb2: /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; Value *V = RI->getReturnValue(); PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; if (V && !PN) return false; BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) return false; // It's not safe to eliminate the sign / zero extension of the return value. // See llvm::isInTailCallPosition(). const Function *F = BB->getParent(); Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) return false; // Make sure there are no instructions between the PHI and return, or that the // return is the first instruction in the block. if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); if (&*BI != RI) return false; } else { BasicBlock::iterator BI = BB->begin(); while (isa<DbgInfoIntrinsic>(BI)) ++BI; if (&*BI != RI) return false; } /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. SmallVector<CallInst*, 4> TailCalls; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && TLI->mayBeEmittedAsTailCall(CI)) TailCalls.push_back(CI); } } else { SmallPtrSet<BasicBlock*, 4> VisitedBBs; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { if (!VisitedBBs.insert(*PI)) continue; BasicBlock::InstListType &InstList = (*PI)->getInstList(); BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); if (RI == RE) continue; CallInst *CI = dyn_cast<CallInst>(&*RI); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) TailCalls.push_back(CI); } } bool Changed = false; for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { CallInst *CI = TailCalls[i]; CallSite CS(CI); // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) continue; // Make sure the call instruction is followed by an unconditional branch to // the return block. BasicBlock *CallBB = CI->getParent(); BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) continue; // Duplicate the return into CallBB. (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); ModifiedDT = Changed = true; ++NumRetsDup; } // If we eliminated all predecessors of the block, delete the block now. if (Changed && pred_begin(BB) == pred_end(BB)) BB->eraseFromParent(); return Changed; }
/// This method is used to notify a BasicBlock that the /// specified Predecessor of the block is no longer able to reach it. This is /// actually not used to update the Predecessor list, but is actually used to /// update the PHI nodes that reside in the block. Note that this should be /// called while the predecessor still refers to this block. /// void BasicBlock::removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs) { assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs. find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) && "removePredecessor: BB is not a predecessor!"); if (InstList.empty()) return; PHINode *APN = dyn_cast<PHINode>(&front()); if (!APN) return; // Quick exit. // If there are exactly two predecessors, then we want to nuke the PHI nodes // altogether. However, we cannot do this, if this in this case: // // Loop: // %x = phi [X, Loop] // %x2 = add %x, 1 ;; This would become %x2 = add %x2, 1 // br Loop ;; %x2 does not dominate all uses // // This is because the PHI node input is actually taken from the predecessor // basic block. The only case this can happen is with a self loop, so we // check for this case explicitly now. // unsigned max_idx = APN->getNumIncomingValues(); assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!"); if (max_idx == 2) { BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred); // Disable PHI elimination! if (this == Other) max_idx = 3; } // <= Two predecessors BEFORE I remove one? if (max_idx <= 2 && !DontDeleteUselessPHIs) { // Yup, loop through and nuke the PHI nodes while (PHINode *PN = dyn_cast<PHINode>(&front())) { // Remove the predecessor first. PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs); // If the PHI _HAD_ two uses, replace PHI node with its now *single* value if (max_idx == 2) { if (PN->getIncomingValue(0) != PN) PN->replaceAllUsesWith(PN->getIncomingValue(0)); else // We are left with an infinite loop with no entries: kill the PHI. PN->replaceAllUsesWith(UndefValue::get(PN->getType())); getInstList().pop_front(); // Remove the PHI node } // If the PHI node already only had one entry, it got deleted by // removeIncomingValue. } } else { // Okay, now we know that we need to remove predecessor #pred_idx from all // PHI nodes. Iterate over each PHI node fixing them up PHINode *PN; for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) { ++II; PN->removeIncomingValue(Pred, false); // If all incoming values to the Phi are the same, we can replace the Phi // with that value. Value* PNV = nullptr; if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue())) if (PNV != PN) { PN->replaceAllUsesWith(PNV); PN->eraseFromParent(); } } } }
/// RewriteLoopExitValues - Check to see if this loop has a computable /// loop-invariant execution count. If so, this means that we can compute the /// final value of any expressions that are recurrent in the loop, and /// substitute the exit values from the loop into any instructions outside of /// the loop that use the final values of the current expressions. /// /// This is mostly redundant with the regular IndVarSimplify activities that /// happen later, except that it's more powerful in some cases, because it's /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Verify the input to the pass in already in LCSSA form. assert(L->isLCSSAForm(*DT)); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitBlocks[i]; // If there are no PHI nodes in this exit block, then no values defined // inside the loop are used on this path, skip it. PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); if (!PN) continue; unsigned NumPreds = PN->getNumIncomingValues(); // Iterate over all of the PHI nodes. BasicBlock::iterator BBI = ExitBB->begin(); while ((PN = dyn_cast<PHINode>(BBI++))) { if (PN->use_empty()) continue; // dead use, don't replace it // SCEV only supports integer expressions for now. if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) continue; // It's necessary to tell ScalarEvolution about this explicitly so that // it can walk the def-use list and forget all SCEVs, as it may not be // watching the PHI itself. Once the new exit value is in place, there // may not be a def-use connection between the loop and every instruction // which got a SCEVAddRecExpr for that loop. SE->forgetValue(PN); // Iterate over all of the values in all the PHI nodes. for (unsigned i = 0; i != NumPreds; ++i) { // If the value being merged in is not integer or is not defined // in the loop, skip it. Value *InVal = PN->getIncomingValue(i); if (!isa<Instruction>(InVal)) continue; // If this pred is for a subloop, not L itself, skip it. if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) continue; // The Block is in a subloop, skip it. // Check that InVal is defined in the loop. Instruction *Inst = cast<Instruction>(InVal); if (!L->contains(Inst)) continue; // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) continue; Changed = true; ++NumReplaced; Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. RecursivelyDeleteTriviallyDeadInstructions(Inst); if (NumPreds == 1) { // Completely replace a single-pred PHI. This is safe, because the // NewVal won't be variant in the loop, so we don't need an LCSSA phi // node anymore. PN->replaceAllUsesWith(ExitVal); RecursivelyDeleteTriviallyDeadInstructions(PN); } } if (NumPreds != 1) { // Clone the PHI and delete the original one. This lets IVUsers and // any other maps purge the original user from their records. PHINode *NewPN = cast<PHINode>(PN->clone()); NewPN->takeName(PN); NewPN->insertBefore(PN); PN->replaceAllUsesWith(NewPN); PN->eraseFromParent(); } } } // The insertion point instruction may have been deleted; clear it out // so that the rewriter doesn't trip over it later. Rewriter.clearInsertPoint(); }
/// \brief If this loop has multiple backedges, try to pull one of them out into /// a nested loop. /// /// This is important for code that looks like /// this: /// /// Loop: /// ... /// br cond, Loop, Next /// ... /// br cond2, Loop, Out /// /// To identify this common case, we look at the PHI nodes in the header of the /// loop. PHI nodes with unchanging values on one backedge correspond to values /// that change in the "outer" loop, but not in the "inner" loop. /// /// If we are able to separate out a loop, return the new outer loop that was /// created. /// static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, Pass *PP) { // Don't try to separate loops without a preheader. if (!Preheader) return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!L->getHeader()->isLandingPad() && "Can't insert backedge to landing pad"); PHINode *PN = findPHIToPartitionLoops(L, AA, DT); if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This // handles the case when a PHI node has multiple instances of itself as // arguments. SmallVector<BasicBlock*, 8> OuterLoopPreds; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) != PN || !L->contains(PN->getIncomingBlock(i))) { // We can't split indirectbr edges. if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) return nullptr; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } } DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); // If ScalarEvolution is around and knows anything about values in // this loop, tell it to forget them, because we're about to // substantially change it. if (SE) SE->forgetLoop(L); BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); // Create the new outer loop. Loop *NewOuter = new Loop(); // Change the parent loop to use the outer loop as its child now. if (Loop *Parent = L->getParentLoop()) Parent->replaceChildLoopWith(L, NewOuter); else LI->changeTopLevelLoop(L, NewOuter); // L is now a subloop of our outer loop. NewOuter->addChildLoop(L); for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) NewOuter->addBlockEntry(*I); // Now reset the header in L, which had been moved by // SplitBlockPredecessors for the outer loop. L->moveToHeader(Header); // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. std::set<BasicBlock*> BlocksInL; for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { BasicBlock *P = *PI; if (DT->dominates(Header, P)) addBlockAndPredsToSet(P, Header, BlocksInL); } // Scan all of the loop children of L, moving them to OuterLoop if they are // not part of the inner loop. const std::vector<Loop*> &SubLoops = L->getSubLoops(); for (size_t I = 0; I != SubLoops.size(); ) if (BlocksInL.count(SubLoops[I]->getHeader())) ++I; // Loop remains in L else NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); // Now that we know which blocks are in L and which need to be moved to // OuterLoop, move any blocks that need it. for (unsigned i = 0; i != L->getBlocks().size(); ++i) { BasicBlock *BB = L->getBlocks()[i]; if (!BlocksInL.count(BB)) { // Move this block to the parent, updating the exit blocks sets L->removeBlockFromLoop(BB); if ((*LI)[BB] == L) LI->changeLoopFor(BB, NewOuter); --i; } } return NewOuter; }
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. This will update DominatorTree and /// DominatorFrontier information if it is available, thus calling this pass /// will not invalidate either of them. This returns the new block if the edge /// was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the /// specified successor will be merged into the same critical edge block. /// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that /// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an /// IndirectBrInst. Splitting these edges will almost always create an invalid /// program because the address of the new block won't be the one that is jumped /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. BranchInst::Create(DestBB, NewBB); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { // This conceptually does: // foreach (PHINode *PN in DestBB) // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); // but is optimized for two cases. if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { // We no longer enter through TIBB, now we come in through NewBB. // Revector exactly one entry in the PHI node that used to come from // TIBB to come from NewBB. PHINode *PN = cast<PHINode>(I); // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } else { // However, the foreach loop is slow for blocks with lots of predecessors // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk // the user list of TIBB to find the PHI nodes. SmallPtrSet<PHINode*, 16> UpdatedPHIs; for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); UI != E; ) { Value::use_iterator Use = UI++; if (PHINode *PN = dyn_cast<PHINode>(*Use)) { // Remove one entry from each PHI. if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) PN->setOperand(Use.getOperandNo(), NewBB); } } } } // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. DestBB->removePredecessor(TIBB); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); // If we have nothing to update, just return. if (DT == 0 && DF == 0 && LI == 0 && PI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate // anything, as there are other successors of DestBB. However, if all other // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; // If there is a PHI in the block, loop over predecessors with it, which is // faster than iterating pred_begin/end. if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) != NewBB) OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) { BasicBlock *P = *I; if (P != NewBB) OtherPreds.push_back(P); } } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but // TINode is the immediate dominator for the new node. // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); while (!OtherPreds.empty() && NewBBDominatesDestBB) { if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); OtherPreds.pop_back(); } OtherPreds.clear(); } // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { if (!DestBBNode) DestBBNode = DT->getNode(DestBB); DT->changeImmediateDominator(DestBBNode, NewBBNode); } } } // Should we update DominanceFrontier information? if (DF) { // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset." " not implemented yet!"); } // Since the new block is dominated by its only predecessor TIBB, // it cannot be in any block's dominance frontier. If NewBB dominates // DestBB, its dominance frontier is the same as DestBB's, otherwise it is // just {DestBB}. DominanceFrontier::DomSetType NewDFSet; if (NewBBDominatesDestBB) { DominanceFrontier::iterator I = DF->find(DestBB); if (I != DF->end()) { DF->addBasicBlock(NewBB, I->second); if (I->second.count(DestBB)) { // However NewBB's frontier does not include DestBB. DominanceFrontier::iterator NF = DF->find(NewBB); DF->removeFromFrontier(NF, DestBB); } } else DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType()); } else { DominanceFrontier::DomSetType NewDFSet; NewDFSet.insert(DestBB); DF->addBasicBlock(NewBB, NewDFSet); } } // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the // header of its loop (adding a branch into a loop elsewhere would // create an irreducible loop). assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) P->addBasicBlockToLoop(NewBB, LI->getBase()); } } // If TIBB is in a loop and DestBB is outside of that loop, split the // other exit blocks of the loop that also have predecessors outside // the loop, to maintain a LoopSimplify guarantee. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. if (P->mustPreserveAnalysisID(LCSSAID)) { SmallVector<BasicBlock *, 1> OrigPred; OrigPred.push_back(TIBB); CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); } // For each unique exit block... SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { // Collect all the preds that are inside the loop, and note // whether there are any preds outside the loop. SmallVector<BasicBlock *, 4> Preds; bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; if (TIL->contains(P)) Preds.push_back(P); else HasPredOutsideOfLoop = true; } // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use // getUniqueExitBlocks above because that depends on LoopSimplify // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } } } // LCSSA form was updated above for the case where LoopSimplify is // available, which means that all predecessors of loop exit blocks // are within the loop. Without LoopSimplify form, it would be // necessary to insert a new phi. assert((!P->mustPreserveAnalysisID(LCSSAID) || P->mustPreserveAnalysisID(LoopSimplifyID)) && "SplitCriticalEdge doesn't know how to update LCCSA form " "without LoopSimplify!"); } } // Update ProfileInfo if it is around. if (PI) PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; }