bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI, ScalarEvolution &SE, const DominatorTree &DT) { Loop *L = LI.getLoopFor(LInst->getParent()); auto *Ptr = LInst->getPointerOperand(); const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L); while (L && R.contains(L)) { if (!SE.isLoopInvariant(PtrSCEV, L)) return false; L = L->getParentLoop(); } for (auto *User : Ptr->users()) { auto *UserI = dyn_cast<Instruction>(User); if (!UserI || !R.contains(UserI)) continue; if (!UserI->mayWriteToMemory()) continue; auto &BB = *UserI->getParent(); bool DominatesAllPredecessors = true; for (auto Pred : predecessors(R.getExit())) if (R.contains(Pred) && !DT.dominates(&BB, Pred)) DominatesAllPredecessors = false; if (!DominatesAllPredecessors) continue; return false; } return true; }
/// Return true if the specified block dominates at least /// one of the blocks in the specified list. static bool blockDominatesAnExit(BasicBlock *BB, DominatorTree &DT, const SmallVectorImpl<BasicBlock *> &ExitBlocks) { DomTreeNode *DomNode = DT.getNode(BB); return any_of(ExitBlocks, [&](BasicBlock *EB) { return DT.dominates(DomNode, DT.getNode(EB)); }); }
/// Compute the set of basic blocks into which sunk instructions should be
/// inserted.
///
/// The returned set is meant to satisfy:
///
///  * Every block is inside the loop \p L
///  * For each block in \p UseBBs there is at least one returned block that
///    dominates it
///  * The total frequency of the returned blocks is minimal and no greater
///    than the frequency of the loop preheader
///
/// The execution cost of a solution is defined as the sum of the frequencies
/// of the returned blocks, so the function looks for the set with the lowest
/// such sum. If even that sum exceeds the preheader frequency, not sinking at
/// all is the better choice and an empty set is returned.
///
/// \p ColdLoopBBs helps to find the sinking locations. It holds blocks that
/// are:
///
///  * Inside the loop \p L
///  * Not more frequent than the loop's preheader
///  * Sorted by block frequency
///
/// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
/// To keep this cheap, the caller caps the maximum UseBBs.size().
static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
                  const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                  DominatorTree &DT, BlockFrequencyInfo &BFI) {
  SmallPtrSet<BasicBlock *, 2> SinkTargets;
  if (UseBBs.empty())
    return SinkTargets;

  // Start from the trivial solution: sink into every using block.
  SinkTargets.insert(UseBBs.begin(), UseBBs.end());

  // Scratch set, reused for every candidate: the current targets that the
  // candidate dominates.
  SmallPtrSet<BasicBlock *, 2> Covered;

  // Walk the cold blocks in the order given. Whenever a candidate is cheaper
  // than the group of current targets it dominates, the candidate replaces
  // that whole group.
  for (BasicBlock *Candidate : ColdLoopBBs) {
    Covered.clear();
    for (BasicBlock *Target : SinkTargets)
      if (DT.dominates(Candidate, Target))
        Covered.insert(Target);

    if (Covered.empty())
      continue;

    const bool CandidateIsCheaper =
        adjustedSumFreq(Covered, BFI) > BFI.getBlockFreq(Candidate);
    if (!CandidateIsCheaper)
      continue;

    for (BasicBlock *Replaced : Covered)
      SinkTargets.erase(Replaced);
    SinkTargets.insert(Candidate);
  }

  // Sinking does not pay off if the chosen blocks together are more frequent
  // than the preheader.
  if (adjustedSumFreq(SinkTargets, BFI) >
      BFI.getBlockFreq(L.getLoopPreheader()))
    SinkTargets.clear();

  return SinkTargets;
}
/// IsAcceptableTarget - Return true if it is possible to sink the instruction /// in the specified basic block. static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo, DominatorTree &DT, LoopInfo &LI) { assert(Inst && "Instruction to be sunk is null"); assert(SuccToSinkTo && "Candidate sink target is null"); // It is not possible to sink an instruction into its own block. This can // happen with loops. if (Inst->getParent() == SuccToSinkTo) return false; // It's never legal to sink an instruction into a block which terminates in an // EH-pad. if (SuccToSinkTo->getTerminator()->isExceptionalTerminator()) return false; // If the block has multiple predecessors, this would introduce computation // on different code paths. We could split the critical edge, but for now we // just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. if (Inst->mayReadFromMemory()) return false; // We don't want to sink across a critical edge if we don't dominate the // successor. We could be introducing calculations to new code paths. if (!DT.dominates(Inst->getParent(), SuccToSinkTo)) return false; // Don't sink instructions into a loop. Loop *succ = LI.getLoopFor(SuccToSinkTo); Loop *cur = LI.getLoopFor(Inst->getParent()); if (succ != nullptr && succ != cur) return false; } // Finally, check that all the uses of the instruction are actually // dominated by the candidate return AllUsesDominatedByBlock(Inst, SuccToSinkTo, DT); }
//Get all possible execution paths for a function. Ignoring the backedges for now void MLStatic::tracePath(BasicBlock *BB){ path.push_back(BB); int flag=0; const TerminatorInst *TInst = BB->getTerminator(); int succn = TInst->getNumSuccessors(); for(int i=0,NSucc = TInst->getNumSuccessors(); i < NSucc; ++i){ BasicBlock *Succ = TInst->getSuccessor(i); if(!DT->dominates(Succ,BB)){ tracePath(Succ); } else{ flag=1; std::vector<BasicBlock *> temp; for(int i=0;i<path.size();++i){ //DEBUG(dbgs()<<path[i]->getName()<<" "); temp.push_back(path[i]); } pathCollecn.push_back(temp); pathCollecn2[BB->getParent()].push_back(temp); //DEBUG(dbgs()<<"\n"); flag=0; } } if(succn==0){ std::vector<BasicBlock *> temp; for(int i=0;i<path.size();++i){ //DEBUG(dbgs()<<path[i]->getName()<<" "); temp.push_back(path[i]); } pathCollecn.push_back(temp); pathCollecn2[BB->getParent()].push_back(temp); //DEBUG(dbgs()<<"\n"); } path.pop_back(); }
/// AllUsesDominatedByBlock - Return true if all uses of the specified value /// occur in blocks dominated by the specified block. static bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB, DominatorTree &DT) { // Ignoring debug uses is necessary so debug info doesn't affect the code. // This may leave a referencing dbg_value in the original block, before // the definition of the vreg. Dwarf generator handles this although the // user might not get the right info at runtime. for (Use &U : Inst->uses()) { // Determine the block of the use. Instruction *UseInst = cast<Instruction>(U.getUser()); BasicBlock *UseBlock = UseInst->getParent(); if (PHINode *PN = dyn_cast<PHINode>(UseInst)) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo()); UseBlock = PN->getIncomingBlock(Num); } // Check that it dominates. if (!DT.dominates(BB, UseBlock)) return false; } return true; }
/// Returns true if this loop is known to contain a call safepoint which /// must unconditionally execute on any iteration of the loop which returns /// to the loop header via an edge from Pred. Returns a conservative correct /// answer; i.e. false is always valid. static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header, BasicBlock *Pred, DominatorTree &DT, const TargetLibraryInfo &TLI) { // In general, we're looking for any cut of the graph which ensures // there's a call safepoint along every edge between Header and Pred. // For the moment, we look only for the 'cuts' that consist of a single call // instruction in a block which is dominated by the Header and dominates the // loop latch (Pred) block. Somewhat surprisingly, walking the entire chain // of such dominating blocks gets substantially more occurrences than just // checking the Pred and Header blocks themselves. This may be due to the // density of loop exit conditions caused by range and null checks. // TODO: structure this as an analysis pass, cache the result for subloops, // avoid dom tree recalculations assert(DT.dominates(Header, Pred) && "loop latch not dominated by header?"); BasicBlock *Current = Pred; while (true) { for (Instruction &I : *Current) { if (auto CS = CallSite(&I)) // Note: Technically, needing a safepoint isn't quite the right // condition here. We should instead be checking if the target method // has an // unconditional poll. In practice, this is only a theoretical concern // since we don't have any methods with conditional-only safepoint // polls. if (needsStatepoint(CS, TLI)) return true; } if (Current == Header) break; Current = DT.getNode(Current)->getIDom()->getBlock(); } return false; }
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
/// split the critical edge.  This will update DominatorTree and LoopInfo
/// information if they are available through \p P, so calling this function
/// does not invalidate either of them.
/// This returns the new block if the edge was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
/// specified successor will be merged into the same critical edge block.
/// This is most commonly interesting with switch instructions, which may
/// have many edges to any one destination.  This ensures that all edges to that
/// dest go to one block instead of each going to a different block, but isn't
/// the standard definition of a "critical edge".
///
/// It is invalid to call this function on a critical edge that starts at an
/// IndirectBrInst.  Splitting these edges will almost always create an invalid
/// program because the address of the new block won't be the one that is jumped
/// to.
///
/// \param TI       Terminator whose successor edge is to be split.
/// \param SuccNum  Index of the successor of \p TI that identifies the edge.
/// \param P        Pass used to look up DominatorTree and LoopInfo; if null,
///                 no analysis is updated.
/// \param MergeIdenticalEdges   See above.
/// \param DontDeleteUselessPhis Forwarded to BasicBlock::removePredecessor
///                 when identical edges are merged.
/// \param SplitLandingPads      If true, loop exit blocks that are landing
///                 pads are split as well while the loop exits are being
///                 separated from predecessors outside the loop.
///
/// \returns The newly created block, or null if the edge is not critical or
/// the destination block is a landing pad.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
                                    Pass *P, bool MergeIdenticalEdges,
                                    bool DontDeleteUselessPhis,
                                    bool SplitLandingPads) {
  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;

  assert(!isa<IndirectBrInst>(TI) &&
         "Cannot split critical edge from IndirectBrInst");

  BasicBlock *TIBB = TI->getParent();
  BasicBlock *DestBB = TI->getSuccessor(SuccNum);

  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  if (DestBB->isLandingPad()) return 0;

  // Create a new basic block, linking it into the CFG.
  BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
                      TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
  // Create our unconditional branch.
  BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
  NewBI->setDebugLoc(TI->getDebugLoc());

  // Branch to the new block, breaking the edge.
  TI->setSuccessor(SuccNum, NewBB);

  // Insert the block into the function... right after the block TI lives in.
  Function &F = *TIBB->getParent();
  Function::iterator FBBI = TIBB;
  F.getBasicBlockList().insert(++FBBI, NewBB);

  // If there are any PHI nodes in DestBB, we need to update them so that they
  // merge incoming values from NewBB instead of from TIBB.
  {
    unsigned BBIdx = 0;
    for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
      // We no longer enter through TIBB, now we come in through NewBB.
      // Revector exactly one entry in the PHI node that used to come from
      // TIBB to come from NewBB.
      PHINode *PN = cast<PHINode>(I);

      // Reuse the previous value of BBIdx if it lines up.  In cases where we
      // have multiple phi nodes with *lots* of predecessors, this is a speed
      // win because we don't have to scan the PHI looking for TIBB.  This
      // happens because the BB list of PHI nodes are usually in the same
      // order.
      if (PN->getIncomingBlock(BBIdx) != TIBB)
        BBIdx = PN->getBasicBlockIndex(TIBB);
      PN->setIncomingBlock(BBIdx, NewBB);
    }
  }

  // If there are any other edges from TIBB to DestBB, update those to go
  // through the split block, making those edges non-critical as well (and
  // reducing the number of phi entries in the DestBB if relevant).
  if (MergeIdenticalEdges) {
    // Only successors after SuccNum are scanned here.
    for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
      if (TI->getSuccessor(i) != DestBB) continue;

      // Remove an entry for TIBB from DestBB phi nodes.
      DestBB->removePredecessor(TIBB, DontDeleteUselessPhis);

      // We found another edge to DestBB, go to NewBB instead.
      TI->setSuccessor(i, NewBB);
    }
  }

  // If we don't have a pass object, we can't update anything...
  if (P == 0) return NewBB;

  DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
  LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();

  // If we have nothing to update, just return.
  if (DT == 0 && LI == 0)
    return NewBB;

  // Now update analysis information.  Since the only predecessor of NewBB is
  // the TIBB, TIBB clearly dominates NewBB.  TIBB usually doesn't dominate
  // anything, as there are other successors of DestBB.  However, if all other
  // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
  // loop header) then NewBB dominates DestBB.
  SmallVector<BasicBlock*, 8> OtherPreds;

  // If there is a PHI in the block, loop over predecessors with it, which is
  // faster than iterating pred_begin/end.
  if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingBlock(i) != NewBB)
        OtherPreds.push_back(PN->getIncomingBlock(i));
  } else {
    for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
         I != E; ++I) {
      // Note: this local P shadows the Pass parameter inside the loop body.
      BasicBlock *P = *I;
      if (P != NewBB)
        OtherPreds.push_back(P);
    }
  }

  // Stays true when DestBB has no other predecessors, or when DestBB
  // dominates all of the (reachable) other predecessors.
  bool NewBBDominatesDestBB = true;

  // Should we update DominatorTree information?
  if (DT) {
    DomTreeNode *TINode = DT->getNode(TIBB);

    // The new block is not the immediate dominator for any other nodes, but
    // TINode is the immediate dominator for the new node.
    //
    if (TINode) {       // Don't break unreachable code!
      DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
      DomTreeNode *DestBBNode = 0;

      // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
      if (!OtherPreds.empty()) {
        DestBBNode = DT->getNode(DestBB);
        while (!OtherPreds.empty() && NewBBDominatesDestBB) {
          // Predecessors without a dominator tree node are skipped.
          if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
            NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
          OtherPreds.pop_back();
        }
        OtherPreds.clear();
      }

      // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
      // doesn't dominate anything.
      if (NewBBDominatesDestBB) {
        if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
        DT->changeImmediateDominator(DestBBNode, NewBBNode);
      }
    }
  }

  // Update LoopInfo if it is around.
  if (LI) {
    if (Loop *TIL = LI->getLoopFor(TIBB)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NewBB joins loop.
          DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NewBB, LI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == DestBB &&
                 "Should not create irreducible loops!");
          if (Loop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NewBB, LI->getBase());
        }
      }
      // If TIBB is in a loop and DestBB is outside of that loop, split the
      // other exit blocks of the loop that also have predecessors outside
      // the loop, to maintain a LoopSimplify guarantee.
      if (!TIL->contains(DestBB) &&
          P->mustPreserveAnalysisID(LoopSimplifyID)) {
        assert(!TIL->contains(NewBB) &&
               "Split point for loop exit is contained in loop!");

        // Update LCSSA form in the newly created exit block.
        if (P->mustPreserveAnalysisID(LCSSAID))
          createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);

        // For each unique exit block...
        // FIXME: This code is functionally equivalent to the corresponding
        // loop in LoopSimplify.
        SmallVector<BasicBlock *, 4> ExitBlocks;
        TIL->getExitBlocks(ExitBlocks);
        for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
          // Collect all the preds that are inside the loop, and note
          // whether there are any preds outside the loop.
          SmallVector<BasicBlock *, 4> Preds;
          bool HasPredOutsideOfLoop = false;
          BasicBlock *Exit = ExitBlocks[i];
          for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
               I != E; ++I) {
            // Note: this local P shadows the Pass parameter inside the loop
            // body only; the calls below use the Pass again.
            BasicBlock *P = *I;
            if (TIL->contains(P)) {
              // An in-loop predecessor that ends in an indirectbr makes the
              // exit unsplittable: drop everything collected so far, which
              // disables the split below.
              if (isa<IndirectBrInst>(P->getTerminator())) {
                Preds.clear();
                break;
              }
              Preds.push_back(P);
            } else {
              HasPredOutsideOfLoop = true;
            }
          }
          // If there are any preds not in the loop, we'll need to split
          // the edges. The Preds.empty() check is needed because a block
          // may appear multiple times in the list. We can't use
          // getUniqueExitBlocks above because that depends on LoopSimplify
          // form, which we're in the process of restoring!
          if (!Preds.empty() && HasPredOutsideOfLoop) {
            if (!Exit->isLandingPad()) {
              BasicBlock *NewExitBB =
                SplitBlockPredecessors(Exit, Preds, "split", P);
              if (P->mustPreserveAnalysisID(LCSSAID))
                createPHIsForSplitLoopExit(Preds, NewExitBB, Exit);
            } else if (SplitLandingPads) {
              SmallVector<BasicBlock*, 8> NewBBs;
              SplitLandingPadPredecessors(Exit, Preds,
                                          ".split1", ".split2",
                                          P, NewBBs);
              if (P->mustPreserveAnalysisID(LCSSAID))
                createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit);
            }
          }
        }
      }
      // LCSSA form was updated above for the case where LoopSimplify is
      // available, which means that all predecessors of loop exit blocks
      // are within the loop. Without LoopSimplify form, it would be
      // necessary to insert a new phi.
      assert((!P->mustPreserveAnalysisID(LCSSAID) ||
              P->mustPreserveAnalysisID(LoopSimplifyID)) &&
             "SplitCriticalEdge doesn't know how to update LCCSA form "
             "without LoopSimplify!");
    }
  }

  return NewBB;
}
/// \brief Rewrite as many loads as possible given a single store. /// /// When there is only a single store, we can use the domtree to trivially /// replace all of the dominated loads with the stored value. Do so, and return /// true if this has successfully promoted the alloca entirely. If this returns /// false there were some loads which were not dominated by the single store /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, DominatorTree &DT, AliasSetTracker *AST) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); int StoreIndex = -1; // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { Instruction *UserInst = cast<Instruction>(*UI++); if (!isa<LoadInst>(UserInst)) { assert(UserInst == OnlyStore && "Should only have load/stores"); continue; } LoadInst *LI = cast<LoadInst>(UserInst); // Okay, if we have a load from the alloca, we want to replace it with the // only value stored to the alloca. We can do this if the value is // dominated by the store. If not, we use the rest of the mem2reg machinery // to insert the phi nodes as needed. if (!StoringGlobalVal) { // Non-instructions are always dominated. if (LI->getParent() == StoreBB) { // If we have a use that is in the same block as the store, compare the // indices of the two instructions to see which one came first. If the // load came before the store, we can't handle it. if (StoreIndex == -1) StoreIndex = LBI.getInstructionIndex(OnlyStore); if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { // Can't handle this load, bail out. 
Info.UsingBlocks.push_back(StoreBB); continue; } } else if (LI->getParent() != StoreBB && !DT.dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to // check their relationships. If the store doesn't dom the use, bail // out. Info.UsingBlocks.push_back(LI->getParent()); continue; } } // Otherwise, we *can* safely rewrite this load. Value *ReplVal = OnlyStore->getOperand(0); // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); } // Finally, after the scan, check to see if the store is all that is left. if (!Info.UsingBlocks.empty()) return false; // If not, we'll have to fall back for the remainder. // Record debuginfo for the store and remove the declaration's // debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { DIBuilder DIB(*AI->getParent()->getParent()->getParent()); ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); DDI->eraseFromParent(); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); if (AST) AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); return true; }
/// For every instruction from the worklist, check to see if it has any uses
/// that are outside the current loop.  If so, insert LCSSA PHI nodes and
/// rewrite the uses.
///
/// \param Worklist Instructions to process. The list is consumed; PHI nodes
///                 that end up in a loop not contained in the processed
///                 loop are pushed back onto it for reprocessing.
/// \param DT       Dominator tree used to select the exit blocks dominated
///                 by the processed value.
/// \param LI       Loop information used to find the loop of each
///                 instruction. The result of getLoopFor() is dereferenced
///                 without a null check.
///
/// \returns true if uses of at least one instruction were rewritten.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
                                    DominatorTree &DT, LoopInfo &LI) {
  SmallVector<Use *, 16> UsesToRewrite;
  SmallSetVector<PHINode *, 16> PHIsToRemove;
  PredIteratorCache PredCache;
  bool Changed = false;

  // Cache the Loop ExitBlocks across this loop.  We expect to get a lot of
  // instructions within the same loops, computing the exit blocks is
  // expensive, and we're not mutating the loop structure.
  SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;

  while (!Worklist.empty()) {
    UsesToRewrite.clear();

    Instruction *I = Worklist.pop_back_val();
    BasicBlock *InstBB = I->getParent();
    Loop *L = LI.getLoopFor(InstBB);
    if (!LoopExitBlocks.count(L))
      L->getExitBlocks(LoopExitBlocks[L]);
    assert(LoopExitBlocks.count(L));
    const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];

    // A loop without exit blocks cannot have uses that need LCSSA PHIs.
    if (ExitBlocks.empty())
      continue;

    // Tokens cannot be used in PHI nodes, so we skip over them.
    // We can run into tokens which are live out of a loop with catchswitch
    // instructions in Windows EH if the catchswitch has one catchpad which
    // is inside the loop and another which is not.
    if (I->getType()->isTokenTy())
      continue;

    // Collect the uses outside of the loop. A use by a PHI node counts as a
    // use in the corresponding incoming block.
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UserBB = User->getParent();
      if (PHINode *PN = dyn_cast<PHINode>(User))
        UserBB = PN->getIncomingBlock(U);

      if (InstBB != UserBB && !L->contains(UserBB))
        UsesToRewrite.push_back(&U);
    }

    // If there are no uses outside the loop, exit with no change.
    if (UsesToRewrite.empty())
      continue;

    ++NumLCSSA; // We are applying the transformation

    // Invoke instructions are special in that their result value is not
    // available along their unwind edge. The code below tests to see whether
    // DomBB dominates the value, so adjust DomBB to the normal destination
    // block, which is effectively where the value is first usable.
    BasicBlock *DomBB = InstBB;
    if (InvokeInst *Inv = dyn_cast<InvokeInst>(I))
      DomBB = Inv->getNormalDest();

    DomTreeNode *DomNode = DT.getNode(DomBB);

    // AddedPHIs: LCSSA PHIs created here for the exit blocks.
    // PostProcessPHIs: PHIs located in a loop not contained in L; they are
    // put back on the worklist.
    // InsertedPHIs: PHIs created by the SSAUpdater while rewriting uses.
    SmallVector<PHINode *, 16> AddedPHIs;
    SmallVector<PHINode *, 8> PostProcessPHIs;

    SmallVector<PHINode *, 4> InsertedPHIs;
    SSAUpdater SSAUpdate(&InsertedPHIs);
    SSAUpdate.Initialize(I->getType(), I->getName());

    // Insert the LCSSA phi's into all of the exit blocks dominated by the
    // value, and add them to the Phi's map.
    for (BasicBlock *ExitBB : ExitBlocks) {
      if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
        continue;

      // If we already inserted something for this BB, don't reprocess it.
      if (SSAUpdate.HasValueForBlock(ExitBB))
        continue;

      PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
                                    I->getName() + ".lcssa", &ExitBB->front());

      // Add inputs from inside the loop for this PHI.
      for (BasicBlock *Pred : PredCache.get(ExitBB)) {
        PN->addIncoming(I, Pred);

        // If the exit block has a predecessor not within the loop, arrange for
        // the incoming value use corresponding to that predecessor to be
        // rewritten in terms of a different LCSSA PHI.
        // (The operand selected here is the incoming value added just above.)
        if (!L->contains(Pred))
          UsesToRewrite.push_back(
              &PN->getOperandUse(PN->getOperandNumForIncomingValue(
                  PN->getNumIncomingValues() - 1)));
      }

      AddedPHIs.push_back(PN);

      // Remember that this phi makes the value alive in this block.
      SSAUpdate.AddAvailableValue(ExitBB, PN);

      // LoopSimplify might fail to simplify some loops (e.g. when indirect
      // branches are involved). In such situations, it might happen that an
      // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
      // create PHIs in such an exit block, we are also inserting PHIs into L2's
      // header. This could break LCSSA form for L2 because these inserted PHIs
      // can also have uses outside of L2. Remember all PHIs in such situation
      // as to revisit them later on. FIXME: Remove this if indirectbr support
      // into LoopSimplify gets improved.
      if (auto *OtherLoop = LI.getLoopFor(ExitBB))
        if (!L->contains(OtherLoop))
          PostProcessPHIs.push_back(PN);
    }

    // Rewrite all uses outside the loop in terms of the new PHIs we just
    // inserted.
    for (Use *UseToRewrite : UsesToRewrite) {
      // If this use is in an exit block, rewrite to use the newly inserted PHI.
      // This is required for correctness because SSAUpdate doesn't handle uses
      // in the same block.  It assumes the PHI we inserted is at the end of the
      // block.
      Instruction *User = cast<Instruction>(UseToRewrite->getUser());
      BasicBlock *UserBB = User->getParent();
      if (PHINode *PN = dyn_cast<PHINode>(User))
        UserBB = PN->getIncomingBlock(*UseToRewrite);

      if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
        // Tell the VHs that the uses changed. This updates SCEV's caches.
        if (UseToRewrite->get()->hasValueHandle())
          ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
        UseToRewrite->set(&UserBB->front());
        continue;
      }

      // Otherwise, do full PHI insertion.
      SSAUpdate.RewriteUse(*UseToRewrite);
    }

    // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
    // to post-process them to keep LCSSA form.
    for (PHINode *InsertedPN : InsertedPHIs) {
      if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
        if (!L->contains(OtherLoop))
          PostProcessPHIs.push_back(InsertedPN);
    }

    // Post process PHI instructions that were inserted into another disjoint
    // loop and update their exits properly.
    for (auto *PostProcessPN : PostProcessPHIs) {
      if (PostProcessPN->use_empty())
        continue;

      // Reprocess each PHI instruction.
      Worklist.push_back(PostProcessPN);
    }

    // Keep track of PHI nodes that we want to remove because they did not have
    // any uses rewritten.
    for (PHINode *PN : AddedPHIs)
      if (PN->use_empty())
        PHIsToRemove.insert(PN);

    Changed = true;
  }
  // Remove PHI nodes that did not have any uses rewritten.
  for (PHINode *PN : PHIsToRemove) {
    assert (PN->use_empty() && "Trying to remove a phi with uses.");
    PN->eraseFromParent();
  }
  return Changed;
}
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. This will update DominatorTree and /// DominatorFrontier information if it is available, thus calling this pass /// will not invalidate either of them. This returns the new block if the edge /// was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the /// specified successor will be merged into the same critical edge block. /// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that /// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an /// IndirectBrInst. Splitting these edges will almost always create an invalid /// program because the address of the new block won't be the one that is jumped /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. BranchInst::Create(DestBB, NewBB); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); // Insert the block into the function... right after the block TI lives in. 
Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { // This conceptually does: // foreach (PHINode *PN in DestBB) // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); // but is optimized for two cases. if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { // We no longer enter through TIBB, now we come in through NewBB. // Revector exactly one entry in the PHI node that used to come from // TIBB to come from NewBB. PHINode *PN = cast<PHINode>(I); // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } else { // However, the foreach loop is slow for blocks with lots of predecessors // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk // the user list of TIBB to find the PHI nodes. SmallPtrSet<PHINode*, 16> UpdatedPHIs; for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); UI != E; ) { Value::use_iterator Use = UI++; if (PHINode *PN = dyn_cast<PHINode>(Use)) { // Remove one entry from each PHI. if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) PN->setOperand(Use.getOperandNo(), NewBB); } } } } // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). 
if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. DestBB->removePredecessor(TIBB); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); // If we have nothing to update, just return. if (DT == 0 && DF == 0 && LI == 0 && PI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate // anything, as there are other successors of DestBB. However, if all other // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; // If there is a PHI in the block, loop over predecessors with it, which is // faster than iterating pred_begin/end. if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) != NewBB) OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) if (*I != NewBB) OtherPreds.push_back(*I); } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but // TINode is the immediate dominator for the new node. // if (TINode) { // Don't break unreachable code! 
DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); while (!OtherPreds.empty() && NewBBDominatesDestBB) { if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); OtherPreds.pop_back(); } OtherPreds.clear(); } // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { if (!DestBBNode) DestBBNode = DT->getNode(DestBB); DT->changeImmediateDominator(DestBBNode, NewBBNode); } } } // Should we update DominanceFrontier information? if (DF) { // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset." " not implemented yet!"); } // Since the new block is dominated by its only predecessor TIBB, // it cannot be in any block's dominance frontier. If NewBB dominates // DestBB, its dominance frontier is the same as DestBB's, otherwise it is // just {DestBB}. DominanceFrontier::DomSetType NewDFSet; if (NewBBDominatesDestBB) { DominanceFrontier::iterator I = DF->find(DestBB); if (I != DF->end()) { DF->addBasicBlock(NewBB, I->second); if (I->second.count(DestBB)) { // However NewBB's frontier does not include DestBB. DominanceFrontier::iterator NF = DF->find(NewBB); DF->removeFromFrontier(NF, DestBB); } } else DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType()); } else { DominanceFrontier::DomSetType NewDFSet; NewDFSet.insert(DestBB); DF->addBasicBlock(NewBB, NewDFSet); } } // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. 
if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the // header of its loop (adding a branch into a loop elsewhere would // create an irreducible loop). assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) P->addBasicBlockToLoop(NewBB, LI->getBase()); } } // If TIBB is in a loop and DestBB is outside of that loop, split the // other exit blocks of the loop that also have predecessors outside // the loop, to maintain a LoopSimplify guarantee. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. if (P->mustPreserveAnalysisID(LCSSAID)) { SmallVector<BasicBlock *, 1> OrigPred; OrigPred.push_back(TIBB); CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); } // For each unique exit block... SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { // Collect all the preds that are inside the loop, and note // whether there are any preds outside the loop. 
SmallVector<BasicBlock *, 4> Preds; bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) if (TIL->contains(*I)) Preds.push_back(*I); else HasPredOutsideOfLoop = true; // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use // getUniqueExitBlocks above because that depends on LoopSimplify // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } } } // LCSSA form was updated above for the case where LoopSimplify is // available, which means that all predecessors of loop exit blocks // are within the loop. Without LoopSimplify form, it would be // necessary to insert a new phi. assert((!P->mustPreserveAnalysisID(LCSSAID) || P->mustPreserveAnalysisID(LoopSimplifyID)) && "SplitCriticalEdge doesn't know how to update LCCSA form " "without LoopSimplify!"); } } // Update ProfileInfo if it is around. if (PI) PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; }
/// Rewrite as many loads as possible given a single store. /// /// When there is only a single store, we can use the domtree to trivially /// replace all of the dominated loads with the stored value. Do so, and return /// true if this has successfully promoted the alloca entirely. If this returns /// false there were some loads which were not dominated by the single store /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); int StoreIndex = -1; // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast<Instruction>(*UI++); if (!isa<LoadInst>(UserInst)) { assert(UserInst == OnlyStore && "Should only have load/stores"); continue; } LoadInst *LI = cast<LoadInst>(UserInst); // Okay, if we have a load from the alloca, we want to replace it with the // only value stored to the alloca. We can do this if the value is // dominated by the store. If not, we use the rest of the mem2reg machinery // to insert the phi nodes as needed. if (!StoringGlobalVal) { // Non-instructions are always dominated. if (LI->getParent() == StoreBB) { // If we have a use that is in the same block as the store, compare the // indices of the two instructions to see which one came first. If the // load came before the store, we can't handle it. if (StoreIndex == -1) StoreIndex = LBI.getInstructionIndex(OnlyStore); if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { // Can't handle this load, bail out. 
Info.UsingBlocks.push_back(StoreBB); continue; } } else if (LI->getParent() != StoreBB && !DT.dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to // check their relationships. If the store doesn't dom the use, bail // out. Info.UsingBlocks.push_back(LI->getParent()); continue; } } // Otherwise, we *can* safely rewrite this load. Value *ReplVal = OnlyStore->getOperand(0); // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); // If the load was marked as nonnull we don't want to lose // that information when we erase this Load. So we preserve // it with an assume. if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); LI->eraseFromParent(); LBI.deleteValue(LI); } // Finally, after the scan, check to see if the store is all that is left. if (!Info.UsingBlocks.empty()) return false; // If not, we'll have to fall back for the remainder. // Record debuginfo for the store and remove the declaration's // debuginfo. for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); DII->eraseFromParent(); LBI.deleteValue(DII); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); AI->eraseFromParent(); LBI.deleteValue(AI); return true; }
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and /// DominanceFrontier, but no other analyses. BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); // Move the edges from Preds to point to NewBB instead of BB. for (unsigned i = 0; i != NumPreds; ++i) Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. 
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node. Value *InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); // Check to see if we can eliminate this phi node. if (Value *V = PN->hasConstantValue(DT != 0)) { Instruction *I = dyn_cast<Instruction>(V); if (!I || DT == 0 || DT->dominates(I, PN)) { PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); PN->eraseFromParent(); } } } return NewBB; }
// // Method: InsertFreesAtEnd() // // Description: // Insert free instructions so that the memory allocated by the specified // malloc instruction is freed on function exit. // void ConvertUnsafeAllocas::InsertFreesAtEnd(Instruction *MI) { assert (MI && "MI is NULL!\n"); // // Get the dominance frontier information about the malloc instruction's // basic block. We cache the information in case we end up processing // multiple instructions from the same function. // BasicBlock *currentBlock = MI->getParent(); Function * F = currentBlock->getParent(); DominanceFrontier * dfmt = &getAnalysis<DominanceFrontier>(*F); DominatorTree * domTree = &getAnalysis<DominatorTree>(*F); DominanceFrontier::const_iterator it = dfmt->find(currentBlock); #if 0 // // If the basic block has a dominance frontier, use it. // if (it != dfmt->end()) { const DominanceFrontier::DomSetType &S = it->second; if (S.size() > 0) { DominanceFrontier::DomSetType::iterator pCurrent = S.begin(), pEnd = S.end(); for (; pCurrent != pEnd; ++pCurrent) { BasicBlock *frontierBlock = *pCurrent; // One of its predecessors is dominated by currentBlock; // need to insert a free in that predecessor for (pred_iterator SI = pred_begin(frontierBlock), SE = pred_end(frontierBlock); SI != SE; ++SI) { BasicBlock *predecessorBlock = *SI; if (domTree->dominates (predecessorBlock, currentBlock)) { // Get the terminator Instruction *InsertPt = predecessorBlock->getTerminator(); new FreeInst(MI, InsertPt); } } } return; } } #endif // // There is no dominance frontier; insert frees on all returns; // std::vector<Instruction*> FreePoints; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (isa<ReturnInst>(BB->getTerminator()) || isa<ResumeInst>(BB->getTerminator())) FreePoints.push_back(BB->getTerminator()); // // We have the Free points; now we construct the free instructions at each // of the points. 
// std::vector<Instruction*>::iterator fpI = FreePoints.begin(), fpE = FreePoints.end(); for (; fpI != fpE ; ++ fpI) { // // Determine whether the allocation dominates the return. If not, then // don't insert a free instruction for now. // Instruction *InsertPt = *fpI; if (domTree->dominates (MI->getParent(), InsertPt->getParent())) { CallInst::Create (kfree, MI, "", InsertPt); } else { ++MissingFrees; } } }
// // Method: processFunction() // // Description: // This method searches for calls to a specified run-time check. For every // such call, it replaces the pointer that the call checks with the return // value of the call. // // This allows functions like boundscheck() to return a rewrite pointer; // this code changes the program to use the returned rewrite pointer instead // of the original pointer which was passed into boundscheck(). // // Inputs: // M - The module to modify. // Check - A reference to a structure describing the checking function to // process. // // Return value: // false - No modifications were made to the Module. // true - One or more modifications were made to the module. // bool RewriteOOB::processFunction (Module & M, const CheckInfo & Check) { // // Get a pointer to the checking function. If the checking function does // not exist within the program, then do nothing. // Function * F = M.getFunction (Check.name); if (!F) return false; // // Ensure the function has the right number of arguments and that its // result is a pointer type. // assert (isa<PointerType>(F->getReturnType())); // // To avoid recalculating the dominator information each time we process a // use of the specified function F, we will record the function containing // the call instruction to F and the corresponding dominator information; we // will then update this information only when the next use is a call // instruction belonging to a different function. We are helped by the fact // that iterating through uses often groups uses within the same function. // Function * CurrentFunction = 0; DominatorTree * domTree = 0; // // Iterate though all calls to the function and modify the use of the // operand to be the result of the function. // bool modified = false; for (Value::use_iterator FU = F->use_begin(); FU != F->use_end(); ++FU) { // // We are only concerned about call instructions; any other use is of // no interest to the organization. 
// if (CallInst * CI = dyn_cast<CallInst>(*FU)) { // // We're going to make a change. Mark that we will have done so. // modified = true; // // Get the operand that needs to be replaced as well as the operand // with all of the casts peeled away. Increment the operand index by // one because a call instruction's first operand is the function to // call. // Value * RealOperand = Check.getCheckedPointer (CI); Value * PeeledOperand = RealOperand->stripPointerCasts(); // // Cast the result of the call instruction to match that of the original // value. // BasicBlock::iterator i(CI); Instruction * CastCI = castTo (CI, PeeledOperand->getType(), PeeledOperand->getName(), ++i); // // Get dominator information for the function. // if ((CI->getParent()->getParent()) != CurrentFunction) { CurrentFunction = CI->getParent()->getParent(); domTree = &getAnalysis<DominatorTree>(*CurrentFunction); } // // For every use that the call instruction dominates, change the use to // use the result of the call instruction. We first collect the uses // that need to be modified before doing the modifications to avoid any // iterator invalidation errors. // std::vector<User *> Uses; Value::use_iterator UI = PeeledOperand->use_begin(); for (; UI != PeeledOperand->use_end(); ++UI) { if (Instruction * Use = dyn_cast<Instruction>(*UI)) if ((CI != Use) && (domTree->dominates (CI, Use))) { Uses.push_back (*UI); ++Changes; } } while (Uses.size()) { User * Use = Uses.back(); Uses.pop_back(); Use->replaceUsesOfWith (PeeledOperand, CastCI); } } } return modified; }
/// Insert safepoint polls (on loop backedges and at function entry, as
/// enabled by the corresponding flags) into \p F, then replace every call
/// site that needs a parseable state with a statepoint.
///
/// \returns true if the function was changed.
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  bool modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachablity queries return non-sensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  // Note: With the migration, we need to recompute this for each 'pass'.  Once
  // we merge these, we'll do it once before the analysis
  DominatorTree DT;

  std::vector<CallSite> ParsePointNeeded;

  if (EnableBackedgeSafepoints) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    FunctionPassManager FPM(F.getParent());
    PlaceBackedgeSafepointsImpl *PBS =
        new PlaceBackedgeSafepointsImpl(EnableCallSafepoints);
    FPM.add(PBS);
    // Note: While the analysis pass itself won't modify the IR, LoopSimplify
    // (which it depends on) may.  i.e. analysis must be recalculated after run
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    // Insert a poll at each point the analysis pass identified
    for (size_t PollIdx = 0; PollIdx < PBS->PollLocations.size(); PollIdx++) {
      // We are inserting a poll, the function is modified
      modified = true;

      // The poll location must be the terminator of a loop latch block.
      TerminatorInst *Term = PBS->PollLocations[PollIdx];

      // Parse points produced by every poll inserted for this location.
      std::vector<CallSite> ParsePoints;
      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it.  Its possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.  (Note: This still relies on LoopSimplify.)
        // NOTE(review): iteration order of these sets may depend on pointer
        // values, so the order in which edges are split is not obviously
        // stable between runs -- confirm whether that matters here.
        DenseSet<BasicBlock *> Headers;
        for (unsigned SuccIdx = 0; SuccIdx < Term->getNumSuccessors();
             SuccIdx++) {
          BasicBlock *Succ = Term->getSuccessor(SuccIdx);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        DenseSet<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, nullptr);
          SplitBackedges.insert(NewBB);
        }
        DT.recalculate(F);
        for (BasicBlock *NewBB : SplitBackedges) {
          // InsertSafepointPoll requires (and asserts) that the vector it is
          // handed is empty, so every poll gets a vector of its own; the
          // results are accumulated afterwards.  Sharing one vector across
          // several split backedges would trip that assertion as soon as an
          // earlier poll had recorded a parse point.
          std::vector<CallSite> PollParsePoints;
          InsertSafepointPoll(DT, NewBB->getTerminator(), PollParsePoints);
          ParsePoints.insert(ParsePoints.end(), PollParsePoints.begin(),
                             PollParsePoints.end());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        InsertSafepointPoll(DT, Term, ParsePoints);
        NumBackedgeSafepoints++;
      }

      // Record the parse points for later use
      ParsePointNeeded.insert(ParsePointNeeded.end(), ParsePoints.begin(),
                              ParsePoints.end());
    }
  }

  if (EnableEntrySafepoints) {
    DT.recalculate(F);
    // A null location means no entry poll is inserted (policy choice not to
    // insert?).
    if (Instruction *term = findLocationForEntrySafepoint(F, DT)) {
      std::vector<CallSite> RuntimeCalls;
      InsertSafepointPoll(DT, term, RuntimeCalls);
      modified = true;
      NumEntrySafepoints++;
      ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                              RuntimeCalls.end());
    }
  }

  if (EnableCallSafepoints) {
    DT.recalculate(F);
    std::vector<CallSite> Calls;
    findCallSafepoints(F, Calls);
    NumCallSafepoints += Calls.size();
    ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
  }

  // Unique the vectors since we can end up with duplicates if we scan the call
  // site for call safepoints after we add it for entry or backedge.  The
  // only reason we need tracking at all is that some functions might have
  // polls but not call safepoints and thus we might miss marking the runtime
  // calls for the polls. (This is useful in test cases!)
  unique_unsorted(ParsePointNeeded);

  // Any parse point (no matter what source) will be handled here
  DT.recalculate(F); // Needed?

  // We're about to start modifying the function
  if (!ParsePointNeeded.empty())
    modified = true;

  // Now run through and insert the safepoints, but do _NOT_ update or remove
  // any existing uses.  We have references to live variables that need to
  // survive to the last iteration of this loop.
  std::vector<Value *> Results;
  Results.reserve(ParsePointNeeded.size());
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
    Results.push_back(GCResult);
  }
  assert(Results.size() == ParsePointNeeded.size());

  // Adjust all users of the old call sites to use the new ones instead
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = Results[i];
    if (GCResult) {
      // In case if we inserted result in a different basic block than the
      // original safepoint (this can happen for invokes).  We need to be sure
      // that the original result value was not used in any of the phi nodes
      // at the beginning of basic block with gc result.  Because we know that
      // all such blocks will have single predecessor we can safely assume
      // that all phi nodes have single entry (because of
      // normalizeBBForInvokeSafepoint).  Just remove them all here.
      if (CS.isInvoke()) {
        FoldSingleEntryPHINodes(cast<Instruction>(GCResult)->getParent(),
                                nullptr);
        assert(
            !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));
      }

      // Replace all uses with the new call
      CS.getInstruction()->replaceAllUsesWith(GCResult);
    }

    // Now that we've handled all uses, remove the original call itself
    // Note: The insert point can't be the deleted instruction!
    CS.getInstruction()->eraseFromParent();
  }
  return modified;
}
/// Insert safepoint polls (on loop backedges and at function entry, as
/// enabled for \p F) into the function.
///
/// Nothing is done for declarations, empty functions, the safepoint poll
/// function itself, or functions rejected by shouldRewriteFunction().
///
/// \returns true if the function was changed.
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we're inlining this inside of safepoint poll insertion, this
    // doesn't make any sense.  Note that we do make any contained calls
    // parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  const TargetLibraryInfo &TLI =
      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  bool Modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachablity queries return non-sensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  Modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    auto *PBS = new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.  The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    //
    // Remove the duplicates *before* sorting, keeping the first occurrence of
    // each.  Sorting by block name and then dropping adjacent duplicates is
    // not sufficient: when several blocks share a name (e.g. they are all
    // unnamed) the sort may leave other entries between two copies of the
    // same terminator, and a copy that survives would be processed twice.
    SetVector<TerminatorInst *> UniquePolls(PollLocations.begin(),
                                            PollLocations.end());
    PollLocations.assign(UniquePolls.begin(), UniquePolls.end());

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.  A
    // stable sort keeps entries whose block names compare equal in the order
    // in which they were discovered.
    std::stable_sort(PollLocations.begin(), PollLocations.end(),
                     OrderByBBName);

    // Insert a poll at each point the analysis pass identified
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      Modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it.  Its possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // Split every backedge, handing the dominator tree to SplitEdge, and
        // poll at the end of each new block.
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
      PollsNeeded.push_back(Location);
      Modified = true;
      NumEntrySafepoints++;
    }
    // TODO: else we should assert that there was, in fact, a policy choice to
    // not insert a entry safepoint poll.
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }

  return Modified;
}
static void InsertSafepointPoll(DominatorTree &DT, Instruction *term, std::vector<CallSite> &ParsePointsNeeded /*rval*/) { Module *M = term->getParent()->getParent()->getParent(); assert(M); // Inline the safepoint poll implementation - this will get all the branch, // control flow, etc.. Most importantly, it will introduce the actual slow // path call - where we need to insert a safepoint (parsepoint). FunctionType *ftype = FunctionType::get(Type::getVoidTy(M->getContext()), false); assert(ftype && "null?"); // Note: This cast can fail if there's a function of the same name with a // different type inserted previously Function *F = dyn_cast<Function>(M->getOrInsertFunction("gc.safepoint_poll", ftype)); assert(F && !F->empty() && "definition must exist"); CallInst *poll = CallInst::Create(F, "", term); // Record some information about the call site we're replacing BasicBlock *OrigBB = term->getParent(); BasicBlock::iterator before(poll), after(poll); bool isBegin(false); if (before == term->getParent()->begin()) { isBegin = true; } else { before--; } after++; assert(after != poll->getParent()->end() && "must have successor"); assert(DT.dominates(before, after) && "trivially true"); // do the actual inlining InlineFunctionInfo IFI; bool inlineStatus = InlineFunction(poll, IFI); assert(inlineStatus && "inline must succeed"); (void)inlineStatus; // suppress warning in release-asserts // Check post conditions assert(IFI.StaticAllocas.empty() && "can't have allocs"); std::vector<CallInst *> calls; // new calls std::set<BasicBlock *> BBs; // new BBs + insertee // Include only the newly inserted instructions, Note: begin may not be valid // if we inserted to the beginning of the basic block BasicBlock::iterator start; if (isBegin) { start = OrigBB->begin(); } else { start = before; start++; } // If your poll function includes an unreachable at the end, that's not // valid. Bugpoint likes to create this, so check for it. 
assert(isPotentiallyReachable(&*start, &*after, nullptr, nullptr) && "malformed poll function"); scanInlinedCode(&*(start), &*(after), calls, BBs); // Recompute since we've invalidated cached data. Conceptually we // shouldn't need to do this, but implementation wise we appear to. Needed // so we can insert safepoints correctly. // TODO: update more cheaply DT.recalculate(*after->getParent()->getParent()); assert(!calls.empty() && "slow path not found for safepoint poll"); // Record the fact we need a parsable state at the runtime call contained in // the poll function. This is required so that the runtime knows how to // parse the last frame when we actually take the safepoint (i.e. execute // the slow path) assert(ParsePointsNeeded.empty()); for (size_t i = 0; i < calls.size(); i++) { // No safepoint needed or wanted if (!needsStatepoint(calls[i])) { continue; } // These are likely runtime calls. Should we assert that via calling // convention or something? ParsePointsNeeded.push_back(CallSite(calls[i])); } assert(ParsePointsNeeded.size() <= calls.size()); }
/// Place safepoint polls in \p F (at loop backedges and at function entry,
/// each subject to its enable* predicate) and, unless NoStatepoints is set,
/// replace every call that needs a parse point with a statepoint.
///
/// \returns true if the function was modified.
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we're inlining this inside of safepoint poll insertion, this
    // doesn't make any sense.  Note that we do make any contained calls
    // parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  bool modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachablity queries return non-sensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    PlaceBackedgeSafepointsImpl *PBS =
        new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.  The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    //
    // Remove the duplicates *before* sorting.  Sorting by block name and then
    // calling std::unique is not sufficient: std::unique only removes adjacent
    // duplicates, and when several latch blocks compare equal by name (e.g.
    // they are all unnamed) the sort may leave two copies of the same
    // terminator separated by a different one.
    SetVector<TerminatorInst *> UniquePollLocations;
    UniquePollLocations.insert(PollLocations.begin(), PollLocations.end());
    PollLocations.assign(UniquePollLocations.begin(),
                         UniquePollLocations.end());

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.  A
    // stable sort keeps locations whose block names compare equal in their
    // discovery order.
    std::stable_sort(PollLocations.begin(), PollLocations.end(),
                     OrderByBBName);

    // Insert a poll at each point the analysis pass identified
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it.  Its possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    // A null location means no entry poll is inserted (policy choice not to
    // insert?).
    if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
      PollsNeeded.push_back(Location);
      modified = true;
      NumEntrySafepoints++;
    }
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }

  // If we've been asked to not wrap the calls with gc.statepoint, then we're
  // done.  In the near future, this option will be "constant folded" to true,
  // and the code below that deals with insert gc.statepoint calls will be
  // removed.  Wrapping potentially safepointing calls in gc.statepoint will
  // then become the responsibility of the RewriteStatepointsForGC pass.
  if (NoStatepoints)
    return modified;

  PollsNeeded.clear(); // make sure we don't accidentally use
  // The dominator tree has been invalidated by the inlining performed in the
  // above loop.  TODO: Teach the inliner how to update the dom tree?
  DT.recalculate(F);

  if (enableCallSafepoints(F)) {
    std::vector<CallSite> Calls;
    findCallSafepoints(F, Calls);
    NumCallSafepoints += Calls.size();
    ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(),
                            Calls.end());
  }

  // Unique the vectors since we can end up with duplicates if we scan the call
  // site for call safepoints after we add it for entry or backedge.  The
  // only reason we need tracking at all is that some functions might have
  // polls but not call safepoints and thus we might miss marking the runtime
  // calls for the polls. (This is useful in test cases!)
  unique_unsorted(ParsePointNeeded);

  // Any parse point (no matter what source) will be handled here

  // We're about to start modifying the function
  if (!ParsePointNeeded.empty())
    modified = true;

  // Now run through and insert the safepoints, but do _NOT_ update or remove
  // any existing uses.  We have references to live variables that need to
  // survive to the last iteration of this loop.
  std::vector<Value *> Results;
  Results.reserve(ParsePointNeeded.size());
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];

    // For invoke statepoints we need to remove all phi nodes at the normal
    // destination block.
    // Reason for this is that we can place gc_result only after last phi node
    // in basic block.  We will get malformed code after RAUW for the
    // gc_result if one of this phi nodes uses result from the invoke.
    if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
      normalizeForInvokeSafepoint(Invoke->getNormalDest(),
                                  Invoke->getParent());
    }

    Value *GCResult = ReplaceWithStatepoint(CS);
    Results.push_back(GCResult);
  }
  assert(Results.size() == ParsePointNeeded.size());

  // Adjust all users of the old call sites to use the new ones instead
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = Results[i];
    if (GCResult) {
      // Can not RAUW for the invoke gc result in case of phi nodes preset.
      assert(CS.isCall() ||
             !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));

      // Replace all uses with the new call
      CS.getInstruction()->replaceAllUsesWith(GCResult);
    }

    // Now that we've handled all uses, remove the original call itself
    // Note: The insert point can't be the deleted instruction!
    CS.getInstruction()->eraseFromParent();
  }
  return modified;
}
/// Given an instruction in the loop, check to see if it has any uses that are /// outside the current loop. If so, insert LCSSA PHI nodes and rewrite the /// uses. static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, const SmallVectorImpl<BasicBlock *> &ExitBlocks, PredIteratorCache &PredCache, LoopInfo *LI) { SmallVector<Use *, 16> UsesToRewrite; // Tokens cannot be used in PHI nodes, so we skip over them. // We can run into tokens which are live out of a loop with catchswitch // instructions in Windows EH if the catchswitch has one catchpad which // is inside the loop and another which is not. if (Inst.getType()->isTokenTy()) return false; BasicBlock *InstBB = Inst.getParent(); for (Use &U : Inst.uses()) { Instruction *User = cast<Instruction>(U.getUser()); BasicBlock *UserBB = User->getParent(); if (PHINode *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(U); if (InstBB != UserBB && !L.contains(UserBB)) UsesToRewrite.push_back(&U); } // If there are no uses outside the loop, exit with no change. if (UsesToRewrite.empty()) return false; ++NumLCSSA; // We are applying the transformation // Invoke instructions are special in that their result value is not available // along their unwind edge. The code below tests to see whether DomBB // dominates the value, so adjust DomBB to the normal destination block, // which is effectively where the value is first usable. BasicBlock *DomBB = Inst.getParent(); if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst)) DomBB = Inv->getNormalDest(); DomTreeNode *DomNode = DT.getNode(DomBB); SmallVector<PHINode *, 16> AddedPHIs; SmallVector<PHINode *, 8> PostProcessPHIs; SSAUpdater SSAUpdate; SSAUpdate.Initialize(Inst.getType(), Inst.getName()); // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. 
for (BasicBlock *ExitBB : ExitBlocks) { if (!DT.dominates(DomNode, DT.getNode(ExitBB))) continue; // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB), Inst.getName() + ".lcssa", &ExitBB->front()); // Add inputs from inside the loop for this PHI. for (BasicBlock *Pred : PredCache.get(ExitBB)) { PN->addIncoming(&Inst, Pred); // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. if (!L.contains(Pred)) UsesToRewrite.push_back( &PN->getOperandUse(PN->getOperandNumForIncomingValue( PN->getNumIncomingValues() - 1))); } AddedPHIs.push_back(PN); // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); // LoopSimplify might fail to simplify some loops (e.g. when indirect // branches are involved). In such situations, it might happen that an exit // for Loop L1 is the header of a disjoint Loop L2. Thus, when we create // PHIs in such an exit block, we are also inserting PHIs into L2's header. // This could break LCSSA form for L2 because these inserted PHIs can also // have uses outside of L2. Remember all PHIs in such situation as to // revisit than later on. FIXME: Remove this if indirectbr support into // LoopSimplify gets improved. if (auto *OtherLoop = LI->getLoopFor(ExitBB)) if (!L.contains(OtherLoop)) PostProcessPHIs.push_back(PN); } // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. for (Use *UseToRewrite : UsesToRewrite) { // If this use is in an exit block, rewrite to use the newly inserted PHI. // This is required for correctness because SSAUpdate doesn't handle uses in // the same block. It assumes the PHI we inserted is at the end of the // block. 
Instruction *User = cast<Instruction>(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); if (PHINode *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(*UseToRewrite); if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { // Tell the VHs that the uses changed. This updates SCEV's caches. if (UseToRewrite->get()->hasValueHandle()) ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); UseToRewrite->set(&UserBB->front()); continue; } // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UseToRewrite); } // Post process PHI instructions that were inserted into another disjoint loop // and update their exits properly. for (auto *I : PostProcessPHIs) { if (I->use_empty()) continue; BasicBlock *PHIBB = I->getParent(); Loop *OtherLoop = LI->getLoopFor(PHIBB); SmallVector<BasicBlock *, 8> EBs; OtherLoop->getExitBlocks(EBs); if (EBs.empty()) continue; // Recurse and re-process each PHI instruction. FIXME: we should really // convert this entire thing to a worklist approach where we process a // vector of instructions... processInstruction(*OtherLoop, *I, DT, EBs, PredCache, LI); } // Remove PHI nodes that did not have any uses rewritten. for (PHINode *PN : AddedPHIs) if (PN->use_empty()) PN->eraseFromParent(); return true; }