void LoopInterchangeTransform::splitInnerLoopHeader() { // Split the inner loop header out. Here make sure that the reduction PHI's // stay in the innerloop body. BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); if (InnerLoopHasReduction) { // FIXME: Check if the induction PHI will always be the first PHI. BasicBlock *New = InnerLoopHeader->splitBasicBlock( ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split"); if (LI) if (Loop *L = LI->getLoopFor(InnerLoopHeader)) L->addBasicBlockToLoop(New, *LI); // Adjust Reduction PHI's in the block. SmallVector<PHINode *, 8> PHIVec; for (auto I = New->begin(); isa<PHINode>(I); ++I) { PHINode *PHI = dyn_cast<PHINode>(I); Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader); PHI->replaceAllUsesWith(V); PHIVec.push_back((PHI)); } for (auto I = PHIVec.begin(), E = PHIVec.end(); I != E; ++I) { PHINode *P = *I; P->eraseFromParent(); } } else { SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); } DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & " "InnerLoopHeader \n"); }
/// Removes phis that have no predecessor void ABCD::removePhis() { for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) { PHINode *PN = phis_to_remove[i]; PN->replaceAllUsesWith(UndefValue::get(PN->getType())); PN->eraseFromParent(); } }
/// Rewrites tail-recursive calls in \p F into loops.
///
/// Vararg functions are rejected up front. Every block that ends in a return
/// is handed to ProcessReturningBlock; when that makes no change and the return
/// is the block's first non-PHI, non-debug instruction,
/// FoldReturnAndProcessPred is tried as well. Afterwards, any argument PHI
/// that SimplifyInstruction can fold is replaced by the folded value and
/// erased.
///
/// \returns true if either helper reported a change.
bool TailCallElim::runTRE(Function &F) {
  // If this function is a varargs function, we won't be able to PHI the args
  // right, so don't even try to convert it...
  if (F.getFunctionType()->isVarArg())
    return false;

  TTI = &getAnalysis<TargetTransformInfo>();
  BasicBlock *OldEntry = nullptr;
  bool TailCallsAreMarkedTail = false;
  SmallVector<PHINode*, 8> ArgumentPHIs;
  bool MadeChange = false;

  // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
  // marked with the 'tail' attribute, because doing so would cause the stack
  // size to increase (real TRE would deallocate variable sized allocas, TRE
  // doesn't).
  bool CanTRETailMarkedCall = CanTRE(F);

  // Change any tail recursive calls to loops.
  //
  // FIXME: The code generator produces really bad code when an 'escaping
  // alloca' is changed from being a static alloca to being a dynamic alloca.
  // Until this is resolved, disable this transformation if that would ever
  // happen.  This bug is PR962.
  for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
    // Step the iterator before touching the block so the walk never
    // increments through a block that a helper has removed.
    // NOTE(review): FoldReturnAndProcessPred is presumed able to erase the
    // block it is given -- confirm against its definition in this tree.
    BasicBlock *BB = &*BBI++;

    ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator());
    if (!Ret)
      continue;

    bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
                                        ArgumentPHIs, !CanTRETailMarkedCall);
    if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
      Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
                                        TailCallsAreMarkedTail, ArgumentPHIs,
                                        !CanTRETailMarkedCall);
    MadeChange |= Change;
  }

  // If we eliminated any tail recursions, it's possible that we inserted some
  // silly PHI nodes which just merge an initial value (the incoming operand)
  // with themselves.  Check to see if we did and clean up our mess if so.  This
  // occurs when a function passes an argument straight through to its tail
  // call.
  for (PHINode *PN : ArgumentPHIs) {
    // If the PHI Node is a dynamic constant, replace it with the value it is.
    if (Value *PNV = SimplifyInstruction(PN)) {
      PN->replaceAllUsesWith(PNV);
      PN->eraseFromParent();
    }
  }

  return MadeChange;
}
/// Evaluates size and offset for a pointer PHI by building two parallel integer
/// PHIs, one merging the sizes and one merging the offsets of the incoming
/// pointers.
///
/// If any incoming pointer cannot be fully evaluated, both helper PHIs are
/// replaced by undef, erased, and unknown() is returned. Otherwise each helper
/// PHI is folded away when hasConstantValue() yields a value, and the resulting
/// (size, offset) pair is returned.
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
  const unsigned NumIncoming = PHI.getNumIncomingValues();

  // One merge PHI for the size, one for the offset.
  PHINode *SizeMerge = Builder.CreatePHI(IntTy, NumIncoming);
  PHINode *OffsetMerge = Builder.CreatePHI(IntTy, NumIncoming);

  // Publish the pair in the cache before recursing so that recursive PHIs
  // resolve to these nodes.
  CacheMap[&PHI] = std::make_pair(SizeMerge, OffsetMerge);

  for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
    BasicBlock *Pred = PHI.getIncomingBlock(Idx);
    Builder.SetInsertPoint(Pred->getFirstInsertionPt());
    SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(Idx));

    if (bothKnown(EdgeData)) {
      SizeMerge->addIncoming(EdgeData.first, Pred);
      OffsetMerge->addIncoming(EdgeData.second, Pred);
      continue;
    }

    // One edge is not fully known: tear both helper PHIs down again.
    OffsetMerge->replaceAllUsesWith(UndefValue::get(IntTy));
    OffsetMerge->eraseFromParent();
    SizeMerge->replaceAllUsesWith(UndefValue::get(IntTy));
    SizeMerge->eraseFromParent();
    return unknown();
  }

  // Collapse a helper PHI when it has a constant value.
  Value *Size = SizeMerge;
  if (Value *ConstSize = SizeMerge->hasConstantValue()) {
    Size = ConstSize;
    SizeMerge->replaceAllUsesWith(Size);
    SizeMerge->eraseFromParent();
  }

  Value *Offset = OffsetMerge;
  if (Value *ConstOffset = OffsetMerge->hasConstantValue()) {
    Offset = ConstOffset;
    OffsetMerge->replaceAllUsesWith(Offset);
    OffsetMerge->eraseFromParent();
  }

  return std::make_pair(Size, Offset);
}
/// \brief The first part of loop-nestification is to find a PHI node that tells /// us how to partition the loops. static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, AssumptionCache *AC) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); PN->eraseFromParent(); continue; } // Scan this PHI node looking for a use of the PHI node by itself. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == PN && L->contains(PN->getIncomingBlock(i))) // We found something tasty to remove. return PN; } return nullptr; }
/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a /// PHI node that tells us how to partition the loops. static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, AliasAnalysis *AA, LoopInfo *LI) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; if (Value *V = SimplifyInstruction(PN, 0, DT)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); PN->eraseFromParent(); continue; } // Scan this PHI node looking for a use of the PHI node by itself. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == PN && L->contains(PN->getIncomingBlock(i))) // We found something tasty to remove. return PN; } return 0; }
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly.  The
/// effect of this is to copy significantly less code in cases where (for
/// example) a function call with constant arguments is inlined, and those
/// constant arguments cause a significant amount of code in the callee to be
/// dead.  Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
///
/// The work is done in four phases:
///   1. Clone the entry block and everything reachable from it through a
///      PruningFunctionCloner worklist.
///   2. Walk OldFunc in order, append each cloned block to NewFunc, record its
///      leading PHIs for later and remap all remaining instructions.
///   3. Resolve the recorded PHIs block by block: map live incoming edges,
///      drop dead ones, trim surplus entries, and replace PHIs left with no
///      incoming values by undef.
///   4. Merge each unconditional branch into its destination when the
///      destination has a single predecessor and no leading PHI.
///
/// \param NewFunc  Function that receives the cloned blocks.
/// \param OldFunc  Function being cloned.
/// \param VMap  Old-to-new value map. Debug builds assert that every argument
///              of OldFunc already has an entry; it is updated as PHIs are
///              replaced.
/// \param ModuleLevelChanges  Selects RF_None (true) or
///              RF_NoModuleLevelChanges (false) for remapping.
/// \param Returns, NameSuffix, CodeInfo, TD  Forwarded unchanged to the
///              PruningFunctionCloner; NameSuffix must be non-null.
/// \param TheCall  If non-null, its debug location is read into a local.
///              NOTE(review): that local is never read again in this function.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                                     ValueToValueMapTy &VMap,
                                     bool ModuleLevelChanges,
                                     SmallVectorImpl<ReturnInst*> &Returns,
                                     const char *NameSuffix,
                                     ClonedCodeInfo *CodeInfo,
                                     const TargetData *TD,
                                     Instruction *TheCall) {
  assert(NameSuffix && "NameSuffix cannot be null!");

#ifndef NDEBUG
  // Every formal argument of the source function must already be mapped.
  for (Function::const_arg_iterator II = OldFunc->arg_begin(),
       E = OldFunc->arg_end(); II != E; ++II)
    assert(VMap.count(II) && "No mapping from source argument specified!");
#endif

  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
                            Returns, NameSuffix, CodeInfo, TD);

  // Clone the entry block, and anything recursively reachable from it.
  std::vector<const BasicBlock*> CloneWorklist;
  CloneWorklist.push_back(&OldFunc->getEntryBlock());
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, CloneWorklist);
  }

  // Loop over all of the basic blocks in the old function.  If the block was
  // reachable, we have cloned it and the old block is now in the value map:
  // insert it into the new function in the right order.  If not, ignore it.
  //
  // Defer PHI resolution until rest of function is resolved.
  SmallVector<const PHINode*, 16> PHIToResolve;
  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
       BI != BE; ++BI) {
    Value *V = VMap[BI];
    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
    if (NewBB == 0) continue;  // Dead block.

    // Add the new block to the new function.
    NewFunc->getBasicBlockList().push_back(NewBB);

    // Loop over all of the instructions in the block, fixing up operand
    // references as we go.  This uses VMap to do all the hard work.
    //
    BasicBlock::iterator I = NewBB->begin();

    // NOTE(review): TheCallDL is assigned here but not read anywhere else in
    // this function.
    DebugLoc TheCallDL;
    if (TheCall)
      TheCallDL = TheCall->getDebugLoc();

    // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.
    if (PHINode *PN = dyn_cast<PHINode>(I)) {
      // Skip over all PHI nodes, remembering them for later.  The ORIGINAL
      // PHIs are recorded; the clones are looked up through VMap later.
      BasicBlock::const_iterator OldI = BI->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
        PHIToResolve.push_back(cast<PHINode>(OldI));
    }

    // Otherwise, remap the rest of the instructions normally.
    for (; I != NewBB->end(); ++I)
      RemapInstruction(I, VMap,
                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
  }

  // Defer PHI resolution until rest of function is resolved, PHI resolution
  // requires the CFG to be up-to-date.
  // Each outer iteration handles all recorded PHIs of one old block; phino is
  // advanced by the inner loop, not by the for header.
  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
    const PHINode *OPN = PHIToResolve[phino];
    unsigned NumPreds = OPN->getNumIncomingValues();
    const BasicBlock *OldBB = OPN->getParent();
    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);

    // Map operands for blocks that are live and remove operands for blocks
    // that are dead.
    for (; phino != PHIToResolve.size() &&
         PHIToResolve[phino]->getParent() == OldBB; ++phino) {
      OPN = PHIToResolve[phino];
      PHINode *PN = cast<PHINode>(VMap[OPN]);
      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
        Value *V = VMap[PN->getIncomingBlock(pred)];
        if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
          // Live edge: rewrite both the incoming value and the incoming block.
          Value *InVal = MapValue(PN->getIncomingValue(pred),
                                  VMap,
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
          assert(InVal && "Unknown input value?");
          PN->setIncomingValue(pred, InVal);
          PN->setIncomingBlock(pred, MappedBlock);
        } else {
          // Dead edge: drop the entry and shrink the local bound to match.
          PN->removeIncomingValue(pred, false);
          --pred, --e;  // Revisit the next entry.
        }
      }
    }

    // The loop above has removed PHI entries for those blocks that are dead
    // and has updated others.  However, if a block is live (i.e. copied over)
    // but its terminator has been changed to not go to this block, then our
    // phi nodes will have invalid entries.  Update the PHI nodes in this
    // case.
    PHINode *PN = cast<PHINode>(NewBB->begin());
    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
    if (NumPreds != PN->getNumIncomingValues()) {
      assert(NumPreds < PN->getNumIncomingValues());
      // Count how many times each predecessor comes to this block.
      // Real predecessor edges are subtracted and PHI entries added, so each
      // counter ends up as (PHI entries - CFG edges) for that block.
      std::map<BasicBlock*, unsigned> PredCount;
      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
           PI != E; ++PI)
        --PredCount[*PI];

      // Figure out how many entries to remove from each PHI.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        ++PredCount[PN->getIncomingBlock(i)];

      // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
      // entries.
      BasicBlock::iterator I = NewBB->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
        for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
             E = PredCount.end(); PCI != E; ++PCI) {
          BasicBlock *Pred     = PCI->first;
          for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
            PN->removeIncomingValue(Pred, false);
        }
      }
    }

    // If the loops above have made these phi nodes have 0 or 1 operand,
    // replace them with undef or the input value.  We must do this for
    // correctness, because 0-operand phis are not valid.
    // NOTE(review): only the 0-operand case is handled below; see the NOTE at
    // the end of this loop body for why single-entry PHIs are left alone.
    PN = cast<PHINode>(NewBB->begin());
    if (PN->getNumIncomingValues() == 0) {
      BasicBlock::iterator I = NewBB->begin();
      BasicBlock::const_iterator OldI = OldBB->begin();
      // I is post-incremented before the PHI is erased, so it stays valid.
      while ((PN = dyn_cast<PHINode>(I++)))  {
        Value *NV = UndefValue::get(PN->getType());
        PN->replaceAllUsesWith(NV);
        assert(VMap[OldI] == PN && "VMap mismatch");
        VMap[OldI] = NV;
        PN->eraseFromParent();
        ++OldI;
      }
    }
    // NOTE: We cannot eliminate single entry phi nodes here, because of
    // VMap.  Single entry phi nodes can have multiple VMap entries
    // pointing at them.  Thus, deleting one would require scanning the VMap
    // to update any entries in it that would require that.  This would be
    // really slow.
  }

  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches.  This happen all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
  Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
  while (I != NewFunc->end()) {
    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }

    // Note that we can't eliminate uncond branches if the destination has
    // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
    // require scanning the VMap to update any entries that point to the phi
    // node.
    BasicBlock *Dest = BI->getSuccessor(0);
    if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
      ++I; continue;
    }

    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();

    // Make all PHI nodes that referred to Dest now refer to I as their source.
    Dest->replaceAllUsesWith(I);

    // Move all the instructions in the succ to the pred.
    I->getInstList().splice(I->end(), Dest->getInstList());

    // Remove the dest block.
    Dest->eraseFromParent();

    // Do not increment I, iteratively merge all things this block branches to.
  }
}
bool LoopInterchangeTransform::adjustLoopBranches() { DEBUG(dbgs() << "adjustLoopBranches called\n"); // Adjust the loop preheader BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); BasicBlock *InnerLoopLatchPredecessor = InnerLoopLatch->getUniquePredecessor(); BasicBlock *InnerLoopLatchSuccessor; BasicBlock *OuterLoopLatchSuccessor; BranchInst *OuterLoopLatchBI = dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); BranchInst *InnerLoopLatchBI = dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); BranchInst *OuterLoopHeaderBI = dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); BranchInst *InnerLoopHeaderBI = dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || !InnerLoopHeaderBI) return false; BranchInst *InnerLoopLatchPredecessorBI = dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); BranchInst *OuterLoopPredecessorBI = dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) return false; BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); if (!InnerLoopHeaderSuccessor) return false; // Adjust Loop Preheader and headers unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader) OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader); } NumSucc = OuterLoopHeaderBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if 
(OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) OuterLoopHeaderBI->setSuccessor(i, LoopExit); else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); } // Adjust reduction PHI's now that the incoming block has changed. updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, OuterLoopHeader); BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); InnerLoopHeaderBI->eraseFromParent(); // -------------Adjust loop latches----------- if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); else InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); } // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with // the value and remove this PHI node from inner loop. 
SmallVector<PHINode *, 8> LcssaVec; for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) { PHINode *LcssaPhi = cast<PHINode>(I); LcssaVec.push_back(LcssaPhi); } for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) { PHINode *P = *I; Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); P->replaceAllUsesWith(Incoming); P->eraseFromParent(); } if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); else OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); else InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); } else { OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); } return true; }
/// RewriteLoopExitValues - Check to see if this loop has a computable /// loop-invariant execution count. If so, this means that we can compute the /// final value of any expressions that are recurrent in the loop, and /// substitute the exit values from the loop into any instructions outside of /// the loop that use the final values of the current expressions. /// /// This is mostly redundant with the regular IndVarSimplify activities that /// happen later, except that it's more powerful in some cases, because it's /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Verify the input to the pass in already in LCSSA form. assert(L->isLCSSAForm(*DT)); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitBlocks[i]; // If there are no PHI nodes in this exit block, then no values defined // inside the loop are used on this path, skip it. PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); if (!PN) continue; unsigned NumPreds = PN->getNumIncomingValues(); // Iterate over all of the PHI nodes. BasicBlock::iterator BBI = ExitBB->begin(); while ((PN = dyn_cast<PHINode>(BBI++))) { if (PN->use_empty()) continue; // dead use, don't replace it // SCEV only supports integer expressions for now. if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) continue; // It's necessary to tell ScalarEvolution about this explicitly so that // it can walk the def-use list and forget all SCEVs, as it may not be // watching the PHI itself. 
Once the new exit value is in place, there // may not be a def-use connection between the loop and every instruction // which got a SCEVAddRecExpr for that loop. SE->forgetValue(PN); // Iterate over all of the values in all the PHI nodes. for (unsigned i = 0; i != NumPreds; ++i) { // If the value being merged in is not integer or is not defined // in the loop, skip it. Value *InVal = PN->getIncomingValue(i); if (!isa<Instruction>(InVal)) continue; // If this pred is for a subloop, not L itself, skip it. if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) continue; // The Block is in a subloop, skip it. // Check that InVal is defined in the loop. Instruction *Inst = cast<Instruction>(InVal); if (!L->contains(Inst)) continue; // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) continue; Changed = true; ++NumReplaced; Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. RecursivelyDeleteTriviallyDeadInstructions(Inst); if (NumPreds == 1) { // Completely replace a single-pred PHI. This is safe, because the // NewVal won't be variant in the loop, so we don't need an LCSSA phi // node anymore. PN->replaceAllUsesWith(ExitVal); RecursivelyDeleteTriviallyDeadInstructions(PN); } } if (NumPreds != 1) { // Clone the PHI and delete the original one. This lets IVUsers and // any other maps purge the original user from their records. PHINode *NewPN = cast<PHINode>(PN->clone()); NewPN->takeName(PN); NewPN->insertBefore(PN); PN->replaceAllUsesWith(NewPN); PN->eraseFromParent(); } } } }
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and /// DominanceFrontier, but no other analyses. BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); // Move the edges from Preds to point to NewBB instead of BB. for (unsigned i = 0; i != NumPreds; ++i) Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. 
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node. Value *InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); // Check to see if we can eliminate this phi node. if (Value *V = PN->hasConstantValue(DT != 0)) { Instruction *I = dyn_cast<Instruction>(V); if (!I || DT == 0 || DT->dominates(I, PN)) { PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); PN->eraseFromParent(); } } } return NewBB; }
/// Returns the value of the renamed variable that is live in the middle of
/// \p BB.
///
/// If \p BB has no definition of its own, this defers to GetValueAtEndOfBlock.
/// Otherwise the live-out value of every predecessor is gathered and the
/// result is, in order of preference: undef when there are no predecessors,
/// the single value shared by all predecessors, an existing equivalent PHI in
/// \p BB, the value a freshly inserted PHI simplifies to, or that new PHI
/// itself (which is then also reported through InsertedPHIs when set).
Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
  // Easy case: nothing is defined in this block.
  if (!HasValueForBlock(BB))
    return GetValueAtEndOfBlock(BB);

  // Hard case: collect the live-in value for each predecessor.
  SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
  Value *SingularValue = nullptr;

  // Records one predecessor and keeps SingularValue equal to the common
  // incoming value, or null as soon as two predecessors disagree.
  auto NotePred = [&](BasicBlock *PredBB, bool IsFirst) {
    Value *PredVal = GetValueAtEndOfBlock(PredBB);
    PredValues.push_back(std::make_pair(PredBB, PredVal));
    if (IsFirst)
      SingularValue = PredVal;
    else if (PredVal != SingularValue)
      SingularValue = nullptr;
  };

  // We can get our predecessor info by walking the pred_iterator list, but it
  // is relatively slow.  If we already have PHI nodes in this block, walk one
  // of them to get the predecessor list instead.
  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
    const unsigned NumIncoming = SomePhi->getNumIncomingValues();
    for (unsigned Idx = 0; Idx != NumIncoming; ++Idx)
      NotePred(SomePhi->getIncomingBlock(Idx), Idx == 0);
  } else {
    bool First = true;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      NotePred(*PI, First);
      First = false;
    }
  }

  // If there are no predecessors, just return undef.
  if (PredValues.empty())
    return UndefValue::get(ProtoType);

  // Otherwise, if all the merged values are the same, just use it.
  if (SingularValue)
    return SingularValue;

  // Otherwise, we do need a PHI: check to see if we already have one available
  // in this block that produces the right value.
  if (isa<PHINode>(BB->begin())) {
    SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
                                                       PredValues.end());
    for (BasicBlock::iterator It = BB->begin(); isa<PHINode>(It); ++It) {
      PHINode *Existing = cast<PHINode>(It);
      if (IsEquivalentPHI(Existing, ValueMapping))
        return Existing;
    }
  }

  // Ok, we have no way out, insert a new one now.
  PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
                                         ProtoName, &BB->front());

  // Fill in all the predecessors of the PHI.
  for (const auto &Entry : PredValues)
    InsertedPHI->addIncoming(Entry.second, Entry.first);

  // See if the PHI node can be merged to a single value.  This can happen in
  // loop cases when we get a PHI of itself and one other value.
  Value *Simplified =
      SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout());
  if (Simplified) {
    InsertedPHI->eraseFromParent();
    return Simplified;
  }

  // Set the DebugLoc of the inserted PHI, if available.
  DebugLoc DL;
  if (const Instruction *FirstNonPHI = BB->getFirstNonPHI())
    DL = FirstNonPHI->getDebugLoc();
  InsertedPHI->setDebugLoc(DL);

  // If the client wants to know about all new instructions, tell it.
  if (InsertedPHIs)
    InsertedPHIs->push_back(InsertedPHI);

  DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
  return InsertedPHI;
}
/// Runs tail-call marking and tail-recursion elimination on \p F.
///
/// Steps, in order:
///   1. Reject vararg functions.
///   2. Scan all allocas, feeding each to PointerMayBeCaptured; give up
///      (returning false) as soon as the tracker reports a capture.
///   3. If the tracker recorded no alloca uses, try to turn tail-recursive
///      returns into loops via ProcessReturningBlock /
///      FoldReturnAndProcessPred.
///   4. Fold argument PHIs that SimplifyInstruction can simplify.
///   5. Unless the function calls something that returns twice, mark every
///      call not recorded in the tracker's UsesAlloca set as a tail call.
///
/// \returns true if a recursion was eliminated or a call was marked 'tail'.
bool TailCallElim::runOnFunction(Function &F) {
  // If this function is a varargs function, we won't be able to PHI the args
  // right, so don't even try to convert it...
  if (F.getFunctionType()->isVarArg()) return false;

  TTI = &getAnalysis<TargetTransformInfo>();
  BasicBlock *OldEntry = 0;
  bool TailCallsAreMarkedTail = false;
  SmallVector<PHINode*, 8> ArgumentPHIs;
  bool MadeChange = false;

  // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
  // marked with the 'tail' attribute, because doing so would cause the stack
  // size to increase (real TRE would deallocate variable sized allocas, TRE
  // doesn't).
  bool CanTRETailMarkedCall = true;

  // Find calls that can be marked tail.
  AllocaCaptureTracker ACT;
  for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
        CanTRETailMarkedCall &= CanTRE(AI);
        PointerMayBeCaptured(AI, &ACT);
        // If any allocas are captured, exit.
        if (ACT.Captured)
          return false;
      }
    }
  }

  // Second pass, change any tail recursive calls to loops.
  //
  // FIXME: The code generator produces really bad code when an 'escaping
  // alloca' is changed from being a static alloca to being a dynamic alloca.
  // Until this is resolved, disable this transformation if that would ever
  // happen.  This bug is PR962.
  if (ACT.UsesAlloca.empty()) {
    for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E;
         /*in loop*/) {
      // Step the iterator before touching the block so the walk never
      // increments through a block that a helper has removed.
      // NOTE(review): FoldReturnAndProcessPred is presumed able to erase the
      // block it is given -- confirm against its definition in this tree.
      BasicBlock *BB = &*BBI++;

      if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
        bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
                                            ArgumentPHIs, !CanTRETailMarkedCall);
        if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
          Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
                                            TailCallsAreMarkedTail, ArgumentPHIs,
                                            !CanTRETailMarkedCall);
        MadeChange |= Change;
      }
    }
  }

  // If we eliminated any tail recursions, it's possible that we inserted some
  // silly PHI nodes which just merge an initial value (the incoming operand)
  // with themselves.  Check to see if we did and clean up our mess if so.  This
  // occurs when a function passes an argument straight through to its tail
  // call.
  if (!ArgumentPHIs.empty()) {
    for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
      PHINode *PN = ArgumentPHIs[i];

      // If the PHI Node is a dynamic constant, replace it with the value it is.
      if (Value *PNV = SimplifyInstruction(PN)) {
        PN->replaceAllUsesWith(PNV);
        PN->eraseFromParent();
      }
    }
  }

  // At this point, we know that the function does not have any captured
  // allocas. If additionally the function does not call setjmp, mark all calls
  // in the function that do not access stack memory with the tail keyword. This
  // implies ensuring that there does not exist any path from a call that takes
  // in an alloca but does not capture it and the call which we wish to mark
  // with "tail".
  if (!F.callsFunctionThatReturnsTwice()) {
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (!ACT.UsesAlloca.count(CI)) {
            CI->setTailCall();
            MadeChange = true;
          }
        }
      }
    }
  }

  return MadeChange;
}
/// This works like CloneAndPruneFunctionInto, except that it does not clone the /// entire function. Instead it starts at an instruction provided by the caller /// and copies (and prunes) only the code reachable from that instruction. void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. if (!StartingInst) for (const Argument &II : OldFunc->args()) assert(VMap.count(&II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, NameSuffix, CodeInfo); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); else { StartingBB = &OldFunc->getEntryBlock(); StartingInst = &StartingBB->front(); } // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (const BasicBlock &BI : *OldFunc) { Value *V = VMap[&BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (!NewBB) continue; // Dead block. 
// Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Handle PHI nodes specially, as we have to remove references to dead // blocks. for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process. if (const PHINode *PN = dyn_cast<PHINode>(I)) { if (isa<PHINode>(VMap[PN])) PHIToResolve.push_back(PN); else break; } else { break; } } // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. RemapInstruction(NewBB->getTerminator(), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap[PN->getIncomingBlock(pred)]; if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. 
copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[&*OldI] == PN && "VMap mismatch"); VMap[&*OldI] = NV; PN->eraseFromParent(); ++OldI; } } } // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. 
This will transparently update the // WeakVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) recursivelySimplifyInstruction(PN); // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || I->getSinglePredecessor() == &*I)) { BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; } // We need to simplify conditional branches and switches with a constant // operand. We try to prune these out when cloning, but if the // simplification required looking through PHI nodes, those are only // available after forming the full basic block. That may leave some here, // and we still want to prune the dead code as early as possible. ConstantFoldTerminator(&*I); BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { ++I; continue; } // We shouldn't be able to get single-entry PHI nodes here, as instsimplify // above should have zapped all of them.. 
assert(!isa<PHINode>(Dest->begin())); // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) Returns.push_back(RI); }
void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; ForwardIDFCalculator IDF(DT); for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); removeLifetimeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. AI->eraseFromParent(); // Remove the alloca from the Allocas list, since it has been processed RemoveFromAllocasList(AllocaNum); ++NumDeadAlloca; continue; } // Calculate the set of read and write-locations for each alloca. This is // analogous to finding the 'uses' and 'definitions' of each variable. Info.AnalyzeAlloca(AI); // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; continue; } } // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock && promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; } // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { unsigned ID = 0; for (auto &BB : F) BBNumbers[&BB] = ID++; } // Remember the dbg.declare intrinsic describing this alloca, if any. if (!Info.DbgDeclares.empty()) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares; // Keep the reverse mapping of the 'Allocas' array for the rename pass. 
AllocaLookup[Allocas[AllocaNum]] = AllocaNum; // At this point, we're committed to promoting the alloca using IDF's, and // the standard SSA construction algorithm. Determine which blocks need PHI // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. // Unique the set of defining blocks for efficient lookup. SmallPtrSet<BasicBlock *, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); // Determine which blocks the value is live in. These are blocks which lead // to uses. SmallPtrSet<BasicBlock *, 32> LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); // At this point, we're committed to promoting the alloca using IDF's, and // the standard SSA construction algorithm. Determine which blocks need phi // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. IDF.setLiveInBlocks(LiveInBlocks); IDF.setDefiningBlocks(DefBlocks); SmallVector<BasicBlock *, 32> PHIBlocks; IDF.calculate(PHIBlocks); if (PHIBlocks.size() > 1) llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { return BBNumbers.lookup(A) < BBNumbers.lookup(B); }); unsigned CurrentVersion = 0; for (BasicBlock *BB : PHIBlocks) QueuePhiNode(BB, AllocaNum, CurrentVersion); } if (Allocas.empty()) return; // All of the allocas must have been trivial! LBI.clear(); // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // When handling debug info, treat all incoming values as if they have unknown // locations until proven otherwise. 
RenamePassData::LocationVector Locations(Allocas.size()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values), std::move(Locations)); do { RenamePassData RPD = std::move(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList); } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); // Remove the allocas themselves from the function. for (Instruction *A : Allocas) { // If there are any uses of the alloca instructions left, they must be in // unreachable basic blocks that were not processed by walking the dominator // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); A->eraseFromParent(); } // Remove alloca's dbg.declare instrinsics from the function. for (auto &Declares : AllocaDbgDeclares) for (auto *DII : Declares) DII->eraseFromParent(); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is // iterative, because eliminating one PHI node can cause others to be removed. bool EliminatedAPHI = true; while (EliminatedAPHI) { EliminatedAPHI = false; // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. If it was not, we could add uses to // the values we replace with in a non-deterministic order, thus creating // non-deterministic def->use chains. 
for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. if (Value *V = SimplifyInstruction(PN, SQ)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); EliminatedAPHI = true; continue; } ++I; } } // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. PHINode *SomePHI = I->second; BasicBlock *BB = SomePHI->getParent(); if (&BB->front() != SomePHI) continue; // Only do work here if there the PHI nodes are missing incoming values. We // know that all PHI nodes that were inserted in a block will have the same // number of incoming values, so we can just check any of them. if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) continue; // Get the preds for BB. SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient // access. auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) { return BBNumbers.lookup(A) < BBNumbers.lookup(B); }; llvm::sort(Preds, CompareBBNumbers); // Now we loop through all BB's which have entries in SomePHI and remove // them from the Preds list. 
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { // Do a log(n) search of the Preds list for the entry we want. SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound( Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i), CompareBBNumbers); assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && "PHI node has entry for a block which is not a predecessor!"); // Remove the entry Preds.erase(EntIt); } // At this point, the blocks left in the preds list must have dummy // entries inserted into every PHI nodes for the block. Update all the phi // nodes in this block that we are inserting (there could be phis before // mem2reg runs). unsigned NumBadPreds = SomePHI->getNumIncomingValues(); BasicBlock::iterator BBI = BB->begin(); while ((SomePHI = dyn_cast<PHINode>(BBI++)) && SomePHI->getNumIncomingValues() == NumBadPreds) { Value *UndefVal = UndefValue::get(SomePHI->getType()); for (BasicBlock *Pred : Preds) SomePHI->addIncoming(UndefVal, Pred); } } NewPhiNodes.clear(); }
/// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs /// though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get the personality function from the callee if it contains a landing pad. 
Value *CalleePersonality = 0; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); CalleePersonality = LP->getPersonalityFn(); break; } // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. if (CalleePersonality) { for (Function::const_iterator I = Caller->begin(), E = Caller->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); // If the personality functions match, then we can perform the // inlining. Otherwise, we can't inline. // TODO: This isn't 100% true. Some personality functions are proper // supersets of others and can be used in place of the other. if (LP->getPersonalityFn() != CalleePersonality) return false; break; } } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. 
CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; const Argument *Arg = I; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear // their 'tail' flags if HandleByValArgument introduced a new alloca and // the callee has calls. MustClearTailCallFlags |= ActualArg != *AI; } VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); // Update inlined instructions' line number information. fixupLineNumbers(Caller, FirstNewBlock, TheCall); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. 
{ BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (AI == 0) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!isa<Constant>(AI->getArraySize())) continue; // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice(InsertPoint, FirstNewBlock->getInstList(), AI, I); } } // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { IRBuilder<> builder(FirstNewBlock->begin()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; // If the alloca is already scoped to something smaller than the whole // function then there's no need to add redundant, less accurate markers. if (hasLifetimeMarkers(AI)) continue; // Try to determine the size of the allocation. ConstantInt *AllocaSize = 0; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { if (IFI.TD) { Type *AllocaType = AI->getAllocatedType(); uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. 
if (AllocaArraySize != ~0ULL && UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } } } builder.CreateLifetimeStart(AI, AllocaSize); for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { IRBuilder<> builder(Returns[ri]); builder.CreateLifetimeEnd(AI, AllocaSize); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. 
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); NewBr->setDebugLoc(Returns[0]->getDebugLoc()); } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case, of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... 
CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = 0; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. 
if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } } // Add a branch to the merge points and remove return instructions. DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst* BI = BranchInst::Create(AfterCallBB, RI); Loc = RI->getDebugLoc(); BI->setDebugLoc(Loc); RI->eraseFromParent(); } // We need to set the debug location to *somewhere* inside the // inlined function. The line number may be nonsensical, but the // instruction will at least be associated with the right // function. if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Loc); } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. BasicBlock *ReturnBB = Returns[0]->getParent(); ReturnBB->replaceAllUsesWith(AfterCallBB); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. 
TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } return true; }
/// Run tail call elimination on \p F.
///
/// The function is processed in three steps:
///   1. Scan every block for escaping allocas; if any exist the whole
///      transformation is abandoned (see the PR962 FIXME below).
///   2. For every returning block, try to turn a tail-recursive call into a
///      loop, then simplify away the argument PHI nodes this may have
///      introduced.
///   3. Mark the calls in the function as 'tail' when no stack memory can be
///      reached by them.
///
/// \returns true only if the function was actually modified.
bool TailCallElim::runOnFunction(Function &F) {
  // If this function is a varargs function, we won't be able to PHI the args
  // right, so don't even try to convert it...
  if (F.getFunctionType()->isVarArg())
    return false;

  BasicBlock *OldEntry = nullptr;
  bool TailCallsAreMarkedTail = false;
  SmallVector<PHINode*, 8> ArgumentPHIs;
  bool MadeChange = false;
  bool FunctionContainsEscapingAllocas = false;

  // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
  // marked with the 'tail' attribute, because doing so would cause the stack
  // size to increase (real TCE would deallocate variable sized allocas, TCE
  // doesn't).
  // It is never assigned in this function; presumably CheckForEscapingAllocas
  // takes it by reference and sets it -- confirm against its declaration.
  bool CannotTCETailMarkedCall = false;

  // First pass: loop over the function keeping track of whether it has any
  // non-trivially used allocas.  Stop early once both flags are set, since
  // nothing more can be learned.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
      break;

    FunctionContainsEscapingAllocas |=
      CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
  }

  /// FIXME: The code generator produces really bad code when an 'escaping
  /// alloca' is changed from being a static alloca to being a dynamic alloca.
  /// Until this is resolved, disable this transformation if that would ever
  /// happen.  This bug is PR962.
  if (FunctionContainsEscapingAllocas)
    return false;

  // Second pass, change any tail calls to loops.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator());
    if (!Ret)
      continue;

    bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
                                        ArgumentPHIs, CannotTCETailMarkedCall);
    // If the block consists of nothing but the return (ignoring PHIs and debug
    // info), try folding the return into the predecessors instead.
    if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
      Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
                                        TailCallsAreMarkedTail, ArgumentPHIs,
                                        CannotTCETailMarkedCall);
    MadeChange |= Change;
  }

  // If we eliminated any tail recursions, it's possible that we inserted some
  // silly PHI nodes which just merge an initial value (the incoming operand)
  // with themselves.  Check to see if we did and clean up our mess if so.  This
  // occurs when a function passes an argument straight through to its tail
  // call.  (The loop is a no-op when no PHIs were created.)
  for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
    PHINode *PN = ArgumentPHIs[i];

    // If the PHI Node is a dynamic constant, replace it with the value it is.
    if (Value *PNV = SimplifyInstruction(PN)) {
      PN->replaceAllUsesWith(PNV);
      PN->eraseFromParent();
    }
  }

  // Finally, if this function contains no escaping allocas and does not call
  // setjmp (or another function that returns twice), mark all calls in the
  // function as eligible for tail calls (there is no stack memory for them to
  // access).
  //
  // The escaping-alloca test is always true here because of the early exit
  // above; it is kept so that lifting the PR962 restriction cannot silently
  // make this marking unsafe.
  if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
    for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB)
      for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          // Only report a change when the marker is actually added; calls
          // that are already 'tail' leave the IR untouched.
          if (CI->isTailCall())
            continue;
          CI->setTailCall();
          MadeChange = true;
        }

  return MadeChange;
}
/// \brief Simplify one loop and queue further loops for simplification.
///
/// The steps performed, in order, are:
///   - delete CFG edges into non-header loop blocks from out-of-loop (dead)
///     predecessors;
///   - resolve 'br i1 undef' in exiting blocks towards the loop exit;
///   - insert a preheader if the loop has none;
///   - rewrite exit blocks that have predecessors outside the loop;
///   - ensure a single latch, either by splitting out a nested loop (which
///     restarts the whole function via ReprocessLoop) or by inserting a unique
///     backedge block;
///   - simplify the header PHI nodes;
///   - when all recorded exit blocks are the same block, try to fold exiting
///     blocks that contain only loop-invariant code, a compare and a branch.
///
/// \param L        The loop to simplify.
/// \param Worklist Receives the outer loop produced by separateNestedLoop.
/// \param DT       Dominator tree; dereferenced without a null check when an
///                 exiting block is eliminated.
/// \param LI       Loop info; dereferenced without a null check when an
///                 exiting block is eliminated.
/// \param SE       Optional ScalarEvolution; every use is guarded by a null
///                 check.
/// \param PP       Pass pointer forwarded to the helper utilities.
/// \param AC       Assumption cache forwarded to separateNestedLoop and
///                 SimplifyInstruction.
/// \returns true if anything was changed.
///
/// FIXME: Currently this accepts both lots of analyses that it uses and a raw
/// Pass pointer. The Pass pointer is used by numerous utilities to update
/// specific analyses. Rather than a pass it would be much cleaner and more
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
                            DominatorTree *DT, LoopInfo *LI,
                            ScalarEvolution *SE, Pass *PP,
                            AssumptionCache *AC) {
  bool Changed = false;
ReprocessLoop:

  // Check to see that no blocks (other than the header) in this loop have
  // predecessors that are not in the loop.  This is not valid for natural
  // loops, but can occur if the blocks are unreachable.  Since they are
  // unreachable we can just shamelessly delete those CFG edges!
  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
       BB != E; ++BB) {
    if (*BB == L->getHeader()) continue;

    // Collect first, then delete: a set de-duplicates predecessors that reach
    // the block through several edges.
    SmallPtrSet<BasicBlock*, 4> BadPreds;
    for (pred_iterator PI = pred_begin(*BB),
         PE = pred_end(*BB); PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (!L->contains(P))
        BadPreds.insert(P);
    }

    // Delete each unique out-of-loop (and thus dead) predecessor.
    for (BasicBlock *P : BadPreds) {

      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
                   << P->getName() << "\n");

      // Inform each successor of each dead pred.
      // NOTE(review): the local end iterator 'SE' shadows the ScalarEvolution
      // parameter 'SE' for the duration of this for statement only.
      for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI)
        (*SI)->removePredecessor(P);
      // Zap the dead pred's terminator and replace it with unreachable.
      TerminatorInst *TI = P->getTerminator();
      TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
      P->getTerminator()->eraseFromParent();
      new UnreachableInst(P->getContext(), P);
      Changed = true;
    }
  }

  // If there are exiting blocks with branches on undef, resolve the undef in
  // the direction which will exit the loop. This will help simplify loop
  // trip count computations.
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       E = ExitingBlocks.end(); I != E; ++I)
    if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
      if (BI->isConditional()) {
        if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {

          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
                       << (*I)->getName() << "\n");

          // The condition becomes true exactly when successor 0 is outside
          // the loop, so the branch always leaves the loop.
          BI->setCondition(ConstantInt::get(Cond->getType(),
                                            !L->contains(BI->getSuccessor(0))));

          // This may make the loop analyzable, force SCEV recomputation.
          if (SE)
            SE->forgetLoop(L);

          Changed = true;
        }
      }

  // Does the loop already have a preheader?  If so, don't insert one.
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    // Preheader may still be null afterwards; later uses check for that.
    Preheader = InsertPreheaderForLoop(L, PP);
    if (Preheader) {
      ++NumInserted;
      Changed = true;
    }
  }

  // Next, check to make sure that all exit nodes of the loop only have
  // predecessors that are inside of the loop.  This check guarantees that the
  // loop preheader/header will dominate the exit blocks.  If the exit block has
  // predecessors from outside of the loop, split the edge now.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);

  // Visit each distinct exit block once, in the order first seen.
  SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
                                               ExitBlocks.end());
  for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
         E = ExitBlockSet.end(); I != E; ++I) {
    BasicBlock *ExitBlock = *I;
    for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
         PI != PE; ++PI)
      // Must be exactly this loop: no subloops, parent loops, or non-loop preds
      // allowed.
      if (!L->contains(*PI)) {
        if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PP)) {
          ++NumInserted;
          Changed = true;
        }
        // Stop scanning this exit block's predecessors after the first
        // offending one.
        break;
      }
  }

  // If the header has more than two predecessors at this point (from the
  // preheader and from multiple backedges), we must adjust the loop.
  BasicBlock *LoopLatch = L->getLoopLatch();
  if (!LoopLatch) {
    // If this is really a nested loop, rip it out into a child loop.  Don't do
    // this for loops with a giant number of backedges, just factor them into a
    // common backedge instead.
    if (L->getNumBackEdges() < 8) {
      if (Loop *OuterL =
              separateNestedLoop(L, Preheader, DT, LI, SE, PP, AC)) {
        ++NumNested;
        // Enqueue the outer loop as it should be processed next in our
        // depth-first nest walk.
        Worklist.push_back(OuterL);

        // This is a big restructuring change, reprocess the whole loop.
        Changed = true;
        // GCC doesn't tail recursion eliminate this.
        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
        goto ReprocessLoop;
      }
    }

    // If we either couldn't, or didn't want to, identify nesting of the loops,
    // insert a new block that all backedges target, then make it jump to the
    // loop header.
    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
    if (LoopLatch) {
      ++NumInserted;
      Changed = true;
    }
  }

  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  // Scan over the PHI nodes in the loop header.  Since they now have only two
  // incoming values (the loop is canonicalized), we may have simplified the PHI
  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
  // The iterator is advanced (I++) before PN can be erased in the loop body.
  PHINode *PN;
  for (BasicBlock::iterator I = L->getHeader()->begin();
       (PN = dyn_cast<PHINode>(I++)); )
    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
      if (SE) SE->forgetValue(PN);
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
    }

  // If this loop has multiple exits and the exits all go to the same
  // block, attempt to merge the exits. This helps several passes, such
  // as LoopRotation, which do not support loops with multiple exits.
  // SimplifyCFG also does this (and this code uses the same utility
  // function), however this code is loop-aware, where SimplifyCFG is
  // not. That gives it the advantage of being able to hoist
  // loop-invariant instructions out of the way to open up more
  // opportunities, and the disadvantage of having the responsibility
  // to preserve dominator information.
  //
  // Note that ExitBlocks and ExitingBlocks are the vectors filled in earlier
  // in this function; they are not recomputed here.
  bool UniqueExit = true;
  if (!ExitBlocks.empty())
    for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
      if (ExitBlocks[i] != ExitBlocks[0]) {
        UniqueExit = false;
        break;
      }
  if (UniqueExit) {
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      BasicBlock *ExitingBlock = ExitingBlocks[i];
      // Only consider blocks with a single predecessor that end in a
      // conditional branch on a compare defined in the same block.
      if (!ExitingBlock->getSinglePredecessor()) continue;
      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
      if (!BI || !BI->isConditional()) continue;
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      if (!CI || CI->getParent() != ExitingBlock) continue;

      // Attempt to hoist out all instructions except for the
      // comparison and the branch.
      bool AllInvariant = true;
      bool AnyInvariant = false;
      for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
        // Advance before processing: makeLoopInvariant is handed an insertion
        // point in the preheader and may move Inst out of this block.
        Instruction *Inst = I++;
        // Skip debug info intrinsics.
        if (isa<DbgInfoIntrinsic>(Inst))
          continue;
        if (Inst == CI)
          continue;
        if (!L->makeLoopInvariant(Inst, AnyInvariant,
                                  Preheader ? Preheader->getTerminator()
                                            : nullptr)) {
          AllInvariant = false;
          break;
        }
      }
      if (AnyInvariant) {
        Changed = true;
        // The loop disposition of all SCEV expressions that depend on any
        // hoisted values have also changed.
        if (SE)
          SE->forgetLoopDispositions(L);
      }
      if (!AllInvariant) continue;

      // The block has now been cleared of all instructions except for
      // a comparison and a conditional branch. SimplifyCFG may be able
      // to fold it now.
      if (!FoldBranchToCommonDest(BI)) continue;

      // Success. The block is now dead, so remove it from the loop,
      // update the dominator tree and delete it.
      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
                   << ExitingBlock->getName() << "\n");

      // Notify ScalarEvolution before deleting this block. Currently assume the
      // parent loop doesn't change (splitting edges doesn't count). If blocks,
      // CFG edges, or other values in the parent loop change, then we need call
      // to forgetLoop() for the parent instead.
      if (SE)
        SE->forgetLoop(L);

      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
      Changed = true;
      LI->removeBlock(ExitingBlock);

      // Re-parent every dominator-tree child of the dead block onto the
      // block's immediate dominator.  The loop terminates only if
      // changeImmediateDominator removes Child from Children (presumably it
      // does -- confirm against DominatorTree).
      DomTreeNode *Node = DT->getNode(ExitingBlock);
      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
        Node->getChildren();
      while (!Children.empty()) {
        DomTreeNode *Child = Children.front();
        DT->changeImmediateDominator(Child, Node->getIDom());
      }
      DT->eraseNode(ExitingBlock);

      // Update the successors' PHI nodes while the branch still exists, then
      // delete the block.
      BI->getSuccessor(0)->removePredecessor(ExitingBlock);
      BI->getSuccessor(1)->removePredecessor(ExitingBlock);
      ExitingBlock->eraseFromParent();
    }
  }

  return Changed;
}
/// removePredecessor - This method is used to notify a BasicBlock that the /// specified Predecessor of the block is no longer able to reach it. This is /// actually not used to update the Predecessor list, but is actually used to /// update the PHI nodes that reside in the block. Note that this should be /// called while the predecessor still refers to this block. /// void BasicBlock::removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs) { assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs. find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) && "removePredecessor: BB is not a predecessor!"); if (InstList.empty()) return; PHINode *APN = dyn_cast<PHINode>(&front()); if (!APN) return; // Quick exit. // If there are exactly two predecessors, then we want to nuke the PHI nodes // altogether. However, we cannot do this, if this in this case: // // Loop: // %x = phi [X, Loop] // %x2 = add %x, 1 ;; This would become %x2 = add %x2, 1 // br Loop ;; %x2 does not dominate all uses // // This is because the PHI node input is actually taken from the predecessor // basic block. The only case this can happen is with a self loop, so we // check for this case explicitly now. // unsigned max_idx = APN->getNumIncomingValues(); assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!"); if (max_idx == 2) { BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred); // Disable PHI elimination! if (this == Other) max_idx = 3; } // <= Two predecessors BEFORE I remove one? if (max_idx <= 2 && !DontDeleteUselessPHIs) { // Yup, loop through and nuke the PHI nodes while (PHINode *PN = dyn_cast<PHINode>(&front())) { // Remove the predecessor first. 
PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs); // If the PHI _HAD_ two uses, replace PHI node with its now *single* value if (max_idx == 2) { if (PN->getIncomingValue(0) != PN) PN->replaceAllUsesWith(PN->getIncomingValue(0)); else // We are left with an infinite loop with no entries: kill the PHI. PN->replaceAllUsesWith(UndefValue::get(PN->getType())); getInstList().pop_front(); // Remove the PHI node } // If the PHI node already only had one entry, it got deleted by // removeIncomingValue. } } else { // Okay, now we know that we need to remove predecessor #pred_idx from all // PHI nodes. Iterate over each PHI node fixing them up PHINode *PN; for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) { ++II; PN->removeIncomingValue(Pred, false); // If all incoming values to the Phi are the same, we can replace the Phi // with that value. Value* PNV = 0; if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue())) if (PNV != PN) { PN->replaceAllUsesWith(PNV); PN->eraseFromParent(); } } } }
/// Promote every alloca in the Allocas list to SSA values.
///
/// Each alloca is first tried against three fast paths (no uses at all, a
/// single defining block, used in only one block).  Allocas that remain get
/// PHI nodes placed by DetermineInsertionPoint and are rewritten by the
/// RenamePass worklist walk.  Afterwards the allocas and their dbg.declare
/// intrinsics are erased, redundant new PHI nodes are eliminated iteratively,
/// and PHI nodes that lack entries for some predecessors are padded with
/// undef.
void PromoteMem2Reg::run() {
  Function &F = *DF.getRoot()->getParent();

  // PointerAllocaValues is only needed when an AST is being kept up to date.
  if (AST) PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  // Allocas.size() is re-read on every iteration because the fast paths below
  // call RemoveFromAllocasList(AllocaNum) and then 'continue'.
  // NOTE(review): presumably RemoveFromAllocasList shrinks Allocas and adjusts
  // AllocaNum so the next iteration revisits the same slot -- confirm.
  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is left.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore);
          DDI->eraseFromParent();
        }
        // Remove the (now dead) store and alloca.
        // NOTE(review): the pointer is handed to LBI.deleteValue after the
        // instruction was erased; presumably it is only used as a lookup key
        // and never dereferenced -- confirm.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.  The cast<> asserts that
        // every remaining user really is a store.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare)
            ConvertDebugDeclareToDebugValue(DDI, SI);
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)
          DDI->eraseFromParent();

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.  Blocks are numbered in function order, starting at zero.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be undef values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this undef value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary.  An explicit worklist
  // is used, seeded with the first block of the function and a null
  // predecessor.
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    // Swap the entry out of the worklist rather than copying it, then pop the
    // swapped-out slot.
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // sections of dead code that were not processed on the dominance frontier.
    // Just replace those uses with undef so the alloca can be erased.
    //
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // The iterator is advanced manually because entries are erased during the
    // walk (erase(I++) steps past the entry before it is removed).
    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = PN->hasConstantValue(&DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).  The incoming-value count of the first PHI is used to
    // recognise the PHIs that need padding; the walk stops at the first
    // instruction that is not a PHI or has a different count.
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}
// InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. // // Note that this only does one level of inlining. For example, if the // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now // exists in the instruction stream. Similiarly this will inline a recursive // function by one level. // bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { Instruction *TheCall = CS.getInstruction(); LLVMContext &Context = TheCall->getContext(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. 
// Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. ValueMap<const Value*, Value*> VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) && !CalledFunc->onlyReadsMemory()) { const Type *AggTy = cast<PointerType>(I->getType())->getElementType(); const Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy); Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), &*Caller->begin()->begin()); // Emit a memcpy. 
const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, Tys, 3); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); Value *Size; if (IFI.TD == 0) Size = ConstantExpr::getSizeOf(AggTy); else Size = ConstantInt::get(Type::getInt64Ty(Context), IFI.TD->getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. Value *CallArgs[] = { DestCast, SrcCast, Size, ConstantInt::get(Type::getInt32Ty(Context), 1), ConstantInt::get(Type::getInt1Ty(Context), 0) }; CallInst *TheMemCpy = CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); // If we have a call graph, update it. if (CallGraph *CG = IFI.CG) { CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); CallGraphNode *CallerNode = (*CG)[Caller]; CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); } // Uses of the argument in the function should use our new alloca // instead. ActualArg = NewAlloca; // Calls that we inline may use the new alloca, so we need to clear // their 'tail' flags. MustClearTailCallFlags = true; } VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. 
if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. // { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (AI == 0) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!isa<Constant>(AI->getArraySize())) continue; // Keep track of the static allocas that we inline into the caller if the // StaticAllocas pointer is non-null. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice(InsertPoint, FirstNewBlock->getInstList(), AI, I); } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. 
CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; if (CallGraph *CG = IFI.CG) { StackSaveCGN = CG->getOrInsertFunction(StackSave); StackRestoreCGN = CG->getOrInsertFunction(StackRestore); CallerNode = (*CG)[Caller]; } // Insert the llvm.stacksave. CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack", FirstNewBlock->begin()); if (IFI.CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]); if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); } // Count the number of StackRestore calls we insert. unsigned NumStackRestores = Returns.size(); // If we are inlining an invoke instruction, insert restores before each // unwind. These unwinds will be rewritten into branches later. if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI); if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); ++NumStackRestores; } } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. 
if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining through a 'nounwind' call site then any inlined 'unwind' // instructions are unreachable. if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind) for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { TerminatorInst *Term = BB->getTerminator(); if (isa<UnwindInst>(Term)) { new UnreachableInst(Context, Term); BB->getInstList().erase(Term); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any inlined 'unwind' instructions into branches to the invoke exception // destination, and call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) BranchInst::Create(II->getNormalDest(), TheCall); // If the return instruction returned a value, replace uses of the call with // uses of the returned value. 
if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case, of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(NewBr, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. 
Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. const Type *RTy = CalledFunc->getReturnType(); if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. PHINode *PHI = 0; if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } // Now that we inserted the PHI, check to see if it has a single value // (e.g. all the entries are the same or undef). If so, remove the PHI so // it doesn't block other optimizations. if (Value *V = PHI->hasConstantValue()) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } // Add a branch to the merge points and remove return instructions. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst::Create(AfterCallBB, RI); RI->eraseFromParent(); } } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. 
if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. BasicBlock *ReturnBB = Returns[0]->getParent(); AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); // Update PHI nodes that use the ReturnBB to use the AfterCallBB. ReturnBB->replaceAllUsesWith(AfterCallBB); // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the code entry block into calling block, right before the // unconditional branch. OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); return true; }