Function* PartialInliner::unswitchFunction(Function* F) { // First, verify that this function is an unswitching candidate... BasicBlock* entryBlock = F->begin(); BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); if (!BR || BR->isUnconditional()) return 0; BasicBlock* returnBlock = 0; BasicBlock* nonReturnBlock = 0; unsigned returnCount = 0; for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock); SI != SE; ++SI) if (isa<ReturnInst>((*SI)->getTerminator())) { returnBlock = *SI; returnCount++; } else nonReturnBlock = *SI; if (returnCount != 1) return 0; // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; Function* duplicateFunction = CloneFunction(F, VMap, /*ModuleLevelChanges=*/false); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(duplicateFunction); BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]); BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]); // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. F->replaceAllUsesWith(duplicateFunction); // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. BasicBlock* preReturn = newReturnBlock; newReturnBlock = newReturnBlock->splitBasicBlock( newReturnBlock->getFirstNonPHI()); BasicBlock::iterator I = preReturn->begin(); BasicBlock::iterator Ins = newReturnBlock->begin(); while (I != preReturn->end()) { PHINode* OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI(); retPhi->addIncoming(I, preReturn); retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), newEntryBlock); OldPhi->removeIncomingValue(newEntryBlock); ++I; } newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock); // Gather up the blocks that we're going to extract. std::vector<BasicBlock*> toExtract; toExtract.push_back(newNonReturnBlock); for (Function::iterator FI = duplicateFunction->begin(), FE = duplicateFunction->end(); FI != FE; ++FI) if (&*FI != newEntryBlock && &*FI != newReturnBlock && &*FI != newNonReturnBlock) toExtract.push_back(FI); // The CodeExtractor needs a dominator tree. DominatorTree DT; DT.runOnFunction(*duplicateFunction); // Extract the body of the if. Function* extractedFunction = CodeExtractor(toExtract, &DT).extractCodeRegion(); InlineFunctionInfo IFI; // Inline the top-level if test into all callers. std::vector<User*> Users(duplicateFunction->use_begin(), duplicateFunction->use_end()); for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); UI != UE; ++UI) if (CallInst *CI = dyn_cast<CallInst>(*UI)) InlineFunction(CI, IFI); else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) InlineFunction(II, IFI); // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. duplicateFunction->replaceAllUsesWith(F); duplicateFunction->eraseFromParent(); ++NumPartialInlined; return extractedFunction; }
/// This works like CloneAndPruneFunctionInto, except that it does not clone the /// entire function. Instead it starts at an instruction provided by the caller /// and copies (and prunes) only the code reachable from that instruction. void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. if (!StartingInst) for (const Argument &II : OldFunc->args()) assert(VMap.count(&II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, NameSuffix, CodeInfo); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); else { StartingBB = &OldFunc->getEntryBlock(); StartingInst = &StartingBB->front(); } // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (const BasicBlock &BI : *OldFunc) { Value *V = VMap.lookup(&BI); BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (!NewBB) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Handle PHI nodes specially, as we have to remove references to dead // blocks. for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process. if (const PHINode *PN = dyn_cast<PHINode>(I)) { if (isa<PHINode>(VMap[PN])) PHIToResolve.push_back(PN); else break; } else { break; } } // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. RemapInstruction(NewBB->getTerminator(), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap.lookup(PN->getIncomingBlock(pred)); if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred; // Revisit the next entry. --e; } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (const auto &PCI : PredCount) { BasicBlock *Pred = PCI.first; for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[&*OldI] == PN && "VMap mismatch"); VMap[&*OldI] = NV; PN->eraseFromParent(); ++OldI; } } } // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). const DataLayout &DL = NewFunc->getParent()->getDataLayout(); SmallSetVector<const Value *, 8> Worklist; for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (isa<PHINode>(VMap[PHIToResolve[Idx]])) Worklist.insert(PHIToResolve[Idx]); // Note that we must test the size on each iteration, the worklist can grow. for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { const Value *OrigV = Worklist[Idx]; auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV)); if (!I) continue; // Skip over non-intrinsic callsites, we don't want to remove any nodes from // the CGSCC. CallSite CS = CallSite(I); if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic()) continue; // See if this instruction simplifies. Value *SimpleV = SimplifyInstruction(I, DL); if (!SimpleV) continue; // Stash away all the uses of the old instruction so we can check them for // recursive simplifications after a RAUW. This is cheaper than checking all // uses of To on the recursive step in most cases. for (const User *U : OrigV->users()) Worklist.insert(cast<Instruction>(U)); // Replace the instruction with its simplified value. I->replaceAllUsesWith(SimpleV); // If the original instruction had no side effects, remove it. if (isInstructionTriviallyDead(I)) I->eraseFromParent(); else VMap[OrigV] = I; } // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || I->getSinglePredecessor() == &*I)) { BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; } // We need to simplify conditional branches and switches with a constant // operand. We try to prune these out when cloning, but if the // simplification required looking through PHI nodes, those are only // available after forming the full basic block. That may leave some here, // and we still want to prune the dead code as early as possible. ConstantFoldTerminator(&*I); BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { ++I; continue; } // We shouldn't be able to get single-entry PHI nodes here, as instsimplify // above should have zapped all of them.. assert(!isa<PHINode>(Dest->begin())); // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) Returns.push_back(RI); }
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The /// effect of this is to copy significantly less code in cases where (for /// example) a function call with constant arguments is inlined, and those /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const TargetData *TD, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) assert(VMap.count(II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; CloneWorklist.push_back(&OldFunc->getEntryBlock()); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { Value *V = VMap[BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (NewBB == 0) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Handle PHI nodes specially, as we have to remove references to dead // blocks. for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) if (const PHINode *PN = dyn_cast<PHINode>(I)) PHIToResolve.push_back(PN); else break; // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. RemapInstruction(NewBB->getTerminator(), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap[PN->getIncomingBlock(pred)]; if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[OldI] == PN && "VMap mismatch"); VMap[OldI] = NV; PN->eraseFromParent(); ++OldI; } } } // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the // WeakVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) recursivelySimplifyInstruction(PN, TD); // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. if (I != Begin && (pred_begin(I) == pred_end(I) || I->getSinglePredecessor() == I)) { BasicBlock *DeadBB = I++; DeleteDeadBlock(DeadBB); continue; } // We need to simplify conditional branches and switches with a constant // operand. We try to prune these out when cloning, but if the // simplification required looking through PHI nodes, those are only // available after forming the full basic block. That may leave some here, // and we still want to prune the dead code as early as possible. ConstantFoldTerminator(I); BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { ++I; continue; } // We shouldn't be able to get single-entry PHI nodes here, as instsimplify // above should have zapped all of them.. assert(!isa<PHINode>(Dest->begin())); // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } }
Function *PartialInlinerImpl::unswitchFunction(Function *F) { // First, verify that this function is an unswitching candidate... BasicBlock *EntryBlock = &F->front(); BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); if (!BR || BR->isUnconditional()) return nullptr; BasicBlock *ReturnBlock = nullptr; BasicBlock *NonReturnBlock = nullptr; unsigned ReturnCount = 0; for (BasicBlock *BB : successors(EntryBlock)) { if (isa<ReturnInst>(BB->getTerminator())) { ReturnBlock = BB; ReturnCount++; } else NonReturnBlock = BB; } if (ReturnCount != 1) return nullptr; // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; Function *DuplicateFunction = CloneFunction(F, VMap); DuplicateFunction->setLinkage(GlobalValue::InternalLinkage); BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]); BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]); BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]); // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. F->replaceAllUsesWith(DuplicateFunction); // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. BasicBlock *PreReturn = NewReturnBlock; NewReturnBlock = NewReturnBlock->splitBasicBlock( NewReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = PreReturn->begin(); Instruction *Ins = &NewReturnBlock->front(); while (I != PreReturn->end()) { PHINode *OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(RetPhi); Ins = NewReturnBlock->getFirstNonPHI(); RetPhi->addIncoming(&*I, PreReturn); RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock), NewEntryBlock); OldPhi->removeIncomingValue(NewEntryBlock); ++I; } NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); // Gather up the blocks that we're going to extract. std::vector<BasicBlock *> ToExtract; ToExtract.push_back(NewNonReturnBlock); for (BasicBlock &BB : *DuplicateFunction) if (&BB != NewEntryBlock && &BB != NewReturnBlock && &BB != NewNonReturnBlock) ToExtract.push_back(&BB); // The CodeExtractor needs a dominator tree. DominatorTree DT; DT.recalculate(*DuplicateFunction); // Extract the body of the if. Function *ExtractedFunction = CodeExtractor(ToExtract, &DT).extractCodeRegion(); // Inline the top-level if test into all callers. std::vector<User *> Users(DuplicateFunction->user_begin(), DuplicateFunction->user_end()); for (User *User : Users) if (CallInst *CI = dyn_cast<CallInst>(User)) InlineFunction(CI, IFI); else if (InvokeInst *II = dyn_cast<InvokeInst>(User)) InlineFunction(II, IFI); // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. DuplicateFunction->replaceAllUsesWith(F); DuplicateFunction->eraseFromParent(); ++NumPartialInlined; return ExtractedFunction; }
/// NormalizeLandingPads - Normalize and discover landing pads, noting them /// in the LandingPads set. A landing pad is normal if the only CFG edges /// that end at it are unwind edges from invoke instructions. If we inlined /// through an invoke we could have a normal branch from the previous /// unwind block through to the landing pad for the original invoke. /// Abnormal landing pads are fixed up by redirecting all unwind edges to /// a new basic block which falls through to the original. bool DwarfEHPrepare::NormalizeLandingPads() { bool Changed = false; const MCAsmInfo *MAI = TM->getMCAsmInfo(); bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (!isa<InvokeInst>(TI)) continue; BasicBlock *LPad = TI->getSuccessor(1); // Skip landing pads that have already been normalized. if (LandingPads.count(LPad)) continue; // Check that only invoke unwind edges end at the landing pad. bool OnlyUnwoundTo = true; bool SwitchOK = usingSjLjEH; for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) { TerminatorInst *PT = (*PI)->getTerminator(); // The SjLj dispatch block uses a switch instruction. This is effectively // an unwind edge, so we can disregard it here. There will only ever // be one dispatch, however, so if there are multiple switches, one // of them truly is a normal edge, not an unwind edge. if (SwitchOK && isa<SwitchInst>(PT)) { SwitchOK = false; continue; } if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) { OnlyUnwoundTo = false; break; } } if (OnlyUnwoundTo) { // Only unwind edges lead to the landing pad. Remember the landing pad. LandingPads.insert(LPad); continue; } // At least one normal edge ends at the landing pad. Redirect the unwind // edges to a new basic block which falls through into this one. // Create the new basic block. BasicBlock *NewBB = BasicBlock::Create(F->getContext(), LPad->getName() + "_unwind_edge"); // Insert it into the function right before the original landing pad. LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); // Redirect unwind edges from the original landing pad to NewBB. for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { TerminatorInst *PT = (*PI++)->getTerminator(); if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad) // Unwind to the new block. PT->setSuccessor(1, NewBB); } // If there are any PHI nodes in LPad, we need to update them so that they // merge incoming values from NewBB instead. for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) { PHINode *PN = cast<PHINode>(II); pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); // Check to see if all of the values coming in via unwind edges are the // same. If so, we don't need to create a new PHI node. Value *InVal = PN->getIncomingValueForBlock(*PB); for (pred_iterator PI = PB; PI != PE; ++PI) { if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { InVal = 0; break; } } if (InVal == 0) { // Different unwind edges have different values. Create a new PHI node // in NewBB. PHINode *NewPN = PHINode::Create(PN->getType(), PN->getNumIncomingValues(), PN->getName()+".unwind", NewBB); // Add an entry for each unwind edge, using the value from the old PHI. for (pred_iterator PI = PB; PI != PE; ++PI) NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); // Now use this new PHI as the common incoming value for NewBB in PN. InVal = NewPN; } // Revector exactly one entry in the PHI node to come from NewBB // and delete all other entries that come from unwind edges. If // there are both normal and unwind edges from the same predecessor, // this leaves an entry for the normal edge. for (pred_iterator PI = PB; PI != PE; ++PI) PN->removeIncomingValue(*PI); PN->addIncoming(InVal, NewBB); } // Add a fallthrough from NewBB to the original landing pad. BranchInst::Create(LPad, NewBB); // Now update DominatorTree analysis information. DT->splitBlock(NewBB); // Remember the newly constructed landing pad. The original landing pad // LPad is no longer a landing pad now that all unwind edges have been // revectored to NewBB. LandingPads.insert(NewBB); ++NumLandingPadsSplit; Changed = true; } return Changed; }
Function* LLVMSignatureInfo::createFunctionStub(bool special, bool virt) { std::vector<Value*> Args; std::vector<Value*> FunctionArgs; std::vector<Value*> TempArgs; J3Intrinsics& Intrinsics = *Compiler->getIntrinsics(); Function* stub = NULL; FunctionType* FTy = (virt || special)? getVirtualType() : getStaticType(); if (virt) { stub = Compiler->virtualStubs[FTy]; } else if (special) { stub = Compiler->specialStubs[FTy]; } else { stub = Compiler->staticStubs[FTy]; } if (stub != NULL) { return stub; } if (Compiler->isStaticCompiling()) { vmkit::ThreadAllocator allocator; const char* type = virt ? "virtual_stub" : special ? "special_stub" : "static_stub"; char* buf = (char*)allocator.Allocate( (signature->keyName->size << 1) + 1 + 11); signature->nativeName(buf, type); stub = Function::Create( FTy, GlobalValue::ExternalLinkage, buf, Compiler->getLLVMModule()); } else { stub = Function::Create( FTy, GlobalValue::ExternalLinkage, "", Compiler->getLLVMModule()); } LLVMContext& context = Compiler->getLLVMModule()->getContext(); BasicBlock* currentBlock = BasicBlock::Create(context, "enter", stub); BasicBlock* endBlock = BasicBlock::Create(context, "end", stub); BasicBlock* callBlock = BasicBlock::Create(context, "call", stub); PHINode* node = NULL; if (!signature->getReturnType()->isVoid()) { node = PHINode::Create(stub->getReturnType(), 2, "", endBlock); } for (Function::arg_iterator arg = stub->arg_begin(); arg != stub->arg_end(); ++arg) { Value* temp = arg; if (Compiler->useCooperativeGC() && arg->getType() == Intrinsics.JavaObjectType) { temp = new AllocaInst(Intrinsics.JavaObjectType, "", currentBlock); new StoreInst(arg, temp, "", currentBlock); Value* GCArgs[2] = { new BitCastInst(temp, Intrinsics.ptrPtrType, "", currentBlock), Intrinsics.constantPtrNull }; CallInst::Create(Intrinsics.llvm_gc_gcroot, GCArgs, "", currentBlock); } TempArgs.push_back(temp); } if (virt) { if (Compiler->useCooperativeGC()) { Args.push_back(new LoadInst(TempArgs[0], "", false, currentBlock)); } else { Args.push_back(TempArgs[0]); } } Value* val = CallInst::Create(virt ? Intrinsics.ResolveVirtualStubFunction : special ? Intrinsics.ResolveSpecialStubFunction: Intrinsics.ResolveStaticStubFunction, Args, "", currentBlock); Constant* nullValue = Constant::getNullValue(val->getType()); Value* cmp = new ICmpInst(*currentBlock, ICmpInst::ICMP_EQ, nullValue, val, ""); BranchInst::Create(endBlock, callBlock, cmp, currentBlock); if (node) node->addIncoming(Constant::getNullValue(node->getType()), currentBlock); currentBlock = callBlock; Value* Func = new BitCastInst(val, stub->getType(), "", currentBlock); int i = 0; for (Function::arg_iterator arg = stub->arg_begin(); arg != stub->arg_end(); ++arg, ++i) { Value* temp = arg; if (Compiler->useCooperativeGC() && arg->getType() == Intrinsics.JavaObjectType) { temp = new LoadInst(TempArgs[i], "", false, currentBlock); } FunctionArgs.push_back(temp); } Value* res = CallInst::Create(Func, FunctionArgs, "", currentBlock); if (node) node->addIncoming(res, currentBlock); BranchInst::Create(endBlock, currentBlock); currentBlock = endBlock; if (node) { ReturnInst::Create(context, node, currentBlock); } else { ReturnInst::Create(context, currentBlock); } stub->setGC("vmkit"); stub->addFnAttr(Attribute::NoInline); stub->addFnAttr(Attribute::NoUnwind); if (virt) { Compiler->virtualStubs[FTy] = stub; } else if (special) { Compiler->specialStubs[FTy] = stub; } else { Compiler->staticStubs[FTy] = stub; } return stub; }
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// LoopInfo, and LCCSA but no other analyses. In particular, it does not /// preserve LoopSimplify (because it's complicated to handle the case where one /// of the edges being split is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; Loop *L = LI ? LI->getLoopFor(BB) : 0; bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); // Move the edges from Preds to point to NewBB instead of BB. // While here, if we need to preserve loop analyses, collect // some information about how this split will affect loops. bool HasLoopExit = false; bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; for (unsigned i = 0; i != NumPreds; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); if (LI) { // If we need to preserve LCSSA, determine if any of // the preds is a loop exit. if (PreserveLCSSA) if (Loop *PL = LI->getLoopFor(Preds[i])) if (!PL->contains(BB)) HasLoopExit = true; // If we need to preserve LoopInfo, note whether any of the // preds crosses an interesting loop boundary. if (L) { if (L->contains(Preds[i])) IsLoopEntry = false; else SplitMakesNewLoopHeader = true; } } } // Update dominator tree if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; if (L) { if (IsLoopEntry) { // Add the new block to the nearest enclosing loop (and not an // adjacent loop). To find this, examine each of the predecessors and // determine which loops enclose them, and select the most-nested loop // which contains the loop containing the block being split. Loop *InnermostPredLoop = 0; for (unsigned i = 0; i != NumPreds; ++i) if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { // Seek a loop which actually contains the block being split (to // avoid adjacent loops). while (PredLoop && !PredLoop->contains(BB)) PredLoop = PredLoop->getParentLoop(); // Select the most-nested of these loops which contains the block. if (PredLoop && PredLoop->contains(BB) && (!InnermostPredLoop || InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) InnermostPredLoop = PredLoop; } if (InnermostPredLoop) InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) L->moveToHeader(NewBB); } } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } return NewBB; }
void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, bool EnableFastISel) { Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); AI != E; ++AI) if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) InitializeRegForValue(AI); // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. Function::iterator BB = Fn->begin(), EB = Fn->end(); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { const Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. // The object may need to be placed onto the stack near the stack // protector if one exists. Determine here if this object is a suitable // candidate. I.e., it would trigger the creation of a stack protector. bool MayNeedSP = (AI->isArrayAllocation() || (TySize > 8 && isa<ArrayType>(Ty) && cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); } for (; BB != EB; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) InitializeRegForValue(I); // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); MBBMap[BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only // the first one should be marked. if (BB->hasAddressTaken()) MBB->setHasAddressTaken(); // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. PHINode *PN; DebugLoc DL; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { PN = dyn_cast<PHINode>(I); if (!PN || PN->use_empty()) continue; unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); PHIReg += NumRegisters; } } } }
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; MDNode *LoopID = NewLoop->getLoopID(); assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; Optional<MDNode *> NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); if (NewLoopID.hasValue()) { NewLoop->setLoopID(NewLoopID.getValue()); // Do not setLoopAlreadyUnrolled if loop attributes have been defined // explicitly. return NewLoop; } // Add unroll disable metadata to disable future unrolling for this loop. NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } else return nullptr; }
/// RewriteLoopExitValues - Check to see if this loop has a computable /// loop-invariant execution count. If so, this means that we can compute the /// final value of any expressions that are recurrent in the loop, and /// substitute the exit values from the loop into any instructions outside of /// the loop that use the final values of the current expressions. /// /// This is mostly redundant with the regular IndVarSimplify activities that /// happen later, except that it's more powerful in some cases, because it's /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Verify the input to the pass in already in LCSSA form. assert(L->isLCSSAForm()); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitBlocks[i]; // If there are no PHI nodes in this exit block, then no values defined // inside the loop are used on this path, skip it. PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); if (!PN) continue; unsigned NumPreds = PN->getNumIncomingValues(); // Iterate over all of the PHI nodes. BasicBlock::iterator BBI = ExitBB->begin(); while ((PN = dyn_cast<PHINode>(BBI++))) { if (PN->use_empty()) continue; // dead use, don't replace it // SCEV only supports integer expressions for now. if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) continue; // It's necessary to tell ScalarEvolution about this explicitly so that // it can walk the def-use list and forget all SCEVs, as it may not be // watching the PHI itself. Once the new exit value is in place, there // may not be a def-use connection between the loop and every instruction // which got a SCEVAddRecExpr for that loop. SE->forgetValue(PN); // Iterate over all of the values in all the PHI nodes. for (unsigned i = 0; i != NumPreds; ++i) { // If the value being merged in is not integer or is not defined // in the loop, skip it. Value *InVal = PN->getIncomingValue(i); if (!isa<Instruction>(InVal)) continue; // If this pred is for a subloop, not L itself, skip it. if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) continue; // The Block is in a subloop, skip it. // Check that InVal is defined in the loop. Instruction *Inst = cast<Instruction>(InVal); if (!L->contains(Inst)) continue; // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) continue; Changed = true; ++NumReplaced; Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. RecursivelyDeleteTriviallyDeadInstructions(Inst); if (NumPreds == 1) { // Completely replace a single-pred PHI. This is safe, because the // NewVal won't be variant in the loop, so we don't need an LCSSA phi // node anymore. PN->replaceAllUsesWith(ExitVal); RecursivelyDeleteTriviallyDeadInstructions(PN); } } if (NumPreds != 1) { // Clone the PHI and delete the original one. This lets IVUsers and // any other maps purge the original user from their records. PHINode *NewPN = cast<PHINode>(PN->clone()); NewPN->takeName(PN); NewPN->insertBefore(PN); PN->replaceAllUsesWith(NewPN); PN->eraseFromParent(); } } } }
/// InsertUniqueBackedgeBlock - This method is called when the specified loop /// has more than one backedge in it. If this occurs, revector all of these /// backedges to target a new basic block and have that block branch to the loop /// header. This ensures that loops have exactly one backedge. /// BasicBlock * LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); // Unique backedge insertion currently depends on having a preheader. if (!Preheader) return 0; // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) return 0; if (P != Preheader) BackedgeBlocks.push_back(P); } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in // the backedge block which correspond to any PHI nodes in the header block. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be", BETerminator); NewPN->reserveOperandSpace(BackedgeBlocks.size()); if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; Value *UniqueValue = 0; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); if (IBB == Preheader) { PreheaderIdx = i; } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { if (UniqueValue == 0) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; } } } // Delete all of the incoming values from the old PN except the preheader's assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); if (PreheaderIdx != 0) { PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); } // Nuke all entries except the zero'th. for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) PN->removeIncomingValue(e-i, false); // Finally, add the newly constructed PHI node as the entry for the BEBlock. PN->addIncoming(NewPN, BEBlock); // As an optimization, if all incoming values in the new PhiNode (which is a // subset of the incoming values of the old PHI node) have the same value, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); if (AA) AA->deleteValue(NewPN); BEBlock->getInstList().erase(NewPN); } } // Now that all of the PHI nodes have been inserted and adjusted, modify the // backedge blocks to just to the BEBlock instead of the header. for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) if (TI->getSuccessor(Op) == Header) TI->setSuccessor(Op, BEBlock); } //===--- Update all analyses which we must preserve now -----------------===// // Update Loop Information - we know that this block is now in the current // loop and all parent loops. L->addBasicBlockToLoop(BEBlock, LI->getBase()); // Update dominator information DT->splitBlock(BEBlock); return BEBlock; }
void RegionExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const { for (SetVector<BasicBlock *>::const_iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { BasicBlock *BB = *I; // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { for (User::op_iterator OI = II->op_begin(), OE = II->op_end(); OI != OE; ++OI) if (definedInCaller(Blocks, *OI)) Inputs.insert(*OI); #if LLVM_VERSION_MINOR == 5 for (User *U : II->users()) if (!definedInRegion(Blocks, U)) { #else for (Value::use_iterator UI = II->use_begin(), UE = II->use_end(); UI != UE; ++UI) if (!definedInRegion(Blocks, *UI)) { #endif Outputs.insert(II); break; } } } } /// severSplitPHINodes - If a PHI node has multiple inputs from outside of the /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. void RegionExtractor::severSplitPHINodes(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { PHINode *PN = dyn_cast<PHINode>(Header->begin()); if (!PN) return; // No PHI nodes. // If the header node contains any PHI nodes, check to see if there is more // than one entry from outside the region. If so, we need to sever the // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; // If there is one (or fewer) predecessor from outside the region, we don't // need to do anything special. if (NumPredsOutsideRegion <= 1) return; } // Otherwise, we need to split the header block into two pieces: one // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, Header->getName()+".ce"); // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; Blocks.remove(OldPred); Blocks.insert(NewBB); Header = NewBB; // Okay, update dominator sets. The blocks that dominate the new one are the // blocks that dominate TIBB plus the new block itself. if (DT) DT->splitBlock(NewBB); // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName()+".ce", NewBB->begin()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; } } } } }
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The /// effect of this is to copy significantly less code in cases where (for /// example) a function call with constant arguments is inlined, and those /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const TargetData *TD, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) assert(VMap.count(II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; CloneWorklist.push_back(&OldFunc->getEntryBlock()); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { Value *V = VMap[BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (NewBB == 0) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Loop over all of the instructions in the block, fixing up operand // references as we go. This uses VMap to do all the hard work. // BasicBlock::iterator I = NewBB->begin(); DebugLoc TheCallDL; if (TheCall) TheCallDL = TheCall->getDebugLoc(); // Handle PHI nodes specially, as we have to remove references to dead // blocks. if (PHINode *PN = dyn_cast<PHINode>(I)) { // Skip over all PHI nodes, remembering them for later. BasicBlock::const_iterator OldI = BI->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) PHIToResolve.push_back(cast<PHINode>(OldI)); } // Otherwise, remap the rest of the instructions normally. for (; I != NewBB->end(); ++I) RemapInstruction(I, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap[PN->getIncomingBlock(pred)]; if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[OldI] == PN && "VMap mismatch"); VMap[OldI] = NV; PN->eraseFromParent(); ++OldI; } } // NOTE: We cannot eliminate single entry phi nodes here, because of // VMap. Single entry phi nodes can have multiple VMap entries // pointing at them. Thus, deleting one would require scanning the VMap // to update any entries in it that would require that. This would be // really slow. } // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); while (I != NewFunc->end()) { BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } // Note that we can't eliminate uncond branches if the destination has // single-entry PHI nodes. Eliminating the single-entry phi nodes would // require scanning the VMap to update any entries that point to the phi // node. BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { ++I; continue; } // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } }
// InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. // // Note that this only does one level of inlining. For example, if the // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now // exists in the instruction stream. Similiarly this will inline a recursive // function by one level. // bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { Instruction *TheCall = CS.getInstruction(); LLVMContext &Context = TheCall->getContext(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. // Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. ValueMap<const Value*, Value*> VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) && !CalledFunc->onlyReadsMemory()) { const Type *AggTy = cast<PointerType>(I->getType())->getElementType(); const Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy); Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), &*Caller->begin()->begin()); // Emit a memcpy. const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, Tys, 3); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); Value *Size; if (IFI.TD == 0) Size = ConstantExpr::getSizeOf(AggTy); else Size = ConstantInt::get(Type::getInt64Ty(Context), IFI.TD->getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. Value *CallArgs[] = { DestCast, SrcCast, Size, ConstantInt::get(Type::getInt32Ty(Context), 1), ConstantInt::get(Type::getInt1Ty(Context), 0) }; CallInst *TheMemCpy = CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); // If we have a call graph, update it. if (CallGraph *CG = IFI.CG) { CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); CallGraphNode *CallerNode = (*CG)[Caller]; CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); } // Uses of the argument in the function should use our new alloca // instead. ActualArg = NewAlloca; // Calls that we inline may use the new alloca, so we need to clear // their 'tail' flags. MustClearTailCallFlags = true; } VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. // { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (AI == 0) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!isa<Constant>(AI->getArraySize())) continue; // Keep track of the static allocas that we inline into the caller if the // StaticAllocas pointer is non-null. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice(InsertPoint, FirstNewBlock->getInstList(), AI, I); } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; if (CallGraph *CG = IFI.CG) { StackSaveCGN = CG->getOrInsertFunction(StackSave); StackRestoreCGN = CG->getOrInsertFunction(StackRestore); CallerNode = (*CG)[Caller]; } // Insert the llvm.stacksave. CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack", FirstNewBlock->begin()); if (IFI.CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]); if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); } // Count the number of StackRestore calls we insert. unsigned NumStackRestores = Returns.size(); // If we are inlining an invoke instruction, insert restores before each // unwind. These unwinds will be rewritten into branches later. if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI); if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); ++NumStackRestores; } } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining through a 'nounwind' call site then any inlined 'unwind' // instructions are unreachable. if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind) for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { TerminatorInst *Term = BB->getTerminator(); if (isa<UnwindInst>(Term)) { new UnreachableInst(Context, Term); BB->getInstList().erase(Term); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any inlined 'unwind' instructions into branches to the invoke exception // destination, and call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) BranchInst::Create(II->getNormalDest(), TheCall); // If the return instruction returned a value, replace uses of the call with // uses of the returned value. if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case, of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(NewBr, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. const Type *RTy = CalledFunc->getReturnType(); if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. PHINode *PHI = 0; if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } // Now that we inserted the PHI, check to see if it has a single value // (e.g. all the entries are the same or undef). If so, remove the PHI so // it doesn't block other optimizations. if (Value *V = PHI->hasConstantValue()) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } // Add a branch to the merge points and remove return instructions. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst::Create(AfterCallBB, RI); RI->eraseFromParent(); } } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. BasicBlock *ReturnBB = Returns[0]->getParent(); AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); // Update PHI nodes that use the ReturnBB to use the AfterCallBB. ReturnBB->replaceAllUsesWith(AfterCallBB); // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the code entry block into calling block, right before the // unconditional branch. OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); return true; }
void PromoteMem2Reg::run() { Function &F = *DF.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. if (AST) AST->deleteValue(AI); AI->eraseFromParent(); // Remove the alloca from the Allocas list, since it has been processed RemoveFromAllocasList(AllocaNum); ++NumDeadAlloca; continue; } // Calculate the set of read and write-locations for each alloca. This is // analogous to finding the 'uses' and 'definitions' of each variable. Info.AnalyzeAlloca(AI); // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { RewriteSingleStoreAlloca(AI, Info, LBI); // Finally, after the scan, check to see if the store is all that is left. if (Info.UsingBlocks.empty()) { // Record debuginfo for the store and remove the declaration's debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); DDI->eraseFromParent(); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); if (AST) AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; continue; } } // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock) { PromoteSingleBlockAlloca(AI, Info, LBI); // Finally, after the scan, check to see if the stores are all that is // left. if (Info.UsingBlocks.empty()) { // Remove the (now dead) stores and alloca. while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->use_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) ConvertDebugDeclareToDebugValue(DDI, SI); SI->eraseFromParent(); LBI.deleteValue(SI); } if (AST) AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); // The alloca's debuginfo can be removed as well. if (DbgDeclareInst *DDI = Info.DbgDeclare) DDI->eraseFromParent(); ++NumLocalPromoted; continue; } } // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { unsigned ID = 0; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) BBNumbers[I] = ID++; } // If we have an AST to keep updated, remember some pointer value that is // stored into the alloca. if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; // Remember the dbg.declare intrinsic describing this alloca, if any. if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; // At this point, we're committed to promoting the alloca using IDF's, and // the standard SSA construction algorithm. Determine which blocks need PHI // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. DetermineInsertionPoint(AI, AllocaNum, Info); } if (Allocas.empty()) return; // All of the allocas must have been trivial! LBI.clear(); // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. // RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); // Remove the allocas themselves from the function. for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in // sections of dead code that were not processed on the dominance frontier. // Just delete the users now. // if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); A->eraseFromParent(); } // Remove alloca's dbg.declare instrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) DDI->eraseFromParent(); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is // iterative, because eliminating one PHI node can cause others to be removed. bool EliminatedAPHI = true; while (EliminatedAPHI) { EliminatedAPHI = false; for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. if (Value *V = PN->hasConstantValue(&DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); EliminatedAPHI = true; continue; } ++I; } } // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. PHINode *SomePHI = I->second; BasicBlock *BB = SomePHI->getParent(); if (&BB->front() != SomePHI) continue; // Only do work here if there the PHI nodes are missing incoming values. We // know that all PHI nodes that were inserted in a block will have the same // number of incoming values, so we can just check any of them. if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) continue; // Get the preds for BB. SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient // access. std::sort(Preds.begin(), Preds.end()); // Now we loop through all BB's which have entries in SomePHI and remove // them from the Preds list. for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { // Do a log(n) search of the Preds list for the entry we want. SmallVector<BasicBlock*, 16>::iterator EntIt = std::lower_bound(Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i)); assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& "PHI node has entry for a block which is not a predecessor!"); // Remove the entry Preds.erase(EntIt); } // At this point, the blocks left in the preds list must have dummy // entries inserted into every PHI nodes for the block. Update all the phi // nodes in this block that we are inserting (there could be phis before // mem2reg runs). unsigned NumBadPreds = SomePHI->getNumIncomingValues(); BasicBlock::iterator BBI = BB->begin(); while ((SomePHI = dyn_cast<PHINode>(BBI++)) && SomePHI->getNumIncomingValues() == NumBadPreds) { Value *UndefVal = UndefValue::get(SomePHI->getType()); for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) SomePHI->addIncoming(UndefVal, Preds[pred]); } } NewPhiNodes.clear(); }
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA, Loop **ResultLoop) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" : dbgs() << "Using prolog remainder.\n"); // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) { LLVM_DEBUG(dbgs() << "Not in simplify form!\n"); return false; } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); if (!LatchBR || LatchBR->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. LLVM_DEBUG( dbgs() << "Loop latch not terminated by a conditional branch.\n"); return false; } unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); if (L->contains(LatchExit)) { // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. LLVM_DEBUG( dbgs() << "One of the loop latch successors must be the exit block.\n"); return false; } // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder) && canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { LLVM_DEBUG( dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"); return false; } // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). // We calculate the backedge count by using getExitCount on the Latch block, // which is proven to be the only exiting block in this loop. This is same as // calculating getBackedgeTakenCount on the loop (which computes SCEV for all // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) { LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) { LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) { LLVM_DEBUG( dbgs() << "Count failed constraint on overflow trip count calculation.\n"); return false; } // Loop structure is the following: // // PreHeader // Header // ... // Latch // LatchExit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split LatchExit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, nullptr, PreserveLCSSA); // NewExit gets its DebugLoc from LatchExit, which is not part of the // original Loop. // Fix this by setting Loop's DebugLoc to NewExit. auto *NewExitTerminator = NewExit->getTerminator(); NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... *NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // LatchExit LatchExit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. // To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) DT->changeImmediateDominator(NewExit, PreHeader); else DT->changeImmediateDominator(PrologExit, PreHeader); } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Now the loop blocks are cloned and the other exiting blocks from the // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the // values from the cloned region. for (auto *BB : OtherExits) { for (auto &II : *BB) { // Given we preserve LCSSA form, we know that the values used outside the // loop will be used through these phi nodes at the exit blocks that are // transformed below. if (!isa<PHINode>(II)) break; PHINode *Phi = cast<PHINode>(&II); unsigned oldNumOperands = Phi->getNumIncomingValues(); // Add the incoming values from the remainder code to the end of the phi // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap.lookup(Phi->getIncomingValue(i)); // newVal can be a constant or derived from values outside the loop, and // hence need not have a VMap value. Also, since lookup already generated // a default "null" VMap entry for this value, we need to populate that // VMap entry correctly, with the mapped entry being itself. if (!newVal) { newVal = Phi->getIncomingValue(i); VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i); } Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } } #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) for (BasicBlock *SuccBB : successors(BB)) { assert(!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!"); } #endif } // Update the immediate dominator of the exit blocks and blocks that are // reachable from the exit blocks. This is needed because we now have paths // from both the original loop and the remainder code reaching the exit // blocks. While the IDom of these exit blocks were from the original loop, // now the IDom is the preheader (which decides whether the original loop or // remainder code should run). if (DT && !L->getExitingBlock()) { SmallVector<BasicBlock *, 16> ChildrenToUpdate; // NB! We have to examine the dom children of all loop blocks, not just // those which are the IDom of the exit blocks. This is because blocks // reachable from the exit blocks can have their IDom as the nearest common // dominator of the exit blocks. for (auto *BB : L->blocks()) { auto *DomNodeBB = DT->getNode(BB); for (auto *DomChild : DomNodeBB->getChildren()) { auto *DomChildBB = DomChild->getBlock(); if (!L->contains(LI->getLoopFor(DomChildBB))) ChildrenToUpdate.push_back(DomChildBB); } } for (auto *BB : ChildrenToUpdate) DT->changeImmediateDominator(BB, PreHeader); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // LatchExit LatchExit // Rewrite the cloned instruction operands to use the values created when the // clone is created. for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the any // of its parent loops, so the Scalar Evolution pass needs to be run again. SE->forgetTopmostLoop(L); // Verify that the Dom Tree is correct. #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) if (DT) assert(DT->verify(DominatorTree::VerificationLevel::Full)); #endif // Canonicalize to LoopSimplifyForm both original and remainder loops. We // cannot rely on the LoopUnrollPass to do this because it only does // canonicalization for parent/subloops and not the sibling loops. if (OtherExits.size() > 0) { // Generate dedicated exit blocks for the original loop, to preserve // LoopSimplifyForm. formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA); // Generate dedicated exit blocks for the remainder loop if one exists, to // preserve LoopSimplifyForm. if (remainderLoop) formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA); } auto UnrollResult = LoopUnrollResult::Unmodified; if (remainderLoop && UnrollRemainder) { LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollResult = UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, /*Force*/ false, /*AllowRuntime*/ false, /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA); } if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) *ResultLoop = remainderLoop; NumRuntimeUnrolled++; return true; }
/// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs /// though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get the personality function from the callee if it contains a landing pad. Value *CalleePersonality = 0; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); CalleePersonality = LP->getPersonalityFn(); break; } // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. if (CalleePersonality) { for (Function::const_iterator I = Caller->begin(), E = Caller->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); // If the personality functions match, then we can perform the // inlining. Otherwise, we can't inline. // TODO: This isn't 100% true. Some personality functions are proper // supersets of others and can be used in place of the other. if (LP->getPersonalityFn() != CalleePersonality) return false; break; } } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear // their 'tail' flags if HandleByValArgument introduced a new alloca and // the callee has calls. MustClearTailCallFlags |= ActualArg != *AI; } VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); // Update inlined instructions' line number information. fixupLineNumbers(Caller, FirstNewBlock, TheCall); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (AI == 0) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!isa<Constant>(AI->getArraySize())) continue; // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice(InsertPoint, FirstNewBlock->getInstList(), AI, I); } } // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { IRBuilder<> builder(FirstNewBlock->begin()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; // If the alloca is already scoped to something smaller than the whole // function then there's no need to add redundant, less accurate markers. if (hasLifetimeMarkers(AI)) continue; // Try to determine the size of the allocation. ConstantInt *AllocaSize = 0; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { if (IFI.TD) { Type *AllocaType = AI->getAllocatedType(); uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. if (AllocaArraySize != ~0ULL && UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } } } builder.CreateLifetimeStart(AI, AllocaSize); for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { IRBuilder<> builder(Returns[ri]); builder.CreateLifetimeEnd(AI, AllocaSize); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); NewBr->setDebugLoc(Returns[0]->getDebugLoc()); } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case, of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = 0; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } } // Add a branch to the merge points and remove return instructions. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst::Create(AfterCallBB, RI); RI->eraseFromParent(); } } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. BasicBlock *ReturnBB = Returns[0]->getParent(); ReturnBB->replaceAllUsesWith(AfterCallBB); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } return true; }
/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { PHINode *PN = dyn_cast<PHINode>(Header->begin()); if (!PN) return; // No PHI nodes. // If the header node contains any PHI nodes, check to see if there is more // than one entry from outside the region. If so, we need to sever the // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; // If there is one (or fewer) predecessor from outside the region, we don't // need to do anything special. if (NumPredsOutsideRegion <= 1) return; } // Otherwise, we need to split the header block into two pieces: one // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; Blocks.remove(OldPred); Blocks.insert(NewBB); Header = NewBB; // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName() + ".ce", &NewBB->front()); PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; } } } } }