/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock*> Preds, BranchInst *BI, Pass *P, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1, e = Preds.size(); i != e; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // Explicitly check the BB index here to handle duplicates in Preds. int Idx = PN->getBasicBlockIndex(Preds[i]); if (Idx >= 0) PN->removeIncomingValue(Idx, false); } } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } }
// \brief Update the first occurrence of the "switch statement" BB in the PHI // node with the "new" BB. The other occurrences will: // // 1) Be updated by subsequent calls to this function. Switch statements may // have more than one outcoming edge into the same BB if they all have the same // value. When the switch statement is converted these incoming edges are now // coming from multiple BBs. // 2) Removed if subsequent incoming values now share the same case, i.e., // multiple outcome edges are condensed into one. This is necessary to keep the // number of phi values equal to the number of branches to SuccBB. static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, unsigned NumMergedCases) { for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI(); I != IE; ++I) { PHINode *PN = cast<PHINode>(I); // Only update the first occurence. unsigned Idx = 0, E = PN->getNumIncomingValues(); unsigned LocalNumMergedCases = NumMergedCases; for (; Idx != E; ++Idx) { if (PN->getIncomingBlock(Idx) == OrigBB) { PN->setIncomingBlock(Idx, NewBB); break; } } // Remove additional occurences coming from condensed cases and keep the // number of incoming values equal to the number of branches to SuccBB. for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) if (PN->getIncomingBlock(Idx) == OrigBB) { PN->removeIncomingValue(Idx); LocalNumMergedCases--; } } }
// \brief Update the first occurrence of the "switch statement" BB in the PHI // node with the "new" BB. The other occurrences will: // // 1) Be updated by subsequent calls to this function. Switch statements may // have more than one outcoming edge into the same BB if they all have the same // value. When the switch statement is converted these incoming edges are now // coming from multiple BBs. // 2) Removed if subsequent incoming values now share the same case, i.e., // multiple outcome edges are condensed into one. This is necessary to keep the // number of phi values equal to the number of branches to SuccBB. static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, unsigned NumMergedCases) { for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI(); I != IE; ++I) { PHINode *PN = cast<PHINode>(I); // Only update the first occurence. unsigned Idx = 0, E = PN->getNumIncomingValues(); unsigned LocalNumMergedCases = NumMergedCases; for (; Idx != E; ++Idx) { if (PN->getIncomingBlock(Idx) == OrigBB) { PN->setIncomingBlock(Idx, NewBB); break; } } // Remove additional occurences coming from condensed cases and keep the // number of incoming values equal to the number of branches to SuccBB. SmallVector<unsigned, 8> Indices; for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) if (PN->getIncomingBlock(Idx) == OrigBB) { Indices.push_back(Idx); LocalNumMergedCases--; } // Remove incoming values in the reverse order to prevent invalidating // *successive* index. for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III) PN->removeIncomingValue(*III); } }
// newLeafBlock - Create a new leaf block for the binary lookup tree. It // checks if the switch's value == the case's value. If not, then it // jumps to the default branch. At this point in the tree, the value // can't be another valid case value, so the jump to the "default" branch // is warranted. // BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, BasicBlock* OrigBlock, BasicBlock* Default) { Function* F = OrigBlock->getParent(); BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewLeaf); // Emit comparison ICmpInst* Comp = NULL; if (Leaf.Low == Leaf.High) { // Make the seteq instruction... Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf"); } else { // Make range comparison if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { // Val >= Min && Val <= Hi --> Val <= Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, "SwitchLeaf"); } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, "SwitchLeaf"); } else { // Emit V-Lo <=u Hi-Lo Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo, Val->getName()+".off", NewLeaf); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, "SwitchLeaf"); } } // Make the conditional branch... BasicBlock* Succ = Leaf.BB; BranchInst::Create(Succ, Default, Comp, NewLeaf); // If there were any PHI nodes in this successor, rewrite one entry // from OrigBlock to come from NewLeaf. 
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { PHINode* PN = cast<PHINode>(I); // Remove all but one incoming entries from the cluster uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() - cast<ConstantInt>(Leaf.Low)->getSExtValue(); for (uint64_t j = 0; j < Range; ++j) { PN->removeIncomingValue(OrigBlock); } int BlockIdx = PN->getBasicBlockIndex(OrigBlock); assert(BlockIdx != -1 && "Switch didn't go to this successor??"); PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); } return NewLeaf; }
/// insertCycleChecks - For every CFG edge Src -> Dst in F that the
/// IdentifyBackEdges analysis assigns an ID to (anything other than
/// (unsigned)-1), route the edge through a fresh block that calls CycleCheck
/// with that ID and then branches to Dst.  PHI nodes in Dst and the
/// terminator of Src are updated to match.
void CheckInserter::insertCycleChecks(Function &F) {
  IdentifyBackEdges &IBE = getAnalysis<IdentifyBackEdges>();

  for (Function::iterator Src = F.begin(); Src != F.end(); ++Src) {
    TerminatorInst *Term = Src->getTerminator();

    for (unsigned SuccIdx = 0; SuccIdx < Term->getNumSuccessors(); ++SuccIdx) {
      BasicBlock *Dst = Term->getSuccessor(SuccIdx);
      unsigned EdgeID = IBE.getID(Src, Dst);
      // No ID for this edge: nothing to instrument.
      if (EdgeID == (unsigned)-1)
        continue;
      assert(EdgeID < MaxNumBackEdges);

      // Build the intermediate block: call CycleCheck(EdgeID), then jump on
      // to the original destination.
      BasicBlock *Relay = BasicBlock::Create(
          F.getContext(),
          "backedge_" + Src->getName() + "_" + Dst->getName(),
          &F);
      CallInst::Create(CycleCheck, ConstantInt::get(IntType, EdgeID), "",
                       Relay);
      BranchInst::Create(Dst, Relay);

      // Relay -> Dst: repair the PHI nodes of Dst.
      //
      // When Src's terminator names Dst as a successor more than once, each
      // PHI in Dst carries one entry per such edge.  All of those edges are
      // about to be funnelled through Relay, which has a single edge to Dst,
      // so the first entry is renamed to Relay and the others are dropped.
      for (BasicBlock::iterator PI = Dst->begin();
           Dst->getFirstNonPHI() != PI; ++PI) {
        PHINode *Phi = cast<PHINode>(PI);
        bool Renamed = false;
        unsigned Op = 0;
        while (Op < Phi->getNumIncomingValues()) {
          if (Phi->getIncomingBlock(Op) == Src) {
            if (Renamed) {
              // Surplus entry: delete it and look at the same slot again.
              Phi->removeIncomingValue(Op, false);
              continue;
            }
            Renamed = true;
            Phi->setIncomingBlock(Op, Relay);
          }
          ++Op;
        }
      }

      // Src -> Relay: redirect this successor slot and every later slot that
      // also targets Dst.
      for (unsigned Slot = SuccIdx; Slot < Term->getNumSuccessors(); ++Slot) {
        if (Term->getSuccessor(Slot) == Dst)
          Term->setSuccessor(Slot, Relay);
      }
    }
  }
}
/// updatePHINodes - CFG has been changed. /// Before /// - ExitBB's single predecessor was Latch /// - Latch's second successor was Header /// Now /// - ExitBB's single predecessor is Header /// - Latch's one and only successor is Header /// /// Update ExitBB PHINodes' to reflect this change. void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, BasicBlock *Header, PHINode *IV, Instruction *IVIncrement, Loop *LP) { for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end(); BI != BE; ) { PHINode *PN = dyn_cast<PHINode>(BI); ++BI; if (!PN) break; Value *V = PN->getIncomingValueForBlock(Latch); if (PHINode *PHV = dyn_cast<PHINode>(V)) { // PHV is in Latch. PHV has one use is in ExitBB PHINode. And one use // in Header which is new incoming value for PN. Value *NewV = NULL; for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end(); UI != E; ++UI) if (PHINode *U = dyn_cast<PHINode>(*UI)) if (LP->contains(U->getParent())) { NewV = U; break; } // Add incoming value from header only if PN has any use inside the loop. if (NewV) PN->addIncoming(NewV, Header); } else if (Instruction *PHI = dyn_cast<Instruction>(V)) { // If this instruction is IVIncrement then IV is new incoming value // from header otherwise this instruction must be incoming value from // header because loop is in LCSSA form. if (PHI == IVIncrement) PN->addIncoming(IV, Header); else PN->addIncoming(V, Header); } else // Otherwise this is an incoming value from header because loop is in // LCSSA form. PN->addIncoming(V, Header); // Remove incoming value from Latch. PN->removeIncomingValue(Latch); } }
/// unswitchFunction - Try to partially inline F.
///
/// F qualifies when its entry block ends in a conditional branch and exactly
/// one of the entry block's successors ends in a return.  In that case F is
/// cloned, everything in the clone except the entry block and the (split)
/// return block is extracted into a separate function with CodeExtractor, and
/// the remaining small clone is inlined into every call/invoke user.  The
/// clone is then deleted and any remaining users are pointed back at F.
///
/// Returns the extracted function, or 0 if F is not a candidate.
Function* PartialInliner::unswitchFunction(Function* F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock* entryBlock = F->begin();
  BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return 0;

  // Classify the entry block's successors: count those that end in a return
  // and remember one block of each kind.
  BasicBlock* returnBlock = 0;
  BasicBlock* nonReturnBlock = 0;
  unsigned returnCount = 0;
  for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
       SI != SE; ++SI)
    if (isa<ReturnInst>((*SI)->getTerminator())) {
      returnBlock = *SI;
      returnCount++;
    } else
      nonReturnBlock = *SI;

  // Exactly one returning successor is required.
  if (returnCount != 1)
    return 0;

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function* duplicateFunction = CloneFunction(F, VMap,
                                              /*ModuleLevelChanges=*/false);
  duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  F->getParent()->getFunctionList().push_back(duplicateFunction);
  // Look up the clone's counterparts of the three interesting blocks.
  BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
  BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
  BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);

  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(duplicateFunction);

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block.  For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  //
  // After the split, preReturn holds the original PHIs and newReturnBlock
  // holds everything from the first non-PHI instruction onwards.
  BasicBlock* preReturn = newReturnBlock;
  newReturnBlock = newReturnBlock->splitBasicBlock(
                                              newReturnBlock->getFirstNonPHI());
  BasicBlock::iterator I = preReturn->begin();
  BasicBlock::iterator Ins = newReturnBlock->begin();
  while (I != preReturn->end()) {
    PHINode* OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi) break;

    // Second-level PHI in newReturnBlock: it takes over all uses of the old
    // PHI and merges the old PHI with the value coming from the entry block.
    PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(retPhi);
    Ins = newReturnBlock->getFirstNonPHI();

    retPhi->addIncoming(I, preReturn);
    retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
                        newEntryBlock);
    // The entry block's contribution now lives in retPhi only.
    OldPhi->removeIncomingValue(newEntryBlock);

    ++I;
  }
  // The entry block must now branch to the split-off block, not to preReturn.
  newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);

  // Gather up the blocks that we're going to extract: every block of the
  // clone other than the entry block and the new return block, with the
  // non-return successor listed first.
  std::vector<BasicBlock*> toExtract;
  toExtract.push_back(newNonReturnBlock);
  for (Function::iterator FI = duplicateFunction->begin(),
       FE = duplicateFunction->end(); FI != FE; ++FI)
    if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
        &*FI != newNonReturnBlock)
      toExtract.push_back(FI);

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.runOnFunction(*duplicateFunction);

  // Extract the body of the if.
  Function* extractedFunction = CodeExtractor(toExtract, &DT).extractCodeRegion();

  InlineFunctionInfo IFI;

  // Inline the top-level if test into all callers.  The user list is copied
  // first because inlining modifies the use list being walked.
  std::vector<User*> Users(duplicateFunction->use_begin(),
                           duplicateFunction->use_end());
  for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
       UI != UE; ++UI)
    if (CallInst *CI = dyn_cast<CallInst>(*UI))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
      InlineFunction(II, IFI);

  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  duplicateFunction->replaceAllUsesWith(F);
  duplicateFunction->eraseFromParent();

  ++NumPartialInlined;

  return extractedFunction;
}
/// removePredecessor - This method is used to notify a BasicBlock that the
/// specified Predecessor of the block is no longer able to reach it.  It does
/// not update the predecessor list itself; what it updates are the PHI nodes
/// that reside in the block.  Note that this should be called while the
/// predecessor still refers to this block.
///
/// \param Pred                  the predecessor whose PHI entries are removed.
/// \param DontDeleteUselessPHIs when true, PHI nodes are kept even if they
///                              become trivially replaceable.
///
void BasicBlock::removePredecessor(BasicBlock *Pred,
                                   bool DontDeleteUselessPHIs) {
  assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs.
          find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
         "removePredecessor: BB is not a predecessor!");

  // Nothing to do unless the block starts with a PHI node.
  if (InstList.empty()) return;
  PHINode *APN = dyn_cast<PHINode>(&front());
  if (!APN) return;   // Quick exit.

  // If there are exactly two predecessors, then we want to nuke the PHI nodes
  // altogether.  However, we cannot do this in the following case:
  //
  //  Loop:
  //    %x = phi [X, Loop]
  //    %x2 = add %x, 1         ;; This would become %x2 = add %x2, 1
  //    br Loop                 ;; %x2 does not dominate all uses
  //
  // This is because the PHI node input is actually taken from the predecessor
  // basic block.  The only case this can happen is with a self loop, so we
  // check for this case explicitly now.
  //
  unsigned max_idx = APN->getNumIncomingValues();
  assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!");
  if (max_idx == 2) {
    // The comparison yields 0 or 1 and thus selects the entry that is NOT
    // Pred (assuming entry 0 or entry 1 is Pred).
    BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred);

    // Disable PHI elimination!  The remaining predecessor is this block
    // itself, i.e. the self-loop case described above.
    if (this == Other) max_idx = 3;
  }

  // <= Two predecessors BEFORE I remove one?
  if (max_idx <= 2 && !DontDeleteUselessPHIs) {
    // Yup, loop through and nuke the PHI nodes
    while (PHINode *PN = dyn_cast<PHINode>(&front())) {
      // Remove the predecessor first.
      PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);

      // If the PHI _HAD_ two entries, replace PHI node with its now *single*
      // value
      if (max_idx == 2) {
        if (PN->getIncomingValue(0) != PN)
          PN->replaceAllUsesWith(PN->getIncomingValue(0));
        else
          // We are left with an infinite loop with no entries: kill the PHI.
          PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
        getInstList().pop_front();    // Remove the PHI node
      }

      // If the PHI node already only had one entry, it got deleted by
      // removeIncomingValue.
    }
  } else {
    // Okay, now we know that we need to remove the entry for Pred from all
    // PHI nodes.  Iterate over each PHI node fixing them up
    PHINode *PN;
    for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) {
      // Advance before the PHI is possibly erased below.
      ++II;
      PN->removeIncomingValue(Pred, false);
      // If all incoming values to the Phi are the same, we can replace the Phi
      // with that value (unless that value is the Phi itself).
      Value* PNV = 0;
      if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
        if (PNV != PN) {
          PN->replaceAllUsesWith(PNV);
          PN->eraseFromParent();
        }
    }
  }
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was succesful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) { assert(L->isLCSSAForm()); BasicBlock *Header = L->getHeader(); BasicBlock *LatchBlock = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DOUT << " Can't unroll; loop not terminated by a conditional branch.\n"; return false; } // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); // Find trip multiple if count is not available unsigned TripMultiple = 1; if (TripCount == 0) TripMultiple = L->getSmallConstantTripMultiple(); if (TripCount != 0) DOUT << " Trip Count = " << TripCount << "\n"; if (TripMultiple != 1) DOUT << " Trip Multiple = " << TripMultiple << "\n"; // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. 
BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } if (CompletelyUnroll) { DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); } else { DEBUG(errs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DOUT << " with a breakout at trip " << BreakoutTrip; } else if (TripMultiple != 1) { DOUT << " with " << TripMultiple << " trips per branch"; } DOUT << "!\n"; } std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. typedef DenseMap<const Value*, Value*> ValueMapTy; ValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); OrigPHINode.push_back(PN); if (Instruction *I = dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) if (L->contains(I->getParent())) LastValueMap[I] = I; } std::vector<BasicBlock*> Headers; std::vector<BasicBlock*> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); for (unsigned It = 1; It != Count; ++It) { char SuffixBuffer[100]; sprintf(SuffixBuffer, ".%d", It); std::vector<BasicBlock*> NewBlocks; for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { ValueMapTy ValueMap; BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. 
if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI->getParent())) InVal = LastValueMap[InValI]; ValueMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; L->addBasicBlockToLoop(New, LI->getBase()); // Add phi entries for newly created values to all exit blocks except // the successor of the latch block. The successor of the exit block will // be updated specially after unrolling all the way. if (*BB != LatchBlock) for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end(); UI != UE;) { Instruction *UseInst = cast<Instruction>(*UI); ++UI; if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) { PHINode *phi = cast<PHINode>(UseInst); Value *Incoming = phi->getIncomingValueForBlock(*BB); phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) { Latches.push_back(New); // Also, clear out the new latch's back edge so that it doesn't look // like a new loop, so that it's amenable to being merged with adjacent // blocks later on. 
TerminatorInst *Term = New->getTerminator(); assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); Term->setSuccessor(!ContinueOnTrue, NULL); } NewBlocks.push_back(New); } // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) RemapInstruction(I, LastValueMap); } // The latch block exits the loop. If there are any PHI nodes in the // successor blocks, update them to use the appropriate values computed as the // last iteration of the loop. if (Count != 1) { SmallPtrSet<PHINode*, 8> Users; for (Value::use_iterator UI = LatchBlock->use_begin(), UE = LatchBlock->use_end(); UI != UE; ++UI) if (PHINode *phi = dyn_cast<PHINode>(*UI)) Users.insert(phi); BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]); for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end(); SI != SE; ++SI) { PHINode *PN = *SI; Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { if (L->contains(InValI->getParent())) InVal = LastValueMap[InVal]; } PN->addIncoming(InVal, LastIterationBB); } } // Now, if we're doing complete unrolling, loop over the PHI nodes in the // original block, setting them to their incoming values. if (CompletelyUnroll) { BasicBlock *Preheader = L->getLoopPreheader(); for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *PN = OrigPHINode[i]; PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. 
for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { Dest = LoopExit; NeedConditional = false; } // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { Term->setUnconditionalDest(Dest); // Merge adjacent basic blocks, if possible. if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) { std::replace(Latches.begin(), Latches.end(), Dest, Fold); std::replace(Headers.begin(), Headers.end(), Dest, Fold); } } } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { Instruction *Inst = I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); else if (Constant *C = ConstantFoldInstruction(Inst, Header->getContext())) { Inst->replaceAllUsesWith(C); (*BB)->getInstList().erase(Inst); } } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; // Remove the loop from the LoopPassManager if it's completely removed. 
if (CompletelyUnroll && LPM != NULL) LPM->deleteLoopFromQueue(L); // If we didn't completely unroll the loop, it should still be in LCSSA form. if (!CompletelyUnroll) assert(L->isLCSSAForm()); return true; }
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. /// In particular, it does not preserve LoopSimplify (because it's /// complicated to handle the case where one of the edges being split /// is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; Loop *L = LI ? LI->getLoopFor(BB) : 0; bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); // Move the edges from Preds to point to NewBB instead of BB. // While here, if we need to preserve loop analyses, collect // some information about how this split will affect loops. bool HasLoopExit = false; bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; for (unsigned i = 0; i != NumPreds; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); if (LI) { // If we need to preserve LCSSA, determine if any of // the preds is a loop exit. 
if (PreserveLCSSA) if (Loop *PL = LI->getLoopFor(Preds[i])) if (!PL->contains(BB)) HasLoopExit = true; // If we need to preserve LoopInfo, note whether any of the // preds crosses an interesting loop boundary. if (L) { if (L->contains(Preds[i])) IsLoopEntry = false; else SplitMakesNewLoopHeader = true; } } } // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; if (L) { if (IsLoopEntry) { // Add the new block to the nearest enclosing loop (and not an // adjacent loop). To find this, examine each of the predecessors and // determine which loops enclose them, and select the most-nested loop // which contains the loop containing the block being split. Loop *InnermostPredLoop = 0; for (unsigned i = 0; i != NumPreds; ++i) if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { // Seek a loop which actually contains the block being split (to // avoid adjacent loops). while (PredLoop && !PredLoop->contains(BB)) PredLoop = PredLoop->getParentLoop(); // Select the most-nested of these loops which contains the block. 
if (PredLoop && PredLoop->contains(BB) && (!InnermostPredLoop || InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) InnermostPredLoop = PredLoop; } if (InnermostPredLoop) InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) L->moveToHeader(NewBB); } } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } return NewBB; }
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. 
if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; MDNode *LoopID = NewLoop->getLoopID(); assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; Optional<MDNode *> NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); if (NewLoopID.hasValue()) { NewLoop->setLoopID(NewLoopID.getValue()); // Do not setLoopAlreadyUnrolled if loop attributes have been defined // explicitly. return NewLoop; } // Add unroll disable metadata to disable future unrolling for this loop. NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } else return nullptr; }
/// NormalizeLandingPads - Normalize and discover landing pads, noting them /// in the LandingPads set. A landing pad is normal if the only CFG edges /// that end at it are unwind edges from invoke instructions. If we inlined /// through an invoke we could have a normal branch from the previous /// unwind block through to the landing pad for the original invoke. /// Abnormal landing pads are fixed up by redirecting all unwind edges to /// a new basic block which falls through to the original. bool DwarfEHPrepare::NormalizeLandingPads() { bool Changed = false; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (!isa<InvokeInst>(TI)) continue; BasicBlock *LPad = TI->getSuccessor(1); // Skip landing pads that have already been normalized. if (LandingPads.count(LPad)) continue; // Check that only invoke unwind edges end at the landing pad. bool OnlyUnwoundTo = true; for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) { TerminatorInst *PT = (*PI)->getTerminator(); if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) { OnlyUnwoundTo = false; break; } } if (OnlyUnwoundTo) { // Only unwind edges lead to the landing pad. Remember the landing pad. LandingPads.insert(LPad); continue; } // At least one normal edge ends at the landing pad. Redirect the unwind // edges to a new basic block which falls through into this one. // Create the new basic block. BasicBlock *NewBB = BasicBlock::Create(F->getContext(), LPad->getName() + "_unwind_edge"); // Insert it into the function right before the original landing pad. LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); // Redirect unwind edges from the original landing pad to NewBB. for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { TerminatorInst *PT = (*PI++)->getTerminator(); if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad) // Unwind to the new block. 
PT->setSuccessor(1, NewBB); } // If there are any PHI nodes in LPad, we need to update them so that they // merge incoming values from NewBB instead. for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) { PHINode *PN = cast<PHINode>(II); pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); // Check to see if all of the values coming in via unwind edges are the // same. If so, we don't need to create a new PHI node. Value *InVal = PN->getIncomingValueForBlock(*PB); for (pred_iterator PI = PB; PI != PE; ++PI) { if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { InVal = 0; break; } } if (InVal == 0) { // Different unwind edges have different values. Create a new PHI node // in NewBB. PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind", NewBB); // Add an entry for each unwind edge, using the value from the old PHI. for (pred_iterator PI = PB; PI != PE; ++PI) NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); // Now use this new PHI as the common incoming value for NewBB in PN. InVal = NewPN; } // Revector exactly one entry in the PHI node to come from NewBB // and delete all other entries that come from unwind edges. If // there are both normal and unwind edges from the same predecessor, // this leaves an entry for the normal edge. for (pred_iterator PI = PB; PI != PE; ++PI) PN->removeIncomingValue(*PI); PN->addIncoming(InVal, NewBB); } // Add a fallthrough from NewBB to the original landing pad. BranchInst::Create(LPad, NewBB); // Now update DominatorTree and DominanceFrontier analysis information. if (DT) DT->splitBlock(NewBB); if (DF) DF->splitBlock(NewBB); // Remember the newly constructed landing pad. The original landing pad // LPad is no longer a landing pad now that all unwind edges have been // revectored to NewBB. LandingPads.insert(NewBB); ++NumLandingPadsSplit; Changed = true; } return Changed; }
/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector
/// in the given landing pad.  In principle, llvm.eh.exception is
/// required to be in the landing pad; in practice, SplitCriticalEdge
/// can break that invariant, and then inlining can break it further.
/// There's a real need for a reliable solution here, but until that
/// happens, we have some fragile workarounds here.
///
/// If the exception call is not in \p lpad itself, this follows the chain of
/// unconditional branches leaving \p lpad until a block containing one is
/// found, and then rewrites the IR so that \p lpad holds an exception call
/// and a selector call again.
///
/// \param lpad  the landing pad block to search from.
/// \returns the selector call located in \p lpad after any fix-up, or null if
///   no selector could be found or the CFG is outside what this workaround
///   handles (conditional branch, cycle, or more than one non-dominating
///   edge on the way).
static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) {
  // Look for an exception call in the actual landing pad.
  EHExceptionInst *exn = findExceptionInBlock(lpad);
  if (exn) return findSelectorForException(exn);

  // Okay, if that failed, look for one in an obvious successor.  If
  // we find one, we'll fix the IR by moving things back to the
  // landing pad.

  bool dominates = true; // does the lpad dominate the exn call
  BasicBlock *nonDominated = 0; // if not, the first non-dominated block
  BasicBlock *lastDominated = 0; // and the block which branched to it

  BasicBlock *exnBlock = lpad;

  // We need to protect against lpads that lead into infinite loops.
  SmallPtrSet<BasicBlock*,4> visited;
  visited.insert(exnBlock);

  do {
    // We're not going to apply this hack to anything more complicated
    // than a series of unconditional branches, so if the block
    // doesn't terminate in an unconditional branch, just fail.  More
    // complicated cases can arise when, say, sinking a call into a
    // split unwind edge and then inlining it; but that can do almost
    // *anything* to the CFG, including leaving the selector
    // completely unreachable.  The only way to fix that properly is
    // to (1) prohibit transforms which move the exception or selector
    // values away from the landing pad, e.g. by producing them with
    // instructions that are pinned to an edge like a phi, or
    // producing them with not-really-instructions, and (2) making
    // transforms which split edges deal with that.
    BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back());
    if (!branch || branch->isConditional()) return 0;

    BasicBlock *successor = branch->getSuccessor(0);

    // Fail if we found an infinite loop.
    if (!visited.insert(successor)) return 0;

    // If the successor isn't dominated by exnBlock (approximated here by
    // "the successor does not have a single predecessor"):
    if (!successor->getSinglePredecessor()) {
      // We don't want to have to deal with threading the exception
      // through multiple levels of phi, so give up if we've already
      // followed a non-dominating edge.
      if (!dominates) return 0;

      // Otherwise, remember this as a non-dominating edge.
      dominates = false;
      nonDominated = successor;
      lastDominated = exnBlock;
    }

    exnBlock = successor;

    // Can we stop here?
    exn = findExceptionInBlock(exnBlock);
  } while (!exn);

  // Look for a selector call for the exception we found.
  EHSelectorInst *selector = findSelectorForException(exn);
  if (!selector) return 0;

  // The easy case is when the landing pad still dominates the
  // exception call, in which case we can just move both calls back to
  // the landing pad.
  if (dominates) {
    selector->moveBefore(lpad->getFirstNonPHI());
    exn->moveBefore(selector);
    return selector;
  }

  // Otherwise, we have to split at the first non-dominating block.
  // The CFG looks basically like this:
  //    lpad:
  //      phis_0
  //      insnsAndBranches_1
  //      br label %nonDominated
  //    nonDominated:
  //      phis_2
  //      insns_3
  //      %exn = call i8* @llvm.eh.exception()
  //      insnsAndBranches_4
  //      %selector = call @llvm.eh.selector(i8* %exn, ...
  // We need to turn this into:
  //    lpad:
  //      phis_0
  //      %exn0 = call i8* @llvm.eh.exception()
  //      %selector0 = call @llvm.eh.selector(i8* %exn0, ...
  //      insnsAndBranches_1
  //      br label %split // from lastDominated
  //    nonDominated:
  //      phis_2 (without edge from lastDominated)
  //      %exn1 = call i8* @llvm.eh.exception()
  //      %selector1 = call @llvm.eh.selector(i8* %exn1, ...
  //      br label %split
  //    split:
  //      phis_2 (edge from lastDominated, edge from nonDominated)
  //      %exn = phi ...
  //      %selector = phi ...
  //      insns_3
  //      insnsAndBranches_4

  assert(nonDominated);
  assert(lastDominated);

  // First, make clones of the intrinsics to go in lpad.
  EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone());
  EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone());
  // The cloned selector must consume the cloned exception, not the original.
  lpadSelector->setArgOperand(0, lpadExn);
  lpadSelector->insertBefore(lpad->getFirstNonPHI());
  lpadExn->insertBefore(lpadSelector);

  // Split the non-dominated block.
  BasicBlock *split =
    nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(),
                                  nonDominated->getName() + ".lpad-fix");

  // Redirect the last dominated branch there.
  cast<BranchInst>(lastDominated->back()).setSuccessor(0, split);

  // Move the existing intrinsics to the end of the old block.
  selector->moveBefore(&nonDominated->back());
  exn->moveBefore(selector);

  // All phis created below are inserted before the first instruction of split.
  Instruction *splitIP = &split->front();

  // For all the phis in nonDominated, make a new phi in split to join
  // that phi with the edge from lastDominated.
  for (BasicBlock::iterator
         i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) {
    PHINode *phi = dyn_cast<PHINode>(i);
    if (!phi) break;

    PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(),
                                        splitIP);
    // Replace uses first, so that the use of phi added to splitPhi on the
    // next line is not itself rewritten.
    phi->replaceAllUsesWith(splitPhi);
    splitPhi->addIncoming(phi, nonDominated);
    splitPhi->addIncoming(phi->removeIncomingValue(lastDominated),
                          lastDominated);
  }

  // Make new phis for the exception and selector.
  PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP);
  exn->replaceAllUsesWith(exnPhi);
  selector->setArgOperand(0, exn); // except for this use
  exnPhi->addIncoming(exn, nonDominated);
  exnPhi->addIncoming(lpadExn, lastDominated);

  PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP);
  selector->replaceAllUsesWith(selectorPhi);
  selectorPhi->addIncoming(selector, nonDominated);
  selectorPhi->addIncoming(lpadSelector, lastDominated);

  return lpadSelector;
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// TripCount is generally defined as the number of times the loop header /// executes. UnrollLoop relaxes the definition to permit early exits: here /// TripCount is the iteration on which control exits LatchBlock if no early /// exits were taken. Note that UnrollLoop assumes that the loop counter test /// terminates LatchBlock in order to remove unnecesssary instances of the /// test. In other words, control may exit the loop prior to TripCount /// iterations via an early branch, but control may not exit the loop from the /// LatchBlock's terminator prior to TripCount iterations. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. /// /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI, Pass *PP, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } // Loops with indirectbr cannot be cloned. 
if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); return false; } BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); return false; } if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; // Don't enter the unroll code if there is nothing to do. This way we don't // need to support "partial unrolling by 1". if (TripCount == 0 && Count < 2) return false; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) return false; // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. if (PP) { ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); if (SE) SE->forgetLoop(L); } // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. 
BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } // Report the unrolling decision. DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, Twine("completely unrolled loop with ") + Twine(TripCount) + " iterations"); } else { DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); Twine DiagMsg("unrolled loop by a factor of " + Twine(Count)); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); DiagMsg.concat(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); DiagMsg.concat(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); DiagMsg.concat(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg); } bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { OrigPHINode.push_back(cast<PHINode>(I)); } std::vector<BasicBlock*> Headers; std::vector<BasicBlock*> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each // exit. 
LoopBlocksDFS DFS(L); DFS.perform(LI); // Stash the DFS iterators before adding blocks to the loop. LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; VMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; L->addBasicBlockToLoop(New, LI->getBase()); // Add phi entries for newly created values to all exit blocks. for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; ++SI) { if (L->contains(*SI)) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { Value *Incoming = phi->getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); if (It != LastValueMap.end()) Incoming = It->second; phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. 
if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) Latches.push_back(New); NewBlocks.push_back(New); } // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) ::RemapInstruction(I, LastValueMap); } // Loop over the PHI nodes in the original block, setting incoming values. for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *PN = OrigPHINode[i]; if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } else if (Count > 1) { Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { if (L->contains(InValI)) InVal = LastValueMap[InVal]; } assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); PN->addIncoming(InVal, Latches.back()); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; if (RuntimeTripCount && j != 0) { NeedConditional = false; } // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { Dest = LoopExit; NeedConditional = false; } // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. 
if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { // Remove phi operands at this loop exit if (Dest != LoopExit) { BasicBlock *BB = Latches[i]; for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { if (*SI == Headers[i]) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { Phi->removeIncomingValue(BB, false); } } } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); } } // Merge adjacent basic blocks, if possible. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } DominatorTree *DT = nullptr; if (PP) { // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. if (DominatorTreeWrapperPass *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { DT = &DTWP->getDomTree(); DT->recalculate(*L->getHeader()->getParent()); } // Simplify any new induction variables in the partially unrolled loop. ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); if (SE && !CompletelyUnroll) { SmallVector<WeakVH, 16> DeadInsts; simplifyLoopIVs(L, SE, LPM, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already // identified. Any remaining should be cleaned up below. 
while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { Instruction *Inst = I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); else if (Value *V = SimplifyInstruction(Inst)) if (LI->replacementPreservesLCSSAForm(Inst, V)) { Inst->replaceAllUsesWith(V); (*BB)->getInstList().erase(Inst); } } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; Loop *OuterL = L->getParentLoop(); // Remove the loop from the LoopPassManager if it's completely removed. if (CompletelyUnroll && LPM != nullptr) LPM->deleteLoopFromQueue(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. if (PP && DT) { if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after // deleteLoopFromQueue updates LoopInfo. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); formLCSSARecursively(*OuterL, *DT, SE); } } return true; }
/// Replace every indirectbr in \p Func with a switch over small integer label
/// numbers, and replace the blockaddress constants of \p Func with those
/// numbers cast to pointers. Blockaddresses that remain afterwards are
/// replaced with a dummy value.
///
/// Returns true if anything in the function was changed.
static bool convertFunction(Function *Func) {
  bool Changed = false;
  IntegerType *I32 = Type::getInt32Ty(Func->getContext());

  // Skip zero in case programs treat a null pointer as special.
  uint32_t NextNum = 1;
  DenseMap<BasicBlock *, ConstantInt *> LabelNums;
  BasicBlock *DefaultBB = NULL;

  // Replace each indirectbr with a switch.
  //
  // If there are multiple indirectbr instructions in the function,
  // this could be expensive.  While an indirectbr is usually
  // converted to O(1) machine instructions, the switch we generate
  // here will be O(n) in the number of target labels.
  //
  // However, Clang usually generates just a single indirectbr per
  // function anyway when compiling C computed gotos.
  //
  // We could try to generate one switch to handle all the indirectbr
  // instructions in the function, but that would be complicated to
  // implement given that variables that are live at one indirectbr
  // might not be live at others.
  for (llvm::Function::iterator BB = Func->begin(), E = Func->end();
       BB != E; ++BB) {
    IndirectBrInst *Br = dyn_cast<IndirectBrInst>(BB->getTerminator());
    if (!Br)
      continue;

    Changed = true;

    // Lazily create the shared, unreachable default destination.
    if (!DefaultBB) {
      DefaultBB = BasicBlock::Create(Func->getContext(),
                                     "indirectbr_default", Func);
      new UnreachableInst(Func->getContext(), DefaultBB);
    }

    // An indirectbr can list the same target block multiple times.
    // Keep track of the basic blocks we've handled to avoid adding
    // the same case multiple times.
    DenseSet<BasicBlock *> BlocksSeen;

    Value *Cast = new PtrToIntInst(Br->getAddress(), I32,
                                   "indirectbr_cast", Br);
    unsigned Count = Br->getNumSuccessors();
    SwitchInst *Switch = SwitchInst::Create(Cast, DefaultBB, Count, Br);

    for (unsigned I = 0; I < Count; ++I) {
      BasicBlock *Dest = Br->getSuccessor(I);

      if (!BlocksSeen.insert(Dest).second) {
        // Repeated target: the switch will carry one edge fewer than the
        // indirectbr did, so drop one entry per phi node for this edge.
        BasicBlock::iterator It = Dest->begin();
        while (PHINode *Phi = dyn_cast<PHINode>(It)) {
          Phi->removeIncomingValue(Br->getParent());
          ++It;
        }
        continue;
      }

      ConstantInt *Val;
      DenseMap<BasicBlock *, ConstantInt *>::iterator Known =
          LabelNums.find(Dest);
      if (Known != LabelNums.end()) {
        Val = Known->second;
      } else {
        // First time this label is seen: number it and rewrite its
        // blockaddress to the number cast to a pointer.
        Val = ConstantInt::get(I32, NextNum++);
        LabelNums[Dest] = Val;

        BlockAddress *BA = BlockAddress::get(Func, Dest);
        Value *ValAsPtr = ConstantExpr::getIntToPtr(Val, BA->getType());
        BA->replaceAllUsesWith(ValAsPtr);
        BA->destroyConstant();
      }
      Switch->addCase(Val, Dest);
    }

    Br->eraseFromParent();
  }

  // If there are any blockaddresses that are never used by an
  // indirectbr, replace them with dummy values.
  SmallVector<Value *, 20> Users(Func->user_begin(), Func->user_end());
  for (auto U : Users) {
    BlockAddress *BA = dyn_cast<BlockAddress>(U);
    if (!BA)
      continue;

    Changed = true;
    Value *DummyVal = ConstantExpr::getIntToPtr(ConstantInt::get(I32, ~0L),
                                                BA->getType());
    BA->replaceAllUsesWith(DummyVal);
    BA->destroyConstant();
  }
  return Changed;
}
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. 
if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. 
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); return NewLoop; } else return nullptr; }
/// ConvertOperandToType - Rewrite the user U, which currently uses OldVal as
/// an operand, into a new instruction that uses NewVal (possibly of a
/// different type) instead.
///
/// The replacement is inserted immediately before the old instruction and
/// recorded in VMC.ExprMap.  If its result type differs from the old one, the
/// old instruction's users are converted via ConvertValueToNewType; otherwise
/// every non-ValueHandle user is redirected to the replacement.
///
/// Nothing is done when U is a ValueHandle, when U was already visited
/// (VMC.OperandsMapped), or when U already has an ExprMap entry.
///
static void ConvertOperandToType(User *U, Value *OldVal, Value *NewVal,
                                 ValueMapCache &VMC, const TargetData &TD) {
  if (isa<ValueHandle>(U)) return;  // Valuehandles don't let go of operands...

  // Visit each user at most once.
  if (VMC.OperandsMapped.count(U)) return;
  VMC.OperandsMapped.insert(U);

  // Already converted through the expression map?  Then there is nothing to do.
  ValueMapCache::ExprMapTy::iterator VMCI = VMC.ExprMap.find(U);
  if (VMCI != VMC.ExprMap.end())
    return;

  Instruction *I = cast<Instruction>(U);  // Only Instructions convertible

  BasicBlock *BB = I->getParent();
  assert(BB != 0 && "Instruction not embedded in basic block!");
  // Take the name away from the old instruction so the replacement can use it.
  std::string Name = I->getName();
  I->setName("");
  Instruction *Res;     // Result of conversion

  //std::cerr << endl << endl << "Type:\t" << Ty << "\nInst: " << I
  //          << "BB Before: " << BB << endl;

  // Prevent I from being removed...
  ValueHandle IHandle(VMC, I);

  const Type *NewTy = NewVal->getType();
  // Placeholder operand for instructions whose real operands are filled in
  // after creation.  No null constant is built for void.
  Constant *Dummy = (NewTy != Type::VoidTy) ?
                  Constant::getNullValue(NewTy) : 0;

  switch (I->getOpcode()) {
  case Instruction::Cast:
    if (VMC.NewCasts.count(ValueHandle(VMC, I))) {
      // This cast has already had its value converted, causing a new cast to
      // be created.  We don't want to create YET ANOTHER cast instruction
      // representing the original one, so just modify the operand of this cast
      // instruction, which we know is newly created.
      I->setOperand(0, NewVal);
      I->setName(Name);  // give I its name back
      return;

    } else {
      Res = new CastInst(NewVal, I->getType(), Name);
    }
    break;

  case Instruction::Add:
    if (isa<PointerType>(NewTy)) {
      // The operand that is not being replaced is the candidate index.
      Value *IndexVal = I->getOperand(OldVal == I->getOperand(0) ? 1 : 0);
      std::vector<Value*> Indices;
      BasicBlock::iterator It = I;

      if (const Type *ETy = ConvertibleToGEP(NewTy, IndexVal, Indices, TD,&It)){
        // If successful, convert the add to a GEP
        //const Type *RetTy = PointerType::get(ETy);
        // First operand is actually the given pointer...
        Res = new GetElementPtrInst(NewVal, Indices, Name);
        assert(cast<PointerType>(Res->getType())->getElementType() == ETy &&
               "ConvertibleToGEP broken!");
        break;
      }
    }
    // FALLTHROUGH

  case Instruction::Sub:
  case Instruction::SetEQ:
  case Instruction::SetNE: {
    // Create the operator with placeholder operands and register it before
    // converting the other operand, which recurses into the expression
    // converter.
    Res = BinaryOperator::create(cast<BinaryOperator>(I)->getOpcode(),
                                 Dummy, Dummy, Name);
    VMC.ExprMap[I] = Res;   // Add node to expression eagerly

    unsigned OtherIdx = (OldVal == I->getOperand(0)) ? 1 : 0;
    Value *OtherOp    = I->getOperand(OtherIdx);
    Res->setOperand(!OtherIdx, NewVal);
    Value *NewOther   = ConvertExpressionToType(OtherOp, NewTy, VMC, TD);
    Res->setOperand(OtherIdx, NewOther);
    break;
  }
  case Instruction::Shl:
  case Instruction::Shr:
    // Only the shifted value (operand 0) may be replaced; the shift amount is
    // reused as is.
    assert(I->getOperand(0) == OldVal);
    Res = new ShiftInst(cast<ShiftInst>(I)->getOpcode(), NewVal,
                        I->getOperand(1), Name);
    break;

  case Instruction::Free:            // Free can free any pointer type!
    assert(I->getOperand(0) == OldVal);
    Res = new FreeInst(NewVal);
    break;

  case Instruction::Load: {
    assert(I->getOperand(0) == OldVal && isa<PointerType>(NewVal->getType()));
    const Type *LoadedTy =
      cast<PointerType>(NewVal->getType())->getElementType();

    Value *Src = NewVal;

    // When the new pointee is a composite type, load its first leaf element
    // instead, addressing it with a GEP if more than one index is needed.
    if (const CompositeType *CT = dyn_cast<CompositeType>(LoadedTy)) {
      std::vector<Value*> Indices;
      Indices.push_back(Constant::getNullValue(Type::UIntTy));

      unsigned Offset = 0;   // No offset, get first leaf.
      LoadedTy = getStructOffsetType(CT, Offset, Indices, TD, false);
      assert(LoadedTy->isFirstClassType());

      if (Indices.size() != 1) {     // Do not generate load X, 0
        // Insert the GEP instruction before this load.
        Src = new GetElementPtrInst(Src, Indices, Name+".idx", I);
      }
    }

    Res = new LoadInst(Src, Name);
    assert(Res->getType()->isFirstClassType() && "Load of structure or array!");
    break;
  }

  case Instruction::Store: {
    if (I->getOperand(0) == OldVal) {  // Replace the source value
      // Check to see if operand #1 has already been converted...
      ValueMapCache::ExprMapTy::iterator VMCI =
        VMC.ExprMap.find(I->getOperand(1));
      if (VMCI != VMC.ExprMap.end()) {
        // Comments describing this stuff are in the OperandConvertibleToType
        // switch statement for Store...
        //
        const Type *ElTy =
          cast<PointerType>(VMCI->second->getType())->getElementType();

        Value *SrcPtr = VMCI->second;

        if (ElTy != NewTy) {
          // We check that this is a struct in the initial scan...
          // NOTE(review): SElTy is never read; the cast<> only checks the
          // type.
          const StructType *SElTy = cast<StructType>(ElTy);

          std::vector<Value*> Indices;
          Indices.push_back(Constant::getNullValue(Type::UIntTy));

          unsigned Offset = 0;
          const Type *Ty = getStructOffsetType(ElTy, Offset, Indices, TD,false);
          assert(Offset == 0 && "Offset changed!");
          assert(NewTy == Ty && "Did not convert to correct type!");

          // Insert the GEP instruction before this store.
          SrcPtr = new GetElementPtrInst(SrcPtr, Indices,
                                         SrcPtr->getName()+".idx", I);
        }
        Res = new StoreInst(NewVal, SrcPtr);

        VMC.ExprMap[I] = Res;
      } else {
        // Otherwise, we haven't converted Operand #1 over yet...
        // Store through a null placeholder pointer, register the store, and
        // then convert the real pointer operand.
        const PointerType *NewPT = PointerType::get(NewTy);
        Res = new StoreInst(NewVal, Constant::getNullValue(NewPT));
        VMC.ExprMap[I] = Res;
        Res->setOperand(1, ConvertExpressionToType(I->getOperand(1),
                                                   NewPT, VMC, TD));
      }
    } else {                           // Replace the source pointer
      const Type *ValTy = cast<PointerType>(NewTy)->getElementType();

      Value *SrcPtr = NewVal;

      // A pointer to a struct is narrowed to a pointer to its first leaf.
      if (isa<StructType>(ValTy)) {
        std::vector<Value*> Indices;
        Indices.push_back(Constant::getNullValue(Type::UIntTy));

        unsigned Offset = 0;
        ValTy = getStructOffsetType(ValTy, Offset, Indices, TD, false);

        assert(Offset == 0 && ValTy);

        // Insert the GEP instruction before this store.
        SrcPtr = new GetElementPtrInst(SrcPtr, Indices,
                                       SrcPtr->getName()+".idx", I);
      }

      // Store a null placeholder value, register the store, and then convert
      // the real stored value to the pointee type.
      Res = new StoreInst(Constant::getNullValue(ValTy), SrcPtr);
      VMC.ExprMap[I] = Res;
      Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),
                                                 ValTy, VMC, TD));
    }
    break;
  }


  case Instruction::GetElementPtr: {
    // Convert a one index getelementptr into just about anything that is
    // desired.
    //
    BasicBlock::iterator It = I;
    const Type *OldElTy = cast<PointerType>(I->getType())->getElementType();
    unsigned DataSize = TD.getTypeSize(OldElTy);
    Value *Index = I->getOperand(1);

    if (DataSize != 1) {
      // Insert a multiply if the old element type is not a unit size...
      Value *CST;
      if (Index->getType()->isSigned())
        CST = ConstantSInt::get(Index->getType(), DataSize);
      else
        CST = ConstantUInt::get(Index->getType(), DataSize);

      Index = BinaryOperator::create(Instruction::Mul, Index, CST, "scale", It);
    }

    // Perform the conversion now...
    //
    std::vector<Value*> Indices;
    const Type *ElTy = ConvertibleToGEP(NewVal->getType(),Index,Indices,TD,&It);
    assert(ElTy != 0 && "GEP Conversion Failure!");
    Res = new GetElementPtrInst(NewVal, Indices, Name);
    assert(Res->getType() == PointerType::get(ElTy) &&
           "ConvertibleToGet failed!");
  }
#if 0
    if (I->getType() == PointerType::get(Type::SByteTy)) {
      // Convert a getelementptr sbyte * %reg111, uint 16 freely back to
      // anything that is a pointer type...
      //
      BasicBlock::iterator It = I;

      // Check to see if the second argument is an expression that can
      // be converted to the appropriate size... if so, allow it.
      //
      std::vector<Value*> Indices;
      const Type *ElTy = ConvertibleToGEP(NewVal->getType(), I->getOperand(1),
                                          Indices, TD, &It);
      assert(ElTy != 0 && "GEP Conversion Failure!");

      Res = new GetElementPtrInst(NewVal, Indices, Name);
    } else {
      // Convert a getelementptr ulong * %reg123, uint %N
      // to getelementptr  long * %reg123, uint %N
      // ... where the type must simply stay the same size...
      //
      GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
      std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end());
      Res = new GetElementPtrInst(NewVal, Indices, Name);
    }
#endif
    break;

  case Instruction::PHI: {
    PHINode *OldPN = cast<PHINode>(I);
    PHINode *NewPN = new PHINode(NewTy, Name);
    // Register the new PHI before converting its inputs, which recurses into
    // the expression converter.
    VMC.ExprMap[I] = NewPN;

    // Move the incoming entries over one at a time, converting each value.
    while (OldPN->getNumOperands()) {
      BasicBlock *BB = OldPN->getIncomingBlock(0);
      Value *OldVal = OldPN->getIncomingValue(0);
      ValueHandle OldValHandle(VMC, OldVal);
      OldPN->removeIncomingValue(BB, false);
      Value *V = ConvertExpressionToType(OldVal, NewTy, VMC, TD);
      NewPN->addIncoming(V, BB);
    }
    Res = NewPN;
    break;
  }

  case Instruction::Call: {
    Value *Meth = I->getOperand(0);
    std::vector<Value*> Params(I->op_begin()+1, I->op_end());

    if (Meth == OldVal) {   // Changing the function pointer?
      const PointerType *NewPTy = cast<PointerType>(NewVal->getType());
      const FunctionType *NewTy = cast<FunctionType>(NewPTy->getElementType());

      if (NewTy->getReturnType() == Type::VoidTy)
        Name = "";  // Make sure not to name a void call!

      // Get an iterator to the call instruction so that we can insert casts for
      // operands if need be.  Note that we do not require operands to be
      // convertible, we can insert casts if they are convertible but not
      // compatible.  The reason for this is that we prefer to have resolved
      // functions but casted arguments if possible.
      //
      BasicBlock::iterator It = I;

      // Convert over all of the call operands to their new types... but only
      // convert over the part that is not in the vararg section of the call.
      //
      for (unsigned i = 0; i != NewTy->getNumParams(); ++i)
        if (Params[i]->getType() != NewTy->getParamType(i)) {
          // Create a cast to convert it to the right type, we know that this
          // is a lossless cast...
          //
          Params[i] = new CastInst(Params[i], NewTy->getParamType(i),
                                   "callarg.cast." +
                                   Params[i]->getName(), It);
        }
      Meth = NewVal;  // Update call destination to new value

    } else {                   // Changing an argument, must be in vararg area
      std::vector<Value*>::iterator OI =
        find(Params.begin(), Params.end(), OldVal);
      assert (OI != Params.end() && "Not using value!");

      *OI = NewVal;
    }

    Res = new CallInst(Meth, Params, Name);
    break;
  }
  default:
    assert(0 && "Expression convertible, but don't know how to convert?");
    return;
  }

  // If the instruction was newly created, insert it into the instruction
  // stream.
  //
  BasicBlock::iterator It = I;
  assert(It != BB->end() && "Instruction not in own basic block??");
  BB->getInstList().insert(It, Res);   // Keep It pointing to old instruction

  DEBUG(std::cerr << "COT CREATED: "  << (void*)Res << " " << *Res
                  << "In: " << (void*)I << " " << *I << "Out: " << (void*)Res
                  << " " << *Res);

  // Add the instruction to the expression map
  VMC.ExprMap[I] = Res;

  if (I->getType() != Res->getType())
    ConvertValueToNewType(I, Res, VMC, TD);
  else {
    // Same result type: redirect every non-ValueHandle user of I to Res.
    // Until a ValueHandle use has been skipped, the iterator is re-fetched
    // from use_begin() on every pass.  Afterwards it is stepped back before
    // the replacement and forward again after it, presumably because
    // replaceUsesOfWith() invalidates the position at the use being replaced.
    bool FromStart = true;
    Value::use_iterator UI;
    while (1) {
      if (FromStart) UI = I->use_begin();
      if (UI == I->use_end()) break;

      if (isa<ValueHandle>(*UI)) {
        ++UI;
        FromStart = false;
      } else {
        User *U = *UI;
        if (!FromStart) --UI;
        U->replaceUsesOfWith(I, Res);
        if (!FromStart) ++UI;
      }
    }
  }
}
/// ConvertExpressionToType - Return a value equivalent to V but of type Ty.
///
/// If V already has type Ty it is returned unchanged.  If V was converted
/// before, the cached result from VMC.ExprMap is returned.  Constants are
/// converted by folding a cast.  For instructions, a replacement of type Ty is
/// created, inserted immediately before the original and recorded in
/// VMC.ExprMap; the original's users are then converted with
/// ConvertOperandToType.
///
/// \returns the converted value, or 0 (after asserting) for an opcode this
/// function does not know how to convert.
///
Value *llvm::ConvertExpressionToType(Value *V, const Type *Ty,
                                     ValueMapCache &VMC, const TargetData &TD) {
  if (V->getType() == Ty) return V;  // Already where we need to be?

  ValueMapCache::ExprMapTy::iterator VMCI = VMC.ExprMap.find(V);
  if (VMCI != VMC.ExprMap.end()) {
    assert(VMCI->second->getType() == Ty);

    // The temporary handle is created and destroyed on purpose.
    if (Instruction *I = dyn_cast<Instruction>(V))
      ValueHandle IHandle(VMC, I);  // Remove I if it is unused now!

    return VMCI->second;
  }

  DEBUG(std::cerr << "CETT: " << (void*)V << " " << *V);

  Instruction *I = dyn_cast<Instruction>(V);
  if (I == 0) {
    Constant *CPV = cast<Constant>(V);
    // Constants are converted by constant folding the cast that is required.
    // We assume here that all casts are implemented for constant prop.
    Value *Result = ConstantExpr::getCast(CPV, Ty);
    // Add the instruction to the expression map
    //VMC.ExprMap[V] = Result;
    return Result;
  }


  BasicBlock *BB = I->getParent();
  // Take the name away from the old instruction so the replacement can use it.
  std::string Name = I->getName();  if (!Name.empty()) I->setName("");
  Instruction *Res;     // Result of conversion

  ValueHandle IHandle(VMC, I);  // Prevent I from being removed!

  // Placeholder operand for instructions whose real operands are filled in
  // after creation.  The Call case below allows Ty to be void, for which no
  // null constant can be built, so guard it the same way ConvertOperandToType
  // does.
  Constant *Dummy = (Ty != Type::VoidTy) ? Constant::getNullValue(Ty) : 0;

  switch (I->getOpcode()) {
  case Instruction::Cast:
    assert(VMC.NewCasts.count(ValueHandle(VMC, I)) == 0);
    Res = new CastInst(I->getOperand(0), Ty, Name);
    // Remember that this cast was created by the conversion.
    VMC.NewCasts.insert(ValueHandle(VMC, Res));
    break;

  case Instruction::Add:
  case Instruction::Sub:
    // Create the operator with placeholders and register it before recursing
    // into the operands.
    Res = BinaryOperator::create(cast<BinaryOperator>(I)->getOpcode(),
                                 Dummy, Dummy, Name);
    VMC.ExprMap[I] = Res;   // Add node to expression eagerly

    Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), Ty, VMC, TD));
    Res->setOperand(1, ConvertExpressionToType(I->getOperand(1), Ty, VMC, TD));
    break;

  case Instruction::Shl:
  case Instruction::Shr:
    // Only the shifted value changes type; the shift amount is reused as is.
    Res = new ShiftInst(cast<ShiftInst>(I)->getOpcode(), Dummy,
                        I->getOperand(1), Name);
    VMC.ExprMap[I] = Res;
    Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), Ty, VMC, TD));
    break;

  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(I);

    // Load through a null placeholder pointer, register the load, and then
    // convert the real pointer operand to "pointer to Ty".
    Res = new LoadInst(Constant::getNullValue(PointerType::get(Ty)), Name);
    VMC.ExprMap[I] = Res;
    Res->setOperand(0, ConvertExpressionToType(LI->getPointerOperand(),
                                               PointerType::get(Ty), VMC, TD));
    assert(Res->getOperand(0)->getType() == PointerType::get(Ty));
    assert(Ty == Res->getType());
    assert(Res->getType()->isFirstClassType() && "Load of structure or array!");
    break;
  }

  case Instruction::PHI: {
    PHINode *OldPN = cast<PHINode>(I);
    PHINode *NewPN = new PHINode(Ty, Name);

    VMC.ExprMap[I] = NewPN;   // Add node to expression eagerly
    // Move the incoming entries over one at a time, converting each value.
    while (OldPN->getNumOperands()) {
      BasicBlock *BB = OldPN->getIncomingBlock(0);
      Value *OldVal = OldPN->getIncomingValue(0);
      ValueHandle OldValHandle(VMC, OldVal);
      OldPN->removeIncomingValue(BB, false);
      Value *V = ConvertExpressionToType(OldVal, Ty, VMC, TD);
      NewPN->addIncoming(V, BB);
    }
    Res = NewPN;
    break;
  }

  case Instruction::Malloc: {
    Res = ConvertMallocToType(cast<MallocInst>(I), Ty, Name, VMC, TD);
    break;
  }

  case Instruction::GetElementPtr: {
    // GetElementPtr's are directly convertible to a pointer type if they have
    // a number of zeros at the end.  Because removing these values does not
    // change the logical offset of the GEP, it is okay and fair to remove them.
    // This can change this:
    //   %t1 = getelementptr %Hosp * %hosp, ubyte 4, ubyte 0  ; <%List **>
    //   %t2 = cast %List * * %t1 to %List *
    // into
    //   %t2 = getelementptr %Hosp * %hosp, ubyte 4           ; <%List *>
    //
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);

    // Check to see if there are zero elements that we can remove from the
    // index array.  If there are, check to see if removing them causes us to
    // get to the right type...
    //
    std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end());
    const Type *BaseType = GEP->getPointerOperand()->getType();
    const Type *PVTy = cast<PointerType>(Ty)->getElementType();
    Res = 0;
    while (!Indices.empty() &&
           Indices.back() == Constant::getNullValue(Indices.back()->getType())){
      Indices.pop_back();
      if (GetElementPtrInst::getIndexedType(BaseType, Indices, true) == PVTy) {
        if (Indices.size() == 0)
          Res = new CastInst(GEP->getPointerOperand(), BaseType); // NOOP CAST
        else
          Res = new GetElementPtrInst(GEP->getPointerOperand(), Indices, Name);
        break;
      }
    }

    if (Res == 0 && GEP->getNumOperands() == 2 &&
        GEP->getType() == PointerType::get(Type::SByteTy)) {

      // Otherwise, we can convert a GEP from one form to the other iff the
      // current gep is of the form 'getelementptr sbyte*, unsigned N
      // and we could convert this to an appropriate GEP for the new type.
      //
      const PointerType *NewSrcTy = PointerType::get(PVTy);
      BasicBlock::iterator It = I;

      // Check to see if 'N' is an expression that can be converted to
      // the appropriate size... if so, allow it.
      //
      std::vector<Value*> Indices;
      const Type *ElTy = ConvertibleToGEP(NewSrcTy, I->getOperand(1),
                                          Indices, TD, &It);
      if (ElTy) {
        assert(ElTy == PVTy && "Internal error, setup wrong!");
        Res = new GetElementPtrInst(Constant::getNullValue(NewSrcTy),
                                    Indices, Name);
        VMC.ExprMap[I] = Res;
        Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),
                                                   NewSrcTy, VMC, TD));
      }
    }

    // Otherwise, it could be that we have something like this:
    //     getelementptr [[sbyte] *] * %reg115, uint %reg138    ; [sbyte]**
    // and want to convert it into something like this:
    //     getelemenptr [[int] *] * %reg115, uint %reg138      ; [int]**
    //
    if (Res == 0) {
      const PointerType *NewSrcTy = PointerType::get(PVTy);
      std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end());
      Res = new GetElementPtrInst(Constant::getNullValue(NewSrcTy),
                                  Indices, Name);
      VMC.ExprMap[I] = Res;
      Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),
                                                 NewSrcTy, VMC, TD));
    }


    assert(Res && "Didn't find match!");
    break;
  }

  case Instruction::Call: {
    assert(!isa<Function>(I->getOperand(0)));

    // If this is a function pointer, we can convert the return type if we can
    // convert the source function pointer.
    //
    const PointerType *PT = cast<PointerType>(I->getOperand(0)->getType());
    const FunctionType *FT = cast<FunctionType>(PT->getElementType());
    std::vector<const Type *> ArgTys(FT->param_begin(), FT->param_end());
    const FunctionType *NewTy =
      FunctionType::get(Ty, ArgTys, FT->isVarArg());
    const PointerType *NewPTy = PointerType::get(NewTy);
    if (Ty == Type::VoidTy)
      Name = "";  // Make sure not to name calls that now return void!

    // Call through a null placeholder callee, register the call, and then
    // convert the real callee to the new function pointer type.
    Res = new CallInst(Constant::getNullValue(NewPTy),
                       std::vector<Value*>(I->op_begin()+1, I->op_end()),
                       Name);
    VMC.ExprMap[I] = Res;
    Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),NewPTy,VMC,TD));
    break;
  }
  default:
    assert(0 && "Expression convertible, but don't know how to convert?");
    return 0;
  }

  assert(Res->getType() == Ty && "Didn't convert expr to correct type!");

  BB->getInstList().insert(I, Res);

  // Add the instruction to the expression map
  VMC.ExprMap[I] = Res;

  // Convert every user of the old instruction.  The use is addressed by index
  // and the index only advances when the conversion left the use count
  // unchanged.
  unsigned NumUses = I->use_size();
  for (unsigned It = 0; It < NumUses; ) {
    unsigned OldSize = NumUses;
    Value::use_iterator UI = I->use_begin();
    std::advance(UI, It);
    ConvertOperandToType(*UI, I, Res, VMC, TD);
    NumUses = I->use_size();
    if (NumUses == OldSize) ++It;
  }

  DEBUG(std::cerr << "ExpIn: " << (void*)I << " " << *I
                  << "ExpOut: " << (void*)Res << " " << *Res);

  return Res;
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The /// effect of this is to copy significantly less code in cases where (for /// example) a function call with constant arguments is inlined, and those /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const TargetData *TD, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) assert(VMap.count(II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; CloneWorklist.push_back(&OldFunc->getEntryBlock()); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { Value *V = VMap[BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (NewBB == 0) continue; // Dead block. 
// Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Loop over all of the instructions in the block, fixing up operand // references as we go. This uses VMap to do all the hard work. // BasicBlock::iterator I = NewBB->begin(); DebugLoc TheCallDL; if (TheCall) TheCallDL = TheCall->getDebugLoc(); // Handle PHI nodes specially, as we have to remove references to dead // blocks. if (PHINode *PN = dyn_cast<PHINode>(I)) { // Skip over all PHI nodes, remembering them for later. BasicBlock::const_iterator OldI = BI->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) PHIToResolve.push_back(cast<PHINode>(OldI)); } // Otherwise, remap the rest of the instructions normally. for (; I != NewBB->end(); ++I) RemapInstruction(I, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap[PN->getIncomingBlock(pred)]; if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. 
} } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[OldI] == PN && "VMap mismatch"); VMap[OldI] = NV; PN->eraseFromParent(); ++OldI; } } // NOTE: We cannot eliminate single entry phi nodes here, because of // VMap. 
Single entry phi nodes can have multiple VMap entries // pointing at them. Thus, deleting one would require scanning the VMap // to update any entries in it that would require that. This would be // really slow. } // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); while (I != NewFunc->end()) { BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } // Note that we can't eliminate uncond branches if the destination has // single-entry PHI nodes. Eliminating the single-entry phi nodes would // require scanning the VMap to update any entries that point to the phi // node. BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { ++I; continue; } // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } }
/// Try to partially inline \p F.
///
/// \p F is a candidate only if its entry block ends in a conditional branch
/// and exactly one successor edge of that branch leads to a block terminated
/// by a return.  In that case \p F is cloned, every block of the clone except
/// the entry block and the (split) return block is extracted into a new
/// function, and the remaining thin clone is inlined into all direct call and
/// invoke users.  The clone is then erased and any remaining users are pointed
/// back at \p F.
///
/// \returns the function produced by the code extractor, or nullptr if \p F
/// is not a candidate.
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return nullptr;

  BasicBlock *ReturnBlock = nullptr;
  BasicBlock *NonReturnBlock = nullptr;
  unsigned ReturnCount = 0;
  for (BasicBlock *BB : successors(EntryBlock)) {
    if (isa<ReturnInst>(BB->getTerminator())) {
      ReturnBlock = BB;
      ReturnCount++;
    } else
      NonReturnBlock = BB;
  }

  if (ReturnCount != 1)
    return nullptr;

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function *DuplicateFunction = CloneFunction(F, VMap);
  DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);

  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(DuplicateFunction);

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block.  For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = NewReturnBlock;
  NewReturnBlock = NewReturnBlock->splitBasicBlock(
      NewReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &NewReturnBlock->front();
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    // Step past OldPhi before touching it.  removeIncomingValue() below
    // erases the PHI when its last incoming entry is removed, which would
    // otherwise leave I dangling.
    ++I;

    PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = NewReturnBlock->getFirstNonPHI();

    // The outer PHI merges the inner PHI with the value that used to arrive
    // directly from the entry block.
    RetPhi->addIncoming(OldPhi, PreReturn);
    RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
                        NewEntryBlock);
    OldPhi->removeIncomingValue(NewEntryBlock);
  }
  // The entry block now branches to the outer half of the split block.
  NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock *> ToExtract;
  ToExtract.push_back(NewNonReturnBlock);
  for (BasicBlock &BB : *DuplicateFunction)
    if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
        &BB != NewNonReturnBlock)
      ToExtract.push_back(&BB);

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*DuplicateFunction);

  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  LoopInfo LI(DT);
  BranchProbabilityInfo BPI(*DuplicateFunction, LI);
  BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);

  // Extract the body of the if.
  Function *ExtractedFunction =
      CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
          .extractCodeRegion();

  // Inline the top-level if test into all callers.  The user list is copied
  // first because inlining modifies it.
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  for (User *U : Users)
    if (CallInst *CI = dyn_cast<CallInst>(U))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
      InlineFunction(II, IFI);

  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  DuplicateFunction->replaceAllUsesWith(F);
  DuplicateFunction->eraseFromParent();

  ++NumPartialInlined;

  return ExtractedFunction;
}
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and /// DominanceFrontier, but no other analyses. BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); // Move the edges from Preds to point to NewBB instead of BB. for (unsigned i = 0; i != NumPreds; ++i) Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. 
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node. Value *InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); // Check to see if we can eliminate this phi node. if (Value *V = PN->hasConstantValue(DT != 0)) { Instruction *I = dyn_cast<Instruction>(V); if (!I || DT == 0 || DT->dominates(I, PN)) { PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); PN->eraseFromParent(); } } } return NewBB; }
// Inserts an unwinding annotation (assume or assert, depending on the function // given in the constructor) and removes the loop. bool RmLoopPass::runOnLoop(Loop *L, LPPassManager &LPM){ BasicBlock *latch = L -> getLoopLatch(); BasicBlock *header = L -> getHeader(); SmallVector<BasicBlock *, 1> exitBBs; L -> getExitBlocks(exitBBs); BasicBlock *exitBB = NULL; SmallVector<BasicBlock *, 1>::iterator it = exitBBs.begin(); for(; it != exitBBs.end() && !exitBB; ++it){ if(std::find(createdBB.begin(), createdBB.end(), *it) == createdBB.end()) exitBB = *it; } assert(exitBB && "exitBB is null"); // std::cout << "\n\n LOOP REMOVAL:\n"; // std::cout << "Latch: " << latch -> getName().str() << "\n"; // std::cout << "Header: " << header -> getName().str() << "\n"; // std::cout << "ExitBB: " << exitBB -> getName().str() << "\n"; //assert(exitBBs.size() == 1 && "RmLoopPass - more than one exit BB"); // At this point we have an header, a latch and an exit BasicBlock // and they all are different assert(latch && header && "Not able to obtain some loop basic block; try to run doInitialization before"); // Get loop last branch instruction BranchInst *br = cast<BranchInst>(latch -> getTerminator()); // assert(br -> isConditional() && "loop terminator with unconditional branch"); // Get loop's last iteration condition Value *cond = NULL; // Loop last iteration branch condition if(br -> isConditional()) cond = br -> getCondition(); else{ std::cout << "\n************************************************************\n"; std::cout << "*BE CAREFUL!!!! There is a latch with unconditional branch!*\n"; std::cout << "************************************************************\n"; } // In order to remove the back edge, we need to remove the loop from the LPPAssManager LPM.deleteLoopFromQueue(L); // Create a new BasicBlock with the unwinding annotation. 
// Unreachable instruction is used as a terminator instruction in this BasicBlock BasicBlock *newBB = BasicBlock::Create(header -> getContext() , "unwinding_annotation" , header -> getParent()); createdBB.push_back(newBB); Type *t = Type::getInt32Ty(header -> getContext()); Constant *c = llvm::ConstantInt::get(t,uint32_t(0)); ArrayRef<Value *> *param = new ArrayRef<Value *>(c); CallInst::Create(function, *param, "", newBB); if(unreachable){ new UnreachableInst(header -> getContext(), newBB); }else{ BranchInst::Create(exitBB,newBB); for(BasicBlock::iterator it = exitBB->begin(); it != exitBB->end();++it){ PHINode *phi = dyn_cast<PHINode>(it); if(!phi) break; //Value *latchValue = phi->getIncomingValueForBlock(latch); phi->addIncoming(UndefValue::get(phi -> getType()),newBB); } } BranchInst *newBr = NULL; if(cond){ if(br -> getSuccessor(0) == header){ newBr = BranchInst::Create(newBB,br -> getSuccessor(1),cond); }else{ newBr = BranchInst::Create(br -> getSuccessor(1),newBB,cond); } }else{ newBr = BranchInst::Create(newBB); } ReplaceInstWithInst(br,newBr); // The latch BasicBlock must be removed from the PHI nodes in // the header BasicBlock for(BasicBlock::iterator it = header->begin(); it != header->end();++it){ PHINode *phi = dyn_cast<PHINode>(it); if(!phi) break; int latchIndex = phi->getBasicBlockIndex(latch); phi->removeIncomingValue(latchIndex); } //std::cout << "\n---- NewBB ------\n"; //newBB -> print(outs()); //std::cout << "\n---- ExitBB\n"; //exitBB -> print(outs()); return true; }
void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { auto getFirstPHI = [](BasicBlock *BB) { BasicBlock::iterator I = BB->begin(); PHINode *FirstPhi = nullptr; while (I != BB->end()) { PHINode *Phi = dyn_cast<PHINode>(I); if (!Phi) break; if (!FirstPhi) { FirstPhi = Phi; break; } } return FirstPhi; }; // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. BasicBlock *PreReturn = ClonedOI->ReturnBlock; // only split block when necessary: PHINode *FirstPhi = getFirstPHI(PreReturn); unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size(); if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1) return; auto IsTrivialPhi = [](PHINode *PN) -> Value * { Value *CommonValue = PN->getIncomingValue(0); if (all_of(PN->incoming_values(), [&](Value *V) { return V == CommonValue; })) return CommonValue; return nullptr; }; ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock( ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = PreReturn->begin(); Instruction *Ins = &ClonedOI->ReturnBlock->front(); SmallVector<Instruction *, 4> DeadPhis; while (I != PreReturn->end()) { PHINode *OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; PHINode *RetPhi = PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); OldPhi->replaceAllUsesWith(RetPhi); Ins = ClonedOI->ReturnBlock->getFirstNonPHI(); RetPhi->addIncoming(&*I, PreReturn); for (BasicBlock *E : ClonedOI->ReturnBlockPreds) { RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E); OldPhi->removeIncomingValue(E); } // After incoming values splitting, the old phi may become trivial. // Keeping the trivial phi can introduce definition inside the outline // region which is live-out, causing necessary overhead (load, store // arg passing etc). 
if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { OldPhi->replaceAllUsesWith(OldPhiVal); DeadPhis.push_back(OldPhi); } ++I; } for (auto *DP : DeadPhis) DP->eraseFromParent(); for (auto E : ClonedOI->ReturnBlockPreds) { E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); } }
/// Update the PHI nodes in OrigBB to include the values coming from NewBB. /// This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock *> Preds, BranchInst *BI, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = nullptr; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (!PredSet.count(PN->getIncomingBlock(i))) continue; if (!InVal) InVal = PN->getIncomingValue(i); else if (InVal != PN->getIncomingValue(i)) { InVal = nullptr; break; } } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. // NOTE! This loop walks backwards for a reason! First off, this minimizes // the cost of removal if we end up removing a large number of values, and // second off, this ensures that the indices for the incoming values // aren't invalidated when we remove one. for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) if (PredSet.count(PN->getIncomingBlock(i))) PN->removeIncomingValue(i, false); // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); continue; } // If the values coming into the block are not the same, we need a new // PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); // NOTE! This loop walks backwards for a reason! 
First off, this minimizes // the cost of removal if we end up removing a large number of values, and // second off, this ensures that the indices for the incoming values aren't // invalidated when we remove one. for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { BasicBlock *IncomingBB = PN->getIncomingBlock(i); if (PredSet.count(IncomingBB)) { Value *V = PN->removeIncomingValue(i, false); NewPHI->addIncoming(V, IncomingBB); } } PN->addIncoming(NewPHI, NewBB); } }
/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { PHINode *PN = dyn_cast<PHINode>(Header->begin()); if (!PN) return; // No PHI nodes. // If the header node contains any PHI nodes, check to see if there is more // than one entry from outside the region. If so, we need to sever the // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; // If there is one (or fewer) predecessor from outside the region, we don't // need to do anything special. if (NumPredsOutsideRegion <= 1) return; } // Otherwise, we need to split the header block into two pieces: one // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; Blocks.remove(OldPred); Blocks.insert(NewBB); Header = NewBB; // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. 
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName() + ".ce", &NewBB->front()); PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; } } } } }
/// \brief This method is called when the specified loop has more than one /// backedge in it. /// /// If this occurs, revector all of these backedges to target a new basic block /// and have that block branch to the loop header. This ensures that loops /// have exactly one backedge. static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, DominatorTree *DT, LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); // Unique backedge insertion currently depends on having a preheader. if (!Preheader) return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName() + ".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in // the backedge block which correspond to any PHI nodes in the header block. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), PN->getName()+".be", BETerminator); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; Value *UniqueValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); if (IBB == Preheader) { PreheaderIdx = i; } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { if (!UniqueValue) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; } } } // Delete all of the incoming values from the old PN except the preheader's assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); if (PreheaderIdx != 0) { PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); } // Nuke all entries except the zero'th. for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) PN->removeIncomingValue(e-i, false); // Finally, add the newly constructed PHI node as the entry for the BEBlock. PN->addIncoming(NewPN, BEBlock); // As an optimization, if all incoming values in the new PhiNode (which is a // subset of the incoming values of the old PHI node) have the same value, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); BEBlock->getInstList().erase(NewPN); } } // Now that all of the PHI nodes have been inserted and adjusted, modify the // backedge blocks to just to the BEBlock instead of the header. 
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) if (TI->getSuccessor(Op) == Header) TI->setSuccessor(Op, BEBlock); } //===--- Update all analyses which we must preserve now -----------------===// // Update Loop Information - we know that this block is now in the current // loop and all parent loops. L->addBasicBlockToLoop(BEBlock, *LI); // Update dominator information DT->splitBlock(BEBlock); return BEBlock; }
/// This works like CloneAndPruneFunctionInto, except that it does not clone the
/// entire function. Instead it starts at an instruction provided by the caller
/// and copies (and prunes) only the code reachable from that instruction.
///
/// \param NewFunc            Function that receives the cloned blocks.
/// \param OldFunc            Function being cloned.
/// \param StartingInst       Instruction to start cloning at; when null,
///                           cloning starts at the first instruction of
///                           OldFunc's entry block.
/// \param VMap               Old-to-new value map; read and updated here.
/// \param ModuleLevelChanges Selects RF_None (true) or RF_NoModuleLevelChanges
///                           (false) when remapping, and is forwarded to the
///                           block cloner.
/// \param Returns            Receives every return instruction that is still
///                           present once folding and merging are done.
/// \param NameSuffix         Forwarded to the block cloner; must not be null.
/// \param CodeInfo           Forwarded to the block cloner.
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                     const Instruction *StartingInst,
                                     ValueToValueMapTy &VMap,
                                     bool ModuleLevelChanges,
                                     SmallVectorImpl<ReturnInst *> &Returns,
                                     const char *NameSuffix,
                                     ClonedCodeInfo *CodeInfo) {
  assert(NameSuffix && "NameSuffix cannot be null!");

  // No type remapper or materializer is used; both are passed as null to
  // RemapInstruction below.
  ValueMapTypeRemapper *TypeMapper = nullptr;
  ValueMaterializer *Materializer = nullptr;

#ifndef NDEBUG
  // If the cloning starts at the beginning of the function, verify that
  // the function arguments are mapped.
  if (!StartingInst)
    for (const Argument &II : OldFunc->args())
      assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif

  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
                            NameSuffix, CodeInfo);
  const BasicBlock *StartingBB;
  if (StartingInst)
    StartingBB = StartingInst->getParent();
  else {
    StartingBB = &OldFunc->getEntryBlock();
    StartingInst = &StartingBB->front();
  }

  // Clone the entry block, and anything recursively reachable from it.
  std::vector<const BasicBlock*> CloneWorklist;
  PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
  }

  // Loop over all of the basic blocks in the old function.  If the block was
  // reachable, we have cloned it and the old block is now in the value map:
  // insert it into the new function in the right order.  If not, ignore it.
  //
  // Defer PHI resolution until rest of function is resolved.
  SmallVector<const PHINode*, 16> PHIToResolve;
  for (const BasicBlock &BI : *OldFunc) {
    Value *V = VMap[&BI];
    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
    if (!NewBB) continue;  // Dead block.

    // Add the new block to the new function.
    NewFunc->getBasicBlockList().push_back(NewBB);

    // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.  Only the leading run of PHIs that are still mapped to PHIs is
    // collected; the scan stops at the first instruction that is not.
    for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
      // PHI nodes may have been remapped to non-PHI nodes by the caller or
      // during the cloning process.
      if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        if (isa<PHINode>(VMap[PN]))
          PHIToResolve.push_back(PN);
        else
          break;
      } else {
        break;
      }
    }

    // Finally, remap the terminator instructions, as those can't be remapped
    // until all BBs are mapped.
    RemapInstruction(NewBB->getTerminator(), VMap,
                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                     TypeMapper, Materializer);
  }

  // Defer PHI resolution until rest of function is resolved, PHI resolution
  // requires the CFG to be up-to-date.
  //
  // Each iteration of this outer loop handles all collected PHIs that share
  // one parent block; phino is advanced by the inner loop.
  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
    const PHINode *OPN = PHIToResolve[phino];
    unsigned NumPreds = OPN->getNumIncomingValues();
    const BasicBlock *OldBB = OPN->getParent();
    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);

    // Map operands for blocks that are live and remove operands for blocks
    // that are dead.
    for (; phino != PHIToResolve.size() &&
         PHIToResolve[phino]->getParent() == OldBB; ++phino) {
      OPN = PHIToResolve[phino];
      PHINode *PN = cast<PHINode>(VMap[OPN]);
      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
        Value *V = VMap[PN->getIncomingBlock(pred)];
        if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
          Value *InVal = MapValue(PN->getIncomingValue(pred),
                                  VMap,
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
          assert(InVal && "Unknown input value?");
          PN->setIncomingValue(pred, InVal);
          PN->setIncomingBlock(pred, MappedBlock);
        } else {
          // The incoming block has no mapping, so drop the entry.  Both the
          // index and the bound step back so the same slot is examined again.
          PN->removeIncomingValue(pred, false);
          --pred, --e;  // Revisit the next entry.
        }
      }
    }

    // The loop above has removed PHI entries for those blocks that are dead
    // and has updated others.  However, if a block is live (i.e. copied over)
    // but its terminator has been changed to not go to this block, then our
    // phi nodes will have invalid entries.  Update the PHI nodes in this
    // case.
    PHINode *PN = cast<PHINode>(NewBB->begin());
    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
    if (NumPreds != PN->getNumIncomingValues()) {
      assert(NumPreds < PN->getNumIncomingValues());
      // Count how many times each predecessor comes to this block.  The
      // counters are unsigned: they are decremented here and incremented
      // below, leaving (entries in the PHI) - (actual edges) per block.
      std::map<BasicBlock*, unsigned> PredCount;
      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
           PI != E; ++PI)
        --PredCount[*PI];

      // Figure out how many entries to remove from each PHI.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        ++PredCount[PN->getIncomingBlock(i)];

      // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
      // entries.
      BasicBlock::iterator I = NewBB->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
        for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
             E = PredCount.end(); PCI != E; ++PCI) {
          BasicBlock *Pred     = PCI->first;
          for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
            PN->removeIncomingValue(Pred, false);
        }
      }
    }

    // If the loops above have made these phi nodes have 0 or 1 operand,
    // replace them with undef or the input value.  We must do this for
    // correctness, because 0-operand phis are not valid.
    //
    // Only the zero-operand case is rewritten here: every PHI of the block is
    // replaced by undef, the value map is updated to match, and the PHI is
    // erased.
    PN = cast<PHINode>(NewBB->begin());
    if (PN->getNumIncomingValues() == 0) {
      BasicBlock::iterator I = NewBB->begin();
      BasicBlock::const_iterator OldI = OldBB->begin();
      while ((PN = dyn_cast<PHINode>(I++))) {
        Value *NV = UndefValue::get(PN->getType());
        PN->replaceAllUsesWith(NV);
        assert(VMap[&*OldI] == PN && "VMap mismatch");
        VMap[&*OldI] = NV;
        PN->eraseFromParent();
        ++OldI;
      }
    }
  }

  // Make a second pass over the PHINodes now that all of them have been
  // remapped into the new function, simplifying the PHINode and performing any
  // recursive simplifications exposed. This will transparently update the
  // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
  // two PHINodes, the iteration over the old PHIs remains valid, and the
  // mapping will just map us to the new node (which may not even be a PHI
  // node).
  for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
    if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
      recursivelySimplifyInstruction(PN);

  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches. This happens all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
  Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
  Function::iterator I = Begin;
  while (I != NewFunc->end()) {
    // Check if this block has become dead during inlining or other
    // simplifications. Note that the first block will appear dead, as it has
    // not yet been wired up properly.
    if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
                       I->getSinglePredecessor() == &*I)) {
      // Step the iterator off the block before deleting it.
      BasicBlock *DeadBB = &*I++;
      DeleteDeadBlock(DeadBB);
      continue;
    }

    // We need to simplify conditional branches and switches with a constant
    // operand. We try to prune these out when cloning, but if the
    // simplification required looking through PHI nodes, those are only
    // available after forming the full basic block. That may leave some here,
    // and we still want to prune the dead code as early as possible.
    ConstantFoldTerminator(&*I);

    // Only an unconditional branch to a block with a single predecessor can be
    // folded; anything else just moves on to the next block.
    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }

    BasicBlock *Dest = BI->getSuccessor(0);
    if (!Dest->getSinglePredecessor()) {
      ++I; continue;
    }

    // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
    // above should have zapped all of them..
    assert(!isa<PHINode>(Dest->begin()));

    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();

    // Make all PHI nodes that referred to Dest now refer to I as their source.
    Dest->replaceAllUsesWith(&*I);

    // Move all the instructions in the succ to the pred.
    I->getInstList().splice(I->end(), Dest->getInstList());

    // Remove the dest block.
    Dest->eraseFromParent();

    // Do not increment I, iteratively merge all things this block branches to.
  }

  // Make a final pass over the basic blocks from the old function to gather
  // any return instructions which survived folding. We have to do this here
  // because we can iteratively remove and merge returns above.
  for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
                          E = NewFunc->end();
       I != E; ++I)
    if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
      Returns.push_back(RI);
}
/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and /// an unconditional branch in it. void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { if (SinglePred != DestBB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); MergeBasicBlockIntoOnlyPred(DestBB, this); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } } // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB // to handle the new incoming edges it is about to have. PHINode *PN; for (BasicBlock::iterator BBI = DestBB->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) { // Remove the incoming value for BB, and remember it. Value *InVal = PN->removeIncomingValue(BB, false); // Two options: either the InVal is a phi node defined in BB or it is some // value that dominates BB. PHINode *InValPhi = dyn_cast<PHINode>(InVal); if (InValPhi && InValPhi->getParent() == BB) { // Add all of the input values of the input PHI as inputs of this phi. for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InValPhi->getIncomingValue(i), InValPhi->getIncomingBlock(i)); } else { // Otherwise, add one instance of the dominating value for each edge that // we will be adding. 
if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) PN->addIncoming(InVal, *PI); } } } // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); if (DT && !ModifiedDT) { BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); DT->changeImmediateDominator(DestBB, NewIDom); DT->eraseNode(BB); } if (PFI) { PFI->replaceAllUses(BB, DestBB); PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); ++NumBlocksElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); }
void RegionExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const { for (SetVector<BasicBlock *>::const_iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { BasicBlock *BB = *I; // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { for (User::op_iterator OI = II->op_begin(), OE = II->op_end(); OI != OE; ++OI) if (definedInCaller(Blocks, *OI)) Inputs.insert(*OI); #if LLVM_VERSION_MINOR == 5 for (User *U : II->users()) if (!definedInRegion(Blocks, U)) { #else for (Value::use_iterator UI = II->use_begin(), UE = II->use_end(); UI != UE; ++UI) if (!definedInRegion(Blocks, *UI)) { #endif Outputs.insert(II); break; } } } } /// severSplitPHINodes - If a PHI node has multiple inputs from outside of the /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. void RegionExtractor::severSplitPHINodes(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { PHINode *PN = dyn_cast<PHINode>(Header->begin()); if (!PN) return; // No PHI nodes. // If the header node contains any PHI nodes, check to see if there is more // than one entry from outside the region. If so, we need to sever the // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; // If there is one (or fewer) predecessor from outside the region, we don't // need to do anything special. 
if (NumPredsOutsideRegion <= 1) return; } // Otherwise, we need to split the header block into two pieces: one // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, Header->getName()+".ce"); // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; Blocks.remove(OldPred); Blocks.insert(NewBB); Header = NewBB; // Okay, update dominator sets. The blocks that dominate the new one are the // blocks that dominate TIBB plus the new block itself. if (DT) DT->splitBlock(NewBB); // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName()+".ce", NewBB->begin()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. 
for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; } } } } }