/// FindReusablePredBB - Check all of the predecessors of the block DestPHI /// lives in to see if there is a block that we can reuse as a critical edge /// from TIBB. static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { BasicBlock *Dest = DestPHI->getParent(); /// TIPHIValues - This array is lazily computed to determine the values of /// PHIs in Dest that TI would provide. SmallVector<Value*, 32> TIPHIValues; /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. unsigned TIBBEntryNo = 0; // Check to see if Dest has any blocks that can be used as a split edge for // this terminator. for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { BasicBlock *Pred = DestPHI->getIncomingBlock(pi); // To be usable, the pred has to end with an uncond branch to the dest. BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); if (!PredBr || !PredBr->isUnconditional()) continue; // Must be empty other than the branch and debug info. BasicBlock::iterator I = Pred->begin(); while (isa<DbgInfoIntrinsic>(I)) I++; if (&*I != PredBr) continue; // Cannot be the entry block; its label does not get emitted. if (Pred == &Dest->getParent()->getEntryBlock()) continue; // Finally, since we know that Dest has phi nodes in it, we have to make // sure that jumping to Pred will have the same effect as going to Dest in // terms of PHI values. PHINode *PN; unsigned PHINo = 0; unsigned PredEntryNo = pi; bool FoundMatch = true; for (BasicBlock::iterator I = Dest->begin(); (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { if (PHINo == TIPHIValues.size()) { if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) TIBBEntryNo = PN->getBasicBlockIndex(TIBB); TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); } // If the PHI entry doesn't work, we can't use this pred. if (PN->getIncomingBlock(PredEntryNo) != Pred) PredEntryNo = PN->getBasicBlockIndex(Pred); if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { FoundMatch = false; break; } } // If we found a workable predecessor, change TI to branch to Succ. if (FoundMatch) return Pred; } return 0; }
// switchConvert - Convert the switch statement into a linear scan // through all the case values void LowerSwitchPass::switchConvert(CaseItr begin, CaseItr end, Value* value, BasicBlock* origBlock, BasicBlock* defaultBlock) { BasicBlock *curHead = defaultBlock; Function *F = origBlock->getParent(); // iterate through all the cases, creating a new BasicBlock for each for (CaseItr it = begin; it < end; ++it) { BasicBlock *newBlock = BasicBlock::Create(getGlobalContext(), "NodeBlock"); Function::iterator FI = origBlock; F->getBasicBlockList().insert(++FI, newBlock); ICmpInst *cmpInst = new ICmpInst(*newBlock, ICmpInst::ICMP_EQ, value, it->value, "case.cmp"); BranchInst::Create(it->block, curHead, cmpInst, newBlock); // If there were any PHI nodes in this successor, rewrite one entry // from origBlock to come from newBlock. for (BasicBlock::iterator bi = it->block->begin(); isa<PHINode>(bi); ++bi) { PHINode* PN = cast<PHINode>(bi); int blockIndex = PN->getBasicBlockIndex(origBlock); assert(blockIndex != -1 && "Switch didn't go to this successor??"); PN->setIncomingBlock((unsigned)blockIndex, newBlock); } curHead = newBlock; } // Branch to our shiny new if-then stuff... BranchInst::Create(curHead, origBlock); }
// newLeafBlock - Create a new leaf block for the binary lookup tree. It // checks if the switch's value == the case's value. If not, then it // jumps to the default branch. At this point in the tree, the value // can't be another valid case value, so the jump to the "default" branch // is warranted. // BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, BasicBlock* OrigBlock, BasicBlock* Default) { Function* F = OrigBlock->getParent(); BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewLeaf); // Emit comparison ICmpInst* Comp = NULL; if (Leaf.Low == Leaf.High) { // Make the seteq instruction... Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf"); } else { // Make range comparison if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { // Val >= Min && Val <= Hi --> Val <= Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, "SwitchLeaf"); } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, "SwitchLeaf"); } else { // Emit V-Lo <=u Hi-Lo Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo, Val->getName()+".off", NewLeaf); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, "SwitchLeaf"); } } // Make the conditional branch... BasicBlock* Succ = Leaf.BB; BranchInst::Create(Succ, Default, Comp, NewLeaf); // If there were any PHI nodes in this successor, rewrite one entry // from OrigBlock to come from NewLeaf. for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { PHINode* PN = cast<PHINode>(I); // Remove all but one incoming entries from the cluster uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() - cast<ConstantInt>(Leaf.Low)->getSExtValue(); for (uint64_t j = 0; j < Range; ++j) { PN->removeIncomingValue(OrigBlock); } int BlockIdx = PN->getBasicBlockIndex(OrigBlock); assert(BlockIdx != -1 && "Switch didn't go to this successor??"); PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); } return NewLeaf; }
/// This splits a basic block into two at the specified /// instruction. Note that all instructions BEFORE the specified iterator stay /// as part of the original basic block, an unconditional branch is added to /// the new BB, and the rest of the instructions in the BB are moved to the new /// BB, including the old terminator. This invalidates the iterator. /// /// Note that this only works on well formed basic blocks (must have a /// terminator), and 'I' must not be the end of instruction list (which would /// cause a degenerate basic block to be formed, having a terminator inside of /// the basic block). /// BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) { assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!"); assert(I != InstList.end() && "Trying to get me to create degenerate basic block!"); BasicBlock *InsertBefore = std::next(Function::iterator(this)) .getNodePtrUnchecked(); BasicBlock *New = BasicBlock::Create(getContext(), BBName, getParent(), InsertBefore); // Save DebugLoc of split point before invalidating iterator. DebugLoc Loc = I->getDebugLoc(); // Move all of the specified instructions from the original basic block into // the new basic block. New->getInstList().splice(New->end(), this->getInstList(), I, end()); // Add a branch instruction to the newly formed basic block. BranchInst *BI = BranchInst::Create(New, this); BI->setDebugLoc(Loc); // Now we must loop through all of the successors of the New block (which // _were_ the successors of the 'this' block), and update any PHI nodes in // successors. If there were PHI nodes in the successors, then they need to // know that incoming branches will be from New, not from Old. // for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) { // Loop over any phi nodes in the basic block, updating the BB field of // incoming values... BasicBlock *Successor = *I; PHINode *PN; for (BasicBlock::iterator II = Successor->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { int IDX = PN->getBasicBlockIndex(this); while (IDX != -1) { PN->setIncomingBlock((unsigned)IDX, New); IDX = PN->getBasicBlockIndex(this); } } } return New; }
// This is basically the split basic block function but it does not create // a new basic block. void Decompiler::splitBasicBlockIntoBlock(Function::iterator Src, BasicBlock::iterator FirstInst, BasicBlock *Tgt) { assert(Src->getTerminator() && "Can't use splitBasicBlock on degenerate BB!"); assert(FirstInst != Src->end() && "Trying to get me to create degenerate basic block!"); Tgt->moveAfter(Src); // Move all of the specified instructions from the original basic block into // the new basic block. Tgt->getInstList().splice(Tgt->end(), Src->getInstList(), FirstInst, Src->end()); // Add a branch instruction to the newly formed basic block. BranchInst *BI = BranchInst::Create(Tgt, Src); // Set debugLoc to the instruction before the terminator's DebugLoc. // Note the pre-inc which can confuse folks. BI->setDebugLoc((++Src->rbegin())->getDebugLoc()); // Now we must loop through all of the successors of the New block (which // _were_ the successors of the 'this' block), and update any PHI nodes in // successors. If there were PHI nodes in the successors, then they need to // know that incoming branches will be from New, not from Old. // for (succ_iterator I = succ_begin(Tgt), E = succ_end(Tgt); I != E; ++I) { // Loop over any phi nodes in the basic block, updating the BB field of // incoming values... BasicBlock *Successor = *I; PHINode *PN; for (BasicBlock::iterator II = Successor->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { int IDX = PN->getBasicBlockIndex(Src); while (IDX != -1) { PN->setIncomingBlock((unsigned)IDX, Tgt); IDX = PN->getBasicBlockIndex(Src); } } } }
/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock*> Preds, BranchInst *BI, Pass *P, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1, e = Preds.size(); i != e; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // Explicitly check the BB index here to handle duplicates in Preds. int Idx = PN->getBasicBlockIndex(Preds[i]); if (Idx >= 0) PN->removeIncomingValue(Idx, false); } } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } }
// processSwitchInst - Replace the specified switch instruction with a sequence // of chained if-then instructions. // void LowerSwitchPass::processSwitchInst(SwitchInst *SI) { BasicBlock *origBlock = SI->getParent(); BasicBlock *defaultBlock = SI->getDefaultDest(); Function *F = origBlock->getParent(); Value *switchValue = SI->getCondition(); // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. BasicBlock* newDefault = BasicBlock::Create(getGlobalContext(), "newDefault"); #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 8) F->getBasicBlockList().insert(defaultBlock->getIterator(), newDefault); #else F->getBasicBlockList().insert(defaultBlock, newDefault); #endif BranchInst::Create(defaultBlock, newDefault); // If there is an entry in any PHI nodes for the default edge, make sure // to update them as well. for (BasicBlock::iterator I = defaultBlock->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); int BlockIdx = PN->getBasicBlockIndex(origBlock); assert(BlockIdx != -1 && "Switch didn't go to this successor??"); PN->setIncomingBlock((unsigned)BlockIdx, newDefault); } CaseVector cases; #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 1) for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) cases.push_back(SwitchCase(i.getCaseValue(), i.getCaseSuccessor())); #else for (unsigned i = 1; i < SI->getNumSuccessors(); ++i) cases.push_back(SwitchCase(SI->getSuccessorValue(i), SI->getSuccessor(i))); #endif // reverse cases, as switchConvert constructs a chain of // basic blocks by appending to the front. if we reverse, // the if comparisons will happen in the same order // as the cases appear in the switch std::reverse(cases.begin(), cases.end()); switchConvert(cases.begin(), cases.end(), switchValue, origBlock, newDefault); // We are now done with the switch instruction, so delete it origBlock->getInstList().erase(SI); }
// processSwitchInst - Replace the specified switch instruction with a sequence // of chained if-then insts in a balanced binary search. // void LowerSwitch::processSwitchInst(SwitchInst *SI) { BasicBlock *CurBlock = SI->getParent(); BasicBlock *OrigBlock = CurBlock; Function *F = CurBlock->getParent(); Value *Val = SI->getOperand(0); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); // If there is only the default destination, don't bother with the code below. if (SI->getNumOperands() == 2) { BranchInst::Create(SI->getDefaultDest(), CurBlock); CurBlock->getInstList().erase(SI); return; } // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); F->getBasicBlockList().insert(Default, NewDefault); BranchInst::Create(Default, NewDefault); // If there is an entry in any PHI nodes for the default edge, make sure // to update them as well. for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); int BlockIdx = PN->getBasicBlockIndex(OrigBlock); assert(BlockIdx != -1 && "Switch didn't go to this successor??"); PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); } // Prepare cases vector. CaseVector Cases; unsigned numCmps = Clusterify(Cases, SI); DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() << ". Total compares: " << numCmps << "\n"); DEBUG(dbgs() << "Cases: " << Cases << "\n"); (void)numCmps; BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, OrigBlock, NewDefault); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); // We are now done with the switch instruction, delete it. CurBlock->getInstList().erase(SI); }
/// \brief Add the real PHI value as soon as everything is set up void StructurizeCFG::setPhiValues() { SSAUpdater Updater; for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end(); AI != AE; ++AI) { BasicBlock *To = AI->first; BBVector &From = AI->second; if (!DeletedPhis.count(To)) continue; PhiMap &Map = DeletedPhis[To]; for (PhiMap::iterator PI = Map.begin(), PE = Map.end(); PI != PE; ++PI) { PHINode *Phi = PI->first; Value *Undef = UndefValue::get(Phi->getType()); Updater.Initialize(Phi->getType(), ""); Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); Updater.AddAvailableValue(To, Undef); NearestCommonDominator Dominator(DT); Dominator.addBlock(To, false); for (BBValueVector::iterator VI = PI->second.begin(), VE = PI->second.end(); VI != VE; ++VI) { Updater.AddAvailableValue(VI->first, VI->second); Dominator.addBlock(VI->first); } if (!Dominator.wasResultExplicitMentioned()) Updater.AddAvailableValue(Dominator.getResult(), Undef); for (BBVector::iterator FI = From.begin(), FE = From.end(); FI != FE; ++FI) { int Idx = Phi->getBasicBlockIndex(*FI); assert(Idx != -1); Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI)); } } DeletedPhis.erase(To); } assert(DeletedPhis.empty()); }
void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) { TerminatorInst *TI = getTerminator(); if (!TI) // Cope with being called on a BasicBlock that doesn't have a terminator // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this. return; for (BasicBlock *Succ : TI->successors()) { // N.B. Succ might not be a complete BasicBlock, so don't assume // that it ends with a non-phi instruction. for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) { PHINode *PN = dyn_cast<PHINode>(II); if (!PN) break; int i; while ((i = PN->getBasicBlockIndex(this)) >= 0) PN->setIncomingBlock(i, New); } } }
void MemoryInstrumenter::instrumentPointerInstruction(Instruction *I) { BasicBlock::iterator Loc; if (isa<PHINode>(I)) { // Cannot insert hooks right after a PHI, because PHINodes have to be // grouped together. Loc = I->getParent()->getFirstNonPHI(); } else if (!I->isTerminator()) { Loc = I; ++Loc; } else { assert(isa<InvokeInst>(I)); InvokeInst *II = cast<InvokeInst>(I); BasicBlock *NormalDest = II->getNormalDest(); // It's not always OK to insert HookTopLevel simply at the beginning of the // normal destination, because the normal destionation may be shared by // multiple InvokeInsts. In that case, we will create a critical edge block, // and add the HookTopLevel over there. if (NormalDest->getUniquePredecessor()) { Loc = NormalDest->getFirstNonPHI(); } else { BasicBlock *CritEdge = BasicBlock::Create(I->getContext(), "crit_edge", I->getParent()->getParent()); Loc = BranchInst::Create(NormalDest, CritEdge); // Now that CritEdge becomes the new predecessor of NormalDest, replace // all phi uses of I->getParent() with CritEdge. for (auto J = NormalDest->begin(); NormalDest->getFirstNonPHI() != J; ++J) { PHINode *Phi = cast<PHINode>(J); int i; while ((i = Phi->getBasicBlockIndex(I->getParent())) >= 0) Phi->setIncomingBlock(i, CritEdge); } II->setNormalDest(CritEdge); } } if (LoadInst *LI = dyn_cast<LoadInst>(I)) instrumentPointer(I, LI->getPointerOperand(), Loc); else instrumentPointer(I, NULL, Loc); }
/// \brief Add the real PHI value as soon as everything is set up void StructurizeCFG::setPhiValues() { SSAUpdater Updater; for (const auto &AddedPhi : AddedPhis) { BasicBlock *To = AddedPhi.first; const BBVector &From = AddedPhi.second; if (!DeletedPhis.count(To)) continue; PhiMap &Map = DeletedPhis[To]; for (const auto &PI : Map) { PHINode *Phi = PI.first; Value *Undef = UndefValue::get(Phi->getType()); Updater.Initialize(Phi->getType(), ""); Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); Updater.AddAvailableValue(To, Undef); NearestCommonDominator Dominator(DT); Dominator.addBlock(To, false); for (const auto &VI : PI.second) { Updater.AddAvailableValue(VI.first, VI.second); Dominator.addBlock(VI.first); } if (!Dominator.wasResultExplicitMentioned()) Updater.AddAvailableValue(Dominator.getResult(), Undef); for (BasicBlock *FI : From) { int Idx = Phi->getBasicBlockIndex(FI); assert(Idx != -1); Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(FI)); } } DeletedPhis.erase(To); } assert(DeletedPhis.empty()); }
void Executor::transferToBasicBlock(BasicBlock *dst, BasicBlock *src, ExecutionState &state) { // Note that in general phi nodes can reuse phi values from the same // block but the incoming value is the eval() result *before* the // execution of any phi nodes. this is pathological and doesn't // really seem to occur, but just in case we run the PhiCleanerPass // which makes sure this cannot happen and so it is safe to just // eval things in order. The PhiCleanerPass also makes sure that all // incoming blocks have the same order for each PHINode so we only // have to compute the index once. // // With that done we simply set an index in the state so that PHI // instructions know which argument to eval, set the pc, and continue. // XXX this lookup has to go ? KFunction *kf = state.stack().back().kf; unsigned entry = kf->basicBlockEntry[dst]; state.pc() = &kf->instructions[entry]; if (state.pc()->inst->getOpcode() == Instruction::PHI) { PHINode *first = static_cast<PHINode*>(state.pc()->inst); state.crtThread().incomingBBIndex = first->getBasicBlockIndex(src); } }
/// Create a clone of the blocks in a loop and connect them together. /// This function doesn't create a clone of the loop structure. /// /// There are two value maps that are defined and used. VMap is /// for the values in the current loop instance. LVMap contains /// the values from the last loop instance. We need the LVMap values /// to update the initial values for the current loop instance. /// static void CloneLoopBlocks(Loop *L, bool FirstCopy, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, LoopInfo *LI) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F); NewBlocks.push_back(NewBB); if (Loop *ParentLoop = L->getParentLoop()) ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block InsertTop->getTerminator()->setSuccessor(0, NewBB); // Change the incoming values to the ones defined in the // previously cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[I]); if (FirstCopy) { // We replace the first phi node with the value from the preheader VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); NewBB->getInstList().erase(NewPHI); } else { // Update VMap with values from the previous block unsigned idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); if (Instruction *I = dyn_cast<Instruction>(InVal)) if (L->contains(I)) InVal = LVMap[InVal]; NewPHI->setIncomingValue(idx, InVal); NewPHI->setIncomingBlock(idx, InsertTop); } } } if (Latch == *BB) { VMap.erase((*BB)->getTerminator()); NewBB->getTerminator()->eraseFromParent(); BranchInst::Create(InsertBot, NewBB); } } // LastValueMap is updated with the values for the current loop // which are used the next time this function is called. for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) { LVMap[VI->first] = VI->second; } }
void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S) { assert(Stmt.isRegionStmt() && "Only region statements can be copied by the block generator"); // Forget all old mappings. BlockMap.clear(); RegionMaps.clear(); IncompletePHINodeMap.clear(); // The region represented by the statement. Region *R = Stmt.getRegion(); // Create a dedicated entry for the region where we can reload all demoted // inputs. BasicBlock *EntryBB = R->getEntry(); BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry"); Builder.SetInsertPoint(EntryBBCopy->begin()); for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI) if (!R->contains(*PI)) BlockMap[*PI] = EntryBBCopy; // Iterate over all blocks in the region in a breadth-first search. std::deque<BasicBlock *> Blocks; SmallPtrSet<BasicBlock *, 8> SeenBlocks; Blocks.push_back(EntryBB); SeenBlocks.insert(EntryBB); while (!Blocks.empty()) { BasicBlock *BB = Blocks.front(); Blocks.pop_front(); // First split the block and update dominance information. BasicBlock *BBCopy = splitBB(BB); BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy); // In order to remap PHI nodes we store also basic block mappings. BlockMap[BB] = BBCopy; // Get the mapping for this block and initialize it with the mapping // available at its immediate dominator (in the new region). ValueMapT &RegionMap = RegionMaps[BBCopy]; RegionMap = RegionMaps[BBCopyIDom]; // Copy the block with the BlockGenerator. copyBB(Stmt, BB, BBCopy, RegionMap, GlobalMap, LTS); // In order to remap PHI nodes we store also basic block mappings. BlockMap[BB] = BBCopy; // Add values to incomplete PHI nodes waiting for this block to be copied. for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB]) addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, GlobalMap, LTS); IncompletePHINodeMap[BB].clear(); // And continue with new successors inside the region. for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) if (R->contains(*SI) && SeenBlocks.insert(*SI).second) Blocks.push_back(*SI); } // Now create a new dedicated region exit block and add it to the region map. BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit"); BlockMap[R->getExit()] = ExitBBCopy; repairDominance(R->getExit(), ExitBBCopy); // As the block generator doesn't handle control flow we need to add the // region control flow by hand after all blocks have been copied. for (BasicBlock *BB : SeenBlocks) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *BBCopy = BlockMap[BB]; Instruction *BICopy = BBCopy->getTerminator(); ValueMapT &RegionMap = RegionMaps[BBCopy]; RegionMap.insert(BlockMap.begin(), BlockMap.end()); Builder.SetInsertPoint(BBCopy); copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS); BICopy->eraseFromParent(); } // Add counting PHI nodes to all loops in the region that can be used as // replacement for SCEVs refering to the old loop. for (BasicBlock *BB : SeenBlocks) { Loop *L = LI.getLoopFor(BB); if (L == nullptr || L->getHeader() != BB) continue; BasicBlock *BBCopy = BlockMap[BB]; Value *NullVal = Builder.getInt32(0); PHINode *LoopPHI = PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv"); Instruction *LoopPHIInc = BinaryOperator::CreateAdd( LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc"); LoopPHI->insertBefore(BBCopy->begin()); LoopPHIInc->insertBefore(BBCopy->getTerminator()); for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) { if (!R->contains(PredBB)) continue; if (L->contains(PredBB)) LoopPHI->addIncoming(LoopPHIInc, BlockMap[PredBB]); else LoopPHI->addIncoming(NullVal, BlockMap[PredBB]); } for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy))) if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0) LoopPHI->addIncoming(NullVal, PredBBCopy); LTS[L] = SE.getUnknown(LoopPHI); } // Add all mappings from the region to the global map so outside uses will use // the copied instructions. for (auto &BBMap : RegionMaps) GlobalMap.insert(BBMap.second.begin(), BBMap.second.end()); // Reset the old insert point for the build. Builder.SetInsertPoint(ExitBBCopy->begin()); }
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. This will update DominatorTree and /// DominatorFrontier information if it is available, thus calling this pass /// will not invalidate either of them. This returns the new block if the edge /// was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the /// specified successor will be merged into the same critical edge block. /// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that /// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an /// IndirectBrInst. Splitting these edges will almost always create an invalid /// program because the address of the new block won't be the one that is jumped /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. BranchInst::Create(DestBB, NewBB); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { // This conceptually does: // foreach (PHINode *PN in DestBB) // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); // but is optimized for two cases. if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { // We no longer enter through TIBB, now we come in through NewBB. // Revector exactly one entry in the PHI node that used to come from // TIBB to come from NewBB. PHINode *PN = cast<PHINode>(I); // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } else { // However, the foreach loop is slow for blocks with lots of predecessors // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk // the user list of TIBB to find the PHI nodes. SmallPtrSet<PHINode*, 16> UpdatedPHIs; for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); UI != E; ) { Value::use_iterator Use = UI++; if (PHINode *PN = dyn_cast<PHINode>(Use)) { // Remove one entry from each PHI. if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) PN->setOperand(Use.getOperandNo(), NewBB); } } } } // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. DestBB->removePredecessor(TIBB); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); // If we have nothing to update, just return. if (DT == 0 && DF == 0 && LI == 0 && PI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate // anything, as there are other successors of DestBB. However, if all other // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; // If there is a PHI in the block, loop over predecessors with it, which is // faster than iterating pred_begin/end. if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) != NewBB) OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) if (*I != NewBB) OtherPreds.push_back(*I); } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but // TINode is the immediate dominator for the new node. // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); while (!OtherPreds.empty() && NewBBDominatesDestBB) { if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); OtherPreds.pop_back(); } OtherPreds.clear(); } // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { if (!DestBBNode) DestBBNode = DT->getNode(DestBB); DT->changeImmediateDominator(DestBBNode, NewBBNode); } } } // Should we update DominanceFrontier information? if (DF) { // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset." " not implemented yet!"); } // Since the new block is dominated by its only predecessor TIBB, // it cannot be in any block's dominance frontier. If NewBB dominates // DestBB, its dominance frontier is the same as DestBB's, otherwise it is // just {DestBB}. DominanceFrontier::DomSetType NewDFSet; if (NewBBDominatesDestBB) { DominanceFrontier::iterator I = DF->find(DestBB); if (I != DF->end()) { DF->addBasicBlock(NewBB, I->second); if (I->second.count(DestBB)) { // However NewBB's frontier does not include DestBB. DominanceFrontier::iterator NF = DF->find(NewBB); DF->removeFromFrontier(NF, DestBB); } } else DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType()); } else { DominanceFrontier::DomSetType NewDFSet; NewDFSet.insert(DestBB); DF->addBasicBlock(NewBB, NewDFSet); } } // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the // header of its loop (adding a branch into a loop elsewhere would // create an irreducible loop). assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) P->addBasicBlockToLoop(NewBB, LI->getBase()); } } // If TIBB is in a loop and DestBB is outside of that loop, split the // other exit blocks of the loop that also have predecessors outside // the loop, to maintain a LoopSimplify guarantee. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. if (P->mustPreserveAnalysisID(LCSSAID)) { SmallVector<BasicBlock *, 1> OrigPred; OrigPred.push_back(TIBB); CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); } // For each unique exit block... SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { // Collect all the preds that are inside the loop, and note // whether there are any preds outside the loop. SmallVector<BasicBlock *, 4> Preds; bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) if (TIL->contains(*I)) Preds.push_back(*I); else HasPredOutsideOfLoop = true; // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use // getUniqueExitBlocks above because that depends on LoopSimplify // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } } } // LCSSA form was updated above for the case where LoopSimplify is // available, which means that all predecessors of loop exit blocks // are within the loop. Without LoopSimplify form, it would be // necessary to insert a new phi. assert((!P->mustPreserveAnalysisID(LCSSAID) || P->mustPreserveAnalysisID(LoopSimplifyID)) && "SplitCriticalEdge doesn't know how to update LCCSA form " "without LoopSimplify!"); } } // Update ProfileInfo if it is around. if (PI) PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; }
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; MDNode *LoopID = NewLoop->getLoopID(); assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; Optional<MDNode *> NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); if (NewLoopID.hasValue()) { NewLoop->setLoopID(NewLoopID.getValue()); // Do not setLoopAlreadyUnrolled if loop attributes have been defined // explicitly. return NewLoop; } // Add unroll disable metadata to disable future unrolling for this loop. NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } else return nullptr; }
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. This will update DominatorTree information if it /// is available, thus calling this pass will not invalidate either of them. /// This returns the new block if the edge was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the /// specified successor will be merged into the same critical edge block. /// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that /// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an /// IndirectBrInst. Splitting these edges will almost always create an invalid /// program because the address of the new block won't be the one that is jumped /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges, bool DontDeleteUselessPhis, bool SplitLandingPads) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. if (DestBB->isLandingPad()) return 0; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); NewBI->setDebugLoc(TI->getDebugLoc()); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. { unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { // We no longer enter through TIBB, now we come in through NewBB. // Revector exactly one entry in the PHI node that used to come from // TIBB to come from NewBB. PHINode *PN = cast<PHINode>(I); // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); // If we have nothing to update, just return. if (DT == 0 && LI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate // anything, as there are other successors of DestBB. However, if all other // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; // If there is a PHI in the block, loop over predecessors with it, which is // faster than iterating pred_begin/end. if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) != NewBB) OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) { BasicBlock *P = *I; if (P != NewBB) OtherPreds.push_back(P); } } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but // TINode is the immediate dominator for the new node. // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); while (!OtherPreds.empty() && NewBBDominatesDestBB) { if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); OtherPreds.pop_back(); } OtherPreds.clear(); } // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { if (!DestBBNode) DestBBNode = DT->getNode(DestBB); DT->changeImmediateDominator(DestBBNode, NewBBNode); } } } // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the // header of its loop (adding a branch into a loop elsewhere would // create an irreducible loop). assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) P->addBasicBlockToLoop(NewBB, LI->getBase()); } } // If TIBB is in a loop and DestBB is outside of that loop, split the // other exit blocks of the loop that also have predecessors outside // the loop, to maintain a LoopSimplify guarantee. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. if (P->mustPreserveAnalysisID(LCSSAID)) createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); // For each unique exit block... // FIXME: This code is functionally equivalent to the corresponding // loop in LoopSimplify. SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { // Collect all the preds that are inside the loop, and note // whether there are any preds outside the loop. SmallVector<BasicBlock *, 4> Preds; bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; if (TIL->contains(P)) { if (isa<IndirectBrInst>(P->getTerminator())) { Preds.clear(); break; } Preds.push_back(P); } else { HasPredOutsideOfLoop = true; } } // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use // getUniqueExitBlocks above because that depends on LoopSimplify // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { if (!Exit->isLandingPad()) { BasicBlock *NewExitBB = SplitBlockPredecessors(Exit, Preds, "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) createPHIsForSplitLoopExit(Preds, NewExitBB, Exit); } else if (SplitLandingPads) { SmallVector<BasicBlock*, 8> NewBBs; SplitLandingPadPredecessors(Exit, Preds, ".split1", ".split2", P, NewBBs); if (P->mustPreserveAnalysisID(LCSSAID)) createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit); } } } } // LCSSA form was updated above for the case where LoopSimplify is // available, which means that all predecessors of loop exit blocks // are within the loop. Without LoopSimplify form, it would be // necessary to insert a new phi. assert((!P->mustPreserveAnalysisID(LCSSAID) || P->mustPreserveAnalysisID(LoopSimplifyID)) && "SplitCriticalEdge doesn't know how to update LCCSA form " "without LoopSimplify!"); } } return NewBB; }
// Inserts an unwinding annotation (assume or assert, depending on the function // given in the constructor) and removes the loop. bool RmLoopPass::runOnLoop(Loop *L, LPPassManager &LPM){ BasicBlock *latch = L -> getLoopLatch(); BasicBlock *header = L -> getHeader(); SmallVector<BasicBlock *, 1> exitBBs; L -> getExitBlocks(exitBBs); BasicBlock *exitBB = NULL; SmallVector<BasicBlock *, 1>::iterator it = exitBBs.begin(); for(; it != exitBBs.end() && !exitBB; ++it){ if(std::find(createdBB.begin(), createdBB.end(), *it) == createdBB.end()) exitBB = *it; } assert(exitBB && "exitBB is null"); // std::cout << "\n\n LOOP REMOVAL:\n"; // std::cout << "Latch: " << latch -> getName().str() << "\n"; // std::cout << "Header: " << header -> getName().str() << "\n"; // std::cout << "ExitBB: " << exitBB -> getName().str() << "\n"; //assert(exitBBs.size() == 1 && "RmLoopPass - more than one exit BB"); // At this point we have an header, a latch and an exit BasicBlock // and they all are different assert(latch && header && "Not able to obtain some loop basic block; try to run doInitialization before"); // Get loop last branch instruction BranchInst *br = cast<BranchInst>(latch -> getTerminator()); // assert(br -> isConditional() && "loop terminator with unconditional branch"); // Get loop's last iteration condition Value *cond = NULL; // Loop last iteration branch condition if(br -> isConditional()) cond = br -> getCondition(); else{ std::cout << "\n************************************************************\n"; std::cout << "*BE CAREFUL!!!! There is a latch with unconditional branch!*\n"; std::cout << "************************************************************\n"; } // In order to remove the back edge, we need to remove the loop from the LPPAssManager LPM.deleteLoopFromQueue(L); // Create a new BasicBlock with the unwinding annotation. // Unreachable instruction is used as a terminator instruction in this BasicBlock BasicBlock *newBB = BasicBlock::Create(header -> getContext() , "unwinding_annotation" , header -> getParent()); createdBB.push_back(newBB); Type *t = Type::getInt32Ty(header -> getContext()); Constant *c = llvm::ConstantInt::get(t,uint32_t(0)); ArrayRef<Value *> *param = new ArrayRef<Value *>(c); CallInst::Create(function, *param, "", newBB); if(unreachable){ new UnreachableInst(header -> getContext(), newBB); }else{ BranchInst::Create(exitBB,newBB); for(BasicBlock::iterator it = exitBB->begin(); it != exitBB->end();++it){ PHINode *phi = dyn_cast<PHINode>(it); if(!phi) break; //Value *latchValue = phi->getIncomingValueForBlock(latch); phi->addIncoming(UndefValue::get(phi -> getType()),newBB); } } BranchInst *newBr = NULL; if(cond){ if(br -> getSuccessor(0) == header){ newBr = BranchInst::Create(newBB,br -> getSuccessor(1),cond); }else{ newBr = BranchInst::Create(br -> getSuccessor(1),newBB,cond); } }else{ newBr = BranchInst::Create(newBB); } ReplaceInstWithInst(br,newBr); // The latch BasicBlock must be removed from the PHI nodes in // the header BasicBlock for(BasicBlock::iterator it = header->begin(); it != header->end();++it){ PHINode *phi = dyn_cast<PHINode>(it); if(!phi) break; int latchIndex = phi->getBasicBlockIndex(latch); phi->removeIncomingValue(latchIndex); } //std::cout << "\n---- NewBB ------\n"; //newBB -> print(outs()); //std::cout << "\n---- ExitBB\n"; //exitBB -> print(outs()); return true; }
BasicBlock * llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options) { if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges)) return nullptr; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Splitting the critical edge to a pad block is non-trivial. Don't do // it in this generic function. if (DestBB->isEHPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); NewBI->setDebugLoc(TI->getDebugLoc()); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB->getIterator(); F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. { unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { // We no longer enter through TIBB, now we come in through NewBB. // Revector exactly one entry in the PHI node that used to come from // TIBB to come from NewBB. PHINode *PN = cast<PHINode>(I); // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (Options.MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } // If we have nothing to update, just return. auto *DT = Options.DT; auto *LI = Options.LI; if (!DT && !LI) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate // anything, as there are other successors of DestBB. However, if all other // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; // If there is a PHI in the block, loop over predecessors with it, which is // faster than iterating pred_begin/end. if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) != NewBB) OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) { BasicBlock *P = *I; if (P != NewBB) OtherPreds.push_back(P); } } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but // TINode is the immediate dominator for the new node. // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = nullptr; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); while (!OtherPreds.empty() && NewBBDominatesDestBB) { if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); OtherPreds.pop_back(); } OtherPreds.clear(); } // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { if (!DestBBNode) DestBBNode = DT->getNode(DestBB); DT->changeImmediateDominator(DestBBNode, NewBBNode); } } } // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. DestLoop->addBasicBlockToLoop(NewBB, *LI); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, *LI); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, *LI); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the // header of its loop (adding a branch into a loop elsewhere would // create an irreducible loop). assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) P->addBasicBlockToLoop(NewBB, *LI); } } // If TIBB is in a loop and DestBB is outside of that loop, we may need // to update LoopSimplify form and LCSSA form. if (!TIL->contains(DestBB)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. if (Options.PreserveLCSSA) { createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); } // The only that we can break LoopSimplify form by splitting a critical // edge is if after the split there exists some edge from TIL to DestBB // *and* the only edge into DestBB from outside of TIL is that of // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB // is the new exit block and it has no non-loop predecessors. If the // second isn't true, then DestBB was not in LoopSimplify form prior to // the split as it had a non-loop predecessor. In both of these cases, // the predecessor must be directly in TIL, not in a subloop, or again // LoopSimplify doesn't hold. SmallVector<BasicBlock *, 4> LoopPreds; for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) { BasicBlock *P = *I; if (P == NewBB) continue; // The new block is known. if (LI->getLoopFor(P) != TIL) { // No need to re-simplify, it wasn't to start with. LoopPreds.clear(); break; } LoopPreds.push_back(P); } if (!LoopPreds.empty()) { assert(!DestBB->isEHPad() && "We don't split edges to EH pads!"); BasicBlock *NewExitBB = SplitBlockPredecessors( DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA); if (Options.PreserveLCSSA) createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } } } } return NewBB; }
/// \brief Peel off the first \p PeelCount iterations of loop \p L. /// /// Note that this does not peel them off as a single straight-line block. /// Rather, each iteration is peeled off separately, and needs to check the /// exit condition. /// For loops that dynamically execute \p PeelCount iterations or less /// this provides a benefit, since the peeled off iterations, which account /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { if (!canPeel(L)) return false; LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Exit = L->getUniqueExitBlock(); Function *F = Header->getParent(); // Set up all the necessary basic blocks. It is convenient to split the // preheader into 3 parts - two blocks to anchor the peeled copy of the loop // body, and a new preheader for the "real" loop. // Peeling the first iteration transforms. // // PreHeader: // ... // Header: // LoopBody // If (cond) goto Header // Exit: // // into // // InsertTop: // LoopBody // If (!cond) goto Exit // InsertBot: // NewPreHeader: // ... // Header: // LoopBody // If (cond) goto Header // Exit: // // Each following iteration will split the current bottom anchor in two, // and put the new copy of the loop body between these two blocks. That is, // after peeling another iteration from the example above, we'll split // InsertBot, and get: // // InsertTop: // LoopBody // If (!cond) goto Exit // InsertBot: // LoopBody // If (!cond) goto Exit // InsertBot.next: // NewPreHeader: // ... // Header: // LoopBody // If (cond) goto Header // Exit: BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI); BasicBlock *InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI); BasicBlock *NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); InsertTop->setName(Header->getName() + ".peel.begin"); InsertBot->setName(Header->getName() + ".peel.next"); NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); ValueToValueMapTy LVMap; // If we have branch weight information, we'll want to update it for the // newly created branches. BranchInst *LatchBR = cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator()); unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); uint64_t TrueWeight, FalseWeight; uint64_t ExitWeight = 0, BackEdgeWeight = 0; if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight; } // For each peeled-off iteration, make a copy of the loop. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector<BasicBlock *, 8> NewBlocks; ValueToValueMapTy VMap; // The exit weight of the previous iteration is the header entry weight // of the current iteration. So this is exactly how many dynamic iterations // the current peeled-off static iteration uses up. // FIXME: due to the way the distribution is constructed, we need a // guard here to make sure we don't end up with non-positive weights. if (ExitWeight < BackEdgeWeight) BackEdgeWeight -= ExitWeight; else BackEdgeWeight = 1; cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, NewBlocks, LoopBlocks, VMap, LVMap, LI); updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter, PeelCount, ExitWeight); InsertTop = InsertBot; InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); InsertBot->setName(Header->getName() + ".peel.next"); F->getBasicBlockList().splice(InsertTop->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Remap to use values from the current iteration instead of the // previous one. remapInstructionsInBlocks(NewBlocks, VMap); } // Now adjust the phi nodes in the loop header to get their initial values // from the last peeled-off iteration instead of the preheader. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PHI = cast<PHINode>(I); Value *NewVal = PHI->getIncomingValueForBlock(Latch); Instruction *LatchInst = dyn_cast<Instruction>(NewVal); if (LatchInst && L->contains(LatchInst)) NewVal = LVMap[LatchInst]; PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal); } // Adjust the branch weights on the loop exit. if (ExitWeight) { MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } // If the loop is nested, we changed the parent loop, update SE. if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); NumPeeled++; return true; }
/// Connect the unrolling epilog code to the original loop. /// The unrolling epilog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Update PHI nodes at the unrolling loop exit and epilog loop exit /// - Create PHI nodes at the unrolling loop exit to combine /// values that exit the unrolling loop code and jump around it. /// - Update PHI operands in the epilog loop by the new PHI nodes /// - Branch around the epilog loop if extra iters (ModVal) is zero. /// static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); // Loop structure should be the following: // // PreHeader // NewPreHeader // Header // ... // Latch // NewExit (PN) // EpilogPreHeader // EpilogHeader // ... // EpilogLatch // Exit (EpilogPN) // Update PHI nodes at NewExit and Exit. for (Instruction &BBI : *NewExit) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // PN should be used in another PHI located in Exit block as // Exit was split by SplitBlockPredecessors into Exit and NewExit // Basicaly it should look like: // NewExit: // PN = PHI [I, Latch] // ... // Exit: // EpilogPN = PHI [PN, EpilogPreHeader] // // There is EpilogPreHeader incoming block instead of NewExit as // NewExit was spilt 1 more time to get EpilogPreHeader. assert(PN->hasOneUse() && "The phi should have 1 use"); PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser()); assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); // Add incoming PreHeader from branch around the Loop PN->addIncoming(UndefValue::get(PN->getType()), PreHeader); Value *V = PN->getIncomingValueForBlock(Latch); Instruction *I = dyn_cast<Instruction>(V); if (I && L->contains(I)) // If value comes from an instruction in the loop add VMap value. V = VMap.lookup(I); // For the instruction out of the loop, constant or undefined value // insert value itself. EpilogPN->addIncoming(V, EpilogLatch); assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && "EpilogPN should have EpilogPreHeader incoming block"); // Change EpilogPreHeader incoming block to NewExit. EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), NewExit); // Now PHIs should look like: // NewExit: // PN = PHI [I, Latch], [undef, PreHeader] // ... // Exit: // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] } // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). // Update corresponding PHI nodes in epilog loop. for (BasicBlock *Succ : successors(Latch)) { // Skip this as we already updated phis in exit blocks. if (!L->contains(Succ)) continue; for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add new PHI nodes to the loop exit block and update epilog // PHIs with the new PHI values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", NewExit->getFirstNonPHI()); // Adding a value to the new PHI node from the unrolling loop preheader. NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); // Adding a value to the new PHI node from the unrolling loop latch. NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch); // Update the existing PHI node operand with the value from the new PHI // node. Corresponding instruction in epilog loop should be PHI. PHINode *VPN = cast<PHINode>(VMap[&BBI]); VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN); } } Instruction *InsertPt = NewExit->getTerminator(); IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(Exit, NewExit); // Split the main loop exit to maintain canonicalization guarantees. SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, PreserveLCSSA); }
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); return NewLoop; } else return nullptr; }
/// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. // The new PHI node value is added as an operand of a PHI node in either // the loop header or the loop exit block. for (BasicBlock *Succ : successors(Latch)) { for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add a new PHI node to the prolog end block and add the // appropriate incoming values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", PrologExit->getFirstNonPHI()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. if (L->contains(PN)) { NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); } else { NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); } Value *V = PN->getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast<Instruction>(V)) { if (L->contains(I)) { V = VMap.lookup(I); } } // Adding a value to the new PHI node from the last prolog block // that was created. NewPN->addIncoming(V, PrologLatch); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); } else { PN->addIncoming(NewPN, PrologExit); } } } // Make sure that created prolog loop is in simplified form SmallVector<BasicBlock *, 4> PrologExitPreds; Loop *PrologLoop = LI->getLoopFor(PrologLatch); if (PrologLoop) { for (BasicBlock *PredBB : predecessors(PrologExit)) if (PrologLoop->contains(PredBB)) PrologExitPreds.push_back(PredBB); SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, PreserveLCSSA); } // Create a branch around the original loop, which is taken if there are no // iterations remaining to be executed after running the prologue. Instruction *InsertPt = PrologExit->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) // This means %xtraiter is (BECount + 1) and all of the iterations of this // loop were executed by the prologue. Note that if BECount <u (Count - 1) // then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); }
/// Create a clone of the blocks in a loop and connect them together. /// If UnrollProlog is true, loop structure will not be cloned, otherwise a new /// loop will be created including all cloned blocks, and the iterator of it /// switches to count NewIter down to 0. /// static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, LoopInfo *LI) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *NewLoop = 0; Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); if (ParentLoop) ParentLoop->addChildLoop(NewLoop); else LI->addTopLevelLoop(NewLoop); } // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F); NewBlocks.push_back(NewBB); if (NewLoop) NewLoop->addBasicBlockToLoop(NewBB, *LI); else if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (Latch == *BB) { // For the last block, if UnrollProlog is true, create a direct jump to // InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (UnrollProlog) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[I]); if (UnrollProlog) { VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (VMap[InVal]) NewPHI->setIncomingValue(idx, VMap[InVal]); } } if (NewLoop) { // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); } }
void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, ValueMapT &GlobalMap) { const Region &R = Stmt.getParent()->getRegion(); Region *StmtR = Stmt.getRegion(); assert(StmtR && "Block statements need to use the generateScalarStores() " "function in the BlockGenerator"); BasicBlock *ExitBB = StmtR->getExit(); // For region statements three kinds of scalar stores exists: // (1) A definition used by a non-phi instruction outside the region. // (2) A phi-instruction in the region entry. // (3) A write to a phi instruction in the region exit. // The last case is the tricky one since we do not know anymore which // predecessor of the exit needs to store the operand value that doesn't // have a definition in the region. Therefore, we have to check in each // block in the region if we should store the value or not. // Iterate over all accesses in the given statement. for (MemoryAccess *MA : Stmt) { // Skip non-scalar and read accesses. if (!MA->isScalar() || MA->isRead()) continue; Instruction *ScalarBase = cast<Instruction>(MA->getBaseAddr()); Instruction *ScalarInst = MA->getAccessInstruction(); PHINode *ScalarBasePHI = dyn_cast<PHINode>(ScalarBase); Value *ScalarValue = nullptr; AllocaInst *ScalarAddr = nullptr; if (!ScalarBasePHI) { // Case (1) ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } else if (ScalarBasePHI->getParent() != ExitBB) { // Case (2) assert(ScalarBasePHI->getParent() == StmtR->getEntry() && "Bad PHI self write in non-affine region"); assert(ScalarBase == ScalarInst && "Bad PHI self write in non-affine region"); ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } else { int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB); // Skip accesses we will not handle in this basic block but in another one // in the statement region. if (PHIIdx < 0) continue; // Case (3) ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx); } ScalarValue = getNewScalarValue(ScalarValue, R, ScalarMap, BBMap, GlobalMap); Builder.CreateStore(ScalarValue, ScalarAddr); } }
void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, ValueMapT &GlobalMap) { const Region &R = Stmt.getParent()->getRegion(); assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() && "Region statements need to use the generateScalarStores() " "function in the RegionGenerator"); // Set to remember a store to the phiops alloca of a PHINode. It is needed as // we might have multiple write accesses to the same PHI and while one is the // self write of the PHI (to the ScalarMap alloca) the other is the write to // the operand alloca (PHIOpMap). SmallPtrSet<PHINode *, 4> SeenPHIs; // Iterate over all accesses in the given statement. for (MemoryAccess *MA : Stmt) { // Skip non-scalar and read accesses. if (!MA->isScalar() || MA->isRead()) continue; Instruction *ScalarBase = cast<Instruction>(MA->getBaseAddr()); Instruction *ScalarInst = MA->getAccessInstruction(); PHINode *ScalarBasePHI = dyn_cast<PHINode>(ScalarBase); // Get the alloca node for the base instruction and the value we want to // store. In total there are 4 options: // (1) The base is no PHI, hence it is a simple scalar def-use chain. // (2) The base is a PHI, // (a) and the write is caused by an operand in the block. // (b) and it is the PHI self write (same as case (1)). // (c) (2a) and (2b) are not distinguishable. // For case (1) and (2b) we get the alloca from the scalar map and the value // we want to store is initialized with the instruction attached to the // memory access. For case (2a) we get the alloca from the PHI operand map // and the value we want to store is initialized with the incoming value for // this block. The tricky case (2c) is when both (2a) and (2b) match. This // happens if the PHI operand is in the same block as the PHI. To handle // that we choose the alloca of (2a) first and (2b) for the next write // access to that PHI (there must be 2). Value *ScalarValue = nullptr; AllocaInst *ScalarAddr = nullptr; if (!ScalarBasePHI) { // Case (1) ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } else { int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB); if (ScalarBasePHI != ScalarInst) { // Case (2a) assert(PHIIdx >= 0 && "Bad scalar write to PHI operand"); SeenPHIs.insert(ScalarBasePHI); ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx); } else if (PHIIdx < 0) { // Case (2b) ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } else { // Case (2c) if (SeenPHIs.insert(ScalarBasePHI).second) { // First access ==> same as (2a) ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx); } else { // Second access ==> same as (2b) ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } } } ScalarValue = getNewScalarValue(ScalarValue, R, ScalarMap, BBMap, GlobalMap); Builder.CreateStore(ScalarValue, ScalarAddr); } }
/// Peel off the first \p PeelCount iterations of loop \p L. /// /// Note that this does not peel them off as a single straight-line block. /// Rather, each iteration is peeled off separately, and needs to check the /// exit condition. /// For loops that dynamically execute \p PeelCount iterations or less /// this provides a benefit, since the peeled off iterations, which account /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Exit = L->getUniqueExitBlock(); Function *F = Header->getParent(); // Set up all the necessary basic blocks. It is convenient to split the // preheader into 3 parts - two blocks to anchor the peeled copy of the loop // body, and a new preheader for the "real" loop. // Peeling the first iteration transforms. // // PreHeader: // ... // Header: // LoopBody // If (cond) goto Header // Exit: // // into // // InsertTop: // LoopBody // If (!cond) goto Exit // InsertBot: // NewPreHeader: // ... // Header: // LoopBody // If (cond) goto Header // Exit: // // Each following iteration will split the current bottom anchor in two, // and put the new copy of the loop body between these two blocks. That is, // after peeling another iteration from the example above, we'll split // InsertBot, and get: // // InsertTop: // LoopBody // If (!cond) goto Exit // InsertBot: // LoopBody // If (!cond) goto Exit // InsertBot.next: // NewPreHeader: // ... // Header: // LoopBody // If (cond) goto Header // Exit: BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI); BasicBlock *InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI); BasicBlock *NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); InsertTop->setName(Header->getName() + ".peel.begin"); InsertBot->setName(Header->getName() + ".peel.next"); NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); ValueToValueMapTy LVMap; // If we have branch weight information, we'll want to update it for the // newly created branches. BranchInst *LatchBR = cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator()); unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); uint64_t TrueWeight, FalseWeight; uint64_t ExitWeight = 0, CurHeaderWeight = 0; if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; // The # of times the loop body executes is the sum of the exit block // weight and the # of times the backedges are taken. CurHeaderWeight = TrueWeight + FalseWeight; } // For each peeled-off iteration, make a copy of the loop. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector<BasicBlock *, 8> NewBlocks; ValueToValueMapTy VMap; // Subtract the exit weight from the current header weight -- the exit // weight is exactly the weight of the previous iteration's header. // FIXME: due to the way the distribution is constructed, we need a // guard here to make sure we don't end up with non-positive weights. if (ExitWeight < CurHeaderWeight) CurHeaderWeight -= ExitWeight; else CurHeaderWeight = 1; cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, NewBlocks, LoopBlocks, VMap, LVMap, DT, LI); // Remap to use values from the current iteration instead of the // previous one. remapInstructionsInBlocks(NewBlocks, VMap); if (DT) { // Latches of the cloned loops dominate over the loop exit, so idom of the // latter is the first cloned loop body, as original PreHeader dominates // the original loop body. if (Iter == 0) DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch])); #ifdef EXPENSIVE_CHECKS assert(DT->verify(DominatorTree::VerificationLevel::Fast)); #endif } auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]); updateBranchWeights(InsertBot, LatchBRCopy, Iter, PeelCount, ExitWeight); // Remove Loop metadata from the latch branch instruction // because it is not the Loop's latch branch anymore. LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); InsertTop = InsertBot; InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); InsertBot->setName(Header->getName() + ".peel.next"); F->getBasicBlockList().splice(InsertTop->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); } // Now adjust the phi nodes in the loop header to get their initial values // from the last peeled-off iteration instead of the preheader. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PHI = cast<PHINode>(I); Value *NewVal = PHI->getIncomingValueForBlock(Latch); Instruction *LatchInst = dyn_cast<Instruction>(NewVal); if (LatchInst && L->contains(LatchInst)) NewVal = LVMap[LatchInst]; PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal); } // Adjust the branch weights on the loop exit. if (ExitWeight) { // The backedge count is the difference of current header weight and // current loop exit weight. If the current header weight is smaller than // the current loop exit weight, we mark the loop backedge weight as 1. uint64_t BackEdgeWeight = 0; if (ExitWeight < CurHeaderWeight) BackEdgeWeight = CurHeaderWeight - ExitWeight; else BackEdgeWeight = 1; MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } if (Loop *ParentLoop = L->getParentLoop()) L = ParentLoop; // We modified the loop, update SE. SE->forgetTopmostLoop(L); // FIXME: Incrementally update loop-simplify simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA); NumPeeled++; return true; }