void vSSA::createSigmasIfNeeded(BasicBlock *BB) { TerminatorInst *ti = BB->getTerminator(); // If the condition used in the terminator instruction is a Comparison instruction: //for each operand of the CmpInst, create sigmas, depending on some conditions /* if(isa<BranchInst>(ti)){ BranchInst * bc = cast<BranchInst>(ti); if(bc->isConditional()){ Value * cond = bc->getCondition(); CmpInst *comparison = dyn_cast<CmpInst>(cond); for (User::const_op_iterator it = comparison->op_begin(), e = comparison->op_end(); it != e; ++it) { Value *operand = *it; if (isa<Instruction>(operand) || isa<Argument>(operand)) { insertSigmas(ti, operand); } } } } */ // CASE 1: Branch Instruction BranchInst *bi = NULL; SwitchInst *si = NULL; if ((bi = dyn_cast<BranchInst>(ti))) { if (bi->isConditional()) { Value *condition = bi->getCondition(); ICmpInst *comparison = dyn_cast<ICmpInst>(condition); if (comparison) { // Create sigmas for ICmp operands for (User::const_op_iterator opit = comparison->op_begin(), opend = comparison->op_end(); opit != opend; ++opit) { Value *operand = *opit; if (isa<Instruction>(operand) || isa<Argument>(operand)) { insertSigmas(ti, operand); // If the operand is a result of a indirect instruction (e.g. ZExt, SExt, Trunc), // Create sigmas for the operands of the operands too CastInst *cinst = NULL; if ((cinst = dyn_cast<CastInst>(operand))) { insertSigmas(ti, cinst->getOperand(0)); } } } } } } // CASE 2: Switch Instruction else if ((si = dyn_cast<SwitchInst>(ti))) { Value *condition = si->getCondition(); if (isa<Instruction>(condition) || isa<Argument>(condition)) { insertSigmas(ti, condition); // If the operand is a result of a indirect instruction (e.g. ZExt, SExt, Trunc), // Create sigmas for the operands of the operands too CastInst *cinst = NULL; if ((cinst = dyn_cast<CastInst>(condition))) { insertSigmas(ti, cinst->getOperand(0)); } } } }
void CPFlowFunction::visitBranchInst(BranchInst &BI) { CPLatticePoint* result = new CPLatticePoint(*(info_in_casted.back())); info_in_casted.pop_back(); BranchInst* current = &BI; if (BI.isConditional()) { Value* cond = BI.getCondition(); if (isa<ICmpInst>(cond)) { std::pair<Use*, Use *> branches = helper::getOps(BI); Use* true_branch = branches.first; Use* false_branch = branches.second; ICmpInst* cmp = dyn_cast<ICmpInst>(cond); std::pair<Use*, Use *> operands = helper::getOps(*cmp); Use* rhs = operands.second; Use* lhs = operands.first; ConstantInt* rhs_const = NULL; ConstantInt* lhs_const = NULL; // get the rhs/lhs as a constant int if (isa<ConstantInt>(rhs)) { rhs_const = dyn_cast<ConstantInt>(rhs); } else if (result->representation.count(rhs->get()) > 0) { rhs_const = result->representation[rhs->get()]; } else { rhs_const = ConstantInt::get(context, llvm::APInt(32, 0, true)); } if (isa<ConstantInt>(lhs)) { lhs_const = dyn_cast<ConstantInt>(lhs->get()); } else if (result->representation.count(lhs->get()) > 0) { lhs_const = result->representation[lhs->get()]; } else { lhs_const = ConstantInt::get(context, llvm::APInt(32, 0, true)); } // Create successors CPLatticePoint* true_branchCLP = new CPLatticePoint(false, false, std::map<Value*,ConstantInt*>(result->representation)); CPLatticePoint* false_branchCLP = new CPLatticePoint(false, false, std::map<Value*,ConstantInt*>(result->representation)); // get the predicate int predicate = 0; predicate = cmp->isSigned() ? cmp->getSignedPredicate() : cmp->getUnsignedPredicate(); if (predicate == CmpInst::ICMP_EQ) { if (isa<ConstantInt>(lhs)) { true_branchCLP->representation[rhs->get()] = lhs_const; } else if (isa<ConstantInt>(rhs)) { true_branchCLP->representation[lhs->get()] = rhs_const; } out_map[true_branch->get()] = true_branchCLP; out_map[false_branch->get()] = false_branchCLP; } else if (predicate == CmpInst::ICMP_NE) { if (isa<ConstantInt>(lhs)) { false_branchCLP->representation[rhs->get()] = lhs_const; } else if (isa<ConstantInt>(rhs)) { false_branchCLP->representation[lhs->get()] = rhs_const; } out_map[true_branch->get()] = true_branchCLP; out_map[false_branch->get()] = false_branchCLP; } else { for (std::map<Value *, LatticePoint *>::iterator it=out_map.begin(); it != out_map.end(); ++it){ Value* elm = it->first; out_map[elm] = new CPLatticePoint(*result); } } } else { for (std::map<Value *, LatticePoint *>::iterator it=out_map.begin(); it != out_map.end(); ++it){ Value* elm = it->first; out_map[elm] = new CPLatticePoint(*result); } } } else { for (std::map<Value *, LatticePoint *>::iterator it=out_map.begin(); it != out_map.end(); ++it){ Value* elm = it->first; out_map[elm] = new CPLatticePoint(*result); } } }
std::pair<Optional<SILValue>, SILLocation> SILGenFunction::emitEpilogBB(SILLocation TopLevel) { assert(ReturnDest.getBlock() && "no epilog bb prepared?!"); SILBasicBlock *epilogBB = ReturnDest.getBlock(); SILLocation ImplicitReturnFromTopLevel = ImplicitReturnLocation::getImplicitReturnLoc(TopLevel); SmallVector<SILValue, 4> directResults; Optional<SILLocation> returnLoc = None; // If the current BB isn't terminated, and we require a return, then we // are not allowed to fall off the end of the function and can't reach here. if (NeedsReturn && B.hasValidInsertionPoint()) B.createUnreachable(ImplicitReturnFromTopLevel); if (epilogBB->pred_empty()) { // If the epilog was not branched to at all, kill the BB and // just emit the epilog into the current BB. while (!epilogBB->empty()) epilogBB->back().eraseFromParent(); eraseBasicBlock(epilogBB); // If the current bb is terminated then the epilog is just unreachable. if (!B.hasValidInsertionPoint()) return { None, TopLevel }; // We emit the epilog at the current insertion point. returnLoc = ImplicitReturnFromTopLevel; } else if (std::next(epilogBB->pred_begin()) == epilogBB->pred_end() && !B.hasValidInsertionPoint()) { // If the epilog has a single predecessor and there's no current insertion // point to fall through from, then we can weld the epilog to that // predecessor BB. // Steal the branch argument as the return value if present. SILBasicBlock *pred = *epilogBB->pred_begin(); BranchInst *predBranch = cast<BranchInst>(pred->getTerminator()); assert(predBranch->getArgs().size() == epilogBB->bbarg_size() && "epilog predecessor arguments does not match block params"); for (auto index : indices(predBranch->getArgs())) { SILValue result = predBranch->getArgs()[index]; directResults.push_back(result); epilogBB->getBBArg(index)->replaceAllUsesWith(result); } // If we are optimizing, we should use the return location from the single, // previously processed, return statement if any. if (predBranch->getLoc().is<ReturnLocation>()) { returnLoc = predBranch->getLoc(); } else { returnLoc = ImplicitReturnFromTopLevel; } // Kill the branch to the now-dead epilog BB. pred->erase(predBranch); // Move any instructions from the EpilogBB to the end of the 'pred' block. pred->spliceAtEnd(epilogBB); // Finally we can erase the epilog BB. eraseBasicBlock(epilogBB); // Emit the epilog into its former predecessor. B.setInsertionPoint(pred); } else { // Move the epilog block to the end of the ordinary section. auto endOfOrdinarySection = StartOfPostmatter; B.moveBlockTo(epilogBB, endOfOrdinarySection); // Emit the epilog into the epilog bb. Its arguments are the // direct results. directResults.append(epilogBB->bbarg_begin(), epilogBB->bbarg_end()); // If we are falling through from the current block, the return is implicit. B.emitBlock(epilogBB, ImplicitReturnFromTopLevel); } // Emit top-level cleanups into the epilog block. assert(!Cleanups.hasAnyActiveCleanups(getCleanupsDepth(), ReturnDest.getDepth()) && "emitting epilog in wrong scope"); auto cleanupLoc = CleanupLocation::get(TopLevel); Cleanups.emitCleanupsForReturn(cleanupLoc); // If the return location is known to be that of an already // processed return, use it. (This will get triggered when the // epilog logic is simplified.) // // Otherwise make the ret instruction part of the cleanups. if (!returnLoc) returnLoc = cleanupLoc; // Build the return value. We don't do this if there are no direct // results; this can happen for void functions, but also happens when // prepareEpilog was asked to not add result arguments to the epilog // block. SILValue returnValue; if (!directResults.empty()) { assert(directResults.size() == F.getLoweredFunctionType()->getNumDirectResults()); returnValue = buildReturnValue(*this, TopLevel, directResults); } return { returnValue, *returnLoc }; }
/// Insert code in the prolog code when unrolling a loop with a /// run-time trip-count. /// /// This method assumes that the loop unroll factor is total number /// of loop bodes in the loop after unrolling. (Some folks refer /// to the unroll factor as the number of *extra* copies added). /// We assume also that the loop unroll factor is a power-of-two. So, after /// unrolling the loop, the number of loop bodies executed is 2, /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for /// the switch instruction is generated. /// /// extraiters = tripcount % loopfactor /// if (extraiters == 0) jump Loop: /// if (extraiters == loopfactor) jump L1 /// if (extraiters == loopfactor-1) jump L2 /// ... /// L1: LoopBody; /// L2: LoopBody; /// ... /// if tripcount < loopfactor jump End /// Loop: /// ... /// End: /// bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, LPPassManager *LPM) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; // Make sure the loop is in canonical form, and there is a single // exit block only. if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0) return false; // Use Scalar Evolution to compute the trip count. This allows more // loops to be unrolled than relying on induction var simplification if (!LPM) return false; ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); if (SE == 0) return false; // Only unroll loops with a computable trip count and the trip count needs // to be an int value (allowing a pointer type is a TODO item) const SCEV *BECount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) return false; // Add 1 since the backedge count doesn't include the first loop iteration const SCEV *TripCountSC = SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; // We only handle cases when the unroll factor is a power of 2. // Count is the loop unroll factor, the number of extra copies added + 1. if ((Count & (Count-1)) != 0) return false; // If this loop is nested, then the loop unroller changes the code in // parent loop, so the Scalar Evolution pass needs to be run again if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); BasicBlock *PH = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to splits the original preheader twice, one for the end of the // prolog code and one for a new loop preheader BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % (loop unroll factor + 1) SCEVExpander Expander(*SE, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Type *CountTy = TripCount->getType(); BinaryOperator *ModVal = BinaryOperator::CreateURem(TripCount, ConstantInt::get(CountTy, Count), "xtraiter"); ModVal->insertBefore(PreHeaderBR); // Check if for no extra iterations, then jump to unrolled loop Value *BranchVal = new ICmpInst(PreHeaderBR, ICmpInst::ICMP_NE, ModVal, ConstantInt::get(CountTy, 0), "lcmp"); // Branch to either the extra iterations or the unrolled loop // We will fix up the true branch label when adding loop body copies BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); assert(PreHeaderBR->isUnconditional() && PreHeaderBR->getSuccessor(0) == PEnd && "CFG edges in Preheader are not correct"); PreHeaderBR->eraseFromParent(); ValueToValueMapTy LVMap; Function *F = Header->getParent(); // These variables are used to update the CFG links in each iteration BasicBlock *CompareBB = 0; BasicBlock *LastLoopBB = PH; // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) { std::vector<BasicBlock*> NewBlocks; ValueToValueMapTy VMap; // Clone all the basic blocks in the loop, but we don't clone the loop // This function adds the appropriate CFG connections. CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks, LoopBlocks, VMap, LVMap, LI); LastLoopBB = cast<BasicBlock>(VMap[Latch]); // Insert the cloned blocks into function just before the original loop F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0], F->end()); // Generate the code for the comparison which determines if the loop // prolog code needs to be executed. if (leftOverIters == Count-1) { // There is no compare block for the fall-thru case when for the last // left over iteration CompareBB = NewBlocks[0]; } else { // Create a new block for the comparison BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp", F, CompareBB); if (Loop *ParentLoop = L->getParentLoop()) { // Add the new block to the parent loop, if needed ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } // The comparison w/ the extra iteration value and branch Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, ConstantInt::get(CountTy, leftOverIters), "un.tmp"); // Branch to either the extra iterations or the unrolled loop BranchInst::Create(NewBlocks[0], CompareBB, BranchVal, NewBB); CompareBB = NewBB; PH->getTerminator()->setSuccessor(0, NewBB); VMap[NewPH] = CompareBB; } // Rewrite the cloned instruction operands to use the values // created when the clone is created. for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) { for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) { RemapInstruction(I, VMap, RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); } } } // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap, LPM->getAsPass()); NumRuntimeUnrolled++; return true; }
bool LoopInterchangeTransform::adjustLoopBranches() { DEBUG(dbgs() << "adjustLoopBranches called\n"); // Adjust the loop preheader BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); BasicBlock *InnerLoopLatchPredecessor = InnerLoopLatch->getUniquePredecessor(); BasicBlock *InnerLoopLatchSuccessor; BasicBlock *OuterLoopLatchSuccessor; BranchInst *OuterLoopLatchBI = dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); BranchInst *InnerLoopLatchBI = dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); BranchInst *OuterLoopHeaderBI = dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); BranchInst *InnerLoopHeaderBI = dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || !InnerLoopHeaderBI) return false; BranchInst *InnerLoopLatchPredecessorBI = dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); BranchInst *OuterLoopPredecessorBI = dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) return false; BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); if (!InnerLoopHeaderSuccessor) return false; // Adjust Loop Preheader and headers unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader) OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader); } NumSucc = OuterLoopHeaderBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) OuterLoopHeaderBI->setSuccessor(i, LoopExit); else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); } // Adjust reduction PHI's now that the incoming block has changed. updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, OuterLoopHeader); BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); InnerLoopHeaderBI->eraseFromParent(); // -------------Adjust loop latches----------- if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); else InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); } // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with // the value and remove this PHI node from inner loop. SmallVector<PHINode *, 8> LcssaVec; for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) { PHINode *LcssaPhi = cast<PHINode>(I); LcssaVec.push_back(LcssaPhi); } for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) { PHINode *P = *I; Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); P->replaceAllUsesWith(Incoming); P->eraseFromParent(); } if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); else OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); else InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); } else { OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); } return true; }
/// \brief Simplify one loop and queue further loops for simplification. /// /// FIXME: Currently this accepts both lots of analyses that it uses and a raw /// Pass pointer. The Pass pointer is used by numerous utilities to update /// specific analyses. Rather than a pass it would be much cleaner and more /// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, Pass *PP, AssumptionCache *AC) { bool Changed = false; ReprocessLoop: // Check to see that no blocks (other than the header) in this loop have // predecessors that are not in the loop. This is not valid for natural // loops, but can occur if the blocks are unreachable. Since they are // unreachable we can just shamelessly delete those CFG edges! for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); BB != E; ++BB) { if (*BB == L->getHeader()) continue; SmallPtrSet<BasicBlock*, 4> BadPreds; for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI) { BasicBlock *P = *PI; if (!L->contains(P)) BadPreds.insert(P); } // Delete each unique out-of-loop (and thus dead) predecessor. for (BasicBlock *P : BadPreds) { DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " << P->getName() << "\n"); // Inform each successor of each dead pred. for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI) (*SI)->removePredecessor(P); // Zap the dead pred's terminator and replace it with unreachable. TerminatorInst *TI = P->getTerminator(); TI->replaceAllUsesWith(UndefValue::get(TI->getType())); P->getTerminator()->eraseFromParent(); new UnreachableInst(P->getContext(), P); Changed = true; } } // If there are exiting blocks with branches on undef, resolve the undef in // the direction which will exit the loop. This will help simplify loop // trip count computations. SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), E = ExitingBlocks.end(); I != E; ++I) if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) if (BI->isConditional()) { if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " << (*I)->getName() << "\n"); BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0)))); // This may make the loop analyzable, force SCEV recomputation. if (SE) SE->forgetLoop(L); Changed = true; } } // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { Preheader = InsertPreheaderForLoop(L, PP); if (Preheader) { ++NumInserted; Changed = true; } } // Next, check to make sure that all exit nodes of the loop only have // predecessors that are inside of the loop. This check guarantees that the // loop preheader/header will dominate the exit blocks. If the exit block has // predecessors from outside of the loop, split the edge now. SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), E = ExitBlockSet.end(); I != E; ++I) { BasicBlock *ExitBlock = *I; for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); PI != PE; ++PI) // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. if (!L->contains(*PI)) { if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) { ++NumInserted; Changed = true; } break; } } // If the header has more than two predecessors at this point (from the // preheader and from multiple backedges), we must adjust the loop. BasicBlock *LoopLatch = L->getLoopLatch(); if (!LoopLatch) { // If this is really a nested loop, rip it out into a child loop. Don't do // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. Worklist.push_back(OuterL); // This is a big restructuring change, reprocess the whole loop. Changed = true; // GCC doesn't tail recursion eliminate this. // FIXME: It isn't clear we can't rely on LLVM to TRE this. goto ReprocessLoop; } } // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI); if (LoopLatch) { ++NumInserted; Changed = true; } } const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); // Scan over the PHI nodes in the loop header. Since they now have only two // incoming values (the loop is canonicalized), we may have simplified the PHI // down to 'X = phi [X, Y]', which should be replaced with 'Y'. PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); } // If this loop has multiple exits and the exits all go to the same // block, attempt to merge the exits. This helps several passes, such // as LoopRotation, which do not support loops with multiple exits. // SimplifyCFG also does this (and this code uses the same utility // function), however this code is loop-aware, where SimplifyCFG is // not. That gives it the advantage of being able to hoist // loop-invariant instructions out of the way to open up more // opportunities, and the disadvantage of having the responsibility // to preserve dominator information. bool UniqueExit = true; if (!ExitBlocks.empty()) for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) if (ExitBlocks[i] != ExitBlocks[0]) { UniqueExit = false; break; } if (UniqueExit) { for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitingBlock = ExitingBlocks[i]; if (!ExitingBlock->getSinglePredecessor()) continue; BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!BI || !BI->isConditional()) continue; CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); if (!CI || CI->getParent() != ExitingBlock) continue; // Attempt to hoist out all instructions except for the // comparison and the branch. bool AllInvariant = true; bool AnyInvariant = false; for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { Instruction *Inst = I++; // Skip debug info intrinsics. if (isa<DbgInfoIntrinsic>(Inst)) continue; if (Inst == CI) continue; if (!L->makeLoopInvariant(Inst, AnyInvariant, Preheader ? Preheader->getTerminator() : nullptr)) { AllInvariant = false; break; } } if (AnyInvariant) { Changed = true; // The loop disposition of all SCEV expressions that depend on any // hoisted values have also changed. if (SE) SE->forgetLoopDispositions(L); } if (!AllInvariant) continue; // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. if (!FoldBranchToCommonDest(BI)) continue; // Success. The block is now dead, so remove it from the loop, // update the dominator tree and delete it. DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " << ExitingBlock->getName() << "\n"); // Notify ScalarEvolution before deleting this block. Currently assume the // parent loop doesn't change (spliting edges doesn't count). If blocks, // CFG edges, or other values in the parent loop change, then we need call // to forgetLoop() for the parent instead. if (SE) SE->forgetLoop(L); assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); DomTreeNode *Node = DT->getNode(ExitingBlock); const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = Node->getChildren(); while (!Children.empty()) { DomTreeNode *Child = Children.front(); DT->changeImmediateDominator(Child, Node->getIDom()); } DT->eraseNode(ExitingBlock); BI->getSuccessor(0)->removePredecessor(ExitingBlock); BI->getSuccessor(1)->removePredecessor(ExitingBlock); ExitingBlock->eraseFromParent(); } } return Changed; }
/// This works like CloneAndPruneFunctionInto, except that it does not clone the /// entire function. Instead it starts at an instruction provided by the caller /// and copies (and prunes) only the code reachable from that instruction. void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. if (!StartingInst) for (const Argument &II : OldFunc->args()) assert(VMap.count(&II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, NameSuffix, CodeInfo); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); else { StartingBB = &OldFunc->getEntryBlock(); StartingInst = &StartingBB->front(); } // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (const BasicBlock &BI : *OldFunc) { Value *V = VMap[&BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (!NewBB) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Handle PHI nodes specially, as we have to remove references to dead // blocks. for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process. if (const PHINode *PN = dyn_cast<PHINode>(I)) { if (isa<PHINode>(VMap[PN])) PHIToResolve.push_back(PN); else break; } else { break; } } // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. RemapInstruction(NewBB->getTerminator(), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap[PN->getIncomingBlock(pred)]; if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[&*OldI] == PN && "VMap mismatch"); VMap[&*OldI] = NV; PN->eraseFromParent(); ++OldI; } } } // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the // WeakVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) recursivelySimplifyInstruction(PN); // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || I->getSinglePredecessor() == &*I)) { BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; } // We need to simplify conditional branches and switches with a constant // operand. We try to prune these out when cloning, but if the // simplification required looking through PHI nodes, those are only // available after forming the full basic block. That may leave some here, // and we still want to prune the dead code as early as possible. ConstantFoldTerminator(&*I); BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { ++I; continue; } // We shouldn't be able to get single-entry PHI nodes here, as instsimplify // above should have zapped all of them.. assert(!isa<PHINode>(Dest->begin())); // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) Returns.push_back(RI); }
/// If \param [in] BB has more than one predecessor that is a conditional /// branch, attempt to use parallel and/or for the branch condition. \returns /// true on success. /// /// Before: /// ...... /// %cmp10 = fcmp une float %tmp1, %tmp2 /// br i1 %cmp1, label %if.then, label %lor.rhs /// /// lor.rhs: /// ...... /// %cmp11 = fcmp une float %tmp3, %tmp4 /// br i1 %cmp11, label %if.then, label %ifend /// /// if.end: // the merge block /// ...... /// /// if.then: // has two predecessors, both of them contains conditional branch. /// ...... /// br label %if.end; /// /// After: /// ...... /// %cmp10 = fcmp une float %tmp1, %tmp2 /// ...... /// %cmp11 = fcmp une float %tmp3, %tmp4 /// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode. /// br i1 %cmp12, label %if.then, label %ifend /// /// if.end: /// ...... /// /// if.then: /// ...... /// br label %if.end; /// /// Current implementation handles two cases. /// Case 1: \param BB is on the else-path. /// /// BB1 /// / | /// BB2 | /// / \ | /// BB3 \ | where, BB1, BB2 contain conditional branches. /// \ | / BB3 contains unconditional branch. /// \ | / BB4 corresponds to \param BB which is also the merge. /// BB => BB4 /// /// /// Corresponding source code: /// /// if (a == b && c == d) /// statement; // BB3 /// /// Case 2: \param BB BB is on the then-path. /// /// BB1 /// / | /// | BB2 /// \ / | where BB1, BB2 contain conditional branches. /// BB => BB3 | BB3 contains unconditiona branch and corresponds /// \ / to \param BB. BB4 is the merge. /// BB4 /// /// Corresponding source code: /// /// if (a == b || c == d) /// statement; // BB3 /// /// In both cases, \param BB is the common successor of conditional branches. /// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches /// as its predecessors. /// bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P) { PHINode *PHI = dyn_cast<PHINode>(BB->begin()); if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. BasicBlock *LastCondBlock = NULL; BasicBlock *FirstCondBlock = NULL; BasicBlock *UnCondBlock = NULL; int Idx = -1; // Check predecessors of \param BB. SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { BasicBlock *Pred = *PI; BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()); // All predecessors should terminate with a branch. if (!PBI) return false; BasicBlock *PP = Pred->getSinglePredecessor(); if (PBI->isUnconditional()) { // Case 1: Pred (BB3) is an unconditional block, it should // have a single predecessor (BB2) that is also a predecessor // of \param BB (BB4) and should not have address-taken. // There should exist only one such unconditional // branch among the predecessors. if (UnCondBlock || !PP || (Preds.count(PP) == 0) || Pred->hasAddressTaken()) return false; UnCondBlock = Pred; continue; } // Only conditional branches are allowed beyond this point. assert(PBI->isConditional()); // Condition's unique use should be the branch instruction. Value *PC = PBI->getCondition(); if (!PC || !PC->hasOneUse()) return false; if (PP && Preds.count(PP)) { // These are internal condition blocks to be merged from, e.g., // BB2 in both cases. // Should not be address-taken. if (Pred->hasAddressTaken()) return false; // Instructions in the internal condition blocks should be safe // to hoist up. for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { Instruction *CI = BI++; if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) return false; } } else { // This is the condition block to be merged into, e.g. BB1 in // both cases. if (FirstCondBlock) return false; FirstCondBlock = Pred; } // Find whether BB is uniformly on the true (or false) path // for all of its predecessors. BasicBlock *PS1 = PBI->getSuccessor(0); BasicBlock *PS2 = PBI->getSuccessor(1); BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; int CIdx = (PS1 == BB) ? 0 : 1; if (Idx == -1) Idx = CIdx; else if (CIdx != Idx) return false; // PS is the successor which is not BB. Check successors to identify // the last conditional branch. if (Preds.count(PS) == 0) { // Case 2. LastCondBlock = Pred; } else { // Case 1 BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); if (BPS && BPS->isUnconditional()) { // Case 1: PS(BB3) should be an unconditional branch. LastCondBlock = Pred; } } } if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) return false; TerminatorInst *TBB = LastCondBlock->getTerminator(); BasicBlock *PS1 = TBB->getSuccessor(0); BasicBlock *PS2 = TBB->getSuccessor(1); BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator()); BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator()); // If PS1 does not jump into PS2, but PS2 jumps into PS1, // attempt branch inversion. if (!PBI1 || !PBI1->isUnconditional() || (PS1->getTerminator()->getSuccessor(0) != PS2)) { // Check whether PS2 jumps into PS1. if (!PBI2 || !PBI2->isUnconditional() || (PS2->getTerminator()->getSuccessor(0) != PS1)) return false; // Do branch inversion. BasicBlock *CurrBlock = LastCondBlock; bool EverChanged = false; while (1) { BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); CmpInst::Predicate Predicate = CI->getPredicate(); // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); BI->swapSuccessors(); EverChanged = true; } if (CurrBlock == FirstCondBlock) break; CurrBlock = CurrBlock->getSinglePredecessor(); } return EverChanged; } // PS1 must have a conditional branch. if (!PBI1 || !PBI1->isUnconditional()) return false; // PS2 should not contain PHI node. PHI = dyn_cast<PHINode>(PS2->begin()); if (PHI) return false; // Do the transformation. BasicBlock *CB; BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); bool Iteration = true; BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Value *PC = PBI->getCondition(); do { CB = PBI->getSuccessor(1 - Idx); // Delete the conditional branch. FirstCondBlock->getInstList().pop_back(); FirstCondBlock->getInstList() .splice(FirstCondBlock->end(), CB->getInstList()); PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); Value *CC = PBI->getCondition(); // Merge conditions. Builder.SetInsertPoint(PBI); Value *NC; if (Idx == 0) // Case 2, use parallel or. NC = Builder.CreateOr(PC, CC); else // Case 1, use parallel and. NC = Builder.CreateAnd(PC, CC); PBI->replaceUsesOfWith(CC, NC); PC = NC; if (CB == LastCondBlock) Iteration = false; // Remove internal conditional branches. CB->dropAllReferences(); // make CB unreachable and let downstream to delete the block. new UnreachableInst(CB->getContext(), CB); } while (Iteration); Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); return true; }
/// Check whether \param BB is the merge block of a if-region. If yes, check /// whether there exists an adjacent if-region upstream, the two if-regions /// contain identical instuctions and can be legally merged. \returns true if /// the two if-regions are merged. /// /// From: /// if (a) /// statement; /// if (b) /// statement; /// /// To: /// if (a || b) /// statement; /// bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P) { BasicBlock *IfTrue2, *IfFalse2; Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); if (!CInst2) return false; BasicBlock *SecondEntryBlock = CInst2->getParent(); if (SecondEntryBlock->hasAddressTaken()) return false; BasicBlock *IfTrue1, *IfFalse1; Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1); if (!CInst1) return false; BasicBlock *FirstEntryBlock = CInst1->getParent(); // Either then-path or else-path should be empty. if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock)) return false; if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock)) return false; TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); Instruction *PBI2 = SecondEntryBlock->begin(); if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, IfTrue2)) return false; if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, IfFalse2)) return false; // Check whether \param SecondEntryBlock has side-effect and is safe to // speculate. for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { Instruction *CI = BI; if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || !isSafeToSpeculativelyExecute(CI)) return false; } // Merge \param SecondEntryBlock into \param FirstEntryBlock. FirstEntryBlock->getInstList().pop_back(); FirstEntryBlock->getInstList() .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator()); Value *CC = PBI->getCondition(); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Builder.SetInsertPoint(PBI); Value *NC = Builder.CreateOr(CInst1, CC); PBI->replaceUsesOfWith(CC, NC); Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); // Remove IfTrue1 if (IfTrue1 != FirstEntryBlock) { IfTrue1->dropAllReferences(); IfTrue1->eraseFromParent(); } // Remove IfFalse1 if (IfFalse1 != FirstEntryBlock) { IfFalse1->dropAllReferences(); IfFalse1->eraseFromParent(); } // Remove \param SecondEntryBlock SecondEntryBlock->dropAllReferences(); SecondEntryBlock->eraseFromParent(); DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); return true; }
void TripCountGenerator::generateVectorEstimatedTripCounts(Function &F){ LoopInfoEx& li = getAnalysis<LoopInfoEx>(); LoopNormalizerAnalysis& ln = getAnalysis<LoopNormalizerAnalysis>(); for(LoopInfoEx::iterator lit = li.begin(); lit != li.end(); lit++){ //Indicates if we don't have ways to determine the trip count bool unknownTC = false; Loop* loop = *lit; BasicBlock* header = loop->getHeader(); BasicBlock* entryBlock = ln.entryBlocks[header]; LoopControllersDepGraph& lcd = getAnalysis<LoopControllersDepGraph>(); lcd.setPerspective(header); /* * Here we are looking for the predicate that stops the loop. * * At this moment, we are only considering loops that are controlled by * integer comparisons. */ BasicBlock* exitBlock = findLoopControllerBlock(loop); assert(exitBlock && "Exiting Block not found!"); TerminatorInst* T = exitBlock->getTerminator(); BranchInst* BI = dyn_cast<BranchInst>(T); ICmpInst* CI = BI ? dyn_cast<ICmpInst>(BI->getCondition()) : NULL; Value* Op1 = NULL; Value* Op2 = NULL; if (!CI) unknownTC = true; else { int LoopClass; if (isIntervalComparison(CI)) { LoopClass = 0; NumIntervalLoops++; } else { LoopClass = 1; NumEqualityLoops++; } Op1 = getValueAtEntryPoint(CI->getOperand(0), header); Op2 = getValueAtEntryPoint(CI->getOperand(1), header); if((!Op1) || (!Op2) ) { if (!LoopClass) NumUnknownConditionsIL++; else NumUnknownConditionsEL++; unknownTC = true; } else { if (!(Op1->getType()->isIntegerTy() && Op2->getType()->isIntegerTy())) { //We only handle loop conditions that compares integer variables NumIncompatibleOperandTypes++; unknownTC = true; } } } ProgressVector* V1 = NULL; ProgressVector* V2 = NULL; if (!unknownTC) { V1 = generateConstantProgressVector(CI->getOperand(0), header); V2 = generateConstantProgressVector(CI->getOperand(1), header); if ((!V1) || (!V2)) { //TODO: Increment a statistic here unknownTC = true; } } if(!unknownTC) { generateVectorEstimatedTripCount(header, entryBlock, Op1, Op2, V1, V2, CI); NumVectorEstimatedTCs++; } } }
std::unique_ptr<FunctionOutliningInfo> PartialInlinerImpl::computeOutliningInfo(Function *F) { BasicBlock *EntryBlock = &F->front(); BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); if (!BR || BR->isUnconditional()) return std::unique_ptr<FunctionOutliningInfo>(); // Returns true if Succ is BB's successor auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { return is_contained(successors(BB), Succ); }; auto SuccSize = [](BasicBlock *BB) { return std::distance(succ_begin(BB), succ_end(BB)); }; auto IsReturnBlock = [](BasicBlock *BB) { TerminatorInst *TI = BB->getTerminator(); return isa<ReturnInst>(TI); }; auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) { if (IsReturnBlock(Succ1)) return std::make_tuple(Succ1, Succ2); if (IsReturnBlock(Succ2)) return std::make_tuple(Succ2, Succ1); return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); }; // Detect a triangular shape: auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) { if (IsSuccessor(Succ1, Succ2)) return std::make_tuple(Succ1, Succ2); if (IsSuccessor(Succ2, Succ1)) return std::make_tuple(Succ2, Succ1); return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); }; std::unique_ptr<FunctionOutliningInfo> OutliningInfo = llvm::make_unique<FunctionOutliningInfo>(); BasicBlock *CurrEntry = EntryBlock; bool CandidateFound = false; do { // The number of blocks to be inlined has already reached // the limit. When MaxNumInlineBlocks is set to 0 or 1, this // disables partial inlining for the function. if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks) break; if (SuccSize(CurrEntry) != 2) break; BasicBlock *Succ1 = *succ_begin(CurrEntry); BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1); BasicBlock *ReturnBlock, *NonReturnBlock; std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); if (ReturnBlock) { OutliningInfo->Entries.push_back(CurrEntry); OutliningInfo->ReturnBlock = ReturnBlock; OutliningInfo->NonReturnBlock = NonReturnBlock; CandidateFound = true; break; } BasicBlock *CommSucc; BasicBlock *OtherSucc; std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2); if (!CommSucc) break; OutliningInfo->Entries.push_back(CurrEntry); CurrEntry = OtherSucc; } while (true); if (!CandidateFound) return std::unique_ptr<FunctionOutliningInfo>(); // Do sanity check of the entries: threre should not // be any successors (not in the entry set) other than // {ReturnBlock, NonReturnBlock} assert(OutliningInfo->Entries[0] == &F->front() && "Function Entry must be the first in Entries vector"); DenseSet<BasicBlock *> Entries; for (BasicBlock *E : OutliningInfo->Entries) Entries.insert(E); // Returns true of BB has Predecessor which is not // in Entries set. auto HasNonEntryPred = [Entries](BasicBlock *BB) { for (auto Pred : predecessors(BB)) { if (!Entries.count(Pred)) return true; } return false; }; auto CheckAndNormalizeCandidate = [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) { for (BasicBlock *E : OutliningInfo->Entries) { for (auto Succ : successors(E)) { if (Entries.count(Succ)) continue; if (Succ == OutliningInfo->ReturnBlock) OutliningInfo->ReturnBlockPreds.push_back(E); else if (Succ != OutliningInfo->NonReturnBlock) return false; } // There should not be any outside incoming edges either: if (HasNonEntryPred(E)) return false; } return true; }; if (!CheckAndNormalizeCandidate(OutliningInfo.get())) return std::unique_ptr<FunctionOutliningInfo>(); // Now further growing the candidate's inlining region by // peeling off dominating blocks from the outlining region: while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) { BasicBlock *Cand = OutliningInfo->NonReturnBlock; if (SuccSize(Cand) != 2) break; if (HasNonEntryPred(Cand)) break; BasicBlock *Succ1 = *succ_begin(Cand); BasicBlock *Succ2 = *(succ_begin(Cand) + 1); BasicBlock *ReturnBlock, *NonReturnBlock; std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock) break; if (NonReturnBlock->getSinglePredecessor() != Cand) break; // Now grow and update OutlininigInfo: OutliningInfo->Entries.push_back(Cand); OutliningInfo->NonReturnBlock = NonReturnBlock; OutliningInfo->ReturnBlockPreds.push_back(Cand); Entries.insert(Cand); } return OutliningInfo; }
/// Rotate loop LP. Return true if the loop is rotated. bool LoopRotate::rotateLoop(Loop *L) { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; BasicBlock *OrigHeader = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (BI == 0 || BI->isUnconditional()) return false; // If the loop header is not one of the loop exiting blocks then // either this loop is already rotated or it is not // suitable for loop rotation transformations. if (!L->isLoopExiting(OrigHeader)) return false; // Updating PHInodes in loops with multiple exits adds complexity. // Keep it simple, and restrict loop rotation to loops with one exit only. // In future, lift this restriction and support for multiple exits if // required. SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); if (ExitBlocks.size() > 1) return false; // Check size of original header and reject loop if it is very big. { CodeMetrics Metrics; Metrics.analyzeBasicBlock(OrigHeader); if (Metrics.NumInsts > MAX_HEADER_SIZE) return false; } // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); BasicBlock *OrigLatch = L->getLoopLatch(); // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. if (OrigPreheader == 0 || OrigLatch == 0) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. BasicBlock *Exit = BI->getSuccessor(0); BasicBlock *NewHeader = BI->getSuccessor(1); if (L->contains(Exit)) std::swap(Exit, NewHeader); assert(NewHeader && "Unable to determine new loop header"); assert(L->contains(NewHeader) && !L->contains(Exit) && "Unable to determine loop header and exit blocks"); // This code assumes that the new header has exactly one predecessor. // Remove any single-entry PHI nodes in it. assert(NewHeader->getSinglePredecessor() && "New header doesn't have one pred!"); FoldSingleEntryPHINodes(NewHeader); // Begin by walking OrigHeader and populating ValueMap with an entry for // each Instruction. BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); ValueToValueMapTy ValueMap; // For PHI nodes, the value available in OldPreHeader is just the // incoming value from OldPreHeader. for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); while (I != E) { Instruction *Inst = I++; // If the instruction's operands are invariant and it doesn't read or write // memory, then it is safe to hoist. Doing this doesn't change the order of // execution in the preheader, but does prevent the instruction from // executing in each iteration of the loop. This means it is safe to hoist // something that might trap, but isn't safe to hoist something that reads // memory (without proving that the loop doesn't write). if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) { Inst->moveBefore(LoopEntryBranch); continue; } // Otherwise, create a duplicate of the instruction. Instruction *C = Inst->clone(); // Eagerly remap the operands of the instruction. RemapInstruction(C, ValueMap, RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. Value *V = SimplifyInstruction(C); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. delete C; ValueMap[Inst] = V; } else { // Otherwise, stick the new instruction into the new block! C->setName(Inst->getName()); C->insertBefore(LoopEntryBranch); ValueMap[Inst] = C; } } // Along with all the other instructions, we just cloned OrigHeader's // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. TerminatorInst *TI = OrigHeader->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin(); PHINode *PN = dyn_cast<PHINode>(BI); ++BI) PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove // OrigPreHeader's old terminator (the original branch into the loop), and // remove the corresponding incoming values from the PHI nodes in OrigHeader. LoopEntryBranch->eraseFromParent(); // If there were any uses of instructions in the duplicated block outside the // loop, update them, inserting PHI nodes as required RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap); // NewHeader is now the header of the loop. L->moveToHeader(NewHeader); assert(L->getHeader() == NewHeader && "Latch block is our new header"); // At this point, we've finished our major CFG changes. As part of cloning // the loop into the preheader we've simplified instructions and the // duplicated conditional branch may now be branching on a constant. If it is // branching on a constant and if that constant means that we enter the loop, // then we fold away the cond branch to an uncond branch. This simplifies the // loop in cases important for nested loops, and it also means we don't have // to split as many edges. BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); assert(PHBI->isConditional() && "Should be clone of BI condbr!"); if (!isa<ConstantInt>(PHBI->getCondition()) || PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != NewHeader) { // The conditional branch can't be folded, handle the general case. // Update DominatorTree to reflect the CFG change we just made. Then split // edges as necessary to preserve LoopSimplify form. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { // Since OrigPreheader now has the conditional branch to Exit block, it is // the dominator of Exit. DT->changeImmediateDominator(Exit, OrigPreheader); DT->changeImmediateDominator(NewHeader, OrigPreheader); // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(OrigHeader, OrigLatch); } // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and // thus is not a preheader anymore. Split the edge to form a real preheader. BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this); NewPH->setName(NewHeader->getName() + ".lr.ph"); // Preserve canonical loop form, which means that 'Exit' should have only one // predecessor. BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this); ExitSplit->moveBefore(Exit); } else { // We can fold the conditional branch in the preheader, this makes things // simpler. The first step is to remove the extra edge to the Exit block. Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); BranchInst::Create(NewHeader, PHBI); PHBI->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(NewHeader, OrigPreheader); DT->changeImmediateDominator(OrigHeader, OrigLatch); } } assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); // Now that the CFG and DomTree are in a consistent state again, try to merge // the OrigHeader block into OrigLatch. This will succeed if they are // connected by an unconditional branch. This is just a cleanup so the // emitted code isn't too gross in this common case. MergeBlockIntoPredecessor(OrigHeader, this); ++NumRotated; return true; }
bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; Value *Cond = BI->getCondition(); ICmpInst *CI = dyn_cast<ICmpInst>(Cond); if (!CI) return false; Value *RHS = CI->getOperand(1); ConstantInt *CV = dyn_cast<ConstantInt>(RHS); if (!CV) return false; bool isProb; if (CV->isZero()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == 0 -> Unlikely isProb = false; break; case CmpInst::ICMP_NE: // X != 0 -> Likely isProb = true; break; case CmpInst::ICMP_SLT: // X < 0 -> Unlikely isProb = false; break; case CmpInst::ICMP_SGT: // X > 0 -> Likely isProb = true; break; default: return false; } } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) { // InstCombine canonicalizes X <= 0 into X < 1. // X <= 0 -> Unlikely isProb = false; } else if (CV->isAllOnesValue()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == -1 -> Unlikely isProb = false; break; case CmpInst::ICMP_NE: // X != -1 -> Likely isProb = true; break; case CmpInst::ICMP_SGT: // InstCombine canonicalizes X >= 0 into X > -1. // X >= 0 -> Likely isProb = true; break; default: return false; } } else { return false; } unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) std::swap(TakenIdx, NonTakenIdx); setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); return true; }
bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; Value *Cond = BI->getCondition(); ICmpInst *CI = dyn_cast<ICmpInst>(Cond); if (!CI) return false; Value *RHS = CI->getOperand(1); ConstantInt *CV = dyn_cast<ConstantInt>(RHS); if (!CV) return false; // If the LHS is the result of AND'ing a value with a single bit bitmask, // we don't have information about probabilities. if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) if (LHS->getOpcode() == Instruction::And) if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) if (AndRHS->getUniqueInteger().isPowerOf2()) return false; bool isProb; if (CV->isZero()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == 0 -> Unlikely isProb = false; break; case CmpInst::ICMP_NE: // X != 0 -> Likely isProb = true; break; case CmpInst::ICMP_SLT: // X < 0 -> Unlikely isProb = false; break; case CmpInst::ICMP_SGT: // X > 0 -> Likely isProb = true; break; default: return false; } } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) { // InstCombine canonicalizes X <= 0 into X < 1. // X <= 0 -> Unlikely isProb = false; } else if (CV->isAllOnesValue()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == -1 -> Unlikely isProb = false; break; case CmpInst::ICMP_NE: // X != -1 -> Likely isProb = true; break; case CmpInst::ICMP_SGT: // InstCombine canonicalizes X >= 0 into X > -1. // X >= 0 -> Likely isProb = true; break; default: return false; } } else { return false; } unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) std::swap(TakenIdx, NonTakenIdx); setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); return true; }
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return /// instructions to the predecessor to enable tail call optimizations. The /// case it is currently looking for is: /// @code /// bb0: /// %tmp0 = tail call i32 @f0() /// br label %return /// bb1: /// %tmp1 = tail call i32 @f1() /// br label %return /// bb2: /// %tmp2 = tail call i32 @f2() /// br label %return /// return: /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] /// ret i32 %retval /// @endcode /// /// => /// /// @code /// bb0: /// %tmp0 = tail call i32 @f0() /// ret i32 %tmp0 /// bb1: /// %tmp1 = tail call i32 @f1() /// ret i32 %tmp1 /// bb2: /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// @endcode bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; PHINode *PN = 0; BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); if (V) { BCI = dyn_cast<BitCastInst>(V); if (BCI) V = BCI->getOperand(0); PN = dyn_cast<PHINode>(V); if (!PN) return false; } BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) return false; // It's not safe to eliminate the sign / zero extension of the return value. // See llvm::isInTailCallPosition(). const Function *F = BB->getParent(); Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if (CallerRetAttr.hasAttribute(Attributes::ZExt) || CallerRetAttr.hasAttribute(Attributes::SExt)) return false; // Make sure there are no instructions between the PHI and return, or that the // return is the first instruction in the block. if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); if (&*BI == BCI) // Also skip over the bitcast. ++BI; if (&*BI != RI) return false; } else { BasicBlock::iterator BI = BB->begin(); while (isa<DbgInfoIntrinsic>(BI)) ++BI; if (&*BI != RI) return false; } /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. SmallVector<CallInst*, 4> TailCalls; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && TLI->mayBeEmittedAsTailCall(CI)) TailCalls.push_back(CI); } } else { SmallPtrSet<BasicBlock*, 4> VisitedBBs; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { if (!VisitedBBs.insert(*PI)) continue; BasicBlock::InstListType &InstList = (*PI)->getInstList(); BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); if (RI == RE) continue; CallInst *CI = dyn_cast<CallInst>(&*RI); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) TailCalls.push_back(CI); } } bool Changed = false; for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { CallInst *CI = TailCalls[i]; CallSite CS(CI); // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes(); if (AttrBuilder(CalleeRetAttr). removeAttribute(Attributes::NoAlias) != AttrBuilder(CallerRetAttr). removeAttribute(Attributes::NoAlias)) continue; // Make sure the call instruction is followed by an unconditional branch to // the return block. BasicBlock *CallBB = CI->getParent(); BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) continue; // Duplicate the return into CallBB. (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); ModifiedDT = Changed = true; ++NumRetsDup; } // If we eliminated all predecessors of the block, delete the block now. if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) BB->eraseFromParent(); return Changed; }
bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; Triple TT = Triple(L->getHeader()->getParent()->getParent()-> getTargetTriple()); if (!TT.isArch32Bit() && !TT.isArch64Bit()) return MadeChange; // Unknown arch. type. // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); } // If a nested loop has been converted, then we can't convert this loop. if (MadeChange) return MadeChange; #ifndef NDEBUG // Stop trying after reaching the limit (if any). int Limit = CTRLoopLimit; if (Limit >= 0) { if (Counter >= CTRLoopLimit) return false; Counter++; } #endif // We don't want to spill/restore the counter register, and so we don't // want to use the counter register if the loop contains calls. for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) if (mightUseCTR(TT, *I)) return MadeChange; SmallVector<BasicBlock*, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); BasicBlock *CountedExitBlock = 0; const SCEV *ExitCount = 0; BranchInst *CountedExitBranch = 0; for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); DEBUG(dbgs() << "Exit Count for " << *L << " from block " << (*I)->getName() << ": " << *EC << "\n"); if (isa<SCEVCouldNotCompute>(EC)) continue; if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { if (ConstEC->getValue()->isZero()) continue; } else if (!SE->isLoopInvariant(EC, L)) continue; if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32)) continue; // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // exisiting block. // We need to make sure that this block will run on every loop iteration. // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; for (pred_iterator PI = pred_begin(L->getHeader()), PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { if (!L->contains(*PI)) continue; if (!DT->dominates(*I, *PI)) { NotAlways = true; break; } } if (NotAlways) continue; // Make sure this blocks ends with a conditional branch. Instruction *TI = (*I)->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (!BI->isConditional()) continue; CountedExitBranch = BI; } else continue; // Note that this block may not be the loop latch block, even if the loop // has a latch block. CountedExitBlock = *I; ExitCount = EC; break; } if (!CountedExitBlock) return MadeChange; BasicBlock *Preheader = L->getLoopPreheader(); // If we don't have a preheader, then insert one. If we already have a // preheader, then we can use it (except if the preheader contains a use of // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). if (!Preheader || mightUseCTR(TT, Preheader)) Preheader = InsertPreheaderForLoop(L, this); if (!Preheader) return MadeChange; DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); // Insert the count into the preheader and replace the condition used by the // selected branch. MadeChange = true; SCEVExpander SCEVE(*SE, "loopcnt"); LLVMContext &C = SE->getContext(); Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); ExitCount = SE->getAddExpr(ExitCount, SE->getConstant(CountType, 1)); Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); IRBuilder<> CountBuilder(Preheader->getTerminator()); Module *M = Preheader->getParent()->getParent(); Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType); CountBuilder.CreateCall(MTCTRFunc, ECValue); IRBuilder<> CondBuilder(CountedExitBranch); Value *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); Value *NewCond = CondBuilder.CreateCall(DecFunc); Value *OldCond = CountedExitBranch->getCondition(); CountedExitBranch->setCondition(NewCond); // The false branch must exit the loop. if (!L->contains(CountedExitBranch->getSuccessor(0))) CountedExitBranch->swapSuccessors(); // The old condition may be dead now, and may have even created a dead PHI // (the original induction variable). RecursivelyDeleteTriviallyDeadInstructions(OldCond); DeleteDeadPHIs(CountedExitBlock); ++NumCTRLoops; return MadeChange; }
//insert a basic block with appropriate code //along a given edge void insertBB(Edge ed, getEdgeCode *edgeCode, Instruction *rInst, Value *countInst, int numPaths, int Methno, Value *threshold){ BasicBlock* BB1=ed.getFirst()->getElement(); BasicBlock* BB2=ed.getSecond()->getElement(); #ifdef DEBUG_PATH_PROFILES //debugging info cerr<<"Edges with codes ######################\n"; cerr<<BB1->getName()<<"->"<<BB2->getName()<<"\n"; cerr<<"########################\n"; #endif //We need to insert a BB between BB1 and BB2 TerminatorInst *TI=BB1->getTerminator(); BasicBlock *newBB=new BasicBlock("counter", BB1->getParent()); //get code for the new BB vector<Value *> retVec; edgeCode->getCode(rInst, countInst, BB1->getParent(), newBB, retVec); BranchInst *BI = cast<BranchInst>(TI); //Is terminator a branch instruction? //then we need to change branch destinations to include new BB if(BI->isUnconditional()){ BI->setUnconditionalDest(newBB); } else{ if(BI->getSuccessor(0)==BB2) BI->setSuccessor(0, newBB); if(BI->getSuccessor(1)==BB2) BI->setSuccessor(1, newBB); } BasicBlock *triggerBB = NULL; if(retVec.size()>0){ triggerBB = new BasicBlock("trigger", BB1->getParent()); getTriggerCode(BB1->getParent()->getParent(), triggerBB, Methno, retVec[1], countInst, rInst);//retVec[0]); //Instruction *castInst = new CastInst(retVec[0], Type::IntTy, ""); Instruction *etr = new LoadInst(threshold, "threshold"); //std::cerr<<"type1: "<<etr->getType()<<" type2: "<<retVec[0]->getType()<<"\n"; Instruction *cmpInst = new SetCondInst(Instruction::SetLE, etr, retVec[0], ""); Instruction *newBI2 = new BranchInst(triggerBB, BB2, cmpInst); //newBB->getInstList().push_back(castInst); newBB->getInstList().push_back(etr); newBB->getInstList().push_back(cmpInst); newBB->getInstList().push_back(newBI2); //triggerBB->getInstList().push_back(triggerInst); new BranchInst(BB2, 0, 0, triggerBB); } else{ new BranchInst(BB2, 0, 0, newBB); } //now iterate over BB2, and set its Phi nodes right for(BasicBlock::iterator BB2Inst = BB2->begin(), BBend = BB2->end(); BB2Inst != BBend; ++BB2Inst){ if(PHINode *phiInst=dyn_cast<PHINode>(BB2Inst)){ int bbIndex=phiInst->getBasicBlockIndex(BB1); assert(bbIndex>=0); phiInst->setIncomingBlock(bbIndex, newBB); ///check if trigger!=null, then add value corresponding to it too! if(retVec.size()>0){ assert(triggerBB && "BasicBlock with trigger should not be null!"); Value *vl = phiInst->getIncomingValue((unsigned int)bbIndex); phiInst->addIncoming(vl, triggerBB); } } } }
void DSWP::buildPDG(Loop *L) { //Initialize PDG for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) { BasicBlock *BB = *bi; for (BasicBlock::iterator ui = BB->begin(); ui != BB->end(); ui++) { Instruction *inst = &(*ui); //standardlize the name for all expr if (util.hasNewDef(inst)) { inst->setName(util.genId()); dname[inst] = inst->getNameStr(); } else { dname[inst] = util.genId(); } pdg[inst] = new vector<Edge>(); rev[inst] = new vector<Edge>(); } } //LoopInfo &li = getAnalysis<LoopInfo>(); /* * Memory dependency analysis */ MemoryDependenceAnalysis &mda = getAnalysis<MemoryDependenceAnalysis>(); for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) { BasicBlock *BB = *bi; for (BasicBlock::iterator ii = BB->begin(); ii != BB->end(); ii++) { Instruction *inst = &(*ii); //data dependence = register dependence + memory dependence //begin register dependence for (Value::use_iterator ui = ii->use_begin(); ui != ii->use_end(); ui++) { if (Instruction *user = dyn_cast<Instruction>(*ui)) { addEdge(inst, user, REG); } } //finish register dependence //begin memory dependence MemDepResult mdr = mda.getDependency(inst); //TODO not sure clobbers mean!! if (mdr.isDef()) { Instruction *dep = mdr.getInst(); if (isa<LoadInst>(inst)) { if (isa<StoreInst>(dep)) { addEdge(dep, inst, DTRUE); //READ AFTER WRITE } } if (isa<StoreInst>(inst)) { if (isa<LoadInst>(dep)) { addEdge(dep, inst, DANTI); //WRITE AFTER READ } if (isa<StoreInst>(dep)) { addEdge(dep, inst, DOUT); //WRITE AFTER WRITE } } //READ AFTER READ IS INSERT AFTER PDG BUILD } //end memory dependence }//for ii }//for bi /* * begin control dependence */ PostDominatorTree &pdt = getAnalysis<PostDominatorTree>(); //cout << pdt.getRootNode()->getBlock()->getNameStr() << endl; /* * alien code part 1 */ LoopInfo *LI = &getAnalysis<LoopInfo>(); std::set<BranchInst*> backedgeParents; for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) { BasicBlock *BB = *bi; for (BasicBlock::iterator ii = BB->begin(); ii != BB->end(); ii++) { Instruction *inst = ii; if (BranchInst *brInst = dyn_cast<BranchInst>(inst)) { // get the loop this instruction (and therefore basic block) belongs to Loop *instLoop = LI->getLoopFor(BB); bool branchesToHeader = false; for (int i = brInst->getNumSuccessors() - 1; i >= 0 && !branchesToHeader; i--) { // if the branch could exit, store it if (LI->getLoopFor(brInst->getSuccessor(i)) != instLoop) { branchesToHeader = true; } } if (branchesToHeader) { backedgeParents.insert(brInst); } } } } //build information for predecessor of blocks in post dominator tree for (Function::iterator bi = func->begin(); bi != func->end(); bi++) { BasicBlock *BB = bi; DomTreeNode *dn = pdt.getNode(BB); for (DomTreeNode::iterator di = dn->begin(); di != dn->end(); di++) { BasicBlock *CB = (*di)->getBlock(); pre[CB] = BB; } } // // //add dependency within a basicblock // for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) { // BasicBlock *BB = *bi; // Instruction *pre = NULL; // for (BasicBlock::iterator ui = BB->begin(); ui != BB->end(); ui++) { // Instruction *inst = &(*ui); // if (pre != NULL) { // addEdge(pre, inst, CONTROL); // } // pre = inst; // } // } // //the special kind of dependence need loop peeling ? I don't know whether this is needed // for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) { // BasicBlock *BB = *bi; // for (succ_iterator PI = succ_begin(BB); PI != succ_end(BB); ++PI) { // BasicBlock *succ = *PI; // // checkControlDependence(BB, succ, pdt); // } // } /* * alien code part 2 */ // add normal control dependencies // loop through each instruction for (Loop::block_iterator bbIter = L->block_begin(); bbIter != L->block_end(); ++bbIter) { BasicBlock *bb = *bbIter; // check the successors of this basic block if (BranchInst *branchInst = dyn_cast<BranchInst>(bb->getTerminator())) { if (branchInst->getNumSuccessors() > 1) { BasicBlock * succ = branchInst->getSuccessor(0); // if the successor is nested shallower than the current basic block, continue if (LI->getLoopDepth(bb) < LI->getLoopDepth(succ)) { continue; } // otherwise, add all instructions to graph as control dependence while (succ != NULL && succ != bb && LI->getLoopDepth(succ) >= LI->getLoopDepth(bb)) { Instruction *terminator = bb->getTerminator(); for (BasicBlock::iterator succInstIter = succ->begin(); &(*succInstIter) != succ->getTerminator(); ++succInstIter) { addEdge(terminator, &(*succInstIter), CONTROL); } if (BranchInst *succBrInst = dyn_cast<BranchInst>(succ->getTerminator())) { if (succBrInst->getNumSuccessors() > 1) { addEdge(terminator, succ->getTerminator(), CONTROL); } } if (BranchInst *br = dyn_cast<BranchInst>(succ->getTerminator())) { if (br->getNumSuccessors() == 1) { succ = br->getSuccessor(0); } else { succ = NULL; } } else { succ = NULL; } } } } } /* * alien code part 3 */ for (std::set<BranchInst*>::iterator exitIter = backedgeParents.begin(); exitIter != backedgeParents.end(); ++exitIter) { BranchInst *exitBranch = *exitIter; if (exitBranch->isConditional()) { BasicBlock *header = LI->getLoopFor(exitBranch->getParent())->getHeader(); for (BasicBlock::iterator ctrlIter = header->begin(); ctrlIter != header->end(); ++ctrlIter) { addEdge(exitBranch, &(*ctrlIter), CONTROL); } } } //end control dependence }
/// GetIfCondition - Given a basic block (BB) with two predecessors, /// check to see if the merge at this block is due /// to an "if condition". If so, return the boolean condition that determines /// which entry into BB will be taken. Also, return by references the block /// that will be entered from if the condition is true, and the block that will /// be entered if the condition is false. /// /// This does no checking to see if the true/false blocks have large or unsavory /// instructions in them. Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse) { PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); BasicBlock *Pred1 = NULL; BasicBlock *Pred2 = NULL; if (SomePHI) { if (SomePHI->getNumIncomingValues() != 2) return NULL; Pred1 = SomePHI->getIncomingBlock(0); Pred2 = SomePHI->getIncomingBlock(1); } else { pred_iterator PI = pred_begin(BB), PE = pred_end(BB); if (PI == PE) // No predecessor return NULL; Pred1 = *PI++; if (PI == PE) // Only one predecessor return NULL; Pred2 = *PI++; if (PI != PE) // More than two predecessors return NULL; } // We can only handle branches. Other control flow will be lowered to // branches if possible anyway. BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); if (Pred1Br == 0 || Pred2Br == 0) return 0; // Eliminate code duplication by ensuring that Pred1Br is conditional if // either are. if (Pred2Br->isConditional()) { // If both branches are conditional, we don't have an "if statement". In // reality, we could transform this case, but since the condition will be // required anyway, we stand no chance of eliminating it, so the xform is // probably not profitable. if (Pred1Br->isConditional()) return 0; std::swap(Pred1, Pred2); std::swap(Pred1Br, Pred2Br); } if (Pred1Br->isConditional()) { // The only thing we have to watch out for here is to make sure that Pred2 // doesn't have incoming edges from other blocks. If it does, the condition // doesn't dominate BB. if (Pred2->getSinglePredecessor() == 0) return 0; // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && Pred1Br->getSuccessor(1) == Pred2) { IfTrue = Pred1; IfFalse = Pred2; } else if (Pred1Br->getSuccessor(0) == Pred2 && Pred1Br->getSuccessor(1) == BB) { IfTrue = Pred2; IfFalse = Pred1; } else { // We know that one arm of the conditional goes to BB, so the other must // go somewhere unrelated, and this must not be an "if statement". return 0; } return Pred1Br->getCondition(); } // Ok, if we got here, both predecessors end with an unconditional branch to // BB. Don't panic! If both blocks only have a single (identical) // predecessor, and THAT is a conditional branch, then we're all ok! BasicBlock *CommonPred = Pred1->getSinglePredecessor(); if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) return 0; // Otherwise, if this is a conditional branch, then we can use it! BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); if (BI == 0) return 0; assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { IfTrue = Pred1; IfFalse = Pred2; } else { IfTrue = Pred2; IfFalse = Pred1; } return BI->getCondition(); }
/// \brief Analyze the predecessors of each block and build up predicates void StructurizeCFG::gatherPredicates(RegionNode *N) { RegionInfo *RI = ParentRegion->getRegionInfo(); BasicBlock *BB = N->getEntry(); BBPredicates &Pred = Predicates[BB]; BBPredicates &LPred = LoopPreds[BB]; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { // Ignore it if it's a branch from outside into our region entry if (!ParentRegion->contains(*PI)) continue; Region *R = RI->getRegionFor(*PI); if (R == ParentRegion) { // It's a top level block in our region BranchInst *Term = cast<BranchInst>((*PI)->getTerminator()); for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { BasicBlock *Succ = Term->getSuccessor(i); if (Succ != BB) continue; if (Visited.count(*PI)) { // Normal forward edge if (Term->isConditional()) { // Try to treat it like an ELSE block BasicBlock *Other = Term->getSuccessor(!i); if (Visited.count(Other) && !Loops.count(Other) && !Pred.count(Other) && !Pred.count(*PI)) { Pred[Other] = BoolFalse; Pred[*PI] = BoolTrue; continue; } } Pred[*PI] = buildCondition(Term, i, false); } else { // Back edge LPred[*PI] = buildCondition(Term, i, true); } } } else { // It's an exit from a sub region while (R->getParent() != ParentRegion) R = R->getParent(); // Edge from inside a subregion to its entry, ignore it if (*R == *N) continue; BasicBlock *Entry = R->getEntry(); if (Visited.count(Entry)) Pred[Entry] = BoolTrue; else LPred[Entry] = BoolFalse; } } }
bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set // then we actually perform accumulator recursion elimination instead of // simple tail recursion elimination. If the operation is an LLVM instruction // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then // we are handling the case when the return instruction returns a constant C // which is different to the constant returned by other return instructions // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a // special case of accumulator recursion, the operation being "return C". Value *AccumulatorRecursionEliminationInitVal = 0; Instruction *AccumulatorRecursionInstr = 0; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. BasicBlock::iterator BBI = CI; for (++BBI; &*BBI != Ret; ++BBI) { if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = CanTransformAccumulatorRecursion(BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction // accumulates. AccumulatorRecursionInstr = BBI; } else { return false; // Otherwise, we cannot eliminate the tail recursion! } } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a // constant, return the value returned by the tail call, or that are being // accumulator recursion variable eliminated. if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && AccumulatorRecursionEliminationInitVal == 0 && !getCommonReturnValue(0, CI)) { // One case remains that we are able to handle: the current return // instruction returns a constant, and all other return instructions // return a different constant. if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) return false; // Current return instruction does not return a constant. // Check that all other return instructions return a common constant. If // so, record it in AccumulatorRecursionEliminationInitVal. AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); if (!AccumulatorRecursionEliminationInitVal) return false; } BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. if (OldEntry == 0) { OldEntry = &F->getEntryBlock(); BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); OldEntry->setName("tailrecurse"); BranchInst::Create(OldEntry, NewEntry); // If this tail call is marked 'tail' and if there are any allocas in the // entry block, move them up to the new entry block. TailCallsAreMarkedTail = CI->isTailCall(); if (TailCallsAreMarkedTail) // Move all fixed sized allocas from OldEntry to NewEntry. for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(), NEBI = NewEntry->begin(); OEBI != E; ) if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++)) if (isa<ConstantInt>(AI->getArraySize())) AI->moveBefore(NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. // For now, we initialize each PHI to only have the real arguments // which are passed in. Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); ArgumentPHIs.push_back(PN); } } // If this function has self recursive calls in the tail position where some // are marked tail and some are not, only transform one flavor or another. We // have to choose whether we move allocas in the entry block to the new entry // block or not, so we can't make a good choice for both. NOTE: We could do // slightly better here in the case that the function has no entry block // allocas. if (TailCallsAreMarkedTail && !CI->isTailCall()) return false; // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); // If we are introducing an accumulator variable to eliminate the recursion, // do so now. Note that we _know_ that no subsequent tail recursion // eliminations will happen on this function because of the way the // accumulator recursion predicate is set up. // if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); PHINode *AccPN = PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), std::distance(PB, PE) + 1, "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, // computed earlier. For any other existing branches to this block (due to // other tail recursions eliminated) the accumulator is not modified. // Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if (P == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); else AccPN->addIncoming(AccPN, P); } if (AccRecInstr) { // Add an incoming argument for the current block, which is computed by // our associative and commutative accumulator instruction. AccPN->addIncoming(AccRecInstr, BB); // Next, rewrite the accumulator recursion instruction so that it does not // use the result of the call anymore, instead, use the PHI node we just // inserted. AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); } else { // Add an incoming argument for the current block, which is just the // constant returned by the current return instruction. AccPN->addIncoming(Ret->getReturnValue(), BB); } // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will // actually rewrite the return value we are destroying, but that's ok. for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) RI->setOperand(0, AccPN); ++NumAccumAdded; } // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); NewBI->setDebugLoc(CI->getDebugLoc()); BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated; return true; }
/// buildCFICheck - emits __cfi_check for the current module. void CrossDSOCFI::buildCFICheck(Module &M) { // FIXME: verify that __cfi_check ends up near the end of the code section, // but before the jump slots created in LowerTypeTests. llvm::DenseSet<uint64_t> TypeIds; SmallVector<MDNode *, 2> Types; for (GlobalObject &GO : M.global_objects()) { Types.clear(); GO.getMetadata(LLVMContext::MD_type, Types); for (MDNode *Type : Types) { // Sanity check. GO must not be a function declaration. assert(!isa<Function>(&GO) || !cast<Function>(&GO)->isDeclaration()); if (ConstantInt *TypeId = extractNumericTypeId(Type)) TypeIds.insert(TypeId->getZExtValue()); } } LLVMContext &Ctx = M.getContext(); Constant *C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), nullptr); Function *F = dyn_cast<Function>(C); F->setAlignment(4096); auto args = F->arg_begin(); Value &CallSiteTypeId = *(args++); CallSiteTypeId.setName("CallSiteTypeId"); Value &Addr = *(args++); Addr.setName("Addr"); Value &CFICheckFailData = *(args++); CFICheckFailData.setName("CFICheckFailData"); assert(args == F->arg_end()); BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F); BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F); BasicBlock *TrapBB = BasicBlock::Create(Ctx, "fail", F); IRBuilder<> IRBFail(TrapBB); Constant *CFICheckFailFn = M.getOrInsertFunction( "__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), nullptr); IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr}); IRBFail.CreateBr(ExitBB); IRBuilder<> IRBExit(ExitBB); IRBExit.CreateRetVoid(); IRBuilder<> IRB(BB); SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, TypeIds.size()); for (uint64_t TypeId : TypeIds) { ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId); BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F); IRBuilder<> IRBTest(TestBB); Function *BitsetTestFn = Intrinsic::getDeclaration(&M, Intrinsic::type_test); Value *Test = IRBTest.CreateCall( BitsetTestFn, {&Addr, MetadataAsValue::get( Ctx, ConstantAsMetadata::get(CaseTypeId))}); BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB); BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights); SI->addCase(CaseTypeId, TestBB); ++NumTypeIds; } }
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was succesful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) { assert(L->isLCSSAForm()); BasicBlock *Header = L->getHeader(); BasicBlock *LatchBlock = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DOUT << " Can't unroll; loop not terminated by a conditional branch.\n"; return false; } // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); // Find trip multiple if count is not available unsigned TripMultiple = 1; if (TripCount == 0) TripMultiple = L->getSmallConstantTripMultiple(); if (TripCount != 0) DOUT << " Trip Count = " << TripCount << "\n"; if (TripMultiple != 1) DOUT << " Trip Multiple = " << TripMultiple << "\n"; // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } if (CompletelyUnroll) { DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); } else { DEBUG(errs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DOUT << " with a breakout at trip " << BreakoutTrip; } else if (TripMultiple != 1) { DOUT << " with " << TripMultiple << " trips per branch"; } DOUT << "!\n"; } std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. typedef DenseMap<const Value*, Value*> ValueMapTy; ValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); OrigPHINode.push_back(PN); if (Instruction *I = dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) if (L->contains(I->getParent())) LastValueMap[I] = I; } std::vector<BasicBlock*> Headers; std::vector<BasicBlock*> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); for (unsigned It = 1; It != Count; ++It) { char SuffixBuffer[100]; sprintf(SuffixBuffer, ".%d", It); std::vector<BasicBlock*> NewBlocks; for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { ValueMapTy ValueMap; BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI->getParent())) InVal = LastValueMap[InValI]; ValueMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; L->addBasicBlockToLoop(New, LI->getBase()); // Add phi entries for newly created values to all exit blocks except // the successor of the latch block. The successor of the exit block will // be updated specially after unrolling all the way. if (*BB != LatchBlock) for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end(); UI != UE;) { Instruction *UseInst = cast<Instruction>(*UI); ++UI; if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) { PHINode *phi = cast<PHINode>(UseInst); Value *Incoming = phi->getIncomingValueForBlock(*BB); phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) { Latches.push_back(New); // Also, clear out the new latch's back edge so that it doesn't look // like a new loop, so that it's amenable to being merged with adjacent // blocks later on. TerminatorInst *Term = New->getTerminator(); assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); Term->setSuccessor(!ContinueOnTrue, NULL); } NewBlocks.push_back(New); } // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) RemapInstruction(I, LastValueMap); } // The latch block exits the loop. If there are any PHI nodes in the // successor blocks, update them to use the appropriate values computed as the // last iteration of the loop. if (Count != 1) { SmallPtrSet<PHINode*, 8> Users; for (Value::use_iterator UI = LatchBlock->use_begin(), UE = LatchBlock->use_end(); UI != UE; ++UI) if (PHINode *phi = dyn_cast<PHINode>(*UI)) Users.insert(phi); BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]); for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end(); SI != SE; ++SI) { PHINode *PN = *SI; Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { if (L->contains(InValI->getParent())) InVal = LastValueMap[InVal]; } PN->addIncoming(InVal, LastIterationBB); } } // Now, if we're doing complete unrolling, loop over the PHI nodes in the // original block, setting them to their incoming values. if (CompletelyUnroll) { BasicBlock *Preheader = L->getLoopPreheader(); for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *PN = OrigPHINode[i]; PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { Dest = LoopExit; NeedConditional = false; } // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { Term->setUnconditionalDest(Dest); // Merge adjacent basic blocks, if possible. if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) { std::replace(Latches.begin(), Latches.end(), Dest, Fold); std::replace(Headers.begin(), Headers.end(), Dest, Fold); } } } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { Instruction *Inst = I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); else if (Constant *C = ConstantFoldInstruction(Inst, Header->getContext())) { Inst->replaceAllUsesWith(C); (*BB)->getInstList().erase(Inst); } } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; // Remove the loop from the LoopPassManager if it's completely removed. if (CompletelyUnroll && LPM != NULL) LPM->deleteLoopFromQueue(L); // If we didn't completely unroll the loop, it should still be in LCSSA form. if (!CompletelyUnroll) assert(L->isLCSSAForm()); return true; }
bool ScopDetection::isValidCFG(BasicBlock &BB, DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; TerminatorInst *TI = BB.getTerminator(); // Return instructions are only valid if the region is the top level region. if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0) return true; BranchInst *Br = dyn_cast<BranchInst>(TI); if (!Br) return invalid<ReportNonBranchTerminator>(Context, /*Assert=*/true, &BB); if (Br->isUnconditional()) return true; Value *Condition = Br->getCondition(); // UndefValue is not allowed as condition. if (isa<UndefValue>(Condition)) return invalid<ReportUndefCond>(Context, /*Assert=*/true, Br, &BB); // Only Constant and ICmpInst are allowed as condition. if (!(isa<Constant>(Condition) || isa<ICmpInst>(Condition))) { if (AllowNonAffineSubRegions && !containsLoop(RI->getRegionFor(&BB), LI)) Context.NonAffineSubRegionSet.insert(RI->getRegionFor(&BB)); else return invalid<ReportInvalidCond>(Context, /*Assert=*/true, Br, &BB); } // Allow perfectly nested conditions. assert(Br->getNumSuccessors() == 2 && "Unexpected number of successors"); if (ICmpInst *ICmp = dyn_cast<ICmpInst>(Condition)) { // Unsigned comparisons are not allowed. They trigger overflow problems // in the code generation. // // TODO: This is not sufficient and just hides bugs. However it does pretty // well. if (ICmp->isUnsigned() && !AllowUnsigned) return false; // Are both operands of the ICmp affine? if (isa<UndefValue>(ICmp->getOperand(0)) || isa<UndefValue>(ICmp->getOperand(1))) return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); Loop *L = LI->getLoopFor(ICmp->getParent()); const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L); const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L); if (!isAffineExpr(&CurRegion, LHS, *SE) || !isAffineExpr(&CurRegion, RHS, *SE)) { if (AllowNonAffineSubRegions && !containsLoop(RI->getRegionFor(&BB), LI)) Context.NonAffineSubRegionSet.insert(RI->getRegionFor(&BB)); else return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, ICmp); } } // Allow loop exit conditions. Loop *L = LI->getLoopFor(&BB); if (L && L->getExitingBlock() == &BB) return true; // Allow perfectly nested conditions. Region *R = RI->getRegionFor(&BB); if (R->getEntry() != &BB) return invalid<ReportCondition>(Context, /*Assert=*/true, &BB); return true; }
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. This will update DominatorTree information if it /// is available, thus calling this pass will not invalidate either of them. /// This returns the new block if the edge was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the /// specified successor will be merged into the same critical edge block. /// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that /// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an /// IndirectBrInst. Splitting these edges will almost always create an invalid /// program because the address of the new block won't be the one that is jumped /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. assert(!DestBB->isLandingPad() && "Cannot split critical edge to a landing pad block!"); // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); NewBI->setDebugLoc(TI->getDebugLoc()); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. { unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { // We no longer enter through TIBB, now we come in through NewBB. // Revector exactly one entry in the PHI node that used to come from // TIBB to come from NewBB. PHINode *PN = cast<PHINode>(I); // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. DestBB->removePredecessor(TIBB); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); // If we have nothing to update, just return. if (DT == 0 && LI == 0 && PI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate // anything, as there are other successors of DestBB. However, if all other // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; // If there is a PHI in the block, loop over predecessors with it, which is // faster than iterating pred_begin/end. if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) != NewBB) OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) { BasicBlock *P = *I; if (P != NewBB) OtherPreds.push_back(P); } } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but // TINode is the immediate dominator for the new node. // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); while (!OtherPreds.empty() && NewBBDominatesDestBB) { if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); OtherPreds.pop_back(); } OtherPreds.clear(); } // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { if (!DestBBNode) DestBBNode = DT->getNode(DestBB); DT->changeImmediateDominator(DestBBNode, NewBBNode); } } } // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the // header of its loop (adding a branch into a loop elsewhere would // create an irreducible loop). assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) P->addBasicBlockToLoop(NewBB, LI->getBase()); } } // If TIBB is in a loop and DestBB is outside of that loop, split the // other exit blocks of the loop that also have predecessors outside // the loop, to maintain a LoopSimplify guarantee. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. if (P->mustPreserveAnalysisID(LCSSAID)) { SmallVector<BasicBlock *, 1> OrigPred; OrigPred.push_back(TIBB); CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); } // For each unique exit block... // FIXME: This code is functionally equivalent to the corresponding // loop in LoopSimplify. SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { // Collect all the preds that are inside the loop, and note // whether there are any preds outside the loop. SmallVector<BasicBlock *, 4> Preds; bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; if (TIL->contains(P)) { if (isa<IndirectBrInst>(P->getTerminator())) { Preds.clear(); break; } Preds.push_back(P); } else { HasPredOutsideOfLoop = true; } } // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use // getUniqueExitBlocks above because that depends on LoopSimplify // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } } } // LCSSA form was updated above for the case where LoopSimplify is // available, which means that all predecessors of loop exit blocks // are within the loop. Without LoopSimplify form, it would be // necessary to insert a new phi. assert((!P->mustPreserveAnalysisID(LCSSAID) || P->mustPreserveAnalysisID(LoopSimplifyID)) && "SplitCriticalEdge doesn't know how to update LCCSA form " "without LoopSimplify!"); } } // Update ProfileInfo if it is around. if (PI) PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; }
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// TripCount is the upper bound of the iteration on which control exits /// LatchBlock. Control may exit the loop prior to TripCount iterations either /// via an early branch in other loop block or via LatchBlock terminator. This /// is relaxed from the general definition of trip count which is the number of /// times the loop header executes. Note that UnrollLoop assumes that the loop /// counter test is in LatchBlock in order to remove unnecesssary instances of /// the test. If control can exit the loop from the LatchBlock's terminator /// prior to TripCount iterations, flag PreserveCondBr needs to be set. /// /// PreserveCondBr indicates whether the conditional branch of the LatchBlock /// needs to be preserved. It is needed when we use trip count upper bound to /// fully unroll the loop. If PreserveOnlyFirst is also set then only the first /// conditional branch needs to be preserved. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// /// If AllowRuntime is true then UnrollLoop will consider unrolling loops that /// have a runtime (i.e. not compile time constant) trip count. Unrolling these /// loops require a unroll "prologue" that runs "RuntimeTripCount % Count" /// iterations before branching into the unrolled loop. UnrollLoop will not /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and /// AllowExpensiveTripCount is false. /// /// If we want to perform PGO-based loop peeling, PeelCount is set to the /// number of iterations we want to peel off. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and /// DominatorTree if they are non-null. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } // Loops with indirectbr cannot be cloned. if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); return false; } BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); return false; } if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; // Don't enter the unroll code if there is nothing to do. if (TripCount == 0 && Count < 2 && PeelCount == 0) return false; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; SmallVector<BasicBlock *, 4> ExitBlocks; L->getExitBlocks(ExitBlocks); std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks(); // Go through all exits of L and see if there are any phi-nodes there. We just // conservatively assume that they're inserted to preserve LCSSA form, which // means that complete unrolling might break this form. We need to either fix // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For // now we just recompute LCSSA for the outer loop, but it should be possible // to fix it in-place. bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll && any_of(ExitBlocks, [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); assert((!RuntimeTripCount || !PeelCount) && "Did not expect runtime trip-count unrolling " "and peeling for the same loop"); if (PeelCount) peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA); // Loops containing convergent instructions must have a count that divides // their TripMultiple. DEBUG( { bool HasConvergent = false; for (auto &BB : L->blocks()) for (auto &I : *BB) if (auto CS = CallSite(&I)) HasConvergent |= CS.isConvergent(); assert((!HasConvergent || TripMultiple % Count == 0) && "Unroll count must divide trip multiple if loop contains a " "convergent operation."); });
/// SimplifyStoreAtEndOfBlock - Turn things like: /// if () { *P = v1; } else { *P = v2 } /// into a phi node with a store in the successor. /// /// Simplify things like: /// *P = v1; if () { *P = v2; } /// into a phi node with a store in the successor. /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); BasicBlock *P = *PI; BasicBlock *OtherBB = nullptr; if (P != StoreBB) OtherBB = P; if (++PI == pred_end(DestBB)) return false; P = *PI; if (P != StoreBB) { if (OtherBB) return false; OtherBB = P; } if (++PI != pred_end(DestBB)) return false; // Bail out if all the relevant blocks aren't distinct (this can happen, // for example, if SI is in an infinite loop) if (StoreBB == DestBB || OtherBB == DestBB) return false; // Verify that the other block ends in a branch and is not otherwise empty. BasicBlock::iterator BBI = OtherBB->getTerminator(); BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; --BBI; } // If this isn't a store, isn't a store to the same location, or is not the // right kind of store, bail out. OtherStore = dyn_cast<StoreInst>(BBI); if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || !SI.isSameOperationAs(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. for (;; --BBI) { // Check to see if we find the matching store. if ((OtherStore = dyn_cast<StoreInst>(BBI))) { if (OtherStore->getOperand(1) != SI.getOperand(1) || !SI.isSameOperationAs(OtherStore)) return false; break; } // If we find something that may be using or overwriting the stored // value, or if we run out of instructions, we can't do the xform. if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || BBI == OtherBB->begin()) return false; } // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { // FIXME: This should really be AA driven. if (I->mayReadFromMemory() || I->mayWriteToMemory()) return false; } } // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlignment(), SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(OtherStore->getDebugLoc()); // If the two stores had the same TBAA tag, preserve it. if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa)) if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag, OtherStore->getMetadata(LLVMContext::MD_tbaa)))) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore); return true; }
/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and /// an unconditional branch in it. void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { if (SinglePred != DestBB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); MergeBasicBlockIntoOnlyPred(DestBB, this); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } } // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB // to handle the new incoming edges it is about to have. PHINode *PN; for (BasicBlock::iterator BBI = DestBB->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) { // Remove the incoming value for BB, and remember it. Value *InVal = PN->removeIncomingValue(BB, false); // Two options: either the InVal is a phi node defined in BB or it is some // value that dominates BB. PHINode *InValPhi = dyn_cast<PHINode>(InVal); if (InValPhi && InValPhi->getParent() == BB) { // Add all of the input values of the input PHI as inputs of this phi. for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InValPhi->getIncomingValue(i), InValPhi->getIncomingBlock(i)); } else { // Otherwise, add one instance of the dominating value for each edge that // we will be adding. if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) PN->addIncoming(InVal, *PI); } } } // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); if (DT && !ModifiedDT) { BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); DT->changeImmediateDominator(DestBB, NewIDom); DT->eraseNode(BB); } if (PFI) { PFI->replaceAllUses(BB, DestBB); PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); ++NumBlocksElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); }
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// TripCount is generally defined as the number of times the loop header /// executes. UnrollLoop relaxes the definition to permit early exits: here /// TripCount is the iteration on which control exits LatchBlock if no early /// exits were taken. Note that UnrollLoop assumes that the loop counter test /// terminates LatchBlock in order to remove unnecesssary instances of the /// test. In other words, control may exit the loop prior to TripCount /// iterations via an early branch, but control may not exit the loop from the /// LatchBlock's terminator prior to TripCount iterations. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. /// /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI, Pass *PP, LPPassManager *LPM, AssumptionCache *AC) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } // Loops with indirectbr cannot be cloned. if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); return false; } BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); return false; } if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; // Don't enter the unroll code if there is nothing to do. This way we don't // need to support "partial unrolling by 1". if (TripCount == 0 && Count < 2) return false; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) return false; // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. ScalarEvolution *SE = PP ? PP->getAnalysisIfAvailable<ScalarEvolution>() : nullptr; if (SE) SE->forgetLoop(L); // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } // Report the unrolling decision. DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, Twine("completely unrolled loop with ") + Twine(TripCount) + " iterations"); } else { auto EmitDiag = [&](const Twine &T) { emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, "unrolled loop by a factor of " + Twine(Count) + T); }; DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); EmitDiag(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); EmitDiag(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); } bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { OrigPHINode.push_back(cast<PHINode>(I)); } std::vector<BasicBlock*> Headers; std::vector<BasicBlock*> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each // exit. LoopBlocksDFS DFS(L); DFS.perform(LI); // Stash the DFS iterators before adding blocks to the loop. LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; SmallDenseMap<const Loop *, Loop *, 4> NewLoops; NewLoops[L] = L; for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Tell LI about New. if (*BB == Header) { assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); L->addBasicBlockToLoop(New, *LI); } else { // Figure out which loop New is in. const Loop *OldLoop = LI->getLoopFor(*BB); assert(OldLoop && "Should (at least) be in the loop being unrolled!"); Loop *&NewLoop = NewLoops[OldLoop]; if (!NewLoop) { // Found a new sub-loop. assert(*BB == OldLoop->getHeader() && "Header should be first in RPO"); Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); assert(NewLoopParent && "Expected parent loop before sub-loop in RPO"); NewLoop = new Loop; NewLoopParent->addChildLoop(NewLoop); // Forget the old loop, since its inputs may have changed. if (SE) SE->forgetLoop(OldLoop); } NewLoop->addBasicBlockToLoop(New, *LI); } if (*BB == Header) // Loop over all of the PHI nodes in the block, changing them to use // the incoming values from the previous block. for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; VMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; // Add phi entries for newly created values to all exit blocks. for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; ++SI) { if (L->contains(*SI)) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { Value *Incoming = phi->getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); if (It != LastValueMap.end()) Incoming = It->second; phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) Latches.push_back(New); NewBlocks.push_back(New); } // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) ::RemapInstruction(I, LastValueMap); } // Loop over the PHI nodes in the original block, setting incoming values. for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *PN = OrigPHINode[i]; if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } else if (Count > 1) { Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { if (L->contains(InValI)) InVal = LastValueMap[InVal]; } assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); PN->addIncoming(InVal, Latches.back()); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; if (RuntimeTripCount && j != 0) { NeedConditional = false; } // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { Dest = LoopExit; NeedConditional = false; } // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { // Remove phi operands at this loop exit if (Dest != LoopExit) { BasicBlock *BB = Latches[i]; for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { if (*SI == Headers[i]) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { Phi->removeIncomingValue(BB, false); } } } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); } } // Merge adjacent basic blocks, if possible. SmallPtrSet<Loop *, 4> ForgottenLoops; for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM, ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. AC->clear(); DominatorTree *DT = nullptr; if (PP) { // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. if (DominatorTreeWrapperPass *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { DT = &DTWP->getDomTree(); DT->recalculate(*L->getHeader()->getParent()); } // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll) { SmallVector<WeakVH, 16> DeadInsts; simplifyLoopIVs(L, SE, LPM, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already // identified. Any remaining should be cleaned up below. while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { Instruction *Inst = I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); else if (Value *V = SimplifyInstruction(Inst)) if (LI->replacementPreservesLCSSAForm(Inst, V)) { Inst->replaceAllUsesWith(V); (*BB)->getInstList().erase(Inst); } } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; Loop *OuterL = L->getParentLoop(); // Remove the loop from the LoopPassManager if it's completely removed. if (CompletelyUnroll && LPM != nullptr) LPM->deleteLoopFromQueue(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. if (PP && DT) { if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>(); const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after // deleteLoopFromQueue updates LoopInfo. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); formLCSSARecursively(*OuterL, *DT, LI, SE); } } return true; }
Function* PartialInliner::unswitchFunction(Function* F) { // First, verify that this function is an unswitching candidate... BasicBlock* entryBlock = F->begin(); BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); if (!BR || BR->isUnconditional()) return nullptr; BasicBlock* returnBlock = nullptr; BasicBlock* nonReturnBlock = nullptr; unsigned returnCount = 0; for (BasicBlock *BB : successors(entryBlock)) { if (isa<ReturnInst>(BB->getTerminator())) { returnBlock = BB; returnCount++; } else nonReturnBlock = BB; } if (returnCount != 1) return nullptr; // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; Function* duplicateFunction = CloneFunction(F, VMap, /*ModuleLevelChanges=*/false); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(duplicateFunction); BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]); BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]); // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. F->replaceAllUsesWith(duplicateFunction); // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. BasicBlock* preReturn = newReturnBlock; newReturnBlock = newReturnBlock->splitBasicBlock( newReturnBlock->getFirstNonPHI()); BasicBlock::iterator I = preReturn->begin(); BasicBlock::iterator Ins = newReturnBlock->begin(); while (I != preReturn->end()) { PHINode* OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI(); retPhi->addIncoming(I, preReturn); retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), newEntryBlock); OldPhi->removeIncomingValue(newEntryBlock); ++I; } newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock); // Gather up the blocks that we're going to extract. std::vector<BasicBlock*> toExtract; toExtract.push_back(newNonReturnBlock); for (Function::iterator FI = duplicateFunction->begin(), FE = duplicateFunction->end(); FI != FE; ++FI) if (&*FI != newEntryBlock && &*FI != newReturnBlock && &*FI != newNonReturnBlock) toExtract.push_back(FI); // The CodeExtractor needs a dominator tree. DominatorTree DT; DT.recalculate(*duplicateFunction); // Extract the body of the if. Function* extractedFunction = CodeExtractor(toExtract, &DT).extractCodeRegion(); InlineFunctionInfo IFI; // Inline the top-level if test into all callers. std::vector<User *> Users(duplicateFunction->user_begin(), duplicateFunction->user_end()); for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); UI != UE; ++UI) if (CallInst *CI = dyn_cast<CallInst>(*UI)) InlineFunction(CI, IFI); else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) InlineFunction(II, IFI); // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. duplicateFunction->replaceAllUsesWith(F); duplicateFunction->eraseFromParent(); ++NumPartialInlined; return extractedFunction; }