// All edges with successors that aren't branches are "complex", because it // requires complex logic to pick which counter to update. GlobalVariable *GCOVProfiler::buildEdgeLookupTable( Function *F, GlobalVariable *Counters, const UniqueVector<BasicBlock *> &Preds, const UniqueVector<BasicBlock *> &Succs) { // TODO: support invoke, threads. We rely on the fact that nothing can modify // the whole-Module pred edge# between the time we set it and the time we next // read it. Threads and invoke make this untrue. // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); ArrayType *EdgeTableTy = ArrayType::get( Int64PtrTy, Succs.size() * Preds.size()); Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; Constant *NullValue = Constant::getNullValue(Int64PtrTy); for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i) EdgeTable[i] = NullValue; unsigned Edge = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors(); if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) { for (int i = 0; i != Successors; ++i) { BasicBlock *Succ = TI->getSuccessor(i); IRBuilder<> builder(Succ); Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge + i); EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + (Preds.idFor(BB)-1)] = cast<Constant>(Counter); } } Edge += Successors; } ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size()); GlobalVariable *EdgeTableGV = new GlobalVariable( *M, EdgeTableTy, true, GlobalValue::InternalLinkage, ConstantArray::get(EdgeTableTy, V), "__llvm_gcda_edge_table"); EdgeTableGV->setUnnamedAddr(true); return EdgeTableGV; }
// Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 1) return false; if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) return false; MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); if (!WeightsNode) return false; // Ensure there are weights for all of the successors. Note that the first // operand to the metadata node is a name, not a weight. if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1) return false; // Build up the final weights that will be used in a temporary buffer, but // don't add them until all weihts are present. Each weight value is clamped // to [1, getMaxWeightFor(BB)]. uint32_t WeightLimit = getMaxWeightFor(BB); SmallVector<uint32_t, 2> Weights; Weights.reserve(TI->getNumSuccessors()); for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i)); if (!Weight) return false; Weights.push_back( std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit))); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); return true; }
/////////////////// // NEW begin /////////////////// // check dynamic pair satisfy anti-dependency bool idenRegion::isAntiDepPair(LoadInst *Load, StoreInst *Store) { // perform a DFS to check if store is after load typedef std::pair<BasicBlock *, BasicBlock::iterator> WorkItem; SmallVector<WorkItem, 8> Worklist; SmallPtrSet<BasicBlock *, 32> Visited; BasicBlock *LoadBB = Load->getParent(); Worklist.push_back(WorkItem(LoadBB, Load)); do { BasicBlock *BB; BasicBlock::iterator I, E; tie(BB, I) = Worklist.pop_back_val(); errs() << "... On BB " << BB->getName() << "\n"; // If we revisited LoadBB, we scan to Load to complete cycle // Otherwise we end at BB->end() E = (BB == LoadBB && I == BB->begin()) ? Load : BB->end(); // errs() << "... Last instruction on current BB is " << getLocator(*E) << "\n"; // iterate throught BB to check if Load instruction exist in the BB while (I != E) { // errs() << "...... Inst: " << getLocator(*I) << "\n"; if (isa<StoreInst>(I) && dyn_cast<StoreInst>(I) == Store) { return true; } ++I; } // get current BB's succesor TerminatorInst* ti = BB->getTerminator(); int numSuccesor = ti->getNumSuccessors(); for (int i = 0; i < numSuccesor; i++) { BasicBlock* nextSuc = ti->getSuccessor(i); // don't count backedge if (Visited.insert(nextSuc) && !DT->dominates(nextSuc, BB)) { Worklist.push_back(WorkItem(nextSuc, nextSuc->begin())); } } } while(!Worklist.empty()); return false; }
// Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 1) return false; if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) return false; MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); if (!WeightsNode) return false; // Check that the number of successors is manageable. assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors"); // Ensure there are weights for all of the successors. Note that the first // operand to the metadata node is a name, not a weight. if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1) return false; // Build up the final weights that will be used in a temporary buffer. // Compute the sum of all weights to later decide whether they need to // be scaled to fit in 32 bits. uint64_t WeightSum = 0; SmallVector<uint32_t, 2> Weights; Weights.reserve(TI->getNumSuccessors()); for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { ConstantInt *Weight = mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i)); if (!Weight) return false; assert(Weight->getValue().getActiveBits() <= 32 && "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); WeightSum += Weights.back(); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); // If the sum of weights does not fit in 32 bits, scale every weight down // accordingly. uint64_t ScalingFactor = (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1; WeightSum = 0; for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { uint32_t W = Weights[i] / ScalingFactor; WeightSum += W; setEdgeWeight(BB, i, W); } assert(WeightSum <= UINT32_MAX && "Expected weights to scale down to 32 bits"); return true; }
/// Rewrite the PHI nodes of every successor of this block so that incoming
/// entries naming this block name \p New instead.
///
/// Does nothing when this block has no terminator yet.
void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
  // Cope with being called on a BasicBlock that doesn't have a terminator
  // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
  TerminatorInst *Term = getTerminator();
  if (!Term)
    return;

  for (BasicBlock *Target : Term->successors()) {
    // N.B. Target might not be a complete BasicBlock, so the scan is bounded
    // by end() rather than assuming a non-phi instruction follows the PHIs.
    iterator It = Target->begin();
    iterator End = Target->end();
    while (It != End) {
      PHINode *Phi = dyn_cast<PHINode>(It);
      if (!Phi)
        break;
      // Redirect every incoming entry that still refers to this block.
      for (int Idx = Phi->getBasicBlockIndex(this); Idx >= 0;
           Idx = Phi->getBasicBlockIndex(this))
        Phi->setIncomingBlock(Idx, New);
      ++It;
    }
  }
}
/// Attach \p LoopID as MD_loop metadata to this loop.
///
/// In loop-simplify form the ID goes on the latch terminator only; otherwise
/// it goes on every terminator inside the loop that branches to the header.
/// \p LoopID must be non-null, have at least one operand, and refer to itself
/// through operand 0.
void Loop::setLoopID(MDNode *LoopID) const {
  assert(LoopID && "Loop ID should not be null");
  assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");

  if (isLoopSimplifyForm()) {
    TerminatorInst *LatchTerm = getLoopLatch()->getTerminator();
    LatchTerm->setMetadata(LLVMContext::MD_loop, LoopID);
    return;
  }

  BasicBlock *Header = getHeader();
  for (BasicBlock *Block : this->blocks()) {
    TerminatorInst *Term = Block->getTerminator();
    for (unsigned Idx = 0, Num = Term->getNumSuccessors(); Idx != Num; ++Idx) {
      if (Term->getSuccessor(Idx) != Header)
        continue;
      Term->setMetadata(LLVMContext::MD_loop, LoopID);
    }
  }
}
/// MatchLoopHeaderHeuristic - Predict a successor that is a loop header or /// a loop pre-header and does not post-dominate will be taken. /// @returns a Prediction that is a pair in which the first element is the /// successor taken, and the second the successor not taken. Prediction BranchHeuristicsInfo::MatchLoopHeaderHeuristic(BasicBlock *root) const { bool matched = false; Prediction pred; // Last instruction of basic block. TerminatorInst *TI = root->getTerminator(); // Basic block successors. True and False branches. BasicBlock *trueSuccessor = TI->getSuccessor(0); BasicBlock *falseSuccessor = TI->getSuccessor(1); // Get the most inner loop in which the true successor basic block is in. Loop *loop = LI->getLoopFor(trueSuccessor); // Check if exists a loop, the true branch successor is a loop header or a // loop pre-header, and does not post dominate. if (loop && (trueSuccessor == loop->getHeader() || trueSuccessor == loop->getLoopPreheader()) && !PDT->dominates(trueSuccessor, root)) { matched = true; pred = std::make_pair(trueSuccessor, falseSuccessor); } // Get the most inner loop in which the false successor basic block is in. loop = LI->getLoopFor(falseSuccessor); // Check if exists a loop, // the false branch successor is a loop header or a loop pre-header, and // does not post dominate. if (loop && (falseSuccessor == loop->getHeader() || falseSuccessor == loop->getLoopPreheader()) && !PDT->dominates(falseSuccessor, root)) { // If the heuristic matches both branches, predict none. if (matched) return empty; matched = true; pred = std::make_pair(falseSuccessor, trueSuccessor); } return (matched ? pred : empty); }
/// MatchPointerHeuristic - Predict that a comparison of a pointer against /// null or of two pointers will fail. /// @returns a Prediction that is a pair in which the first element is the /// successor taken, and the second the successor not taken. Prediction BranchHeuristicsInfo::MatchPointerHeuristic(BasicBlock *root) const { // Last instruction of basic block. TerminatorInst *TI = root->getTerminator(); // Basic block successors. True and False branches. BasicBlock *trueSuccessor = TI->getSuccessor(0); BasicBlock *falseSuccessor = TI->getSuccessor(1); // Is the last instruction a Branch Instruction? BranchInst *BI = dyn_cast<BranchInst>(TI); if (!BI || !BI->isConditional()) return empty; // Conditional instruction. Value *cond = BI->getCondition(); // Pointer comparisons are integer comparisons. ICmpInst *II = dyn_cast<ICmpInst>(cond); if (!II) return empty; // An integer comparison has always two operands. Value *operand1 = II->getOperand(0); Value *operand2 = II->getOperand(1); // Obtain the type of comparison. enum ICmpInst::Predicate signedPred = II->getSignedPredicate(); // The heuristic states that it must be compared against null, // but in LLVM, null is also a PointerType, so it only requires // to test if there is a comparison between two pointers. if (signedPred == ICmpInst::ICMP_EQ && isa<PointerType>(operand1->getType()) && // NULL is a pointer type too isa<PointerType>(operand2->getType())) { // NULL is a pointer type too return std::make_pair(falseSuccessor, trueSuccessor); } else if (signedPred != ICmpInst::ICMP_EQ && isa<PointerType>(operand1->getType()) && isa<PointerType>(operand2->getType())) { return std::make_pair(trueSuccessor, falseSuccessor); } return empty; }
/// Attach \p LoopID to this loop under the LoopMDName metadata kind.
///
/// In loop-simplify form the ID goes on the latch terminator only; otherwise
/// it goes on every terminator inside the loop that branches to the header.
/// \p LoopID must be non-null, have at least one operand, and refer to itself
/// through operand 0.
void Loop::setLoopID(MDNode *LoopID) const {
  assert(LoopID && "Loop ID should not be null");
  assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");

  if (isLoopSimplifyForm()) {
    TerminatorInst *LatchTerm = getLoopLatch()->getTerminator();
    LatchTerm->setMetadata(LoopMDName, LoopID);
    return;
  }

  BasicBlock *Header = getHeader();
  for (block_iterator Blk = block_begin(), BlkEnd = block_end();
       Blk != BlkEnd; ++Blk) {
    TerminatorInst *Term = (*Blk)->getTerminator();
    const unsigned NumSuccs = Term->getNumSuccessors();
    for (unsigned Idx = 0; Idx != NumSuccs; ++Idx) {
      if (Term->getSuccessor(Idx) != Header)
        continue;
      Term->setMetadata(LoopMDName, LoopID);
    }
  }
}
/// Split the block containing \p SplitBefore at that instruction and insert a
/// new, initially empty "then" block that is entered when \p Cond is true.
///
/// The then-block ends in an unreachable when \p Unreachable is set, and in a
/// branch to the tail block otherwise. \p BranchWeights is attached as MD_prof
/// to the new conditional branch. \p DT and \p LI are updated when non-null.
///
/// \returns the terminator of the new then-block.
TerminatorInst *
llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
                                bool Unreachable, MDNode *BranchWeights,
                                DominatorTree *DT, LoopInfo *LI) {
  BasicBlock *Origin = SplitBefore->getParent();
  BasicBlock *Rest = Origin->splitBasicBlock(SplitBefore->getIterator());
  TerminatorInst *FallThroughTerm = Origin->getTerminator();
  LLVMContext &Ctx = Origin->getContext();
  BasicBlock *Guarded = BasicBlock::Create(Ctx, "", Origin->getParent(), Rest);

  // Terminate the guarded block, reusing the split point's debug location.
  TerminatorInst *GuardedTerm;
  if (!Unreachable)
    GuardedTerm = BranchInst::Create(Rest, Guarded);
  else
    GuardedTerm = new UnreachableInst(Ctx, Guarded);
  GuardedTerm->setDebugLoc(SplitBefore->getDebugLoc());

  // Swap the terminator left behind by the split for the conditional branch.
  BranchInst *CondBr =
      BranchInst::Create(/*ifTrue*/Guarded, /*ifFalse*/Rest, Cond);
  CondBr->setMetadata(LLVMContext::MD_prof, BranchWeights);
  ReplaceInstWithInst(FallThroughTerm, CondBr);

  if (DT) {
    DomTreeNode *OriginNode = DT->getNode(Origin);
    if (OriginNode) {
      // Snapshot the children before the tree is modified; they are
      // re-parented under the tail block.
      std::vector<DomTreeNode *> Kids(OriginNode->begin(), OriginNode->end());

      DomTreeNode *RestNode = DT->addNewBlock(Rest, Origin);
      for (DomTreeNode *Kid : Kids)
        DT->changeImmediateDominator(Kid, RestNode);

      // The original block dominates the guarded block.
      DT->addNewBlock(Guarded, Origin);
    }
  }

  if (LI) {
    Loop *Enclosing = LI->getLoopFor(Origin);
    if (Enclosing) {
      Enclosing->addBasicBlockToLoop(Guarded, *LI);
      Enclosing->addBasicBlockToLoop(Rest, *LI);
    }
  }

  return GuardedTerm;
}
/// matchEdges - Link every profile counter with an edge. unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB, ArrayRef<unsigned> Counters) { if (Counters.size() == 0) return 0; unsigned ReadCount = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n"); readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)), Counters); } } } return ReadCount; }
/// Flood forward from \p bBSuss, recording every block reached in bBlocks.
///
/// The flood does not go past a block that post-dominates \p bBOring, unless
/// that block is \p bBOring itself. Blocks already in bBlocks are not
/// revisited.
void hammock::findIR(BasicBlock *bBOring, BasicBlock *bBSuss,
                     PostDominatorTree &PD) {
  // Already marked: nothing left to do for this block.
  if (bBlocks.count(bBSuss) > 0)
    return;

  // Mark the block.
  bBlocks.insert(bBSuss);

  // A post-dominator of the start block (other than the start block itself)
  // ends the flood along this path.
  if (PD.dominates(bBSuss, bBOring) && bBSuss != bBOring)
    return;

  // Otherwise keep flooding through every successor.
  TerminatorInst *Term = bBSuss->getTerminator();
  for (unsigned int Idx = 0; Idx < Term->getNumSuccessors(); ++Idx)
    findIR(bBOring, Term->getSuccessor(Idx), PD);
}
// In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { bool MadeChange = false; // Split all critical edges where the dest block has a PHI. if (CriticalEdgeSplit) { TerminatorInst *BBTI = BB.getTerminator(); if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { BasicBlock *SuccBB = BBTI->getSuccessor(i); if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) SplitEdgeNicely(BBTI, i, BackEdges, this); } } } SunkAddrs.clear(); CurInstIterator = BB.begin(); for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; ) MadeChange |= OptimizeInst(CurInstIterator++); return MadeChange; }
void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite) { Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Type *ShadowTy = IntegerType::get( *C, std::max(8U, TypeSize >> MappingScale)); Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); Value *ShadowPtr = memToShadow(AddrLong, IRB); Value *CmpVal = Constant::getNullValue(ShadowTy); Value *ShadowValue = IRB.CreateLoad( IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); size_t Granularity = 1 << MappingScale; TerminatorInst *CrashTerm = 0; if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false); assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB); CrashTerm = new UnreachableInst(*C, CrashBlock); BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); ReplaceInstWithInst(CheckTerm, NewTerm); } else { CrashTerm = splitBlockAndInsertIfThen(Cmp, true); } Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex); Crash->setDebugLoc(OrigIns->getDebugLoc()); }
/// setBranchWeightMetadata - Translate the counter values associated with each /// edge into branch weights for each conditional branch (a branch with 2 or /// more desinations). void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M, ProfileData &PB) { for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n"); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); unsigned NumSuccessors = TI->getNumSuccessors(); // If there is only one successor then we can not set a branch // probability as the target is certain. if (NumSuccessors < 2) continue; // Load the weights of all edges leading from this terminator. DEBUG(dbgs() << "-- Terminator with " << NumSuccessors << " successors:\n"); SmallVector<uint32_t, 4> Weights(NumSuccessors); for (unsigned s = 0 ; s < NumSuccessors ; ++s) { ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s)); Weights[s] = (uint32_t)PB.getEdgeWeight(edge); DEBUG(dbgs() << "---- Edge '" << edge << "' has weight " << Weights[s] << "\n"); } // Set branch weight metadata. This will set branch probabilities of // 100%/0% if that is true of the dynamic execution. // BranchProbabilityInfo can account for this when it loads this metadata // (it gives the unexectuted branch a weight of 1 for the purposes of // probability calculations). MDBuilder MDB(TI->getContext()); MDNode *Node = MDB.createBranchWeights(Weights); TI->setMetadata(LLVMContext::MD_prof, Node); NumTermsAnnotated++; } } }
/// MatchLoopBranchHeuristic - Predict as taken an edge back to a loop's /// head. Predict as not taken an edge exiting a loop. /// @returns a Prediction that is a pair in which the first element is the /// successor taken, and the second the successor not taken. Prediction BranchHeuristicsInfo::MatchLoopBranchHeuristic(BasicBlock *root) const { bool matched = false; Prediction pred; // Last instruction of basic block. TerminatorInst *TI = root->getTerminator(); // Basic block successors. True and False branches. BasicBlock *trueSuccessor = TI->getSuccessor(0); BasicBlock *falseSuccessor = TI->getSuccessor(1); // True and false branch edges. Edge trueEdge = std::make_pair(root, trueSuccessor); Edge falseEdge = std::make_pair(root, falseSuccessor); // If the true branch is a back edge to a loop's head or the false branch is // an exit edge, match the heuristic. if ((BPI->isBackEdge(trueEdge) && LI->isLoopHeader(trueSuccessor)) || BPI->isExitEdge(falseEdge)) { matched = true; pred = std::make_pair(trueSuccessor, falseSuccessor); } // Check the opposite situation, the other branch. if ((BPI->isBackEdge(falseEdge) && LI->isLoopHeader(falseSuccessor)) || BPI->isExitEdge(trueEdge)) { // If the heuristic matches both branches, predict none. if (matched) return empty; matched = true; pred = std::make_pair(falseSuccessor, trueSuccessor); } return (matched ? pred : empty); }
/// Delete the dead block \p BB: detach it from its successors, drop all of its
/// instructions, and remove the block itself.
///
/// \p BB must have no predecessors other than (optionally) itself. When
/// \p DDT is non-null, the edge deletions are applied to it and the block
/// removal is routed through it; otherwise the block is erased directly.
void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) {
  assert((pred_begin(BB) == pred_end(BB) ||
         // Can delete self loop.
         BB->getSinglePredecessor() == BB) && "Block is not dead!");

  TerminatorInst *Term = BB->getTerminator();
  std::vector<DominatorTree::UpdateType> Pending;

  // Tell every successor that one of its predecessors is going away, and
  // remember the removed edge when dominance updates are being tracked.
  if (DDT)
    Pending.reserve(Term->getNumSuccessors());
  for (unsigned Idx = 0, Num = Term->getNumSuccessors(); Idx != Num; ++Idx) {
    BasicBlock *Succ = Term->getSuccessor(Idx);
    Succ->removePredecessor(BB);
    if (DDT)
      Pending.push_back({DominatorTree::Delete, BB, Succ});
  }

  // Zap the instructions back to front.
  for (; !BB->empty(); BB->getInstList().pop_back()) {
    Instruction &Last = BB->back();
    if (Last.use_empty())
      continue;
    // Control flow can't get here, so any replacement value will do. Since
    // this block is unreachable and all values contained within it must
    // dominate their uses, the remaining users are themselves dead and will
    // eventually be removed.
    Last.replaceAllUsesWith(UndefValue::get(Last.getType()));
  }

  if (!DDT) {
    BB->eraseFromParent(); // Zap the block!
    return;
  }

  DDT->applyUpdates(Pending);
  DDT->deleteBB(BB); // Deferred deletion of BB.
}
/// Insert a call to CycleCheck on every back edge of \p F.
///
/// For each edge B1 -> B2 that IdentifyBackEdges assigns an ID to, a new block
/// is created in \p F that calls CycleCheck with that ID and then branches to
/// B2. B2's PHI nodes and B1's terminator are rewired so that the edge runs
/// B1 -> new block -> B2.
void CheckInserter::insertCycleChecks(Function &F) {
  IdentifyBackEdges &IBE = getAnalysis<IdentifyBackEdges>();

  // F.end() is re-evaluated on every iteration; blocks are created in F while
  // this loop is running.
  for (Function::iterator B1 = F.begin(); B1 != F.end(); ++B1) {
    TerminatorInst *TI = B1->getTerminator();
    for (unsigned j = 0; j < TI->getNumSuccessors(); ++j) {
      BasicBlock *B2 = TI->getSuccessor(j);
      unsigned BackEdgeID = IBE.getID(B1, B2);
      // (unsigned)-1 is treated as "this edge has no back-edge ID".
      if (BackEdgeID != (unsigned)-1) {
        assert(BackEdgeID < MaxNumBackEdges);
        BasicBlock *BackEdgeBlock = BasicBlock::Create(
            F.getContext(),
            "backedge_" + B1->getName() + "_" + B2->getName(),
            &F);
        CallInst::Create(CycleCheck,
                         ConstantInt::get(IntType, BackEdgeID),
                         "",
                         BackEdgeBlock);
        // BackEdgeBlock -> B2
        // Fix the PHINodes in B2.
        BranchInst::Create(B2, BackEdgeBlock);
        for (BasicBlock::iterator I = B2->begin();
             B2->getFirstNonPHI() != I;
             ++I) {
          PHINode *PHI = cast<PHINode>(I);
          // Note: If B2 has multiple incoming edges from B1 (i.e. B1's
          // terminator lists B2 as a successor more than once), its PHINodes
          // must also have multiple incoming edges from B1. However, after
          // adding BackEdgeBlock and essentially merging the multiple incoming
          // edges from B1, there will be only one edge from BackEdgeBlock to
          // B2. Therefore, we need to remove the redundant incoming edges from
          // B2's PHINodes.
          // NOTE(review): the original comment gave "B1 terminates with a
          // SelectInst" as the example; presumably a multi-way terminator such
          // as a switch was meant — confirm.
          bool FirstIncomingFromB1 = true;
          for (unsigned k = 0; k < PHI->getNumIncomingValues(); ++k) {
            if (PHI->getIncomingBlock(k) == B1) {
              if (FirstIncomingFromB1) {
                // Keep the first entry, re-pointed at the new block.
                FirstIncomingFromB1 = false;
                PHI->setIncomingBlock(k, BackEdgeBlock);
              } else {
                // Drop the duplicate and revisit index k, which now holds the
                // entry that followed the removed one.
                PHI->removeIncomingValue(k, false);
                --k;
              }
            }
          }
        }
        // B1 -> BackEdgeBlock
        // There might be multiple back edges from B1 to B2. Need to replace
        // them all. Successors before index j were already examined by the
        // enclosing loop, so the scan starts at j.
        for (unsigned j2 = j; j2 < TI->getNumSuccessors(); ++j2) {
          if (TI->getSuccessor(j2) == B2) {
            TI->setSuccessor(j2, BackEdgeBlock);
          }
        }
      }
    }
  }
}
//Return true if the subgraph denoted by bBlocks set attribute is a hammock graph bool hammock::checkHammock (Function &F) { //For each basicBlock for (Function::iterator Fit = F.begin(), Fend = F.end(); Fit != Fend; ++Fit) { TerminatorInst *ti = Fit->getTerminator(); if (bBlocks.count(Fit)==0) { //If it is out of subgraph //Check if some respective successor is marked for (unsigned int i=0; i<ti->getNumSuccessors(); i++) { if (bBlocks.count(ti->getSuccessor(i))>0) { return false; } } }else { /*//If it is in subgrah //Check if some respective successor is NOT marked for (unsigned int i=0; i<ti->getNumSuccessors(); i++) { if (bBlocks.count(ti->getSuccessor(i))==0) { return false; } }*/ } } return true; }
// visitCallInst - This converts all LLVM call instructions into invoke // instructions. The except part of the invoke goes to the "LongJmpBlkPre" // that grabs the exception and proceeds to determine if it's a longjmp // exception or not. void LowerSetJmp::visitCallInst(CallInst& CI) { if (CI.getCalledFunction()) if (!IsTransformableFunction(CI.getCalledFunction()->getName()) || CI.getCalledFunction()->isIntrinsic()) return; BasicBlock* OldBB = CI.getParent(); // If not reachable from a setjmp call, don't transform. if (!DFSBlocks.count(OldBB)) return; BasicBlock* NewBB = OldBB->splitBasicBlock(CI); assert(NewBB && "Couldn't split BB of \"call\" instruction!!"); DFSBlocks.insert(NewBB); NewBB->setName("Call2Invoke"); Function* Func = OldBB->getParent(); // Construct the new "invoke" instruction. TerminatorInst* Term = OldBB->getTerminator(); CallSite CS(&CI); std::vector<Value*> Params(CS.arg_begin(), CS.arg_end()); InvokeInst* II = InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], Params.begin(), Params.end(), CI.getName(), Term); II->setCallingConv(CI.getCallingConv()); II->setAttributes(CI.getAttributes()); // Replace the old call inst with the invoke inst and remove the call. CI.replaceAllUsesWith(II); CI.eraseFromParent(); // The old terminator is useless now that we have the invoke inst. Term->eraseFromParent(); ++CallsTransformed; }
/// SplitEdge - Split the edge connecting specified block. Pass P must /// not be NULL. BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { unsigned SuccNum = GetSuccessorNumber(BB, Succ); // If this is a critical edge, let SplitCriticalEdge do it. TerminatorInst *LatchTerm = BB->getTerminator(); if (SplitCriticalEdge(LatchTerm, SuccNum, P)) return LatchTerm->getSuccessor(SuccNum); // If the edge isn't critical, then BB has a single successor or Succ has a // single pred. Split the block. if (BasicBlock *SP = Succ->getSinglePredecessor()) { // If the successor only has a single pred, split the top of the successor // block. assert(SP == BB && "CFG broken"); SP = NULL; return SplitBlock(Succ, Succ->begin(), P); } // Otherwise, if BB has a single successor, split it at the bottom of the // block. assert(BB->getTerminator()->getNumSuccessors() == 1 && "Should have a single succ!"); return SplitBlock(BB, BB->getTerminator(), P); }
// Cleanly removes a terminator instruction. void GNUstep::removeTerminator(BasicBlock *BB) { TerminatorInst *BBTerm = BB->getTerminator(); // Remove the BB as a predecessor from all of successors for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { BBTerm->getSuccessor(i)->removePredecessor(BB); } BBTerm->replaceAllUsesWith(UndefValue::get(BBTerm->getType())); // Remove the terminator instruction itself. BBTerm->eraseFromParent(); }
/// shouldEliminateUnconditionalBranch - Return true if this branch looks
/// attractive to eliminate. We eliminate the branch only if the destination
/// basic block is small: the scan below gives up as soon as \p Threshold
/// instructions have been counted and another instruction follows. In
/// practice, since one of the counted instructions is a terminator, this
/// means that we will add up to Threshold - 1 instructions to the new block.
///
/// We don't count PHI nodes in the count since they will be removed when the
/// contents of the block are copied over. Debugger intrinsics are not counted
/// either.
///
/// \param TI        the terminator being considered; must be an unconditional
///                  branch for the function to return true.
/// \param Threshold the instruction limit for the destination block.
/// \returns true if the branch should be eliminated by tail duplication.
///
bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
                                                 unsigned Threshold) {
  BranchInst *BI = dyn_cast<BranchInst>(TI);
  if (!BI || !BI->isUnconditional()) return false;  // Not an uncond branch!

  BasicBlock *Dest = BI->getSuccessor(0);
  if (Dest == BI->getParent()) return false;        // Do not loop infinitely!

  // Do not inline a block if we will just get another branch to the same block!
  TerminatorInst *DTI = Dest->getTerminator();
  if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
    if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
      return false;                                 // Do not loop infinitely!

  // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
  // because doing so would require breaking critical edges.  This should be
  // fixed eventually. Until then, reject any destination whose terminator
  // produces a value that is used.
  if (!DTI->use_empty())
    return false;

  // Do not bother with blocks with only a single predecessor: simplify
  // CFG will fold these two blocks together!
  // PI is left pointing at the second predecessor; the successor-count check
  // further down continues from this position.
  pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
  ++PI;
  if (PI == PE) return false;  // Exactly one predecessor!

  BasicBlock::iterator I = Dest->getFirstNonPHI();

  for (unsigned Size = 0; I != Dest->end(); ++I) {
    if (Size == Threshold) return false;  // The block is too large.

    // Don't tail duplicate call instructions.  They are very large compared to
    // other instructions.
    if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;

    // Also alloca.
    if (isa<AllocaInst>(I)) return false;

    // Some vector instructions can expand into a number of instructions.
    if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
        isa<InsertElementInst>(I)) return false;

    // Only count instructions that are not debugger intrinsics.
    if (!isa<DbgInfoIntrinsic>(I)) ++Size;
  }

  // Do not tail duplicate a block that has thousands of successors into a block
  // with a single successor if the block has many other predecessors.  This can
  // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
  // cases that have a large number of indirect gotos.
  unsigned NumSuccs = DTI->getNumSuccessors();
  if (NumSuccs > 8) {
    unsigned TooMany = 128;
    if (NumSuccs >= TooMany) return false;
    // Budget of remaining predecessors: the more successors Dest has, the
    // fewer additional predecessors are tolerated.
    TooMany = TooMany/NumSuccs;
    for (; PI != PE; ++PI)
      if (TooMany-- == 0) return false;
  }

  // If this unconditional branch is a fall-through, be careful about
  // tail duplicating it.  In particular, we don't want to taildup it if the
  // original block will still be there after taildup is completed: doing so
  // would eliminate the fall-through, requiring unconditional branches.
  Function::iterator DestI = Dest;
  if (&*--DestI == BI->getParent()) {
    // The uncond branch is a fall-through.  Tail duplication of the block
    // will eliminate the fall-through-ness and end up cloning the terminator
    // at the end of the Dest block.  Since the original Dest block will
    // continue to exist, this means that one or the other will not be able to
    // fall through.  One typical example that this helps with is code like:
    // if (a)
    //   foo();
    // if (b)
    //   foo();
    // Cloning the 'if b' block into the end of the first foo block is messy.

    // The messy case is when the fall-through block falls through to other
    // blocks.  This is what we would be preventing if we cloned the block.
    DestI = Dest;
    if (++DestI != Dest->getParent()->end()) {
      BasicBlock *DestSucc = DestI;
      // If any of Dest's successors are fall-throughs, don't do this xform.
      for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
           SI != SE; ++SI)
        if (*SI == DestSucc)
          return false;
    }
  }

  // Finally, check that we haven't redirected to this target block earlier;
  // there are cases where we loop forever if we don't check this (PR 2323).
  // A failed insert means Dest was already recorded in CycleDetector.
  if (!CycleDetector.insert(Dest))
    return false;

  return true;
}
/// Insert optimal edge profiling instrumentation into \p M.
///
/// One counter slot is reserved for every candidate edge of every defined
/// function.  An edge whose lookup in the (sorted) MaximumSpanningTree
/// container fails gets an increment inserted and a zero-initialised slot;
/// every other slot is initialised to ProfileInfoLoader::Uncounted so that a
/// reader knows the value has to be derived from the other counters.
///
/// \returns false (without touching the module) when \p M has no "main"
/// function, true otherwise.
bool OptimalEdgeProfiler::runOnModule(Module &M) {
  Function *Main = M.getFunction("main");
  if (Main == 0) {
    errs() << "WARNING: cannot insert edge profiling into a module"
           << " with no main function!\n";
    return false;  // No main, no instrumentation!
  }

  // First pass: count every edge that may be instrumented.  Besides the real
  // CFG edges, a virtual edge (0,entry) is reserved per function and a
  // virtual edge (BB,0) per block without successors.
  unsigned NumEdges = 0;
  for (Module::iterator Fn = M.begin(), FnEnd = M.end(); Fn != FnEnd; ++Fn) {
    if (Fn->isDeclaration())
      continue;

    // Slot for the (0,entry) edge.
    ++NumEdges;
    for (Function::iterator Blk = Fn->begin(), BlkEnd = Fn->end();
         Blk != BlkEnd; ++Blk) {
      unsigned SuccCount = Blk->getTerminator()->getNumSuccessors();
      // A block without successors still occupies one slot: its (BB,0) edge.
      NumEdges += (SuccCount == 0) ? 1 : SuccCount;
    }
  }

  // The counter array has one entry per counted edge.  It is created with a
  // null initializer here and receives its final initializer at the end.
  const Type *Int32 = Type::getInt32Ty(M.getContext());
  const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
  GlobalVariable *Counters =
    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
                       Constant::getNullValue(ATy), "OptEdgeProfCounters");
  NumEdgesInserted = 0;

  std::vector<Constant*> Initializer(NumEdges);
  Constant *Zero = ConstantInt::get(Int32, 0);
  Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);

  // Second pass: walk the edges in exactly the same order as the counting
  // pass; `i` is the slot index of the edge currently being looked at.
  unsigned i = 0;
  for (Module::iterator Fn = M.begin(), FnEnd = M.end(); Fn != FnEnd; ++Fn) {
    if (Fn->isDeclaration())
      continue;
    DEBUG(dbgs()<<"Working on "<<Fn->getNameStr()<<"\n");

    // Build the spanning tree from the edge weights supplied by the
    // ProfileInfo analysis, then sort it so std::binary_search can be used
    // for the membership tests below.
    ProfileInfo::EdgeWeights ECs =
      getAnalysis<ProfileInfo>(*Fn).getEdgeWeights(Fn);
    std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
    MaximumSpanningTree<BasicBlock> MST(EdgeVector);
    std::stable_sort(MST.begin(), MST.end());

    // Virtual (0,entry) edge.
    BasicBlock *Entry = &(Fn->getEntryBlock());
    ProfileInfo::Edge EntryEdge = ProfileInfo::getEdge(0, Entry);
    if (std::binary_search(MST.begin(), MST.end(), EntryEdge)) {
      Initializer[i++] = Uncounted;
    } else {
      printEdgeCounter(EntryEdge, Entry, i);
      IncrementCounterInBlock(Entry, i, Counters);
      ++NumEdgesInserted;
      Initializer[i++] = Zero;
    }

    // Blocks created below by critical-edge splitting are remembered here;
    // they were not counted in the first pass and must be skipped.
    DenseSet<BasicBlock*> InsertedBlocks;
    for (Function::iterator Blk = Fn->begin(), BlkEnd = Fn->end();
         Blk != BlkEnd; ++Blk) {
      if (InsertedBlocks.count(Blk))
        continue;

      TerminatorInst *TI = Blk->getTerminator();
      const unsigned SuccCount = TI->getNumSuccessors();

      // No successors: only the virtual (BB,0) edge has to be handled.
      if (SuccCount == 0) {
        ProfileInfo::Edge ExitEdge = ProfileInfo::getEdge(Blk, 0);
        if (std::binary_search(MST.begin(), MST.end(), ExitEdge)) {
          Initializer[i++] = Uncounted;
        } else {
          printEdgeCounter(ExitEdge, Blk, i);
          IncrementCounterInBlock(Blk, i, Counters);
          ++NumEdgesInserted;
          Initializer[i++] = Zero;
        }
        continue;
      }

      for (unsigned s = 0; s != SuccCount; ++s) {
        BasicBlock *Succ = TI->getSuccessor(s);
        ProfileInfo::Edge OutEdge = ProfileInfo::getEdge(Blk, Succ);
        if (std::binary_search(MST.begin(), MST.end(), OutEdge)) {
          Initializer[i++] = Uncounted;
          continue;
        }

        // Split the edge if it is critical and re-read the successor, which
        // is the newly inserted block when a split happened.
        bool wasInserted = SplitCriticalEdge(TI, s, this);
        Succ = TI->getSuccessor(s);
        if (wasInserted)
          InsertedBlocks.insert(Succ);

        // The edge is no longer critical.  With a single successor the
        // counter goes into the source block, otherwise into the successor.
        BasicBlock *CounterBlock = Succ;
        if (TI->getNumSuccessors() == 1)
          CounterBlock = Blk;

        printEdgeCounter(OutEdge, CounterBlock, i);
        IncrementCounterInBlock(CounterBlock, i, Counters);
        ++NumEdgesInserted;
        Initializer[i++] = Zero;
      }
    }
  }

  // Both passes must have visited the same number of edges.
  assert(i==NumEdges && "the number of edges in counting array is wrong");

  // Assign the now completely defined initializer to the array.
  Constant *init = ConstantArray::get(ATy, Initializer);
  Counters->setInitializer(init);

  // Add the initialization call to main.
  InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
  return true;
}
/// \brief Simplify one loop and queue further loops for simplification. /// /// FIXME: Currently this accepts both lots of analyses that it uses and a raw /// Pass pointer. The Pass pointer is used by numerous utilities to update /// specific analyses. Rather than a pass it would be much cleaner and more /// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, Pass *PP, AssumptionCache *AC) { bool Changed = false; ReprocessLoop: // Check to see that no blocks (other than the header) in this loop have // predecessors that are not in the loop. This is not valid for natural // loops, but can occur if the blocks are unreachable. Since they are // unreachable we can just shamelessly delete those CFG edges! for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); BB != E; ++BB) { if (*BB == L->getHeader()) continue; SmallPtrSet<BasicBlock*, 4> BadPreds; for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI) { BasicBlock *P = *PI; if (!L->contains(P)) BadPreds.insert(P); } // Delete each unique out-of-loop (and thus dead) predecessor. for (BasicBlock *P : BadPreds) { DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " << P->getName() << "\n"); // Inform each successor of each dead pred. for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI) (*SI)->removePredecessor(P); // Zap the dead pred's terminator and replace it with unreachable. TerminatorInst *TI = P->getTerminator(); TI->replaceAllUsesWith(UndefValue::get(TI->getType())); P->getTerminator()->eraseFromParent(); new UnreachableInst(P->getContext(), P); Changed = true; } } // If there are exiting blocks with branches on undef, resolve the undef in // the direction which will exit the loop. This will help simplify loop // trip count computations. 
SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), E = ExitingBlocks.end(); I != E; ++I) if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) if (BI->isConditional()) { if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " << (*I)->getName() << "\n"); BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0)))); // This may make the loop analyzable, force SCEV recomputation. if (SE) SE->forgetLoop(L); Changed = true; } } // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { Preheader = InsertPreheaderForLoop(L, PP); if (Preheader) { ++NumInserted; Changed = true; } } // Next, check to make sure that all exit nodes of the loop only have // predecessors that are inside of the loop. This check guarantees that the // loop preheader/header will dominate the exit blocks. If the exit block has // predecessors from outside of the loop, split the edge now. SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), E = ExitBlockSet.end(); I != E; ++I) { BasicBlock *ExitBlock = *I; for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); PI != PE; ++PI) // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. if (!L->contains(*PI)) { if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PP)) { ++NumInserted; Changed = true; } break; } } // If the header has more than two predecessors at this point (from the // preheader and from multiple backedges), we must adjust the loop. 
BasicBlock *LoopLatch = L->getLoopLatch(); if (!LoopLatch) { // If this is really a nested loop, rip it out into a child loop. Don't do // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE, PP, AC)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. Worklist.push_back(OuterL); // This is a big restructuring change, reprocess the whole loop. Changed = true; // GCC doesn't tail recursion eliminate this. // FIXME: It isn't clear we can't rely on LLVM to TRE this. goto ReprocessLoop; } } // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); if (LoopLatch) { ++NumInserted; Changed = true; } } const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); // Scan over the PHI nodes in the loop header. Since they now have only two // incoming values (the loop is canonicalized), we may have simplified the PHI // down to 'X = phi [X, Y]', which should be replaced with 'Y'. PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); } // If this loop has multiple exits and the exits all go to the same // block, attempt to merge the exits. This helps several passes, such // as LoopRotation, which do not support loops with multiple exits. // SimplifyCFG also does this (and this code uses the same utility // function), however this code is loop-aware, where SimplifyCFG is // not. 
That gives it the advantage of being able to hoist // loop-invariant instructions out of the way to open up more // opportunities, and the disadvantage of having the responsibility // to preserve dominator information. bool UniqueExit = true; if (!ExitBlocks.empty()) for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) if (ExitBlocks[i] != ExitBlocks[0]) { UniqueExit = false; break; } if (UniqueExit) { for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitingBlock = ExitingBlocks[i]; if (!ExitingBlock->getSinglePredecessor()) continue; BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!BI || !BI->isConditional()) continue; CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); if (!CI || CI->getParent() != ExitingBlock) continue; // Attempt to hoist out all instructions except for the // comparison and the branch. bool AllInvariant = true; bool AnyInvariant = false; for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { Instruction *Inst = I++; // Skip debug info intrinsics. if (isa<DbgInfoIntrinsic>(Inst)) continue; if (Inst == CI) continue; if (!L->makeLoopInvariant(Inst, AnyInvariant, Preheader ? Preheader->getTerminator() : nullptr)) { AllInvariant = false; break; } } if (AnyInvariant) { Changed = true; // The loop disposition of all SCEV expressions that depend on any // hoisted values have also changed. if (SE) SE->forgetLoopDispositions(L); } if (!AllInvariant) continue; // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. if (!FoldBranchToCommonDest(BI)) continue; // Success. The block is now dead, so remove it from the loop, // update the dominator tree and delete it. DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " << ExitingBlock->getName() << "\n"); // Notify ScalarEvolution before deleting this block. Currently assume the // parent loop doesn't change (spliting edges doesn't count). 
If blocks, // CFG edges, or other values in the parent loop change, then we need call // to forgetLoop() for the parent instead. if (SE) SE->forgetLoop(L); assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); DomTreeNode *Node = DT->getNode(ExitingBlock); const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = Node->getChildren(); while (!Children.empty()) { DomTreeNode *Child = Children.front(); DT->changeImmediateDominator(Child, Node->getIDom()); } DT->eraseNode(ExitingBlock); BI->getSuccessor(0)->removePredecessor(ExitingBlock); BI->getSuccessor(1)->removePredecessor(ExitingBlock); ExitingBlock->eraseFromParent(); } } return Changed; }
/// \brief This method is called when the specified loop has more than one /// backedge in it. /// /// If this occurs, revector all of these backedges to target a new basic block /// and have that block branch to the loop header. This ensures that loops /// have exactly one backedge. static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, DominatorTree *DT, LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); // Unique backedge insertion currently depends on having a preheader. if (!Preheader) return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName() + ".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in // the backedge block which correspond to any PHI nodes in the header block. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), PN->getName()+".be", BETerminator); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; Value *UniqueValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); if (IBB == Preheader) { PreheaderIdx = i; } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { if (!UniqueValue) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; } } } // Delete all of the incoming values from the old PN except the preheader's assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); if (PreheaderIdx != 0) { PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); } // Nuke all entries except the zero'th. for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) PN->removeIncomingValue(e-i, false); // Finally, add the newly constructed PHI node as the entry for the BEBlock. PN->addIncoming(NewPN, BEBlock); // As an optimization, if all incoming values in the new PhiNode (which is a // subset of the incoming values of the old PHI node) have the same value, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); BEBlock->getInstList().erase(NewPN); } } // Now that all of the PHI nodes have been inserted and adjusted, modify the // backedge blocks to just to the BEBlock instead of the header. 
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) if (TI->getSuccessor(Op) == Header) TI->setSuccessor(Op, BEBlock); } //===--- Update all analyses which we must preserve now -----------------===// // Update Loop Information - we know that this block is now in the current // loop and all parent loops. L->addBasicBlockToLoop(BEBlock, *LI); // Update dominator information DT->splitBlock(BEBlock); return BEBlock; }
/// \brief Analyze a call site for potential inlining. /// /// Returns true if inlining this call is viable, and false if it is not /// viable. It computes the cost and adjusts the threshold based on numerous /// factors and heuristics. If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by /// some artifact of the routine. bool CallAnalyzer::analyzeCall(CallSite CS) { ++NumCallsAnalyzed; // Track whether the post-inlining function would have more than one basic // block. A single basic block is often intended for inlining. Balloon the // threshold by 50% until we pass the single-BB phase. bool SingleBB = true; int SingleBBBonus = Threshold / 2; Threshold += SingleBBBonus; // Perform some tweaks to the cost and threshold based on the direct // callsite information. // We want to more aggressively inline vector-dense kernels, so up the // threshold, and we'll lower it if the % of vector instructions gets too // low. assert(NumInstructions == 0); assert(NumVectorInstructions == 0); FiftyPercentVectorBonus = Threshold; TenPercentVectorBonus = Threshold / 2; // Give out bonuses per argument, as the instructions setting them up will // be gone after inlining. for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { if (TD && CS.isByValArgument(I)) { // We approximate the number of loads and stores needed by dividing the // size of the byval type by the target's pointer size. PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); unsigned PointerSize = TD->getPointerSizeInBits(); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // If it generates more than 8 stores it is likely to be expanded as an // inline memcpy so we take that as an upper bound. Otherwise we assume // one load and one store per word copied. 
// FIXME: The maxStoresPerMemcpy setting from the target should be used // here instead of a magic number of 8, but it's not available via // DataLayout. NumStores = std::min(NumStores, 8U); Cost -= 2 * NumStores * InlineConstants::InstrCost; } else { // For non-byval arguments subtract off one instruction per call // argument. Cost -= InlineConstants::InstrCost; } } // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); if (OnlyOneCallAndLocalLinkage) Cost += InlineConstants::LastCallToStaticBonus; // If the instruction after the call, or if the normal destination of the // invoke is an unreachable instruction, the function is noreturn. As such, // there is little point in inlining this unless there is literally zero // cost. Instruction *Instr = CS.getInstruction(); if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { if (isa<UnreachableInst>(II->getNormalDest()->begin())) Threshold = 1; } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) Threshold = 1; // If this function uses the coldcc calling convention, prefer not to inline // it. if (F.getCallingConv() == CallingConv::Cold) Cost += InlineConstants::ColdccPenalty; // Check if we're done. This can happen due to bonuses and penalties. if (Cost > Threshold) return false; if (F.empty()) return true; Function *Caller = CS.getInstruction()->getParent()->getParent(); // Check if the caller function is recursive itself. for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); U != E; ++U) { CallSite Site(cast<Value>(*U)); if (!Site) continue; Instruction *I = Site.getInstruction(); if (I->getParent()->getParent() == Caller) { IsCallerRecursive = true; break; } } // Track whether we've seen a return instruction. The first return // instruction is free, as at least one will usually disappear in inlining. 
bool HasReturn = false; // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. CallSite::arg_iterator CAI = CS.arg_begin(); for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); FAI != FAE; ++FAI, ++CAI) { assert(CAI != CS.arg_end()); if (Constant *C = dyn_cast<Constant>(CAI)) SimplifiedValues[FAI] = C; Value *PtrArg = *CAI; if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. if (isa<AllocaInst>(PtrArg)) { SROAArgValues[FAI] = PtrArg; SROAArgCosts[PtrArg] = 0; } } } NumConstantArgs = SimplifiedValues.size(); NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); NumAllocaArgs = SROAArgValues.size(); // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this // particular call site in order to get more accurate cost estimates. This // requires a somewhat heavyweight iteration pattern: we need to walk the // basic blocks in a breadth-first order as we insert live successors. To // accomplish this, prioritizing for small iterations because we exit after // crossing our threshold, we use a small-size optimized SetVector. typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, SmallPtrSet<BasicBlock *, 16> > BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. 
if (Cost > (Threshold + VectorBonus)) break; BasicBlock *BB = BBWorklist[Idx]; if (BB->empty()) continue; // Handle the terminator cost here where we can track returns and other // function-wide constructs. TerminatorInst *TI = BB->getTerminator(); // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers // for example) would be referring to the original function, and this // indirect jump would jump from the inlined copy of the function into the // original function which is extremely undefined behavior. // FIXME: This logic isn't really right; we can safely inline functions // with indirectbr's as long as no other function or global references the // blockaddress of a block within the current function. And as a QOI issue, // if someone is using a blockaddress without an indirectbr, and that // reference somehow ends up in another function or global, we probably // don't want to inline this function. if (isa<IndirectBrInst>(TI)) return false; if (!HasReturn && isa<ReturnInst>(TI)) HasReturn = true; else Cost += InlineConstants::InstrCost; // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. if (!analyzeBlock(BB)) { if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) return false; // If the caller is a recursive function then we don't want to inline // functions which allocate a lot of stack space because it would increase // the caller stack usage dramatically. if (IsCallerRecursive && AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) return false; break; } // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. 
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) { Value *Cond = BI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); continue; } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); continue; } } // If we're unable to select a particular successor, just count all of // them. for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx) BBWorklist.insert(TI->getSuccessor(TIdx)); // If we had any successors at this point, than post-inlining is likely to // have them as well. Note that we assume any basic blocks which existed // due to branches or switches which folded above will also fold after // inlining. if (SingleBB && TI->getNumSuccessors() > 1) { // Take off the bonus we applied to the threshold. Threshold -= SingleBBBonus; SingleBB = false; } } // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return false; Threshold += VectorBonus; return Cost < Threshold; }
bool GCOVProfiler::emitProfileArcs() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return false; bool Result = false; bool InsertIndCounterIncrCode = false; for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CU(CU_Nodes->getOperand(i)); DIArray SPs = CU.getSubprograms(); SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { DISubprogram SP(SPs.getElement(i)); if (!SP.Verify()) continue; Function *F = SP.getFunction(); if (!F) continue; if (!Result) Result = true; unsigned Edges = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); if (isa<ReturnInst>(TI)) ++Edges; else Edges += TI->getNumSuccessors(); } ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Edges); GlobalVariable *Counters = new GlobalVariable(*M, CounterTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); UniqueVector<BasicBlock *> ComplexEdgePreds; UniqueVector<BasicBlock *> ComplexEdgeSuccs; unsigned Edge = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); if (Successors) { IRBuilder<> Builder(TI); if (Successors == 1) { Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge); Value *Count = Builder.CreateLoad(Counter); Count = Builder.CreateAdd(Count, ConstantInt::get(Type::getInt64Ty(*Ctx),1)); Builder.CreateStore(Count, Counter); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { Value *Sel = Builder.CreateSelect( BI->getCondition(), ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); SmallVector<Value *, 2> Idx; Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); Idx.push_back(Sel); Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); Value *Count = Builder.CreateLoad(Counter); Count = Builder.CreateAdd(Count, ConstantInt::get(Type::getInt64Ty(*Ctx),1)); Builder.CreateStore(Count, Counter); } else { ComplexEdgePreds.insert(BB); for (int i = 0; i != Successors; ++i) ComplexEdgeSuccs.insert(TI->getSuccessor(i)); } Edge += Successors; } } if (!ComplexEdgePreds.empty()) { GlobalVariable *EdgeTable = buildEdgeLookupTable(F, Counters, ComplexEdgePreds, ComplexEdgeSuccs); GlobalVariable *EdgeState = getEdgeStateValue(); Type *Int32Ty = Type::getInt32Ty(*Ctx); for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); } for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { // call runtime to perform increment BasicBlock::iterator InsertPt = ComplexEdgeSuccs[i+1]->getFirstInsertionPt(); IRBuilder<> Builder(InsertPt); Value *CounterPtrArray = Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, i * ComplexEdgePreds.size()); // Build code to increment the counter. 
InsertIndCounterIncrCode = true; Builder.CreateCall2(getIncrementIndirectCounterFunc(), EdgeState, CounterPtrArray); } } } insertCounterWriteout(CountersBySP); insertFlush(CountersBySP); } if (InsertIndCounterIncrCode) insertIndirectCounterIncrement(); return Result; }
bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) { // Clone the program to try hacking it apart... ValueToValueMapTy VMap; Module *M = CloneModule(BD.getProgram(), VMap); // Convert list to set for fast lookup... SmallPtrSet<BasicBlock*, 8> Blocks; for (unsigned i = 0, e = BBs.size(); i != e; ++i) Blocks.insert(cast<BasicBlock>(VMap[BBs[i]])); outs() << "Checking for crash with only these blocks:"; unsigned NumPrint = Blocks.size(); if (NumPrint > 10) NumPrint = 10; for (unsigned i = 0, e = NumPrint; i != e; ++i) outs() << " " << BBs[i]->getName(); if (NumPrint < Blocks.size()) outs() << "... <" << Blocks.size() << " total>"; outs() << ": "; // Loop over and delete any hack up any blocks that are not listed... for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB) if (!Blocks.count(&*BB) && BB->getTerminator()->getNumSuccessors()) { // Loop over all of the successors of this block, deleting any PHI nodes // that might include it. for (succ_iterator SI = succ_begin(&*BB), E = succ_end(&*BB); SI != E; ++SI) (*SI)->removePredecessor(&*BB); TerminatorInst *BBTerm = BB->getTerminator(); if (!BB->getTerminator()->getType()->isVoidTy()) BBTerm->replaceAllUsesWith(Constant::getNullValue(BBTerm->getType())); // Replace the old terminator instruction. BB->getInstList().pop_back(); new UnreachableInst(BB->getContext(), &*BB); } // The CFG Simplifier pass may delete one of the basic blocks we are // interested in. If it does we need to take the block out of the list. Make // a "persistent mapping" by turning basic blocks into <function, name> pairs. // This won't work well if blocks are unnamed, but that is just the risk we // have to take. std::vector<std::pair<std::string, std::string> > BlockInfo; for (BasicBlock *BB : Blocks) BlockInfo.emplace_back(BB->getParent()->getName(), BB->getName()); // Now run the CFG simplify pass on the function... 
std::vector<std::string> Passes; Passes.push_back("simplifycfg"); Passes.push_back("verify"); std::unique_ptr<Module> New = BD.runPassesOn(M, Passes); delete M; if (!New) { errs() << "simplifycfg failed!\n"; exit(1); } M = New.release(); // Try running on the hacked up program... if (TestFn(BD, M)) { BD.setNewProgram(M); // It crashed, keep the trimmed version... // Make sure to use basic block pointers that point into the now-current // module, and that they don't include any deleted blocks. BBs.clear(); const ValueSymbolTable &GST = M->getValueSymbolTable(); for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { Function *F = cast<Function>(GST.lookup(BlockInfo[i].first)); ValueSymbolTable &ST = F->getValueSymbolTable(); Value* V = ST.lookup(BlockInfo[i].second); if (V && V->getType() == Type::getLabelTy(V->getContext())) BBs.push_back(cast<BasicBlock>(V)); } return true; } delete M; // It didn't crash, try something else. return false; }
bool ScopDetection::isValidCFG(BasicBlock &BB, DetectionContext &Context) const { Region &RefRegion = Context.CurRegion; TerminatorInst *TI = BB.getTerminator(); // Return instructions are only valid if the region is the top level region. if (isa<ReturnInst>(TI) && !RefRegion.getExit() && TI->getNumOperands() == 0) return true; BranchInst *Br = dyn_cast<BranchInst>(TI); if (!Br) return invalid<ReportNonBranchTerminator>(Context, /*Assert=*/true, &BB); if (Br->isUnconditional()) return true; Value *Condition = Br->getCondition(); // UndefValue is not allowed as condition. if (isa<UndefValue>(Condition)) return invalid<ReportUndefCond>(Context, /*Assert=*/true, &BB); // Only Constant and ICmpInst are allowed as condition. if (!(isa<Constant>(Condition) || isa<ICmpInst>(Condition))) return invalid<ReportInvalidCond>(Context, /*Assert=*/true, &BB); // Allow perfectly nested conditions. assert(Br->getNumSuccessors() == 2 && "Unexpected number of successors"); if (ICmpInst *ICmp = dyn_cast<ICmpInst>(Condition)) { // Unsigned comparisons are not allowed. They trigger overflow problems // in the code generation. // // TODO: This is not sufficient and just hides bugs. However it does pretty // well. if (ICmp->isUnsigned()) return false; // Are both operands of the ICmp affine? if (isa<UndefValue>(ICmp->getOperand(0)) || isa<UndefValue>(ICmp->getOperand(1))) return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB); Loop *L = LI->getLoopFor(ICmp->getParent()); const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L); const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L); if (!isAffineExpr(&Context.CurRegion, LHS, *SE) || !isAffineExpr(&Context.CurRegion, RHS, *SE)) return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS); } // Allow loop exit conditions. Loop *L = LI->getLoopFor(&BB); if (L && L->getExitingBlock() == &BB) return true; // Allow perfectly nested conditions. 
Region *R = RI->getRegionFor(&BB); if (R->getEntry() != &BB) return invalid<ReportCondition>(Context, /*Assert=*/true, &BB); return true; }