// Tries to remove a sanity check; returns true if it worked. bool AsapPass::optimizeCheckAway(llvm::Instruction *Inst) { BranchInst *BI = cast<BranchInst>(Inst); assert(BI->isConditional() && "Sanity check must be conditional branch."); unsigned int RegularBranch = getRegularBranch(BI, SCI); bool Changed = false; if (RegularBranch == 0) { BI->setCondition(ConstantInt::getTrue(Inst->getContext())); Changed = true; } else if (RegularBranch == 1) { BI->setCondition(ConstantInt::getFalse(Inst->getContext())); Changed = true; } else { // This can happen, e.g., in the following case: // array[-1] = a + b; // is transformed into // if (a + b overflows) // report_overflow() // else // report_index_out_of_bounds(); // In this case, removing the sanity check does not help much, so we // just do nothing. // Thanks to Will Dietz for his explanation at // http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-April/071958.html dbgs() << "Warning: Sanity check with no regular branch found.\n"; dbgs() << "The sanity check has been kept intact.\n"; } if (PrintRemovedChecks && Changed) { DebugLoc DL = getSanityCheckDebugLoc(BI, RegularBranch); printDebugLoc(DL, BI->getContext(), dbgs()); dbgs() << ": SanityCheck with cost "; dbgs() << *BI->getMetadata("cost")->getOperand(0); if (MDNode *IA = DL.getInlinedAt()) { dbgs() << " (inlined at "; printDebugLoc(DebugLoc(IA), BI->getContext(), dbgs()); dbgs() << ")"; } BasicBlock *Succ = BI->getSuccessor(RegularBranch == 0 ? 1 : 0); if (const CallInst *CI = SCI->findSanityCheckCall(Succ)) { dbgs() << " " << CI->getCalledFunction()->getName(); } dbgs() << "\n"; } return Changed; }
/// \brief Insert the missing branch conditions void StructurizeCFG::insertConditions(bool Loops) { BranchVector &Conds = Loops ? LoopConds : Conditions; Value *Default = Loops ? BoolTrue : BoolFalse; SSAUpdater PhiInserter; for (BranchVector::iterator I = Conds.begin(), E = Conds.end(); I != E; ++I) { BranchInst *Term = *I; assert(Term->isConditional()); BasicBlock *Parent = Term->getParent(); BasicBlock *SuccTrue = Term->getSuccessor(0); BasicBlock *SuccFalse = Term->getSuccessor(1); PhiInserter.Initialize(Boolean, ""); PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; NearestCommonDominator Dominator(DT); Dominator.addBlock(Parent, false); Value *ParentValue = 0; for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { if (PI->first == Parent) { ParentValue = PI->second; break; } PhiInserter.AddAvailableValue(PI->first, PI->second); Dominator.addBlock(PI->first); } if (ParentValue) { Term->setCondition(ParentValue); } else { if (!Dominator.wasResultExplicitMentioned()) PhiInserter.AddAvailableValue(Dominator.getResult(), Default); Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); } } }
/// Instruments the loops of \p F with ranged address checks.
///
/// Steps, in order:
///  1. Return false immediately if the FASAN_DISABLE environment variable is
///     set.
///  2. Fetch the required analyses and cache module/context pointers.
///  3. Print per-loop iteration info to outs() by calling ii_visitLoop() on
///     every loop yielded by iterating LI_.
///  4. Return false if ClFunc is non-empty and does not name this function.
///  5. Declare the runtime functions "__fasan_check" and "__spm_give".
///  6. Walk the dominator tree in post-order and call generateCallFor() on
///     every instruction of each accepted loop (presumably this fills
///     Calls_ - confirm against generateCallFor()).
///  7. For every entry of Calls_, insert a "__fasan_check" call in the
///     recorded preheader and either clone the loop body and branch on the
///     check result, or, if the preheader already ends in a conditional
///     branch, AND the new check into the existing condition.
///  8. Optionally inline the inserted calls (FASAN_INLINE_RUNTIME).
///
/// \returns false for the two early exits above, true otherwise.
bool RangedAddressSanitizer::runOnFunction(Function &F) {
  if (getenv("FASAN_DISABLE")) {
    SPM_DEBUG( dbgs() << "FASan : disabled\n" );
    return false;
  }

  DL_ = &getAnalysis<DataLayout>();
  DT_ = &getAnalysis<DominatorTree>();
  LI_ = &getAnalysis<LoopInfo>();
  RI_ = &getAnalysis<ReduceIndexation>();
#ifdef ENABLE_REUSE
  RE_ = &getAnalysis<RelativeExecutions>();
#endif
  RMM_ = &getAnalysis<RelativeMinMax>();

  Module_ = F.getParent();
  Context_ = &Module_->getContext();

  // NOTE(review): VoidTy is only referenced inside the "#if 0" section below
  // and IntPtrTy is not referenced anywhere in this function.
  Type *VoidTy = Type::getVoidTy(*Context_);
  IntegerType *IntTy = IntegerType::getInt64Ty(*Context_);
  IntegerType *BoolTy = IntegerType::getInt1Ty(*Context_);
  PointerType *IntPtrTy = PointerType::getUnqual(IntTy);
  PointerType *VoidPtrTy = PointerType::getInt8PtrTy(*Context_);

  SPM_DEBUG( F.dump() );

  // Emitted unconditionally on outs(), i.e. not guarded by SPM_DEBUG.
  outs() << "[IterationInfo]\n";
  for (Loop * loop : *LI_) {
    ii_visitLoop(loop);
  }
  outs() << "[EndOfIterationInfo]\n";

#if 0 // disabled initialization,shutdown sequence for FASan
  if (F.getName() == "main") {
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: inserting hwloc calls into "
                        "main function\n");
    FunctionType *FnType = FunctionType::get(VoidTy, ArrayRef<Type*>(), false);
    IRBuilder<> IRB(&(*F.getEntryBlock().begin()));
    Constant *Init = Module_->getOrInsertFunction("__spm_init", FnType);
    IRB.CreateCall(Init);
    Constant *End = Module_->getOrInsertFunction("__spm_end", FnType);
    for (auto &BB : F) {
      TerminatorInst *TI = BB.getTerminator();
      if (isa<ReturnInst>(TI)) {
        IRB.SetInsertPoint(TI);
        IRB.CreateCall(End);
      }
    }
  }
#endif

  // Optional filter: only instrument the function named by ClFunc.
  if (!ClFunc.empty() && F.getName() != ClFunc) {
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: skipping function "
                     << F.getName() << "\n");
    return false;
  }

  Calls_.clear();

  SPM_DEBUG(dbgs() << "RangedAddressSanitizer: processing function "
                   << F.getName() << "\n");

  // Both runtime hooks share the signature i1 (i8*, i64, i64, i64).
  std::vector<Type*> ReuseFnFormals = { VoidPtrTy, IntTy, IntTy, IntTy };
  FunctionType *ReuseFnType = FunctionType::get(BoolTy, ReuseFnFormals, false);
  ReuseFn_ = F.getParent()->getOrInsertFunction("__fasan_check", ReuseFnType);
  ReuseFnDestroy_ =
      F.getParent()->getOrInsertFunction("__spm_give", ReuseFnType);

  // Visit all loops in bottom-up order (inner-most loops first)
  std::set<BasicBlock*> Processed;
  auto Entry = DT_->getRootNode();
  for (auto ET = po_begin(Entry), EE = po_end(Entry); ET != EE; ++ET) {
    BasicBlock *Header = (*ET)->getBlock();

    if (LI_->isLoopHeader(Header)) {
      SPM_DEBUG(dbgs() << "RangedAddressSanitizer: processing loop at "
                       << Header->getName() << "\n");
      Loop *L = LI_->getLoopFor(Header);

      // Only loops with exactly one back edge and exactly two header
      // predecessors are handled; everything else is skipped.
      if (L->getNumBackEdges() != 1 ||
          std::distance(pred_begin(Header), pred_end(Header)) != 2) {
        SPM_DEBUG(dbgs() << "RangedAddressSanitizer: loop has multiple "
                         << "backedges or multiple incoming outer blocks\n");
        continue;
      }

      // NOTE(review): this repeats the debug message printed a few lines
      // above for the same header.
      SPM_DEBUG(dbgs() << "RangedAddressSanitizer: processing loop at "
                       << Header->getName() << "\n");

      // visit all memory accesses in this loop
      // Blocks already handled for an earlier-visited loop are skipped via
      // Processed.
      for (auto BB = L->block_begin(), BE = L->block_end(); BB != BE; ++BB) {
        if (!Processed.count(*BB)) {
          Processed.insert(*BB);
          for (auto &I : *(*BB))
            generateCallFor(L, &I);
        }
      }
    }
  }

  // FAsan logic goes here
  std::map<const BasicBlock*,BasicBlock*> clonedBlockMap; // keeps track of cloned regions to avoid redundant cloning
  std::vector<CallInst*> ToInline;

  for (auto &CI : Calls_) {
    BasicBlock * Preheader = CI.Preheader;

    // TODO decide whether it is worthwhile to optimize for this case
    // insert range check
    // The builder inserts before the preheader's current terminator.
    IRBuilder<> IRB(Preheader->getTerminator());
    Value *VoidArray = IRB.CreateBitCast(CI.Array, VoidPtrTy);
    std::vector<Value*> Args = { VoidArray, CI.Min, CI.Max, CI.Reuse };
    CallInst *CR = IRB.CreateCall(ReuseFn_, Args);
    ToInline.push_back(CR);

    // verify if this loop was already instrumented
    // A conditional terminator is taken as the sign that an earlier
    // iteration of this loop over Calls_ already rewired this preheader.
    TerminatorInst * preHeaderTerm = CR->getParent()->getTerminator();
    BranchInst * preHeaderBranch = dyn_cast<BranchInst>(preHeaderTerm);

    if (preHeaderBranch && preHeaderBranch->isConditional()) {
      // discover the structure of the instrumented code (safe and default region)
      // abort, if this does not look like instrumented code
      BasicBlock * firstTarget = preHeaderBranch->getSuccessor(0);
      BasicBlock * secondTarget = preHeaderBranch->getSuccessor(1);

      // NOTE(review): the structure checks below are asserts only, and
      // clonedBlockMap[...] default-inserts a null entry for unknown keys.
      BasicBlock * safeHeader, * defHeader;
      if (clonedBlockMap.count(firstTarget)) {
        defHeader = firstTarget;
        safeHeader = clonedBlockMap[firstTarget];
        assert(safeHeader == secondTarget);
      } else {
        assert(clonedBlockMap.count(secondTarget));
        defHeader = secondTarget;
        safeHeader = clonedBlockMap[secondTarget];
        assert(safeHeader == firstTarget);
      }

      SPM_DEBUG( dbgs() << "FASan: (Unsupported) second array in safe region controlled by " << * preHeaderBranch << "\n" );

      Loop * defLoop = LI_->getLoopFor(defHeader);
      assert(defLoop && "default region is not a loop!");

      Loop::block_iterator itBodyBlock,S,E;
      S = defLoop->block_begin();
      E = defLoop->block_end();

      // mark accesses in cloned region as safe
      // NOTE(review): safeBodyBlock is dereferenced without a null check; a
      // block of defLoop that has no entry in clonedBlockMap would yield a
      // null pointer here.
      for (itBodyBlock = S;itBodyBlock != E; ++itBodyBlock) {
        BasicBlock * defBodyBlock = *itBodyBlock;
        BasicBlock * safeBodyBlock = clonedBlockMap[defBodyBlock];
        for(auto & inst : *safeBodyBlock) {
          markSafeArrayUse(&inst, CI.Array);
        }
      }

      // add conjunctive test
      // The AND is created at IRB's insertion point, i.e. before the
      // preheader terminator and after CR.
      Value * oldCond = preHeaderBranch->getCondition();
      Value * joinedCond = IRB.CreateAnd(oldCond, CR, "allsafe");
      preHeaderBranch->setCondition(joinedCond);

    } else {
      // get loop
      Loop* finalLoop = CI.FinalLoop;

      Loop::block_iterator itBodyBlock,S,E;
      S = finalLoop->block_begin();
      E = finalLoop->block_end();

      // clone loop body (cloned loop will run unchecked)
      // Every clone is recorded twice: in cloneMap for operand remapping of
      // this loop, and in clonedBlockMap for later entries of Calls_.
      ValueToValueMapTy cloneMap;
      BasicBlock * clonedHeader = 0;
      std::vector<BasicBlock*> clonedBlocks;
      for (itBodyBlock = S;itBodyBlock != E; ++itBodyBlock) {
        const BasicBlock * bodyBlock = *itBodyBlock;
        BasicBlock * clonedBlock = CloneBasicBlock(bodyBlock, cloneMap, "_checked", &F, 0);
        cloneMap[bodyBlock] = clonedBlock;
        clonedBlockMap[bodyBlock] = clonedBlock;
        clonedBlocks.push_back(clonedBlock);
        if (bodyBlock == finalLoop->getHeader()) {
          clonedHeader = clonedBlock;
          SPM_DEBUG( dbgs() << "FASan: loop header case at " << bodyBlock->getName() << "\n" );
        } else {
          SPM_DEBUG( dbgs() << "FASan: non-header block at " << bodyBlock->getName() << "\n" );
        }
      }

      if (!clonedHeader) {
        // TODO run clean-up code
        SPM_DEBUG( dbgs() << "FASan: could not find header!\n");
        abort();
      }

      // Remap uses inside cloned region (mark pointers in the region as unguarded)
      for (BasicBlock * block : clonedBlocks) {
        for(auto & inst : *block) {
          RemapInstruction(&inst, cloneMap, RF_IgnoreMissingEntries);
          markSafeArrayUse(&inst, CI.Array);
        }
      }

      // TODO fix PHI-nodes in exit blocks

      // Rewire terminator of the range check to branch to the cloned region
      // The new branch goes to the cloned header when CR is true and to the
      // original target otherwise.
      TerminatorInst * checkTermInst = CR->getParent()->getTerminator();
      if (BranchInst * checkBranchInst = dyn_cast<BranchInst>(checkTermInst)) {
        if (checkBranchInst->isUnconditional()) {
          BasicBlock * defTarget = checkBranchInst->getSuccessor(0);
          BranchInst * modifiedBranchInst = BranchInst::Create(clonedHeader, defTarget, CR, checkBranchInst);
          checkBranchInst->replaceAllUsesWith(modifiedBranchInst);
          checkBranchInst->eraseFromParent();
        } else {
          // NOTE(review): looks unreachable - a conditional branch terminator
          // is already routed to the other arm of the enclosing if; confirm.
          SPM_DEBUG( dbgs() << "FASan: Unexpected conditional branch (preheader should branch unconditional, other array checks will introduce conditional branches) " << * checkTermInst << "\n" );
          abort();
        }
      } else {
        SPM_DEBUG( dbgs() << "FASan: unsupported terminator type " << * checkTermInst << "\n" );
        abort();
      }
    }

#if 0
    IRB.SetInsertPoint(&(*CI.Final->begin()));
    IRB.CreateCall(ReuseFnDestroy_, Args);
#endif

    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: call instruction: " << *CR << "\n");
  }

  // inline calls
#ifdef FASAN_INLINE_RUNTIME
  for (CallInst * call : ToInline) {
    assert(call);
    InlineFunctionInfo IFI;
    InlineFunction(call, IFI, false);
  }
#endif

  SPM_DEBUG( F.dump() );

  return true;
}
/// Try to turn \p L into a loop whose exit test is driven by the PowerPC
/// count register.
///
/// Sub-loops are attempted first; if any of them was converted this loop is
/// left alone. Otherwise the function looks for an exiting block that
///  - has a computable, loop-invariant, non-zero exit count that fits the
///    target's counter width,
///  - dominates every in-loop predecessor of the header, and
///  - ends in a conditional branch.
/// If one is found, the (exit count + 1) is handed to the ppc_mtctr intrinsic
/// in the preheader and the branch condition is replaced by a call to
/// ppc_is_decremented_ctr_nonzero.
///
/// \returns true if this loop or one of its sub-loops was converted.
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  Triple TT(L->getHeader()->getParent()->getParent()->getTargetTriple());
  const bool Is64Bit = TT.isArch64Bit();
  if (!TT.isArch32Bit() && !Is64Bit)
    return false; // Unknown arch. type.

  // Process nested loops first.
  bool NestedConverted = false;
  for (Loop::iterator Sub = L->begin(), SubEnd = L->end(); Sub != SubEnd;
       ++Sub)
    NestedConverted |= convertToCTRLoop(*Sub);

  // If a nested loop has been converted, then we can't convert this loop.
  if (NestedConverted)
    return true;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator BB = L->block_begin(), BBEnd = L->block_end();
       BB != BBEnd; ++BB) {
    if (mightUseCTR(TT, *BB))
      return false;
  }

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Widest exit count the counter register can hold on this target.
  const unsigned CounterBits = Is64Bit ? 64 : 32;

  BasicBlock *CountedExitBlock = 0;
  const SCEV *ExitCount = 0;
  BranchInst *CountedExitBranch = 0;
  for (SmallVectorImpl<BasicBlock *>::iterator It = ExitingBlocks.begin(),
                                               End = ExitingBlocks.end();
       It != End; ++It) {
    BasicBlock *Exiting = *It;
    const SCEV *EC = SE->getExitCount(L, Exiting);
    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
                    Exiting->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;

    // A constant count must be non-zero; a symbolic one must be invariant.
    const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC);
    if (ConstEC ? ConstEC->getValue()->isZero()
                : !SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > CounterBits)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool RunsEveryIteration = true;
    for (pred_iterator PI = pred_begin(L->getHeader()),
                       PIE = pred_end(L->getHeader());
         PI != PIE; ++PI) {
      if (L->contains(*PI) && !DT->dominates(Exiting, *PI)) {
        RunsEveryIteration = false;
        break;
      }
    }
    if (!RunsEveryIteration)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = Exiting->getTerminator();
    if (!TI)
      continue;
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    if (!BI || !BI->isConditional())
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBranch = BI;
    CountedExitBlock = Exiting;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return false;

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader || mightUseCTR(TT, Preheader))
    Preheader = InsertPreheaderForLoop(L, this);
  if (!Preheader)
    return false;

  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  SCEVExpander SCEVE(*SE, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = Is64Bit ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount, SE->getConstant(CountType, 1));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Value *MTCTRFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Value *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc);
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  DeleteDeadPHIs(CountedExitBlock);

  ++NumCTRLoops;
  return true;
}
/// Prepare \p L for unrolling by \p Count when its trip count is only known
/// at run time: insert a remainder loop (prolog or epilog) that executes the
/// "trip count % Count" extra iterations, plus the branch that selects
/// between the remainder code and the loop that will be unrolled.
///
/// \param L                        Loop to transform; must be in
///                                 LoopSimplify form or the function bails.
/// \param Count                    Unroll factor.
/// \param AllowExpensiveTripCount  If false, bail out when expanding the trip
///                                 count is reported as high cost.
/// \param UseEpilogRemainder       true: remainder runs after the main loop
///                                 (epilog); false: before it (prolog).
/// \param UnrollRemainder          Forwarded to CloneLoopBlocks(); if set and
///                                 a remainder loop was created, that loop is
///                                 itself unrolled via UnrollLoop().
/// \param LI, DT                   Passed to the block-splitting and cloning
///                                 helpers. Direct dominator-tree updates in
///                                 this function are guarded by "if (DT)".
/// \param SE                       Required; the function returns false when
///                                 it is null.
/// \param AC, ORE                  Only forwarded to UnrollLoop().
/// \param PreserveLCSSA            Forwarded to the splitting/connect helpers.
/// \returns true if the remainder code was generated, false if any
///          precondition failed (in which case no IR was changed by this
///          function itself).
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                      bool AllowExpensiveTripCount,
                                      bool UseEpilogRemainder,
                                      bool UnrollRemainder,
                                      LoopInfo *LI, ScalarEvolution *SE,
                                      DominatorTree *DT, AssumptionCache *AC,
                                      OptimizationRemarkEmitter *ORE,
                                      bool PreserveLCSSA) {
  DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
  DEBUG(L->dump());
  DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" :
        dbgs() << "Using prolog remainder.\n");

  // Make sure the loop is in canonical form.
  if (!L->isLoopSimplifyForm()) {
    DEBUG(dbgs() << "Not in simplify form!\n");
    return false;
  }

  // Guaranteed by LoopSimplifyForm.
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Header = L->getHeader();

  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
  // The latch successor that is not the header is taken to be the exit.
  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
  // targets of the Latch be an exit block out of the loop. This needs
  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
  assert(!L->contains(LatchExit) &&
         "one of the loop latch successors should be the exit block!");
  // These are exit blocks other than the target of the latch exiting block.
  SmallVector<BasicBlock *, 4> OtherExits;
  bool isMultiExitUnrollingEnabled =
      canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                   UseEpilogRemainder) &&
      canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                       UseEpilogRemainder);
  // Support only single exit and exiting block unless multi-exit loop
  // unrolling is enabled.
  if (!isMultiExitUnrollingEnabled &&
      (!L->getExitingBlock() || OtherExits.size())) {
    DEBUG(
        dbgs()
        << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
           "enabled!\n");
    return false;
  }
  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count, and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  // We calculate the backedge count by using getExitCount on the Latch block,
  // which is proven to be the only exiting block in this loop. This is same as
  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
  // exiting blocks).
  const SCEV *BECountSC = SE->getExitCount(L, Latch);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy()) {
    DEBUG(dbgs() << "Could not compute exit block SCEV\n");
    return false;
  }

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
    DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
    return false;
  }

  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(*SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount &&
      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
    DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
    return false;
  }

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth) {
    DEBUG(dbgs()
          << "Count failed constraint on overflow trip count calculation.\n");
    return false;
  }

  // All bail-out checks are done; from here on the IR is modified.

  // Loop structure is the following:
  //
  // PreHeader
  //   Header
  //   ...
  //   Latch
  // LatchExit

  BasicBlock *NewPreHeader;
  BasicBlock *NewExit = nullptr;
  BasicBlock *PrologExit = nullptr;
  BasicBlock *EpilogPreHeader = nullptr;
  BasicBlock *PrologPreHeader = nullptr;

  if (UseEpilogRemainder) {
    // If epilog remainder
    // Split PreHeader to insert a branch around loop for unrolling.
    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
    // Split LatchExit to create phi nodes from branch above.
    SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
    NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
                                     DT, LI, PreserveLCSSA);
    // Split NewExit to insert epilog remainder loop.
    EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
  } else {
    // If prolog remainder
    // Split the original preheader twice to insert prolog remainder loop
    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
                            DT, LI);
    PrologExit->setName(Header->getName() + ".prol.loopexit");
    // Split PrologExit to get NewPreHeader.
    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
  }
  // Loop structure should be the following (blocks marked * are new):
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // *NewPreHeader     *PrologPreHeader
  //   Header          *PrologExit
  //   ...             *NewPreHeader
  //   Latch             Header
  // *NewExit            ...
  // *EpilogPreHeader    Latch
  // LatchExit         LatchExit

  // Calculate conditions for branch around loop for unrolling
  // in epilog case and around prolog remainder loop in prolog case.
  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  // PreHeader's terminator is re-read here because the splitting above ran
  // after PreHeaderBR was first captured.
  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);
  IRBuilder<> B(PreHeaderBR);
  Value *ModVal;
  // Calculate ModVal = (BECount + 1) % Count.
  // Note that TripCount is BECount + 1.
  if (isPowerOf2_32(Count)) {
    // When Count is a power of 2 we don't need BECount for the epilog case;
    // however, we'll need it for a branch around the unrolling loop in the
    // prolog case.
    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
    // If ModVal is zero, then either:
    //  1. There are no iterations to be run in the prolog/epilog loop.
    // OR
    //  2. The addition computing TripCount overflowed.
    //
    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
    // the number of iterations that remain to be run in the original loop is a
    // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
    // explicitly check this above).
  } else {
    // As (BECount + 1) can potentially unsigned overflow we count
    // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
    Value *ModValTmp = B.CreateURem(BECount,
                                    ConstantInt::get(BECount->getType(),
                                                     Count));
    Value *ModValAdd = B.CreateAdd(ModValTmp,
                                   ConstantInt::get(ModValTmp->getType(), 1));
    // At that point (BECount % Count) + 1 could be equal to Count.
    // To handle this case we need to take mod by Count one more time.
    ModVal = B.CreateURem(ModValAdd,
                          ConstantInt::get(BECount->getType(), Count),
                          "xtraiter");
  }
  // Epilog: go straight to the remainder code when BECount < Count - 1.
  // Prolog: run the remainder code only when ModVal is non-zero.
  Value *BranchVal =
      UseEpilogRemainder ? B.CreateICmpULT(BECount,
                                           ConstantInt::get(BECount->getType(),
                                                            Count - 1))
                         : B.CreateIsNotNull(ModVal, "lcmp.mod");
  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
  PreHeaderBR->eraseFromParent();
  if (DT) {
    if (UseEpilogRemainder)
      DT->changeImmediateDominator(NewExit, PreHeader);
    else
      DT->changeImmediateDominator(PrologExit, PreHeader);
  }
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  // For unroll factor 2 remainder loop will have 1 iterations.
  // Do not create 1 iteration loop.
  bool CreateRemainderLoop = (Count != 2);

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  Loop *remainderLoop = CloneLoopBlocks(
      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
      InsertTop, InsertBot,
      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);

  // Insert the cloned blocks into the function.
  // Everything from NewBlocks[0] to the end of the function is moved in front
  // of InsertBot.
  F->getBasicBlockList().splice(InsertBot->getIterator(),
                                F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(),
                                F->end());

  // Now the loop blocks are cloned and the other exiting blocks from the
  // remainder are connected to the original Loop's exit blocks. The remaining
  // work is to update the phi nodes in the original loop, and take in the
  // values from the cloned region. Also update the dominator info for
  // OtherExits and their immediate successors, since we have new edges into
  // OtherExits.
  SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
  for (auto *BB : OtherExits) {
   for (auto &II : *BB) {

    // Given we preserve LCSSA form, we know that the values used outside the
    // loop will be used through these phi nodes at the exit blocks that are
    // transformed below.
    if (!isa<PHINode>(II))
      break;
    PHINode *Phi = cast<PHINode>(&II);
    unsigned oldNumOperands = Phi->getNumIncomingValues();
    // Add the incoming values from the remainder code to the end of the phi
    // node.
    for (unsigned i =0; i < oldNumOperands; i++){
      Value *newVal = VMap[Phi->getIncomingValue(i)];
      // newVal can be a constant or derived from values outside the loop, and
      // hence need not have a VMap value.
      if (!newVal)
        newVal = Phi->getIncomingValue(i);
      Phi->addIncoming(newVal,
                           cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
    }
   }
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
    for (BasicBlock *SuccBB : successors(BB)) {
      assert(!(any_of(OtherExits,
                      [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
               SuccBB == LatchExit) &&
             "Breaks the definition of dedicated exits!");
    }
#endif
    // Update the dominator info because the immediate dominator is no longer the
    // header of the original Loop. BB has edges both from L and remainder code.
    // Since the preheader determines which loop is run (L or directly jump to
    // the remainder code), we set the immediate dominator as the preheader.
    if (DT) {
      DT->changeImmediateDominator(BB, PreHeader);
      // Also update the IDom for immediate successors of BB.  If the current
      // IDom is the header, update the IDom to be the preheader because that is
      // the nearest common dominator of all predecessors of SuccBB.  We need to
      // check for IDom being the header because successors of exit blocks can
      // have edges from outside the loop, and we should not incorrectly update
      // the IDom in that case.
      for (BasicBlock *SuccBB: successors(BB))
        if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
          if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
            assert(!SuccBB->getSinglePredecessor() &&
                   "BB should be the IDom then!");
            DT->changeImmediateDominator(SuccBB, PreHeader);
          }
        }
    }
  }

  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // NewPreHeader      PrologPreHeader
  //   Header            PrologHeader
  //   ...               ...
  //   Latch             PrologLatch
  // NewExit           PrologExit
  // EpilogPreHeader   NewPreHeader
  //   EpilogHeader      Header
  //   ...               ...
  //   EpilogLatch       Latch
  // LatchExit         LatchExit

  // Rewrite the cloned instruction operands to use the values created when the
  // clone is created.
  for (BasicBlock *BB : NewBlocks) {
    for (Instruction &I : *BB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  if (UseEpilogRemainder) {
    // Connect the epilog code to the original loop and update the
    // PHI functions.
    ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
                  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
                  PreserveLCSSA);

    // Update counter in loop for unrolling.
    // The new counter starts at TripCount - ModVal, which should be a
    // multiple of Count, and is decremented by one on every pass through the
    // latch; the latch branch is rewritten to test it against zero.
    IRBuilder<> B2(NewPreHeader->getTerminator());
    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
    // NOTE(review): this declaration shadows the LatchBR declared near the
    // top of the function; both are initialized from Latch->getTerminator().
    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
    B2.SetInsertPoint(LatchBR);
    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
                                      Header->getFirstNonPHI());
    Value *IdxSub =
        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                     NewIdx->getName() + ".nsub");
    Value *IdxCmp;
    // Keep the polarity of the original latch branch: continue looping while
    // the counter is non-zero, whichever successor the header is.
    if (LatchBR->getSuccessor(0) == Header)
      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
    else
      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
    NewIdx->addIncoming(TestVal, NewPreHeader);
    NewIdx->addIncoming(IdxSub, Latch);
    LatchBR->setCondition(IdxCmp);
  } else {
    // Connect the prolog code to the original loop and update the
    // PHI functions.
    ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
                  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
  }

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
  // cannot rely on the LoopUnrollPass to do this because it only does
  // canonicalization for parent/subloops and not the sibling loops.
  if (OtherExits.size() > 0) {
    // Generate dedicated exit blocks for the original loop, to preserve
    // LoopSimplifyForm.
    formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
    // Generate dedicated exit blocks for the remainder loop if one exists, to
    // preserve LoopSimplifyForm.
    if (remainderLoop)
      formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
  }

  // The remainder loop is unrolled by Count - 1, with Count - 1 also passed
  // as its trip count.
  if (remainderLoop && UnrollRemainder) {
    DEBUG(dbgs() << "Unrolling remainder loop\n");
    UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1,
               /*Force*/false, /*AllowRuntime*/false,
               /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true,
               /*PreserveOnlyFirst*/false, /*TripMultiple*/1,
               /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE,
               PreserveLCSSA);
  }

  NumRuntimeUnrolled++;
  return true;
}
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; // Make sure the loop is in canonical form, and there is a single // exit block only. if (!L->isLoopSimplifyForm()) return false; BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop if (!Exit) return false; // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). const SCEV *BECountSC = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) return false; unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) return false; // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) return false; BasicBlock *Latch = L->getLoopLatch(); // Loop structure is the following: // // PreHeader // Header // ... 
// Latch // Exit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split Exit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... *NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // Exit Exit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. 
// Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. // To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? 
NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // Exit Exit // Rewrite the cloned instruction operands to use the values created when the // clone is created. 
for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the // parent loop, so the Scalar Evolution pass needs to be run again. if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); NumRuntimeUnrolled++; return true; }
/// Try to turn loop \p L (after first trying its nested loops) into a
/// counter-register loop.
///
/// On success the exit count plus one is materialized in the preheader and
/// passed to the `ppc_mtctr` intrinsic, and the condition of one exiting
/// conditional branch is replaced by a call to
/// `ppc_is_decremented_ctr_nonzero`. A loop is skipped when a nested loop was
/// converted, when it has irreducible control flow, when any block might use
/// CTR, when profile data says an exit edge is taken more often than the
/// in-loop edge, or when no suitable exiting block is found.
///
/// \returns true if \p L or one of its nested loops was modified.
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert small short loops to CTR loop.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    // Bind by reference: a plain `auto` would deduce a value type and copy
    // the whole assumption cache on every call.
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Process nested loops first.
  for (Loop *SubLoop : *L) {
    // Only report a conversion when the recursive call actually changed
    // something.
    if (convertToCTRLoop(SubLoop)) {
      MadeChange = true;
      LLVM_DEBUG(dbgs() << "Nested loop converted\n");
    }
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

  // Bail out if the loop has irreducible control flow.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
    return false;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (BasicBlock *BB : L->blocks())
    if (mightUseCTR(BB))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    Instruction *TI = ExitingBB->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }

  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *EC = SE->getExitCount(L, ExitingBB);
    LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
                      << ExitingBB->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    // The count must fit in the counter type used below (64 or 32 bits).
    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (LI->getLoopFor(ExitingBB) != L)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
                       PIE = pred_end(L->getHeader());
         PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(ExitingBB, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = ExitingBB->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = ExitingBB;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
                    << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  // The value loaded into the counter is the exit count plus one.
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Function *MTCTRFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Function *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (BasicBlock *BB : L->blocks())
    DeleteDeadPHIs(BB);

  ++NumCTRLoops;
  return MadeChange;
}