/// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, const TargetTransformInfo &TTI, AssumptionCache *AC) { SmallPtrSet<const Value *, 32> EphValues; CodeMetrics::collectEphemeralValues(L, AC, EphValues); CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I, TTI, EphValues); NumCalls = Metrics.NumInlineCandidates; NotDuplicatable = Metrics.notDuplicatable; unsigned LoopSize = Metrics.NumInsts; // Don't allow an estimate of size zero. This would allows unrolling of loops // with huge iteration counts, which is a compile time problem even if it's // not a problem for code quality. Also, the code using this size may assume // that each loop has at least three instructions (likely a conditional // branch, a comparison feeding that branch, and some kind of loop increment // feeding that comparison instruction). LoopSize = std::max(LoopSize, 3u); return LoopSize; }
// Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, AssumptionCache *AC) { LoopPropsMapIt PropsIt; bool Inserted; std::tie(PropsIt, Inserted) = LoopsProperties.insert(std::make_pair(L, LoopProperties())); LoopProperties &Props = PropsIt->second; if (Inserted) { // New loop. // Limit the number of instructions to avoid causing significant code // expansion, and the number of basic blocks, to avoid loops with // large numbers of branches which cause loop unswitching to go crazy. // This is a very ad-hoc heuristic. SmallPtrSet<const Value *, 32> EphValues; CodeMetrics::collectEphemeralValues(L, AC, EphValues); // FIXME: This is overly conservative because it does not take into // consideration code simplification opportunities and code that can // be shared by the resultant unswitched loops. CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I, TTI, EphValues); Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; if (Metrics.notDuplicatable) { DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName() << ", contents cannot be " << "duplicated!\n"); return false; } } if (!Props.CanBeUnswitchedCount) { DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName() << ", cost too high: " << L->getBlocks().size() << "\n"); return false; } // Be careful. This links are good only before new loop addition. CurrentLoopProperties = &Props; CurLoopInstructions = &Props.UnswitchedVals; return true; }
/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { initLoopData(); // If LoopSimplify was unable to form a preheader, don't do any unswitching. if (!loopPreheader) return false; Function *F = loopHeader->getParent(); Constant *CondVal = 0; BasicBlock *ExitBlock = 0; if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { // If the condition is trivial, always unswitch. There is no code growth // for this case. UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); return true; } // Check to see if it would be profitable to unswitch current loop. // Do not do non-trivial unswitch while optimizing for size. if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) return false; // FIXME: This is overly conservative because it does not take into // consideration code simplification opportunities and code that can // be shared by the resultant unswitched loops. CodeMetrics Metrics; for (Loop::block_iterator I = currentLoop->block_begin(), E = currentLoop->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I); // Limit the number of instructions to avoid causing significant code // expansion, and the number of basic blocks, to avoid loops with // large numbers of branches which cause loop unswitching to go crazy. // This is a very ad-hoc heuristic. if (Metrics.NumInsts > Threshold || Metrics.NumBlocks * 5 > Threshold || Metrics.containsIndirectBr || Metrics.isRecursive) { DEBUG(dbgs() << "NOT unswitching loop %" << currentLoop->getHeader()->getName() << ", cost too high: " << currentLoop->getBlocks().size() << "\n"); return false; } UnswitchNontrivialCondition(LoopCond, Val, currentLoop); return true; }
/// Rotate loop LP. Return true if the loop is rotated. /// /// \param SimplifiedLatch is true if the latch was just folded into the final /// loop exit. In this case we may want to rotate even though the new latch is /// now an exiting branch. This rotation would have happened had the latch not /// been simplified. However, if SimplifiedLatch is false, then we avoid /// rotating loops in which the latch exits to avoid excessive or endless /// rotation. LoopRotate should be repeatable and converge to a canonical /// form. This property is satisfied because simplifying the loop latch can only /// happen once across multiple invocations of the LoopRotate pass. bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; BasicBlock *OrigHeader = L->getHeader(); BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (!BI || BI->isUnconditional()) return false; // If the loop header is not one of the loop exiting blocks then // either this loop is already rotated or it is not // suitable for loop rotation transformations. if (!L->isLoopExiting(OrigHeader)) return false; // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. if (!OrigLatch) return false; // Rotate if either the loop latch does *not* exit the loop, or if the loop // latch was just simplified. Or if we think it will be profitable. if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && !shouldRotateLoopExitingLatch(L)) return false; // Check size of original header and reject loop if it is very big or we can't // duplicate blocks inside it. { SmallPtrSet<const Value *, 32> EphValues; CodeMetrics::collectEphemeralValues(L, AC, EphValues); CodeMetrics Metrics; Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); if (Metrics.notDuplicatable) { LLVM_DEBUG( dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" << " instructions: "; L->dump()); return false; } if (Metrics.convergent) { LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " "instructions: "; L->dump()); return false; }
/// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I); NumCalls = Metrics.NumInlineCandidates; unsigned LoopSize = Metrics.NumInsts; // Don't allow an estimate of size zero. This would allows unrolling of loops // with huge iteration counts, which is a compile time problem even if it's // not a problem for code quality. if (LoopSize == 0) LoopSize = 1; return LoopSize; }
// Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. bool LUAnalysisCache::countLoop(const Loop* L) { std::pair<LoopPropsMapIt, bool> InsertRes = LoopsProperties.insert(std::make_pair(L, LoopProperties())); LoopProperties& Props = InsertRes.first->second; if (InsertRes.second) { // New loop. // Limit the number of instructions to avoid causing significant code // expansion, and the number of basic blocks, to avoid loops with // large numbers of branches which cause loop unswitching to go crazy. // This is a very ad-hoc heuristic. // FIXME: This is overly conservative because it does not take into // consideration code simplification opportunities and code that can // be shared by the resultant unswitched loops. CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I); Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; } if (!Props.CanBeUnswitchedCount) { DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName() << ", cost too high: " << L->getBlocks().size() << "\n"); return false; } // Be careful. This links are good only before new loop addition. CurrentLoopProperties = &Props; CurLoopInstructions = &Props.UnswitchedVals; return true; }
/// Rotate loop LP. Return true if the loop is rotated. /// /// \param SimplifiedLatch is true if the latch was just folded into the final /// loop exit. In this case we may want to rotate even though the new latch is /// now an exiting branch. This rotation would have happened had the latch not /// been simplified. However, if SimplifiedLatch is false, then we avoid /// rotating loops in which the latch exits to avoid excessive or endless /// rotation. LoopRotate should be repeatable and converge to a canonical /// form. This property is satisfied because simplifying the loop latch can only /// happen once across multiple invocations of the LoopRotate pass. bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; BasicBlock *OrigHeader = L->getHeader(); BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (BI == 0 || BI->isUnconditional()) return false; // If the loop header is not one of the loop exiting blocks then // either this loop is already rotated or it is not // suitable for loop rotation transformations. if (!L->isLoopExiting(OrigHeader)) return false; // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. if (OrigLatch == 0) return false; // Rotate if either the loop latch does *not* exit the loop, or if the loop // latch was just simplified. if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch) return false; // Check size of original header and reject loop if it is very big or we can't // duplicate blocks inside it. { CodeMetrics Metrics; Metrics.analyzeBasicBlock(OrigHeader, *TTI); if (Metrics.notDuplicatable) { DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable" << " instructions: "; L->dump()); return false; } if (Metrics.NumInsts > MAX_HEADER_SIZE) return false; }
/// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, const TargetTransformInfo &TTI, AssumptionTracker *AT) { SmallPtrSet<const Value *, 32> EphValues; CodeMetrics::collectEphemeralValues(L, AT, EphValues); CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I, TTI, EphValues); NumCalls = Metrics.NumInlineCandidates; NotDuplicatable = Metrics.notDuplicatable; unsigned LoopSize = Metrics.NumInsts; // Don't allow an estimate of size zero. This would allows unrolling of loops // with huge iteration counts, which is a compile time problem even if it's // not a problem for code quality. if (LoopSize == 0) LoopSize = 1; return LoopSize; }
bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { bool MadeChange = false; // Only prefetch in the inner-most loop if (!L->empty()) return MadeChange; SmallPtrSet<const Value *, 32> EphValues; CodeMetrics::collectEphemeralValues(L, AC, EphValues); // Calculate the number of iterations ahead to prefetch CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) { // If the loop already has prefetches, then assume that the user knows // what he or she is doing and don't add any more. for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) if (CallInst *CI = dyn_cast<CallInst>(J)) if (Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::prefetch) return MadeChange; Metrics.analyzeBasicBlock(*I, *TTI, EphValues); } unsigned LoopSize = Metrics.NumInsts; if (!LoopSize) LoopSize = 1; unsigned ItersAhead = PrefDist/LoopSize; if (!ItersAhead) ItersAhead = 1; SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads; for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) { for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { Value *PtrValue; Instruction *MemI; if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { MemI = LMemI; PtrValue = LMemI->getPointerOperand(); } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { if (!PrefetchWrites) continue; MemI = SMemI; PtrValue = SMemI->getPointerOperand(); } else continue; unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); if (PtrAddrSpace) continue; if (L->isLoopInvariant(PtrValue)) continue; const SCEV *LSCEV = SE->getSCEV(PtrValue); const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); if (!LSCEVAddRec) continue; // We don't want to double prefetch individual cache lines. If this load // is known to be within one cache line of some other load that has // already been prefetched, then don't prefetch this one as well. bool DupPref = false; for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end(); K != KE; ++K) { const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second); if (const SCEVConstant *ConstPtrDiff = dyn_cast<SCEVConstant>(PtrDiff)) { int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); if (PD < (int64_t) CacheLineSize) { DupPref = true; break; } } } if (DupPref) continue; const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr( SE->getConstant(LSCEVAddRec->getType(), ItersAhead), LSCEVAddRec->getStepRecurrence(*SE))); if (!isSafeToExpand(NextLSCEV, *SE)) continue; PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); IRBuilder<> Builder(MemI); Module *M = (*I)->getParent()->getParent(); Type *I32 = Type::getInt32Ty((*I)->getContext()); Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch); Builder.CreateCall4(PrefetchFunc, PrefPtrValue, ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1), ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)); MadeChange = true; } } return MadeChange; }
/// Rotate loop LP. Return true if the loop is rotated. bool LoopRotate::rotateLoop(Loop *L) { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; BasicBlock *OrigHeader = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (BI == 0 || BI->isUnconditional()) return false; // If the loop header is not one of the loop exiting blocks then // either this loop is already rotated or it is not // suitable for loop rotation transformations. if (!L->isLoopExiting(OrigHeader)) return false; // Updating PHInodes in loops with multiple exits adds complexity. // Keep it simple, and restrict loop rotation to loops with one exit only. // In future, lift this restriction and support for multiple exits if // required. SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); if (ExitBlocks.size() > 1) return false; // Check size of original header and reject loop if it is very big. { CodeMetrics Metrics; Metrics.analyzeBasicBlock(OrigHeader); if (Metrics.NumInsts > MAX_HEADER_SIZE) return false; } // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); BasicBlock *OrigLatch = L->getLoopLatch(); // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. if (OrigPreheader == 0 || OrigLatch == 0) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. BasicBlock *Exit = BI->getSuccessor(0); BasicBlock *NewHeader = BI->getSuccessor(1); if (L->contains(Exit)) std::swap(Exit, NewHeader); assert(NewHeader && "Unable to determine new loop header"); assert(L->contains(NewHeader) && !L->contains(Exit) && "Unable to determine loop header and exit blocks"); // This code assumes that the new header has exactly one predecessor. // Remove any single-entry PHI nodes in it. assert(NewHeader->getSinglePredecessor() && "New header doesn't have one pred!"); FoldSingleEntryPHINodes(NewHeader); // Begin by walking OrigHeader and populating ValueMap with an entry for // each Instruction. BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); ValueToValueMapTy ValueMap; // For PHI nodes, the value available in OldPreHeader is just the // incoming value from OldPreHeader. for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); while (I != E) { Instruction *Inst = I++; // If the instruction's operands are invariant and it doesn't read or write // memory, then it is safe to hoist. Doing this doesn't change the order of // execution in the preheader, but does prevent the instruction from // executing in each iteration of the loop. This means it is safe to hoist // something that might trap, but isn't safe to hoist something that reads // memory (without proving that the loop doesn't write). if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) { Inst->moveBefore(LoopEntryBranch); continue; } // Otherwise, create a duplicate of the instruction. Instruction *C = Inst->clone(); // Eagerly remap the operands of the instruction. RemapInstruction(C, ValueMap, RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. Value *V = SimplifyInstruction(C); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. delete C; ValueMap[Inst] = V; } else { // Otherwise, stick the new instruction into the new block! C->setName(Inst->getName()); C->insertBefore(LoopEntryBranch); ValueMap[Inst] = C; } } // Along with all the other instructions, we just cloned OrigHeader's // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. TerminatorInst *TI = OrigHeader->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin(); PHINode *PN = dyn_cast<PHINode>(BI); ++BI) PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove // OrigPreHeader's old terminator (the original branch into the loop), and // remove the corresponding incoming values from the PHI nodes in OrigHeader. LoopEntryBranch->eraseFromParent(); // If there were any uses of instructions in the duplicated block outside the // loop, update them, inserting PHI nodes as required RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap); // NewHeader is now the header of the loop. L->moveToHeader(NewHeader); assert(L->getHeader() == NewHeader && "Latch block is our new header"); // At this point, we've finished our major CFG changes. As part of cloning // the loop into the preheader we've simplified instructions and the // duplicated conditional branch may now be branching on a constant. If it is // branching on a constant and if that constant means that we enter the loop, // then we fold away the cond branch to an uncond branch. This simplifies the // loop in cases important for nested loops, and it also means we don't have // to split as many edges. BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); assert(PHBI->isConditional() && "Should be clone of BI condbr!"); if (!isa<ConstantInt>(PHBI->getCondition()) || PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != NewHeader) { // The conditional branch can't be folded, handle the general case. // Update DominatorTree to reflect the CFG change we just made. Then split // edges as necessary to preserve LoopSimplify form. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { // Since OrigPreheader now has the conditional branch to Exit block, it is // the dominator of Exit. DT->changeImmediateDominator(Exit, OrigPreheader); DT->changeImmediateDominator(NewHeader, OrigPreheader); // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(OrigHeader, OrigLatch); } // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and // thus is not a preheader anymore. Split the edge to form a real preheader. BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this); NewPH->setName(NewHeader->getName() + ".lr.ph"); // Preserve canonical loop form, which means that 'Exit' should have only one // predecessor. BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this); ExitSplit->moveBefore(Exit); } else { // We can fold the conditional branch in the preheader, this makes things // simpler. The first step is to remove the extra edge to the Exit block. Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); BranchInst::Create(NewHeader, PHBI); PHBI->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(NewHeader, OrigPreheader); DT->changeImmediateDominator(OrigHeader, OrigLatch); } } assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); // Now that the CFG and DomTree are in a consistent state again, try to merge // the OrigHeader block into OrigLatch. This will succeed if they are // connected by an unconditional branch. This is just a cleanup so the // emitted code isn't too gross in this common case. MergeBlockIntoPredecessor(OrigHeader, this); ++NumRotated; return true; }
bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; // Do not convert small short loops to CTR loop. unsigned ConstTripCount = SE->getSmallConstantTripCount(L); if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { SmallPtrSet<const Value *, 32> EphValues; auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( *L->getHeader()->getParent()); CodeMetrics::collectEphemeralValues(L, &AC, EphValues); CodeMetrics Metrics; for (BasicBlock *BB : L->blocks()) Metrics.analyzeBasicBlock(BB, *TTI, EphValues); // 6 is an approximate latency for the mtctr instruction. if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) return false; } // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); LLVM_DEBUG(dbgs() << "Nested loop converted\n"); } // If a nested loop has been converted, then we can't convert this loop. if (MadeChange) return MadeChange; // Bail out if the loop has irreducible control flow. LoopBlocksRPO RPOT(L); RPOT.perform(LI); if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI)) return false; #ifndef NDEBUG // Stop trying after reaching the limit (if any). int Limit = CTRLoopLimit; if (Limit >= 0) { if (Counter >= CTRLoopLimit) return false; Counter++; } #endif // We don't want to spill/restore the counter register, and so we don't // want to use the counter register if the loop contains calls. for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) if (mightUseCTR(*I)) return MadeChange; SmallVector<BasicBlock*, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); // If there is an exit edge known to be frequently taken, // we should not transform this loop. for (auto &BB : ExitingBlocks) { Instruction *TI = BB->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { uint64_t TrueWeight = 0, FalseWeight = 0; if (!BI->isConditional() || !BI->extractProfMetadata(TrueWeight, FalseWeight)) continue; // If the exit path is more frequent than the loop path, // we return here without further analysis for this loop. bool TrueIsExit = !L->contains(BI->getSuccessor(0)); if (( TrueIsExit && FalseWeight < TrueWeight) || (!TrueIsExit && FalseWeight > TrueWeight)) return MadeChange; } } BasicBlock *CountedExitBlock = nullptr; const SCEV *ExitCount = nullptr; BranchInst *CountedExitBranch = nullptr; for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block " << (*I)->getName() << ": " << *EC << "\n"); if (isa<SCEVCouldNotCompute>(EC)) continue; if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { if (ConstEC->getValue()->isZero()) continue; } else if (!SE->isLoopInvariant(EC, L)) continue; if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32)) continue; // If this exiting block is contained in a nested loop, it is not eligible // for insertion of the branch-and-decrement since the inner loop would // end up messing up the value in the CTR. if (LI->getLoopFor(*I) != L) continue; // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // existing block. // We need to make sure that this block will run on every loop iteration. // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; for (pred_iterator PI = pred_begin(L->getHeader()), PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { if (!L->contains(*PI)) continue; if (!DT->dominates(*I, *PI)) { NotAlways = true; break; } } if (NotAlways) continue; // Make sure this blocks ends with a conditional branch. Instruction *TI = (*I)->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (!BI->isConditional()) continue; CountedExitBranch = BI; } else continue; // Note that this block may not be the loop latch block, even if the loop // has a latch block. CountedExitBlock = *I; ExitCount = EC; break; } if (!CountedExitBlock) return MadeChange; BasicBlock *Preheader = L->getLoopPreheader(); // If we don't have a preheader, then insert one. If we already have a // preheader, then we can use it (except if the preheader contains a use of // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). if (!Preheader || mightUseCTR(Preheader)) Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); // Insert the count into the preheader and replace the condition used by the // selected branch. MadeChange = true; SCEVExpander SCEVE(*SE, *DL, "loopcnt"); LLVMContext &C = SE->getContext(); Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); IRBuilder<> CountBuilder(Preheader->getTerminator()); Module *M = Preheader->getParent()->getParent(); Function *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType); CountBuilder.CreateCall(MTCTRFunc, ECValue); IRBuilder<> CondBuilder(CountedExitBranch); Function *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); Value *OldCond = CountedExitBranch->getCondition(); CountedExitBranch->setCondition(NewCond); // The false branch must exit the loop. if (!L->contains(CountedExitBranch->getSuccessor(0))) CountedExitBranch->swapSuccessors(); // The old condition may be dead now, and may have even created a dead PHI // (the original induction variable). RecursivelyDeleteTriviallyDeadInstructions(OldCond); // Run through the basic blocks of the loop and see if any of them have dead // PHIs that can be removed. for (auto I : L->blocks()) DeleteDeadPHIs(I); ++NumCTRLoops; return MadeChange; }