/// Sinks instructions from loop's preheader to the loop body if the /// sum frequency of inserted copy is smaller than preheader's frequency. static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI, ScalarEvolution *SE) { BasicBlock *Preheader = L.getLoopPreheader(); if (!Preheader) return false; // Enable LoopSink only when runtime profile is available. // With static profile, the sinking decision may be sub-optimal. if (!Preheader->getParent()->getEntryCount()) return false; const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader); // If there are no basic blocks with lower frequency than the preheader then // we can avoid the detailed analysis as we will never find profitable sinking // opportunities. if (all_of(L.blocks(), [&](const BasicBlock *BB) { return BFI.getBlockFreq(BB) > PreheaderFreq; })) return false; bool Changed = false; AliasSetTracker CurAST(AA); // Compute alias set. for (BasicBlock *BB : L.blocks()) CurAST.add(*BB); // Sort loop's basic blocks by frequency SmallVector<BasicBlock *, 10> ColdLoopBBs; SmallDenseMap<BasicBlock *, int, 16> LoopBlockNumber; int i = 0; for (BasicBlock *B : L.blocks()) if (BFI.getBlockFreq(B) < BFI.getBlockFreq(L.getLoopPreheader())) { ColdLoopBBs.push_back(B); LoopBlockNumber[B] = ++i; } std::stable_sort(ColdLoopBBs.begin(), ColdLoopBBs.end(), [&](BasicBlock *A, BasicBlock *B) { return BFI.getBlockFreq(A) < BFI.getBlockFreq(B); }); // Traverse preheader's instructions in reverse order becaue if A depends // on B (A appears after B), A needs to be sinked first before B can be // sinked. for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) { Instruction *I = &*II++; if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr)) continue; if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) Changed = true; } if (Changed && SE) SE->forgetLoopDispositions(&L); return Changed; }
/// Merges every block of \p L that has a single predecessor into that
/// predecessor, provided the predecessor has a single successor and belongs
/// directly to \p L. After each merge the loop is forgotten in \p SE.
///
/// \returns true if at least one block was merged.
static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
                            ScalarEvolution &SE) {
  // Work on a snapshot of the loop's blocks held through value handles, so
  // that removing blocks while iterating does not invalidate the traversal.
  SmallVector<WeakTrackingVH, 16> Snapshot(L.blocks());

  bool MergedAny = false;
  for (auto &Handle : Snapshot) {
    // A handle that no longer refers to a block is skipped.
    BasicBlock *Candidate = cast_or_null<BasicBlock>(Handle);
    if (!Candidate)
      continue;

    // Only the trivial case is handled: one predecessor with one successor.
    // Blocks whose predecessor belongs to another loop are left alone.
    BasicBlock *SolePred = Candidate->getSinglePredecessor();
    const bool Mergeable = SolePred && SolePred->getSingleSuccessor() &&
                           LI.getLoopFor(SolePred) == &L;
    if (!Mergeable)
      continue;

    // Fold Candidate into its predecessor and delete it.
    MergeBlockIntoPredecessor(Candidate, &DT, &LI);
    SE.forgetLoop(&L);
    MergedAny = true;
  }

  return MergedAny;
}
/// Merges every block of \p L that has a single predecessor into that
/// predecessor, provided the predecessor has a single successor and belongs
/// directly to \p L. Dominator tree updates go through an eager
/// DomTreeUpdater; \p MSSAU is forwarded to the merge helper as-is.
///
/// \returns true if at least one block was merged.
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
                                        LoopInfo &LI,
                                        MemorySSAUpdater *MSSAU) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

  // Work on a snapshot of the loop's blocks held through value handles, so
  // that removing blocks while iterating does not invalidate the traversal.
  SmallVector<WeakTrackingVH, 16> Snapshot(L.blocks());

  bool MergedAny = false;
  for (auto &Handle : Snapshot) {
    // A handle that no longer refers to a block is skipped.
    BasicBlock *Candidate = cast_or_null<BasicBlock>(Handle);
    if (!Candidate)
      continue;

    // Only the trivial case is handled: one predecessor with one successor.
    // Blocks whose predecessor belongs to another loop are left alone.
    BasicBlock *SolePred = Candidate->getSinglePredecessor();
    const bool Mergeable = SolePred && SolePred->getSingleSuccessor() &&
                           LI.getLoopFor(SolePred) == &L;
    if (!Mergeable)
      continue;

    // Fold Candidate into its predecessor and delete it.
    MergeBlockIntoPredecessor(Candidate, &DTU, &LI, MSSAU);
    MergedAny = true;
  }

  return MergedAny;
}
/// Writes the banner followed by every block of \p L to the pass's output
/// stream. A null block entry is reported with a placeholder message.
/// Always reports all analyses as preserved.
PreservedAnalyses PrintLoopPass::run(Loop &L, AnalysisManager<Loop> &) {
  OS << Banner;

  for (auto *BB : L.blocks()) {
    if (!BB) {
      OS << "Printing <null> block";
      continue;
    }
    BB->print(OS);
  }

  return PreservedAnalyses::all();
}
// Walks the given loops from last to first and scans each loop's blocks.
// When the scan reports floats but no calls, slow cases, class
// initialization or doubles, BlockBegin::single_precision_flag is applied
// to every block of that loop.
void LoopFinder::compute_single_precision_flag(LoopList* loops) {
  NEEDS_CLEANUP; // factor out the loop iterator

  for (int idx = loops->length() - 1; idx >= 0; idx--) {
    Loop* current = loops->at(idx);

    ScanBlocks scanner(current->blocks());
    ScanResult summary;
    scanner.scan(&summary);

    // Any of these properties disqualifies the loop; it must also actually
    // contain floats to be worth flagging.
    if (summary.has_calls() ||
        summary.has_slow_cases() ||
        summary.has_class_init() ||
        summary.has_doubles() ||
        !summary.has_floats()) {
      continue;
    }

    FlagSetter flag_setter(BlockBegin::single_precision_flag);
    current->blocks()->iterate_forward(&flag_setter);
  }
}
bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; // Get the set of exiting blocks. SmallVector<BasicBlock *, 8> ExitBlocks; L.getExitBlocks(ExitBlocks); if (ExitBlocks.empty()) return false; SmallVector<Instruction *, 8> Worklist; // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, put them into the worklist to rewrite those uses. for (BasicBlock *BB : L.blocks()) { // For large loops, avoid use-scanning by using dominance information: In // particular, if a block does not dominate any of the loop exits, then none // of the values defined in the block could be used outside the loop. if (!blockDominatesAnExit(BB, DT, ExitBlocks)) continue; for (Instruction &I : *BB) { // Reject two common cases fast: instructions with no uses (like stores) // and instructions with one use that is in the same block as this. if (I.use_empty() || (I.hasOneUse() && I.user_back()->getParent() == BB && !isa<PHINode>(I.user_back()))) continue; Worklist.push_back(&I); } } Changed = formLCSSAForInstructions(Worklist, DT, *LI); // If we modified the code, remove any caches about the loop from SCEV to // avoid dangling entries. // FIXME: This is a big hammer, can we clear the cache more selectively? if (SE && Changed) SE->forgetLoop(&L); assert(L.isLCSSAForm(DT)); return Changed; }
/// Prints loop \p L to \p OS, prefixed by \p Banner.
///
/// When module-scope printing is forced, the banner and the loop header's
/// name are printed followed by the whole module. Otherwise the preheader
/// (if any), the loop's blocks and the exit blocks (if any) are printed in
/// that order.
void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) {
  if (forcePrintModuleIR()) {
    // handling -print-module-scope
    OS << Banner << " (loop: ";
    L.getHeader()->printAsOperand(OS, false);
    OS << ")\n";

    // printing whole module
    OS << *L.getHeader()->getModule();
    return;
  }

  // Shared by the loop-body and exit-block listings: print a block, or a
  // placeholder when the entry is null.
  auto EmitBlock = [&OS](BasicBlock *BB) {
    if (BB)
      BB->print(OS);
    else
      OS << "Printing <null> block";
  };

  OS << Banner;

  if (auto *PreHeader = L.getLoopPreheader()) {
    OS << "\n; Preheader:";
    PreHeader->print(OS);
    OS << "\n; Loop:";
  }

  for (auto *BB : L.blocks())
    EmitBlock(BB);

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L.getExitBlocks(ExitBlocks);
  if (ExitBlocks.empty())
    return;

  OS << "\n; Exit blocks";
  for (auto *BB : ExitBlocks)
    EmitBlock(BB);
}
// Return the number of iterations to peel off that make conditions in the // body true/false. For example, if we peel 2 iterations off the loop below, // the condition i < 2 can be evaluated at compile time. // for (i = 0; i < n; i++) // if (i < 2) // .. // else // .. // } static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); unsigned DesiredPeelCount = 0; for (auto *BB : L.blocks()) { auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || BI->isUnconditional()) continue; // Ignore loop exit condition. if (L.getLoopLatch() == BB) continue; Value *Condition = BI->getCondition(); Value *LeftVal, *RightVal; CmpInst::Predicate Pred; if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) continue; const SCEV *LeftSCEV = SE.getSCEV(LeftVal); const SCEV *RightSCEV = SE.getSCEV(RightVal); // Do not consider predicates that are known to be true or false // independently of the loop iteration. if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) || SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV, RightSCEV)) continue; // Check if we have a condition with one AddRec and one non AddRec // expression. Normalize LeftSCEV to be the AddRec. if (!isa<SCEVAddRecExpr>(LeftSCEV)) { if (isa<SCEVAddRecExpr>(RightSCEV)) { std::swap(LeftSCEV, RightSCEV); Pred = ICmpInst::getSwappedPredicate(Pred); } else continue; } const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV); // Avoid huge SCEV computations in the loop below, make sure we only // consider AddRecs of the loop we are trying to peel and avoid // non-monotonic predicates, as we will not be able to simplify the loop // body. // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can // simplify the loop, if we peel 1 additional iteration, if there // is no wrapping. 
bool Increasing; if (!LeftAR->isAffine() || LeftAR->getLoop() != &L || !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) continue; (void)Increasing; // Check if extending the current DesiredPeelCount lets us evaluate Pred // or !Pred in the loop body statically. unsigned NewPeelCount = DesiredPeelCount; const SCEV *IterVal = LeftAR->evaluateAtIteration( SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE); // If the original condition is not known, get the negated predicate // (which holds on the else branch) and check if it is known. This allows // us to peel of iterations that make the original condition false. if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV)) Pred = ICmpInst::getInversePredicate(Pred); const SCEV *Step = LeftAR->getStepRecurrence(SE); while (NewPeelCount < MaxPeelCount && SE.isKnownPredicate(Pred, IterVal, RightSCEV)) { IterVal = SE.getAddExpr(IterVal, Step); NewPeelCount++; } // Only peel the loop if the monotonic predicate !Pred becomes known in the // first iteration of the loop body after peeling. if (NewPeelCount > DesiredPeelCount && SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, RightSCEV)) DesiredPeelCount = NewPeelCount; } return DesiredPeelCount; }
void LIR_LocalCaching::cache_locals() { LoopList* loops = ir()->loops(); BlockList* all_blocks = ir()->code(); WordSizeList* local_name_to_offset_map = ir()->local_name_to_offset_map(); if (loops == NULL) { // collect global scan information BlockListScanInfo gsi(ir()->code()); RegisterManager* global_scan_info = gsi.info(); // just cache registers globally. LocalMappingSetter setter(cache_locals_for_blocks(all_blocks, global_scan_info)); all_blocks->iterate_forward(&setter); } else { assert(loops->length() != 0, "should be at least one loop"); int i; // collect all the blocks that are outside of the loops BlockList* non_loop_blocks = new BlockList; for (i = 0; i < all_blocks->length(); i++) { BlockBegin* b = all_blocks->at(i); if (b->loop_index() == -1 && b->next()->as_CachingChange() == NULL) { non_loop_blocks->append(b); } } RegisterManager* global_scan_info = new RegisterManager(); // scan each of the loops and the remaining blocks recording register usage // so we know what registers are free. RegisterManagerArray scan_infos(loops->length() + 1); for (i = 0; i < loops->length(); i++) { Loop* loop = loops->at(i); BlockListScanInfo lsi(loop->blocks()); scan_infos.at_put(i, lsi.info()); // accumulate the global state global_scan_info->merge(lsi.info()); } BlockListScanInfo lsi(non_loop_blocks); scan_infos.at_put(loops->length(), lsi.info()); // accumulate the global state global_scan_info->merge(lsi.info()); // use the global mapping as a guide in the rest of the register selection process. 
LocalMapping* global = cache_locals_for_blocks(all_blocks, global_scan_info, true); LocalMapping* pref = new LocalMapping(local_name_to_offset_map); pref->merge(global); pref->merge(_preferred); _preferred = pref; for (i = 0; i < loops->length(); i++) { if (i < LIRCacheLoopStart || (uint)i >= (uint)LIRCacheLoopStop) { continue; } Loop* loop = loops->at(i); LocalMapping* mapping = cache_locals_for_blocks(loop->blocks(), scan_infos.at(i)); LocalMappingSetter setter(mapping); loop->blocks()->iterate_forward(&setter); _preferred->merge(mapping); mapping->join(global); } LocalMapping* mapping = cache_locals_for_blocks(non_loop_blocks, scan_infos.at(loops->length())); mapping->join(global); LocalMappingSetter setter(mapping); non_loop_blocks->iterate_forward(&setter); } }
// Returns inner loops that have may or may not have calls LoopList* LoopFinder::find_loops(LoopList* loops, bool call_free_only) { LoopList* inner = new LoopList(); LoopList* outer = new LoopList(); int lng = loops->length(); // First step: find loops that have no calls and no backedges // in the loop except its own int i; for (i = 0; i < lng; i++) { Loop* loop = loops->at(i); int k = loop->nof_blocks() - 1; bool is_outer = false; for (; k >= 0; k--) { BlockBegin* b = loop->block_no(k); // Is this innermost loop: // - no block, except end block, may be a back edge start, // otherwise we have an outer loop if (!loop->is_end(b)) { BlockLoopInfo* bli = get_block_info(b); if (bli->is_backedge_start()) { if (WantsLargerLoops) { is_outer = true; } else { loop = NULL; } break; } } } if (loop != NULL) { ScanBlocks scan(loop->blocks()); ScanResult scan_result; scan.scan(&scan_result); if (!call_free_only || (!scan_result.has_calls() && !scan_result.has_slow_cases())) { if (is_outer) { outer->append(loop); } else { inner->append(loop); } } else { #ifndef PRODUCT if (PrintLoops && Verbose) { tty->print("Discarding loop with calls: "); loop->print(); } #endif // PRODUCT } } } // find all surviving outer loops and delete any inner loops contained inside them if (inner->length() > 0) { for (i = 0; i < outer->length() ; i++) { Loop* loop = outer->at(i); int k = loop->nof_blocks() - 1; for (; k >= 0; k--) { BlockBegin* b = loop->block_no(k); if (!loop->is_end(b)) { BlockLoopInfo* bli = get_block_info(b); if (bli->is_backedge_start()) { // find the loop contained inside this loop int j; for (j = 0; j < inner->length(); j++) { Loop* inner_loop = inner->at(j); if (inner_loop->is_end(b)) { inner->remove(inner_loop); break; } } for (j = 0; j < outer->length(); j++) { Loop* outer_loop = outer->at(j); if (outer_loop == loop) { continue; } if (outer_loop->is_end(b)) { outer->remove(outer_loop); break; } } } } } } inner->appendAll(outer); } // Second step: if several loops have the same 
loop-end, select the one // with fewer blocks. // if several loops have the same loop-start, select the one // with fewer blocks // now check for loops that have the same header and eliminate one for (i = 0; i < inner->length() ; i++) { Loop* current_loop = inner->at(i); BlockBegin* header = current_loop->start(); for (int n = i + 1; n < inner->length(); n++) { Loop* test_loop = inner->at(n); BlockBegin* test = test_loop->start(); Loop* discarded = NULL; bool same_end = false; for (int e = 0; e < current_loop->ends()->length(); e++) { if (test_loop->is_end(current_loop->ends()->at(e))) { same_end = true; } } if (header == test_loop->start() || same_end) { // discard loop with fewer blocks if (test_loop->nof_blocks() > current_loop->nof_blocks()) { if (WantsLargerLoops) { discarded = current_loop; } else { discarded = test_loop; } } else { if (WantsLargerLoops) { discarded = test_loop; } else { discarded = current_loop; } } inner->remove(discarded); #ifndef PRODUCT if (PrintLoops && Verbose && discarded) { tty->print("Discarding overlapping loop: "); discarded->print(); } #endif // PRODUCT // restart the computation i = -1; break; } } } if (inner->length() == 0) { // we removed all the loops if (PrintLoops && Verbose) { tty->print_cr("*** deleted all loops in %s", __FILE__); } set_not_ok(); return NULL; } else { return inner; } }