// Return true if the iteration count of \p InnerLoop does not change from
// one iteration of its parent loop to the next (or the loop has no parent,
// in which case the property holds trivially).
bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                              ScalarEvolution &SE) {
  Loop *Parent = InnerLoop->getParentLoop();
  if (!Parent)
    return true;

  // Query SCEV for the inner loop's exit count through its latch. We can only
  // reason about counts SCEV can compute and that are integer-typed.
  BasicBlock *Latch = InnerLoop->getLoopLatch();
  const SCEV *BECount = SE.getExitCount(InnerLoop, Latch);
  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
    return false;

  // The count is invariant in the parent exactly when SCEV classifies its
  // disposition with respect to the parent loop as LoopInvariant.
  return SE.getLoopDisposition(BECount, Parent) ==
         ScalarEvolution::LoopInvariant;
}
/// Split a condition into something semantically equivalent to (0 <= I <
/// Limit), both comparisons signed and Limit loop invariant on L and positive.
/// On success, return true and set Index to I and UpperLimit to Limit. Return
/// false on failure (we may still write to UpperLimit and Index on failure).
/// It does not try to interpret I as a loop index.
///
static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE,
                                     Value *Condition, const SCEV *&Index,
                                     Value *&UpperLimit) {
  // TODO: currently this catches some silly cases like comparing "%idx slt 1".
  // Our transformations are still correct, but less likely to be profitable in
  // those cases. We have to come up with some heuristics that pick out the
  // range checks that are more profitable to clone a loop for. This function
  // in general can be made more robust.
  using namespace llvm::PatternMatch;

  Value *A = nullptr;
  Value *B = nullptr;
  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;

  // In these early checks we assume that the matched UpperLimit is positive.
  // We'll verify that fact later, before returning true.

  // Form 1: an explicit conjunction "(0 <= I) and (I < Limit)". The lower-
  // and upper-bound checks may appear in either operand order of the 'and'.
  if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
    Value *IndexV = nullptr;
    Value *ExpectedUpperBoundCheck = nullptr;

    // Identify which side is the lower-bound check on some index value;
    // the opposite side must then be the matching upper-bound check.
    if (IsLowerBoundCheck(A, IndexV))
      ExpectedUpperBoundCheck = B;
    else if (IsLowerBoundCheck(B, IndexV))
      ExpectedUpperBoundCheck = A;
    else
      return false;

    if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit))
      return false;

    Index = SE.getSCEV(IndexV);

    if (isa<SCEVCouldNotCompute>(Index))
      return false;

  } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
    // Form 2: a single comparison "I < Limit". For the signed predicates we
    // additionally need SCEV to prove the index non-negative; the unsigned
    // predicates already imply it. The ">"-style predicates are normalized
    // to "<" by swapping operands and falling through.
    switch (Pred) {
    default:
      return false;

    case ICmpInst::ICMP_SGT:
      std::swap(A, B);
    // fall through
    case ICmpInst::ICMP_SLT:
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index))
        return false;
      break;

    case ICmpInst::ICMP_UGT:
      std::swap(A, B);
    // fall through
    case ICmpInst::ICMP_ULT:
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index))
        return false;
      break;
    }
  } else {
    return false;
  }

  // Now verify the assumptions made above: the limit must be computable by
  // SCEV and provably non-negative...
  const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit);
  if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) ||
      !SE.isKnownNonNegative(UpperLimitSCEV))
    return false;

  // ...and it must be invariant in L for the (0 <= I < Limit) form to be
  // usable as a loop-wide range check.
  if (SE.getLoopDisposition(UpperLimitSCEV, L) !=
      ScalarEvolution::LoopInvariant) {
    DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName()
                 << " ";
          dbgs() << " UpperLimit is not loop invariant: "
                 << UpperLimit->getName() << "\n";);
    // NOTE(review): this source chunk appears truncated here — the remainder
    // of SplitRangeCheckCondition (at least a failure return, any final
    // checks, and the closing braces) is missing from the visible text.
    // Recover the rest of the function from version control before editing.
// Return true when the two-deep loop nest rooted at \p L has the
// Fore/SubLoop/Aft structure described below and no instruction or memory
// dependencies prevent the unroll-and-jam block reordering.
bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE,
                                DominatorTree &DT, DependenceInfo &DI) {
  /* We currently handle outer loops like this:
                |
        ForeFirst    <------\   }
         Blocks             |   } ForeBlocks
        ForeLast            |   }
                |           |
       SubLoopFirst  <\     |   }
         Blocks       |     |   } SubLoopBlocks
       SubLoopLast   -/     |   }
                |           |
        AftFirst            |   }
         Blocks             |   } AftBlocks
        AftLast     --------/   }
                |

     There are (theoretically) any number of blocks in ForeBlocks,
     SubLoopBlocks and AftBlocks, providing that there is one edge from Fores
     to SubLoops, one edge from SubLoops to Afts and a single outer loop exit
     (from Afts). In practice we currently limit Aft blocks to a single block,
     and limit things further in the profitability checks of the unroll and
     jam pass.

     Because of the way we rearrange basic blocks, we also require that
     the Fore blocks on all unrolled iterations are safe to move before the
     SubLoop blocks of all iterations. So we require that the phi node looping
     operands of ForeHeader can be moved to at least the end of ForeEnd, so
     that we can arrange cloned Fore Blocks before the subloop and match up
     Phi's correctly.

     i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
     It needs to be safe to transform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.

     There are then a number of checks along the lines of no calls, no
     exceptions, inner loop IV is consistent, etc. Note that for loops
     requiring runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
     UnrollAndJamLoop if the trip count cannot be easily calculated.
  */

  // The nest must be exactly two loops deep and in loop-simplify form.
  if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
    return false;
  Loop *SubLoop = L->getSubLoops()[0];
  if (!SubLoop->isLoopSimplifyForm())
    return false;

  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Exit = L->getExitingBlock();
  BasicBlock *SubLoopHeader = SubLoop->getHeader();
  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
  BasicBlock *SubLoopExit = SubLoop->getExitingBlock();

  // Both loops must exit through their latch block (single exiting block that
  // is also the latch), matching the diagram above.
  if (Latch != Exit)
    return false;
  if (SubLoopLatch != SubLoopExit)
    return false;

  // Address-taken headers can be reached by indirect branches, which the
  // block-cloning rearrangement cannot handle.
  if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken())
    return false;

  // Split blocks into Fore/SubLoop/Aft based on dominators
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
                                AftBlocks, &DT))
    return false;

  // Aft blocks may need to move instructions to fore blocks, which becomes
  // more difficult if there are multiple (potentially conditionally executed)
  // blocks. For now we just exclude loops with multiple aft blocks.
  if (AftBlocks.size() != 1)
    return false;

  // Check inner loop IV is consistent between all iterations: the inner
  // loop's exit count must be computable, integer-typed, and invariant in L.
  const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch);
  if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) ||
      !SubLoopBECountSC->getType()->isIntegerTy())
    return false;
  ScalarEvolution::LoopDisposition LD =
      SE.getLoopDisposition(SubLoopBECountSC, L);
  if (LD != ScalarEvolution::LoopInvariant)
    return false;

  // Check the loop safety info for exceptions.
  LoopSafetyInfo LSI;
  computeLoopSafetyInfo(&LSI, L);
  if (LSI.MayThrow)
    return false;

  // We've ruled out the easy stuff and now need to check that there are no
  // interdependencies which may prevent us from moving the:
  //  ForeBlocks before Subloop and AftBlocks.
  //  Subloop before AftBlocks.
  //  ForeBlock phi operands before the subloop

  // Make sure we can move all instructions we need to before the subloop.
  // Seed the worklist with the backedge (latch) incoming values of the outer
  // header phis, then walk their operand chains.
  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  for (auto &Phi : Header->phis()) {
    Value *V = Phi.getIncomingValueForBlock(Latch);
    if (Instruction *I = dyn_cast<Instruction>(V))
      Worklist.push_back(I);
  }
  while (!Worklist.empty()) {
    Instruction *I = Worklist.back();
    Worklist.pop_back();
    if (Visited.insert(I).second) {
      // Values computed inside the subloop cannot be moved before it.
      if (SubLoop->contains(I->getParent()))
        return false;
      if (AftBlocks.count(I->getParent())) {
        // If we hit a phi node in afts we know we are done (probably LCSSA)
        if (isa<PHINode>(I))
          return false;
        // Aft instructions feeding the header phis would have to be hoisted
        // above the subloop, so they must be free of side effects and memory
        // accesses; their operands must be movable too.
        if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
          return false;
        for (auto &U : I->operands())
          if (Instruction *II = dyn_cast<Instruction>(U))
            Worklist.push_back(II);
      }
    }
  }

  // Check for memory dependencies which prohibit the unrolling we are doing.
  // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
  // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
  if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI))
    return false;

  return true;
}