std::tuple<std::vector<const SCEV *>, std::vector<int>> polly::getIndexExpressionsFromGEP(GetElementPtrInst *GEP, ScalarEvolution &SE) { std::vector<const SCEV *> Subscripts; std::vector<int> Sizes; Type *Ty = GEP->getPointerOperandType(); bool DroppedFirstDim = false; for (unsigned i = 1; i < GEP->getNumOperands(); i++) { const SCEV *Expr = SE.getSCEV(GEP->getOperand(i)); if (i == 1) { if (auto *PtrTy = dyn_cast<PointerType>(Ty)) { Ty = PtrTy->getElementType(); } else if (auto *ArrayTy = dyn_cast<ArrayType>(Ty)) { Ty = ArrayTy->getElementType(); } else { Subscripts.clear(); Sizes.clear(); break; } if (auto *Const = dyn_cast<SCEVConstant>(Expr)) if (Const->getValue()->isZero()) { DroppedFirstDim = true; continue; } Subscripts.push_back(Expr); continue; } auto *ArrayTy = dyn_cast<ArrayType>(Ty); if (!ArrayTy) { Subscripts.clear(); Sizes.clear(); break; } Subscripts.push_back(Expr); if (!(DroppedFirstDim && i == 2)) Sizes.push_back(ArrayTy->getNumElements()); Ty = ArrayTy->getElementType(); } return std::make_tuple(Subscripts, Sizes); }
// For Falkor, we want to avoid having too many strided loads in a loop since // that can exhaust the HW prefetcher resources. We adjust the unroller // MaxCount preference below to attempt to ensure unrolling doesn't create too // many strided loads. static void getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP) { enum { MaxStridedLoads = 7 }; auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) { int StridedLoads = 0; // FIXME? We could make this more precise by looking at the CFG and // e.g. not counting loads in each side of an if-then-else diamond. for (const auto BB : L->blocks()) { for (auto &I : *BB) { LoadInst *LMemI = dyn_cast<LoadInst>(&I); if (!LMemI) continue; Value *PtrValue = LMemI->getPointerOperand(); if (L->isLoopInvariant(PtrValue)) continue; const SCEV *LSCEV = SE.getSCEV(PtrValue); const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); if (!LSCEVAddRec || !LSCEVAddRec->isAffine()) continue; // FIXME? We could take pairing of unrolled load copies into account // by looking at the AddRec, but we would probably have to limit this // to loops with no stores or other memory optimization barriers. ++StridedLoads; // We've seen enough strided loads that seeing more won't make a // difference. if (StridedLoads > MaxStridedLoads / 2) return StridedLoads; } } return StridedLoads; }; int StridedLoads = countStridedLoads(L, SE); LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads << " strided loads\n"); // Pick the largest power of 2 unroll count that won't result in too many // strided loads. if (StridedLoads) { UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads); LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount << '\n'); } }
/// Split a condition into something semantically equivalent to (0 <= I < /// Limit), both comparisons signed and Len loop invariant on L and positive. /// On success, return true and set Index to I and UpperLimit to Limit. Return /// false on failure (we may still write to UpperLimit and Index on failure). /// It does not try to interpret I as a loop index. /// static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE, Value *Condition, const SCEV *&Index, Value *&UpperLimit) { // TODO: currently this catches some silly cases like comparing "%idx slt 1". // Our transformations are still correct, but less likely to be profitable in // those cases. We have to come up with some heuristics that pick out the // range checks that are more profitable to clone a loop for. This function // in general can be made more robust. using namespace llvm::PatternMatch; Value *A = nullptr; Value *B = nullptr; ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; // In these early checks we assume that the matched UpperLimit is positive. // We'll verify that fact later, before returning true. if (match(Condition, m_And(m_Value(A), m_Value(B)))) { Value *IndexV = nullptr; Value *ExpectedUpperBoundCheck = nullptr; if (IsLowerBoundCheck(A, IndexV)) ExpectedUpperBoundCheck = B; else if (IsLowerBoundCheck(B, IndexV)) ExpectedUpperBoundCheck = A; else return false; if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit)) return false; Index = SE.getSCEV(IndexV); if (isa<SCEVCouldNotCompute>(Index)) return false; } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) { switch (Pred) { default: return false; case ICmpInst::ICMP_SGT: std::swap(A, B); // fall through case ICmpInst::ICMP_SLT: UpperLimit = B; Index = SE.getSCEV(A); if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index)) return false; break; case ICmpInst::ICMP_UGT: std::swap(A, B); // fall through case ICmpInst::ICMP_ULT: UpperLimit = B; Index = SE.getSCEV(A); if (isa<SCEVCouldNotCompute>(Index)) return false; break; } } else { return false; } const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit); if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) || !SE.isKnownNonNegative(UpperLimitSCEV)) return false; if (SE.getLoopDisposition(UpperLimitSCEV, L) != ScalarEvolution::LoopInvariant) { DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName() << " "; dbgs() << " UpperLimit is not loop invariant: " << UpperLimit->getName() << "\n";);
// Return the number of iterations to peel off that make conditions in the // body true/false. For example, if we peel 2 iterations off the loop below, // the condition i < 2 can be evaluated at compile time. // for (i = 0; i < n; i++) // if (i < 2) // .. // else // .. // } static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); unsigned DesiredPeelCount = 0; for (auto *BB : L.blocks()) { auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || BI->isUnconditional()) continue; // Ignore loop exit condition. if (L.getLoopLatch() == BB) continue; Value *Condition = BI->getCondition(); Value *LeftVal, *RightVal; CmpInst::Predicate Pred; if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) continue; const SCEV *LeftSCEV = SE.getSCEV(LeftVal); const SCEV *RightSCEV = SE.getSCEV(RightVal); // Do not consider predicates that are known to be true or false // independently of the loop iteration. if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) || SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV, RightSCEV)) continue; // Check if we have a condition with one AddRec and one non AddRec // expression. Normalize LeftSCEV to be the AddRec. if (!isa<SCEVAddRecExpr>(LeftSCEV)) { if (isa<SCEVAddRecExpr>(RightSCEV)) { std::swap(LeftSCEV, RightSCEV); Pred = ICmpInst::getSwappedPredicate(Pred); } else continue; } const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV); // Avoid huge SCEV computations in the loop below, make sure we only // consider AddRecs of the loop we are trying to peel and avoid // non-monotonic predicates, as we will not be able to simplify the loop // body. // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can // simplify the loop, if we peel 1 additional iteration, if there // is no wrapping. bool Increasing; if (!LeftAR->isAffine() || LeftAR->getLoop() != &L || !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) continue; (void)Increasing; // Check if extending the current DesiredPeelCount lets us evaluate Pred // or !Pred in the loop body statically. unsigned NewPeelCount = DesiredPeelCount; const SCEV *IterVal = LeftAR->evaluateAtIteration( SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE); // If the original condition is not known, get the negated predicate // (which holds on the else branch) and check if it is known. This allows // us to peel of iterations that make the original condition false. if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV)) Pred = ICmpInst::getInversePredicate(Pred); const SCEV *Step = LeftAR->getStepRecurrence(SE); while (NewPeelCount < MaxPeelCount && SE.isKnownPredicate(Pred, IterVal, RightSCEV)) { IterVal = SE.getAddExpr(IterVal, Step); NewPeelCount++; } // Only peel the loop if the monotonic predicate !Pred becomes known in the // first iteration of the loop body after peeling. if (NewPeelCount > DesiredPeelCount && SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, RightSCEV)) DesiredPeelCount = NewPeelCount; } return DesiredPeelCount; }