Example #1
// Derive per-dimension subscript expressions and array-dimension sizes from a
// GEP over a pointer-to-(nested-)array type; both result vectors come back
// empty if the type walk fails.
std::tuple<std::vector<const SCEV *>, std::vector<int>>
polly::getIndexExpressionsFromGEP(GetElementPtrInst *GEP, ScalarEvolution &SE) {
  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;

  // Walk the indexed type one level per GEP operand, starting from the
  // pointer operand's type.
  Type *Ty = GEP->getPointerOperandType();

  // Set when the first index is a constant zero: it merely steps over the
  // base pointer and contributes no subscript.
  bool DroppedFirstDim = false;

  for (unsigned i = 1; i < GEP->getNumOperands(); i++) {

    const SCEV *Expr = SE.getSCEV(GEP->getOperand(i));

    // The first operand steps over the base pointer (or outermost array);
    // any other pointee type makes the GEP undelinearizable.
    if (i == 1) {
      if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
        Ty = PtrTy->getElementType();
      } else if (auto *ArrayTy = dyn_cast<ArrayType>(Ty)) {
        Ty = ArrayTy->getElementType();
      } else {
        Subscripts.clear();
        Sizes.clear();
        break;
      }
      if (auto *Const = dyn_cast<SCEVConstant>(Expr))
        if (Const->getValue()->isZero()) {
          DroppedFirstDim = true;
          continue;
        }
      Subscripts.push_back(Expr);
      continue;
    }

    // Every subsequent operand must index into an array dimension.
    auto *ArrayTy = dyn_cast<ArrayType>(Ty);
    if (!ArrayTy) {
      Subscripts.clear();
      Sizes.clear();
      break;
    }

    Subscripts.push_back(Expr);
    // When the leading zero index was dropped, the first real subscript
    // addresses the outermost dimension, whose size is not recorded.
    if (!(DroppedFirstDim && i == 2))
      Sizes.push_back(ArrayTy->getNumElements());

    Ty = ArrayTy->getElementType();
  }

  return std::make_tuple(Subscripts, Sizes);
}
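A minimal caller sketch, ours rather than part of the example: it just prints whatever subscripts the routine recovered. The includes, the printGEPSubscripts name, and the header location are assumptions (in Polly trees of this vintage, getIndexExpressionsFromGEP is believed to be declared in polly/Support/ScopHelper.h).

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"
#include "polly/Support/ScopHelper.h" // assumed home of getIndexExpressionsFromGEP
#include <tuple>
#include <vector>

using namespace llvm;

// Hypothetical caller, for illustration only.
static void printGEPSubscripts(GetElementPtrInst *GEP, ScalarEvolution &SE) {
  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;
  std::tie(Subscripts, Sizes) = polly::getIndexExpressionsFromGEP(GEP, SE);
  // The failure path above clears both vectors, so an empty result means the
  // pointee type was not a recognizable (nested) array shape.
  for (const SCEV *S : Subscripts) {
    S->print(errs());
    errs() << "\n";
  }
}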
Example #2
// For Falkor, we want to avoid having too many strided loads in a loop since
// that can exhaust the HW prefetcher resources.  We adjust the unroller
// MaxCount preference below to attempt to ensure unrolling doesn't create too
// many strided loads.
static void
getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                              TargetTransformInfo::UnrollingPreferences &UP) {
  enum { MaxStridedLoads = 7 };
  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
    int StridedLoads = 0;
    // FIXME? We could make this more precise by looking at the CFG and
    // e.g. not counting loads in each side of an if-then-else diamond.
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (!LMemI)
          continue;

        Value *PtrValue = LMemI->getPointerOperand();
        if (L->isLoopInvariant(PtrValue))
          continue;

        const SCEV *LSCEV = SE.getSCEV(PtrValue);
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
          continue;

        // FIXME? We could take pairing of unrolled load copies into account
        // by looking at the AddRec, but we would probably have to limit this
        // to loops with no stores or other memory optimization barriers.
        ++StridedLoads;
        // We've seen enough strided loads that seeing more won't make a
        // difference.
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
      }
    }
    return StridedLoads;
  };

  int StridedLoads = countStridedLoads(L, SE);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  // Pick the largest power of 2 unroll count that won't result in too many
  // strided loads.
  if (StridedLoads) {
    UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
    LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
                      << UP.MaxCount << '\n');
  }
}
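To make the clamp concrete, here is a small standalone sketch (plain C++, not LLVM code; log2u32 is our stand-in for llvm::Log2_32 from llvm/Support/MathExtras.h) that tabulates the resulting MaxCount for a few strided-load counts:

#include <cstdio>

// Behaves like llvm::Log2_32 for V > 0: floor of the base-2 logarithm.
static unsigned log2u32(unsigned V) {
  unsigned R = 0;
  while (V >>= 1)
    ++R;
  return R;
}

int main() {
  const unsigned MaxStridedLoads = 7;
  // A body with N strided loads gets unrolled at most 2^floor(log2(7 / N))
  // times, keeping the unrolled loop near 7 strided loads in total.
  for (unsigned N = 1; N <= 4; ++N)
    std::printf("strided loads: %u -> MaxCount: %u\n", N,
                1u << log2u32(MaxStridedLoads / N));
  return 0;
}

For N = 1, 2, 3, 4 strided loads this yields MaxCount = 4, 2, 2, 1.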
Example #3
/// Split a condition into something semantically equivalent to (0 <= I <
/// Limit), with both comparisons signed and Limit loop invariant on L and
/// positive. On success, return true and set Index to I and UpperLimit to
/// Limit. Return false on failure (we may still write to UpperLimit and
/// Index on failure). It does not try to interpret I as a loop index.
///
static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE,
                                     Value *Condition, const SCEV *&Index,
                                     Value *&UpperLimit) {

  // TODO: currently this catches some silly cases like comparing "%idx slt 1".
  // Our transformations are still correct, but less likely to be profitable in
  // those cases.  We have to come up with some heuristics that pick out the
  // range checks that are more profitable to clone a loop for.  This function
  // in general can be made more robust.

  using namespace llvm::PatternMatch;

  Value *A = nullptr;
  Value *B = nullptr;
  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;

  // In these early checks we assume that the matched UpperLimit is positive.
  // We'll verify that fact later, before returning true.

  if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
    Value *IndexV = nullptr;
    Value *ExpectedUpperBoundCheck = nullptr;

    if (IsLowerBoundCheck(A, IndexV))
      ExpectedUpperBoundCheck = B;
    else if (IsLowerBoundCheck(B, IndexV))
      ExpectedUpperBoundCheck = A;
    else
      return false;

    if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit))
      return false;

    Index = SE.getSCEV(IndexV);

    if (isa<SCEVCouldNotCompute>(Index))
      return false;

  } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
    switch (Pred) {
    default:
      return false;

    case ICmpInst::ICMP_SGT:
      std::swap(A, B);
    // fall through
    case ICmpInst::ICMP_SLT:
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index))
        return false;
      break;

    case ICmpInst::ICMP_UGT:
      std::swap(A, B);
    // fall through
    case ICmpInst::ICMP_ULT:
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index))
        return false;
      break;
    }
  } else {
    return false;
  }

  const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit);
  if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) ||
      !SE.isKnownNonNegative(UpperLimitSCEV))
    return false;

  if (SE.getLoopDisposition(UpperLimitSCEV, L) !=
      ScalarEvolution::LoopInvariant) {
    DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName()
                 << " ";
          dbgs() << " UpperLimit is not loop invariant: "
                 << UpperLimit->getName() << "\n";);
    return false;
  }

  return true;
}
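IsLowerBoundCheck and IsUpperBoundCheck are helpers the snippet relies on but does not show. A plausible sketch of the lower-bound matcher, assuming it recognizes "Index >= 0" and the commuted "0 <= Index" (the name and exact shape are our guesses, not the source's):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;

// Hypothetical sketch: recognize "A >= 0" (or "0 <= B") and bind the index.
static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) {
  using namespace llvm::PatternMatch;
  ICmpInst::Predicate Pred;
  Value *A, *B;
  if (!match(Check, m_ICmp(Pred, m_Value(A), m_Value(B))))
    return false;
  if (Pred == ICmpInst::ICMP_SGE && match(B, m_Zero())) {
    IndexV = A; // matched "A >= 0"
    return true;
  }
  if (Pred == ICmpInst::ICMP_SLE && match(A, m_Zero())) {
    IndexV = B; // matched "0 <= B"
    return true;
  }
  return false;
}

An analogous upper-bound matcher would bind UpperLimit from an "IndexV < Limit" (or commuted "Limit > IndexV") comparison.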
Example #4
// Return the number of iterations to peel off that make conditions in the
// body true/false. For example, if we peel 2 iterations off the loop below,
// the condition i < 2 can be evaluated at compile time.
//  for (i = 0; i < n; i++) {
//    if (i < 2)
//      ..
//    else
//      ..
//  }
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
                                         ScalarEvolution &SE) {
  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
  unsigned DesiredPeelCount = 0;

  for (auto *BB : L.blocks()) {
    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || BI->isUnconditional())
      continue;

    // Ignore loop exit condition.
    if (L.getLoopLatch() == BB)
      continue;

    Value *Condition = BI->getCondition();
    Value *LeftVal, *RightVal;
    CmpInst::Predicate Pred;
    if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
      continue;

    const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
    const SCEV *RightSCEV = SE.getSCEV(RightVal);

    // Do not consider predicates that are known to be true or false
    // independently of the loop iteration.
    if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV,
                            RightSCEV))
      continue;

    // Check if we have a condition with one AddRec and one non AddRec
    // expression. Normalize LeftSCEV to be the AddRec.
    if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
      if (isa<SCEVAddRecExpr>(RightSCEV)) {
        std::swap(LeftSCEV, RightSCEV);
        Pred = ICmpInst::getSwappedPredicate(Pred);
      } else
        continue;
    }

    const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);

    // Avoid huge SCEV computations in the loop below, make sure we only
    // consider AddRecs of the loop we are trying to peel and avoid
    // non-monotonic predicates, as we will not be able to simplify the loop
    // body.
    // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
    //        simplify the loop, if we peel 1 additional iteration, if there
    //        is no wrapping.
    bool Increasing;
    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
        !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
      continue;
    (void)Increasing;

    // Check if extending the current DesiredPeelCount lets us evaluate Pred
    // or !Pred in the loop body statically.
    unsigned NewPeelCount = DesiredPeelCount;

    const SCEV *IterVal = LeftAR->evaluateAtIteration(
        SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);

    // If the original condition is not known, get the negated predicate
    // (which holds on the else branch) and check if it is known. This allows
    // us to peel off iterations that make the original condition false.
    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
      Pred = ICmpInst::getInversePredicate(Pred);

    const SCEV *Step = LeftAR->getStepRecurrence(SE);
    while (NewPeelCount < MaxPeelCount &&
           SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
      IterVal = SE.getAddExpr(IterVal, Step);
      NewPeelCount++;
    }

    // Only peel the loop if the monotonic predicate !Pred becomes known in the
    // first iteration of the loop body after peeling.
    if (NewPeelCount > DesiredPeelCount &&
        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
                            RightSCEV))
      DesiredPeelCount = NewPeelCount;
  }

  return DesiredPeelCount;
}
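The core of the peeling loop above is simply "advance the AddRec one step at a time until the predicate flips". A standalone arithmetic sketch of that walk for the "i < 2" example from the header comment (all constants are demo assumptions, not LLVM code):

#include <cstdio>

int main() {
  // Model the AddRec {0,+,1}: Start = 0, Step = 1, compared against
  // RightSCEV = 2 with a signed-less-than predicate; MaxPeelCount = 4.
  const int Start = 0, Step = 1, Right = 2;
  const unsigned MaxPeelCount = 4;

  unsigned NewPeelCount = 0;
  int IterVal = Start; // value of the AddRec at iteration NewPeelCount
  while (NewPeelCount < MaxPeelCount && IterVal < Right) {
    IterVal += Step; // evaluateAtIteration(NewPeelCount + 1)
    ++NewPeelCount;
  }
  // After peeling 2 iterations, "i < 2" is false for every remaining
  // iteration, so the branch in the loop body folds away.
  std::printf("peel %u iterations\n", NewPeelCount); // prints: peel 2 iterations
  return 0;
}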