Пример #1
0
/// Decide whether the load \p LInst may be hoisted with respect to region \p R.
///
/// Two conditions are checked:
///  1. The load's pointer, evaluated at the scope of the innermost loop
///     containing the load, is invariant in every loop that surrounds the load
///     and is contained in \p R.
///  2. No user of the pointer inside \p R that may write to memory lives in a
///     block dominating all in-region predecessors of the region's exit block.
///
/// \returns true if both conditions hold, false otherwise.
bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI,
                            ScalarEvolution &SE, const DominatorTree &DT) {
  Loop *InnermostLoop = LI.getLoopFor(LInst->getParent());
  auto *Ptr = LInst->getPointerOperand();
  const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, InnermostLoop);

  // Walk outwards through the loop nest for as long as we stay inside R.
  for (Loop *Cur = InnermostLoop; Cur && R.contains(Cur);
       Cur = Cur->getParentLoop())
    if (!SE.isLoopInvariant(PtrSCEV, Cur))
      return false;

  for (auto *User : Ptr->users()) {
    auto *UserI = dyn_cast<Instruction>(User);

    // Only instructions inside the region that may write memory matter.
    const bool IsWriterInRegion =
        UserI && R.contains(UserI) && UserI->mayWriteToMemory();
    if (!IsWriterInRegion)
      continue;

    // A writer blocks hoisting only if its block dominates every in-region
    // predecessor of the region exit.
    auto &WriterBB = *UserI->getParent();
    bool DominatesExitPreds = true;
    for (auto Pred : predecessors(R.getExit())) {
      if (R.contains(Pred) && !DT.dominates(&WriterBB, Pred)) {
        DominatesExitPreds = false;
        break;
      }
    }

    if (DominatesExitPreds)
      return false;
  }

  return true;
}
Пример #2
0
/// Translate the arguments of the call expression \p Expr into values for the
/// dimensions of \p Stmt.
///
/// Argument i + 1 of \p Expr is materialized through ExprBuilder and recorded
/// for dimension i: always in \p LTS (wrapped as a SCEVUnknown, keyed by the
/// loop of that dimension) and, if the dimension has an original induction
/// variable, also in \p VMap after an integer cast to that variable's type.
/// \p Expr is freed before returning.
void IslNodeBuilder::createSubstitutions(isl_ast_expr *Expr, ScopStmt *Stmt,
                                         ValueMapT &VMap, LoopToScevMapT &LTS) {
  assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op &&
         "Expression of type 'op' expected");
  assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call &&
         "Opertation of type 'call' expected");

  for (int Dim = 0; Dim < isl_ast_expr_get_op_n_arg(Expr) - 1; ++Dim) {
    // Call arguments are shifted by one relative to statement dimensions.
    isl_ast_expr *ArgExpr = isl_ast_expr_get_op_arg(Expr, Dim + 1);
    Value *NewValue = ExprBuilder.create(ArgExpr);

    ScalarEvolution *SE = Stmt->getParent()->getSE();
    LTS[Stmt->getLoopForDimension(Dim)] = SE->getUnknown(NewValue);

    const Value *OldIV = Stmt->getInductionVariableForDimension(Dim);
    if (!OldIV)
      continue;

    // CreateIntCast can introduce trunc expressions. This is correct, as the
    // result will always fit into the type of the original induction variable
    // (because we calculate a value of the original induction variable).
    Value *CastedValue = Builder.CreateIntCast(NewValue, OldIV->getType(), true);
    VMap[OldIV] = CastedValue;
  }

  isl_ast_expr_free(Expr);
}
Пример #3
0
/// Build a replacement value for every output dimension of \p PMA.
///
/// For output dimension i, the piecewise affine expression is turned into an
/// AST expression using \p Context, materialized through ExprBuilder and cast
/// to the type of the statement's original induction variable for that
/// dimension. The cast value is stored in \p VMap (keyed by the old induction
/// variable) and, wrapped as a SCEVUnknown, in \p LTS (keyed by the loop of the
/// dimension). Both \p PMA and \p Context are freed before returning.
void IslNodeBuilder::createSubstitutions(
    __isl_take isl_pw_multi_aff *PMA, __isl_take isl_ast_build *Context,
    ScopStmt *Stmt, ValueMapT &VMap, LoopToScevMapT &LTS) {
  for (unsigned Dim = 0; Dim < isl_pw_multi_aff_dim(PMA, isl_dim_out); ++Dim) {
    isl_pw_aff *DimAff = isl_pw_multi_aff_get_pw_aff(PMA, Dim);
    isl_ast_expr *DimExpr = isl_ast_build_expr_from_pw_aff(Context, DimAff);
    const Value *OldIV = Stmt->getInductionVariableForDimension(Dim);
    Value *Generated = ExprBuilder.create(DimExpr);

    // CreateIntCast can introduce trunc expressions. This is correct, as the
    // result will always fit into the type of the original induction variable
    // (because we calculate a value of the original induction variable).
    Value *NewIV = Builder.CreateIntCast(Generated, OldIV->getType(), true);
    VMap[OldIV] = NewIV;

    ScalarEvolution *SE = Stmt->getParent()->getSE();
    LTS[Stmt->getLoopForDimension(Dim)] = SE->getUnknown(NewIV);
  }

  isl_pw_multi_aff_free(PMA);
  isl_ast_build_free(Context);
}
Пример #4
0
bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             bool Signed) {
  unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
  APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
    APInt::getMaxValue(BitWidth);
  auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
  return SE.isAvailableAtLoopEntry(S, L) &&
         SE.isLoopEntryGuardedByCond(L, Predicate, S,
                                     SE.getConstant(Max));
}
Пример #5
0
/// Merge blocks of loop \p L into their single predecessor where possible.
///
/// A block is merged when it has exactly one predecessor, that predecessor has
/// exactly one successor, and the predecessor belongs directly to \p L. After
/// each merge ScalarEvolution is told to forget \p L.
///
/// \returns true if at least one block was merged.
static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
                            ScalarEvolution &SE) {
  // Work on a snapshot of the loop's blocks held through value handles, so
  // that removing blocks below does not invalidate the iteration. Entries
  // that no longer refer to a block are skipped.
  SmallVector<WeakTrackingVH, 16> Snapshot(L.blocks());

  bool MergedAny = false;
  for (auto &Handle : Snapshot) {
    BasicBlock *Candidate = cast_or_null<BasicBlock>(Handle);
    if (!Candidate)
      continue;

    // Only the trivial case is handled, and blocks whose predecessor belongs
    // to a different loop are left alone.
    BasicBlock *OnlyPred = Candidate->getSinglePredecessor();
    const bool CanMerge = OnlyPred && OnlyPred->getSingleSuccessor() &&
                          LI.getLoopFor(OnlyPred) == &L;
    if (!CanMerge)
      continue;

    // Fold Candidate into OnlyPred; Candidate is deleted by this call.
    MergeBlockIntoPredecessor(Candidate, &DT, &LI);

    SE.forgetLoop(&L);
    MergedAny = true;
  }

  return MergedAny;
}
Пример #6
0
/// isLoopDead - Determine whether a loop is dead.
///
/// The caller is expected to have already checked for unique exit and exiting
/// blocks, and the code must be in LCSSA form. Only exitBlocks[0] is inspected.
///
/// The loop is considered dead when every PHI at the start of the exit block
/// receives the same value from all exiting blocks, that value is (or can be
/// made) loop invariant, and no instruction in the loop may have side effects.
/// \p Changed is updated by the attempts to make values loop invariant; if it
/// is set, the loop dispositions cached in \p SE are dropped.
bool LoopDeletion::isLoopDead(Loop *L, ScalarEvolution &SE,
                              SmallVectorImpl<BasicBlock *> &exitingBlocks,
                              SmallVectorImpl<BasicBlock *> &exitBlocks,
                              bool &Changed, BasicBlock *Preheader) {
  BasicBlock *exitBlock = exitBlocks[0];

  // Because the code is in LCSSA form, any value used outside of the loop must
  // pass through a PHI in the exit block. Checking those PHIs is therefore
  // sufficient to guarantee that no loop-variant value escapes the loop.
  bool ExitPhisAcceptable = true;
  for (BasicBlock::iterator PhiIt = exitBlock->begin();
       PHINode *Phi = dyn_cast<PHINode>(PhiIt); ++PhiIt) {
    Value *FirstIncoming = Phi->getIncomingValueForBlock(exitingBlocks[0]);

    // All exiting blocks must feed the same value into the PHI; otherwise it
    // is impossible to statically determine which value should be used.
    const bool SameFromAllExits =
        all_of(makeArrayRef(exitingBlocks).slice(1), [&](BasicBlock *BB) {
          return FirstIncoming == Phi->getIncomingValueForBlock(BB);
        });
    if (!SameFromAllExits) {
      ExitPhisAcceptable = false;
      break;
    }

    // The shared value has to be loop invariant (hoisting it if necessary).
    if (Instruction *IncomingInst = dyn_cast<Instruction>(FirstIncoming)) {
      if (!L->makeLoopInvariant(IncomingInst, Changed,
                                Preheader->getTerminator())) {
        ExitPhisAcceptable = false;
        break;
      }
    }
  }

  if (Changed)
    SE.forgetLoopDispositions(L);

  if (!ExitPhisAcceptable)
    return false;

  // Make sure that no instruction in the loop has potential side effects.
  // This includes instructions that could write to memory, and loads that are
  // marked volatile.  This could be made more aggressive by using aliasing
  // information to identify readonly and readnone calls.
  for (Loop::block_iterator BlockIt = L->block_begin(),
                            BlockEnd = L->block_end();
       BlockIt != BlockEnd; ++BlockIt)
    for (Instruction &Inst : **BlockIt)
      if (Inst.mayHaveSideEffects())
        return false;

  return true;
}
Пример #7
0
/// Check whether the exit count of \p InnerLoop's latch is invariant in the
/// parent loop.
///
/// \returns true if \p InnerLoop has no parent loop, or if the exit count
/// taken through its latch is computable, of integer type and loop invariant
/// with respect to the parent; false otherwise.
bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                              ScalarEvolution &SE) {
  Loop *Parent = InnerLoop->getParentLoop();
  if (!Parent)
    return true;

  // Exit count of the inner loop as seen from its latch block.
  const SCEV *LatchExitCount =
      SE.getExitCount(InnerLoop, InnerLoop->getLoopLatch());
  const bool IsUsableCount = !isa<SCEVCouldNotCompute>(LatchExitCount) &&
                             LatchExitCount->getType()->isIntegerTy();
  if (!IsUsableCount)
    return false;

  // The count must not vary across iterations of the parent loop.
  return SE.getLoopDisposition(LatchExitCount, Parent) ==
         ScalarEvolution::LoopInvariant;
}
Пример #8
0
/// Determines if a loop is dead.
///
/// The caller must already have checked for unique exit and exiting blocks,
/// and the code must be in LCSSA form.
///
/// The loop is dead when every PHI at the start of \p ExitBlock receives one
/// common value from all \p ExitingBlocks, that value is (or can be made) loop
/// invariant, and no instruction inside the loop may have side effects.
/// \p Changed is updated while trying to make values invariant; when it is
/// set, the loop dispositions cached in \p SE are forgotten.
static bool isLoopDead(Loop *L, ScalarEvolution &SE,
                       SmallVectorImpl<BasicBlock *> &ExitingBlocks,
                       BasicBlock *ExitBlock, bool &Changed,
                       BasicBlock *Preheader) {
  // Because the code is in LCSSA form, any value used outside of the loop must
  // pass through a PHI in the exit block, so inspecting these PHIs suffices to
  // guarantee that no loop-variant value is used outside of the loop.
  bool ExitPhisAcceptable = true;
  for (BasicBlock::iterator PhiIt = ExitBlock->begin();
       PHINode *Phi = dyn_cast<PHINode>(PhiIt); ++PhiIt) {
    Value *Expected = Phi->getIncomingValueForBlock(ExitingBlocks[0]);

    // Different incoming values for different exiting blocks would make it
    // impossible to statically determine which value should be used.
    const bool SameFromAllExits =
        all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
          return Expected == Phi->getIncomingValueForBlock(BB);
        });
    if (!SameFromAllExits) {
      ExitPhisAcceptable = false;
      break;
    }

    // The common value must be loop invariant, hoisting it if needed.
    Instruction *ExpectedInst = dyn_cast<Instruction>(Expected);
    if (ExpectedInst &&
        !L->makeLoopInvariant(ExpectedInst, Changed,
                              Preheader->getTerminator())) {
      ExitPhisAcceptable = false;
      break;
    }
  }

  if (Changed)
    SE.forgetLoopDispositions(L);

  if (!ExitPhisAcceptable)
    return false;

  // Make sure that no instruction in the loop has potential side effects.
  // This includes instructions that could write to memory, and loads that are
  // marked volatile.
  for (auto &Block : L->blocks())
    for (Instruction &Inst : *Block)
      if (Inst.mayHaveSideEffects())
        return false;

  return true;
}
Пример #9
0
/// Simplify the control flow graph of loop \p L.
///
/// Runs two steps unconditionally, in this order: folding of terminators with
/// known constant conditions, then merging of blocks into their predecessors.
/// If either step reports a change, ScalarEvolution forgets the topmost loop
/// containing \p L.
///
/// \returns true if either step changed anything.
static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
                            ScalarEvolution &SE, MemorySSAUpdater *MSSAU) {
  // Constant-fold terminators with known constant conditions.
  const bool FoldedTerminators = constantFoldTerminators(L, DT, LI, SE, MSSAU);

  // Eliminate unconditional branches by merging blocks into their predecessors.
  const bool MergedBlocks = mergeBlocksIntoPredecessors(L, DT, LI, MSSAU);

  if (!FoldedTerminators && !MergedBlocks)
    return false;

  SE.forgetTopmostLoop(&L);
  return true;
}
      // Walk from L up through all of its parent loops and disable unrolling
      // for every loop whose trip count is too large.
      //
      // For each loop on that chain the small constant trip count is taken
      // from the latch if the latch is an exiting block, otherwise from the
      // loop's single exiting block (0 if neither exists). A loop is disabled
      // via setUnrollID(loop, false) when either
      //   - its parent's trip count is non-zero and
      //     (trip count / parent's trip count) > 16, or
      //   - its own trip count > 32.
      // Disabled parent loops are additionally removed from the pass manager
      // queue.
      //
      // Returns true if L itself may still be unrolled, false if L was
      // disabled.
      bool handleParentLoops(Loop *L, LPPassManager &LPM) {
        ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();

        BasicBlock *exitingBB = L->getLoopLatch();
        if (!exitingBB || !L->isLoopExiting(exitingBB))
          exitingBB = L->getExitingBlock();

        unsigned tripCount = 0;
        if (exitingBB)
          tripCount = SE->getSmallConstantTripCount(L, exitingBB);

        bool shouldUnroll = true;
        for (Loop *loop = L; loop != nullptr;) {
          Loop *parent = loop->getParentLoop();

          // Trip count of the parent, or 0 if there is no parent or no usable
          // exiting block.
          unsigned parentTripCount = 0;
          if (parent) {
            BasicBlock *parentExitingBB = parent->getLoopLatch();
            if (!parentExitingBB || !parent->isLoopExiting(parentExitingBB))
              parentExitingBB = parent->getExitingBlock();

            if (parentExitingBB)
              parentTripCount =
                  SE->getSmallConstantTripCount(parent, parentExitingBB);
          }

          const bool largeRelativeToParent =
              parentTripCount != 0 && tripCount / parentTripCount > 16;
          if (largeRelativeToParent || tripCount > 32) {
            setUnrollID(loop, false);
            if (loop == L)
              shouldUnroll = false;
            else
              LPM.deleteLoopFromQueue(loop);
          }

          // Move one level up, reusing the trip count just computed.
          loop = parent;
          tripCount = parentTripCount;
        }
        return shouldUnroll;
      }
Пример #11
0
/// Extract subscript expressions and array dimension sizes from \p GEP.
///
/// Index operands are visited in order while the indexed type is peeled:
///  - The first index steps through a pointer or array type. A constant-zero
///    first index is dropped; any other first index becomes a subscript. No
///    size is recorded for it.
///  - Every later index must step through an array type. It becomes a
///    subscript and the array's element count is recorded as a size, except
///    for the second index when the first one was dropped.
///
/// \returns the (subscripts, sizes) pair; both vectors are empty if a type
/// that cannot be peeled as described is encountered.
std::tuple<std::vector<const SCEV *>, std::vector<int>>
polly::getIndexExpressionsFromGEP(GetElementPtrInst *GEP, ScalarEvolution &SE) {
  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;

  Type *CurTy = GEP->getPointerOperandType();
  bool SkippedLeadingZero = false;

  for (unsigned Idx = 1, NumOps = GEP->getNumOperands(); Idx < NumOps; ++Idx) {
    const SCEV *IdxExpr = SE.getSCEV(GEP->getOperand(Idx));

    if (Idx == 1) {
      if (auto *PtrTy = dyn_cast<PointerType>(CurTy))
        CurTy = PtrTy->getElementType();
      else if (auto *ArrTy = dyn_cast<ArrayType>(CurTy))
        CurTy = ArrTy->getElementType();
      else
        return std::make_tuple(std::vector<const SCEV *>(), std::vector<int>());

      auto *ConstIdx = dyn_cast<SCEVConstant>(IdxExpr);
      if (ConstIdx && ConstIdx->getValue()->isZero())
        SkippedLeadingZero = true;
      else
        Subscripts.push_back(IdxExpr);
      continue;
    }

    auto *ArrTy = dyn_cast<ArrayType>(CurTy);
    if (!ArrTy)
      return std::make_tuple(std::vector<const SCEV *>(), std::vector<int>());

    Subscripts.push_back(IdxExpr);

    // When the leading zero index was dropped, the second index takes over
    // the role of the outermost subscript and gets no size either.
    const bool IsOutermostAfterSkip = SkippedLeadingZero && Idx == 2;
    if (!IsOutermostAfterSkip)
      Sizes.push_back(ArrTy->getNumElements());

    CurTy = ArrTy->getElementType();
  }

  return std::make_tuple(Subscripts, Sizes);
}
Пример #12
0
// For Falkor, we want to avoid having too many strided loads in a loop since
// that can exhaust the HW prefetcher resources.  The unroller's MaxCount
// preference is adjusted below in an attempt to ensure that unrolling doesn't
// create too many strided loads.
//
// A "strided load" here is a load whose pointer operand is not invariant in L
// and whose SCEV is an affine add-recurrence. UP.MaxCount is only touched if
// at least one such load is found.
static void
getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                              TargetTransformInfo::UnrollingPreferences &UP) {
  enum { MaxStridedLoads = 7 };

  // Returns true if Inst is a strided load in the sense described above.
  auto IsStridedLoad = [L, &SE](Instruction &Inst) -> bool {
    LoadInst *Load = dyn_cast<LoadInst>(&Inst);
    if (!Load)
      return false;

    Value *Address = Load->getPointerOperand();
    if (L->isLoopInvariant(Address))
      return false;

    const SCEVAddRecExpr *AddRec =
        dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Address));
    return AddRec && AddRec->isAffine();
  };

  // Counts strided loads over all blocks of the loop.
  // FIXME? We could make this more precise by looking at the CFG and
  // e.g. not counting loads in each side of an if-then-else diamond.
  // FIXME? We could take pairing of unrolled load copies into account
  // by looking at the AddRec, but we would probably have to limit this
  // to loops with no stores or other memory optimization barriers.
  auto CountStridedLoads = [&]() -> int {
    int Found = 0;
    for (const auto BB : L->blocks()) {
      for (auto &Inst : *BB) {
        if (!IsStridedLoad(Inst))
          continue;
        // Once more than half of the budget is used up, seeing more strided
        // loads won't make a difference, so stop counting.
        if (++Found > MaxStridedLoads / 2)
          return Found;
      }
    }
    return Found;
  };

  int StridedLoads = CountStridedLoads();
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  if (!StridedLoads)
    return;

  // Pick the largest power of 2 unroll count that won't result in too many
  // strided loads.
  UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
                    << UP.MaxCount << '\n');
}
Пример #13
0
/// Insert code in the prolog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is the total number
/// of loop bodies in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
/// the switch instruction is generated.
///
///    extraiters = tripcount % loopfactor
///    if (extraiters == 0) jump Loop:
///    if (extraiters == loopfactor) jump L1
///    if (extraiters == loopfactor-1) jump L2
///    ...
///    L1:  LoopBody;
///    L2:  LoopBody;
///    ...
///    if tripcount < loopfactor jump End
///    Loop:
///    ...
///    End:
///
/// \param L     The loop to generate a prolog for.
/// \param Count The unroll factor; the function bails out unless
///              (Count & (Count-1)) == 0.
/// \param LI    Loop information, used while cloning the loop blocks and when
///              adding new compare blocks to the parent loop.
/// \param LPM   Loop pass manager; required (a null value makes the function
///              return false) because ScalarEvolution is looked up through it.
/// \returns false, before any change is made to the CFG, if one of the
/// preconditions checked at the top of the function does not hold; true once
/// the prolog has been inserted.
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
                                   LPPassManager *LPM) {
  // For now, only unroll loops that have a single exiting block.
  if (!L->getExitingBlock())
    return false;

  // Make sure the loop is in canonical form, and there is a single
  // exit block only.
  if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
    return false;

  // Use Scalar Evolution to compute the trip count.  This allows more
  // loops to be unrolled than relying on induction var simplification.
  if (!LPM)
    return false;
  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
    return false;

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
    SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC))
    return false;

  // We only handle cases when the unroll factor is a power of 2.
  // Count is the loop unroll factor, the number of extra copies added + 1.
  if ((Count & (Count-1)) != 0)
    return false;

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  BasicBlock *PH = L->getLoopPreheader();
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  // It helps to split the original preheader twice, once for the end of the
  // prolog code (PEnd) and once for a new loop preheader (NewPH).
  BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());

  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % Count (the loop unroll factor)
  // The trip count and the remainder are materialized in the original
  // preheader, right before its terminator.
  SCEVExpander Expander(*SE, "loop-unroll");
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Type *CountTy = TripCount->getType();
  BinaryOperator *ModVal =
    BinaryOperator::CreateURem(TripCount,
                               ConstantInt::get(CountTy, Count),
                               "xtraiter");
  ModVal->insertBefore(PreHeaderBR);

  // Check whether there are any extra iterations; if there are none, jump to
  // the unrolled loop.
  Value *BranchVal = new ICmpInst(PreHeaderBR,
                                  ICmpInst::ICMP_NE, ModVal,
                                  ConstantInt::get(CountTy, 0), "lcmp");
  // Branch to either the extra iterations or the unrolled loop.
  // Both targets are PEnd for now; we will fix up the true branch label when
  // adding loop body copies.
  BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
  assert(PreHeaderBR->isUnconditional() &&
         PreHeaderBR->getSuccessor(0) == PEnd &&
         "CFG edges in Preheader are not correct");
  // The new conditional branch replaces the old unconditional terminator.
  PreHeaderBR->eraseFromParent();

  ValueToValueMapTy LVMap;
  Function *F = Header->getParent();
  // These variables are used to update the CFG links in each iteration.
  BasicBlock *CompareBB = nullptr;
  BasicBlock *LastLoopBB = PH;
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog code.
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
    std::vector<BasicBlock*> NewBlocks;
    ValueToValueMapTy VMap;

    // Clone all the basic blocks in the loop, but we don't clone the loop
    // itself. This function adds the appropriate CFG connections. The second
    // argument flags the first copy created (leftOverIters == Count-1).
    CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
                    LoopBlocks, VMap, LVMap, LI);
    // The clone of the latch is the block the next copy is chained to.
    LastLoopBB = cast<BasicBlock>(VMap[Latch]);

    // Insert the cloned blocks into the function just before PEnd.
    F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
                                  NewBlocks[0], F->end());

    // Generate the code for the comparison which determines if the loop
    // prolog code needs to be executed.
    if (leftOverIters == Count-1) {
      // There is no compare block for the fall-through case of the first copy
      // created (leftOverIters == Count-1); its first block serves as the
      // fall-through target instead.
      CompareBB = NewBlocks[0];
    } else {
      // Create a new block for the comparison, placed before the previous
      // compare block.
      BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
                                             F, CompareBB);
      if (Loop *ParentLoop = L->getParentLoop()) {
        // Add the new block to the parent loop, if needed.
        ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
      }

      // The comparison of the extra iteration value against leftOverIters,
      // and the branch on its result.
      Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
                                      ConstantInt::get(CountTy, leftOverIters),
                                      "un.tmp");
      // Branch to either this copy of the loop body or the previous compare
      // block.
      BranchInst::Create(NewBlocks[0], CompareBB,
                         BranchVal, NewBB);
      CompareBB = NewBB;
      // The original preheader now enters the prolog through the newest
      // compare block.
      PH->getTerminator()->setSuccessor(0, NewBB);
      VMap[NewPH] = CompareBB;
    }

    // Rewrite the cloned instruction operands to use the values
    // created when the clone is created.
    for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
             E = NewBlocks[i]->end(); I != E; ++I) {
        RemapInstruction(I, VMap,
                         RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
      }
    }
  }

  // Connect the prolog code to the original loop and update the
  // PHI functions.
  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
                LPM->getAsPass());
  NumRuntimeUnrolled++;
  return true;
}
Пример #14
0
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
  if (skipOptnoneFunction(L))
    return false;

  LoopInfo *LI = &getAnalysis<LoopInfo>();
  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
  const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();

  BasicBlock *Header = L->getHeader();
  DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
        << "] Loop %" << Header->getName() << "\n");
  (void)Header;

  TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = CurrentThreshold;
  UP.OptSizeThreshold = OptSizeUnrollThreshold;
  UP.Count = CurrentCount;
  UP.Partial = CurrentAllowPartial;
  UP.Runtime = CurrentRuntime;
  TTI.getUnrollingPreferences(L, UP);

  // Determine the current unrolling threshold.  While this is normally set
  // from UnrollThreshold, it is overridden to a smaller value if the current
  // function is marked as optimize-for-size, and the unroll threshold was
  // not user specified.
  unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
  if (!UserThreshold &&
      Header->getParent()->getAttributes().
        hasAttribute(AttributeSet::FunctionIndex,
                     Attribute::OptimizeForSize))
    Threshold = UP.OptSizeThreshold;

  // Find trip count and trip multiple if count is not available
  unsigned TripCount = 0;
  unsigned TripMultiple = 1;
  // Find "latch trip count". UnrollLoop assumes that control cannot exit
  // via the loop latch on any iteration prior to TripCount. The loop may exit
  // early via an earlier branch.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
    TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
  }

  bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime;

  // Use a default unroll-count if the user doesn't specify a value
  // and the trip count is a run-time value.  The default is different
  // for run-time or compile-time trip count loops.
  unsigned Count = UserCount ? CurrentCount : UP.Count;
  if (Runtime && Count == 0 && TripCount == 0)
    Count = UnrollRuntimeCount;

  if (Count == 0) {
    // Conservative heuristic: if we know the trip count, see if we can
    // completely unroll (subject to the threshold, checked below); otherwise
    // try to find greatest modulo of the trip count which is still under
    // threshold value.
    if (TripCount == 0)
      return false;
    Count = TripCount;
  }

  // Enforce the threshold.
  if (Threshold != NoThreshold) {
    unsigned NumInlineCandidates;
    bool notDuplicatable;
    unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
                                            notDuplicatable, TTI);
    DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
    if (notDuplicatable) {
      DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
            << " instructions.\n");
      return false;
    }
    if (NumInlineCandidates != 0) {
      DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
      return false;
    }
    uint64_t Size = (uint64_t)LoopSize*Count;
    if (TripCount != 1 && Size > Threshold) {
      DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
            << " because size: " << Size << ">" << Threshold << "\n");
      bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
      if (!AllowPartial && !(Runtime && TripCount == 0)) {
        DEBUG(dbgs() << "  will not try to unroll partially because "
              << "-unroll-allow-partial not given\n");
        return false;
      }
      if (TripCount) {
        // Reduce unroll count to be modulo of TripCount for partial unrolling
        Count = Threshold / LoopSize;
        while (Count != 0 && TripCount%Count != 0)
          Count--;
      }
      else if (Runtime) {
        // Reduce unroll count to be a lower power-of-two value
        while (Count != 0 && Size > Threshold) {
          Count >>= 1;
          Size = LoopSize*Count;
        }
      }
      if (Count < 2) {
        DEBUG(dbgs() << "  could not unroll partially\n");
        return false;
      }
      DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
    }
Пример #15
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified.
///
/// The loop is left unmodified (and false is returned) when any of the
/// following holds: the loop has no preheader or no single latch block, the
/// loop body is not safe to clone, the latch block is not terminated by a
/// conditional branch, the address of the header block is taken, the trip
/// count is unknown and Count is less than 2, or a run-time trip count prolog
/// was required but could not be generated. Note that if the trip count (and
/// multiple) are not known, loop unrolling will mostly produce more code that
/// is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecessary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations. A TripCount of 0
/// means the trip count is not known at compile time.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop. It must be non-zero, and must divide
/// TripCount when TripCount is known.
///
/// AllowRuntime permits unrolling a loop whose trip count is unknown
/// (TripCount == 0) by first emitting a prolog via UnrollRuntimeLoopProlog.
///
/// The LoopInfo Analysis that is passed will be kept consistent. LI is
/// dereferenced unconditionally and therefore must not be null.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// PP may be null. When it is non-null, ScalarEvolution and the dominator tree
/// are looked up through it if available: ScalarEvolution is told to forget
/// the loop, the dominator tree is recalculated from scratch, and the affected
/// loops are re-simplified and put back into LCSSA form.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, unsigned TripMultiple,
                      LoopInfo *LI, Pass *PP, LPPassManager *LPM) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  // The header is cloned once per unrolled iteration below, so refuse to
  // touch a header whose address has been taken.
  if (Header->hasAddressTaken()) {
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (PP) {
    ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
    if (SE)
      SE->forgetLoop(L);
  }

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  // Report the unrolling decision.
  DebugLoc LoopLoc = L->getStartLoc();
  Function *F = Header->getParent();
  LLVMContext &Ctx = F->getContext();

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                           Twine("completely unrolled loop with ") +
                               Twine(TripCount) + " iterations");
  } else {
    // A Twine only references its operands, so it must not be stored in a
    // named variable that outlives the temporaries it was built from, and
    // Twine::concat() returns a new Twine instead of appending in place.
    // Assemble the complete remark inside a single full expression instead.
    auto EmitDiag = [&](const Twine &Suffix) {
      emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                             "unrolled loop by a factor of " + Twine(Count) +
                                 Suffix);
    };

    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
      EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip));
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      EmitDiag(" with " + Twine(TripMultiple) + " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
      EmitDiag(" with run-time trip count");
    } else {
      // No qualifier applies; still report the plain unroll factor.
      EmitDiag("");
    }
    DEBUG(dbgs() << "!\n");
  }

  // Successor 0 is taken when the latch condition is true; work out which
  // edge stays in the loop and which one leaves it.
  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  // Headers[i] / Latches[i] are the header and latch of unrolled iteration i;
  // index 0 is the original loop body.
  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  // Clone the loop body Count - 1 times; the original body serves as
  // iteration 0.
  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      // The back edge disappears, so only the preheader value remains.
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination. The last latch (j == 0) wraps around to the
    // original header.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      // Keep Latches pointing at live blocks when Dest gets folded away.
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  DominatorTree *DT = nullptr;
  if (PP) {
    // FIXME: Reconstruct dom info, because it is not preserved properly.
    // Incrementally updating domtree after loop unrolling would be easy.
    if (DominatorTreeWrapperPass *DTWP =
            PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
      DT = &DTWP->getDomTree();
      DT->recalculate(*L->getHeader()->getParent());
    }

    // Simplify any new induction variables in the partially unrolled loop.
    ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
    if (SE && !CompletelyUnroll) {
      SmallVector<WeakVH, 16> DeadInsts;
      simplifyLoopIVs(L, SE, LPM, DeadInsts);

      // Aggressively clean up dead instructions that simplifyLoopIVs already
      // identified. Any remaining should be cleaned up below.
      while (!DeadInsts.empty())
        if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
          RecursivelyDeleteTriviallyDeadInstructions(Inst);
    }
  }
  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      // Advance before a possible erase so the iterator stays valid.
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;

  // Capture the parent before the loop is possibly deleted from the queue.
  Loop *OuterL = L->getParentLoop();
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != nullptr)
    LPM->deleteLoopFromQueue(L);

  // If we have a pass and a DominatorTree we should re-simplify impacted loops
  // to ensure subsequent analyses can rely on this form. We want to simplify
  // at least one layer outside of the loop that was unrolled so that any
  // changes to the parent loop exposed by the unrolling are considered.
  if (PP && DT) {
    if (!OuterL && !CompletelyUnroll)
      OuterL = L;
    if (OuterL) {
      ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
      simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE);

      // LCSSA must be performed on the outermost affected loop. The unrolled
      // loop's last loop latch is guaranteed to be in the outermost loop after
      // deleteLoopFromQueue updates LoopInfo.
      Loop *LatchLoop = LI->getLoopFor(Latches.back());
      if (!OuterL->contains(LatchLoop))
        while (OuterL->getParentLoop() != LatchLoop)
          OuterL = OuterL->getParentLoop();

      formLCSSARecursively(*OuterL, *DT, SE);
    }
  }

  return true;
}
Пример #16
0
/// Pick an unroll count for \p L and hand the loop to UnrollLoop.
///
/// The count comes from CurrentCount, or from the constant trip count when
/// CurrentCount is zero. Unless the size threshold is disabled, the count is
/// then reduced to the largest divisor of the trip count whose unrolled size
/// fits under the threshold (only if partial unrolling is allowed).
///
/// Returns true if the loop was unrolled, false if it was left alone.
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
    LoopInfo *LI = &getAnalysis<LoopInfo>();
    ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();

    BasicBlock *Header = L->getHeader();
    DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
          << "] Loop %" << Header->getName() << "\n");

    // Start from the configured threshold; fall back to the smaller
    // optimize-for-size value only when the user did not pick one explicitly.
    unsigned Threshold = CurrentThreshold;
    if (!UserThreshold) {
        if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
            Threshold = OptSizeUnrollThreshold;
    }

    // Trip count (0 == unknown) and trip multiple (1 == nothing known).
    unsigned TripCount = 0;
    unsigned TripMultiple = 1;
    if (NoSCEVUnroll) {
        // Ask the loop itself rather than ScalarEvolution.
        TripCount = L->getSmallConstantTripCount();
        if (TripCount == 0)
            TripMultiple = L->getSmallConstantTripMultiple();
    } else if (BasicBlock *Latch = L->getLoopLatch()) {
        // "Latch trip count": UnrollLoop assumes control cannot leave through
        // the latch before TripCount iterations, although an earlier branch
        // may still exit the loop sooner.
        TripCount = SE->getSmallConstantTripCount(L, Latch);
        TripMultiple = SE->getSmallConstantTripMultiple(L, Latch);
    }

    // With no explicit count, aim for a complete unroll; that needs a known
    // trip count.
    unsigned Count = CurrentCount;
    if (Count == 0 && TripCount == 0)
        return false;
    if (Count == 0)
        Count = TripCount;

    // Apply the size limit unless it has been switched off.
    if (Threshold != NoThreshold) {
        const TargetData *TD = getAnalysisIfAvailable<TargetData>();
        unsigned NumInlineCandidates;
        unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD);
        DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
        if (NumInlineCandidates != 0) {
            DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
            return false;
        }

        // Widen before multiplying so the size estimate cannot wrap.
        uint64_t Size = static_cast<uint64_t>(LoopSize) * Count;
        const bool TooLarge = TripCount != 1 && Size > Threshold;
        if (TooLarge) {
            DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
                  << " because size: " << Size << ">" << Threshold << "\n");
            if (!CurrentAllowPartial) {
                DEBUG(dbgs() << "  will not try to unroll partially because "
                      << "-unroll-allow-partial not given\n");
                return false;
            }

            // Walk down from the largest count that fits until it divides the
            // trip count evenly. LoopSize is non-zero here because Size
            // exceeded the threshold.
            for (Count = Threshold / LoopSize;
                 Count != 0 && TripCount % Count != 0; --Count) {
            }

            if (Count < 2) {
                DEBUG(dbgs() << "  could not unroll partially\n");
                return false;
            }
            DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
        }
    }

    // Perform the transformation and report whether anything changed.
    return UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM);
}
Пример #17
0
/// Decide how to unroll \p L (fully, partially, or with a run-time trip
/// count), shrink the unroll count to respect the size thresholds, emit
/// "missed optimization" remarks when a pragma request cannot be honoured,
/// and finally call UnrollLoop.
///
/// Returns true if the loop was unrolled, false if it was left unmodified.
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
    if (skipOptnoneFunction(L))
        return false;

    LoopInfo *LI = &getAnalysis<LoopInfo>();
    ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();

    BasicBlock *Header = L->getHeader();
    DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
          << "] Loop %" << Header->getName() << "\n");

    if (HasUnrollDisablePragma(L)) {
        return false;
    }
    bool HasEnablePragma = HasUnrollEnablePragma(L);
    unsigned PragmaCount = UnrollCountPragmaValue(L);
    bool HasPragma = HasEnablePragma || PragmaCount > 0;

    TargetTransformInfo::UnrollingPreferences UP;
    getUnrollingPreferences(L, TTI, UP);

    // Find trip count and trip multiple if count is not available
    unsigned TripCount = 0;
    unsigned TripMultiple = 1;
    // Find "latch trip count". UnrollLoop assumes that control cannot exit
    // via the loop latch on any iteration prior to TripCount. The loop may exit
    // early via an earlier branch.
    BasicBlock *LatchBlock = L->getLoopLatch();
    if (LatchBlock) {
        TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
        TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
    }

    // Select an initial unroll count.  This may be reduced later based
    // on size thresholds.
    bool CountSetExplicitly;
    unsigned Count = selectUnrollCount(L, TripCount, HasEnablePragma, PragmaCount,
                                       UP, CountSetExplicitly);

    unsigned NumInlineCandidates;
    bool notDuplicatable;
    unsigned LoopSize =
        ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);
    DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
    // Widen before multiplying so a large count cannot wrap the estimate.
    uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
    if (notDuplicatable) {
        DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
              << " instructions.\n");
        return false;
    }
    if (NumInlineCandidates != 0) {
        DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
        return false;
    }

    unsigned Threshold, PartialThreshold;
    selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold);

    // Given Count, TripCount and thresholds determine the type of
    // unrolling which is to be performed.
    enum { Full = 0, Partial = 1, Runtime = 2 };
    int Unrolling;
    if (TripCount && Count == TripCount) {
        if (Threshold != NoThreshold && UnrolledSize > Threshold) {
            DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
                  << " because size: " << UnrolledSize << ">" << Threshold
                  << "\n");
            Unrolling = Partial;
        } else {
            Unrolling = Full;
        }
    } else if (TripCount && Count < TripCount) {
        Unrolling = Partial;
    } else {
        // Unknown trip count, or a requested count above the trip count.
        Unrolling = Runtime;
    }

    // Reduce count based on the type of unrolling and the threshold values.
    unsigned OriginalCount = Count;
    bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
    if (Unrolling == Partial) {
        bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
        if (!AllowPartial && !CountSetExplicitly) {
            DEBUG(dbgs() << "  will not try to unroll partially because "
                  << "-unroll-allow-partial not given\n");
            return false;
        }
        if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
            // Reduce unroll count to be modulo of TripCount for partial unrolling.
            // LoopSize is non-zero here: UnrolledSize, a multiple of LoopSize,
            // is strictly greater than the (unsigned) threshold.
            Count = PartialThreshold / LoopSize;
            while (Count != 0 && TripCount % Count != 0)
                Count--;
        }
    } else if (Unrolling == Runtime) {
        if (!AllowRuntime && !CountSetExplicitly) {
            DEBUG(dbgs() << "  will not try to unroll loop with runtime trip count "
                  << "-unroll-runtime not given\n");
            return false;
        }
        // Reduce unroll count to be the largest power-of-two factor of
        // the original count which satisfies the threshold limit.
        while (Count != 0 && UnrolledSize > PartialThreshold) {
            Count >>= 1;
            // Recompute in 64 bits, like the initial estimate. A 32-bit
            // product could wrap to a small value and stop the halving early
            // with a count that is still far over the threshold.
            UnrolledSize = (uint64_t)LoopSize * Count;
        }
        if (Count > UP.MaxCount)
            Count = UP.MaxCount;
        DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
    }

    if (HasPragma) {
        // Emit optimization remarks if we are unable to unroll the loop
        // as directed by a pragma.
        DebugLoc LoopLoc = L->getStartLoc();
        Function *F = Header->getParent();
        LLVMContext &Ctx = F->getContext();
        if (HasEnablePragma && PragmaCount == 0) {
            if (TripCount && Count != TripCount) {
                emitOptimizationRemarkMissed(
                    Ctx, DEBUG_TYPE, *F, LoopLoc,
                    "Unable to fully unroll loop as directed by unroll(enable) pragma "
                    "because unrolled size is too large.");
            } else if (!TripCount) {
                emitOptimizationRemarkMissed(
                    Ctx, DEBUG_TYPE, *F, LoopLoc,
                    "Unable to fully unroll loop as directed by unroll(enable) pragma "
                    "because loop has a runtime trip count.");
            }
        } else if (PragmaCount > 0 && Count != OriginalCount) {
            emitOptimizationRemarkMissed(
                Ctx, DEBUG_TYPE, *F, LoopLoc,
                "Unable to unroll loop the number of times directed by "
                "unroll_count pragma because unrolled size is too large.");
        }
    }

    if (Unrolling != Full && Count < 2) {
        // Partial unrolling by 1 is a nop.  For full unrolling, a factor
        // of 1 makes sense because loop control can be eliminated.
        return false;
    }

    // Unroll the loop.
    if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, &LPM))
        return false;

    return true;
}
Пример #18
0
/// Decide how to unroll \p L (fully, partially, or with a run-time trip
/// count), shrink the unroll count to respect the size thresholds, emit
/// "missed optimization" remarks when a pragma request cannot be honoured,
/// and finally call UnrollLoop.
///
/// A full unroll is chosen either because the unrolled size is small enough
/// or because analyzeLoopUnrollCost reports a cost that canUnrollCompletely
/// accepts.
///
/// Returns true if the loop was unrolled, false if it was left unmodified.
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
  if (skipOptnoneFunction(L))
    return false;

  Function &F = *L->getHeader()->getParent();

  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
  const TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  BasicBlock *Header = L->getHeader();
  DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
        << "] Loop %" << Header->getName() << "\n");

  if (HasUnrollDisablePragma(L)) {
    return false;
  }
  bool PragmaFullUnroll = HasUnrollFullPragma(L);
  unsigned PragmaCount = UnrollCountPragmaValue(L);
  bool HasPragma = PragmaFullUnroll || PragmaCount > 0;

  TargetTransformInfo::UnrollingPreferences UP;
  getUnrollingPreferences(L, TTI, UP);

  // Find trip count and trip multiple if count is not available
  unsigned TripCount = 0;
  unsigned TripMultiple = 1;
  // If there are multiple exiting blocks but one of them is the latch, use the
  // latch for the trip count estimation. Otherwise insist on a single exiting
  // block for the trip count estimation.
  BasicBlock *ExitingBlock = L->getLoopLatch();
  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
    ExitingBlock = L->getExitingBlock();
  if (ExitingBlock) {
    TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
    TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
  }

  // Select an initial unroll count.  This may be reduced later based
  // on size thresholds.
  bool CountSetExplicitly;
  unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,
                                     PragmaCount, UP, CountSetExplicitly);

  unsigned NumInlineCandidates;
  bool notDuplicatable;
  unsigned LoopSize =
      ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, &AC);
  DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");

  // When computing the unrolled size, note that the conditional branch on the
  // backedge and the comparison feeding it are not replicated like the rest of
  // the loop body (which is why 2 is subtracted).
  // NOTE(review): every "LoopSize-2" below assumes ApproximateLoopSize never
  // returns less than 3 (otherwise the subtraction wraps or the later division
  // divides by zero) -- confirm against its implementation.
  uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2;
  if (notDuplicatable) {
    DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
                 << " instructions.\n");
    return false;
  }
  if (NumInlineCandidates != 0) {
    DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
    return false;
  }

  unsigned Threshold, PartialThreshold;
  unsigned PercentDynamicCostSavedThreshold;
  unsigned DynamicCostSavingsDiscount;
  selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
                   PercentDynamicCostSavedThreshold,
                   DynamicCostSavingsDiscount);

  // Given Count, TripCount and thresholds determine the type of
  // unrolling which is to be performed.
  enum { Full = 0, Partial = 1, Runtime = 2 };
  int Unrolling;
  if (TripCount && Count == TripCount) {
    // Default to partial; upgraded to full below if one of the checks passes.
    Unrolling = Partial;
    // If the loop is really small, we don't need to run an expensive analysis.
    if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount,
                            UnrolledSize, UnrolledSize)) {
      Unrolling = Full;
    } else {
      // The loop isn't that small, but we still can fully unroll it if that
      // helps to remove a significant number of instructions.
      // To check that, run additional analysis on the loop.
      if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
              L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount))
        if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
                                DynamicCostSavingsDiscount, Cost->UnrolledCost,
                                Cost->RolledDynamicCost)) {
          Unrolling = Full;
        }
    }
  } else if (TripCount && Count < TripCount) {
    Unrolling = Partial;
  } else {
    // Unknown trip count, or a requested count above the trip count.
    Unrolling = Runtime;
  }

  // Reduce count based on the type of unrolling and the threshold values.
  unsigned OriginalCount = Count;
  bool AllowRuntime =
      (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime);
  // Don't unroll a runtime trip count loop with unroll full pragma.
  if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
    AllowRuntime = false;
  }
  if (Unrolling == Partial) {
    bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
    if (!AllowPartial && !CountSetExplicitly) {
      DEBUG(dbgs() << "  will not try to unroll partially because "
                   << "-unroll-allow-partial not given\n");
      return false;
    }
    if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
      // Reduce unroll count to be modulo of TripCount for partial unrolling.
      // The max() keeps the numerator from wrapping for thresholds below 2.
      Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2);
      while (Count != 0 && TripCount % Count != 0)
        Count--;
    }
  } else if (Unrolling == Runtime) {
    if (!AllowRuntime && !CountSetExplicitly) {
      DEBUG(dbgs() << "  will not try to unroll loop with runtime trip count "
                   << "-unroll-runtime not given\n");
      return false;
    }
    // Reduce unroll count to be the largest power-of-two factor of
    // the original count which satisfies the threshold limit.
    while (Count != 0 && UnrolledSize > PartialThreshold) {
      Count >>= 1;
      // Recompute in 64 bits, like the initial estimate. A 32-bit product
      // could wrap to a small value and stop the halving early with a count
      // that is still far over the threshold.
      UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2;
    }
    if (Count > UP.MaxCount)
      Count = UP.MaxCount;
    DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
  }

  if (HasPragma) {
    if (PragmaCount != 0)
      // If loop has an unroll count pragma mark loop as unrolled to prevent
      // unrolling beyond that requested by the pragma.
      SetLoopAlreadyUnrolled(L);

    // Emit optimization remarks if we are unable to unroll the loop
    // as directed by a pragma.
    DebugLoc LoopLoc = L->getStartLoc();
    // Use the enclosing function reference obtained at the top instead of
    // declaring a second, shadowing 'F' here.
    LLVMContext &Ctx = F.getContext();
    if (PragmaFullUnroll && PragmaCount == 0) {
      if (TripCount && Count != TripCount) {
        emitOptimizationRemarkMissed(
            Ctx, DEBUG_TYPE, F, LoopLoc,
            "Unable to fully unroll loop as directed by unroll(full) pragma "
            "because unrolled size is too large.");
      } else if (!TripCount) {
        emitOptimizationRemarkMissed(
            Ctx, DEBUG_TYPE, F, LoopLoc,
            "Unable to fully unroll loop as directed by unroll(full) pragma "
            "because loop has a runtime trip count.");
      }
    } else if (PragmaCount > 0 && Count != OriginalCount) {
      emitOptimizationRemarkMissed(
          Ctx, DEBUG_TYPE, F, LoopLoc,
          "Unable to unroll loop the number of times directed by "
          "unroll_count pragma because unrolled size is too large.");
    }
  }

  if (Unrolling != Full && Count < 2) {
    // Partial unrolling by 1 is a nop.  For full unrolling, a factor
    // of 1 makes sense because loop control can be eliminated.
    return false;
  }

  // Unroll the loop.
  if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
                  TripMultiple, LI, this, &LPM, &AC))
    return false;

  return true;
}
Пример #19
0
/// Unroll an innermost loop whose latch trip count is a known constant.
///
/// The loop is fully unrolled when Metrics accepts the full trip count under
/// the (currently hard-coded) threshold. Otherwise the largest accepted count
/// is searched for and rounded down to a divisor of the trip count, and the
/// loop is partially unrolled by that factor.
///
/// \param L   the loop to transform; loops containing sub-loops are skipped.
/// \param LPM the loop pass manager, forwarded to UnrollLoop.
/// \returns true if UnrollLoop reported that the loop was unrolled.
bool TrivialLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
  // Only unroll the deepest loops in the loop nest.
  if (!L->empty()) return false;

  LoopInfo *LI = &getAnalysis<LoopInfo>();
  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();

  BasicBlock *Header = L->getHeader();
  DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
        << "] Loop %" << Header->getName() << "\n");
  (void)Header;

  // Find trip count and trip multiple if count is not available
  unsigned TripCount = 0;
  unsigned TripMultiple = 1;
  // Find "latch trip count". UnrollLoop assumes that control cannot exit
  // via the loop latch on any iteration prior to TripCount. The loop may exit
  // early via an earlier branch.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
    TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
  }
  // Conservative heuristic: only loops with a known constant trip count are
  // handled. Try to completely unroll (subject to the threshold, checked
  // below); otherwise find the greatest divisor of the trip count which is
  // still under the threshold value.
  if (TripCount == 0)
    return false;

  unsigned Count = TripCount;

  LoopMetrics Metrics(L, &getAnalysis<TargetData>(), *SE);
  if (!Metrics.initialize(LI, &getAnalysis<AliasAnalysis>())) {
    DEBUG(dbgs() << "  Not unrolling loop with strange instructions.\n");
    return false;
  }

  // FIXME: Read the threshold from the constraints script.
  uint64_t Threshold = 256000;

  if (TripCount != 1 && !Metrics.isUnrollAccaptable(Count, Threshold)) {
    DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
          << " because size >" << Threshold << "\n");

    // The full trip count has just been rejected. Forget it, so that a search
    // which finds no acceptable count leaves Count at 0 (and we bail out
    // below) instead of silently falling back to the rejected full unroll.
    Count = 0;

    // Search a feasible count by binary search. This relies on acceptability
    // being monotonic in the unroll count. MidCount is always >= MinCount >= 1,
    // so "MidCount - 1" cannot wrap around.
    unsigned MaxCount = TripCount, MinCount = 1;

    while (MinCount <= MaxCount) {
      unsigned MidCount = MinCount + (MaxCount - MinCount) / 2;

      if (Metrics.isUnrollAccaptable(MidCount, Threshold)) {
        // MidCount is ok, try a bigger one.
        Count = MidCount;
        MinCount = MidCount + 1;
      } else
        // Else we had to try a smaller count.
        MaxCount = MidCount - 1;
    }

    // Reduce unroll count to be modulo of TripCount for partial unrolling
    while (Count != 0 && TripCount % Count != 0)
      --Count;

    // A factor of 0 means nothing was acceptable; a factor of 1 is a nop.
    if (Count < 2) {
      DEBUG(dbgs() << "  could not unroll partially\n");
      return false;
    }
    DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
  }

  //assert(TripCount % Count == 0 && "Bad unroll count!");
  //assert(Metrics.isUnrollAccaptable(Count, Threshold) && "Bad unroll count!");

  // Unroll the loop.
  if (!UnrollLoop(L, Count, TripCount, false, TripMultiple, LI, &LPM))
    return false;

  return true;
}
Пример #20
0
/// Insert code in the prolog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is total number
/// of loop bodies in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
/// the switch instruction is generated.
///
///        extraiters = tripcount % loopfactor
///        if (extraiters == 0) jump Loop:
///        else jump Prol
/// Prol:  LoopBody;
///        extraiters -= 1                 // Omitted if unroll factor is 2.
///        if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
///        if (tripcount < loopfactor) jump End
/// Loop:
/// ...
/// End:
///
/// \param L the loop to build a prolog for; it must have a single exiting
///        block, be in loop-simplify form and have a unique exit block.
/// \param Count the unroll factor; must be a power of two.
/// \param AllowExpensiveTripCount when false, give up if the expander reports
///        that materializing the trip count is a high-cost expansion.
/// \param LI loop info, handed to the block-splitting and cloning helpers.
/// \param LPM pass manager used to look up ScalarEvolution and the dominator
///        tree; a null LPM makes the function return false.
/// \returns true if the prolog was inserted, false if any precondition failed
///          (all failing checks happen before the IR is modified).
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
                                   bool AllowExpensiveTripCount, LoopInfo *LI,
                                   LPPassManager *LPM) {
    // for now, only unroll loops that contain a single exit
    if (!L->getExitingBlock())
        return false;

    // Make sure the loop is in canonical form, and there is a single
    // exit block only.
    if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
        return false;

    // Use Scalar Evolution to compute the trip count.  This allows more
    // loops to be unrolled than relying on induction var simplification
    if (!LPM)
        return false;
    ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
    if (!SE)
        return false;

    // Only unroll loops with a computable trip count and the trip count needs
    // to be an int value (allowing a pointer type is a TODO item)
    const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
    if (isa<SCEVCouldNotCompute>(BECountSC) ||
            !BECountSC->getType()->isIntegerTy())
        return false;

    unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

    // Add 1 since the backedge count doesn't include the first loop iteration
    const SCEV *TripCountSC =
        SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
    if (isa<SCEVCouldNotCompute>(TripCountSC))
        return false;

    BasicBlock *Header = L->getHeader();
    const DataLayout &DL = Header->getModule()->getDataLayout();
    SCEVExpander Expander(*SE, DL, "loop-unroll");
    if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
        return false;

    // We only handle cases when the unroll factor is a power of 2.
    // Count is the loop unroll factor, the number of extra copies added + 1.
    if (!isPowerOf2_32(Count))
        return false;

    // This constraint lets us deal with an overflowing trip count easily; see the
    // comment on ModVal below.
    if (Log2_32(Count) > BEWidth)
        return false;

    // If this loop is nested, then the loop unroller changes the code in
    // parent loop, so the Scalar Evolution pass needs to be run again
    if (Loop *ParentLoop = L->getParentLoop())
        SE->forgetLoop(ParentLoop);

    // Grab analyses that we preserve.
    auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
    auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;

    BasicBlock *PH = L->getLoopPreheader();
    BasicBlock *Latch = L->getLoopLatch();
    // It helps to split the original preheader twice, one for the end of the
    // prolog code and one for a new loop preheader
    BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
    BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
    BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());

    // Compute the number of extra iterations required, which is:
    //  extra iterations = run-time trip count % loop unroll factor
    // Count is a power of two (checked above), so the remainder is computed
    // below as TripCount & (Count - 1).
    Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                       PreHeaderBR);
    Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                            PreHeaderBR);

    IRBuilder<> B(PreHeaderBR);
    Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");

    // If ModVal is zero, we know that either
    //  1. there are no iteration to be run in the prologue loop
    // OR
    //  2. the addition computing TripCount overflowed
    //
    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
    // number of iterations that remain to be run in the original loop is a
    // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
    // explicitly check this above).

    Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");

    // Branch to either the extra iterations or the cloned/unrolled loop
    // We will fix up the true branch label when adding loop body copies
    B.CreateCondBr(BranchVal, PEnd, PEnd);
    // The new conditional branch was inserted before the old terminator, which
    // is now redundant; check it is the expected plain branch, then drop it.
    assert(PreHeaderBR->isUnconditional() &&
           PreHeaderBR->getSuccessor(0) == PEnd &&
           "CFG edges in Preheader are not correct");
    PreHeaderBR->eraseFromParent();
    Function *F = Header->getParent();
    // Get an ordered list of blocks in the loop to help with the ordering of the
    // cloned blocks in the prolog code
    LoopBlocksDFS LoopBlocks(L);
    LoopBlocks.perform(LI);

    //
    // For each extra loop iteration, create a copy of the loop's basic blocks
    // and generate a condition that branches to the copy depending on the
    // number of 'left over' iterations.
    //
    std::vector<BasicBlock *> NewBlocks;
    ValueToValueMapTy VMap;

    // With a factor of 2 at most one extra iteration can remain, so the prolog
    // is emitted as straight-line code rather than as a loop.
    bool UnrollPrologue = Count == 2;

    // Clone all the basic blocks in the loop. If Count is 2, we don't clone
    // the loop, otherwise we create a cloned loop to execute the extra
    // iterations. This function adds the appropriate CFG connections.
    CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
                    VMap, LI);

    // Insert the cloned blocks into function just before the original loop
    F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0],
                                  F->end());

    // Rewrite the cloned instruction operands to use the values
    // created when the clone is created.
    for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
        for (BasicBlock::iterator I = NewBlocks[i]->begin(),
                E = NewBlocks[i]->end();
                I != E; ++I) {
            RemapInstruction(I, VMap,
                             RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
        }
    }

    // Connect the prolog code to the original loop and update the
    // PHI functions.
    BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
    ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
                  /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass());
    NumRuntimeUnrolled++;
    return true;
}
Пример #21
0
/// Remove dead loops, by which we mean loops that do not impact the observable
/// behavior of the program other than finite running time.  Note we do ensure
/// that this never removes a loop that might be infinite, as doing so could
/// change the halting/non-halting nature of a program. NOTE: This entire
/// process relies pretty heavily on LoopSimplify and LCSSA in order to make
/// various safety checks work.
///
/// \param L the candidate loop; it must be in LCSSA form (asserted), have a
///        preheader, dedicated exits, no sub-loops and a single exit block.
/// \param DT dominator tree, updated in place when the loop is deleted.
/// \param SE scalar evolution, used for the trip-count check and told to
///        forget the loop before it is deleted.
/// \param loopInfo loop info, from which the loop and its blocks are removed.
/// \returns true if the IR was changed. NOTE(review): Changed is handed to
///          isLoopDead and returned even when the loop is kept, so isLoopDead
///          presumably may modify the loop on its own - confirm against its
///          definition.
bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                               LoopInfo &loopInfo) {
  assert(L->isLCSSAForm(DT) && "Expected LCSSA!");

  // We can only remove the loop if there is a preheader that we can
  // branch from after removing it.
  BasicBlock *preheader = L->getLoopPreheader();
  if (!preheader)
    return false;

  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->hasDedicatedExits())
    return false;

  // We can't remove loops that contain subloops.  If the subloops were dead,
  // they would already have been removed in earlier executions of this pass.
  if (L->begin() != L->end())
    return false;

  SmallVector<BasicBlock *, 4> exitingBlocks;
  L->getExitingBlocks(exitingBlocks);

  SmallVector<BasicBlock *, 4> exitBlocks;
  L->getUniqueExitBlocks(exitBlocks);

  // We require that the loop only have a single exit block.  Otherwise, we'd
  // be in the situation of needing to be able to solve statically which exit
  // block will be branched to, or trying to preserve the branching logic in
  // a loop invariant manner.
  if (exitBlocks.size() != 1)
    return false;

  // Finally, we have to check that the loop really is dead.
  bool Changed = false;
  if (!isLoopDead(L, SE, exitingBlocks, exitBlocks, Changed, preheader))
    return Changed;

  // Don't remove loops for which we can't solve the trip count.
  // They could be infinite, in which case we'd be changing program behavior.
  const SCEV *S = SE.getMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(S))
    return Changed;

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  BasicBlock *exitBlock = exitBlocks[0];

  // Because we're deleting a large chunk of code at once, the sequence in which
  // we remove things is very important to avoid invalidation issues.  Don't
  // mess with this unless you have good reason and know what you're doing.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  SE.forgetLoop(L);

  // Connect the preheader directly to the exit block.
  TerminatorInst *TI = preheader->getTerminator();
  TI->replaceUsesOfWith(L->getHeader(), exitBlock);

  // Rewrite phis in the exit block to get their inputs from
  // the preheader instead of the exiting block.
  // The entry for the first exiting block is redirected to the preheader and
  // the entries for every other exiting block are removed, leaving each phi
  // with the preheader as its only in-loop-derived predecessor.
  BasicBlock *exitingBlock = exitingBlocks[0];
  BasicBlock::iterator BI = exitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    int j = P->getBasicBlockIndex(exitingBlock);
    assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
    P->setIncomingBlock(j, preheader);
    for (unsigned i = 1; i < exitingBlocks.size(); ++i)
      P->removeIncomingValue(exitingBlocks[i]);
    ++BI;
  }

  // Update the dominator tree and remove the instructions and blocks that will
  // be deleted from the reference counting scheme.
  SmallVector<DomTreeNode*, 8> ChildNodes;
  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI) {
    // Move all of the block's children to be children of the preheader, which
    // allows us to remove the domtree entry for the block.
    // The children are copied into ChildNodes first so that re-parenting them
    // does not happen while walking the node's own child list.
    ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
    for (DomTreeNode *ChildNode : ChildNodes) {
      DT.changeImmediateDominator(ChildNode, DT[preheader]);
    }

    ChildNodes.clear();
    DT.eraseNode(*LI);

    // Remove the block from the reference counting scheme, so that we can
    // delete it freely later.
    (*LI)->dropAllReferences();
  }

  // Erase the instructions and the blocks without having to worry
  // about ordering because we already dropped the references.
  // NOTE: This iteration is safe because erasing the block does not remove its
  // entry from the loop's block list.  We do that in the next section.
  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI)
    (*LI)->eraseFromParent();

  // Finally, the blocks from loopinfo.  This has to happen late because
  // otherwise our loop iterators won't work.

  // Snapshot the block pointers into a separate set first: removeBlock is
  // called on each of them, so we must not iterate the loop's own list here.
  SmallPtrSet<BasicBlock *, 8> blocks;
  blocks.insert(L->block_begin(), L->block_end());
  for (BasicBlock *BB : blocks)
    loopInfo.removeBlock(BB);

  // The last step is to update LoopInfo now that we've eliminated this loop.
  loopInfo.markAsRemoved(L);
  Changed = true;

  ++NumDeleted;

  return Changed;
}
Пример #22
0
bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
                                    ScalarEvolution &SE) {
  const SCEV *Zero = SE.getZero(S->getType());
  return SE.isAvailableAtLoopEntry(S, L) &&
         SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
}
Пример #23
0
// Compute how many leading iterations should be peeled so that conditional
// branches inside the loop body become statically decidable in the remaining
// loop. For instance, peeling 2 iterations off
//
//   for (i = 0; i < n; i++)
//     if (i < 2)
//       ..
//     else
//       ..
//
// lets "i < 2" be folded at compile time. At most MaxPeelCount iterations are
// ever requested; 0 means no profitable peel count was found.
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
                                         ScalarEvolution &SE) {
  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
  unsigned BestPeelCount = 0;

  for (auto *Block : L.blocks()) {
    // Only conditional branches are of interest, and the latch branch is the
    // loop exit condition, which peeling is not meant to eliminate.
    auto *Branch = dyn_cast<BranchInst>(Block->getTerminator());
    if (!Branch || Branch->isUnconditional() || L.getLoopLatch() == Block)
      continue;

    Value *Lhs, *Rhs;
    CmpInst::Predicate Pred;
    if (!match(Branch->getCondition(),
               m_ICmp(Pred, m_Value(Lhs), m_Value(Rhs))))
      continue;

    const SCEV *LhsSCEV = SE.getSCEV(Lhs);
    const SCEV *RhsSCEV = SE.getSCEV(Rhs);

    // Comparisons whose outcome never depends on the iteration gain nothing
    // from peeling.
    const bool AlwaysTrue = SE.isKnownPredicate(Pred, LhsSCEV, RhsSCEV);
    if (AlwaysTrue ||
        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LhsSCEV,
                            RhsSCEV))
      continue;

    // We want exactly one AddRec operand; canonicalize it onto the left.
    if (!isa<SCEVAddRecExpr>(LhsSCEV)) {
      if (!isa<SCEVAddRecExpr>(RhsSCEV))
        continue;
      std::swap(LhsSCEV, RhsSCEV);
      Pred = ICmpInst::getSwappedPredicate(Pred);
    }

    const SCEVAddRecExpr *Rec = cast<SCEVAddRecExpr>(LhsSCEV);

    // Keep the SCEV work below cheap: require an affine recurrence of this
    // very loop and a monotonic predicate, since otherwise the body cannot be
    // simplified.
    // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
    //        simplify the loop, if we peel 1 additional iteration, if there
    //        is no wrapping.
    bool Increasing;
    if (!Rec->isAffine())
      continue;
    if (Rec->getLoop() != &L)
      continue;
    if (!SE.isMonotonicPredicate(Rec, Pred, Increasing))
      continue;
    (void)Increasing;

    // Start from the peel count already agreed on and see whether going
    // further makes Pred (or its negation) statically known.
    unsigned Candidate = BestPeelCount;

    const SCEV *ValueAtIter = Rec->evaluateAtIteration(
        SE.getConstant(LhsSCEV->getType(), Candidate), SE);

    // When the condition itself is not known at the starting iteration,
    // switch to the negated predicate (the one holding on the else branch) so
    // that iterations making the original condition false can be peeled.
    if (!SE.isKnownPredicate(Pred, ValueAtIter, RhsSCEV))
      Pred = ICmpInst::getInversePredicate(Pred);

    // Advance one iteration at a time while the predicate stays known.
    const SCEV *Stride = Rec->getStepRecurrence(SE);
    for (; Candidate < MaxPeelCount &&
           SE.isKnownPredicate(Pred, ValueAtIter, RhsSCEV);
         Candidate++)
      ValueAtIter = SE.getAddExpr(ValueAtIter, Stride);

    // Accept the larger count only if the monotonic predicate !Pred is known
    // on the first iteration that remains in the loop after peeling.
    if (Candidate <= BestPeelCount)
      continue;
    if (SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValueAtIter,
                            RhsSCEV))
      BestPeelCount = Candidate;
  }

  return BestPeelCount;
}
/// Split a condition into something semantically equivalent to (0 <= I <
/// Limit), both comparisons signed and Len loop invariant on L and positive.
/// On success, return true and set Index to I and UpperLimit to Limit.  Return
/// false on failure (we may still write to UpperLimit and Index on failure).
/// It does not try to interpret I as a loop index.
///
static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE,
                                     Value *Condition, const SCEV *&Index,
                                     Value *&UpperLimit) {

  // TODO: currently this catches some silly cases like comparing "%idx slt 1".
  // Our transformations are still correct, but less likely to be profitable in
  // those cases.  We have to come up with some heuristics that pick out the
  // range checks that are more profitable to clone a loop for.  This function
  // in general can be made more robust.

  using namespace llvm::PatternMatch;

  Value *A = nullptr;
  Value *B = nullptr;
  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;

  // In these early checks we assume that the matched UpperLimit is positive.
  // We'll verify that fact later, before returning true.

  if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
    Value *IndexV = nullptr;
    Value *ExpectedUpperBoundCheck = nullptr;

    if (IsLowerBoundCheck(A, IndexV))
      ExpectedUpperBoundCheck = B;
    else if (IsLowerBoundCheck(B, IndexV))
      ExpectedUpperBoundCheck = A;
    else
      return false;

    if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit))
      return false;

    Index = SE.getSCEV(IndexV);

    if (isa<SCEVCouldNotCompute>(Index))
      return false;

  } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
    switch (Pred) {
    default:
      return false;

    case ICmpInst::ICMP_SGT:
      std::swap(A, B);
    // fall through
    case ICmpInst::ICMP_SLT:
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index))
        return false;
      break;

    case ICmpInst::ICMP_UGT:
      std::swap(A, B);
    // fall through
    case ICmpInst::ICMP_ULT:
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index))
        return false;
      break;
    }
  } else {
    return false;
  }

  const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit);
  if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) ||
      !SE.isKnownNonNegative(UpperLimitSCEV))
    return false;

  if (SE.getLoopDisposition(UpperLimitSCEV, L) !=
      ScalarEvolution::LoopInvariant) {
    DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName()
                 << " ";
          dbgs() << " UpperLimit is not loop invariant: "
                 << UpperLimit->getName() << "\n";);
Пример #25
0
/// Loop-pass entry point: unroll \p L by CurrentCount, or by the constant
/// latch trip count when CurrentCount is 0.
///
/// Progress is traced unconditionally on errs(). The size threshold, the
/// inline-candidate check and the loop-id lookups that earlier revisions of
/// this function carried as commented-out code are not performed: the loop
/// size is computed only to be printed and to reject loops containing
/// non-duplicatable instructions.
///
/// \returns true if UnrollLoop reported success.
bool CustomUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
    errs() << "Entering loop unroll\n";
    LoopInfo *LI = &getAnalysis<LoopInfo>();
    ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();

    BasicBlock *Header = L->getHeader();
    DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
          << "] Loop %" << Header->getName() << "\n");
    (void)Header;

    // Trip count and trip multiple of the latch. UnrollLoop assumes control
    // cannot leave through the latch before TripCount iterations, although an
    // earlier branch may still exit the loop sooner. Without a latch the
    // defaults (unknown count, multiple of 1) are kept.
    unsigned TripCount = 0;
    unsigned TripMultiple = 1;
    if (BasicBlock *Latch = L->getLoopLatch()) {
        errs() << "Assigning tripcount and tripmultiple\n";
        TripCount = SE->getSmallConstantTripCount(L, Latch);
        TripMultiple = SE->getSmallConstantTripMultiple(L, Latch);
    }

    // Start from the configured count; 0 means "not specified".
    unsigned Count = CurrentCount;

    errs() << "Chaecking for tripcount, count is " << Count;
    errs() << " tripcount is " << TripCount << "\n";

    // Conservative heuristic: with no configured count, fall back to a full
    // unroll by the known trip count, and give up if that is unknown too.
    if (Count == 0 && TripCount == 0)
        return false;
    if (Count == 0)
        Count = TripCount;

    errs() << "Chaecking for threshold\n";

    // Measure the loop. The size is reported only; no threshold is enforced.
    unsigned NumInlineCandidates;
    bool notDuplicatable;
    unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
                                            notDuplicatable, TTI);
    errs() << "Approximate loop size is " << LoopSize << "\n";
    DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
    if (notDuplicatable) {
        DEBUG(dbgs() << "  Not unrolling loop which contains non duplicatable"
              << " instructions.\n");
        return false;
    }

    errs() << "Unrolling the loop\n";
    errs() << "Count is " << Count << ", tripcount is " << TripCount;
    errs() << ", allow runtime is " << UnrollRuntime;
    errs() << ", trip multiple is " << TripMultiple << "\n";

    // Unroll the loop.
    const bool Unrolled =
        UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM);
    if (!Unrolled)
        errs() << "Unrolling the loop failed\n";

    return Unrolled;
}
Пример #26
0
/// Decide whether the outer loop \p L, together with its single inner loop,
/// may legally be unrolled and jammed.
///
/// The supported shape of the outer loop is:
///
///         |
///     ForeFirst    <----\    }
///      Blocks           |    } ForeBlocks
///     ForeLast          |    }
///         |             |
///     SubLoopFirst  <\  |    }
///      Blocks        |  |    } SubLoopBlocks
///     SubLoopLast   -/  |    }
///         |             |
///     AftFirst          |    }
///      Blocks           |    } AftBlocks
///     AftLast     ------/    }
///         |
///
/// Fore, SubLoop and Aft may in theory each hold any number of blocks, as
/// long as there is one edge Fore->SubLoop, one edge SubLoop->Aft and a single
/// outer exit (from Aft). In practice Aft is restricted to one block here,
/// and the profitability checks of the pass restrict things further.
///
/// Unroll-and-jam turns the block order F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2 into
/// F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2. The Fore blocks of every unrolled
/// iteration must therefore be movable ahead of all SubLoop blocks, which in
/// turn requires that the loop-carried operands of the outer header phis can
/// be computed before the inner loop.
///
/// On top of that come checks for address-taken headers, a loop-invariant
/// inner backedge count, exceptions and memory dependencies. Loops needing
/// runtime unrolling can still be rejected later: UnrollRuntimeLoopRemainder
/// may fail inside UnrollAndJamLoop when the trip count is hard to compute.
bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                                DependenceInfo &DI) {
  // Exactly one inner loop, and both loops in loop-simplify form.
  if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
    return false;
  Loop *Inner = L->getSubLoops()[0];
  if (!Inner->isLoopSimplifyForm())
    return false;

  BasicBlock *OuterHeader = L->getHeader();
  BasicBlock *OuterLatch = L->getLoopLatch();
  BasicBlock *OuterExiting = L->getExitingBlock();
  BasicBlock *InnerHeader = Inner->getHeader();
  BasicBlock *InnerLatch = Inner->getLoopLatch();
  BasicBlock *InnerExiting = Inner->getExitingBlock();

  // Each loop must leave through its latch and nowhere else.
  if (OuterLatch != OuterExiting || InnerLatch != InnerExiting)
    return false;

  if (OuterHeader->hasAddressTaken() || InnerHeader->hasAddressTaken())
    return false;

  // Classify the outer loop's blocks as Fore/SubLoop/Aft using dominators.
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  const bool Partitioned = partitionOuterLoopBlocks(
      L, Inner, ForeBlocks, SubLoopBlocks, AftBlocks, &DT);
  if (!Partitioned)
    return false;

  // Instructions from Aft may have to be moved into Fore, which gets harder
  // with several (possibly conditionally executed) Aft blocks. Only a single
  // Aft block is accepted for now.
  if (AftBlocks.size() != 1)
    return false;

  // The inner loop must run the same number of times on every outer
  // iteration: its backedge count has to be a computable integer that is
  // invariant in the outer loop.
  const SCEV *InnerBECount = SE.getExitCount(Inner, InnerLatch);
  if (isa<SCEVCouldNotCompute>(InnerBECount))
    return false;
  if (!InnerBECount->getType()->isIntegerTy())
    return false;
  if (SE.getLoopDisposition(InnerBECount, L) != ScalarEvolution::LoopInvariant)
    return false;

  // Reject loops that may throw.
  LoopSafetyInfo LSI;
  computeLoopSafetyInfo(&LSI, L);
  if (LSI.MayThrow)
    return false;

  // The cheap checks are done. What remains is to rule out dependencies that
  // would stop us from placing
  //  - ForeBlocks ahead of the subloop and AftBlocks,
  //  - the subloop ahead of AftBlocks,
  //  - the ForeBlock phi operands ahead of the subloop.

  // Walk backwards from the values the outer header phis receive over the
  // latch edge and verify that everything they need can be hoisted above the
  // inner loop.
  SmallVector<Instruction *, 8> Pending;
  SmallPtrSet<Instruction *, 8> Seen;
  for (auto &Phi : OuterHeader->phis()) {
    Value *FromLatch = Phi.getIncomingValueForBlock(OuterLatch);
    if (Instruction *Def = dyn_cast<Instruction>(FromLatch))
      Pending.push_back(Def);
  }
  while (!Pending.empty()) {
    Instruction *Cur = Pending.back();
    Pending.pop_back();
    if (!Seen.insert(Cur).second)
      continue;

    // A value produced inside the inner loop cannot be moved above it.
    if (Inner->contains(Cur->getParent()))
      return false;

    // Anything outside Aft needs no further inspection.
    if (!AftBlocks.count(Cur->getParent()))
      continue;

    // Inside Aft, a phi (probably LCSSA) or anything touching memory or
    // having side effects makes the nest unsafe.
    if (isa<PHINode>(Cur))
      return false;
    if (Cur->mayHaveSideEffects() || Cur->mayReadOrWriteMemory())
      return false;

    // Otherwise keep following its instruction operands.
    for (auto &Op : Cur->operands())
      if (Instruction *OpDef = dyn_cast<Instruction>(Op))
        Pending.push_back(OpDef);
  }

  // Finally look for memory dependencies that forbid the reordering. Given
  // how Fore/Sub/Aft blocks are rearranged, none may exist between Fore-Sub,
  // Fore-Aft, Sub-Aft and Sub-Sub.
  return checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI);
}
Пример #27
0
/// Try to delete \p L when it has no observable effect on the program.
///
/// A loop counts as dead when removing it changes nothing observable about the
/// program apart from its (finite) running time. A loop that might run forever
/// is never removed unless it is proven never to execute, because deleting it
/// could change whether the program halts.
///
/// The safety checks performed here depend on LoopSimplify form and LCSSA.
///
/// \returns LoopDeletionResult::Deleted if the loop was removed,
/// LoopDeletionResult::Modified if the loop was kept but isLoopDead reported
/// that it changed the loop (e.g. by hoisting trivially loop invariant
/// instructions), and LoopDeletionResult::Unmodified otherwise.
static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
                                           ScalarEvolution &SE, LoopInfo &LI) {
  assert(L->isLCSSAForm(DT) && "Expected LCSSA!");

  // Once the loop is gone we need a preheader to branch from. Dedicated exits
  // are required as well; without LoopSimplify form we stay out of trouble.
  BasicBlock *Preheader = L->getLoopPreheader();
  const bool InSimplifiedForm = Preheader && L->hasDedicatedExits();
  if (!InSimplifiedForm) {
    DEBUG(dbgs()
          << "Deletion requires Loop with preheader and dedicated exits.\n");
    return LoopDeletionResult::Unmodified;
  }

  // Loops with subloops are left alone. Dead subloops would already have been
  // removed by earlier executions of this pass.
  const bool HasSubLoops = L->begin() != L->end();
  if (HasSubLoops) {
    DEBUG(dbgs() << "Loop contains subloops.\n");
    return LoopDeletionResult::Unmodified;
  }

  BasicBlock *ExitBlock = L->getUniqueExitBlock();

  // First opportunity: the loop has a unique exit and is proven never to run.
  if (ExitBlock && isLoopNeverExecuted(L)) {
    DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
    // Replace every incoming value of the leading phi nodes in the exit block
    // with undef. The walk stops at the first non-phi instruction.
    for (BasicBlock::iterator It = ExitBlock->begin();
         PHINode *Phi = dyn_cast<PHINode>(It); ++It) {
      for (unsigned Idx = 0, NumIncoming = Phi->getNumIncomingValues();
           Idx != NumIncoming; ++Idx)
        Phi->setIncomingValue(Idx, UndefValue::get(Phi->getType()));
    }
    deleteDeadLoop(L, &DT, &SE, &LI);
    ++NumDeleted;
    return LoopDeletionResult::Deleted;
  }

  // Second opportunity: the loop is dead because everything it computes is
  // invariant. All checks from here on serve that case.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Exactly one exit block is required. With several, we would have to decide
  // statically which one is taken, or preserve the branching logic in a loop
  // invariant manner.
  if (!ExitBlock) {
    DEBUG(dbgs() << "Deletion requires single exit block\n");
    return LoopDeletionResult::Unmodified;
  }

  // Verify that the loop really is dead. isLoopDead may alter the loop even
  // when it answers "no"; LoopWasModified records whether that happened.
  bool LoopWasModified = false;
  if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, LoopWasModified,
                  Preheader)) {
    DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n");
    if (LoopWasModified)
      return LoopDeletionResult::Modified;
    return LoopDeletionResult::Unmodified;
  }

  // A loop whose trip count cannot be bounded might be infinite, and removing
  // an infinite loop would change program behavior. Keep such loops.
  const SCEV *MaxBackedgeTaken = SE.getMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxBackedgeTaken)) {
    DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n");
    if (LoopWasModified)
      return LoopDeletionResult::Modified;
    return LoopDeletionResult::Unmodified;
  }

  DEBUG(dbgs() << "Loop is invariant, delete it!");
  deleteDeadLoop(L, &DT, &SE, &LI);
  ++NumDeleted;

  return LoopDeletionResult::Deleted;
}