Example #1
bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                              ScalarEvolution &SE) {
  Loop *OuterL = InnerLoop->getParentLoop();
  if (!OuterL)
    return true;

  // Get the backedge taken count for the inner loop
  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
  if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
      !InnerLoopBECountSC->getType()->isIntegerTy())
    return false;

  // Get whether count is invariant to the outer loop
  ScalarEvolution::LoopDisposition LD =
      SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
  if (LD != ScalarEvolution::LoopInvariant)
    return false;

  return true;
}
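
A minimal usage sketch, not taken from the original source: a hypothetical loop-pass helper that uses the check above to filter candidate inner loops. The helper name and the depth test are assumptions; only hasIterationCountInvariantInParent itself comes from the code shown.

// Hedged sketch: shouldConsiderInnerLoop is a made-up helper, not an LLVM API,
// and assumes the usual Loop/ScalarEvolution headers are already included.
static bool shouldConsiderInnerLoop(Loop *InnerLoop, ScalarEvolution &SE) {
  // Only genuinely nested loops are interesting here; loops without a parent
  // pass hasIterationCountInvariantInParent trivially.
  if (InnerLoop->getLoopDepth() < 2)
    return false;
  // Reject inner loops whose trip count varies with the outer iteration.
  return hasIterationCountInvariantInParent(InnerLoop, SE);
}
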
/// Split a condition into something semantically equivalent to (0 <= I <
/// Limit), with both comparisons signed and Limit loop invariant on L and
/// positive.  On success, return true and set Index to I and UpperLimit to
/// Limit.  Return false on failure (we may still write to UpperLimit and
/// Index on failure).  It does not try to interpret I as a loop index.
/// (An illustrative source-level pattern follows the function.)
///
static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE,
                                     Value *Condition, const SCEV *&Index,
                                     Value *&UpperLimit) {

  // TODO: currently this catches some silly cases like comparing "%idx slt 1".
  // Our transformations are still correct, but less likely to be profitable in
  // those cases.  We have to come up with some heuristics that pick out the
  // range checks that are more profitable to clone a loop for.  This function
  // in general can be made more robust.

  using namespace llvm::PatternMatch;

  Value *A = nullptr;
  Value *B = nullptr;
  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;

  // In these early checks we assume that the matched UpperLimit is positive.
  // We'll verify that fact later, before returning true.

  if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
    Value *IndexV = nullptr;
    Value *ExpectedUpperBoundCheck = nullptr;

    if (IsLowerBoundCheck(A, IndexV))
      ExpectedUpperBoundCheck = B;
    else if (IsLowerBoundCheck(B, IndexV))
      ExpectedUpperBoundCheck = A;
    else
      return false;

    if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit))
      return false;

    Index = SE.getSCEV(IndexV);

    if (isa<SCEVCouldNotCompute>(Index))
      return false;

  } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
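    // SGT/UGT are normalized below by swapping operands, so both forms are
    // handled as "Index < UpperLimit".  For the signed predicates the lower
    // bound is not implied, so the index must be proven non-negative; for the
    // unsigned predicates "0 <= Index" always holds.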
    switch (Pred) {
    default:
      return false;

    case ICmpInst::ICMP_SGT:
      std::swap(A, B);
    // fall through
    case ICmpInst::ICMP_SLT:
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index))
        return false;
      break;

    case ICmpInst::ICMP_UGT:
      std::swap(A, B);
    // fall through
    case ICmpInst::ICMP_ULT:
      UpperLimit = B;
      Index = SE.getSCEV(A);
      if (isa<SCEVCouldNotCompute>(Index))
        return false;
      break;
    }
  } else {
    return false;
  }

  const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit);
  if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) ||
      !SE.isKnownNonNegative(UpperLimitSCEV))
    return false;

  if (SE.getLoopDisposition(UpperLimitSCEV, L) !=
      ScalarEvolution::LoopInvariant) {
    DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName()
                 << " ";
          dbgs() << " UpperLimit is not loop invariant: "
                 << UpperLimit->getName() << "\n";);
    return false;
  }

  return true;
}
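
For orientation only (not part of the original source): the kind of condition this function splits typically comes from an explicit bounds check in the source, where the front end emits an i1 'and' of a lower-bound and an upper-bound comparison. All names in the sketch below are invented.

// Hedged, illustrative source pattern; ClampedStore, A, Len and N are made-up
// names, not anything from the pass itself.
void ClampedStore(int *A, int Len, int N) {
  for (int I = 0; I < N; ++I)
    // The branch condition lowers to an i1 'and' of "I >= 0" and "I < Len".
    // SplitRangeCheckCondition would set Index to the SCEV of I and UpperLimit
    // to Len, and then still require ScalarEvolution to prove Len non-negative
    // and invariant in the loop before returning true.
    if (I >= 0 && I < Len)
      A[I] = 0;
}
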
Example #3
bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                                DependenceInfo &DI) {
  /* We currently handle outer loops like this:
        |
    ForeFirst    <----\    }
     Blocks           |    } ForeBlocks
    ForeLast          |    }
        |             |
    SubLoopFirst  <\  |    }
     Blocks        |  |    } SubLoopBlocks
    SubLoopLast   -/  |    }
        |             |
    AftFirst          |    }
     Blocks           |    } AftBlocks
    AftLast     ------/    }
        |

    There can (in theory) be any number of blocks in ForeBlocks, SubLoopBlocks
    and AftBlocks, provided that there is one edge from Fores to SubLoops,
    one edge from SubLoops to Afts and a single outer loop exit (from Afts).
    In practice we currently limit Aft blocks to a single block, and limit
    things further in the profitability checks of the unroll and jam pass.

    Because of the way we rearrange basic blocks, we also require that
    the Fore blocks on all unrolled iterations are safe to move before the
    SubLoop blocks of all iterations. So we require that the phi node looping
    operands of ForeHeader can be moved to at least the end of ForeEnd, so that
    we can arrange cloned Fore Blocks before the subloop and match up Phi's
    correctly.

    i.e. the old order of blocks is F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.  It needs
    to be safe to transform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.  (A
    source-level sketch of this reordering follows the function.)

    There are then a number of checks along the lines of no calls, no
    exceptions, inner loop IV is consistent, etc. Note that for loops requiring
    runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
    UnrollAndJamLoop if the trip count cannot be easily calculated.
  */

  if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
    return false;
  Loop *SubLoop = L->getSubLoops()[0];
  if (!SubLoop->isLoopSimplifyForm())
    return false;

  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Exit = L->getExitingBlock();
  BasicBlock *SubLoopHeader = SubLoop->getHeader();
  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
  BasicBlock *SubLoopExit = SubLoop->getExitingBlock();

  if (Latch != Exit)
    return false;
  if (SubLoopLatch != SubLoopExit)
    return false;

  if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken())
    return false;

  // Split blocks into Fore/SubLoop/Aft based on dominators
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
                                AftBlocks, &DT))
    return false;

  // Aft blocks may need to move instructions to fore blocks, which becomes more
  // difficult if there are multiple (potentially conditionally executed)
  // blocks. For now we just exclude loops with multiple aft blocks.
  if (AftBlocks.size() != 1)
    return false;

  // Check inner loop IV is consistent between all iterations
  const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch);
  if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) ||
      !SubLoopBECountSC->getType()->isIntegerTy())
    return false;
  ScalarEvolution::LoopDisposition LD =
      SE.getLoopDisposition(SubLoopBECountSC, L);
  if (LD != ScalarEvolution::LoopInvariant)
    return false;

  // Check the loop safety info for exceptions.
  LoopSafetyInfo LSI;
  computeLoopSafetyInfo(&LSI, L);
  if (LSI.MayThrow)
    return false;

  // We've ruled out the easy stuff and now need to check that there are no
  // interdependencies which may prevent us from moving:
  //  ForeBlocks before the SubLoop and AftBlocks,
  //  the SubLoop before AftBlocks, and
  //  ForeBlock phi operands before the subloop.

  // Make sure we can move all instructions we need to before the subloop
  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  for (auto &Phi : Header->phis()) {
    Value *V = Phi.getIncomingValueForBlock(Latch);
    if (Instruction *I = dyn_cast<Instruction>(V))
      Worklist.push_back(I);
  }
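  // Walk the use-def chains of those loop-carried values.  Anything defined
  // inside the subloop cannot be moved ahead of it, and anything in the aft
  // blocks must be a side-effect-free, non-memory instruction (and not an
  // LCSSA phi) so that it and, transitively, its operands can be moved into
  // the fore blocks.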
  while (!Worklist.empty()) {
    Instruction *I = Worklist.back();
    Worklist.pop_back();
    if (Visited.insert(I).second) {
      if (SubLoop->contains(I->getParent()))
        return false;
      if (AftBlocks.count(I->getParent())) {
        // If we hit a phi node in afts we know we are done (probably LCSSA)
        if (isa<PHINode>(I))
          return false;
        if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
          return false;
        for (auto &U : I->operands())
          if (Instruction *II = dyn_cast<Instruction>(U))
            Worklist.push_back(II);
      }
    }
  }

  // Check for memory dependencies which prohibit the unrolling we are doing.
  // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
  // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
  if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI))
    return false;

  return true;
}
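
A rough source-level sketch, not from the original code, of the reordering described in the big comment above: unrolling the outer loop by two and jamming the inner loops so the blocks run as F1 F2 S1 S2 A1 A2. Every name below is invented, and the legality conditions correspond to the kind of checks this function performs.

// Hedged, illustrative only.  Before:
//
//   for (int I = 0; I < N; ++I) {        // F(I): ForeBlocks
//     int Acc = Init[I];
//     for (int J = 0; J < M; ++J)        // S(I,J): SubLoopBlocks
//       Acc += B[I][J];
//     Out[I] = Acc;                      // A(I): AftBlocks
//   }
//
// After unroll-and-jam by two (assuming N is even, or a remainder loop is
// emitted), iterations interleave as F1 F2 S1 S2 A1 A2:
void UnrollAndJamByTwo(int N, int M, const int *Init, int **B, int *Out) {
  for (int I = 0; I < N; I += 2) {
    int Acc0 = Init[I];                   // F(I)
    int Acc1 = Init[I + 1];               // F(I+1)
    for (int J = 0; J < M; ++J) {         // jammed: S(I,J) and S(I+1,J)
      Acc0 += B[I][J];
      Acc1 += B[I + 1][J];
    }
    Out[I] = Acc0;                        // A(I)
    Out[I + 1] = Acc1;                    // A(I+1)
  }
}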