Esempio n. 1
0
/// Return the nearest parent loop among this block's successors. If a successor
/// is a subloop header, consider its parent to be the nearest parent of the
/// subloop's exits.
///
/// For subloop blocks, simply update SubloopParents and return NULL.
Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {

  // Initially for blocks directly contained by Unloop, NearLoop == Unloop and
  // is considered uninitialized.
  Loop *NearLoop = BBLoop;

  Loop *Subloop = nullptr;
  if (NearLoop != &Unloop && Unloop.contains(NearLoop)) {
    Subloop = NearLoop;
    // Find the subloop ancestor that is directly contained within Unloop.
    while (Subloop->getParentLoop() != &Unloop) {
      Subloop = Subloop->getParentLoop();
      assert(Subloop && "subloop is not an ancestor of the original loop");
    }
    // Get the current nearest parent of the Subloop exits, initially Unloop.
    NearLoop = SubloopParents.insert({Subloop, &Unloop}).first->second;
  }

  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E) {
    assert(!Subloop && "subloop blocks must have a successor");
    NearLoop = nullptr; // unloop blocks may now exit the function.
  }
  for (; I != E; ++I) {
    if (*I == BB)
      continue; // self loops are uninteresting

    Loop *L = LI->getLoopFor(*I);
    if (L == &Unloop) {
      // This successor has not been processed. This path must lead to an
      // irreducible backedge.
      assert((FoundIB || !DFS.hasPostorder(*I)) && "should have seen IB");
      FoundIB = true;
    }
    if (L != &Unloop && Unloop.contains(L)) {
      // Successor is in a subloop.
      if (Subloop)
        continue; // Branching within subloops. Ignore it.

      // BB branches from the original into a subloop header.
      assert(L->getParentLoop() == &Unloop && "cannot skip into nested loops");

      // Get the current nearest parent of the Subloop's exits.
      L = SubloopParents[L];
      // L could be Unloop if the only exit was an irreducible backedge.
    }
    if (L == &Unloop) {
      continue;
    }
    // Handle critical edges from Unloop into a sibling loop.
    if (L && !L->contains(&Unloop)) {
      L = L->getParentLoop();
    }
    // Remember the nearest parent loop among successors or subloop exits.
    if (NearLoop == &Unloop || !NearLoop || NearLoop->contains(L))
      NearLoop = L;
  }
  if (Subloop) {
    SubloopParents[Subloop] = NearLoop;
    return BBLoop;
  }
  return NearLoop;
}
Esempio n. 2
0
/// SplitBlockPredecessors - This method transforms BB by introducing a new
/// basic block into the function, and moving some of the predecessors of BB to
/// be predecessors of the new block.  The new predecessors are indicated by the
/// Preds array, which has NumPreds elements in it.  The new block is given a
/// suffix of 'Suffix'.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
/// In particular, it does not preserve LoopSimplify (because it's
/// complicated to handle the case where one of the edges being split
/// is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, 
                                         BasicBlock *const *Preds,
                                         unsigned NumPreds, const char *Suffix,
                                         Pass *P) {
  // Create new basic block, insert right before the original block.
  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
                                         BB->getParent(), BB);
  
  // The new block unconditionally branches to the old block.
  BranchInst *BI = BranchInst::Create(BB, NewBB);
  
  LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
  Loop *L = LI ? LI->getLoopFor(BB) : 0;
  bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);

  // Move the edges from Preds to point to NewBB instead of BB.
  // While here, if we need to preserve loop analyses, collect
  // some information about how this split will affect loops.
  bool HasLoopExit = false;
  bool IsLoopEntry = !!L;
  bool SplitMakesNewLoopHeader = false;
  for (unsigned i = 0; i != NumPreds; ++i) {
    // This is slightly more strict than necessary; the minimum requirement
    // is that there be no more than one indirectbr branching to BB. And
    // all BlockAddress uses would need to be updated.
    assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
           "Cannot split an edge from an IndirectBrInst");

    Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);

    if (LI) {
      // If we need to preserve LCSSA, determine if any of
      // the preds is a loop exit.
      if (PreserveLCSSA)
        if (Loop *PL = LI->getLoopFor(Preds[i]))
          if (!PL->contains(BB))
            HasLoopExit = true;
      // If we need to preserve LoopInfo, note whether any of the
      // preds crosses an interesting loop boundary.
      if (L) {
        if (L->contains(Preds[i]))
          IsLoopEntry = false;
        else
          SplitMakesNewLoopHeader = true;
      }
    }
  }

  // Update dominator tree and dominator frontier if available.
  DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
  if (DT)
    DT->splitBlock(NewBB);
  if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
    DF->splitBlock(NewBB);

  // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
  // node becomes an incoming value for BB's phi node.  However, if the Preds
  // list is empty, we need to insert dummy entries into the PHI nodes in BB to
  // account for the newly created predecessor.
  if (NumPreds == 0) {
    // Insert dummy values as the incoming value.
    for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
      cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
    return NewBB;
  }

  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;

  if (L) {
    if (IsLoopEntry) {
      // Add the new block to the nearest enclosing loop (and not an
      // adjacent loop). To find this, examine each of the predecessors and
      // determine which loops enclose them, and select the most-nested loop
      // which contains the loop containing the block being split.
      Loop *InnermostPredLoop = 0;
      for (unsigned i = 0; i != NumPreds; ++i)
        if (Loop *PredLoop = LI->getLoopFor(Preds[i])) {
          // Seek a loop which actually contains the block being split (to
          // avoid adjacent loops).
          while (PredLoop && !PredLoop->contains(BB))
            PredLoop = PredLoop->getParentLoop();
          // Select the most-nested of these loops which contains the block.
          if (PredLoop &&
              PredLoop->contains(BB) &&
              (!InnermostPredLoop ||
               InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
            InnermostPredLoop = PredLoop;
        }
      if (InnermostPredLoop)
        InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
    } else {
      L->addBasicBlockToLoop(NewBB, LI->getBase());
      if (SplitMakesNewLoopHeader)
        L->moveToHeader(NewBB);
    }
  }
  
  // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
    PHINode *PN = cast<PHINode>(I++);
    
    // Check to see if all of the values coming in are the same.  If so, we
    // don't need to create a new PHI node, unless it's needed for LCSSA.
    Value *InVal = 0;
    if (!HasLoopExit) {
      InVal = PN->getIncomingValueForBlock(Preds[0]);
      for (unsigned i = 1; i != NumPreds; ++i)
        if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
          InVal = 0;
          break;
        }
    }

    if (InVal) {
      // If all incoming values for the new PHI would be the same, just don't
      // make a new PHI.  Instead, just remove the incoming values from the old
      // PHI.
      for (unsigned i = 0; i != NumPreds; ++i)
        PN->removeIncomingValue(Preds[i], false);
    } else {
      // If the values coming into the block are not the same, we need a PHI.
      // Create the new PHI node, insert it into NewBB at the end of the block
      PHINode *NewPHI =
        PHINode::Create(PN->getType(), PN->getName()+".ph", BI);
      if (AA) AA->copyValue(PN, NewPHI);
      
      // Move all of the PHI values for 'Preds' to the new PHI.
      for (unsigned i = 0; i != NumPreds; ++i) {
        Value *V = PN->removeIncomingValue(Preds[i], false);
        NewPHI->addIncoming(V, Preds[i]);
      }
      InVal = NewPHI;
    }
    
    // Add an incoming value to the PHI node in the loop for the preheader
    // edge.
    PN->addIncoming(InVal, NewBB);
  }
  
  return NewBB;
}
Esempio n. 3
0
// Sinks \p I from the loop \p L's preheader to its uses. Returns true if
// sinking is successful.
// \p LoopBlockNumber is used to sort the insertion blocks to ensure
// determinism.
static bool sinkInstruction(Loop &L, Instruction &I,
                            const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                            const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber,
                            LoopInfo &LI, DominatorTree &DT,
                            BlockFrequencyInfo &BFI) {
  // Compute the set of blocks in loop L which contain a use of I.
  SmallPtrSet<BasicBlock *, 2> BBs;
  for (auto &U : I.uses()) {
    Instruction *UI = cast<Instruction>(U.getUser());
    // We cannot sink I to PHI-uses.
    if (dyn_cast<PHINode>(UI))
      return false;
    // We cannot sink I if it has uses outside of the loop.
    if (!L.contains(LI.getLoopFor(UI->getParent())))
      return false;
    BBs.insert(UI->getParent());
  }

  // findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max
  // BBs.size() to avoid expensive computation.
  // FIXME: Handle code size growth for min_size and opt_size.
  if (BBs.size() > MaxNumberOfUseBBsForSinking)
    return false;

  // Find the set of BBs that we should insert a copy of I.
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto =
      findBBsToSinkInto(L, BBs, ColdLoopBBs, DT, BFI);
  if (BBsToSinkInto.empty())
    return false;

  // Copy the final BBs into a vector and sort them using the total ordering
  // of the loop block numbers as iterating the set doesn't give a useful
  // order. No need to stable sort as the block numbers are a total ordering.
  SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
  SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(),
                             BBsToSinkInto.end());
  std::sort(SortedBBsToSinkInto.begin(), SortedBBsToSinkInto.end(),
            [&](BasicBlock *A, BasicBlock *B) {
              return *LoopBlockNumber.find(A) < *LoopBlockNumber.find(B);
            });

  BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
  // FIXME: Optimize the efficiency for cloned value replacement. The current
  //        implementation is O(SortedBBsToSinkInto.size() * I.num_uses()).
  for (BasicBlock *N : SortedBBsToSinkInto) {
    if (N == MoveBB)
      continue;
    // Clone I and replace its uses.
    Instruction *IC = I.clone();
    IC->setName(I.getName());
    IC->insertBefore(&*N->getFirstInsertionPt());
    // Replaces uses of I with IC in N
    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
      Use &U = *UI++;
      auto *I = cast<Instruction>(U.getUser());
      if (I->getParent() == N)
        U.set(IC);
    }
    // Replaces uses of I with IC in blocks dominated by N
    replaceDominatedUsesWith(&I, IC, DT, N);
    DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName()
                 << '\n');
    NumLoopSunkCloned++;
  }
  DEBUG(dbgs() << "Sinking " << I << " To: " << MoveBB->getName() << '\n');
  NumLoopSunk++;
  I.moveBefore(&*MoveBB->getFirstInsertionPt());

  return true;
}
Esempio n. 4
0
/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCCSA
/// analysis information.
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
                                      ArrayRef<BasicBlock *> Preds,
                                      Pass *P, bool &HasLoopExit) {
  if (!P) return;

  LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
  Loop *L = LI ? LI->getLoopFor(OldBB) : 0;

  // If we need to preserve loop analyses, collect some information about how
  // this split will affect loops.
  bool IsLoopEntry = !!L;
  bool SplitMakesNewLoopHeader = false;
  if (LI) {
    bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
    for (ArrayRef<BasicBlock*>::iterator
           i = Preds.begin(), e = Preds.end(); i != e; ++i) {
      BasicBlock *Pred = *i;

      // If we need to preserve LCSSA, determine if any of the preds is a loop
      // exit.
      if (PreserveLCSSA)
        if (Loop *PL = LI->getLoopFor(Pred))
          if (!PL->contains(OldBB))
            HasLoopExit = true;

      // If we need to preserve LoopInfo, note whether any of the preds crosses
      // an interesting loop boundary.
      if (!L) continue;
      if (L->contains(Pred))
        IsLoopEntry = false;
      else
        SplitMakesNewLoopHeader = true;
    }
  }

  // Update dominator tree if available.
  DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
  if (DT)
    DT->splitBlock(NewBB);

  if (!L) return;

  if (IsLoopEntry) {
    // Add the new block to the nearest enclosing loop (and not an adjacent
    // loop). To find this, examine each of the predecessors and determine which
    // loops enclose them, and select the most-nested loop which contains the
    // loop containing the block being split.
    Loop *InnermostPredLoop = 0;
    for (ArrayRef<BasicBlock*>::iterator
           i = Preds.begin(), e = Preds.end(); i != e; ++i) {
      BasicBlock *Pred = *i;
      if (Loop *PredLoop = LI->getLoopFor(Pred)) {
        // Seek a loop which actually contains the block being split (to avoid
        // adjacent loops).
        while (PredLoop && !PredLoop->contains(OldBB))
          PredLoop = PredLoop->getParentLoop();

        // Select the most-nested of these loops which contains the block.
        if (PredLoop && PredLoop->contains(OldBB) &&
            (!InnermostPredLoop ||
             InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
          InnermostPredLoop = PredLoop;
      }
    }

    if (InnermostPredLoop)
      InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
  } else {
    L->addBasicBlockToLoop(NewBB, LI->getBase());
    if (SplitMakesNewLoopHeader)
      L->moveToHeader(NewBB);
  }
}
Esempio n. 5
0
/// For every instruction from the worklist, check to see if it has any uses
/// that are outside the current loop.  If so, insert LCSSA PHI nodes and
/// rewrite the uses.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
                                    DominatorTree &DT, LoopInfo &LI) {
    SmallVector<Use *, 16> UsesToRewrite;
    SmallSetVector<PHINode *, 16> PHIsToRemove;
    PredIteratorCache PredCache;
    bool Changed = false;

    // Cache the Loop ExitBlocks across this loop.  We expect to get a lot of
    // instructions within the same loops, computing the exit blocks is
    // expensive, and we're not mutating the loop structure.
    SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;

    while (!Worklist.empty()) {
        UsesToRewrite.clear();

        Instruction *I = Worklist.pop_back_val();
        BasicBlock *InstBB = I->getParent();
        Loop *L = LI.getLoopFor(InstBB);
        if (!LoopExitBlocks.count(L))
            L->getExitBlocks(LoopExitBlocks[L]);
        assert(LoopExitBlocks.count(L));
        const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];

        if (ExitBlocks.empty())
            continue;

        // Tokens cannot be used in PHI nodes, so we skip over them.
        // We can run into tokens which are live out of a loop with catchswitch
        // instructions in Windows EH if the catchswitch has one catchpad which
        // is inside the loop and another which is not.
        if (I->getType()->isTokenTy())
            continue;

        for (Use &U : I->uses()) {
            Instruction *User = cast<Instruction>(U.getUser());
            BasicBlock *UserBB = User->getParent();
            if (PHINode *PN = dyn_cast<PHINode>(User))
                UserBB = PN->getIncomingBlock(U);

            if (InstBB != UserBB && !L->contains(UserBB))
                UsesToRewrite.push_back(&U);
        }

        // If there are no uses outside the loop, exit with no change.
        if (UsesToRewrite.empty())
            continue;

        ++NumLCSSA; // We are applying the transformation

        // Invoke instructions are special in that their result value is not
        // available along their unwind edge. The code below tests to see whether
        // DomBB dominates the value, so adjust DomBB to the normal destination
        // block, which is effectively where the value is first usable.
        BasicBlock *DomBB = InstBB;
        if (InvokeInst *Inv = dyn_cast<InvokeInst>(I))
            DomBB = Inv->getNormalDest();

        DomTreeNode *DomNode = DT.getNode(DomBB);

        SmallVector<PHINode *, 16> AddedPHIs;
        SmallVector<PHINode *, 8> PostProcessPHIs;

        SmallVector<PHINode *, 4> InsertedPHIs;
        SSAUpdater SSAUpdate(&InsertedPHIs);
        SSAUpdate.Initialize(I->getType(), I->getName());

        // Insert the LCSSA phi's into all of the exit blocks dominated by the
        // value, and add them to the Phi's map.
        for (BasicBlock *ExitBB : ExitBlocks) {
            if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
                continue;

            // If we already inserted something for this BB, don't reprocess it.
            if (SSAUpdate.HasValueForBlock(ExitBB))
                continue;

            PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
                                          I->getName() + ".lcssa", &ExitBB->front());

            // Add inputs from inside the loop for this PHI.
            for (BasicBlock *Pred : PredCache.get(ExitBB)) {
                PN->addIncoming(I, Pred);

                // If the exit block has a predecessor not within the loop, arrange for
                // the incoming value use corresponding to that predecessor to be
                // rewritten in terms of a different LCSSA PHI.
                if (!L->contains(Pred))
                    UsesToRewrite.push_back(
                        &PN->getOperandUse(PN->getOperandNumForIncomingValue(
                                               PN->getNumIncomingValues() - 1)));
            }

            AddedPHIs.push_back(PN);

            // Remember that this phi makes the value alive in this block.
            SSAUpdate.AddAvailableValue(ExitBB, PN);

            // LoopSimplify might fail to simplify some loops (e.g. when indirect
            // branches are involved). In such situations, it might happen that an
            // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
            // create PHIs in such an exit block, we are also inserting PHIs into L2's
            // header. This could break LCSSA form for L2 because these inserted PHIs
            // can also have uses outside of L2. Remember all PHIs in such situation
            // as to revisit than later on. FIXME: Remove this if indirectbr support
            // into LoopSimplify gets improved.
            if (auto *OtherLoop = LI.getLoopFor(ExitBB))
                if (!L->contains(OtherLoop))
                    PostProcessPHIs.push_back(PN);
        }

        // Rewrite all uses outside the loop in terms of the new PHIs we just
        // inserted.
        for (Use *UseToRewrite : UsesToRewrite) {
            // If this use is in an exit block, rewrite to use the newly inserted PHI.
            // This is required for correctness because SSAUpdate doesn't handle uses
            // in the same block.  It assumes the PHI we inserted is at the end of the
            // block.
            Instruction *User = cast<Instruction>(UseToRewrite->getUser());
            BasicBlock *UserBB = User->getParent();
            if (PHINode *PN = dyn_cast<PHINode>(User))
                UserBB = PN->getIncomingBlock(*UseToRewrite);

            if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
                // Tell the VHs that the uses changed. This updates SCEV's caches.
                if (UseToRewrite->get()->hasValueHandle())
                    ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
                UseToRewrite->set(&UserBB->front());
                continue;
            }

            // Otherwise, do full PHI insertion.
            SSAUpdate.RewriteUse(*UseToRewrite);
        }

        // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
        // to post-process them to keep LCSSA form.
        for (PHINode *InsertedPN : InsertedPHIs) {
            if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
                if (!L->contains(OtherLoop))
                    PostProcessPHIs.push_back(InsertedPN);
        }

        // Post process PHI instructions that were inserted into another disjoint
        // loop and update their exits properly.
        for (auto *PostProcessPN : PostProcessPHIs) {
            if (PostProcessPN->use_empty())
                continue;

            // Reprocess each PHI instruction.
            Worklist.push_back(PostProcessPN);
        }

        // Keep track of PHI nodes that we want to remove because they did not have
        // any uses rewritten.
        for (PHINode *PN : AddedPHIs)
            if (PN->use_empty())
                PHIsToRemove.insert(PN);

        Changed = true;
    }
    // Remove PHI nodes that did not have any uses rewritten.
    for (PHINode *PN : PHIsToRemove) {
        assert (PN->use_empty() && "Trying to remove a phi with uses.");
        PN->eraseFromParent();
    }
    return Changed;
}
Esempio n. 6
0
// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
// single block and then recurses into the successors.
// The algorithm preserves the flow condition, meaning that the sum of the
// weight of the incoming edges must be equal the block weight which must in
// turn be equal to the sume of the weights of the outgoing edges.
// Since the flow of an block is deterimined from the current state of the
// flow, once an edge has a flow assigned this flow is never changed again,
// otherwise it would be possible to violate the flow condition in another
// block.
void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {

  // Break the recursion if this BasicBlock was already visited.
  if (BBToVisit.find(BB) == BBToVisit.end()) return;

  // Read the LoopInfo for this block.
  bool  BBisHeader = LI->isLoopHeader(BB);
  Loop* BBLoop     = LI->getLoopFor(BB);

  // To get the block weight, read all incoming edges.
  double BBWeight = 0;
  std::set<BasicBlock*> ProcessedPreds;
  for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
        bbi != bbe; ++bbi ) {
    // If this block was not considered already, add weight.
    Edge edge = getEdge(*bbi,BB);
    double w = getEdgeWeight(edge);
    if (ProcessedPreds.insert(*bbi).second) {
      BBWeight += ignoreMissing(w);
    }
    // If this block is a loop header and the predecessor is contained in this
    // loop, thus the edge is a backedge, continue and do not check if the
    // value is valid.
    if (BBisHeader && BBLoop->contains(*bbi)) {
      printEdgeError(edge, "but is backedge, continuing");
      continue;
    }
    // If the edges value is missing (and this is no loop header, and this is
    // no backedge) return, this block is currently non estimatable.
    if (w == MissingValue) {
      printEdgeError(edge, "returning");
      return;
    }
  }
  if (getExecutionCount(BB) != MissingValue) {
    BBWeight = getExecutionCount(BB);
  }

  // Fetch all necessary information for current block.
  SmallVector<Edge, 8> ExitEdges;
  SmallVector<Edge, 8> Edges;
  if (BBLoop) {
    BBLoop->getExitEdges(ExitEdges);
  }

  // If this is a loop header, consider the following:
  // Exactly the flow that is entering this block, must exit this block too. So
  // do the following: 
  // *) get all the exit edges, read the flow that is already leaving this
  // loop, remember the edges that do not have any flow on them right now.
  // (The edges that have already flow on them are most likely exiting edges of
  // other loops, do not touch those flows because the previously caclulated
  // loopheaders would not be exact anymore.)
  // *) In case there is not a single exiting edge left, create one at the loop
  // latch to prevent the flow from building up in the loop.
  // *) Take the flow that is not leaving the loop already and distribute it on
  // the remaining exiting edges.
  // (This ensures that all flow that enters the loop also leaves it.)
  // *) Increase the flow into the loop by increasing the weight of this block.
  // There is at least one incoming backedge that will bring us this flow later
  // on. (So that the flow condition in this node is valid again.)
  if (BBisHeader) {
    double incoming = BBWeight;
    // Subtract the flow leaving the loop.
    std::set<Edge> ProcessedExits;
    for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
         ee = ExitEdges.end(); ei != ee; ++ei) {
      if (ProcessedExits.insert(*ei).second) {
        double w = getEdgeWeight(*ei);
        if (w == MissingValue) {
          Edges.push_back(*ei);
          // Check if there is a necessary minimal weight, if yes, subtract it 
          // from weight.
          if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
            incoming -= MinimalWeight[*ei];
            DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
          }
        } else {
          incoming -= w;
        }
      }
    }
    // If no exit edges, create one:
    if (Edges.size() == 0) {
      BasicBlock *Latch = BBLoop->getLoopLatch();
      if (Latch) {
        Edge edge = getEdge(Latch,0);
        EdgeInformation[BB->getParent()][edge] = BBWeight;
        printEdgeWeight(edge);
        edge = getEdge(Latch, BB);
        EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
        printEdgeWeight(edge);
      }
    }

    // Distribute remaining weight to the exting edges. To prevent fractions
    // from building up and provoking precision problems the weight which is to
    // be distributed is split and the rounded, the last edge gets a somewhat
    // bigger value, but we are close enough for an estimation.
    double fraction = floor(incoming/Edges.size());
    for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
         ei != ee; ++ei) {
      double w = 0;
      if (ei != (ee-1)) {
        w = fraction;
        incoming -= fraction;
      } else {
        w = incoming;
      }
      EdgeInformation[BB->getParent()][*ei] += w;
      // Read necessary minimal weight.
      if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
        EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
        DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
      }
      printEdgeWeight(*ei);
      
      // Add minimal weight to paths to all exit edges, this is used to ensure
      // that enough flow is reaching this edges.
      Path p;
      const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
      while (Dest != BB) {
        const BasicBlock *Parent = p.find(Dest)->second;
        Edge e = getEdge(Parent, Dest);
        if (MinimalWeight.find(e) == MinimalWeight.end()) {
          MinimalWeight[e] = 0;
        }
        MinimalWeight[e] += w;
        DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
        Dest = Parent;
      }
    }
    // Increase flow into the loop.
    BBWeight *= (ExecCount+1);
  }

  BlockInformation[BB->getParent()][BB] = BBWeight;
  // Up until now we considered only the loop exiting edges, now we have a
  // definite block weight and must distribute this onto the outgoing edges.
  // Since there may be already flow attached to some of the edges, read this
  // flow first and remember the edges that have still now flow attached.
  Edges.clear();
  std::set<BasicBlock*> ProcessedSuccs;

  succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
  // Also check for (BB,0) edges that may already contain some flow. (But only
  // in case there are no successors.)
  if (bbi == bbe) {
    Edge edge = getEdge(BB,0);
    EdgeInformation[BB->getParent()][edge] = BBWeight;
    printEdgeWeight(edge);
  }
  for ( ; bbi != bbe; ++bbi ) {
    if (ProcessedSuccs.insert(*bbi).second) {
      Edge edge = getEdge(BB,*bbi);
      double w = getEdgeWeight(edge);
      if (w != MissingValue) {
        BBWeight -= getEdgeWeight(edge);
      } else {
        Edges.push_back(edge);
        // If minimal weight is necessary, reserve weight by subtracting weight
        // from block weight, this is readded later on.
        if (MinimalWeight.find(edge) != MinimalWeight.end()) {
          BBWeight -= MinimalWeight[edge];
          DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
        }
      }
    }
  }

  double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;
  // Finally we know what flow is still not leaving the block, distribute this
  // flow onto the empty edges.
  for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
       ei != ee; ++ei) {
    if (ei != (ee-1)) {
      EdgeInformation[BB->getParent()][*ei] += fraction;
      BBWeight -= fraction;
    } else {
      EdgeInformation[BB->getParent()][*ei] += BBWeight;
    }
    // Readd minial necessary weight.
    if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
      EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
      DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
    }
    printEdgeWeight(*ei);
  }

  // This block is visited, mark this before the recursion.
  BBToVisit.erase(BB);

  // Recurse into successors.
  for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
       bbi != bbe; ++bbi) {
    recurseBasicBlock(*bbi);
  }
}
Esempio n. 7
0
/// Update DominatorTree, LoopInfo, and LCCSA analysis information.
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
                                      ArrayRef<BasicBlock *> Preds,
                                      DominatorTree *DT, LoopInfo *LI,
                                      bool PreserveLCSSA, bool &HasLoopExit) {
  // Update dominator tree if available.
  if (DT)
    DT->splitBlock(NewBB);

  // The rest of the logic is only relevant for updating the loop structures.
  if (!LI)
    return;

  assert(DT && "DT should be available to update LoopInfo!");
  Loop *L = LI->getLoopFor(OldBB);

  // If we need to preserve loop analyses, collect some information about how
  // this split will affect loops.
  bool IsLoopEntry = !!L;
  bool SplitMakesNewLoopHeader = false;
  for (BasicBlock *Pred : Preds) {
    // Preds that are not reachable from entry should not be used to identify if
    // OldBB is a loop entry or if SplitMakesNewLoopHeader. Unreachable blocks
    // are not within any loops, so we incorrectly mark SplitMakesNewLoopHeader
    // as true and make the NewBB the header of some loop. This breaks LI.
    if (!DT->isReachableFromEntry(Pred))
      continue;
    // If we need to preserve LCSSA, determine if any of the preds is a loop
    // exit.
    if (PreserveLCSSA)
      if (Loop *PL = LI->getLoopFor(Pred))
        if (!PL->contains(OldBB))
          HasLoopExit = true;

    // If we need to preserve LoopInfo, note whether any of the preds crosses
    // an interesting loop boundary.
    if (!L)
      continue;
    if (L->contains(Pred))
      IsLoopEntry = false;
    else
      SplitMakesNewLoopHeader = true;
  }

  // Unless we have a loop for OldBB, nothing else to do here.
  if (!L)
    return;

  if (IsLoopEntry) {
    // Add the new block to the nearest enclosing loop (and not an adjacent
    // loop). To find this, examine each of the predecessors and determine which
    // loops enclose them, and select the most-nested loop which contains the
    // loop containing the block being split.
    Loop *InnermostPredLoop = nullptr;
    for (BasicBlock *Pred : Preds) {
      if (Loop *PredLoop = LI->getLoopFor(Pred)) {
        // Seek a loop which actually contains the block being split (to avoid
        // adjacent loops).
        while (PredLoop && !PredLoop->contains(OldBB))
          PredLoop = PredLoop->getParentLoop();

        // Select the most-nested of these loops which contains the block.
        if (PredLoop && PredLoop->contains(OldBB) &&
            (!InnermostPredLoop ||
             InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
          InnermostPredLoop = PredLoop;
      }
    }

    if (InnermostPredLoop)
      InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI);
  } else {
    L->addBasicBlockToLoop(NewBB, *LI);
    if (SplitMakesNewLoopHeader)
      L->moveToHeader(NewBB);
  }
}
Esempio n. 8
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecesssary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
/// have a runtime (i.e. not compile time constant) trip count.  Unrolling these
/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
/// iterations before branching into the unrolled loop.  UnrollLoop will not
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, bool AllowExpensiveTripCount,
                      unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
                      DominatorTree *DT, AssumptionCache *AC,
                      bool PreserveLCSSA) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  Loop *ParentL = L->getParentLoop();
  bool AllExitsAreInsideParentLoop = !ParentL ||
      std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
                  [&](BasicBlock *BB) { return ParentL->contains(BB); });

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount &&
      !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
                               PreserveLCSSA))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (SE)
    SE->forgetLoop(L);

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  // Report the unrolling decision.
  DebugLoc LoopLoc = L->getStartLoc();
  Function *F = Header->getParent();
  LLVMContext &Ctx = F->getContext();

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                           Twine("completely unrolled loop with ") +
                               Twine(TripCount) + " iterations");
  } else {
    auto EmitDiag = [&](const Twine &T) {
      emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                             "unrolled loop by a factor of " + Twine(Count) +
                                 T);
    };

    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
      EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip));
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      EmitDiag(" with " + Twine(TripMultiple) + " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
      EmitDiag(" with run-time trip count");
    }
    DEBUG(dbgs() << "!\n");
  }

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;
    SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
    NewLoops[L] = L;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Tell LI about New.
      if (*BB == Header) {
        assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
        L->addBasicBlockToLoop(New, *LI);
      } else {
        // Figure out which loop New is in.
        const Loop *OldLoop = LI->getLoopFor(*BB);
        assert(OldLoop && "Should (at least) be in the loop being unrolled!");

        Loop *&NewLoop = NewLoops[OldLoop];
        if (!NewLoop) {
          // Found a new sub-loop.
          assert(*BB == OldLoop->getHeader() &&
                 "Header should be first in RPO");

          Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
          assert(NewLoopParent &&
                 "Expected parent loop before sub-loop in RPO");
          NewLoop = new Loop;
          NewLoopParent->addChildLoop(NewLoop);

          // Forget the old loop, since its inputs may have changed.
          if (SE)
            SE->forgetLoop(OldLoop);
        }
        NewLoop->addBasicBlockToLoop(New, *LI);
      }

      if (*BB == Header)
        // Loop over all of the PHI nodes in the block, changing them to use
        // the incoming values from the previous block.
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(&*I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll) {
      if (j == 0)
        Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<Loop *, 4> ForgottenLoops;
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE,
                                                      ForgottenLoops))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: We could register any cloned assumptions instead of clearing the
  // whole function's cache.
  AC->clear();

  // FIXME: Reconstruct dom info, because it is not preserved properly.
  // Incrementally updating domtree after loop unrolling would be easy.
  if (DT)
    DT->recalculate(*L->getHeader()->getParent());

  // Simplify any new induction variables in the partially unrolled loop.
  if (SE && !CompletelyUnroll) {
    SmallVector<WeakVH, 16> DeadInsts;
    simplifyLoopIVs(L, SE, DT, LI, DeadInsts);

    // Aggressively clean up dead instructions that simplifyLoopIVs already
    // identified. Any remaining should be cleaned up below.
    while (!DeadInsts.empty())
      if (Instruction *Inst =
              dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const DataLayout &DL = Header->getModule()->getDataLayout();
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = &*I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst, DL))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;

  Loop *OuterL = L->getParentLoop();
  // Update LoopInfo if the loop is completely removed.
  if (CompletelyUnroll)
    LI->updateUnloop(L);;

  // If we have a pass and a DominatorTree we should re-simplify impacted loops
  // to ensure subsequent analyses can rely on this form. We want to simplify
  // at least one layer outside of the loop that was unrolled so that any
  // changes to the parent loop exposed by the unrolling are considered.
  if (DT) {
    if (!OuterL && !CompletelyUnroll)
      OuterL = L;
    if (OuterL) {
      bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);

      // LCSSA must be performed on the outermost affected loop. The unrolled
      // loop's last loop latch is guaranteed to be in the outermost loop after
      // LoopInfo's been updated by updateUnloop.
      Loop *LatchLoop = LI->getLoopFor(Latches.back());
      if (!OuterL->contains(LatchLoop))
        while (OuterL->getParentLoop() != LatchLoop)
          OuterL = OuterL->getParentLoop();

      if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified))
        formLCSSARecursively(*OuterL, *DT, LI, SE);
      else
        assert(OuterL->isLCSSAForm(*DT) &&
               "Loops should be in LCSSA form after loop-unroll.");
    }
  }

  return true;
}
Esempio n. 9
0
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
/// unroll count is non-zero.
///
/// This function performs the following:
/// - Create PHI nodes at prolog end block to combine values
///   that exit the prolog code and jump around the prolog.
/// - Add a PHI operand to a PHI node at the loop exit block
///   for values that exit the prolog and go around the loop.
/// - Branch around the original loop if the trip count is less
///   than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
                          BasicBlock *PrologExit,
                          BasicBlock *OriginalLoopLatchExit,
                          BasicBlock *PreHeader, BasicBlock *NewPreHeader,
                          ValueToValueMapTy &VMap, DominatorTree *DT,
                          LoopInfo *LI, bool PreserveLCSSA) {
  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Loop must have a latch");
  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);

  // Create a PHI node for each outgoing value from the original loop
  // (which means it is an outgoing value from the prolog code too).
  // The new PHI node is inserted in the prolog end basic block.
  // The new PHI node value is added as an operand of a PHI node in either
  // the loop header or the loop exit block.
  for (BasicBlock *Succ : successors(Latch)) {
    for (Instruction &BBI : *Succ) {
      PHINode *PN = dyn_cast<PHINode>(&BBI);
      // Exit when we passed all PHI nodes.
      if (!PN)
        break;
      // Add a new PHI node to the prolog end block and add the
      // appropriate incoming values.
      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
                                       PrologExit->getFirstNonPHI());
      // Adding a value to the new PHI node from the original loop preheader.
      // This is the value that skips all the prolog code.
      if (L->contains(PN)) {
        NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
                           PreHeader);
      } else {
        NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
      }

      Value *V = PN->getIncomingValueForBlock(Latch);
      if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (L->contains(I)) {
          V = VMap.lookup(I);
        }
      }
      // Adding a value to the new PHI node from the last prolog block
      // that was created.
      NewPN->addIncoming(V, PrologLatch);

      // Update the existing PHI node operand with the value from the
      // new PHI node.  How this is done depends on if the existing
      // PHI node is in the original loop block, or the exit block.
      if (L->contains(PN)) {
        PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
      } else {
        PN->addIncoming(NewPN, PrologExit);
      }
    }
  }

  // Make sure that created prolog loop is in simplified form
  SmallVector<BasicBlock *, 4> PrologExitPreds;
  Loop *PrologLoop = LI->getLoopFor(PrologLatch);
  if (PrologLoop) {
    for (BasicBlock *PredBB : predecessors(PrologExit))
      if (PrologLoop->contains(PredBB))
        PrologExitPreds.push_back(PredBB);

    SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
                           PreserveLCSSA);
  }

  // Create a branch around the original loop, which is taken if there are no
  // iterations remaining to be executed after running the prologue.
  Instruction *InsertPt = PrologExit->getTerminator();
  IRBuilder<> B(InsertPt);

  assert(Count != 0 && "nonsensical Count!");

  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
  // This means %xtraiter is (BECount + 1) and all of the iterations of this
  // loop were executed by the prologue.  Note that if BECount <u (Count - 1)
  // then (BECount + 1) cannot unsigned-overflow.
  Value *BrLoopExit =
      B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
  // Split the exit to maintain loop canonicalization guarantees
  SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
  SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
                         PreserveLCSSA);
  // Add the branch to the exit block (around the unrolled loop)
  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
  InsertPt->eraseFromParent();
  if (DT)
    DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
}
Esempio n. 10
0
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
                                                     const LoopInfo &LI,
                                                     SccInfo &SccI) {
  int SccNum;
  Loop *L = LI.getLoopFor(BB);
  if (!L) {
    SccNum = getSCCNum(BB, SccI);
    if (SccNum < 0)
      return false;
  }

  SmallPtrSet<const BasicBlock*, 8> UnlikelyBlocks;
  if (L)
    computeUnlikelySuccessors(BB, L, UnlikelyBlocks);

  SmallVector<unsigned, 8> BackEdges;
  SmallVector<unsigned, 8> ExitingEdges;
  SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
  SmallVector<unsigned, 8> UnlikelyEdges;

  for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
    // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch
    // irreducible loops.
    if (L) {
      if (UnlikelyBlocks.count(*I) != 0)
        UnlikelyEdges.push_back(I.getSuccessorIndex());
      else if (!L->contains(*I))
        ExitingEdges.push_back(I.getSuccessorIndex());
      else if (L->getHeader() == *I)
        BackEdges.push_back(I.getSuccessorIndex());
      else
        InEdges.push_back(I.getSuccessorIndex());
    } else {
      if (getSCCNum(*I, SccI) != SccNum)
        ExitingEdges.push_back(I.getSuccessorIndex());
      else if (isSCCHeader(*I, SccNum, SccI))
        BackEdges.push_back(I.getSuccessorIndex());
      else
        InEdges.push_back(I.getSuccessorIndex());
    }
  }

  if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty())
    return false;

  // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
  // normalize them so that they sum up to one.
  unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
                   (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
                   (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) +
                   (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);

  if (uint32_t numBackEdges = BackEdges.size()) {
    BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
    auto Prob = TakenProb / numBackEdges;
    for (unsigned SuccIdx : BackEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  if (uint32_t numInEdges = InEdges.size()) {
    BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
    auto Prob = TakenProb / numInEdges;
    for (unsigned SuccIdx : InEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  if (uint32_t numExitingEdges = ExitingEdges.size()) {
    BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT,
                                                       Denom);
    auto Prob = NotTakenProb / numExitingEdges;
    for (unsigned SuccIdx : ExitingEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) {
    BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT,
                                                       Denom);
    auto Prob = UnlikelyProb / numUnlikelyEdges;
    for (unsigned SuccIdx : UnlikelyEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  return true;
}
Esempio n. 11
0
bool LocalFrequencies::isBackEdge(Edge E)
{
  LoopInfo &LI = getAnalysis<LoopInfo>();
  Loop *L = LI.getLoopFor(E.second);
  return L && (L->getHeader() == E.second) && L->contains(E.first);
}
static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
                             AssumptionCache &AC, const TargetLibraryInfo &TLI,
                             MemorySSAUpdater *MSSAU) {
  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
  SimplifyQuery SQ(DL, &TLI, &DT, &AC);

  // On the first pass over the loop body we try to simplify every instruction.
  // On subsequent passes, we can restrict this to only simplifying instructions
  // where the inputs have been updated. We end up needing two sets: one
  // containing the instructions we are simplifying in *this* pass, and one for
  // the instructions we will want to simplify in the *next* pass. We use
  // pointers so we can swap between two stably allocated sets.
  SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;

  // Track the PHI nodes that have already been visited during each iteration so
  // that we can identify when it is necessary to iterate.
  SmallPtrSet<PHINode *, 4> VisitedPHIs;

  // While simplifying we may discover dead code or cause code to become dead.
  // Keep track of all such instructions and we will delete them at the end.
  SmallVector<Instruction *, 8> DeadInsts;

  // First we want to create an RPO traversal of the loop body. By processing in
  // RPO we can ensure that definitions are processed prior to uses (for non PHI
  // uses) in all cases. This ensures we maximize the simplifications in each
  // iteration over the loop and minimizes the possible causes for continuing to
  // iterate.
  LoopBlocksRPO RPOT(&L);
  RPOT.perform(&LI);
  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;

  bool Changed = false;
  for (;;) {
    if (MSSAU && VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    for (BasicBlock *BB : RPOT) {
      for (Instruction &I : *BB) {
        if (auto *PI = dyn_cast<PHINode>(&I))
          VisitedPHIs.insert(PI);

        if (I.use_empty()) {
          if (isInstructionTriviallyDead(&I, &TLI))
            DeadInsts.push_back(&I);
          continue;
        }

        // We special case the first iteration which we can detect due to the
        // empty `ToSimplify` set.
        bool IsFirstIteration = ToSimplify->empty();

        if (!IsFirstIteration && !ToSimplify->count(&I))
          continue;

        Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I));
        if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
          continue;

        for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
             UI != UE;) {
          Use &U = *UI++;
          auto *UserI = cast<Instruction>(U.getUser());
          U.set(V);

          // If the instruction is used by a PHI node we have already processed
          // we'll need to iterate on the loop body to converge, so add it to
          // the next set.
          if (auto *UserPI = dyn_cast<PHINode>(UserI))
            if (VisitedPHIs.count(UserPI)) {
              Next->insert(UserPI);
              continue;
            }

          // If we are only simplifying targeted instructions and the user is an
          // instruction in the loop body, add it to our set of targeted
          // instructions. Because we process defs before uses (outside of PHIs)
          // we won't have visited it yet.
          //
          // We also skip any uses outside of the loop being simplified. Those
          // should always be PHI nodes due to LCSSA form, and we don't want to
          // try to simplify those away.
          assert((L.contains(UserI) || isa<PHINode>(UserI)) &&
                 "Uses outside the loop should be PHI nodes due to LCSSA!");
          if (!IsFirstIteration && L.contains(UserI))
            ToSimplify->insert(UserI);
        }

        if (MSSAU)
          if (Instruction *SimpleI = dyn_cast_or_null<Instruction>(V))
            if (MemoryAccess *MA = MSSA->getMemoryAccess(&I))
              if (MemoryAccess *ReplacementMA = MSSA->getMemoryAccess(SimpleI))
                MA->replaceAllUsesWith(ReplacementMA);

        assert(I.use_empty() && "Should always have replaced all uses!");
        if (isInstructionTriviallyDead(&I, &TLI))
          DeadInsts.push_back(&I);
        ++NumSimplified;
        Changed = true;
      }
    }

    // Delete any dead instructions found thus far now that we've finished an
    // iteration over all instructions in all the loop blocks.
    if (!DeadInsts.empty()) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU);
    }

    if (MSSAU && VerifyMemorySSA)
      MSSA->verifyMemorySSA();

    // If we never found a PHI that needs to be simplified in the next
    // iteration, we're done.
    if (Next->empty())
      break;

    // Otherwise, put the next set in place for the next iteration and reset it
    // and the visited PHIs for that iteration.
    std::swap(Next, ToSimplify);
    Next->clear();
    VisitedPHIs.clear();
    DeadInsts.clear();
  }

  return Changed;
}
Esempio n. 13
0
bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                                DependenceInfo &DI) {
  /* We currently handle outer loops like this:
        |
    ForeFirst    <----\    }
     Blocks           |    } ForeBlocks
    ForeLast          |    }
        |             |
    SubLoopFirst  <\  |    }
     Blocks        |  |    } SubLoopBlocks
    SubLoopLast   -/  |    }
        |             |
    AftFirst          |    }
     Blocks           |    } AftBlocks
    AftLast     ------/    }
        |

    There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
    and AftBlocks, providing that there is one edge from Fores to SubLoops,
    one edge from SubLoops to Afts and a single outer loop exit (from Afts).
    In practice we currently limit Aft blocks to a single block, and limit
    things further in the profitablility checks of the unroll and jam pass.

    Because of the way we rearrange basic blocks, we also require that
    the Fore blocks on all unrolled iterations are safe to move before the
    SubLoop blocks of all iterations. So we require that the phi node looping
    operands of ForeHeader can be moved to at least the end of ForeEnd, so that
    we can arrange cloned Fore Blocks before the subloop and match up Phi's
    correctly.

    i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
    It needs to be safe to tranform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.

    There are then a number of checks along the lines of no calls, no
    exceptions, inner loop IV is consistent, etc. Note that for loops requiring
    runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
    UnrollAndJamLoop if the trip count cannot be easily calculated.
  */

  if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
    return false;
  Loop *SubLoop = L->getSubLoops()[0];
  if (!SubLoop->isLoopSimplifyForm())
    return false;

  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Exit = L->getExitingBlock();
  BasicBlock *SubLoopHeader = SubLoop->getHeader();
  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
  BasicBlock *SubLoopExit = SubLoop->getExitingBlock();

  if (Latch != Exit)
    return false;
  if (SubLoopLatch != SubLoopExit)
    return false;

  if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken())
    return false;

  // Split blocks into Fore/SubLoop/Aft based on dominators
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
                                AftBlocks, &DT))
    return false;

  // Aft blocks may need to move instructions to fore blocks, which becomes more
  // difficult if there are multiple (potentially conditionally executed)
  // blocks. For now we just exclude loops with multiple aft blocks.
  if (AftBlocks.size() != 1)
    return false;

  // Check inner loop IV is consistent between all iterations
  const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch);
  if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) ||
      !SubLoopBECountSC->getType()->isIntegerTy())
    return false;
  ScalarEvolution::LoopDisposition LD =
      SE.getLoopDisposition(SubLoopBECountSC, L);
  if (LD != ScalarEvolution::LoopInvariant)
    return false;

  // Check the loop safety info for exceptions.
  LoopSafetyInfo LSI;
  computeLoopSafetyInfo(&LSI, L);
  if (LSI.MayThrow)
    return false;

  // We've ruled out the easy stuff and now need to check that there are no
  // interdependencies which may prevent us from moving the:
  //  ForeBlocks before Subloop and AftBlocks.
  //  Subloop before AftBlocks.
  //  ForeBlock phi operands before the subloop

  // Make sure we can move all instructions we need to before the subloop
  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  for (auto &Phi : Header->phis()) {
    Value *V = Phi.getIncomingValueForBlock(Latch);
    if (Instruction *I = dyn_cast<Instruction>(V))
      Worklist.push_back(I);
  }
  while (!Worklist.empty()) {
    Instruction *I = Worklist.back();
    Worklist.pop_back();
    if (Visited.insert(I).second) {
      if (SubLoop->contains(I->getParent()))
        return false;
      if (AftBlocks.count(I->getParent())) {
        // If we hit a phi node in afts we know we are done (probably LCSSA)
        if (isa<PHINode>(I))
          return false;
        if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
          return false;
        for (auto &U : I->operands())
          if (Instruction *II = dyn_cast<Instruction>(U))
            Worklist.push_back(II);
      }
    }
  }

  // Check for memory dependencies which prohibit the unrolling we are doing.
  // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
  // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
  if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI))
    return false;

  return true;
}
Esempio n. 14
0
/*
  This method performs Unroll and Jam. For a simple loop like:
  for (i = ..)
    Fore(i)
    for (j = ..)
      SubLoop(i, j)
    Aft(i)

  Instead of doing normal inner or outer unrolling, we do:
  for (i = .., i+=2)
    Fore(i)
    Fore(i+1)
    for (j = ..)
      SubLoop(i, j)
      SubLoop(i+1, j)
    Aft(i)
    Aft(i+1)

  So the outer loop is essetially unrolled and then the inner loops are fused
  ("jammed") together into a single loop. This can increase speed when there
  are loads in SubLoop that are invariant to i, as they become shared between
  the now jammed inner loops.

  We do this by spliting the blocks in the loop into Fore, Subloop and Aft.
  Fore blocks are those before the inner loop, Aft are those after. Normal
  Unroll code is used to copy each of these sets of blocks and the results are
  combined together into the final form above.

  isSafeToUnrollAndJam should be used prior to calling this to make sure the
  unrolling will be valid. Checking profitablility is also advisable.
*/
LoopUnrollResult
llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
                       unsigned TripMultiple, bool UnrollRemainder,
                       LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                       AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {

  // When we enter here we should have already checked that it is safe
  BasicBlock *Header = L->getHeader();
  assert(L->getSubLoops().size() == 1);
  Loop *SubLoop = *L->begin();

  // Don't enter the unroll code if there is nothing to do.
  if (TripCount == 0 && Count < 2) {
    LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
    return LoopUnrollResult::Unmodified;
  }

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = (Count == TripCount);

  // We use the runtime remainder in cases where we don't know trip multiple
  if (TripMultiple == 1 || TripMultiple % Count != 0) {
    if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
                                    /*UseEpilogRemainder*/ true,
                                    UnrollRemainder, LI, SE, DT, AC, true)) {
      LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
                           "generated when assuming runtime trip count\n");
      return LoopUnrollResult::Unmodified;
    }
  }

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (SE) {
    SE->forgetLoop(L);
    SE->forgetLoop(SubLoop);
  }

  using namespace ore;
  // Report the unrolling decision.
  if (CompletelyUnroll) {
    LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
                      << Header->getName() << " with trip count " << TripCount
                      << "!\n");
    ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
                                 L->getHeader())
              << "completely unroll and jammed loop with "
              << NV("UnrollCount", TripCount) << " iterations");
  } else {
    auto DiagBuilder = [&]() {
      OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
                              L->getHeader());
      return Diag << "unroll and jammed loop by a factor of "
                  << NV("UnrollCount", Count);
    };

    LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
                      << " by " << Count);
    if (TripMultiple != 1) {
      LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      ORE->emit([&]() {
        return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
                             << " trips per branch";
      });
    } else {
      LLVM_DEBUG(dbgs() << " with run-time trip count");
      ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
    }
    LLVM_DEBUG(dbgs() << "!\n");
  }

  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *LatchBlock = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  assert(Preheader && LatchBlock && Header);
  assert(BI && !BI->isUnconditional());
  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
  bool SubLoopContinueOnTrue = SubLoop->contains(
      SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));

  // Partition blocks in an outer/inner loop pair into blocks before and after
  // the loop
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
                           DT);

  // We keep track of the entering/first and exiting/last block of each of
  // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
  // blocks easier.
  std::vector<BasicBlock *> ForeBlocksFirst;
  std::vector<BasicBlock *> ForeBlocksLast;
  std::vector<BasicBlock *> SubLoopBlocksFirst;
  std::vector<BasicBlock *> SubLoopBlocksLast;
  std::vector<BasicBlock *> AftBlocksFirst;
  std::vector<BasicBlock *> AftBlocksLast;
  ForeBlocksFirst.push_back(Header);
  ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
  SubLoopBlocksFirst.push_back(SubLoop->getHeader());
  SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
  AftBlocksFirst.push_back(SubLoop->getExitBlock());
  AftBlocksLast.push_back(L->getExitingBlock());
  // Maps Blocks[0] -> Blocks[It]
  ValueToValueMapTy LastValueMap;

  // Move any instructions from fore phi operands from AftBlocks into Fore.
  moveHeaderPhiOperandsToForeBlocks(
      Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
      AftBlocks);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);
  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  if (Header->getParent()->isDebugInfoForProfiling())
    for (BasicBlock *BB : L->getBlocks())
      for (Instruction &I : *BB)
        if (!isa<DbgInfoIntrinsic>(&I))
          if (const DILocation *DIL = I.getDebugLoc())
            I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));

  // Copy all blocks
  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock *> NewBlocks;
    // Maps Blocks[It] -> Blocks[It-1]
    DenseMap<Value *, Value *> PrevItValueMap;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      if (ForeBlocks.count(*BB)) {
        L->addBasicBlockToLoop(New, *LI);

        if (*BB == ForeBlocksFirst[0])
          ForeBlocksFirst.push_back(New);
        if (*BB == ForeBlocksLast[0])
          ForeBlocksLast.push_back(New);
      } else if (SubLoopBlocks.count(*BB)) {
        SubLoop->addBasicBlockToLoop(New, *LI);

        if (*BB == SubLoopBlocksFirst[0])
          SubLoopBlocksFirst.push_back(New);
        if (*BB == SubLoopBlocksLast[0])
          SubLoopBlocksLast.push_back(New);
      } else if (AftBlocks.count(*BB)) {
        L->addBasicBlockToLoop(New, *LI);

        if (*BB == AftBlocksFirst[0])
          AftBlocksFirst.push_back(New);
        if (*BB == AftBlocksLast[0])
          AftBlocksLast.push_back(New);
      } else {
        llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
      }

      // Update our running maps of newest clones
      PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI) {
        PrevItValueMap[VI->second] =
            const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
        LastValueMap[VI->first] = VI->second;
      }

      NewBlocks.push_back(New);

      // Update DomTree:
      if (*BB == ForeBlocksFirst[0])
        DT->addNewBlock(New, ForeBlocksLast[It - 1]);
      else if (*BB == SubLoopBlocksFirst[0])
        DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
      else if (*BB == AftBlocksFirst[0])
        DT->addNewBlock(New, AftBlocksLast[It - 1]);
      else {
        // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
        // structure.
        auto BBDomNode = DT->getNode(*BB);
        auto BBIDom = BBDomNode->getIDom();
        BasicBlock *OriginalBBIDom = BBIDom->getBlock();
        assert(OriginalBBIDom);
        assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
        DT->addNewBlock(
            New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
      }
    }

    // Remap all instructions in the most recent iteration
    for (BasicBlock *NewBlock : NewBlocks) {
      for (Instruction &I : *NewBlock) {
        ::remapInstruction(&I, LastValueMap);
        if (auto *II = dyn_cast<IntrinsicInst>(&I))
          if (II->getIntrinsicID() == Intrinsic::assume)
            AC->registerAssumption(II);
      }
    }

    // Alter the ForeBlocks phi's, pointing them at the latest version of the
    // value from the previous iteration's phis
    for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
      Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
      assert(OldValue && "should have incoming edge from Aft[It]");
      Value *NewValue = OldValue;
      if (Value *PrevValue = PrevItValueMap[OldValue])
        NewValue = PrevValue;

      assert(Phi.getNumOperands() == 2);
      Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
      Phi.setIncomingValue(0, NewValue);
      Phi.removeIncomingValue(1);
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // finish up connecting the blocks and phi nodes. At this point LastValueMap
  // is the last unrolled iterations values.

  // Update Phis in BB from OldBB to point to NewBB
  auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
                            BasicBlock *NewBB) {
    for (PHINode &Phi : BB->phis()) {
      int I = Phi.getBasicBlockIndex(OldBB);
      Phi.setIncomingBlock(I, NewBB);
    }
  };
  // Update Phis in BB from OldBB to point to NewBB and use the latest value
  // from LastValueMap
  auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
                                     BasicBlock *NewBB,
                                     ValueToValueMapTy &LastValueMap) {
    for (PHINode &Phi : BB->phis()) {
      for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
        if (Phi.getIncomingBlock(b) == OldBB) {
          Value *OldValue = Phi.getIncomingValue(b);
          if (Value *LastValue = LastValueMap[OldValue])
            Phi.setIncomingValue(b, LastValue);
          Phi.setIncomingBlock(b, NewBB);
          break;
        }
      }
    }
  };
  // Move all the phis from Src into Dest
  auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
    Instruction *insertPoint = Dest->getFirstNonPHI();
    while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
      Phi->moveBefore(insertPoint);
  };

  // Update the PHI values outside the loop to point to the last block
  updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
                           LastValueMap);

  // Update ForeBlocks successors and phi nodes
  BranchInst *ForeTerm =
      cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
  BasicBlock *Dest = SubLoopBlocksFirst[0];
  ForeTerm->setSuccessor(0, Dest);

  if (CompletelyUnroll) {
    while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
      Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
      Phi->getParent()->getInstList().erase(Phi);
    }
  } else {
    // Update the PHI values to point to the last aft block
    updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
                             AftBlocksLast.back(), LastValueMap);
  }

  for (unsigned It = 1; It != Count; It++) {
    // Remap ForeBlock successors from previous iteration to this
    BranchInst *ForeTerm =
        cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
    BasicBlock *Dest = ForeBlocksFirst[It];
    ForeTerm->setSuccessor(0, Dest);
  }

  // Subloop successors and phis
  BranchInst *SubTerm =
      cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
  SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
  SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
  updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
                  ForeBlocksLast.back());
  updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
                  SubLoopBlocksLast.back());

  for (unsigned It = 1; It != Count; It++) {
    // Replace the conditional branch of the previous iteration subloop with an
    // unconditional one to this one
    BranchInst *SubTerm =
        cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
    BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
    SubTerm->eraseFromParent();

    updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
                    ForeBlocksLast.back());
    updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
                    SubLoopBlocksLast.back());
    movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
  }

  // Aft blocks successors and phis
  BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
  if (CompletelyUnroll) {
    BranchInst::Create(LoopExit, Term);
    Term->eraseFromParent();
  } else {
    Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
  }
  updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
                  SubLoopBlocksLast.back());

  for (unsigned It = 1; It != Count; It++) {
    // Replace the conditional branch of the previous iteration subloop with an
    // unconditional one to this one
    BranchInst *AftTerm =
        cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
    BranchInst::Create(AftBlocksFirst[It], AftTerm);
    AftTerm->eraseFromParent();

    updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
                    SubLoopBlocksLast.back());
    movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
  }

  // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
  // new ones required.
  if (Count != 1) {
    SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
                           SubLoopBlocksFirst[0]);
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
                           SubLoopBlocksLast[0], AftBlocksFirst[0]);

    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
                           ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
                           SubLoopBlocksLast.back(), AftBlocksFirst[0]);
    DT->applyUpdates(DTUpdates);
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<BasicBlock *, 16> MergeBlocks;
  MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
  MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
  MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
  while (!MergeBlocks.empty()) {
    BasicBlock *BB = *MergeBlocks.begin();
    BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
    if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
        // Don't remove BB and add Fold as they are the same BB
        assert(Fold == BB);
        (void)Fold;
        MergeBlocks.erase(Dest);
      } else
        MergeBlocks.erase(BB);
    } else
      MergeBlocks.erase(BB);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
  simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);

  NumCompletelyUnrolledAndJammed += CompletelyUnroll;
  ++NumUnrolledAndJammed;

#ifndef NDEBUG
  // We shouldn't have done anything to break loop simplify form or LCSSA.
  Loop *OuterL = L->getParentLoop();
  Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
  assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
  if (!CompletelyUnroll)
    assert(L->isLoopSimplifyForm());
  assert(SubLoop->isLoopSimplifyForm());
  assert(DT->verify());
#endif

  // Update LoopInfo if the loop is completely removed.
  if (CompletelyUnroll)
    LI->erase(L);

  return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
                          : LoopUnrollResult::PartiallyUnrolled;
}
void BranchProbabilities::propagateFreq(BasicBlock *BB)
{
    // if BB has been visited then return
    if (_Visited.count(BB))
    {
        return;
    }

    // 1. find bfreq(BB)
    if (BB == _head)
    {
        _bfreq[BB] = new float(1.0f);
    }
    else
    {
        // in order to be a back edge, BB must be a loop header and
        // pred must be contained in the loop
        Loop *L = _LI->getLoopFor(BB);
        if (!(L && (L->getHeader() == BB)))
        {
            L = NULL;
        }

        for (pred_iterator BI = pred_begin(BB), Bend = pred_end(BB); BI != Bend;
                ++BI)
        {
            BasicBlock *BBp = *BI;
            if (!_Visited.count(BBp) && !(L && L->contains(BBp) &&
                                          (L->getLoopPreheader() != BBp)))
            {
                return;
            }
        }

        _bfreq[BB] = new float(0.0f);
        float cyclic_probability = 0.0f;
        for (pred_iterator BI = pred_begin(BB), Bend = pred_end(BB); BI != Bend;
                ++BI)
        {
            BasicBlock *BBp = *BI;
            if (L && L->contains(BBp) && (L->getLoopPreheader() != BBp))
                cyclic_probability += (*(_BackEdgeProb[BBp][BB]));
            else
                (*(_bfreq[BB])) += (*(_Freq[BBp][BB]));
        }
        if (cyclic_probability > 0.95f)
            cyclic_probability = 0.95f;
        (*(_bfreq[BB])) = (*(_bfreq[BB])) / (1.0f - cyclic_probability);
    }

    // 2. calculate the frequencies of b's out edges
    _Visited.insert(BB);
    for (succ_iterator BI = succ_begin(BB), Bend = succ_end(BB); BI != Bend;
            ++BI)
    {
        BasicBlock *BBs = *BI;
        float fFreq = getProb(BB, BBs) * (*(_bfreq[BB]));
        _Freq[BB][BBs] = new float(fFreq);

        // update back_edge_prob(BB, BBs) so it
        // can be used by outer loops to calculate
        // cyclic_probability of inner loops
        if (BBs == _head)
            (*(_BackEdgeProb[BB][BBs])) = fFreq;
    }

    // 3. propagate to successor blocks
    for (succ_iterator BI = succ_begin(BB), Bend = succ_end(BB); BI != Bend;
            ++BI)
    {
        BasicBlock *BBs = *BI;
        Loop *L = _LI->getLoopFor(BBs);
        if (!(L && (BBs == L->getHeader()) && L->contains(BB) &&
                (BB != L->getLoopPreheader())))
            propagateFreq(BBs);
    }
}
Esempio n. 16
0
/// Given an instruction in the loop, check to see if it has any uses that are
/// outside the current loop.  If so, insert LCSSA PHI nodes and rewrite the
/// uses.
static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
                               const SmallVectorImpl<BasicBlock *> &ExitBlocks,
                               PredIteratorCache &PredCache, LoopInfo *LI) {
  SmallVector<Use *, 16> UsesToRewrite;

  // Tokens cannot be used in PHI nodes, so we skip over them.
  // We can run into tokens which are live out of a loop with catchswitch
  // instructions in Windows EH if the catchswitch has one catchpad which
  // is inside the loop and another which is not.
  if (Inst.getType()->isTokenTy())
    return false;

  BasicBlock *InstBB = Inst.getParent();

  for (Use &U : Inst.uses()) {
    Instruction *User = cast<Instruction>(U.getUser());
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User))
      UserBB = PN->getIncomingBlock(U);

    if (InstBB != UserBB && !L.contains(UserBB))
      UsesToRewrite.push_back(&U);
  }

  // If there are no uses outside the loop, exit with no change.
  if (UsesToRewrite.empty())
    return false;

  ++NumLCSSA; // We are applying the transformation

  // Invoke instructions are special in that their result value is not available
  // along their unwind edge. The code below tests to see whether DomBB
  // dominates the value, so adjust DomBB to the normal destination block,
  // which is effectively where the value is first usable.
  BasicBlock *DomBB = Inst.getParent();
  if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst))
    DomBB = Inv->getNormalDest();

  DomTreeNode *DomNode = DT.getNode(DomBB);

  SmallVector<PHINode *, 16> AddedPHIs;
  SmallVector<PHINode *, 8> PostProcessPHIs;

  SSAUpdater SSAUpdate;
  SSAUpdate.Initialize(Inst.getType(), Inst.getName());

  // Insert the LCSSA phi's into all of the exit blocks dominated by the
  // value, and add them to the Phi's map.
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
      continue;

    // If we already inserted something for this BB, don't reprocess it.
    if (SSAUpdate.HasValueForBlock(ExitBB))
      continue;

    PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
                                  Inst.getName() + ".lcssa", &ExitBB->front());

    // Add inputs from inside the loop for this PHI.
    for (BasicBlock *Pred : PredCache.get(ExitBB)) {
      PN->addIncoming(&Inst, Pred);

      // If the exit block has a predecessor not within the loop, arrange for
      // the incoming value use corresponding to that predecessor to be
      // rewritten in terms of a different LCSSA PHI.
      if (!L.contains(Pred))
        UsesToRewrite.push_back(
            &PN->getOperandUse(PN->getOperandNumForIncomingValue(
                 PN->getNumIncomingValues() - 1)));
    }

    AddedPHIs.push_back(PN);

    // Remember that this phi makes the value alive in this block.
    SSAUpdate.AddAvailableValue(ExitBB, PN);

    // LoopSimplify might fail to simplify some loops (e.g. when indirect
    // branches are involved). In such situations, it might happen that an exit
    // for Loop L1 is the header of a disjoint Loop L2. Thus, when we create
    // PHIs in such an exit block, we are also inserting PHIs into L2's header.
    // This could break LCSSA form for L2 because these inserted PHIs can also
    // have uses outside of L2. Remember all PHIs in such situation as to
    // revisit than later on. FIXME: Remove this if indirectbr support into
    // LoopSimplify gets improved.
    if (auto *OtherLoop = LI->getLoopFor(ExitBB))
      if (!L.contains(OtherLoop))
        PostProcessPHIs.push_back(PN);
  }

  // Rewrite all uses outside the loop in terms of the new PHIs we just
  // inserted.
  for (Use *UseToRewrite : UsesToRewrite) {
    // If this use is in an exit block, rewrite to use the newly inserted PHI.
    // This is required for correctness because SSAUpdate doesn't handle uses in
    // the same block.  It assumes the PHI we inserted is at the end of the
    // block.
    Instruction *User = cast<Instruction>(UseToRewrite->getUser());
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User))
      UserBB = PN->getIncomingBlock(*UseToRewrite);

    if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
      // Tell the VHs that the uses changed. This updates SCEV's caches.
      if (UseToRewrite->get()->hasValueHandle())
        ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
      UseToRewrite->set(&UserBB->front());
      continue;
    }

    // Otherwise, do full PHI insertion.
    SSAUpdate.RewriteUse(*UseToRewrite);
  }

  // Post process PHI instructions that were inserted into another disjoint loop
  // and update their exits properly.
  for (auto *I : PostProcessPHIs) {
    if (I->use_empty())
      continue;

    BasicBlock *PHIBB = I->getParent();
    Loop *OtherLoop = LI->getLoopFor(PHIBB);
    SmallVector<BasicBlock *, 8> EBs;
    OtherLoop->getExitBlocks(EBs);
    if (EBs.empty())
      continue;

    // Recurse and re-process each PHI instruction. FIXME: we should really
    // convert this entire thing to a worklist approach where we process a
    // vector of instructions...
    processInstruction(*OtherLoop, *I, DT, EBs, PredCache, LI);
  }

  // Remove PHI nodes that did not have any uses rewritten.
  for (PHINode *PN : AddedPHIs)
    if (PN->use_empty())
      PN->eraseFromParent();

  return true;
}