/// Create a clone of the blocks in a loop and connect them together.
/// This function doesn't create a clone of the loop structure.
///
/// There are two value maps that are defined and used.  VMap is
/// for the values in the current loop instance.  LVMap contains
/// the values from the last loop instance.  We need the LVMap values
/// to update the inital values for the current loop instance.
///
static void CloneLoopBlocks(Loop *L,
                            bool FirstCopy,
                            BasicBlock *InsertTop,
                            BasicBlock *InsertBot,
                            std::vector<BasicBlock *> &NewBlocks,
                            LoopBlocksDFS &LoopBlocks,
                            ValueToValueMapTy &VMap,
                            ValueToValueMapTy &LVMap,
                            LoopInfo *LI) {

  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
    NewBlocks.push_back(NewBB);

    if (Loop *ParentLoop = L->getParentLoop())
      ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());

    VMap[*BB] = NewBB;
    if (Header == *BB) {
      // For the first block, add a CFG connection to this newly
      // created block
      InsertTop->getTerminator()->setSuccessor(0, NewBB);

      // Change the incoming values to the ones defined in the
      // previously cloned loop.
      for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
        PHINode *NewPHI = cast<PHINode>(VMap[I]);
        if (FirstCopy) {
          // We replace the first phi node with the value from the preheader
          VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
          NewBB->getInstList().erase(NewPHI);
        } else {
          // Update VMap with values from the previous block
          unsigned idx = NewPHI->getBasicBlockIndex(Latch);
          Value *InVal = NewPHI->getIncomingValue(idx);
          if (Instruction *I = dyn_cast<Instruction>(InVal))
            if (L->contains(I))
              InVal = LVMap[InVal];
          NewPHI->setIncomingValue(idx, InVal);
          NewPHI->setIncomingBlock(idx, InsertTop);
        }
      }
    }

    if (Latch == *BB) {
      VMap.erase((*BB)->getTerminator());
      NewBB->getTerminator()->eraseFromParent();
      BranchInst::Create(InsertBot, NewBB);
    }
  }
  // LastValueMap is updated with the values for the current loop
  // which are used the next time this function is called.
  for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
       VI != VE; ++VI) {
    LVMap[VI->first] = VI->second;
  }
}
Beispiel #2
0
/// UnswitchNontrivialCondition - We determined that the loop is profitable
/// to unswitch when LIC equal Val.  Split it into loop versions and test the
/// condition outside of either loop.  Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
                                               Loop *L) {
  Function *F = loopHeader->getParent();
  DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
        << loopHeader->getName() << " [" << L->getBlocks().size()
        << " blocks] in Function " << F->getName()
        << " when '" << *Val << "' == " << *LIC << "\n");

  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);

  LoopBlocks.clear();
  NewBlocks.clear();

  // First step, split the preheader and exit blocks, and add these blocks to
  // the LoopBlocks list.
  BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
  LoopBlocks.push_back(NewPreheader);

  // We want the loop to come after the preheader, but before the exit blocks.
  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  // Split all of the edges from inside the loop to their exit blocks.  Update
  // the appropriate Phi nodes as we do so.
  SplitExitEdges(L, ExitBlocks);

  // The exit blocks may have been changed due to edge splitting, recompute.
  ExitBlocks.clear();
  L->getUniqueExitBlocks(ExitBlocks);

  // Add exit blocks to the loop blocks.
  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());

  // Next step, clone all of the basic blocks that make up the loop (including
  // the loop preheader and exit blocks), keeping track of the mapping between
  // the instructions and blocks.
  NewBlocks.reserve(LoopBlocks.size());
  ValueToValueMapTy VMap;
  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);

    NewBlocks.push_back(NewBB);
    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
  }

  // Splice the newly inserted blocks into the function right before the
  // original preheader.
  F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
                                NewBlocks[0], F->end());

  // Now we create the new Loop object for the versioned loop.
  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);

  // Recalculate unswitching quota, inherit simplified switches info for NewBB,
  // Probably clone more loop-unswitch related loop properties.
  BranchesInfo.cloneData(NewLoop, L, VMap);

  Loop *ParentLoop = L->getParentLoop();
  if (ParentLoop) {
    // Make sure to add the cloned preheader and exit blocks to the parent loop
    // as well.
    ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
  }

  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
    // The new exit block should be in the same loop as the old one.
    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
      ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());

    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
           "Exit block should have been split to have one successor!");
    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);

    // If the successor of the exit block had PHI nodes, add an entry for
    // NewExit.
    PHINode *PN;
    for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
      PN = cast<PHINode>(I);
      Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
      ValueToValueMapTy::iterator It = VMap.find(V);
      if (It != VMap.end()) V = It->second;
      PN->addIncoming(V, NewExit);
    }

    if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
      PN = PHINode::Create(LPad->getType(), 0, "",
                           ExitSucc->getFirstInsertionPt());

      for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
           I != E; ++I) {
        BasicBlock *BB = *I;
        LandingPadInst *LPI = BB->getLandingPadInst();
        LPI->replaceAllUsesWith(PN);
        PN->addIncoming(LPI, BB);
      }
    }
  }

  // Rewrite the code to refer to itself.
  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
      RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);

  // Rewrite the original preheader to select between versions of the loop.
  BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
         "Preheader splitting did not work correctly!");

  // Emit the new branch that selects between the two versions of this loop.
  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
  LPM->deleteSimpleAnalysisValue(OldBR, L);
  OldBR->eraseFromParent();

  LoopProcessWorklist.push_back(NewLoop);
  redoLoop = true;

  // Keep a WeakVH holding onto LIC.  If the first call to RewriteLoopBody
  // deletes the instruction (for example by simplifying a PHI that feeds into
  // the condition that we're unswitching on), we don't rewrite the second
  // iteration.
  WeakVH LICHandle(LIC);

  // Now we rewrite the original code to know that the condition is true and the
  // new code to know that the condition is false.
  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);

  // It's possible that simplifying one loop could cause the other to be
  // changed to another value or a constant.  If its a constant, don't simplify
  // it.
  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
      LICHandle && !isa<Constant>(LICHandle))
    RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
Beispiel #3
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecesssary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
/// have a runtime (i.e. not compile time constant) trip count.  Unrolling these
/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
/// iterations before branching into the unrolled loop.  UnrollLoop will not
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, bool AllowExpensiveTripCount,
                      unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
                      DominatorTree *DT, AssumptionCache *AC,
                      bool PreserveLCSSA) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  Loop *ParentL = L->getParentLoop();
  bool AllExitsAreInsideParentLoop = !ParentL ||
      std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
                  [&](BasicBlock *BB) { return ParentL->contains(BB); });

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount &&
      !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
                               PreserveLCSSA))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (SE)
    SE->forgetLoop(L);

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  // Report the unrolling decision.
  DebugLoc LoopLoc = L->getStartLoc();
  Function *F = Header->getParent();
  LLVMContext &Ctx = F->getContext();

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                           Twine("completely unrolled loop with ") +
                               Twine(TripCount) + " iterations");
  } else {
    auto EmitDiag = [&](const Twine &T) {
      emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                             "unrolled loop by a factor of " + Twine(Count) +
                                 T);
    };

    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
      EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip));
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      EmitDiag(" with " + Twine(TripMultiple) + " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
      EmitDiag(" with run-time trip count");
    }
    DEBUG(dbgs() << "!\n");
  }

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;
    SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
    NewLoops[L] = L;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Tell LI about New.
      if (*BB == Header) {
        assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
        L->addBasicBlockToLoop(New, *LI);
      } else {
        // Figure out which loop New is in.
        const Loop *OldLoop = LI->getLoopFor(*BB);
        assert(OldLoop && "Should (at least) be in the loop being unrolled!");

        Loop *&NewLoop = NewLoops[OldLoop];
        if (!NewLoop) {
          // Found a new sub-loop.
          assert(*BB == OldLoop->getHeader() &&
                 "Header should be first in RPO");

          Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
          assert(NewLoopParent &&
                 "Expected parent loop before sub-loop in RPO");
          NewLoop = new Loop;
          NewLoopParent->addChildLoop(NewLoop);

          // Forget the old loop, since its inputs may have changed.
          if (SE)
            SE->forgetLoop(OldLoop);
        }
        NewLoop->addBasicBlockToLoop(New, *LI);
      }

      if (*BB == Header)
        // Loop over all of the PHI nodes in the block, changing them to use
        // the incoming values from the previous block.
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(&*I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll) {
      if (j == 0)
        Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<Loop *, 4> ForgottenLoops;
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE,
                                                      ForgottenLoops))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: We could register any cloned assumptions instead of clearing the
  // whole function's cache.
  AC->clear();

  // FIXME: Reconstruct dom info, because it is not preserved properly.
  // Incrementally updating domtree after loop unrolling would be easy.
  if (DT)
    DT->recalculate(*L->getHeader()->getParent());

  // Simplify any new induction variables in the partially unrolled loop.
  if (SE && !CompletelyUnroll) {
    SmallVector<WeakVH, 16> DeadInsts;
    simplifyLoopIVs(L, SE, DT, LI, DeadInsts);

    // Aggressively clean up dead instructions that simplifyLoopIVs already
    // identified. Any remaining should be cleaned up below.
    while (!DeadInsts.empty())
      if (Instruction *Inst =
              dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const DataLayout &DL = Header->getModule()->getDataLayout();
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = &*I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst, DL))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;

  Loop *OuterL = L->getParentLoop();
  // Update LoopInfo if the loop is completely removed.
  if (CompletelyUnroll)
    LI->updateUnloop(L);;

  // If we have a pass and a DominatorTree we should re-simplify impacted loops
  // to ensure subsequent analyses can rely on this form. We want to simplify
  // at least one layer outside of the loop that was unrolled so that any
  // changes to the parent loop exposed by the unrolling are considered.
  if (DT) {
    if (!OuterL && !CompletelyUnroll)
      OuterL = L;
    if (OuterL) {
      bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);

      // LCSSA must be performed on the outermost affected loop. The unrolled
      // loop's last loop latch is guaranteed to be in the outermost loop after
      // LoopInfo's been updated by updateUnloop.
      Loop *LatchLoop = LI->getLoopFor(Latches.back());
      if (!OuterL->contains(LatchLoop))
        while (OuterL->getParentLoop() != LatchLoop)
          OuterL = OuterL->getParentLoop();

      if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified))
        formLCSSARecursively(*OuterL, *DT, LI, SE);
      else
        assert(OuterL->isLCSSAForm(*DT) &&
               "Loops should be in LCSSA form after loop-unroll.");
    }
  }

  return true;
}
Beispiel #4
0
void SuperBlock::fixSideEntrances() {
    // due to merging of BBs, some superblocks may have 1 BB remaining
    list<map<BasicBlock*, list<BasicBlock*> >::iterator > delSuperBlocks;

    for (map<BasicBlock*, list<BasicBlock*> >::iterator sp = superBlocks.begin(),
            sp_e = superBlocks.end();  sp != sp_e; ++sp) {
        // we need to keep track of the predecessor of the current basic block
        // being checked
        BasicBlock* prev = sp->first;

        // don't clone basic blocks if the code size threshold is achieved
        if (currCodeSize/originalCodeSize > CODE_EXPANSION_THRESHOLD) {
            break;
        }

        // the first basic block for a superblock need not be duplicated
        for (list<BasicBlock*>::iterator bb = sp->second.begin(),
                bb_e = sp->second.end(); bb != bb_e; ++bb) {
            // first, collect all predecessors for this BB
            // (note: we could not just iterate through as the predecessor set may
            // change
            list<BasicBlock*> predBBs;
            for (pred_iterator pred = pred_begin(*bb), pred_e = pred_end(*bb);
                    pred != pred_e; ++pred) {
                predBBs.push_back(*pred);
            }

            // now, walk through all predecessors of this current basic block
            BasicBlock* clonedBB = NULL;
            for (list<BasicBlock*>::iterator pred = predBBs.begin(),
                    pred_e = predBBs.end(); pred != pred_e; ++pred) {
                // if it is not the predecessor of this current basic block present in
                // the superblock, duplicate!
                if (*pred != prev) {
                    // there is no need to clone this BB multiple times
                    if (clonedBB == NULL) {
                        ValueToValueMapTy vmap;

                        // clone this basic block, and place the corresponding code after
                        // the last BB of this superblock
                        clonedBB = CloneBasicBlock(*bb, vmap, ".cloned",
                                                   (*bb)->getParent());
                        vmap[*bb] = clonedBB;

                        /*
                        errs() << "@@ BEFORE: " << *clonedBB << "\n";
                        // fix phi nodes in the cloned BB
                        for (BasicBlock::iterator I = clonedBB->begin(); isa<PHINode>(I); ++I) {
                          PHINode* PN = dyn_cast<PHINode>(I);
                          int bbIdx = PN->getBasicBlockIndex(prev);
                          if (bbIdx != -1) {
                            PN->removeIncomingValue(bbIdx, false);
                          }
                        }
                        */

                        // add size of duplicated BBs to total code size count
                        currCodeSize += clonedBB->size();

                        // modify operands in this basic block
                        for (BasicBlock::iterator instr = clonedBB->begin(),
                                instr_e = clonedBB->end(); instr != instr_e; ++instr) {
                            for (unsigned idx = 0, num_ops = instr->getNumOperands();
                                    idx < num_ops; ++idx) {
                                Value* op = instr->getOperand(idx);
                                ValueToValueMapTy::iterator op_it = vmap.find(op);
                                if (op_it != vmap.end()) {
                                    instr->setOperand(idx, op_it->second);
                                }
                            }
                        }
                    }

                    // remove phi nodes into this BB in the trace
                    /*
                    for (BasicBlock::iterator I = (*bb)->begin(); isa<PHINode>(I); ++I) {
                      PHINode* PN = dyn_cast<PHINode>(I);
                      int bbIdx = PN->getBasicBlockIndex(*pred);
                      if (bbIdx != -1) {
                        PN->removeIncomingValue(bbIdx, false);
                      }
                    }
                    */

                    // modify the branch instruction of the predecessor not in the superblock to
                    // branch to the cloned basic block
                    Instruction* br_instr = (*pred)->getTerminator();
                    br_instr->replaceUsesOfWith((Value*)*bb, (Value*)clonedBB);
                }
            }

            // determine if we can merge the BB (definitely can be merged), and its clone
            // with theirpredecessors
            if (clonedBB != NULL) {
                if (MergeBlockIntoPredecessor(*bb, this)) {
                    // since we have merged this BB, delete from our superblock mappings
                    partOfSuperBlock.erase(*bb);
                    bb = sp->second.erase(bb);
                    --bb;

                    if (sp->second.empty()) {
                        delSuperBlocks.push_back(sp);
                    }
                }

                MergeBlockIntoPredecessor(clonedBB, this);
            }

            prev = *bb;
        }
    }

    // erase some superblocks (which only have 1 BB remaining)
    for (list<map<BasicBlock*, list<BasicBlock*> >::iterator >::iterator del =
                delSuperBlocks.begin(), del_e = delSuperBlocks.end(); del != del_e;
            ++del) {
        superBlocks.erase(*del);
    }
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecesssary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
  if (SE)
    SE->forgetLoop(L);

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
  } else {
    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
    }
    DEBUG(dbgs() << "!\n");
  }

  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: Reconstruct dom info, because it is not preserved properly.
  // Incrementally updating domtree after loop unrolling would be easy.
  if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
    DT->runOnFunction(*L->getHeader()->getParent());

  // Simplify any new induction variables in the partially unrolled loop.
  if (SE && !CompletelyUnroll) {
    SmallVector<WeakVH, 16> DeadInsts;
    simplifyLoopIVs(L, SE, LPM, DeadInsts);

    // Aggressively clean up dead instructions that simplifyLoopIVs already
    // identified. Any remaining should be cleaned up below.
    while (!DeadInsts.empty())
      if (Instruction *Inst =
          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != NULL)
    LPM->deleteLoopFromQueue(L);

  return true;
}
Beispiel #6
0
// MakeFunctionClone - If the specified function needs to be modified for pool
// allocation support, make a clone of it, adding additional arguments as
// necessary, and return it.  If not, just return null.
//
Function* RTAssociate::MakeFunctionClone(Function &F, FuncInfo& FI, DSGraph* G) {
  if (G->node_begin() == G->node_end()) return 0;

  if (FI.ArgNodes.empty())
    return 0; // No need to clone if no pools need to be passed in!

  // Update statistics..
  NumArgsAdded += FI.ArgNodes.size();
  if (MaxArgsAdded < FI.ArgNodes.size()) MaxArgsAdded = FI.ArgNodes.size();
  ++NumCloned;

  // Figure out what the arguments are to be for the new version of the
  // function
  FunctionType *OldFuncTy = F.getFunctionType();
  std::vector<Type*> ArgTys(FI.ArgNodes.size(), PoolDescPtrTy);
  ArgTys.reserve(OldFuncTy->getNumParams() + FI.ArgNodes.size());

  ArgTys.insert(ArgTys.end(), OldFuncTy->param_begin(), OldFuncTy->param_end());

  // Create the new function prototype
  FunctionType *FuncTy = FunctionType::get(OldFuncTy->getReturnType(), ArgTys,
                                           OldFuncTy->isVarArg());
  // Create the new function...
  Function *New = Function::Create(FuncTy, Function::InternalLinkage, F.getName());
  New->copyAttributesFrom(&F);
  F.getParent()->getFunctionList().insert(&F, New);

  // Set the rest of the new arguments names to be PDa<n> and add entries to the
  // pool descriptors map
  Function::arg_iterator NI = New->arg_begin();
  for (unsigned i = 0, e = FI.ArgNodes.size(); i != e; ++i, ++NI) {
    FI.PoolDescriptors[FI.ArgNodes[i]] = CreateArgPool(FI.ArgNodes[i], NI);
    NI->setName("PDa");
  }

  // Map the existing arguments of the old function to the corresponding
  // arguments of the new function, and copy over the names.
  ValueToValueMapTy ValueMap;
  for (Function::arg_iterator I = F.arg_begin();
          NI != New->arg_end(); ++I, ++NI) {
    ValueMap[I] = NI;
    NI->setName(I->getName());
  }

  // Perform the cloning.
  SmallVector<ReturnInst*,100> Returns;
  // TODO: review the boolean flag here
  CloneFunctionInto(New, &F, ValueMap, true, Returns);

  //
  // The CloneFunctionInto() function will copy the parameter attributes
  // verbatim.  This is incorrect; each attribute should be shifted one so
  // that the pool descriptor has no attributes.
  //
  const AttributeSet OldAttrs = New->getAttributes();
  if (!OldAttrs.isEmpty()) {
    AttributeSet NewAttrs;
    for (unsigned index = 0; index < OldAttrs.getNumSlots(); ++index) {
      const AttributeSet & PAWI = OldAttrs.getSlotAttributes(index);
      unsigned argIndex = OldAttrs.getSlotIndex(index);

      // If it's not the return value, move the attribute to the next
      // parameter.
      if (argIndex) ++argIndex;

      // Add the parameter to the new list.
      NewAttrs = NewAttrs.addAttributes(F.getContext(), argIndex, PAWI);
    }

    // Assign the new attributes to the function clone
    New->setAttributes(NewAttrs);
  }

  for (ValueToValueMapTy::iterator I = ValueMap.begin(),
          E = ValueMap.end(); I != E; ++I)
    FI.NewToOldValueMap.insert(std::make_pair(I->second, const_cast<Value*>(I->first)));

  return FI.Clone = New;
}
/*
  This method performs Unroll and Jam. For a simple loop like:
  for (i = ..)
    Fore(i)
    for (j = ..)
      SubLoop(i, j)
    Aft(i)

  Instead of doing normal inner or outer unrolling, we do:
  for (i = .., i+=2)
    Fore(i)
    Fore(i+1)
    for (j = ..)
      SubLoop(i, j)
      SubLoop(i+1, j)
    Aft(i)
    Aft(i+1)

  So the outer loop is essetially unrolled and then the inner loops are fused
  ("jammed") together into a single loop. This can increase speed when there
  are loads in SubLoop that are invariant to i, as they become shared between
  the now jammed inner loops.

  We do this by spliting the blocks in the loop into Fore, Subloop and Aft.
  Fore blocks are those before the inner loop, Aft are those after. Normal
  Unroll code is used to copy each of these sets of blocks and the results are
  combined together into the final form above.

  isSafeToUnrollAndJam should be used prior to calling this to make sure the
  unrolling will be valid. Checking profitablility is also advisable.
*/
LoopUnrollResult
llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
                       unsigned TripMultiple, bool UnrollRemainder,
                       LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                       AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {

  // When we enter here we should have already checked that it is safe
  BasicBlock *Header = L->getHeader();
  assert(L->getSubLoops().size() == 1);
  Loop *SubLoop = *L->begin();

  // Don't enter the unroll code if there is nothing to do.
  if (TripCount == 0 && Count < 2) {
    LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
    return LoopUnrollResult::Unmodified;
  }

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = (Count == TripCount);

  // We use the runtime remainder in cases where we don't know trip multiple
  if (TripMultiple == 1 || TripMultiple % Count != 0) {
    if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
                                    /*UseEpilogRemainder*/ true,
                                    UnrollRemainder, LI, SE, DT, AC, true)) {
      LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
                           "generated when assuming runtime trip count\n");
      return LoopUnrollResult::Unmodified;
    }
  }

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (SE) {
    SE->forgetLoop(L);
    SE->forgetLoop(SubLoop);
  }

  using namespace ore;
  // Report the unrolling decision.
  if (CompletelyUnroll) {
    LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
                      << Header->getName() << " with trip count " << TripCount
                      << "!\n");
    ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
                                 L->getHeader())
              << "completely unroll and jammed loop with "
              << NV("UnrollCount", TripCount) << " iterations");
  } else {
    auto DiagBuilder = [&]() {
      OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
                              L->getHeader());
      return Diag << "unroll and jammed loop by a factor of "
                  << NV("UnrollCount", Count);
    };

    LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
                      << " by " << Count);
    if (TripMultiple != 1) {
      LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      ORE->emit([&]() {
        return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
                             << " trips per branch";
      });
    } else {
      LLVM_DEBUG(dbgs() << " with run-time trip count");
      ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
    }
    LLVM_DEBUG(dbgs() << "!\n");
  }

  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *LatchBlock = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  assert(Preheader && LatchBlock && Header);
  assert(BI && !BI->isUnconditional());
  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
  bool SubLoopContinueOnTrue = SubLoop->contains(
      SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));

  // Partition blocks in an outer/inner loop pair into blocks before and after
  // the loop
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
                           DT);

  // We keep track of the entering/first and exiting/last block of each of
  // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
  // blocks easier.
  std::vector<BasicBlock *> ForeBlocksFirst;
  std::vector<BasicBlock *> ForeBlocksLast;
  std::vector<BasicBlock *> SubLoopBlocksFirst;
  std::vector<BasicBlock *> SubLoopBlocksLast;
  std::vector<BasicBlock *> AftBlocksFirst;
  std::vector<BasicBlock *> AftBlocksLast;
  ForeBlocksFirst.push_back(Header);
  ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
  SubLoopBlocksFirst.push_back(SubLoop->getHeader());
  SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
  AftBlocksFirst.push_back(SubLoop->getExitBlock());
  AftBlocksLast.push_back(L->getExitingBlock());
  // Maps Blocks[0] -> Blocks[It]
  ValueToValueMapTy LastValueMap;

  // Move any instructions from fore phi operands from AftBlocks into Fore.
  moveHeaderPhiOperandsToForeBlocks(
      Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
      AftBlocks);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);
  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  if (Header->getParent()->isDebugInfoForProfiling())
    for (BasicBlock *BB : L->getBlocks())
      for (Instruction &I : *BB)
        if (!isa<DbgInfoIntrinsic>(&I))
          if (const DILocation *DIL = I.getDebugLoc())
            I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));

  // Copy all blocks
  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock *> NewBlocks;
    // Maps Blocks[It] -> Blocks[It-1]
    DenseMap<Value *, Value *> PrevItValueMap;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      if (ForeBlocks.count(*BB)) {
        L->addBasicBlockToLoop(New, *LI);

        if (*BB == ForeBlocksFirst[0])
          ForeBlocksFirst.push_back(New);
        if (*BB == ForeBlocksLast[0])
          ForeBlocksLast.push_back(New);
      } else if (SubLoopBlocks.count(*BB)) {
        SubLoop->addBasicBlockToLoop(New, *LI);

        if (*BB == SubLoopBlocksFirst[0])
          SubLoopBlocksFirst.push_back(New);
        if (*BB == SubLoopBlocksLast[0])
          SubLoopBlocksLast.push_back(New);
      } else if (AftBlocks.count(*BB)) {
        L->addBasicBlockToLoop(New, *LI);

        if (*BB == AftBlocksFirst[0])
          AftBlocksFirst.push_back(New);
        if (*BB == AftBlocksLast[0])
          AftBlocksLast.push_back(New);
      } else {
        llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
      }

      // Update our running maps of newest clones
      PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI) {
        PrevItValueMap[VI->second] =
            const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
        LastValueMap[VI->first] = VI->second;
      }

      NewBlocks.push_back(New);

      // Update DomTree:
      if (*BB == ForeBlocksFirst[0])
        DT->addNewBlock(New, ForeBlocksLast[It - 1]);
      else if (*BB == SubLoopBlocksFirst[0])
        DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
      else if (*BB == AftBlocksFirst[0])
        DT->addNewBlock(New, AftBlocksLast[It - 1]);
      else {
        // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
        // structure.
        auto BBDomNode = DT->getNode(*BB);
        auto BBIDom = BBDomNode->getIDom();
        BasicBlock *OriginalBBIDom = BBIDom->getBlock();
        assert(OriginalBBIDom);
        assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
        DT->addNewBlock(
            New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
      }
    }

    // Remap all instructions in the most recent iteration
    for (BasicBlock *NewBlock : NewBlocks) {
      for (Instruction &I : *NewBlock) {
        ::remapInstruction(&I, LastValueMap);
        if (auto *II = dyn_cast<IntrinsicInst>(&I))
          if (II->getIntrinsicID() == Intrinsic::assume)
            AC->registerAssumption(II);
      }
    }

    // Alter the ForeBlocks phi's, pointing them at the latest version of the
    // value from the previous iteration's phis
    for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
      Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
      assert(OldValue && "should have incoming edge from Aft[It]");
      Value *NewValue = OldValue;
      if (Value *PrevValue = PrevItValueMap[OldValue])
        NewValue = PrevValue;

      assert(Phi.getNumOperands() == 2);
      Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
      Phi.setIncomingValue(0, NewValue);
      Phi.removeIncomingValue(1);
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // finish up connecting the blocks and phi nodes. At this point LastValueMap
  // is the last unrolled iterations values.

  // Update Phis in BB from OldBB to point to NewBB
  auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
                            BasicBlock *NewBB) {
    for (PHINode &Phi : BB->phis()) {
      int I = Phi.getBasicBlockIndex(OldBB);
      Phi.setIncomingBlock(I, NewBB);
    }
  };
  // Update Phis in BB from OldBB to point to NewBB and use the latest value
  // from LastValueMap
  auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
                                     BasicBlock *NewBB,
                                     ValueToValueMapTy &LastValueMap) {
    for (PHINode &Phi : BB->phis()) {
      for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
        if (Phi.getIncomingBlock(b) == OldBB) {
          Value *OldValue = Phi.getIncomingValue(b);
          if (Value *LastValue = LastValueMap[OldValue])
            Phi.setIncomingValue(b, LastValue);
          Phi.setIncomingBlock(b, NewBB);
          break;
        }
      }
    }
  };
  // Move all the phis from Src into Dest
  auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
    Instruction *insertPoint = Dest->getFirstNonPHI();
    while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
      Phi->moveBefore(insertPoint);
  };

  // Update the PHI values outside the loop to point to the last block
  updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
                           LastValueMap);

  // Update ForeBlocks successors and phi nodes
  BranchInst *ForeTerm =
      cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
  BasicBlock *Dest = SubLoopBlocksFirst[0];
  ForeTerm->setSuccessor(0, Dest);

  if (CompletelyUnroll) {
    while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
      Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
      Phi->getParent()->getInstList().erase(Phi);
    }
  } else {
    // Update the PHI values to point to the last aft block
    updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
                             AftBlocksLast.back(), LastValueMap);
  }

  for (unsigned It = 1; It != Count; It++) {
    // Remap ForeBlock successors from previous iteration to this
    BranchInst *ForeTerm =
        cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
    BasicBlock *Dest = ForeBlocksFirst[It];
    ForeTerm->setSuccessor(0, Dest);
  }

  // Subloop successors and phis
  BranchInst *SubTerm =
      cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
  SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
  SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
  updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
                  ForeBlocksLast.back());
  updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
                  SubLoopBlocksLast.back());

  for (unsigned It = 1; It != Count; It++) {
    // Replace the conditional branch of the previous iteration subloop with an
    // unconditional one to this one
    BranchInst *SubTerm =
        cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
    BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
    SubTerm->eraseFromParent();

    updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
                    ForeBlocksLast.back());
    updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
                    SubLoopBlocksLast.back());
    movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
  }

  // Aft blocks successors and phis
  BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
  if (CompletelyUnroll) {
    BranchInst::Create(LoopExit, Term);
    Term->eraseFromParent();
  } else {
    Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
  }
  updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
                  SubLoopBlocksLast.back());

  for (unsigned It = 1; It != Count; It++) {
    // Replace the conditional branch of the previous iteration subloop with an
    // unconditional one to this one
    BranchInst *AftTerm =
        cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
    BranchInst::Create(AftBlocksFirst[It], AftTerm);
    AftTerm->eraseFromParent();

    updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
                    SubLoopBlocksLast.back());
    movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
  }

  // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
  // new ones required.
  if (Count != 1) {
    SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
                           SubLoopBlocksFirst[0]);
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
                           SubLoopBlocksLast[0], AftBlocksFirst[0]);

    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
                           ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
                           SubLoopBlocksLast.back(), AftBlocksFirst[0]);
    DT->applyUpdates(DTUpdates);
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<BasicBlock *, 16> MergeBlocks;
  MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
  MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
  MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
  while (!MergeBlocks.empty()) {
    BasicBlock *BB = *MergeBlocks.begin();
    BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
    if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
        // Don't remove BB and add Fold as they are the same BB
        assert(Fold == BB);
        (void)Fold;
        MergeBlocks.erase(Dest);
      } else
        MergeBlocks.erase(BB);
    } else
      MergeBlocks.erase(BB);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
  simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);

  NumCompletelyUnrolledAndJammed += CompletelyUnroll;
  ++NumUnrolledAndJammed;

#ifndef NDEBUG
  // We shouldn't have done anything to break loop simplify form or LCSSA.
  Loop *OuterL = L->getParentLoop();
  Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
  assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
  if (!CompletelyUnroll)
    assert(L->isLoopSimplifyForm());
  assert(SubLoop->isLoopSimplifyForm());
  assert(DT->verify());
#endif

  // Update LoopInfo if the loop is completely removed.
  if (CompletelyUnroll)
    LI->erase(L);

  return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
                          : LoopUnrollResult::PartiallyUnrolled;
}
Beispiel #8
0
void WinEHPrepare::cloneCommonBlocks(Function &F) {
  // We need to clone all blocks which belong to multiple funclets.  Values are
  // remapped throughout the funclet to propagate both the new instructions
  // *and* the new basic blocks themselves.
  for (auto &Funclets : FuncletBlocks) {
    BasicBlock *FuncletPadBB = Funclets.first;
    std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
    Value *FuncletToken;
    if (FuncletPadBB == &F.getEntryBlock())
      FuncletToken = ConstantTokenNone::get(F.getContext());
    else
      FuncletToken = FuncletPadBB->getFirstNonPHI();

    std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
    ValueToValueMapTy VMap;
    for (BasicBlock *BB : BlocksInFunclet) {
      ColorVector &ColorsForBB = BlockColors[BB];
      // We don't need to do anything if the block is monochromatic.
      size_t NumColorsForBB = ColorsForBB.size();
      if (NumColorsForBB == 1)
        continue;

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << "  Cloning block \'" << BB->getName()
                              << "\' for funclet \'" << FuncletPadBB->getName()
                              << "\'.\n");

      // Create a new basic block and copy instructions into it!
      BasicBlock *CBB =
          CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
      // Insert the clone immediately after the original to ensure determinism
      // and to keep the same relative ordering of any funclet's blocks.
      CBB->insertInto(&F, BB->getNextNode());

      // Add basic block mapping.
      VMap[BB] = CBB;

      // Record delta operations that we need to perform to our color mappings.
      Orig2Clone.emplace_back(BB, CBB);
    }

    // If nothing was cloned, we're done cloning in this funclet.
    if (Orig2Clone.empty())
      continue;

    // Update our color mappings to reflect that one block has lost a color and
    // another has gained a color.
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;

      BlocksInFunclet.push_back(NewBlock);
      ColorVector &NewColors = BlockColors[NewBlock];
      assert(NewColors.empty() && "A new block should only have one color!");
      NewColors.push_back(FuncletPadBB);

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << "  Assigned color \'" << FuncletPadBB->getName()
                              << "\' to block \'" << NewBlock->getName()
                              << "\'.\n");

      BlocksInFunclet.erase(
          std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock),
          BlocksInFunclet.end());
      ColorVector &OldColors = BlockColors[OldBlock];
      OldColors.erase(
          std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
          OldColors.end());

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << "  Removed color \'" << FuncletPadBB->getName()
                              << "\' from block \'" << OldBlock->getName()
                              << "\'.\n");
    }

    // Loop over all of the instructions in this funclet, fixing up operand
    // references as we go.  This uses VMap to do all the hard work.
    for (BasicBlock *BB : BlocksInFunclet)
      // Loop over all instructions, fixing each one as we find it...
      for (Instruction &I : *BB)
        RemapInstruction(&I, VMap,
                         RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);

    // Catchrets targeting cloned blocks need to be updated separately from
    // the loop above because they are not in the current funclet.
    SmallVector<CatchReturnInst *, 2> FixupCatchrets;
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;

      FixupCatchrets.clear();
      for (BasicBlock *Pred : predecessors(OldBlock))
        if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
          if (CatchRet->getCatchSwitchParentPad() == FuncletToken)
            FixupCatchrets.push_back(CatchRet);

      for (CatchReturnInst *CatchRet : FixupCatchrets)
        CatchRet->setSuccessor(NewBlock);
    }

    auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
      unsigned NumPreds = PN->getNumIncomingValues();
      for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
           ++PredIdx) {
        BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
        bool EdgeTargetsFunclet;
        if (auto *CRI =
                dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
          EdgeTargetsFunclet = (CRI->getCatchSwitchParentPad() == FuncletToken);
        } else {
          ColorVector &IncomingColors = BlockColors[IncomingBlock];
          assert(!IncomingColors.empty() && "Block not colored!");
          assert((IncomingColors.size() == 1 ||
                  llvm::all_of(IncomingColors,
                               [&](BasicBlock *Color) {
                                 return Color != FuncletPadBB;
                               })) &&
                 "Cloning should leave this funclet's blocks monochromatic");
          EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
        }
        if (IsForOldBlock != EdgeTargetsFunclet)
          continue;
        PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
        // Revisit the next entry.
        --PredIdx;
        --PredEnd;
      }
    };

    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;
      for (PHINode &OldPN : OldBlock->phis()) {
        UpdatePHIOnClonedBlock(&OldPN, /*IsForOldBlock=*/true);
      }
      for (PHINode &NewPN : NewBlock->phis()) {
        UpdatePHIOnClonedBlock(&NewPN, /*IsForOldBlock=*/false);
      }
    }

    // Check to see if SuccBB has PHI nodes. If so, we need to add entries to
    // the PHI nodes for NewBB now.
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;
      for (BasicBlock *SuccBB : successors(NewBlock)) {
        for (PHINode &SuccPN : SuccBB->phis()) {
          // Ok, we have a PHI node.  Figure out what the incoming value was for
          // the OldBlock.
          int OldBlockIdx = SuccPN.getBasicBlockIndex(OldBlock);
          if (OldBlockIdx == -1)
            break;
          Value *IV = SuccPN.getIncomingValue(OldBlockIdx);

          // Remap the value if necessary.
          if (auto *Inst = dyn_cast<Instruction>(IV)) {
            ValueToValueMapTy::iterator I = VMap.find(Inst);
            if (I != VMap.end())
              IV = I->second;
          }

          SuccPN.addIncoming(IV, NewBlock);
        }
      }
    }

    for (ValueToValueMapTy::value_type VT : VMap) {
      // If there were values defined in BB that are used outside the funclet,
      // then we now have to update all uses of the value to use either the
      // original value, the cloned value, or some PHI derived value.  This can
      // require arbitrary PHI insertion, of which we are prepared to do, clean
      // these up now.
      SmallVector<Use *, 16> UsesToRename;

      auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
      if (!OldI)
        continue;
      auto *NewI = cast<Instruction>(VT.second);
      // Scan all uses of this instruction to see if it is used outside of its
      // funclet, and if so, record them in UsesToRename.
      for (Use &U : OldI->uses()) {
        Instruction *UserI = cast<Instruction>(U.getUser());
        BasicBlock *UserBB = UserI->getParent();
        ColorVector &ColorsForUserBB = BlockColors[UserBB];
        assert(!ColorsForUserBB.empty());
        if (ColorsForUserBB.size() > 1 ||
            *ColorsForUserBB.begin() != FuncletPadBB)
          UsesToRename.push_back(&U);
      }

      // If there are no uses outside the block, we're done with this
      // instruction.
      if (UsesToRename.empty())
        continue;

      // We found a use of OldI outside of the funclet.  Rename all uses of OldI
      // that are outside its funclet to be uses of the appropriate PHI node
      // etc.
      SSAUpdater SSAUpdate;
      SSAUpdate.Initialize(OldI->getType(), OldI->getName());
      SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
      SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);

      while (!UsesToRename.empty())
        SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
    }
  }
}
Beispiel #9
0
void WorklessInstrument::CloneInnerLoop(Loop * pLoop, vector<BasicBlock *> & vecAdd, ValueToValueMapTy & VMap, set<BasicBlock *> & setCloned)
{
	Function * pFunction = pLoop->getHeader()->getParent();
	BasicBlock * pPreHeader = vecAdd[0];

	SmallVector<BasicBlock *, 4> ExitBlocks;
	pLoop->getExitBlocks(ExitBlocks);

	set<BasicBlock *> setExitBlocks;

	for(unsigned long i = 0; i < ExitBlocks.size(); i++)
	{
		setExitBlocks.insert(ExitBlocks[i]);
	}

	for(unsigned long i = 0; i < ExitBlocks.size(); i++ )
	{
		VMap[ExitBlocks[i]] = ExitBlocks[i];
	}

	vector<BasicBlock *> ToClone;
	vector<BasicBlock *> BeenCloned;

	
	//clone loop
	ToClone.push_back(pLoop->getHeader());

	while(ToClone.size()>0)
	{
		BasicBlock * pCurrent = ToClone.back();
		ToClone.pop_back();

		WeakVH & BBEntry = VMap[pCurrent];
		if (BBEntry)
		{
			continue;
		}

		BasicBlock * NewBB;
		BBEntry = NewBB = BasicBlock::Create(pCurrent->getContext(), "", pFunction);

		if(pCurrent->hasName())
		{
			NewBB->setName(pCurrent->getName() + ".CPI");
		}

		if(pCurrent->hasAddressTaken())
		{
			errs() << "hasAddressTaken branch\n" ;
			exit(0);
		}

		for(BasicBlock::const_iterator II = pCurrent->begin(); II != pCurrent->end(); ++II )
		{
			Instruction * NewInst = II->clone();
			if(II->hasName())
			{
				NewInst->setName(II->getName() + ".CPI");
			}
			VMap[II] = NewInst;
			NewBB->getInstList().push_back(NewInst);
		}

		const TerminatorInst *TI = pCurrent->getTerminator();
		for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
		{
			ToClone.push_back(TI->getSuccessor(i));
		}

		setCloned.insert(NewBB);
		BeenCloned.push_back(NewBB);
	}

	//remap value used inside loop
	vector<BasicBlock *>::iterator itVecBegin = BeenCloned.begin();
	vector<BasicBlock *>::iterator itVecEnd = BeenCloned.end();

	for(; itVecBegin != itVecEnd; itVecBegin ++)
	{
		for(BasicBlock::iterator II = (*itVecBegin)->begin(); II != (*itVecBegin)->end(); II ++ )
		{
			//II->dump();
			RemapInstruction(II, VMap);
		}
	}

	//add to the else if body
	BasicBlock * pElseBody = vecAdd[1];

	BasicBlock * pClonedHeader = cast<BasicBlock>(VMap[pLoop->getHeader()]);

	BranchInst::Create(pClonedHeader, pElseBody);

	//errs() << pPreHeader->getName() << "\n";
	for(BasicBlock::iterator II = pClonedHeader->begin(); II != pClonedHeader->end(); II ++ )
	{
		if(PHINode * pPHI = dyn_cast<PHINode>(II))
		{
			vector<int> vecToRemoved;
			for (unsigned i = 0, e = pPHI->getNumIncomingValues(); i != e; ++i) 
			{
				if(pPHI->getIncomingBlock(i) == pPreHeader)
				{
					pPHI->setIncomingBlock(i, pElseBody);
				}
			}
		}
	}

	set<BasicBlock *> setProcessedBlock;

	for(unsigned long i = 0; i < ExitBlocks.size(); i++ )
	{
		if(setProcessedBlock.find(ExitBlocks[i]) != setProcessedBlock.end() )
		{
			continue;
		}
		else
		{
			setProcessedBlock.insert(ExitBlocks[i]);
		}

		for(BasicBlock::iterator II = ExitBlocks[i]->begin(); II != ExitBlocks[i]->end(); II ++ )
		{
			if(PHINode * pPHI = dyn_cast<PHINode>(II))
			{
				unsigned numIncomming = pPHI->getNumIncomingValues();
				for(unsigned i = 0; i<numIncomming; i++)
				{
					BasicBlock * incommingBlock = pPHI->getIncomingBlock(i);
					if(VMap.find(incommingBlock) != VMap.end() )
					{
						Value * incommingValue = pPHI->getIncomingValue(i);

						if(VMap.find(incommingValue) != VMap.end() )
						{
							incommingValue = VMap[incommingValue];
						}

						pPHI->addIncoming(incommingValue, cast<BasicBlock>(VMap[incommingBlock]));

					}
				} 

			}
		}
	}
}