Exemplo n.º 1
0
void LowerEmAsyncify::FindContextVariables(AsyncCallEntry & Entry) {
  BasicBlock *AfterCallBlock = Entry.AfterCallBlock;

  Function & F = *AfterCallBlock->getParent();

  // Create a new entry block as if in the callback function
  // theck check variables that no longer properly dominate their uses
  BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "", &F, &F.getEntryBlock());
  BranchInst::Create(AfterCallBlock, EntryBlock);

  DominatorTreeWrapperPass DTW;
  DTW.runOnFunction(F);
  DominatorTree& DT = DTW.getDomTree();

  // These blocks may be using some values defined at or before AsyncCallBlock
  BasicBlockSet Ramifications = FindReachableBlocksFrom(AfterCallBlock); 

  SmallPtrSet<Value*, 256> ContextVariables;
  Values Pending;

  // Examine the instructions, find all variables that we need to store in the context
  for (BasicBlockSet::iterator RI = Ramifications.begin(), RE = Ramifications.end(); RI != RE; ++RI) {
    for (BasicBlock::iterator I = (*RI)->begin(), E = (*RI)->end(); I != E; ++I) {
      for (unsigned i = 0, NumOperands = I->getNumOperands(); i < NumOperands; ++i) {
        Value *O = I->getOperand(i);
        if (Instruction *Inst = dyn_cast<Instruction>(O)) {
          if (Inst == Entry.AsyncCallInst) continue; // for the original async call, we will load directly from async return value
          if (ContextVariables.count(Inst) != 0)  continue; // already examined 

          if (!DT.dominates(Inst, I->getOperandUse(i))) {
            // `I` is using `Inst`, yet `Inst` does not dominate `I` if we arrive directly at AfterCallBlock
            // so we need to save `Inst` in the context
            ContextVariables.insert(Inst);
            Pending.push_back(Inst);
          }
        } else if (Argument *Arg = dyn_cast<Argument>(O)) {
          // count() should be as fast/slow as insert, so just insert here 
          ContextVariables.insert(Arg);
        }
      }
    }
  }

  // restore F
  EntryBlock->eraseFromParent();  

  Entry.ContextVariables.clear();
  Entry.ContextVariables.reserve(ContextVariables.size());
  for (SmallPtrSet<Value*, 256>::iterator I = ContextVariables.begin(), E = ContextVariables.end(); I != E; ++I) {
    Entry.ContextVariables.push_back(*I);
  }
}
Exemplo n.º 2
0
/// Fold the loop tail into the loop exit by speculating the loop tail
/// instructions. Typically, this is a single post-increment. In the case of a
/// simple 2-block loop, hoisting the increment can be much better than
/// duplicating the entire loop header. In the case of loops with early exits,
/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
/// canonical form so downstream passes can handle it.
///
/// I don't believe this invalidates SCEV.
bool LoopRotate::simplifyLoopLatch(Loop *L) {
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch || Latch->hasAddressTaken())
    return false;

  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!Jmp || !Jmp->isUnconditional())
    return false;

  BasicBlock *LastExit = Latch->getSinglePredecessor();
  if (!LastExit || !L->isLoopExiting(LastExit))
    return false;

  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
  if (!BI)
    return false;

  if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
    return false;

  DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
        << LastExit->getName() << "\n");

  // Hoist the instructions from Latch into LastExit.
  LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);

  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
  BasicBlock *Header = Jmp->getSuccessor(0);
  assert(Header == L->getHeader() && "expected a backward branch");

  // Remove Latch from the CFG so that LastExit becomes the new Latch.
  BI->setSuccessor(FallThruPath, Header);
  Latch->replaceSuccessorsPhiUsesWith(LastExit);
  Jmp->eraseFromParent();

  // Nuke the Latch block.
  assert(Latch->empty() && "unable to evacuate Latch");
  LI->removeBlock(Latch);
  if (DominatorTreeWrapperPass *DTWP =
          getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DTWP->getDomTree().eraseNode(Latch);
  Latch->eraseFromParent();
  return true;
}
Exemplo n.º 3
0
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
/// predecessor is known to have one successor (DestBB!).  Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
///
void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
  // If BB has single-entry PHI nodes, fold them.
  while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
    Value *NewVal = PN->getIncomingValue(0);
    // Replace self referencing PHI with undef, it must be dead.
    if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
    PN->replaceAllUsesWith(NewVal);
    PN->eraseFromParent();
  }
  
  BasicBlock *PredBB = DestBB->getSinglePredecessor();
  assert(PredBB && "Block doesn't have a single predecessor!");
  
  // Splice all the instructions from PredBB to DestBB.
  PredBB->getTerminator()->eraseFromParent();
  DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
  
  // Anything that branched to PredBB now branches to DestBB.
  PredBB->replaceAllUsesWith(DestBB);
  
  // Nuke BB.
  PredBB->eraseFromParent();
}
Exemplo n.º 4
0
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
///
/// =>
///
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
///
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
    if (!TLI)
        return false;

    Value *V = RI->getReturnValue();
    PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
    if (V && !PN)
        return false;

    BasicBlock *BB = RI->getParent();
    if (PN && PN->getParent() != BB)
        return false;

    // It's not safe to eliminate the sign / zero extension of the return value.
    // See llvm::isInTailCallPosition().
    const Function *F = BB->getParent();
    Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
    if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
        return false;

    // Make sure there are no instructions between the PHI and return, or that the
    // return is the first instruction in the block.
    if (PN) {
        BasicBlock::iterator BI = BB->begin();
        do {
            ++BI;
        }
        while (isa<DbgInfoIntrinsic>(BI));
        if (&*BI != RI)
            return false;
    } else {
        BasicBlock::iterator BI = BB->begin();
        while (isa<DbgInfoIntrinsic>(BI)) ++BI;
        if (&*BI != RI)
            return false;
    }

    /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
    /// call.
    SmallVector<CallInst*, 4> TailCalls;
    if (PN) {
        for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
            CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
            // Make sure the phi value is indeed produced by the tail call.
            if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
                    TLI->mayBeEmittedAsTailCall(CI))
                TailCalls.push_back(CI);
        }
    } else {
        SmallPtrSet<BasicBlock*, 4> VisitedBBs;
        for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
            if (!VisitedBBs.insert(*PI))
                continue;

            BasicBlock::InstListType &InstList = (*PI)->getInstList();
            BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
            BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
            do {
                ++RI;
            }
            while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
            if (RI == RE)
                continue;

            CallInst *CI = dyn_cast<CallInst>(&*RI);
            if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
                TailCalls.push_back(CI);
        }
    }

    bool Changed = false;
    for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
        CallInst *CI = TailCalls[i];
        CallSite CS(CI);

        // Conservatively require the attributes of the call to match those of the
        // return. Ignore noalias because it doesn't affect the call sequence.
        Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
        if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
            continue;

        // Make sure the call instruction is followed by an unconditional branch to
        // the return block.
        BasicBlock *CallBB = CI->getParent();
        BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
        if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
            continue;

        // Duplicate the return into CallBB.
        (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
        ModifiedDT = Changed = true;
        ++NumRetsDup;
    }

    // If we eliminated all predecessors of the block, delete the block now.
    if (Changed && pred_begin(BB) == pred_end(BB))
        BB->eraseFromParent();

    return Changed;
}
Exemplo n.º 5
0
/// \brief Simplify one loop and queue further loops for simplification.
///
/// FIXME: Currently this accepts both lots of analyses that it uses and a raw
/// Pass pointer. The Pass pointer is used by numerous utilities to update
/// specific analyses. Rather than a pass it would be much cleaner and more
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
                            DominatorTree *DT, LoopInfo *LI,
                            ScalarEvolution *SE, Pass *PP,
                            AssumptionCache *AC) {
  bool Changed = false;
ReprocessLoop:

  // Check to see that no blocks (other than the header) in this loop have
  // predecessors that are not in the loop.  This is not valid for natural
  // loops, but can occur if the blocks are unreachable.  Since they are
  // unreachable we can just shamelessly delete those CFG edges!
  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
       BB != E; ++BB) {
    if (*BB == L->getHeader()) continue;

    SmallPtrSet<BasicBlock*, 4> BadPreds;
    for (pred_iterator PI = pred_begin(*BB),
         PE = pred_end(*BB); PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (!L->contains(P))
        BadPreds.insert(P);
    }

    // Delete each unique out-of-loop (and thus dead) predecessor.
    for (BasicBlock *P : BadPreds) {

      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
                   << P->getName() << "\n");

      // Inform each successor of each dead pred.
      for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI)
        (*SI)->removePredecessor(P);
      // Zap the dead pred's terminator and replace it with unreachable.
      TerminatorInst *TI = P->getTerminator();
       TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
      P->getTerminator()->eraseFromParent();
      new UnreachableInst(P->getContext(), P);
      Changed = true;
    }
  }

  // If there are exiting blocks with branches on undef, resolve the undef in
  // the direction which will exit the loop. This will help simplify loop
  // trip count computations.
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       E = ExitingBlocks.end(); I != E; ++I)
    if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
      if (BI->isConditional()) {
        if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {

          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
                       << (*I)->getName() << "\n");

          BI->setCondition(ConstantInt::get(Cond->getType(),
                                            !L->contains(BI->getSuccessor(0))));

          // This may make the loop analyzable, force SCEV recomputation.
          if (SE)
            SE->forgetLoop(L);

          Changed = true;
        }
      }

  // Does the loop already have a preheader?  If so, don't insert one.
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    Preheader = InsertPreheaderForLoop(L, PP);
    if (Preheader) {
      ++NumInserted;
      Changed = true;
    }
  }

  // Next, check to make sure that all exit nodes of the loop only have
  // predecessors that are inside of the loop.  This check guarantees that the
  // loop preheader/header will dominate the exit blocks.  If the exit block has
  // predecessors from outside of the loop, split the edge now.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);

  SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
                                               ExitBlocks.end());
  for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
         E = ExitBlockSet.end(); I != E; ++I) {
    BasicBlock *ExitBlock = *I;
    for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
         PI != PE; ++PI)
      // Must be exactly this loop: no subloops, parent loops, or non-loop preds
      // allowed.
      if (!L->contains(*PI)) {
        if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PP)) {
          ++NumInserted;
          Changed = true;
        }
        break;
      }
  }

  // If the header has more than two predecessors at this point (from the
  // preheader and from multiple backedges), we must adjust the loop.
  BasicBlock *LoopLatch = L->getLoopLatch();
  if (!LoopLatch) {
    // If this is really a nested loop, rip it out into a child loop.  Don't do
    // this for loops with a giant number of backedges, just factor them into a
    // common backedge instead.
    if (L->getNumBackEdges() < 8) {
      if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE, PP, AC)) {
        ++NumNested;
        // Enqueue the outer loop as it should be processed next in our
        // depth-first nest walk.
        Worklist.push_back(OuterL);

        // This is a big restructuring change, reprocess the whole loop.
        Changed = true;
        // GCC doesn't tail recursion eliminate this.
        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
        goto ReprocessLoop;
      }
    }

    // If we either couldn't, or didn't want to, identify nesting of the loops,
    // insert a new block that all backedges target, then make it jump to the
    // loop header.
    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
    if (LoopLatch) {
      ++NumInserted;
      Changed = true;
    }
  }

  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  // Scan over the PHI nodes in the loop header.  Since they now have only two
  // incoming values (the loop is canonicalized), we may have simplified the PHI
  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
  PHINode *PN;
  for (BasicBlock::iterator I = L->getHeader()->begin();
       (PN = dyn_cast<PHINode>(I++)); )
    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
      if (SE) SE->forgetValue(PN);
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
    }

  // If this loop has multiple exits and the exits all go to the same
  // block, attempt to merge the exits. This helps several passes, such
  // as LoopRotation, which do not support loops with multiple exits.
  // SimplifyCFG also does this (and this code uses the same utility
  // function), however this code is loop-aware, where SimplifyCFG is
  // not. That gives it the advantage of being able to hoist
  // loop-invariant instructions out of the way to open up more
  // opportunities, and the disadvantage of having the responsibility
  // to preserve dominator information.
  bool UniqueExit = true;
  if (!ExitBlocks.empty())
    for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
      if (ExitBlocks[i] != ExitBlocks[0]) {
        UniqueExit = false;
        break;
      }
  if (UniqueExit) {
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      BasicBlock *ExitingBlock = ExitingBlocks[i];
      if (!ExitingBlock->getSinglePredecessor()) continue;
      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
      if (!BI || !BI->isConditional()) continue;
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      if (!CI || CI->getParent() != ExitingBlock) continue;

      // Attempt to hoist out all instructions except for the
      // comparison and the branch.
      bool AllInvariant = true;
      bool AnyInvariant = false;
      for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
        Instruction *Inst = I++;
        // Skip debug info intrinsics.
        if (isa<DbgInfoIntrinsic>(Inst))
          continue;
        if (Inst == CI)
          continue;
        if (!L->makeLoopInvariant(Inst, AnyInvariant,
                                  Preheader ? Preheader->getTerminator()
                                            : nullptr)) {
          AllInvariant = false;
          break;
        }
      }
      if (AnyInvariant) {
        Changed = true;
        // The loop disposition of all SCEV expressions that depend on any
        // hoisted values have also changed.
        if (SE)
          SE->forgetLoopDispositions(L);
      }
      if (!AllInvariant) continue;

      // The block has now been cleared of all instructions except for
      // a comparison and a conditional branch. SimplifyCFG may be able
      // to fold it now.
      if (!FoldBranchToCommonDest(BI))
        continue;

      // Success. The block is now dead, so remove it from the loop,
      // update the dominator tree and delete it.
      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
                   << ExitingBlock->getName() << "\n");

      // Notify ScalarEvolution before deleting this block. Currently assume the
      // parent loop doesn't change (spliting edges doesn't count). If blocks,
      // CFG edges, or other values in the parent loop change, then we need call
      // to forgetLoop() for the parent instead.
      if (SE)
        SE->forgetLoop(L);

      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
      Changed = true;
      LI->removeBlock(ExitingBlock);

      DomTreeNode *Node = DT->getNode(ExitingBlock);
      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
        Node->getChildren();
      while (!Children.empty()) {
        DomTreeNode *Child = Children.front();
        DT->changeImmediateDominator(Child, Node->getIDom());
      }
      DT->eraseNode(ExitingBlock);

      BI->getSuccessor(0)->removePredecessor(ExitingBlock);
      BI->getSuccessor(1)->removePredecessor(ExitingBlock);
      ExitingBlock->eraseFromParent();
    }
  }

  return Changed;
}
Exemplo n.º 6
0
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly.  The
/// effect of this is to copy significantly less code in cases where (for
/// example) a function call with constant arguments is inlined, and those
/// constant arguments cause a significant amount of code in the callee to be
/// dead.  Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                                     ValueToValueMapTy &VMap,
                                     bool ModuleLevelChanges,
                                     SmallVectorImpl<ReturnInst*> &Returns,
                                     const char *NameSuffix, 
                                     ClonedCodeInfo *CodeInfo,
                                     const TargetData *TD,
                                     Instruction *TheCall) {
  assert(NameSuffix && "NameSuffix cannot be null!");
  
#ifndef NDEBUG
  for (Function::const_arg_iterator II = OldFunc->arg_begin(), 
       E = OldFunc->arg_end(); II != E; ++II)
    assert(VMap.count(II) && "No mapping from source argument specified!");
#endif

  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
                            Returns, NameSuffix, CodeInfo, TD);

  // Clone the entry block, and anything recursively reachable from it.
  std::vector<const BasicBlock*> CloneWorklist;
  CloneWorklist.push_back(&OldFunc->getEntryBlock());
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, CloneWorklist);
  }
  
  // Loop over all of the basic blocks in the old function.  If the block was
  // reachable, we have cloned it and the old block is now in the value map:
  // insert it into the new function in the right order.  If not, ignore it.
  //
  // Defer PHI resolution until rest of function is resolved.
  SmallVector<const PHINode*, 16> PHIToResolve;
  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
       BI != BE; ++BI) {
    Value *V = VMap[BI];
    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
    if (NewBB == 0) continue;  // Dead block.

    // Add the new block to the new function.
    NewFunc->getBasicBlockList().push_back(NewBB);
    
    // Loop over all of the instructions in the block, fixing up operand
    // references as we go.  This uses VMap to do all the hard work.
    //
    BasicBlock::iterator I = NewBB->begin();

    DebugLoc TheCallDL;
    if (TheCall) 
      TheCallDL = TheCall->getDebugLoc();
    
    // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.
    if (PHINode *PN = dyn_cast<PHINode>(I)) {
      // Skip over all PHI nodes, remembering them for later.
      BasicBlock::const_iterator OldI = BI->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
        PHIToResolve.push_back(cast<PHINode>(OldI));
    }
    
    // Otherwise, remap the rest of the instructions normally.
    for (; I != NewBB->end(); ++I)
      RemapInstruction(I, VMap,
                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
  }
  
  // Defer PHI resolution until rest of function is resolved, PHI resolution
  // requires the CFG to be up-to-date.
  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
    const PHINode *OPN = PHIToResolve[phino];
    unsigned NumPreds = OPN->getNumIncomingValues();
    const BasicBlock *OldBB = OPN->getParent();
    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);

    // Map operands for blocks that are live and remove operands for blocks
    // that are dead.
    for (; phino != PHIToResolve.size() &&
         PHIToResolve[phino]->getParent() == OldBB; ++phino) {
      OPN = PHIToResolve[phino];
      PHINode *PN = cast<PHINode>(VMap[OPN]);
      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
        Value *V = VMap[PN->getIncomingBlock(pred)];
        if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
          Value *InVal = MapValue(PN->getIncomingValue(pred),
                                  VMap, 
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
          assert(InVal && "Unknown input value?");
          PN->setIncomingValue(pred, InVal);
          PN->setIncomingBlock(pred, MappedBlock);
        } else {
          PN->removeIncomingValue(pred, false);
          --pred, --e;  // Revisit the next entry.
        }
      } 
    }
    
    // The loop above has removed PHI entries for those blocks that are dead
    // and has updated others.  However, if a block is live (i.e. copied over)
    // but its terminator has been changed to not go to this block, then our
    // phi nodes will have invalid entries.  Update the PHI nodes in this
    // case.
    PHINode *PN = cast<PHINode>(NewBB->begin());
    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
    if (NumPreds != PN->getNumIncomingValues()) {
      assert(NumPreds < PN->getNumIncomingValues());
      // Count how many times each predecessor comes to this block.
      std::map<BasicBlock*, unsigned> PredCount;
      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
           PI != E; ++PI)
        --PredCount[*PI];
      
      // Figure out how many entries to remove from each PHI.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        ++PredCount[PN->getIncomingBlock(i)];
      
      // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
      // entries.
      BasicBlock::iterator I = NewBB->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
        for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
             E = PredCount.end(); PCI != E; ++PCI) {
          BasicBlock *Pred     = PCI->first;
          for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
            PN->removeIncomingValue(Pred, false);
        }
      }
    }
    
    // If the loops above have made these phi nodes have 0 or 1 operand,
    // replace them with undef or the input value.  We must do this for
    // correctness, because 0-operand phis are not valid.
    PN = cast<PHINode>(NewBB->begin());
    if (PN->getNumIncomingValues() == 0) {
      BasicBlock::iterator I = NewBB->begin();
      BasicBlock::const_iterator OldI = OldBB->begin();
      while ((PN = dyn_cast<PHINode>(I++))) {
        Value *NV = UndefValue::get(PN->getType());
        PN->replaceAllUsesWith(NV);
        assert(VMap[OldI] == PN && "VMap mismatch");
        VMap[OldI] = NV;
        PN->eraseFromParent();
        ++OldI;
      }
    }
    // NOTE: We cannot eliminate single entry phi nodes here, because of
    // VMap.  Single entry phi nodes can have multiple VMap entries
    // pointing at them.  Thus, deleting one would require scanning the VMap
    // to update any entries in it that would require that.  This would be
    // really slow.
  }
  
  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches.  This happen all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
  Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
  while (I != NewFunc->end()) {
    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }
    
    // Note that we can't eliminate uncond branches if the destination has
    // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
    // require scanning the VMap to update any entries that point to the phi
    // node.
    BasicBlock *Dest = BI->getSuccessor(0);
    if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
      ++I; continue;
    }
    
    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();
    
    // Make all PHI nodes that referred to Dest now refer to I as their source.
    Dest->replaceAllUsesWith(I);

    // Move all the instructions in the succ to the pred.
    I->getInstList().splice(I->end(), Dest->getInstList());
    
    // Remove the dest block.
    Dest->eraseFromParent();
    
    // Do not increment I, iteratively merge all things this block branches to.
  }
}
Exemplo n.º 7
0
/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB.
/// This routine is used to remove split condition's dead branch, dominated by
/// DeadBB. LiveBB dominates split conidition's other branch.
void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, 
                                  BasicBlock *LiveBB) {

  // First update DeadBB's dominance frontier. 
  SmallVector<BasicBlock *, 8> FrontierBBs;
  DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB);
  if (DeadBBDF != DF->end()) {
    SmallVector<BasicBlock *, 8> PredBlocks;
    
    DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second;
    for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(),
           DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI) 
      {
      BasicBlock *FrontierBB = *DeadBBSetI;
      FrontierBBs.push_back(FrontierBB);

      // Rremove any PHI incoming edge from blocks dominated by DeadBB.
      PredBlocks.clear();
      for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
          PI != PE; ++PI) {
        BasicBlock *P = *PI;
        if (P == DeadBB || DT->dominates(DeadBB, P))
          PredBlocks.push_back(P);
      }

      for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end();
          FBI != FBE; ++FBI) {
        if (PHINode *PN = dyn_cast<PHINode>(FBI)) {
          for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(),
                PE = PredBlocks.end(); PI != PE; ++PI) {
            BasicBlock *P = *PI;
            PN->removeIncomingValue(P);
          }
        }
        else
          break;
      }      
    }
  }
  
  // Now remove DeadBB and all nodes dominated by DeadBB in df order.
  SmallVector<BasicBlock *, 32> WorkList;
  DomTreeNode *DN = DT->getNode(DeadBB);
  for (df_iterator<DomTreeNode*> DI = df_begin(DN),
         E = df_end(DN); DI != E; ++DI) {
    BasicBlock *BB = DI->getBlock();
    WorkList.push_back(BB);
    BB->replaceAllUsesWith(UndefValue::get(
                                       Type::getLabelTy(DeadBB->getContext())));
  }

  while (!WorkList.empty()) {
    BasicBlock *BB = WorkList.back(); WorkList.pop_back();
    LPM->deleteSimpleAnalysisValue(BB, LP);
    for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); 
        BBI != BBE; ) {
      Instruction *I = BBI;
      ++BBI;
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
      LPM->deleteSimpleAnalysisValue(I, LP);
      I->eraseFromParent();
    }
    DT->eraseNode(BB);
    DF->removeBlock(BB);
    LI->removeBlock(BB);
    BB->eraseFromParent();
  }

  // Update Frontier BBs' dominator info.
  while (!FrontierBBs.empty()) {
    BasicBlock *FBB = FrontierBBs.back(); FrontierBBs.pop_back();
    BasicBlock *NewDominator = FBB->getSinglePredecessor();
    if (!NewDominator) {
      pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
      NewDominator = *PI;
      ++PI;
      if (NewDominator != LiveBB) {
        for(; PI != PE; ++PI) {
          BasicBlock *P = *PI;
          if (P == LiveBB) {
            NewDominator = LiveBB;
            break;
          }
          NewDominator = DT->findNearestCommonDominator(NewDominator, P);
        }
      }
    }
    assert (NewDominator && "Unable to fix dominator info.");
    DT->changeImmediateDominator(FBB, NewDominator);
    DF->changeImmediateDominator(FBB, NewDominator, DT);
  }

}
Exemplo n.º 8
0
/// SimplifyCode - Okay, now that we have simplified some instructions in the
/// loop, walk over it and constant prop, dce, and fold control flow where
/// possible.  Note that this is effectively a very simple loop-structure-aware
/// optimizer.  During processing of this loop, L could very well be deleted, so
/// it must not be used.
///
/// FIXME: When the loop optimizer is more mature, separate this out to a new
/// pass.
///
void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
  while (!Worklist.empty()) {
    Instruction *I = Worklist.back();
    Worklist.pop_back();

    // Simple DCE.
    if (isInstructionTriviallyDead(I)) {
      DEBUG(dbgs() << "Remove dead instruction '" << *I);
      
      // Add uses to the worklist, which may be dead now.
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
        if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
          Worklist.push_back(Use);
      LPM->deleteSimpleAnalysisValue(I, L);
      RemoveFromWorklist(I, Worklist);
      I->eraseFromParent();
      ++NumSimplify;
      continue;
    }

    // See if instruction simplification can hack this up.  This is common for
    // things like "select false, X, Y" after unswitching made the condition be
    // 'false'.
    if (Value *V = SimplifyInstruction(I, 0, DT))
      if (LI->replacementPreservesLCSSAForm(I, V)) {
        ReplaceUsesOfWith(I, V, Worklist, L, LPM);
        continue;
      }

    // Special case hacks that appear commonly in unswitched code.
    if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
      if (BI->isUnconditional()) {
        // If BI's parent is the only pred of the successor, fold the two blocks
        // together.
        BasicBlock *Pred = BI->getParent();
        BasicBlock *Succ = BI->getSuccessor(0);
        BasicBlock *SinglePred = Succ->getSinglePredecessor();
        if (!SinglePred) continue;  // Nothing to do.
        assert(SinglePred == Pred && "CFG broken");

        DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " 
              << Succ->getName() << "\n");
        
        // Resolve any single entry PHI nodes in Succ.
        while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
          ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
        
        // Move all of the successor contents from Succ to Pred.
        Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
                                   Succ->end());
        LPM->deleteSimpleAnalysisValue(BI, L);
        BI->eraseFromParent();
        RemoveFromWorklist(BI, Worklist);
        
        // If Succ has any successors with PHI nodes, update them to have
        // entries coming from Pred instead of Succ.
        Succ->replaceAllUsesWith(Pred);
        
        // Remove Succ from the loop tree.
        LI->removeBlock(Succ);
        LPM->deleteSimpleAnalysisValue(Succ, L);
        Succ->eraseFromParent();
        ++NumSimplify;
        continue;
      }
      
      if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
        // Conditional branch.  Turn it into an unconditional branch, then
        // remove dead blocks.
        continue;  // FIXME: Enable.

        DEBUG(dbgs() << "Folded branch: " << *BI);
        BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
        BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
        DeadSucc->removePredecessor(BI->getParent(), true);
        Worklist.push_back(BranchInst::Create(LiveSucc, BI));
        LPM->deleteSimpleAnalysisValue(BI, L);
        BI->eraseFromParent();
        RemoveFromWorklist(BI, Worklist);
        ++NumSimplify;

        RemoveBlockIfDead(DeadSucc, Worklist, L);
      }
      continue;
    }
  }
}
Exemplo n.º 9
0
// InlineFunction - This function inlines the called function into the basic
// block of the caller.  This returns false if it is not possible to inline this
// call.  The program is still in a well defined state if this occurs though.
//
// Note that this only does one level of inlining.  For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream.  Similiarly this will inline a recursive
// function by one level.
//
bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;


  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  //
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  std::vector<ReturnInst*> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy ValueMap after cloning.
    DenseMap<const Value*, Value*> ValueMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments actually inlined, we need to make the copy implied
      // by them explicit.  However, we don't do this if the callee is readonly
      // or readnone, because the copy would be unneeded: the callee doesn't
      // modify the struct.
      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
          !CalledFunc->onlyReadsMemory()) {
        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
        const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);

        // Create the alloca.  If we have TargetData, use nice alignment.
        unsigned Align = 1;
        if (TD) Align = TD->getPrefTypeAlignment(AggTy);
        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
                                          Caller->begin()->begin());
        // Emit a memcpy.
        const Type *Tys[] = { Type::Int64Ty };
        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                       Intrinsic::memcpy, 
                                                       Tys, 1);
        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);

        Value *Size;
        if (TD == 0)
          Size = ConstantExpr::getSizeOf(AggTy);
        else
          Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));

        // Always generate a memcpy of alignment 1 here because we don't know
        // the alignment of the src pointer.  Other optimizations can infer
        // better alignment.
        Value *CallArgs[] = {
          DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
        };
        CallInst *TheMemCpy =
          CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);

        // If we have a call graph, update it.
        if (CG) {
          CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
          CallGraphNode *CallerNode = (*CG)[Caller];
          CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
        }

        // Uses of the argument in the function should use our new alloca
        // instead.
        ActualArg = NewAlloca;
      }

      ValueMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
                              &InlinedFunctionInfo, TD);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  //
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
           E = FirstNewBlock->end(); I != E; )
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
        // If the alloca is now dead, remove it.  This often occurs due to code
        // specialization.
        if (AI->use_empty()) {
          AI->eraseFromParent();
          continue;
        }

        if (isa<Constant>(AI->getArraySize())) {
          // Scan for the block of allocas that we can move over, and move them
          // all at once.
          while (isa<AllocaInst>(I) &&
                 isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
            ++I;

          // Transfer all of the allocas over in a block.  Using splice means
          // that the instructions aren't removed from the symbol table, then
          // reinserted.
          Caller->getEntryBlock().getInstList().splice(
              InsertPoint,
              FirstNewBlock->getInstList(),
              AI, I);
        }
      }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Constant *StackSave, *StackRestore;
    StackSave    = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore);

    // If we are preserving the callgraph, add edges to the stacksave/restore
    // functions for the calls we insert.
    CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
    if (CG) {
      // We know that StackSave/StackRestore are Function*'s, because they are
      // intrinsics which must have the right types.
      StackSaveCGN    = CG->getOrInsertFunction(cast<Function>(StackSave));
      StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
      CallerNode = (*CG)[Caller];
    }

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
                                          FirstNewBlock->begin());
    if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
      if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
    }

    // Count the number of StackRestore calls we insert.
    unsigned NumStackRestores = Returns.size();

    // If we are inlining an invoke instruction, insert restores before each
    // unwind.  These unwinds will be rewritten into branches later.
    if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
      for (Function::iterator BB = FirstNewBlock, E = Caller->end();
           BB != E; ++BB)
        if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
          CallInst::Create(StackRestore, SavedPtr, "", UI);
          ++NumStackRestores;
        }
    }
  }

  // If we are inlining tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
  // instructions are unreachable.
  if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB) {
      TerminatorInst *Term = BB->getTerminator();
      if (isa<UnwindInst>(Term)) {
        new UnreachableInst(Term);
        BB->getInstList().erase(Term);
      }
    }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any inlined 'unwind' instructions into branches to the invoke exception
  // destination, and call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      BranchInst::Create(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  const Type *RTy = CalledFunc->getReturnType();

  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    PHINode *PHI = 0;
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }

    // Add a branch to the merge points and remove return instructions.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst::Create(AfterCallBB, RI);
      RI->eraseFromParent();
    }
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty())
      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Delete the return instruction now and empty ReturnBB now.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  return true;
}
Exemplo n.º 10
0
/// For each (predecessor, conditions from predecessors) pair, it will split the
/// basic block containing the call site, hook it up to the predecessor and
/// replace the call instruction with new call instructions, which contain
/// constraints based on the conditions from their predecessors.
/// For example, in the IR below with an OR condition, the call-site can
/// be split. In this case, Preds for Tail is [(Header, a == null),
/// (TBB, a != null, b == null)]. Tail is replaced by 2 split blocks, containing
/// CallInst1, which has constraints based on the conditions from Head and
/// CallInst2, which has constraints based on the conditions coming from TBB.
///
/// From :
///
///   Header:
///     %c = icmp eq i32* %a, null
///     br i1 %c %Tail, %TBB
///   TBB:
///     %c2 = icmp eq i32* %b, null
///     br i1 %c %Tail, %End
///   Tail:
///     %ca = call i1  @callee (i32* %a, i32* %b)
///
///  to :
///
///   Header:                          // PredBB1 is Header
///     %c = icmp eq i32* %a, null
///     br i1 %c %Tail-split1, %TBB
///   TBB:                             // PredBB2 is TBB
///     %c2 = icmp eq i32* %b, null
///     br i1 %c %Tail-split2, %End
///   Tail-split1:
///     %ca1 = call @callee (i32* null, i32* %b)         // CallInst1
///    br %Tail
///   Tail-split2:
///     %ca2 = call @callee (i32* nonnull %a, i32* null) // CallInst2
///    br %Tail
///   Tail:
///    %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2]
///
/// Note that in case any arguments at the call-site are constrained by its
/// predecessors, new call-sites with more constrained arguments will be
/// created in createCallSitesOnPredicatedArgument().
static void splitCallSite(
    CallSite CS,
    const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds) {
  Instruction *Instr = CS.getInstruction();
  BasicBlock *TailBB = Instr->getParent();
  bool IsMustTailCall = CS.isMustTailCall();

  PHINode *CallPN = nullptr;

  // `musttail` calls must be followed by optional `bitcast`, and `ret`. The
  // split blocks will be terminated right after that so there're no users for
  // this phi in a `TailBB`.
  if (!IsMustTailCall && Instr->getNumUses())
    CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call");

  DEBUG(dbgs() << "split call-site : " << *Instr << " into \n");

  assert(Preds.size() == 2 && "The ValueToValueMaps array has size 2.");
  // ValueToValueMapTy is neither copy nor moveable, so we use a simple array
  // here.
  ValueToValueMapTy ValueToValueMaps[2];
  for (unsigned i = 0; i < Preds.size(); i++) {
    BasicBlock *PredBB = Preds[i].first;
    BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
        TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i]);
    assert(SplitBlock && "Unexpected new basic block split.");

    Instruction *NewCI =
        &*std::prev(SplitBlock->getTerminator()->getIterator());
    CallSite NewCS(NewCI);
    addConditions(NewCS, Preds[i].second);

    // Handle PHIs used as arguments in the call-site.
    for (PHINode &PN : TailBB->phis()) {
      unsigned ArgNo = 0;
      for (auto &CI : CS.args()) {
        if (&*CI == &PN) {
          NewCS.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock));
        }
        ++ArgNo;
      }
    }
    DEBUG(dbgs() << "    " << *NewCI << " in " << SplitBlock->getName()
                 << "\n");
    if (CallPN)
      CallPN->addIncoming(NewCI, SplitBlock);

    // Clone and place bitcast and return instructions before `TI`
    if (IsMustTailCall)
      copyMustTailReturn(SplitBlock, Instr, NewCI);
  }

  NumCallSiteSplit++;

  // FIXME: remove TI in `copyMustTailReturn`
  if (IsMustTailCall) {
    // Remove superfluous `br` terminators from the end of the Split blocks
    // NOTE: Removing terminator removes the SplitBlock from the TailBB's
    // predecessors. Therefore we must get complete list of Splits before
    // attempting removal.
    SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB)));
    assert(Splits.size() == 2 && "Expected exactly 2 splits!");
    for (unsigned i = 0; i < Splits.size(); i++)
      Splits[i]->getTerminator()->eraseFromParent();

    // Erase the tail block once done with musttail patching
    TailBB->eraseFromParent();
    return;
  }

  auto *OriginalBegin = &*TailBB->begin();
  // Replace users of the original call with a PHI mering call-sites split.
  if (CallPN) {
    CallPN->insertBefore(OriginalBegin);
    Instr->replaceAllUsesWith(CallPN);
  }

  // Remove instructions moved to split blocks from TailBB, from the duplicated
  // call instruction to the beginning of the basic block. If an instruction
  // has any uses, add a new PHI node to combine the values coming from the
  // split blocks. The new PHI nodes are placed before the first original
  // instruction, so we do not end up deleting them. By using reverse-order, we
  // do not introduce unnecessary PHI nodes for def-use chains from the call
  // instruction to the beginning of the block.
  auto I = Instr->getReverseIterator();
  while (I != TailBB->rend()) {
    Instruction *CurrentI = &*I++;
    if (!CurrentI->use_empty()) {
      // If an existing PHI has users after the call, there is no need to create
      // a new one.
      if (isa<PHINode>(CurrentI))
        continue;
      PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size());
      for (auto &Mapping : ValueToValueMaps)
        NewPN->addIncoming(Mapping[CurrentI],
                           cast<Instruction>(Mapping[CurrentI])->getParent());
      NewPN->insertBefore(&*TailBB->begin());
      CurrentI->replaceAllUsesWith(NewPN);
    }
    CurrentI->eraseFromParent();
    // We are done once we handled the first original instruction in TailBB.
    if (CurrentI == OriginalBegin)
      break;
  }
}
Exemplo n.º 11
0
/// This works like CloneAndPruneFunctionInto, except that it does not clone the
/// entire function. Instead it starts at an instruction provided by the caller
/// and copies (and prunes) only the code reachable from that instruction.
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                     const Instruction *StartingInst,
                                     ValueToValueMapTy &VMap,
                                     bool ModuleLevelChanges,
                                     SmallVectorImpl<ReturnInst *> &Returns,
                                     const char *NameSuffix,
                                     ClonedCodeInfo *CodeInfo) {
  assert(NameSuffix && "NameSuffix cannot be null!");

  ValueMapTypeRemapper *TypeMapper = nullptr;
  ValueMaterializer *Materializer = nullptr;

#ifndef NDEBUG
  // If the cloning starts at the beginning of the function, verify that
  // the function arguments are mapped.
  if (!StartingInst)
    for (const Argument &II : OldFunc->args())
      assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif

  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
                            NameSuffix, CodeInfo);
  const BasicBlock *StartingBB;
  if (StartingInst)
    StartingBB = StartingInst->getParent();
  else {
    StartingBB = &OldFunc->getEntryBlock();
    StartingInst = &StartingBB->front();
  }

  // Clone the entry block, and anything recursively reachable from it.
  std::vector<const BasicBlock*> CloneWorklist;
  PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
  }
  
  // Loop over all of the basic blocks in the old function.  If the block was
  // reachable, we have cloned it and the old block is now in the value map:
  // insert it into the new function in the right order.  If not, ignore it.
  //
  // Defer PHI resolution until rest of function is resolved.
  SmallVector<const PHINode*, 16> PHIToResolve;
  for (const BasicBlock &BI : *OldFunc) {
    Value *V = VMap[&BI];
    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
    if (!NewBB) continue;  // Dead block.

    // Add the new block to the new function.
    NewFunc->getBasicBlockList().push_back(NewBB);

    // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.
    for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
      // PHI nodes may have been remapped to non-PHI nodes by the caller or
      // during the cloning process.
      if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        if (isa<PHINode>(VMap[PN]))
          PHIToResolve.push_back(PN);
        else
          break;
      } else {
        break;
      }
    }

    // Finally, remap the terminator instructions, as those can't be remapped
    // until all BBs are mapped.
    RemapInstruction(NewBB->getTerminator(), VMap,
                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                     TypeMapper, Materializer);
  }
  
  // Defer PHI resolution until rest of function is resolved, PHI resolution
  // requires the CFG to be up-to-date.
  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
    const PHINode *OPN = PHIToResolve[phino];
    unsigned NumPreds = OPN->getNumIncomingValues();
    const BasicBlock *OldBB = OPN->getParent();
    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);

    // Map operands for blocks that are live and remove operands for blocks
    // that are dead.
    for (; phino != PHIToResolve.size() &&
         PHIToResolve[phino]->getParent() == OldBB; ++phino) {
      OPN = PHIToResolve[phino];
      PHINode *PN = cast<PHINode>(VMap[OPN]);
      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
        Value *V = VMap[PN->getIncomingBlock(pred)];
        if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
          Value *InVal = MapValue(PN->getIncomingValue(pred),
                                  VMap, 
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
          assert(InVal && "Unknown input value?");
          PN->setIncomingValue(pred, InVal);
          PN->setIncomingBlock(pred, MappedBlock);
        } else {
          PN->removeIncomingValue(pred, false);
          --pred, --e;  // Revisit the next entry.
        }
      } 
    }
    
    // The loop above has removed PHI entries for those blocks that are dead
    // and has updated others.  However, if a block is live (i.e. copied over)
    // but its terminator has been changed to not go to this block, then our
    // phi nodes will have invalid entries.  Update the PHI nodes in this
    // case.
    PHINode *PN = cast<PHINode>(NewBB->begin());
    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
    if (NumPreds != PN->getNumIncomingValues()) {
      assert(NumPreds < PN->getNumIncomingValues());
      // Count how many times each predecessor comes to this block.
      std::map<BasicBlock*, unsigned> PredCount;
      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
           PI != E; ++PI)
        --PredCount[*PI];
      
      // Figure out how many entries to remove from each PHI.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        ++PredCount[PN->getIncomingBlock(i)];
      
      // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
      // entries.
      BasicBlock::iterator I = NewBB->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
        for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
             E = PredCount.end(); PCI != E; ++PCI) {
          BasicBlock *Pred     = PCI->first;
          for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
            PN->removeIncomingValue(Pred, false);
        }
      }
    }
    
    // If the loops above have made these phi nodes have 0 or 1 operand,
    // replace them with undef or the input value.  We must do this for
    // correctness, because 0-operand phis are not valid.
    PN = cast<PHINode>(NewBB->begin());
    if (PN->getNumIncomingValues() == 0) {
      BasicBlock::iterator I = NewBB->begin();
      BasicBlock::const_iterator OldI = OldBB->begin();
      while ((PN = dyn_cast<PHINode>(I++))) {
        Value *NV = UndefValue::get(PN->getType());
        PN->replaceAllUsesWith(NV);
        assert(VMap[&*OldI] == PN && "VMap mismatch");
        VMap[&*OldI] = NV;
        PN->eraseFromParent();
        ++OldI;
      }
    }
  }

  // Make a second pass over the PHINodes now that all of them have been
  // remapped into the new function, simplifying the PHINode and performing any
  // recursive simplifications exposed. This will transparently update the
  // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
  // two PHINodes, the iteration over the old PHIs remains valid, and the
  // mapping will just map us to the new node (which may not even be a PHI
  // node).
  for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
    if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
      recursivelySimplifyInstruction(PN);

  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches. This happens all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
  Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
  Function::iterator I = Begin;
  while (I != NewFunc->end()) {
    // Check if this block has become dead during inlining or other
    // simplifications. Note that the first block will appear dead, as it has
    // not yet been wired up properly.
    if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
                       I->getSinglePredecessor() == &*I)) {
      BasicBlock *DeadBB = &*I++;
      DeleteDeadBlock(DeadBB);
      continue;
    }

    // We need to simplify conditional branches and switches with a constant
    // operand. We try to prune these out when cloning, but if the
    // simplification required looking through PHI nodes, those are only
    // available after forming the full basic block. That may leave some here,
    // and we still want to prune the dead code as early as possible.
    ConstantFoldTerminator(&*I);

    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }
    
    BasicBlock *Dest = BI->getSuccessor(0);
    if (!Dest->getSinglePredecessor()) {
      ++I; continue;
    }

    // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
    // above should have zapped all of them..
    assert(!isa<PHINode>(Dest->begin()));

    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();
    
    // Make all PHI nodes that referred to Dest now refer to I as their source.
    Dest->replaceAllUsesWith(&*I);

    // Move all the instructions in the succ to the pred.
    I->getInstList().splice(I->end(), Dest->getInstList());
    
    // Remove the dest block.
    Dest->eraseFromParent();
    
    // Do not increment I, iteratively merge all things this block branches to.
  }

  // Make a final pass over the basic blocks from the old function to gather
  // any return instructions which survived folding. We have to do this here
  // because we can iteratively remove and merge returns above.
  for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
                          E = NewFunc->end();
       I != E; ++I)
    if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
      Returns.push_back(RI);
}
Exemplo n.º 12
0
/// Check whether \param BB is the merge block of a if-region.  If yes, check
/// whether there exists an adjacent if-region upstream, the two if-regions
/// contain identical instuctions and can be legally merged.  \returns true if
/// the two if-regions are merged.
///
/// From:
/// if (a)
///   statement;
/// if (b)
///   statement;
///
/// To:
/// if (a || b)
///   statement;
///
bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
                                  Pass *P) {
  BasicBlock *IfTrue2, *IfFalse2;
  Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
  Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
  if (!CInst2)
    return false;

  BasicBlock *SecondEntryBlock = CInst2->getParent();
  if (SecondEntryBlock->hasAddressTaken())
    return false;

  BasicBlock *IfTrue1, *IfFalse1;
  Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
  Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
  if (!CInst1)
    return false;

  BasicBlock *FirstEntryBlock = CInst1->getParent();

  // Either then-path or else-path should be empty.
  if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
    return false;
  if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
    return false;

  TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
  Instruction *PBI2 = SecondEntryBlock->begin();

  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
                            IfTrue2))
    return false;

  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
                            IfFalse2))
    return false;

  // Check whether \param SecondEntryBlock has side-effect and is safe to
  // speculate.
  for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
    Instruction *CI = BI;
    if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
        !isSafeToSpeculativelyExecute(CI))
      return false;
  }

  // Merge \param SecondEntryBlock into \param FirstEntryBlock.
  FirstEntryBlock->getInstList().pop_back();
  FirstEntryBlock->getInstList()
      .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
  BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
  Value *CC = PBI->getCondition();
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Builder.SetInsertPoint(PBI);
  Value *NC = Builder.CreateOr(CInst1, CC);
  PBI->replaceUsesOfWith(CC, NC);
  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);

  // Remove IfTrue1
  if (IfTrue1 != FirstEntryBlock) {
    IfTrue1->dropAllReferences();
    IfTrue1->eraseFromParent();
  }

  // Remove IfFalse1
  if (IfFalse1 != FirstEntryBlock) {
    IfFalse1->dropAllReferences();
    IfFalse1->eraseFromParent();
  }

  // Remove \param SecondEntryBlock
  SecondEntryBlock->dropAllReferences();
  SecondEntryBlock->eraseFromParent();
  DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
  return true;
}
Exemplo n.º 13
0
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller.  This returns false if it is not possible to inline
/// this call.  The program is still in a well defined state if this occurs
/// though.
///
/// Note that this only does one level of inlining.  For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream.  Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();
  
  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get the personality function from the callee if it contains a landing pad.
  Value *CalleePersonality = 0;
  for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
       I != E; ++I)
    if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
      const BasicBlock *BB = II->getUnwindDest();
      const LandingPadInst *LP = BB->getLandingPadInst();
      CalleePersonality = LP->getPersonalityFn();
      break;
    }

  // Find the personality function used by the landing pads of the caller. If it
  // exists, then check to see that it matches the personality function used in
  // the callee.
  if (CalleePersonality) {
    for (Function::const_iterator I = Caller->begin(), E = Caller->end();
         I != E; ++I)
      if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
        const BasicBlock *BB = II->getUnwindDest();
        const LandingPadInst *LP = BB->getLandingPadInst();

        // If the personality functions match, then we can perform the
        // inlining. Otherwise, we can't inline.
        // TODO: This isn't 100% true. Some personality functions are proper
        //       supersets of others and can be used in place of the other.
        if (LP->getPersonalityFn() != CalleePersonality)
          return false;

        break;
      }
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;
      const Argument *Arg = I;

      // When byval arguments actually inlined, we need to make the copy implied
      // by them explicit.  However, we don't do this if the callee is readonly
      // or readnone, because the copy would be unneeded: the callee doesn't
      // modify the struct.
      if (CS.isByValArgument(ArgNo)) {
        ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));
 
        // Calls that we inline may use the new alloca, so we need to clear
        // their 'tail' flags if HandleByValArgument introduced a new alloca and
        // the callee has calls.
        MustClearTailCallFlags |= ActualArg != *AI;
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.TD, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

    // Update inlined instructions' line number information.
    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (AI == 0) continue;
      
      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;
      
      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);
      
      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  // Leave lifetime markers for the static alloca's, scoping them to the
  // function we just inlined.
  if (InsertLifetime && !IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      // Try to determine the size of the allocation.
      ConstantInt *AllocaSize = 0;
      if (ConstantInt *AIArraySize =
          dyn_cast<ConstantInt>(AI->getArraySize())) {
        if (IFI.TD) {
          Type *AllocaType = AI->getAllocatedType();
          uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
          uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
          assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
          // Check that array size doesn't saturate uint64_t and doesn't
          // overflow when it's multiplied by type size.
          if (AllocaArraySize != ~0ULL &&
              UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
            AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
                                          AllocaArraySize * AllocaTypeSize);
          }
        }
      }

      builder.CreateLifetimeStart(AI, AllocaSize);
      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
        IRBuilder<> builder(Returns[ri]);
        builder.CreateLifetimeEnd(AI, AllocaSize);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
      .CreateCall(StackSave, "savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
    }
  }

  // If we are inlining tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
      BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    }

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  BranchInst *CreatedBranchToNormalDest = NULL;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = 0;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }


    // Add a branch to the merge points and remove return instructions.
    DebugLoc Loc;
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
      Loc = RI->getDebugLoc();
      BI->setDebugLoc(Loc);
      RI->eraseFromParent();
    }
    // We need to set the debug location to *somewhere* inside the
    // inlined function. The line number may be nonsensical, but the
    // instruction will at least be associated with the right
    // function.
    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Loc);
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction now and empty ReturnBB now.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  return true;
}
// InlineFunction - This function inlines the called function into the basic
// block of the caller.  This returns false if it is not possible to inline this
// call.  The program is still in a well defined state if this occurs though.
//
// Note that this only does one level of inlining.  For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream.  Similiarly this will inline a recursive
// function by one level.
//
bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;


  // If the call to the callee is a non-tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    isa<CallInst>(TheCall) && !cast<CallInst>(TheCall)->isTailCall();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  //
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  std::vector<ReturnInst*> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;
  
  { // Scope to destroy ValueMap after cloning.
    DenseMap<const Value*, Value*> ValueMap;

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    assert(std::distance(CalledFunc->arg_begin(), CalledFunc->arg_end()) ==
           std::distance(CS.arg_begin(), CS.arg_end()) &&
           "No varargs calls can be inlined!");
    CallSite::arg_iterator AI = CS.arg_begin();
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
           E = CalledFunc->arg_end(); I != E; ++I, ++AI)
      ValueMap[I] = *AI;

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
                              &InlinedFunctionInfo, TD);
    
    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;
    
    // Update the callgraph if requested.
    if (CG)
      UpdateCallGraphAfterInlining(Caller, CalledFunc, FirstNewBlock, ValueMap,
                                   *CG);
  }
 
  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  //
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
           E = FirstNewBlock->end(); I != E; )
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
        // If the alloca is now dead, remove it.  This often occurs due to code
        // specialization.
        if (AI->use_empty()) {
          AI->eraseFromParent();
          continue;
        }
        
        if (isa<Constant>(AI->getArraySize())) {
          // Scan for the block of allocas that we can move over, and move them
          // all at once.
          while (isa<AllocaInst>(I) &&
                 isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
            ++I;

          // Transfer all of the allocas over in a block.  Using splice means
          // that the instructions aren't removed from the symbol table, then
          // reinserted.
          Caller->getEntryBlock().getInstList().splice(
              InsertPoint,
              FirstNewBlock->getInstList(),
              AI, I);
        }
      }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    const Type *BytePtr = PointerType::get(Type::Int8Ty);
    // Get the two intrinsics we care about.
    Constant *StackSave, *StackRestore;
    StackSave    = M->getOrInsertFunction("llvm.stacksave", BytePtr, NULL);
    StackRestore = M->getOrInsertFunction("llvm.stackrestore", Type::VoidTy,
                                          BytePtr, NULL);

    // If we are preserving the callgraph, add edges to the stacksave/restore
    // functions for the calls we insert.
    CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
    if (CG) {
      // We know that StackSave/StackRestore are Function*'s, because they are
      // intrinsics which must have the right types.
      StackSaveCGN    = CG->getOrInsertFunction(cast<Function>(StackSave));
      StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
      CallerNode = (*CG)[Caller];
    }
      
    // Insert the llvm.stacksave.
    CallInst *SavedPtr = new CallInst(StackSave, "savedstack", 
                                      FirstNewBlock->begin());
    if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);
      
    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      CallInst *CI = new CallInst(StackRestore, SavedPtr, "", Returns[i]);
      if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
    }

    // Count the number of StackRestore calls we insert.
    unsigned NumStackRestores = Returns.size();
    
    // If we are inlining an invoke instruction, insert restores before each
    // unwind.  These unwinds will be rewritten into branches later.
    if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
      for (Function::iterator BB = FirstNewBlock, E = Caller->end();
           BB != E; ++BB)
        if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
          new CallInst(StackRestore, SavedPtr, "", UI);
          ++NumStackRestores;
        }
    }
  }

  // If we are inlining tail call instruction through a call site that isn't 
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.
  if (MustClearTailCallFlags && InlinedFunctionInfo.ContainsCalls) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I))
          CI->setTailCall(false);
  }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any inlined 'unwind' instructions into branches to the invoke exception
  // destination, and call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      new BranchInst(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty())
      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());

    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->getParent()->getInstList().erase(TheCall);

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->getParent()->getInstList().erase(Returns[0]);

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = new BranchInst(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  //
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    //
    PHINode *PHI = 0;
    if (!TheCall->use_empty()) {
      PHI = new PHINode(CalledFunc->getReturnType(),
                        TheCall->getName(), AfterCallBB->begin());

      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      //
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions, turning them into unconditional
    // branches to the merge point now, and adding entries to the PHI node as
    // appropriate.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];

      if (PHI) {
        assert(RI->getReturnValue() && "Ret should have value!");
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }

      // Add a branch to the merge point where the PHI node lives if it exists.
      new BranchInst(AfterCallBB, RI);

      // Delete the return instruction now
      RI->getParent()->getInstList().erase(RI);
    }

  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty())
      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Delete the return instruction now and empty ReturnBB now.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);
  
  return true;
}
Exemplo n.º 15
0
void DSWP::cleanup(Loop *L, LPPassManager &LPM) {
  // Move some instructions that may not have been inserted in the right
  // place, delete the old loop, and clean up our aux data structures for this
  // loop.

  /*
   * move the produce instructions, which have been inserted after the branch,
   * in front of it
   */
  for (int i = 0; i < MAX_THREAD; i++) {
      for (Function::iterator bi = allFunc[i]->begin(), be = allFunc[i]->end(); bi != be; ++bi) {
          BasicBlock *bb = bi;
          TerminatorInst *term = NULL;
          for (BasicBlock::iterator ii = bb->begin(), ie = bb->end(); ii != ie; ++ii) {
              Instruction *inst = ii;
              if (isa<TerminatorInst>(inst)) {
                  term = dyn_cast<TerminatorInst>(inst);
                  break;
              }
          }

          if (term == NULL) {
              error("term cannot be null");
          }

          while (true) {
              Instruction *last = &bb->getInstList().back();
              if (isa<TerminatorInst>(last))
                  break;
              last->moveBefore(term);
          }
      }
  }

  /*
   * move the phi nodes to the top of the block
   */
  for (int i = 0; i < MAX_THREAD; i++) {
      for (Function::iterator bi = allFunc[i]->begin(), be = allFunc[i]->end(); bi != be; ++bi) {
          BasicBlock *bb = bi;
          Instruction *first_nonphi = bb->getFirstNonPHI();
          BasicBlock::iterator ii = bb->begin(), ie = bb->end();

          // advance the iterator up to one past first_nonphi
          while (&(*ii) != first_nonphi) {
              ++ii;
          }
          ++ii;

          // move any phi nodes after the first nonphi to before it
          for (BasicBlock::iterator i_next; ii != ie; ii = i_next) {
              i_next = ii;
              ++i_next;
              Instruction *inst = ii;
              if (isa<PHINode>(inst)) {
                  inst->moveBefore(first_nonphi);
              }
          }
      }
  }

  cout << "begin to delete loop" << endl;
  for (Loop::block_iterator bi = L->block_begin(), be = L->block_end(); bi != be; ++bi) {
      BasicBlock *BB = *bi;
      for (BasicBlock::iterator ii = BB->begin(), i_next, ie = BB->end(); ii != ie; ii = i_next) {
          i_next = ii;
          ++i_next;
          Instruction &inst = *ii;
          inst.replaceAllUsesWith(UndefValue::get(inst.getType()));
          inst.eraseFromParent();
      }
  }

  // Delete the basic blocks only afterwards
  // so that backwards branch instructions don't break
  for (Loop::block_iterator bi = L->block_begin(), be = L->block_end(); bi != be; ++bi) {
      BasicBlock *BB = *bi;
      BB->eraseFromParent();
  }

  LPM.deleteLoopFromQueue(L);

}
Exemplo n.º 16
0
bool partition::runOnLoop(Loop* L, LPPassManager &LPM) {
    
    errs() << "***************************  Loop encountered: ************************" << '\n' << L->getHeader()->getName() << '\n';
    
    if (function->getName() != "main")
        return false;


    IntegerType* int32Ty = Type::getInt32Ty(*context);
    IntegerType* int64Ty = Type::getInt64Ty(*context);
    PointerType* voidPtrTy = Type::getInt8PtrTy(*context);

    FunctionType* funcTy = FunctionType::get(int32Ty, false);
   
    Constant* func1_c;
    Function* func1;

    func1_c = module->getOrInsertFunction("func1", funcTy);
    func1 = cast<Function>(func1_c);


    Function* pro = module->getFunction("produce");
    Function* con = module->getFunction("consume");
    
    BasicBlock* func1EntryBlock = BasicBlock::Create(*context, "entry.func1", func1);
    AllocaInst* i_var = new AllocaInst(int32Ty, NULL, 4, "i", func1EntryBlock);
    
    Value* liveIn;
    BasicBlock *forCond, *forBody, *forInc;
    ValueToValueMapTy VMap;
    std::map<BasicBlock *, BasicBlock *> BlockMap;
    
    for (Loop::block_iterator BB = L->block_begin(), BBe = L->block_end(); BB != BBe; ++BB) {
        BasicBlock* func1Block = CloneBasicBlock(*BB, VMap, ".func1", func1);
        BlockMap[*BB] = func1Block;

        if ((*BB)->getName() == "for.cond") 
            forCond = func1Block;
        if ((*BB)->getName() == "for.body") 
            forBody = func1Block;
        if ((*BB)->getName() == "for.inc") 
            forInc = func1Block;

        for (BasicBlock::iterator it = func1Block->begin(), ite = func1Block->end(); it != ite; ++it) {
            for (User::op_iterator oit = it->op_begin(), oite = it->op_end(); oit != oite; ++oit) {
                if (VMap[*oit] != NULL) {
                    *oit = VMap[*oit];
                } else {
                    Constant* cons = dyn_cast<Constant>(*oit);
                    BranchInst* br = dyn_cast<BranchInst>(it);
                    if (cons == NULL && br == NULL) {
                        liveIn = *oit;
                        *oit = i_var;
                    }
                }
               
            }
        }

        if ((*BB)->getName() == "for.body") {
            Instruction* term = (*BB)->getTerminator();
            term->removeFromParent();
            for (int i = 0; i < 7; i++) {
                (*BB)->back().eraseFromParent();
            }
            term->insertAfter(&(*BB)->back());
            (*BB)->front().eraseFromParent();
            LoadInst* load = new LoadInst(liveIn, "", false, 4, term); 

            std::vector<Value *> produce_args;
            CastInst* cast = CastInst::CreateIntegerCast(load, int64Ty, true);
            cast->insertAfter(load);
            produce_args.push_back(cast);
            ConstantInt* val = ConstantInt::get(int32Ty, (uint32_t) 3);
            produce_args.push_back(val);
            CallInst::Create(pro, ArrayRef<Value*>(produce_args), "", term);

            produce_args.pop_back();
            val = ConstantInt::get(int32Ty, (uint32_t) 2);
            produce_args.push_back(val);
            CallInst::Create(pro, ArrayRef<Value*>(produce_args), "", term);
        }
    
    }

    // set branch instructions to restructure the CFG in created function
    BasicBlock* func1EndBlock = BasicBlock::Create(*context, "if.end.func1", func1); 
    BasicBlock* garbageBB = BasicBlock::Create(*context, "garbage", func1);
    ConstantInt* retVal_g = ConstantInt::get(int32Ty, (uint32_t) 0);
    ReturnInst* ret_g = ReturnInst::Create(*context, retVal_g, garbageBB);

    
    for (Function::iterator fit = func1->begin(), fite = func1->end(); fit != fite; ++fit) {
        if (fit->getTerminator() == NULL || fit->getName() == "garbage")
            continue;
      
        BranchInst* br = dyn_cast<BranchInst>(fit->getTerminator());
        int numSuccessors = br->getNumSuccessors();
        
        for (int i = 0; i < numSuccessors; i++) {
            BasicBlock* successor = br->getSuccessor(i);
            
            if (BlockMap[successor] != NULL) {
                
                br->setSuccessor(i, BlockMap[successor]);
            } 
            else {
                br->setSuccessor(i, func1EndBlock);
            }
            
        }
/*
        if (fit->getName() == "for.body.func1") {
            for (int i = 0; i < 4; i++) {
                BasicBlock::iterator it = fit->begin();
                it->moveBefore(ret_g);
            }
        }
        */
    }
    garbageBB->eraseFromParent();

    BranchInst* br = dyn_cast<BranchInst>(forBody->getTerminator());
    br->setSuccessor(0, forCond);
    forInc->eraseFromParent();


    // Create return instruction for func1EndBlock and set a branch from loop header to func1EndBlock
    ConstantInt* retVal = ConstantInt::get(int32Ty, (uint32_t) 0);
    ReturnInst* ret1 = ReturnInst::Create(*context, retVal, func1EndBlock);
    BasicBlock* loopHeader = BlockMap.at(L->getHeader());
    BranchInst* brInst = BranchInst::Create(loopHeader, func1EntryBlock);
    
    // add produce function call
    std::vector<Value *> produce_args;
    ConstantInt* val = ConstantInt::get(int64Ty, (uint64_t) 0);
    produce_args.push_back(val);
    val = ConstantInt::get(int32Ty, (uint32_t) 5);
    produce_args.push_back(val);
    CallInst::Create(pro, ArrayRef<Value*>(produce_args), "", func1EndBlock->getTerminator());
    
    // add consume function call
    int q_id = 2;
    for (Value::use_iterator uit = i_var->use_begin(), uite = i_var->use_end(); uit != uite; ++uit) {
        std::vector<Value *> consume_args;
        ConstantInt* val = ConstantInt::get(int32Ty, (uint32_t) q_id); 
        consume_args.push_back(val);
        CallInst* call = CallInst::Create(con, ArrayRef<Value*>(consume_args));
        Instruction* inst = dyn_cast<Instruction>(*uit);
        call->insertAfter(inst);
        CastInst* cast = CastInst::CreateIntegerCast(call, int32Ty, true);
        cast->insertAfter(call);
        (*uit)->replaceAllUsesWith(cast);
        inst->eraseFromParent();
        q_id++;
    }

    i_var->eraseFromParent();

    // add produce and consume function calls to main thread
    // transmit the function pointer to created function by a produce call
    BasicBlock* loopPreheader = L->getLoopPreheader();
    produce_args.clear();
    CastInst* cast = CastInst::CreatePointerCast(func1, int64Ty);
    cast->insertBefore(loopPreheader->getTerminator());
    produce_args.push_back(cast);
    val = ConstantInt::get(int32Ty, (uint32_t) 0);
    produce_args.push_back(val);
    CallInst::Create(pro, ArrayRef<Value*>(produce_args), "", loopPreheader->getTerminator());
  
    // transmit induction variable to created function by a produce call
    Instruction* load = &L->getHeader()->front();
    produce_args.clear();
    cast = CastInst::CreateIntegerCast(load, int64Ty, true);
    cast->insertAfter(load);
    produce_args.push_back(cast);
    val = ConstantInt::get(int32Ty, (uint32_t) 4);
    produce_args.push_back(val);
    CallInst::Create(pro, ArrayRef<Value*>(produce_args))->insertAfter(cast);



    return true;
}