CallInst*
TailCallElim::FindTRECandidate(Instruction *TI,
                               bool CannotTailCallElimCallsMarkedTail) {
  BasicBlock *BB = TI->getParent();
  Function *F = BB->getParent();

  if (&BB->front() == TI) // Make sure there is something before the terminator.
    return 0;

  // Scan backwards from the return, checking to see if there is a tail call in
  // this block.  If so, set CI to it.
  CallInst *CI = 0;
  BasicBlock::iterator BBI = TI;
  while (true) {
    CI = dyn_cast<CallInst>(BBI);
    if (CI && CI->getCalledFunction() == F)
      break;

    if (BBI == BB->begin())
      return 0;          // Didn't find a potential tail call.
    --BBI;
  }

  // If this call is marked as a tail call, and if there are dynamic allocas in
  // the function, we cannot perform this optimization.
  if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
    return 0;

  // As a special case, detect code like this:
  //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
  // and disable this xform in this case, because the code generator will
  // lower the call to fabs into inline code.
  if (BB == &F->getEntryBlock() &&
      FirstNonDbg(BB->front()) == CI &&
      FirstNonDbg(llvm::next(BB->begin())) == TI &&
      CI->getCalledFunction() &&
      !TTI->isLoweredToCall(CI->getCalledFunction())) {
    // A single-block function with just a call and a return. Check that
    // the arguments match.
    CallSite::arg_iterator I = CallSite(CI).arg_begin(),
                           E = CallSite(CI).arg_end();
    Function::arg_iterator FI = F->arg_begin(),
                           FE = F->arg_end();
    for (; I != E && FI != FE; ++I, ++FI)
      if (*I != &*FI) break;
    if (I == E && FI == FE)
      return 0;
  }

  return CI;
}
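// --- Illustrative sketch, not part of the pass above ------------------------
// A minimal driver showing how FindTRECandidate might be fed block
// terminators.  The method name scanForTRECandidates is hypothetical (its
// declaration is assumed to have been added to the pass class); the real pass
// hands each candidate on to its elimination step rather than just collecting
// it.
bool TailCallElim::scanForTRECandidates(Function &F,
                                        bool CannotTCECallsMarkedTail,
                                        SmallVectorImpl<CallInst *> &Found) {
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    Instruction *Term = BB->getTerminator();
    // Only a block ending in a return can hold a tail-recursive call that
    // FindTRECandidate recognizes.
    if (!isa<ReturnInst>(Term))
      continue;
    if (CallInst *CI = FindTRECandidate(Term, CannotTCECallsMarkedTail))
      Found.push_back(CI);
  }
  return !Found.empty();
}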
/// Replaces the given call site (Call or Invoke) with a gc.statepoint
/// intrinsic with an empty deoptimization arguments list.  This does
/// NOT do explicit relocation for GC support.
static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) {
  assert(CS.getInstruction()->getModule() && "must be set");

  // TODO: technically, a pass is not allowed to get functions from within a
  // function pass since it might trigger a new function addition.  Refactor
  // this logic out to the initialization of the pass.  Doesn't appear to
  // matter in practice.

  // Then go ahead and use the builder to actually do the inserts.  We insert
  // immediately before the instruction being replaced, on the assumption that
  // all of its arguments will be available here.  We can't insert afterwards
  // since we may be replacing a terminator.
  IRBuilder<> Builder(CS.getInstruction());

  // Note: The gc args are not filled in at this time, that's handled by
  // RewriteStatepointsForGC (which is currently under review).

  // Create the statepoint given all the arguments
  Instruction *Token = nullptr;

  uint64_t ID;
  uint32_t NumPatchBytes;

  AttributeSet OriginalAttrs = CS.getAttributes();
  Attribute AttrID =
      OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, "statepoint-id");
  Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
      AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");

  AttrBuilder AttrsToRemove;
  bool HasID = AttrID.isStringAttribute() &&
               !AttrID.getValueAsString().getAsInteger(10, ID);

  if (HasID)
    AttrsToRemove.addAttribute("statepoint-id");
  else
    ID = 0xABCDEF00;

  bool HasNumPatchBytes =
      AttrNumPatchBytes.isStringAttribute() &&
      !AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);

  if (HasNumPatchBytes)
    AttrsToRemove.addAttribute("statepoint-num-patch-bytes");
  else
    NumPatchBytes = 0;

  OriginalAttrs = OriginalAttrs.removeAttributes(
      CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
      AttrsToRemove);

  if (CS.isCall()) {
    CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
    CallInst *Call = Builder.CreateGCStatepointCall(
        ID, NumPatchBytes, CS.getCalledValue(),
        makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
        "safepoint_token");
    Call->setTailCall(ToReplace->isTailCall());
    Call->setCallingConv(ToReplace->getCallingConv());

    // Assuming we can handle this set of attributes, set the function
    // attributes directly on the statepoint now and apply the return
    // attributes to the gc_result intrinsic later.
    Call->setAttributes(OriginalAttrs.getFnAttributes());

    Token = Call;

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    assert(ToReplace->getNextNode() && "not a terminator, must have next");
    Builder.SetInsertPoint(ToReplace->getNextNode());
    Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
  } else if (CS.isInvoke()) {
    InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());

    // Insert the new invoke into the old block.  We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    Builder.SetInsertPoint(ToReplace->getParent());
    InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
        ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(),
        ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
        None, None, "safepoint_token");

    Invoke->setCallingConv(ToReplace->getCallingConv());

    // Assuming we can handle this set of attributes, set the function
    // attributes directly on the statepoint now and apply the return
    // attributes to the gc_result intrinsic later.
    Invoke->setAttributes(OriginalAttrs.getFnAttributes());

    Token = Invoke;

    // We'll insert the gc.result into the normal block
    BasicBlock *NormalDest = ToReplace->getNormalDest();
    // We cannot insert the gc.result if PHI nodes are present; such cases
    // should have been removed before running this function.
    assert(!isa<PHINode>(NormalDest->begin()));
    Instruction *IP = &*(NormalDest->getFirstInsertionPt());
    Builder.SetInsertPoint(IP);
  } else {
    llvm_unreachable("unexpect type of CallSite");
  }
  assert(Token);

  // Handle the return value of the original call - update all uses to use a
  // gc_result hanging off the statepoint node we just inserted

  // Only add the gc_result iff there is actually a used result
  if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
    std::string TakenName =
        CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
    CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName);
    GCResult->setAttributes(OriginalAttrs.getRetAttributes());
    return GCResult;
  } else {
    // No return value for the call.
    return nullptr;
  }
}
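// --- Illustrative sketch, not part of the pass above ------------------------
// Assumed caller-side wiring for the variant above (the helper name is made
// up): ReplaceWithStatepoint leaves the original call/invoke in place, so any
// uses of its result have to be redirected to the returned gc.result before
// the old instruction is erased.
static void replaceCallWithStatepoint(CallSite CS) {
  Instruction *Old = CS.getInstruction();
  if (Value *GCResult = ReplaceWithStatepoint(CS))
    Old->replaceAllUsesWith(GCResult); // result now flows through gc.result
  Old->eraseFromParent();              // the statepoint has taken its place
}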
static bool markTails(Function &F, bool &AllCallsAreTailCalls) {
  if (F.callsFunctionThatReturnsTwice())
    return false;
  AllCallsAreTailCalls = true;

  // The local stack holds all alloca instructions and all byval arguments.
  AllocaDerivedValueTracker Tracker;
  for (Argument &Arg : F.args()) {
    if (Arg.hasByValAttr())
      Tracker.walk(&Arg);
  }
  for (auto &BB : F) {
    for (auto &I : BB)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
        Tracker.walk(AI);
  }

  bool Modified = false;

  // Track whether a block is reachable after an alloca has escaped. Blocks that
  // contain the escaping instruction will be marked as being visited without an
  // escaped alloca, since that is how the block began.
  enum VisitType {
    UNVISITED,
    UNESCAPED,
    ESCAPED
  };
  DenseMap<BasicBlock *, VisitType> Visited;

  // We propagate the fact that an alloca has escaped from block to successor.
  // Visit the blocks that are propagating the escapedness first. To do this, we
  // maintain two worklists.
  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;

  // We may enter a block and visit it thinking that no alloca has escaped yet,
  // then see an escape point and go back around a loop edge and come back to
  // the same block twice. Because of this, we defer setting tail on calls when
  // we first encounter them in a block. Every entry in this list does not
  // statically use an alloca via use-def chain analysis, but may find an alloca
  // through other means if the block turns out to be reachable after an escape
  // point.
  SmallVector<CallInst *, 32> DeferredTails;

  BasicBlock *BB = &F.getEntryBlock();
  VisitType Escaped = UNESCAPED;
  do {
    for (auto &I : *BB) {
      if (Tracker.EscapePoints.count(&I))
        Escaped = ESCAPED;

      CallInst *CI = dyn_cast<CallInst>(&I);
      if (!CI || CI->isTailCall())
        continue;

      bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();

      if (!IsNoTail && CI->doesNotAccessMemory()) {
        // A call to a readnone function whose arguments are all things computed
        // outside this function can be marked tail. Even if you stored the
        // alloca address into a global, a readnone function can't load the
        // global anyhow.
        //
        // Note that this runs whether we know an alloca has escaped or not. If
        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
        bool SafeToTail = true;
        for (auto &Arg : CI->arg_operands()) {
          if (isa<Constant>(Arg.getUser()))
            continue;
          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
            if (!A->hasByValAttr())
              continue;
          SafeToTail = false;
          break;
        }
        if (SafeToTail) {
          emitOptimizationRemark(
              F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
              "marked this readnone call a tail call candidate");
          CI->setTailCall();
          Modified = true;
          continue;
        }
      }

      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
        DeferredTails.push_back(CI);
      } else {
        AllCallsAreTailCalls = false;
      }
    }

    for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
      auto &State = Visited[SuccBB];
      if (State < Escaped) {
        State = Escaped;
        if (State == ESCAPED)
          WorklistEscaped.push_back(SuccBB);
        else
          WorklistUnescaped.push_back(SuccBB);
      }
    }

    if (!WorklistEscaped.empty()) {
      BB = WorklistEscaped.pop_back_val();
      Escaped = ESCAPED;
    } else {
      BB = nullptr;
      while (!WorklistUnescaped.empty()) {
        auto *NextBB = WorklistUnescaped.pop_back_val();
        if (Visited[NextBB] == UNESCAPED) {
          BB = NextBB;
          Escaped = UNESCAPED;
          break;
        }
      }
    }
  } while (BB);

  for (CallInst *CI : DeferredTails) {
    if (Visited[CI->getParent()] != ESCAPED) {
      // If the escape point was part way through the block, calls after the
      // escape point wouldn't have been put into DeferredTails.
      emitOptimizationRemark(F.getContext(), "tailcallelim", F,
                             CI->getDebugLoc(),
                             "marked this call a tail call candidate");
      CI->setTailCall();
      Modified = true;
    } else {
      AllCallsAreTailCalls = false;
    }
  }

  return Modified;
}
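// --- Illustrative sketch, not part of the pass above ------------------------
// markTails relies on an AllocaDerivedValueTracker that is not shown here.
// This is only the shape of the interface the function appears to assume
// (member names taken from the uses above); the real tracker walks all
// transitive users of each alloca and byval argument to fill in these sets.
struct AllocaDerivedValueTracker {
  // Calls that take an alloca-derived value as an argument.
  SmallPtrSet<Instruction *, 32> AllocaUsers;
  // Instructions through which an alloca-derived pointer escapes the
  // function (e.g. stored somewhere visible, passed to a capturing call).
  SmallPtrSet<Instruction *, 32> EscapePoints;

  // Walk the users of Root (an alloca or a byval argument) and populate the
  // two sets above.  Body omitted; only the interface matters for markTails.
  void walk(Value *Root);
};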
/// Replaces the given call site (Call or Invoke) with a gc.statepoint
/// intrinsic with an empty deoptimization arguments list.  This does
/// NOT do explicit relocation for GC support.
static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
                                    Pass *P) {
  BasicBlock *BB = CS.getInstruction()->getParent();
  Function *F = BB->getParent();
  Module *M = F->getParent();
  assert(M && "must be set");

  // TODO: technically, a pass is not allowed to get functions from within a
  // function pass since it might trigger a new function addition.  Refactor
  // this logic out to the initialization of the pass.  Doesn't appear to
  // matter in practice.

  // Fill in the one generic type'd argument (the function is also vararg)
  std::vector<Type *> argTypes;
  argTypes.push_back(CS.getCalledValue()->getType());

  Function *gc_statepoint_decl = Intrinsic::getDeclaration(
      M, Intrinsic::experimental_gc_statepoint, argTypes);

  // Then go ahead and use the builder to actually do the inserts.  We insert
  // immediately before the instruction being replaced, on the assumption that
  // all of its arguments will be available here.  We can't insert afterwards
  // since we may be replacing a terminator.
  Instruction *insertBefore = CS.getInstruction();
  IRBuilder<> Builder(insertBefore);
  // First, create the statepoint (with all live ptrs as arguments).
  std::vector<llvm::Value *> args;
  // target, #args, unused, args
  Value *Target = CS.getCalledValue();
  args.push_back(Target);
  int callArgSize = CS.arg_size();
  args.push_back(
      ConstantInt::get(Type::getInt32Ty(M->getContext()), callArgSize));
  // TODO: add a 'Needs GC-rewrite' later flag
  args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 0));

  // Copy all the arguments of the original call
  args.insert(args.end(), CS.arg_begin(), CS.arg_end());

  // Create the statepoint given all the arguments
  Instruction *token = nullptr;
  AttributeSet return_attributes;
  if (CS.isCall()) {
    CallInst *toReplace = cast<CallInst>(CS.getInstruction());
    CallInst *call =
        Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token");
    call->setTailCall(toReplace->isTailCall());
    call->setCallingConv(toReplace->getCallingConv());

    // Before we have to worry about GC semantics, all attributes are legal
    AttributeSet new_attrs = toReplace->getAttributes();
    // Assuming we can handle this set of attributes, set the function
    // attributes directly on the statepoint now and apply the return
    // attributes to the gc_result intrinsic later.
    call->setAttributes(new_attrs.getFnAttributes());
    return_attributes = new_attrs.getRetAttributes();
    // TODO: handle param attributes

    token = call;

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    BasicBlock::iterator next(toReplace);
    assert(BB->end() != next && "not a terminator, must have next");
    next++;
    Instruction *IP = &*(next);
    Builder.SetInsertPoint(IP);
    Builder.SetCurrentDebugLocation(IP->getDebugLoc());

  } else if (CS.isInvoke()) {
    InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());

    // Insert the new invoke into the old block.  We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    InvokeInst *invoke = InvokeInst::Create(
        gc_statepoint_decl, toReplace->getNormalDest(),
        toReplace->getUnwindDest(), args, "", toReplace->getParent());
    invoke->setCallingConv(toReplace->getCallingConv());

    // Currently we will fail on parameter attributes and on certain
    // function attributes.
    AttributeSet new_attrs = toReplace->getAttributes();
    // Assuming we can handle this set of attributes, set the function
    // attributes directly on the statepoint now and apply the return
    // attributes to the gc_result intrinsic later.
    invoke->setAttributes(new_attrs.getFnAttributes());
    return_attributes = new_attrs.getRetAttributes();

    token = invoke;

    // We'll insert the gc.result into the normal block
    BasicBlock *normalDest = normalizeBBForInvokeSafepoint(
        toReplace->getNormalDest(), invoke->getParent());
    Instruction *IP = &*(normalDest->getFirstInsertionPt());
    Builder.SetInsertPoint(IP);
  } else {
    llvm_unreachable("unexpect type of CallSite");
  }
  assert(token);

  // Handle the return value of the original call - update all uses to use a
  // gc_result hanging off the statepoint node we just inserted

  // Only add the gc_result iff there is actually a used result
  if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
    Instruction *gc_result = nullptr;
    std::vector<Type *> types;     // one per 'any' type
    types.push_back(CS.getType()); // result type
    auto get_gc_result_id = [&](Type &Ty) {
      if (Ty.isIntegerTy()) {
        return Intrinsic::experimental_gc_result_int;
      } else if (Ty.isFloatingPointTy()) {
        return Intrinsic::experimental_gc_result_float;
      } else if (Ty.isPointerTy()) {
        return Intrinsic::experimental_gc_result_ptr;
      } else {
        llvm_unreachable("non java type encountered");
      }
    };
    Intrinsic::ID Id = get_gc_result_id(*CS.getType());
    Value *gc_result_func = Intrinsic::getDeclaration(M, Id, types);

    std::vector<Value *> args;
    args.push_back(token);
    gc_result = Builder.CreateCall(
        gc_result_func, args,
        CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "");

    cast<CallInst>(gc_result)->setAttributes(return_attributes);
    return gc_result;
  } else {
    // No return value for the call.
    return nullptr;
  }
}
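// --- Illustrative sketch, not part of the pass above ------------------------
// The invoke path calls normalizeBBForInvokeSafepoint, which is not included
// in this example.  This is only its assumed behaviour, not the actual
// implementation: make sure the statepoint invoke's normal destination is
// reached only through that invoke and carries no PHI nodes, so the gc.result
// can be inserted at its front (uses SplitBlockPredecessors and
// FoldSingleEntryPHINodes from llvm/Transforms/Utils/BasicBlockUtils.h).
static BasicBlock *normalizeBBForInvokeSafepoint(BasicBlock *BB,
                                                 BasicBlock *InvokeParent) {
  BasicBlock *Ret = BB;
  // If other edges reach BB, split them off so the invoke's edge is the only
  // one entering Ret.
  if (!BB->getUniquePredecessor())
    Ret = SplitBlockPredecessors(BB, InvokeParent, "");
  // With a single predecessor, any remaining PHI nodes fold away trivially.
  FoldSingleEntryPHINodes(Ret);
  return Ret;
}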
/// Replaces the given call site (Call or Invoke) with a gc.statepoint
/// intrinsic with an empty deoptimization arguments list.  This does
/// NOT do explicit relocation for GC support.
static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
                                    Pass *P) {
  assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
         "must be set");

  // TODO: technically, a pass is not allowed to get functions from within a
  // function pass since it might trigger a new function addition.  Refactor
  // this logic out to the initialization of the pass.  Doesn't appear to
  // matter in practice.

  // Then go ahead and use the builder to actually do the inserts.  We insert
  // immediately before the instruction being replaced, on the assumption that
  // all of its arguments will be available here.  We can't insert afterwards
  // since we may be replacing a terminator.
  IRBuilder<> Builder(CS.getInstruction());

  // Note: The gc args are not filled in at this time, that's handled by
  // RewriteStatepointsForGC (which is currently under review).

  // Create the statepoint given all the arguments
  Instruction *Token = nullptr;
  AttributeSet OriginalAttrs;

  if (CS.isCall()) {
    CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
    CallInst *Call = Builder.CreateGCStatepointCall(
        CS.getCalledValue(), makeArrayRef(CS.arg_begin(), CS.arg_end()), None,
        None, "safepoint_token");
    Call->setTailCall(ToReplace->isTailCall());
    Call->setCallingConv(ToReplace->getCallingConv());

    // Before we have to worry about GC semantics, all attributes are legal
    // TODO: handle param attributes
    OriginalAttrs = ToReplace->getAttributes();

    // Assuming we can handle this set of attributes, set the function
    // attributes directly on the statepoint now and apply the return
    // attributes to the gc_result intrinsic later.
    Call->setAttributes(OriginalAttrs.getFnAttributes());

    Token = Call;

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    assert(ToReplace->getNextNode() && "not a terminator, must have next");
    Builder.SetInsertPoint(ToReplace->getNextNode());
    Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
  } else if (CS.isInvoke()) {
    InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());

    // Insert the new invoke into the old block.  We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    Builder.SetInsertPoint(ToReplace->getParent());
    InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
        CS.getCalledValue(), ToReplace->getNormalDest(),
        ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
        Builder.getInt32(0), None, "safepoint_token");

    // Currently we will fail on parameter attributes and on certain
    // function attributes.
    OriginalAttrs = ToReplace->getAttributes();

    // Assuming we can handle this set of attributes, set the function
    // attributes directly on the statepoint now and apply the return
    // attributes to the gc_result intrinsic later.
    Invoke->setAttributes(OriginalAttrs.getFnAttributes());

    Token = Invoke;

    // We'll insert the gc.result into the normal block
    BasicBlock *NormalDest = normalizeBBForInvokeSafepoint(
        ToReplace->getNormalDest(), Invoke->getParent());
    Builder.SetInsertPoint(NormalDest->getFirstInsertionPt());
  } else {
    llvm_unreachable("unexpect type of CallSite");
  }
  assert(Token);

  // Handle the return value of the original call - update all uses to use a
  // gc_result hanging off the statepoint node we just inserted

  // Only add the gc_result iff there is actually a used result
  if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
    std::string TakenName =
        CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
    CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName);
    GCResult->setAttributes(OriginalAttrs.getRetAttributes());
    return GCResult;
  } else {
    // No return value for the call.
    return nullptr;
  }
}
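// --- Illustrative sketch, not part of the pass above ------------------------
// A hedged sketch of a selection loop one might wrap around either
// ReplaceWithStatepoint overload; the function name is made up and the real
// safepoint-placement policy is considerably more selective.  Call sites are
// collected first and rewritten afterwards, since the rewrite mutates the
// blocks being walked.  (Assumes llvm/IR/CallSite.h and
// llvm/IR/IntrinsicInst.h are available.)
static void wrapCallSitesInStatepoints(Function &F, Pass *P) {
  SmallVector<CallSite, 16> ToWrap;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB) {
      CallSite CS(&I);
      // Skip non-calls and intrinsics; intrinsics never need a statepoint.
      if (!CS.getInstruction() || isa<IntrinsicInst>(&I))
        continue;
      ToWrap.push_back(CS);
    }

  for (CallSite CS : ToWrap) {
    Instruction *Old = CS.getInstruction();
    Value *GCResult = ReplaceWithStatepoint(CS, P);
    if (GCResult)
      Old->replaceAllUsesWith(GCResult);
    Old->eraseFromParent();
  }
}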
bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
                                         bool &TailCallsAreMarkedTail,
                                         SmallVector<PHINode*, 8> &ArgumentPHIs,
                                       bool CannotTailCallElimCallsMarkedTail) {
  BasicBlock *BB = Ret->getParent();
  Function *F = BB->getParent();

  if (&BB->front() == Ret) // Make sure there is something before the ret...
    return false;
  
  // If the return is in the entry block, then making this transformation would
  // turn infinite recursion into an infinite loop.  This transformation is ok
  // in theory, but breaks some code like:
  //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
  // so we disable this transform in this case, because the code generator
  // will lower the call to fabs into inline code.
  if (BB == &F->getEntryBlock())
    return false;

  // Scan backwards from the return, checking to see if there is a tail call in
  // this block.  If so, set CI to it.
  CallInst *CI;
  BasicBlock::iterator BBI = Ret;
  while (1) {
    CI = dyn_cast<CallInst>(BBI);
    if (CI && CI->getCalledFunction() == F)
      break;

    if (BBI == BB->begin())
      return false;          // Didn't find a potential tail call.
    --BBI;
  }

  // If this call is marked as a tail call, and if there are dynamic allocas in
  // the function, we cannot perform this optimization.
  if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
    return false;

  // If we are introducing accumulator recursion to eliminate associative
  // operations after the call instruction, this variable contains the initial
  // value for the accumulator.  If this value is set, we actually perform
  // accumulator recursion elimination instead of simple tail recursion
  // elimination.
  Value *AccumulatorRecursionEliminationInitVal = 0;
  Instruction *AccumulatorRecursionInstr = 0;

  // Ok, we found a potential tail call.  We can currently only transform the
  // tail call if all of the instructions between the call and the return are
  // movable to above the call itself, leaving the call next to the return.
  // Check that this is the case now.
  for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
    if (!CanMoveAboveCall(BBI, CI)) {
      // If we can't move the instruction above the call, it might be because it
      // is an associative operation that could be transformed using accumulator
      // recursion elimination.  Check to see if this is the case, and if so,
      // remember the initial accumulator value for later.
      if ((AccumulatorRecursionEliminationInitVal =
                             CanTransformAccumulatorRecursion(BBI, CI))) {
        // Yes, this is accumulator recursion.  Remember which instruction
        // accumulates.
        AccumulatorRecursionInstr = BBI;
      } else {
        return false;   // Otherwise, we cannot eliminate the tail recursion!
      }
    }

  // We can only transform call/return pairs that either ignore the return value
  // of the call and return void, ignore the value of the call and return a
  // constant, return the value returned by the tail call, or that are being
  // accumulator recursion variable eliminated.
  if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
      !isa<UndefValue>(Ret->getReturnValue()) &&
      AccumulatorRecursionEliminationInitVal == 0 &&
      !getCommonReturnValue(Ret, CI))
    return false;

  // OK! We can transform this tail call.  If this is the first one found,
  // create the new entry block, allowing us to branch back to the old entry.
  if (OldEntry == 0) {
    OldEntry = &F->getEntryBlock();
    BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
    NewEntry->takeName(OldEntry);
    OldEntry->setName("tailrecurse");
    BranchInst::Create(OldEntry, NewEntry);

    // If this tail call is marked 'tail' and if there are any allocas in the
    // entry block, move them up to the new entry block.
    TailCallsAreMarkedTail = CI->isTailCall();
    if (TailCallsAreMarkedTail)
      // Move all fixed sized allocas from OldEntry to NewEntry.
      for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
             NEBI = NewEntry->begin(); OEBI != E; )
        if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
          if (isa<ConstantInt>(AI->getArraySize()))
            AI->moveBefore(NEBI);

    // Now that we have created a new block, which jumps to the entry
    // block, insert a PHI node for each argument of the function.
    // For now, we initialize each PHI to only have the real arguments
    // which are passed in.
    Instruction *InsertPos = OldEntry->begin();
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I) {
      PHINode *PN = PHINode::Create(I->getType(),
                                    I->getName() + ".tr", InsertPos);
      I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
      PN->addIncoming(I, NewEntry);
      ArgumentPHIs.push_back(PN);
    }
  }

  // If this function has self recursive calls in the tail position where some
  // are marked tail and some are not, only transform one flavor or another.  We
  // have to choose whether we move allocas in the entry block to the new entry
  // block or not, so we can't make a good choice for both.  NOTE: We could do
  // slightly better here in the case that the function has no entry block
  // allocas.
  if (TailCallsAreMarkedTail && !CI->isTailCall())
    return false;

  // Ok, now that we know we have a pseudo-entry block WITH all of the
  // required PHI nodes, add entries into the PHI node for the actual
  // parameters passed into the tail-recursive call.
  for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
    ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);

  // If we are introducing an accumulator variable to eliminate the recursion,
  // do so now.  Note that we _know_ that no subsequent tail recursion
  // eliminations will happen on this function because of the way the
  // accumulator recursion predicate is set up.
  //
  if (AccumulatorRecursionEliminationInitVal) {
    Instruction *AccRecInstr = AccumulatorRecursionInstr;
    // Start by inserting a new PHI node for the accumulator.
    PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr",
                                     OldEntry->begin());

    // Loop over all of the predecessors of the tail recursion block.  For the
    // real entry into the function we seed the PHI with the initial value,
    // computed earlier.  For any other existing branches to this block (due to
    // other tail recursions eliminated) the accumulator is not modified.
    // Because we haven't added the branch in the current block to OldEntry yet,
    // it will not show up as a predecessor.
    for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
         PI != PE; ++PI) {
      if (*PI == &F->getEntryBlock())
        AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI);
      else
        AccPN->addIncoming(AccPN, *PI);
    }

    // Add an incoming argument for the current block, which is computed by our
    // associative accumulator instruction.
    AccPN->addIncoming(AccRecInstr, BB);

    // Next, rewrite the accumulator recursion instruction so that it does not
    // use the result of the call anymore, instead, use the PHI node we just
    // inserted.
    AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);

    // Finally, rewrite any return instructions in the program to return the PHI
    // node instead of the "initval" that they do currently.  This loop will
    // actually rewrite the return value we are destroying, but that's ok.
    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
      if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
        RI->setOperand(0, AccPN);
    ++NumAccumAdded;
  }

  // Now that all of the PHI nodes are in place, remove the call and
  // ret instructions, replacing them with an unconditional branch.
  BranchInst::Create(OldEntry, Ret);
  BB->getInstList().erase(Ret);  // Remove return.
  BB->getInstList().erase(CI);   // Remove call.
  ++NumEliminated;
  return true;
}
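// --- Illustrative example, not part of the pass above -----------------------
// The classic source-level shape that the accumulator-recursion path targets;
// the function names are illustrative only.
long factorial(long n) {
  if (n <= 1)
    return 1;                    // getCommonReturnValue sees the constant 1
  return n * factorial(n - 1);   // associative op after the recursive call
}

// Roughly the loop the transformation produces (hand-written equivalent):
// the running product lives in the 'accumulator.tr' PHI created above.
long factorial_as_transformed(long n) {
  long Accumulator = 1;          // seeded from the common return value
  for (; n > 1; --n)
    Accumulator *= n;            // the accumulator op, now loop-carried
  return Accumulator;            // every return rewritten to yield the PHI
}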