/// Search the block that ends in terminator \p TI for a call to the enclosing
/// function that could serve as a tail-recursion-elimination candidate.
///
/// \param TI the terminator of the block to inspect.
/// \param CannotTailCallElimCallsMarkedTail when true, a candidate that is
///        already marked 'tail' is rejected.
/// \returns the candidate call, or null if the block has none or the candidate
///          is rejected by one of the checks below.
CallInst *
TailCallElim::FindTRECandidate(Instruction *TI,
                               bool CannotTailCallElimCallsMarkedTail) {
  BasicBlock *Block = TI->getParent();
  Function *Self = Block->getParent();

  // A block that holds nothing but its terminator cannot contain a call.
  if (TI == &Block->front())
    return nullptr;

  // Walk backwards, starting at the terminator, until we meet a direct call to
  // the enclosing function.  Running off the front of the block means there is
  // no potential tail call here.
  CallInst *Candidate = nullptr;
  for (BasicBlock::iterator Pos = TI;; --Pos) {
    Candidate = dyn_cast<CallInst>(Pos);
    if (Candidate && Candidate->getCalledFunction() == Self)
      break;
    if (Pos == Block->begin())
      return nullptr;
  }

  // A call that is already marked 'tail' cannot be transformed when the caller
  // told us so (it does this when the function has dynamic allocas).
  if (Candidate->isTailCall() && CannotTailCallElimCallsMarkedTail)
    return nullptr;

  // Special case: a function such as
  //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
  // must be left alone, because the code generator will lower the call to
  // fabs into inline code.  Recognize the "entry block is just call + return"
  // shape first.
  const bool IsBareCallAndReturn =
      Block == &Self->getEntryBlock() &&
      FirstNonDbg(Block->front()) == Candidate &&
      FirstNonDbg(llvm::next(Block->begin())) == TI &&
      Candidate->getCalledFunction() &&
      !TTI->isLoweredToCall(Candidate->getCalledFunction());
  if (!IsBareCallAndReturn)
    return Candidate;

  // Single-block function with just a call and a return: reject it when the
  // call simply forwards every formal argument, in order.
  CallSite Site(Candidate);
  CallSite::arg_iterator Actual = Site.arg_begin(), ActualEnd = Site.arg_end();
  Function::arg_iterator Formal = Self->arg_begin(),
                         FormalEnd = Self->arg_end();
  while (Actual != ActualEnd && Formal != FormalEnd && *Actual == &*Formal) {
    ++Actual;
    ++Formal;
  }
  if (Actual == ActualEnd && Formal == FormalEnd)
    return nullptr;

  return Candidate;
}
/// Replaces the given call site (Call or Invoke) with a gc.statepoint /// intrinsic with an empty deoptimization arguments list. This does /// NOT do explicit relocation for GC support. static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) { assert(CS.getInstruction()->getModule() && "must be set"); // TODO: technically, a pass is not allowed to get functions from within a // function pass since it might trigger a new function addition. Refactor // this logic out to the initialization of the pass. Doesn't appear to // matter in practice. // Then go ahead and use the builder do actually do the inserts. We insert // immediately before the previous instruction under the assumption that all // arguments will be available here. We can't insert afterwards since we may // be replacing a terminator. IRBuilder<> Builder(CS.getInstruction()); // Note: The gc args are not filled in at this time, that's handled by // RewriteStatepointsForGC (which is currently under review). // Create the statepoint given all the arguments Instruction *Token = nullptr; uint64_t ID; uint32_t NumPatchBytes; AttributeSet OriginalAttrs = CS.getAttributes(); Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, "statepoint-id"); Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute( AttributeSet::FunctionIndex, "statepoint-num-patch-bytes"); AttrBuilder AttrsToRemove; bool HasID = AttrID.isStringAttribute() && !AttrID.getValueAsString().getAsInteger(10, ID); if (HasID) AttrsToRemove.addAttribute("statepoint-id"); else ID = 0xABCDEF00; bool HasNumPatchBytes = AttrNumPatchBytes.isStringAttribute() && !AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes); if (HasNumPatchBytes) AttrsToRemove.addAttribute("statepoint-num-patch-bytes"); else NumPatchBytes = 0; OriginalAttrs = OriginalAttrs.removeAttributes( CS.getInstruction()->getContext(), AttributeSet::FunctionIndex, AttrsToRemove); if (CS.isCall()) { CallInst *ToReplace = 
cast<CallInst>(CS.getInstruction()); CallInst *Call = Builder.CreateGCStatepointCall( ID, NumPatchBytes, CS.getCalledValue(), makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None, "safepoint_token"); Call->setTailCall(ToReplace->isTailCall()); Call->setCallingConv(ToReplace->getCallingConv()); // In case if we can handle this set of attributes - set up function // attributes directly on statepoint and return attributes later for // gc_result intrinsic. Call->setAttributes(OriginalAttrs.getFnAttributes()); Token = Call; // Put the following gc_result and gc_relocate calls immediately after // the old call (which we're about to delete). assert(ToReplace->getNextNode() && "not a terminator, must have next"); Builder.SetInsertPoint(ToReplace->getNextNode()); Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc()); } else if (CS.isInvoke()) { InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction()); // Insert the new invoke into the old block. We'll remove the old one in a // moment at which point this will become the new terminator for the // original block. Builder.SetInsertPoint(ToReplace->getParent()); InvokeInst *Invoke = Builder.CreateGCStatepointInvoke( ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(), ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None, "safepoint_token"); Invoke->setCallingConv(ToReplace->getCallingConv()); // In case if we can handle this set of attributes - set up function // attributes directly on statepoint and return attributes later for // gc_result intrinsic. Invoke->setAttributes(OriginalAttrs.getFnAttributes()); Token = Invoke; // We'll insert the gc.result into the normal block BasicBlock *NormalDest = ToReplace->getNormalDest(); // Can not insert gc.result in case of phi nodes preset. 
// Should have removed this cases prior to running this function assert(!isa<PHINode>(NormalDest->begin())); Instruction *IP = &*(NormalDest->getFirstInsertionPt()); Builder.SetInsertPoint(IP); } else { llvm_unreachable("unexpect type of CallSite"); } assert(Token); // Handle the return value of the original call - update all uses to use a // gc_result hanging off the statepoint node we just inserted // Only add the gc_result iff there is actually a used result if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) { std::string TakenName = CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : ""; CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName); GCResult->setAttributes(OriginalAttrs.getRetAttributes()); return GCResult; } else { // No return value for the call. return nullptr; } }
/// Marks call instructions in \p F as tail call candidates where doing so is
/// considered safe with respect to the function's allocas and byval arguments.
///
/// \param F the function to scan.
/// \param[out] AllCallsAreTailCalls set to true once scanning starts and
///        cleared as soon as a visited call is found that cannot be marked.
///        It is left untouched when the function returns early because \p F
///        calls a function that returns twice.
/// \returns true if at least one call was newly marked 'tail'.
static bool markTails(Function &F, bool &AllCallsAreTailCalls) {
  if (F.callsFunctionThatReturnsTwice())
    return false;
  AllCallsAreTailCalls = true;

  // The local stack holds all alloca instructions and all byval arguments.
  AllocaDerivedValueTracker Tracker;
  for (Argument &Arg : F.args()) {
    if (Arg.hasByValAttr())
      Tracker.walk(&Arg);
  }
  for (auto &BB : F) {
    for (auto &I : BB)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
        Tracker.walk(AI);
  }

  bool Modified = false;

  // Track whether a block is reachable after an alloca has escaped. Blocks that
  // contain the escaping instruction will be marked as being visited without an
  // escaped alloca, since that is how the block began.
  enum VisitType { UNVISITED, UNESCAPED, ESCAPED };
  DenseMap<BasicBlock *, VisitType> Visited;

  // We propagate the fact that an alloca has escaped from block to successor.
  // Visit the blocks that are propagating the escapedness first. To do this, we
  // maintain two worklists.
  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;

  // We may enter a block and visit it thinking that no alloca has escaped yet,
  // then see an escape point and go back around a loop edge and come back to
  // the same block twice. Because of this, we defer setting tail on calls when
  // we first encounter them in a block. Every entry in this list does not
  // statically use an alloca via use-def chain analysis, but may find an alloca
  // through other means if the block turns out to be reachable after an escape
  // point.
  SmallVector<CallInst *, 32> DeferredTails;

  BasicBlock *BB = &F.getEntryBlock();
  VisitType Escaped = UNESCAPED;
  do {
    for (auto &I : *BB) {
      if (Tracker.EscapePoints.count(&I))
        Escaped = ESCAPED;

      CallInst *CI = dyn_cast<CallInst>(&I);
      if (!CI || CI->isTailCall())
        continue;

      bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();

      if (!IsNoTail && CI->doesNotAccessMemory()) {
        // A call to a readnone function whose arguments are all things computed
        // outside this function can be marked tail. Even if you stored the
        // alloca address into a global, a readnone function can't load the
        // global anyhow.
        //
        // Note that this runs whether we know an alloca has escaped or not. If
        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
        bool SafeToTail = true;
        for (auto &Arg : CI->arg_operands()) {
          // Inspect the value being passed (Use::get()).  Use::getUser() would
          // yield the call instruction itself, which is never a Constant or an
          // Argument, and would make both checks below dead.
          if (isa<Constant>(Arg.get()))
            continue;
          if (Argument *A = dyn_cast<Argument>(Arg.get()))
            if (!A->hasByValAttr())
              continue;
          SafeToTail = false;
          break;
        }
        if (SafeToTail) {
          emitOptimizationRemark(
              F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
              "marked this readnone call a tail call candidate");
          CI->setTailCall();
          Modified = true;
          continue;
        }
      }

      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
        DeferredTails.push_back(CI);
      } else {
        AllCallsAreTailCalls = false;
      }
    }

    // Push the current escape state to every successor whose recorded state is
    // weaker, and queue it on the matching worklist.
    for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
      auto &State = Visited[SuccBB];
      if (State < Escaped) {
        State = Escaped;
        if (State == ESCAPED)
          WorklistEscaped.push_back(SuccBB);
        else
          WorklistUnescaped.push_back(SuccBB);
      }
    }

    // Escaped blocks are drained first.  An entry on the unescaped worklist is
    // skipped if its state has since been upgraded to ESCAPED.
    if (!WorklistEscaped.empty()) {
      BB = WorklistEscaped.pop_back_val();
      Escaped = ESCAPED;
    } else {
      BB = nullptr;
      while (!WorklistUnescaped.empty()) {
        auto *NextBB = WorklistUnescaped.pop_back_val();
        if (Visited[NextBB] == UNESCAPED) {
          BB = NextBB;
          Escaped = UNESCAPED;
          break;
        }
      }
    }
  } while (BB);

  for (CallInst *CI : DeferredTails) {
    if (Visited[CI->getParent()] != ESCAPED) {
      // If the escape point was part way through the block, calls after the
      // escape point wouldn't have been put into DeferredTails.
      emitOptimizationRemark(F.getContext(), "tailcallelim", F,
                             CI->getDebugLoc(),
                             "marked this call a tail call candidate");
      CI->setTailCall();
      Modified = true;
    } else {
      AllCallsAreTailCalls = false;
    }
  }

  return Modified;
}
/// Replaces the given call site (Call or Invoke) with a gc.statepoint /// intrinsic with an empty deoptimization arguments list. This does /// NOT do explicit relocation for GC support. static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */ Pass *P) { BasicBlock *BB = CS.getInstruction()->getParent(); Function *F = BB->getParent(); Module *M = F->getParent(); assert(M && "must be set"); // TODO: technically, a pass is not allowed to get functions from within a // function pass since it might trigger a new function addition. Refactor // this logic out to the initialization of the pass. Doesn't appear to // matter in practice. // Fill in the one generic type'd argument (the function is also vararg) std::vector<Type *> argTypes; argTypes.push_back(CS.getCalledValue()->getType()); Function *gc_statepoint_decl = Intrinsic::getDeclaration( M, Intrinsic::experimental_gc_statepoint, argTypes); // Then go ahead and use the builder do actually do the inserts. We insert // immediately before the previous instruction under the assumption that all // arguments will be available here. We can't insert afterwards since we may // be replacing a terminator. Instruction *insertBefore = CS.getInstruction(); IRBuilder<> Builder(insertBefore); // First, create the statepoint (with all live ptrs as arguments). 
std::vector<llvm::Value *> args; // target, #args, unused, args Value *Target = CS.getCalledValue(); args.push_back(Target); int callArgSize = CS.arg_size(); args.push_back( ConstantInt::get(Type::getInt32Ty(M->getContext()), callArgSize)); // TODO: add a 'Needs GC-rewrite' later flag args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 0)); // Copy all the arguments of the original call args.insert(args.end(), CS.arg_begin(), CS.arg_end()); // Create the statepoint given all the arguments Instruction *token = nullptr; AttributeSet return_attributes; if (CS.isCall()) { CallInst *toReplace = cast<CallInst>(CS.getInstruction()); CallInst *call = Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token"); call->setTailCall(toReplace->isTailCall()); call->setCallingConv(toReplace->getCallingConv()); // Before we have to worry about GC semantics, all attributes are legal AttributeSet new_attrs = toReplace->getAttributes(); // In case if we can handle this set of sttributes - set up function attrs // directly on statepoint and return attrs later for gc_result intrinsic. call->setAttributes(new_attrs.getFnAttributes()); return_attributes = new_attrs.getRetAttributes(); // TODO: handle param attributes token = call; // Put the following gc_result and gc_relocate calls immediately after the // the old call (which we're about to delete) BasicBlock::iterator next(toReplace); assert(BB->end() != next && "not a terminator, must have next"); next++; Instruction *IP = &*(next); Builder.SetInsertPoint(IP); Builder.SetCurrentDebugLocation(IP->getDebugLoc()); } else if (CS.isInvoke()) { InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction()); // Insert the new invoke into the old block. We'll remove the old one in a // moment at which point this will become the new terminator for the // original block. 
InvokeInst *invoke = InvokeInst::Create( gc_statepoint_decl, toReplace->getNormalDest(), toReplace->getUnwindDest(), args, "", toReplace->getParent()); invoke->setCallingConv(toReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain // function attributes. AttributeSet new_attrs = toReplace->getAttributes(); // In case if we can handle this set of sttributes - set up function attrs // directly on statepoint and return attrs later for gc_result intrinsic. invoke->setAttributes(new_attrs.getFnAttributes()); return_attributes = new_attrs.getRetAttributes(); token = invoke; // We'll insert the gc.result into the normal block BasicBlock *normalDest = normalizeBBForInvokeSafepoint( toReplace->getNormalDest(), invoke->getParent()); Instruction *IP = &*(normalDest->getFirstInsertionPt()); Builder.SetInsertPoint(IP); } else { llvm_unreachable("unexpect type of CallSite"); } assert(token); // Handle the return value of the original call - update all uses to use a // gc_result hanging off the statepoint node we just inserted // Only add the gc_result iff there is actually a used result if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) { Instruction *gc_result = nullptr; std::vector<Type *> types; // one per 'any' type types.push_back(CS.getType()); // result type auto get_gc_result_id = [&](Type &Ty) { if (Ty.isIntegerTy()) { return Intrinsic::experimental_gc_result_int; } else if (Ty.isFloatingPointTy()) { return Intrinsic::experimental_gc_result_float; } else if (Ty.isPointerTy()) { return Intrinsic::experimental_gc_result_ptr; } else { llvm_unreachable("non java type encountered"); } }; Intrinsic::ID Id = get_gc_result_id(*CS.getType()); Value *gc_result_func = Intrinsic::getDeclaration(M, Id, types); std::vector<Value *> args; args.push_back(token); gc_result = Builder.CreateCall( gc_result_func, args, CS.getInstruction()->hasName() ? 
CS.getInstruction()->getName() : ""); cast<CallInst>(gc_result)->setAttributes(return_attributes); return gc_result; } else { // No return value for the call. return nullptr; } }
/// Replaces the given call site (Call or Invoke) with a gc.statepoint /// intrinsic with an empty deoptimization arguments list. This does /// NOT do explicit relocation for GC support. static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */ Pass *P) { assert(CS.getInstruction()->getParent()->getParent()->getParent() && "must be set"); // TODO: technically, a pass is not allowed to get functions from within a // function pass since it might trigger a new function addition. Refactor // this logic out to the initialization of the pass. Doesn't appear to // matter in practice. // Then go ahead and use the builder do actually do the inserts. We insert // immediately before the previous instruction under the assumption that all // arguments will be available here. We can't insert afterwards since we may // be replacing a terminator. IRBuilder<> Builder(CS.getInstruction()); // Note: The gc args are not filled in at this time, that's handled by // RewriteStatepointsForGC (which is currently under review). // Create the statepoint given all the arguments Instruction *Token = nullptr; AttributeSet OriginalAttrs; if (CS.isCall()) { CallInst *ToReplace = cast<CallInst>(CS.getInstruction()); CallInst *Call = Builder.CreateGCStatepointCall( CS.getCalledValue(), makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None, "safepoint_token"); Call->setTailCall(ToReplace->isTailCall()); Call->setCallingConv(ToReplace->getCallingConv()); // Before we have to worry about GC semantics, all attributes are legal // TODO: handle param attributes OriginalAttrs = ToReplace->getAttributes(); // In case if we can handle this set of attributes - set up function // attributes directly on statepoint and return attributes later for // gc_result intrinsic. Call->setAttributes(OriginalAttrs.getFnAttributes()); Token = Call; // Put the following gc_result and gc_relocate calls immediately after the // the old call (which we're about to delete). 
assert(ToReplace->getNextNode() && "not a terminator, must have next"); Builder.SetInsertPoint(ToReplace->getNextNode()); Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc()); } else if (CS.isInvoke()) { InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction()); // Insert the new invoke into the old block. We'll remove the old one in a // moment at which point this will become the new terminator for the // original block. Builder.SetInsertPoint(ToReplace->getParent()); InvokeInst *Invoke = Builder.CreateGCStatepointInvoke( CS.getCalledValue(), ToReplace->getNormalDest(), ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()), Builder.getInt32(0), None, "safepoint_token"); // Currently we will fail on parameter attributes and on certain // function attributes. OriginalAttrs = ToReplace->getAttributes(); // In case if we can handle this set of attributes - set up function // attributes directly on statepoint and return attributes later for // gc_result intrinsic. Invoke->setAttributes(OriginalAttrs.getFnAttributes()); Token = Invoke; // We'll insert the gc.result into the normal block BasicBlock *NormalDest = normalizeBBForInvokeSafepoint( ToReplace->getNormalDest(), Invoke->getParent()); Builder.SetInsertPoint(NormalDest->getFirstInsertionPt()); } else { llvm_unreachable("unexpect type of CallSite"); } assert(Token); // Handle the return value of the original call - update all uses to use a // gc_result hanging off the statepoint node we just inserted // Only add the gc_result iff there is actually a used result if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) { std::string TakenName = CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : ""; CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName); GCResult->setAttributes(OriginalAttrs.getRetAttributes()); return GCResult; } else { // No return value for the call. return nullptr; } }
bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVector<PHINode*, 8> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail) { BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); if (&BB->front() == Ret) // Make sure there is something before the ret... return false; // If the return is in the entry block, then making this transformation would // turn infinite recursion into an infinite loop. This transformation is ok // in theory, but breaks some code like: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call // disable this xform in this case, because the code generator will lower the // call to fabs into inline code. if (BB == &F->getEntryBlock()) return false; // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. CallInst *CI; BasicBlock::iterator BBI = Ret; while (1) { CI = dyn_cast<CallInst>(BBI); if (CI && CI->getCalledFunction() == F) break; if (BBI == BB->begin()) return false; // Didn't find a potential tail call. --BBI; } // If this call is marked as a tail call, and if there are dynamic allocas in // the function, we cannot perform this optimization. if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) return false; // If we are introducing accumulator recursion to eliminate associative // operations after the call instruction, this variable contains the initial // value for the accumulator. If this value is set, we actually perform // accumulator recursion elimination instead of simple tail recursion // elimination. Value *AccumulatorRecursionEliminationInitVal = 0; Instruction *AccumulatorRecursionInstr = 0; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. 
for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) if (!CanMoveAboveCall(BBI, CI)) { // If we can't move the instruction above the call, it might be because it // is an associative operation that could be tranformed using accumulator // recursion elimination. Check to see if this is the case, and if so, // remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = CanTransformAccumulatorRecursion(BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction // accumulates. AccumulatorRecursionInstr = BBI; } else { return false; // Otherwise, we cannot eliminate the tail recursion! } } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a // constant, return the value returned by the tail call, or that are being // accumulator recursion variable eliminated. if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && AccumulatorRecursionEliminationInitVal == 0 && !getCommonReturnValue(Ret, CI)) return false; // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. if (OldEntry == 0) { OldEntry = &F->getEntryBlock(); BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); OldEntry->setName("tailrecurse"); BranchInst::Create(OldEntry, NewEntry); // If this tail call is marked 'tail' and if there are any allocas in the // entry block, move them up to the new entry block. TailCallsAreMarkedTail = CI->isTailCall(); if (TailCallsAreMarkedTail) // Move all fixed sized allocas from OldEntry to NewEntry. 
for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(), NEBI = NewEntry->begin(); OEBI != E; ) if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++)) if (isa<ConstantInt>(AI->getArraySize())) AI->moveBefore(NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. // For now, we initialize each PHI to only have the real arguments // which are passed in. Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { PHINode *PN = PHINode::Create(I->getType(), I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); ArgumentPHIs.push_back(PN); } } // If this function has self recursive calls in the tail position where some // are marked tail and some are not, only transform one flavor or another. We // have to choose whether we move allocas in the entry block to the new entry // block or not, so we can't make a good choice for both. NOTE: We could do // slightly better here in the case that the function has no entry block // allocas. if (TailCallsAreMarkedTail && !CI->isTailCall()) return false; // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i) ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB); // If we are introducing an accumulator variable to eliminate the recursion, // do so now. Note that we _know_ that no subsequent tail recursion // eliminations will happen on this function because of the way the // accumulator recursion predicate is set up. // if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. 
PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, // computed earlier. For any other existing branches to this block (due to // other tail recursions eliminated) the accumulator is not modified. // Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); PI != PE; ++PI) { if (*PI == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI); else AccPN->addIncoming(AccPN, *PI); } // Add an incoming argument for the current block, which is computed by our // associative accumulator instruction. AccPN->addIncoming(AccRecInstr, BB); // Next, rewrite the accumulator recursion instruction so that it does not // use the result of the call anymore, instead, use the PHI node we just // inserted. AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will // actually rewrite the return value we are destroying, but that's ok. for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) RI->setOperand(0, AccPN); ++NumAccumAdded; } // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. BranchInst::Create(OldEntry, Ret); BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated; return true; }