// Convert the given function to use normalized argument/return types. static bool ConvertFunction(Function *Func) { FunctionType *FTy = Func->getFunctionType(); FunctionType *NFTy = NormalizeFunctionType(FTy); if (NFTy == FTy) return false; // No change needed. Function *NewFunc = RecreateFunction(Func, NFTy); // Move the arguments across to the new function. for (Function::arg_iterator Arg = Func->arg_begin(), E = Func->arg_end(), NewArg = NewFunc->arg_begin(); Arg != E; ++Arg, ++NewArg) { NewArg->takeName(Arg); if (Arg->getType() == NewArg->getType()) { Arg->replaceAllUsesWith(NewArg); } else { Instruction *Trunc = new TruncInst( NewArg, Arg->getType(), NewArg->getName() + ".arg_trunc", NewFunc->getEntryBlock().getFirstInsertionPt()); Arg->replaceAllUsesWith(Trunc); } } if (FTy->getReturnType() != NFTy->getReturnType()) { // Fix up return instructions. Instruction::CastOps CastType = Func->getAttributes().hasAttribute(0, Attribute::SExt) ? Instruction::SExt : Instruction::ZExt; for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); BB != E; ++BB) { for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { Instruction *Inst = Iter++; if (ReturnInst *Ret = dyn_cast<ReturnInst>(Inst)) { Value *Ext = CopyDebug( CastInst::Create(CastType, Ret->getReturnValue(), NFTy->getReturnType(), Ret->getReturnValue()->getName() + ".ret_ext", Ret), Ret); CopyDebug(ReturnInst::Create(Ret->getContext(), Ext, Ret), Ret); Ret->eraseFromParent(); } } } } Func->eraseFromParent(); return true; }
/// cloneFunctionBody - Create a new function based on F and /// insert it into module. Remove first argument. Use STy as /// the return type for new function. Function *SRETPromotion::cloneFunctionBody(Function *F, const StructType *STy) { const FunctionType *FTy = F->getFunctionType(); std::vector<const Type*> Params; // Attributes - Keep track of the parameter attributes for the arguments. SmallVector<AttributeWithIndex, 8> AttributesVec; const AttrListPtr &PAL = F->getAttributes(); // Add any return attributes. if (Attributes attrs = PAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // Skip first argument. Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); ++I; // 0th parameter attribute is reserved for return type. // 1th parameter attribute is for first 1st sret argument. unsigned ParamIndex = 2; while (I != E) { Params.push_back(I->getType()); if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); ++I; ++ParamIndex; } // Add any fn attributes. if (Attributes attrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg()); Function *NF = Function::Create(NFTy, F->getLinkage()); NF->takeName(F); NF->copyAttributesFrom(F); NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); F->getParent()->getFunctionList().insert(F, NF); NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Replace arguments I = F->arg_begin(); E = F->arg_end(); Function::arg_iterator NI = NF->arg_begin(); ++I; while (I != E) { I->replaceAllUsesWith(NI); NI->takeName(I); ++I; ++NI; } return NF; }
/// lowerIncomingArguments - To avoid having to handle incoming arguments /// specially, we lower each arg to a copy instruction in the entry block. This /// ensures that the argument value itself cannot be live out of the entry /// block. void SjLjEHPass::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have // to handle them differently. We use an extract/insert pair as a // lightweight method to achieve the same goal. if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt); Instruction *NI = InsertValueInst::Create(AI, EI, 0); NI->insertAfter(EI); AI->replaceAllUsesWith(NI); // Set the operand of the instructions back to the AllocaInst. EI->setOperand(0, AI); NI->setOperand(0, AI); } else { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", AfterAllocaInsPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. // Normally it's forbidden to replace a CastInst's operand because it // could cause the opcode to reflect an illegal conversion. However, we're // replacing it here with the same value it was constructed with. We do // this because the above replaceAllUsesWith() clobbered the operand, but // we want this one to remain. NC->setOperand(0, AI); } } }
/// lowerIncomingArguments - To avoid having to handle incoming arguments /// specially, we lower each arg to a copy instruction in the entry block. This /// ensures that the argument value itself cannot be live out of the entry /// block. void SjLjEHPrepare::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { Type *Ty = AI->getType(); if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) { // Aggregate types can't be cast, but are legal argument types, // so we have to handle them differently. We use // select i8 true, %arg, undef to achieve the same goal Value *TrueValue = ConstantInt::getTrue(F.getContext()); Value *UndefValue = UndefValue::get(Ty); Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue, AI->getName() + ".tmp", AfterAllocaInsPt); AI->replaceAllUsesWith(SI); SI->setOperand(1, AI); } else { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", AfterAllocaInsPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. // Normally it's forbidden to replace a CastInst's operand because it // could cause the opcode to reflect an illegal conversion. However, we're // replacing it here with the same value it was constructed with. We do // this because the above replaceAllUsesWith() clobbered the operand, but // we want this one to remain. NC->setOperand(0, AI); } } }
/// lowerIncomingArguments - To avoid having to handle incoming arguments /// specially, we lower each arg to a copy instruction in the entry block. This /// ensures that the argument value itself cannot be live out of the entry /// block. void SjLjEHPrepare::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { Type *Ty = AI->getType(); // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. Value *TrueValue = ConstantInt::getTrue(F.getContext()); Value *UndefValue = UndefValue::get(Ty); Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue, AI->getName() + ".tmp", AfterAllocaInsPt); AI->replaceAllUsesWith(SI); // Reset the operand, because it was clobbered by the RAUW above. SI->setOperand(1, AI); } }
void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, GlobalValue *Array) { const Type *ArgVTy = PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)); const PointerType *UIntPtr = PointerType::getUnqual(Type::Int32Ty); Module &M = *MainFn->getParent(); Constant *InitFn = M.getOrInsertFunction(FnName, Type::Int32Ty, Type::Int32Ty, ArgVTy, UIntPtr, Type::Int32Ty, (Type *)0); // This could force argc and argv into programs that wouldn't otherwise have // them, but instead we just pass null values in. std::vector<Value*> Args(4); Args[0] = Constant::getNullValue(Type::Int32Ty); Args[1] = Constant::getNullValue(ArgVTy); // Skip over any allocas in the entry block. BasicBlock *Entry = MainFn->begin(); BasicBlock::iterator InsertPos = Entry->begin(); while (isa<AllocaInst>(InsertPos)) ++InsertPos; std::vector<Constant*> GEPIndices(2, Constant::getNullValue(Type::Int32Ty)); unsigned NumElements = 0; if (Array) { Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0], GEPIndices.size()); NumElements = cast<ArrayType>(Array->getType()->getElementType())->getNumElements(); } else { // If this profiling instrumentation doesn't have a constant array, just // pass null. Args[2] = ConstantPointerNull::get(UIntPtr); } Args[3] = ConstantInt::get(Type::Int32Ty, NumElements); Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), "newargc", InsertPos); // If argc or argv are not available in main, just pass null values in. Function::arg_iterator AI; switch (MainFn->arg_size()) { default: case 2: AI = MainFn->arg_begin(); ++AI; if (AI->getType() != ArgVTy) { Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, false); InitCall->setOperand(2, CastInst::create(opcode, AI, ArgVTy, "argv.cast", InitCall)); } else { InitCall->setOperand(2, AI); } /* FALL THROUGH */ case 1: AI = MainFn->arg_begin(); // If the program looked at argc, have it look at the return value of the // init call instead. if (AI->getType() != Type::Int32Ty) { Instruction::CastOps opcode; if (!AI->use_empty()) { opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); AI->replaceAllUsesWith( CastInst::create(opcode, InitCall, AI->getType(), "", InsertPos)); } opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true); InitCall->setOperand(1, CastInst::create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall)); } else { AI->replaceAllUsesWith(InitCall); InitCall->setOperand(1, AI); } case 0: break; } }
// RemoveDeadStuffFromFunction - Remove any arguments and return values from F // that are not in LiveValues. Transform the function and all of the callees of // the function to not have these arguments and return values. // bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Don't modify fully live functions if (LiveFunctions.count(F)) return false; // Start by computing a new prototype for the function, which is the same as // the old function, but has fewer arguments and a different return type. FunctionType *FTy = F->getFunctionType(); std::vector<Type*> Params; // Set up to build a new list of parameter attributes. SmallVector<AttributeWithIndex, 8> AttributesVec; const AttributeSet &PAL = F->getAttributes(); // Find out the new return value. Type *RetTy = FTy->getReturnType(); Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); std::vector<Type*> RetTypes; if (RetTy->isVoidTy()) { NRetTy = RetTy; } else { StructType *STy = dyn_cast<StructType>(RetTy); if (STy) // Look at each of the original return values individually. for (unsigned i = 0; i != RetCount; ++i) { RetOrArg Ret = CreateRet(F, i); if (LiveValues.erase(Ret)) { RetTypes.push_back(STy->getElementType(i)); NewRetIdxs[i] = RetTypes.size() - 1; } else { ++NumRetValsEliminated; DEBUG(dbgs() << "DAE - Removing return value " << i << " from " << F->getName() << "\n"); } } else // We used to return a single value. if (LiveValues.erase(CreateRet(F, 0))) { RetTypes.push_back(RetTy); NewRetIdxs[0] = 0; } else { DEBUG(dbgs() << "DAE - Removing return value from " << F->getName() << "\n"); ++NumRetValsEliminated; } if (RetTypes.size() > 1) // More than one return type? Return a struct with them. Also, if we used // to return a struct and didn't change the number of return values, // return a struct again. This prevents changing {something} into // something and {} into void. // Make the new struct packed if we used to return a packed struct // already. NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked()); else if (RetTypes.size() == 1) // One return type? Just a simple value then, but only if we didn't use to // return a struct with that simple value before. NRetTy = RetTypes.front(); else if (RetTypes.size() == 0) // No return types? Make it void, but only if we didn't use to return {}. NRetTy = Type::getVoidTy(F->getContext()); } assert(NRetTy && "No new return type found?"); // The existing function return attributes. AttributeSet RAttrs = PAL.getRetAttributes(); // Remove any incompatible attributes, but only if we removed all return // values. Otherwise, ensure that we don't have any conflicting attributes // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. if (NRetTy->isVoidTy()) RAttrs = AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex, AttrBuilder(RAttrs, AttributeSet::ReturnIndex). removeAttributes(AttributeFuncs::typeIncompatible(NRetTy))); else assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex). hasAttributes(AttributeFuncs::typeIncompatible(NRetTy)) && "Return attributes no longer compatible?"); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeWithIndex::get(NRetTy->getContext(), AttributeSet::ReturnIndex, RAttrs)); // Remember which arguments are still alive. SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false); // Construct the new parameter list from non-dead arguments. Also construct // a new set of parameter attributes to correspond. Skip the first parameter // attribute, since that belongs to the return value. unsigned i = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++i) { RetOrArg Arg = CreateArg(F, i); if (LiveValues.erase(Arg)) { Params.push_back(I->getType()); ArgAlive[i] = true; // Get the original parameter attributes (skipping the first one, that is // for the return value. if (PAL.hasAttributes(i + 1)) { AttributesVec. push_back(AttributeWithIndex::get(F->getContext(), i + 1, PAL.getParamAttributes(i + 1))); AttributesVec.back().Index = Params.size(); } } else { ++NumArgumentsEliminated; DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName() << ") from " << F->getName() << "\n"); } } if (PAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeWithIndex::get(F->getContext(), AttributeSet::FunctionIndex, PAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec); // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); // No change? if (NFTy == FTy) return false; // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, F->getLinkage()); NF->copyAttributesFrom(F); NF->setAttributes(NewPAL); // Insert the new function before the old function, so we won't be processing // it again. F->getParent()->getFunctionList().insert(F, NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites // to pass in a smaller number of arguments into the new function. // std::vector<Value*> Args; while (!F->use_empty()) { CallSite CS(F->use_back()); Instruction *Call = CS.getInstruction(); AttributesVec.clear(); const AttributeSet &CallPAL = CS.getAttributes(); // The call return attributes. AttributeSet RAttrs = CallPAL.getRetAttributes(); // Adjust in case the function was changed to return void. RAttrs = AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex, AttrBuilder(RAttrs, AttributeSet::ReturnIndex). removeAttributes(AttributeFuncs::typeIncompatible(NF->getReturnType()))); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeWithIndex::get(NF->getContext(), AttributeSet::ReturnIndex, RAttrs)); // Declare these outside of the loops, so we can reuse them for the second // loop, which loops the varargs. CallSite::arg_iterator I = CS.arg_begin(); unsigned i = 0; // Loop over those operands, corresponding to the normal arguments to the // original function, and add those that are still alive. for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i) if (ArgAlive[i]) { Args.push_back(*I); // Get original parameter attributes, but skip return attributes. if (CallPAL.hasAttributes(i + 1)) { AttributesVec. push_back(AttributeWithIndex::get(F->getContext(), i + 1, CallPAL.getParamAttributes(i + 1))); AttributesVec.back().Index = Args.size(); } } // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { Args.push_back(*I); if (CallPAL.hasAttributes(i + 1)) { AttributesVec. push_back(AttributeWithIndex::get(F->getContext(), i + 1, CallPAL.getParamAttributes(i + 1))); AttributesVec.back().Index = Args.size(); } } if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeWithIndex::get(Call->getContext(), AttributeSet::FunctionIndex, CallPAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(NewCallPAL); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } New->setDebugLoc(Call->getDebugLoc()); Args.clear(); if (!Call->use_empty()) { if (New->getType() == Call->getType()) { // Return type not changed? Just replace users then. Call->replaceAllUsesWith(New); New->takeName(Call); } else if (New->getType()->isVoidTy()) { // Our return value has uses, but they will get removed later on. // Replace by null for now. if (!Call->getType()->isX86_MMXTy()) Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); } else { assert(RetTy->isStructTy() && "Return type changed, but not into a void. The old return type" " must have been a struct!"); Instruction *InsertPt = Call; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { BasicBlock::iterator IP = II->getNormalDest()->begin(); while (isa<PHINode>(IP)) ++IP; InsertPt = IP; } // We used to return a struct. Instead of doing smart stuff with all the // uses of this struct, we will just rebuild it using // extract/insertvalue chaining and let instcombine clean that up. // // Start out building up our return value from undef Value *RetVal = UndefValue::get(RetTy); for (unsigned i = 0; i != RetCount; ++i) if (NewRetIdxs[i] != -1) { Value *V; if (RetTypes.size() > 1) // We are still returning a struct, so extract the value from our // return value V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret", InsertPt); else // We are now returning a single element, so just insert that V = New; // Insert the value at the old position RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt); } // Now, replace all uses of the old call instruction with the return // struct we built Call->replaceAllUsesWith(RetVal); New->takeName(Call); } } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. i = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++i) if (ArgAlive[i]) { // If this is a live argument, move the name and users over to the new // version. I->replaceAllUsesWith(I2); I2->takeName(I); ++I2; } else { // If this argument is dead, replace any uses of it with null constants // (these are guaranteed to become unused later on). if (!I->getType()->isX86_MMXTy()) I->replaceAllUsesWith(Constant::getNullValue(I->getType())); } // If we change the return value of the function we must rewrite any return // instructions. Check this now. if (F->getReturnType() != NF->getReturnType()) for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Value *RetVal; if (NFTy->getReturnType()->isVoidTy()) { RetVal = 0; } else { assert (RetTy->isStructTy()); // The original return value was a struct, insert // extractvalue/insertvalue chains to extract only the values we need // to return and insert them into our new result. // This does generate messy code, but we'll let it to instcombine to // clean that up. Value *OldRet = RI->getOperand(0); // Start out building up our return value from undef RetVal = UndefValue::get(NRetTy); for (unsigned i = 0; i != RetCount; ++i) if (NewRetIdxs[i] != -1) { ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i, "oldret", RI); if (RetTypes.size() > 1) { // We're still returning a struct, so reinsert the value into // our new return value at the new index RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i], "newret", RI); } else { // We are now only returning a simple value, so just return the // extracted value. RetVal = EV; } } } // Replace the return instruction with one returning the new return // value (possibly 0 if we became void). ReturnInst::Create(F->getContext(), RetVal, RI); BB->getInstList().erase(RI); } // Patch the pointer to LLVM function in debug info descriptor. FunctionDIMap::iterator DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) DI->second.replaceFunction(NF); // Now that the old function is dead, delete it. F->eraseFromParent(); return true; }
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. CallGraphNode *ArgPromotion::DoPromotion(Function *F, SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. const FunctionType *FTy = F->getFunctionType(); std::vector<const Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. // std::map<Argument*, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. std::map<IndicesVector, LoadInst*> OriginalLoads; // Attributes - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeWithIndex, 8> AttributesVec; const AttrListPtr &PAL = F->getAttributes(); // Add any return attributes. if (Attributes attrs = PAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // First, determine the new argument list unsigned ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgIndex) { if (ByValArgsToTransform.count(I)) { // Simple byval argument? Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Params.push_back(STy->getElementType(i)); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(I)) { // Unchanged argument Params.push_back(I->getType()); if (Attributes attrs = PAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[I]; for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User)); IndicesVector Indices; Indices.reserve(User->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(Indices); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(User)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(User->use_back()); OriginalLoads[Indices] = OrigLoad; } // Add a parameter to the function for each element passed in. for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), SI->begin(), SI->end())); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } // Add any function attributes. if (Attributes attrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); const Type *RetTy = FTy->getReturnType(); // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which // have zero fixed arguments. bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg()) { ExtraArgHack = true; Params.push_back(Type::getInt32Ty(F->getContext())); } // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); NF->takeName(F); // Get the alias analysis information that we need to update to reflect our // changes. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraph>(); // Get a new callgraph node for NF. CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value*, 16> Args; while (!F->use_empty()) { CallSite CS = CallSite::get(F->use_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttrListPtr &CallPAL = CS.getAttributes(); // Add any return attributes. if (Attributes attrs = CallPAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgIndex) if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { Args.push_back(*AI); // Unmodified argument if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, (*AI)->getName()+"."+utostr(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value*> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[*SI]; if (!SI->empty()) { Ops.reserve(SI->size()); const Type *ElTy = V->getType(); for (IndicesVector::const_iterator II = SI->begin(), IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. const Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); // Keep track of the type we're currently indexing. ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), V->getName()+".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } } if (ExtraArgHack) Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } // Add any function attributes. if (Attributes attrs = CallPAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args.begin(), Args.end(), "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); } else { New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } Args.clear(); AttributesVec.clear(); // Update the alias analysis implementation to know that we are replacing // the old call with a new one. AA.replaceWithNewValue(Call, New); // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->replaceCallEdge(Call, New, NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); New->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transfering uses of the old arguments over to // the new arguments, also transfering over the names as well. // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { // If this is an unmodified argument, move the name and users over to the // new version. I->replaceAllUsesWith(I2); I2->takeName(I); AA.replaceWithNewValue(I, I2); ++I2; continue; } if (ByValArgsToTransform.count(I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = NF->begin()->begin(); // Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, TheAlloca->getName()+"."+Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(I); AA.replaceWithNewValue(I, TheAlloca); continue; } if (I->use_empty()) { AA.deleteValue(I); continue; } // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. ScalarizeTable &ArgIndices = ScalarizedElements[I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) { assert(ArgIndices.begin()->empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); *It != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->use_back()); L->replaceAllUsesWith(TheArg); AA.replaceWithNewValue(L, TheArg); L->eraseFromParent(); } AA.deleteValue(GEP); GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) ++I2; } // Notify the alias analysis implementation that we inserted a new argument. if (ExtraArgHack) AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), NF->arg_begin()); // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); NF_CGN->stealCalledFunctionsFrom(CG[F]); // Now that the old function is dead, delete it. If there is a dangling // reference to the CallgraphNode, just leave the dead function around for // someone else to nuke. CallGraphNode *CGN = CG[F]; if (CGN->getNumReferences() == 0) delete CG.removeFunctionFromModule(CGN); else F->setLinkage(Function::ExternalLinkage); return NF_CGN; }
static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set // then we actually perform accumulator recursion elimination instead of // simple tail recursion elimination. If the operation is an LLVM instruction // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then // we are handling the case when the return instruction returns a constant C // which is different to the constant returned by other return instructions // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a // special case of accumulator recursion, the operation being "return C". Value *AccumulatorRecursionEliminationInitVal = nullptr; Instruction *AccumulatorRecursionInstr = nullptr; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. BasicBlock::iterator BBI(CI); for (++BBI; &*BBI != Ret; ++BBI) { if (canMoveAboveCall(&*BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = canTransformAccumulatorRecursion(&*BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction // accumulates. AccumulatorRecursionInstr = &*BBI; } else { return false; // Otherwise, we cannot eliminate the tail recursion! } } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a // constant, return the value returned by the tail call, or that are being // accumulator recursion variable eliminated. if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && AccumulatorRecursionEliminationInitVal == nullptr && !getCommonReturnValue(nullptr, CI)) { // One case remains that we are able to handle: the current return // instruction returns a constant, and all other return instructions // return a different constant. if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) return false; // Current return instruction does not return a constant. // Check that all other return instructions return a common constant. If // so, record it in AccumulatorRecursionEliminationInitVal. AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); if (!AccumulatorRecursionEliminationInitVal) return false; } BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(), "transforming tail recursion to loop"); // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. if (!OldEntry) { OldEntry = &F->getEntryBlock(); BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); OldEntry->setName("tailrecurse"); BranchInst::Create(OldEntry, NewEntry); // If this tail call is marked 'tail' and if there are any allocas in the // entry block, move them up to the new entry block. TailCallsAreMarkedTail = CI->isTailCall(); if (TailCallsAreMarkedTail) // Move all fixed sized allocas from OldEntry to NewEntry. for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(), NEBI = NewEntry->begin(); OEBI != E; ) if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++)) if (isa<ConstantInt>(AI->getArraySize())) AI->moveBefore(&*NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. // For now, we initialize each PHI to only have the real arguments // which are passed in. Instruction *InsertPos = &OldEntry->front(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(&*I, NewEntry); ArgumentPHIs.push_back(PN); } } // If this function has self recursive calls in the tail position where some // are marked tail and some are not, only transform one flavor or another. We // have to choose whether we move allocas in the entry block to the new entry // block or not, so we can't make a good choice for both. NOTE: We could do // slightly better here in the case that the function has no entry block // allocas. if (TailCallsAreMarkedTail && !CI->isTailCall()) return false; // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); // If we are introducing an accumulator variable to eliminate the recursion, // do so now. Note that we _know_ that no subsequent tail recursion // eliminations will happen on this function because of the way the // accumulator recursion predicate is set up. // if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); PHINode *AccPN = PHINode::Create( AccumulatorRecursionEliminationInitVal->getType(), std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, // computed earlier. For any other existing branches to this block (due to // other tail recursions eliminated) the accumulator is not modified. // Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if (P == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); else AccPN->addIncoming(AccPN, P); } if (AccRecInstr) { // Add an incoming argument for the current block, which is computed by // our associative and commutative accumulator instruction. AccPN->addIncoming(AccRecInstr, BB); // Next, rewrite the accumulator recursion instruction so that it does not // use the result of the call anymore, instead, use the PHI node we just // inserted. AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); } else { // Add an incoming argument for the current block, which is just the // constant returned by the current return instruction. AccPN->addIncoming(Ret->getReturnValue(), BB); } // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will // actually rewrite the return value we are destroying, but that's ok. for (BasicBlock &BBI : *F) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI.getTerminator())) RI->setOperand(0, AccPN); ++NumAccumAdded; } // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); NewBI->setDebugLoc(CI->getDebugLoc()); BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated; return true; }
// First thing we need to do is scan the whole function for values that are // live across unwind edges. Each value that is live across an unwind edge // we spill into a stack location, guaranteeing that there is nothing live // across the unwind edge. This process also splits all critical edges // coming out of invoke's. void LowerInvoke:: splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; SplitCriticalEdge(II, 0, this); SplitCriticalEdge(II, 1, this); assert(!isa<PHINode>(II->getNormalDest()) && !isa<PHINode>(II->getUnwindDest()) && "critical edge splitting left single entry phi nodes?"); } Function *F = Invokes.back()->getParent()->getParent(); // To avoid having to handle incoming arguments specially, we lower each arg // to a copy instruction in the entry block. This ensures that the argument // value itself cannot be live across the entry block. BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsertPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { // This is always a no-op cast because we're casting AI to AI->getType() so // src and destination types are identical. BitCast is the only possibility. CastInst *NC = new BitCastInst( AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); AI->replaceAllUsesWith(NC); // Normally its is forbidden to replace a CastInst's operand because it // could cause the opcode to reflect an illegal conversion. However, we're // replacing it here with the same value it was constructed with to simply // make NC its user. NC->setOperand(0, AI); } // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && !isa<PHINode>(Inst->use_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) continue; // Avoid iterator invalidation by copying users to a temporary vector. std::vector<Instruction*> Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Scan all of the uses and see if the live range is live across an unwind // edge. If we find a use live across an invoke edge, create an alloca // and spill the value. std::set<InvokeInst*> InvokesWithStoreInserted; // Find all of the blocks that this value is live in. std::set<BasicBlock*> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); Users.pop_back(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { NeedsSpill = true; } } // If we decided we need a spill, do it. if (NeedsSpill) { ++NumSpilled; DemoteRegToStack(*Inst, true); } } }
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. static Function * doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform, Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector<Type *> Params; using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. std::map<Argument *, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. // We need to keep the original loads for each argument and the elements // of the argument that are accessed. std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeSet, 8> ArgAttrVec; AttributeList PAL = F->getAttributes(); // First, determine the new argument list unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), AttributeSet()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; // There may be remaining metadata uses of the argument for things like // llvm.dbg.value. Replace them with undef. I->replaceAllUsesWith(UndefValue::get(I->getType())); } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); Type *SrcTy; if (LoadInst *L = dyn_cast<LoadInst>(UI)) SrcTy = L->getType(); else SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; } // Add a parameter to the function for each element passed in. for (const auto &ArgIndex : ArgIndices) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType( cast<PointerType>(I->getType()->getScalarType())->getElementType(), ArgIndex.second)); ArgAttrVec.push_back(AttributeSet()); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } Type *RetTy = FTy->getReturnType(); // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); // Patch the pointer to LLVM function in debug info descriptor. NF->setSubprogram(F->getSubprogram()); F->setSubprogram(nullptr); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(), PAL.getRetAttributes(), ArgAttrVec)); ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value *, 16> Args; while (!F->use_empty()) { CallSite CS(F->user_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttributeList &CallPAL = CS.getAttributes(); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call)); ArgAttrVec.push_back(AttributeSet()); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value *> Ops; for (const auto &ArgIndex : ArgIndices) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; if (!ArgIndex.second.empty()) { Ops.reserve(ArgIndex.second.size()); Type *ElTy = V->getType(); for (auto II : ArgIndex.second) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, II)); // Keep track of the type we're currently indexing. if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) ElTy = ElPTy->getElementType(); else ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, V->getName() + ".idx", Call); Ops.clear(); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); // Transfer the AA info too. AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); } } // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; CS.getOperandBundlesAsDefs(OpBundles); CallSite NewCS; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, OpBundles, "", Call); } else { auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call); NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); NewCS = NewCall; } NewCS.setCallingConv(CS.getCallingConv()); NewCS.setAttributes( AttributeList::get(F->getContext(), CallPAL.getFnAttributes(), CallPAL.getRetAttributes(), ArgAttrVec)); NewCS->setDebugLoc(Call->getDebugLoc()); uint64_t W; if (Call->extractProfTotalWeight(W)) NewCS->setProfWeight(W); Args.clear(); ArgAttrVec.clear(); // Update the callgraph to know that the callsite has been transformed. if (ReplaceCallSite) (*ReplaceCallSite)(CS, NewCS); if (!Call->use_empty()) { Call->replaceAllUsesWith(NewCS.getInstruction()); NewCS->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } const DataLayout &DL = F->getParent()->getDataLayout(); // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { // If this is an unmodified argument, move the name and users over to the // new version. I->replaceAllUsesWith(&*I2); I2->takeName(&*I); ++I2; continue; } if (ByValArgsToTransform.count(&*I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, I->getParamAlignment(), "", InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); I2->setName(I->getName() + "." + Twine(i)); new StoreInst(&*I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(&*I); // If the alloca is used in a call, we must clear the tail flag since // the callee now uses an alloca from the caller. for (User *U : TheAlloca->users()) { CallInst *Call = dyn_cast<CallInst>(U); if (!Call) continue; Call->setTailCall(false); } continue; } if (I->use_empty()) continue; // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName() + ".val"); LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); It->second != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->user_back()); L->replaceAllUsesWith(&*TheArg); L->eraseFromParent(); } GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. std::advance(I2, ArgIndices.size()); } return NF; }
/// DeleteDeadVarargs - If this is an function that takes a ... list, and if /// llvm.vastart is never called, the varargs list is dead for the function. bool DAE::DeleteDeadVarargs(Function &Fn) { assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!"); if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false; // Ensure that the function is only directly called. if (Fn.hasAddressTaken()) return false; // Okay, we know we can transform this function if safe. Scan its body // looking for calls to llvm.vastart. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { if (II->getIntrinsicID() == Intrinsic::vastart) return false; } } } // If we get here, there are no calls to llvm.vastart in the function body, // remove the "..." and adjust all the calls. // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. const FunctionType *FTy = Fn.getFunctionType(); std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, Fn.getLinkage()); NF->copyAttributesFrom(&Fn); Fn.getParent()->getFunctionList().insert(&Fn, NF); NF->takeName(&Fn); // Loop over all of the callers of the function, transforming the call sites // to pass in a smaller number of arguments into the new function. // std::vector<Value*> Args; while (!Fn.use_empty()) { CallSite CS = CallSite::get(Fn.use_back()); Instruction *Call = CS.getInstruction(); // Pass all the same arguments. Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs); // Drop any attributes that were on the vararg arguments. AttrListPtr PAL = CS.getAttributes(); if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) { SmallVector<AttributeWithIndex, 8> AttributesVec; for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlot(i)); if (Attributes FnAttrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); } Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args.begin(), Args.end(), "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(PAL); } else { New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(PAL); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } if (MDNode *N = Call->getDbgMetadata()) New->setDbgMetadata(N); Args.clear(); if (!Call->use_empty()) Call->replaceAllUsesWith(New); New->takeName(Call); // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); // Loop over the argument list, transfering uses of the old arguments over to // the new arguments, also transfering over the names as well. While we're at // it, remove the dead arguments from the DeadArguments list. // for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++I2) { // Move the name and users over to the new version. I->replaceAllUsesWith(I2); I2->takeName(I); } // Finally, nuke the old function. Fn.eraseFromParent(); return true; }
/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge /// we spill into a stack location, guaranteeing that there is nothing live /// across the unwind edge. This process also splits all critical edges /// coming out of invoke's. /// FIXME: Move this function to a common utility file (Local.cpp?) so /// both SjLj and LowerInvoke can use it. void SjLjEHPass:: splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; SplitCriticalEdge(II, 0, this); // FIXME: New EH - This if-condition will be always true in the new scheme. if (II->getUnwindDest()->isLandingPad()) { SmallVector<BasicBlock*, 2> NewBBs; SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(), ".1", ".2", this, NewBBs); LPadSuccMap[II] = *succ_begin(NewBBs[0]); } else { SplitCriticalEdge(II, 1, this); } assert(!isa<PHINode>(II->getNormalDest()) && !isa<PHINode>(II->getUnwindDest()) && "Critical edge splitting left single entry phi nodes?"); } Function *F = Invokes.back()->getParent()->getParent(); // To avoid having to handle incoming arguments specially, we lower each arg // to a copy instruction in the entry block. This ensures that the argument // value itself cannot be live across the entry block. BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsertPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have // to handle them differently. We use an extract/insert pair as a // lightweight method to achieve the same goal. if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); Instruction *NI = InsertValueInst::Create(AI, EI, 0); NI->insertAfter(EI); AI->replaceAllUsesWith(NI); // Set the operand of the instructions back to the AllocaInst. EI->setOperand(0, AI); NI->setOperand(0, AI); } else { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. CastInst *NC = new BitCastInst( AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. // Normally it's forbidden to replace a CastInst's operand because it // could cause the opcode to reflect an illegal conversion. However, // we're replacing it here with the same value it was constructed with. // We do this because the above replaceAllUsesWith() clobbered the // operand, but we want this one to remain. NC->setOperand(0, AI); } } // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && !isa<PHINode>(Inst->use_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction*,16> Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Find all of the blocks that this value is live in. std::set<BasicBlock*> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); Users.pop_back(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { NeedsSpill = true; } } // If we decided we need a spill, do it. // FIXME: Spilling this way is overkill, as it forces all uses of // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { ++NumSpilled; DemoteRegToStack(*Inst, true); } } }
bool RemovePhantomArg::removePhantomArgument(Function* F) { assert(!F->getFunctionType()->isVarArg() && "Function is varargs!"); // if this is a callee function that get inlined, we don't // need to change it because it will be dropped before // codegen anyway if (F->hasFnAttribute("is-inlined-callee")) return false; // if a function should get safepoint then it needs vmstate // and should have the phantom arg if (!shouldGetSafepoints(*F)) return false; // Start by computing a new prototype for the function, which is the same as // the old function, but has one less argument. FunctionType *FTy = F->getFunctionType(); std::vector<Type*> Params; // Set up to build a new list of parameter attributes. SmallVector<AttributeSet, 8> AttributesVec; const AttributeSet &PAL = F->getAttributes(); // Construct the new parameter list from non-phantom arguments. Also // construct a new set of parameter attributes to correspond. Skip the // first parameter attribute, since that belongs to the return value. unsigned i = 1; Function::arg_iterator I = F->arg_begin(); llvm::Argument* phantomArg = I++; assert(phantomArg->getType()->isIntegerTy(32) && "First arg must be int32"); for (Function::arg_iterator E = F->arg_end(); I != E; ++I, ++i) { Params.push_back(I->getType()); // Get the original parameter attributes (skipping the first one, that is // for the return value. if (PAL.hasAttributes(i + 1)) { AttrBuilder B(PAL, i + 1); AttributesVec. push_back(AttributeSet::get(F->getContext(), Params.size(), B)); } } // Find out the new return value. Type *NRetTy = FTy->getReturnType(); // The existing function return attributes. AttributeSet RAttrs = PAL.getRetAttributes(); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs)); if (PAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeSet::get(F->getContext(), PAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec); // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, false); assert(NFTy != FTy && "They should not be the same"); // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, F->getLinkage()); NF->copyAttributesFrom(F); NF->setAttributes(NewPAL); // Insert the new function before the old function, so we won't be processing // it again. F->getParent()->getFunctionList().insert(F, NF); NF->takeName(F); // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. for (Function::arg_iterator I = ++F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++I2) { I->replaceAllUsesWith(I2); I2->takeName(I); } // Patch the pointer to LLVM function in debug info descriptor. auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) DI->second.replaceFunction(NF); assert(F->use_empty() && "Function should have no directly use"); // Now that the old function is dead, delete it. F->eraseFromParent(); return true; }
/// DeleteDeadVarargs - If this is an function that takes a ... list, and if /// llvm.vastart is never called, the varargs list is dead for the function. bool DAE::DeleteDeadVarargs(Function &Fn) { assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!"); if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false; // Ensure that the function is only directly called. if (Fn.hasAddressTaken()) return false; // Don't touch naked functions. The assembly might be using an argument, or // otherwise rely on the frame layout in a way that this analysis will not // see. if (Fn.hasFnAttribute(Attribute::Naked)) { return false; } // Okay, we know we can transform this function if safe. Scan its body // looking for calls marked musttail or calls to llvm.vastart. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { CallInst *CI = dyn_cast<CallInst>(I); if (!CI) continue; if (CI->isMustTailCall()) return false; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { if (II->getIntrinsicID() == Intrinsic::vastart) return false; } } } // If we get here, there are no calls to llvm.vastart in the function body, // remove the "..." and adjust all the calls. // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. FunctionType *FTy = Fn.getFunctionType(); std::vector<Type*> Params(FTy->param_begin(), FTy->param_end()); FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, Fn.getLinkage()); NF->copyAttributesFrom(&Fn); Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF); NF->takeName(&Fn); // Loop over all of the callers of the function, transforming the call sites // to pass in a smaller number of arguments into the new function. // std::vector<Value*> Args; for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) { CallSite CS(*I++); if (!CS) continue; Instruction *Call = CS.getInstruction(); // Pass all the same arguments. Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs); // Drop any attributes that were on the vararg arguments. AttributeSet PAL = CS.getAttributes(); if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) { SmallVector<AttributeSet, 8> AttributesVec; for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlotAttributes(i)); if (PAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeSet::get(Fn.getContext(), PAL.getFnAttributes())); PAL = AttributeSet::get(Fn.getContext(), AttributesVec); } Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(PAL); } else { New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(PAL); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } New->setDebugLoc(Call->getDebugLoc()); Args.clear(); if (!Call->use_empty()) Call->replaceAllUsesWith(New); New->takeName(Call); // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. While we're at // it, remove the dead arguments from the DeadArguments list. // for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++I2) { // Move the name and users over to the new version. I->replaceAllUsesWith(&*I2); I2->takeName(&*I); } // Patch the pointer to LLVM function in debug info descriptor. NF->setSubprogram(Fn.getSubprogram()); // Fix up any BlockAddresses that refer to the function. Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType())); // Delete the bitcast that we just created, so that NF does not // appear to be address-taken. NF->removeDeadConstantUsers(); // Finally, nuke the old function. Fn.eraseFromParent(); return true; }
/// PropagateConstantsIntoArguments - Look at all uses of the specified /// function. If all uses are direct call sites, and all pass a particular /// constant in for an argument, propagate that constant in as the argument. /// bool IPCP::PropagateConstantsIntoArguments(Function &F) { if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit. // For each argument, keep track of its constant value and whether it is a // constant or not. The bool is driven to true when found to be non-constant. SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants; ArgumentConstants.resize(F.arg_size()); unsigned NumNonconstant = 0; for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { User *U = *UI; // Ignore blockaddress uses. if (isa<BlockAddress>(U)) continue; // Used by a non-instruction, or not the callee of a function, do not // transform. if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) return false; CallSite CS(cast<Instruction>(U)); if (!CS.isCallee(UI)) return false; // Check out all of the potentially constant arguments. Note that we don't // inspect varargs here. CallSite::arg_iterator AI = CS.arg_begin(); Function::arg_iterator Arg = F.arg_begin(); for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI, ++Arg) { // If this argument is known non-constant, ignore it. if (ArgumentConstants[i].second) continue; Constant *C = dyn_cast<Constant>(*AI); if (C && ArgumentConstants[i].first == 0) { ArgumentConstants[i].first = C; // First constant seen. } else if (C && ArgumentConstants[i].first == C) { // Still the constant value we think it is. } else if (*AI == &*Arg) { // Ignore recursive calls passing argument down. } else { // Argument became non-constant. If all arguments are non-constant now, // give up on this function. if (++NumNonconstant == ArgumentConstants.size()) return false; ArgumentConstants[i].second = true; } } } // If we got to this point, there is a constant argument! assert(NumNonconstant != ArgumentConstants.size()); bool MadeChange = false; Function::arg_iterator AI = F.arg_begin(); for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { // Do we have a constant argument? if (ArgumentConstants[i].second || AI->use_empty() || (AI->hasByValAttr() && !F.onlyReadsMemory())) continue; Value *V = ArgumentConstants[i].first; if (V == 0) V = UndefValue::get(AI->getType()); AI->replaceAllUsesWith(V); ++NumArgumentsProped; MadeChange = true; } return MadeChange; }
// RemoveDeadStuffFromFunction - Remove any arguments and return values from F // that are not in LiveValues. Transform the function and all of the callees of // the function to not have these arguments and return values. // bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Don't modify fully live functions if (LiveFunctions.count(F)) return false; // Start by computing a new prototype for the function, which is the same as // the old function, but has fewer arguments and a different return type. FunctionType *FTy = F->getFunctionType(); std::vector<Type*> Params; // Keep track of if we have a live 'returned' argument bool HasLiveReturnedArg = false; // Set up to build a new list of parameter attributes. SmallVector<AttributeSet, 8> AttributesVec; const AttributeSet &PAL = F->getAttributes(); // Remember which arguments are still alive. SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false); // Construct the new parameter list from non-dead arguments. Also construct // a new set of parameter attributes to correspond. Skip the first parameter // attribute, since that belongs to the return value. unsigned i = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++i) { RetOrArg Arg = CreateArg(F, i); if (LiveValues.erase(Arg)) { Params.push_back(I->getType()); ArgAlive[i] = true; // Get the original parameter attributes (skipping the first one, that is // for the return value. if (PAL.hasAttributes(i + 1)) { AttrBuilder B(PAL, i + 1); if (B.contains(Attribute::Returned)) HasLiveReturnedArg = true; AttributesVec. push_back(AttributeSet::get(F->getContext(), Params.size(), B)); } } else { ++NumArgumentsEliminated; DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName() << ") from " << F->getName() << "\n"); } } // Find out the new return value. Type *RetTy = FTy->getReturnType(); Type *NRetTy = nullptr; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); std::vector<Type*> RetTypes; // If there is a function with a live 'returned' argument but a dead return // value, then there are two possible actions: // 1) Eliminate the return value and take off the 'returned' attribute on the // argument. // 2) Retain the 'returned' attribute and treat the return value (but not the // entire function) as live so that it is not eliminated. // // It's not clear in the general case which option is more profitable because, // even in the absence of explicit uses of the return value, code generation // is free to use the 'returned' attribute to do things like eliding // save/restores of registers across calls. Whether or not this happens is // target and ABI-specific as well as depending on the amount of register // pressure, so there's no good way for an IR-level pass to figure this out. // // Fortunately, the only places where 'returned' is currently generated by // the FE are places where 'returned' is basically free and almost always a // performance win, so the second option can just be used always for now. // // This should be revisited if 'returned' is ever applied more liberally. if (RetTy->isVoidTy() || HasLiveReturnedArg) { NRetTy = RetTy; } else { // Look at each of the original return values individually. for (unsigned i = 0; i != RetCount; ++i) { RetOrArg Ret = CreateRet(F, i); if (LiveValues.erase(Ret)) { RetTypes.push_back(getRetComponentType(F, i)); NewRetIdxs[i] = RetTypes.size() - 1; } else { ++NumRetValsEliminated; DEBUG(dbgs() << "DAE - Removing return value " << i << " from " << F->getName() << "\n"); } } if (RetTypes.size() > 1) { // More than one return type? Reduce it down to size. if (StructType *STy = dyn_cast<StructType>(RetTy)) { // Make the new struct packed if we used to return a packed struct // already. NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked()); } else { assert(isa<ArrayType>(RetTy) && "unexpected multi-value return"); NRetTy = ArrayType::get(RetTypes[0], RetTypes.size()); } } else if (RetTypes.size() == 1) // One return type? Just a simple value then, but only if we didn't use to // return a struct with that simple value before. NRetTy = RetTypes.front(); else if (RetTypes.size() == 0) // No return types? Make it void, but only if we didn't use to return {}. NRetTy = Type::getVoidTy(F->getContext()); } assert(NRetTy && "No new return type found?"); // The existing function return attributes. AttributeSet RAttrs = PAL.getRetAttributes(); // Remove any incompatible attributes, but only if we removed all return // values. Otherwise, ensure that we don't have any conflicting attributes // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. if (NRetTy->isVoidTy()) RAttrs = RAttrs.removeAttributes(NRetTy->getContext(), AttributeSet::ReturnIndex, AttributeFuncs::typeIncompatible(NRetTy)); else assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex). overlaps(AttributeFuncs::typeIncompatible(NRetTy)) && "Return attributes no longer compatible?"); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs)); if (PAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeSet::get(F->getContext(), PAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec); // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); // No change? if (NFTy == FTy) return false; // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, F->getLinkage()); NF->copyAttributesFrom(F); NF->setAttributes(NewPAL); // Insert the new function before the old function, so we won't be processing // it again. F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites // to pass in a smaller number of arguments into the new function. // std::vector<Value*> Args; while (!F->use_empty()) { CallSite CS(F->user_back()); Instruction *Call = CS.getInstruction(); AttributesVec.clear(); const AttributeSet &CallPAL = CS.getAttributes(); // The call return attributes. AttributeSet RAttrs = CallPAL.getRetAttributes(); // Adjust in case the function was changed to return void. RAttrs = RAttrs.removeAttributes(NRetTy->getContext(), AttributeSet::ReturnIndex, AttributeFuncs::typeIncompatible(NF->getReturnType())); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs)); // Declare these outside of the loops, so we can reuse them for the second // loop, which loops the varargs. CallSite::arg_iterator I = CS.arg_begin(); unsigned i = 0; // Loop over those operands, corresponding to the normal arguments to the // original function, and add those that are still alive. for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i) if (ArgAlive[i]) { Args.push_back(*I); // Get original parameter attributes, but skip return attributes. if (CallPAL.hasAttributes(i + 1)) { AttrBuilder B(CallPAL, i + 1); // If the return type has changed, then get rid of 'returned' on the // call site. The alternative is to make all 'returned' attributes on // call sites keep the return value alive just like 'returned' // attributes on function declaration but it's less clearly a win // and this is not an expected case anyway if (NRetTy != RetTy && B.contains(Attribute::Returned)) B.removeAttribute(Attribute::Returned); AttributesVec. push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } } // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { Args.push_back(*I); if (CallPAL.hasAttributes(i + 1)) { AttrBuilder B(CallPAL, i + 1); AttributesVec. push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } } if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeSet::get(Call->getContext(), CallPAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, "", Call->getParent()); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(NewCallPAL); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } New->setDebugLoc(Call->getDebugLoc()); Args.clear(); if (!Call->use_empty()) { if (New->getType() == Call->getType()) { // Return type not changed? Just replace users then. Call->replaceAllUsesWith(New); New->takeName(Call); } else if (New->getType()->isVoidTy()) { // Our return value has uses, but they will get removed later on. // Replace by null for now. if (!Call->getType()->isX86_MMXTy()) Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); } else { assert((RetTy->isStructTy() || RetTy->isArrayTy()) && "Return type changed, but not into a void. The old return type" " must have been a struct or an array!"); Instruction *InsertPt = Call; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest()); InsertPt = &*NewEdge->getFirstInsertionPt(); } // We used to return a struct or array. Instead of doing smart stuff // with all the uses, we will just rebuild it using extract/insertvalue // chaining and let instcombine clean that up. // // Start out building up our return value from undef Value *RetVal = UndefValue::get(RetTy); for (unsigned i = 0; i != RetCount; ++i) if (NewRetIdxs[i] != -1) { Value *V; if (RetTypes.size() > 1) // We are still returning a struct, so extract the value from our // return value V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret", InsertPt); else // We are now returning a single element, so just insert that V = New; // Insert the value at the old position RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt); } // Now, replace all uses of the old call instruction with the return // struct we built Call->replaceAllUsesWith(RetVal); New->takeName(Call); } } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. i = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++i) if (ArgAlive[i]) { // If this is a live argument, move the name and users over to the new // version. I->replaceAllUsesWith(&*I2); I2->takeName(&*I); ++I2; } else { // If this argument is dead, replace any uses of it with null constants // (these are guaranteed to become unused later on). if (!I->getType()->isX86_MMXTy()) I->replaceAllUsesWith(Constant::getNullValue(I->getType())); } // If we change the return value of the function we must rewrite any return // instructions. Check this now. if (F->getReturnType() != NF->getReturnType()) for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Value *RetVal; if (NFTy->getReturnType()->isVoidTy()) { RetVal = nullptr; } else { assert(RetTy->isStructTy() || RetTy->isArrayTy()); // The original return value was a struct or array, insert // extractvalue/insertvalue chains to extract only the values we need // to return and insert them into our new result. // This does generate messy code, but we'll let it to instcombine to // clean that up. Value *OldRet = RI->getOperand(0); // Start out building up our return value from undef RetVal = UndefValue::get(NRetTy); for (unsigned i = 0; i != RetCount; ++i) if (NewRetIdxs[i] != -1) { ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i, "oldret", RI); if (RetTypes.size() > 1) { // We're still returning a struct, so reinsert the value into // our new return value at the new index RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i], "newret", RI); } else { // We are now only returning a simple value, so just return the // extracted value. RetVal = EV; } } } // Replace the return instruction with one returning the new return // value (possibly 0 if we became void). ReturnInst::Create(F->getContext(), RetVal, RI); BB->getInstList().erase(RI); } // Patch the pointer to LLVM function in debug info descriptor. NF->setSubprogram(F->getSubprogram()); // Now that the old function is dead, delete it. F->eraseFromParent(); return true; }
bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVector<PHINode*, 8> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail) { BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); if (&BB->front() == Ret) // Make sure there is something before the ret... return false; // If the return is in the entry block, then making this transformation would // turn infinite recursion into an infinite loop. This transformation is ok // in theory, but breaks some code like: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call // disable this xform in this case, because the code generator will lower the // call to fabs into inline code. if (BB == &F->getEntryBlock()) return false; // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. CallInst *CI; BasicBlock::iterator BBI = Ret; while (1) { CI = dyn_cast<CallInst>(BBI); if (CI && CI->getCalledFunction() == F) break; if (BBI == BB->begin()) return false; // Didn't find a potential tail call. --BBI; } // If this call is marked as a tail call, and if there are dynamic allocas in // the function, we cannot perform this optimization. if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) return false; // If we are introducing accumulator recursion to eliminate associative // operations after the call instruction, this variable contains the initial // value for the accumulator. If this value is set, we actually perform // accumulator recursion elimination instead of simple tail recursion // elimination. Value *AccumulatorRecursionEliminationInitVal = 0; Instruction *AccumulatorRecursionInstr = 0; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) if (!CanMoveAboveCall(BBI, CI)) { // If we can't move the instruction above the call, it might be because it // is an associative operation that could be tranformed using accumulator // recursion elimination. Check to see if this is the case, and if so, // remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = CanTransformAccumulatorRecursion(BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction // accumulates. AccumulatorRecursionInstr = BBI; } else { return false; // Otherwise, we cannot eliminate the tail recursion! } } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a // constant, return the value returned by the tail call, or that are being // accumulator recursion variable eliminated. if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && AccumulatorRecursionEliminationInitVal == 0 && !getCommonReturnValue(Ret, CI)) return false; // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. if (OldEntry == 0) { OldEntry = &F->getEntryBlock(); BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); OldEntry->setName("tailrecurse"); BranchInst::Create(OldEntry, NewEntry); // If this tail call is marked 'tail' and if there are any allocas in the // entry block, move them up to the new entry block. TailCallsAreMarkedTail = CI->isTailCall(); if (TailCallsAreMarkedTail) // Move all fixed sized allocas from OldEntry to NewEntry. for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(), NEBI = NewEntry->begin(); OEBI != E; ) if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++)) if (isa<ConstantInt>(AI->getArraySize())) AI->moveBefore(NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. // For now, we initialize each PHI to only have the real arguments // which are passed in. Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { PHINode *PN = PHINode::Create(I->getType(), I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); ArgumentPHIs.push_back(PN); } } // If this function has self recursive calls in the tail position where some // are marked tail and some are not, only transform one flavor or another. We // have to choose whether we move allocas in the entry block to the new entry // block or not, so we can't make a good choice for both. NOTE: We could do // slightly better here in the case that the function has no entry block // allocas. if (TailCallsAreMarkedTail && !CI->isTailCall()) return false; // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i) ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB); // If we are introducing an accumulator variable to eliminate the recursion, // do so now. Note that we _know_ that no subsequent tail recursion // eliminations will happen on this function because of the way the // accumulator recursion predicate is set up. // if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, // computed earlier. For any other existing branches to this block (due to // other tail recursions eliminated) the accumulator is not modified. // Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); PI != PE; ++PI) { if (*PI == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI); else AccPN->addIncoming(AccPN, *PI); } // Add an incoming argument for the current block, which is computed by our // associative accumulator instruction. AccPN->addIncoming(AccRecInstr, BB); // Next, rewrite the accumulator recursion instruction so that it does not // use the result of the call anymore, instead, use the PHI node we just // inserted. AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will // actually rewrite the return value we are destroying, but that's ok. for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) RI->setOperand(0, AccPN); ++NumAccumAdded; } // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. BranchInst::Create(OldEntry, Ret); BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated; return true; }