/// Redirects every use of \p OldFunc to \p NewFunc.
///
/// Direct calls found by getCallsToFunction() are retargeted one at a time so
/// that the corresponding call graph edge can be updated alongside; whatever
/// uses remain afterwards (e.g. function pointers) are rewritten in bulk with
/// replaceAllUsesWith().
///
/// \param OldFunc the function whose uses are redirected; may be NULL.
/// \param NewFunc the function that takes its place; may be NULL.
/// \returns false when either pointer is NULL or when signaturesMatch()
///          rejects the pair (a diagnostic is written to OS in the latter
///          case); true otherwise, including the trivial case where both
///          pointers name the same function.
bool StructuredModuleEditor::replaceFunc(Function *OldFunc, Function *NewFunc) {
  if (OldFunc == NULL || NewFunc == NULL)
    return false;

  // Replacing a function with itself requires no work. It must not reach
  // replaceAllUsesWith() below, which is not valid when the replacement is
  // the value itself.
  if (OldFunc == NewFunc)
    return true;

  if (!signaturesMatch(OldFunc, NewFunc)) {
    OS << "Cannot replace '" << OldFunc->getName() << "' with '"
       << NewFunc->getName()
       << "' because they don't have identical signatures\n";
    return false;
  }

  // Gathers all the calls to the function we want to bypass
  InstList Calls = getCallsToFunction(OldFunc);

  // Iterates over each call to the function we want to bypass and sets the
  // callee to the function we want to hook
  for (InstList::iterator I = Calls.begin(), E = Calls.end(); I != E; ++I) {
    CallSite CS(cast<Value>(*I));
    CS.setCalledFunction(NewFunc);

    // Creates an edge from the calling node to its new destination node.
    // The call instruction itself is unchanged, so it serves as both the
    // old and the new call site of the edge.
    CallGraphNode *CallingNode = (*CG)[CS.getCaller()];
    CallGraphNode *NewCalleeNode = (*CG)[NewFunc];
    CallingNode->replaceCallEdge(CS, CS, NewCalleeNode);
  }

  // Replace all remaining uses of OldFunc with NewFunc (e.g. pointers)
  OldFunc->replaceAllUsesWith(NewFunc);

  return true;
}
/// Applies argument promotion to every function in \p SCC, sweeping the SCC
/// repeatedly until one full sweep promotes nothing.
///
/// Each time promoteArguments() hands back a replacement function, the call
/// graph and the SCC being iterated are patched to refer to the new node.
///
/// \returns true if at least one function was replaced.
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;

  // The call graph is updated by hand below whenever a function is replaced.
  CallGraph &Graph = getAnalysis<CallGraphWrapperPass>().getCallGraph();

  LegacyAARGetter AARGetter(*this);

  bool AnyChange = false;

  // Sweep again for as long as the previous sweep promoted something.
  for (bool SweepChanged = true; SweepChanged;) {
    SweepChanged = false;

    for (CallGraphNode *StaleNode : SCC) {
      Function *StaleFn = StaleNode->getFunction();
      if (!StaleFn)
        continue;

      // Callback given to promoteArguments(): replaces the caller's edge for
      // the old call site with an edge for the new call site.
      auto RewireCallEdge = [&](CallSite OldCS, CallSite NewCS) {
        Function *CallerFn = OldCS.getInstruction()->getParent()->getParent();
        CallGraphNode *PromotedCallee =
            Graph.getOrInsertFunction(NewCS.getCalledFunction());
        CallGraphNode *CallerNode = Graph[CallerFn];
        CallerNode->replaceCallEdge(OldCS, NewCS, PromotedCallee);
      };

      Function *PromotedFn =
          promoteArguments(StaleFn, AARGetter, MaxElements, {RewireCallEdge});
      if (!PromotedFn)
        continue;

      SweepChanged = true;

      // The new function's node takes over the outgoing edges of the old one.
      CallGraphNode *PromotedNode = Graph.getOrInsertFunction(PromotedFn);
      PromotedNode->stealCalledFunctionsFrom(StaleNode);

      // An old node that is still referenced cannot be deleted; its function
      // is given external linkage instead.
      if (StaleNode->getNumReferences() != 0)
        StaleFn->setLinkage(Function::ExternalLinkage);
      else
        delete Graph.removeFunctionFromModule(StaleNode);

      // And update the SCC we're iterating as well.
      SCC.ReplaceNode(StaleNode, PromotedNode);
    }

    // Remember that we changed something.
    AnyChange |= SweepChanged;
  }

  return AnyChange;
}
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. CallGraphNode *ArgPromotion::DoPromotion(Function *F, SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. const FunctionType *FTy = F->getFunctionType(); std::vector<const Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. // std::map<Argument*, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. std::map<IndicesVector, LoadInst*> OriginalLoads; // Attributes - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeWithIndex, 8> AttributesVec; const AttrListPtr &PAL = F->getAttributes(); // Add any return attributes. if (Attributes attrs = PAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // First, determine the new argument list unsigned ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgIndex) { if (ByValArgsToTransform.count(I)) { // Simple byval argument? Just add all the struct element types. 
const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Params.push_back(STy->getElementType(i)); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(I)) { // Unchanged argument Params.push_back(I->getType()); if (Attributes attrs = PAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[I]; for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User)); IndicesVector Indices; Indices.reserve(User->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(Indices); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(User)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(User->use_back()); OriginalLoads[Indices] = OrigLoad; } // Add a parameter to the function for each element passed in. 
for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), SI->begin(), SI->end())); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } // Add any function attributes. if (Attributes attrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); const Type *RetTy = FTy->getReturnType(); // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which // have zero fixed arguments. bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg()) { ExtraArgHack = true; Params.push_back(Type::getInt32Ty(F->getContext())); } // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); NF->takeName(F); // Get the alias analysis information that we need to update to reflect our // changes. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraph>(); // Get a new callgraph node for NF. CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. 
// SmallVector<Value*, 16> Args; while (!F->use_empty()) { CallSite CS = CallSite::get(F->use_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttrListPtr &CallPAL = CS.getAttributes(); // Add any return attributes. if (Attributes attrs = CallPAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgIndex) if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { Args.push_back(*AI); // Unmodified argument if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, (*AI)->getName()+"."+utostr(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[I]; // Store the Value* version of the indices in here, but declare it now // for reuse. 
std::vector<Value*> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[*SI]; if (!SI->empty()) { Ops.reserve(SI->size()); const Type *ElTy = V->getType(); for (IndicesVector::const_iterator II = SI->begin(), IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. const Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); // Keep track of the type we're currently indexing. ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), V->getName()+".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } } if (ExtraArgHack) Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } // Add any function attributes. 
if (Attributes attrs = CallPAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args.begin(), Args.end(), "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); } else { New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } Args.clear(); AttributesVec.clear(); // Update the alias analysis implementation to know that we are replacing // the old call with a new one. AA.replaceWithNewValue(Call, New); // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->replaceCallEdge(Call, New, NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); New->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transfering uses of the old arguments over to // the new arguments, also transfering over the names as well. // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { // If this is an unmodified argument, move the name and users over to the // new version. 
I->replaceAllUsesWith(I2); I2->takeName(I); AA.replaceWithNewValue(I, I2); ++I2; continue; } if (ByValArgsToTransform.count(I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = NF->begin()->begin(); // Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, TheAlloca->getName()+"."+Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(I); AA.replaceWithNewValue(I, TheAlloca); continue; } if (I->use_empty()) { AA.deleteValue(I); continue; } // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. 
ScalarizeTable &ArgIndices = ScalarizedElements[I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) { assert(ArgIndices.begin()->empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); *It != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->use_back()); L->replaceAllUsesWith(TheArg); AA.replaceWithNewValue(L, TheArg); L->eraseFromParent(); } AA.deleteValue(GEP); GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) ++I2; } // Notify the alias analysis implementation that we inserted a new argument. 
if (ExtraArgHack) AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), NF->arg_begin()); // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); NF_CGN->stealCalledFunctionsFrom(CG[F]); // Now that the old function is dead, delete it. If there is a dangling // reference to the CallgraphNode, just leave the dead function around for // someone else to nuke. CallGraphNode *CGN = CG[F]; if (CGN->getNumReferences() == 0) delete CG.removeFunctionFromModule(CGN); else F->setLinkage(Function::ExternalLinkage); return NF_CGN; }
/// Gives every function that calls \p Callee its own wrapped copy of it.
///
/// The distinct callers are listed on OS. The first caller keeps \p Callee
/// itself and each further caller gets a fresh clone from cloneFunc(). For
/// each of those functions a "pre" declaration (void, taking the callee's
/// parameter types) and a "post" declaration (void, taking the callee's
/// return type unless that is void) are inserted into the module, a wrapper
/// is built with wrapFunc(), and the matching calls inside the caller are
/// retargeted to the wrapper, with the call graph edge updated accordingly.
///
/// \param Callee the function whose callers are instrumented; when NULL a
///               message is printed and nothing else happens. When nothing
///               calls it, only the (empty) caller listing is printed.
void StructuredModuleEditor::instrumentCallsToFunction(Function *Callee) {
  if (Callee == NULL) {
    OS << "Function not found!\n";
    return;
  }

  InstList Calls = getCallsToFunction(Callee);

  // Collect each calling function once, in first-seen order.
  FuncList Callers;
  for (InstList::iterator II = Calls.begin(), IE = Calls.end(); II != IE;
       ++II) {
    Function *Caller = (*II)->getParent()->getParent();
    if (std::find(Callers.begin(), Callers.end(), Caller) == Callers.end())
      Callers.push_back(Caller);
  }

  OS << Callers.size() << " functions call '" << Callee->getName()
     << "'...\n";
  OS << "=================================\n";
  for (FuncList::iterator FI = Callers.begin(), FE = Callers.end(); FI != FE;
       ++FI) {
    OS << (*FI)->getName() << "\n";
  }
  OS << "=================================\n";

  // With no callers there is nothing to wrap. Stopping here also keeps the
  // code below from indexing into an empty caller list.
  if (Callers.empty())
    return;

  // Parameter types of the "pre" hook: the callee's own parameter types.
  std::vector<Type*> PreArgTypes;
  for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
       I != E; ++I) {
    PreArgTypes.push_back(I->getType());
  }

  // Parameter types of the "post" hook: the callee's return type, if any.
  std::vector<Type*> PostArgTypes;
  if (!Callee->getReturnType()->isVoidTy()) {
    PostArgTypes.push_back(Callee->getReturnType());
  }

  // One function to wrap per caller: the original for the first caller and a
  // clone for each of the others. Counting up from 1 avoids the unsigned
  // `size() - 1`, which wraps around when the list is empty.
  FuncList Clones;
  Clones.push_back(Callee);
  for (uint64_t i = 1; i < Callers.size(); i++) {
    Function *Clone = cloneFunc(Callee);
    Clones.push_back(Clone);
  }

  for (uint64_t i = 0; i < Clones.size(); i++) {
    // Each declaration is inserted under an empty name and then renamed.
    Constant *PreConst = M->getOrInsertFunction("",
        FunctionType::get(Type::getVoidTy(getGlobalContext()), PreArgTypes,
                          false));
    Function *Pre = cast<Function>(PreConst);
    Pre->setName("pre");
    CG->getOrInsertFunction(Pre);

    Constant *PostConst = M->getOrInsertFunction("",
        FunctionType::get(Type::getVoidTy(getGlobalContext()), PostArgTypes,
                          false));
    Function *Post = cast<Function>(PostConst);
    Post->setName("post");
    CG->getOrInsertFunction(Post);

    /* OS << "\n";
       OS << "Wrapping '" << Clones.at(i)->getName() << "'...\n\n";
       OS << "Pre-invocation function = " << Pre->getName() << "\n";
       OS << *Pre;
       OS << "Post-invocation function = " << Post->getName() << "\n";
       OS << *Post;
       OS << "**************************************\n";*/

    Function *Wrapper = wrapFunc(Clones.at(i), Pre, Post);

    // From the first iteration on, calls are matched against the first
    // wrapper rather than the original function.
    // NOTE(review): this presumably relies on wrapFunc() redirecting the
    // existing calls of the wrapped function to the wrapper - confirm.
    if (i == 0)
      Callee = Wrapper;

    Function *Caller = Callers.at(i);
    for (Function::iterator BBI = Caller->begin(), BBE = Caller->end();
         BBI != BBE; ++BBI) {
      for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE;
           ++II) {
        CallSite CS(cast<Value>(II));
        // If this isn't a call, or it is a call to an intrinsic...
        if (!CS || isa<IntrinsicInst>(II))
          continue;

        if (Callee == CS.getCalledFunction()) {
          CS.setCalledFunction(Wrapper);

          // Creates an edge from the calling node to its new destination node
          CallGraphNode *CallingNode = (*CG)[CS.getCaller()];
          CallGraphNode *NewCalleeNode = (*CG)[Wrapper];
          CallingNode->replaceCallEdge(CS, CS, NewCalleeNode);
        }
      }
    }
  }

  OS << "Functions successfully wrapped!\n";
}
/// Replaces the single struct-typed parameter of the function behind \p node
/// with one parameter per register that RegisterUse reports as used.
///
/// A new void function is created whose parameters are an i64 for each
/// register that is not modified and an i64* for each register that is
/// modified, ordered by register name. Every call site of the old function is
/// rewritten to call the new one, the old body (if any) is moved into the new
/// function, and uses of the old struct argument are rewired to the new
/// parameters or to fresh allocas. The old function is left behind under a
/// "__hollow_husk__" name.
///
/// \returns the call graph node of the new function, or nullptr when the node
///          has no function, the function does not take exactly one argument,
///          or isStructType() rejects that argument.
CallGraphNode* ArgumentRecovery::recoverArguments(llvm::CallGraphNode *node)
{
	using NamedParam = pair<const char*, Type*>;

	Function* fn = node->getFunction();
	// A node without a function, or a function that doesn't take exactly one
	// argument, is not something this pass handles.
	if (fn == nullptr || fn->arg_size() != 1)
	{
		return nullptr;
	}

	Argument* fnArg = fn->arg_begin();
	if (!isStructType(fnArg))
	{
		return nullptr;
	}

	// Known hack, kept from the original: this relies on RegisterUse being
	// the AA pass. The data ought to live in a helper pass that both this
	// pass and the AA pass can query.
	RegisterUse& regUse = getAnalysis<RegisterUse>();
	CallGraph& cg = getAnalysis<CallGraphWrapperPass>().getCallGraph();

	const auto* modRefInfo = regUse.getModRefInfo(fn);
	assert(modRefInfo != nullptr);

	// From here on the function is going to be modified. Grab the register
	// map of the old function before anything is moved around.
	auto& registerMap = exposeAllRegisters(fn);

	// Build the new parameter list: one entry per register that is
	// referenced at all, by pointer when it is modified, by value otherwise.
	LLVMContext& ctx = fn->getContext();
	SmallVector<NamedParam, 16> parameters;
	Type* int64 = Type::getInt64Ty(ctx);
	Type* int64ptr = Type::getInt64PtrTy(ctx);
	for (const auto& entry : *modRefInfo)
	{
		if (entry.second == RegisterUse::NoModRef)
		{
			continue;
		}

		Type* paramType;
		if ((entry.second & RegisterUse::Mod) == RegisterUse::Mod)
		{
			paramType = int64ptr;
		}
		else
		{
			paramType = int64;
		}
		parameters.push_back({entry.first, paramType});
	}

	// Order parameters by name.
	// FIXME: This could use an ABI-specific sort routine.
	sort(parameters.begin(), parameters.end(), [](const NamedParam& lhs, const NamedParam& rhs)
	{
		return strcmp(lhs.first, rhs.first) < 0;
	});

	// Pull the types out for the function type.
	SmallVector<Type*, 16> parameterTypes;
	for (const auto& param : parameters)
	{
		parameterTypes.push_back(param.second);
	}

	// Ideally, caller analysis would also happen here to find output
	// registers that are never read (to drop them or pass them by value), and
	// to pick a return value. For now the new function returns void.
	FunctionType* newFunctionType = FunctionType::get(Type::getVoidTy(ctx), parameterTypes, false);

	Function* newFunc = Function::Create(newFunctionType, fn->getLinkage());
	newFunc->copyAttributesFrom(fn);
	fn->getParent()->getFunctionList().insert(fn, newFunc);
	newFunc->takeName(fn);
	fn->setName("__hollow_husk__" + newFunc->getName());

	// Name each new argument after its register.
	size_t nameIndex = 0;
	for (Argument& arg : newFunc->args())
	{
		arg.setName(parameters[nameIndex].first);
		nameIndex++;
	}

	// Call graph nodes for both versions of the function.
	CallGraphNode* newFuncNode = cg.getOrInsertFunction(newFunc);
	CallGraphNode* oldFuncNode = cg[fn];

	// Rewrite every call site of the old function.
	while (!fn->use_empty())
	{
		CallSite cs(fn->user_back());
		Instruction* call = cast<CallInst>(cs.getInstruction());
		Function* caller = call->getParent()->getParent();

		auto& registerPositions = exposeAllRegisters(caller);
		SmallVector<Value*, 16> callParameters;
		for (const auto& param : parameters)
		{
			// HACKHACK: take the first GEP for this register that yields a
			// 64-bit integer.
			Value* registerPointer = nullptr;
			auto range = registerPositions.equal_range(param.first);
			for (auto iter = range.first; iter != range.second; iter++)
			{
				auto gep = dyn_cast<GetElementPtrInst>(iter->second);
				if (gep && gep->getResultElementType() == int64)
				{
					registerPointer = gep;
					break;
				}
			}

			assert(registerPointer != nullptr);

			if (isa<PointerType>(param.second))
			{
				callParameters.push_back(registerPointer);
			}
			else
			{
				// By-value parameter: load the register right before the call.
				// GVN will get rid of the load if it's unnecessary.
				LoadInst* load = new LoadInst(registerPointer, param.first, call);
				callParameters.push_back(load);
			}
		}

		CallInst* newCall = CallInst::Create(newFunc, callParameters, "", call);

		// Update AA
		regUse.replaceWithNewValue(call, newCall);

		// Update the call graph: the edge lives on the caller's node.
		CallGraphNode* callerNode = cg[caller];
		callerNode->replaceCallEdge(cs, CallSite(newCall), newFuncNode);

		// Hand over users and name, then drop the old call.
		if (!call->use_empty())
		{
			call->replaceAllUsesWith(newCall);
			newCall->takeName(call);
		}

		call->eraseFromParent();
	}

	// Only functions with a body need their code fixed up.
	if (!fn->isDeclaration())
	{
		// Move the whole body, and the outgoing call edges, to the new function.
		newFunc->getBasicBlockList().splice(newFunc->begin(), fn->getBasicBlockList());
		newFuncNode->stealCalledFunctionsFrom(oldFuncNode);

		size_t argIndex = 0;
		auto& argList = newFunc->getArgumentList();

		// A throwaway instruction serves as insertion point, since any existing
		// instruction might be one of those erased below.
		Value* zero = ConstantInt::get(int64, 0);
		Instruction* insertionPoint = BinaryOperator::CreateAdd(zero, zero, "noop", newFunc->begin()->begin());

		// Point the register values of the old body at the new arguments.
		for (auto argIter = argList.begin(); argIter != argList.end(); argIter++, argIndex++)
		{
			Value* replaceWith = argIter;
			const auto& paramTuple = parameters[argIndex];
			if (!isa<PointerType>(paramTuple.second))
			{
				// By-value parameter: spill it to an alloca and use the alloca
				// wherever the register pointer was used. Ugly code gen, but
				// it optimizes easily and still works if a pointer to the
				// register is needed.
				auto slot = new AllocaInst(paramTuple.second, paramTuple.first, insertionPoint);
				new StoreInst(argIter, slot, insertionPoint);
				replaceWith = slot;
			}

			// Replace every recorded value for this register, erase the old
			// instruction and remember the replacement in the map.
			auto iterPair = registerMap.equal_range(paramTuple.first);
			for (auto mapIter = iterPair.first; mapIter != iterPair.second; mapIter++)
			{
				auto& registerValue = mapIter->second;
				registerValue->replaceAllUsesWith(replaceWith);
				cast<Instruction>(registerValue)->eraseFromParent();
				registerValue = replaceWith;
			}
		}

		// The remaining uses of the struct argument should be:
		// * preserved registers
		// * indirect jumps
		const auto& target = getAnalysis<TargetInfo>();
		while (!fnArg->use_empty())
		{
			auto lastUser = fnArg->user_back();
			if (auto user = dyn_cast<GetElementPtrInst>(lastUser))
			{
				// Promote register to alloca.
				const char* maybeName = target.registerName(*user);
				const char* regName = target.largestOverlappingRegister(maybeName);
				assert(regName != nullptr);

				auto slot = new AllocaInst(user->getResultElementType(), regName, insertionPoint);
				user->replaceAllUsesWith(slot);
				user->eraseFromParent();
				continue;
			}

			// Anything that isn't a GEP has to be a call to one of the two
			// known intrinsics.
			auto call = cast<CallInst>(lastUser);

			Function* intrin = nullptr;
			StringRef intrinName = call->getCalledFunction()->getName();
			if (intrinName == "x86_jump_intrin")
			{
				intrin = indirectJump;
			}
			else if (intrinName == "x86_call_intrin")
			{
				intrin = indirectCall;
			}
			else
			{
				assert(false);
				// Can't decompile this function. Delete its body.
				newFunc->deleteBody();
				insertionPoint = nullptr;
				break;
			}

			// Replace the intrinsic call with a call to the selected function,
			// passing the jump target followed by every new argument.
			Value* jumpTarget = call->getOperand(2);
			SmallVector<Value*, 16> callArgs;
			callArgs.push_back(jumpTarget);
			for (Argument& arg : argList)
			{
				callArgs.push_back(&arg);
			}

			CallInst* varargCall = CallInst::Create(intrin, callArgs, "", call);
			newFuncNode->replaceCallEdge(CallSite(call), CallSite(varargCall), cg[intrin]);
			regUse.replaceWithNewValue(call, varargCall);

			varargCall->takeName(call);
			call->eraseFromParent();
		}

		if (insertionPoint != nullptr)
		{
			// The placeholder is no longer needed.
			insertionPoint->eraseFromParent();
		}
	}

	// Nothing should be using the old register argument at this point. The
	// hollow husk of the old function stays in place for global DCE to erase.
	registerAddresses[newFunc] = move(registerMap);
	registerAddresses.erase(fn);

	// Should be all.
	return newFuncNode;
}