/// Attaches one field name per recovered return value to the named metadata
/// node that getMdNameForType associates with `returnType`.
///
/// A value returned in an integer register is named after that register; a
/// value returned on the stack is named "sp" followed by its frame base
/// offset. Any other kind of value is not implemented.
///
/// @param module     Module that owns the named metadata node.
/// @param returnType Structure type whose fields are being named.
/// @param callInfo   Call information whose returns() are enumerated in order.
void md::setRecoveredReturnFieldNames(Module& module, StructType& returnType, const CallInformation& callInfo)
{
    string metadataName;
    bool foundName = getMdNameForType(returnType, metadataName);
    assert(foundName);
    (void) foundName; // only read by the assert above

    LLVMContext& context = module.getContext();
    auto namedNode = module.getOrInsertNamedMetadata(metadataName);

    for (const ValueInformation& returned : callInfo.returns())
    {
        MDString* fieldLabel = nullptr;
        switch (returned.type)
        {
            case ValueInformation::IntegerRegister:
                fieldLabel = MDString::get(context, returned.registerInfo->name);
                break;

            case ValueInformation::Stack:
            {
                string stackLabel;
                raw_string_ostream labelStream(stackLabel);
                labelStream << "sp" << returned.frameBaseOffset;
                // str() flushes the stream into stackLabel before it is read.
                fieldLabel = MDString::get(context, labelStream.str());
                break;
            }

            default:
                llvm_unreachable("not implemented");
        }

        // One single-operand node per return value, appended in iteration order.
        namedNode->addOperand(MDNode::get(context, fieldLabel));
    }
}
/// Synthesizes a function type made only of pointer-sized integers and asks
/// every calling convention in `registry`, in iteration order, to analyze it.
///
/// The first convention whose analyzeFunctionType succeeds is recorded on
/// `info`. After every failed attempt `info` is cleared before the next
/// convention is tried.
///
/// @param ctx                   Context used to create the synthesized types.
/// @param registry              Source of the target info and of the calling conventions.
/// @param info                  Call information to fill out.
/// @param returns               When true the synthesized type returns a pointer-sized
///                              integer, otherwise it returns void.
/// @param integerLikeParameters Number of pointer-sized integer parameters.
/// @param isVariadic            Not read by this function: the synthesized type is
///                              always created as non-variadic.
/// @return true when a calling convention accepted the type. Reaching the end
///         without a match trips an assertion and returns false.
bool hackhack_fillFromParamInfo(LLVMContext& ctx, ParameterRegistry& registry, CallInformation& info, bool returns, size_t integerLikeParameters, bool isVariadic)
{
    TargetInfo& target = registry.getTargetInfo();
    Type* wordType = Type::getIntNTy(ctx, target.getPointerSize() * CHAR_BIT);

    Type* resultType = wordType;
    if (!returns)
    {
        resultType = Type::getVoidTy(ctx);
    }

    vector<Type*> parameterTypes(integerLikeParameters, wordType);
    FunctionType* prototype = FunctionType::get(resultType, parameterTypes, false);

    for (CallingConvention* candidate : registry)
    {
        if (!candidate->analyzeFunctionType(registry, info, *prototype))
        {
            // Discard whatever the failed attempt may have written.
            info.clear();
            continue;
        }

        info.setCallingConvention(candidate);
        return true;
    }

    assert(false);
    return false;
}
bool CallingConvention_x86_64_systemv::analyzeCallSite(ParameterRegistry ®istry, CallInformation &fillOut, CallSite cs) { fillOut.clear(); TargetInfo& targetInfo = registry.getTargetInfo(); Instruction& inst = *cs.getInstruction(); Function& caller = *inst.getParent()->getParent(); MemorySSA& mssa = *registry.getMemorySSA(caller); MemoryAccess* thisDef = mssa.getMemoryAccess(&inst); identifyParameterCandidates(targetInfo, mssa, thisDef->getDefiningAccess(), fillOut); identifyReturnCandidates(targetInfo, mssa, thisDef, fillOut); return true; }
bool CallingConvention_x86_64_systemv::analyzeFunction(ParameterRegistry ®istry, CallInformation &callInfo, Function &function) { // TODO: Look at called functions to find hidden parameters/return values if (md::isPrototype(function)) { return false; } TargetInfo& targetInfo = registry.getTargetInfo(); // We always need rip and rsp. callInfo.addParameter(ValueInformation::IntegerRegister, targetInfo.registerNamed("rip")); callInfo.addParameter(ValueInformation::IntegerRegister, targetInfo.registerNamed("rsp")); // Identify register GEPs. // (assume x86 regs as first parameter) assert(function.arg_size() == 1); Argument* regs = function.arg_begin(); auto pointerType = dyn_cast<PointerType>(regs->getType()); assert(pointerType != nullptr && pointerType->getTypeAtIndex(int(0))->getStructName() == "struct.x86_regs"); unordered_multimap<const TargetRegisterInfo*, GetElementPtrInst*> geps; for (auto& use : regs->uses()) { if (GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(use.getUser())) if (const TargetRegisterInfo* regName = targetInfo.registerInfo(*gep)) { geps.insert({regName, gep}); } } // Look at temporary registers that are read before they are written MemorySSA& mssa = *registry.getMemorySSA(function); for (const char* name : parameterRegisters) { const TargetRegisterInfo* smallReg = targetInfo.registerNamed(name); const TargetRegisterInfo* regInfo = targetInfo.largestOverlappingRegister(*smallReg); auto range = geps.equal_range(regInfo); vector<Instruction*> addresses; for (auto iter = range.first; iter != range.second; ++iter) { addresses.push_back(iter->second); } for (size_t i = 0; i < addresses.size(); ++i) { Instruction* addressInst = addresses[i]; for (auto& use : addressInst->uses()) { if (auto load = dyn_cast<LoadInst>(use.getUser())) { MemoryAccess* parent = mssa.getMemoryAccess(load)->getDefiningAccess(); if (mssa.isLiveOnEntryDef(parent)) { // register argument! 
callInfo.addParameter(ValueInformation::IntegerRegister, regInfo); } } else if (auto cast = dyn_cast<CastInst>(use.getUser())) { if (cast->getType()->isPointerTy()) { addresses.push_back(cast); } } } } } // Does the function refer to values at an offset above the initial rsp value? // Assume that rsp is known to be preserved. auto spRange = geps.equal_range(targetInfo.getStackPointer()); for (auto iter = spRange.first; iter != spRange.second; ++iter) { auto* gep = iter->second; // Find all uses of reference to sp register for (auto& use : gep->uses()) { if (auto load = dyn_cast<LoadInst>(use.getUser())) { // Find uses above +8 (since +0 is the return address) for (auto& use : load->uses()) { ConstantInt* offset = nullptr; if (match(use.get(), m_Add(m_Value(), m_ConstantInt(offset)))) { make_signed<decltype(offset->getLimitedValue())>::type intOffset = offset->getLimitedValue(); if (intOffset > 8) { // memory argument! callInfo.addParameter(ValueInformation::Stack, intOffset); } } } } } } // Are we using return registers? vector<const TargetRegisterInfo*> usedReturns; usedReturns.reserve(2); for (const char* name : returnRegisters) { const TargetRegisterInfo* regInfo = targetInfo.registerNamed(name); auto range = geps.equal_range(regInfo); for (auto iter = range.first; iter != range.second; ++iter) { bool hasStore = any_of(iter->second->use_begin(), iter->second->use_end(), [](Use& use) { return isa<StoreInst>(use.getUser()); }); if (hasStore) { usedReturns.push_back(regInfo); break; } } } for (const TargetRegisterInfo* reg : ipaFindUsedReturns(registry, function, usedReturns)) { // return value! callInfo.addReturn(ValueInformation::IntegerRegister, reg); } return true; }
void ArgumentRecovery::updateFunctionBody(Function& oldFunction, Function& newFunction, const CallInformation &ci) { // Do not fix functions without a body. assert(!md::isPrototype(oldFunction)); LLVMContext& ctx = oldFunction.getContext(); auto targetInfo = TargetInfo::getTargetInfo(*oldFunction.getParent()); unsigned pointerSize = targetInfo->getPointerSize() * CHAR_BIT; Type* integer = Type::getIntNTy(ctx, pointerSize); Type* integerPtr = Type::getIntNPtrTy(ctx, pointerSize, 1); // move code, delete leftover metadata on oldFunction newFunction.getBasicBlockList().splice(newFunction.begin(), oldFunction.getBasicBlockList()); oldFunction.deleteBody(); // Create a register structure at the beginning of the function and copy arguments to it. Argument* oldArg0 = static_cast<Argument*>(oldFunction.arg_begin()); Type* registerStruct = oldArg0->getType()->getPointerElementType(); Instruction* insertionPoint = static_cast<Instruction*>(newFunction.begin()->begin()); AllocaInst* newRegisters = new AllocaInst(registerStruct, "registers", insertionPoint); md::setRegisterStruct(*newRegisters); oldArg0->replaceAllUsesWith(newRegisters); registerPtr[&newFunction] = newRegisters; // get stack register from new set auto spPtr = targetInfo->getRegister(newRegisters, *targetInfo->getStackPointer()); spPtr->insertBefore(insertionPoint); auto spValue = new LoadInst(spPtr, "sp", insertionPoint); // Copy each argument to the register structure or to the stack. 
auto valueIter = ci.begin(); for (Argument& arg : newFunction.args()) { if (valueIter->type == ValueInformation::IntegerRegister) { auto gep = targetInfo->getRegister(newRegisters, *valueIter->registerInfo); gep->insertBefore(insertionPoint); new StoreInst(&arg, gep, insertionPoint); } else if (valueIter->type == ValueInformation::Stack) { auto offsetConstant = ConstantInt::get(integer, valueIter->frameBaseOffset); auto offset = BinaryOperator::Create(BinaryOperator::Add, spValue, offsetConstant, "", insertionPoint); auto casted = new IntToPtrInst(offset, integerPtr, "", insertionPoint); new StoreInst(&arg, casted, insertionPoint); } else { llvm_unreachable("not implemented"); } valueIter++; } // If the function returns, adjust return values. if (!newFunction.doesNotReturn() && !newFunction.getReturnType()->isVoidTy()) { for (BasicBlock& bb : newFunction) { if (auto ret = dyn_cast<ReturnInst>(bb.getTerminator())) { Value* returnValue = createReturnValue(newFunction, ci, ret); ReturnInst::Create(ctx, returnValue, ret); ret->eraseFromParent(); } } } }
bool CallingConvention_AnyArch_AnyCC::analyzeFunction(ParameterRegistry ®istry, CallInformation &fillOut, llvm::Function &func) { if (!isFullDisassembly() || md::isPrototype(func)) { return false; } auto regs = &*func.arg_begin(); unordered_map<const TargetRegisterInfo*, ModRefInfo> resultMap; // Find all GEPs const auto& target = registry.getTargetInfo(); unordered_multimap<const TargetRegisterInfo*, User*> registerUsers; for (User* user : regs->users()) { if (const TargetRegisterInfo* maybeRegister = target.registerInfo(*user)) { const TargetRegisterInfo& registerInfo = target.largestOverlappingRegister(*maybeRegister); registerUsers.insert({®isterInfo, user}); } } // Find all users of these GEPs DominatorsPerRegister gepUsers; for (auto iter = registerUsers.begin(); iter != registerUsers.end(); iter++) { addAllUsers(*iter->second, iter->first, gepUsers); } DominatorTree& preDom = registry.getAnalysis<DominatorTreeWrapperPass>(func).getDomTree(); PostDominatorTree& postDom = registry.getAnalysis<PostDominatorTreeWrapperPass>(func).getPostDomTree(); // Add calls SmallVector<CallInst*, 8> calls; CallGraph& cg = registry.getAnalysis<CallGraphWrapperPass>().getCallGraph(); CallGraphNode* thisFunc = cg[&func]; for (const auto& pair : *thisFunc) { Function* callee = pair.second->getFunction(); if (const CallInformation* callInfo = registry.getCallInfo(*callee)) if (callInfo->getStage() == CallInformation::Completed) { // pair.first is a weak value handle and has a cast operator to get the pointee CallInst* caller = cast<CallInst>((Value*)pair.first); calls.push_back(caller); for (const auto& vi : *callInfo) { if (vi.type == ValueInformation::IntegerRegister) { gepUsers[vi.registerInfo].insert(caller); } } } } // Start out resultMap based on call dominance. Weed out calls until dominant call set has been established. // This map will be refined by results from mod/ref instruction analysis. 
The purpose is mainly to define // mod/ref behavior for registers that are used in callees of this function, but not in this function // directly. while (calls.size() > 0) { unordered_map<const TargetRegisterInfo*, unsigned> callResult; auto dominant = findDominantValues(preDom, calls); for (CallInst* call : dominant) { Function* callee = call->getCalledFunction(); for (const auto& pair : translateToModRef(*registry.getCallInfo(*callee))) { callResult[pair.first] |= pair.second; } calls.erase(find(calls.begin(), calls.end(), call)); } for (const auto& pair : callResult) { resultMap[pair.first] = static_cast<ModRefInfo>(pair.second); } } // Find the dominant use(s) auto preDominatingUses = gepUsers; for (auto& pair : preDominatingUses) { pair.second = findDominantValues(preDom, pair.second); } // Fill out ModRef use dictionary // (Ref info is incomplete) for (auto& pair : preDominatingUses) { ModRefInfo& r = resultMap[pair.first]; r = IncompleteRef; for (auto inst : pair.second) { if (isa<StoreInst>(inst)) { // If we see a dominant store, then the register is modified. r = MRI_Mod; break; } if (CallInst* call = dyn_cast<CallInst>(inst)) { // If the first user is a call, propagate its ModRef value. 
r = registry.getCallInfo(*call->getCalledFunction())->getRegisterModRef(*pair.first); break; } } } // Find post-dominating stores auto postDominatingUses = gepUsers; for (auto& pair : postDominatingUses) { const TargetRegisterInfo* key = pair.first; auto& set = pair.second; // remove non-Mod instructions for (auto iter = set.begin(); iter != set.end(); ) { if (isa<StoreInst>(*iter)) { iter++; continue; } else if (CallInst* call = dyn_cast<CallInst>(*iter)) { auto callee = call->getCalledFunction(); const auto& info = *registry.getCallInfo(*callee); if ((info.getRegisterModRef(*key) & MRI_Mod) == MRI_Mod) { iter++; continue; } } iter = set.erase(iter); } set = findDominantValues(postDom, set); } MemorySSA& mssa = *registry.getMemorySSA(func); // Walk up post-dominating uses until we get to liveOnEntry. for (auto& pair : postDominatingUses) { walkUpPostDominatingUse(target, mssa, preDominatingUses, postDominatingUses, resultMap, pair.first); } // Use resultMap to build call information. First, sort registers by their pointer order; this ensures stable // parameter order. // We have authoritative information on used parameters, but not on return values. Only register parameters in this // step. SmallVector<pair<const TargetRegisterInfo*, ModRefInfo>, 16> registers; copy(resultMap.begin(), resultMap.end(), registers.begin()); sort(registers.begin(), registers.end()); vector<const TargetRegisterInfo*> returns; for (const auto& pair : resultMap) { if (pair.second & MRI_Ref) { fillOut.addParameter(ValueInformation::IntegerRegister, pair.first); } if (pair.second & MRI_Mod) { returns.push_back(pair.first); } } // Check for used returns. for (const TargetRegisterInfo* reg : ipaFindUsedReturns(registry, func, returns)) { fillOut.addReturn(ValueInformation::IntegerRegister, reg); } return true; }