// check if addr falls into a data section, and is at least minAddr. // the minAddr check exists for times when we are not sure if an address // is a data reference or an immediate value; in some cases data is mapped // at 0x0 and determining this could be tricky static bool addrIsInData(VA addr, NativeModulePtr m, VA &base, VA minAddr = 0x0 ) { list<DataSection> §ions = m->getData(); list<DataSection>::iterator it = sections.begin(); // sanity check: // assume no data references before minAddr. if (addr < minAddr) { return false; } if(sections.size() == 0) { llvm::dbgs() << __FUNCTION__ << ": WARNING: no data sections!\n"; return false; } while( it != sections.end() ) { DataSection &curSec = *it; VA low = curSec.getBase(); VA high = low+curSec.getSize(); if( addr >= low && addr < high ) { base = low; return true; } ++it; } return false; }
// Search the module's entry points for one named sym_name.
//
// Returns the address of the first entry point whose name compares equal to
// sym_name, or static_cast<VA>(-1) when there is no such entry point.
static VA FindSymbolInModule(NativeModulePtr mod,
                             const std::string &sym_name) {
  const VA not_found = static_cast<VA>(-1);

  for (auto &entry : mod->getEntryPoints()) {
    if (!(entry.getName() == sym_name)) {
      continue;
    }
    return entry.getAddr();
  }

  return not_found;
}
// Create one LLVM global variable per data section of natMod and fill it
// with that section's contents. All global data is inserted before the CFG.
//
// The work is done in two passes: every global is created first (with an
// opaque struct type and no initializer) because later we may have data
// references that go from one section into another; only then are the
// bodies and initializers filled in.
//
// Always returns true.
static bool InsertDataSections(NativeModulePtr natMod, llvm::Module *M) {
  auto &sections = natMod->getData();
  std::vector<DataSectionVar> section_vars;

  // Pass 1: pre-create a named, still-opaque global for each section.
  for (auto &section : sections) {
    std::stringstream name_stream;
    name_stream << "data_" << std::hex << section.getBase();
    std::string global_name = name_stream.str();

    std::cout << "inserting global data section named ";
    std::cout << global_name << std::endl;

    auto opaque_ty = llvm::StructType::create(M->getContext());

    // Used to be PrivateLinkage, but that emitted .objs that would not link
    // with MSVC.
    auto global = new llvm::GlobalVariable(
        *M, opaque_ty, section.isReadOnly(),
        llvm::GlobalVariable::InternalLinkage, nullptr, global_name);

    section_vars.push_back({&section, opaque_ty, global});
  }

  // Pass 2: actually populate the data sections.
  for (auto &entry : section_vars) {
    // The values that will be inserted for this section ...
    std::vector<llvm::Constant *> contents;
    // ... and their types, which are needed to define the struct body.
    std::vector<llvm::Type *> member_types;

    dataSectionToTypesContents(sections, *entry.section, M, contents,
                               member_types, true);

    // Fill in the opaque structure with actual members (packed layout).
    entry.opaque_type->setBody(member_types, true);

    // Build the initializer from the now-complete structure type.
    auto initializer = llvm::ConstantStruct::get(entry.opaque_type, contents);

    // Align on pointer size boundary, max needed by SSE instructions.
    entry.var->setAlignment(ArchPointerSize(M));
    entry.var->setInitializer(initializer);
  }

  return true;
}
static bool addTableDataSection(NativeModulePtr natMod, Module *M, VA &newVA, const T& table) { list<DataSection> &globaldata = natMod->getData(); list<DataSection>::const_iterator git = globaldata.begin(); // ensure we make this the last data section newVA = 0; while( git != globaldata.end() ) { const DataSection &dt = *git; uint64_t extent = dt.getBase() + dt.getSize(); if(newVA < extent) { newVA = extent; } git++; } // skip a few newVA += 4; // create a new data section from the table DataSection *ds = tableToDataSection(newVA, table); // add to global data section list globaldata.push_back(*ds); // create the GlobalVariable string bufferName = "data_0x" + to_string<VA>(newVA, hex); StructType *st_opaque = StructType::create(M->getContext()); GlobalVariable *gv = new GlobalVariable(*M, st_opaque, true, GlobalVariable::InternalLinkage, NULL, bufferName); vector<Type*> data_section_types; vector<Constant*> secContents; dataSectionToTypesContents(globaldata, *ds, M, secContents, data_section_types, false); st_opaque->setBody(data_section_types, true); Constant *cst = ConstantStruct::get(st_opaque, secContents); gv->setAlignment(4); gv->setInitializer(cst); return true; }
// Insert every function of natMod into the LLVM module M.
//
// Stops at the first function that InsertFunctionIntoModule rejects, reports
// it on std::cerr, and returns false. Returns true when every function was
// inserted.
static bool LiftFunctionsIntoModule(NativeModulePtr natMod, llvm::Module *M) {
  for (auto &entry : natMod->get_funcs()) {
    NativeFunctionPtr func = entry.second;

    if (InsertFunctionIntoModule(natMod, func, M)) {
      continue;
    }

    std::string failed_name = func->get_name();
    std::cerr << "Could not insert function: " << failed_name
              << " into the LLVM module" << std::endl;
    return false;
  }

  return true;
}
// Print the address and name of every function in native_module to
// std::cout, one per line, under a "CFG Function List:" header.
//
// Addresses are printed in hexadecimal, zero-padded to 16 digits when
// architecture is "amd64" and to 8 digits otherwise.
//
// The formatting state of std::cout (flags and fill character) is restored
// before returning.
void PrintCFGFunctionList(const NativeModulePtr native_module,
                          const std::string &architecture) noexcept {
  const std::ios::fmtflags original_stream_flags(std::cout.flags());
  // The fill character is not part of fmtflags and std::setfill is sticky,
  // so it has to be saved and restored separately.
  const char original_fill_char = std::cout.fill();

  const int address_digit_count = (architecture == "amd64" ? 16 : 8);

  std::cout << "\nCFG Function List:\n";

  const auto &function_map = native_module->get_funcs();
  for (const auto &function_descriptor : function_map) {
    VA virtual_address = function_descriptor.first;
    const NativeFunctionPtr function = function_descriptor.second;

    std::cout << " " << std::hex << std::setw(address_digit_count)
              << std::setfill('0') << virtual_address << " ";
    std::cout << function->get_name() << std::endl;
  }

  std::cout.flags(original_stream_flags);
  std::cout.fill(original_fill_char);
}
// Write the control-flow graph of every function in m as a Graphviz file.
//
// Each file is created in the current directory ("./") and named after the
// function's start address in hexadecimal with a ".dot" extension.
//
// NOTE(review): other functions in this file iterate get_funcs() as
// key/value pairs; confirm that copying it into a list<NativeFunctionPtr>
// still matches its current return type.
void doPrintModule(NativeModulePtr m) {
  string pathBase = "./";
  list<NativeFunctionPtr> mod_funcs = m->get_funcs();

  for (NativeFunctionPtr f : mod_funcs) {
    string dot_path =
        pathBase + to_string<uint64_t>(f->get_start(), hex) + ".dot";

    ofstream out(dot_path.c_str());

    // The label writer supplies per-block labels for the emitted graph.
    block_label_writer bgl(f);
    CFG g = f->get_cfg();

    write_graphviz(out, g, bgl);
  }
}
// Rename the lifted functions to have their 'nice' (symbol) names, where
// available.
//
// A function is left untouched when any of the following holds:
//   - its start address is in entry_point_pcs,
//   - M contains no function under its current name,
//   - M already contains a function named "callback_<current name>",
//   - it has no symbol name.
void RenameLiftedFunctions(NativeModulePtr natMod, llvm::Module *M,
                           const std::set<VA> &entry_point_pcs) {
  for (auto &f : natMod->get_funcs()) {
    NativeFunctionPtr native_func = f.second;

    if (entry_point_pcs.count(native_func->get_start())) {
      continue;
    }

    auto sub_name = native_func->get_name();
    auto F = M->getFunction(sub_name);

    // getFunction yields null when the module has no function of that name;
    // there is nothing to rename then, and dereferencing it would crash.
    if (!F) {
      continue;
    }

    std::stringstream ss;
    ss << "callback_" << sub_name;
    if (M->getFunction(ss.str())) {
      continue;
    }

    auto &sym_name = native_func->get_symbol_name();
    if (!sym_name.empty()) {
      F->setName(sym_name);
    }
  }
}
// Declare every external data reference of natMod as a global in M.
//
// Each reference becomes a global of type [dsize x i8] under its symbol
// name, with ExternalWeakLinkage when the reference is weak and
// ExternalLinkage otherwise. When the module's target triple reports a Win32
// OS the global is additionally marked dllimport.
static void InitExternalData(NativeModulePtr natMod, llvm::Module *M) {
  for (auto data_ref : natMod->getExtDataRefs()) {
    auto byte_count = data_ref->getDataSize();
    auto symbol = data_ref->getSymbolName();

    auto byte_ty = llvm::Type::getInt8Ty(M->getContext());
    auto array_ty = llvm::ArrayType::get(byte_ty, byte_count);

    auto global = llvm::dyn_cast<llvm::GlobalValue>(
        M->getOrInsertGlobal(symbol, array_ty));
    TASSERT(global != nullptr, "Could not make global value!");

    const auto linkage = data_ref->isWeak()
                             ? llvm::GlobalValue::ExternalWeakLinkage
                             : llvm::GlobalValue::ExternalLinkage;
    global->setLinkage(linkage);

    llvm::Triple target(M->getTargetTriple());
    if (llvm::Triple::Win32 == target.getOS()) {
      global->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
    }
  }
}
// Make sure M contains a function declaration for every function of natMod.
//
// Functions already present in M under the same name are skipped. New ones
// are created with the lifted function type, given the architecture's
// calling convention, and marked with internal linkage. Progress is logged
// to std::cout in both cases.
static void InitLiftedFunctions(NativeModulePtr natMod, llvm::Module *M) {
  for (auto &entry : natMod->get_funcs()) {
    NativeFunctionPtr native_func = entry.second;
    auto fname = native_func->get_name();

    if (M->getFunction(fname)) {
      std::cout << "Already inserted function: " << fname << ", skipping."
                << std::endl;
      continue;
    }

    auto F = llvm::dyn_cast<llvm::Function>(
        M->getOrInsertFunction(fname, LiftedFunctionType()));
    TASSERT(F != nullptr, "Could not insert function into module");

    ArchSetCallingConv(M, F);

    // Make local functions 'static'.
    F->setLinkage(llvm::GlobalValue::InternalLinkage);

    std::cout << "Inserted function: " << fname << std::endl;
  }
}
// Iterate over the list of external functions and insert them as // global functions. static void InitExternalCode(NativeModulePtr natMod, llvm::Module *M) { for (auto e : natMod->getExtCalls()) { auto conv = e->getCallingConvention(); auto argCount = e->getNumArgs(); auto symName = e->getSymbolName(); auto funcSign = e->getFunctionSignature(); // Create the function if it is not already there. auto &C = M->getContext(); auto F = M->getFunction(symName); if (F) { continue; } if (ExternalCodeRef::McsemaCall == conv) { // normal mcsema function prototypes F = llvm::dyn_cast<llvm::Function>(M->getOrInsertFunction( ArchNameMcSemaCall(symName), LiftedFunctionType())); ArchSetCallingConv(M, F); F->setLinkage(llvm::GlobalValue::ExternalLinkage); continue; } std::vector<llvm::Type *> arguments; llvm::Type *returnType = nullptr; // Create arguments. const auto Arch = SystemArch(M); const auto OS = SystemOS(M); for (auto i = 0; i < argCount; i++) { if (_X86_64_ == Arch) { if (llvm::Triple::Win32 == OS) { if (funcSign.c_str()[i] == 'F') { arguments.push_back(llvm::Type::getDoubleTy(C)); } else { arguments.push_back(llvm::Type::getInt64Ty(C)); } } else if (llvm::Triple::Linux == OS) { arguments.push_back(llvm::Type::getInt64Ty(C)); } else { TASSERT(false, "Unknown OS Type!"); } } else { arguments.push_back(llvm::Type::getInt32Ty(C)); } } //create function type switch (e->getReturnType()) { case ExternalCodeRef::NoReturn: case ExternalCodeRef::VoidTy: returnType = llvm::Type::getVoidTy(C); break; case ExternalCodeRef::Unknown: case ExternalCodeRef::IntTy: if (natMod->is64Bit()) { returnType = llvm::Type::getInt64Ty(C); } else { returnType = llvm::Type::getInt32Ty(C); } break; default: throw TErr( __LINE__, __FILE__, "Encountered an unknown return type while translating function"); } auto FTy = llvm::FunctionType::get(returnType, arguments, false); if (e->isWeak()) { F = llvm::Function::Create(FTy, llvm::GlobalValue::ExternalWeakLinkage, symName, M); } else { F = 
llvm::Function::Create(FTy, llvm::GlobalValue::ExternalLinkage, symName, M); } if (e->getReturnType() == ExternalCodeRef::NoReturn) { F->setDoesNotReturn(); } //set calling convention if (natMod->is64Bit()) { ArchSetCallingConv(M, F); } else { F->setCallingConv(getLLVMCC(conv)); } } }