Exemple #1
0
// check if addr falls into a data section, and is at least minAddr.
// the minAddr check exists for times when we are not sure if an address
// is a data reference or an immediate value; in some cases data is mapped
// at 0x0 and determining this could be tricky
static
bool addrIsInData(VA addr, NativeModulePtr m, VA &base, VA minAddr = 0x0 ) {
    list<DataSection>  &sections = m->getData();
    list<DataSection>::iterator it = sections.begin();

    // sanity check:
    // assume no data references before minAddr.
    if (addr < minAddr) {
        return false;
    }

    if(sections.size() == 0) {
        llvm::dbgs() << __FUNCTION__ << ": WARNING: no data sections!\n";
        return false;
    }
    while( it != sections.end() ) {
        DataSection         &curSec = *it;
        VA          low = curSec.getBase();
        VA          high = low+curSec.getSize();

        if( addr >= low && addr < high ) {
            base = low;
            return true;
        }

        ++it;
    }

    return false;
}
Exemple #2
0
static VA FindSymbolInModule(NativeModulePtr mod, const std::string &sym_name) {
  for (auto &sym : mod->getEntryPoints()) {
    if (sym.getName() == sym_name) {
      return sym.getAddr();
    }
  }
  return static_cast<VA>( -1);
}
Exemple #3
0
static bool InsertDataSections(NativeModulePtr natMod, llvm::Module *M) {

  auto &globaldata = natMod->getData();
  //insert all global data before we insert the CFG

  std::vector<DataSectionVar> gvars;

  // pre-create references to all data sections
  // as later we may have data references that are
  // from one section into another

  for (auto &dt : globaldata) {
    std::stringstream ss;
    ss << "data_" << std::hex << dt.getBase();

    std::string bufferName = ss.str();

    //report << "inserting global data section named ";
    //report << bufferName << "\n";
    std::cout << "inserting global data section named ";
    std::cout << bufferName << std::endl;

    auto st_opaque = llvm::StructType::create(M->getContext());
    // Used to be PrivateLinkage, but that emitted
    // .objs that would not link with MSVC
    auto g = new llvm::GlobalVariable(
        *M, st_opaque, dt.isReadOnly(),
        llvm::GlobalVariable::InternalLinkage,
        nullptr, bufferName);
    gvars.push_back({&dt, st_opaque, g});
  }

  // actually populate the data sections
  for (auto &var : gvars) {

    //data we use to create LLVM values for this section
    // secContents is the actual values we will be inserting
    std::vector<llvm::Constant *> secContents;
    // data_section_types is their types, which are needed to initialize
    // the global variable
    std::vector<llvm::Type *> data_section_types;

    dataSectionToTypesContents(globaldata, *var.section, M, secContents,
                               data_section_types, true);

    // fill in the opaque structure with actual members
    var.opaque_type->setBody(data_section_types, true);

    // create an initializer list using the now filled in opaque
    // structure type
    auto cst = llvm::ConstantStruct::get(var.opaque_type, secContents);
    // align on pointer size boundary, max needed by SSE instructions
    var.var->setAlignment(ArchPointerSize(M));
    var.var->setInitializer(cst);

  }
  return true;
}
Exemple #4
0
static bool addTableDataSection(NativeModulePtr natMod, 
        Module *M, VA &newVA, const T& table)
{

    list<DataSection>  &globaldata = natMod->getData();
    list<DataSection>::const_iterator git = globaldata.begin();

    // ensure we make this the last data section
    newVA = 0;
    while( git != globaldata.end() ) {
        const DataSection         &dt = *git;
        uint64_t extent = dt.getBase() + dt.getSize();
        if(newVA < extent) {
            newVA = extent;
        }
        git++;
    }

    // skip a few
    newVA += 4;

    // create a new data section from the table
    DataSection *ds = tableToDataSection(newVA, table);
    
    // add to global data section list
    globaldata.push_back(*ds);

    // create the GlobalVariable
    string bufferName = "data_0x" + to_string<VA>(newVA, hex);
    StructType *st_opaque = StructType::create(M->getContext());
    GlobalVariable *gv = new GlobalVariable(*M,
                            st_opaque, 
                            true,
                            GlobalVariable::InternalLinkage,
                            NULL,
                            bufferName);

    vector<Type*> data_section_types;
    vector<Constant*>    secContents;

    dataSectionToTypesContents(globaldata, 
            *ds, 
            M, 
            secContents, 
            data_section_types, 
            false);

    st_opaque->setBody(data_section_types, true);
    Constant *cst = ConstantStruct::get(st_opaque, secContents);
    gv->setAlignment(4);
    gv->setInitializer(cst);

    return true;

} 
Exemple #5
0
static bool LiftFunctionsIntoModule(NativeModulePtr natMod, llvm::Module *M) {
  // populate functions
  for (auto &func_info : natMod->get_funcs()) {
    NativeFunctionPtr f = func_info.second;
    if (!InsertFunctionIntoModule(natMod, f, M)) {
      std::string fname = f->get_name();
      std::cerr << "Could not insert function: " << fname
                << " into the LLVM module" << std::endl;
      return false;
    }
  }
  return true;
}
Exemple #6
0
void PrintCFGFunctionList(const NativeModulePtr native_module, const std::string &architecture) noexcept {
  std::ios::fmtflags original_stream_flags(std::cout.flags());
  int address_digit_count = (architecture == "amd64" ? 16 : 8);

  std::cout << "\nCFG Function List:\n";

  const auto &function_map = native_module->get_funcs();
  for (const auto &function_descriptor : function_map) {
    VA virtual_address = function_descriptor.first;
    const NativeFunctionPtr function = function_descriptor.second;

    std::cout << "  " << std::hex << std::setw(address_digit_count) << std::setfill('0') << virtual_address << " ";
    std::cout << function->get_name() << std::endl;
  }

  std::cout.flags(original_stream_flags);
}
Exemple #7
0
void doPrintModule(NativeModulePtr m) {
    string  pathBase = "./";

    list<NativeFunctionPtr>           mod_funcs = m->get_funcs();
    list<NativeFunctionPtr>::iterator it = mod_funcs.begin();

    for(; it != mod_funcs.end(); ++it) {
        NativeFunctionPtr f = *it;
        string n =
            pathBase+to_string<uint64_t>(f->get_start(), hex) + ".dot";

        ofstream    out(n.c_str());

        block_label_writer  bgl(f);
        CFG                 g = f->get_cfg();
        write_graphviz(out, g, bgl);
    }

    return;
}
Exemple #8
0
void RenameLiftedFunctions(NativeModulePtr natMod, llvm::Module *M,
                           const std::set<VA> &entry_point_pcs) {
  // Rename the functions to have their 'nice' names, where available.
  for (auto &f : natMod->get_funcs()) {
    NativeFunctionPtr native_func = f.second;
    if (entry_point_pcs.count(native_func->get_start())) {
      continue;
    }

    auto sub_name = native_func->get_name();
    auto F = M->getFunction(sub_name);
    std::stringstream ss;
    ss << "callback_" << sub_name;
    if (!M->getFunction(ss.str())) {
      auto &sym_name = native_func->get_symbol_name();
      if (!sym_name.empty()) {
        F->setName(sym_name);
      }
    }
  }
}
Exemple #9
0
static void InitExternalData(NativeModulePtr natMod, llvm::Module *M) {
  for (auto dr : natMod->getExtDataRefs()) {
    auto dsize = dr->getDataSize();
    auto symname = dr->getSymbolName();
    auto extType = llvm::ArrayType::get(llvm::Type::getInt8Ty(M->getContext()),
                                        dsize);
    auto gv = llvm::dyn_cast<llvm::GlobalValue>(
        M->getOrInsertGlobal(symname, extType));
    TASSERT(gv != nullptr, "Could not make global value!");

    if (dr->isWeak()) {
      gv->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
    } else {
      gv->setLinkage(llvm::GlobalValue::ExternalLinkage);
    }

    llvm::Triple triple(M->getTargetTriple());
    if (llvm::Triple::Win32 == triple.getOS()) {
      gv->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
    }
  }
}
Exemple #10
0
static void InitLiftedFunctions(NativeModulePtr natMod, llvm::Module *M) {
  for (auto &f : natMod->get_funcs()) {
    NativeFunctionPtr native_func = f.second;
    auto fname = native_func->get_name();
    auto F = M->getFunction(fname);

    if (!F) {
      F = llvm::dyn_cast<llvm::Function>(
          M->getOrInsertFunction(fname, LiftedFunctionType()));

      TASSERT(F != nullptr, "Could not insert function into module");

      ArchSetCallingConv(M, F);
      // make local functions 'static'
      F->setLinkage(llvm::GlobalValue::InternalLinkage);
      std::cout << "Inserted function: " << fname << std::endl;
    } else {
      std::cout << "Already inserted function: " << fname << ", skipping."
                << std::endl;
    }
  }
}
Exemple #11
0
// Iterate over the list of external functions and insert them as
// global functions.
static void InitExternalCode(NativeModulePtr natMod, llvm::Module *M) {
  for (auto e : natMod->getExtCalls()) {
    auto conv = e->getCallingConvention();
    auto argCount = e->getNumArgs();
    auto symName = e->getSymbolName();
    auto funcSign = e->getFunctionSignature();

    // Create the function if it is not already there.
    auto &C = M->getContext();
    auto F = M->getFunction(symName);
    if (F) {
      continue;
    }

    if (ExternalCodeRef::McsemaCall == conv) {
       // normal mcsema function prototypes
      F = llvm::dyn_cast<llvm::Function>(M->getOrInsertFunction(
          ArchNameMcSemaCall(symName), LiftedFunctionType()));
      ArchSetCallingConv(M, F);
      F->setLinkage(llvm::GlobalValue::ExternalLinkage);
      continue;
    }

    std::vector<llvm::Type *> arguments;
    llvm::Type *returnType = nullptr;

    // Create arguments.
    const auto Arch = SystemArch(M);
    const auto OS = SystemOS(M);
    for (auto i = 0; i < argCount; i++) {
      if (_X86_64_ == Arch) {
        if (llvm::Triple::Win32 == OS) {
          if (funcSign.c_str()[i] == 'F') {
            arguments.push_back(llvm::Type::getDoubleTy(C));
          } else {
            arguments.push_back(llvm::Type::getInt64Ty(C));
          }
        } else if (llvm::Triple::Linux == OS) {
          arguments.push_back(llvm::Type::getInt64Ty(C));

        } else {
          TASSERT(false, "Unknown OS Type!");
        }
      } else {
        arguments.push_back(llvm::Type::getInt32Ty(C));
      }
    }

    //create function type
    switch (e->getReturnType()) {
      case ExternalCodeRef::NoReturn:
      case ExternalCodeRef::VoidTy:
        returnType = llvm::Type::getVoidTy(C);
        break;

      case ExternalCodeRef::Unknown:
      case ExternalCodeRef::IntTy:
        if (natMod->is64Bit()) {
          returnType = llvm::Type::getInt64Ty(C);
        } else {
          returnType = llvm::Type::getInt32Ty(C);
        }
        break;

      default:
        throw TErr(
            __LINE__, __FILE__,
            "Encountered an unknown return type while translating function");
    }

    auto FTy = llvm::FunctionType::get(returnType, arguments, false);
    if (e->isWeak()) {
      F = llvm::Function::Create(FTy, llvm::GlobalValue::ExternalWeakLinkage,
                                 symName, M);
    } else {
      F = llvm::Function::Create(FTy, llvm::GlobalValue::ExternalLinkage,
                                 symName, M);
    }

    if (e->getReturnType() == ExternalCodeRef::NoReturn) {
      F->setDoesNotReturn();
    }

    //set calling convention
    if (natMod->is64Bit()) {
      ArchSetCallingConv(M, F);
    } else {
      F->setCallingConv(getLLVMCC(conv));
    }
  }
}