static bool parseJumpIndexTable(ExecutableContainer *c, InstPtr index_insn, const vector<VA> &jmptable_entries, raw_ostream &out) { VA reloc_offset = index_insn->get_reloc_offset(); if (reloc_offset == 0) { out << "Unsupported jump index write; no relocation\n"; // this jump index probably doesn't use a table return false; } VA addrInInst = index_insn->get_loc() + reloc_offset; VA indexTableEntry; VA symbolSize; if(!c->relocate_addr(addrInInst, indexTableEntry, symbolSize)) { out << "Not a jump index table: can't relocate relocation in index insn\n"; // can't relocate, something bad happened return false; } // assume we always index the start of the index table // ... might not be correct // this means we set initial entry to zero, the first element int initial_entry = 0; uint8_t b; int bindex = 0; // loop while all the bytes we read can be table indexes vector<uint8_t> index_entries; while( (indexTableEntry+bindex) < c->getExtent() ) { c->readByte(indexTableEntry+bindex, &b); if (b > jmptable_entries.size()) { break; } out << "Read index table byte: " << to_string<uint32_t>((uint32_t)b, hex) << "\n"; index_entries.push_back(b); bindex++; } JumpIndexTable *jit = new JumpIndexTable(index_entries, initial_entry); index_insn->set_jump_index_table(JumpIndexTablePtr(jit)); return true; }
// Translate a CALLpcrel32 instruction.
//
// Three cases, tried in order:
//   1. the instruction carries an external call target -> call it by symbol;
//   2. the instruction carries a recorded call target  -> call that address;
//   3. otherwise the target is computed from the immediate operand, relative
//      to the end of the instruction (location + length + immediate).
static InstTransResult translate_CALLpcrel32(NativeModulePtr natM, BasicBlock *& block, InstPtr ip, MCInst &inst) {
    if (ip->has_ext_call_target()) {
        std::string symbolName = ip->get_ext_call_target()->getSymbolName();
        return doCallPCExtern(block, symbolName);
    }

    if (ip->has_call_tgt()) {
        int64_t recordedTarget = static_cast<int64_t>(ip->get_call_tgt(0));
        return doCallPC(ip, block, recordedTarget);
    }

    // No recorded target: fall back to the displacement in the encoding.
    int64_t displacement = static_cast<int64_t>(OP(0).getImm());
    return doCallPC(ip, block, ip->get_loc()+ip->get_len()+displacement);
}
// Translate a JMP32m (jump through memory) instruction.
//
// Supported forms:
//   - jump to an external API: emitted as "call <api>; ret" (the ret is
//     skipped when the call itself already ended the block);
//   - jump table that was turned into a data-section table: emitted via
//     doJumpTableViaData followed by a ret;
//   - plain jump table: emitted via doJumpTableViaSwitch, ending the block.
// Anything else raises a TErr.
static InstTransResult translate_JMP32m(NativeModulePtr natM, BasicBlock *& block, InstPtr ip, MCInst &inst) {
    // translate JMP mem32 API calls as a call <api>, ret;
    if (ip->has_ext_call_target()) {
        std::string apiName = ip->get_ext_call_target()->getSymbolName();
        InstTransResult callResult = doCallPCExtern(block, apiName, true);

        if (callResult != EndBlock) {
            return doRet(block);
        }

        // noreturn api calls don't need to fix stack
        return callResult;
    }

    if (ip->has_jump_table()) {
        if (ip->is_data_offset()) {
            // the jump table was converted into a table in the data section;
            // the jmp is simulated as a call/ret pair, hence the "ret"
            doJumpTableViaData(natM, block, ip, inst);
            return doRet(block);
        }

        // conformant jump table: emit an llvm switch
        doJumpTableViaSwitch(natM, block, ip, inst);
        return EndBlock;
    }

    std::string msg("NIY: JMP32m only supported for external API calls and jump tables: ");
    msg += to_string<VA>(ip->get_loc(), std::hex);
    throw TErr(__LINE__, __FILE__, msg.c_str());
}
// Decode one basic block starting at address `e`.
//
// Instructions are decoded sequentially until one of them stops the flow:
// it has a true/false branch target, it is a terminator, or it calls an
// external function marked NoReturn.
//
// c             - container used for relocation/import lookups
// f             - external function map used to build external code/data refs
// d             - byte decoder producing the instructions
// blockChildren - receives the branch targets found in this block
// e             - address of the first instruction of the block
// funcs         - receives addresses believed to be function entry points
// out           - log stream
//
// Returns the populated NativeBlock.
NativeBlockPtr decodeBlock( ExecutableContainer *c,
                            ExternalFunctionMap &f,
                            LLVMByteDecoder &d,
                            stack<VA> &blockChildren,
                            VA e,
                            stack<VA> &funcs,
                            raw_ostream &out)
{
  NativeBlockPtr B = NativeBlockPtr(new NativeBlock(e, d.getPrinter()));
  VA curAddr = e;
  bool has_follow = true;

  out << "Processing block: " << B->get_name() << "\n";
  do {
    InstPtr I = d.getInstFromBuff(curAddr, c);

    //I, if a terminator, will have true and false targets
    //filled in. I could be an indirect branch of some kind,
    //we will deal with that here. we will also deal with the
    //instruction if it is a data instruction with relocation
    out << to_string<VA>(I->get_loc(), hex) << ":";
    out << I->printInst() << "\n";

    if(I->get_tr() != 0) {
      B->add_follow(I->get_tr());
      has_follow = false;
      out << "Adding block: " << to_string<VA>(I->get_tr(), hex) << "\n";
      blockChildren.push(I->get_tr());
    }

    if(I->get_fa() != 0) {
      B->add_follow(I->get_fa());
      has_follow = false;
      out << "Adding block: " << to_string<VA>(I->get_fa(), hex) << "\n";
      blockChildren.push(I->get_fa());
    }

    if(I->terminator()) {
      has_follow = false;
    }

    //do we need to add a data reference to this instruction?
    //again, because there is no offset information in the
    //instruction decoder, for now we just ask if every addr
    //in the inst is relocated
    for(uint32_t i = 0; i < I->get_len(); i++) {
      VA addrInInst = curAddr+i;
      if(c->is_addr_relocated(addrInInst)) {
        VA addr = 0;
        std::string has_imp;

        // this instruction has a relocation
        // save the relocation offset for later
        I->set_reloc_offset(i);

        //get the offset for this address
        //add it as a data offset to the instruction
        if (c->find_import_name(addrInInst, has_imp) ) {
          if(f.is_data(has_imp)) {
            ExternalDataRefPtr data_p = makeExtDataRefFromString(has_imp, f);
            out << "Adding external data ref: " << has_imp << "\n";
            I->set_ext_data_ref(data_p);
          } else {
            ExternalCodeRefPtr code_p = makeExtCodeRefFromString(has_imp, f);
            LASSERT(code_p, "Failed to get ext call from map for symbol: "+has_imp);
            //maybe, this call doesn't return, in which case,
            //we should kill the decoding of this flow
            if(code_p->getReturnType() == ExternalCodeRef::NoReturn) {
              has_follow = false;
            }
            out << "Adding external code ref: " << has_imp << "\n";
            I->set_ext_call_target(code_p);
          }
        } else if(c->relocate_addr(addrInInst, addr)) {
          bool can_ref_code = canInstructionReferenceCode(I);
          bool is_reloc_code = isAddrOfType(c, addr, ExecutableContainer::CodeSection);
          bool is_reloc_data = isAddrOfType(c, addr, ExecutableContainer::DataSection);

          if(isBranchViaMemory(I)) {
            out << "Detect branch via memory, relocation handled later\n";
          }
          // this instruction can reference code and does
          // reference code
          // so we assume the code points to a function
          else if( can_ref_code && is_reloc_code ) {
            list<VA> new_funcs;
            if(dataInCodeHeuristic(c, I, addr, new_funcs)) {
              // add new functions to our functions list
              for(list<VA>::const_iterator nfi = new_funcs.begin();
                  nfi != new_funcs.end();
                  nfi++)
              {
                funcs.push(*nfi);
              }

              I->set_data_offset(addr);
            } else {
              I->set_call_tgt(addr);
              out << "Adding: 0x" << to_string<VA>(addr, hex) << " as target\n";
              funcs.push(addr);
            }
          }
          // this instruction can't reference code and points to .text
          // or references data. Treat as data element
          // TODO: extract this from .text and shove into .data?
          else if(( !can_ref_code && is_reloc_code) || is_reloc_data ) {
            I->set_data_offset(addr);
          } else {
            out << "WARNING: relocation points to neither code nor data:" << to_string<VA>(addr, hex) << "\n";
          }
        } else {
          // Formatted as VA, like every other address in this function, so
          // the value is not narrowed to 32 bits before printing.
          out << "*NOT* Relocating relocatable addr:" << to_string<VA>(addrInInst, hex) << "\n";
        }

        // only the first relocated byte of the instruction is considered
        break;
      }
    }

    //is this instruction an external call?
    //in a COFF binary, the pcrel call can refer to an
    //external symbol that has been relocated
    //so, get the string that corresponds, and
    //provide the translation using the function map
    MCOperand op;
    string imp;
    switch(I->get_inst().getOpcode()) {
      case X86::JMP32m:
        {
          // NOTE(review): "+2" presumably skips the opcode and ModRM bytes
          // to reach the 32-bit address operand - confirm for all encodings.
          string thunkSym;
          bool r = c->find_import_name(curAddr+2, thunkSym);
          if(r) {
            // this goes to an external API call
            out << "Adding external code ref via JMP: " << thunkSym << "\n";
            ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
            I->set_ext_call_target(p);
            has_follow = false;
          } else {
            // this is an internal jmp. probably a jump table.
            bool did_jmptable = handlePossibleJumpTable(c, B, I, curAddr, funcs, blockChildren, out);

            LASSERT(did_jmptable, "JMP32m processing aborted: couldn't parse jumptable");
          }
        }
        break;

      case X86::CALLpcrel32:
        //this could be an external call in COFF, or not
        op = I->get_inst().getOperand(0);
        LASSERT(op.isImm(), "Nonsense for CALLpcrel32");
        if(op.getImm() !=0) {
          VA callTgt = curAddr+op.getImm()+I->get_len();
          bool foldFunc = false;
          //speculate about callTgt
          InstPtr spec = d.getInstFromBuff(callTgt, c);
          if(spec->terminator() && spec->get_inst().getOpcode() == X86::JMP32m) {
            // the call lands on a "jmp [import]" thunk: fold the thunk away
            // and treat this call as a direct call to the import
            string thunkSym;
            bool r = c->find_import_name(callTgt+2, thunkSym);
            LASSERT(r, "Need to find thunk import addr");
            ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
            // p is dereferenced below; assert on a failed lookup the same
            // way the other external-code lookups in this function do.
            LASSERT(p, "Failed to get ext call from map for symbol: "+thunkSym);
            I->set_ext_call_target(p);
            foldFunc = true;
            if(p->getReturnType() == ExternalCodeRef::NoReturn) {
              has_follow = false;
            }
          }
          if(foldFunc == false) {
            //add this to our list of funcs to search
            funcs.push(callTgt);
          }
        } else {
          //check to see if this is an external call...
          if(I->has_ext_call_target() == false) {
            // may be a local call
            // NOTE(review): "+1" presumably skips the one-byte call opcode
            // to reach the rel32 field - confirm.
            VA addr=curAddr+1, relo_addr=0;
            out << "Symbol not found, maybe a local call\n";
            if(c->relocate_addr(addr, relo_addr)){
              out << "Found local call to: " << to_string<VA>(relo_addr, hex) << "\n";
              I->set_call_tgt(relo_addr);
              out << "Adding: 0x" << to_string<VA>(relo_addr, hex) << " as target\n";
              funcs.push(relo_addr);
            } else {
              out << "Could not relocate addr for local call at: ";
              out << to_string<VA>(curAddr, hex) << "\n";
            }
          } else {
            out << "External call to: " << I->get_ext_call_target()->getSymbolName() << "\n";
          }
        }
        break;

      case X86::CALL32m:
        //this should be a call to an external, or we have no idea
        //so we need to try and look up the symbol that we're calling at this address...
        if(c->find_import_name(curAddr+2, imp)) {
          ExternalCodeRefPtr p = makeExtCodeRefFromString(imp, f);
          LASSERT(p, "Failed to get ext call from map for symbol: "+imp);

          out << "Calling symbol: " << p->getSymbolName() << "\n";
          if(p->getReturnType() == ExternalCodeRef::NoReturn) {
            has_follow = false;
          }
          I->set_ext_call_target(p);
        } else {
          out << "Cannot find symbol at address ";
          out << to_string<VA>(curAddr, hex) << "\n";
        }
        break;
    }

    B->add_inst(I);
    curAddr += I->get_len();
  } while(has_follow);

  //we have built a basic block, it might contain
  //multiple calls, but it only has one terminator
  //which is either a ret or a branch
  return B;
}
// Translate a JMP32r (jump through register) instruction.
//
// The register value is read once. If the instruction has a jump table,
// doJumpTableViaSwitchReg is given `defaultb` by reference and must leave it
// non-null; the call-through-register fallback and the ret are then emitted
// into that default block. Without a jump table the jump is emitted directly
// into `block` as a call through the register followed by a ret.
static InstTransResult translate_JMP32r(NativeModulePtr natM,
        BasicBlock *&block,
        InstPtr ip,
        MCInst &inst)
{
    // Check the operand count before touching operand 0, so a malformed
    // instruction is rejected by the assertion instead of by an
    // out-of-range operand access.
    TASSERT(inst.getNumOperands() == 1, "");

    const MCOperand &tgtOp = inst.getOperand(0);
    TASSERT(tgtOp.isReg(), "");

    //read the register
    Value *fromReg = R_READ<32>(block, tgtOp.getReg());

    if (ip->has_jump_table()) {
        // jump table dispatched on the register value
        llvm::dbgs() << __FUNCTION__ << ": jump table via register: " << to_string<VA>(ip->get_loc(), std::hex) << "\n";

        BasicBlock *defaultb = nullptr;

        doJumpTableViaSwitchReg(block, ip, fromReg, defaultb);
        TASSERT(defaultb != nullptr, "Default block has to exist");

        // fallback to doing do_call_value
        doCallV(defaultb, ip, fromReg);
        return doRet(defaultb);
    } else {
        // translate the JMP32r as a call/ret
        llvm::dbgs() << __FUNCTION__ << ": regular jump via register: " << to_string<VA>(ip->get_loc(), std::hex) << "\n";
        doCallV(block, ip, fromReg);
        return doRet(block);
    }
}