static bool handleJump(ExecutableContainer *c, NativeBlockPtr B, InstPtr jmpinst, VA curAddr, stack<VA> &funcs, stack<VA> &blockChildren, raw_ostream &out) { // this is an internal jmp. probably a jump table. out << "Found a possible jump table!\n"; bool did_jmptable = handlePossibleJumpTable(c, B, jmpinst, curAddr, funcs, blockChildren, out); if(!did_jmptable) { out << "Heristic jumptable processing couldn't parse jumptable\n"; out << "pointing to: 0x" << to_string<VA>(curAddr, hex) << "\n"; out << jmpinst->printInst() << "\n"; out << c->hash << "\n"; } return did_jmptable; }
// Decodes a run of instructions starting at address `e` into a new
// NativeBlock and returns it.
//
// Decoding proceeds one instruction at a time and stops after the first
// instruction that: has a true or false branch target, reports itself as
// a terminator, or references an external symbol whose return type is
// ExternalCodeRef::NoReturn. A JMP32m through an import also stops it.
//
// Parameters:
//   c             - container queried for relocations, import names and
//                   section types
//   f             - external function map used to classify imported
//                   symbols as data or code and to build references
//   d             - decoder that yields instructions from the container
//   blockChildren - receives every true/false branch target discovered
//   e             - address at which the block starts
//   funcs         - receives every address discovered as a call target
//                   (or reported by dataInCodeHeuristic)
//   out           - stream that receives progress and diagnostic text
//
// Lookups of external code references are checked with LASSERT before
// they are dereferenced or attached to an instruction.
NativeBlockPtr decodeBlock( ExecutableContainer *c,
                            ExternalFunctionMap &f,
                            LLVMByteDecoder &d,
                            stack<VA> &blockChildren,
                            VA e,
                            stack<VA> &funcs,
                            raw_ostream &out)
{
  NativeBlockPtr B = NativeBlockPtr(new NativeBlock(e, d.getPrinter()));
  VA curAddr = e;
  bool has_follow = true;

  out << "Processing block: " << B->get_name() << "\n";

  do {
    InstPtr I = d.getInstFromBuff(curAddr, c);

    //I, if a terminator, will have true and false targets
    //filled in. I could be an indirect branch of some kind,
    //we will deal with that here. we will also deal with the
    //instruction if it is a data instruction with relocation
    out << to_string<VA>(I->get_loc(), hex) << ":";
    out << I->printInst() << "\n";

    // a non-zero true target ends the block and queues the target
    if(I->get_tr() != 0) {
      B->add_follow(I->get_tr());
      has_follow = false;
      out << "Adding block: " << to_string<VA>(I->get_tr(), hex) << "\n";
      blockChildren.push(I->get_tr());
    }

    // likewise for a non-zero false target
    if(I->get_fa() != 0) {
      B->add_follow(I->get_fa());
      has_follow = false;
      out << "Adding block: " << to_string<VA>(I->get_fa(), hex) << "\n";
      blockChildren.push(I->get_fa());
    }

    if(I->terminator()) {
      has_follow = false;
    }

    //do we need to add a data reference to this instruction?
    //again, because there is no offset information in the
    //instruction decoder, for now we just ask if every addr
    //in the inst is relocated
    for(uint32_t i = 0; i < I->get_len(); i++) {
      VA addrInInst = curAddr+i;

      if(!c->is_addr_relocated(addrInInst)) {
        continue;
      }

      VA addr = 0;
      std::string has_imp;

      // this instruction has a relocation
      // save the relocation offset for later
      I->set_reloc_offset(i);

      //get the offset for this address
      //add it as a data offset to the instruction
      if(c->find_import_name(addrInInst, has_imp)) {
        if(f.is_data(has_imp)) {
          ExternalDataRefPtr data_p = makeExtDataRefFromString(has_imp, f);
          out << "Adding external data ref: " << has_imp << "\n";
          I->set_ext_data_ref(data_p);
        } else {
          ExternalCodeRefPtr code_p = makeExtCodeRefFromString(has_imp, f);
          LASSERT(code_p, "Failed to get ext call from map for symbol: "+has_imp);

          //maybe, this call doesn't return, in which case,
          //we should kill the decoding of this flow
          if(code_p->getReturnType() == ExternalCodeRef::NoReturn) {
            has_follow = false;
          }
          out << "Adding external code ref: " << has_imp << "\n";
          I->set_ext_call_target(code_p);
        }
      } else if(c->relocate_addr(addrInInst, addr)) {
        bool can_ref_code = canInstructionReferenceCode(I);
        bool is_reloc_code = isAddrOfType(c, addr, ExecutableContainer::CodeSection);
        bool is_reloc_data = isAddrOfType(c, addr, ExecutableContainer::DataSection);

        if(isBranchViaMemory(I)) {
          out << "Detect branch via memory, relocation handled later\n";
        }
        // this instruction can reference code and does
        // reference code
        // so we assume the code points to a function
        else if(can_ref_code && is_reloc_code) {
          list<VA> new_funcs;
          if(dataInCodeHeuristic(c, I, addr, new_funcs)) {
            // add new functions to our functions list
            for(list<VA>::const_iterator nfi = new_funcs.begin();
                nfi != new_funcs.end();
                ++nfi)
            {
              funcs.push(*nfi);
            }
            I->set_data_offset(addr);
          } else {
            I->set_call_tgt(addr);
            out << "Adding: 0x" << to_string<VA>(addr, hex) << " as target\n";
            funcs.push(addr);
          }
        }
        // this instruction can't reference code and points to .text
        // or references data. Treat as data element
        // TODO: extract this from .text and shove into .data?
        else if((!can_ref_code && is_reloc_code) || is_reloc_data) {
          I->set_data_offset(addr);
        } else {
          out << "WARNING: relocation points to neither code nor data:"
              << to_string<VA>(addr, hex) << "\n";
        }
      } else {
        // print the address at its full VA width, like every other
        // address logged by this function
        out << "*NOT* Relocating relocatable addr:"
            << to_string<VA>(addrInInst, hex) << "\n";
      }

      // only the first relocated address in the instruction is handled
      break;
    }

    //is this instruction an external call?
    //in a COFF binary, the pcrel call can refer to an
    //external symbol that has been relocated
    //so, get the string that corresponds, and
    //provide the translation using the function map
    MCOperand op;
    string imp;

    switch(I->get_inst().getOpcode()) {
      case X86::JMP32m:
        {
          // NOTE(review): the import lookup uses curAddr+2, presumably the
          // offset of the memory operand after the opcode and ModRM bytes;
          // confirm this holds for prefixed encodings.
          string thunkSym;
          bool r = c->find_import_name(curAddr+2, thunkSym);
          if(r) {
            // this goes to an external API call
            out << "Adding external code ref via JMP: " << thunkSym << "\n";
            ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
            // same check as the other external-code lookups in this function
            LASSERT(p, "Failed to get ext call from map for symbol: "+thunkSym);
            I->set_ext_call_target(p);
            has_follow = false;
          } else {
            // this is an internal jmp. probably a jump table.
            bool did_jmptable = handlePossibleJumpTable(c, B, I, curAddr, funcs, blockChildren, out);
            LASSERT(did_jmptable, "JMP32m processing aborted: couldn't parse jumptable");
          }
        }
        break;

      case X86::CALLpcrel32:
        //this could be an external call in COFF, or not
        op = I->get_inst().getOperand(0);
        LASSERT(op.isImm(), "Nonsense for CALLpcrel32");
        if(op.getImm() != 0) {
          VA callTgt = curAddr+op.getImm()+I->get_len();
          bool foldFunc = false;

          //speculate about callTgt
          InstPtr spec = d.getInstFromBuff(callTgt, c);
          if(spec->terminator() && spec->get_inst().getOpcode() == X86::JMP32m) {
            // the call lands on a JMP32m thunk: fold the thunk away and
            // reference the imported symbol directly
            string thunkSym;
            bool r = c->find_import_name(callTgt+2, thunkSym);
            LASSERT(r, "Need to find thunk import addr");
            ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
            // p is dereferenced below, so it must be checked first
            LASSERT(p, "Failed to get ext call from map for symbol: "+thunkSym);
            I->set_ext_call_target(p);
            foldFunc = true;
            if(p->getReturnType() == ExternalCodeRef::NoReturn) {
              has_follow = false;
            }
          }

          if(foldFunc == false) {
            //add this to our list of funcs to search
            funcs.push(callTgt);
          }
        } else {
          //check to see if this is an external call...
          if(I->has_ext_call_target() == false) {
            // may be a local call
            VA addr=curAddr+1, relo_addr=0;
            out << "Symbol not found, maybe a local call\n";
            if(c->relocate_addr(addr, relo_addr)) {
              out << "Found local call to: " << to_string<VA>(relo_addr, hex) << "\n";
              I->set_call_tgt(relo_addr);
              out << "Adding: 0x" << to_string<VA>(relo_addr, hex) << " as target\n";
              funcs.push(relo_addr);
            } else {
              out << "Could not relocate addr for local call at: ";
              out << to_string<VA>(curAddr, hex) << "\n";
            }
          } else {
            out << "External call to: " << I->get_ext_call_target()->getSymbolName() << "\n";
          }
        }
        break;

      case X86::CALL32m:
        //this should be a call to an external, or we have no idea
        //so we need to try and look up the symbol that we're calling at this address...
        if(c->find_import_name(curAddr+2, imp)) {
          ExternalCodeRefPtr p = makeExtCodeRefFromString(imp, f);
          LASSERT(p, "Failed to get ext call from map for symbol: "+imp);

          out << "Calling symbol: " << p->getSymbolName() << "\n";
          if(p->getReturnType() == ExternalCodeRef::NoReturn) {
            has_follow = false;
          }
          I->set_ext_call_target(p);
        } else {
          out << "Cannot find symbol at address ";
          out << to_string<VA>(curAddr, hex) << "\n";
        }
        break;
    }

    B->add_inst(I);
    curAddr += I->get_len();
  } while(has_follow);

  //we have built a basic block, it might contain
  //multiple calls, but it only has one terminator
  //which is either a ret or a branch
  return B;
}