// Emit an LLVM switch for a jump index table access of the form
//   movzx reg32, [base+disp]
//
// The switch value is the base register plus the table's initial entry.
// Each byte of the index table gets its own case block (built by
// emitJumpIndexWrite, which is handed the destination register and the
// continuation block). Any value without a case lands in a default block
// that calls the trap intrinsic.
//
// block: in/out -- instructions are appended to the incoming block; on
//        return it refers to the continuation block.
// ip:    the instruction carrying the jump index table.
void doJumpIndexTableViaSwitch(BasicBlock *&block, InstPtr ip)
{
    Function *F = block->getParent();
    Module *M = F->getParent();

    // Sanity-check the operand shape. NOTE(review): OP() takes no
    // instruction argument, so it presumably reads the local named `inst`;
    // keep that name.
    const MCInst &inst = ip->get_inst();
    const MCOperand &dest = OP(0);
    const MCOperand &base = OP(1);

    TASSERT(base.isReg(), "Conformant jump index tables need base to be a register");
    TASSERT(dest.isReg(), "Conformant jump index tables need to write to a register");

    JumpIndexTablePtr indexTable = ip->get_jump_index_table();

    // Offset the register by the initial entry so that there are no
    // negative entries.
    Value *initialEntry = CONST_V<32>(block, indexTable->getInitialEntry());
    Value *baseValue = R_READ<32>(block, base.getReg());
    Value *switchValue =
        BinaryOperator::Create(Instruction::Add, initialEntry, baseValue, "", block);

    BasicBlock *continueBlock = BasicBlock::Create(block->getContext(), "", F, 0);

    // Default destination: trap, then fall through to the continuation.
    BasicBlock *trapBlock = BasicBlock::Create(block->getContext(), "", F, 0);
    Function *trapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
    CallInst::Create(trapFn, "", trapBlock);
    BranchInst::Create(continueBlock, trapBlock);

    const std::vector<uint8_t> &tableBytes = indexTable->getJumpIndexTable();

    SwitchInst *theSwitch =
        SwitchInst::Create(switchValue, trapBlock, tableBytes.size(), block);

    // One case per table byte; the case value is the byte's position.
    int caseNo = 0;
    for (std::vector<uint8_t>::const_iterator entry = tableBytes.begin();
         entry != tableBytes.end();
         ++entry, ++caseNo)
    {
        BasicBlock *writeBlock =
            emitJumpIndexWrite(F, *entry, dest.getReg(), continueBlock);
        theSwitch->addCase(CONST_V<32>(block, caseNo), writeBlock);
    }

    // Subsequent code is emitted into the continuation block.
    block = continueBlock;
}
// assume the immediate references code if: // * we are dealing with a fully linked ELF // * The immediate is in the range of a valid code or data section static bool setHeuristicRef(ExecutableContainer *c, InstPtr I, int opnum, stack<VA> &funcs, raw_ostream &out, const std::string whichInst) { MCOperand op; std::string imp_name; ElfTarget *elft = dynamic_cast<ElfTarget*>(c); op = I->get_inst().getOperand(opnum); LASSERT(op.isImm(), "No immediate operand for " + whichInst); VA imm = op.getImm(); if(elft && elft->isLinked()) { if (elft->is_in_code(imm)) { // this instruction references code I->set_call_tgt(imm); // make sure we disassemble at this new address funcs.push(imm); out << "Found new function entry from " << whichInst << ": " << to_string<VA>(imm, hex) << "\n"; return true; } else if (elft->is_in_data(imm)) { out << "Adding local data ref to: " << to_string<VA>(imm, hex) << "\n"; I->set_data_offset(imm); } else if (c->find_import_name(imm, imp_name)) { out << "Import name is: " << imp_name << "\n"; } } return false; }
// Decide whether a relocated address that lies in a code section should
// be treated as data rather than as a function entry.
//
// c:         container to read bytes from
// I:         instruction that references addr
// addr:      relocated address under inspection
// funcs:     forwarded to treatCodeAsData
// relocSize: size forwarded to treatCodeAsData for non-PUSHi32 instructions
//
// For every opcode other than PUSHi32 the decision is delegated to
// treatCodeAsData(c, addr, relocSize, funcs). For PUSHi32 the four bytes
// at addr are read; if they form the value 0xFFFFFFFE the location is
// reported as an SEH handler and handed to treatCodeAsData with a fixed
// size of 0x28, otherwise false is returned.
bool dataInCodeHeuristic(
        ExecutableContainer *c,
        InstPtr I,
        uint32_t addr,
        list<VA> &funcs,
        uint32_t relocSize)
{
    if (I->get_inst().getOpcode() != X86::PUSHi32) {
        return treatCodeAsData(c, addr, relocSize, funcs);
    }

    // detect SEH handler
    //
    // Read into a zero-initialised buffer so that a read which stores
    // nothing cannot leave indeterminate bytes in the comparison below
    // (the result of readByte is not inspected here).
    uint8_t raw[4] = {0, 0, 0, 0};
    for (uint32_t i = 0; i < 4; ++i) {
        c->readByte(addr + i, &raw[i]);
    }

    // Assemble the dword explicitly as little-endian (the target is x86)
    // rather than aliasing a host integer, so the result does not depend
    // on the byte order of the machine running this tool.
    const uint32_t dw1 =
          static_cast<uint32_t>(raw[0])
        | (static_cast<uint32_t>(raw[1]) << 8)
        | (static_cast<uint32_t>(raw[2]) << 16)
        | (static_cast<uint32_t>(raw[3]) << 24);

    const uint32_t sehMarker = 0xFFFFFFFE;
    // NOTE(review): 0x28 is presumably the size of the SEH record that
    // starts at addr -- confirm.
    const uint32_t sehDataSize = 0x28;

    if (dw1 == sehMarker) {
        llvm::outs() << "WARNING: Heuristically detected SEH handler at: "
            << to_string<VA>(addr, hex) << "\n";
        return treatCodeAsData(c, addr, sehDataSize, funcs);
    }

    return false;
}
// return true if this instruction // branches via a memory lookup static bool isBranchViaMemory(InstPtr inst) { switch(inst->get_inst().getOpcode()) { case X86::JMP32m: case X86::CALL32m: return true; default: return false; } }
static bool processJumpIndexTable(ExecutableContainer *c, NativeBlockPtr B, InstPtr jmpinst, const vector<VA> &jmptable_entries, raw_ostream &out) { // first, find which operand was the index // register in jmpinst // const MCInst &inst = jmpinst->get_inst(); int index_reg = regFromInst(inst); if(index_reg == -1) { out << "JMPINST does not use a register to index\n"; return false; } // loop backwards through block looking for // instructions that write to this register const std::list<InstPtr> &block_insts = B->get_insts(); InstPtr write_reg_insn; for( std::list<InstPtr>::const_reverse_iterator itr = block_insts.rbegin(); itr != block_insts.rend(); itr++) { // check if we 'write to a register' if(writesToReg((*itr)->get_inst(), index_reg)) { write_reg_insn = *itr; break; } } if(write_reg_insn == NULL) { out << "No instruction writes index register in the same basic block\n"; return false; } out << "Found register index write instruction:\n"; if(!parseJumpIndexTable(c, write_reg_insn, jmptable_entries, out)) { out << "Could not parse jump index table, aborting\n"; return false; } return true; }
// Report whether the instruction's opcode is one whose immediate or
// memory operand is allowed to be interpreted as a reference to code.
static bool canInstructionReferenceCode(InstPtr inst)
{
    const unsigned opcode = inst->get_inst().getOpcode();

    switch (opcode) {
        // Immediate forms: the imm32 itself could be a code address.
        case X86::MOV32mi:   // store imm32 to memory
        case X86::MOV32o32a: // writes imm32 to eax; probably code
        case X86::MOV32ri:   // load imm32 into a register
        case X86::PUSHi32:   // push imm32
            return true;

        // Memory forms: the referenced memory could be or hold code.
        // need to check if mem references are valid here
        case X86::MOV32rm:   // load memory into a register
        case X86::PUSH32rmm: // push memory
            return true;

        // X86::LEA32r (write address of mem to reg) is deliberately
        // not listed.
        default:
            return false;
    }
}
// Decode one basic block starting at address e.
//
// Instructions are decoded sequentially until one ends the flow: it has a
// true/false branch target, is a terminator, or calls/jumps to an external
// that is marked NoReturn (or is an external thunk jump).
//
// c:             container supplying bytes, relocations and import names
// f:             map used to resolve external symbols
// d:             instruction decoder
// blockChildren: receives addresses of successor blocks
// e:             address of the first instruction of the block
// funcs:         receives addresses of newly discovered function entries
// out:           log stream
//
// Returns the populated block. LASSERT fires when an external symbol
// cannot be resolved before it is dereferenced, when a CALLpcrel32 operand
// is not an immediate, when a call thunk has no import name, or when an
// internal JMP32m cannot be parsed as a jump table.
NativeBlockPtr decodeBlock(ExecutableContainer *c,
        ExternalFunctionMap &f,
        LLVMByteDecoder &d,
        stack<VA> &blockChildren,
        VA e,
        stack<VA> &funcs,
        raw_ostream &out)
{
    NativeBlockPtr B = NativeBlockPtr(new NativeBlock(e, d.getPrinter()));
    VA curAddr = e;
    bool has_follow = true;

    out << "Processing block: " << B->get_name() << "\n";
    do {
        InstPtr I = d.getInstFromBuff(curAddr, c);

        // I, if a terminator, will have true and false targets filled in.
        // I could be an indirect branch of some kind, which is dealt with
        // here, as is a data instruction with a relocation.
        out << to_string<VA>(I->get_loc(), hex) << ":";
        out << I->printInst() << "\n";

        if (I->get_tr() != 0) {
            B->add_follow(I->get_tr());
            has_follow = false;
            out << "Adding block: " << to_string<VA>(I->get_tr(), hex) << "\n";
            blockChildren.push(I->get_tr());
        }

        if (I->get_fa() != 0) {
            B->add_follow(I->get_fa());
            has_follow = false;
            out << "Adding block: " << to_string<VA>(I->get_fa(), hex) << "\n";
            blockChildren.push(I->get_fa());
        }

        if (I->terminator()) {
            has_follow = false;
        }

        // Do we need to add a data reference to this instruction?
        // There is no offset information in the instruction decoder, so
        // for now every address inside the instruction is checked for a
        // relocation. Only the first relocated byte is processed.
        for (uint32_t i = 0; i < I->get_len(); i++) {
            VA addrInInst = curAddr + i;
            if (c->is_addr_relocated(addrInInst)) {
                VA addr = 0;
                std::string has_imp;

                // this instruction has a relocation
                // save the relocation offset for later
                I->set_reloc_offset(i);

                if (c->find_import_name(addrInInst, has_imp)) {
                    // The relocation names an import: data or code.
                    if (f.is_data(has_imp)) {
                        ExternalDataRefPtr data_p = makeExtDataRefFromString(has_imp, f);
                        out << "Adding external data ref: " << has_imp << "\n";
                        I->set_ext_data_ref(data_p);
                    } else {
                        ExternalCodeRefPtr code_p = makeExtCodeRefFromString(has_imp, f);
                        LASSERT(code_p, "Failed to get ext call from map for symbol: "+has_imp);
                        // maybe this call doesn't return, in which case
                        // decoding of this flow stops
                        if (code_p->getReturnType() == ExternalCodeRef::NoReturn) {
                            has_follow = false;
                        }
                        out << "Adding external code ref: " << has_imp << "\n";
                        I->set_ext_call_target(code_p);
                    }
                } else if (c->relocate_addr(addrInInst, addr)) {
                    bool can_ref_code = canInstructionReferenceCode(I);
                    bool is_reloc_code = isAddrOfType(c, addr, ExecutableContainer::CodeSection);
                    bool is_reloc_data = isAddrOfType(c, addr, ExecutableContainer::DataSection);

                    if (isBranchViaMemory(I)) {
                        out << "Detect branch via memory, relocation handled later\n";
                    }
                    // this instruction can reference code and does
                    // reference code, so assume it points to a function
                    // unless the data-in-code heuristic says otherwise
                    else if (can_ref_code && is_reloc_code) {
                        list<VA> new_funcs;
                        if (dataInCodeHeuristic(c, I, addr, new_funcs)) {
                            // add new functions to our functions list
                            for (list<VA>::const_iterator nfi = new_funcs.begin();
                                 nfi != new_funcs.end();
                                 nfi++)
                            {
                                funcs.push(*nfi);
                            }

                            I->set_data_offset(addr);
                        } else {
                            I->set_call_tgt(addr);
                            out << "Adding: 0x" << to_string<VA>(addr, hex) << " as target\n";
                            funcs.push(addr);
                        }
                    }
                    // this instruction can't reference code and points to
                    // .text, or references data. Treat as data element
                    // TODO: extract this from .text and shove into .data?
                    else if ((!can_ref_code && is_reloc_code) || is_reloc_data) {
                        I->set_data_offset(addr);
                    } else {
                        out << "WARNING: relocation points to neither code nor data:"
                            << to_string<VA>(addr, hex) << "\n";
                    }
                } else {
                    out << "*NOT* Relocating relocatable addr:"
                        << to_string<uint32_t>(addrInInst, hex) << "\n";
                }
                break;
            }
        }

        // Is this instruction an external call?
        // In a COFF binary, the pcrel call can refer to an external symbol
        // that has been relocated, so get the string that corresponds and
        // provide the translation using the function map.
        MCOperand op;
        string imp;
        switch (I->get_inst().getOpcode()) {
            case X86::JMP32m:
                {
                    string thunkSym;
                    bool r = c->find_import_name(curAddr+2, thunkSym);
                    if (r) {
                        // this goes to an external API call
                        out << "Adding external code ref via JMP: " << thunkSym << "\n";
                        ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
                        I->set_ext_call_target(p);
                        has_follow = false;
                    } else {
                        // this is an internal jmp. probably a jump table.
                        bool did_jmptable = handlePossibleJumpTable(c, B, I, curAddr, funcs, blockChildren, out);

                        LASSERT(did_jmptable, "JMP32m processing aborted: couldn't parse jumptable");
                    }
                }
                break;

            case X86::CALLpcrel32:
                // this could be an external call in COFF, or not
                op = I->get_inst().getOperand(0);
                LASSERT(op.isImm(), "Nonsense for CALLpcrel32");
                if (op.getImm() != 0) {
                    VA callTgt = curAddr+op.getImm()+I->get_len();
                    bool foldFunc = false;

                    // speculate about callTgt: a call whose target is a
                    // lone JMP32m is folded into a direct external call
                    InstPtr spec = d.getInstFromBuff(callTgt, c);
                    if (spec->terminator() && spec->get_inst().getOpcode() == X86::JMP32m) {
                        string thunkSym;
                        bool r = c->find_import_name(callTgt+2, thunkSym);
                        LASSERT(r, "Need to find thunk import addr");
                        ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
                        // p is dereferenced below; fail with a diagnostic
                        // instead of dereferencing an empty reference, as
                        // the other lookups in this function do.
                        LASSERT(p, "Failed to get ext call from map for symbol: "+thunkSym);
                        I->set_ext_call_target(p);
                        foldFunc = true;
                        if (p->getReturnType() == ExternalCodeRef::NoReturn) {
                            has_follow = false;
                        }
                    }
                    if (foldFunc == false) {
                        // add this to our list of funcs to search
                        funcs.push(callTgt);
                    }
                } else {
                    // check to see if this is an external call...
                    if (I->has_ext_call_target() == false) {
                        // may be a local call
                        VA addr = curAddr+1, relo_addr = 0;
                        out << "Symbol not found, maybe a local call\n";
                        if (c->relocate_addr(addr, relo_addr)) {
                            out << "Found local call to: " << to_string<VA>(relo_addr, hex) << "\n";
                            I->set_call_tgt(relo_addr);
                            out << "Adding: 0x" << to_string<VA>(relo_addr, hex) << " as target\n";
                            funcs.push(relo_addr);
                        } else {
                            out << "Could not relocate addr for local call at: ";
                            out << to_string<VA>(curAddr, hex) << "\n";
                        }
                    } else {
                        out << "External call to: " << I->get_ext_call_target()->getSymbolName() << "\n";
                    }
                }
                break;

            case X86::CALL32m:
                // this should be a call to an external, or we have no idea
                // so try to look up the symbol being called at this address
                if (c->find_import_name(curAddr+2, imp)) {
                    ExternalCodeRefPtr p = makeExtCodeRefFromString(imp, f);
                    LASSERT(p, "Failed to get ext call from map for symbol"+imp);

                    out << "Calling symbol: " << p->getSymbolName() << "\n";
                    if (p->getReturnType() == ExternalCodeRef::NoReturn) {
                        has_follow = false;
                    }
                    I->set_ext_call_target(p);
                } else {
                    out << "Cannot find symbol at address ";
                    out << to_string<VA>(curAddr, hex) << "\n";
                }
                break;
        }

        B->add_inst(I);
        curAddr += I->get_len();
    } while (has_follow);

    // we have built a basic block, it might contain
    // multiple calls, but it only has one terminator
    // which is either a ret or a branch
    return B;
}
static bool handlePossibleJumpTable(ExecutableContainer *c, NativeBlockPtr B, InstPtr jmpinst, VA curAddr, stack<VA> &funcs, stack<VA> &blockChildren, raw_ostream &out) { LASSERT(jmpinst->get_inst().getOpcode() == X86::JMP32m, "handlePossibleJumpTable needs a JMP32m opcode" ); // is this a jump table, step 0 // does this instruction have a relocation? VA reloc_offset = jmpinst->get_reloc_offset(); if (reloc_offset == 0) { out << "Not a jump table: no relocation in JMP32m\n"; // bail, this is not a jump table return false; } // this relocation has to point to a relocation VA addrInInst = curAddr + reloc_offset; VA jmpTableEntry, someFunction; if(!c->relocate_addr(addrInInst, jmpTableEntry)) { out << "Not a jump table: can't relocate relocation in JMP32m\n"; // can't relocate, something bad happened return false; } if(!c->relocate_addr(jmpTableEntry, someFunction)) { // could not relocate the default jump table entry. // not good out << "Not a jump table: can't relocate first jump table entry\n"; return false; } bool is_reloc_code = isAddrOfType(c, someFunction, ExecutableContainer::CodeSection); if(!is_reloc_code) { // jump table entry not point to code out << "Not a jump table: first entry doesn't point to code\n"; return false; } // read jump table entries and add them as new function // entry points vector<VA> jmptable_entries; int new_funs; int original_zero; // this reads negative jump table indexes, but vectors are not negative // indexed. the negative most, which should be the new index 0, is now // index N. 
Reverse the vector so it will be index 0, and save the current // size as the original zeroth element new_funs = addJmpTableEntries(c, jmptable_entries, jmpTableEntry, -4, out); std::reverse(jmptable_entries.begin(), jmptable_entries.end()); out << "Added: " << to_string<int>(new_funs, dec) << " functions to jmptable\n"; original_zero = new_funs; // add original entry at the zero position jmptable_entries.push_back(someFunction); out << "Added JMPTABLE entry [" << to_string<uint32_t>(jmpTableEntry, hex) << "] => " << to_string<uint32_t>(someFunction, hex) << "\n"; // add the positive table entries new_funs = addJmpTableEntries(c, jmptable_entries, jmpTableEntry, 4, out); out << "Added: " << to_string<int>(new_funs, dec) << " functions to jmptable\n"; // associate instruction with jump table JumpTable *jt = new JumpTable(jmptable_entries, original_zero); jmpinst->set_jump_table(JumpTablePtr(jt)); stack<VA> *toPush = NULL; // if this jump table is in the format // jmp [reg*4+imm32], then it is conformant // and we can turn it into an llvm switch(); bool is_conformant = isConformantJumpInst(jmpinst); if(is_conformant) { toPush = &blockChildren; out << "GOT A CONFORMANT JUMP INST\n"; } else { toPush = &funcs; } // add these jump table entries as new entry points for(std::vector<VA>::const_iterator itr = jmptable_entries.begin(); itr != jmptable_entries.end(); itr++) { out << "Adding block via jmptable: " << to_string<VA>(*itr, hex) << "\n"; toPush->push(*itr); if(is_conformant) { B->add_follow(*itr); } } processJumpIndexTable(c, B, jmpinst, jmptable_entries, out); return true; }