/*
 * Choose the interval that applies to the code following the instruction
 * decoded in xptr (per the original comments, a return).
 *
 * xptr    - decoded instruction; only its length is used here.
 * next    - out: receives either iarg->current or a newly created interval.
 * irdebug - not used by this function.
 * iarg    - interval-building state; iarg->canonical_interval may be updated.
 * m_alloc - allocator handed through to new_ui().
 *
 * If the instruction is the last one before iarg->end, *next is simply
 * iarg->current. Otherwise a canonical interval is selected and, unless the
 * current recipe already matches it, a new interval beginning right after
 * the instruction is created. When FIX_INTERVALS_AT_RETURN is defined the
 * "already matches" shortcut is compiled out and a new interval is always
 * created.
 */
void
reset_to_canonical_interval(xed_decoded_inst_t *xptr, unwind_interval **next,
                            bool irdebug, interval_arg_t *iarg,
                            mem_alloc m_alloc)
{
  unwind_interval *cur = iarg->current;
  unwind_interval *base = iarg->first;
  unwind_interval *hw = iarg->highwatermark.uwi;

  // the instruction is the last one in the interval: nothing to set up
  if (iarg->ins + xed_decoded_inst_get_length(xptr) >= iarg->end) {
    *next = cur;
    return;
  }

  if (iarg->bp_frames_found) {
    // look for first bp frame
    base = find_first_bp_frame(base);
    set_ui_canonical(base, iarg->canonical_interval);
    iarg->canonical_interval = base;
  }
  else if (!iarg->canonical_interval) {
    // look for first nondecreasing with no jmp
    base = find_first_non_decr(base, hw);
    set_ui_canonical(base, iarg->canonical_interval);
    iarg->canonical_interval = base;
  }
  else {
    // a canonical interval already exists; the high-water-mark interval
    // takes its place when its bp_status is not BP_UNCHANGED and the
    // canonical one is BP_UNCHANGED, or is BP_SAVED while the high-water
    // mark is BP_HOSED
    if (hw && UWI_RECIPE(hw)->bp_status != BP_UNCHANGED) {
      bp_loc canon_bp = UWI_RECIPE(iarg->canonical_interval)->bp_status;
      if (canon_bp == BP_UNCHANGED ||
          (canon_bp == BP_SAVED && UWI_RECIPE(hw)->bp_status == BP_HOSED)) {
        set_ui_canonical(hw, iarg->canonical_interval);
        iarg->canonical_interval = hw;
      }
    }
    base = iarg->canonical_interval;
  }

  ra_loc ra_status = UWI_RECIPE(base)->ra_status;
  bp_loc bp_status = UWI_RECIPE(base)->bp_status;
  if (UWI_RECIPE(cur)->bp_status == BP_HOSED) {
    // a hosed BP stays hosed regardless of the canonical recipe
    bp_status = BP_HOSED;
  }

#ifndef FIX_INTERVALS_AT_RETURN
  // the current recipe already equals the one we would create
  if ((UWI_RECIPE(cur)->ra_status == ra_status) &&
      (UWI_RECIPE(cur)->bp_status == bp_status) &&
      (UWI_RECIPE(cur)->sp_ra_pos == UWI_RECIPE(base)->sp_ra_pos) &&
      (UWI_RECIPE(cur)->bp_ra_pos == UWI_RECIPE(base)->bp_ra_pos) &&
      (UWI_RECIPE(cur)->bp_bp_pos == UWI_RECIPE(base)->bp_bp_pos) &&
      (UWI_RECIPE(cur)->sp_bp_pos == UWI_RECIPE(base)->sp_bp_pos)) {
    *next = cur;
    return;
  }
#endif

  *next = new_ui(iarg->ins + xed_decoded_inst_get_length(xptr),
                 ra_status,
                 UWI_RECIPE(base)->sp_ra_pos,
                 UWI_RECIPE(base)->bp_ra_pos,
                 bp_status,
                 UWI_RECIPE(base)->sp_bp_pos,
                 UWI_RECIPE(base)->bp_bp_pos,
                 cur, m_alloc);
  set_ui_restored_canonical(*next,
                            UWI_RECIPE(iarg->canonical_interval)->prev_canonical);

  // the new interval becomes canonical when it is the one that introduced
  // the BP_HOSED status
  if (UWI_RECIPE(base)->bp_status != BP_HOSED && bp_status == BP_HOSED) {
    set_ui_canonical(*next, iarg->canonical_interval);
    iarg->canonical_interval = *next;
  }
}
unsigned int disas_get_target(unsigned int start_pc, struct PEMU_INST *inst) { PEMU_read_mem(start_pc, 15, inst->PEMU_inst_buf); xed_decoded_inst_zero_set_mode(&inst->PEMU_xedd_g, &inst->PEMU_dstate); xed_error_enum_t xed_error = xed_decode(&inst->PEMU_xedd_g, XED_STATIC_CAST(const xed_uint8_t *, inst->PEMU_inst_buf), 15); if (xed_error != XED_ERROR_NONE) { fprintf(stderr, "error in disas_get_target\n"); exit(0); } const xed_inst_t *xi = xed_decoded_inst_inst(&inst->PEMU_xedd_g); if(xed_decoded_inst_get_iclass(&inst->PEMU_xedd_g) != XED_ICLASS_CALL_NEAR) { return 0; } const xed_operand_t *op = xed_inst_operand(xi, 0); xed_reg_enum_t reg_id; xed_operand_enum_t op_name = xed_operand_name(op); unsigned int dest, tmp; if(operand_is_relbr(op_name, &dest)){ dest += (start_pc + xed_decoded_inst_get_length(&inst->PEMU_xedd_g)); }else if(operand_is_reg(op_name, ®_id)){ dest = PEMU_get_reg(reg_id); }else if(operand_is_mem4(op_name, &dest,0)){ PEMU_read_mem(dest, 4, &tmp); dest = tmp; } return dest; }
/*
 * Decode the instruction at pc into the global pemu_inst state and return
 * the address it transfers control to.
 *
 * The first operand decides how the target is obtained:
 *   - memory operand:   the target is loaded from the operand's address;
 *   - register operand: the target is the register's current value;
 *   - anything else:    pc + instruction length + branch displacement.
 *
 * Returns 0 if the bytes at pc cannot be decoded.
 * NOTE(review): 0 is used as the "no target" value here; confirm callers
 * treat it that way.
 */
static target_ulong Instrument_CALL_NEAR(target_ulong pc)
{
	uint32_t mem_addr = 0;
	xed_reg_enum_t reg_id;
	target_ulong target = 0;

	PEMU_read_mem(pc, 15, pemu_inst.PEMU_inst_buf);
	xed_decoded_inst_zero_set_mode(&pemu_inst.PEMU_xedd_g, &pemu_inst.PEMU_dstate);
	xed_error_enum_t xed_error = xed_decode(&pemu_inst.PEMU_xedd_g,
			XED_STATIC_CAST(const xed_uint8_t *, pemu_inst.PEMU_inst_buf), 15);
	if (xed_error != XED_ERROR_NONE) {
		/* nothing was decoded, so the operand queries below would be
		 * working on an invalid instruction */
		return 0;
	}

	const xed_inst_t *ins = xed_decoded_inst_inst(&pemu_inst.PEMU_xedd_g);
	const xed_operand_t *op = xed_inst_operand(ins, 0);
	xed_operand_enum_t op_name = xed_operand_name(op);

	if (operand_is_mem(op_name, &mem_addr, 0)) {
		PEMU_read_mem(mem_addr, sizeof(target), &target);
	} else if (operand_is_reg(op_name, &reg_id)) {
		target = PEMU_get_reg(reg_id);
	} else {
		int len = xed_decoded_inst_get_length(&pemu_inst.PEMU_xedd_g);
		target = xed_decoded_inst_get_branch_displacement(&pemu_inst.PEMU_xedd_g) + pc + len;
	}

	return target;
}
static void format_jmp(const xed_inst_t *xi) { uint32_t dest; const xed_operand_t *op = xed_inst_operand(xi, 0); xed_operand_enum_t op_name = xed_operand_name(op); xed_reg_enum_t reg_id; #ifdef STATISTICS g_symbol_nums++; fprintf(stderr, "jmp at pc\t%x\n", g_pc); g_jmp_num++; #endif INST* inst = get_inst(g_pc); if(inst->type == TAIL){//jmp to plt #ifdef DEBUG fprintf(stdout, "TAIL:\t%p\n", inst); #endif sprintf(inst_buffer, "%s %s", "jmp", inst->api_call.fname); return; } if(operand_is_relbr(op_name, &dest)){ dest += g_pc + xed_decoded_inst_get_length(&xedd_g); sprintf(inst_buffer, "%s L_0x%x", "jmp", dest); }else{ fprintf(stderr, "error in format_jmp\n"); } }
static void format_direct_call(const xed_inst_t *xi) { uint32_t dest; const xed_operand_t *op = xed_inst_operand(xi, 0); xed_operand_enum_t op_name = xed_operand_name(op); xed_reg_enum_t reg_id; #ifdef STATISTICS fprintf(stderr, "call at pc\t%x\n", g_pc); g_symbol_nums++; g_call_num++; #endif INST* inst = get_inst(g_pc); if(operand_is_relbr(op_name, &dest)){ if(inst->api_call.fname){ if(inst->api_call.type == API_IMP) sprintf(inst_buffer, "call %s", inst->api_call.fname); else sprintf(inst_buffer, "call dword ptr %s", inst->api_call.fname); return; } dest += g_pc + xed_decoded_inst_get_length(&xedd_g); sprintf(inst_buffer, "call func_0x%x", dest); }else{ fprintf(stderr, "error in format_jmp\n"); } }
/*
 * Handle an LEA instruction while building unwind intervals.
 *
 * When the first operand is REG0 and names the base pointer, a new
 * RA_SP_RELATIVE interval with bp_status BP_HOSED is started right after
 * the instruction; all position fields are copied from iarg->current.
 * The high-water mark is moved to the new interval when its state has
 * HW_BP_SAVED set and HW_BP_OVERWRITTEN clear and its sp_ra_pos equals the
 * new interval's.
 *
 * Returns the new interval, or iarg->current when nothing changes.
 */
unwind_interval *
process_lea(xed_decoded_inst_t *xptr, const xed_inst_t *xi, interval_arg_t *iarg)
{
  highwatermark_t *hw = &(iarg->highwatermark);
  xed_operand_enum_t dst_name = xed_operand_name(xed_inst_operand(xi, 0));

  if (dst_name != XED_OPERAND_REG0) {
    return iarg->current;
  }
  if (!x86_isReg_BP(xed_decoded_inst_get_reg(xptr, dst_name))) {
    return iarg->current;
  }

  //=======================================================================
  // action: clobbering the base pointer; begin a new SP_RELATIVE interval
  // note: we don't check that BP is BP_SAVED; we might have to
  //=======================================================================
  unwind_interval *clobbered =
    new_ui(iarg->ins + xed_decoded_inst_get_length(xptr),
           RA_SP_RELATIVE,
           iarg->current->sp_ra_pos,
           iarg->current->bp_ra_pos,
           BP_HOSED,
           iarg->current->sp_bp_pos,
           iarg->current->bp_bp_pos,
           iarg->current);

  if (HW_TEST_STATE(hw->state, HW_BP_SAVED, HW_BP_OVERWRITTEN) &&
      (hw->uwi->sp_ra_pos == clobbered->sp_ra_pos)) {
    hw->uwi = clobbered;
    hw->state = HW_NEW_STATE(hw->state, HW_BP_OVERWRITTEN);
  }

  return clobbered;
}
/*
 * Compute the address of the instruction following @inst.
 *
 * @pip:  out; receives the next instruction pointer on success.
 * @inst: the decoded instruction located at @ip.
 * @ip:   address of @inst.
 *
 * The next IP is @ip plus the instruction length, plus the branch
 * displacement when the instruction has one.
 *
 * Returns zero on success.
 * Returns -pte_internal if @pip or @inst is NULL.
 * Returns -pte_bad_insn if the instruction length is reported as zero.
 */
static int xed_next_ip(uint64_t *pip, const xed_decoded_inst_t *inst,
		       uint64_t ip)
{
	xed_uint_t insn_len;

	if (!pip || !inst)
		return -pte_internal;

	insn_len = xed_decoded_inst_get_length(inst);
	if (insn_len == 0) {
		printf("[xed error: failed to determine instruction length]\n");
		return -pte_bad_insn;
	}

	ip += insn_len;

	/* A non-zero displacement width identifies a branch.
	 *
	 * Conditional branches are included even though we cannot tell
	 * whether they were taken; a conditional branch ends a block, so the
	 * value computed here is not used for them and the next block starts
	 * with the correct IP.
	 */
	if (xed_decoded_inst_get_branch_displacement_width(inst) != 0)
		ip += xed_decoded_inst_get_branch_displacement(inst);

	*pip = ip;
	return 0;
}
// Decode the instruction stored at address `ip` in this process and return
// its length in bytes.
//
// The decoder mode follows the size of ADDRINT: 32-bit legacy mode when
// ADDRINT is 4 bytes, 64-bit long mode otherwise. Up to 15 bytes are handed
// to the decoder. If decoding fails a message is printed and the process
// exits with status 1.
ADDRINT GetInstructionLength (ADDRINT ip)
{
    xed_state_t dstate;
    xed_decoded_inst_t xedd;

    if (sizeof(ADDRINT) == 4)
        xed_state_init(&dstate,
                       XED_MACHINE_MODE_LEGACY_32,
                       XED_ADDRESS_WIDTH_32b,
                       XED_ADDRESS_WIDTH_32b);
    else
        xed_state_init(&dstate,
                       XED_MACHINE_MODE_LONG_64,
                       XED_ADDRESS_WIDTH_64b,
                       XED_ADDRESS_WIDTH_64b);

    xed_decoded_inst_zero_set_mode(&xedd, &dstate);

    const UINT32 len = 15;
    xed_error_enum_t xed_error =
        xed_decode(&xedd, reinterpret_cast<const UINT8*>(ip), len);

    if (xed_error != XED_ERROR_NONE) {
        // %p expects a void pointer
        printf ("Failure to decode at %p\n", reinterpret_cast<void *>(ip));
        exit (1);
    }

    return xed_decoded_inst_get_length(&xedd);
}
/*
 * Handle a LEAVE instruction while building unwind intervals.
 *
 * Always starts a fresh interval immediately after the instruction with
 * ra_status RA_SP_RELATIVE, bp_status BP_UNCHANGED and every position field
 * set to 0, linked after iarg->current. xi is not used.
 *
 * Returns the newly created interval.
 */
unwind_interval *
process_leave(xed_decoded_inst_t *xptr, const xed_inst_t *xi, interval_arg_t *iarg)
{
  return new_ui(iarg->ins + xed_decoded_inst_get_length(xptr),
                RA_SP_RELATIVE,
                0,              /* sp_ra_pos */
                0,              /* bp_ra_pos */
                BP_UNCHANGED,
                0,              /* sp_bp_pos */
                0,              /* bp_bp_pos */
                iarg->current);
}
bool VariableRecovery(BYTE *startAddr, BYTE *endAddr, std::map<int, AbstractVariable*> &container) { const unsigned int maxInstructionLength = 15; // the max length of a x86 instruction is 15byte xed_decoded_inst_t xedd; int instLength, index; BYTE *currentAddr = startAddr; char buffer[1024]; VariableHunter *varHunter = new VariableHunter(); // initialize the variable hunter static const xed_state_t dstate = { XED_MACHINE_MODE_LEGACY_32, XED_ADDRESS_WIDTH_32b }; //currently, we only implement the 32bit machine index = 0; while(currentAddr < endAddr) // only small than, can't be equal. { memset(buffer, 0, sizeof(buffer)); xed_decoded_inst_zero_set_mode(&xedd, &dstate); xed_error_enum_t xedCode = xed_decode(&xedd, (uint8_t*)currentAddr, maxInstructionLength); if(xedCode == XED_ERROR_NONE) { instLength = xed_decoded_inst_get_length(&xedd); //get the length of the instruction in byte xed_uint64_t runtime_address = (xed_uint64_t)currentAddr; xed_decoded_inst_dump_intel_format(&xedd, buffer, 1024, runtime_address); printf("0x%x\t\t%s\n", index, buffer); varHunter -> findVariable(xedd); currentAddr += instLength; index += instLength; } else return false; } varHunter -> getResult(container); delete varHunter; return true; }
static int format_jcc(const xed_inst_t *xi) { uint32_t dest, next; char opcode[20], jmp_dst[20]; const xed_operand_t *op = xed_inst_operand(xi, 0); xed_operand_enum_t op_name = xed_operand_name(op); xed_reg_enum_t reg_id; strcpy(opcode, xed_iclass_enum_t2str(xed_decoded_inst_get_iclass(&xedd_g))); INST* inst = get_inst(g_pc); if(operand_is_relbr(op_name, &dest)){ next = g_pc + xed_decoded_inst_get_length(&xedd_g); dest += next;//TODO: handle two branch #ifdef DEBUG fprintf(stdout, "format:%x\t%s\n", g_pc, g_inst_str); #endif #ifdef STATISTICS g_symbol_nums++; g_jcc_num++; #endif if(get_inst(dest)) sprintf(inst_buffer, "%s L_0x%x", opcode, dest); else{ sprintf(inst_buffer, "%s L_ERROR_0x%x", opcode, g_current_func->begin()->first); #ifdef STATISTICS g_check_nums += 1; fprintf(stderr, "check at pc\t%x\n", g_pc); #endif } if(get_inst(next)) memset(safety_guard, 0, sizeof(safety_guard)); else{ sprintf(safety_guard, "jmp L_ERROR_0x%x", g_current_func->begin()->first); #ifdef STATISTICS fprintf(stderr, "check at pc\t%x\n", g_pc); g_check_nums += 1; #endif } }else{ fprintf(stderr, "error in format_jcc\n"); } }
/*
 * Return the address a near call transfers control to.
 *
 * ins - XED instruction descriptor of the call; its first operand decides
 *       how the target is obtained.
 * pc  - address of the call instruction.
 *
 *   - memory operand:   the target is loaded from the operand's address;
 *   - register operand: the target is the register's current value;
 *   - anything else:    pc + instruction length + branch displacement,
 *                       both taken from the global decoded instruction
 *                       xedd_g.
 */
static target_ulong Instrument_CALL_NEAR(const xed_inst_t *ins, target_ulong pc)
{
	uint32_t mem_addr = 0;
	xed_reg_enum_t reg_id;
	/* defined value even if the memory read below stores nothing */
	target_ulong target = 0;

	const xed_operand_t *op = xed_inst_operand(ins, 0);
	xed_operand_enum_t op_name = xed_operand_name(op);

	if (operand_is_mem(op_name, &mem_addr, 0)) {
		PEMU_read_mem(mem_addr, sizeof(target), &target);
	} else if (operand_is_reg(op_name, &reg_id)) {
		target = PEMU_get_reg(reg_id);
	} else {
		int len = xed_decoded_inst_get_length(&xedd_g);
		target = xed_decoded_inst_get_branch_displacement(&xedd_g) + pc + len;
	}

	return target;
}
static void xed_print_insn(const xed_decoded_inst_t *inst, uint64_t ip, const struct ptxed_options *options) { xed_print_info_t pi; char buffer[256]; xed_bool_t ok; if (!inst || !options) { printf(" [internal error]"); return; } if (options->print_raw_insn) { xed_uint_t length, i; length = xed_decoded_inst_get_length(inst); for (i = 0; i < length; ++i) printf(" %02x", xed_decoded_inst_get_byte(inst, i)); for (; i < pt_max_insn_size; ++i) printf(" "); } xed_init_print_info(&pi); pi.p = inst; pi.buf = buffer; pi.blen = sizeof(buffer); pi.runtime_address = ip; if (options->att_format) pi.syntax = XED_SYNTAX_ATT; ok = xed_format_generic(&pi); if (!ok) { printf(" [xed print error]"); return; } printf(" %s", buffer); }
/*
 * Handle an AND instruction while building unwind intervals.
 *
 * When the first operand is REG0, names the stack pointer, and the current
 * recipe's bp_status is not BP_UNCHANGED, a new RA_BP_FRAME interval is
 * started right after the instruction; every other recipe field is copied
 * from iarg->current.
 *
 * Returns the new interval, or iarg->current when nothing changes.
 */
unwind_interval *
process_and(xed_decoded_inst_t *xptr, const xed_inst_t *xi,
            interval_arg_t *iarg, mem_alloc m_alloc)
{
  xed_operand_enum_t dst_name = xed_operand_name(xed_inst_operand(xi, 0));

  if (dst_name != XED_OPERAND_REG0) {
    return iarg->current;
  }

  xed_reg_enum_t dst_reg = xed_decoded_inst_get_reg(xptr, dst_name);
  if (!x86_isReg_SP(dst_reg)) {
    return iarg->current;
  }
  if (UWI_RECIPE(iarg->current)->bp_status == BP_UNCHANGED) {
    return iarg->current;
  }

  //-----------------------------------------------------------------------
  // we are adjusting the stack pointer via 'and' instruction
  //-----------------------------------------------------------------------
  return new_ui(iarg->ins + xed_decoded_inst_get_length(xptr),
                RA_BP_FRAME,
                UWI_RECIPE(iarg->current)->sp_ra_pos,
                UWI_RECIPE(iarg->current)->bp_ra_pos,
                UWI_RECIPE(iarg->current)->bp_status,
                UWI_RECIPE(iarg->current)->sp_bp_pos,
                UWI_RECIPE(iarg->current)->bp_bp_pos,
                iarg->current, m_alloc);
}
void x86_dump_ins(void *ins) { xed_decoded_inst_t xedd; xed_decoded_inst_t *xptr = &xedd; xed_error_enum_t xed_error; char inst_buf[1024]; char errbuf[2048]; xed_decoded_inst_zero_set_mode(xptr, &x86_decoder_settings.xed_settings); xed_error = xed_decode(xptr, (uint8_t*) ins, 15); if (xed_error == XED_ERROR_NONE) { xed_format_xed(xptr, inst_buf, sizeof(inst_buf), (xed_uint64_t)(uintptr_t)ins); sprintf(errbuf, "(%p, %d bytes, %s) %s \n" , ins, xed_decoded_inst_get_length(xptr), xed_iclass_enum_t2str(iclass(xptr)), inst_buf); } else { #if defined(ENABLE_XOP) && defined (HOST_CPU_x86_64) amd_decode_t decode_res; adv_amd_decode(&decode_res, ins); if (decode_res.success) { if (decode_res.weak) sprintf(errbuf, "(%p, %d bytes) weak AMD XOP \n", ins, (int) decode_res.len); else sprintf(errbuf, "(%p, %d bytes) robust AMD XOP \n", ins, (int) decode_res.len); } else #endif // ENABLE_XOP and HOST_CPU_x86_64 sprintf(errbuf, "x86_dump_ins: xed decode error addr=%p, code = %d\n", ins, (int) xed_error); } EMSG(errbuf); fprintf(stderr, errbuf); fflush(stderr); }
int main(int argc, char** argv) { xed_bool_t long_mode = 1; xed_decoded_inst_t xedd; xed_state_t dstate; unsigned char itext[15] = { 0xf2, 0x2e, 0x4f, 0x0F, 0x85, 0x99, 0x00, 0x00, 0x00 }; xed_tables_init(); // one time per process if (long_mode) dstate.mmode=XED_MACHINE_MODE_LONG_64; else dstate.mmode=XED_MACHINE_MODE_LEGACY_32; xed_decoded_inst_zero_set_mode(&xedd, &dstate); xed_ild_decode(&xedd, itext, XED_MAX_INSTRUCTION_BYTES); printf("length = %d\n",xed_decoded_inst_get_length(&xedd)); return 0; (void) argc; (void) argv; //pacify compiler }
/*
 * Decode the instruction at pc (into the global xedd_g) and register the
 * target of a direct jmp/call as a trace start.
 *
 * Returns 1 for a near return and for a JMP / CALL_NEAR whose first operand
 * is not a relative branch. Returns 0 in every other case: decode failure,
 * a direct JMP / CALL_NEAR (after its target has been handed to
 * PEMU_add_trace), or any other instruction class.
 */
int PEMU_disas_handle_branch(target_ulong pc)
{
	if (disas_one_inst(pc) != XED_ERROR_NONE)
		return 0;

	xed_iclass_enum_t kind = xed_decoded_inst_get_iclass(&xedd_g);

	if (kind == XED_ICLASS_RET_NEAR)
		return 1;

	if (kind != XED_ICLASS_JMP && kind != XED_ICLASS_CALL_NEAR)
		return 0;

	const xed_operand_t *first_op =
		xed_inst_operand(xed_decoded_inst_inst(&xedd_g), 0);
	xed_operand_enum_t first_name = xed_operand_name(first_op);
	int insn_len = xed_decoded_inst_get_length(&xedd_g);
	unsigned int disp;

	if (!operand_is_relbr(first_name, &disp))
		return 1;

	/* displacement is relative to the end of the instruction */
	unsigned long dest = pc + insn_len + disp;
	PEMU_add_trace(dest);
	return 0;
}
/*
 * Handle an ADD or SUB instruction while building unwind intervals.
 *
 * Only instructions whose first operand is REG0 and names the stack pointer
 * have an effect:
 *
 *  - with an immediate second operand, a new interval is started after the
 *    instruction whose sp_ra_pos and sp_bp_pos are shifted by the immediate
 *    (negated for ADD). On the first positive shift (HW_SP_DECREMENTED not
 *    yet set) the high-water mark and iarg->canonical_interval are pointed
 *    at the new interval;
 *
 *  - without an immediate, and unless the current interval already is an
 *    RA_BP_FRAME, a new RA_BP_FRAME interval is started and
 *    iarg->bp_frames_found is set.
 *
 * Returns the new interval, or iarg->current when nothing changes.
 */
unwind_interval *
process_addsub(xed_decoded_inst_t *xptr, const xed_inst_t *xi, interval_arg_t *iarg)
{
  highwatermark_t *hw = &(iarg->highwatermark);
  const xed_operand_t *dst = xed_inst_operand(xi, 0);
  const xed_operand_t *src = xed_inst_operand(xi, 1);
  xed_operand_enum_t dst_name = xed_operand_name(dst);
  unwind_interval *result;

  if (dst_name != XED_OPERAND_REG0) {
    return iarg->current;
  }
  if (!x86_isReg_SP(xed_decoded_inst_get_reg(xptr, dst_name))) {
    return iarg->current;
  }

  //-----------------------------------------------------------------------
  // we are adjusting the stack pointer
  //-----------------------------------------------------------------------

  if (xed_operand_name(src) != XED_OPERAND_IMM0) {
    if (iarg->current->ra_status == RA_BP_FRAME) {
      return iarg->current;
    }
    //-------------------------------------------------------------------
    // no immediate in add/subtract from stack pointer; switch to
    // BP_FRAME
    //
    // 9 December 2007 -- John Mellor-Crummey
    //-------------------------------------------------------------------
    result = new_ui(iarg->ins + xed_decoded_inst_get_length(xptr),
                    RA_BP_FRAME,
                    iarg->current->sp_ra_pos,
                    iarg->current->bp_ra_pos,
                    iarg->current->bp_status,
                    iarg->current->sp_bp_pos,
                    iarg->current->bp_bp_pos,
                    iarg->current);
    iarg->bp_frames_found = true;
    return result;
  }

  // an ADD to SP shrinks the frame, so its immediate counts negatively
  int sign = (iclass_eq(xptr, XED_ICLASS_ADD)) ? -1 : 1;
  long immedv = sign * xed_decoded_inst_get_signed_immediate(xptr);
  ra_loc istatus = iarg->current->ra_status;

  if ((istatus == RA_STD_FRAME) && (immedv > 0) &&
      (hw->state & HW_SP_DECREMENTED)) {
    //-------------------------------------------------------------------
    // if we are in a standard frame and we see a second subtract,
    // it is time to convert interval to a BP frame to minimize
    // the chance we get the wrong offset for the return address
    // in a routine that manipulates SP frequently (as in
    // leapfrog_mod_leapfrog_ in the SPEC CPU2006 benchmark
    // 459.GemsFDTD, when compiled with PGI 7.0.3 with high levels
    // of optimization).
    //
    // 9 December 2007 -- John Mellor-Crummey
    //-------------------------------------------------------------------
    // (no action is taken here)
  }

  result = new_ui(iarg->ins + xed_decoded_inst_get_length(xptr),
                  istatus,
                  iarg->current->sp_ra_pos + immedv,
                  iarg->current->bp_ra_pos,
                  iarg->current->bp_status,
                  iarg->current->sp_bp_pos + immedv,
                  iarg->current->bp_bp_pos,
                  iarg->current);

  if (immedv > 0) {
    if (HW_TEST_STATE(hw->state, 0, HW_SP_DECREMENTED)) {
      //-----------------------------------------------------------------
      // set the highwatermark and canonical interval upon seeing
      // the FIRST subtract from SP; take no action on subsequent
      // subtracts.
      //
      // test case: main in SPEC CPU2006 benchmark 470.lbm
      // contains multiple subtracts from SP when compiled with
      // PGI 7.0.3 with high levels of optimization. the first
      // subtract from SP is to set up the frame; subsequent ones
      // are to reserve space for arguments passed to functions.
      //
      // 9 December 2007 -- John Mellor-Crummey
      //-----------------------------------------------------------------
      hw->uwi = result;
      hw->succ_inst_ptr = iarg->ins + xed_decoded_inst_get_length(xptr);
      hw->state = HW_NEW_STATE(hw->state, HW_SP_DECREMENTED);
      iarg->canonical_interval = result;
    }
  }

  return result;
}
// Disassemble the machine code in [codeStartAddr, codeEndAddr) and write one
// line per instruction to `out`.
//
// Each line is built from, in order: m_opts.m_indentLevel spaces, the color
// prefix, the address (absolute, or relative to codeStartAddr when
// m_relativeOffset is set) if m_addresses is set, the raw encoding bytes if
// m_printEncoding is set, the formatted instruction, an optional relative
// jump-target comment, and the color terminator.
//
// error() is called when XED fails to decode or format an instruction.
// Without HAVE_LIBXED a single explanatory line is written instead.
void Disasm::disasm(std::ostream& out, uint8_t* codeStartAddr,
                    uint8_t* codeEndAddr) {

#ifdef HAVE_LIBXED
  auto const endClr = m_opts.m_color.empty() ? "" : ANSI_COLOR_END;
  char codeStr[MAX_INSTR_ASM_LEN];
  xed_uint8_t *frontier;
  xed_decoded_inst_t xedd;
  uint64_t codeBase = uint64_t(codeStartAddr);
  uint64_t ip;

  // Decode and print each instruction
  for (frontier = codeStartAddr, ip = (uint64_t)codeStartAddr;
       frontier < codeEndAddr; ) {
    xed_decoded_inst_zero_set_mode(&xedd, &m_xedState);
    xed_decoded_inst_set_input_chip(&xedd, XED_CHIP_INVALID);
    xed_error_enum_t xed_error = xed_decode(&xedd, frontier, 15);
    if (xed_error != XED_ERROR_NONE) error("disasm error: xed_decode failed");

    // Get disassembled instruction in codeStr
    auto const syntax = m_opts.m_forceAttSyntax ? XED_SYNTAX_ATT
                                                : s_xed_syntax;
    if (!xed_format_context(syntax, &xedd, codeStr,
                            MAX_INSTR_ASM_LEN, ip, nullptr)) {
      error("disasm error: xed_format_context failed");
    }
    uint32_t instrLen = xed_decoded_inst_get_length(&xedd);

    // If it's a jump, we're printing relative offsets, and the dest
    // is within the range we're printing, add the dest as a relative
    // offset.
    std::string jmpComment;
    auto const cat = xed_decoded_inst_get_category(&xedd);
    if (cat == XED_CATEGORY_COND_BR || cat == XED_CATEGORY_UNCOND_BR) {
      if (m_opts.m_relativeOffset) {
        auto disp = uint64_t(frontier + instrLen +
                             xed_decoded_inst_get_branch_displacement(&xedd) -
                             codeBase);
        if (disp < uint64_t(codeEndAddr - codeStartAddr)) {
          jmpComment = folly::format(" # {:#x}", disp).str();
        }
      }
    }

    for (int i = 0; i < m_opts.m_indentLevel; ++i) {
      out << ' ';
    }
    out << m_opts.m_color;
    if (m_opts.m_addresses) {
      const char* fmt = m_opts.m_relativeOffset ? "{:3x}: " : "{:#10x}: ";
      out << folly::format(fmt, ip - (m_opts.m_relativeOffset ? codeBase : 0));
    }
    if (m_opts.m_printEncoding) {
      // print encoding, like in objdump
      unsigned posi = 0;
      for (; posi < instrLen; ++posi) {
        out << folly::format("{:02x} ", (uint8_t)frontier[posi]);
      }
      // Every byte above takes three columns ("xx "); pad the remaining
      // slots with the same width so the mnemonics line up.
      for (; posi < 16; ++posi) {
        out << "   ";
      }
    }
    out << codeStr << jmpComment << endClr << '\n';
    frontier += instrLen;
    ip += instrLen;
  }
#else
  out << "This binary was compiled without disassembly support\n";
#endif // HAVE_LIBXED
}
static void Instrument_CALL(const xed_inst_t* xi) { #ifdef DEBUG fprintf(stdout, "txt:instrument_call\n"); #endif xed_reg_enum_t reg_id; unsigned int dest = 0, taint = 0; uint32_t buf; char *fname; const xed_operand_t *op = xed_inst_operand(xi, 0); xed_operand_enum_t op_name = xed_operand_name(op); API_TYPE type; INST *inst; if(operand_is_mem4(op_name, &dest, 0)){ int mem_idx = op_name == XED_OPERAND_MEM1 ? 1 : 0; xed_reg_enum_t base_regid = xed_decoded_inst_get_base_reg(&xedd_g, mem_idx); PEMU_read_mem(dest, 4, &buf); dest = buf; if(taint = t_get_reg_taint(base_regid)){ #ifdef DEBUG fprintf(stdout, "txt: indirect call1\n"); #endif if(type = is_api_call(dest, &fname)){ goto API_CALL; }else{ insert_pc_addr(taint, 2); } }else if(taint = t_get_mem_taint(dest)){ #ifdef DEBUG fprintf(stdout, "txt: indirect call2\n"); #endif if(type = is_api_call(dest, &fname)){ goto API_CALL; }else{ insert_pc_addr(taint, 2); } } return; }else if(operand_is_reg(op_name, ®_id)){ #ifdef DEBUG fprintf(stdout, "txt: indirect call3\n"); #endif if(taint = t_get_reg_taint(reg_id)){ insert_pc_addr(taint, 2); dest = PEMU_get_reg(reg_id); if(type = is_api_call(dest, &fname)){ goto API_CALL; }else{ insert_pc_addr(taint, 2); } } return; }else if(operand_is_relbr(op_name, &dest)){ dest += (g_pc + xed_decoded_inst_get_length(&xedd_g)); if(type = is_api_call(dest, &fname)){ #ifdef DEBUG fprintf(stdout, "is_api_call\t%x\t%x\n", dest, type); #endif goto REST; } return; } API_CALL: inst = get_inst(taint); // api_copy(&inst->api_call, get_api_call(dest)); REST: #ifdef DEBUG fprintf(stdout, "taint:\t%x\t%x\n", taint, dest); #endif t_set_reg_taint(XED_REG_EAX, 0); handle_api_issues(get_api_call(dest), 0); }
/*for userspace now*/
/*
 * Decode instructions starting at pc_start and fill in bbl.
 *
 * bbl->PEMU_bbl_pc and bbl->bbl->start_pc are set to pc_start;
 * bbl->bbl->size and bbl->bbl->inst_count are reset to 0.
 *
 * Returns:
 *   -1                          when an instruction cannot be decoded;
 *   address after the branch    when the instruction is one of the listed
 *                               call/return/jump classes;
 *   0                           for any other instruction (see below).
 *
 * NOTE(review): the default case has always left the function at the first
 * non-branch instruction ("TODO"), so the size/inst_count accumulation after
 * the switch is currently never reached. It previously did so with a bare
 * `return;`, handing an indeterminate value to the caller; it now returns 0
 * explicitly. Confirm whether walking to the end of the block should be
 * enabled instead.
 */
int disas_basic_block_ex(target_ulong pc_start, struct PEMU_BBL *bbl)
{
	target_ulong pc;

	//if(pc_start > 0xc0000000) {
	//	return -1;
	//}

	bbl->PEMU_bbl_pc = bbl->bbl->start_pc = pc_start;
	bbl->bbl->size = 0;
	bbl->bbl->inst_count = 0;
	pc = pc_start;

	while (1) {
		xed_error_enum_t xed_error = disas_one_inst(pc);
		if (xed_error != XED_ERROR_NONE) {
			return -1;
		}
		//xed_decoded_inst_dump_intel_format(&xedd_g, inst_str,
		//		sizeof(inst_str), 0);

		xed_iclass_enum_t opcode = xed_decoded_inst_get_iclass(&xedd_g);
		int len = xed_decoded_inst_get_length(&xedd_g);

		switch (opcode) {
		//case XED_ICLASS_CALL_FAR:
		case XED_ICLASS_CALL_NEAR:
		//case XED_ICLASS_SYSCALL:
		//case XED_ICLASS_INT:
		//case XED_ICLASS_INT1:
		//case XED_ICLASS_INT3:
		case XED_ICLASS_RET_FAR:
		case XED_ICLASS_RET_NEAR:
		//case XED_ICLASS_IRET:
		//case XED_ICLASS_IRETD:
		//case XED_ICLASS_IRETQ:
		case XED_ICLASS_JB:
		case XED_ICLASS_JBE:
		case XED_ICLASS_JL:
		case XED_ICLASS_JLE:
		case XED_ICLASS_JMP:
		case XED_ICLASS_JMP_FAR:
		case XED_ICLASS_JNB:
		case XED_ICLASS_JNBE:
		case XED_ICLASS_JNL:
		case XED_ICLASS_JNLE:
		case XED_ICLASS_JNO:
		case XED_ICLASS_JNP:
		case XED_ICLASS_JNS:
		case XED_ICLASS_JNZ:
		case XED_ICLASS_JO:
		case XED_ICLASS_JP:
		case XED_ICLASS_JRCXZ:
		case XED_ICLASS_JS:
		case XED_ICLASS_JZ:
			/* control-transfer instruction ends the block */
			return pc + len;
		default:
			//TODO:
			return 0;
			//set_code_cache(pc, TAINTED);
			//printf("pc=%lu\tlen=%x\n", pc, len);
		}

		pc += len;
		bbl->bbl->size += len;
		bbl->bbl->inst_count++;
	}

	return 0;
}
/*
 * Build a trace starting at pc_start, run the registered hooks over it, and
 * release it.
 *
 * Instructions are decoded one at a time (via disas_one_inst, into the
 * global xedd_g) and appended to the current basic block. A conditional jump
 * registers its taken target with PEMU_add_trace() and starts a new basic
 * block at the fall-through address; a call, return, unconditional jump,
 * SYSENTER, INT/INT1/INT3 or HLT ends the trace. If set,
 * pemu_hook_funcs.inst_hook is called for every instruction and
 * pemu_hook_funcs.trace_hook once for the finished trace.
 *
 * Every instruction and block of the trace, and `trace` itself, is freed
 * before a successful return.
 *
 * Returns 0 on success and -1 when an instruction cannot be decoded.
 * NOTE(review): on the -1 path nothing built so far is freed -- confirm who
 * owns `trace` in that case.
 */
int disas_trace_ex(target_ulong pc_start, TRACE trace)
{
//#define PEMU_DEBUG
#ifdef PEMU_DEBUG
	printf("New trace %x\n", pc_start);
#endif
	target_ulong pc;
	char inst_str[256];
	int newtrace = 0;

	pc = pc_start;
	trace->trace_start = pc_start;
	BBL bbl = get_BBL(pc);
	trace->head = bbl;
	INS ins = NULL;
#ifdef PEMU_DEBUG
	printf("New BBL %x\n", pc_start);
#endif

	while (!newtrace) {
		xed_error_enum_t xed_error = disas_one_inst(pc);
		if (xed_error != XED_ERROR_NONE) {
			return -1;
		}
		xed_decoded_inst_dump_intel_format(&xedd_g, inst_str,
				sizeof(inst_str), 0);

		xed_iclass_enum_t opcode = xed_decoded_inst_get_iclass(&xedd_g);
		int len = xed_decoded_inst_get_length(&xedd_g);
#ifdef PEMU_DEBUG
		printf("New INS %x %s\n", pc, inst_str);
#endif
		/* append the instruction to the current basic block */
		if (!ins) {
			ins = get_INS(pc);
			bbl->head = ins;
		} else {
			ins->next = get_INS(pc);
			ins = ins->next;
		}

		if (pemu_hook_funcs.inst_hook != 0) {
			pemu_inst.PEMU_inst_pc = pc;
			pemu_hook_funcs.inst_hook(ins, 0);
		}

		bbl->inst_count++;
		bbl->size += len;

		switch (opcode) {
		//case XED_ICLASS_CALL_FAR:
		case XED_ICLASS_CALL_NEAR:
		case XED_ICLASS_SYSENTER:
		case XED_ICLASS_INT:
		case XED_ICLASS_INT1:
		case XED_ICLASS_INT3:
		case XED_ICLASS_HLT:
		case XED_ICLASS_RET_FAR:
		case XED_ICLASS_RET_NEAR:
		case XED_ICLASS_JMP:
		case XED_ICLASS_JMP_FAR:
			newtrace = 1;
			break;
		//case XED_ICLASS_IRET:
		//case XED_ICLASS_IRETD:
		//case XED_ICLASS_IRETQ:
		case XED_ICLASS_JB:
		case XED_ICLASS_JBE:
		case XED_ICLASS_JL:
		case XED_ICLASS_JLE:
		case XED_ICLASS_JNB:
		case XED_ICLASS_JNBE:
		case XED_ICLASS_JNL:
		case XED_ICLASS_JNLE:
		case XED_ICLASS_JNO:
		case XED_ICLASS_JNP:
		case XED_ICLASS_JNS:
		case XED_ICLASS_JNZ:
		case XED_ICLASS_JO:
		case XED_ICLASS_JP:
		case XED_ICLASS_JRCXZ:
		case XED_ICLASS_JS:
		case XED_ICLASS_JZ:
		{
			//add new trace target
			target_ulong rel;
			target_ulong target;

			rel = xed_decoded_inst_get_branch_displacement(&xedd_g);
			target = pc + rel + len;
			PEMU_add_trace(target);
#ifdef PEMU_DEBUG
			printf("New target %x %d\n", target, bbl->inst_count);
#endif
			/* the fall-through path continues in a new basic block */
			bbl->next = get_BBL(pc + len);
			bbl = bbl->next;
			ins = NULL;
#ifdef PEMU_DEBUG
			printf("New BBL %x\n", pc + len);
#endif
			break;
		}
		default:
			//TODO:
			break;
			//set_code_cache(pc, TAINTED);
			//printf("pc=%lu\tlen=%x\n", pc, len);
		}

		pc = pc + len;
	}

	if (pemu_hook_funcs.trace_hook != 0)
		pemu_hook_funcs.trace_hook(trace, 0);

	/* release every instruction and block, then the trace itself */
	BBL cur_bbl = trace->head;
	while (cur_bbl != NULL) {
		BBL next_bbl = cur_bbl->next;
		INS cur_ins = cur_bbl->head;
		while (cur_ins != NULL) {
			INS next_ins = cur_ins->next;
			free(cur_ins);
			cur_ins = next_ins;
		}
		free(cur_bbl);
		cur_bbl = next_bbl;
	}
	free(trace);

	return 0;
}
// Decode the single instruction located at address `start` and return one
// formatted "XDIS <addr>: ..." line for it.  At most 15 bytes are offered to
// the decoder, fewer when `stop - start` is smaller.  When decoding fails,
// the byte at `start` is printed in hex after a "????" marker instead.
// The machine mode (32-bit legacy vs. 64-bit long) follows sizeof(ADDRINT).
static string disassemble(UINT64 start, UINT64 stop)
{
    UINT64 addr = start;
    const xed_syntax_enum_t syntax = XED_SYNTAX_INTEL;
    ostringstream out;

    // Select the decoder state matching the width of ADDRINT.
    const bool is32 = (sizeof(ADDRINT) == 4);
    xed_state_t dstate;
    xed_state_init(&dstate,
                   is32 ? XED_MACHINE_MODE_LEGACY_32 : XED_MACHINE_MODE_LONG_64,
                   is32 ? XED_ADDRESS_WIDTH_32b : XED_ADDRESS_WIDTH_64b,
                   is32 ? XED_ADDRESS_WIDTH_32b : XED_ADDRESS_WIDTH_64b);

    /*while( pc < stop )*/
    {
        xed_decoded_inst_t xedd;
        xed_decoded_inst_zero_set_mode(&xedd, &dstate);

        // Never hand the decoder more bytes than remain before `stop`.
        const UINT32 maxBytes =
            (stop - addr < 15) ? static_cast<UINT32>(stop - addr) : 15;
        const xed_error_enum_t xed_error =
            xed_decode(&xedd, reinterpret_cast<const UINT8*>(addr), maxBytes);

        // Remember the stream flags so hex/dec changes do not leak out.
        const iostream::fmtflags savedFlags = out.flags();

        out << std::setfill('0') << "XDIS ";
        out << std::hex << std::setw(sizeof(ADDRINT)*2) << addr << std::dec;
        out << ": " << std::setfill(' ') << std::setw(4);

        if (xed_error != XED_ERROR_NONE)
        {
            // print the byte and keep going.
            const UINT8 memval = *reinterpret_cast<UINT8*>(addr);
            out << "???? "; // no extension
            out << std::hex << std::setw(2) << std::setfill('0')
                << static_cast<UINT32>(memval) << std::endl;
            addr += 1;
        }
        else
        {
            char text[200];

            out << xed_extension_enum_t2str(xed_decoded_inst_get_extension(&xedd));

            const unsigned int decodedLen = xed_decoded_inst_get_length(&xedd);
            print_hex_line(text, reinterpret_cast<UINT8*>(addr), decodedLen);
            out << " " << text;

            // pad out the instruction bytes
            unsigned int column = decodedLen;
            while (column < 12)
            {
                out << " ";
                column++;
            }
            out << " ";

            memset(text, 0, 200);
            if (xed_format(syntax, &xedd, text, 200, addr))
            {
                out << text << endl;
            }
            else
            {
                out << "Error disasassembling pc 0x" << std::hex << addr
                    << std::dec << endl;
            }
            addr += decodedLen;
        }

        out.flags(savedFlags);
    }

    return out.str();
}
/* This is the central function Given a memory address, reads a bunch of memory bytes and calls the disassembler to obtain the information Then it stores the information into the eh EntryHeader */ void decode_address(uint32_t address, EntryHeader *eh, int ignore_taint) { unsigned char insn_buf[MAX_INSN_BYTES]; unsigned int is_stackpush = 0, is_stackpop = 0; unsigned int stackpushpop_acc = 0; if (xed2chris_regmapping[XED_REG_EAX][0] == 0) { init_xed2chris(); assert(xed2chris_regmapping[XED_REG_EAX][0] != 0); } /* Read memory from TEMU */ TEMU_read_mem(address, MAX_INSN_BYTES, insn_buf); /* Disassemble instruction buffer */ xed_decoded_inst_zero_set_mode(&xedd, &dstate); xed_error_enum_t xed_error = xed_decode(&xedd, STATIC_CAST(const xed_uint8_t*,insn_buf), MAX_INSN_BYTES); xed_bool_t okay = (xed_error == XED_ERROR_NONE); if (!okay) return; // Increase counters tstats.insn_counter_decoded++; int i; /* Clear out Entry header */ memset(eh, 0, sizeof(EntryHeader)); /* Copy the address and instruction size */ eh->address = address; eh->inst_size = xed_decoded_inst_get_length(&xedd); if (eh->inst_size > MAX_INSN_BYTES) eh->inst_size = MAX_INSN_BYTES; /* Copy instruction rawbytes */ memcpy(eh->rawbytes, insn_buf, eh->inst_size); /* Get the number of XED operands */ const xed_inst_t* xi = xed_decoded_inst_inst(&xedd); int xed_ops = xed_inst_noperands(xi); int op_idx = -1; /* Get the category of the instruction */ xed_category_enum_t category = xed_decoded_inst_get_category(&xedd); /* Iterate over the XED operands */ for(i = 0; i < xed_ops; i++) { if(op_idx >= MAX_NUM_OPERANDS) break; //assert(op_idx < MAX_NUM_OPERANDS); /* Get operand */ const xed_operand_t* op = xed_inst_operand(xi,i); xed_operand_enum_t op_name = xed_operand_name(op); switch(op_name) { /* Register */ case XED_OPERAND_REG0: case XED_OPERAND_REG1: case XED_OPERAND_REG2: case XED_OPERAND_REG3: case XED_OPERAND_REG4: case XED_OPERAND_REG5: case XED_OPERAND_REG6: case XED_OPERAND_REG7: case XED_OPERAND_REG8: 
case XED_OPERAND_REG9: case XED_OPERAND_REG10: case XED_OPERAND_REG11: case XED_OPERAND_REG12: case XED_OPERAND_REG13: case XED_OPERAND_REG14: case XED_OPERAND_REG15: { xed_reg_enum_t reg_id = xed_decoded_inst_get_reg(&xedd, op_name); int regnum = xed2chris_regmapping[reg_id][1]; // Special handling for Push if (reg_id == XED_REG_STACKPUSH) is_stackpush = 1; else if (reg_id == XED_REG_STACKPOP) is_stackpop = 1; if (-1 == regnum) break; else { op_idx++; eh->num_operands++; eh->operand[op_idx].type = TRegister; eh->operand[op_idx].addr = xed2chris_regmapping[reg_id][0]; eh->operand[op_idx].length = (uint8_t) xed_decoded_inst_operand_length (&xedd, i); eh->operand[op_idx].access = (uint8_t) xed_operand_rw (op); eh->operand[op_idx].value = TEMU_cpu_regs[regnum]; switch (eh->operand[op_idx].addr) { case ax_reg: case bx_reg: case cx_reg: case dx_reg: case bp_reg: case sp_reg: case si_reg: case di_reg: eh->operand[op_idx].value &= 0xFFFF; break; case al_reg: case bl_reg: case cl_reg: case dl_reg: eh->operand[op_idx].value &= 0xFF; break; case ah_reg: case bh_reg: case ch_reg: case dh_reg: eh->operand[op_idx].value = (eh->operand[i].value & 0xFF00) >> 8; break; default: break; } } if (ignore_taint == 0) set_operand_data(&(eh->operand[op_idx])); break; } /* Immediate */ case XED_OPERAND_IMM0: { op_idx++; eh->num_operands++; eh->operand[op_idx].type = TImmediate; eh->operand[op_idx].length = (uint8_t) xed_decoded_inst_operand_length (&xedd, i); eh->operand[op_idx].access = (uint8_t) xed_operand_rw (op); //xed_uint_t width = xed_decoded_inst_get_immediate_width(&xedd); if (xed_decoded_inst_get_immediate_is_signed(&xedd)) { xed_int32_t signed_imm_val = xed_decoded_inst_get_signed_immediate(&xedd); eh->operand[op_idx].value = (uint32_t) signed_imm_val; } else { xed_uint64_t unsigned_imm_val = xed_decoded_inst_get_unsigned_immediate(&xedd); eh->operand[op_idx].value = (uint32_t) unsigned_imm_val; } break; break; } /* Special immediate only used in ENTER instruction */ case 
XED_OPERAND_IMM1: { op_idx++; eh->num_operands++; eh->operand[op_idx].type = TImmediate; eh->operand[op_idx].length = (uint8_t) xed_decoded_inst_operand_length (&xedd, i); eh->operand[op_idx].access = (uint8_t) xed_operand_rw (op); xed_uint8_t unsigned_imm_val = xed_decoded_inst_get_second_immediate(&xedd); eh->operand[op_idx].value = (uint32_t) unsigned_imm_val; break; } /* Memory */ case XED_OPERAND_AGEN: case XED_OPERAND_MEM0: case XED_OPERAND_MEM1: { unsigned long base = 0; unsigned long index = 0; unsigned long scale = 1; unsigned long segbase = 0; unsigned short segsel = 0; unsigned long displacement = 0; unsigned int j; size_t remaining = 0; /* Set memory index */ int mem_idx = 0; if (op_name == XED_OPERAND_MEM1) mem_idx = 1; unsigned int memlen = xed_decoded_inst_operand_length (&xedd, i); for (j = 0; j < memlen; j+=4) { /* Initialization */ base = 0; index = 0; scale = 1; segbase = 0; segsel = 0; displacement = 0; remaining = memlen - j; op_idx++; if(op_idx >= MAX_NUM_OPERANDS) break; //assert(op_idx < MAX_NUM_OPERANDS); eh->num_operands++; eh->operand[op_idx].type = TMemLoc; eh->operand[op_idx].access = (uint8_t) xed_operand_rw (op); eh->operand[op_idx].length = remaining > 4 ? 
4 : (uint8_t) remaining; // Get Segment register xed_reg_enum_t seg_regid = xed_decoded_inst_get_seg_reg(&xedd,mem_idx); if (seg_regid != XED_REG_INVALID) { const xed_operand_values_t *xopv = xed_decoded_inst_operands_const(&xedd); xed_bool_t default_segment = xed_operand_values_using_default_segment (xopv,mem_idx); if (!default_segment) { eh->num_operands++; int segmentreg = xed2chris_regmapping[seg_regid][0] - 100; segbase = TEMU_cpu_segs[segmentreg].base; segsel = TEMU_cpu_segs[segmentreg].selector; eh->memregs[op_idx][0].type = TRegister; eh->memregs[op_idx][0].length = 2; eh->memregs[op_idx][0].addr = xed2chris_regmapping[seg_regid][0]; eh->memregs[op_idx][0].access = (uint8_t) XED_OPERAND_ACTION_R; eh->memregs[op_idx][0].value = segsel; eh->memregs[op_idx][0].usage = memsegment; if (ignore_taint == 0) set_operand_data(&(eh->memregs[op_idx][0])); int dt; if (segsel & 0x4) // ldt dt = TEMU_cpu_ldt->base; else //gdt dt = TEMU_cpu_gdt->base; segsel = segsel >> 3; unsigned long segent = dt + 8 * segsel; unsigned char segdes[8]; TEMU_read_mem(segent, 8, segdes); #if 0 // debugging code to double check segbase value unsigned long segbasenew = segdes[2] + segdes[3] * 256 + segdes[4] * 256 * 256 + segdes[7] * 256 * 256 * 256; if (segbase != segbasenew) { term_printf("segbase unexpected: 0x%08lX v.s 0x%08lX\n", segbase, segbasenew); } #endif /* Segment descriptor is stored as a memory operand */ eh->num_operands+=2; eh->memregs[op_idx][3].type = TMemLoc; eh->memregs[op_idx][3].length = 4; eh->memregs[op_idx][3].addr = segent; eh->memregs[op_idx][3].access = (uint8_t) XED_OPERAND_ACTION_INVALID; eh->memregs[op_idx][3].value = *(uint32_t *) segdes; eh->memregs[op_idx][3].tainted = 0; eh->memregs[op_idx][3].usage = memsegent0; eh->memregs[op_idx][4].type = TMemLoc; eh->memregs[op_idx][4].length = 4; eh->memregs[op_idx][4].addr = segent + 4; eh->memregs[op_idx][4].access = (uint8_t) XED_OPERAND_ACTION_INVALID; eh->memregs[op_idx][4].value = *(uint32_t *) (segdes + 4); 
eh->memregs[op_idx][4].tainted = 0; eh->memregs[op_idx][4].usage = memsegent1; } } // Get Base register xed_reg_enum_t base_regid = xed_decoded_inst_get_base_reg(&xedd,mem_idx); if (base_regid != XED_REG_INVALID) { eh->num_operands++; int basereg = xed2chris_regmapping[base_regid][1]; base = TEMU_cpu_regs[basereg]; eh->memregs[op_idx][1].type = TRegister; eh->memregs[op_idx][1].addr = xed2chris_regmapping[base_regid][0]; eh->memregs[op_idx][1].length = 4; eh->memregs[op_idx][1].access = (uint8_t) XED_OPERAND_ACTION_R; eh->memregs[op_idx][1].value = base; eh->memregs[op_idx][1].usage = membase; if (ignore_taint == 0) set_operand_data(&(eh->memregs[op_idx][1])); } // Get Index register and Scale xed_reg_enum_t index_regid = xed_decoded_inst_get_index_reg(&xedd,mem_idx); if (mem_idx == 0 && index_regid != XED_REG_INVALID) { eh->num_operands++; int indexreg = xed2chris_regmapping[index_regid][1]; index = TEMU_cpu_regs[indexreg]; eh->memregs[op_idx][2].type = TRegister; eh->memregs[op_idx][2].addr = xed2chris_regmapping[index_regid][0]; eh->memregs[op_idx][2].length = 4; eh->memregs[op_idx][2].access = (uint8_t) XED_OPERAND_ACTION_R; eh->memregs[op_idx][2].value = index; eh->memregs[op_idx][2].usage = memindex; if (ignore_taint == 0) set_operand_data(&(eh->memregs[op_idx][2])); // Get Scale (AKA width) (only have a scale if the index exists) if (xed_decoded_inst_get_scale(&xedd,i) != 0) { scale = (unsigned long) xed_decoded_inst_get_scale(&xedd,mem_idx); } } // Get displacement (AKA offset) displacement = (unsigned long) xed_decoded_inst_get_memory_displacement (&xedd,mem_idx); // Fix displacement for: // 1) Any instruction that pushes into the stack, since ESP is // decremented before memory operand is written using ESP. 
// Affects: ENTER,PUSH,PUSHA,PUSHF,CALL if (is_stackpush) { stackpushpop_acc += eh->operand[op_idx].length; displacement = displacement - stackpushpop_acc -j; } // 2) Pop instructions where the // destination operand is a memory location that uses ESP // as base or index register. // The pop operations increments ESP and the written memory // location address needs to be adjusted. // Affects: pop (%esp) else if ((category == XED_CATEGORY_POP) && (!is_stackpop)) { if ((eh->memregs[op_idx][1].addr == esp_reg) || (eh->memregs[op_idx][2].addr == esp_reg)) { displacement = displacement + eh->operand[op_idx].length; } } // Calculate memory address accessed eh->operand[op_idx].addr = j + segbase + base + index * scale + displacement; // Special handling for LEA instructions if (op_name == XED_OPERAND_AGEN) { eh->operand[op_idx].type = TMemAddress; eh->operand[op_idx].length = 4; has_page_fault = 0; // LEA won't trigger page fault } else { has_page_fault = TEMU_read_mem(eh->operand[op_idx].addr, (int)(eh->operand[op_idx].length), (uint8_t *)&(eh->operand[op_idx].value)); } // Check if instruction accesses user memory // kernel_mem_start defined in shared/read_linux.c if ((eh->operand[op_idx].addr < kernel_mem_start) && (op_name != XED_OPERAND_AGEN)) { access_user_mem = 1; } if (ignore_taint == 0) set_operand_data(&(eh->operand[op_idx])); } break; } /* Jumps */ case XED_OPERAND_PTR: // pointer (always in conjunction with a IMM0) case XED_OPERAND_RELBR: { // branch displacements xed_uint_t disp = xed_decoded_inst_get_branch_displacement(&xedd); /* Displacement is from instruction end */ /* Adjust displacement with instruction size */ disp = disp + eh->inst_size; op_idx++; eh->num_operands++; eh->operand[op_idx].type = TJump; eh->operand[op_idx].length = 4; eh->operand[op_idx].access = (uint8_t) xed_operand_rw (op); eh->operand[op_idx].value = disp; break; } /* Floating point registers */ case XED_REG_X87CONTROL: case XED_REG_X87STATUS: case XED_REG_X87TOP: case 
XED_REG_X87TAG: case XED_REG_X87PUSH: case XED_REG_X87POP: case XED_REG_X87POP2: op_idx++; eh->num_operands++; eh->operand[op_idx].type = TFloatRegister; eh->operand[op_idx].length = 4; eh->operand[op_idx].access = (uint8_t) xed_operand_rw (op); default: break; } }
/*
 * Decode `bytes` bytes at decode_text_binary into *xedd, then re-encode the
 * decoded instruction and (when CLIENT_VERBOSE is set) report whether the
 * re-encoded bytes match the input.
 *
 * Returns the length of the re-encoded instruction, or 0 when either the
 * decode or the encode step fails.
 */
xed_uint_t disas_decode_encode_binary(const xed_state_t* dstate,
                                      const xed_uint8_t* decode_text_binary,
                                      const unsigned int bytes,
                                      xed_decoded_inst_t* xedd,
                                      xed_uint64_t runtime_address)
{
    xed_uint8_t reencoded[XED_MAX_INSTRUCTION_BYTES];
    unsigned int reencoded_len;
    xed_encoder_request_t* request;
    xed_error_enum_t enc_status;
    xed_uint64_t start_time, end_time;

    // decode it...
    if (!disas_decode_binary(dstate, decode_text_binary, bytes, xedd,
                             runtime_address))
        return 0;

    // The decoded instruction doubles as the encoder request once converted.
    request = xedd;
    xed_encoder_request_init_from_decode(xedd);

    // encode it again...
    start_time = xed_get_time();
    enc_status = xed_encode(request, reencoded, XED_MAX_INSTRUCTION_BYTES,
                            &reencoded_len);
    end_time = xed_get_time();

    if (enc_status != XED_ERROR_NONE) {
        if (CLIENT_VERBOSE) {
            char request_text[XED_TMP_BUF_LEN];
            char message[XED_TMP_BUF_LEN];
            int room = XED_TMP_BUF_LEN;

            xed_encode_request_print(request, request_text, XED_TMP_BUF_LEN);
            room = xed_strncpy(message, "Could not re-encode: ", room);
            room = xed_strncat(message, request_text, room);
            room = xed_strncat(message, "\nError code was: ", room);
            room = xed_strncat(message, xed_error_enum_t2str(enc_status), room);
            xed_strncat(message, "\n", room);
            xedex_dwarn(message);
        }
        return 0;
    }

    if (!CLIENT_VERBOSE)
        return reencoded_len;

    // See if it matched the original...
    {
        char hex[XED_HEX_BUFLEN];
        xed_uint_t dec_length;

        xed_print_hex_line(hex, reencoded, reencoded_len, XED_HEX_BUFLEN);
        printf("Encodable! %s\n", hex);
        xed_decode_stats_reset(&xed_enc_stats, start_time, end_time);
        dec_length = xed_decoded_inst_get_length(xedd);

        // Bytes are only compared when the two lengths already agree.
        if (reencoded_len == dec_length &&
            memcmp(decode_text_binary, reencoded, reencoded_len) == 0) {
            printf("Identical re-encoding\n");
        } else {
            char request_text[XED_TMP_BUF_LEN];
            char inst_text[XED_TMP_BUF_LEN];

            printf("Discrepenacy after re-encoding. dec_len= " XED_FMT_U " ",
                   dec_length);
            xed_print_hex_line(hex, decode_text_binary, dec_length,
                               XED_HEX_BUFLEN);
            printf("[%s] ", hex);
            printf("enc_olen= " XED_FMT_U "", reencoded_len);
            xed_print_hex_line(hex, reencoded, reencoded_len, XED_HEX_BUFLEN);
            printf(" [%s] ", hex);
            printf("for instruction: ");
            xed_decoded_inst_dump(xedd, inst_text, XED_TMP_BUF_LEN);
            printf("%s\n", inst_text);
            printf("vs Encode request: ");
            xed_encode_request_print(request, request_text, XED_TMP_BUF_LEN);
            printf("%s\n", request_text);
        }
    }

    return reencoded_len;
}
int main(int argc, char** argv) { xed_ild_t ild; xed_uint_t uargc = (xed_uint_t)argc; xed_uint_t length = 0; xed_uint_t dlen = 0; xed_uint_t i,j,input_nibbles=0; xed_uint8_t itext[XED_MAX_INSTRUCTION_BYTES]; char src[MAX_INPUT_NIBBLES+1]; xed_state_t dstate; xed_decoded_inst_t xedd; xed_uint_t first_argv; xed_uint_t bytes; xed_error_enum_t xed_error; xed_chip_enum_t chip = XED_CHIP_INVALID; int already_set_mode = 0; // initialize the XED tables -- one time. xed_tables_init(); xed_state_zero(&dstate); first_argv = 1; dstate.mmode=XED_MACHINE_MODE_LEGACY_32; dstate.stack_addr_width=XED_ADDRESS_WIDTH_32b; for(i=1;i< uargc;i++) { if (strcmp(argv[i], "-64") == 0) { assert(already_set_mode == 0); already_set_mode = 1; dstate.mmode=XED_MACHINE_MODE_LONG_64; first_argv++; } else if (strcmp(argv[i], "-16") == 0) { assert(already_set_mode == 0); already_set_mode = 1; dstate.mmode=XED_MACHINE_MODE_LEGACY_16; dstate.stack_addr_width=XED_ADDRESS_WIDTH_16b; first_argv++; } else if (strcmp(argv[i], "-s16") == 0) { already_set_mode = 1; dstate.stack_addr_width=XED_ADDRESS_WIDTH_16b; first_argv++; } else if (strcmp(argv[i], "-chip") == 0) { assert(i+1 < uargc); chip = str2xed_chip_enum_t(argv[i+1]); printf("Setting chip to %s\n", xed_chip_enum_t2str(chip)); assert(chip != XED_CHIP_INVALID); first_argv+=2; } } assert(first_argv < uargc); xed_decoded_inst_zero_set_mode(&xedd, &dstate); if (first_argv >= uargc) { printf("Need some hex instruction nibbles"); exit(1); } for(i=first_argv;i<uargc;i++) { for(j=0;argv[i][j];j++) { assert(input_nibbles < MAX_INPUT_NIBBLES); src[input_nibbles] = argv[i][j]; input_nibbles++; } } src[input_nibbles] = 0; if (input_nibbles & 1) { printf("Need an even number of nibbles"); exit(1); } bytes = xed_convert_ascii_to_hex(src, itext, XED_MAX_INSTRUCTION_BYTES); printf("Attempting to decode: "); for(i=0;i<bytes;i++) { printf("%02x", itext[i]); } printf("\n"); xed_ild_init(&ild, dstate.mmode, chip, itext, XED_MAX_INSTRUCTION_BYTES); length = 
xed_instruction_length_decode(&ild); print_ild(&ild); printf("ILD length = %d\n",length); xed_decoded_inst_set_input_chip(&xedd, chip); xed_error = xed_decode(&xedd, XED_REINTERPRET_CAST(const xed_uint8_t*,itext), bytes); switch(xed_error) { case XED_ERROR_NONE: break; case XED_ERROR_BUFFER_TOO_SHORT: printf("Not enough bytes provided\n"); exit(1); case XED_ERROR_INVALID_FOR_CHIP: printf("The instruction was not valid for the specified chip.\n"); exit(1); case XED_ERROR_GENERAL_ERROR: printf("Could not decode given input.\n"); exit(1); default: printf("Unhandled error code %s\n",xed_error_enum_t2str(xed_error)); exit(1); } dlen = xed_decoded_inst_get_length(&xedd); printf ("Traditional length = %d\n", dlen); if (dlen != length) { printf ("Length error\n"); exit(1); } printf ("Length matched\n"); return 0; }
static void Instrument_CALL(const xed_inst_t* xi) { #ifdef DEBUG fprintf(stdout, "data:instrument_call\n"); #endif xed_reg_enum_t reg_id; uint32_t buf; unsigned int value = 0, dest = 0; const xed_operand_t *op = xed_inst_operand(xi, 0); xed_operand_enum_t op_name = xed_operand_name(op); char *fname = 0; unsigned int taint = 0; API_TYPE type; //TODO: value may be plt call if(operand_is_mem4(op_name, &dest, 0)){ int mem_idx = op_name == XED_OPERAND_MEM1 ? 1 : 0; xed_reg_enum_t base_regid = xed_decoded_inst_get_base_reg(&xedd_g, mem_idx); PEMU_read_mem(dest, 4, &buf); dest = buf; if(taint = d_get_reg_taint(base_regid)){ if(type = is_api_call(dest, &fname)){ // update_mem_val_type(taint, 2, type, fname); goto API_CALL; }else{ update_mem_val_type(taint, 2, API_NONE, 0); } }else if(taint = d_get_mem_taint(dest)){ if(type = is_api_call(dest, &fname)){ // update_mem_val_type(taint, 2, type, fname); goto API_CALL; }else{ update_mem_val_type(taint, 2, API_NONE, 0); } } /* else if(taint = t_get_reg_taint(base_regid)) { uint32_t mem_addr=dest; PEMU_read_mem(dest, 4, buf); dest = *(unsigned int*)buf; insert_mem_val(mem_addr, dest); if(type = is_api_call(dest, &fname)){ update_mem_val_type(mem_addr, 2, type, fname); }else{ update_mem_val_type(mem_addr, 2, API_NONE, 0); } }else if(base_regid==XED_REG_INVALID) { uint32_t mem_addr=dest; PEMU_read_mem(dest, 4, buf); dest = *(unsigned int*)buf; insert_mem_val(mem_addr, dest); if(type = is_api_call(dest, &fname)){ update_mem_val_type(mem_addr, 2, type, fname); }else{ update_mem_val_type(mem_addr, 2, API_NONE, 0); } }*/ return; }else if(operand_is_reg(op_name, ®_id)){ if(taint = d_get_reg_taint(reg_id)){ dest = PEMU_get_reg(reg_id); if(type = is_api_call(dest, &fname)){ // update_mem_val_type(taint, 2, type, fname); goto API_CALL; }else{ update_mem_val_type(taint, 2, API_NONE, 0); } } return; }else if(operand_is_relbr(op_name, &dest)){ dest += (g_pc + xed_decoded_inst_get_length(&xedd_g)); if(type = is_api_call(dest, &fname)){ #ifdef 
DEBUG fprintf(stdout, "is_api_call\t%x\t%x\n", dest, type); #endif goto REST; } return; } API_CALL: update_mem_val_type(taint, 2, type, fname); //api_copy(&inst->api_call, get_api_call(dest)); REST: handle_api_issues(get_api_call(dest), 1); }
/*
 * Return, as a Python integer object, the length XED reports for the
 * decoded instruction held in self->decoded_inst.
 */
static PyObject *get_length(instruction_t *self)
{
    long length = xed_decoded_inst_get_length(self->decoded_inst);

    return PyInt_FromLong(length);
}
void xed_disas_test(xed_disas_info_t* di) { static int first = 1; #if !defined(XED_ILD_ONLY) && !defined(XED2_PERF_MEASURE) xed_uint64_t errors = 0; #endif unsigned int m; unsigned char* z; unsigned char* zlimit; unsigned int length; int skipping; int last_all_zeros; unsigned int i; int okay; xed_decoded_inst_t xedd; xed_uint64_t runtime_instruction_address; xed_dot_graph_supp_t* gs = 0; xed_bool_t graph_empty = 1; //#define XED_USE_DECODE_CACHE #if defined(XED_USE_DECODE_CACHE) xed_decode_cache_t cache; xed_uint32_t n_cache_entries = 16 * 1024; xed_decode_cache_entry_t* cache_entries = (xed_decode_cache_entry_t*) malloc(n_cache_entries * sizeof(xed_decode_cache_entry_t)); xed_decode_cache_initialize(&cache, cache_entries, n_cache_entries); #endif if(di->dot_graph_output) { xed_syntax_enum_t syntax = XED_SYNTAX_INTEL; gs = xed_dot_graph_supp_create(syntax); } if(first) { xed_decode_stats_zero(&xed_stats, di); first = 0; } m = di->ninst; // number of things to decode z = di->a; if(di->runtime_vaddr_disas_start) if(di->runtime_vaddr_disas_start > di->runtime_vaddr) z = (di->runtime_vaddr_disas_start - di->runtime_vaddr) + di->a; zlimit = 0; if(di->runtime_vaddr_disas_end) { if(di->runtime_vaddr_disas_end > di->runtime_vaddr) zlimit = (di->runtime_vaddr_disas_end - di->runtime_vaddr) + di->a; else /* end address is before start of this region -- skip it */ goto finish; } if(z >= di->q) /* start pointer is after end of section */ goto finish; // for skipping long strings of zeros skipping = 0; last_all_zeros = 0; for(i = 0; i < m; i++) { int ilim, elim; if(zlimit && z >= zlimit) { if(di->xml_format == 0) printf("# end of range.\n"); break; } if(z >= di->q) { if(di->xml_format == 0) #if !defined(XED_ILD_ONLY) printf("# end of text section.\n"); #endif break; } /* if we get near the end of the section, clip the itext length */ ilim = 15; elim = di->q - z; if(elim < ilim) ilim = elim; if(CLIENT_VERBOSE3) { printf("\n==============================================\n"); 
printf("Decoding instruction " XED_FMT_U "\n", i); printf("==============================================\n"); } // if we get two full things of 0's in a row, start skipping. if(all_zeros((xed_uint8_t*) z, ilim)) { if(skipping) { z = z + ilim; continue; } else if(last_all_zeros) { #if !defined(XED_ILD_ONLY) && !defined(XED2_PERF_MEASURE) printf("...\n"); #endif z = z + ilim; skipping = 1; continue; } else last_all_zeros = 1; } else { skipping = 0; last_all_zeros = 0; } runtime_instruction_address = ((xed_uint64_t)(z - di->a)) + di->runtime_vaddr; if(CLIENT_VERBOSE3) { char tbuf[XED_HEX_BUFLEN]; printf("Runtime Address " XED_FMT_LX , runtime_instruction_address); xed_print_hex_line(tbuf, (xed_uint8_t*) z, ilim, XED_HEX_BUFLEN); printf(" [%s]\n", tbuf); } okay = 0; length = 0; xed_decoded_inst_zero_set_mode(&xedd, di->dstate); if(di->late_init) (*di->late_init)(&xedd); if(di->decode_only) { xed_uint64_t t1; xed_uint64_t t2; xed_error_enum_t xed_error = XED_ERROR_NONE; t1 = xed_get_time(); #if defined(XED_USE_DECODE_CACHE) xed_error = xed_decode_cache(&xedd, XED_REINTERPRET_CAST(const xed_uint8_t*, z), ilim, &cache); #else xed_error = decode_internal( &xedd, XED_REINTERPRET_CAST(const xed_uint8_t*, z), ilim); #endif t2 = xed_get_time(); okay = (xed_error == XED_ERROR_NONE); #if defined(PTI_XED_TEST) if(okay) pti_xed_test(&xedd, XED_REINTERPRET_CAST(const xed_uint8_t*, z), ilim, runtime_instruction_address); #endif xed_decode_stats_reset(&xed_stats, t1, t2); length = xed_decoded_inst_get_length(&xedd); if(okay && length == 0) { printf("Zero length on decoded instruction!\n"); xed_decode_error(runtime_instruction_address, z - di->a, z, xed_error); xedex_derror("Dieing"); } if(di->resync && di->symfn) { xed_bool_t resync = 0; unsigned int x; for(x = 1; x < length; x++) { char* name = (*di->symfn)(runtime_instruction_address + x, di->caller_symbol_data); if(name) { char buf[XED_HEX_BUFLEN]; /* bad news. We found a symbol in the middle of an * instruction. 
That probably means decoding is * messed up. This usually happens because of * data-in the code/text section. We should reject * the current instruction and pick up at the * symbol address. */ printf("ERROR: found symbol in the middle of" " an instruction. Resynchronizing...\n"); printf("ERROR: Rejecting: ["); xed_print_hex_line(buf, z, x, XED_HEX_BUFLEN); printf("%s]\n", buf); z += x; resync = 1; break; } } if(resync) continue; } xed_stats.total_ilen += length; //we don't want to print out disassembly with ILD perf #if !defined(XED_ILD_ONLY) && !defined(XED2_PERF_MEASURE) if(okay) { if(CLIENT_VERBOSE1) { char tbuf[XED_TMP_BUF_LEN]; xed_decoded_inst_dump(&xedd, tbuf, XED_TMP_BUF_LEN); printf("%s\n", tbuf); } if(CLIENT_VERBOSE) { char buffer[XED_TMP_BUF_LEN]; unsigned int dec_len; unsigned int sp; if(di->symfn) { char* name = (*di->symfn)(runtime_instruction_address, di->caller_symbol_data); if(name) { if(di->xml_format) printf("\n<SYM>%s</SYM>\n", name); else printf("\nSYM %s:\n", name); } } if(di->xml_format) { printf("<ASMLINE>\n"); printf(" <ADDR>" XED_FMT_LX "</ADDR>\n", runtime_instruction_address); printf(" <CATEGORY>%s</CATEGORY>\n", xed_category_enum_t2str( xed_decoded_inst_get_category(&xedd))); printf(" <EXTENSION>%s</EXTENSION>\n", xed_extension_enum_t2str( xed_decoded_inst_get_extension(&xedd))); printf(" <ITEXT>"); dec_len = xed_decoded_inst_get_length(&xedd); xed_print_hex_line(buffer, (xed_uint8_t*) z, dec_len, XED_TMP_BUF_LEN); printf("%s</ITEXT>\n", buffer); buffer[0] = 0; disassemble(buffer, XED_TMP_BUF_LEN, &xedd, runtime_instruction_address, di->caller_symbol_data); printf(" %s\n", buffer); printf("</ASMLINE>\n"); } else { printf("XDIS " XED_FMT_LX ": ", runtime_instruction_address); #if 0 /* test code for the new API */ if(xed_decoded_inst_masked_vector_operation(&xedd)) printf("MSK "); else printf(" "); #endif if(di->ast) { printf("%-6s ", xed_ast_input_enum_t2str( classify_avx_sse(&xedd))); } else { printf("%-9s ", xed_category_enum_t2str( 
xed_decoded_inst_get_category(&xedd))); printf("%-6s ", xed_extension_enum_t2str( xed_decoded_inst_get_extension(&xedd))); } dec_len = xed_decoded_inst_get_length(&xedd); xed_print_hex_line(buffer, (xed_uint8_t*) z, dec_len, XED_HEX_BUFLEN); printf("%s", buffer); // pad out the instruction bytes for(sp = dec_len; sp < 12; sp++) printf(" "); printf(" "); buffer[0] = 0; disassemble(buffer, XED_TMP_BUF_LEN, &xedd, runtime_instruction_address, di->caller_symbol_data); printf("%s", buffer); if(gs) { graph_empty = 0; xed_dot_graph_add_instruction( gs, &xedd, runtime_instruction_address, di->caller_symbol_data); } if(di->line_number_info_fn) (*di->line_number_info_fn)(runtime_instruction_address); printf("\n"); } } } else { errors++; xed_decode_error(runtime_instruction_address, z - di->a, z, xed_error); // just give a length of 1B to see if we can restart decode... length = 1; } } #if defined(XED_ENCODER) else {
void OfflineX86Code::disasm(FILE* file, TCA fileStartAddr, TCA codeStartAddr, uint64_t codeLen, const PerfEventsMap<TCA>& perfEvents, BCMappingInfo bcMappingInfo, bool printAddr /* =true */, bool printBinary /* =false */) { char codeStr[MAX_INSTR_ASM_LEN]; xed_uint8_t* code = (xed_uint8_t*) alloca(codeLen); xed_uint8_t* frontier; TCA ip; TCA r10val = 0; size_t currBC = 0; if (codeLen == 0) return; auto const offset = codeStartAddr - fileStartAddr; if (fseek(file, offset, SEEK_SET)) { error("disasm error: seeking file"); } size_t readLen = fread(code, codeLen, 1, file); if (readLen != 1) { error("Failed to read {} bytes at offset {} from code file due to {}", codeLen, offset, feof(file) ? "EOF" : "read error"); } xed_decoded_inst_t xedd; // Decode and print each instruction for (frontier = code, ip = codeStartAddr; frontier < code + codeLen; ) { xed_decoded_inst_zero_set_mode(&xedd, &xed_state); xed_decoded_inst_set_input_chip(&xedd, XED_CHIP_INVALID); xed_error_enum_t xed_error = xed_decode(&xedd, frontier, 15); if (xed_error != XED_ERROR_NONE) break; // Get disassembled instruction in codeStr if (!xed_format_context(xed_syntax, &xedd, codeStr, MAX_INSTR_ASM_LEN, (uint64_t)ip, nullptr #if XED_ENCODE_ORDER_MAX_ENTRIES != 28 // Newer version of XED library , 0 #endif )) { error("disasm error: xed_format_context failed"); } // Annotate the x86 with its bytecode. currBC = printBCMapping(bcMappingInfo, currBC, (TCA)ip); if (printAddr) printf("%14p: ", ip); uint32_t instrLen = xed_decoded_inst_get_length(&xedd); if (printBinary) { uint32_t i; for (i=0; i < instrLen; i++) { printf("%02X", frontier[i]); } for (; i < 16; i++) { printf(" "); } } // For calls, we try to figure out the destination symbol name. 
// We look both at relative branches and the pattern: // move r10, IMMEDIATE // call r10 xed_iclass_enum_t iclass = xed_decoded_inst_get_iclass(&xedd); string callDest = ""; if (iclass == XED_ICLASS_CALL_NEAR || iclass == XED_ICLASS_CALL_FAR) { const xed_inst_t *xi = xed_decoded_inst_inst(&xedd); always_assert(xed_inst_noperands(xi) >= 1); const xed_operand_t *opnd = xed_inst_operand(xi, 0); xed_operand_enum_t opndName = xed_operand_name(opnd); if (opndName == XED_OPERAND_RELBR) { if (xed_decoded_inst_get_branch_displacement_width(&xedd)) { xed_int32_t disp = xed_decoded_inst_get_branch_displacement(&xedd); TCA addr = ip + instrLen + disp; callDest = getSymbolName(addr); } } else if (opndName == XED_OPERAND_REG0) { if (xed_decoded_inst_get_reg(&xedd, opndName) == XED_REG_R10) { callDest = getSymbolName(r10val); } } } else if (iclass == XED_ICLASS_MOV) { // Look for moves into r10 and keep r10val updated const xed_inst_t* xi = xed_decoded_inst_inst(&xedd); always_assert(xed_inst_noperands(xi) >= 2); const xed_operand_t *destOpnd = xed_inst_operand(xi, 0); xed_operand_enum_t destOpndName = xed_operand_name(destOpnd); if (destOpndName == XED_OPERAND_REG0 && xed_decoded_inst_get_reg(&xedd, destOpndName) == XED_REG_R10) { const xed_operand_t *srcOpnd = xed_inst_operand(xi, 1); xed_operand_enum_t srcOpndName = xed_operand_name(srcOpnd); if (srcOpndName == XED_OPERAND_IMM0) { TCA addr = (TCA)xed_decoded_inst_get_unsigned_immediate(&xedd); r10val = addr; } } } if (!perfEvents.empty()) { printEventStats((TCA)ip, instrLen, perfEvents); } else { printf("%48s", ""); } printf("%s%s\n", codeStr, callDest.c_str()); frontier += instrLen; ip += instrLen; } }