/*
 * Basic-block event callback.
 *
 * Inserts, ahead of the first instruction of every block, a sequence that:
 *   1. spills XAX to SPILL_SLOT_1,
 *   2. loads the address of a local label into XAX,
 *   3. jumps to `slowpath`,
 *   4. places that label (the address loaded in step 2),
 *   5. reloads XAX from SPILL_SLOT_1.
 *
 * NOTE(review): `slowpath` and PRE are defined elsewhere in the file;
 * presumably the slowpath jumps back through XAX -- confirm.
 *
 * tag, for_trace and translating are not used.  Always returns
 * DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
event_bb(void *dc, void *tag, instrlist_t *bb, bool for_trace, bool translating)
{
    instr_t *const first = instrlist_first(bb);
    instr_t *const resume = INSTR_CREATE_label(dc);
    instr_t *load_resume_addr;
    instr_t *goto_slowpath;

    /* Free up XAX so it can carry the resume address. */
    dr_save_reg(dc, bb, first, DR_REG_XAX, SPILL_SLOT_1);

    /* XAX <- address of the resume label. */
    load_resume_addr = INSTR_CREATE_mov_imm(dc, opnd_create_reg(DR_REG_XAX),
                                            opnd_create_instr(resume));
    PRE(bb, first, load_resume_addr);

    /* Transfer to the out-of-line code. */
    goto_slowpath = INSTR_CREATE_jmp(dc, opnd_create_pc(slowpath));
    PRE(bb, first, goto_slowpath);

    /* Resume point, followed by the XAX reload. */
    PRE(bb, first, resume);
    dr_restore_reg(dc, bb, first, DR_REG_XAX, SPILL_SLOT_1);

    return DR_EMIT_DEFAULT;
}
/*
 * Basic-block event callback.
 *
 * For blocks whose tag lies in [start, end), inserts at the top of the block
 * a hand-built call to delete_fragment: the tag and then drcontext are pushed
 * as 32-bit immediates, the call is emitted, and dr_cleanup_after_call() is
 * given 8 as its final argument.
 *
 * NOTE(review): both pointers are narrowed through OPND_CREATE_INT32, so this
 * looks like it is only meant for 32-bit builds -- confirm.
 *
 * Blocks outside the range are left untouched.  for_trace and translating
 * are not used.  Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
bb_event(void* drcontext, app_pc tag, instrlist_t* bb, bool for_trace, bool translating)
{
    instr_t *first;
    opnd_t tag_imm;
    opnd_t ctx_imm;

    /* Nothing to do for blocks outside the watched address range. */
    if (tag < start || tag >= end)
        return DR_EMIT_DEFAULT;

    first = instrlist_first(bb);
    dr_prepare_for_call(drcontext, bb, first);

    /* Arguments are pushed last-to-first: tag, then drcontext. */
    tag_imm = OPND_CREATE_INT32((ptr_uint_t)tag);
    MINSERT(bb, first, INSTR_CREATE_push_imm(drcontext, tag_imm));
    ctx_imm = OPND_CREATE_INT32((ptr_uint_t)drcontext);
    MINSERT(bb, first, INSTR_CREATE_push_imm(drcontext, ctx_imm));

    MINSERT(bb, first,
            INSTR_CREATE_call(drcontext, opnd_create_pc((void*)delete_fragment)));
    dr_cleanup_after_call(drcontext, bb, first, 8);

    return DR_EMIT_DEFAULT;
}
/*
 * Inserts a direct call to `callee` immediately before `cursor` in `bb`
 * (or at the end of `bb` when cursor is NULL) and then flags every
 * instruction that now sits between the old predecessor of `cursor` and
 * `cursor` itself as not-ok-to-mangle (i.e. meta).
 *
 * dcontext   - context handed to the instruction-creation macro
 * bb         - instruction list being edited
 * cursor     - insertion point; NULL means append
 * clean_call - not used by this implementation
 * callee     - absolute address of the function to call
 */
void
cfi_insert_meta_native_call_vargs(void *dcontext, instrlist_t *bb, instr_t *cursor,
                                  bool clean_call, void *callee)
{
    instr_t *before;
    instr_t *walk;

    /* Remember what preceded the insertion point before we add anything. */
    if (cursor == NULL)
        before = instrlist_last(bb);
    else
        before = instr_get_prev(cursor);

    PRE(bb, cursor, INSTR_CREATE_call(dcontext, opnd_create_pc(callee)));

    /*
     * The first newly inserted instruction is either the list head (nothing
     * preceded the insertion point) or the successor of `before`.
     */
    walk = (before == NULL) ? instrlist_first(bb) : instr_get_next(before);

    /* Mark it all meta. */
    for (; walk != cursor; walk = instr_get_next(walk))
        instr_set_ok_to_mangle(walk, false);
}
/*
 * Basic-block event callback that instruments conditional branches so each
 * edge (taken / not taken) is reported the first time it executes.
 *
 * For every cbr in the block, the per-branch state is looked up in `table`
 * (and created as CBR_NEITHER when absent).  For each edge not yet recorded
 * in that state, a clean call to at_taken / at_not_taken is appended to the
 * block.  The cbr itself is turned into a meta instruction targeting a local
 * label, and explicit application jumps to the original fall-through and
 * target addresses are appended so the block stays well formed.
 *
 * Because the inserted instrumentation varies over time, translations are
 * requested to be stored: the function always returns
 * DR_EMIT_STORE_TRANSLATIONS.
 *
 * tag, for_trace and translating are not used.
 */
static dr_emit_flags_t
bb_event(void *drcontext, void *tag, instrlist_t *bb, bool for_trace, bool translating)
{
    instr_t *cur;
    instr_t *following;

    for (cur = instrlist_first(bb); cur != NULL; cur = following) {
        cbr_state_t seen;
        bool want_taken;
        bool want_not_taken;
        app_pc branch_pc;
        app_pc fallthrough_pc;
        app_pc target_pc;
        elem_t *entry;
        instr_t *taken_label;

        /* Fetch the successor up front: `cur` may be replaced below and
         * new instructions are appended to the list.
         */
        following = instr_get_next(cur);
        if (!instr_is_cbr(cur))
            continue;

        /* Find out which edges of this branch have already been seen. */
        branch_pc = instr_get_app_pc(cur);
        entry = lookup(table, branch_pc);
        if (entry != NULL) {
            seen = entry->state;
        } else {
            seen = CBR_NEITHER;
            insert(table, branch_pc, CBR_NEITHER);
        }
        want_taken = !(seen & CBR_TAKEN);
        want_not_taken = !(seen & CBR_NOT_TAKEN);
        if (!want_taken && !want_not_taken)
            continue; /* both edges already recorded */

        fallthrough_pc = (app_pc)decode_next_pc(drcontext, (byte *)branch_pc);
        target_pc = instr_get_branch_target_pc(cur);

        /* The existing cbr is redirected to a local label that precedes the
         * 'taken' callout; the 'not-taken' callout sits on the fall-through
         * path.  The cbr becomes meta and so must carry no translation.
         */
        taken_label = INSTR_CREATE_label(drcontext);
        instr_set_meta_no_translation(cur);

        /* A short cti may not reach past the added clean call (notably on
         * x64).  For jecxz/loop the conversion returns the long 'taken'
         * jump, which is the one whose target we want to set.
         */
        if (instr_is_cti_short(cur))
            cur = instr_convert_short_meta_jmp_to_long(drcontext, bb, cur);
        instr_set_target(cur, opnd_create_instr(taken_label));

        /* Not-taken callout, appended after the cbr (where == NULL). */
        if (want_not_taken) {
            dr_insert_clean_call(drcontext, bb, NULL, (void*)at_not_taken,
                                 false /* don't save fp state */,
                                 2 /* 2 args for at_not_taken */,
                                 OPND_CREATE_INTPTR(branch_pc),
                                 OPND_CREATE_INTPTR(fallthrough_pc));
        }

        /* Exit to the original fall-through address.  This is an exit cti,
         * i.e. an application (non-meta) instruction, so it is inserted with
         * instrlist_preinsert() and given a translation.  On Windows this
         * jump and the final one never execute because the callouts redirect
         * execution and do not return, but the API expects well-formed code
         * so the explicit exits are emitted on every platform.
         */
        instrlist_preinsert(bb, NULL,
                            INSTR_XL8(INSTR_CREATE_jmp(drcontext,
                                                       opnd_create_pc(fallthrough_pc)),
                                      fallthrough_pc));

        /* The label comes right before the 'taken' callout. */
        MINSERT(bb, NULL, taken_label);

        if (want_taken) {
            dr_insert_clean_call(drcontext, bb, NULL, (void*)at_taken,
                                 false /* don't save fp state */,
                                 2 /* 2 args for at_taken */,
                                 OPND_CREATE_INTPTR(branch_pc),
                                 OPND_CREATE_INTPTR(target_pc));
        }

        /* Exit to the original branch target (also a non-meta instruction). */
        instrlist_preinsert(bb, NULL,
                            INSTR_XL8(INSTR_CREATE_jmp(drcontext,
                                                       opnd_create_pc(target_pc)),
                                      target_pc));
    }

    /* Our added instrumentation is not constant, so ask for translations to
     * be stored now.
     */
    return DR_EMIT_STORE_TRANSLATIONS;
}
/*
 * Inserts, before `where`, the instrumentation that fills one instr_trace_t
 * record in the per-thread buffer and hands the buffer off when it is full.
 *
 * drcontext   - DynamoRIO context used for all instruction creation
 * ilist       - instruction list being instrumented
 * where       - application instruction being traced; everything is
 *               inserted in front of it
 * static_info - pointer stored verbatim into the record's
 *               static_info_instr field
 *
 * When client_arg->instrace_mode is DISASSEMBLY_TRACE only a clean call to
 * clean_call_disassembly_trace is inserted and the function returns.
 *
 * Otherwise the emitted code:
 *   1) stores static_info and num_mem = 0 into the current record,
 *   2) for every memory-reference destination and then source operand,
 *      computes the address with drutil and passes it to
 *      clean_call_populate_mem,
 *   3) stores the arithmetic flags (as captured in XAX) and the pc offset
 *      within the containing module (0 when no module is found),
 *   4) advances buf_ptr by sizeof(instr_trace_t) and writes it back,
 *   5) uses lea + jecxz to test for "buffer full" and, if so, jumps to
 *      code_cache with the return address in reg2.
 *
 * XBX, XCX and XAX are spilled to SPILL_SLOT_2..4 on entry and reloaded at
 * the end of the inserted sequence.
 *
 * Known concerns carried over from the original author:
 *   1. pc and eflags are uint, but in 64-bit mode 8-byte transfers are done;
 *      no problem observed so far, but a better way may exist.
 *   2. All the printing should be double checked.
 */
static void dynamic_info_instrumentation(void *drcontext, instrlist_t *ilist, instr_t *where, instr_t * static_info)
{
    instr_t *instr, *call, *restore, *first, *second;
    opnd_t ref, opnd1, opnd2;
    reg_id_t reg1 = DR_REG_XBX; /* We can optimize it by picking dead reg */
    reg_id_t reg2 = DR_REG_XCX; /* reg2 must be ECX or RCX for jecxz */
    reg_id_t reg3 = DR_REG_XAX; /* receives the arithmetic flags below */
    per_thread_t *data;
    uint pc;
    uint i;
    module_data_t * module_data;

    /* Disassembly mode needs nothing but a per-instruction clean call. */
    if (client_arg->instrace_mode == DISASSEMBLY_TRACE){
        dr_insert_clean_call(drcontext, ilist, where, clean_call_disassembly_trace, false, 0);
        return;
    }

    /* NOTE(review): `data` is assigned here but not read anywhere else in
     * this function; the generated code re-reads the TLS field at run time.
     */
    data = drmgr_get_tls_field(drcontext, tls_index);

    /* Steal the registers for the memory reference address and buffer
     * pointer.
     *
     * We can optimize away the unnecessary register save and restore
     * by analyzing the code and finding the register is dead.
     */
    dr_save_reg(drcontext, ilist, where, reg1, SPILL_SLOT_2);
    dr_save_reg(drcontext, ilist, where, reg2, SPILL_SLOT_3);
    dr_save_reg(drcontext, ilist, where, reg3, SPILL_SLOT_4);

    drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, reg2);

    /* Load data->buf_ptr into reg2 */
    opnd1 = opnd_create_reg(reg2);
    opnd2 = OPND_CREATE_MEMPTR(reg2, offsetof(per_thread_t, buf_ptr));
    instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* buf_ptr->static_info_instr = static_info; */
    /* Move static_info to static_info_instr field of buf (which is an instr_trace_t *) */
    opnd1 = OPND_CREATE_MEMPTR(reg2, offsetof(instr_trace_t, static_info_instr));
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)static_info, opnd1, ilist, where, &first, &second);

    /* buf_ptr->num_mem = 0; */
    opnd1 = OPND_CREATE_MEMPTR(reg2, offsetof(instr_trace_t, num_mem));
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)0, opnd1, ilist, where, &first, &second);

    /* Destination operands that reference memory. */
    for (i = 0; i<instr_num_dsts(where); i++){
        if (opnd_is_memory_reference(instr_get_dst(where, i))){
            ref = instr_get_dst(where, i);

            DR_ASSERT(opnd_is_null(ref) == false);

            /* Reload reg1/reg2 from their spill slots before the address is
             * computed (both were, or may have been, overwritten above).
             */
            dr_restore_reg(drcontext, ilist, where, reg1, SPILL_SLOT_2);
            dr_restore_reg(drcontext, ilist, where, reg2, SPILL_SLOT_3);

#ifdef DEBUG_MEM_REGS
            dr_insert_clean_call(drcontext, ilist, where, clean_call_disassembly_trace, false, 0);
            dr_insert_clean_call(drcontext, ilist, where, clean_call_print_regvalues, false, 0);
#endif

            /* reg1 <- effective address of ref; reg2 is scratch. */
            drutil_insert_get_mem_addr(drcontext, ilist, where, ref, reg1, reg2);

#ifdef DEBUG_MEM_REGS
            dr_insert_clean_call(drcontext, ilist, where, clean_call_print_regvalues, false, 0);
#endif

#ifdef DEBUG_MEM_STATS
            dr_insert_clean_call(drcontext, ilist, where, clean_call_disassembly_trace, false, 0);
            dr_insert_clean_call(drcontext, ilist, where, clean_call_mem_stats, false, 1, opnd_create_reg(reg1));
#endif

            /* Arguments: address, operand index, DST_TYPE. */
            dr_insert_clean_call(drcontext, ilist, where, clean_call_populate_mem, false, 3, opnd_create_reg(reg1), OPND_CREATE_INT32(i), OPND_CREATE_INT32(DST_TYPE));
        }
    }

    /* Source operands that reference memory (same sequence, SRC_TYPE). */
    for (i = 0; i<instr_num_srcs(where); i++){
        if (opnd_is_memory_reference(instr_get_src(where, i))){
            ref = instr_get_src(where, i);

            DR_ASSERT(opnd_is_null(ref) == false);

            dr_restore_reg(drcontext, ilist, where, reg1, SPILL_SLOT_2);
            dr_restore_reg(drcontext, ilist, where, reg2, SPILL_SLOT_3);

#ifdef DEBUG_MEM_REGS
            dr_insert_clean_call(drcontext, ilist, where, clean_call_disassembly_trace, false, 0);
            dr_insert_clean_call(drcontext, ilist, where, clean_call_print_regvalues, false, 0);
#endif

            drutil_insert_get_mem_addr(drcontext, ilist, where, ref, reg1, reg2);

#ifdef DEBUG_MEM_REGS
            dr_insert_clean_call(drcontext, ilist, where, clean_call_print_regvalues, false, 0);
#endif

#ifdef DEBUG_MEM_STATS
            dr_insert_clean_call(drcontext, ilist, where, clean_call_disassembly_trace, false, 0);
            dr_insert_clean_call(drcontext, ilist, where, clean_call_mem_stats, false, 1, opnd_create_reg(reg1));
#endif

            dr_insert_clean_call(drcontext, ilist, where, clean_call_populate_mem, false, 3, opnd_create_reg(reg1), OPND_CREATE_INT32(i), OPND_CREATE_INT32(SRC_TYPE));
        }
    }

    /* reg2 was used as scratch above, so fetch the TLS field again. */
    drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, reg2);

    /* Load data->buf_ptr into reg2 */
    opnd1 = opnd_create_reg(reg2);
    opnd2 = OPND_CREATE_MEMPTR(reg2, offsetof(per_thread_t, buf_ptr));
    instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* arithmetic flags are saved here for buf_ptr->eflags filling */
    dr_save_arith_flags_to_xax(drcontext, ilist, where);

    /* store the flags (now in reg3 == XAX) into buf_ptr->eflags */
    opnd1 = OPND_CREATE_MEMPTR(reg2, offsetof(instr_trace_t, eflags));
    opnd2 = opnd_create_reg(reg3);
    instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* store the app_pc, as an offset from the start of its module */
    opnd1 = OPND_CREATE_MEMPTR(reg2, offsetof(instr_trace_t, pc));
    module_data = dr_lookup_module(instr_get_app_pc(where));

    /* Dynamically generated code has no module information available; in
     * that case just store 0 in the pc slot of the instr_trace data.
     * NOTE(review): the pointer difference is narrowed to uint here --
     * confirm module offsets always fit in 32 bits.
     */
    if (module_data != NULL){
        pc = instr_get_app_pc(where) - module_data->start;
        dr_free_module_data(module_data);
    }
    else{
        pc = 0;
    }

    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)pc, opnd1, ilist, where, &first, &second);

    /* buf_ptr++; */
    /* Increment reg value by sizeof(instr_trace_t) using lea instr */
    opnd1 = opnd_create_reg(reg2);
    opnd2 = opnd_create_base_disp(reg2, DR_REG_NULL, 0, sizeof(instr_trace_t), OPSZ_lea);
    instr = INSTR_CREATE_lea(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* Update the data->buf_ptr */
    drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, reg1);
    opnd1 = OPND_CREATE_MEMPTR(reg1, offsetof(per_thread_t, buf_ptr));
    opnd2 = opnd_create_reg(reg2);
    instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* we use lea + jecxz trick for better performance
     * lea and jecxz won't disturb the eflags, so we won't insert
     * code to save and restore application's eflags.
     */
    /* lea [reg2 - buf_end] => reg2
     * (reg1 is loaded from per_thread_t.buf_end and then added to reg2;
     * presumably buf_end holds the negated end address -- TODO confirm)
     */
    opnd1 = opnd_create_reg(reg1);
    opnd2 = OPND_CREATE_MEMPTR(reg1, offsetof(per_thread_t, buf_end));
    instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);
    opnd1 = opnd_create_reg(reg2);
    opnd2 = opnd_create_base_disp(reg1, reg2, 1, 0, OPSZ_lea);
    instr = INSTR_CREATE_lea(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* jecxz call */
    call = INSTR_CREATE_label(drcontext);
    opnd1 = opnd_create_instr(call);
    instr = INSTR_CREATE_jecxz(drcontext, opnd1);
    instrlist_meta_preinsert(ilist, where, instr);

    /* jump restore to skip clean call */
    restore = INSTR_CREATE_label(drcontext);
    opnd1 = opnd_create_instr(restore);
    instr = INSTR_CREATE_jmp(drcontext, opnd1);
    instrlist_meta_preinsert(ilist, where, instr);

    /* clean call */
    /* We jump to lean procedure which performs full context switch and
     * clean call invocation. This is to reduce the code cache size.
     */
    instrlist_meta_preinsert(ilist, where, call);
    /* mov restore DR_REG_XCX */
    opnd1 = opnd_create_reg(reg2);
    /* this is the return address for jumping back from lean procedure */
    opnd2 = opnd_create_instr(restore);
    /* We could use instrlist_insert_mov_instr_addr(), but with a register
     * destination we know we can use a 64-bit immediate.
     */
    instr = INSTR_CREATE_mov_imm(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);
    /* jmp code_cache */
    opnd1 = opnd_create_pc(code_cache);
    instr = INSTR_CREATE_jmp(drcontext, opnd1);
    instrlist_meta_preinsert(ilist, where, instr);

    /* restore %reg */
    instrlist_meta_preinsert(ilist, where, restore);

    /* A call to dr_restore_arith_flags_from_xax() was disabled here by the
     * original author; XAX is simply reloaded from its spill slot below.
     */
    dr_restore_reg(drcontext, ilist, where, reg1, SPILL_SLOT_2);
    dr_restore_reg(drcontext, ilist, where, reg2, SPILL_SLOT_3);
    dr_restore_reg(drcontext, ilist, where, reg3, SPILL_SLOT_4);

    /* Debug aid disabled by the original author: instrlist_disassemble()
     * of ilist to logfile.
     */
}
/*
 * Basic-block event callback that records conditional-branch edges, but
 * only while the global `instrument` flag is on.
 *
 * The flag is switched on when a block starting at start_pc is built and
 * switched off when a block starting at stop_pc is built.  While it is off
 * the block is returned unmodified with DR_EMIT_DEFAULT.
 *
 * While it is on, every cbr in the block gets separate 'taken' and
 * 'not-taken' callouts (at_taken / at_not_taken) for whichever edges are not
 * yet recorded in `table`; explicit application jumps to the original
 * fall-through and target addresses are appended after the callouts.  In
 * that case the function returns DR_EMIT_STORE_TRANSLATIONS because the
 * inserted instrumentation is not constant.
 *
 * for_trace and translating are not used.
 */
dr_emit_flags_t
bb_event(void *drcontext, void *tag, instrlist_t *bb, bool for_trace, bool translating)
{
    instr_t *cur;
    instr_t *following;
    const app_pc block_start = dr_fragment_app_pc(tag);

    /* Toggle instrumentation at the configured boundaries. */
    if (block_start == start_pc)
        instrument = true;
    else if (block_start == stop_pc)
        instrument = false;

    if (!instrument)
        return DR_EMIT_DEFAULT;

    for (cur = instrlist_first(bb); cur != NULL; cur = following) {
        app_pc branch_pc;
        app_pc fallthrough_pc;
        app_pc target_pc;
        cbr_state_t seen;
        bool want_taken;
        bool want_not_taken;
        elem_t *entry;
        instr_t *taken_label;

        /* Fetch the successor up front: `cur` may be replaced below and
         * new instructions are appended to the list.
         */
        following = instr_get_next(cur);

        /* Only conditional branches are of interest.  Target and
         * fall-through are known now, but an edge should be recorded only
         * when it actually executes, so each edge gets its own callout.
         */
        if (!instr_is_cbr(cur))
            continue;

        /* Find out which edges of this branch have already been seen. */
        branch_pc = instr_get_app_pc(cur);
        entry = lookup(table, branch_pc);
        if (entry != NULL) {
            seen = entry->state;
        } else {
            seen = CBR_NONE;
            insert(table, branch_pc, CBR_NONE);
        }
        want_taken = !(seen & CBR_TAKEN);
        want_not_taken = !(seen & CBR_NOT_TAKEN);
        if (!want_taken && !want_not_taken)
            continue; /* both edges already recorded */

        fallthrough_pc = (app_pc)decode_next_pc(drcontext, (byte *)branch_pc);
        target_pc = instr_get_branch_target_pc(cur);

        /* Redirect the cbr to a local label that precedes the 'taken'
         * callout; the 'not-taken' callout sits on the fall-through path.
         * The cbr becomes a meta instruction with no translation.
         */
        taken_label = INSTR_CREATE_label(drcontext);
        instr_set_meta(cur);
        instr_set_translation(cur, NULL);

        /* A short cti must be widened so it can reach its new target.  For
         * jecxz/loop the conversion returns the long 'taken' jump, which is
         * the instruction whose target we want to set.
         */
        if (instr_is_cti_short(cur))
            cur = instr_convert_short_meta_jmp_to_long(drcontext, bb, cur);
        instr_set_target(cur, opnd_create_instr(taken_label));

        /* Callout for the not-taken case. */
        if (want_not_taken) {
            dr_insert_clean_call(drcontext, bb, NULL, (void *)at_not_taken,
                                 false /* don't save fp state */,
                                 2 /* 2 args for at_not_taken */,
                                 OPND_CREATE_INTPTR((ptr_uint_t)branch_pc),
                                 OPND_CREATE_INTPTR((ptr_uint_t)fallthrough_pc));
        }

        /* Jump to the original fall-through address (application, i.e.
         * non-meta, instruction with a translation).
         */
        instrlist_preinsert(bb, NULL,
                            INSTR_XL8(INSTR_CREATE_jmp(drcontext,
                                                       opnd_create_pc(fallthrough_pc)),
                                      fallthrough_pc));

        /* The label comes right before the 'taken' callout. */
        MINSERT(bb, NULL, taken_label);

        /* Callout for the taken case. */
        if (want_taken) {
            dr_insert_clean_call(drcontext, bb, NULL, (void *)at_taken,
                                 false /* don't save fp state */,
                                 2 /* 2 args for at_taken */,
                                 OPND_CREATE_INTPTR((ptr_uint_t)branch_pc),
                                 OPND_CREATE_INTPTR((ptr_uint_t)target_pc));
        }

        /* Jump to the original target block (also a non-meta instruction). */
        instrlist_preinsert(bb, NULL,
                            INSTR_XL8(INSTR_CREATE_jmp(drcontext,
                                                       opnd_create_pc(target_pc)),
                                      target_pc));
    }

    /* Our added instrumentation is not constant, so ask for translations to
     * be stored now.
     */
    return DR_EMIT_STORE_TRANSLATIONS;
}
/* PR 215143: auto-magically add size prefixes.
 *
 * Builds instructions whose operand sizes differ from the defaults and
 * passes each to test_instr_encode() / test_instr_encode_and_decode()
 * together with a small integer.
 *
 * NOTE(review): that integer appears to be the expected encoded length in
 * bytes -- it matches the byte counts in the disassembly listings quoted
 * below -- confirm against the helpers, which are defined elsewhere.
 *
 * dc - DynamoRIO context used to create the instructions.
 */
static void test_size_changes(void *dc)
{
    /*
     * 0x004299d4  67 51        addr16 push  %ecx %sp -> %sp (%sp)
     * 0x004299d4  66 51        data16 push  %cx %esp -> %esp (%esp)
     * 0x004299d4  66 67 51     data16 addr16 push  %cx %sp -> %sp (%sp)
     * 0x004298a4  e3 fe        jecxz  $0x004298a4 %ecx
     * 0x004298a4  67 e3 fd     addr16 jecxz  $0x004298a4 %cx
     * 0x080a5260  67 e2 fd     addr16 loop   $0x080a5260 %cx -> %cx
     * 0x080a5260  67 e1 fd     addr16 loope  $0x080a5260 %cx -> %cx
     * 0x080a5260  67 e0 fd     addr16 loopne $0x080a5260 %cx -> %cx
     */
    instr_t *instr;
    /* push addr16: stack pointer operands use the non-default address size */
    instr = instr_create_2dst_2src(dc, OP_push, opnd_create_reg(IF_X64_ELSE(REG_ESP, REG_SP)), opnd_create_base_disp(IF_X64_ELSE(REG_ESP, REG_SP), REG_NULL, 0, -(int)sizeof(void*), OPSZ_ret), opnd_create_reg(REG_XCX), opnd_create_reg(IF_X64_ELSE(REG_ESP, REG_SP)));
    test_instr_encode(dc, instr, 2);
#ifndef X64 /* can only shorten on AMD */
    /* push data16 */
    instr = instr_create_2dst_2src(dc, OP_push, opnd_create_reg(REG_XSP), opnd_create_base_disp(REG_XSP, REG_NULL, 0, -2, OPSZ_2), opnd_create_reg(REG_CX), opnd_create_reg(REG_XSP));
    test_instr_encode(dc, instr, 2);
    /* push addr16 and data16 */
    instr = instr_create_2dst_2src(dc, OP_push, opnd_create_reg(REG_SP), opnd_create_base_disp(REG_SP, REG_NULL, 0, -2, OPSZ_2), opnd_create_reg(REG_CX), opnd_create_reg(REG_SP));
    test_instr_encode(dc, instr, 3);
#endif
    /* jecxz and jcxz */
    test_instr_encode(dc, INSTR_CREATE_jecxz(dc, opnd_create_pc(buf)), 2);
    /* test non-default count register size (requires addr prefix) */
    instr = instr_create_0dst_2src (dc, OP_jecxz, opnd_create_pc(buf), opnd_create_reg(IF_X64_ELSE(REG_ECX, REG_CX)));
    test_instr_encode(dc, instr, 3);
    instr = instr_create_1dst_2src (dc, OP_loop, opnd_create_reg(IF_X64_ELSE(REG_ECX, REG_CX)), opnd_create_pc(buf), opnd_create_reg(IF_X64_ELSE(REG_ECX, REG_CX)));
    test_instr_encode(dc, instr, 3);
    instr = instr_create_1dst_2src (dc, OP_loope, opnd_create_reg(IF_X64_ELSE(REG_ECX, REG_CX)), opnd_create_pc(buf), opnd_create_reg(IF_X64_ELSE(REG_ECX, REG_CX)));
    test_instr_encode(dc, instr, 3);
    instr = instr_create_1dst_2src (dc, OP_loopne, opnd_create_reg(IF_X64_ELSE(REG_ECX, REG_CX)), opnd_create_pc(buf), opnd_create_reg(IF_X64_ELSE(REG_ECX, REG_CX)));
    test_instr_encode(dc, instr, 3);

    /*
     * 0x004ee0b8  a6           cmps   %ds:(%esi) %es:(%edi) %esi %edi -> %esi %edi
     * 0x004ee0b8  67 a6        addr16 cmps   %ds:(%si) %es:(%di) %si %di -> %si %di
     * 0x004ee0b8  66 a7        data16 cmps   %ds:(%esi) %es:(%edi) %esi %edi -> %esi %edi
     * 0x004ee0b8  d7           xlat   %ds:(%ebx,%al,1) -> %al
     * 0x004ee0b8  67 d7        addr16 xlat   %ds:(%bx,%al,1) -> %al
     * 0x004ee0b8  0f f7 c1     maskmovq %mm0 %mm1 -> %ds:(%edi)
     * 0x004ee0b8  67 0f f7 c1  addr16 maskmovq %mm0 %mm1 -> %ds:(%di)
     * 0x004ee0b8  66 0f f7 c1  maskmovdqu %xmm0 %xmm1 -> %ds:(%edi)
     * 0x004ee0b8  67 66 0f f7 c1 addr16 maskmovdqu %xmm0 %xmm1 -> %ds:(%di)
     */
    /* cmps: default form, then non-default address size, then 2-byte data */
    test_instr_encode(dc, INSTR_CREATE_cmps_1(dc), 1);
    instr = instr_create_2dst_4src (dc, OP_cmps, opnd_create_reg(IF_X64_ELSE(REG_ESI, REG_SI)), opnd_create_reg(IF_X64_ELSE(REG_EDI, REG_DI)), opnd_create_far_base_disp(SEG_DS, IF_X64_ELSE(REG_ESI, REG_SI), REG_NULL, 0, 0, OPSZ_1), opnd_create_far_base_disp(SEG_ES, IF_X64_ELSE(REG_EDI, REG_DI), REG_NULL, 0, 0, OPSZ_1), opnd_create_reg(IF_X64_ELSE(REG_ESI, REG_SI)), opnd_create_reg(IF_X64_ELSE(REG_EDI, REG_DI)));
    test_instr_encode(dc, instr, 2);

    instr = instr_create_2dst_4src (dc, OP_cmps, opnd_create_reg(REG_XSI), opnd_create_reg(REG_XDI), opnd_create_far_base_disp(SEG_DS, REG_XSI, REG_NULL, 0, 0, OPSZ_2), opnd_create_far_base_disp(SEG_ES, REG_XDI, REG_NULL, 0, 0, OPSZ_2), opnd_create_reg(REG_XSI), opnd_create_reg(REG_XDI));
    test_instr_encode_and_decode(dc, instr, 2, true/*src*/, 0, OPSZ_2, 2);

    /* xlat: default form, then non-default address size */
    test_instr_encode(dc, INSTR_CREATE_xlat(dc), 1);
    instr = instr_create_1dst_1src (dc, OP_xlat, opnd_create_reg(REG_AL), opnd_create_far_base_disp(SEG_DS, IF_X64_ELSE(REG_EBX, REG_BX), REG_AL, 1, 0, OPSZ_1));
    test_instr_encode(dc, instr, 2);

    /* maskmovq / maskmovdqu: default form, then non-default address size */
    instr = INSTR_CREATE_maskmovq(dc, opnd_create_reg(REG_MM0), opnd_create_reg(REG_MM1));
    test_instr_encode(dc, instr, 3);
    instr = instr_create_1dst_2src (dc, OP_maskmovq, opnd_create_far_base_disp(SEG_DS, IF_X64_ELSE(REG_EDI, REG_DI), REG_NULL, 0, 0, OPSZ_8), opnd_create_reg(REG_MM0), opnd_create_reg(REG_MM1));
    test_instr_encode(dc, instr, 4);

    instr = INSTR_CREATE_maskmovdqu(dc, opnd_create_reg(REG_XMM0), opnd_create_reg(REG_XMM1));
    test_instr_encode(dc, instr, 4);
    instr = instr_create_1dst_2src (dc, OP_maskmovdqu, opnd_create_far_base_disp(SEG_DS, IF_X64_ELSE(REG_EDI, REG_DI), REG_NULL, 0, 0, OPSZ_16), opnd_create_reg(REG_XMM0), opnd_create_reg(REG_XMM1));
    test_instr_encode(dc, instr, 5);

    /* Test iretw, iretd, iretq.  Unlike most stack operations, iretd (and
     * lretd on AMD) exist and are the default in 64-bit mode, so iret has a
     * different size/type than most other stack operations.  Our
     * instr_create routine should match the stack (iretq on 64-bit, iretd on
     * 32-bit).  See PR 191977.
     */
    instr = INSTR_CREATE_iret(dc);
#ifdef X64
    test_instr_encode_and_decode(dc, instr, 2, true /*src*/, 1, OPSZ_40, 40);
    ASSERT(buf[0] == 0x48); /* check for rex.w prefix */
#else
    test_instr_encode_and_decode(dc, instr, 1, true /*src*/, 1, OPSZ_12, 12);
#endif
    /* explicit 12-byte stack operand */
    instr = instr_create_1dst_2src (dc, OP_iret, opnd_create_reg(REG_XSP), opnd_create_reg(REG_XSP), opnd_create_base_disp(REG_XSP, REG_NULL, 0, 0, OPSZ_12));
    test_instr_encode_and_decode(dc, instr, 1, true /*src*/, 1, OPSZ_12, 12);
    /* explicit 6-byte stack operand */
    instr = instr_create_1dst_2src (dc, OP_iret, opnd_create_reg(REG_XSP), opnd_create_reg(REG_XSP), opnd_create_base_disp(REG_XSP, REG_NULL, 0, 0, OPSZ_6));
    test_instr_encode_and_decode(dc, instr, 2, true /*src*/, 1, OPSZ_6, 6);
    ASSERT(buf[0] == 0x66); /* check for data prefix */
}
void Shade::detour(void *address, void *target, void *&trampoline) { const size_t instr_max = 17; auto list = instrlist_create(dr); byte instr_data[instr_max]; byte *current = (byte *)address; byte *min_pos = (byte *)address + 5; size_t size = 0; while(current < min_pos) { read(current, instr_data, instr_max); auto instr = instr_create(dr); byte *decoded = decode_from_copy(dr, instr_data, current, instr); if(!decoded) error("Unknown instruction"); instrlist_append(list, instr); instr_make_persistent(dr, instr); current += (size_t)(decoded - instr_data); size += instr_length(dr, instr); } auto instr = INSTR_CREATE_jmp(dr, opnd_create_pc(current)); size += instr_length(dr, instr); instrlist_append(list, instr); auto local_trampoline = alloca(size); if(!local_trampoline) error("Out of memory"); void *remote = code_section.allocate(size, 4); if(!instrlist_encode_to_copy(dr, list, (byte *)local_trampoline, (byte *)remote, 0, true)) error("Unable to encode instructions"); instrlist_clear_and_destroy(dr, list); write(remote, local_trampoline, size); trampoline = remote; char code[5]; DWORD offset = (size_t)target - (size_t)address - 5; code[0] = 0xE9; *(DWORD *)(code + 1) = offset; access(address, 5, [&] { write(address, code, 5); }); }
/* instrument_instr inserts, before `where`, code that appends one ins_ref_t
 * record (the pc and opcode of `where`) to the per-thread buffer and jumps
 * to our own code cache to invoke the clean call when the buffer is full.
 *
 * drcontext - DynamoRIO context used for all instruction creation
 * ilist     - instruction list being instrumented
 * where     - application instruction to record; all code is inserted
 *             in front of it
 *
 * Two scratch registers are reserved through drreg and released again at
 * the end of the inserted sequence.  If either reservation fails the
 * function asserts and returns without inserting the record code.
 */
static void
instrument_instr(void *drcontext, instrlist_t *ilist, instr_t *where)
{
    instr_t *instr, *call, *restore;
    opnd_t opnd1, opnd2;
    reg_id_t reg1, reg2;
    drvector_t allowed;
    app_pc pc;

    /* Steal two scratch registers.
     * reg2 must be ECX or RCX for jecxz, so its reservation is restricted
     * to a vector that allows only XCX.
     */
    drreg_init_and_fill_vector(&allowed, false);
    drreg_set_vector_entry(&allowed, DR_REG_XCX, true);
    if (drreg_reserve_register(drcontext, ilist, where, &allowed, &reg2) !=
            DRREG_SUCCESS ||
        drreg_reserve_register(drcontext, ilist, where, NULL, &reg1) !=
            DRREG_SUCCESS) {
        DR_ASSERT(false); /* cannot recover */
        drvector_delete(&allowed);
        return;
    }
    drvector_delete(&allowed);

    /* The following assembly performs the following instructions
     * buf_ptr->pc = pc;
     * buf_ptr->opcode = opcode;
     * buf_ptr++;
     * if (buf_ptr >= buf_end_ptr)
     *    clean_call();
     */
    drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, reg2);
    /* Load data->buf_ptr into reg2 */
    opnd1 = opnd_create_reg(reg2);
    opnd2 = OPND_CREATE_MEMPTR(reg2, offsetof(per_thread_t, buf_ptr));
    instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* Store pc */
    pc = instr_get_app_pc(where);
    /* For 64-bit, we can't use a 64-bit immediate so we split pc into two halves.
     * We could alternatively load it into reg1 and then store reg1.
     * We use a convenience routine that does the two-step store for us.
     */
    opnd1 = OPND_CREATE_MEMPTR(reg2, offsetof(ins_ref_t, pc));
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t) pc, opnd1,
                                     ilist, where, NULL, NULL);

    /* Store opcode */
    opnd1 = OPND_CREATE_MEMPTR(reg2, offsetof(ins_ref_t, opcode));
    opnd2 = OPND_CREATE_INT32(instr_get_opcode(where));
    instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* Increment reg value by sizeof(ins_ref_t) using lea instr */
    opnd1 = opnd_create_reg(reg2);
    opnd2 = opnd_create_base_disp(reg2, DR_REG_NULL, 0,
                                  sizeof(ins_ref_t),
                                  OPSZ_lea);
    instr = INSTR_CREATE_lea(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* Update the data->buf_ptr */
    drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, reg1);
    opnd1 = OPND_CREATE_MEMPTR(reg1, offsetof(per_thread_t, buf_ptr));
    opnd2 = opnd_create_reg(reg2);
    instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* We use the lea + jecxz trick for better performance.
     * lea and jecxz won't disturb the eflags, so we won't need
     * code to save and restore the application's eflags.
     */
    /* lea [reg2 - buf_end] => reg2 */
    opnd1 = opnd_create_reg(reg1);
    opnd2 = OPND_CREATE_MEMPTR(reg1, offsetof(per_thread_t, buf_end));
    instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);
    opnd1 = opnd_create_reg(reg2);
    opnd2 = opnd_create_base_disp(reg1, reg2, 1, 0, OPSZ_lea);
    instr = INSTR_CREATE_lea(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);

    /* jecxz call */
    call  = INSTR_CREATE_label(drcontext);
    opnd1 = opnd_create_instr(call);
    instr = INSTR_CREATE_jecxz(drcontext, opnd1);
    instrlist_meta_preinsert(ilist, where, instr);

    /* jump restore to skip clean call */
    restore = INSTR_CREATE_label(drcontext);
    opnd1 = opnd_create_instr(restore);
    instr = INSTR_CREATE_jmp(drcontext, opnd1);
    instrlist_meta_preinsert(ilist, where, instr);

    /* clean call */
    /* We jump to our generated lean procedure which performs a full context
     * switch and clean call invocation. This is to reduce the code cache size.
     */
    instrlist_meta_preinsert(ilist, where, call);
    /* mov restore DR_REG_XCX */
    opnd1 = opnd_create_reg(reg2);
    /* This is the return address for jumping back from the lean procedure. */
    opnd2 = opnd_create_instr(restore);
    /* We could use instrlist_insert_mov_instr_addr(), but with a register
     * destination we know we can use a 64-bit immediate.
     */
    instr = INSTR_CREATE_mov_imm(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(ilist, where, instr);
    /* jmp code_cache */
    opnd1 = opnd_create_pc(code_cache);
    instr = INSTR_CREATE_jmp(drcontext, opnd1);
    instrlist_meta_preinsert(ilist, where, instr);

    /* Restore scratch registers */
    instrlist_meta_preinsert(ilist, where, restore);
    if (drreg_unreserve_register(drcontext, ilist, where, reg1) != DRREG_SUCCESS ||
        drreg_unreserve_register(drcontext, ilist, where, reg2) != DRREG_SUCCESS)
        DR_ASSERT(false);
}
/*
 * instrument_mem is called whenever a memory reference is identified.
 * It inserts code before the memory reference to fill the memory buffer
 * and jump to our own code cache to call the clean_call when the buffer
 * is full.
 *
 * drcontext - DynamoRIO context used for all instruction creation
 * ilist     - instruction list being instrumented
 * where     - application instruction containing the memory reference
 * pos       - index of the operand within the dsts (write) or srcs (read)
 * write     - true to record destination operand `pos`, false for source
 *
 * XBX and XCX are spilled to SPILL_SLOT_2 / SPILL_SLOT_3 for the duration
 * of the inserted sequence.
 */
static void
instrument_mem(void *drcontext, instrlist_t *ilist, instr_t *where,
               int pos, bool write)
{
    /* A dead register could be picked instead of always using XBX. */
    const reg_id_t addr_reg = DR_REG_XBX;
    /* Must be ECX or RCX because jecxz tests it. */
    const reg_id_t buf_reg = DR_REG_XCX;
    instr_t *full_label, *resume_label, *imm_first, *imm_second;
    opnd_t mem_opnd;
    per_thread_t *data;
    app_pc pc;

    /* Value is not consumed below; the emitted code reads TLS at run time. */
    data = drmgr_get_tls_field(drcontext, tls_index);

    /* Steal the registers for the memory reference address.
     * The save/restore could be skipped when the register is known dead.
     */
    dr_save_reg(drcontext, ilist, where, addr_reg, SPILL_SLOT_2);
    dr_save_reg(drcontext, ilist, where, buf_reg, SPILL_SLOT_3);

    mem_opnd = write ? instr_get_dst(where, pos) : instr_get_src(where, pos);

    /* addr_reg <- effective address (drutil does the work). */
    drutil_insert_get_mem_addr(drcontext, ilist, where, mem_opnd, addr_reg, buf_reg);

    /* The code emitted from here on is equivalent to:
     *   buf_ptr->write = write;
     *   buf_ptr->addr  = addr;
     *   buf_ptr->size  = size;
     *   buf_ptr->pc    = pc;
     *   buf_ptr++;
     *   if (buf_ptr >= buf_end_ptr)
     *       clean_call();
     */

    /* buf_reg <- data->buf_ptr */
    drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, buf_reg);
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_ld(drcontext, opnd_create_reg(buf_reg),
                            OPND_CREATE_MEMPTR(buf_reg, offsetof(per_thread_t, buf_ptr))));

    /* buf_ptr->write = write */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_imm(drcontext,
                             OPND_CREATE_MEM32(buf_reg, offsetof(mem_ref_t, write)),
                             OPND_CREATE_INT32(write)));

    /* buf_ptr->addr = addr_reg */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_st(drcontext,
                            OPND_CREATE_MEMPTR(buf_reg, offsetof(mem_ref_t, addr)),
                            opnd_create_reg(addr_reg)));

    /* buf_ptr->size = size (drutil_opnd_mem_size_in_bytes handles OP_enter) */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_st(drcontext,
                            OPND_CREATE_MEMPTR(buf_reg, offsetof(mem_ref_t, size)),
                            OPND_CREATE_INT32(drutil_opnd_mem_size_in_bytes(mem_opnd, where))));

    /* buf_ptr->pc = pc.  A 64-bit immediate cannot be stored directly on
     * 64-bit, so the convenience routine splits it into two halves when
     * needed; the resulting instruction(s) are then marked meta.
     */
    pc = instr_get_app_pc(where);
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t) pc,
                                     OPND_CREATE_MEMPTR(buf_reg, offsetof(mem_ref_t, pc)),
                                     ilist, where, &imm_first, &imm_second);
    instr_set_ok_to_mangle(imm_first, false/*meta*/);
    if (imm_second != NULL)
        instr_set_ok_to_mangle(imm_second, false/*meta*/);

    /* buf_reg += sizeof(mem_ref_t), via lea so eflags stay intact */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_lea(drcontext, opnd_create_reg(buf_reg),
                         opnd_create_base_disp(buf_reg, DR_REG_NULL, 0,
                                               sizeof(mem_ref_t), OPSZ_lea)));

    /* data->buf_ptr = buf_reg */
    drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, addr_reg);
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_st(drcontext,
                            OPND_CREATE_MEMPTR(addr_reg, offsetof(per_thread_t, buf_ptr)),
                            opnd_create_reg(buf_reg)));

    /* lea + jecxz trick: neither instruction touches eflags, so no flag
     * save/restore is required.
     * lea [buf_reg - buf_end] => buf_reg
     */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_ld(drcontext, opnd_create_reg(addr_reg),
                            OPND_CREATE_MEMPTR(addr_reg, offsetof(per_thread_t, buf_end))));
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_lea(drcontext, opnd_create_reg(buf_reg),
                         opnd_create_base_disp(addr_reg, buf_reg, 1, 0, OPSZ_lea)));

    /* jecxz full_label */
    full_label = INSTR_CREATE_label(drcontext);
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_jecxz(drcontext, opnd_create_instr(full_label)));

    /* Buffer not full: skip over the clean-call path. */
    resume_label = INSTR_CREATE_label(drcontext);
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_jmp(drcontext, opnd_create_instr(resume_label)));

    /* Buffer-full path.  We jump to the lean procedure, which performs the
     * full context switch and clean call invocation; this keeps the code
     * cache small.
     */
    instrlist_meta_preinsert(ilist, where, full_label);
    /* buf_reg (XCX) <- return address for the lean procedure.  We could use
     * instrlist_insert_mov_instr_addr(), but with a register destination we
     * know we can use a 64-bit immediate.
     */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_imm(drcontext, opnd_create_reg(buf_reg),
                             opnd_create_instr(resume_label)));
    /* jmp code_cache */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_jmp(drcontext, opnd_create_pc(code_cache)));

    /* Common exit: give the stolen registers back. */
    instrlist_meta_preinsert(ilist, where, resume_label);
    dr_restore_reg(drcontext, ilist, where, addr_reg, SPILL_SLOT_2);
    dr_restore_reg(drcontext, ilist, where, buf_reg, SPILL_SLOT_3);
}
/* Basic-block event callback that exercises the pointer-sized immediate
 * helpers.  Ahead of the first instruction of every block it inserts, in order:
 *   - three instrlist_insert_push_immed_ptrsz() pushes (1, -1, global_var),
 *   - three instrlist_insert_mov_immed_ptrsz() stores (global_var, -1, 1)
 *     into var0, var1 and var2,
 *   - a call to my_abort,
 * all bracketed by dr_prepare_for_call() / dr_cleanup_after_call().
 *
 * For each helper call the routine checks whether a second instruction was
 * produced and reports a mismatch against the expectation on STDERR.
 *
 * The tag, for_trace and translating arguments are not used.
 * Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
bb_event(void *drcontext, void *tag, instrlist_t *bb, bool for_trace, bool translating)
{
    /* All instrumentation goes in front of the block's first instruction. */
    instr_t *where = instrlist_first(bb);
    /* Outputs of the *_immed_ptrsz helpers: the one or two instrs they insert. */
    instr_t *first, *second;

    /* One past INT_MAX; per the X64 checks below, this value is expected to
     * require a second instruction on 64-bit builds.
     */
    global_var = (ptr_uint_t)INT_MAX + 1;

    dr_prepare_for_call(drcontext, bb, where);

    /* test push_imm */

    /* Push 1: a single instruction is expected. */
    instrlist_insert_push_immed_ptrsz(drcontext, (ptr_int_t)1, bb, where,
                                      &first, &second);
    instr_set_ok_to_mangle(first, false);
    if (second != NULL) {
        dr_fprintf(STDERR, "Error on push 1\n");
    }
#ifdef X64
    /* Copy the value just pushed (top of stack) into a register;
     * presumably the argument register my_abort reads it from -- TODO confirm.
     */
    MINSERT(bb, where,
            INSTR_CREATE_mov_ld(drcontext,
                                opnd_create_reg(IF_LINUX_ELSE(DR_REG_RDX, DR_REG_R8)),
                                OPND_CREATE_MEMPTR(DR_REG_RSP, 0)));
#endif

    /* Push -1: a single instruction is expected. */
    instrlist_insert_push_immed_ptrsz(drcontext, (ptr_int_t)-1, bb, where,
                                      &first, &second);
    instr_set_ok_to_mangle(first, false);
    if (second != NULL) {
        dr_fprintf(STDERR, "Error on push -1\n");
    }
#ifdef X64
    MINSERT(bb, where,
            INSTR_CREATE_mov_ld(drcontext,
                                opnd_create_reg(IF_LINUX_ELSE(DR_REG_RSI, DR_REG_RDX)),
                                OPND_CREATE_MEMPTR(DR_REG_RSP, 0)));
#endif

    /* Push global_var: on X64 a second instruction is expected. */
    instrlist_insert_push_immed_ptrsz(drcontext, global_var, bb, where,
                                      &first, &second);
    instr_set_ok_to_mangle(first, false);
#ifdef X64
    if (second != NULL) {
        instr_set_ok_to_mangle(second, false);
    } else {
        dr_fprintf(STDERR, "Error on push tag\n");
    }
    MINSERT(bb, where,
            INSTR_CREATE_mov_ld(drcontext,
                                opnd_create_reg(IF_LINUX_ELSE(DR_REG_RDI, DR_REG_RCX)),
                                OPND_CREATE_MEMPTR(DR_REG_RSP, 0)));
#endif

    /* test mov_imm */

    /* Store global_var into var0: on X64 a second instruction is expected. */
    instrlist_insert_mov_immed_ptrsz(drcontext, global_var,
                                     OPND_CREATE_ABSMEM(&var0, OPSZ_PTR),
                                     bb, where, &first, &second);
    instr_set_ok_to_mangle(first, false);
#ifdef X64
    if (second != NULL) {
        instr_set_ok_to_mangle(second, false);
    } else {
        dr_fprintf(STDERR, "Error on mov %p\n", global_var);
    }
#endif

    /* Store -1 into var1: a single instruction is expected. */
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)-1,
                                     OPND_CREATE_ABSMEM(&var1, OPSZ_PTR),
                                     bb, where, &first, &second);
    instr_set_ok_to_mangle(first, false);
    if (second != NULL) {
        dr_fprintf(STDERR, "Error on mov -1\n");
    }

    /* Store 1 into var2: a single instruction is expected. */
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)1,
                                     OPND_CREATE_ABSMEM(&var2, OPSZ_PTR),
                                     bb, where, &first, &second);
    instr_set_ok_to_mangle(first, false);
    if (second != NULL) {
        dr_fprintf(STDERR, "Error on mov 1\n");
    }

    /* call */
    MINSERT(bb, where,
            INSTR_CREATE_call(drcontext, opnd_create_pc((void *)my_abort)));
    dr_cleanup_after_call(drcontext, bb, where, 0);

    return DR_EMIT_DEFAULT;
}