/*
 * Inserts, before `where` in `bb`, meta instructions that increment two
 * 4-byte counters located `offset` bytes into a counter structure:
 *   - the process-wide counter stored in global_count (lock-prefixed inc);
 *   - the current thread's counter, reached through the drmgr TLS field
 *     identified by tls_idx.
 *
 * drcontext: DynamoRIO context of the thread building the block.
 * bb:        instruction list being instrumented.
 * where:     instruction in front of which the new code is placed.
 * offset:    byte offset of the counter inside the counter structure.
 */
static void
insert_counter_update(void *drcontext, instrlist_t *bb, instr_t *where, int offset)
{
    /* Since the inc instruction clobbers 5 of the arithmetic eflags,
     * we have to save them around the inc. We could be more efficient
     * by not bothering to save the overflow flag and constructing our
     * own sequence of instructions to save the other 5 flags (using
     * lahf).
     */
    if (drreg_reserve_aflags(drcontext, bb, where) != DRREG_SUCCESS) {
        DR_ASSERT(false); /* cannot recover */
        return;
    }

    /* Increment the global counter using the lock prefix to make it atomic
     * across threads. It would be cheaper to aggregate the thread counters
     * in the exit events, but this sample is intended to illustrate inserted
     * instrumentation.
     */
    instrlist_meta_preinsert(
        bb, where,
        LOCK(INSTR_CREATE_inc(
            drcontext,
            OPND_CREATE_ABSMEM(((byte *)&global_count) + offset, OPSZ_4))));

    /* Increment the thread private counter. */
    if (dr_using_all_private_caches()) {
        per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
        /* private caches - we can use an absolute address.
         * The operand must be built from the value of `data` (the per-thread
         * structure), not from `&data`, which is the address of this local
         * pointer variable and is unrelated to the counter storage.
         */
        instrlist_meta_preinsert(
            bb, where,
            INSTR_CREATE_inc(drcontext,
                             OPND_CREATE_ABSMEM(((byte *)data) + offset, OPSZ_4)));
    } else {
        /* shared caches - we must indirect via thread local storage */
        reg_id_t scratch;
        if (drreg_reserve_register(drcontext, bb, where, NULL, &scratch) !=
            DRREG_SUCCESS) {
            /* `scratch` was never set: skip the per-thread increment instead
             * of emitting code that addresses memory through an undefined
             * register when assertions are compiled out.
             */
            DR_ASSERT(false);
        } else {
            drmgr_insert_read_tls_field(drcontext, tls_idx, bb, where, scratch);
            instrlist_meta_preinsert(
                bb, where,
                INSTR_CREATE_inc(drcontext, OPND_CREATE_MEM32(scratch, offset)));
            if (drreg_unreserve_register(drcontext, bb, where, scratch) !=
                DRREG_SUCCESS)
                DR_ASSERT(false);
        }
    }

    if (drreg_unreserve_aflags(drcontext, bb, where) != DRREG_SUCCESS)
        DR_ASSERT(false); /* cannot recover */
}
/*
 * Basic-block event: counts the instructions in `bb` and inserts, in front of
 * the first instruction, code that adds that count to global_count.
 *
 * global_count is updated as two 4-byte halves (add on the low half, adc with
 * zero on the half at byte offset 4).  No lock prefix is emitted, so the
 * update is not atomic.
 *
 * The arithmetic flags (and xax, which holds them while saved) are preserved
 * around the add/adc unless the first instruction of the block writes all six
 * arithmetic flags without reading any of them, in which case the flags are
 * dead at the insertion point.
 *
 * Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb, bool for_trace,
                  bool translating)
{
    instr_t *instr, *first = instrlist_first(bb);
    uint num_instrs = 0;
    uint flags;
    bool need_restore = false;

    /* Nothing to count and no insertion point in an empty list. */
    if (first == NULL)
        return DR_EMIT_DEFAULT;

    /* count instruction */
    for (instr = first; instr != NULL; instr = instr_get_next(instr))
        num_instrs++;

    /* The instrumentation goes in front of `first`, so flag liveness is
     * decided by `first`.  (`instr` is NULL once the counting loop has
     * finished and must not be queried.)
     */
    flags = instr_get_arith_flags(first);

    /* eflags are not dead: save eflags to register */
    if (!(TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags))) {
        need_restore = true;
        dr_save_reg(drcontext, bb, first, DR_REG_XAX, SPILL_SLOT_1);
        dr_save_arith_flags_to_xax(drcontext, bb, first);
    }

    /* add the instruction count */
    instrlist_meta_preinsert(
        bb, first,
        INSTR_CREATE_add(drcontext,
                         OPND_CREATE_ABSMEM((byte *)&global_count, OPSZ_4),
                         OPND_CREATE_INT32(num_instrs)));
    /* Need to carry since it is a 8 byte variable. */
    instrlist_meta_preinsert(
        bb, first,
        INSTR_CREATE_adc(drcontext,
                         OPND_CREATE_ABSMEM((byte *)&global_count + 4, OPSZ_4),
                         OPND_CREATE_INT32(0)));

    /* restore eflags */
    if (need_restore) {
        dr_restore_arith_flags_from_xax(drcontext, bb, first);
        dr_restore_reg(drcontext, bb, first, DR_REG_XAX, SPILL_SLOT_1);
    }

    return DR_EMIT_DEFAULT;
}
/*
 * Inserts, before `where` in `bb`, meta instructions that increment two
 * 4-byte counters located `offset` bytes into a counter structure:
 *   - the process-wide counter stored in global_count (lock-prefixed inc);
 *   - the current thread's counter, reached through the thread's TLS field.
 *
 * xax is spilled to SPILL_SLOT_1 and used to hold the saved arithmetic flags;
 * in the shared-cache path xbx is spilled to SPILL_SLOT_2 and used as a
 * scratch register.  Both are restored before `where`.
 */
static void
insert_counter_update(void *drcontext, instrlist_t *bb, instr_t *where, int offset)
{
    /* Since the inc instruction clobbers 5 of the arithmetic eflags,
     * we have to save them around the inc. We could be more efficient
     * by not bothering to save the overflow flag and constructing our
     * own sequence of instructions to save the other 5 flags (using
     * lahf) or by doing a liveness analysis on the flags and saving
     * only if live.
     */
    dr_save_reg(drcontext, bb, where, DR_REG_XAX, SPILL_SLOT_1);
    dr_save_arith_flags_to_xax(drcontext, bb, where);

    /* Increment the global counter using the lock prefix to make it atomic
     * across threads. It would be cheaper to aggregate the thread counters
     * in the exit events, but this sample is intended to illustrate inserted
     * instrumentation.
     */
    instrlist_meta_preinsert(
        bb, where,
        LOCK(INSTR_CREATE_inc(
            drcontext,
            OPND_CREATE_ABSMEM(((byte *)&global_count) + offset, OPSZ_4))));

    /* Increment the thread private counter. */
    if (dr_using_all_private_caches()) {
        per_thread_t *data = (per_thread_t *)dr_get_tls_field(drcontext);
        /* private caches - we can use an absolute address.
         * The operand must be built from the value of `data` (the per-thread
         * structure), not from `&data`, which is the address of this local
         * pointer variable and is unrelated to the counter storage.
         */
        instrlist_meta_preinsert(
            bb, where,
            INSTR_CREATE_inc(drcontext,
                             OPND_CREATE_ABSMEM(((byte *)data) + offset, OPSZ_4)));
    } else {
        /* shared caches - we must indirect via thread local storage */
        /* We spill xbx to use a scratch register (we could do a liveness
         * analysis to try and find a dead register to use). Note that xax
         * is currently holding the saved eflags.
         */
        dr_save_reg(drcontext, bb, where, DR_REG_XBX, SPILL_SLOT_2);
        dr_insert_read_tls_field(drcontext, bb, where, DR_REG_XBX);
        instrlist_meta_preinsert(
            bb, where,
            INSTR_CREATE_inc(drcontext, OPND_CREATE_MEM32(DR_REG_XBX, offset)));
        dr_restore_reg(drcontext, bb, where, DR_REG_XBX, SPILL_SLOT_2);
    }

    /* Restore flags and xax. */
    dr_restore_arith_flags_from_xax(drcontext, bb, where);
    dr_restore_reg(drcontext, bb, where, DR_REG_XAX, SPILL_SLOT_1);
}
/*
 * Emits a meta store, placed before `where` in `ilist`, that writes EAX to
 * the 4-byte absolute-address operand naming info->aflags.
 */
void
umbra_save_eax_aflags(void *drcontext, umbra_info_t *info, instrlist_t *ilist,
                      instr_t *where)
{
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_st(drcontext,
                            OPND_CREATE_ABSMEM(&info->aflags, OPSZ_4),
                            opnd_create_reg(DR_REG_EAX)));
}
/*
 * Reports whether `instr` is an OP_mov_ld whose destination 0 is the register
 * `reg` and whose source 0 is the pointer-sized absolute-address operand for
 * info->spill_regs[reg - REG_SPILL_START].
 *
 * Returns true only when all three conditions hold.
 */
static bool
instr_is_reg_restore(instr_t *instr, reg_id_t reg, umbra_info_t *info)
{
    opnd_t dst, spill_slot;
    int idx;

    if (instr_get_opcode(instr) != OP_mov_ld)
        return false;

    dst = instr_get_dst(instr, 0);
    if (!(opnd_is_reg(dst) && opnd_get_reg(dst) == reg))
        return false;

    /* Build the operand a restore of `reg` would load from and compare. */
    idx = reg - REG_SPILL_START;
    spill_slot = OPND_CREATE_ABSMEM(&info->spill_regs[idx], OPSZ_PTR);
    return opnd_same(spill_slot, instr_get_src(instr, 0)) ? true : false;
}
/*
 * Per-instruction instrumentation callback.  Acts only on the instruction
 * that drmgr reports as the first of its block; every other instruction is
 * left alone.
 *
 * For that first instruction it reserves the arithmetic flags through drreg,
 * inserts a plain (non-locked) 4-byte inc of global_count in front of it and
 * unreserves the flags again.  With SHOW_RESULTS defined it also tallies
 * whether drreg reports the flags as live (bbs_eflags_saved) or not
 * (bbs_no_eflags_saved).
 *
 * Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst,
                      bool for_trace, bool translating, void *user_data)
{
#ifdef SHOW_RESULTS
    bool aflags_dead;
#endif

    if (!drmgr_is_first_instr(drcontext, inst))
        return DR_EMIT_DEFAULT;

#ifdef VERBOSE
    dr_printf("in dynamorio_basic_block(tag="PFX")\n", tag);
# ifdef VERBOSE_VERBOSE
    instrlist_disassemble(drcontext, tag, bb, STDOUT);
# endif
#endif

#ifdef SHOW_RESULTS
    /* A failed query is counted the same way as "flags are dead". */
    if (drreg_are_aflags_dead(drcontext, inst, &aflags_dead) != DRREG_SUCCESS ||
        aflags_dead)
        bbs_no_eflags_saved++;
    else
        bbs_eflags_saved++;
#endif

    /* This shows drreg-based aflags save/restore.
     * drx_insert_counter_update would be an alternative to drreg here;
     * sample opcodes.c uses drx_insert_counter_update.
     */
    if (drreg_reserve_aflags(drcontext, bb, inst) != DRREG_SUCCESS)
        DR_ASSERT(false && "fail to reserve aflags!");

    /* The counter update is deliberately racy, for better performance. */
    instrlist_meta_preinsert(
        bb, inst,
        INSTR_CREATE_inc(drcontext,
                         OPND_CREATE_ABSMEM((byte *)&global_count, OPSZ_4)));

    if (drreg_unreserve_aflags(drcontext, bb, inst) != DRREG_SUCCESS)
        DR_ASSERT(false && "fail to unreserve aflags!");

#if defined(VERBOSE) && defined(VERBOSE_VERBOSE)
    dr_printf("Finished instrumenting dynamorio_basic_block(tag="PFX")\n", tag);
    instrlist_disassemble(drcontext, tag, bb, STDOUT);
#endif
    return DR_EMIT_DEFAULT;
}
/*
 * Emits a meta store, placed before `where` in `ilist`, that writes `reg` to
 * its pointer-sized spill slot info->spill_regs[reg - REG_SPILL_START].
 *
 * `reg` is asserted to lie in [REG_SPILL_START, REG_SPILL_STOP].
 */
void
umbra_save_reg(void *drcontext, umbra_info_t *info, instrlist_t *ilist,
               instr_t *where, reg_id_t reg)
{
    int idx;

    DR_ASSERT(reg >= REG_SPILL_START && reg <= REG_SPILL_STOP);
    idx = reg - REG_SPILL_START;

    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_st(drcontext,
                            OPND_CREATE_ABSMEM(&info->spill_regs[idx], OPSZ_PTR),
                            opnd_create_reg(reg)));
}
/*
 * Examines the instruction sequence starting at `where` for one of two
 * shapes that begin with an OP_lea whose source 0 is a base+disp operand
 * with no index register:
 *   (a) lea / and / cmp / jz, where the three followers have a NULL app pc;
 *   (b) lea / sub, where the sub has a NULL app pc.
 * From the matched sequence it derives a ref_cache_t pointer and either
 * records it in ref_info[pos].cache (pos is derived from the lea's base
 * register via UMBRA_REG_TO_POS) or, if a cache is already recorded for that
 * position, rewrites the sequence to use the recorded cache.
 *
 * Returns NULL when `where` has no successor; returns the successor of
 * `where` when no shape matches; otherwise returns the instruction two
 * positions after the (possibly advanced) `lea`.
 */
static instr_t *
analyze_client_code(void *drcontext, instrlist_t *ilist, instr_t *where,
                    ref_info_t *ref_info)
{
    instr_t *next, *lea, *and, *cmp, *jcc, *sub;
    opnd_t ref, opnd;
    ref_cache_t *cache;
    reg_id_t reg;
    int pos, i;

    next = instr_get_next(where);
    if (next == NULL)
        return NULL;
    if (instr_get_opcode(where) != OP_lea)
        return next;
    /* lea [ref] => r1 */
    ref = instr_get_src(where, 0);
    if (!opnd_is_base_disp(ref) || opnd_get_index(ref) != DR_REG_NULL)
        return next;
    lea = where;
    and = next;
    /* NOTE(review): cmp and jcc are fetched and queried without a NULL
     * check — confirm that at least two more instructions always follow
     * `next` at this point.
     */
    cmp = instr_get_next(and);
    jcc = instr_get_next(cmp);
    if (instr_get_app_pc(and) == NULL && instr_get_opcode(and) == OP_and &&
        instr_get_app_pc(cmp) == NULL && instr_get_opcode(cmp) == OP_cmp &&
        instr_get_app_pc(jcc) == NULL && instr_get_opcode(jcc) == OP_jz) {
        /* find pattern of
         * lea [ref] => reg
         * and 0xffffffff00000000 reg
         * cmp cache->tag reg
         * jz
         */
        /* Source 1 of the cmp addresses the tag field; step back to the
         * start of the enclosing ref_cache_t.
         */
        opnd = instr_get_src(cmp, 1);
        cache = opnd_get_addr(opnd) - offsetof(ref_cache_t, tag);
        /* Advance `lea` past 10 non-label instructions; the instruction
         * reached is asserted to be another OP_lea.
         * NOTE(review): the loop does not check for the end of the list.
         */
        for (i = 0; i < 10; ) {
            lea = instr_get_next(lea);
            if (!instr_is_label(lea))
                i++;
        }
        DR_ASSERT(instr_get_opcode(lea) == OP_lea);
    } else if (instr_get_app_pc(next) == NULL && instr_get_opcode(next) == OP_sub) {
        /* Source 0 of the sub addresses the offset field; step back to the
         * start of the enclosing ref_cache_t.  `lea` still equals `where`.
         */
        opnd = instr_get_src(next, 0);
        cache = opnd_get_addr(opnd) - offsetof(ref_cache_t, offset);
    } else {
        return next;
    }
    reg = opnd_get_base(ref);
    UMBRA_REG_TO_POS(reg, pos);
    if (ref_info[pos].cache == NULL) {
        /* First sequence seen for this position: remember its cache. */
        ref_info[pos].cache = cache;
    } else {
        sub = instr_get_next(lea);
        DR_ASSERT(instr_get_opcode(sub) == OP_sub);
        /* Remove and destroy everything from `where` up to, but not
         * including, `lea` (nothing is removed when lea == where).
         */
        while (lea != where) {
            next = instr_get_next(where);
            instrlist_remove(ilist, where);
            instr_destroy(drcontext, where);
            where = next;
        }
        /* Point the sub at the offset field of the previously recorded cache. */
        opnd = OPND_CREATE_ABSMEM((void *)(reg_t)ref_info[pos].cache +
                                  offsetof(ref_cache_t, offset), OPSZ_PTR);
        instr_set_src(sub, 0, opnd);
        /* NOTE(review): the values assigned to `next` below are overwritten
         * by the assignment that follows this if/else — confirm whether
         * these steps are still meant to influence the return value.
         */
        if (proc_info.client.app_unit_bits > 0 &&
            proc_info.client.shd_unit_bits != 0)
            next = instr_get_next(sub); /* reg & mask => reg */
        if (proc_info.client.orig_addr) {
            next = instr_get_next(next); /* mov reg => r2 */
            next = instr_get_next(next); /* r2 & bit_mask => r2 */
        }
    }
    /* NOTE(review): `next` is not checked for NULL before the second step. */
    next = instr_get_next(lea);
    return instr_get_next(next);
}
/*
 * Inserts, before `where` in `ilist`, code that records the address of one
 * memory operand of `where` into RBuf and then increments RBufIdx.
 *
 * where: instruction whose operand is recorded.
 * pos:   operand index; selects a destination when `write` is true and a
 *        source otherwise.
 *
 * xax and xcx are spilled to SPILL_SLOT_2 / SPILL_SLOT_3 on entry and
 * restored at the end.  The arithmetic flags are parked in xax around the
 * `and` and again around the `inc`.
 *
 * NOTE(review): the operand's address is computed after xax and xcx have
 * already been overwritten; if the operand uses either as base or index the
 * recorded address is presumably wrong — confirm.
 * NOTE(review): RBUF_SIZE is used directly as the and-mask — confirm it is a
 * mask value.
 * NOTE(review): the slot stride is sizeof(uint) while the store is
 * pointer-sized — confirm the element layout of RBuf.
 */
static void
instrument_mem(void *drcontext, instrlist_t *ilist, instr_t *where, int pos,
               bool write)
{
    const reg_id_t slot_reg = DR_REG_XAX; /* a dead register could be picked instead */
    const reg_id_t idx_reg = DR_REG_XCX;
    opnd_t ref = write ? instr_get_dst(where, pos) : instr_get_src(where, pos);

    dr_save_reg(drcontext, ilist, where, slot_reg, SPILL_SLOT_2);
    dr_save_reg(drcontext, ilist, where, idx_reg, SPILL_SLOT_3);

    /* idx_reg = RBufIdx */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_ld(drcontext, opnd_create_reg(idx_reg),
                            OPND_CREATE_ABSMEM((byte *)&RBufIdx, OPSZ_4)));

    /* idx_reg &= RBUF_SIZE, with the flags preserved across the and */
    dr_save_arith_flags_to_xax(drcontext, ilist, where);
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_and(drcontext, opnd_create_reg(idx_reg),
                         OPND_CREATE_INT32(RBUF_SIZE)));
    dr_restore_arith_flags_from_xax(drcontext, ilist, where);

    /* slot_reg = RBuf */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_imm(drcontext, opnd_create_reg(slot_reg),
                             OPND_CREATE_INTPTR(RBuf)));

    /* slot_reg = slot_reg + idx_reg * sizeof(uint) */
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_lea(drcontext, opnd_create_reg(slot_reg),
                         opnd_create_base_disp(slot_reg, idx_reg, sizeof(uint), 0,
                                               OPSZ_lea)));

    /* idx_reg = address of the operand (slot_reg is handed over as the
     * scratch register), then store it at [slot_reg].
     */
    drutil_insert_get_mem_addr(drcontext, ilist, where, ref, idx_reg, slot_reg);
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_st(drcontext, OPND_CREATE_MEMPTR(slot_reg, 0),
                            opnd_create_reg(idx_reg)));

    /* RBufIdx = RBufIdx + 1, with the flags preserved across the inc */
    dr_save_arith_flags_to_xax(drcontext, ilist, where);
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_ld(drcontext, opnd_create_reg(idx_reg),
                            OPND_CREATE_ABSMEM((byte *)&RBufIdx, OPSZ_4)));
    instrlist_meta_preinsert(
        ilist, where, INSTR_CREATE_inc(drcontext, opnd_create_reg(idx_reg)));
    instrlist_meta_preinsert(
        ilist, where,
        INSTR_CREATE_mov_st(drcontext,
                            OPND_CREATE_ABSMEM((byte *)&RBufIdx, OPSZ_4),
                            opnd_create_reg(idx_reg)));
    dr_restore_arith_flags_from_xax(drcontext, ilist, where);

    dr_restore_reg(drcontext, ilist, where, slot_reg, SPILL_SLOT_2);
    dr_restore_reg(drcontext, ilist, where, idx_reg, SPILL_SLOT_3);
}
/*
 * Basic-block event that gathers static statistics for `bb` and inserts an
 * execution counter into it.
 *
 * For each block (unless `translating` is set, in which case nothing is
 * recorded or inserted) it:
 *   - walks the instructions, feeding each one to calc_set_num, counting
 *     floating-point instructions, and counting instructions whose
 *     disassembly starts with one of the tracked_instrs prefixes;
 *   - computes ilp = instruction count / glob_reg_state.num_sets (a zero
 *     num_sets is treated as 1);
 *   - under stats_mutex, stores the results in slot my_bbcount % MY_MAX_BB
 *     of the statistics arrays and bumps my_bbcount;
 *   - inserts either a clean call (USE_CLEAN_CALL) or a lock-prefixed inc of
 *     my_bbexecs[slot].
 *
 * Floating-point state is saved before and restored after the analysis
 * because the callback itself performs floating-point arithmetic.
 *
 * Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb, bool for_trace,
                  bool translating)
{
    int k;
    const int MAX_INSTR_LEN = 64;
    char text[MAX_INSTR_LEN];
    instr_t *cur, *head = instrlist_first(bb);
    uint flags;
    uint flops = 0;
    uint hits[tracked_instrs_len];

    k = 0;
    while (k < tracked_instrs_len) {
        hits[k] = 0;
        k++;
    }

#ifdef VERBOSE
    dr_printf("in dynamorio_basic_block(tag="PFX")\n", tag);
# ifdef VERBOSE_VERBOSE
    instrlist_disassemble(drcontext, tag, bb, STDOUT);
# endif
#endif

    /* we use fp ops so we have to save fp state */
    byte fp_raw[512 + 16];
    byte *fp_align = (byte *)((((ptr_uint_t)fp_raw) + 16) & ((ptr_uint_t)-16));

    if (translating) {
        return DR_EMIT_DEFAULT;
    }

    proc_save_fpstate(fp_align);

    int reads[DR_REG_LAST_VALID_ENUM + MY_NUM_EFLAGS + 1];
    int writes[DR_REG_LAST_VALID_ENUM + MY_NUM_EFLAGS + 1];
    for (k = 0; k < DR_REG_LAST_VALID_ENUM + MY_NUM_EFLAGS + 1; k++) {
        reads[k] = 0;
        writes[k] = 0;
    }
    t_glob_reg_state glob_reg_state = { 0, 0, 0, 0, 0, 0, reads, writes };

    int bb_len = 0;
    cur = instrlist_first(bb);
    while (cur != NULL) {
        bb_len++;

        /* ILP calculation: reset the per-instruction set numbers first. */
        glob_reg_state.raw_setnr = 1;
        glob_reg_state.war_setnr = 1;
        glob_reg_state.waw_setnr = 1;
        glob_reg_state.else_setnr = 1;
        glob_reg_state.final_setnr = 1;
        calc_set_num(cur, &glob_reg_state);

        /* Floating-point instruction tally. */
        if (instr_is_floating(cur)) {
            flops += 1;
        }

        /* Tracked-instruction tally, matched by disassembly prefix. */
        instr_disassemble_to_buffer(drcontext, cur, text, MAX_INSTR_LEN);
        for (k = 0; k < tracked_instrs_len; k++) {
            if (strncmp(text, tracked_instrs[k], strlen(tracked_instrs[k])) == 0) {
                hits[k] += 1;
            }
        }

        cur = instr_get_next(cur);
    }

    /* With the sets known, the ILP of the block can be computed. */
    float ilp = ((float)bb_len) /
        ((float)(glob_reg_state.num_sets != 0 ? glob_reg_state.num_sets : 1));

    dr_mutex_lock(stats_mutex);

    /* To bound memory use, statistics are kept only for the latest
     * MY_MAX_BB blocks; older slots are reused.
     */
    int slot = my_bbcount % MY_MAX_BB;
    my_bbcount++;
    if (slot == 0 && my_bbcount > 1) {
        dr_printf("Overflow at %d\n", my_bbcount);
    }

    my_bbexecs[slot] = 0; /* initialize */
    my_bbsizes[slot] = bb_len;
    bb_flop_count[slot] = flops;
    for (k = 0; k < tracked_instrs_len; k++) {
        bb_instr_count[slot * tracked_instrs_len + k] = hits[k];
    }
    my_bbilp[slot] = ilp;

    dr_mutex_unlock(stats_mutex);

#ifdef USE_CLEAN_CALL
    dr_insert_clean_call(drcontext, bb, instrlist_first(bb), clean_call, false, 1,
                         OPND_CREATE_INT32(slot));
#else
#ifdef INSERT_AT_END
    cur = NULL;
#else
    /* Look for an instruction that writes all six arithmetic flags without
     * reading any: the inc can go in front of it without saving flags.
     */
    for (cur = head; cur != NULL; cur = instr_get_next(cur)) {
        flags = instr_get_arith_flags(cur);
        if (!TESTANY(EFLAGS_READ_6, flags) && TESTALL(EFLAGS_WRITE_6, flags))
            break;
    }
#endif
    if (cur != NULL) {
        /* Increment my_bbexecs[slot] using the lock prefix. */
        instrlist_meta_preinsert(
            bb, cur,
            LOCK(INSTR_CREATE_inc(
                drcontext,
                OPND_CREATE_ABSMEM((byte *)&(my_bbexecs[slot]), OPSZ_4))));
    } else {
        /* No suitable place: insert at the head and preserve xax and the
         * flags around the increment.
         */
        dr_save_reg(drcontext, bb, head, DR_REG_XAX, SPILL_SLOT_1);
        dr_save_arith_flags_to_xax(drcontext, bb, head);
        instrlist_meta_preinsert(
            bb, head,
            LOCK(INSTR_CREATE_inc(
                drcontext,
                OPND_CREATE_ABSMEM((byte *)&(my_bbexecs[slot]), OPSZ_4))));
        dr_restore_arith_flags_from_xax(drcontext, bb, head);
        dr_restore_reg(drcontext, bb, head, DR_REG_XAX, SPILL_SLOT_1);
    }
#endif

    proc_restore_fpstate(fp_align);

#if defined(VERBOSE) && defined(VERBOSE_VERBOSE)
    dr_printf("Finished instrumenting dynamorio_basic_block(tag="PFX")\n", tag);
    instrlist_disassemble(drcontext, tag, bb, STDOUT);
#endif
    return DR_EMIT_DEFAULT;
}
/*
 * Basic-block event exercising the pointer-sized immediate insertion helpers.
 *
 * In front of the first instruction of `bb` it inserts, in order:
 *   - a dr_prepare_for_call sequence;
 *   - three pointer-sized immediate pushes (1, -1, global_var), each followed
 *     on X64 by a load of the top of stack into a register (presumably the
 *     argument registers for my_abort — confirm);
 *   - three pointer-sized immediate stores (global_var, -1, 1) into var0,
 *     var1 and var2;
 *   - a call to my_abort and a dr_cleanup_after_call sequence.
 *
 * Each helper returns up to two instructions; the second is checked against
 * the expectation noted at each call site and a message is written to STDERR
 * on mismatch.  Returned instructions are marked not-ok-to-mangle.
 *
 * global_var is set to (ptr_uint_t)INT_MAX + 1 on every invocation.
 * Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
bb_event(void* drcontext, void *tag, instrlist_t* bb, bool for_trace, bool translating)
{
    instr_t *instr = instrlist_first(bb);
    instr_t *ins1, *ins2;

    global_var = (ptr_uint_t)INT_MAX + 1;
    dr_prepare_for_call(drcontext, bb, instr);

    /* test push_imm */
    instrlist_insert_push_immed_ptrsz(drcontext, (ptr_int_t)1, bb, instr,
                                      &ins1, &ins2);
    instr_set_ok_to_mangle(ins1, false);
    if (ins2 != NULL) /* ins2 should be NULL */
        dr_fprintf(STDERR, "Error on push 1\n");
#ifdef X64
    MINSERT(bb, instr, INSTR_CREATE_mov_ld
            (drcontext, opnd_create_reg(IF_LINUX_ELSE(DR_REG_RDX, DR_REG_R8)),
             OPND_CREATE_MEMPTR(DR_REG_RSP, 0)));
#endif

    instrlist_insert_push_immed_ptrsz(drcontext, (ptr_int_t)-1, bb, instr,
                                      &ins1, &ins2);
    instr_set_ok_to_mangle(ins1, false);
    if (ins2 != NULL) /* ins2 should be NULL */
        dr_fprintf(STDERR, "Error on push -1\n");
#ifdef X64
    MINSERT(bb, instr, INSTR_CREATE_mov_ld
            (drcontext, opnd_create_reg(IF_LINUX_ELSE(DR_REG_RSI, DR_REG_RDX)),
             OPND_CREATE_MEMPTR(DR_REG_RSP, 0)));
#endif

    instrlist_insert_push_immed_ptrsz(drcontext, global_var, bb, instr,
                                      &ins1, &ins2);
    instr_set_ok_to_mangle(ins1, false);
#ifdef X64
    if (ins2 == NULL) /* ins2 should not be NULL */
        dr_fprintf(STDERR, "Error on push tag\n");
    else
        instr_set_ok_to_mangle(ins2, false);
    MINSERT(bb, instr, INSTR_CREATE_mov_ld
            (drcontext, opnd_create_reg(IF_LINUX_ELSE(DR_REG_RDI, DR_REG_RCX)),
             OPND_CREATE_MEMPTR(DR_REG_RSP, 0)));
#endif

    /* test mov_imm */
    instrlist_insert_mov_immed_ptrsz(drcontext, global_var,
                                     OPND_CREATE_ABSMEM(&var0, OPSZ_PTR),
                                     bb, instr, &ins1, &ins2);
    instr_set_ok_to_mangle(ins1, false);
#ifdef X64
    if (ins2 == NULL) /* ins2 should not be NULL */
        /* %p requires a pointer argument; global_var is an integer type. */
        dr_fprintf(STDERR, "Error on mov %p\n", (void *)global_var);
    else
        instr_set_ok_to_mangle(ins2, false);
#endif

    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)-1,
                                     OPND_CREATE_ABSMEM(&var1, OPSZ_PTR),
                                     bb, instr, &ins1, &ins2);
    instr_set_ok_to_mangle(ins1, false);
    if (ins2 != NULL) /* ins2 should be NULL */
        dr_fprintf(STDERR, "Error on mov -1\n");

    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)1,
                                     OPND_CREATE_ABSMEM(&var2, OPSZ_PTR),
                                     bb, instr, &ins1, &ins2);
    instr_set_ok_to_mangle(ins1, false);
    if (ins2 != NULL) /* ins2 should be NULL */
        dr_fprintf(STDERR, "Error on mov 1\n");

    /* call */
    MINSERT(bb, instr, INSTR_CREATE_call
            (drcontext, opnd_create_pc((void*)my_abort)));
    dr_cleanup_after_call(drcontext, bb, instr, 0);

    return DR_EMIT_DEFAULT;
}