/* If has_instr_jmp_targets is true, this routine trashes the note field * of each instr_t to store the offset in order to properly encode * the relative pc for an instr_t jump target */ byte * instrlist_encode_to_copy(dcontext_t *dcontext, instrlist_t *ilist, byte *copy_pc, byte *final_pc, byte *max_pc, bool has_instr_jmp_targets) { instr_t *inst; int len = 0; if (has_instr_jmp_targets || max_pc != NULL) { /* must set note fields first with offset, or compute length */ for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) { if (has_instr_jmp_targets) instr_set_note(inst, (void *)(ptr_int_t)len); len += instr_length(dcontext, inst); } } if (max_pc != NULL && (copy_pc + len > max_pc || POINTER_OVERFLOW_ON_ADD(copy_pc, len))) return NULL; for (inst = instrlist_first(ilist); inst != NULL; inst = instr_get_next(inst)) { byte *pc = instr_encode_to_copy(dcontext, inst, copy_pc, final_pc); if (pc == NULL) return NULL; final_pc += pc - copy_pc; copy_pc = pc; } return copy_pc; }
/* Inserts inline code before `where` that adds an instruction entry into the
 * buffer.  The entry is filled from `where` itself: its opcode (stored as the
 * type), its encoded length (stored as the size) and its application pc.
 * The buffer pointer is then advanced by sizeof(mem_ref_t).
 *
 * drcontext: context passed through to the drreg and insert_* helpers.
 * ilist:     instruction list receiving the instrumentation.
 * where:     application instruction being recorded; new code is inserted
 *            relative to it by the helpers.
 *
 * Auto-predication on ilist is disabled while the instrumentation is inserted
 * and set back to the predicate of `where` afterwards.  If either scratch
 * register cannot be reserved the routine asserts and returns without
 * inserting anything (auto-predication is left at DR_PRED_NONE in that case).
 */
static void
instrument_instr(void *drcontext, instrlist_t *ilist, instr_t *where)
{
    /* We need two scratch registers */
    reg_id_t reg_ptr, reg_tmp;

    /* we don't want to predicate this, because an instruction fetch always occurs */
    instrlist_set_auto_predicate(ilist, DR_PRED_NONE);

    /* Each reservation writes the chosen register through its out-parameter. */
    if (drreg_reserve_register(drcontext, ilist, where, NULL, &reg_ptr) !=
            DRREG_SUCCESS ||
        drreg_reserve_register(drcontext, ilist, where, NULL, &reg_tmp) !=
            DRREG_SUCCESS) {
        DR_ASSERT(false); /* cannot recover */
        return;
    }

    insert_load_buf_ptr(drcontext, ilist, where, reg_ptr);
    insert_save_type(drcontext, ilist, where, reg_ptr, reg_tmp,
                     (ushort)instr_get_opcode(where));
    insert_save_size(drcontext, ilist, where, reg_ptr, reg_tmp,
                     (ushort)instr_length(drcontext, where));
    insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, instr_get_app_pc(where));
    insert_update_buf_ptr(drcontext, ilist, where, reg_ptr, sizeof(mem_ref_t));

    /* Restore scratch registers */
    if (drreg_unreserve_register(drcontext, ilist, where, reg_ptr) != DRREG_SUCCESS ||
        drreg_unreserve_register(drcontext, ilist, where, reg_tmp) != DRREG_SUCCESS)
        DR_ASSERT(false);

    /* Re-enable predication matching the instrumented instruction. */
    instrlist_set_auto_predicate(ilist, instr_get_predicate(where));
}
/** Computes the distance in bytes from the start of _first_ to the start of
 *  _second_ by summing the lengths of the instructions in between.
 *  Returns -1 when the end of the list is reached before _second_ is found,
 *  i.e. the second instruction does not follow the first one.
 */
int get_offset(void* ctx, instr_t* first, instr_t* second) {
  int total = 0;
  instr_t* cur;

  for(cur = first; cur != second; cur = instr_get_next(cur)) {
    if(cur == NULL) {
      return -1;
    }
    total += instr_length(ctx, cur);
  }
  return total;
}
/* We collect the basic block information including offset from module base, * size, and num of instructions, and add it into a basic block table without * instrumentation. */ static dr_emit_flags_t event_basic_block_analysis(void *drcontext, void *tag, instrlist_t *bb, bool for_trace, bool translating, OUT void **user_data) { per_thread_t *data; instr_t *instr; app_pc tag_pc, start_pc, end_pc; /* do nothing for translation */ if (translating) return DR_EMIT_DEFAULT; data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx); /* Collect the number of instructions and the basic block size, * assuming the basic block does not have any elision on control * transfer instructions, which is true for default options passed * to DR but not for -opt_speed. */ /* We separate the tag from the instr pc ranges to handle displaced code * such as for the vsyscall hook. */ tag_pc = dr_fragment_app_pc(tag); start_pc = instr_get_app_pc(instrlist_first_app(bb)); end_pc = start_pc; /* for finding the size */ for (instr = instrlist_first_app(bb); instr != NULL; instr = instr_get_next_app(instr)) { app_pc pc = instr_get_app_pc(instr); int len = instr_length(drcontext, instr); /* -opt_speed (elision) is not supported */ /* For rep str expansion pc may be one back from start pc but equal to the tag. */ ASSERT(pc != NULL && (pc >= start_pc || pc == tag_pc), "-opt_speed is not supported"); if (pc + len > end_pc) end_pc = pc + len; } /* We allow duplicated basic blocks for the following reasons: * 1. Avoids handling issues like code cache consistency, e.g., * module load/unload, self-modifying code, etc. * 2. Avoids the overhead on duplication check. * 3. Stores more information on code cache events, e.g., trace building, * repeated bb building, etc. * 4. The duplication can be easily handled in a post-processing step, * which is required anyway. 
*/ bb_table_entry_add(drcontext, data, tag_pc, (uint)(end_pc - start_pc)); if (go_native) return DR_EMIT_GO_NATIVE; else return DR_EMIT_DEFAULT; }
/** Gathers the longest run of address-contiguous instructions, starting at
 *  _*instr_, whose combined length does not exceed _max_ bytes, and returns
 *  its start pc and size.  On return _*instr_ points at the first
 *  instruction that was not included in the chunk (NULL if the list ended).
 */
struct chunk_info_t get_chunk_info(void* ctx, instr_t** instr, size_t max) {
  struct chunk_info_t chunk = { 0, 0 };

  while(*instr) {
    app_pc cur_pc = dr_app_pc_for_decoding(instr_get_app_pc(*instr));
    size_t grown;

    if(cur_pc != chunk.pc + chunk.size) {
      /* A mismatch is acceptable only while the chunk is still empty, in
       * which case this instruction becomes the chunk start. */
      if(chunk.pc != 0 || chunk.size != 0) {
        break;
      }
      chunk.pc = cur_pc;
    }

    grown = chunk.size + instr_length(ctx, *instr);
    if(grown > max) {
      break;
    }
    chunk.size = grown;

    /* Advance only after the instruction has been accepted. */
    *instr = instr_get_next(*instr);
  }
  return chunk;
}
void Shade::detour(void *address, void *target, void *&trampoline) { const size_t instr_max = 17; auto list = instrlist_create(dr); byte instr_data[instr_max]; byte *current = (byte *)address; byte *min_pos = (byte *)address + 5; size_t size = 0; while(current < min_pos) { read(current, instr_data, instr_max); auto instr = instr_create(dr); byte *decoded = decode_from_copy(dr, instr_data, current, instr); if(!decoded) error("Unknown instruction"); instrlist_append(list, instr); instr_make_persistent(dr, instr); current += (size_t)(decoded - instr_data); size += instr_length(dr, instr); } auto instr = INSTR_CREATE_jmp(dr, opnd_create_pc(current)); size += instr_length(dr, instr); instrlist_append(list, instr); auto local_trampoline = alloca(size); if(!local_trampoline) error("Out of memory"); void *remote = code_section.allocate(size, 4); if(!instrlist_encode_to_copy(dr, list, (byte *)local_trampoline, (byte *)remote, 0, true)) error("Unable to encode instructions"); instrlist_clear_and_destroy(dr, list); write(remote, local_trampoline, size); trampoline = remote; char code[5]; DWORD offset = (size_t)target - (size_t)address - 5; code[0] = 0xE9; *(DWORD *)(code + 1) = offset; access(address, 5, [&] { write(address, code, 5); }); }
// We want it to look like the original rep string instead of the // drutil-expanded loop. if (!prev_instr_was_rep_string) prev_instr_was_rep_string = true; else skip_instr = true; } else prev_instr_was_rep_string = false; // FIXME i#1729: make bundles via lazy accum until hit memref/end. if (!skip_instr) { DO_VERBOSE(3, { instr_set_translation(&instr, orig_pc); dr_print_instr(dcontext, STDOUT, &instr, ""); }); buf->type = instru_t::instr_to_instr_type(&instr); buf->size = (ushort) (skip_icache ? 0 : instr_length(dcontext, &instr)); buf->addr = (addr_t) orig_pc; ++buf; } else VPRINT(3, "Skipping instr fetch for " PFX "\n", (ptr_uint_t)decode_pc); decode_pc = pc; // We need to interleave instrs with memrefs. // There is no following memref for (instrs_are_separate && !skip_icache). if ((!instrs_are_separate || skip_icache) && // Rule out OP_lea. (instr_reads_memory(&instr) || instr_writes_memory(&instr))) { for (int i = 0; i < instr_num_srcs(&instr); i++) { if (opnd_is_memory_reference(instr_get_src(&instr, i))) { std::string error = append_memref(&buf, tidx, &instr, instr_get_src(&instr, i), false); if (!error.empty())