static void replay_init_scratch_memory(struct context* ctx,
				       struct mmapped_file* file)
{
	/* Initialize the scratchpad as the recorder did, but make it
	 * PROT_NONE.  The idea is just to reserve the address space
	 * so the replayed process address map looks like the
	 * recorded process, if it were to be probed by madvise or
	 * some other means.  But we make it PROT_NONE so that rogue
	 * reads/writes to the scratch memory are caught. */

	/* Set up the mmap system call. */
	struct user_regs_struct orig_regs;
	read_child_registers(ctx->child_tid, &orig_regs);

	struct user_regs_struct mmap_call = orig_regs;
	mmap_call.eax = SYS_mmap2;
	mmap_call.ebx = (uintptr_t)file->start;
	mmap_call.ecx = file->end - file->start;
	mmap_call.edx = PROT_NONE;
	mmap_call.esi = MAP_PRIVATE | MAP_ANONYMOUS;
	mmap_call.edi = -1;
	mmap_call.ebp = 0;
	inject_and_execute_syscall(ctx, &mmap_call);

	write_child_registers(ctx->child_tid, &orig_regs);
}
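/* Aside: inject_and_execute_syscall() is defined elsewhere.  A
 * minimal sketch of the usual ptrace-based remote-syscall technique
 * it presumably implements follows.  This is an assumption, not the
 * actual implementation; in particular it assumes the tracee's $eip
 * is already positioned on a syscall instruction (e.g. `int $0x80`),
 * whereas the real helper may write one there temporarily. */
static long inject_and_execute_syscall_sketch(struct context* ctx,
					      struct user_regs_struct* call_regs)
{
	pid_t tid = ctx->child_tid;
	struct user_regs_struct result;

	/* Swap in the syscall registers: eax = syscall number,
	 * ebx..ebp = arguments. */
	write_child_registers(tid, call_regs);

	/* Trap at syscall entry, then again at syscall exit. */
	sys_ptrace_syscall(tid);
	sys_waitpid(tid, &ctx->status);
	sys_ptrace_syscall(tid);
	sys_waitpid(tid, &ctx->status);

	/* The syscall's return value is left in eax. */
	read_child_registers(tid, &result);
	return result.eax;
}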
void write_child_eip(int tid, long int val)
{
	struct user_regs_struct regs;
	read_child_registers(tid, &regs);
	regs.eip = val;
	write_child_registers(tid, &regs);
}
static void continue_or_step(struct context* ctx, int stepi)
{
	pid_t tid = ctx->child_tid;

	if (stepi) {
		sys_ptrace_singlestep(tid);
	} else {
		/* We continue with PTRACE_SYSCALL for error checking:
		 * since the next event is supposed to be a signal,
		 * entering a syscall here means divergence.  There
		 * shouldn't be any straight-line execution overhead
		 * for SYSCALL vs. CONT, so the difference in cost
		 * should be negligible. */
		sys_ptrace_syscall(tid);
	}
	sys_waitpid(tid, &ctx->status);

	ctx->child_sig = signal_pending(ctx->status);
	if (0 == ctx->child_sig) {
		struct user_regs_struct regs;
		read_child_registers(ctx->child_tid, &regs);
		log_err("Replaying `%s' (line %d): expecting tracee signal or trap, but instead at `%s' (rcb: %llu)",
			strevent(ctx->trace.stop_reason),
			get_trace_file_lines_counter(),
			strevent(regs.orig_eax), read_rbc(ctx->hpc));
		emergency_debug(ctx);
	}
}
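/* signal_pending() is defined elsewhere; the sketch below shows the
 * decoding it plausibly performs on a waitpid() status, assuming the
 * tracer set PTRACE_O_TRACESYSGOOD (which marks syscall stops by
 * setting bit 7 of the stop signal).  This is an assumption about
 * the helper, not its actual body. */
static int signal_pending_sketch(int status)
{
	int sig;

	if (!WIFSTOPPED(status)) {
		return 0;
	}
	sig = WSTOPSIG(status);
	/* A syscall stop (SIGTRAP | 0x80) is reported as "no pending
	 * signal", which is what makes the divergence check in
	 * continue_or_step() fire on unexpected syscalls. */
	return ((SIGTRAP | 0x80) == sig) ? 0 : sig;
}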
/**
 * Step over the system call instruction to "exit" the emulated
 * syscall.
 */
static void step_exit_syscall_emu(struct context* ctx)
{
	pid_t tid = ctx->child_tid;
	struct user_regs_struct regs;

	read_child_registers(tid, &regs);

	sys_ptrace_sysemu_singlestep(tid);
	sys_waitpid(tid, &ctx->status);

	/* Restore the pre-step registers so the tracee observes no
	 * side effect of stepping over the syscall instruction. */
	write_child_registers(tid, &regs);

	ctx->status = 0;
}
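/* Aside: PTRACE_SYSEMU_SINGLESTEP (used above) single-steps the
 * tracee, but if the stepped instruction is a syscall, it stops
 * *without* the kernel executing the syscall.  That is what lets us
 * advance $eip past the syscall instruction of an emulated syscall
 * while ensuring the real syscall never runs. */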
static void guard_unexpected_signal(struct context* ctx)
{
	int event;

	/* "0" normally means "syscall", but continue_or_step() guards
	 * against unexpected syscalls.  So the caller must have set
	 * "0" intentionally. */
	if (0 == ctx->child_sig || SIGTRAP == ctx->child_sig) {
		return;
	}
	if (ctx->child_sig) {
		event = -ctx->child_sig;
	} else {
		struct user_regs_struct regs;
		read_child_registers(ctx->child_tid, &regs);
		event = MAX(0, regs.orig_eax);
	}
	log_err("Replay got unrecorded event %s while awaiting signal\n"
		"    replaying trace line %d",
		strevent(event), get_trace_file_lines_counter());
	emergency_debug(ctx);
	/* not reached */
}
static void replay_one_trace_frame(struct dbg_context* dbg,
				   struct context* ctx)
{
	struct dbg_request req;
	struct rep_trace_step step;
	int event = ctx->trace.stop_reason;
	int stop_sig = 0;

	debug("%d: replaying event %s, state %s",
	      ctx->rec_tid, strevent(event), statename(ctx->trace.state));
	if (ctx->syscallbuf_hdr) {
		debug("    (syscallbufsz:%u, abrtcmt:%u)",
		      ctx->syscallbuf_hdr->num_rec_bytes,
		      ctx->syscallbuf_hdr->abort_commit);
	}

	/* Advance the trace until we've exec()'d the tracee before
	 * processing debugger requests.  Otherwise the debugger host
	 * will be confused about the initial executable image, which
	 * at that point is rr's, not the tracee's. */
	if (validate) {
		req = process_debugger_requests(dbg, ctx);
		assert(dbg_is_resume_request(&req));
	}

	/* Print some kind of progress. */
	if (ctx->trace.global_time % 10000 == 0) {
		fprintf(stderr, "time: %u\n", ctx->trace.global_time);
	}

	if (ctx->child_sig != 0) {
		assert(event == -ctx->child_sig
		       || event == -(ctx->child_sig | DET_SIGNAL_BIT));
		ctx->child_sig = 0;
	}

	/* Ask the trace-interpretation code what to do next in order
	 * to retire the current frame. */
	memset(&step, 0, sizeof(step));

	switch (event) {
	case USR_INIT_SCRATCH_MEM: {
		/* For checksumming: make a note that this area is
		 * scratch and need not be validated. */
		struct mmapped_file file;
		read_next_mmapped_file_stats(&file);
		replay_init_scratch_memory(ctx, &file);
		add_scratch((void*)ctx->trace.recorded_regs.eax,
			    file.end - file.start);
		step.action = TSTEP_RETIRE;
		break;
	}
	case USR_EXIT:
		rep_sched_deregister_thread(&ctx);
		/* Early-return because |ctx| is gone now. */
		return;
	case USR_ARM_DESCHED:
	case USR_DISARM_DESCHED:
		rep_skip_desched_ioctl(ctx);
		/* TODO */
		step.action = TSTEP_RETIRE;
		break;
	case USR_SYSCALLBUF_ABORT_COMMIT:
		ctx->syscallbuf_hdr->abort_commit = 1;
		step.action = TSTEP_RETIRE;
		break;
	case USR_SYSCALLBUF_FLUSH:
		rep_process_flush(ctx, rr_flags->redirect);
		/* TODO */
		step.action = TSTEP_RETIRE;
		break;
	case USR_SYSCALLBUF_RESET:
		ctx->syscallbuf_hdr->num_rec_bytes = 0;
		step.action = TSTEP_RETIRE;
		break;
	case USR_SCHED:
		step.action = TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT;
		step.target.rcb = ctx->trace.rbc;
		step.target.regs = &ctx->trace.recorded_regs;
		step.target.signo = 0;
		break;
	case SIG_SEGV_RDTSC:
		step.action = TSTEP_DETERMINISTIC_SIGNAL;
		step.signo = SIGSEGV;
		break;
	default:
		/* Pseudosignals are handled above. */
		assert(event > LAST_RR_PSEUDOSIGNAL);
		if (FIRST_DET_SIGNAL <= event && event <= LAST_DET_SIGNAL) {
			step.action = TSTEP_DETERMINISTIC_SIGNAL;
			step.signo = (-event & ~DET_SIGNAL_BIT);
			stop_sig = step.signo;
		} else if (event < 0) {
			assert(FIRST_ASYNC_SIGNAL <= event
			       && event <= LAST_ASYNC_SIGNAL);
			step.action = TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT;
			step.target.rcb = ctx->trace.rbc;
			step.target.regs = &ctx->trace.recorded_regs;
			step.target.signo = -event;
			stop_sig = step.target.signo;
		} else {
			assert(event > 0);
			/* XXX not so pretty ... */
			validate |= (ctx->trace.state == STATE_SYSCALL_EXIT
				     && event == SYS_execve);
			rep_process_syscall(ctx, rr_flags->redirect, &step);
		}
	}

	/* See the comment below about *not* resetting the hpc for
	 * buffer flushes.  Here, we're processing the *other* event,
	 * just after the buffer flush, where the rcb matters.  To
	 * simplify the advance-to-target code that follows (namely,
	 * making debugger interrupts simpler), pretend like the
	 * execution in the BUFFER_FLUSH didn't happen by resetting
	 * the rbc and compensating down the target rcb. */
	if (TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT == step.action) {
		uint64_t rcb_now = read_rbc(ctx->hpc);
		assert(step.target.rcb >= rcb_now);
		step.target.rcb -= rcb_now;
		reset_hpc(ctx, 0);
	}

	/* Advance until |step| has been fulfilled. */
	while (try_one_trace_step(ctx, &step, &req)) {
		struct user_regs_struct regs;

		/* Currently we only understand software breakpoints
		 * and successful stepi's. */
		assert(SIGTRAP == ctx->child_sig && "Unknown trap");

		read_child_registers(ctx->child_tid, &regs);
		if (ip_is_breakpoint((void*)regs.eip)) {
			/* SW breakpoint: $ip is just past the
			 * breakpoint instruction.  Move $ip back
			 * right before it. */
			regs.eip -= sizeof(int_3_insn);
			write_child_registers(ctx->child_tid, &regs);
		} else {
			/* Successful stepi.  Nothing else to do. */
			assert(DREQ_STEP == req.type
			       && req.target == get_threadid(ctx));
		}
		/* Don't restart with SIGTRAP anywhere. */
		ctx->child_sig = 0;

		/* Notify the debugger and process any new requests
		 * that might have triggered before resuming. */
		dbg_notify_stop(dbg, get_threadid(ctx), 0x05/*gdb mandate*/);
		req = process_debugger_requests(dbg, ctx);
		assert(dbg_is_resume_request(&req));
	}

	if (dbg && stop_sig) {
		dbg_notify_stop(dbg, get_threadid(ctx), stop_sig);
	}

	/* We flush the syscallbuf in response to detecting *other*
	 * events, like signal delivery.  Flushing the syscallbuf is
	 * a sort of side effect of reaching the other event.  But
	 * once we've flushed the syscallbuf during replay, we still
	 * must reach the execution point of the *other* event.  For
	 * async signals, that requires us to have an "intact" rbc,
	 * with the same value as it was when the last buffered
	 * syscall was retired during replay.  We'll be continuing
	 * from that rcb to reach the rcb we recorded at signal
	 * delivery.  So don't reset the counter for buffer flushes.
	 * (It doesn't matter for non-async-signal types, which are
	 * deterministic.) */
	switch (ctx->trace.stop_reason) {
	case USR_SYSCALLBUF_ABORT_COMMIT:
	case USR_SYSCALLBUF_FLUSH:
	case USR_SYSCALLBUF_RESET:
		break;
	default:
		reset_hpc(ctx, 0);
	}
	debug_memory(ctx);
}
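/* The rep_trace_step type is defined elsewhere.  From the fields
 * used above (action, signo, target.rcb, target.regs, target.signo),
 * its shape is roughly the sketch below; the member layout and the
 * full set of TSTEP_* actions are assumptions, and only the
 * constants referenced in this section are listed. */
enum rep_trace_step_action_sketch {
	TSTEP_RETIRE,			/* frame needs no further execution */
	TSTEP_DETERMINISTIC_SIGNAL,	/* run until the signal re-raises */
	TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT, /* advance to an rcb/$ip target */
	/* ... other actions not used in this section ... */
};

struct rep_trace_step_sketch {
	enum rep_trace_step_action_sketch action;
	int signo;		/* for deterministic signals */
	struct {
		uint64_t rcb;	/* target retired-branch count */
		const struct user_regs_struct* regs; /* target registers */
		int signo;	/* signal to deliver at the target, or 0 */
	} target;
};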
/**
 * Run execution forwards for |ctx| until |rcb| is reached, and the
 * $ip reaches the recorded $ip.  Return 0 if successful or 1 if an
 * unhandled interrupt occurred.  |sig| is the pending signal to be
 * delivered; it's only used to distinguish debugger-related traps
 * from traps related to replaying execution.
 */
static int advance_to(struct context* ctx, uint64_t rcb,
		      const struct user_regs_struct* regs, int sig,
		      int stepi)
{
	pid_t tid = ctx->child_tid;
	uint64_t rcb_now;

	assert(ctx->hpc->rbc.fd > 0);
	assert(ctx->child_sig == 0);

	/* Step 1: advance to the target rcb (minus a slack region)
	 * as quickly as possible by programming the hpc. */
	rcb_now = read_rbc(ctx->hpc);

	debug("Advancing to rcb:%llu/eip:%p from rcb:%llu",
	      rcb, (void*)regs->eip, rcb_now);

	/* XXX should we only do this if (rcb > 10000)? */
	while (rcb > SKID_SIZE && rcb_now < rcb - SKID_SIZE) {
		if (SIGTRAP == ctx->child_sig) {
			/* We proved we're not at the execution target
			 * and we're not single-stepping execution, so
			 * this must have been meant for the debugger.
			 * (The debugging code will verify that.) */
			return 1;
		}
		ctx->child_sig = 0;
		reset_hpc(ctx, rcb - rcb_now - SKID_SIZE);

		continue_or_step(ctx, stepi);
		if (SIGIO == ctx->child_sig || SIGCHLD == ctx->child_sig) {
			/* Tracees can receive SIGCHLD at pretty much
			 * any time during replay.  If we recorded
			 * delivery, we'll manually replay it
			 * eventually (or already have).  Just ignore
			 * it here. */
			ctx->child_sig = 0;
		}
		guard_unexpected_signal(ctx);

		if (fcntl(ctx->hpc->rbc.fd, F_GETOWN) != tid) {
			fatal("Scheduled task %d doesn't own hpc; replay divergence", tid);
		}
		rcb_now = read_rbc(ctx->hpc);
	}
	guard_overshoot(ctx, rcb, rcb_now);

	/* Step 2: Slowly single-step our way to the target rcb.
	 *
	 * This is apparently needed because hpc interrupts can
	 * overshoot. */
	while (rcb > 0 && rcb_now < rcb) {
		if (SIGTRAP == ctx->child_sig
		    && is_debugger_trap(ctx, sig, ASYNC, NOT_AT_TARGET,
					stepi)) {
			/* We proved that we're not at the execution
			 * target, but we're single-stepping now so we
			 * have to check whether this was a debugger
			 * trap. */
			return 1;
		}
		continue_or_step(ctx, STEPI);
		if (SIGCHLD == ctx->child_sig) {
			/* See above. */
			ctx->child_sig = 0;
		}
		guard_unexpected_signal(ctx);
		rcb_now = read_rbc(ctx->hpc);
	}
	guard_overshoot(ctx, rcb, rcb_now);

	/* Step 3: Slowly single-step our way to the target $ip.
	 *
	 * What we really want to do is set a retired-instruction
	 * interrupt and do away with all this cruft. */
	while (rcb == rcb_now) {
		struct user_regs_struct cur_regs;

		read_child_registers(ctx->child_tid, &cur_regs);
		if (0 == compare_register_files("rep interrupt", &cur_regs,
						"rec", regs, 0, 0)) {
			if (SIGTRAP == ctx->child_sig
			    && is_debugger_trap(ctx, sig, ASYNC, AT_TARGET,
						stepi)) {
				return 1;
			}
			ctx->child_sig = 0;
			break;
		}
		debug("Stepping from ip %p to %p",
		      (void*)cur_regs.eip, (void*)regs->eip);

		if (SIGTRAP == ctx->child_sig
		    && is_debugger_trap(ctx, sig, ASYNC, NOT_AT_TARGET,
					stepi)) {
			/* See above. */
			return 1;
		}
		continue_or_step(ctx, STEPI);
		if (SIGCHLD == ctx->child_sig) {
			/* See above. */
			ctx->child_sig = 0;
		}
		guard_unexpected_signal(ctx);
		rcb_now = read_rbc(ctx->hpc);
	}
	guard_overshoot(ctx, rcb, rcb_now);

	return 0;
}
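/* Step 1 relies on reset_hpc() arming the retired-conditional-branch
 * counter so the kernel interrupts the tracee after roughly
 * |rcb - rcb_now - SKID_SIZE| more branches; the F_GETOWN check
 * above confirms the counter fd is owned by the right task.  Below
 * is a minimal sketch of such arming, assuming a perf-events-based
 * counter delivering SIGIO on overflow; the real reset_hpc() may
 * differ in detail (e.g. F_SETOWN_EX for thread-directed delivery). */
#define _GNU_SOURCE		/* for F_SETSIG */
#include <fcntl.h>
#include <signal.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <linux/perf_event.h>

static void arm_rbc_interrupt_sketch(int fd, pid_t tid, uint64_t period)
{
	/* Quiesce and zero the counter, then set the new overflow
	 * period.  Error handling omitted for brevity. */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_PERIOD, &period);

	/* Deliver counter-overflow notifications as SIGIO to |tid|. */
	fcntl(fd, F_SETOWN, tid);
	fcntl(fd, F_SETSIG, SIGIO);
	fcntl(fd, F_SETFL, O_ASYNC);

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
}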
static int is_debugger_trap(struct context* ctx, int target_sig,
			    sigdelivery_t delivery, execstate_t exec_state,
			    int stepi)
{
	struct user_regs_struct regs;
	void* ip;

	assert(SIGTRAP == ctx->child_sig);

	/* We're not replaying a trap, and it was clearly raised on
	 * behalf of the debugger.  (The debugger will verify
	 * that.) */
	if (SIGTRAP != target_sig
	    && (DETERMINISTIC == delivery
		/* We single-step for async delivery, so the trap was
		 * only clearly for the debugger if the debugger was
		 * requesting single-stepping. */
		|| (stepi && NOT_AT_TARGET == exec_state))) {
		return 1;
	}

	/* We're trying to replay a deterministic SIGTRAP, or we're
	 * replaying an async signal. */
	read_child_registers(ctx->child_tid, &regs);
	ip = (void*)regs.eip;
	if (ip_is_breakpoint(ip)) {
		/* No ambiguity, definitely meant for the debugger. */
		assert(is_breakpoint_trap(ctx));
		return 1;
	}
	if (is_breakpoint_trap(ctx)) {
		/* We should only ever see a breakpoint trap for int3
		 * instructions (that aren't debugger-set breakpoints,
		 * which we already checked).  These traps must be
		 * deterministic.  They aren't meant for the debugger,
		 * but we'll notify the debugger anyway. */
		assert(DETERMINISTIC == delivery);
		return 0;
	}
	if (DETERMINISTIC == delivery) {
		/* If the delivery of SIGTRAP is supposed to be
		 * deterministic and we didn't just retire an |int 3|
		 * and this wasn't a breakpoint, we must have been
		 * single-stepping.  So it's definitely for the
		 * debugger. */
		assert(stepi);
		return 1;
	}
	/* We're replaying an async signal. */
	if (AT_TARGET == exec_state) {
		/* If we're at the target of the async signal
		 * delivery, prefer delivering the signal to retiring
		 * a possible debugger single-step; we'll notify the
		 * debugger anyway. */
		return 0;
	}
	/* Otherwise, we're not at the target and this wasn't a
	 * breakpoint, so it's for the debugger if the debugger wants
	 * to single-step. */
	return stepi;
}
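/* For reference, the decision logic above reduces to roughly this
 * table (1 = trap belongs to the debugger, 0 = trap belongs to
 * replay).  This is a summary of the code, not an independent
 * specification:
 *
 *   target_sig != SIGTRAP, DETERMINISTIC delivery ........... 1
 *   target_sig != SIGTRAP, ASYNC, stepi, NOT_AT_TARGET ...... 1
 *   $ip at a debugger-set breakpoint ........................ 1
 *   breakpoint trap at a non-debugger int3 .................. 0
 *   DETERMINISTIC, none of the above (implies stepi) ........ 1
 *   ASYNC, AT_TARGET ........................................ 0
 *   ASYNC, NOT_AT_TARGET .................................... stepi
 */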
/**
 * Reply to debugger requests until the debugger asks us to resume
 * execution.
 */
static struct dbg_request process_debugger_requests(struct dbg_context* dbg,
						    struct context* ctx)
{
	if (!dbg) {
		return continue_all_tasks;
	}
	while (1) {
		struct dbg_request req = dbg_get_request(dbg);
		struct context* target = NULL;

		if (dbg_is_resume_request(&req)) {
			return req;
		}

		target = (req.target > 0) ?
			 rep_sched_lookup_thread(req.target) : ctx;

		switch (req.type) {
		case DREQ_GET_CURRENT_THREAD:
			dbg_reply_get_current_thread(dbg, get_threadid(ctx));
			continue;
		case DREQ_GET_IS_THREAD_ALIVE:
			dbg_reply_get_is_thread_alive(dbg, !!target);
			continue;
		case DREQ_GET_MEM: {
			size_t len;
			byte* mem = read_mem(target, req.mem.addr,
					     req.mem.len, &len);
			dbg_reply_get_mem(dbg, mem, len);
			sys_free((void**)&mem);
			continue;
		}
		case DREQ_GET_OFFSETS:
			/* TODO */
			dbg_reply_get_offsets(dbg);
			continue;
		case DREQ_GET_REG: {
			struct user_regs_struct regs;
			dbg_regvalue_t val;

			read_child_registers(target->child_tid, &regs);
			val.value = get_reg(&regs, req.reg, &val.defined);
			dbg_reply_get_reg(dbg, val);
			continue;
		}
		case DREQ_GET_REGS: {
			struct user_regs_struct regs;
			struct dbg_regfile file;
			int i;
			dbg_regvalue_t* val;

			read_child_registers(target->child_tid, &regs);
			memset(&file, 0, sizeof(file));
			for (i = DREG_EAX; i < DREG_NUM_USER_REGS; ++i) {
				val = &file.regs[i];
				val->value = get_reg(&regs, i, &val->defined);
			}
			val = &file.regs[DREG_ORIG_EAX];
			val->value = get_reg(&regs, DREG_ORIG_EAX,
					     &val->defined);

			dbg_reply_get_regs(dbg, &file);
			continue;
		}
		case DREQ_GET_STOP_REASON:
			dbg_reply_get_stop_reason(dbg, target->rec_tid,
						  target->child_sig);
			continue;
		case DREQ_GET_THREAD_LIST: {
			pid_t* tids;
			size_t len;
			rep_sched_enumerate_tasks(&tids, &len);
			dbg_reply_get_thread_list(dbg, tids, len);
			sys_free((void**)&tids);
			continue;
		}
		case DREQ_INTERRUPT:
			/* Tell the debugger we stopped and await
			 * further instructions. */
			dbg_notify_stop(dbg, get_threadid(ctx), 0);
			continue;
		case DREQ_SET_SW_BREAK:
			set_sw_breakpoint(target, &req);
			dbg_reply_watchpoint_request(dbg, 0);
			continue;
		case DREQ_REMOVE_SW_BREAK:
			remove_sw_breakpoint(target, &req);
			dbg_reply_watchpoint_request(dbg, 0);
			continue;
		case DREQ_REMOVE_HW_BREAK:
		case DREQ_REMOVE_RD_WATCH:
		case DREQ_REMOVE_WR_WATCH:
		case DREQ_REMOVE_RDWR_WATCH:
		case DREQ_SET_HW_BREAK:
		case DREQ_SET_RD_WATCH:
		case DREQ_SET_WR_WATCH:
		case DREQ_SET_RDWR_WATCH:
			/* Hardware breakpoints and watchpoints aren't
			 * supported; reply with an error. */
			dbg_reply_watchpoint_request(dbg, -1);
			continue;
		default:
			fatal("Unknown debugger request %d", req.type);
		}
	}
}
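/* For orientation, these DREQ_* requests correspond to gdb
 * remote-serial-protocol packets arriving over the debugger socket.
 * The packet names below are standard gdb protocol; the exact
 * correspondence performed by dbg_get_request() in this codebase is
 * an assumption:
 *
 *   g             -> DREQ_GET_REGS        p n  -> DREQ_GET_REG
 *   m addr,len    -> DREQ_GET_MEM         ?    -> DREQ_GET_STOP_REASON
 *   T tid         -> DREQ_GET_IS_THREAD_ALIVE
 *   qC            -> DREQ_GET_CURRENT_THREAD
 *   qfThreadInfo  -> DREQ_GET_THREAD_LIST
 *   Z0 / z0       -> DREQ_SET_SW_BREAK / DREQ_REMOVE_SW_BREAK
 *   Z1..Z4/z1..z4 -> the HW break/watchpoint requests rejected above
 *   ^C (0x03)     -> DREQ_INTERRUPT       c / s -> resume requests
 */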
/**
 * Single-step to the n-th conditional branch, i.e. to the branch
 * count at which the asynchronous event was recorded.
 */
static void compensate_branch_count(struct context* ctx, int sig)
{
	uint64_t rbc_now, rbc_rec;

	rbc_rec = ctx->trace.rbc_up;
	rbc_now = read_rbc_up(ctx->hpc);

	/* If the skid size was too small, we'd have to go back to
	 * the last checkpoint and re-execute the program.
	 * Checkpointing is not implemented yet, so we fail. */
	if (rbc_now > rbc_rec) {
		fprintf(stderr,
			"hpc overcounted in asynchronous event, recorded: %llu now: %llu\n",
			rbc_rec, rbc_now);
		fprintf(stderr, "event: %d, global_time %u\n",
			ctx->trace.stop_reason, ctx->trace.global_time);
		assert(0);
	}

	int found_spot = 0;

	/* First close the remaining gap in branch count ... */
	rbc_now = read_rbc_up(ctx->hpc);
	while (rbc_now < rbc_rec) {
		singlestep(ctx, 0, 0x57f);
		rbc_now = read_rbc_up(ctx->hpc);
	}

	/* ... then single-step within the target branch count until
	 * the register files match.  The branch count alone is
	 * ambiguous: many consecutive instructions retire at the
	 * same rbc value, so the recorded registers must match
	 * too. */
	while (rbc_now == rbc_rec) {
		struct user_regs_struct regs;

		read_child_registers(ctx->child_tid, &regs);
		if (sig == SIGSEGV) {
			/* We should now stop at the instruction that
			 * caused the SIGSEGV. */
			sys_ptrace_syscall(ctx->child_tid);
			sys_waitpid(ctx->child_tid, &ctx->status);
		}

		/* The eflags register has two bits that are set when
		 * an interrupt is pending:
		 *   bit 8:  TF (trap flag)
		 *   bit 17: VM (virtual-8086 mode)
		 * We enable these two bits in the eflags register to
		 * make sure that the register files match. */
		int check = compare_register_files("now", &regs, "rec",
						   &ctx->trace.recorded_regs,
						   0, 0);
		if (check == 0 || check == 0x80) {
			found_spot++;
			/* A SIGSEGV can be triggered by a regular
			 * instruction; it is not necessarily sent by
			 * another process.  We check that condition
			 * here. */
			if (sig == SIGSEGV) {
				//print_inst(ctx->child_tid);
				/* Ensure that we get the SIGSEGV at
				 * the right spot, and deliver the
				 * signal. */
				singlestep(ctx, 0, 0xb7f);
			}
			/* The signal is now set up to be delivered
			 * when the process continues. */
			break;
		}
		/* Check that we do not get an unexpected signal
		 * while single-stepping. */
		singlestep(ctx, 0, 0x57f);
		rbc_now = read_rbc_up(ctx->hpc);
	}

	if (found_spot != 1) {
		printf("cannot find signal %d time: %u\n",
		       sig, ctx->trace.global_time);
		assert(found_spot == 1);
	}
}
void rep_process_signal(struct context* ctx)
{
	struct trace* trace = &(ctx->trace);
	int tid = ctx->child_tid;
	int sig = -trace->stop_reason;

	/* If a signal were still pending here, two signals would
	 * have to be delivered in a row. */
	assert(ctx->child_sig == 0);

	switch (sig) {
	case -SIG_SEGV_RDTSC: {
		/* Emulate the rdtsc: set the eax and edx registers
		 * to the recorded values and skip the instruction. */
		struct user_regs_struct regs;
		int size;

		/* Go to the event. */
		goto_next_event(ctx);

		/* Make sure we are there. */
		assert(WSTOPSIG(ctx->status) == SIGSEGV);

		char* inst = get_inst(tid, 0, &size);
		assert(strncmp(inst, "rdtsc", 5) == 0);
		read_child_registers(tid, &regs);
		regs.eax = trace->recorded_regs.eax;
		regs.edx = trace->recorded_regs.edx;
		regs.eip += size;
		write_child_registers(tid, &regs);
		sys_free((void**)&inst);

		compare_register_files("rdtsc_now", &regs, "rdtsc_rec",
				       &ctx->trace.recorded_regs, 1, 1);

		/* This signal should not be recognized by the
		 * application. */
		ctx->child_sig = 0;
		break;
	}
	case -USR_SCHED: {
		assert(trace->rbc_up > 0);

		/* If the current architecture over-counts the event
		 * in question, subtract the overcount here. */
		reset_hpc(ctx, trace->rbc_up - SKID_SIZE);
		goto_next_event(ctx);
		/* Make sure that the signal came from the hpc. */
		if (fcntl(ctx->hpc->rbc_down.fd, F_GETOWN)
		    == ctx->child_tid) {
			/* This signal should not be recognized by
			 * the application. */
			ctx->child_sig = 0;
			stop_hpc_down(ctx);
			compensate_branch_count(ctx, sig);
			stop_hpc(ctx);
		} else {
			fprintf(stderr,
				"internal error: next event should be: %d but it is: %d -- bailing out\n",
				-USR_SCHED, ctx->event);
			sys_exit();
		}
		break;
	}
	case SIGIO:
	case SIGCHLD: {
		/* Synchronous signal (signal received in a system
		 * call). */
		if (trace->rbc_up == 0) {
			ctx->replay_sig = sig;
			return;
		}

		/* Set up and start the replay counters. */
		reset_hpc(ctx, trace->rbc_up - SKID_SIZE);

		/* Single-step if the number of instructions to the
		 * next event is "small". */
		if (trace->rbc_up <= 10000) {
			stop_hpc_down(ctx);
			compensate_branch_count(ctx, sig);
			stop_hpc(ctx);
		} else {
			printf("large count\n");
			sys_ptrace_syscall(tid);
			sys_waitpid(tid, &ctx->status);
			/* Make sure we were interrupted by ptrace. */
			assert(WSTOPSIG(ctx->status) == SIGIO);
			/* Reset the pending signal, since it did not
			 * occur in the original execution. */
			ctx->child_sig = 0;
			ctx->status = 0;

			/* Do not forget to stop the hpc! */
			compensate_branch_count(ctx, sig);
			stop_hpc(ctx);
			stop_hpc_down(ctx);
		}
		break;
	}
	case SIGSEGV: {
		/* Synchronous signal (signal received in a system
		 * call). */
		if (trace->rbc_up == 0 && trace->page_faults == 0) {
			ctx->replay_sig = sig;
			return;
		}

		sys_ptrace_syscall(ctx->child_tid);
		sys_waitpid(ctx->child_tid, &ctx->status);
		assert(WSTOPSIG(ctx->status) == SIGSEGV);

		struct user_regs_struct regs;
		read_child_registers(ctx->child_tid, &regs);
		assert(compare_register_files("now", &regs, "rec",
					      &ctx->trace.recorded_regs,
					      1, 1) == 0);

		/* Deliver the signal. */
		singlestep(ctx, SIGSEGV, 0x57f);
		break;
	}
	default:
		printf("unknown signal %d -- bailing out\n", sig);
		sys_exit();
		break;
	}
}
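/* The SIG_SEGV_RDTSC case works because, during recording, rdtsc was
 * made to fault deterministically.  Linux exposes this via
 * prctl(PR_SET_TSC, PR_TSC_SIGSEGV); a minimal sketch of what the
 * recorder presumably arranges in the tracee follows (the placement
 * and exact call site in this codebase are assumptions): */
#include <sys/prctl.h>

static void disable_tsc_sketch(void)
{
	/* After this prctl, every rdtsc executed by the calling task
	 * raises SIGSEGV, so the recorder can trap it and log
	 * eax/edx, and the replayer can emulate it as above. */
	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
}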