/* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ void linux_syscall(struct trapframe *frame) { register const struct sysent *callp; struct lwp *l; int error; register_t code, args[6], rval[2]; l = curlwp; LWP_CACHE_CREDS(l, l->l_proc); code = frame->tf_eax & (LINUX_SYS_NSYSENT - 1); callp = linux_sysent; callp += code; /* * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in * increasing order. */ args[0] = frame->tf_ebx; args[1] = frame->tf_ecx; args[2] = frame->tf_edx; args[3] = frame->tf_esi; args[4] = frame->tf_edi; args[5] = frame->tf_ebp; rval[0] = 0; rval[1] = 0; if (__predict_false(l->l_proc->p_trace_enabled)) { error = trace_enter(code, args, callp->sy_narg); if (__predict_true(error == 0)) { error = sy_call(callp, l, args, rval); code = frame->tf_eax & (LINUX_SYS_NSYSENT - 1); trace_exit(code, rval, error); } } else error = sy_call(callp, l, args, rval); if (__predict_true(error == 0)) { frame->tf_eax = rval[0]; /* * XXX The linux libc code I (dsl) looked at doesn't use the * carry bit. * Values above 0xfffff000 are assumed to be errno values and * not result codes! */ frame->tf_eflags &= ~PSL_C; /* carry bit */ } else { switch (error) { case ERESTART: /* * The offset to adjust the PC by depends on whether * we entered the kernel through the trap or call gate. * We save the instruction size in tf_err on entry. */ frame->tf_eip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux_errno[error]; frame->tf_eax = error; frame->tf_eflags |= PSL_C; /* carry bit */ break; } } userret(l); }
/* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ static void linux_syscall(struct trapframe *frame) { const struct sysent *callp; struct proc *p; struct lwp *l; int error; register_t code, rval[2]; #define args (&frame->tf_rdi) l = curlwp; p = l->l_proc; code = frame->tf_rax; LWP_CACHE_CREDS(l, p); callp = p->p_emul->e_sysent; code &= (LINUX_SYS_NSYSENT - 1); callp += code; /* * Linux system calls have a maximum of 6 arguments, they are * already adjacent in the syscall trapframe. */ if (__predict_false(p->p_trace_enabled) && (error = trace_enter(code, args, callp->sy_narg)) != 0) goto out; rval[0] = 0; rval[1] = 0; error = sy_call(callp, l, args, rval); out: switch (error) { case 0: frame->tf_rax = rval[0]; break; case ERESTART: /* * The offset to adjust the PC by depends on whether we entered * the kernel through the trap or call gate. We pushed the * size of the instruction into tf_err on entry. */ frame->tf_rip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux_errno[error]; frame->tf_rax = error; break; } if (__predict_false(p->p_trace_enabled)) trace_exit(code, rval, error); userret(l); }
/*
 * syscall_fancy(code, l, frame):
 *	System call dispatcher that always calls trace_enter() before and
 *	trace_exit() after the handler.
 *
 *	code	syscall number as supplied by the caller; SYS_syscall and
 *		SYS___syscall are indirect and fetch the real number from
 *		the user stack.
 *	l	the calling LWP.
 *	frame	user register frame; D0/D1, the carry bit and possibly the
 *		PC are updated with the outcome.
 */
static void
syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	char *params;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[16], rval[2];
	struct proc *p = l->l_proc;

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

	/*
	 * trace_exit() below is reached on every path, including the early
	 * copyin() and trace_enter() failures, so rval must hold defined
	 * values before any of those can bail out.
	 */
	rval[0] = 0;
	rval[1] = frame->f_regs[D1];

	/* Arguments start one word above the user stack pointer. */
	params = (char *)frame->f_regs[SP] + sizeof(int);

	switch (code) {
	case SYS_syscall:
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof(int);
#if defined(COMPAT_13) || defined(COMPAT_16)
		/*
		 * XXX sigreturn requires special stack manipulation
		 * that is only done if entered via the sigreturn
		 * trap.  Cannot allow it here so make sure we fail.
		 */
		switch (code) {
#ifdef COMPAT_13
		case SYS_compat_13_sigreturn13:
#endif
#ifdef COMPAT_16
		case SYS_compat_16___sigreturn14:
#endif
			code = nsys;
			break;
		}
#endif
		break;
	case SYS___syscall:
		/*
		 * Like syscall, but code is a quad, so as to maintain
		 * quad alignment for the rest of the arguments.
		 */
		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
		params += sizeof(quad_t);
		break;
	default:
		break;
	}

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;
	if (argsize) {
		error = copyin(params, (void *)args, argsize);
		if (error)
			goto bad;
	}

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize lwp/proc pointers as they may be different
		 * if this is a child returning from fork syscall.
		 */
		l = curlwp;
		p = l->l_proc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
#ifdef COMPAT_50
		/* see syscall_plain for a comment explaining this */

		/*
		 * Some pre-m68k ELF libc assembler stubs assume
		 * %a0 is preserved across system calls...
		 */
		if (p->p_emul == &emul_netbsd)
			frame->f_regs[A0] = rval[0];
#endif
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		/*
		 * XXX: SVR4 uses this code-path, so we may have
		 * to translate errno.
		 */
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	trace_exit(code, rval, error);
}
/*
 * syscall(frame, l, insn):
 *	Handle a software-interrupt (SWI) system call.
 *
 *	frame	user trap frame; r0/r1, the carry flag and possibly the PC
 *		are updated with the outcome.
 *	l	the calling LWP.
 *	insn	the SWI instruction word; its low 20 bits carry the syscall
 *		number and SWI_OS_MASK selects the OS range.
 *
 *	SWIs outside the NetBSD ranges are either the ARM IMB requests
 *	(accepted and ignored here) or answered with SIGILL.
 */
void
syscall(struct trapframe *frame, lwp_t *l, uint32_t insn)
{
	struct proc * const p = l->l_proc;
	const struct sysent *callp;
	int error;
	u_int nargs;
	register_t *args;
	register_t copyargs[2+SYS_MAXSYSARGS];
	register_t rval[2];
	ksiginfo_t ksi;
	const uint32_t os_mask = insn & SWI_OS_MASK;
	uint32_t code = insn & 0x000fffff;

	/* test new official and old unofficial NetBSD ranges */
	if (__predict_false(os_mask != SWI_OS_NETBSD)
	    && __predict_false(os_mask != 0)) {
		if (os_mask == SWI_OS_ARM
		    && (code == SWI_IMB || code == SWI_IMBrange)) {
			userret(l);
			return;
		}

		/* Undefined so illegal instruction */
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGILL;
		/* XXX get an ILL_ILLSYSCALL assigned */
		ksi.ksi_code = 0;
#ifdef THUMB_CODE
		if (frame->tf_spsr & PSR_T_bit)
			ksi.ksi_addr = (void *)(frame->tf_pc - THUMB_INSN_SIZE);
		else
#endif
			ksi.ksi_addr = (void *)(frame->tf_pc - INSN_SIZE);
		ksi.ksi_trap = insn;
		trapsignal(l, &ksi);
		userret(l);
		return;
	}

	code &= (SYS_NSYSENT - 1);
	callp = p->p_emul->e_sysent + code;
	nargs = callp->sy_narg;

	if (nargs > 4) {
		/*
		 * The first four arguments are in r0-r3; the rest are
		 * fetched from the user stack.
		 */
		args = copyargs;
		memcpy(args, &frame->tf_r0, 4 * sizeof(register_t));
		error = copyin((void *)frame->tf_usr_sp, args + 4,
		    (nargs - 4) * sizeof(register_t));
		if (error)
			goto bad;
	} else {
		args = &frame->tf_r0;
	}

	/*
	 * Give rval a defined value before trace_enter() can fail, because
	 * trace_exit() below is still handed rval in that case.
	 */
	rval[0] = 0;
	rval[1] = 0;

	/*
	 * trace_enter() runs only when tracing is enabled and the call is
	 * not an indirect one; the handler runs unless trace_enter() failed.
	 */
	if (!__predict_false(p->p_trace_enabled)
	    || __predict_false(callp->sy_flags & SYCALL_INDIRECT)
	    || (error = trace_enter(code, args, nargs)) == 0) {
		KASSERT(l->l_holdcnt == 0);
		error = (*callp->sy_call)(l, args, rval);
	}

	/*
	 * Report the exit under exactly the condition under which the entry
	 * was reported above: tracing enabled AND not an indirect call.
	 */
	if (__predict_false(p->p_trace_enabled)
	    && !__predict_false(callp->sy_flags & SYCALL_INDIRECT))
		trace_exit(code, rval, error);

	switch (error) {
	case 0:
		frame->tf_r0 = rval[0];
		frame->tf_r1 = rval[1];

#ifdef __PROG32
		frame->tf_spsr &= ~PSR_C_bit;	/* carry bit */
#else
		frame->tf_r15 &= ~R15_FLAG_C;	/* carry bit */
#endif
		break;

	case ERESTART:
		/*
		 * Reconstruct the pc to point at the swi.
		 */
#ifdef THUMB_CODE
		if (frame->tf_spsr & PSR_T_bit)
			frame->tf_pc -= THUMB_INSN_SIZE;
		else
#endif
			frame->tf_pc -= INSN_SIZE;
		break;

	case EJUSTRETURN:
		/* nothing to do */
		break;

	default:
	bad:
		frame->tf_r0 = error;
#ifdef __PROG32
		frame->tf_spsr |= PSR_C_bit;	/* carry bit */
#else
		frame->tf_r15 |= R15_FLAG_C;	/* carry bit */
#endif
		break;
	}

	userret(l);
}
/*
 * I/O write handler for the trace device.
 *
 * The register is selected by (offset >> 2); `value` is the 32-bit word
 * written to it.  Depending on the register, `value` is used directly
 * (ids, addresses, lengths, flags) or handed to vstrcpy() /
 * cpu_memory_rw_debug() as the source address of data to copy into the
 * file-level `path` / `arg` buffers.
 *
 * Trace records are only emitted while trace_filename is set; memcheck
 * hooks (when compiled in) only run while memcheck_enabled is set.
 */
static void trace_dev_write(void *opaque, target_phys_addr_t offset, uint32_t value)
{
    trace_dev_state *s = (trace_dev_state *)opaque;

    (void)s;    /* device state is not needed by this handler */

    switch (offset >> 2) {
    case TRACE_DEV_REG_SWITCH:  // context switch, switch to pid
        if (trace_filename != NULL) {
            trace_switch(value);
#ifdef DEBUG
            printf("QEMU.trace: kernel, context switch %u\n", value);
#endif
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_switch(value);
        }
#endif  // CONFIG_MEMCHECK
        break;
    case TRACE_DEV_REG_TGID:    // save the tgid for the following fork/clone
        tgid = value;
#ifdef DEBUG
        if (trace_filename != NULL) {
            printf("QEMU.trace: kernel, tgid %u\n", value);
        }
#endif
        break;
    case TRACE_DEV_REG_FORK:    // fork, fork new pid
        if (trace_filename != NULL) {
            trace_fork(tgid, value);
#ifdef DEBUG
            printf("QEMU.trace: kernel, fork %u\n", value);
#endif
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_fork(tgid, value);
        }
#endif  // CONFIG_MEMCHECK
        break;
    case TRACE_DEV_REG_CLONE:   // fork, clone new pid (i.e. thread)
        if (trace_filename != NULL) {
            trace_clone(tgid, value);
#ifdef DEBUG
            printf("QEMU.trace: kernel, clone %u\n", value);
#endif
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_clone(tgid, value);
        }
#endif  // CONFIG_MEMCHECK
        break;
    case TRACE_DEV_REG_EXECVE_VMSTART:  // execve, vstart
        vstart = value;
        break;
    case TRACE_DEV_REG_EXECVE_VMEND:    // execve, vend
        vend = value;
        break;
    case TRACE_DEV_REG_EXECVE_OFFSET:   // execve, offset in EXE
        eoff = value;
        break;
    case TRACE_DEV_REG_EXECVE_EXEPATH:  // init exec, path of EXE
        vstrcpy(value, path, CLIENT_PAGE_SIZE);
        if (trace_filename != NULL) {
            trace_init_exec(vstart, vend, eoff, path);
#ifdef DEBUG
            printf("QEMU.trace: kernel, init exec [%lx,%lx]@%lx [%s]\n",
                   vstart, vend, eoff, path);
#endif
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            if (path[0] == '\0') {
                // vstrcpy may fail to copy path. In this case lets do it
                // differently.
                memcheck_get_guest_kernel_string(path, value, CLIENT_PAGE_SIZE);
            }
            memcheck_mmap_exepath(vstart, vend, eoff, path);
        }
#endif  // CONFIG_MEMCHECK
        path[0] = 0;
        break;
    case TRACE_DEV_REG_CMDLINE_LEN:     // execve, process cmdline length
        cmdlen = value;
        break;
    case TRACE_DEV_REG_CMDLINE:         // execve, process cmdline
        // NOTE(review): cmdlen is whatever was last written to
        // TRACE_DEV_REG_CMDLINE_LEN and is not checked here against the
        // size of arg -- confirm the buffer bound.
        cpu_memory_rw_debug(cpu_single_env, value, arg, cmdlen, 0);
        if (trace_filename != NULL) {
            trace_execve(arg, cmdlen);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_set_cmd_line(arg, cmdlen);
        }
#endif  // CONFIG_MEMCHECK
#ifdef DEBUG
        if (trace_filename != NULL) {
            int i;
            // Turn the embedded NUL separators into spaces for printing.
            for (i = 0; i < cmdlen; i ++)
                if (i != cmdlen - 1 && arg[i] == 0)
                    arg[i] = ' ';
            printf("QEMU.trace: kernel, execve %s[%d]\n", arg, cmdlen);
            arg[0] = 0;
        }
#endif
        break;
    case TRACE_DEV_REG_EXIT:            // exit, exit current process with exit code
        if (trace_filename != NULL) {
            trace_exit(value);
#ifdef DEBUG
            printf("QEMU.trace: kernel, exit %x\n", value);
#endif
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_exit(value);
        }
#endif  // CONFIG_MEMCHECK
        break;
    case TRACE_DEV_REG_NAME:            // record thread name
        vstrcpy(value, path, CLIENT_PAGE_SIZE);
        {
            // Remove the trailing newline if it exists.  An empty name
            // has no last character, so it must not be indexed at -1.
            size_t len = strlen(path);
            if (len > 0 && path[len - 1] == '\n') {
                path[len - 1] = 0;
            }
        }
        if (trace_filename != NULL) {
            trace_name(path);
#ifdef DEBUG
            printf("QEMU.trace: kernel, name %s\n", path);
#endif
        }
        break;
    case TRACE_DEV_REG_MMAP_EXEPATH:    // mmap, path of EXE, the others are same as execve
        vstrcpy(value, path, CLIENT_PAGE_SIZE);
        if (trace_filename != NULL) {
            trace_mmap(vstart, vend, eoff, path);
#ifdef DEBUG
            printf("QEMU.trace: kernel, mmap [%lx,%lx]@%lx [%s]\n", vstart, vend, eoff, path);
#endif
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            if (path[0] == '\0') {
                // vstrcpy may fail to copy path. In this case lets do it
                // differently.
                memcheck_get_guest_kernel_string(path, value, CLIENT_PAGE_SIZE);
            }
            memcheck_mmap_exepath(vstart, vend, eoff, path);
        }
#endif  // CONFIG_MEMCHECK
        path[0] = 0;
        break;
    case TRACE_DEV_REG_INIT_PID:        // init, name the pid that starts before device registered
        pid = value;
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_init_pid(value);
        }
#endif  // CONFIG_MEMCHECK
        break;
    case TRACE_DEV_REG_INIT_NAME:       // init, the comm of the init pid
        vstrcpy(value, path, CLIENT_PAGE_SIZE);
        if (trace_filename != NULL) {
            trace_init_name(tgid, pid, path);
#ifdef DEBUG
            printf("QEMU.trace: kernel, init name %u [%s]\n", pid, path);
#endif
        }
        path[0] = 0;
        break;

    case TRACE_DEV_REG_DYN_SYM_ADDR:    // dynamic symbol address
        dsaddr = value;
        break;
    case TRACE_DEV_REG_DYN_SYM:         // add dynamic symbol
        vstrcpy(value, arg, CLIENT_PAGE_SIZE);
        if (trace_filename != NULL) {
            trace_dynamic_symbol_add(dsaddr, arg);
#ifdef DEBUG
            printf("QEMU.trace: dynamic symbol %lx:%s\n", dsaddr, arg);
#endif
        }
        arg[0] = 0;
        break;
    case TRACE_DEV_REG_REMOVE_ADDR:     // remove dynamic symbol addr
        if (trace_filename != NULL) {
            trace_dynamic_symbol_remove(value);
#ifdef DEBUG
            // Report the address that was actually removed.
            printf("QEMU.trace: dynamic symbol remove %lx\n", (unsigned long)value);
#endif
        }
        break;

    case TRACE_DEV_REG_PRINT_STR:       // print string
        vstrcpy(value, arg, CLIENT_PAGE_SIZE);
        printf("%s", arg);
        arg[0] = 0;
        break;
    case TRACE_DEV_REG_PRINT_NUM_DEC:   // print number in decimal
        printf("%d", value);
        break;
    case TRACE_DEV_REG_PRINT_NUM_HEX:   // print number in hexadecimal
        printf("%x", value);
        break;

    case TRACE_DEV_REG_STOP_EMU:        // stop the VM execution
        if (trace_filename != NULL) {
            // To ensure that the number of instructions executed in this
            // block is correct, we pretend that there was an exception.
            trace_exception(0);
        }
        cpu_single_env->exception_index = EXCP_HLT;
        cpu_single_env->halted = 1;
        qemu_system_shutdown_request();
        cpu_loop_exit();
        break;

    case TRACE_DEV_REG_ENABLE:          // tracing enable: 0 = stop, 1 = start
        if (value == 1) {
            if (trace_filename != NULL) {
                start_tracing();
            }
        }
        else if (value == 0) {
            if (trace_filename != NULL) {
                stop_tracing();

                // To ensure that the number of instructions executed in this
                // block is correct, we pretend that there was an exception.
                trace_exception(0);
            }
        }
        break;

    case TRACE_DEV_REG_UNMAP_START:
        unmap_start = value;
        break;
    case TRACE_DEV_REG_UNMAP_END:
        if (trace_filename != NULL) {
            trace_munmap(unmap_start, value);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_unmap(unmap_start, value);
        }
#endif  // CONFIG_MEMCHECK
        break;

    case TRACE_DEV_REG_METHOD_ENTRY:
    case TRACE_DEV_REG_METHOD_EXIT:
    case TRACE_DEV_REG_METHOD_EXCEPTION:
    case TRACE_DEV_REG_NATIVE_ENTRY:
    case TRACE_DEV_REG_NATIVE_EXIT:
    case TRACE_DEV_REG_NATIVE_EXCEPTION:
        if (trace_filename != NULL) {
            if (tracing) {
                // The call type is the register index relative to byte
                // offset 4096.
                int call_type = (offset - 4096) >> 2;
                trace_interpreted_method(value, call_type);
            }
        }
        break;

#ifdef CONFIG_MEMCHECK
    case TRACE_DEV_REG_MALLOC:
        if (memcheck_enabled) {
            memcheck_guest_alloc(value);
        }
        break;

    case TRACE_DEV_REG_FREE_PTR:
        if (memcheck_enabled) {
            memcheck_guest_free(value);
        }
        break;

    case TRACE_DEV_REG_QUERY_MALLOC:
        if (memcheck_enabled) {
            memcheck_guest_query_malloc(value);
        }
        break;

    case TRACE_DEV_REG_LIBC_INIT:
        if (memcheck_enabled) {
            memcheck_guest_libc_initialized(value);
        }
        break;

    case TRACE_DEV_REG_PRINT_USER_STR:
        if (memcheck_enabled) {
            memcheck_guest_print_str(value);
        }
        break;
#endif // CONFIG_MEMCHECK

    default:
        // Unknown registers below byte offset 4096 are fatal; anything
        // at or above it is silently ignored.
        if (offset < 4096) {
            cpu_abort(cpu_single_env, "trace_dev_write: Bad offset %x\n", offset);
        }
        break;
    }
}
/*
 * I/O write handler for the trace device.
 *
 * The register is selected by (offset >> 2); `value` is the 32-bit word
 * written to it.  Depending on the register, `value` is used directly
 * (ids, addresses, lengths, flags) or handed to vstrcpy() /
 * safe_memory_rw_debug() as the source address of data to copy into the
 * file-level `exec_path` / `exec_arg` buffers.
 *
 * Trace records are only emitted while trace_filename is set; memcheck
 * hooks (when compiled in) only run while memcheck_enabled is set.
 */
static void trace_dev_write(void *opaque, target_phys_addr_t offset, uint32_t value)
{
    trace_dev_state *s = (trace_dev_state *)opaque;

    (void)s;

    switch (offset >> 2) {
    case TRACE_DEV_REG_SWITCH:  // context switch, switch to pid
        DPID("QEMU.trace: context switch tid=%u\n", value);
        if (trace_filename != NULL) {
            trace_switch(value);
            D("QEMU.trace: kernel, context switch %u\n", value);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_switch(value);
        }
#endif
        tid = (unsigned) value;
        break;
    case TRACE_DEV_REG_TGID:    // save the tgid for the following fork/clone
        DPID("QEMU.trace: tgid=%u\n", value);
        tgid = value;
        if (trace_filename != NULL) {
            D("QEMU.trace: kernel, tgid %u\n", value);
        }
        break;
    case TRACE_DEV_REG_FORK:    // fork, fork new pid
        DPID("QEMU.trace: fork (pid=%d tgid=%d value=%d)\n", pid, tgid, value);
        if (trace_filename != NULL) {
            trace_fork(tgid, value);
            D("QEMU.trace: kernel, fork %u\n", value);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_fork(tgid, value);
        }
#endif
        break;
    case TRACE_DEV_REG_CLONE:   // fork, clone new pid (i.e. thread)
        DPID("QEMU.trace: clone (pid=%d tgid=%d value=%d)\n", pid, tgid, value);
        if (trace_filename != NULL) {
            trace_clone(tgid, value);
            D("QEMU.trace: kernel, clone %u\n", value);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_clone(tgid, value);
        }
#endif
        break;
    case TRACE_DEV_REG_EXECVE_VMSTART:  // execve, vstart
        vstart = value;
        break;
    case TRACE_DEV_REG_EXECVE_VMEND:    // execve, vend
        vend = value;
        break;
    case TRACE_DEV_REG_EXECVE_OFFSET:   // execve, offset in EXE
        eoff = value;
        break;
    case TRACE_DEV_REG_EXECVE_EXEPATH:  // init exec, path of EXE
        vstrcpy(value, exec_path, CLIENT_PAGE_SIZE);
        if (trace_filename != NULL) {
            trace_init_exec(vstart, vend, eoff, exec_path);
            D("QEMU.trace: kernel, init exec [%lx,%lx]@%lx [%s]\n",
              vstart, vend, eoff, exec_path);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            if (exec_path[0] == '\0') {
                // vstrcpy left the buffer empty; fetch the string another way.
                memcheck_get_guest_kernel_string(exec_path, value, CLIENT_PAGE_SIZE);
            }
            memcheck_mmap_exepath(vstart, vend, eoff, exec_path);
        }
#endif
        exec_path[0] = 0;
        break;
    case TRACE_DEV_REG_CMDLINE_LEN:     // execve, process cmdline length
        cmdlen = value;
        break;
    case TRACE_DEV_REG_CMDLINE:         // execve, process cmdline
        // NOTE(review): cmdlen is whatever was last written to
        // TRACE_DEV_REG_CMDLINE_LEN and is not checked here against the
        // size of exec_arg -- confirm the buffer bound.
        safe_memory_rw_debug(cpu_single_env, value, (uint8_t*)exec_arg, cmdlen, 0);
        if (trace_filename != NULL) {
            trace_execve(exec_arg, cmdlen);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_set_cmd_line(exec_arg, cmdlen);
        }
#endif
#if DEBUG || DEBUG_PID
        if (trace_filename != NULL) {
            int i;
            // Turn the embedded NUL separators into spaces for printing.
            for (i = 0; i < cmdlen; i ++)
                if (i != cmdlen - 1 && exec_arg[i] == 0)
                    exec_arg[i] = ' ';
            printf("QEMU.trace: kernel, execve %s[%d]\n", exec_arg, cmdlen);
            exec_arg[0] = 0;
        }
#endif
        break;
    case TRACE_DEV_REG_EXIT:            // exit, exit current process with exit code
        DPID("QEMU.trace: exit tid=%u\n", value);
        if (trace_filename != NULL) {
            trace_exit(value);
            D("QEMU.trace: kernel, exit %x\n", value);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_exit(value);
        }
#endif
        break;
    case TRACE_DEV_REG_NAME:            // record thread name
        vstrcpy(value, exec_path, CLIENT_PAGE_SIZE);
        DPID("QEMU.trace: thread name=%s\n", exec_path);
        {
            // Remove the trailing newline if it exists.  An empty name
            // has no last character, so it must not be indexed at -1.
            size_t len = strlen(exec_path);
            if (len > 0 && exec_path[len - 1] == '\n') {
                exec_path[len - 1] = 0;
            }
        }
        if (trace_filename != NULL) {
            trace_name(exec_path);
            D("QEMU.trace: kernel, name %s\n", exec_path);
        }
        break;
    case TRACE_DEV_REG_MMAP_EXEPATH:    // mmap, path of EXE, the others are same as execve
        vstrcpy(value, exec_path, CLIENT_PAGE_SIZE);
        DPID("QEMU.trace: mmap exe=%s\n", exec_path);
        if (trace_filename != NULL) {
            trace_mmap(vstart, vend, eoff, exec_path);
            D("QEMU.trace: kernel, mmap [%lx,%lx]@%lx [%s]\n", vstart, vend, eoff, exec_path);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            if (exec_path[0] == '\0') {
                // vstrcpy left the buffer empty; fetch the string another way.
                memcheck_get_guest_kernel_string(exec_path, value, CLIENT_PAGE_SIZE);
            }
            memcheck_mmap_exepath(vstart, vend, eoff, exec_path);
        }
#endif
        exec_path[0] = 0;
        break;
    case TRACE_DEV_REG_INIT_PID:        // init, name the pid that starts before device registered
        pid = value;
        DPID("QEMU.trace: pid=%d\n", value);
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_init_pid(value);
        }
#endif
        break;
    case TRACE_DEV_REG_INIT_NAME:       // init, the comm of the init pid
        vstrcpy(value, exec_path, CLIENT_PAGE_SIZE);
        DPID("QEMU.trace: tgid=%d pid=%d name=%s\n", tgid, pid, exec_path);
        if (trace_filename != NULL) {
            trace_init_name(tgid, pid, exec_path);
            D("QEMU.trace: kernel, init name %u [%s]\n", pid, exec_path);
        }
        exec_path[0] = 0;
        break;

    case TRACE_DEV_REG_DYN_SYM_ADDR:    // dynamic symbol address
        dsaddr = value;
        break;
    case TRACE_DEV_REG_DYN_SYM:         // add dynamic symbol
        vstrcpy(value, exec_arg, CLIENT_PAGE_SIZE);
        if (trace_filename != NULL) {
            trace_dynamic_symbol_add(dsaddr, exec_arg);
            D("QEMU.trace: dynamic symbol %lx:%s\n", dsaddr, exec_arg);
        }
        exec_arg[0] = 0;
        break;
    case TRACE_DEV_REG_REMOVE_ADDR:     // remove dynamic symbol addr
        if (trace_filename != NULL) {
            trace_dynamic_symbol_remove(value);
            // Report the address that was actually removed.
            D("QEMU.trace: dynamic symbol remove %lx\n", (unsigned long)value);
        }
        break;

    case TRACE_DEV_REG_PRINT_STR:       // print string
        vstrcpy(value, exec_arg, CLIENT_PAGE_SIZE);
        printf("%s", exec_arg);
        exec_arg[0] = 0;
        break;
    case TRACE_DEV_REG_PRINT_NUM_DEC:   // print number in decimal
        printf("%d", value);
        break;
    case TRACE_DEV_REG_PRINT_NUM_HEX:   // print number in hexadecimal
        printf("%x", value);
        break;

    case TRACE_DEV_REG_STOP_EMU:        // stop the VM execution
        if (trace_filename != NULL) {
            // Pretend there was an exception before halting.
            trace_exception(0);
        }
        cpu_single_env->exception_index = EXCP_HLT;
        cpu_single_env->halted = 1;
        qemu_system_shutdown_request();
        cpu_loop_exit();
        break;

    case TRACE_DEV_REG_ENABLE:          // tracing enable: 0 = stop, 1 = start
        if (value == 1) {
            if (trace_filename != NULL) {
                start_tracing();
            }
        }
        else if (value == 0) {
            if (trace_filename != NULL) {
                stop_tracing();
                // Pretend there was an exception after stopping.
                trace_exception(0);
            }
        }
        break;

    case TRACE_DEV_REG_UNMAP_START:
        unmap_start = value;
        break;
    case TRACE_DEV_REG_UNMAP_END:
        if (trace_filename != NULL) {
            trace_munmap(unmap_start, value);
        }
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled) {
            memcheck_unmap(unmap_start, value);
        }
#endif
        break;

    case TRACE_DEV_REG_METHOD_ENTRY:
    case TRACE_DEV_REG_METHOD_EXIT:
    case TRACE_DEV_REG_METHOD_EXCEPTION:
    case TRACE_DEV_REG_NATIVE_ENTRY:
    case TRACE_DEV_REG_NATIVE_EXIT:
    case TRACE_DEV_REG_NATIVE_EXCEPTION:
        if (trace_filename != NULL) {
            if (tracing) {
                // The call type is the register index relative to byte
                // offset 4096.
                int call_type = (offset - 4096) >> 2;
                trace_interpreted_method(value, call_type);
            }
        }
        break;

#ifdef CONFIG_MEMCHECK
    case TRACE_DEV_REG_MALLOC:
        if (memcheck_enabled) {
            memcheck_guest_alloc(value);
        }
        break;

    case TRACE_DEV_REG_FREE_PTR:
        if (memcheck_enabled) {
            memcheck_guest_free(value);
        }
        break;

    case TRACE_DEV_REG_QUERY_MALLOC:
        if (memcheck_enabled) {
            memcheck_guest_query_malloc(value);
        }
        break;

    case TRACE_DEV_REG_LIBC_INIT:
        if (memcheck_enabled) {
            memcheck_guest_libc_initialized(value);
        }
        break;

    case TRACE_DEV_REG_PRINT_USER_STR:
        if (memcheck_enabled) {
            memcheck_guest_print_str(value);
        }
        break;
#endif

    default:
        // Unknown registers below byte offset 4096 are fatal; anything
        // at or above it is only logged through D().
        if (offset < 4096) {
            cpu_abort(cpu_single_env, "trace_dev_write: Bad offset %x\n", offset);
        } else {
            D("%s: offset=%d (0x%x) value=%d (0x%x)\n", __FUNCTION__, offset,
              offset, value, value);
        }
        break;
    }
}
/*
 * I/O write handler for the trace device.
 *
 * `offset` is rebased against s->base; the register is then selected by
 * (offset >> 2) and `value` is the 32-bit word written to it.  Depending
 * on the register, `value` is used directly (ids, addresses, lengths,
 * flags) or handed to vstrcpy() / vmemcpy() as the source address of data
 * to copy into the file-level `path` / `arg` buffers.
 *
 * Any register not listed below aborts the emulated CPU.
 */
static void trace_dev_write(void *opaque, target_phys_addr_t offset, uint32_t value)
{
    trace_dev_state *s = (trace_dev_state *)opaque;

    offset -= s->base;
    switch (offset >> 2) {
    case TRACE_DEV_REG_SWITCH:  // context switch, switch to pid
        trace_switch(value);
#ifdef DEBUG
        printf("QEMU.trace: kernel, context switch %u\n", value);
#endif
        break;
    case TRACE_DEV_REG_TGID:    // save the tgid for the following fork/clone
        tgid = value;
#ifdef DEBUG
        printf("QEMU.trace: kernel, tgid %u\n", value);
#endif
        break;
    case TRACE_DEV_REG_FORK:    // fork, fork new pid
        trace_fork(tgid, value);
#ifdef DEBUG
        printf("QEMU.trace: kernel, fork %u\n", value);
#endif
        break;
    case TRACE_DEV_REG_CLONE:   // fork, clone new pid (i.e. thread)
        trace_clone(tgid, value);
#ifdef DEBUG
        printf("QEMU.trace: kernel, clone %u\n", value);
#endif
        break;
    case TRACE_DEV_REG_EXECVE_VMSTART:  // execve, vstart
        vstart = value;
        break;
    case TRACE_DEV_REG_EXECVE_VMEND:    // execve, vend
        vend = value;
        break;
    case TRACE_DEV_REG_EXECVE_OFFSET:   // execve, offset in EXE
        eoff = value;
        break;
    case TRACE_DEV_REG_EXECVE_EXEPATH:  // init exec, path of EXE
        vstrcpy(value, path, CLIENT_PAGE_SIZE);
        trace_init_exec(vstart, vend, eoff, path);
#ifdef DEBUG
        printf("QEMU.trace: kernel, init exec [%lx,%lx]@%lx [%s]\n", vstart, vend, eoff, path);
#endif
        path[0] = 0;
        break;
    case TRACE_DEV_REG_CMDLINE_LEN:     // execve, process cmdline length
        cmdlen = value;
        break;
    case TRACE_DEV_REG_CMDLINE:         // execve, process cmdline
        // NOTE(review): cmdlen is whatever was last written to
        // TRACE_DEV_REG_CMDLINE_LEN and is not checked here against the
        // size of arg -- confirm the buffer bound.
        vmemcpy(value, arg, cmdlen);
        trace_execve(arg, cmdlen);
#ifdef DEBUG
        {
            int i;
            // Turn the embedded NUL separators into spaces for printing.
            for (i = 0; i < cmdlen; i ++)
                if (i != cmdlen - 1 && arg[i] == 0)
                    arg[i] = ' ';
            printf("QEMU.trace: kernel, execve %s[%d]\n", arg, cmdlen);
        }
#endif
        arg[0] = 0;
        break;
    case TRACE_DEV_REG_EXIT:            // exit, exit current process with exit code
        trace_exit(value);
#ifdef DEBUG
        printf("QEMU.trace: kernel, exit %x\n", value);
#endif
        break;
    case TRACE_DEV_REG_NAME:            // record thread name
        vstrcpy(value, path, CLIENT_PAGE_SIZE);
        {
            // Remove the trailing newline if it exists.  An empty name
            // has no last character, so it must not be indexed at -1.
            size_t len = strlen(path);
            if (len > 0 && path[len - 1] == '\n') {
                path[len - 1] = 0;
            }
        }
        trace_name(path);
#ifdef DEBUG
        printf("QEMU.trace: kernel, name %s\n", path);
#endif
        break;
    case TRACE_DEV_REG_MMAP_EXEPATH:    // mmap, path of EXE, the others are same as execve
        vstrcpy(value, path, CLIENT_PAGE_SIZE);
        trace_mmap(vstart, vend, eoff, path);
#ifdef DEBUG
        printf("QEMU.trace: kernel, mmap [%lx,%lx]@%lx [%s]\n", vstart, vend, eoff, path);
#endif
        path[0] = 0;
        break;
    case TRACE_DEV_REG_INIT_PID:        // init, name the pid that starts before device registered
        pid = value;
        break;
    case TRACE_DEV_REG_INIT_NAME:       // init, the comm of the init pid
        vstrcpy(value, path, CLIENT_PAGE_SIZE);
        trace_init_name(tgid, pid, path);
#ifdef DEBUG
        printf("QEMU.trace: kernel, init name %u [%s]\n", pid, path);
#endif
        path[0] = 0;
        break;

    case TRACE_DEV_REG_DYN_SYM_ADDR:    // dynamic symbol address
        dsaddr = value;
        break;
    case TRACE_DEV_REG_DYN_SYM:         // add dynamic symbol
        vstrcpy(value, arg, CLIENT_PAGE_SIZE);
        trace_dynamic_symbol_add(dsaddr, arg);
#ifdef DEBUG
        printf("QEMU.trace: dynamic symbol %lx:%s\n", dsaddr, arg);
#endif
        arg[0] = 0;
        break;
    case TRACE_DEV_REG_REMOVE_ADDR:     // remove dynamic symbol addr
        trace_dynamic_symbol_remove(value);
#ifdef DEBUG
        // Report the address that was actually removed.
        printf("QEMU.trace: dynamic symbol remove %lx\n", (unsigned long)value);
#endif
        arg[0] = 0;
        break;

    case TRACE_DEV_REG_PRINT_STR:       // print string
        vstrcpy(value, arg, CLIENT_PAGE_SIZE);
        printf("%s", arg);
        arg[0] = 0;
        break;
    case TRACE_DEV_REG_PRINT_NUM_DEC:   // print number in decimal
        printf("%d", value);
        break;
    case TRACE_DEV_REG_PRINT_NUM_HEX:   // print number in hexadecimal
        printf("%x", value);
        break;

    case TRACE_DEV_REG_STOP_EMU:        // stop the VM execution
        // To ensure that the number of instructions executed in this
        // block is correct, we pretend that there was an exception.
        trace_exception(0);

        cpu_single_env->exception_index = EXCP_HLT;
        cpu_single_env->halted = 1;
        qemu_system_shutdown_request();
        cpu_loop_exit();
        break;

    case TRACE_DEV_REG_ENABLE:          // tracing enable: 0 = stop, 1 = start
        if (value == 1)
            start_tracing();
        else if (value == 0) {
            stop_tracing();

            // To ensure that the number of instructions executed in this
            // block is correct, we pretend that there was an exception.
            trace_exception(0);
        }
        break;

    case TRACE_DEV_REG_UNMAP_START:
        unmap_start = value;
        break;
    case TRACE_DEV_REG_UNMAP_END:
        trace_munmap(unmap_start, value);
        break;

    default:
        cpu_abort(cpu_single_env, "trace_dev_write: Bad offset %x\n", offset);
        break;
    }
}
void linux32_syscall(struct trapframe *frame) { const struct sysent *callp; struct proc *p; struct lwp *l; int error; size_t narg; register32_t code, args[6]; register_t rval[2]; int i; register_t args64[6]; l = curlwp; p = l->l_proc; code = frame->tf_rax; LWP_CACHE_CREDS(l, p); callp = p->p_emul->e_sysent; code &= (LINUX32_SYS_NSYSENT - 1); callp += code; /* * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in * increasing order. */ args[0] = frame->tf_rbx & 0xffffffff; args[1] = frame->tf_rcx & 0xffffffff; args[2] = frame->tf_rdx & 0xffffffff; args[3] = frame->tf_rsi & 0xffffffff; args[4] = frame->tf_rdi & 0xffffffff; args[5] = frame->tf_rbp & 0xffffffff; if (__predict_false(p->p_trace_enabled)) { narg = callp->sy_narg; if (__predict_false(narg > __arraycount(args))) panic("impossible syscall narg, code %d, narg %zd", code, narg); for (i = 0; i < narg; i++) args64[i] = args[i] & 0xffffffff; if ((error = trace_enter(code, args64, narg)) != 0) goto out; } rval[0] = 0; rval[1] = 0; error = sy_call(callp, l, args, rval); out: switch (error) { case 0: frame->tf_rax = rval[0]; frame->tf_rflags &= ~PSL_C; /* carry bit */ break; case ERESTART: /* * The offset to adjust the PC by depends on whether we entered * the kernel through the trap or call gate. We pushed the * size of the instruction into tf_err on entry. */ frame->tf_rip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux32_errno[error]; frame->tf_rax = error; frame->tf_rflags |= PSL_C; /* carry bit */ break; } if (__predict_false(p->p_trace_enabled)) trace_exit(code, rval, error); userret(l); }
/*
 * sunos_syscall_fancy(code, l, frame):
 *	SunOS-emulation system call dispatcher that always calls
 *	trace_enter() before and trace_exit() after the handler.
 *
 *	code	ignored on entry; the real syscall number is fetched from
 *		the user stack.
 *	l	the calling LWP.
 *	frame	user register frame; D0/D1, the carry bit, SP and possibly
 *		the PC are updated with the outcome.
 */
static void
sunos_syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	struct proc *p = l->l_proc;
	char *params;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[16], rval[2];

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

	/*
	 * trace_exit() below is reached on every path, including the early
	 * copyin() and trace_enter() failures, so rval must hold defined
	 * values before any of those can bail out.
	 */
	rval[0] = 0;
	rval[1] = frame->f_regs[D1];

	/*
	 * SunOS passes the syscall-number on the stack, whereas
	 * BSD passes it in D0. So, we have to get the real "code"
	 * from the stack, and clean up the stack, as SunOS glue
	 * code assumes the kernel pops the syscall argument the
	 * glue pushed on the stack. Sigh...
	 */
	code = fuword((void *)frame->f_regs[SP]);

	/*
	 * XXX
	 * Don't do this for sunos_sigreturn, as there's no stored pc
	 * on the stack to skip, the argument follows the syscall
	 * number without a gap.
	 */
	if (code != SUNOS_SYS_sigreturn) {
		frame->f_regs[SP] += sizeof (int);
		/*
		 * remember that we adjusted the SP,
		 * might have to undo this if the system call
		 * returns ERESTART.
		 */
		l->l_md.md_flags |= MDL_STACKADJ;
	} else
		l->l_md.md_flags &= ~MDL_STACKADJ;

	params = (char *)frame->f_regs[SP] + sizeof(int);

	switch (code) {
	case SUNOS_SYS_syscall:
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof(int);
		break;
	default:
		break;
	}

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;
	if (argsize) {
		error = copyin(params, (void *)args, argsize);
		if (error)
			goto bad;
	}

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	/* need new p-value for this */
	if (l->l_md.md_flags & MDL_STACKADJ) {
		l->l_md.md_flags &= ~MDL_STACKADJ;
		/* A restarted call must find the syscall number on the stack again. */
		if (error == ERESTART)
			frame->f_regs[SP] -= sizeof (int);
	}

	trace_exit(code, rval, error);
}
/*
 * Read a binary trace file and replay every record through its handler.
 *
 * File layout as consumed here: one `unsigned long` holding the payload
 * size, followed by exactly that many payload bytes and nothing else.
 * The payload is a sequence of records, each a one-byte tag followed by a
 * fixed-size body:
 *
 *   'e'  trace_enter    2 * uintptr_t + unsigned long long
 *   'x'  trace_exit     unsigned long long
 *   '+'  trace_malloc   size_t + 2 * uintptr_t + unsigned long long
 *   '*'  trace_realloc  size_t + 3 * uintptr_t + unsigned long long
 *   '-'  trace_free     2 * uintptr_t + unsigned long long
 *   'E'  trace_end      unsigned long long (must be the last record)
 *
 * Failures are reported through assert_inner() / assert_set_errno().
 * Returns 0 once the whole trace has been processed.
 *
 * NOTE(review): if those macros return early on failure, `trace_buf` and
 * `trace` are not released on the error paths -- confirm against the
 * macro definitions.
 */
int trace_read (const char *filename)
{
    const size_t stamp_len = sizeof(unsigned long long);
    const size_t ptr_len = sizeof(uintptr_t);

    /* The trace is raw binary data, so open it in binary mode. */
    FILE *trace = fopen(filename, "rb");
    assert_inner(trace, "fopen");

    void *trace_buf = NULL;
    unsigned long trace_bufsize = 0;

    size_t n = fread(&trace_bufsize, sizeof(unsigned long), 1, trace);
    assert_set_errno(ENOTSUP, n == 1, "fread");

    trace_buf = malloc(trace_bufsize);
    assert_inner(trace_buf, "malloc");

    n = fread(trace_buf, 1, trace_bufsize, trace);
    assert_set_errno(ENOTSUP, n == trace_bufsize, "fread");
    /*
     * The payload must be the last thing in the file.  The end-of-file
     * indicator is only set once a read runs into the end, and the
     * exact-size fread() above does not, so probe with one more byte
     * instead of asking feof() directly.
     */
    assert_set_errno(ENOTSUP, fgetc(trace) == EOF, "feof");

    unsigned int trace_ended = 0;
    unsigned long trace_index = 0;

    while (trace_index < trace_bufsize) {
        char sign = *((char *)trace_buf + trace_index);
        trace_index += sizeof(char);

        /* Body of the current record and the bytes left for it. */
        void *record = (char *)trace_buf + trace_index;
        unsigned long remaining = trace_bufsize - trace_index;
        size_t record_len;

        /*
         * Each case checks that the whole record body lies inside the
         * buffer before the handler is allowed to read it.
         */
        switch (sign) {
        case 'e':
            record_len = 2 * ptr_len + stamp_len;
            assert_set_errno(ENOTSUP, record_len <= remaining, "truncated record");
            trace_enter(record);
            trace_index += record_len;
            break;
        case 'x':
            record_len = stamp_len;
            assert_set_errno(ENOTSUP, record_len <= remaining, "truncated record");
            trace_exit(record);
            trace_index += record_len;
            break;
        case '+':
            record_len = sizeof(size_t) + 2 * ptr_len + stamp_len;
            assert_set_errno(ENOTSUP, record_len <= remaining, "truncated record");
            trace_malloc(record);
            trace_index += record_len;
            break;
        case '*':
            record_len = sizeof(size_t) + 3 * ptr_len + stamp_len;
            assert_set_errno(ENOTSUP, record_len <= remaining, "truncated record");
            trace_realloc(record);
            trace_index += record_len;
            break;
        case '-':
            record_len = 2 * ptr_len + stamp_len;
            assert_set_errno(ENOTSUP, record_len <= remaining, "truncated record");
            trace_free(record);
            trace_index += record_len;
            break;
        case 'E':
            record_len = stamp_len;
            assert_set_errno(ENOTSUP, record_len <= remaining, "truncated record");
            trace_end(record);
            trace_index += record_len;
            trace_ended = 1;
            assert_set_errno(ENOTSUP, trace_bufsize == trace_index, "END not at end");
            break;
        default:
            assert_set_errno(ENOTSUP, 0, "sign switch");
            break;
        }
    }

    if (!trace_ended) {
        assert_set_errno(ENOTSUP, 0, "no END at end");
    }

    free(trace_buf);
    fclose(trace);

    return 0;
}
/*
 * linux_syscall_fancy(code, l, frame):
 *	Dispatch one Linux-emulation system call with syscall tracing.
 *
 *	code	system call number; out-of-range values are redirected to the
 *		emulation's e_nosys entry.
 *	l	calling LWP.
 *	frame	register frame; arguments are read from D1-D5 and the result
 *		is written back to D0/D1 and the carry bit of the status
 *		register.
 *
 *	trace_enter() is called before the handler and trace_exit() after
 *	it; if trace_enter() fails, the handler is skipped and its error is
 *	reported as the result of the call.
 */
static void
linux_syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	struct proc *p = l->l_proc;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[8], rval[2];

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;	/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;

	/*
	 * Linux passes the args in d1-d5.  Each case copies one register
	 * and deliberately falls through to pick up the remaining ones.
	 */
	switch (argsize) {
	case 20:
		args[4] = frame->f_regs[D5];
		/* FALLTHROUGH */
	case 16:
		args[3] = frame->f_regs[D4];
		/* FALLTHROUGH */
	case 12:
		args[2] = frame->f_regs[D3];
		/* FALLTHROUGH */
	case 8:
		args[1] = frame->f_regs[D2];
		/* FALLTHROUGH */
	case 4:
		args[0] = frame->f_regs[D1];
		/* FALLTHROUGH */
	case 0:
		break;
	default:
		/* Cast so the arguments match the %d conversions. */
		panic("linux syscall %d weird argsize %d",
		    (int)code, (int)argsize);
		break;
	}

	/*
	 * Give rval a defined value before the first jump to `out', so
	 * trace_exit() never sees indeterminate values.
	 */
	rval[0] = 0;
	rval[1] = 0;

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	rval[1] = frame->f_regs[D1];
	error = sy_call(callp, l, args, rval);

out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
		/* Translate to the emulation's errno values if it has a table. */
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	trace_exit(code, rval, error);
}
/* * call actual syscall routine * from the low-level syscall handler: * - all HPPA_FRAME_NARGS syscall's arguments supposed to be copied onto * our stack, this wins compared to copyin just needed amount anyway * - register args are copied onto stack too */ void syscall(struct trapframe *frame, int *args) { struct lwp *l; struct proc *p; const struct sysent *callp; int nsys, code, argsize, error; int tmp; int rval[2]; uvmexp.syscalls++; #ifdef DEBUG frame_sanity_check(frame, curlwp); #endif /* DEBUG */ if (!USERMODE(frame->tf_iioq_head)) panic("syscall"); l = curlwp; p = l->l_proc; l->l_md.md_regs = frame; nsys = p->p_emul->e_nsysent; callp = p->p_emul->e_sysent; code = frame->tf_t1; /* * Restarting a system call is touchy on the HPPA, * because syscall arguments are passed in registers * and the program counter of the syscall "point" * isn't easily divined. * * We handle the first problem by assuming that we * will have to restart this system call, so we * stuff the first four words of the original arguments * back into the frame as arg0...arg3, which is where * we found them in the first place. Any further * arguments are (still) on the user's stack and the * syscall code will fetch them from there (again). * * The program counter problem is addressed below. */ frame->tf_arg0 = args[0]; frame->tf_arg1 = args[1]; frame->tf_arg2 = args[2]; frame->tf_arg3 = args[3]; /* * Some special handling for the syscall(2) and * __syscall(2) system calls. */ switch (code) { case SYS_syscall: code = *args; args += 1; break; case SYS___syscall: if (callp != sysent) break; /* * NB: even though __syscall(2) takes a quad_t * containing the system call number, because * our argument copying word-swaps 64-bit arguments, * the least significant word of that quad_t * is the first word in the argument array. 
*/ code = *args; args += 2; } /* * Stacks growing from lower addresses to higher * addresses are not really such a good idea, because * it makes it impossible to overlay a struct on top * of C stack arguments (the arguments appear in * reversed order). * * You can do the obvious thing (as locore.S does) and * copy argument words one by one, laying them out in * the "right" order in the destination buffer, but this * ends up word-swapping multi-word arguments (like off_t). * * To compensate, we have some automatically-generated * code that word-swaps these multi-word arguments. * Right now the script that generates this code is * in Perl, because I don't know awk. * * FIXME - this works only on native binaries and * will probably screw up any and all emulation. */ switch (code) { /* * BEGIN automatically generated * by /home/fredette/project/hppa/makescargfix.pl * do not edit! */ case SYS_pread: /* * syscallarg(int) fd; * syscallarg(void *) buf; * syscallarg(size_t) nbyte; * syscallarg(int) pad; * syscallarg(off_t) offset; */ tmp = args[4]; args[4] = args[4 + 1]; args[4 + 1] = tmp; break; case SYS_pwrite: /* * syscallarg(int) fd; * syscallarg(const void *) buf; * syscallarg(size_t) nbyte; * syscallarg(int) pad; * syscallarg(off_t) offset; */ tmp = args[4]; args[4] = args[4 + 1]; args[4 + 1] = tmp; break; case SYS_mmap: /* * syscallarg(void *) addr; * syscallarg(size_t) len; * syscallarg(int) prot; * syscallarg(int) flags; * syscallarg(int) fd; * syscallarg(long) pad; * syscallarg(off_t) pos; */ tmp = args[6]; args[6] = args[6 + 1]; args[6 + 1] = tmp; break; case SYS_lseek: /* * syscallarg(int) fd; * syscallarg(int) pad; * syscallarg(off_t) offset; */ tmp = args[2]; args[2] = args[2 + 1]; args[2 + 1] = tmp; break; case SYS_truncate: /* * syscallarg(const char *) path; * syscallarg(int) pad; * syscallarg(off_t) length; */ tmp = args[2]; args[2] = args[2 + 1]; args[2 + 1] = tmp; break; case SYS_ftruncate: /* * syscallarg(int) fd; * syscallarg(int) pad; * 
syscallarg(off_t) length; */ tmp = args[2]; args[2] = args[2 + 1]; args[2 + 1] = tmp; break; case SYS_preadv: /* * syscallarg(int) fd; * syscallarg(const struct iovec *) iovp; * syscallarg(int) iovcnt; * syscallarg(int) pad; * syscallarg(off_t) offset; */ tmp = args[4]; args[4] = args[4 + 1]; args[4 + 1] = tmp; break; case SYS_pwritev: /* * syscallarg(int) fd; * syscallarg(const struct iovec *) iovp; * syscallarg(int) iovcnt; * syscallarg(int) pad; * syscallarg(off_t) offset; */ tmp = args[4]; args[4] = args[4 + 1]; args[4 + 1] = tmp; break; default: break; /* * END automatically generated * by /home/fredette/project/hppa/makescargfix.pl * do not edit! */ } #ifdef USERTRACE if (0) { user_backtrace(frame, p, -1); frame->tf_ipsw |= PSW_R; frame->tf_rctr = 0; printf("r %08x", frame->tf_iioq_head); rctr_next_iioq = frame->tf_iioq_head + 4; } #endif if (code < 0 || code >= nsys) callp += p->p_emul->e_nosys; /* bad syscall # */ else callp += code; argsize = callp->sy_argsize; if ((error = trace_enter(l, code, code, NULL, args)) != 0) goto bad; rval[0] = 0; rval[1] = 0; switch (error = (*callp->sy_call)(l, args, rval)) { case 0: l = curlwp; /* changes on exec() */ frame = l->l_md.md_regs; frame->tf_ret0 = rval[0]; frame->tf_ret1 = rval[1]; frame->tf_t1 = 0; break; case ERESTART: /* * Now we have to wind back the instruction * offset queue to the point where the system * call will be made again. This is inherently * tied to the SYSCALL macro. * * Currently, the part of the SYSCALL macro * that we want to rerun reads as: * * ldil L%SYSCALLGATE, r1 * ble 4(sr7, r1) * ldi __CONCAT(SYS_,x), t1 * ldw HPPA_FRAME_ERP(sr0,sp), rp * * And our offset queue head points to the * final ldw instruction. So we need to * subtract twelve to reach the ldil. 
*/ frame->tf_iioq_head -= 12; frame->tf_iioq_tail = frame->tf_iioq_head + 4; break; case EJUSTRETURN: p = curproc; break; default: bad: if (p->p_emul->e_errno) error = p->p_emul->e_errno[error]; frame->tf_t1 = error; break; } trace_exit(l, code, args, rval, error); userret(l, frame->tf_iioq_head, 0); #ifdef DEBUG frame_sanity_check(frame, l); #endif /* DEBUG */ }