/* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ void linux_syscall(struct trapframe *frame) { register const struct sysent *callp; struct lwp *l; int error; register_t code, args[6], rval[2]; l = curlwp; LWP_CACHE_CREDS(l, l->l_proc); code = frame->tf_eax & (LINUX_SYS_NSYSENT - 1); callp = linux_sysent; callp += code; /* * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in * increasing order. */ args[0] = frame->tf_ebx; args[1] = frame->tf_ecx; args[2] = frame->tf_edx; args[3] = frame->tf_esi; args[4] = frame->tf_edi; args[5] = frame->tf_ebp; rval[0] = 0; rval[1] = 0; if (__predict_false(l->l_proc->p_trace_enabled)) { error = trace_enter(code, args, callp->sy_narg); if (__predict_true(error == 0)) { error = sy_call(callp, l, args, rval); code = frame->tf_eax & (LINUX_SYS_NSYSENT - 1); trace_exit(code, rval, error); } } else error = sy_call(callp, l, args, rval); if (__predict_true(error == 0)) { frame->tf_eax = rval[0]; /* * XXX The linux libc code I (dsl) looked at doesn't use the * carry bit. * Values above 0xfffff000 are assumed to be errno values and * not result codes! */ frame->tf_eflags &= ~PSL_C; /* carry bit */ } else { switch (error) { case ERESTART: /* * The offset to adjust the PC by depends on whether * we entered the kernel through the trap or call gate. * We save the instruction size in tf_err on entry. */ frame->tf_eip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux_errno[error]; frame->tf_eax = error; frame->tf_eflags |= PSL_C; /* carry bit */ break; } } userret(l); }
/* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ static void linux_syscall(struct trapframe *frame) { const struct sysent *callp; struct proc *p; struct lwp *l; int error; register_t code, rval[2]; #define args (&frame->tf_rdi) l = curlwp; p = l->l_proc; code = frame->tf_rax; LWP_CACHE_CREDS(l, p); callp = p->p_emul->e_sysent; code &= (LINUX_SYS_NSYSENT - 1); callp += code; /* * Linux system calls have a maximum of 6 arguments, they are * already adjacent in the syscall trapframe. */ if (__predict_false(p->p_trace_enabled) && (error = trace_enter(code, args, callp->sy_narg)) != 0) goto out; rval[0] = 0; rval[1] = 0; error = sy_call(callp, l, args, rval); out: switch (error) { case 0: frame->tf_rax = rval[0]; break; case ERESTART: /* * The offset to adjust the PC by depends on whether we entered * the kernel through the trap or call gate. We pushed the * size of the instruction into tf_err on entry. */ frame->tf_rip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux_errno[error]; frame->tf_rax = error; break; } if (__predict_false(p->p_trace_enabled)) trace_exit(code, rval, error); userret(l); }
/*
 * syscall_fancy(code, l, frame):
 *	System call dispatch with trace_enter()/trace_exit() wrapped around
 *	the handler.  `code' is the requested syscall number, `l' the
 *	calling lwp and `frame' its saved register frame.  Arguments are
 *	copied in from the user stack; the result is returned in D0/D1 with
 *	the carry bit of the status register cleared on success and set on
 *	failure.
 */
static void
syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	char *params;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[16];
	/*
	 * Zeroed up front: both the copyin() failure (`bad') and the
	 * trace_enter() failure (`out') reach trace_exit() before the
	 * assignments below have run, and it must not be handed
	 * indeterminate stack contents.
	 */
	register_t rval[2] = { 0, 0 };
	struct proc *p = l->l_proc;

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

	/* Arguments start one int above the user stack pointer. */
	params = (char *)frame->f_regs[SP] + sizeof(int);

	switch (code) {
	case SYS_syscall:
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof(int);
#if defined(COMPAT_13) || defined(COMPAT_16)
		/*
		 * XXX sigreturn requires special stack manipulation
		 * that is only done if entered via the sigreturn
		 * trap.  Cannot allow it here so make sure we fail.
		 */
		switch (code) {
#ifdef COMPAT_13
		case SYS_compat_13_sigreturn13:
#endif
#ifdef COMPAT_16
		case SYS_compat_16___sigreturn14:
#endif
			/* Out-of-range number: selects e_nosys below. */
			code = nsys;
			break;
		}
#endif
		break;
	case SYS___syscall:
		/*
		 * Like syscall, but code is a quad, so as to maintain
		 * quad alignment for the rest of the arguments.
		 */
		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
		params += sizeof(quad_t);
		break;
	default:
		break;
	}

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;
	if (argsize) {
		error = copyin(params, (void *)args, argsize);
		if (error)
			goto bad;
	}

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	rval[0] = 0;
	rval[1] = frame->f_regs[D1];
	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize lwp/proc pointers as they may be different
		 * if this is a child returning from fork syscall.
		 */
		l = curlwp;
		p = l->l_proc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
#ifdef COMPAT_50
		/* see syscall_plain for a comment explaining this */

		/*
		 * Some pre-m68k ELF libc assembler stubs assume
		 * %a0 is preserved across system calls...
		 */
		if (p->p_emul == &emul_netbsd)
			frame->f_regs[A0] = rval[0];
#endif
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		/*
		 * XXX: SVR4 uses this code-path, so we may have
		 * to translate errno.
		 */
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	/* Reached on every path, including the copyin() failure above. */
	trace_exit(code, rval, error);
}
code = frame->tf_rax & (SYS_NSYSENT - 1); } if (__predict_false(p->p_trace_enabled) && !__predict_false(callp->sy_flags & SYCALL_INDIRECT)) { int narg = callp->sy_argsize >> 2; for (i = 0; i < narg; i++) args64[i] = args[i]; error = trace_enter(code, args64, narg); if (__predict_false(error != 0)) goto out; } rval[0] = 0; rval[1] = 0; error = sy_call(callp, l, args, rval); out: if (__predict_false(p->p_trace_enabled) && !__predict_false(callp->sy_flags & SYCALL_INDIRECT)) { /* Recover 'code' - the compiler doesn't assign it a register */ code = frame->tf_rax & (SYS_NSYSENT - 1); trace_exit(code, rval, error); } if (__predict_true(error == 0)) { frame->tf_rax = rval[0]; frame->tf_rdx = rval[1]; frame->tf_rflags &= ~PSL_C; /* carry bit */ } else { switch (error) {
void linux32_syscall(struct trapframe *frame) { const struct sysent *callp; struct proc *p; struct lwp *l; int error; size_t narg; register32_t code, args[6]; register_t rval[2]; int i; register_t args64[6]; l = curlwp; p = l->l_proc; code = frame->tf_rax; LWP_CACHE_CREDS(l, p); callp = p->p_emul->e_sysent; code &= (LINUX32_SYS_NSYSENT - 1); callp += code; /* * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in * increasing order. */ args[0] = frame->tf_rbx & 0xffffffff; args[1] = frame->tf_rcx & 0xffffffff; args[2] = frame->tf_rdx & 0xffffffff; args[3] = frame->tf_rsi & 0xffffffff; args[4] = frame->tf_rdi & 0xffffffff; args[5] = frame->tf_rbp & 0xffffffff; if (__predict_false(p->p_trace_enabled)) { narg = callp->sy_narg; if (__predict_false(narg > __arraycount(args))) panic("impossible syscall narg, code %d, narg %zd", code, narg); for (i = 0; i < narg; i++) args64[i] = args[i] & 0xffffffff; if ((error = trace_enter(code, args64, narg)) != 0) goto out; } rval[0] = 0; rval[1] = 0; error = sy_call(callp, l, args, rval); out: switch (error) { case 0: frame->tf_rax = rval[0]; frame->tf_rflags &= ~PSL_C; /* carry bit */ break; case ERESTART: /* * The offset to adjust the PC by depends on whether we entered * the kernel through the trap or call gate. We pushed the * size of the instruction into tf_err on entry. */ frame->tf_rip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux32_errno[error]; frame->tf_rax = error; frame->tf_rflags |= PSL_C; /* carry bit */ break; } if (__predict_false(p->p_trace_enabled)) trace_exit(code, rval, error); userret(l); }
/*
 * sunos_syscall_fancy(code, l, frame):
 *	SunOS system call dispatch with trace_enter()/trace_exit() wrapped
 *	around the handler.  The incoming `code' is ignored: the real
 *	syscall number is fetched from the user stack.  Results are
 *	returned in D0/D1, with the carry bit of the status register
 *	cleared on success and set on failure.
 */
static void
sunos_syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	struct proc *p = l->l_proc;
	char *params;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[16];
	/*
	 * Zeroed up front: both the copyin() failure (`bad') and the
	 * trace_enter() failure (`out') reach trace_exit() before the
	 * assignments below have run, and it must not be handed
	 * indeterminate stack contents.
	 */
	register_t rval[2] = { 0, 0 };

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

	/*
	 * SunOS passes the syscall-number on the stack, whereas
	 * BSD passes it in D0.  So, we have to get the real "code"
	 * from the stack, and clean up the stack, as SunOS glue
	 * code assumes the kernel pops the syscall argument the
	 * glue pushed on the stack.  Sigh...
	 */
	code = fuword((void *)frame->f_regs[SP]);

	/*
	 * XXX
	 * Don't do this for sunos_sigreturn, as there's no stored pc
	 * on the stack to skip, the argument follows the syscall
	 * number without a gap.
	 */
	if (code != SUNOS_SYS_sigreturn) {
		frame->f_regs[SP] += sizeof (int);
		/*
		 * remember that we adjusted the SP,
		 * might have to undo this if the system call
		 * returns ERESTART.
		 */
		l->l_md.md_flags |= MDL_STACKADJ;
	} else
		l->l_md.md_flags &= ~MDL_STACKADJ;

	/* Arguments start one int above the (possibly adjusted) SP. */
	params = (char *)frame->f_regs[SP] + sizeof(int);

	switch (code) {
	case SUNOS_SYS_syscall:
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof(int);
		break;
	default:
		break;
	}

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;
	if (argsize) {
		error = copyin(params, (void *)args, argsize);
		if (error)
			goto bad;
	}

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	rval[0] = 0;
	rval[1] = frame->f_regs[D1];
	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	/*
	 * If the SP was popped on entry, clear the marker; a restarted call
	 * must find the stack as the glue code left it, so undo the pop.
	 */
	if (l->l_md.md_flags & MDL_STACKADJ) {
		l->l_md.md_flags &= ~MDL_STACKADJ;
		if (error == ERESTART)
			frame->f_regs[SP] -= sizeof (int);
	}

	/* Reached on every path, including the copyin() failure above. */
	trace_exit(code, rval, error);
}
/*
 * linux_syscall_fancy(code, l, frame):
 *	Linux system call dispatch with trace_enter()/trace_exit() wrapped
 *	around the handler.  `code' is the requested syscall number; up to
 *	five arguments are taken from registers D1-D5.  Results are
 *	returned in D0/D1, with the carry bit of the status register
 *	cleared on success and set on failure.
 */
static void
linux_syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	struct proc *p = l->l_proc;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[8];
	/*
	 * Zeroed up front: the trace_enter() failure path jumps straight to
	 * `out' and still hands rval[] to trace_exit(), which must never see
	 * indeterminate stack contents.
	 */
	register_t rval[2] = { 0, 0 };

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;

	/*
	 * Linux passes the args in d1-d5.  Each case deliberately falls
	 * into the next so that an N-argument call loads args[N-1]..args[0].
	 * Any other argument size (including anything above 20 bytes)
	 * panics.
	 */
	switch (argsize) {
	case 20:
		args[4] = frame->f_regs[D5];
		/* FALLTHROUGH */
	case 16:
		args[3] = frame->f_regs[D4];
		/* FALLTHROUGH */
	case 12:
		args[2] = frame->f_regs[D3];
		/* FALLTHROUGH */
	case 8:
		args[1] = frame->f_regs[D2];
		/* FALLTHROUGH */
	case 4:
		args[0] = frame->f_regs[D1];
		/* FALLTHROUGH */
	case 0:
		break;
	default:
		/* Casts make the arguments match the %d conversions. */
		panic("linux syscall %d weird argsize %d",
		    (int)code, (int)argsize);
		break;
	}

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	rval[0] = 0;
	rval[1] = frame->f_regs[D1];
	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
		/* Translate errno if the emulation supplies a table. */
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	trace_exit(code, rval, error);
}
/*
 * linux_syscall_plain(l, status, cause, opc):
 *	Dispatch a Linux system call for lwp `l' using the register frame
 *	saved in l->l_md.md_regs.  The syscall number is read from the
 *	_R_R8 slot and up to four arguments from _R_R12, _R_R11, _R_R10 and
 *	_R_R9; the result or (translated) errno is stored in _R_R12.
 *	`status', `cause' and `opc' are not used here.
 *
 *	Several paths are unimplemented and panic: indirect syscalls
 *	(SYS_syscall/SYS___syscall), five or more arguments, a non-zero
 *	second return value alongside a non-zero first one, and ERESTART.
 */
void
linux_syscall_plain(struct lwp *l, u_int status, u_int cause, u_int opc)
{
	struct proc *p = l->l_proc;
	struct frame *frame = (struct frame *)l->l_md.md_regs;
	const struct sysent *callp;
	register_t copyargs[8], copyrval[2];
	register_t *args;
	register_t *rval = NULL;	/* XXX gcc */
	size_t nsaved, nargs;
	u_int code;
	int error;

	LWP_CACHE_CREDS(l, p);

	uvmexp.syscalls++;

	callp = p->p_emul->e_sysent;
	code = frame->f_regs[_R_R8];

#ifdef KERN_SA
	/* Drop the "delivering" marker if one is pending for this lwp. */
	if (__predict_false((l->l_savp)
	    && (l->l_savp->savp_pflags & SAVP_FLAG_DELIVERING)))
		l->l_savp->savp_pflags &= ~SAVP_FLAG_DELIVERING;
#endif

	if (code == SYS_syscall || code == SYS___syscall) {
		/* Indirect system calls are not supported yet. */
		panic ("linux_syscall_plain: SYS*syscall: not yet");
#if notyet
		args = copyargs;
		if (code == SYS_syscall) {
			/*
			 * The real code is the first argument, the actual
			 * arguments follow it.
			 */
			code = frame->f_regs[_R_A0] - SYSCALL_SHIFT;
			args[0] = frame->f_regs[_R_A1];
			args[1] = frame->f_regs[_R_A2];
			args[2] = frame->f_regs[_R_A3];
			nsaved = 3;
		} else {
			/*
			 * As above, except that the code is a quad so the
			 * remaining arguments stay quad aligned.
			 */
			code = frame->f_regs[_R_A0 + _QUAD_LOWWORD]
			    - SYSCALL_SHIFT;
			args[0] = frame->f_regs[_R_A2];
			args[1] = frame->f_regs[_R_A3];
			nsaved = 2;
		}

		if (code >= p->p_emul->e_nsysent)
			callp += p->p_emul->e_nosys;
		else
			callp += code;

		nargs = callp->sy_argsize / sizeof(register_t);

		/* Whatever did not fit in registers lives on the stack. */
		if (nargs > nsaved) {
			error = copyin(
			    ((register_t *)(vaddr_t)frame->f_regs[_R_SP] + 4),
			    (args + nsaved),
			    (nargs - nsaved) * sizeof(register_t));
			if (error)
				goto bad;
		}
#endif
	} else {
		/* Direct call: out-of-range numbers select e_nosys. */
		if (code >= p->p_emul->e_nsysent)
			callp += p->p_emul->e_nosys;
		else
			callp += code;

		nargs = callp->sy_narg;

		if (nargs >= 5) {
			panic("linux_syscall_plain: nargs >=5: notyet");
		} else {
			args = copyargs;
			args[0] = frame->f_regs[_R_R12];
			args[1] = frame->f_regs[_R_R11];
			args[2] = frame->f_regs[_R_R10];
			args[3] = frame->f_regs[_R_R9];
		}
	}

	rval = copyrval;
	rval[0] = 0;
	rval[1] = 0;

	error = sy_call(callp, l, args, rval);

	switch (error) {
	case 0:
		frame->f_regs[_R_R12] = rval[0];
		/* Two-register results are not supported yet. */
		if (rval[0] != 0 && rval[1] != 0)
			panic("linux_syscall_plain: rval[1] != 0: notyet");
		break;
	case ERESTART:
		panic("linux_syscall_plain: ERESTART: notyet");
		break;
	case EJUSTRETURN:
		/* The frame is left exactly as the handler set it. */
		break;
	default:
	bad:
		/* Translate errno if the emulation supplies a table. */
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[_R_R12] = error;
		break;
	}

	userret(l);
}