예제 #1
0
/*
 * syscall(frame):
 *	System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference.
 */
void
linux_syscall(struct trapframe *frame)
{
	register const struct sysent *callp;
	struct lwp *l;
	int error;
	register_t code, args[6], rval[2];

	l = curlwp;
	LWP_CACHE_CREDS(l, l->l_proc);

	code = frame->tf_eax & (LINUX_SYS_NSYSENT - 1);
	callp = linux_sysent;

	callp += code;
	/*
	 * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in
	 * increasing order.
	 */
	args[0] = frame->tf_ebx;
	args[1] = frame->tf_ecx;
	args[2] = frame->tf_edx;
	args[3] = frame->tf_esi;
	args[4] = frame->tf_edi;
	args[5] = frame->tf_ebp;

	rval[0] = 0;
	rval[1] = 0;

	if (__predict_false(l->l_proc->p_trace_enabled)) {
		error = trace_enter(code, args, callp->sy_narg);
		if (__predict_true(error == 0)) {
			error = sy_call(callp, l, args, rval);
			code = frame->tf_eax & (LINUX_SYS_NSYSENT - 1);
			trace_exit(code, rval, error);
		}
	} else
		error = sy_call(callp, l, args, rval);

	if (__predict_true(error == 0)) {
		frame->tf_eax = rval[0];
		/*
		 * XXX The linux libc code I (dsl) looked at doesn't use the
		 * carry bit.
		 * Values above 0xfffff000 are assumed to be errno values and
		 * not result codes!
		 */
		frame->tf_eflags &= ~PSL_C;	/* carry bit */
	} else {
		switch (error) {
		case ERESTART:
			/*
			 * The offset to adjust the PC by depends on whether
			 * we entered the kernel through the trap or call gate.
			 * We save the instruction size in tf_err on entry.
			 */
			frame->tf_eip -= frame->tf_err;
			break;
		case EJUSTRETURN:
			/* nothing to do */
			break;
		default:
			error = native_to_linux_errno[error];
			frame->tf_eax = error;
			frame->tf_eflags |= PSL_C;	/* carry bit */
			break;
		}
	}

	userret(l);
}
예제 #2
0
/*
 * syscall(frame):
 *	System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference.
 */
static void
linux_syscall(struct trapframe *frame)
{
    const struct sysent *callp;
    struct proc *p;
    struct lwp *l;
    int error;
    register_t code, rval[2];
#define args (&frame->tf_rdi)

    l = curlwp;
    p = l->l_proc;

    code = frame->tf_rax;

    LWP_CACHE_CREDS(l, p);

    callp = p->p_emul->e_sysent;

    code &= (LINUX_SYS_NSYSENT - 1);
    callp += code;

    /*
     * Linux system calls have a maximum of 6 arguments, they are
     * already adjacent in the syscall trapframe.
     */

    if (__predict_false(p->p_trace_enabled)
            && (error = trace_enter(code, args, callp->sy_narg)) != 0)
        goto out;

    rval[0] = 0;
    rval[1] = 0;
    error = sy_call(callp, l, args, rval);
out:
    switch (error) {
    case 0:
        frame->tf_rax = rval[0];
        break;
    case ERESTART:
        /*
         * The offset to adjust the PC by depends on whether we entered
         * the kernel through the trap or call gate.  We pushed the
         * size of the instruction into tf_err on entry.
         */
        frame->tf_rip -= frame->tf_err;
        break;
    case EJUSTRETURN:
        /* nothing to do */
        break;
    default:
        error = native_to_linux_errno[error];
        frame->tf_rax = error;
        break;
    }

    if (__predict_false(p->p_trace_enabled))
        trace_exit(code, rval, error);

    userret(l);
}
static void
syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	char *params;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[16], rval[2];
	struct proc *p = l->l_proc;

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

	params = (char *)frame->f_regs[SP] + sizeof(int);

	switch (code) {
	case SYS_syscall:
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof(int);
#if defined(COMPAT_13) || defined(COMPAT_16)
		/*
		 * XXX sigreturn requires special stack manipulation
		 * that is only done if entered via the sigreturn
		 * trap.  Cannot allow it here so make sure we fail.
		 */
		switch (code) {
#ifdef COMPAT_13
		case SYS_compat_13_sigreturn13:
#endif
#ifdef COMPAT_16
		case SYS_compat_16___sigreturn14:
#endif
			code = nsys;
			break;
		}
#endif
		break;
	case SYS___syscall:
		/*
		 * Like syscall, but code is a quad, so as to maintain
		 * quad alignment for the rest of the arguments.
		 */
		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
		params += sizeof(quad_t);
		break;
	default:
		break;
	}

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;
	if (argsize) {
		error = copyin(params, (void *)args, argsize);
		if (error)
			goto bad;
	}

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	rval[0] = 0;
	rval[1] = frame->f_regs[D1];
	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize lwp/proc pointers as they may be different
		 * if this is a child returning from fork syscall.
		 */
		l = curlwp;
		p = l->l_proc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
#ifdef COMPAT_50
		/* see syscall_plain for a comment explaining this */

		/*
		 * Some pre-m68k ELF libc assembler stubs assume
		 * %a0 is preserved across system calls...
		 */
		if (p->p_emul == &emul_netbsd)
			frame->f_regs[A0] = rval[0];
#endif
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		/*
		 * XXX: SVR4 uses this code-path, so we may have
		 * to translate errno.
		 */
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	trace_exit(code, rval, error);
}
		code = frame->tf_rax & (SYS_NSYSENT - 1);
	}

	if (__predict_false(p->p_trace_enabled)
	    && !__predict_false(callp->sy_flags & SYCALL_INDIRECT)) {
		int narg = callp->sy_argsize >> 2;
		for (i = 0; i < narg; i++)
			args64[i] = args[i];
		error = trace_enter(code, args64, narg);
		if (__predict_false(error != 0))
			goto out;
	}

	rval[0] = 0;
	rval[1] = 0;
	error = sy_call(callp, l, args, rval);

out:
	if (__predict_false(p->p_trace_enabled)
	    && !__predict_false(callp->sy_flags & SYCALL_INDIRECT)) {
		/* Recover 'code' - the compiler doesn't assign it a register */
		code = frame->tf_rax & (SYS_NSYSENT - 1);
		trace_exit(code, rval, error);
	}

	if (__predict_true(error == 0)) {
		frame->tf_rax = rval[0];
		frame->tf_rdx = rval[1];
		frame->tf_rflags &= ~PSL_C;	/* carry bit */
	} else {
		switch (error) {
void
linux32_syscall(struct trapframe *frame)
{
	const struct sysent *callp;
	struct proc *p;
	struct lwp *l;
	int error;
	size_t narg;
	register32_t code, args[6];
	register_t rval[2];
	int i;
	register_t args64[6];

	l = curlwp;
	p = l->l_proc;

	code = frame->tf_rax;

	LWP_CACHE_CREDS(l, p);

	callp = p->p_emul->e_sysent;

	code &= (LINUX32_SYS_NSYSENT - 1);
	callp += code;

	/*
	 * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in
	 * increasing order.
	 */
	args[0] = frame->tf_rbx & 0xffffffff;
	args[1] = frame->tf_rcx & 0xffffffff;
	args[2] = frame->tf_rdx & 0xffffffff;
	args[3] = frame->tf_rsi & 0xffffffff;
	args[4] = frame->tf_rdi & 0xffffffff;
	args[5] = frame->tf_rbp & 0xffffffff;

	if (__predict_false(p->p_trace_enabled)) {
		narg = callp->sy_narg;
		if (__predict_false(narg > __arraycount(args)))
			panic("impossible syscall narg, code %d, narg %zd",
			    code, narg);
		for (i = 0; i < narg; i++)
			args64[i] = args[i] & 0xffffffff;
		if ((error = trace_enter(code, args64, narg)) != 0)
			goto out;
	}

	rval[0] = 0;
	rval[1] = 0;

	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		frame->tf_rax = rval[0];
		frame->tf_rflags &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * The offset to adjust the PC by depends on whether we entered
		 * the kernel through the trap or call gate.  We pushed the
		 * size of the instruction into tf_err on entry.
		 */
		frame->tf_rip -= frame->tf_err;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
		error = native_to_linux32_errno[error];
		frame->tf_rax = error;
		frame->tf_rflags |= PSL_C;	/* carry bit */
		break;
	}

	if (__predict_false(p->p_trace_enabled))
		trace_exit(code, rval, error);
	userret(l);
}
static void
sunos_syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	struct proc *p = l->l_proc;
	char *params;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[16], rval[2];

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

	/*
	 * SunOS passes the syscall-number on the stack, whereas
	 * BSD passes it in D0. So, we have to get the real "code"
	 * from the stack, and clean up the stack, as SunOS glue
	 * code assumes the kernel pops the syscall argument the
	 * glue pushed on the stack. Sigh...
	 */
	code = fuword((void *)frame->f_regs[SP]);

	/*
	 * XXX
	 * Don't do this for sunos_sigreturn, as there's no stored pc
	 * on the stack to skip, the argument follows the syscall
	 * number without a gap.
	 */
	if (code != SUNOS_SYS_sigreturn) {
		frame->f_regs[SP] += sizeof (int);
		/*
		 * remember that we adjusted the SP,
		 * might have to undo this if the system call
		 * returns ERESTART.
		 */
		l->l_md.md_flags |= MDL_STACKADJ;
	} else
		l->l_md.md_flags &= ~MDL_STACKADJ;

	params = (char *)frame->f_regs[SP] + sizeof(int);

	switch (code) {
	case SUNOS_SYS_syscall:
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof(int);
		break;
	default:
		break;
	}

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;
	if (argsize) {
		error = copyin(params, (void *)args, argsize);
		if (error)
			goto bad;
	}

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	rval[0] = 0;
	rval[1] = frame->f_regs[D1];
	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	/* need new p-value for this */
	if (l->l_md.md_flags & MDL_STACKADJ) {
		l->l_md.md_flags &= ~MDL_STACKADJ;
		if (error == ERESTART)
			frame->f_regs[SP] -= sizeof (int);
	}

	trace_exit(code, rval, error);
}
예제 #7
0
static void
linux_syscall_fancy(register_t code, struct lwp *l, struct frame *frame)
{
	struct proc *p = l->l_proc;
	char *params;
	const struct sysent *callp;
	int error, nsys;
	size_t argsize;
	register_t args[8], rval[2];

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;
	params = (char *)frame->f_regs[SP] + sizeof(int);

	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;

	argsize = callp->sy_argsize;

	/*
	 * Linux passes the args in d1-d5
	 */
	switch (argsize) {
	case 20:
		args[4] = frame->f_regs[D5];
	case 16:
		args[3] = frame->f_regs[D4];
	case 12:
		args[2] = frame->f_regs[D3];
	case 8:
		args[1] = frame->f_regs[D2];
	case 4:
		args[0] = frame->f_regs[D1];
	case 0:
		break;
	default:
		panic("linux syscall %d weird argsize %d",
			code, argsize);
		break;
	}

	if ((error = trace_enter(code, args, callp->sy_narg)) != 0)
		goto out;

	rval[0] = 0;
	rval[1] = frame->f_regs[D1];
	error = sy_call(callp, l, args, rval);
out:
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame->f_regs[D0] = rval[0];
		frame->f_regs[D1] = rval[1];
		frame->f_sr &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame->f_pc = frame->f_pc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[D0] = error;
		frame->f_sr |= PSL_C;	/* carry bit */
		break;
	}

	trace_exit(code, rval, error);
}
예제 #8
0
void
linux_syscall_plain(struct lwp *l, u_int status, u_int cause, u_int opc)
{
	struct proc *p = l->l_proc;
	struct frame *frame = (struct frame *)l->l_md.md_regs;
	register_t *args, copyargs[8];
	register_t *rval = NULL;	/* XXX gcc */
	register_t copyrval[2];

	size_t nsaved, nargs;
	const struct sysent *callp;
	int error;
	u_int code;

	LWP_CACHE_CREDS(l, p);

	uvmexp.syscalls++;
	
	callp = p->p_emul->e_sysent;
	code = frame->f_regs[_R_R8];

#ifdef KERN_SA
	if (__predict_false((l->l_savp)
            && (l->l_savp->savp_pflags & SAVP_FLAG_DELIVERING)))
		l->l_savp->savp_pflags &= ~SAVP_FLAG_DELIVERING;
#endif

	switch (code) {
	case SYS_syscall:
	case SYS___syscall:
		panic ("linux_syscall_plain: SYS*syscall: not yet");
#if notyet
		args = copyargs;
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = frame->f_regs[_R_A0] - SYSCALL_SHIFT;
			args[0] = frame->f_regs[_R_A1];
			args[1] = frame->f_regs[_R_A2];
			args[2] = frame->f_regs[_R_A3];
			nsaved = 3;
		} else {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 */
			code = frame->f_regs[_R_A0 + _QUAD_LOWWORD]
			    - SYSCALL_SHIFT;
			args[0] = frame->f_regs[_R_A2];
			args[1] = frame->f_regs[_R_A3];
			nsaved = 2;
		}

		if (code >= p->p_emul->e_nsysent)
			callp += p->p_emul->e_nosys;
		else
			callp += code;
		nargs = callp->sy_argsize / sizeof(register_t);

		if (nargs > nsaved) {
			error = copyin(
			    ((register_t *)(vaddr_t)frame->f_regs[_R_SP] + 4),
			    (args + nsaved),
			    (nargs - nsaved) * sizeof(register_t));
			if (error)
				goto bad;
		}
#endif
		break;

	default:
		if (code >= p->p_emul->e_nsysent)
			callp += p->p_emul->e_nosys;
		else
			callp += code;
		nargs = callp->sy_narg;

		if (nargs < 5) {
			args = copyargs;
			args[0] = frame->f_regs[_R_R12];
			args[1] = frame->f_regs[_R_R11];
			args[2] = frame->f_regs[_R_R10];
			args[3] = frame->f_regs[_R_R9];
		} else {
			panic("linux_syscall_plain: nargs >=5: notyet");
		}
		break;
	}
	rval = copyrval;
	rval[0] = 0;
	rval[1] = 0;

	error = sy_call(callp, l, args, rval);

	switch (error) {
	case 0:
		frame->f_regs[_R_R12] = rval[0];
		if (rval[0] != 0 && rval[1] != 0)
			panic("linux_syscall_plain: rval[1] != 0: notyet");
		break;
	case ERESTART:
		panic("linux_syscall_plain: ERESTART: notyet");
		break;
	case EJUSTRETURN:
		break;	/* nothing to do */
	default:
	bad:
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[_R_R12] = error;
		break;
	}

	userret(l);
}