Example #1
/* ARGSUSED */
int
sys_bind(struct proc *p, void *v, register_t *retval)
{
	struct sys_bind_args /* {
		syscallarg(int) s;
		syscallarg(const struct sockaddr *) name;
		syscallarg(socklen_t) namelen;
	} */ *uap = v;
	struct file *fp;
	struct mbuf *nam;
	int error;

	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
		return (error);
	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
	    MT_SONAME);
	if (error == 0) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
#endif
		error = sobind(fp->f_data, nam, p);
		m_freem(nam);
	}
	FRELE(fp, p);
	return (error);
}
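For context, this is the OpenBSD-style kernel side of bind(2). A minimal userland sketch of the call that lands here might look as follows; only the standard sockets API is assumed, and the port is purely illustrative.

#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <err.h>

int
bind_example(void)
{
	struct sockaddr_in sin;
	int s;

	if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
		err(1, "socket");
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(8080);		/* illustrative port */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	/* This call enters sys_bind() above; with ktrace structure
	 * tracing active, the KTR_STRUCT hook records the sockaddr. */
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "bind");
	return (s);
}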
Example #2
/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.
 *
 * This code will return back into the fork trampoline code which then
 * runs doreti.
 *
 * NOTE: The mplock is not held at any point.
 */
void
generic_lwp_return(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;

	/*
	 * Newly forked processes are given a kernel priority.  We have to
	 * adjust the priority to a normal user priority and fake entry
	 * into the kernel (call userenter()) to install a passive release
	 * function just in case userret() decides to stop the process.  This
	 * can occur when ^Z races a fork.  If we do not install the passive
	 * release function the current process designation will not be
	 * released when the thread goes to sleep.
	 */
	lwkt_setpri_self(TDPRI_USER_NORM);
	userenter(lp->lwp_thread, p);
	userret(lp, frame, 0);
#ifdef KTRACE
	if (KTRPOINT(lp->lwp_thread, KTR_SYSRET))
		ktrsysret(lp, SYS_fork, 0, 0);
#endif
	lp->lwp_flags |= LWP_PASSIVE_ACQ;
	userexit(lp);
	lp->lwp_flags &= ~LWP_PASSIVE_ACQ;
}
Example #3
int
kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
{
	struct socket *so;
	struct file *fp;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_FD(fd);
	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_BIND),
	    &fp, NULL, NULL);
	if (error != 0)
		return (error);
	so = fp->f_data;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_STRUCT))
		ktrsockaddr(sa);
#endif
#ifdef MAC
	error = mac_socket_check_bind(td->td_ucred, so, sa);
	if (error == 0) {
#endif
		if (dirfd == AT_FDCWD)
			error = sobind(so, sa, td);
		else
			error = sobindat(dirfd, so, sa, td);
#ifdef MAC
	}
#endif
	fdrop(fp, td);
	return (error);
}
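kern_bindat() is the FreeBSD backend shared by bind(2) and bindat(2); the dirfd == AT_FDCWD branch is the plain bind(2) case. A hedged sketch of the directory-relative variant from userland, with illustrative directory and socket names:

#include <sys/socket.h>
#include <sys/un.h>
#include <fcntl.h>
#include <string.h>
#include <err.h>

int
bindat_example(void)
{
	struct sockaddr_un sun;
	int dirfd, s;

	if ((dirfd = open("/tmp", O_DIRECTORY)) == -1)
		err(1, "open");
	if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) == -1)
		err(1, "socket");
	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	/* The path is resolved relative to dirfd, not the CWD. */
	strlcpy(sun.sun_path, "example.sock", sizeof(sun.sun_path));
	if (bindat(dirfd, s, (struct sockaddr *)&sun, sizeof(sun)) == -1)
		err(1, "bindat");
	return (s);
}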
Example #4
void
netbsd32_ktrpsig(int sig, sig_t action, const sigset_t *mask,
	 const ksiginfo_t *ksi)
{
	struct ktrace_entry *kte;
	lwp_t *l = curlwp;
	struct {
		struct netbsd32_ktr_psig	kp;
		siginfo32_t			si;
	} *kbuf;

	if (!KTRPOINT(l->l_proc, KTR_PSIG))
		return;

	if (ktealloc(&kte, (void *)&kbuf, l, KTR_PSIG, sizeof(*kbuf)))
		return;

	kbuf->kp.signo = (char)sig;
	NETBSD32PTR32(kbuf->kp.action, action);
	kbuf->kp.mask = *mask;

	if (ksi) {
		kbuf->kp.code = KSI_TRAPCODE(ksi);
		(void)memset(&kbuf->si, 0, sizeof(kbuf->si));
		netbsd32_ksi_to_ksi32(&kbuf->si._info, &ksi->ksi_info);
		ktesethdrlen(kte, sizeof(*kbuf));
	} else {
		kbuf->kp.code = 0;
		ktesethdrlen(kte, sizeof(struct netbsd32_ktr_psig));
	}

	ktraddentry(l, kte, KTA_WAITOK);
}
Example #5
/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.
 *
 * This code will return back into the fork trampoline code which then
 * runs doreti.
 */
void
generic_lwp_return(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;

	/*
	 * Check for exit-race.  If one lwp exits the process concurrent with
	 * another lwp creating a new thread, the two operations may cross
	 * each other resulting in the newly-created lwp not receiving a
	 * KILL signal.
	 */
	if (p->p_flags & P_WEXIT) {
		lwpsignal(p, lp, SIGKILL);
	}

	/*
	 * Newly forked processes are given a kernel priority.  We have to
	 * adjust the priority to a normal user priority and fake entry
	 * into the kernel (call userenter()) to install a passive release
	 * function just in case userret() decides to stop the process.  This
	 * can occur when ^Z races a fork.  If we do not install the passive
	 * release function the current process designation will not be
	 * released when the thread goes to sleep.
	 */
	lwkt_setpri_self(TDPRI_USER_NORM);
	userenter(lp->lwp_thread, p);
	userret(lp, frame, 0);
#ifdef KTRACE
	if (KTRPOINT(lp->lwp_thread, KTR_SYSRET))
		ktrsysret(lp, SYS_fork, 0, 0);
#endif
	lp->lwp_flags |= LWP_PASSIVE_ACQ;
	userexit(lp);
	lp->lwp_flags &= ~LWP_PASSIVE_ACQ;
}
Example #6
int
sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap)
{
	char *path;
	Fnv32_t fnv;
	int error;

	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error) {
		free(path, M_TEMP);
		return (error);
	}
#ifdef KTRACE
	if (KTRPOINT(curthread, KTR_NAMEI))
		ktrnamei(path);
#endif
	fnv = fnv_32_str(path, FNV1_32_INIT);
	sx_xlock(&shm_dict_lock);
	error = shm_remove(path, fnv, td->td_ucred);
	sx_xunlock(&shm_dict_lock);
	free(path, M_TEMP);

	return (error);
}
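sys_shm_unlink() is the FreeBSD kernel side of POSIX shm_unlink(2). A minimal userland pairing, with an illustrative object name:

#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <err.h>

void
shm_example(void)
{
	int fd;

	if ((fd = shm_open("/example", O_RDWR | O_CREAT, 0600)) == -1)
		err(1, "shm_open");
	/* ... size with ftruncate() and map with mmap() ... */
	close(fd);
	/* Removes the name; with name tracing active, the KTR_NAMEI
	 * hook above records the copied-in path. */
	if (shm_unlink("/example") == -1)
		err(1, "shm_unlink");
}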
Example #7
int
pledge_fail(struct proc *p, int error, uint64_t code)
{
	char *codes = "";
	int i;
	struct sigaction sa;

	/* Print first matching pledge */
	for (i = 0; code && pledgenames[i].bits != 0; i++)
		if (pledgenames[i].bits & code) {
			codes = pledgenames[i].name;
			break;
		}
	printf("%s(%d): syscall %d \"%s\"\n", p->p_comm, p->p_pid,
	    p->p_pledge_syscall, codes);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_PLEDGE))
		ktrpledge(p, error, code, p->p_pledge_syscall);
#endif
	/* Send uncatchable SIGABRT for coredump */
	memset(&sa, 0, sizeof sa);
	sa.sa_handler = SIG_DFL;
	setsigvec(p, SIGABRT, &sa);
	psignal(p, SIGABRT);

	p->p_p->ps_pledge = 0;		/* Disable all PLEDGE_ flags */
	return (error);
}
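pledge_fail() is what an OpenBSD process reaches when it violates its pledge. A minimal sketch of establishing a pledge from userland; the promise strings are illustrative, and in the era of this API the second argument (paths) had to be NULL:

#include <unistd.h>
#include <err.h>

int
main(void)
{
	if (pledge("stdio rpath", NULL) == -1)
		err(1, "pledge");
	/* Any later syscall outside the promised set reaches
	 * pledge_fail(): the kernel prints the violated promise,
	 * records a KTR_PLEDGE event if ktrace is active, and
	 * delivers an uncatchable SIGABRT so the process dumps core. */
	return (0);
}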
Example #8
/*
 * General sleep call.  Suspends the current thread until a wakeup is
 * performed on the specified identifier.  The thread will then be made
 * runnable with the specified priority.  Sleeps at most sbt units of time
 * (0 means no timeout).  If pri includes the PCATCH flag, let signals
 * interrupt the sleep, otherwise ignore them while sleeping.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal becomes pending, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the
 * system call should be interrupted by the signal.
 *
 * The lock argument is unlocked before the caller is suspended, and
 * re-locked before _sleep() returns.  If priority includes the PDROP
 * flag the lock is not re-locked before returning.
 */
int
_sleep(void *ident, struct lock_object *lock, int priority,
    const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
{
	struct thread *td;
	struct proc *p;
	struct lock_class *class;
	uintptr_t lock_state;
	int catch, pri, rval, sleepq_flags;
	WITNESS_SAVE_DECL(lock_witness);

	td = curthread;
	p = td->td_proc;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0, wmesg);
#endif
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
	    "Sleeping on \"%s\"", wmesg);
	KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL,
	    ("sleeping without a lock"));
	KASSERT(p != NULL, ("msleep1"));
	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
	if (priority & PDROP)
		KASSERT(lock != NULL && lock != &Giant.lock_object,
		    ("PDROP requires a non-Giant lock"));
	if (lock != NULL)
		class = LOCK_CLASS(lock);
	else
		class = NULL;
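The listing breaks off mid-function here, but the contract the header comment describes is the familiar one. A hedged sketch of the canonical consumer pattern built on _sleep() via the msleep() wrapper; the softc and its fields are illustrative:

	mtx_lock(&sc->sc_mtx);
	while (!sc->sc_ready) {
		/* msleep() releases sc_mtx while asleep and re-takes it
		 * before returning; PCATCH lets signals interrupt the wait. */
		error = msleep(&sc->sc_ready, &sc->sc_mtx, PCATCH, "scwait", 0);
		if (error != 0)
			break;	/* EINTR/ERESTART on a signal */
	}
	mtx_unlock(&sc->sc_mtx);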
Example #9
/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.  Giant is not held on entry, and must not
 * be held on return.  This function is passed in to fork_exit() as the
 * first parameter and is called when returning to a new userland process.
 */
void
fork_return(struct thread *td, struct trapframe *frame)
{
	struct proc *p, *dbg;

	p = td->td_proc;
	if (td->td_dbgflags & TDB_STOPATFORK) {
		sx_xlock(&proctree_lock);
		PROC_LOCK(p);
		if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) ==
		    (P_TRACED | P_FOLLOWFORK)) {
			/*
			 * If debugger still wants auto-attach for the
			 * parent's children, do it now.
			 */
			dbg = p->p_pptr->p_pptr;
			p->p_flag |= P_TRACED;
			p->p_oppid = p->p_pptr->p_pid;
			CTR2(KTR_PTRACE,
		    "fork_return: attaching to new child pid %d: oppid %d",
			    p->p_pid, p->p_oppid);
			proc_reparent(p, dbg);
			sx_xunlock(&proctree_lock);
			td->td_dbgflags |= TDB_CHILD | TDB_SCX;
			ptracestop(td, SIGSTOP);
			td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX);
		} else {
			/*
			 * ... otherwise clear the request.
			 */
			sx_xunlock(&proctree_lock);
			td->td_dbgflags &= ~TDB_STOPATFORK;
			cv_broadcast(&p->p_dbgwait);
		}
		PROC_UNLOCK(p);
	} else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) {
		/*
		 * This is the start of a new thread in a traced
		 * process.  Report a system call exit event.
		 */
		PROC_LOCK(p);
		td->td_dbgflags |= TDB_SCX;
		_STOPEVENT(p, S_SCX, td->td_dbg_sc_code);
		if ((p->p_stops & S_PT_SCX) != 0 ||
		    (td->td_dbgflags & TDB_BORN) != 0)
			ptracestop(td, SIGTRAP);
		td->td_dbgflags &= ~(TDB_SCX | TDB_BORN);
		PROC_UNLOCK(p);
	}

	userret(td, frame);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(SYS_fork, 0, 0);
#endif
}
Example #10
/* ARGSUSED */
int
sys_connect(struct proc *p, void *v, register_t *retval)
{
	struct sys_connect_args /* {
		syscallarg(int) s;
		syscallarg(const struct sockaddr *) name;
		syscallarg(socklen_t) namelen;
	} */ *uap = v;
	struct file *fp;
	struct socket *so;
	struct mbuf *nam = NULL;
	int error, s;

	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
		return (error);
	so = fp->f_data;
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		FRELE(fp, p);
		return (EALREADY);
	}
	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
	    MT_SONAME);
	if (error)
		goto bad;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
#endif
	error = soconnect(so, nam);
	if (error)
		goto bad;
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		FRELE(fp, p);
		m_freem(nam);
		return (EINPROGRESS);
	}
	s = splsoftnet();
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0);
		if (error)
			break;
	}
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
	splx(s);
bad:
	so->so_state &= ~SS_ISCONNECTING;
	FRELE(fp, p);
	if (nam)
		m_freem(nam);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
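sys_connect() returns EINPROGRESS for a non-blocking socket and otherwise tsleep()s until the connection completes. The classic userland counterpart, sketched here with illustrative names, polls for writability and then reads the deferred error with SO_ERROR:

#include <sys/socket.h>
#include <poll.h>
#include <errno.h>

/* Assumes "s" has O_NONBLOCK set and "sa"/"salen" describe the peer;
 * returns 0 on success or an errno value. */
int
nb_connect(int s, const struct sockaddr *sa, socklen_t salen)
{
	struct pollfd pfd;
	socklen_t len = sizeof(int);
	int error;

	if (connect(s, sa, salen) == 0)
		return (0);
	if (errno != EINPROGRESS)
		return (errno);
	pfd.fd = s;
	pfd.events = POLLOUT;
	if (poll(&pfd, 1, -1) == -1)
		return (errno);
	/* Connection finished; fetch the deferred error, mirroring the
	 * "error = so->so_error" step in the kernel above. */
	if (getsockopt(s, SOL_SOCKET, SO_ERROR, &error, &len) == -1)
		return (errno);
	return (error);
}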
Example #11
/*
 *	vm_fault:
 *
 *	Handle a page fault occurring at the given address,
 *	requiring the given permissions, in the map specified.
 *	If successful, the page is inserted into the
 *	associated physical map.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *	The map in question must be referenced, and remains so.
 *	Caller may hold no locks.
 */
int
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
    int fault_flags)
{
	struct thread *td;
	int result;

	td = curthread;
	if ((td->td_pflags & TDP_NOFAULTING) != 0)
		return (KERN_PROTECTION_FAILURE);
#ifdef KTRACE
	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
		ktrfault(vaddr, fault_type);
#endif
	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
	    NULL);
#ifdef KTRACE
	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
		ktrfaultend(result);
#endif
	return (result);
}
Example #12
static inline int
_cap_check(cap_rights_t have, cap_rights_t need, enum ktr_cap_fail_type type)
{

	if ((need & ~have) != 0) {
#ifdef KTRACE
		if (KTRPOINT(curthread, KTR_CAPFAIL))
			ktrcapfail(type, need, have);
#endif
		return (ENOTCAPABLE);
	}
	return (0);
}
Example #13
static inline int
_cap_check(const cap_rights_t *havep, const cap_rights_t *needp,
    enum ktr_cap_fail_type type)
{

	if (!cap_rights_contains(havep, needp)) {
#ifdef KTRACE
		if (KTRPOINT(curthread, KTR_CAPFAIL))
			ktrcapfail(type, needp, havep);
#endif
		return (ENOTCAPABLE);
	}
	return (0);
}
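cap_rights_contains() is also exported to FreeBSD userland, so the same subset test the kernel performs here can be written directly. A small sketch; the needed rights are illustrative:

#include <sys/capsicum.h>
#include <stdbool.h>

/* Does "have" include everything we need?  Mirrors _cap_check(). */
static bool
have_needed(const cap_rights_t *have)
{
	cap_rights_t need;

	cap_rights_init(&need, CAP_READ, CAP_SEEK);
	return (cap_rights_contains(have, &need));
}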
Example #14
void
child_return(void *arg)
{
	struct lwp *l = arg;
	struct proc *p = l->l_proc;

	userret(l, l->l_md.md_regs->tf_iioq_head, 0);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p, SYS_fork, 0, 0);
#endif
#ifdef DEBUG
	frame_sanity_check(l->l_md.md_regs, l);
#endif /* DEBUG */
}
Example #15
int
sysarch(struct thread *td, struct sysarch_args *uap)
{
	int error;

#ifdef CAPABILITY_MODE
	/*
	 * When adding new operations, add a new case statement here to
	 * explicitly indicate whether or not the operation is safe to
	 * perform in capability mode.
	 */
	if (IN_CAPABILITY_MODE(td)) {
		switch (uap->op) {
		case ARM_SYNC_ICACHE:
		case ARM_DRAIN_WRITEBUF:
		case ARM_SET_TP:
		case ARM_GET_TP:
			break;

		default:
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
#endif
			return (ECAPMODE);
		}
	}
#endif

	switch (uap->op) {
	case ARM_SYNC_ICACHE:
		error = arm32_sync_icache(td, uap->parms);
		break;
	case ARM_DRAIN_WRITEBUF:
		error = arm32_drain_writebuf(td, uap->parms);
		break;
	case ARM_SET_TP:
		error = arm32_set_tp(td, uap->parms);
		break;
	case ARM_GET_TP:
		error = arm32_get_tp(td, uap->parms);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
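The capability-mode gate at the top of sysarch() is what a sandboxed process runs into after cap_enter(2). A minimal sketch:

#include <sys/capsicum.h>
#include <err.h>

int
main(void)
{
	if (cap_enter() == -1)
		err(1, "cap_enter");
	/* From here on, any syscall not whitelisted for capability
	 * mode fails with ECAPMODE -- e.g. the sysarch() default case
	 * above -- and, with ktrace active, logs a CAPFAIL_SYSCALL
	 * record via ktrcapfail(). */
	return (0);
}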
Example #16
/*
 * Process the tail end of a fork() for the child.
 */
void
child_return(void *arg)
{
	struct proc *p = arg;

	/*
	 * Return values in the frame set by cpu_fork().
	 */

	KERNEL_PROC_UNLOCK(p);
	userret(p);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET)) {
		KERNEL_PROC_LOCK(p);
		ktrsysret(p, SYS_fork, 0, 0);
		KERNEL_PROC_UNLOCK(p);
	}
#endif
}
Example #17
int
sysarch(struct thread *td, struct sysarch_args *uap)
{
	int error;

#ifdef CAPABILITY_MODE
	/*
	 * When adding new operations, add a new case statement here to
	 * explicitly indicate whether or not the operation is safe to
	 * perform in capability mode.
	 */
	if (IN_CAPABILITY_MODE(td)) {
		switch (uap->op) {
		case SPARC_SIGTRAMP_INSTALL:
		case SPARC_UTRAP_INSTALL:
			break;

		default:
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
#endif
			return (ECAPMODE);
		}
	}
#endif

	mtx_lock(&Giant);
	switch (uap->op) {
	case SPARC_SIGTRAMP_INSTALL:
		error = sparc_sigtramp_install(td, uap->parms);
		break;
	case SPARC_UTRAP_INSTALL:
		error = sparc_utrap_install(td, uap->parms);
		break;
	default:
		error = EINVAL;
		break;
	}
	mtx_unlock(&Giant);
	return (error);
}
Example #18
/*
 * System call to query the rights mask associated with a capability.
 */
int
sys___cap_rights_get(struct thread *td, struct __cap_rights_get_args *uap)
{
	struct filedesc *fdp;
	cap_rights_t rights;
	int error, fd, i, n;

	if (uap->version != CAP_RIGHTS_VERSION_00)
		return (EINVAL);

	fd = uap->fd;

	AUDIT_ARG_FD(fd);

	fdp = td->td_proc->p_fd;
	FILEDESC_SLOCK(fdp);
	if (fget_locked(fdp, fd) == NULL) {
		FILEDESC_SUNLOCK(fdp);
		return (EBADF);
	}
	rights = *cap_rights(fdp, fd);
	FILEDESC_SUNLOCK(fdp);
	n = uap->version + 2;
	if (uap->version != CAPVER(&rights)) {
		/*
		 * For older versions we need to check if the descriptor
		 * doesn't contain rights not understood by the caller.
		 * If it does, we have to return an error.
		 */
		for (i = n; i < CAPARSIZE(&rights); i++) {
			if ((rights.cr_rights[i] & ~(0x7FULL << 57)) != 0)
				return (EINVAL);
		}
	}
	error = copyout(&rights, uap->rightsp, sizeof(rights.cr_rights[0]) * n);
#ifdef KTRACE
	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
		ktrcaprights(&rights);
#endif
	return (error);
}
Example #19
/*
 * System call to limit rights of the given capability.
 */
int
sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap)
{
	cap_rights_t rights;
	int error, version;

	cap_rights_init(&rights);

	error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0]));
	if (error != 0)
		return (error);
	version = CAPVER(&rights);
	if (version != CAP_RIGHTS_VERSION_00)
		return (EINVAL);

	error = copyin(uap->rightsp, &rights,
	    sizeof(rights.cr_rights[0]) * CAPARSIZE(&rights));
	if (error != 0)
		return (error);
	/* Check for race. */
	if (CAPVER(&rights) != version)
		return (EINVAL);

	if (!cap_rights_is_valid(&rights))
		return (EINVAL);

	if (version != CAP_RIGHTS_VERSION) {
		rights.cr_rights[0] &= ~(0x3ULL << 62);
		rights.cr_rights[0] |= ((uint64_t)CAP_RIGHTS_VERSION << 62);
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_STRUCT))
		ktrcaprights(&rights);
#endif

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_RIGHTS(&rights);
	return (kern_cap_rights_limit(td, uap->fd, &rights));
}
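Together with sys___cap_rights_get() above, this implements the userland limit/query pair. A hedged round-trip sketch; the descriptor and rights are chosen for illustration:

#include <sys/capsicum.h>
#include <err.h>

void
limit_and_query(int fd)
{
	cap_rights_t rights;

	cap_rights_init(&rights, CAP_READ, CAP_EVENT);
	if (cap_rights_limit(fd, &rights) == -1)
		err(1, "cap_rights_limit");
	/* Reads back the mask just installed; as the version check in
	 * sys___cap_rights_get() shows, this fails with EINVAL if the
	 * descriptor carries rights newer than the caller understands. */
	if (cap_rights_get(fd, &rights) == -1)
		err(1, "cap_rights_get");
}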
Example #20
void
EMULNAME(syscall_fancy)(struct proc *p, u_int status, u_int cause, u_int opc)
{
	struct frame *frame = (struct frame *)p->p_md.md_regs;
	register_t *args, copyargs[8];
	register_t *rval;
#if _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN
	register_t copyrval[2];
#endif
	mips_reg_t ov0;
	size_t code, numsys, nsaved, nargs;
	const struct sysent *callp;
	int error;

	uvmexp.syscalls++;

	if (DELAYBRANCH(cause))
		frame->f_regs[PC] = MachEmulateBranch(frame, opc, 0, 0);
	else
		frame->f_regs[PC] = opc + sizeof(int);

	callp = p->p_emul->e_sysent;
	numsys = p->p_emul->e_nsysent;
	ov0 = code = frame->f_regs[V0] - SYSCALL_SHIFT;

	switch (code) {
	case SYS_syscall:
	case SYS___syscall:
		args = copyargs;
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = frame->f_regs[A0] - SYSCALL_SHIFT;
			args[0] = frame->f_regs[A1];
			args[1] = frame->f_regs[A2];
			args[2] = frame->f_regs[A3];
			nsaved = 3;
		} else {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 */
			code = frame->f_regs[A0 + _QUAD_LOWWORD] 
			    - SYSCALL_SHIFT;
			args[0] = frame->f_regs[A2];
			args[1] = frame->f_regs[A3];
			nsaved = 2;
		}

		if (code >= p->p_emul->e_nsysent)
			callp += p->p_emul->e_nosys;
		else
			callp += code;
		nargs = callp->sy_argsize / sizeof(register_t);

		if (nargs > nsaved) {
			error = copyin(
			    ((register_t *)(vaddr_t)frame->f_regs[SP] + 4),
			    (args + nsaved),
			    (nargs - nsaved) * sizeof(register_t));
			if (error)
				goto bad;
		}
		break;

	default:
		if (code >= p->p_emul->e_nsysent)
			callp += p->p_emul->e_nosys;
		else
			callp += code;
		nargs = callp->sy_narg;

		if (nargs < 5) {
#if !defined(_MIPS_BSD_API) || _MIPS_BSD_API == _MIPS_BSD_API_LP32
			args = (register_t *)&frame->f_regs[A0];
#elif _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN
			args = copyargs;
			args[0] = frame->f_regs[A0];
			args[1] = frame->f_regs[A1];
			args[2] = frame->f_regs[A2];
			args[3] = frame->f_regs[A3];
#else
# error syscall not implemented for current MIPS ABI
#endif
		} else {
			args = copyargs;
			error = copyin(
			    ((register_t *)(vaddr_t)frame->f_regs[SP] + 4),
			    (&copyargs[4]),
			    (nargs - 4) * sizeof(register_t));
			if (error)
				goto bad;
			args[0] = frame->f_regs[A0];
			args[1] = frame->f_regs[A1];
			args[2] = frame->f_regs[A2];
			args[3] = frame->f_regs[A3];
		}
		break;
	}

#ifdef SYSCALL_DEBUG
	scdebug_call(p, code, args);
#endif

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p, code, callp->sy_argsize, args);
#endif

#if !defined(_MIPS_BSD_API) || _MIPS_BSD_API == _MIPS_BSD_API_LP32
	rval = (register_t *)&frame->f_regs[V0];
	rval[0] = 0;
	/* rval[1] already has V1 */
#elif _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN
	rval = copyrval;
	rval[0] = 0;
	rval[1] = frame->f_regs[V1];
#endif

	error = (*callp->sy_call)(p, args, rval);

	switch (error) {
	case 0:
#if _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN
		frame->f_regs[V0] = rval[0];
		frame->f_regs[V1] = rval[1];
#endif
		frame->f_regs[A3] = 0;
		break;
	case ERESTART:
		frame->f_regs[V0] = ov0;	/* restore syscall code */
		frame->f_regs[PC] = opc;
		break;
	case EJUSTRETURN:
		break;	/* nothing to do */
	default:
	bad:
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[V0] = error;
		frame->f_regs[A3] = 1;
		break;
	}

#ifdef SYSCALL_DEBUG
	scdebug_ret(p, code, error, rval);
#endif

	userret(p);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p, code, error, rval[0]);
#endif
}
Example #21
/*
 * Process an asynchronous software trap.
 * This is relatively easy.
 * This function will return with preemption disabled.
 */
void
ast(struct trapframe *framep)
{
	struct thread *td;
	struct proc *p;
	int flags;
	int sig;

	td = curthread;
	p = td->td_proc;

	CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
	    p->p_comm);
	KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
	WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode");
	mtx_assert(&Giant, MA_NOTOWNED);
	THREAD_LOCK_ASSERT(td, MA_NOTOWNED);
	td->td_frame = framep;
	td->td_pticks = 0;

	/*
	 * This updates the td_flags for the checks below in one
	 * "atomic" operation with turning off the astpending flag.
	 * If another AST is triggered while we are handling the
	 * AST's saved in flags, the astpending flag will be set and
	 * ast() will be called again.
	 */
	thread_lock(td);
	flags = td->td_flags;
	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK |
	    TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
	thread_unlock(td);
	PCPU_INC(cnt.v_trap);

	if (td->td_ucred != p->p_ucred) 
		cred_update_thread(td);
	if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) {
		addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
		td->td_profil_ticks = 0;
		td->td_pflags &= ~TDP_OWEUPC;
	}
#ifdef HWPMC_HOOKS
	/* Handle Software PMC callchain capture. */
	if (PMC_IS_PENDING_CALLCHAIN(td))
		PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_USER_CALLCHAIN_SOFT, (void *) framep);
#endif
	if (flags & TDF_ALRMPEND) {
		PROC_LOCK(p);
		kern_psignal(p, SIGVTALRM);
		PROC_UNLOCK(p);
	}
	if (flags & TDF_PROFPEND) {
		PROC_LOCK(p);
		kern_psignal(p, SIGPROF);
		PROC_UNLOCK(p);
	}
#ifdef MAC
	if (flags & TDF_MACPEND)
		mac_thread_userret(td);
#endif
	if (flags & TDF_NEEDRESCHED) {
#ifdef KTRACE
		if (KTRPOINT(td, KTR_CSW))
			ktrcsw(1, 1, __func__);
#endif
		thread_lock(td);
		sched_prio(td, td->td_user_pri);
		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
		thread_unlock(td);
#ifdef KTRACE
		if (KTRPOINT(td, KTR_CSW))
			ktrcsw(0, 1, __func__);
#endif
	}

	/*
	 * Check for signals. Unlocked reads of p_pendingcnt or
	 * p_siglist might cause process-directed signal to be handled
	 * later.
	 */
	if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
	    !SIGISEMPTY(p->p_siglist)) {
		PROC_LOCK(p);
		mtx_lock(&p->p_sigacts->ps_mtx);
		while ((sig = cursig(td)) != 0)
			postsig(sig);
		mtx_unlock(&p->p_sigacts->ps_mtx);
		PROC_UNLOCK(p);
	}
	/*
	 * We need to check to see if we have to exit or wait due to a
	 * single threading requirement or some other STOP condition.
	 */
	if (flags & TDF_NEEDSUSPCHK) {
		PROC_LOCK(p);
		thread_suspend_check(0);
		PROC_UNLOCK(p);
	}

	if (td->td_pflags & TDP_OLDMASK) {
		td->td_pflags &= ~TDP_OLDMASK;
		kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0);
	}

	userret(td, framep);
}
Example #22
int
sys_pledge(struct proc *p, void *v, register_t *retval)
{
	struct sys_pledge_args /* {
		syscallarg(const char *)request;
		syscallarg(const char **)paths;
	} */	*uap = v;
	uint64_t flags = 0;
	int error;

	if (SCARG(uap, request)) {
		size_t rbuflen;
		char *rbuf, *rp, *pn;
		uint64_t f;

		rbuf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
		error = copyinstr(SCARG(uap, request), rbuf, MAXPATHLEN,
		    &rbuflen);
		if (error) {
			free(rbuf, M_TEMP, MAXPATHLEN);
			return (error);
		}
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrstruct(p, "pledgereq", rbuf, rbuflen-1);
#endif

		for (rp = rbuf; rp && *rp && error == 0; rp = pn) {
			pn = strchr(rp, ' ');	/* find terminator */
			if (pn) {
				while (*pn == ' ')
					*pn++ = '\0';
			}

			if ((f = pledgereq_flags(rp)) == 0) {
				free(rbuf, M_TEMP, MAXPATHLEN);
				return (EINVAL);
			}
			flags |= f;
		}
		free(rbuf, M_TEMP, MAXPATHLEN);

		/*
		 * if we are already pledged, allow only promise reductions.
		 * flags doesn't contain flags outside _USERSET: they will be
		 * relearned.
		 */
		if (ISSET(p->p_p->ps_flags, PS_PLEDGE) &&
		    (((flags | p->p_p->ps_pledge) != p->p_p->ps_pledge)))
			return (EPERM);
	}

	if (SCARG(uap, paths)) {
#if 1
		return (EINVAL);
#else
		const char **u = SCARG(uap, paths), *sp;
		struct whitepaths *wl;
		char *path, *rdir = NULL, *cwd = NULL;
		size_t pathlen, rdirlen, cwdlen;

		size_t maxargs = 0;
		int i, error;

		if (p->p_p->ps_pledgepaths)
			return (EPERM);

		/* Count paths */
		for (i = 0; i < PLEDGE_MAXPATHS; i++) {
			if ((error = copyin(u + i, &sp, sizeof(sp))) != 0)
				return (error);
			if (sp == NULL)
				break;
		}
		if (i == PLEDGE_MAXPATHS)
			return (E2BIG);

		wl = malloc(sizeof *wl + sizeof(struct whitepath) * (i+1),
		    M_TEMP, M_WAITOK | M_ZERO);
		wl->wl_size = sizeof *wl + sizeof(struct whitepath) * (i+1);
		wl->wl_count = i;
		wl->wl_ref = 1;

		path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

		/* Copy in */
		for (i = 0; i < wl->wl_count; i++) {
			char *resolved = NULL;
			size_t resolvedlen;

			if ((error = copyin(u + i, &sp, sizeof(sp))) != 0)
				break;
			if (sp == NULL)
				break;
			if ((error = copyinstr(sp, path, MAXPATHLEN, &pathlen)) != 0)
				break;
#ifdef KTRACE
			if (KTRPOINT(p, KTR_STRUCT))
				ktrstruct(p, "pledgepath", path, pathlen-1);
#endif

			error = resolvpath(p, &rdir, &rdirlen, &cwd, &cwdlen,
			    path, pathlen, &resolved, &resolvedlen);

			if (error != 0)
				/* resolved is allocated only if !error */
				break;

			maxargs += resolvedlen;
			if (maxargs > ARG_MAX) {
				error = E2BIG;
				free(resolved, M_TEMP, resolvedlen);
				break;
			}
			wl->wl_paths[i].name = resolved;
			wl->wl_paths[i].len = resolvedlen;
		}
		free(rdir, M_TEMP, rdirlen);
		free(cwd, M_TEMP, cwdlen);
		free(path, M_TEMP, MAXPATHLEN);

		if (error) {
			for (i = 0; i < wl->wl_count; i++)
				free(wl->wl_paths[i].name,
				    M_TEMP, wl->wl_paths[i].len);
			free(wl, M_TEMP, wl->wl_size);
			return (error);
		}
		p->p_p->ps_pledgepaths = wl;

#ifdef DEBUG_PLEDGE
		/* print paths registered as whitelisted (as seen without chroot) */
		DNPRINTF(1, "pledge: %s(%d): paths loaded:\n", p->p_comm,
		    p->p_pid);
		for (i = 0; i < wl->wl_count; i++)
			if (wl->wl_paths[i].name)
				DNPRINTF(1, "pledge: %d=\"%s\" [%lld]\n", i,
				    wl->wl_paths[i].name,
				    (long long)wl->wl_paths[i].len);
#endif
#endif
	}

	if (SCARG(uap, request)) {
		p->p_p->ps_pledge = flags;
		p->p_p->ps_flags |= PS_PLEDGE;
	}

	return (0);
}
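The EPERM check above enforces that promises can only shrink once a process is pledged. A sketch of the rule from the caller's side, with illustrative promise strings:

#include <unistd.h>

int
main(void)
{
	pledge("stdio rpath wpath", NULL);	/* initial, broad set */
	pledge("stdio", NULL);		/* OK: strict reduction */
	pledge("stdio inet", NULL);	/* fails EPERM: "inet" was not promised */
	return (0);
}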
Example #23
/* ARGSUSED */
int
sys_execve(struct proc *p, void *v, register_t *retval)
{
	struct sys_execve_args /* {
		syscallarg(const char *) path;
		syscallarg(char *const *) argp;
		syscallarg(char *const *) envp;
	} */ *uap = v;
	int error;
	struct exec_package pack;
	struct nameidata nid;
	struct vattr attr;
	struct ucred *cred = p->p_ucred;
	char *argp;
	char * const *cpp, *dp, *sp;
#ifdef KTRACE
	char *env_start;
#endif
	struct process *pr = p->p_p;
	long argc, envc;
	size_t len, sgap;
#ifdef MACHINE_STACK_GROWS_UP
	size_t slen;
#endif
	char *stack;
	struct ps_strings arginfo;
	struct vmspace *vm = pr->ps_vmspace;
	char **tmpfap;
	extern struct emul emul_native;
#if NSYSTRACE > 0
	int wassugid = ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC);
	size_t pathbuflen;
#endif
	char *pathbuf = NULL;
	struct vnode *otvp;

	/* get other threads to stop */
	if ((error = single_thread_set(p, SINGLE_UNWIND, 1)))
		return (error);

	/*
	 * Cheap solution to complicated problems.
	 * Mark this process as "leave me alone, I'm execing".
	 */
	atomic_setbits_int(&pr->ps_flags, PS_INEXEC);

#if NSYSTRACE > 0
	if (ISSET(p->p_flag, P_SYSTRACE)) {
		systrace_execve0(p);
		pathbuf = pool_get(&namei_pool, PR_WAITOK);
		error = copyinstr(SCARG(uap, path), pathbuf, MAXPATHLEN,
		    &pathbuflen);
		if (error != 0)
			goto clrflag;
	}
#endif
	if (pathbuf != NULL) {
		NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, p);
	} else {
		NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE,
		    SCARG(uap, path), p);
	}

	/*
	 * initialize the fields of the exec package.
	 */
	if (pathbuf != NULL)
		pack.ep_name = pathbuf;
	else
		pack.ep_name = (char *)SCARG(uap, path);
	pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK);
	pack.ep_hdrlen = exec_maxhdrsz;
	pack.ep_hdrvalid = 0;
	pack.ep_ndp = &nid;
	pack.ep_interp = NULL;
	pack.ep_emul_arg = NULL;
	VMCMDSET_INIT(&pack.ep_vmcmds);
	pack.ep_vap = &attr;
	pack.ep_emul = &emul_native;
	pack.ep_flags = 0;

	/* see if we can run it. */
	if ((error = check_exec(p, &pack)) != 0) {
		goto freehdr;
	}

	/* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */

	/* allocate an argument buffer */
	argp = km_alloc(NCARGS, &kv_exec, &kp_pageable, &kd_waitok);
#ifdef DIAGNOSTIC
	if (argp == NULL)
		panic("execve: argp == NULL");
#endif
	dp = argp;
	argc = 0;

	/* copy the fake args list, if there's one, freeing it as we go */
	if (pack.ep_flags & EXEC_HASARGL) {
		tmpfap = pack.ep_fa;
		while (*tmpfap != NULL) {
			char *cp;

			cp = *tmpfap;
			while (*cp)
				*dp++ = *cp++;
			*dp++ = '\0';

			free(*tmpfap, M_EXEC, 0);
			tmpfap++; argc++;
		}
		free(pack.ep_fa, M_EXEC, 0);
		pack.ep_flags &= ~EXEC_HASARGL;
	}

	/* Now get argv & environment */
	if (!(cpp = SCARG(uap, argp))) {
		error = EFAULT;
		goto bad;
	}

	if (pack.ep_flags & EXEC_SKIPARG)
		cpp++;

	while (1) {
		len = argp + ARG_MAX - dp;
		if ((error = copyin(cpp, &sp, sizeof(sp))) != 0)
			goto bad;
		if (!sp)
			break;
		if ((error = copyinstr(sp, dp, len, &len)) != 0) {
			if (error == ENAMETOOLONG)
				error = E2BIG;
			goto bad;
		}
		dp += len;
		cpp++;
		argc++;
	}

	/* must have at least one argument */
	if (argc == 0) {
		error = EINVAL;
		goto bad;
	}

#ifdef KTRACE
	if (KTRPOINT(p, KTR_EXECARGS))
		ktrexec(p, KTR_EXECARGS, argp, dp - argp);
#endif

	envc = 0;
	/* environment does not need to be there */
	if ((cpp = SCARG(uap, envp)) != NULL ) {
#ifdef KTRACE
		env_start = dp;
#endif
		while (1) {
			len = argp + ARG_MAX - dp;
			if ((error = copyin(cpp, &sp, sizeof(sp))) != 0)
				goto bad;
			if (!sp)
				break;
			if ((error = copyinstr(sp, dp, len, &len)) != 0) {
				if (error == ENAMETOOLONG)
					error = E2BIG;
				goto bad;
			}
			dp += len;
			cpp++;
			envc++;
		}

#ifdef KTRACE
		if (KTRPOINT(p, KTR_EXECENV))
			ktrexec(p, KTR_EXECENV, env_start, dp - env_start);
#endif
	}

	dp = (char *)(((long)dp + _STACKALIGNBYTES) & ~_STACKALIGNBYTES);

	sgap = STACKGAPLEN;

	/*
	 * If we have enabled random stackgap, the stack itself has already
	 * been moved from a random location, but is still aligned to a page
	 * boundary.  Provide the lower bits of random placement now.
	 */
	if (stackgap_random != 0) {
		sgap += arc4random() & PAGE_MASK;
		sgap = (sgap + _STACKALIGNBYTES) & ~_STACKALIGNBYTES;
	}

	/* Now check if args & environ fit into new stack */
	len = ((argc + envc + 2 + pack.ep_emul->e_arglen) * sizeof(char *) +
	    sizeof(long) + dp + sgap + sizeof(struct ps_strings)) - argp;

	len = (len + _STACKALIGNBYTES) &~ _STACKALIGNBYTES;

	if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
		error = ENOMEM;
		goto bad;
	}

	/* adjust "active stack depth" for process VSZ */
	pack.ep_ssize = len;	/* maybe should go elsewhere, but... */

	/*
	 * we're committed: any further errors will kill the process, so
	 * kill the other threads now.
	 */
	single_thread_set(p, SINGLE_EXIT, 0);

	/*
	 * Prepare vmspace for remapping. Note that uvmspace_exec can replace
	 * pr_vmspace!
	 */
	uvmspace_exec(p, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);

	vm = pr->ps_vmspace;
	/* Now map address space */
	vm->vm_taddr = (char *)trunc_page(pack.ep_taddr);
	vm->vm_tsize = atop(round_page(pack.ep_taddr + pack.ep_tsize) -
	    trunc_page(pack.ep_taddr));
	vm->vm_daddr = (char *)trunc_page(pack.ep_daddr);
	vm->vm_dsize = atop(round_page(pack.ep_daddr + pack.ep_dsize) -
	    trunc_page(pack.ep_daddr));
	vm->vm_dused = 0;
	vm->vm_ssize = atop(round_page(pack.ep_ssize));
	vm->vm_maxsaddr = (char *)pack.ep_maxsaddr;
	vm->vm_minsaddr = (char *)pack.ep_minsaddr;

	/* create the new process's VM space by running the vmcmds */
#ifdef DIAGNOSTIC
	if (pack.ep_vmcmds.evs_used == 0)
		panic("execve: no vmcmds");
#endif
	error = exec_process_vmcmds(p, &pack);

	/* if an error happened, deallocate and punt */
	if (error)
		goto exec_abort;

	/* old "stackgap" is gone now */
	pr->ps_stackgap = 0;

#ifdef MACHINE_STACK_GROWS_UP
	pr->ps_strings = (vaddr_t)vm->vm_maxsaddr + sgap;
	if (uvm_map_protect(&vm->vm_map, (vaddr_t)vm->vm_maxsaddr,
	    trunc_page(pr->ps_strings), PROT_NONE, TRUE))
		goto exec_abort;
#else
	pr->ps_strings = (vaddr_t)vm->vm_minsaddr - sizeof(arginfo) - sgap;
	if (uvm_map_protect(&vm->vm_map,
	    round_page(pr->ps_strings + sizeof(arginfo)),
	    (vaddr_t)vm->vm_minsaddr, PROT_NONE, TRUE))
		goto exec_abort;
#endif

	/* remember information about the process */
	arginfo.ps_nargvstr = argc;
	arginfo.ps_nenvstr = envc;

#ifdef MACHINE_STACK_GROWS_UP
	stack = (char *)vm->vm_maxsaddr + sizeof(arginfo) + sgap;
	slen = len - sizeof(arginfo) - sgap;
#else
	stack = (char *)(vm->vm_minsaddr - len);
#endif
	/* Now copy argc, args & environ to new stack */
	if (!(*pack.ep_emul->e_copyargs)(&pack, &arginfo, stack, argp))
		goto exec_abort;

	/* copy out the process's ps_strings structure */
	if (copyout(&arginfo, (char *)pr->ps_strings, sizeof(arginfo)))
		goto exec_abort;

	stopprofclock(pr);	/* stop profiling */
	fdcloseexec(p);		/* handle close on exec */
	execsigs(p);		/* reset caught signals */
	TCB_SET(p, NULL);	/* reset the TCB address */
	pr->ps_kbind_addr = 0;	/* reset the kbind bits */
	pr->ps_kbind_cookie = 0;

	/* set command name & other accounting info */
	memset(p->p_comm, 0, sizeof(p->p_comm));
	len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
	memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, len);
	pr->ps_acflag &= ~AFORK;

	/* record proc's vnode, for use by sysctl */
	otvp = pr->ps_textvp;
	vref(pack.ep_vp);
	pr->ps_textvp = pack.ep_vp;
	if (otvp)
		vrele(otvp);

	atomic_setbits_int(&pr->ps_flags, PS_EXEC);
	if (pr->ps_flags & PS_PPWAIT) {
		atomic_clearbits_int(&pr->ps_flags, PS_PPWAIT);
		atomic_clearbits_int(&pr->ps_pptr->ps_flags, PS_ISPWAIT);
		wakeup(pr->ps_pptr);
	}

	/*
	 * If process does execve() while it has a mismatched real,
	 * effective, or saved uid/gid, we set PS_SUGIDEXEC.
	 */
	if (cred->cr_uid != cred->cr_ruid ||
	    cred->cr_uid != cred->cr_svuid ||
	    cred->cr_gid != cred->cr_rgid ||
	    cred->cr_gid != cred->cr_svgid)
		atomic_setbits_int(&pr->ps_flags, PS_SUGIDEXEC);
	else
		atomic_clearbits_int(&pr->ps_flags, PS_SUGIDEXEC);

	atomic_clearbits_int(&pr->ps_flags, PS_TAMED);
	tame_dropwpaths(pr);

	/*
	 * deal with set[ug]id.
	 * MNT_NOEXEC has already been used to disable s[ug]id.
	 */
	if ((attr.va_mode & (VSUID | VSGID)) && proc_cansugid(p)) {
		int i;

		atomic_setbits_int(&pr->ps_flags, PS_SUGID|PS_SUGIDEXEC);

#ifdef KTRACE
		/*
		 * If process is being ktraced, turn off - unless
		 * root set it.
		 */
		if (pr->ps_tracevp && !(pr->ps_traceflag & KTRFAC_ROOT))
			ktrcleartrace(pr);
#endif
		p->p_ucred = cred = crcopy(cred);
		if (attr.va_mode & VSUID)
			cred->cr_uid = attr.va_uid;
		if (attr.va_mode & VSGID)
			cred->cr_gid = attr.va_gid;

		/*
		 * For set[ug]id processes, a few caveats apply to
		 * stdin, stdout, and stderr.
		 */
		error = 0;
		fdplock(p->p_fd);
		for (i = 0; i < 3; i++) {
			struct file *fp = NULL;

			/*
			 * NOTE - This will never return NULL because of
			 * immature fds. The file descriptor table is not
			 * shared because we're suid.
			 */
			fp = fd_getfile(p->p_fd, i);

			/*
			 * Ensure that stdin, stdout, and stderr are already
			 * allocated.  We do not want userland to accidentally
			 * allocate descriptors in this range which has implied
			 * meaning to libc.
			 */
			if (fp == NULL) {
				short flags = FREAD | (i == 0 ? 0 : FWRITE);
				struct vnode *vp;
				int indx;

				if ((error = falloc(p, &fp, &indx)) != 0)
					break;
#ifdef DIAGNOSTIC
				if (indx != i)
					panic("sys_execve: falloc indx != i");
#endif
				if ((error = cdevvp(getnulldev(), &vp)) != 0) {
					fdremove(p->p_fd, indx);
					closef(fp, p);
					break;
				}
				if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) {
					fdremove(p->p_fd, indx);
					closef(fp, p);
					vrele(vp);
					break;
				}
				if (flags & FWRITE)
					vp->v_writecount++;
				fp->f_flag = flags;
				fp->f_type = DTYPE_VNODE;
				fp->f_ops = &vnops;
				fp->f_data = (caddr_t)vp;
				FILE_SET_MATURE(fp, p);
			}
		}
		fdpunlock(p->p_fd);
		if (error)
			goto exec_abort;
	} else
		atomic_clearbits_int(&pr->ps_flags, PS_SUGID);

	/*
	 * Reset the saved ugids and update the process's copy of the
	 * creds if the creds have been changed
	 */
	if (cred->cr_uid != cred->cr_svuid ||
	    cred->cr_gid != cred->cr_svgid) {
		/* make sure we have unshared ucreds */
		p->p_ucred = cred = crcopy(cred);
		cred->cr_svuid = cred->cr_uid;
		cred->cr_svgid = cred->cr_gid;
	}

	if (pr->ps_ucred != cred) {
		struct ucred *ocred;

		ocred = pr->ps_ucred;
		crhold(cred);
		pr->ps_ucred = cred;
		crfree(ocred);
	}

	if (pr->ps_flags & PS_SUGIDEXEC) {
		int i, s = splclock();

		timeout_del(&pr->ps_realit_to);
		for (i = 0; i < nitems(pr->ps_timer); i++) {
			timerclear(&pr->ps_timer[i].it_interval);
			timerclear(&pr->ps_timer[i].it_value);
		}
		splx(s);
	}

	/* reset CPU time usage for the thread, but not the process */
	timespecclear(&p->p_tu.tu_runtime);
	p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0;

	km_free(argp, NCARGS, &kv_exec, &kp_pageable);

	pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf);
	vn_close(pack.ep_vp, FREAD, cred, p);

	/*
	 * notify others that we exec'd
	 */
	KNOTE(&pr->ps_klist, NOTE_EXEC);

	/* setup new registers and do misc. setup. */
	if (pack.ep_emul->e_fixup != NULL) {
		if ((*pack.ep_emul->e_fixup)(p, &pack) != 0)
			goto free_pack_abort;
	}
#ifdef MACHINE_STACK_GROWS_UP
	(*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack + slen, retval);
#else
	(*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack, retval);
#endif

	/* map the process's signal trampoline code */
	if (exec_sigcode_map(pr, pack.ep_emul))
		goto free_pack_abort;

#ifdef __HAVE_EXEC_MD_MAP
	/* perform md specific mappings that process might need */
	if (exec_md_map(p, &pack))
		goto free_pack_abort;
#endif

	if (pr->ps_flags & PS_TRACED)
		psignal(p, SIGTRAP);

	free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen);

	/*
	 * Call emulation specific exec hook. This can setup per-process
	 * p->p_emuldata or do any other per-process stuff an emulation needs.
	 *
	 * If we are executing process of different emulation than the
	 * original forked process, call e_proc_exit() of the old emulation
	 * first, then e_proc_exec() of new emulation. If the emulation is
	 * same, the exec hook code should deallocate any old emulation
	 * resources held previously by this process.
	 */
	if (pr->ps_emul && pr->ps_emul->e_proc_exit &&
	    pr->ps_emul != pack.ep_emul)
		(*pr->ps_emul->e_proc_exit)(p);

	p->p_descfd = 255;
	if ((pack.ep_flags & EXEC_HASFD) && pack.ep_fd < 255)
		p->p_descfd = pack.ep_fd;

	/*
	 * Call exec hook. Emulation code may NOT store reference to anything
	 * from &pack.
	 */
	if (pack.ep_emul->e_proc_exec)
		(*pack.ep_emul->e_proc_exec)(p, &pack);

#if defined(KTRACE) && defined(COMPAT_LINUX)
	/* update ps_emul, but don't ktrace it if native-execing-native */
	if (pr->ps_emul != pack.ep_emul || pack.ep_emul != &emul_native) {
		pr->ps_emul = pack.ep_emul;

		if (KTRPOINT(p, KTR_EMUL))
			ktremul(p);
	}
#else
	/* update ps_emul, the old value is no longer needed */
	pr->ps_emul = pack.ep_emul;
#endif

	atomic_clearbits_int(&pr->ps_flags, PS_INEXEC);
	single_thread_clear(p, P_SUSPSIG);

#if NSYSTRACE > 0
	if (ISSET(p->p_flag, P_SYSTRACE) &&
	    wassugid && !ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC))
		systrace_execve1(pathbuf, p);
#endif

	if (pathbuf != NULL)
		pool_put(&namei_pool, pathbuf);

	return (0);

bad:
	/* free the vmspace-creation commands, and release their references */
	kill_vmcmds(&pack.ep_vmcmds);
	/* kill any opened file descriptor, if necessary */
	if (pack.ep_flags & EXEC_HASFD) {
		pack.ep_flags &= ~EXEC_HASFD;
		fdplock(p->p_fd);
		(void) fdrelease(p, pack.ep_fd);
		fdpunlock(p->p_fd);
	}
	if (pack.ep_interp != NULL)
		pool_put(&namei_pool, pack.ep_interp);
	if (pack.ep_emul_arg != NULL)
		free(pack.ep_emul_arg, M_TEMP, pack.ep_emul_argsize);
	/* close and put the exec'd file */
	vn_close(pack.ep_vp, FREAD, cred, p);
	pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf);
	km_free(argp, NCARGS, &kv_exec, &kp_pageable);

 freehdr:
	free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen);
#if NSYSTRACE > 0
 clrflag:
#endif
	atomic_clearbits_int(&pr->ps_flags, PS_INEXEC);
	single_thread_clear(p, P_SUSPSIG);

	if (pathbuf != NULL)
		pool_put(&namei_pool, pathbuf);

	return (error);

exec_abort:
	/*
	 * the old process doesn't exist anymore.  exit gracefully.
	 * get rid of the (new) address space we have created, if any, get rid
	 * of our namei data and vnode, and exit noting failure
	 */
	uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
		VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
	if (pack.ep_interp != NULL)
		pool_put(&namei_pool, pack.ep_interp);
	if (pack.ep_emul_arg != NULL)
		free(pack.ep_emul_arg, M_TEMP, pack.ep_emul_argsize);
	pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf);
	vn_close(pack.ep_vp, FREAD, cred, p);
	km_free(argp, NCARGS, &kv_exec, &kp_pageable);

free_pack_abort:
	free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen);
	if (pathbuf != NULL)
		pool_put(&namei_pool, pathbuf);
	exit1(p, W_EXITCODE(0, SIGABRT), EXIT_NORMAL);

	/* NOTREACHED */
	atomic_clearbits_int(&pr->ps_flags, PS_INEXEC);

	return (0);
}
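sys_execve() requires at least one argv entry (the EINVAL check early in the function). A minimal userland invocation, with an illustrative program and environment:

#include <unistd.h>
#include <err.h>

int
main(void)
{
	char *argv[] = { "ls", "-l", NULL };	/* argc >= 1, as required */
	char *envp[] = { "PATH=/bin:/usr/bin", NULL };

	execve("/bin/ls", argv, envp);
	err(1, "execve");	/* reached only if the exec failed */
}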
Example #24
void
osf1_syscall_fancy(struct proc *p, u_int64_t code, struct trapframe *framep)
{
	const struct sysent *callp;
	int error;
	u_int64_t rval[2];
	u_int64_t *args, copyargs[10];				/* XXX */
	u_int hidden, nargs;

	KERNEL_PROC_LOCK(p);

	uvmexp.syscalls++;
	p->p_md.md_tf = framep;

	callp = p->p_emul->e_sysent;

	switch (code) {
	case OSF1_SYS_syscall:
		/* OSF/1 syscall() */
		code = framep->tf_regs[FRAME_A0];
		hidden = 1;
		break;
	default:
		hidden = 0;
		break;
	}

	code &= (OSF1_SYS_NSYSENT - 1);
	callp += code;

	nargs = callp->sy_narg + hidden;
	switch (nargs) {
	default:
		error = copyin((caddr_t)alpha_pal_rdusp(), &copyargs[6],
		    (nargs - 6) * sizeof(u_int64_t));
		if (error)
			goto bad;
	case 6:	
		copyargs[5] = framep->tf_regs[FRAME_A5];
	case 5:	
		copyargs[4] = framep->tf_regs[FRAME_A4];
	case 4:	
		copyargs[3] = framep->tf_regs[FRAME_A3];
		copyargs[2] = framep->tf_regs[FRAME_A2];
		copyargs[1] = framep->tf_regs[FRAME_A1];
		copyargs[0] = framep->tf_regs[FRAME_A0];
		args = copyargs;
		break;
	case 3:	
	case 2:	
	case 1:	
	case 0:
		args = &framep->tf_regs[FRAME_A0];
		break;
	}
	args += hidden;

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p, code, callp->sy_argsize, args);
#endif
#ifdef SYSCALL_DEBUG
	scdebug_call(p, code, args);
#endif

	rval[0] = 0;
	rval[1] = 0;
	error = (*callp->sy_call)(p, args, rval);

	switch (error) {
	case 0:
		framep->tf_regs[FRAME_V0] = rval[0];
		framep->tf_regs[FRAME_A4] = rval[1];
		framep->tf_regs[FRAME_A3] = 0;
		break;
	case ERESTART:
		framep->tf_regs[FRAME_PC] -= 4;
		break;
	case EJUSTRETURN:
		break;
	default:
	bad:
		error = native_to_osf1_errno[error];
		framep->tf_regs[FRAME_V0] = error;
		framep->tf_regs[FRAME_A3] = 1;
		break;
	}

#ifdef SYSCALL_DEBUG
	scdebug_ret(p, code, error, rval);
#endif
	KERNEL_PROC_UNLOCK(p);
	userret(p);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET)) {
		KERNEL_PROC_LOCK(p);
		ktrsysret(p, code, error, rval[0]);
		KERNEL_PROC_UNLOCK(p);
	}
#endif
}
Example #25
/* Instruction pointers operate differently on mc88110 */
void
m88110_syscall(register_t code, struct trapframe *tf)
{
	int i, nsys, nap;
	struct sysent *callp;
	struct proc *p;
	int error;
	register_t args[11], rval[2], *ap;
	u_quad_t sticks;
#ifdef DIAGNOSTIC
	extern struct pcb *curpcb;
#endif

	uvmexp.syscalls++;

	p = curproc;

	callp = p->p_emul->e_sysent;
	nsys  = p->p_emul->e_nsysent;

#ifdef DIAGNOSTIC
	if (USERMODE(tf->tf_epsr) == 0)
		panic("syscall");
	if (curpcb != &p->p_addr->u_pcb)
		panic("syscall curpcb/ppcb");
	if (tf != (struct trapframe *)&curpcb->user_state)
		panic("syscall trapframe");
#endif

	sticks = p->p_sticks;
	p->p_md.md_tf = tf;

	/*
	 * For 88k, all the arguments are passed in the registers (r2-r12)
	 * For syscall (and __syscall), r2 (and r3) has the actual code.
	 * __syscall  takes a quad syscall number, so that other
	 * arguments are at their natural alignments.
	 */
	ap = &tf->tf_r[2];
	nap = 11;	/* r2-r12 */

	switch (code) {
	case SYS_syscall:
		code = *ap++;
		nap--;
		break;
	case SYS___syscall:
		if (callp != sysent)
			break;
		code = ap[_QUAD_LOWWORD];
		ap += 2;
		nap -= 2;
		break;
	}

	/* Callp currently points to syscall, which returns ENOSYS. */
	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;
	else {
		callp += code;
		i = callp->sy_argsize / sizeof(register_t);
		if (i > nap)
			panic("syscall nargs");
		/*
		 * just copy them; syscall stub made sure all the
		 * args are moved from user stack to registers.
		 */
		bcopy((caddr_t)ap, (caddr_t)args, i * sizeof(register_t));
	}
#ifdef SYSCALL_DEBUG
	scdebug_call(p, code, args);
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p, code, callp->sy_argsize, args);
#endif
	rval[0] = 0;
	rval[1] = tf->tf_r[3];
#if NSYSTRACE > 0
	if (ISSET(p->p_flag, P_SYSTRACE))
		error = systrace_redirect(code, p, args, rval);
	else
#endif
		error = (*callp->sy_call)(p, args, rval);
	/*
	 * system call will look like:
	 *	 ld r10, r31, 32; r10,r11,r12 might be garbage.
	 *	 ld r11, r31, 36
	 *	 ld r12, r31, 40
	 *	 or r13, r0, <code>
	 *       tb0 0, r0, <128> <- exip
	 *	 br err 	  <- enip
	 *       jmp r1
	 *  err: or.u r3, r0, hi16(errno)
	 *	 st r2, r3, lo16(errno)
	 *	 subu r2, r0, 1
	 *	 jmp r1
	 *
	 * So, when we take syscall trap, exip/enip will be as
	 * shown above.
	 * Given this,
	 * 1. If the system call returned 0, need to jmp r1.
	 *    exip += 8
	 * 2. If the system call returned an errno > 0, increment
	 *    exip += 4 and plug the value in r2. This will have us
	 *    executing "br err" on return to user space.
	 * 3. If the system call code returned ERESTART,
	 *    we need to rexecute the trap instruction. leave exip as is.
	 * 4. If the system call returned EJUSTRETURN, just return.
	 *    exip += 4
	 */

	switch (error) {
	case 0:
		/*
		 * If fork succeeded and we are the child, our stack
		 * has moved and the pointer tf is no longer valid,
		 * and p is wrong.  Compute the new trapframe pointer.
		 * (The trap frame invariably resides at the
		 * tippity-top of the u. area.)
		 */
		p = curproc;
		tf = (struct trapframe *)USER_REGS(p);
		tf->tf_r[2] = rval[0];
		tf->tf_r[3] = rval[1];
		tf->tf_epsr &= ~PSR_C;
		/* skip two instructions */
		if (tf->tf_exip & 1)
			tf->tf_exip = tf->tf_enip + 4;
		else
			tf->tf_exip += 4 + 4;
		break;
	case ERESTART:
		/*
		 * Reexecute the trap.
		 * exip is already at the trap instruction, so
		 * there is nothing to do.
		 */
		tf->tf_epsr &= ~PSR_C;
		break;
	case EJUSTRETURN:
		tf->tf_epsr &= ~PSR_C;
		/* skip one instruction */
		if (tf->tf_exip & 1)
			tf->tf_exip = tf->tf_enip;
		else
			tf->tf_exip += 4;
		break;
	default:
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		tf->tf_r[2] = error;
		tf->tf_epsr |= PSR_C;   /* fail */
		/* skip one instruction */
		if (tf->tf_exip & 1)
			tf->tf_exip = tf->tf_enip;
		else
			tf->tf_exip += 4;
		break;
	}

#ifdef SYSCALL_DEBUG
	scdebug_ret(p, code, error, rval);
#endif
	userret(p, tf, sticks);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p, code, error, rval[0]);
#endif
}
Example #26
/*
 * syscall2 -	MP aware system call request C handler
 *
 * A system call is essentially treated as a trap except that the
 * MP lock is not held on entry or return.  We are responsible for
 * obtaining the MP lock if necessary and for handling ASTs
 * (e.g. a task switch) prior to return.
 *
 * MPSAFE
 */
void
syscall2(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct sysent *callp;
	register_t orig_tf_rflags;
	int sticks;
	int error;
	int narg;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
#endif
	register_t *argp;
	u_int code;
	int regcnt, optimized_regcnt;
	union sysunion args;
	register_t *argsdst;

	mycpu->gd_cnt.v_syscall++;

#ifdef DIAGNOSTIC
	if (ISPL(frame->tf_cs) != SEL_UPL) {
		panic("syscall");
		/* NOT REACHED */
	}
#endif

	KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid,
		frame->tf_rax);

	userenter(td, p);	/* lazy raise our priority */

	regcnt = 6;
	optimized_regcnt = 6;

	/*
	 * Misc
	 */
	sticks = (int)td->td_sticks;
	orig_tf_rflags = frame->tf_rflags;

	/*
	 * Virtual kernel intercept - if a VM context managed by a virtual
	 * kernel issues a system call the virtual kernel handles it, not us.
	 * Restore the virtual kernel context and return from its system
	 * call.  The current frame is copied out to the virtual kernel.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		error = EJUSTRETURN;
		callp = NULL;
		code = 0;
		goto out;
	}

	/*
	 * Get the system call parameters and account for time
	 */
	KASSERT(lp->lwp_md.md_regs == frame,
		("Frame mismatch %p %p", lp->lwp_md.md_regs, frame));
	code = (u_int)frame->tf_rax;

	if (code == SYS_syscall || code == SYS___syscall) {
		code = frame->tf_rdi;
		regcnt--;
		argp = &frame->tf_rdi + 1;
	} else {
		argp = &frame->tf_rdi;
	}

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg & SYF_ARGMASK;

	/*
	 * On x86_64 we get up to six arguments in registers. The rest are
	 * on the stack. The first six members of 'struct trapframe' happen
	 * to be the registers used to pass arguments, in exactly the right
	 * order.
	 */
	argsdst = (register_t *)(&args.nosys.sysmsg + 1);

	/*
	 * It's easier to copy up to the highest number of syscall arguments
	 * passed in registers, which is 6, than to conditionalize it.
	 */
	bcopy(argp, argsdst, sizeof(register_t) * optimized_regcnt);

	/*
	 * Any arguments beyond available argument-passing registers must
	 * be copyin()'d from the user stack.
	 */
	if (narg > regcnt) {
		caddr_t params;

		params = (caddr_t)frame->tf_rsp + sizeof(register_t);
		error = copyin(params, &argsdst[regcnt],
			       (narg - regcnt) * sizeof(register_t));
		if (error) {
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL)) {
				ktrsyscall(lp, code, narg,
					(void *)(&args.nosys.sysmsg + 1));
			}
#endif
			goto bad;
		}
	}

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL)) {
		ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1));
	}
#endif

	/*
	 * Default return value is 0 (will be copied to %rax).  Double-value
	 * returns use %rax and %rdx.  %rdx is left unchanged for system
	 * calls which return only one result.
	 */
	args.sysmsg_fds[0] = 0;
	args.sysmsg_fds[1] = frame->tf_rdx;

	/*
	 * The syscall might manipulate the trap frame. If it does it
	 * will probably return EJUSTRETURN.
	 */
	args.sysmsg_frame = frame;

	STOPEVENT(p, S_SCE, narg);	/* MP aware */

	/*
	 * NOTE: All system calls run MPSAFE now.  The system call itself
	 *	 is responsible for getting the MP lock.
	 */
#ifdef SYSCALL_DEBUG
	tsc_uclock_t tscval = rdtsc();
#endif
	error = (*callp->sy_call)(&args);
#ifdef SYSCALL_DEBUG
	tscval = rdtsc() - tscval;
	tscval = tscval * 1000000 / tsc_frequency;
	if (SysCallsWorstCase[code] < tscval)
		SysCallsWorstCase[code] = tscval;
#endif

out:
	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
	//kprintf("SYSMSG %d ", error);
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		lp = curthread->td_lwp;
		frame->tf_rax = args.sysmsg_fds[0];
		frame->tf_rdx = args.sysmsg_fds[1];
		frame->tf_rflags &= ~PSL_C;
		break;
	case ERESTART:
		/*
		 * Reconstruct pc, we know that 'syscall' is 2 bytes.
		 * We have to do a full context restore so that %r10
		 * (which was holding the value of %rcx) is restored for
		 * the next iteration.
		 */
		if (frame->tf_err != 0 && frame->tf_err != 2)
			kprintf("lp %s:%d frame->tf_err is weird %ld\n",
				td->td_comm, lp->lwp_proc->p_pid, frame->tf_err);
		frame->tf_rip -= frame->tf_err;
		frame->tf_r10 = frame->tf_rcx;
		break;
	case EJUSTRETURN:
		break;
	case EASYNC:
		panic("Unexpected EASYNC return value (for now)");
	default:
bad:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		frame->tf_rax = error;
		frame->tf_rflags |= PSL_C;
		break;
	}

	/*
	 * Traced syscall.  trapsignal() should now be MP aware
	 */
	if (orig_tf_rflags & PSL_T) {
		frame->tf_rflags &= ~PSL_T;
		trapsignal(lp, SIGTRAP, TRAP_TRACE);
	}

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(lp, frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET)) {
		ktrsysret(lp, code, error, args.sysmsg_result);
	}
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	userexit(lp);
	KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("syscall: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(&td->td_toks_base == td->td_toks_stop,
		("syscall: %ld extra tokens held after trap! syscall %p",
		td->td_toks_stop - &td->td_toks_base,
		callp->sy_call));
#endif
}
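A hedged sketch of the x86_64 entry convention this handler decodes: %rax carries the syscall number and %rdi..%r9 the first six arguments. The SYSCALL instruction clobbers %rcx and %r11, which is why userland passes the fourth argument in %r10 and why the ERESTART path above rebuilds %r10 from the saved frame before retrying. SYS_getpid is just a convenient zero-argument example; libc's syscall(2) is the portable spelling.

#include <sys/syscall.h>

static long
raw_getpid(void)
{
	long ret;

	__asm__ volatile("syscall"
	    : "=a"(ret)		/* return value comes back in %rax */
	    : "a"(SYS_getpid)	/* syscall number goes in %rax */
	    : "rcx", "r11", "memory");
	return (ret);
}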
Example #27
/*
 * System call to limit rights of the given capability.
 */
int
sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap)
{
	struct filedesc *fdp;
	cap_rights_t rights;
	int error, fd, version;

	cap_rights_init(&rights);

	error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0]));
	if (error != 0)
		return (error);
	version = CAPVER(&rights);
	if (version != CAP_RIGHTS_VERSION_00)
		return (EINVAL);

	error = copyin(uap->rightsp, &rights,
	    sizeof(rights.cr_rights[0]) * CAPARSIZE(&rights));
	if (error != 0)
		return (error);
	/* Check for race. */
	if (CAPVER(&rights) != version)
		return (EINVAL);

	if (!cap_rights_is_valid(&rights))
		return (EINVAL);

	if (version != CAP_RIGHTS_VERSION) {
		rights.cr_rights[0] &= ~(0x3ULL << 62);
		rights.cr_rights[0] |= ((uint64_t)CAP_RIGHTS_VERSION << 62);
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_STRUCT))
		ktrcaprights(&rights);
#endif

	fd = uap->fd;

	AUDIT_ARG_FD(fd);
	AUDIT_ARG_RIGHTS(&rights);

	fdp = td->td_proc->p_fd;
	FILEDESC_XLOCK(fdp);
	if (fget_locked(fdp, fd) == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}
	error = _cap_check(cap_rights(fdp, fd), &rights, CAPFAIL_INCREASE);
	if (error == 0) {
		fdp->fd_ofiles[fd].fde_rights = rights;
		if (!cap_rights_is_set(&rights, CAP_IOCTL)) {
			free(fdp->fd_ofiles[fd].fde_ioctls, M_FILECAPS);
			fdp->fd_ofiles[fd].fde_ioctls = NULL;
			fdp->fd_ofiles[fd].fde_nioctls = 0;
		}
		if (!cap_rights_is_set(&rights, CAP_FCNTL))
			fdp->fd_ofiles[fd].fde_fcntls = 0;
	}
	FILEDESC_XUNLOCK(fdp);
	return (error);
}
Example #28
/*
 * syscall2 -	MP aware system call request C handler
 *
 * A system call is essentially treated as a trap.  The MP lock is not
 * held on entry or return.  We are responsible for handling ASTs
 * (e.g. a task switch) prior to return.
 *
 * MPSAFE
 */
void
syscall2(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	caddr_t params;
	struct sysent *callp;
	register_t orig_tf_eflags;
	int sticks;
	int error;
	int narg;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
#endif
	int have_mplock = 0;
	u_int code;
	union sysunion args;

#ifdef DIAGNOSTIC
	if (ISPL(frame->tf_cs) != SEL_UPL) {
		get_mplock();
		panic("syscall");
		/* NOT REACHED */
	}
#endif

	KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid,
		frame->tf_eax);

	userenter(td, p);	/* lazy raise our priority */

	/*
	 * Misc
	 */
	sticks = (int)td->td_sticks;
	orig_tf_eflags = frame->tf_eflags;

	/*
	 * Virtual kernel intercept - if a VM context managed by a virtual
	 * kernel issues a system call the virtual kernel handles it, not us.
	 * Restore the virtual kernel context and return from its system
	 * call.  The current frame is copied out to the virtual kernel.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		error = EJUSTRETURN;
		callp = NULL;
		goto out;
	}

	/*
	 * Get the system call parameters and account for time
	 */
	lp->lwp_md.md_regs = frame;
	params = (caddr_t)frame->tf_esp + sizeof(int);
	code = frame->tf_eax;

	if (p->p_sysent->sv_prepsyscall) {
		(*p->p_sysent->sv_prepsyscall)(
			frame, (int *)(&args.nosys.sysmsg + 1),
			&code, &params);
	} else {
		/*
		 * Need to check if this is a 32 bit or 64 bit syscall.
		 * fuword is MP aware.
		 */
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = fuword(params);
			params += sizeof(int);
		} else if (code == SYS___syscall) {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 */
			code = fuword(params);
			params += sizeof(quad_t);
		}
	}

	code &= p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg & SYF_ARGMASK;

#if 0
	if (p->p_sysent->sv_name[0] == 'L')
		kprintf("Linux syscall, code = %d\n", code);
#endif

	/*
	 * copyin is MP aware, but the tracing code is not
	 */
	if (narg && params) {
		error = copyin(params, (caddr_t)(&args.nosys.sysmsg + 1),
				narg * sizeof(register_t));
		if (error) {
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL)) {
				MAKEMPSAFE(have_mplock);
				ktrsyscall(lp, code, narg,
					(void *)(&args.nosys.sysmsg + 1));
			}
#endif
			goto bad;
		}
	}

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL)) {
		MAKEMPSAFE(have_mplock);
		ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1));
	}
#endif

	/*
	 * For traditional syscall code edx is left untouched when 32 bit
	 * results are returned.  Since edx is loaded from fds[1] when the 
	 * system call returns we pre-set it here.
	 */
	args.sysmsg_fds[0] = 0;
	args.sysmsg_fds[1] = frame->tf_edx;

	/*
	 * The syscall might manipulate the trap frame. If it does it
	 * will probably return EJUSTRETURN.
	 */
	args.sysmsg_frame = frame;

	STOPEVENT(p, S_SCE, narg);	/* MP aware */

	/*
	 * NOTE: All system calls run MPSAFE now.  The system call itself
	 *	 is responsible for getting the MP lock.
	 */
	error = (*callp->sy_call)(&args);

out:
	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		lp = curthread->td_lwp;
		frame->tf_eax = args.sysmsg_fds[0];
		frame->tf_edx = args.sysmsg_fds[1];
		frame->tf_eflags &= ~PSL_C;
		break;
	case ERESTART:
		/*
		 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
		 * int 0x80 is 2 bytes. We saved this in tf_err.
		 */
		frame->tf_eip -= frame->tf_err;
		break;
	case EJUSTRETURN:
		break;
	case EASYNC:
		panic("Unexpected EASYNC return value (for now)");
	default:
bad:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		frame->tf_eax = error;
		frame->tf_eflags |= PSL_C;
		break;
	}

	/*
	 * Traced syscall.  trapsignal() is not MP aware.
	 */
	if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
		MAKEMPSAFE(have_mplock);
		frame->tf_eflags &= ~PSL_T;
		trapsignal(lp, SIGTRAP, TRAP_TRACE);
	}

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(lp, frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET)) {
		MAKEMPSAFE(have_mplock);
		ktrsysret(lp, code, error, args.sysmsg_result);
	}
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	userexit(lp);
	/*
	 * Release the MP lock if we had to get it
	 */
	if (have_mplock)
		rel_mplock();
	KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("syscall: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(&td->td_toks_base == td->td_toks_stop,
		("syscall: extra tokens held after trap! %zd",
		td->td_toks_stop - &td->td_toks_base));
#endif
}
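
A standalone sketch of the error-translation step in the "bad:" path above: an emulation layer supplies sv_errtbl/sv_errsize so that native errno values can be remapped before being placed in the guest's %eax. The table contents here are invented purely for illustration; only the lookup logic mirrors the kernel code.

#include <stdio.h>

static const int demo_errtbl[] = { 0, 1, 2, 3, 13 };	/* hypothetical */
#define	DEMO_ERRSIZE	(sizeof(demo_errtbl) / sizeof(demo_errtbl[0]))

static int
translate_errno(int error)
{
	if (error >= (int)DEMO_ERRSIZE)
		return (-1);		/* XXX, as in the kernel code */
	return (demo_errtbl[error]);
}

int
main(void)
{
	printf("4 -> %d, 99 -> %d\n", translate_errno(4), translate_errno(99));
	return (0);
}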
Beispiel #29
static inline int
syscallenter(struct thread *td, struct syscall_args *sa)
{
	struct proc *p;
	int error, traced;

	PCPU_INC(cnt.v_syscall);
	p = td->td_proc;

	td->td_pticks = 0;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (p->p_flag & P_TRACED) {
		traced = 1;
		PROC_LOCK(p);
		td->td_dbgflags &= ~TDB_USERWR;
		td->td_dbgflags |= TDB_SCE;
		PROC_UNLOCK(p);
	} else
		traced = 0;
	error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(sa->code, sa->narg, sa->args);
#endif

	CTR6(KTR_SYSC,
"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)",
	    td, td->td_proc->p_pid, syscallname(p, sa->code),
	    sa->args[0], sa->args[1], sa->args[2]);

	if (error == 0) {
		STOPEVENT(p, S_SCE, sa->narg);
		if (p->p_flag & P_TRACED && p->p_stops & S_PT_SCE) {
			PROC_LOCK(p);
			ptracestop((td), SIGTRAP);
			PROC_UNLOCK(p);
		}
		if (td->td_dbgflags & TDB_USERWR) {
			/*
			 * Reread syscall number and arguments if
			 * debugger modified registers or memory.
			 */
			error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL))
				ktrsyscall(sa->code, sa->narg, sa->args);
#endif
			if (error != 0)
				goto retval;
		}

#ifdef CAPABILITY_MODE
		/*
		 * In capability mode, we only allow access to system calls
		 * flagged with SYF_CAPENABLED.
		 */
		if (IN_CAPABILITY_MODE(td) &&
		    !(sa->callp->sy_flags & SYF_CAPENABLED)) {
			error = ECAPMODE;
			goto retval;
		}
#endif

		error = syscall_thread_enter(td, sa->callp);
		if (error != 0)
			goto retval;

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered its probe
		 * callback and if there is a probe active for the
		 * syscall 'entry', process the probe.
		 */
		if (systrace_probe_func != NULL && sa->callp->sy_entry != 0)
			(*systrace_probe_func)(sa->callp->sy_entry, sa->code,
			    sa->callp, sa->args, 0);
#endif

		AUDIT_SYSCALL_ENTER(sa->code, td);
		error = (sa->callp->sy_call)(td, sa->args);
		AUDIT_SYSCALL_EXIT(error, td);

		/* Save the latest error return value. */
		td->td_errno = error;

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered its probe
		 * callback and if there is a probe active for the
		 * syscall 'return', process the probe.
		 */
		if (systrace_probe_func != NULL && sa->callp->sy_return != 0)
			(*systrace_probe_func)(sa->callp->sy_return, sa->code,
			    sa->callp, NULL, (error) ? -1 : td->td_retval[0]);
#endif
		syscall_thread_exit(td, sa->callp);
		CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx",
		    p, error, td->td_retval[0], td->td_retval[1]);
	}
 retval:
	if (traced) {
		PROC_LOCK(p);
		td->td_dbgflags &= ~TDB_SCE;
		PROC_UNLOCK(p);
	}
	(p->p_sysent->sv_set_syscall_retval)(td, error);
	return (error);
}
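
A hedged userland view of the CAPABILITY_MODE gate in syscallenter() above: after cap_enter(2), any syscall not flagged SYF_CAPENABLED, such as open(2) on a global path, returns ECAPMODE. Minimal sketch assuming FreeBSD's <sys/capsicum.h>.

#include <sys/capsicum.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	if (cap_enter() == -1) {	/* enter capability mode; one-way */
		perror("cap_enter");
		return (1);
	}
	if (open("/etc/passwd", O_RDONLY) == -1 && errno == ECAPMODE)
		printf("open blocked: ECAPMODE, as syscallenter() arranges\n");
	return (0);
}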
Beispiel #30
/*
 * Convert a pathname into a pointer to a vnode.
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * If the LOCKLEAF flag is set, a locked vnode is returned.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 */
int
namei(struct nameidata *ndp)
{
	struct filedesc *fdp;		/* pointer to file descriptor state */
	char *cp;			/* pointer into pathname argument */
	struct vnode *dp;		/* the directory we are searching */
	struct iovec aiov;		/* uio for reading symbolic links */
	struct uio auio;
	int error, linklen;
	struct componentname *cnp = &ndp->ni_cnd;
	struct proc *p = cnp->cn_proc;

	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred;
#ifdef DIAGNOSTIC
	if (!cnp->cn_cred || !cnp->cn_proc)
		panic("namei: bad cred/proc");
	if (cnp->cn_nameiop & (~OPMASK))
		panic("namei: nameiop contaminated with flags");
	if (cnp->cn_flags & OPMASK)
		panic("namei: flags contaminated with nameiops");
#endif
	fdp = cnp->cn_proc->p_fd;

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 */
	if ((cnp->cn_flags & HASBUF) == 0)
		cnp->cn_pnbuf = pool_get(&namei_pool, PR_WAITOK);
	if (ndp->ni_segflg == UIO_SYSSPACE)
		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
			    MAXPATHLEN, &ndp->ni_pathlen);
	else
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
			    MAXPATHLEN, &ndp->ni_pathlen);

	/*
	 * Fail on null pathnames
	 */
	if (error == 0 && ndp->ni_pathlen == 1)
		error = ENOENT;

	if (error) {
		pool_put(&namei_pool, cnp->cn_pnbuf);
		ndp->ni_vp = NULL;
		return (error);
	}

#ifdef KTRACE
	if (KTRPOINT(cnp->cn_proc, KTR_NAMEI))
		ktrnamei(cnp->cn_proc, cnp->cn_pnbuf);
#endif
#if NSYSTRACE > 0
	if (ISSET(cnp->cn_proc->p_flag, P_SYSTRACE))
		systrace_namei(ndp);
#endif

	/*
	 *  Strip trailing slashes, as requested
	 */
	if (cnp->cn_flags & STRIPSLASHES) {
		char *end = cnp->cn_pnbuf + ndp->ni_pathlen - 2;

		cp = end;
		while (cp >= cnp->cn_pnbuf && (*cp == '/'))
			cp--;

		/* Still some remaining characters in the buffer */
		if (cp >= cnp->cn_pnbuf) {
			ndp->ni_pathlen -= (end - cp);
			*(cp + 1) = '\0';
		}
	}

	ndp->ni_loopcnt = 0;

	/*
	 * Get starting point for the translation.
	 */
	if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL)
		ndp->ni_rootdir = rootvnode;
	/*
	 * Check if starting from root directory or current directory.
	 */
	if (cnp->cn_pnbuf[0] == '/') {
		dp = ndp->ni_rootdir;
		vref(dp);
	} else {
		dp = fdp->fd_cdir;
		vref(dp);
	}
	for (;;) {
		if (!dp->v_mount) {
			/* Give up if the directory is no longer mounted */
			pool_put(&namei_pool, cnp->cn_pnbuf);
			return (ENOENT);
		}
		cnp->cn_nameptr = cnp->cn_pnbuf;
		ndp->ni_startdir = dp;
		if ((error = lookup(ndp)) != 0) {
			pool_put(&namei_pool, cnp->cn_pnbuf);
			return (error);
		}
		/*
		 * If not a symbolic link, return search result.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				pool_put(&namei_pool, cnp->cn_pnbuf);
			else
				cnp->cn_flags |= HASBUF;
			return (0);
		}
		if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
			VOP_UNLOCK(ndp->ni_dvp, 0, p);
		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			break;
		}
		if (ndp->ni_pathlen > 1)
			cp = pool_get(&namei_pool, PR_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_procp = cnp->cn_proc;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
badlink:
			if (ndp->ni_pathlen > 1)
				pool_put(&namei_pool, cp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink;
		}
		if (ndp->ni_pathlen > 1) {
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			pool_put(&namei_pool, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;
		vput(ndp->ni_vp);
		dp = ndp->ni_dvp;
		/*
		 * Check if root directory should replace current directory.
		 */
		if (cnp->cn_pnbuf[0] == '/') {
			vrele(dp);
			dp = ndp->ni_rootdir;
			vref(dp);
		}
	}
	pool_put(&namei_pool, cnp->cn_pnbuf);
	vrele(ndp->ni_dvp);
	vput(ndp->ni_vp);
	ndp->ni_vp = NULL;
	return (error);
}
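
A hedged kernel-side sketch of the usual calling convention for namei() above, in the OpenBSD style of this era (NDINIT still takes a struct proc pointer). It is a fragment, not standalone code: it resolves a user-space path to a locked vnode via FOLLOW and LOCKLEAF, then releases it. The function name is invented for illustration.

static int
demo_user_path_to_vnode(struct proc *p, char *upath)
{
	struct nameidata nd;
	int error;

	/*
	 * Resolve a user-space path, following a trailing symlink, and
	 * return with the vnode locked (LOCKLEAF).
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, upath, p);
	if ((error = namei(&nd)) != 0)
		return (error);		/* namei() freed the path buffer */

	/* ... inspect nd.ni_vp here ... */

	vput(nd.ni_vp);			/* unlock and drop the reference */
	return (0);
}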