Example #1
/*
 * vfork_return
 *
 * Description:	"Return" to the parent vfork thread following execve/_exit;
 *		this is done by reassociating the parent process structure
 *		with the task, thread, and uthread.
 *
 * Parameters:	child_proc		Child process
 *		retval			System call return value array
 *		rval			Return value to present to parent
 *
 * Returns:	void
 *
 * Note:	The caller resumes or exits the parent, as appropriate, after
 *		calling this function.
 */
void
vfork_return(proc_t child_proc, int32_t *retval, int rval)
{
	proc_t parent_proc = child_proc->p_pptr;
	thread_t parent_thread = (thread_t)current_thread();
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);

	act_thread_catt(parent_uthread->uu_userstate);

	/* end vfork in parent */
	proc_vfork_end(parent_proc);

	/* REPATRIATE PARENT TASK, THREAD, UTHREAD */
	parent_uthread->uu_userstate = 0;
	parent_uthread->uu_flag &= ~UT_VFORK;
	/* restore thread-set-id state */
	if (parent_uthread->uu_flag & UT_WASSETUID) {
		parent_uthread->uu_flag |= UT_SETUID;
		parent_uthread->uu_flag &= ~UT_WASSETUID;
	}
	parent_uthread->uu_proc = 0;
	parent_uthread->uu_sigmask = parent_uthread->uu_vforkmask;
	child_proc->p_lflag  &= ~P_LINVFORK;
	child_proc->p_vforkact = (void *)0;

	thread_set_parent(parent_thread, rval);

	if (retval) {
		retval[0] = rval;
		retval[1] = 0;			/* mark parent */
	}

	return;
}
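
The note above leaves resuming the parent to the caller. A minimal caller sketch, assuming a parent_thread handle stashed when the vfork began (hypothetical names, not the literal execve()/_exit() call sites):

/*
 * Hypothetical caller sketch: after the vforked child finishes its
 * execve(), present the child's pid to the parent and wake the parent
 * thread that has been suspended since vfork().
 */
vfork_return(child_proc, NULL, child_proc->p_pid);
(void) thread_resume(parent_thread);	/* parent_thread: assumed saved at vfork() time */
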
Example #2
void
bsd_utaskbootstrap(void)
{
	thread_t thread;
	struct uthread *ut;

	/*
	 * Clone the bootstrap process from the kernel process, without
	 * inheriting either task characteristics or memory from the kernel.
	 */
	thread = cloneproc(TASK_NULL, COALITION_NULL, kernproc, FALSE, TRUE);

	/* Hold the reference as it will be dropped during shutdown */
	initproc = proc_find(1);
#if __PROC_INTERNAL_DEBUG
	if (initproc == PROC_NULL)
		panic("bsd_utaskbootstrap: initproc not set\n");
#endif
	/*
	 * Since we aren't going back out the normal way to our parent,
	 * we have to drop the transition locks explicitly.
	 */
	proc_signalend(initproc, 0);
	proc_transend(initproc, 0);

	ut = (struct uthread *)get_bsdthread_info(thread);
	ut->uu_sigmask = 0;
	act_set_astbsd(thread);
	proc_clear_return_wait(initproc, thread);
}
Example #3
struct label *
mac_thread_get_threadlabel(struct thread *thread)
{
	struct uthread *uthread = get_bsdthread_info(thread);

	return (mac_thread_get_uthreadlabel(uthread));
}
Example #4
void
bsdinit_task(void)
{
	proc_t p = current_proc();
	struct uthread *ut;
	thread_t thread;

	process_name("init", p);

	ux_handler_init();

	thread = current_thread();
	(void) host_set_exception_ports(host_priv_self(),
					EXC_MASK_ALL & ~(EXC_MASK_RPC_ALERT),	/* pilotfish (shark) needs this port */
					(mach_port_t) ux_exception_port,
					EXCEPTION_DEFAULT| MACH_EXCEPTION_CODES,
					0);

	ut = (uthread_t)get_bsdthread_info(thread);

	bsd_init_task = get_threadtask(thread);
	init_task_died = FALSE;

#if CONFIG_MACF
	mac_cred_label_associate_user(p->p_ucred);
#endif
	load_init_program(p);
	lock_trace = 1;
}
Example #5
/*
 * vfork
 *
 * Description:	vfork system call
 *
 * Parameters:	void			[no arguments]
 *
 * Retval:	0			(to child process)
 *		!0			pid of child (to parent process)
 *		-1			error (see "Returns:")
 *
 * Returns:	EAGAIN			Administrative limit reached
 *		EINVAL			vfork() called during vfork()
 *		ENOMEM			Failed to allocate new process
 *
 * Note:	After a successful call to this function, the parent process
 *		has its task, thread, and uthread lent to the child process,
 *		and control is returned to the caller; if this function is
 *		invoked as a system call, the return is to user space, and
 *		is effectively running on the child process.
 *
 *		Subsequent calls that operate on process state are permitted,
 *		though discouraged, and will operate on the child process; any
 *		operations on the task, thread, or uthread will result in
 *		changes in the parent state, and, if inheritable, the child
 *		state, when a task, thread, and uthread are realized for the
 *		child process at execve() time, will also be effected.  Given
 *		this, it's recommended that people use the posix_spawn() call
 *		instead.
 *
 * BLOCK DIAGRAM OF VFORK
 *
 * Before:
 *
 *     ,----------------.         ,-------------.
 *     |                |   task  |             |
 *     | parent_thread  | ------> | parent_task |
 *     |                | <.list. |             |
 *     `----------------'         `-------------'
 *    uthread |  ^             bsd_info |  ^
 *            v  | vc_thread            v  | task
 *     ,----------------.         ,-------------.
 *     |                |         |             |
 *     | parent_uthread | <.list. | parent_proc | <-- current_proc()
 *     |                |         |             |
 *     `----------------'         `-------------'
 *    uu_proc |
 *            v
 *           NULL
 *
 * After:
 *
 *                 ,----------------.         ,-------------.
 *                 |                |   task  |             |
 *          ,----> | parent_thread  | ------> | parent_task |
 *          |      |                | <.list. |             |
 *          |      `----------------'         `-------------'
 *          |     uthread |  ^             bsd_info |  ^
 *          |             v  | vc_thread            v  | task
 *          |      ,----------------.         ,-------------.
 *          |      |                |         |             |
 *          |      | parent_uthread | <.list. | parent_proc |
 *          |      |                |         |             |
 *          |      `----------------'         `-------------'
 *          |     uu_proc |  . list
 *          |             v  v
 *          |      ,----------------.
 *          `----- |                |
 *      p_vforkact | child_proc     | <-- current_proc()
 *                 |                |
 *                 `----------------'
 */
int
vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval)
{
	thread_t child_thread;
	int err;

	if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK, NULL)) != 0) {
		retval[1] = 0;
	} else {
		uthread_t ut = get_bsdthread_info(current_thread());
		proc_t child_proc = ut->uu_proc;

		retval[0] = child_proc->p_pid;
		retval[1] = 1;		/* flag child return for user space */

		/*
		 * Drop the signal lock on the child which was taken on our
		 * behalf by forkproc()/cloneproc() to prevent signals being
		 * received by the child in a partially constructed state.
		 */
		proc_signalend(child_proc, 0);
		proc_transend(child_proc, 0);

		proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid);
		DTRACE_PROC1(create, proc_t, child_proc);
		ut->uu_flag &= ~UT_VFORKING;
	}

	return (err);
}
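
The two retval slots land in eax/edx on the way back to user space (see unix_syscall() below), so the vfork stub in libc can tell the two returns apart: retval[1] is 1 on the child-side return above, and vfork_return() later rewrites it to 0 for the parent. A sketch of the user-space half of that convention (hypothetical helper, not the actual Libsyscall stub):

/*
 * eax = retval[0] (child pid), edx = retval[1] (1 = child, 0 = parent).
 * Assumed demonstration only.
 */
static pid_t
vfork_stub_epilogue(int eax, int edx)
{
	if (edx != 0)
		return (0);	/* child side: vfork() returns 0 */
	return (eax);		/* parent side: vfork() returns the child's pid */
}
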
Example #6
File: stubs.c Project: 0xffea/xnu
int *
get_bsduthreadrval(thread_t th)
{
	struct uthread *ut;

	ut = get_bsdthread_info(th);
	return (&ut->uu_rval[0]);
}
Example #7
/*
 * dtrace_CRED() can be called from probe context. We cannot simply call kauth_cred_get() since
 * that function may try to resolve a lazy credential binding, which entails taking the proc_lock.
 */ 
cred_t *
dtrace_CRED(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	if (uthread == NULL)
		return NULL;
	else
		return uthread->uu_ucred; /* May return NOCRED which is defined to be 0 */
}
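
Probe-context consumers therefore substitute dtrace_CRED() where ordinary kernel code would call kauth_cred_get(). A sketch, assuming the illumos-style crgetuid() accessor from the DTrace compatibility shims:

/* probe-context caller sketch; crgetuid() is an assumption here */
cred_t *cr = dtrace_CRED();
uid_t uid = (cr != NULL) ? crgetuid(cr) : (uid_t)-1;	/* NULL also covers NOCRED (0) */
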
Example #8
static void
_sleep_continue(void)
{
	register struct proc *p;
	register thread_t self = current_act();
	struct uthread * ut;
	int sig, catch;
	int error = 0;

	ut = get_bsdthread_info(self);
	catch = ut->uu_pri & PCATCH;
Example #9
static void
_sleep_continue( __unused void *parameter, wait_result_t wresult)
{
	struct proc *p = current_proc();
	thread_t self  = current_thread();
	struct uthread * ut;
	int sig, catch;
	int error = 0;
	int dropmutex, spinmutex;

	ut = get_bsdthread_info(self);
	catch     = ut->uu_pri & PCATCH;
Example #10
void 
afs_osi_fullSigRestore()
{
#ifndef AFS_DARWIN80_ENV
    struct uthread *user_thread = (struct uthread *)get_bsdthread_info(current_act());
       
    /* Protect original sigmask */
    if (user_thread->uu_oldmask) {
	/* Restore original sigmask */
	user_thread->uu_sigmask = user_thread->uu_oldmask;
	/* Clear the oldmask */
	user_thread->uu_oldmask = (sigset_t)0;
    }
#endif
}
Example #11
File: stubs.c Project: 0xffea/xnu
void *
get_bsduthreadarg(thread_t th)
{
	void *arg_ptr;
	struct uthread *ut;

	ut = get_bsdthread_info(th);

	if (ml_thread_is64bit(th) == TRUE)
	        arg_ptr = (void *)saved_state64(find_user_regs(th));
	else
		arg_ptr = (void *)(ut->uu_arg);

	return (arg_ptr);
}
Example #12
void 
afs_osi_fullSigMask()
{
#ifndef AFS_DARWIN80_ENV
    struct uthread *user_thread = (struct uthread *)get_bsdthread_info(current_act());
       
    /* Protect original sigmask */
    if (!user_thread->uu_oldmask) {
	/* Back up current sigmask */
	user_thread->uu_oldmask = user_thread->uu_sigmask;
	/* Mask all signals */
	user_thread->uu_sigmask = ~(sigset_t)0;
    }
#endif
}
Example #13
int throttle_get_io_policy(struct uthread **ut)
{
	int policy = IOPOL_DEFAULT;
	proc_t p = current_proc();

	*ut = get_bsdthread_info(current_thread());
		
	if (p != NULL)
		policy = p->p_iopol_disk;

	if (*ut != NULL) {
		// the I/O policy of the thread overrides that of the process
		// unless the I/O policy of the thread is default
		if ((*ut)->uu_iopol_disk != IOPOL_DEFAULT)
			policy = (*ut)->uu_iopol_disk;
	}
	return policy;
}
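
A caller sketch of the precedence rule in the comment (the thread policy wins unless it is IOPOL_DEFAULT); IOPOL_THROTTLE is the standard constant from sys/resource.h, and the surrounding I/O-issue path is elided:

struct uthread *ut;
int policy = throttle_get_io_policy(&ut);

if (policy == IOPOL_THROTTLE) {
	/* tag this I/O as low priority before issuing it */
}
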
Example #14
void throttle_lowpri_io(boolean_t ok_to_sleep)
{
	int i;
	int max_try_num;
	struct uthread *ut;
	struct _throttle_io_info_t *info;

	ut = get_bsdthread_info(current_thread());

	if ((ut->uu_lowpri_window == 0) || (ut->uu_throttle_info == NULL))
		goto done;

	info = ut->uu_throttle_info;
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
		     ut->uu_lowpri_window, ok_to_sleep, 0, 0, 0);

	if (ok_to_sleep == TRUE) {
		max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, info->numthreads_throttling);

		for (i=0; i<max_try_num; i++) {
			if (throttle_io_will_be_throttled_internal(ut->uu_lowpri_window, info)) {
				IOSleep(LOWPRI_SLEEP_INTERVAL);
				DEBUG_ALLOC_THROTTLE_INFO("sleeping because of info = %p\n", info, info );
			} else {
				break;
			}
		}
	}
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
		     ut->uu_lowpri_window, i*5, 0, 0, 0);
	SInt32 oldValue;
	oldValue = OSDecrementAtomic(&info->numthreads_throttling);

	if (oldValue <= 0) {
		panic("%s: numthreads negative", __func__);
	}
done:
	ut->uu_lowpri_window = 0;
	if (ut->uu_throttle_info)
		throttle_info_rel(ut->uu_throttle_info);
	ut->uu_throttle_info = NULL;
}
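
For a sense of scale: assuming, purely for illustration, lowpri_max_waiting_msecs = 200 and LOWPRI_SLEEP_INTERVAL = 5 (ms, consistent with the i*5 in the trailing KERNEL_DEBUG above), a thread sharing the throttle window with three others gets max_try_num = 200 / 5 * MAX(1, 4) = 160 iterations, i.e. up to 800 ms of accumulated IOSleep() before falling through; the tunables' actual defaults may differ.
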
Example #15
static uint64_t
machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
{
#pragma unused(arg,id,parg,aframes)     /* __APPLE__ */
	uint64_t val = 0;
	syscall_arg_t *stack = (syscall_arg_t *)NULL;

	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
	
	if (uthread)
		stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
	
	if (!stack)
		return(0);

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	/* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
	val = (uint64_t)*(stack+argno);	
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	return (val);
}
Example #16
/*
 * vfork_return
 *
 * Description:	"Return" to the parent vfork thread following execve/_exit;
 *		this is done by reassociating the parent process structure
 *		with the task, thread, and uthread.
 *
 *		Refer to the ASCII art above vfork() to figure out the
 *		state we're undoing.
 *
 * Parameters:	child_proc		Child process
 *		retval			System call return value array
 *		rval			Return value to present to parent
 *
 * Returns:	void
 *
 * Notes:	The caller resumes or exits the parent, as appropriate, after
 *		calling this function.
 */
void
vfork_return(proc_t child_proc, int32_t *retval, int rval)
{
	task_t parent_task = get_threadtask(child_proc->p_vforkact);
	proc_t parent_proc = get_bsdtask_info(parent_task);
	thread_t th = current_thread();
	uthread_t uth = get_bsdthread_info(th);

	act_thread_catt(uth->uu_userstate);

	/* clear vfork state in parent proc structure */
	proc_vfork_end(parent_proc);

	/* REPATRIATE PARENT TASK, THREAD, UTHREAD */
	uth->uu_userstate = 0;
	uth->uu_flag &= ~UT_VFORK;
	/* restore thread-set-id state */
	if (uth->uu_flag & UT_WASSETUID) {
		uth->uu_flag |= UT_SETUID;
		uth->uu_flag &= ~UT_WASSETUID;
	}
	uth->uu_proc = 0;
	uth->uu_sigmask = uth->uu_vforkmask;

	proc_lock(child_proc);
	child_proc->p_lflag &= ~P_LINVFORK;
	child_proc->p_vforkact = 0;
	proc_unlock(child_proc);

	thread_set_parent(th, rval);

	if (retval) {
		retval[0] = rval;
		retval[1] = 0;			/* mark parent */
	}
}
Example #17
/*
 * Convert a pathname into a pointer to a locked inode.
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 *
 * Returns:	0			Success
 *		ENOENT			No such file or directory
 *		ELOOP			Too many levels of symbolic links
 *		ENAMETOOLONG		Filename too long
 *		copyinstr:EFAULT	Bad address
 *		copyinstr:ENAMETOOLONG	Filename too long
 *		lookup:EBADF		Bad file descriptor
 *		lookup:EROFS
 *		lookup:EACCES
 *		lookup:EPERM
 *		lookup:ERECYCLE		Vnode was recycled from underneath us in
 *					lookup; re-drive the lookup from this point.
 *		lookup:???
 *		VNOP_READLINK:???
 */
int
namei(struct nameidata *ndp)
{
	struct filedesc *fdp;	/* pointer to file descriptor state */
	struct vnode *dp;	/* the directory we are searching */
	struct vnode *usedvp = ndp->ni_dvp;	/* store pointer to vp in case we must
						   loop due to heavy vnode pressure */
	u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
	int error;
	struct componentname *cnp = &ndp->ni_cnd;
	vfs_context_t ctx = cnp->cn_context;
	proc_t p = vfs_context_proc(ctx);
#if CONFIG_AUDIT
/* XXX ut should be from context */
	uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());
#endif

	fdp = p->p_fd;

#if DIAGNOSTIC
	if (!vfs_context_ucred(ctx) || !p)
		panic ("namei: bad cred/proc");
	if (cnp->cn_nameiop & (~OPMASK))
		panic ("namei: nameiop contaminated with flags");
	if (cnp->cn_flags & OPMASK)
		panic ("namei: flags contaminated with nameiops");
#endif

	/*
	 * A compound VNOP found something that needs further processing:
	 * either a trigger vnode, a covered directory, or a symlink.
	 */
	if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
		int rdonly, vbusyflags, keep_going, wantparent;

		rdonly = cnp->cn_flags & RDONLY;
		vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
		keep_going = 0;
		wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);

		ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);

		error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags, 
				&keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
		if (error)
			goto out_drop;
		if (keep_going) {
			if ((cnp->cn_flags & ISSYMLINK) == 0) {
				panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)\n", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
			}
			goto continue_symlink;
		}

		return 0;

	}

vnode_recycled:

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 */
	if ((cnp->cn_flags & HASBUF) == 0) {
		cnp->cn_pnbuf = ndp->ni_pathbuf;
		cnp->cn_pnlen = PATHBUFLEN;
	}
#if LP64_DEBUG
	if ((UIO_SEG_IS_USER_SPACE(ndp->ni_segflg) == 0)
		&& (ndp->ni_segflg != UIO_SYSSPACE)
		&& (ndp->ni_segflg != UIO_SYSSPACE32)) {
		panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__); 
	}
#endif /* LP64_DEBUG */

retry_copy:
	if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
			    cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen);
	} else {
		error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
			    cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen);
	}
	if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
		MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		if (cnp->cn_pnbuf == NULL) {
			error = ENOMEM;
			goto error_out;
		}

		cnp->cn_flags |= HASBUF;
		cnp->cn_pnlen = MAXPATHLEN;
		
		goto retry_copy;
	}
	if (error)
	        goto error_out;

#if CONFIG_VOLFS
 	/*
	 * Check for legacy volfs style pathnames.
	 *
	 * For compatibility reasons we currently allow these paths,
	 * but future versions of the OS may not support them.
	 */
	if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN &&
	    cnp->cn_pnbuf[0] == '/' &&
	    cnp->cn_pnbuf[1] == '.' &&
	    cnp->cn_pnbuf[2] == 'v' &&
	    cnp->cn_pnbuf[3] == 'o' &&
	    cnp->cn_pnbuf[4] == 'l' &&
	    cnp->cn_pnbuf[5] == '/' ) {
		char * realpath;
		int realpath_err;
		/* Attempt to resolve a legacy volfs style pathname. */
		MALLOC_ZONE(realpath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		if (realpath) {
			/* 
			 * We only error out on the ENAMETOOLONG cases where we know that
			 * vfs_getrealpath translation succeeded but the path could not fit into
			 * MAXPATHLEN characters.  In other failure cases, we may be dealing with a path
			 * that legitimately looks like /.vol/1234/567 and is not meant to be translated
			 */
			if ((realpath_err= vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, MAXPATHLEN, ctx))) {
				FREE_ZONE(realpath, MAXPATHLEN, M_NAMEI);
				if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG){
					error = ENAMETOOLONG;
					goto error_out;
				}
			} else {
				if (cnp->cn_flags & HASBUF) {
					FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
				}
				cnp->cn_pnbuf = realpath;
				cnp->cn_pnlen = MAXPATHLEN;
				ndp->ni_pathlen = strlen(realpath) + 1;
				cnp->cn_flags |= HASBUF | CN_VOLFSPATH;
			}
		}
	}
#endif /* CONFIG_VOLFS */

#if CONFIG_AUDIT
	/* If we are auditing the kernel pathname, save the user pathname */
	if (cnp->cn_flags & AUDITVNPATH1)
		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1); 
	if (cnp->cn_flags & AUDITVNPATH2)
		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2); 
#endif /* CONFIG_AUDIT */

	/*
	 * Do not allow empty pathnames
	 */
	if (*cnp->cn_pnbuf == '\0') {
		error = ENOENT;
		goto error_out;
	}
	ndp->ni_loopcnt = 0;

	/*
	 * determine the starting point for the translation.
	 */
	if ((ndp->ni_rootdir = fdp->fd_rdir) == NULLVP) {
	        if ( !(fdp->fd_flags & FD_CHROOT))
		        ndp->ni_rootdir = rootvnode;
	}
	cnp->cn_nameptr = cnp->cn_pnbuf;

	ndp->ni_usedvp = NULLVP;

	if (*(cnp->cn_nameptr) == '/') {
	        while (*(cnp->cn_nameptr) == '/') {
		        cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
		dp = ndp->ni_rootdir;
	} else if (cnp->cn_flags & USEDVP) {
	        dp = ndp->ni_dvp;
		ndp->ni_usedvp = dp;
	} else
	        dp = vfs_context_cwd(ctx);

	if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
	        error = ENOENT;
		goto error_out;
	}
	ndp->ni_dvp = NULLVP;
	ndp->ni_vp  = NULLVP;

	for (;;) {
		ndp->ni_startdir = dp;

		if ( (error = lookup(ndp)) ) {
			goto error_out;
		}
		/*
		 * Check for symbolic link
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			return (0);
		}

continue_symlink:
		/* Gives us a new path to process, and a starting dir */
		error = lookup_handle_symlink(ndp, &dp, ctx);
		if (error != 0) {
			break;
		}
	}
	/*
	 * only come here if we fail to handle a SYMLINK...
	 * if either ni_dvp or ni_vp is non-NULL, then
	 * we need to drop the iocount that was picked
	 * up in the lookup routine
	 */
out_drop:
	if (ndp->ni_dvp)
	        vnode_put(ndp->ni_dvp);
	if (ndp->ni_vp)
	        vnode_put(ndp->ni_vp);
 error_out:
	if ( (cnp->cn_flags & HASBUF) ) {
		cnp->cn_flags &= ~HASBUF;
		FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
	}
	cnp->cn_pnbuf = NULL;
	ndp->ni_vp = NULLVP;
	ndp->ni_dvp = NULLVP;
	if (error == ERECYCLE){
		/* vnode was recycled underneath us. re-drive lookup to start at 
		   the beginning again, since recycling invalidated last lookup*/
		ndp->ni_cnd.cn_flags = cnpflags;
		ndp->ni_dvp = usedvp;
		goto vnode_recycled;
	}


	return (error);
}
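
For reference, the usual calling pattern around namei(): initialize with NDINIT(), call namei(), consume ni_vp, then release the nameidata with nameidone(). A sketch, assuming the NDINIT() variant from the compound-VNOP era of this file (the macro's argument list has changed across releases; path and ctx are assumed caller state):

/*
 * Hypothetical caller sketch.  On success namei() returns with an
 * iocount on nd.ni_vp; the caller drops it with vnode_put().
 */
struct nameidata nd;
vnode_t vp;
int error;

NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
    UIO_USERSPACE, path, ctx);
if ((error = namei(&nd)) != 0)
	return (error);
vp = nd.ni_vp;
nameidone(&nd);

/* ... use vp ... */
vnode_put(vp);
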
Example #18
/*
 * Function:	unix_syscall
 *
 * Inputs:	regs	- pointer to i386 save area
 *
 * Outputs:	none
 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;
	unsigned int		code;
	struct sysent		*callp;

	int			error;
	vm_offset_t		params;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state32_t	*regs;
	boolean_t		is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if DEBUG
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else 
		p = (struct proc *)get_bsdtask_info(current_task());

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->eax = EPERM;
		regs->efl |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
							  code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
	params = (vm_offset_t) (regs->uesp + sizeof (int));

	regs->efl &= ~(EFL_CF);

	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	if (__improbable(callp == sysent)) {
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}

	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	 nargs;

		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		nargs = callp->sy_arg_bytes;
		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		if (__probable(code != 180)) {
	        	int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

#if CONFIG_REQUIRES_U32_MUNGING
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
        uthread->uu_iocount = 0;
        uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
        if (uthread->uu_iocount)
                printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */

		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
		    regs->eax = error;
		    regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
		    regs->eax = uthread->uu_rval[0];
		    regs->edx = uthread->uu_rval[1];
		} 
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
	        /*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

	thread_exception_return();
	/* NOTREACHED */
}
Example #19
void
unix_syscall_return(int error)
{
	thread_t		thread;
	struct uthread		*uthread;
	struct proc *p;
	unsigned int code;
	struct sysent *callp;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	pal_register_cache_state(thread, DIRTY);

	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(find_user_regs(thread));

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			/*
			 * repeat the syscall
			 */
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else { /* (not error) */

				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
				case _SYSCALL_RET_UINT64_T:
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					regs->rdx = 0;
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
					break;
				}
				regs->isf.rflags &= ~EFL_CF;
			} 
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%llu,%llu)\n",
			error, regs->rax, regs->rdx);
	} else {
		x86_saved_state32_t	*regs;

		regs = saved_state32(find_user_regs(thread));

		regs->efl &= ~(EFL_CF);

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
			} 
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%u,%u)\n",
			error, regs->eax, regs->edx);
	}


	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (uthread->uu_lowpri_window) {
	        /*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (code != 180)
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
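
The _SYSCALL_RET_OFF_T/.../_SYSCALL_RET_UINT64_T cases above pick a 64-bit result back out of the two 32-bit uu_rval slots it was stored across. A stand-alone sketch of that layout (user-space demonstration, assumes a little-endian machine; not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	int32_t uu_rval[2];
	int64_t off = 0x100000004LL;	/* e.g. a large lseek() offset */

	/* the handler stores the 64-bit value across rval[0]/rval[1];
	 * rax (or eax/edx) then picks the halves back up */
	memcpy(uu_rval, &off, sizeof(off));
	printf("rval[0]=0x%08x rval[1]=0x%08x\n",
	    (uint32_t)uu_rval[0], (uint32_t)uu_rval[1]);
	return (0);
}
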
Example #20
void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t	thread;
	void			*vt;
	unsigned int	code;
	struct sysent	*callp;
	int		args_in_regs;
	boolean_t	args_start_at_rdi;
	int		error;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state64_t *regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
#if	DEBUG
	if (regs->rax == 0x2000800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else 
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: code=%d(%s) rip=%llx\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	vt = (void *)uthread->uu_arg;

	if (__improbable(callp == sysent)) {
	        /*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		args_start_at_rdi = FALSE;
		args_in_regs = 5;
	} else {
		args_start_at_rdi = TRUE;
		args_in_regs = 6;
	}

	if (callp->sy_narg != 0) {
		assert(callp->sy_narg <= 8); /* size of uu_arg */

		args_in_regs = MIN(args_in_regs, callp->sy_narg);
		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));


		if (code != 180) {
			uint64_t *ip = (uint64_t *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
		}

		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
        uthread->uu_iocount = 0;
        uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
        if (uthread->uu_iocount)
               printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */
	
	if (__improbable(error == ERESTART)) {
		/*
		 * all system calls come through via the syscall instruction
		 * in 64-bit mode... it's 2 bytes in length;
		 * move the user's pc back to repeat the syscall:
		 */
		pal_syscall_restart( thread, state );
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */

			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
			        regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;
		} 
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: error=%d retval=(%llu,%llu)\n",
		error, regs->rax, regs->rdx);
	
	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
	        /*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
Example #21
/* ARGSUSED */
int
sigreturn(
	  struct proc * p,
	  struct sigreturn_args * uap,
	  __unused int *retval)
{
	union {
		user_ucontext32_t uc32;
#if defined(__arm64__)
		user_ucontext64_t uc64;
#endif
	} uctx;

	union { 
		mcontext32_t mc32;
#if defined(__arm64__)
		mcontext64_t mc64;
#endif
	} mctx;

	struct sigacts *ps = p->p_sigacts;
	int             error, sigmask = 0, onstack = 0;
	thread_t        th_act;
	struct uthread *ut;
	uint32_t        sigreturn_validation;
	user_addr_t     token_uctx;
	kern_return_t   kr;

	th_act = current_thread();
	ut = (struct uthread *) get_bsdthread_info(th_act);

	if (proc_is64bit_data(p)) {
#if defined(__arm64__)
		error = sigreturn_copyin_ctx64(&uctx.uc64, &mctx.mc64, uap->uctx);
		if (error != 0) {
			return error;
		}

		onstack = uctx.uc64.uc_onstack;
		sigmask = uctx.uc64.uc_sigmask;
#else
		panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
	} else {
		error = sigreturn_copyin_ctx32(&uctx.uc32, &mctx.mc32, uap->uctx);
		if (error != 0) {
			return error;
		}

		onstack = uctx.uc32.uc_onstack;
		sigmask = uctx.uc32.uc_sigmask;
	}

	if ((onstack & 01))
		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
	else
		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;

	ut->uu_sigmask = sigmask & ~sigcantmask;
	if (ut->uu_siglist & ~ut->uu_sigmask)
		signal_setast(current_thread());

	sigreturn_validation = atomic_load_explicit(
			&ps->ps_sigreturn_validation, memory_order_relaxed);
	token_uctx = uap->uctx;
	kr = machine_thread_siguctx_pointer_convert_to_user(th_act, &token_uctx);
	assert(kr == KERN_SUCCESS);

	if (proc_is64bit_data(p)) {
#if defined(__arm64__)
		user64_addr_t token;
		token = (user64_addr_t)token_uctx ^ (user64_addr_t)ps->ps_sigreturn_token;
		if ((user64_addr_t)uap->token != token) {
#if DEVELOPMENT || DEBUG
			printf("process %s[%d] sigreturn token mismatch: received 0x%llx expected 0x%llx\n",
					p->p_comm, p->p_pid, (user64_addr_t)uap->token, token);
#endif /* DEVELOPMENT || DEBUG */
			if (sigreturn_validation != PS_SIGRETURN_VALIDATION_DISABLED) {
				return EINVAL;
			}
		}
		error = sigreturn_set_state64(th_act, &mctx.mc64);
		if (error != 0) {
#if DEVELOPMENT || DEBUG
		printf("process %s[%d] sigreturn set_state64 error %d\n",
				p->p_comm, p->p_pid, error);
#endif /* DEVELOPMENT || DEBUG */
			return error;
		}
#else
		panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
	} else {
		user32_addr_t token;
		token = (user32_addr_t)token_uctx ^ (user32_addr_t)ps->ps_sigreturn_token;
		if ((user32_addr_t)uap->token != token) {
#if DEVELOPMENT || DEBUG
			printf("process %s[%d] sigreturn token mismatch: received 0x%x expected 0x%x\n",
					p->p_comm, p->p_pid, (user32_addr_t)uap->token, token);
#endif /* DEVELOPMENT || DEBUG */
			if (sigreturn_validation != PS_SIGRETURN_VALIDATION_DISABLED) {
				return EINVAL;
			}
		}
		error = sigreturn_set_state32(th_act, &mctx.mc32);
		if (error != 0) {
#if DEVELOPMENT || DEBUG
		printf("process %s[%d] sigreturn sigreturn_set_state32 error %d\n",
				p->p_comm, p->p_pid, error);
#endif /* DEVELOPMENT || DEBUG */
			return error;
		}
	}

	return (EJUSTRETURN);
}
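
The token test above is the receiving half of a simple XOR scheme: sendsig() computes token = uctx_pointer ^ ps_sigreturn_token when it builds the signal frame (see sendsig() below), and sigreturn() recomputes the same XOR over the pointer it was handed. A minimal sketch of the symmetry (hypothetical helper, not an XNU function):

/*
 * sendsig():   token = uctx ^ secret
 * sigreturn(): valid iff (uctx ^ secret) == token
 * Forging a frame requires knowing the per-process secret
 * (ps_sigreturn_token).
 */
static int
sigreturn_token_ok(uint64_t uctx, uint64_t secret, uint64_t token)
{
	return ((uctx ^ secret) == token);
}
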
Example #22
/*
 * Send an interrupt to process.
 *
 */
void
sendsig(
	struct proc * p,
	user_addr_t catcher,
	int sig,
	int mask,
	__unused uint32_t code
)
{
	union {
		struct ts32 {
			arm_thread_state_t ss;
		} ts32;
#if defined(__arm64__)
		struct ts64 {
			arm_thread_state64_t ss;
		} ts64;
#endif
	} ts;
	union { 
		struct user_sigframe32 uf32;
#if defined(__arm64__)
		struct user_sigframe64 uf64;
#endif
	} user_frame;

	user_siginfo_t sinfo;
	user_addr_t 	sp = 0, trampact;
	struct sigacts *ps = p->p_sigacts;
	int             oonstack, infostyle;
	thread_t        th_act;
	struct uthread *ut;
	user_size_t	stack_size = 0;
	user_addr_t     p_uctx, token_uctx;
	kern_return_t   kr;

	th_act = current_thread();
	ut = get_bsdthread_info(th_act);

	bzero(&ts, sizeof(ts));
	bzero(&user_frame, sizeof(user_frame));

	if (p->p_sigacts->ps_siginfo & sigmask(sig))
		infostyle = UC_FLAVOR;
	else
		infostyle = UC_TRAD;

	trampact = ps->ps_trampact[sig];
	oonstack = ps->ps_sigstk.ss_flags & SA_ONSTACK;

	/*
	 * Get sundry thread state.
	 */
	if (proc_is64bit_data(p)) {
#ifdef __arm64__
		if (sendsig_get_state64(th_act, &ts.ts64.ss, &user_frame.uf64.mctx) != 0) {
			goto bad2;
		}
#else
	panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
	} else {
		if (sendsig_get_state32(th_act, &ts.ts32.ss, &user_frame.uf32.mctx) != 0) {
			goto bad2;
		}
	}

	/*
	 * Figure out where our new stack lives.
	 */
	if ((ps->ps_flags & SAS_ALTSTACK) && !oonstack &&
	    (ps->ps_sigonstack & sigmask(sig))) {
		sp = ps->ps_sigstk.ss_sp;
		sp += ps->ps_sigstk.ss_size;
		stack_size = ps->ps_sigstk.ss_size;
		ps->ps_sigstk.ss_flags |= SA_ONSTACK;
	} else {
		/*
		 * Get stack pointer, and allocate enough space
		 * for signal handler data.
		 */
		if (proc_is64bit_data(p)) {
#if defined(__arm64__)
			sp = CAST_USER_ADDR_T(ts.ts64.ss.sp);
			sp = (sp - sizeof(user_frame.uf64) - C_64_REDZONE_LEN) & ~0xf; /* Make sure to align to 16 bytes and respect red zone */
#else
			panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
		} else {
			sp = CAST_USER_ADDR_T(ts.ts32.ss.sp);
			sp -= sizeof(user_frame.uf32);
#if defined(__arm__) && (__BIGGEST_ALIGNMENT__ > 4)
			sp &= ~0xf; /* Make sure to align to 16 bytes for armv7k */
#endif
		}
	}

	proc_unlock(p);

	/*
	 * Fill in ucontext (points to mcontext, i.e. thread states).
	 */
	if (proc_is64bit_data(p)) {
#if defined(__arm64__)
		sendsig_fill_uctx64(&user_frame.uf64.uctx, oonstack, mask, sp, (user64_size_t)stack_size,
				(user64_addr_t)&((struct user_sigframe64*)sp)->mctx);
#else
		panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
	} else {
		sendsig_fill_uctx32(&user_frame.uf32.uctx, oonstack, mask, sp, (user32_size_t)stack_size, 
				(user32_addr_t)&((struct user_sigframe32*)sp)->mctx);
	}

	/*
	 * Setup siginfo.
	 */
	bzero((caddr_t) & sinfo, sizeof(sinfo));
	sinfo.si_signo = sig;

	if (proc_is64bit_data(p)) {
#if defined(__arm64__)
		sinfo.si_addr = ts.ts64.ss.pc;
		sinfo.pad[0] = ts.ts64.ss.sp;
#else
		panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
	} else {
		sinfo.si_addr = ts.ts32.ss.pc;
		sinfo.pad[0] = ts.ts32.ss.sp;
	}

	switch (sig) {
	case SIGILL:
#ifdef	BER_XXX
		if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_ILL_INS_BIT)))
			sinfo.si_code = ILL_ILLOPC;
		else if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_PRV_INS_BIT)))
			sinfo.si_code = ILL_PRVOPC;
		else if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_TRAP_BIT)))
			sinfo.si_code = ILL_ILLTRP;
		else
			sinfo.si_code = ILL_NOOP;
#else
		sinfo.si_code = ILL_ILLTRP;
#endif
		break;

	case SIGFPE:
		break;

	case SIGBUS:
		if (proc_is64bit_data(p)) {
#if defined(__arm64__)
			sinfo.si_addr = user_frame.uf64.mctx.es.far;
#else
			panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
		} else {
			sinfo.si_addr = user_frame.uf32.mctx.es.far;
		}

		sinfo.si_code = BUS_ADRALN;
		break;

	case SIGSEGV:
		if (proc_is64bit_data(p)) {
#if defined(__arm64__)
			sinfo.si_addr = user_frame.uf64.mctx.es.far;
#else
			panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
		} else {
			sinfo.si_addr = user_frame.uf32.mctx.es.far;
		}

#ifdef	BER_XXX
		/* First check in srr1 and then in dsisr */
		if (mctx.ss.srr1 & (1 << (31 - DSISR_PROT_BIT)))
			sinfo.si_code = SEGV_ACCERR;
		else if (mctx.es.dsisr & (1 << (31 - DSISR_PROT_BIT)))
			sinfo.si_code = SEGV_ACCERR;
		else
			sinfo.si_code = SEGV_MAPERR;
#else
		sinfo.si_code = SEGV_ACCERR;
#endif
		break;

	default:
	{
		int status_and_exitcode;

		/*
		 * All other signals need to fill out a minimum set of
		 * information for the siginfo structure passed into
		 * the signal handler, if SA_SIGINFO was specified.
		 *
		 * p->si_status actually contains both the status and
		 * the exit code; we save it off in its own variable
		 * for later breakdown.
		 */
		proc_lock(p);
		sinfo.si_pid = p->si_pid;
		p->si_pid = 0;
		status_and_exitcode = p->si_status;
		p->si_status = 0;
		sinfo.si_uid = p->si_uid;
		p->si_uid = 0;
		sinfo.si_code = p->si_code;
		p->si_code = 0;
		proc_unlock(p);
		if (sinfo.si_code == CLD_EXITED) {
			if (WIFEXITED(status_and_exitcode))
				sinfo.si_code = CLD_EXITED;
			else if (WIFSIGNALED(status_and_exitcode)) {
				if (WCOREDUMP(status_and_exitcode)) {
					sinfo.si_code = CLD_DUMPED;
					status_and_exitcode = W_EXITCODE(status_and_exitcode,status_and_exitcode);
				} else {
					sinfo.si_code = CLD_KILLED;
					status_and_exitcode = W_EXITCODE(status_and_exitcode,status_and_exitcode);
				}
			}
		}
		/*
		 * The recorded status contains the exit code and the
		 * signal information, but the information to be passed
		 * in the siginfo to the handler is supposed to only
		 * contain the status, so we have to shift it out.
		 */
		sinfo.si_status = (WEXITSTATUS(status_and_exitcode) & 0x00FFFFFF) | (((uint32_t)(p->p_xhighbits) << 24) & 0xFF000000);
		p->p_xhighbits = 0;
		break;
	}
	}

#if CONFIG_DTRACE	
	sendsig_do_dtrace(ut, &sinfo, sig, catcher);
#endif /* CONFIG_DTRACE */

	/* 
	 * Copy signal-handling frame out to user space, set thread state.
	 */
	if (proc_is64bit_data(p)) {
#if defined(__arm64__)
		user64_addr_t token;

		/*
		 * mctx filled in when we get state.  uctx filled in by 
		 * sendsig_fill_uctx64(). We fill in the sinfo now.
		 */
		siginfo_user_to_user64(&sinfo, &user_frame.uf64.sinfo);

		p_uctx = (user_addr_t)&((struct user_sigframe64*)sp)->uctx;
		/*
		 * Generate the validation token for sigreturn
		 */
		token_uctx = p_uctx;
		kr = machine_thread_siguctx_pointer_convert_to_user(th_act, &token_uctx);
		assert(kr == KERN_SUCCESS);
		token = (user64_addr_t)token_uctx ^ (user64_addr_t)ps->ps_sigreturn_token;

		if (copyout(&user_frame.uf64, sp, sizeof(user_frame.uf64)) != 0) {
			goto bad; 
		} 

		if (sendsig_set_thread_state64(&ts.ts64.ss,
			catcher, infostyle, sig, (user64_addr_t)&((struct user_sigframe64*)sp)->sinfo,
			(user64_addr_t)p_uctx, token, trampact, sp, th_act) != KERN_SUCCESS)
			goto bad;

#else
	panic("Shouldn't have 64-bit thread states on a 32-bit kernel.");
#endif
	} else {
		user32_addr_t token;

		/*
		 * mctx filled in when we get state.  uctx filled in by 
		 * sendsig_fill_uctx32(). We fill in the sinfo, *pointer*
		 * to uctx and token now.
		 */
		siginfo_user_to_user32(&sinfo, &user_frame.uf32.sinfo);

		p_uctx = (user_addr_t)&((struct user_sigframe32*)sp)->uctx;
		/*
		 * Generate the validation token for sigreturn
		 */
		token_uctx = (user_addr_t)p_uctx;
		kr = machine_thread_siguctx_pointer_convert_to_user(th_act, &token_uctx);
		assert(kr == KERN_SUCCESS);
		token = (user32_addr_t)token_uctx ^ (user32_addr_t)ps->ps_sigreturn_token;

		user_frame.uf32.puctx = (user32_addr_t)p_uctx;
		user_frame.uf32.token = token;

		if (copyout(&user_frame.uf32, sp, sizeof(user_frame.uf32)) != 0) {
			goto bad; 
		} 

		if (sendsig_set_thread_state32(&ts.ts32.ss,
			CAST_DOWN_EXPLICIT(user32_addr_t, catcher), infostyle, sig, (user32_addr_t)&((struct user_sigframe32*)sp)->sinfo,
			CAST_DOWN_EXPLICIT(user32_addr_t, trampact), CAST_DOWN_EXPLICIT(user32_addr_t, sp), th_act) != KERN_SUCCESS)
			goto bad;
	}

	proc_lock(p);
	return;

bad:
	proc_lock(p);
bad2:
	SIGACTION(p, SIGILL) = SIG_DFL;
	sig = sigmask(SIGILL);
	p->p_sigignore &= ~sig;
	p->p_sigcatch &= ~sig;
	ut->uu_sigmask &= ~sig;
	/* sendsig is called with signal lock held */
	proc_unlock(p);
	psignal_locked(p, SIGILL);
	proc_lock(p);
}
Example #23
void *
uthread_alloc(task_t task, thread_t thread, int noinherit)
{
	proc_t p;
	uthread_t uth;
	uthread_t uth_parent;
	void *ut;

	if (!uthread_zone_inited)
		uthread_zone_init();

	ut = (void *)zalloc(uthread_zone);
	bzero(ut, sizeof(struct uthread));

	p = (proc_t) get_bsdtask_info(task);
	uth = (uthread_t)ut;

	/*
	 * Thread inherits credential from the creating thread, if both
	 * are in the same task.
	 *
	 * If the creating thread has no credential or is from another
	 * task we can leave the new thread credential NULL.  If it needs
	 * one later, it will be lazily assigned from the task's process.
	 */
	uth_parent = (uthread_t)get_bsdthread_info(current_thread());
	if ((noinherit == 0) && task == current_task() && 
	    uth_parent != NULL &&
	    IS_VALID_CRED(uth_parent->uu_ucred)) {
		/*
		 * XXX The new thread is, in theory, being created in context
		 * XXX of parent thread, so a direct reference to the parent
		 * XXX is OK.
		 */
		kauth_cred_ref(uth_parent->uu_ucred);
		uth->uu_ucred = uth_parent->uu_ucred;
		/* the credential we just inherited is an assumed credential */
		if (uth_parent->uu_flag & UT_SETUID)
			uth->uu_flag |= UT_SETUID;
	} else {
		/* sometimes workqueue threads are created out of task context */
		if ((task != kernel_task) && (p != PROC_NULL))
			uth->uu_ucred = kauth_cred_proc_ref(p);
		else
			uth->uu_ucred = NOCRED;
	}

	if ((task != kernel_task) && p) {
		
		proc_lock(p);
		if (noinherit != 0) {
			/* workq threads will not inherit masks */
			uth->uu_sigmask = ~workq_threadmask;
		} else if (uth_parent) {
			if (uth_parent->uu_flag & UT_SAS_OLDMASK)
				uth->uu_sigmask = uth_parent->uu_oldmask;
			else
				uth->uu_sigmask = uth_parent->uu_sigmask;
		}
		uth->uu_context.vc_thread = thread;
		TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list);
		proc_unlock(p);

#if CONFIG_DTRACE
		if (p->p_dtrace_ptss_pages != NULL) {
			uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p);
		}
#endif
	}

	return (ut);
}
Example #24
/*
 * Search a pathname.
 * This is a very central and rather complicated routine.
 *
 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
 * The starting directory is taken from ni_startdir. The pathname is
 * descended until done, or a symbolic link is encountered. The variable
 * ni_more is clear if the path is completed; it is set to one if a
 * symbolic link needing interpretation is encountered.
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
 * returned unlocked. Otherwise the parent directory is not returned. If
 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
 * the target is returned locked, otherwise it is returned unlocked.
 * When creating or renaming and LOCKPARENT is specified, the target may not
 * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
 * 
 * Overall outline of lookup:
 *
 * dirloop:
 *	identify next component of name at ndp->ni_ptr
 *	handle degenerate case where name is null string
 *	if .. and crossing mount points and on mounted filesys, find parent
 *	call VNOP_LOOKUP routine for next component name
 *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
 *	    component vnode returned in ni_vp (if it exists), locked.
 *	if result vnode is mounted on and crossing mount points,
 *	    find mounted on vnode
 *	if more components of name, do next level at dirloop
 *	return the answer in ni_vp, locked if LOCKLEAF set
 *	    if LOCKPARENT set, return locked parent in ni_dvp
 *	    if WANTPARENT set, return unlocked parent in ni_dvp
 *
 * Returns:	0			Success
 *		ENOENT			No such file or directory
 *		EBADF			Bad file descriptor
 *		ENOTDIR			Not a directory
 *		EROFS			Read-only file system [CREATE]
 *		EISDIR			Is a directory [CREATE]
 *		cache_lookup_path:ERECYCLE  (vnode was recycled from underneath us, redrive lookup again)
 *		vnode_authorize:EROFS
 *		vnode_authorize:EACCES
 *		vnode_authorize:EPERM
 *		vnode_authorize:???
 *		VNOP_LOOKUP:ENOENT	No such file or directory
 *		VNOP_LOOKUP:EJUSTRETURN	Restart system call (INTERNAL)
 *		VNOP_LOOKUP:???
 *		VFS_ROOT:ENOTSUP
 *		VFS_ROOT:ENOENT
 *		VFS_ROOT:???
 */
int
lookup(struct nameidata *ndp)
{
	char	*cp;		/* pointer into pathname argument */
	vnode_t		tdp;		/* saved dp */
	vnode_t		dp;		/* the directory we are searching */
	int docache = 1;		/* == 0 do not cache last component */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int rdonly;			/* lookup read-only flag bit */
	int dp_authorized = 0;
	int error = 0;
	struct componentname *cnp = &ndp->ni_cnd;
	vfs_context_t ctx = cnp->cn_context;
	int vbusyflags = 0;
	int nc_generation = 0;
	vnode_t last_dp = NULLVP;
	int keep_going;
	int atroot;

	/*
	 * Setup: break out flag bits into variables.
	 */
	if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) {
	        if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE))
		        docache = 0;
	}
	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
	rdonly = cnp->cn_flags & RDONLY;
	cnp->cn_flags &= ~ISSYMLINK;
	cnp->cn_consume = 0;

	dp = ndp->ni_startdir;
	ndp->ni_startdir = NULLVP;

	if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0)
			vbusyflags = LK_NOWAIT;
	cp = cnp->cn_nameptr;

	if (*cp == '\0') {
	        if ( (vnode_getwithref(dp)) ) {
			dp = NULLVP;
		        error = ENOENT;
			goto bad;
		}
		ndp->ni_vp = dp;
		error = lookup_handle_emptyname(ndp, cnp, wantparent);
		if (error) {
			goto bad;
		}

		return 0;
	}
dirloop: 
	atroot = 0;
	ndp->ni_vp = NULLVP;

	if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp)) ) {
		dp = NULLVP;
		goto bad;
	}
	if ((cnp->cn_flags & ISLASTCN)) {
	        if (docache)
		        cnp->cn_flags |= MAKEENTRY;
	} else
	        cnp->cn_flags |= MAKEENTRY;

	dp = ndp->ni_dvp;

	if (ndp->ni_vp != NULLVP) {
	        /*
		 * cache_lookup_path returned a non-NULL ni_vp, so we're
		 * guaranteed that dp is a VDIR, it's been authorized,
		 * and vp is not ".."
		 *
		 * make sure we don't try to enter the name back into
		 * the cache if this vp is purged before we get to that
		 * check since we won't have serialized behind whatever
		 * activity is occurring in the FS that caused the purge
		 */
	        if (dp != NULLVP)
		        nc_generation = dp->v_nc_generation - 1;

	        goto returned_from_lookup_path;
	}

	/*
	 * Handle "..": two special cases.
	 * 1. If at root directory (e.g. after chroot)
	 *    or at absolute root directory
	 *    then ignore it so can't get out.
	 * 2. If this vnode is the root of a mounted
	 *    filesystem, then replace it with the
	 *    vnode which was mounted on so we take the
	 *    .. in the other file system.
	 */
	if ( (cnp->cn_flags & ISDOTDOT) ) {
		for (;;) {
		        if (dp == ndp->ni_rootdir || dp == rootvnode) {
			        ndp->ni_dvp = dp;
				ndp->ni_vp = dp;
				/*
				 * we're pinned at the root
				 * we've already got one reference on 'dp'
				 * courtesy of cache_lookup_path... take
				 * another one for the ".."
				 * if we fail to get the new reference, we'll
				 * drop our original down in 'bad'
				 */
				if ( (vnode_get(dp)) ) {
					error = ENOENT;
					goto bad;
				}
				atroot = 1;
				goto returned_from_lookup_path;
			}
			if ((dp->v_flag & VROOT) == 0 ||
			    (cnp->cn_flags & NOCROSSMOUNT))
			        break;
			if (dp->v_mount == NULL) {	/* forced umount */
			        error = EBADF;
				goto bad;
			}
			tdp = dp;
			dp = tdp->v_mount->mnt_vnodecovered;

			vnode_put(tdp);

			if ( (vnode_getwithref(dp)) ) {
			        dp = NULLVP;
				error = ENOENT;
				goto bad;
			}
			ndp->ni_dvp = dp;
			dp_authorized = 0;
		}
	}

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */
unionlookup:
	ndp->ni_vp = NULLVP;

	if (dp->v_type != VDIR) {
	        error = ENOTDIR;
	        goto lookup_error;
	}
	if ( (cnp->cn_flags & DONOTAUTH) != DONOTAUTH ) {
		error = lookup_authorize_search(dp, cnp, dp_authorized, ctx);
		if (error) {
			goto lookup_error;
		}
	}

	/*
	 * Now that we've authorized a lookup, can bail out if the filesystem
	 * will be doing a batched operation.  Return an iocount on dvp.
	 */
#if NAMEDRSRCFORK
	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) {
#else 
	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) {
#endif /* NAMEDRSRCFORK */
		ndp->ni_flag |= NAMEI_UNFINISHED;
		ndp->ni_ncgeneration = dp->v_nc_generation;
		return 0;
	}

	nc_generation = dp->v_nc_generation;

	error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx);


	if ( error ) {
lookup_error:
		if ((error == ENOENT) &&
		    (dp->v_flag & VROOT) && (dp->v_mount != NULL) &&
		    (dp->v_mount->mnt_flag & MNT_UNION)) {
#if CONFIG_VFS_FUNNEL
		        if ((cnp->cn_flags & FSNODELOCKHELD)) {
			        cnp->cn_flags &= ~FSNODELOCKHELD;
				unlock_fsnode(dp, NULL);
			}	
#endif /* CONFIG_VFS_FUNNEL */
			tdp = dp;
			dp = tdp->v_mount->mnt_vnodecovered;

			vnode_put(tdp);

			if ( (vnode_getwithref(dp)) ) {
			        dp = NULLVP;
				error = ENOENT;
				goto bad;
			}
			ndp->ni_dvp = dp;
			dp_authorized = 0;
			goto unionlookup;
		}

		if (error != EJUSTRETURN)
			goto bad;

		if (ndp->ni_vp != NULLVP)
			panic("leaf should be empty");

		error = lookup_validate_creation_path(ndp);
		if (error)
			goto bad;
		/*
		 * We return with ni_vp NULL to indicate that the entry
		 * doesn't currently exist, leaving a pointer to the
		 * referenced directory vnode in ndp->ni_dvp.
		 */
		if (cnp->cn_flags & SAVESTART) {
			if ( (vnode_get(ndp->ni_dvp)) ) {
				error = ENOENT;
				goto bad;
			}
			ndp->ni_startdir = ndp->ni_dvp;
		}
		if (!wantparent)
		        vnode_put(ndp->ni_dvp);

		if (kdebug_enable)
		        kdebug_lookup(ndp->ni_dvp, cnp);
		return (0);
	}
returned_from_lookup_path:
	/* We'll always have an iocount on ni_vp when this finishes. */
	error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags, &keep_going, nc_generation, wantparent, atroot, ctx);
	if (error != 0) {
		goto bad2; 
	}

	if (keep_going) {
		dp = ndp->ni_vp;

		/* namei() will handle symlinks */
		if ((dp->v_type == VLNK) &&
				((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
			return 0; 
		}

		/*
		 * Otherwise, there's more path to process.  
		 * cache_lookup_path is now responsible for dropping io ref on dp
		 * when it is called again in the dirloop.  This ensures we hold
		 * a ref on dp until we complete the next round of lookup.
		 */
		last_dp = dp;

		goto dirloop;
	}

	return (0);
bad2:
#if CONFIG_VFS_FUNNEL
	if ((cnp->cn_flags & FSNODELOCKHELD)) {
	        cnp->cn_flags &= ~FSNODELOCKHELD;
		unlock_fsnode(ndp->ni_dvp, NULL);
	}
#endif /* CONFIG_VFS_FUNNEL */
	if (ndp->ni_dvp)
		vnode_put(ndp->ni_dvp);

	vnode_put(ndp->ni_vp);
	ndp->ni_vp = NULLVP;

	if (kdebug_enable)
	        kdebug_lookup(dp, cnp);
	return (error);

bad:
#if CONFIG_VFS_FUNNEL
	if ((cnp->cn_flags & FSNODELOCKHELD)) {
	        cnp->cn_flags &= ~FSNODELOCKHELD;
		unlock_fsnode(ndp->ni_dvp, NULL);
	}	
#endif /* CONFIG_VFS_FUNNEL */
	if (dp)
	        vnode_put(dp);
	ndp->ni_vp = NULLVP;

	if (kdebug_enable)
	        kdebug_lookup(dp, cnp);
	return (error);
}

int 
lookup_validate_creation_path(struct nameidata *ndp)
{
	struct componentname *cnp = &ndp->ni_cnd;

	/*
	 * If creating and at end of pathname, then can consider
	 * allowing file to be created.
	 */
	if (cnp->cn_flags & RDONLY) {
		return EROFS;
	}
	if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) {
		return ENOENT;
	}
	
	return 0;
}
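The two checks above are visible from user space: open(2) with O_CREAT refuses a path with a trailing slash, while mkdir(2) of the same shape may proceed because the new node will be a directory (WILLBEDIR). A minimal, hypothetical test program (not part of the kernel source) demonstrating the behavior:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int
main(void)
{
	/* Trailing slash on a to-be-created non-directory: the lookup
	 * path above returns ENOENT before creation is attempted. */
	if (open("newfile/", O_CREAT | O_RDWR, 0644) == -1)
		printf("open(\"newfile/\") failed: errno %d (expect ENOENT)\n", errno);

	/* The same shape is fine for mkdir(2), since WILLBEDIR is set. */
	if (mkdir("newdir/", 0755) == 0)
		printf("mkdir(\"newdir/\") succeeded despite the slash\n");
	return 0;
}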

/*
 * Modifies only ni_vp.  Always returns with ni_vp still valid (iocount held).
 */
int
lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, 
		int vbusyflags, vfs_context_t ctx)
{
	mount_t mp;
	vnode_t tdp;
	int error = 0;
	uthread_t uth;
	uint32_t depth = 0;
	int dont_cache_mp = 0;
	vnode_t	mounted_on_dp;
	int current_mount_generation = 0;
	
	mounted_on_dp = dp;
	current_mount_generation = mount_generation;

	while ((dp->v_type == VDIR) && dp->v_mountedhere &&
			((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
#if CONFIG_TRIGGERS
		/*
		 * For a trigger vnode, call its resolver when crossing its mount (if requested)
		 */
		if (dp->v_resolve) {
			(void) vnode_trigger_resolve(dp, ndp, ctx);
		}
#endif
		vnode_lock(dp);

		if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) {

			mp->mnt_crossref++;
			vnode_unlock(dp);


			if (vfs_busy(mp, vbusyflags)) {
				mount_dropcrossref(mp, dp, 0);
				if (vbusyflags == LK_NOWAIT) {
					error = ENOENT;
					goto out;
				}

				continue;
			}


			/*
			 * XXX - if this is the last component of the
			 * pathname, and it's either not a lookup operation
			 * or the NOTRIGGER flag is set for the operation,
			 * set a uthread flag to let VFS_ROOT() for autofs
			 * know it shouldn't trigger a mount.
			 */
			uth = (struct uthread *)get_bsdthread_info(current_thread());
			if ((cnp->cn_flags & ISLASTCN) &&
					(cnp->cn_nameiop != LOOKUP ||
					 (cnp->cn_flags & NOTRIGGER))) {
				uth->uu_notrigger = 1;
				dont_cache_mp = 1;
			}

			error = VFS_ROOT(mp, &tdp, ctx);
			/* XXX - clear the uthread flag */
			uth->uu_notrigger = 0;

			mount_dropcrossref(mp, dp, 0);
			vfs_unbusy(mp);

			if (error) {
				goto out;
			}

			vnode_put(dp);
			ndp->ni_vp = dp = tdp;
			depth++;

#if CONFIG_TRIGGERS
			/*
			 * Check if root dir is a trigger vnode
			 */
			if (dp->v_resolve) {
				error = vnode_trigger_resolve(dp, ndp, ctx);
				if (error) {
					goto out;
				}
			}
#endif			

		} else { 
			vnode_unlock(dp);
			break;
		}
	}

	if (depth && !dont_cache_mp) {
	        mp = mounted_on_dp->v_mountedhere;

		if (mp) {
		        mount_lock_spin(mp);
			mp->mnt_realrootvp_vid = dp->v_id;
			mp->mnt_realrootvp = dp;
			mp->mnt_generation = current_mount_generation;
			mount_unlock(mp);
		}
	}

	return 0;

out:
	return error;
}
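One observable consequence of this traversal is that a stat(2) of a covered directory reports the root of the filesystem mounted there, not the underlying vnode. A small, hypothetical user-space illustration (the mount point path is an assumption):

#include <stdio.h>
#include <sys/stat.h>

int
main(void)
{
	struct stat sb;

	/* "/Volumes/SomeDisk" stands in for any mounted-on directory. */
	if (stat("/Volumes/SomeDisk", &sb) == 0)
		printf("st_dev %d st_ino %llu -- the mounted fs root, "
		    "reached by crossing v_mountedhere\n",
		    sb.st_dev, (unsigned long long)sb.st_ino);
	return 0;
}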
Example #25
0
/*
 * forkproc
 *
 * Description:	Create a new process structure, given a parent process
 *		structure.
 *
 * Parameters:	parent_proc		The parent process
 *
 * Returns:	!NULL			The new process structure
 *		NULL			Error (insufficient free memory)
 *
 * Note:	When successful, the newly created process structure is
 *		partially initialized; if a caller needs to deconstruct the
 *		returned structure, they must call forkproc_free() to do so.
 */
proc_t
forkproc(proc_t parent_proc)
{
	proc_t child_proc;	/* Our new process */
	static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
	int error = 0;
	struct session *sessp;
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());

	MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK);
	if (child_proc == NULL) {
		printf("forkproc: M_PROC zone exhausted\n");
		goto bad;
	}
	/* zero it out as we need to insert in hash */
	bzero(child_proc, sizeof *child_proc);

	MALLOC_ZONE(child_proc->p_stats, struct pstats *,
			sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK);
	if (child_proc->p_stats == NULL) {
		printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n");
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}
	MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *,
			sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK);
	if (child_proc->p_sigacts == NULL) {
		printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n");
		FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}

	/* allocate a callout for use by interval timers */
	child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc);
	if (child_proc->p_rcall == NULL) {
		FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS);
		FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}


	/*
	 * Find an unused PID.  
	 */

	proc_list_lock();

	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidwrap = 1;
	}
	if (pidwrap != 0) {

		/* the pid stays in the hash for both the zombie and running states */
		if  (pfind_locked(nextpid) != PROC_NULL) {
			nextpid++;
			goto retry;
		}

		if (pgfind_internal(nextpid) != PGRP_NULL) {
			nextpid++;
			goto retry;
		}	
		if (session_find_internal(nextpid) != SESSION_NULL) {
			nextpid++;
			goto retry;
		}	
	}
	nprocs++;
	child_proc->p_pid = nextpid;
	child_proc->p_idversion = nextpidversion++;
#if 1
	if (child_proc->p_pid != 0) {
		if (pfind_locked(child_proc->p_pid) != PROC_NULL)
			panic("proc in the list already\n");
	}
#endif
	/* Insert in the hash */
	child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE);
	LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash);
	proc_list_unlock();


	/*
	 * We've identified the PID we are going to use; initialize the new
	 * process structure.
	 */
	child_proc->p_stat = SIDL;
	child_proc->p_pgrpid = PGRPID_DEAD;

	/*
	 * The proc structure was zeroed at allocation time, since it had
	 * to be inserted into the hash before being fully initialized.
	 * Copy the section that is copied directly from the parent.
	 */
	bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy,
	    (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy));

	/*
	 * Some flags are inherited from the parent.
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY));
	if (parent_proc->p_flag & P_PROFIL)
		startprofclock(child_proc);
	/*
	 * Note that if the current thread has an assumed identity, this
	 * credential will be granted to the new process.
	 */
	child_proc->p_ucred = kauth_cred_get_with_ref();

#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
#if CONFIG_DTRACE
	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
	lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
#else /* !CONFIG_EMBEDDED */
	lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
#if CONFIG_DTRACE
	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
	lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
#endif /* !CONFIG_EMBEDDED */
	klist_init(&child_proc->p_klist);

	if (child_proc->p_textvp != NULLVP) {
		/* bump references to the text vnode */
		/* Need to hold iocount across the ref call */
		if (vnode_getwithref(child_proc->p_textvp) == 0) {
			error = vnode_ref(child_proc->p_textvp);
			vnode_put(child_proc->p_textvp);
			if (error != 0)
				child_proc->p_textvp = NULLVP;
		}
	}

	/*
	 * Copy the parent's per-process open file table to the child; if
	 * there is a per-thread current working directory, set the child's
	 * per-process current working directory to that instead of the
	 * parent's.
	 *
	 * XXX may fail to copy descriptors to child
	 */
	child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);

#if SYSV_SHM
	if (parent_proc->vm_shm) {
		/* XXX may fail to attach shm to child */
		(void)shmfork(parent_proc, child_proc);
	}
#endif
	/*
	 * Inherit the resource limit structure from the parent.
	 */
	proc_limitfork(parent_proc, child_proc);

	if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
		uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur;
		child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur;
	}

	/* Initialize new process stats, including start time */
	/* <rdar://6640543> non-zeroed portion contains garbage AFAICT */
	bzero(&child_proc->p_stats->pstat_startzero,
	    (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero -
	    (caddr_t)&child_proc->p_stats->pstat_startzero));
	bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof));
	microtime(&child_proc->p_start);
	child_proc->p_stats->p_start = child_proc->p_start;     /* for compat */

	if (parent_proc->p_sigacts != NULL)
		(void)memcpy(child_proc->p_sigacts,
				parent_proc->p_sigacts, sizeof *child_proc->p_sigacts);
	else
		(void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts);

	sessp = proc_session(parent_proc);
	if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT)
		OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag);
	session_rele(sessp);

	/*
	 * Block all signals from reaching the process.
	 * No transition race should be occurring with the child yet,
	 * but indicate that the process is in (the creation) transition.
	 */
	proc_signalstart(child_proc, 0);
	proc_transstart(child_proc, 0);

	child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX;
	TAILQ_INIT(&child_proc->p_uthlist);
	TAILQ_INIT(&child_proc->p_aio_activeq);
	TAILQ_INIT(&child_proc->p_aio_doneq);

	/* Inherit the parent flags for code sign */
	child_proc->p_csflags = parent_proc->p_csflags;

	/*
	 * All processes have work queue locks; cleaned up by
	 * reap_child_locked()
	 */
	workqueue_init_lock(child_proc);

	/*
	 * Copy work queue information
	 *
	 * Note: This should probably only happen in the case where we are
	 *	creating a child that is a copy of the parent; since this
	 *	routine is also called in the non-duplication cases of
	 *	vfork() and posix_spawn(), this information should likely
	 *	not be duplicated in those cases.
	 *
	 * <rdar://6640553> Work queue pointers that no longer point to code
	 */
	child_proc->p_wqthread = parent_proc->p_wqthread;
	child_proc->p_threadstart = parent_proc->p_threadstart;
	child_proc->p_pthsize = parent_proc->p_pthsize;
	child_proc->p_targconc = parent_proc->p_targconc;
	if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
		child_proc->p_lflag |= P_LREGISTER;
	}
	child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
#if PSYNCH
	pth_proc_hashinit(child_proc);
#endif /* PSYNCH */

#if CONFIG_LCTX
	child_proc->p_lctx = NULL;
	/* Add new process to login context (if any). */
	if (parent_proc->p_lctx != NULL) {
		/*
		 * <rdar://6640564> This should probably be delayed in the
		 * vfork() or posix_spawn() cases.
		 */
		LCTX_LOCK(parent_proc->p_lctx);
		enterlctx(child_proc, parent_proc->p_lctx, 0);
	}
#endif

bad:
	return(child_proc);
}
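As the header comment notes, a caller that cannot finish constructing the child must unwind with forkproc_free(). A condensed sketch of that contract (example_finish_setup() is a hypothetical stand-in for the caller's remaining work):

static int example_finish_setup(proc_t child);	/* hypothetical stand-in */

static int
example_create_child(proc_t parent, proc_t *out)
{
	proc_t child;

	if ((child = forkproc(parent)) == NULL)
		return (ENOMEM);

	if (example_finish_setup(child) != 0) {
		forkproc_free(child);	/* deconstruct the partial proc */
		return (EIO);
	}
	*out = child;
	return (0);
}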
Example #26
0
void
mach_call_munger64(x86_saved_state_t *state)
{
	int call_number;
	int argc;
	mach_call_t mach_call;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state64_t	*regs;

#if PROC_REF_DEBUG
	struct uthread *ut = get_bsdthread_info(current_thread());

	uthread_reset_proc_refcount(ut);
#endif

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK);

	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger64: code=%d(%s)\n",
		call_number, mach_syscall_name_table[call_number]);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_START,
		regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);
	
	if (call_number < 0 || call_number >= mach_trap_count) {
	        i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
	        i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		int args_in_regs = MIN(6, argc);

		memcpy(&args.arg1, &regs->rdi, args_in_regs * sizeof(syscall_arg_t));

		if (argc > 6) {
			int copyin_count;

			assert(argc <= 9);
			copyin_count = (argc - 6) * (int)sizeof(syscall_arg_t);

			if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&args.arg7, copyin_count)) {
				regs->rax = KERN_INVALID_ARGUMENT;

				thread_exception_return();
				/* NOTREACHED */
			}
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	regs->rax = (uint64_t)mach_call((void *)&args);
	
	DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, 
		regs->rax, 0, 0, 0, 0);

	throttle_lowpri_io(1);

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}
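The bulk memcpy() of the first six arguments above assumes the argument registers are stored contiguously, in System V order, inside the saved-state structure. A stand-alone sketch of pinning that kind of assumption down at compile time (the struct below is a hypothetical stand-in, not the real x86_saved_state64_t):

#include <stddef.h>
#include <stdint.h>

struct fake_saved_state64 {
	uint64_t rdi, rsi, rdx, r10, r8, r9;	/* argument registers */
	uint64_t rax;				/* call number / return value */
};

_Static_assert(offsetof(struct fake_saved_state64, r9) ==
    offsetof(struct fake_saved_state64, rdi) + 5 * sizeof(uint64_t),
    "argument registers must be contiguous for the bulk memcpy");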
Example #27
0
/*
 * fork1
 *
 * Description:	common code used by all new process creation other than the
 *		bootstrap of the initial process on the system
 *
 * Parameters: parent_proc		parent process of the process being
 *					created
 *		child_threadp		pointer to location to receive the
 *					Mach thread_t of the child process
 *					created
 *		kind			kind of creation being requested
 *
 * Notes:	Permissible values for 'kind':
 *
 *		PROC_CREATE_FORK	Create a complete process which will
 *					return actively running in both the
 *					parent and the child; the child copies
 *					the parent address space.
 *		PROC_CREATE_SPAWN	Create a complete process which will
 *					return actively running in the parent
 *					only after returning actively running
 *					in the child; the child address space
 *					is newly created by an image activator,
 *					after which the child is run.
 *		PROC_CREATE_VFORK	Creates a partial process which will
 *					borrow the parent task, thread, and
 *					uthread to return running in the child;
 *					the child address space and other parts
 *					are lazily created at execve() time, or
 *					the child is terminated, and the parent
 *					does not actively run until that
 *					happens.
 *
 *		At first it may seem strange that we return the child thread
 *		address rather than process structure, since the process is
 *		the only part guaranteed to be "new"; however, since we do
 *		not actually adjust other references between Mach and BSD (see
 *		the block diagram above the implementation of vfork()), this
 *		is the only method which guarantees us the ability to get
 *		back to the other information.
 */
int
fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
{
	thread_t parent_thread = (thread_t)current_thread();
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
	proc_t child_proc = NULL;	/* set in switch, but compiler... */
	thread_t child_thread = NULL;
	uid_t uid;
	int count;
	int err = 0;
	int spawn = 0;

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit. The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = kauth_cred_get()->cr_ruid;
	proc_list_lock();
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		proc_list_unlock();
		tablefull("proc");
		return (EAGAIN);
	}
	proc_list_unlock();

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit, which is
	 * always less than what an rlim_t can hold.
	 * (locking protection is provided by list lock held in chgproccnt)
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 &&
	    (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
	    	err = EAGAIN;
		goto bad;
	}

#if CONFIG_MACF
	/*
	 * Determine if MAC policies applied to the process will allow
	 * it to fork.  This is an advisory-only check.
	 */
	err = mac_proc_check_fork(parent_proc);
	if (err  != 0) {
		goto bad;
	}
#endif

	switch(kind) {
	case PROC_CREATE_VFORK:
		/*
		 * Prevent a vfork while we are in vfork(); we should
		 * also likely prevent a fork here as well, and this
		 * check should then be outside the switch statement,
		 * since the proc struct contents will come from the
		 * child and the task/thread/uthread from the parent in
		 * that case.  We do not support vfork() in vfork()
		 * because we don't have to; the same non-requirement
		 * is true of both fork() and posix_spawn() and any
		 * call other than execve() and _exit(), but we've
		 * been historically lenient, so we continue to be so
		 * (for now).
		 *
		 * <rdar://6640521> Probably a source of random panics
		 */
		if (parent_uthread->uu_flag & UT_VFORK) {
			printf("fork1 called within vfork by %s\n", parent_proc->p_comm);
			err = EINVAL;
			goto bad;
		}

		/*
		 * Flag us in progress; if we chose to support vfork() in
		 * vfork(), we would chain our parent at this point (in
		 * effect, a stack push).  We don't, since we actually want
		 * to disallow everything not specified in the standard.
		 */
		proc_vfork_begin(parent_proc);

		/* The newly created process comes with signal lock held */
		if ((child_proc = forkproc(parent_proc)) == NULL) {
			/* Failed to allocate new process */
			proc_vfork_end(parent_proc);
			err = ENOMEM;
			goto bad;
		}

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
// XXX END: wants to move to be common code (and safe)

		/*
		 * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD
		 *
		 * Note: this is where we would "push" state instead of setting
		 * it for nested vfork() support (see proc_vfork_end() for
		 * description if issues here).
		 */
		child_proc->task = parent_proc->task;

		child_proc->p_lflag  |= P_LINVFORK;
		child_proc->p_vforkact = parent_thread;
		child_proc->p_stat = SRUN;

		parent_uthread->uu_flag |= UT_VFORK;
		parent_uthread->uu_proc = child_proc;
		parent_uthread->uu_userstate = (void *)act_thread_csave();
		parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask;

		/* temporarily drop thread-set-id state */
		if (parent_uthread->uu_flag & UT_SETUID) {
			parent_uthread->uu_flag |= UT_WASSETUID;
			parent_uthread->uu_flag &= ~UT_SETUID;
		}

		/* blow thread state information */
		/* XXX is this actually necessary, given syscall return? */
		thread_set_child(parent_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

		/*
		 * Preserve synchronization semantics of vfork.  If
		 * waiting for child to exec or exit, set P_PPWAIT
		 * on child, and sleep on our proc (in case of exit).
		 */
		child_proc->p_lflag |= P_LPPWAIT;
		pinsertchild(parent_proc, child_proc);	/* set visible */

		break;

	case PROC_CREATE_SPAWN:
		/*
		 * A spawned process differs from a forked process in that
		 * the spawned process does not carry around the parent's
		 * baggage with regard to address space copying, dtrace,
		 * and so on.
		 */
		spawn = 1;

		/* FALLTHROUGH */

	case PROC_CREATE_FORK:
		/*
		 * When we clone the parent process, we are going to inherit
		 * its task attributes and memory, since when we fork, we
		 * will, in effect, create a duplicate of it, with only minor
		 * differences.  Contrarily, spawned processes do not inherit.
		 */
		if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) {
			/* Failed to create thread */
			err = EAGAIN;
			goto bad;
		}

		/* copy current thread state into the child thread (only for fork) */
		if (!spawn) {
			thread_dup(child_thread);
		}

		/* child_proc = child_thread->task->proc; */
		child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread)));

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
// XXX END: wants to move to be common code (and safe)

		/*
		 * Blow thread state information; this is what gives the child
		 * process its "return" value from a fork() call.
		 *
		 * Note: this should probably move to fork() proper, since it
		 * is not relevant to spawn, and the value won't matter
		 * until we resume the child there.  If you are in here
		 * refactoring code, consider doing this at the same time.
		 */
		thread_set_child(child_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

// <rdar://6598155> dtrace code cleanup needed
#if CONFIG_DTRACE
		/*
		 * This code applies to new processes who are copying the task
		 * and thread state and address spaces of their parent process.
		 */
		if (!spawn) {
// <rdar://6598155> call dtrace specific function here instead of all this...
		/*
		 * APPLE NOTE: Solaris does a sprlock() and drops the
		 * proc_lock here. We're cheating a bit and only taking
		 * the p_dtrace_sprlock lock. A full sprlock would
		 * task_suspend the parent.
		 */
		lck_mtx_lock(&parent_proc->p_dtrace_sprlock);

		/*
		 * Remove all DTrace tracepoints from the child process. We
		 * need to do this _before_ duplicating USDT providers since
		 * any associated probes may be immediately enabled.
		 */
		if (parent_proc->p_dtrace_count > 0) {
			dtrace_fasttrap_fork(parent_proc, child_proc);
		}

		lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);

		/*
		 * Duplicate any lazy dof(s). This must be done while NOT
		 * holding the parent sprlock! Lock ordering is
		 * dtrace_dof_mode_lock, then sprlock.  It is imperative we
		 * always call dtrace_lazy_dofs_duplicate, rather than null
		 * check and call if !NULL. If we NULL test, during lazy dof
		 * faulting we can race with the faulting code and proceed
		 * from here to beyond the helpers copy. The lazy dof
		 * faulting will then fail to copy the helpers to the child
		 * process.
		 */
		dtrace_lazy_dofs_duplicate(parent_proc, child_proc);
		
		/*
		 * Duplicate any helper actions and providers.  On Solaris,
		 * the SFORKING flag informs the code that enables USDT
		 * probes that sprlock() may fail because the child is
		 * being forked.
		 */
		/*
		 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
		 * never fails to find the child. We do not set SFORKING.
		 */
		if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
			(*dtrace_helpers_fork)(parent_proc, child_proc);
		}

		}
#endif	/* CONFIG_DTRACE */

		break;

	default:
		panic("fork1 called with unknown kind %d", kind);
		break;
	}


	/* return the thread pointer to the caller */
	*child_threadp = child_thread;

bad:
	/*
	 * In the error case, we return a 0 value for the returned pid (but
	 * it is ignored in the trampoline due to the error return); this
	 * is probably not necessary.
	 */
	if (err) {
		(void)chgproccnt(uid, -1);
	}

	return (err);
}
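For context, the user-visible entry points differ mainly in the 'kind' they pass down. A condensed, assumption-laden sketch of the fork() side (the real implementation also resumes the child and drops the extra task/thread references before returning):

int
fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval)
{
	thread_t child_thread;
	int err;

	retval[1] = 0;	/* mark parent; see vfork_return() for the child side */

	if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK)) == 0) {
		/* recover the child proc from the returned thread */
		proc_t child_proc = (proc_t)get_bsdtask_info(get_threadtask(child_thread));

		retval[0] = child_proc->p_pid;
	}
	return (err);
}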
Example #28
0
/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t 			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
 	ipc_port_t 		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

#if defined(SECURE_KERNEL)
	if (0 == pid) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}
#endif

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	} 


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);
	AUDIT_ARG(process, p);

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright, 
				get_task_ipcspace(current_task()));
	} 
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
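Usage from user space goes through the same-named Mach trap; a minimal, hypothetical caller (the target pid is an assumption, and the caller must satisfy the checks above):

#include <mach/mach.h>
#include <mach/mach_error.h>
#include <mach/mach_traps.h>
#include <stdio.h>

int
main(void)
{
	mach_port_t task = MACH_PORT_NULL;
	int pid = 1234;		/* hypothetical target */
	kern_return_t kr;

	kr = task_for_pid(mach_task_self(), pid, &task);
	if (kr != KERN_SUCCESS) {
		printf("task_for_pid(%d): %s\n", pid, mach_error_string(kr));
		return 1;
	}
	printf("task port 0x%x for pid %d\n", task, pid);
	mach_port_deallocate(mach_task_self(), task);
	return 0;
}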
Example #29
0
int
ptrace(struct proc *p, struct ptrace_args *uap, register_t *retval)
{
	struct proc *t = current_proc();	/* target process */
	task_t		task;
	thread_t	th_act;
	struct uthread 	*ut;
	int tr_sigexc = 0;
	int error = 0;
	int stopped = 0;

	AUDIT_ARG(cmd, uap->req);
	AUDIT_ARG(pid, uap->pid);
	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(value, uap->data);

	if (uap->req == PT_DENY_ATTACH) {
		proc_lock(p);
		if (ISSET(p->p_lflag, P_LTRACED)) {
			proc_unlock(p);
			exit1(p, W_EXITCODE(ENOTSUP, 0), retval);
			/* drop funnel before we return */
			thread_exception_return();
			/* NOTREACHED */
		}
		SET(p->p_lflag, P_LNOATTACH);
		proc_unlock(p);

		return(0);
	}

	if (uap->req == PT_FORCEQUOTA) {
		if (is_suser()) {
			OSBitOrAtomic(P_FORCEQUOTA, (UInt32 *)&t->p_flag);
			return (0);
		} else
			return (EPERM);
	}

	/*
	 *	Intercept and deal with "please trace me" request.
	 */	 
	if (uap->req == PT_TRACE_ME) {
		proc_lock(p);
		SET(p->p_lflag, P_LTRACED);
		/* Non-attached case, our tracer is our parent. */
		p->p_oppid = p->p_ppid;
		proc_unlock(p);
		return(0);
	}
	if (uap->req == PT_SIGEXC) {
		proc_lock(p);
		if (ISSET(p->p_lflag, P_LTRACED)) {
			SET(p->p_lflag, P_LSIGEXC);
			proc_unlock(p);
			return(0);
		} else {
			proc_unlock(p);
			return(EINVAL);
		}
	}

	/* 
	 * We do not want ptrace to do anything with the kernel or launchd.
	 */
	if (uap->pid < 2) {
		return(EPERM);
	}

	/*
	 *	Locate victim, and make sure it is traceable.
	 */
	if ((t = proc_find(uap->pid)) == NULL)
		return (ESRCH);

	AUDIT_ARG(process, t);

	task = t->task;
	if (uap->req == PT_ATTACHEXC) {
		uap->req = PT_ATTACH;
		tr_sigexc = 1;
	}
	if (uap->req == PT_ATTACH) {
		int		err;
		
		if ( kauth_authorize_process(proc_ucred(p), KAUTH_PROCESS_CANTRACE, 
									 t, (uintptr_t)&err, 0, 0) == 0 ) {
			/* it's OK to attach */
			proc_lock(t);
			SET(t->p_lflag, P_LTRACED);
			if (tr_sigexc) 
				SET(t->p_lflag, P_LSIGEXC);
	
			t->p_oppid = t->p_ppid;
			proc_unlock(t);
			if (t->p_pptr != p)
				proc_reparentlocked(t, p, 1, 0);
	
			proc_lock(t);
			if (get_task_userstop(task) > 0 ) {
				stopped = 1;
			}
			t->p_xstat = 0;
			proc_unlock(t);
			psignal(t, SIGSTOP);
			/*
			 * If the process was stopped, wake up and run through
			 * issignal() again to properly connect to the tracing
			 * process.
			 */
			if (stopped)
				task_resume(task);       
			error = 0;
			goto out;
		}
		else {
			/* not allowed to attach, proper error code returned by kauth_authorize_process */
			if (ISSET(t->p_lflag, P_LNOATTACH)) {
				psignal(p, SIGSEGV);
			}
			
			error = err;
			goto out;
		}
	}

	/*
	 * You can't do what you want to the process if:
	 *	(1) It's not being traced at all,
	 */
	proc_lock(t);
	if (!ISSET(t->p_lflag, P_LTRACED)) {
		proc_unlock(t);
		error = EPERM;
		goto out;
	}

	/*
	 *	(2) it's not being traced by _you_, or
	 */
	if (t->p_pptr != p) {
		proc_unlock(t);
		error = EBUSY;
		goto out;
	}

	/*
	 *	(3) it's not currently stopped.
	 */
	if (t->p_stat != SSTOP) {
		proc_unlock(t);
		error = EBUSY;
		goto out;
	}

	/*
	 *	Mach version of ptrace executes request directly here,
	 *	thus simplifying the interaction of ptrace and signals.
	 */
	/* proc lock is held here */
	switch (uap->req) {

	case PT_DETACH:
		if (t->p_oppid != t->p_ppid) {
			struct proc *pp;

			proc_unlock(t);
			pp = proc_find(t->p_oppid);
			proc_reparentlocked(t, pp ? pp : initproc, 1, 0);
			if (pp != PROC_NULL)
				proc_rele(pp);
			proc_lock(t);
			
		}

		t->p_oppid = 0;
		CLR(t->p_lflag, P_LTRACED);
		CLR(t->p_lflag, P_LSIGEXC);
		proc_unlock(t);
		goto resume;
		
	case PT_KILL:
		/*
		 *	Tell child process to kill itself after it
		 *	is resumed by adding NSIG to p_cursig. [see issig]
		 */
		proc_unlock(t);
		psignal(t, SIGKILL);
		goto resume;

	case PT_STEP:			/* single step the child */
	case PT_CONTINUE:		/* continue the child */
		proc_unlock(t);
		th_act = (thread_t)get_firstthread(task);
		if (th_act == THREAD_NULL) {
			error = EINVAL;
			goto out;
		}

		if (uap->addr != (user_addr_t)1) {
#if defined(ppc)
#define ALIGNED(addr,size)	(((unsigned)(addr)&((size)-1))==0)
			if (!ALIGNED((int)uap->addr, sizeof(int)))
				return (ERESTART);
#undef 	ALIGNED
#endif
			thread_setentrypoint(th_act, uap->addr);
		}

		if ((unsigned)uap->data >= NSIG) {
			error = EINVAL;
			goto out;
		}

		if (uap->data != 0) {
			psignal(t, uap->data);
		}

		if (uap->req == PT_STEP) {
		        /*
			 * set trace bit
			 */
			if (thread_setsinglestep(th_act, 1) != KERN_SUCCESS) {
				error = ENOTSUP;
				goto out;
			}
		} else {
		        /*
			 * clear trace bit if on
			 */
			if (thread_setsinglestep(th_act, 0) != KERN_SUCCESS) {
				error = ENOTSUP;
				goto out;
			}
		}	
	resume:
		proc_lock(t);
		t->p_xstat = uap->data;
		t->p_stat = SRUN;
		if (t->sigwait) {
			wakeup((caddr_t)&(t->sigwait));
			proc_unlock(t);
			if ((t->p_lflag & P_LSIGEXC) == 0) {
				task_resume(task);
			}
		} else
			proc_unlock(t);
			
		break;
		
	case PT_THUPDATE:  {
		proc_unlock(t);
		if ((unsigned)uap->data >= NSIG) {
			error = EINVAL;
			goto out;
		}
		th_act = port_name_to_thread(CAST_DOWN(mach_port_name_t, uap->addr));
		if (th_act == THREAD_NULL)
			return (ESRCH);
		ut = (uthread_t)get_bsdthread_info(th_act);
		if (uap->data)
			ut->uu_siglist |= sigmask(uap->data);
		proc_lock(t);
		t->p_xstat = uap->data;
		t->p_stat = SRUN;
		proc_unlock(t);
		thread_deallocate(th_act);
		error = 0;
		}
		break;
	default:
		proc_unlock(t);
		error = EINVAL;
		goto out;
	}

	error = 0;
out:
	proc_rele(t);
	return(error);
}
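The attach/detach requests above pair up from the tracer's side roughly as follows (hypothetical helper; error handling trimmed, and PT_ATTACH is the pre-PT_ATTACHEXC style):

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

int
trace_and_detach(pid_t pid)
{
	int status;

	if (ptrace(PT_ATTACH, pid, 0, 0) == -1)		/* target gets SIGSTOP */
		return (-1);
	if (waitpid(pid, &status, WUNTRACED) == -1)	/* wait for the stop */
		return (-1);

	/* ... inspect the stopped target here ... */

	/* addr of (caddr_t)1 is conventional and ignored by PT_DETACH
	 * above; data 0 means deliver no signal on detach. */
	return (ptrace(PT_DETACH, pid, (caddr_t)1, 0));
}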
Example #30
0
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	} 


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);
	AUDIT_ARG(process, p);
	if (p != PROC_NULL) {
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get()) 
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 
			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright, 
						get_task_ipcspace(current_task()));
			} else
				tret  = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
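Unlike the full task port from task_for_pid(), the name port returned here is only good for inspection-style interfaces such as task_info(). A hypothetical user-space sketch:

#include <mach/mach.h>
#include <mach/mach_traps.h>
#include <stdio.h>

int
main(void)
{
	mach_port_t name_port = MACH_PORT_NULL;
	struct task_basic_info_64 info;
	mach_msg_type_number_t count = TASK_BASIC_INFO_64_COUNT;

	if (task_name_for_pid(mach_task_self(), 1, &name_port) != KERN_SUCCESS)
		return 1;

	/* task_info() accepts a name port; control calls such as
	 * task_threads() would fail on it. */
	if (task_info(name_port, TASK_BASIC_INFO_64,
	    (task_info_t)&info, &count) == KERN_SUCCESS)
		printf("pid 1 resident size: %llu bytes\n",
		    (unsigned long long)info.resident_size);

	mach_port_deallocate(mach_task_self(), name_port);
	return 0;
}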