Exemplo n.º 1
0
/*
 * change_ruid: switch the real uid of the current process.
 *
 * The process is removed from the per-uid process count of its old real
 * uid and added to the count of the new one.  The credential returned is
 * the one obtained from cratom() on the process's p_ucred, with cr_ruid
 * and cr_ruidinfo already updated to refer to the new uid.
 */
struct ucred *
change_ruid(uid_t ruid)
{
	struct proc *curp = curproc;
	struct ucred *newcr;

	KKASSERT(curp != NULL);

	/*
	 * NOTE(review): cratom() presumably hands back a credential that is
	 * safe to modify in place -- confirm against its definition.
	 */
	newcr = cratom(&curp->p_ucred);

	/* Uncharge the old real uid before its uidinfo is swapped out. */
	chgproccnt(newcr->cr_ruidinfo, -1, 0);

	newcr->cr_ruid = ruid;
	uireplace(&newcr->cr_ruidinfo, uifind(ruid));

	/* cr_ruidinfo now refers to the new uid; charge it for this process. */
	chgproccnt(newcr->cr_ruidinfo, 1, 0);

	return (newcr);
}
Exemplo n.º 2
0
/*
 * sys_setuid: setuid(2) system call handler.
 *
 * Sets the effective uid of the calling process to the requested uid.  If
 * the request equals the current effective uid, or suser() grants
 * privilege, the real and saved uids are set as well and the per-uid
 * process count is moved over to the new real uid.
 *
 * Returns 0 on success, or the error from suser() when the requested uid
 * matches none of the real, saved or effective uids and the caller is not
 * privileged.  retval is not used.
 */
/* ARGSUSED */
int
sys_setuid(struct proc *p, void *v, register_t *retval)
{
	struct sys_setuid_args /* {
		syscallarg(uid_t) uid;
	} */ *uap = v;
	struct pcred *creds = p->p_cred;
	uid_t newuid = SCARG(uap, uid);
	int error;

	/* All three ids already match the request: nothing to change. */
	if (creds->pc_ucred->cr_uid == newuid &&
	    creds->p_ruid == newuid &&
	    creds->p_svuid == newuid) {
		return (0);
	}

	/*
	 * A uid that is none of our real, saved or effective uid may only
	 * be assumed with privilege.
	 */
	if (newuid != creds->p_ruid &&
	    newuid != creds->p_svuid &&
	    newuid != creds->pc_ucred->cr_uid) {
		error = suser(p, 0);
		if (error)
			return (error);
	}

	/*
	 * The request is permitted.  The real and saved uids follow only
	 * when the request equals the effective uid or we are privileged.
	 */
	if (newuid == creds->pc_ucred->cr_uid || suser(p, 0) == 0) {
		if (newuid != creds->p_ruid) {
			/* Move the process count from the old real uid. */
			(void)chgproccnt(creds->p_ruid, -p->p_p->ps_refcnt);
			(void)chgproccnt(newuid, p->p_p->ps_refcnt);
		}
		creds->p_ruid = newuid;
		creds->p_svuid = newuid;
	}

	/*
	 * Work on a copy of the credentials so that other holders of the
	 * old ucred do not observe the new effective uid.
	 */
	creds->pc_ucred = crcopy(creds->pc_ucred);
	creds->pc_ucred->cr_uid = newuid;
	atomic_setbits_int(&p->p_p->ps_flags, PS_SUGID);

	return (0);
}
Exemplo n.º 3
0
/*
 * fork1
 *
 * Description:	common code used by all new process creation other than the
 *		bootstrap of the initial process on the system
 *
 * Parameters:	parent_proc		parent process of the process being
 *					created
 *		child_threadp		pointer to location to receive the
 *					Mach thread_t of the child process
 *		kind			kind of creation being requested
 *
 * Returns:	0			Success
 *		EAGAIN			Process table is full, the per-uid
 *					process limit would be exceeded, or
 *					cloneproc() failed
 *		EINVAL			PROC_CREATE_VFORK requested while
 *					already inside a vfork()
 *		ENOMEM			forkproc() could not allocate the
 *					child (PROC_CREATE_VFORK only)
 *		(other)			Error from mac_proc_check_fork()
 *					(CONFIG_MACF only)
 *
 * Notes:	Permissible values for 'kind':
 *
 *		PROC_CREATE_FORK	Create a complete process which will
 *					return actively running in both the
 *					parent and the child; the child copies
 *					the parent address space.
 *		PROC_CREATE_SPAWN	Create a complete process which will
 *					return actively running in the parent
 *					only after returning actively running
 *					in the child; the child address space
 *					is newly created by an image activator,
 *					after which the child is run.
 *		PROC_CREATE_VFORK	Creates a partial process which will
 *					borrow the parent task, thread, and
 *					uthread to return running in the child;
 *					the child address space and other parts
 *					are lazily created at execve() time, or
 *					the child is terminated, and the parent
 *					does not actively run until that
 *					happens.
 *
 *		At first it may seem strange that we return the child thread
 *		address rather than process structure, since the process is
 *		the only part guaranteed to be "new"; however, since we do
 *		not actually adjust other references between Mach and BSD (see
 *		the block diagram above the implementation of vfork()), this
 *		is the only method which guarantees us the ability to get
 *		back to the other information.
 *
 *		On success in the PROC_CREATE_VFORK case *child_threadp is
 *		set to NULL, since no child thread is created here; the child
 *		proc is reachable through the parent uthread's uu_proc.  On
 *		any error return *child_threadp is left untouched.
 */
int
fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
{
	thread_t parent_thread = (thread_t)current_thread();
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
	proc_t child_proc = NULL;	/* set in switch, but compiler... */
	thread_t child_thread = NULL;
	uid_t uid;
	int count;
	int err = 0;
	int spawn = 0;

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit. The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = kauth_cred_get()->cr_ruid;
	proc_list_lock();
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		proc_list_unlock();
		tablefull("proc");
		return (EAGAIN);
	}
	proc_list_unlock();

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit, which is
	 * always less than what an rlim_t can hold.
	 * (locking protection is provided by list lock held in chgproccnt)
	 *
	 * From here on every failure must leave through 'bad' so that the
	 * count charged here is given back.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 &&
	    (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
	    	err = EAGAIN;
		goto bad;
	}

#if CONFIG_MACF
	/*
	 * Determine if MAC policies applied to the process will allow
	 * it to fork.  This is an advisory-only check.
	 */
	err = mac_proc_check_fork(parent_proc);
	if (err  != 0) {
		goto bad;
	}
#endif

	switch(kind) {
	case PROC_CREATE_VFORK:
		/*
		 * Prevent a vfork while we are in vfork(); we should
		 * also likely be preventing a fork here as well, and this
		 * check should then be outside the switch statement,
		 * since the proc struct contents will copy from the
		 * child and the task/thread/uthread from the parent in
		 * that case.  We do not support vfork() in vfork()
		 * because we don't have to; the same non-requirement
		 * is true of both fork() and posix_spawn() and any
		 * call other than execve() and _exit(), but we've
		 * been historically lenient, so we continue to be so
		 * (for now).
		 *
		 * <rdar://6640521> Probably a source of random panics
		 */
		if (parent_uthread->uu_flag & UT_VFORK) {
			printf("fork1 called within vfork by %s\n", parent_proc->p_comm);
			err = EINVAL;
			goto bad;
		}

		/*
		 * Flag us in progress; if we chose to support vfork() in
		 * vfork(), we would chain our parent at this point (in
		 * effect, a stack push).  We don't, since we actually want
		 * to disallow everything not specified in the standard
		 */
		proc_vfork_begin(parent_proc);

		/* The newly created process comes with signal lock held */
		if ((child_proc = forkproc(parent_proc)) == NULL) {
			/* Failed to allocate new process */
			proc_vfork_end(parent_proc);
			err = ENOMEM;
			goto bad;
		}

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
// XXX END: wants to move to be common code (and safe)

		/*
		 * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD
		 *
		 * Note: this is where we would "push" state instead of setting
		 * it for nested vfork() support (see proc_vfork_end() for
		 * description of issues here).
		 */
		child_proc->task = parent_proc->task;

		child_proc->p_lflag  |= P_LINVFORK;
		child_proc->p_vforkact = parent_thread;
		child_proc->p_stat = SRUN;

		parent_uthread->uu_flag |= UT_VFORK;
		parent_uthread->uu_proc = child_proc;
		parent_uthread->uu_userstate = (void *)act_thread_csave();
		parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask;

		/* temporarily drop thread-set-id state */
		if (parent_uthread->uu_flag & UT_SETUID) {
			parent_uthread->uu_flag |= UT_WASSETUID;
			parent_uthread->uu_flag &= ~UT_SETUID;
		}

		/* blow thread state information */
		/* XXX is this actually necessary, given syscall return? */
		thread_set_child(parent_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

		/*
		 * Preserve synchronization semantics of vfork.  If
		 * waiting for child to exec or exit, set P_PPWAIT
		 * on child, and sleep on our proc (in case of exit).
		 */
		child_proc->p_lflag |= P_LPPWAIT;
		pinsertchild(parent_proc, child_proc);	/* set visible */

		break;

	case PROC_CREATE_SPAWN:
		/*
		 * A spawned process differs from a forked process in that
		 * the spawned process does not carry around the parents
		 * baggage with regard to address space copying, dtrace,
		 * and so on.
		 */
		spawn = 1;

		/* FALLTHROUGH */

	case PROC_CREATE_FORK:
		/*
		 * When we clone the parent process, we are going to inherit
		 * its task attributes and memory, since when we fork, we
		 * will, in effect, create a duplicate of it, with only minor
		 * differences.  Contrarily, spawned processes do not inherit.
		 */
		if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) {
			/* Failed to create thread */
			err = EAGAIN;
			goto bad;
		}

		/* copy current thread state into the child thread (only for fork) */
		if (!spawn) {
			thread_dup(child_thread);
		}

		/* child_proc = child_thread->task->proc; */
		child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread)));

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
// XXX END: wants to move to be common code (and safe)

		/*
		 * Blow thread state information; this is what gives the child
		 * process its "return" value from a fork() call.
		 *
		 * Note: this should probably move to fork() proper, since it
		 * is not relevant to spawn, and the value won't matter
		 * until we resume the child there.  If you are in here
		 * refactoring code, consider doing this at the same time.
		 */
		thread_set_child(child_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

// <rdar://6598155> dtrace code cleanup needed
#if CONFIG_DTRACE
		/*
		 * This code applies to new processes who are copying the task
		 * and thread state and address spaces of their parent process.
		 */
		if (!spawn) {
// <rdar://6598155> call dtrace specific function here instead of all this...
		/*
		 * APPLE NOTE: Solaris does a sprlock() and drops the
		 * proc_lock here. We're cheating a bit and only taking
		 * the p_dtrace_sprlock lock. A full sprlock would
		 * task_suspend the parent.
		 */
		lck_mtx_lock(&parent_proc->p_dtrace_sprlock);

		/*
		 * Remove all DTrace tracepoints from the child process. We
		 * need to do this _before_ duplicating USDT providers since
		 * any associated probes may be immediately enabled.
		 */
		if (parent_proc->p_dtrace_count > 0) {
			dtrace_fasttrap_fork(parent_proc, child_proc);
		}

		lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);

		/*
		 * Duplicate any lazy dof(s). This must be done while NOT
		 * holding the parent sprlock! Lock ordering is
		 * dtrace_dof_mode_lock, then sprlock.  It is imperative we
		 * always call dtrace_lazy_dofs_duplicate, rather than null
		 * check and call if !NULL. If we NULL test, during lazy dof
		 * faulting we can race with the faulting code and proceed
		 * from here to beyond the helpers copy. The lazy dof
		 * faulting will then fail to copy the helpers to the child
		 * process.
		 */
		dtrace_lazy_dofs_duplicate(parent_proc, child_proc);
		
		/*
		 * Duplicate any helper actions and providers. The SFORKING
		 * we set above informs the code to enable USDT probes that
		 * sprlock() may fail because the child is being forked.
		 */
		/*
		 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
		 * never fails to find the child. We do not set SFORKING.
		 */
		if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
			(*dtrace_helpers_fork)(parent_proc, child_proc);
		}

		}
#endif	/* CONFIG_DTRACE */

		break;

	default:
		panic("fork1 called with unknown kind %d", kind);
		break;
	}


	/*
	 * return the thread pointer to the caller; this is still NULL in
	 * the vfork case, where no child thread was created
	 */
	*child_threadp = child_thread;

bad:
	/*
	 * In the error case, we return a 0 value for the returned pid (but
	 * it is ignored in the trampoline due to the error return); this
	 * is probably not necessary.
	 *
	 * The success path falls through to here as well, so the per-uid
	 * process count charged near the top is only given back when err
	 * is set.
	 */
	if (err) {
		(void)chgproccnt(uid, -1);
	}

	return (err);
}
Exemplo n.º 4
0
/*
 * do_setresuid: common worker for changing the real (r), effective (e)
 * and saved (sv) uids of the process that owns LWP l.
 *
 * A value of -1 for r, e or sv means "leave that id alone".  The flags
 * argument (ID_{R,E,S}_EQ_{R,E,S}) lists which of the current real,
 * effective and saved uids each new value may equal without privilege;
 * any other value requires kauth_authorize_process(KAUTH_PROCESS_SETID)
 * to succeed.
 *
 * Returns 0 on success (including when nothing needed changing), or the
 * error from kauth_authorize_process().
 */
int
do_setresuid(struct lwp *l, uid_t r, uid_t e, uid_t sv, u_int flags)
{
	struct proc *p = l->l_proc;
	kauth_cred_t oldcred, newcred;
	int needs_priv, changed, rv;

	/* The replacement credential is allocated before the lock is taken. */
	newcred = kauth_cred_alloc();

	/* Take the write lock on the process credential. */
	proc_crmod_enter();
	oldcred = p->p_cred;

	/*
	 * Each requested id must either be -1, match one of the existing
	 * ids that flags permits for it, or be backed by privilege.
	 */
	needs_priv = 0;
	if (r != -1 &&
	    !((flags & ID_R_EQ_R) && r == kauth_cred_getuid(oldcred)) &&
	    !((flags & ID_R_EQ_E) && r == kauth_cred_geteuid(oldcred)) &&
	    !((flags & ID_R_EQ_S) && r == kauth_cred_getsvuid(oldcred))) {
		needs_priv = 1;
	} else if (e != -1 &&
	    !((flags & ID_E_EQ_R) && e == kauth_cred_getuid(oldcred)) &&
	    !((flags & ID_E_EQ_E) && e == kauth_cred_geteuid(oldcred)) &&
	    !((flags & ID_E_EQ_S) && e == kauth_cred_getsvuid(oldcred))) {
		needs_priv = 1;
	} else if (sv != -1 &&
	    !((flags & ID_S_EQ_R) && sv == kauth_cred_getuid(oldcred)) &&
	    !((flags & ID_S_EQ_E) && sv == kauth_cred_geteuid(oldcred)) &&
	    !((flags & ID_S_EQ_S) && sv == kauth_cred_getsvuid(oldcred))) {
		needs_priv = 1;
	}

	if (needs_priv) {
		rv = kauth_authorize_process(oldcred, KAUTH_PROCESS_SETID,
		    p, NULL, NULL, NULL);
		if (rv != 0) {
			/* Keep the old credential, discard the unused one. */
			proc_crmod_leave(oldcred, newcred, false);
			return rv;
		}
	}

	/* Bail out early when no id would actually change. */
	changed = (r != -1 && r != kauth_cred_getuid(oldcred)) ||
	    (e != -1 && e != kauth_cred_geteuid(oldcred)) ||
	    (sv != -1 && sv != kauth_cred_getsvuid(oldcred));
	if (!changed) {
		proc_crmod_leave(oldcred, newcred, false);
		return 0;
	}

	/* Start from a copy of the current credential and edit the copy. */
	kauth_cred_clone(oldcred, newcred);

	if (r != -1 && r != kauth_cred_getuid(newcred)) {
		u_long extra_lwps;

		/* Move this process from the old real uid's count to r's. */
		(void)chgproccnt(kauth_cred_getuid(newcred), -1);
		(void)chgproccnt(r, 1);

		/* LWP accounting skips the first LWP of the process. */
		KASSERT(mutex_owned(p->p_lock));
		extra_lwps = p->p_nlwps - 1;
		(void)chglwpcnt(kauth_cred_getuid(newcred), -extra_lwps);
		(void)chglwpcnt(r, extra_lwps);

		kauth_cred_setuid(newcred, r);
	}

	if (sv != -1) {
		kauth_cred_setsvuid(newcred, sv);
	}

	if (e != -1) {
		kauth_cred_seteuid(newcred, e);
	}

	/* Install the new credential for the process and its other LWPs. */
	proc_crmod_leave(newcred, oldcred, true);

	return 0;
}
Exemplo n.º 5
0
void
bsd_init(void)
{
	struct uthread *ut;
	unsigned int i;
	struct vfs_context context;
	kern_return_t	ret;
	struct ucred temp_cred;
	struct posix_cred temp_pcred;
#if NFSCLIENT || CONFIG_IMAGEBOOT
	boolean_t       netboot = FALSE;
#endif

#define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */

	throttle_init();

	printf(copyright);
	
	bsd_init_kprintf("calling kmeminit\n");
	kmeminit();
	
	bsd_init_kprintf("calling parse_bsd_args\n");
	parse_bsd_args();

#if CONFIG_DEV_KMEM
	bsd_init_kprintf("calling dev_kmem_init\n");
	dev_kmem_init();
#endif

	/* Initialize kauth subsystem before instancing the first credential */
	bsd_init_kprintf("calling kauth_init\n");
	kauth_init();

	/* Initialize process and pgrp structures. */
	bsd_init_kprintf("calling procinit\n");
	procinit();

	/* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
	tty_init();

	kernproc = &proc0;	/* implicitly bzero'ed */

	/* kernel_task->proc = kernproc; */
	set_bsdtask_info(kernel_task,(void *)kernproc);

	/* give kernproc a name */
	bsd_init_kprintf("calling process_name\n");
	process_name("kernel_task", kernproc);

	/* allocate proc lock group attribute and group */
	bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
	proc_lck_grp_attr= lck_grp_attr_alloc_init();

	proc_lck_grp = lck_grp_alloc_init("proc",  proc_lck_grp_attr);
#if CONFIG_FINE_LOCK_GROUPS
	proc_slock_grp = lck_grp_alloc_init("proc-slock",  proc_lck_grp_attr);
	proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock",  proc_lck_grp_attr);
	proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock",  proc_lck_grp_attr);
	proc_mlock_grp = lck_grp_alloc_init("proc-mlock",  proc_lck_grp_attr);
#endif
	/* Allocate proc lock attribute */
	proc_lck_attr = lck_attr_alloc_init();
#if 0
#if __PROC_INTERNAL_DEBUG
	lck_attr_setdebug(proc_lck_attr);
#endif
#endif

#if CONFIG_FINE_LOCK_GROUPS
	proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
#else
	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_ucred_mlock, proc_lck_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
#endif

	assert(bsd_simul_execs != 0);
	execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	execargs_cache_size = bsd_simul_execs;
	execargs_free_count = bsd_simul_execs;
	execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t));
	bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t));
	
	if (current_task() != kernel_task)
		printf("bsd_init: We have a problem, "
				"current task is not kernel task\n");
	
	bsd_init_kprintf("calling get_bsdthread_info\n");
	ut = (uthread_t)get_bsdthread_info(current_thread());

#if CONFIG_MACF
	/*
	 * Initialize the MAC Framework
	 */
	mac_policy_initbsd();
	kernproc->p_mac_enforce = 0;

#if defined (__i386__) || defined (__x86_64__)
	/*
	 * We currently only support this on i386/x86_64, as that is the
	 * only lock code we have instrumented so far.
	 */
	check_policy_init(policy_check_flags);
#endif
#endif /* MAC */

	/* Initialize System Override call */
	init_system_override();
	
	/*
	 * Create process 0.
	 */
	proc_list_lock();
	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
	kernproc->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
#ifdef CONFIG_FINE_LOCK_GROUPS
	lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
#else
	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);
#endif
	/* There is no other bsd thread this point and is safe without pgrp lock */
	LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
	kernproc->p_listflag |= P_LIST_INPGRP;
	kernproc->p_pgrpid = 0;
	kernproc->p_uniqueid = 0;

	pgrp0.pg_session = &session0;
	pgrp0.pg_membercnt = 1;

	session0.s_count = 1;
	session0.s_leader = kernproc;
	session0.s_listflags = 0;
#ifdef CONFIG_FINE_LOCK_GROUPS
	lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
#else
	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
#endif
	LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
	proc_list_unlock();

	kernproc->task = kernel_task;
	
	kernproc->p_stat = SRUN;
	kernproc->p_flag = P_SYSTEM;
	kernproc->p_lflag = 0;
	kernproc->p_ladvflag = 0;
	
#if DEVELOPMENT || DEBUG
	if (bootarg_disable_aslr)
		kernproc->p_flag |= P_DISABLE_ASLR;
#endif

	kernproc->p_nice = NZERO;
	kernproc->p_pptr = kernproc;

	TAILQ_INIT(&kernproc->p_uthlist);
	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
	
	kernproc->sigwait = FALSE;
	kernproc->sigwait_thread = THREAD_NULL;
	kernproc->exit_thread = THREAD_NULL;
	kernproc->p_csflags = CS_VALID;

	/*
	 * Create credential.  This also Initializes the audit information.
	 */
	bsd_init_kprintf("calling bzero\n");
	bzero(&temp_cred, sizeof(temp_cred));
	bzero(&temp_pcred, sizeof(temp_pcred));
	temp_pcred.cr_ngroups = 1;
	/* kern_proc, shouldn't call up to DS for group membership */
	temp_pcred.cr_flags = CRF_NOMEMBERD;
	temp_cred.cr_audit.as_aia_p = audit_default_aia_p;
	
	bsd_init_kprintf("calling kauth_cred_create\n");
	/*
	 * We have to label the temp cred before we create from it to
	 * properly set cr_ngroups, or the create will fail.
	 */
	posix_cred_label(&temp_cred, &temp_pcred);
	kernproc->p_ucred = kauth_cred_create(&temp_cred); 

	/* update cred on proc */
	PROC_UPDATE_CREDS_ONPROC(kernproc);

	/* give the (already exisiting) initial thread a reference on it */
	bsd_init_kprintf("calling kauth_cred_ref\n");
	kauth_cred_ref(kernproc->p_ucred);
	ut->uu_context.vc_ucred = kernproc->p_ucred;
	ut->uu_context.vc_thread = current_thread();

	TAILQ_INIT(&kernproc->p_aio_activeq);
	TAILQ_INIT(&kernproc->p_aio_doneq);
	kernproc->p_aio_total_count = 0;
	kernproc->p_aio_active_count = 0;

	bsd_init_kprintf("calling file_lock_init\n");
	file_lock_init();

#if CONFIG_MACF
	mac_cred_label_associate_kernel(kernproc->p_ucred);
#endif

	/* Create the file descriptor table. */
	kernproc->p_fd = &filedesc0;
	filedesc0.fd_cmask = cmask;
	filedesc0.fd_knlistsize = -1;
	filedesc0.fd_knlist = NULL;
	filedesc0.fd_knhash = NULL;
	filedesc0.fd_knhashmask = 0;

	/* Create the limits structures. */
	kernproc->p_limit = &limit0;
	for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur = 
			limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
	limit0.pl_refcnt = 1;

	kernproc->p_stats = &pstats0;
	kernproc->p_sigacts = &sigacts0;

	/*
	 * Charge root for one process: launchd.
	 */
	bsd_init_kprintf("calling chgproccnt\n");
	(void)chgproccnt(0, 1);

	/*
	 *	Allocate a kernel submap for pageable memory
	 *	for temporary copying (execve()).
	 */
	{
		vm_offset_t	minimum;

		bsd_init_kprintf("calling kmem_suballoc\n");
		assert(bsd_pageable_map_size != 0);
		ret = kmem_suballoc(kernel_map,
				&minimum,
				(vm_size_t)bsd_pageable_map_size,
				TRUE,
				VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_BSD),
				&bsd_pageable_map);
		if (ret != KERN_SUCCESS) 
			panic("bsd_init: Failed to allocate bsd pageable map");
	}

	/*
	 * Initialize buffers and hash links for buffers
	 *
	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
	 *		happen after a credential has been associated with
	 *		the kernel task.
	 */
	bsd_init_kprintf("calling bsd_bufferinit\n");
	bsd_bufferinit();

	/* Initialize the execve() semaphore */
	bsd_init_kprintf("calling semaphore_create\n");

	if (ret != KERN_SUCCESS)
		panic("bsd_init: Failed to create execve semaphore");

	/*
	 * Initialize the calendar.
	 */
	bsd_init_kprintf("calling IOKitInitializeTime\n");
	IOKitInitializeTime();

	bsd_init_kprintf("calling ubc_init\n");
	ubc_init();

	/*
	 * Initialize device-switches.
	 */
	bsd_init_kprintf("calling devsw_init() \n");
	devsw_init();

	/* Initialize the file systems. */
	bsd_init_kprintf("calling vfsinit\n");
	vfsinit();

#if CONFIG_PROC_UUID_POLICY
	/* Initial proc_uuid_policy subsystem */
	bsd_init_kprintf("calling proc_uuid_policy_init()\n");
	proc_uuid_policy_init();
#endif

#if SOCKETS
	/* Initialize per-CPU cache allocator */
	mcache_init();

	/* Initialize mbuf's. */
	bsd_init_kprintf("calling mbinit\n");
	mbinit();
	net_str_id_init(); /* for mbuf tags */
#endif /* SOCKETS */

	/*
	 * Initializes security event auditing.
	 * XXX: Should/could this occur later?
	 */
#if CONFIG_AUDIT
	bsd_init_kprintf("calling audit_init\n");
 	audit_init();  
#endif

	/* Initialize kqueues */
	bsd_init_kprintf("calling knote_init\n");
	knote_init();

	/* Initialize for async IO */
	bsd_init_kprintf("calling aio_init\n");
	aio_init();

	/* Initialize pipes */
	bsd_init_kprintf("calling pipeinit\n");
	pipeinit();

	/* Initialize SysV shm subsystem locks; the subsystem proper is
	 * initialized through a sysctl.
	 */
#if SYSV_SHM
	bsd_init_kprintf("calling sysv_shm_lock_init\n");
	sysv_shm_lock_init();
#endif
#if SYSV_SEM
	bsd_init_kprintf("calling sysv_sem_lock_init\n");
	sysv_sem_lock_init();
#endif
#if SYSV_MSG
	bsd_init_kprintf("sysv_msg_lock_init\n");
	sysv_msg_lock_init();
#endif
	bsd_init_kprintf("calling pshm_lock_init\n");
	pshm_lock_init();
	bsd_init_kprintf("calling psem_lock_init\n");
	psem_lock_init();

	pthread_init();
	/* POSIX Shm and Sem */
	bsd_init_kprintf("calling pshm_cache_init\n");
	pshm_cache_init();
	bsd_init_kprintf("calling psem_cache_init\n");
	psem_cache_init();
	bsd_init_kprintf("calling time_zone_slock_init\n");
	time_zone_slock_init();
	bsd_init_kprintf("calling select_waitq_init\n");
	select_waitq_init();

	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	bsd_init_kprintf("calling sysctl_register_fixed\n");
	sysctl_register_fixed(); 
	bsd_init_kprintf("calling sysctl_mib_init\n");
	sysctl_mib_init();
#if NETWORKING
	bsd_init_kprintf("calling dlil_init\n");
	dlil_init();
	bsd_init_kprintf("calling proto_kpi_init\n");
	proto_kpi_init();
#endif /* NETWORKING */
#if SOCKETS
	bsd_init_kprintf("calling socketinit\n");
	socketinit();
	bsd_init_kprintf("calling domaininit\n");
	domaininit();
	iptap_init();
#if FLOW_DIVERT
	flow_divert_init();
#endif	/* FLOW_DIVERT */
#endif /* SOCKETS */

	kernproc->p_fd->fd_cdir = NULL;
	kernproc->p_fd->fd_rdir = NULL;

#if CONFIG_FREEZE
#ifndef CONFIG_MEMORYSTATUS
    #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
#endif
	/* Initialise background freezing */
	bsd_init_kprintf("calling memorystatus_freeze_init\n");
	memorystatus_freeze_init();
#endif

#if CONFIG_MEMORYSTATUS
	/* Initialize kernel memory status notifications */
	bsd_init_kprintf("calling memorystatus_init\n");
	memorystatus_init();
#endif /* CONFIG_MEMORYSTATUS */

	bsd_init_kprintf("calling macx_init\n");
	macx_init();

	bsd_init_kprintf("calling acct_init\n");
	acct_init();

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	bsd_init_kprintf("calling bsd_autoconf\n");
	bsd_autoconf();

#if CONFIG_DTRACE
	dtrace_postinit();
#endif

	/*
	 * We attach the loopback interface *way* down here to ensure
	 * it happens after autoconf(), otherwise it becomes the
	 * "primary" interface.
	 */
#include <loop.h>
#if NLOOP > 0
	bsd_init_kprintf("calling loopattach\n");
	loopattach();			/* XXX */
#endif
#if NGIF
	/* Initialize gif interface (after lo0) */
	gif_init();
#endif

#if PFLOG
	/* Initialize packet filter log interface */
	pfloginit();
#endif /* PFLOG */

#if NETHER > 0
	/* Register the built-in dlil ethernet interface family */
	bsd_init_kprintf("calling ether_family_init\n");
	ether_family_init();
#endif /* ETHER */

#if NETWORKING
	/* Call any kext code that wants to run just after network init */
	bsd_init_kprintf("calling net_init_run\n");
	net_init_run();
	
#if CONTENT_FILTER
	cfil_init();
#endif

#if PACKET_MANGLER
	pkt_mnglr_init();
#endif	

#if NECP
	/* Initialize Network Extension Control Policies */
	necp_init();
#endif

	netagent_init();

	/* register user tunnel kernel control handler */
	utun_register_control();
#if IPSEC
	ipsec_register_control();
#endif /* IPSEC */
	netsrc_init();
	nstat_init();
	tcp_cc_init();
#if MPTCP
	mptcp_control_register();
#endif /* MPTCP */
#endif /* NETWORKING */

	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
	vnode_pager_bootstrap();

	bsd_init_kprintf("calling inittodr\n");
	inittodr(0);

	/* Mount the root file system. */
	while( TRUE) {
		int err;

		bsd_init_kprintf("calling setconf\n");
		setconf();
#if NFSCLIENT
		netboot = (mountroot == netboot_mountroot);
#endif

		bsd_init_kprintf("vfs_mountroot\n");
		if (0 == (err = vfs_mountroot()))
			break;
		rootdevice[0] = '\0';
#if NFSCLIENT
		if (netboot) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: failed to mount network root, error %d, %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
#endif
		printf("cannot mount root, errno = %d\n", err);
		boothowto |= RB_ASKNAME;
	}

	IOSecureBSDRoot(rootdevice);

	context.vc_thread = current_thread();
	context.vc_ucred = kernproc->p_ucred;
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;

	bsd_init_kprintf("calling VFS_ROOT\n");
	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
	if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context))
		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
	rootvnode->v_flag |= VROOT;
	(void)vnode_ref(rootvnode);
	(void)vnode_put(rootvnode);
	filedesc0.fd_cdir = rootvnode;

#if NFSCLIENT
	if (netboot) {
		int err;

		netboot = TRUE;
		/* post mount setup */
		if ((err = netboot_setup()) != 0) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
	}
#endif
	

#if CONFIG_IMAGEBOOT
	/*
	 * See if a system disk image is present. If so, mount it and
	 * switch the root vnode to point to it
	 */ 
	if (netboot == FALSE && imageboot_needed()) {
		/* 
		 * An image was found.  No turning back: we're booted
		 * with a kernel from the disk image.
		 */
		imageboot_setup(); 
	}
#endif /* CONFIG_IMAGEBOOT */
  
	/* set initial time; all other resource data is  already zero'ed */
	microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);

#if DEVFS
	{
	    char mounthere[] = "/dev";	/* !const because of internal casting */

	    bsd_init_kprintf("calling devfs_kernel_mount\n");
	    devfs_kernel_mount(mounthere);
	}
#endif /* DEVFS */

	/* Initialize signal state for process 0. */
	bsd_init_kprintf("calling siginit\n");
	siginit(kernproc);

	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
	bsd_utaskbootstrap();

#if defined(__LP64__)
	kernproc->p_flag |= P_LP64;
#endif

	pal_kernel_announce();

	bsd_init_kprintf("calling mountroot_post_hook\n");

	/* invoke post-root-mount hook */
	if (mountroot_post_hook != NULL)
		mountroot_post_hook();

#if 0 /* not yet */
	consider_zone_gc(FALSE);
#endif


	bsd_init_kprintf("done\n");
}
Exemplo n.º 6
0
/*
 * fork1: validate a fork request and, when RFPROC is set, allocate the
 * pieces of a new process before handing off to do_fork().
 *
 * td is the forking thread; fr carries the request (fr_flags, fr_pages,
 * fr_procp, fr_pidp and the fr_pd_* process-descriptor fields).
 *
 * Returns 0 on success, EINVAL for an unsupported or inconsistent flag
 * combination, EAGAIN when a process-count limit or racct_proc_fork()
 * refuses the fork, ENOMEM when the thread, kernel stack, vmspace or swap
 * reservation cannot be obtained, or the error from procdesc_falloc().
 * Without RFPROC the result of fork_norfproc() is returned instead.
 *
 * Every failure after the nprocs increment leaves through fail2, which
 * undoes the increment and sleeps for hz / 2 ticks before returning.
 */
int
fork1(struct thread *td, struct fork_req *fr)
{
	struct proc *p1, *newproc;
	struct thread *td2;
	struct vmspace *vm2;
	struct file *fp_procdesc;
	vm_ooffset_t mem_charged;
	int error, nprocs_new, ok;
	/* State for ppsratecheck(): rate-limits the maxproc console message. */
	static int curfail;
	static struct timeval lastfail;
	int flags, pages;

	flags = fr->fr_flags;
	pages = fr->fr_pages;

	/*
	 * Sanity-check how the caller asked for the result: with RFSTOPPED
	 * it must supply fr_procp and no fr_pidp; otherwise no fr_procp.
	 */
	if ((flags & RFSTOPPED) != 0)
		MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL);
	else
		MPASS(fr->fr_procp == NULL);

	/* Check for the undefined or unimplemented flags. */
	if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0)
		return (EINVAL);

	/* Signal value requires RFTSIGZMB. */
	if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0)
		return (EINVAL);

	/* Can't copy and clear. */
	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		return (EINVAL);

	/* Check the validity of the signal number. */
	if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG)
		return (EINVAL);

	if ((flags & RFPROCDESC) != 0) {
		/* Can't not create a process yet get a process descriptor. */
		if ((flags & RFPROC) == 0)
			return (EINVAL);

		/* Must provide a place to put a procdesc if creating one. */
		if (fr->fr_pd_fd == NULL)
			return (EINVAL);

		/* Check if we are using supported flags. */
		if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0)
			return (EINVAL);
	}

	p1 = td->td_proc;

	/*
	 * Here we don't create a new process, but we divorce
	 * certain parts of a process from itself.
	 */
	if ((flags & RFPROC) == 0) {
		/* No child exists, so report a NULL proc / pid 0 to the caller. */
		if (fr->fr_procp != NULL)
			*fr->fr_procp = NULL;
		else if (fr->fr_pidp != NULL)
			*fr->fr_pidp = 0;
		return (fork_norfproc(td, flags));
	}

	/*
	 * Start with nothing allocated so the shared fail2 path can tell
	 * which resources actually need to be released.
	 */
	fp_procdesc = NULL;
	newproc = NULL;
	vm2 = NULL;

	/*
	 * Increment the nprocs resource before allocations occur.
	 * Although process entries are dynamically created, we still
	 * keep a global limit on the maximum number we will
	 * create. There are hard-limits as to the number of processes
	 * that can run, established by the KVA and memory usage for
	 * the process data.
	 *
	 * Don't allow a nonprivileged user to use the last ten
	 * processes; don't let root exceed the limit.
	 */
	nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1;
	if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred,
	    PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) {
		error = EAGAIN;
		/* The static rate-check state is updated under allproc_lock. */
		sx_xlock(&allproc_lock);
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("maxproc limit exceeded by uid %u (pid %d); "
			    "see tuning(7) and login.conf(5)\n",
			    td->td_ucred->cr_ruid, p1->p_pid);
		}
		sx_xunlock(&allproc_lock);
		goto fail2;
	}

	/*
	 * If required, create a process descriptor in the parent first; we
	 * will abandon it if something goes wrong. We don't finit() until
	 * later.
	 */
	if (flags & RFPROCDESC) {
		error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd,
		    fr->fr_pd_flags, fr->fr_pd_fcaps);
		if (error != 0)
			goto fail2;
	}

	mem_charged = 0;
	/* A request for 0 kernel stack pages means "use the default". */
	if (pages == 0)
		pages = kstack_pages;
	/* Allocate new proc. */
	newproc = uma_zalloc(proc_zone, M_WAITOK);
	td2 = FIRST_THREAD_IN_PROC(newproc);
	if (td2 == NULL) {
		/* The proc came without a thread: allocate and link one. */
		td2 = thread_alloc(pages);
		if (td2 == NULL) {
			error = ENOMEM;
			goto fail2;
		}
		proc_linkup(newproc, td2);
	} else {
		/*
		 * The proc already has a thread; make sure it has a kernel
		 * stack of the requested size, replacing a mismatched one.
		 */
		if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) {
			if (td2->td_kstack != 0)
				vm_thread_dispose(td2);
			if (!thread_alloc_stack(td2, pages)) {
				error = ENOMEM;
				goto fail2;
			}
		}
	}

	/* Without RFMEM the child gets its own copy of the address space. */
	if ((flags & RFMEM) == 0) {
		vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
		if (vm2 == NULL) {
			error = ENOMEM;
			goto fail2;
		}
		if (!swap_reserve(mem_charged)) {
			/*
			 * The swap reservation failed. The accounting
			 * from the entries of the copied vm2 will be
			 * subtracted in vmspace_free(), so force the
			 * reservation there.
			 */
			swap_reserve_force(mem_charged);
			error = ENOMEM;
			goto fail2;
		}
	} else
		vm2 = NULL;

	/*
	 * XXX: This is ugly; when we copy resource usage, we need to bump
	 *      per-cred resource counters.
	 */
	proc_set_cred_init(newproc, crhold(td->td_ucred));

	/*
	 * Initialize resource accounting for the child process.
	 */
	error = racct_proc_fork(p1, newproc);
	if (error != 0) {
		/* Whatever racct reported, the caller sees EAGAIN. */
		error = EAGAIN;
		goto fail1;
	}

#ifdef MAC
	mac_proc_init(newproc);
#endif
	newproc->p_klist = knlist_alloc(&newproc->p_mtx);
	STAILQ_INIT(&newproc->p_ktr);

	/* We have to lock the process tree while we look for a pid. */
	sx_slock(&proctree_lock);
	sx_xlock(&allproc_lock);

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 *
	 * XXXRW: Can we avoid privilege here if it's not needed?
	 */
	error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0);
	if (error == 0)
		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0);
	else {
		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
		    lim_cur(td, RLIMIT_NPROC));
	}
	if (ok) {
		/*
		 * Success: do_fork() is entered with proctree_lock and
		 * allproc_lock still held.
		 */
		do_fork(td, fr, newproc, td2, vm2, fp_procdesc);
		return (0);
	}

	/*
	 * The per-uid limit refused the fork: drop the locks taken above and
	 * unwind.  NOTE(review): p_klist from knlist_alloc() is not released
	 * on this path in the visible code -- confirm it is reclaimed
	 * elsewhere.
	 */
	error = EAGAIN;
	sx_sunlock(&proctree_lock);
	sx_xunlock(&allproc_lock);
#ifdef MAC
	mac_proc_destroy(newproc);
#endif
	racct_proc_exit(newproc);
fail1:
	/* Drop the credential reference taken by crhold() above. */
	crfree(newproc->p_ucred);
	newproc->p_ucred = NULL;
fail2:
	/*
	 * Common unwind.  newproc is still NULL when we fail before the
	 * allocation (presumably uma_zfree() tolerates that -- verify).
	 */
	if (vm2 != NULL)
		vmspace_free(vm2);
	uma_zfree(proc_zone, newproc);
	if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) {
		fdclose(td, fp_procdesc, *fr->fr_pd_fd);
		fdrop(fp_procdesc, td);
	}
	/* Undo the optimistic nprocs increment, then sleep for hz / 2 ticks. */
	atomic_add_int(&nprocs, -1);
	pause("fork", hz / 2);
	return (error);
}
Exemplo n.º 7
0
/*
 * General fork call.  Note that another LWP in the process may call exec()
 * or exit() while we are forking.  It's safe to continue here, because
 * neither operation will complete until all LWPs have exited the process.
 *
 * l1 is the forking LWP and flags is a mask of FORK_* bits.  exitsig is
 * stored in the child's p_exitsig.  stack, stacksize, func and arg are
 * handed to lwp_create(); when func is NULL, child_return is used.  When
 * retval is non-NULL, retval[0] receives the child's pid and retval[1] is
 * set to 0.  When rnewprocp is non-NULL it receives the new proc pointer.
 *
 * Returns 0 on success, EAGAIN when the global process limit, the
 * KAUTH_PROCESS_FORK authorization or the per-uid RLIMIT_NPROC limit
 * refuses the fork, or ENOMEM when no U-area could be allocated.
 */
int
fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc	*p1, *p2, *parent;
	struct plimit   *p1_lim;
	uid_t		uid;
	struct lwp	*l2;
	int		count;
	vaddr_t		uaddr;
	int		tnprocs;
	int		tracefork;
	int		error = 0;

	p1 = l1->l_proc;
	uid = kauth_cred_getuid(l1->l_cred);
	/* Reserve our slot in the global count first; undone on failure. */
	tnprocs = atomic_inc_uint_nv(&nprocs);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.
	 */
	if (__predict_false(tnprocs >= maxproc))
		error = -1;
	else
		error = kauth_authorize_process(l1->l_cred,
		    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);

	if (error) {
		/* Rate-limit state for the "table full" message. */
		static struct timeval lasttfm;
		atomic_dec_uint(&nprocs);
		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc", "increase kern.maxproc or NPROC");
		if (forkfsleep)
			kpause("forkmx", false, forkfsleep, NULL);
		return EAGAIN;
	}

	/*
	 * Enforce limits.
	 */
	count = chgproccnt(uid, 1);
	if (__predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
		if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
		    p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
		    &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0) {
			/* Not allowed to bypass: undo both counters. */
			(void)chgproccnt(uid, -1);
			atomic_dec_uint(&nprocs);
			if (forkfsleep)
				kpause("forkulim", false, forkfsleep, NULL);
			return EAGAIN;
		}
	}

	/*
	 * Allocate virtual address space for the U-area now, while it
	 * is still easy to abort the fork operation if we're out of
	 * kernel virtual address space.
	 */
	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		(void)chgproccnt(uid, -1);
		atomic_dec_uint(&nprocs);
		return ENOMEM;
	}

	/*
	 * We are now committed to the fork.  From here on, we may
	 * block on resources, but resource allocation may NOT fail.
	 */

	/* Allocate new proc. */
	p2 = proc_alloc();

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	memset(&p2->p_startzero, 0,
	    (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
	memcpy(&p2->p_startcopy, &p1->p_startcopy,
	    (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));

	TAILQ_INIT(&p2->p_sigpend.sp_info);

	LIST_INIT(&p2->p_lwps);
	LIST_INIT(&p2->p_sigwaiters);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * Inherit flags we want to keep.  The flags related to SIGCHLD
	 * handling are important in order to keep a consistent behaviour
	 * for the child after the fork.  If we are a 32-bit process, the
	 * child will be too.
	 */
	p2->p_flag =
	    p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
	p2->p_emul = p1->p_emul;
	p2->p_execsw = p1->p_execsw;

	if (flags & FORK_SYSTEM) {
		/*
		 * Mark it as a system process.  Set P_NOCLDWAIT so that
		 * children are reparented to init(8) when they exit.
		 * init(8) can easily wait them out for us.
		 */
		p2->p_flag |= (PK_SYSTEM | PK_NOCLDWAIT);
	}

	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p2->p_reflock);
	cv_init(&p2->p_waitcv, "wait");
	cv_init(&p2->p_lwpcv, "lwpwait");

	/*
	 * Share a lock between the processes if they are to share signal
	 * state: we must synchronize access to it.
	 */
	if (flags & FORK_SHARESIGS) {
		p2->p_lock = p1->p_lock;
		mutex_obj_hold(p1->p_lock);
	} else
		p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	kauth_proc_fork(p1, p2);

	p2->p_raslist = NULL;
#if defined(__HAVE_RAS)
	ras_fork(p1, p2);
#endif

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		vref(p2->p_textvp);

	/* File descriptors: share, start clean, or copy, per the flags. */
	if (flags & FORK_SHAREFILES)
		fd_share(p2);
	else if (flags & FORK_CLEANFILES)
		p2->p_fd = fd_init(NULL);
	else
		p2->p_fd = fd_copy();

	/* XXX racy */
	p2->p_mqueue_cnt = p1->p_mqueue_cnt;

	if (flags & FORK_SHARECWD)
		cwdshare(p2);
	else
		p2->p_cwdi = cwdinit();

	/*
	 * Note: p_limit (rlimit stuff) is copy-on-write, so normally
	 * we just need increase pl_refcnt.
	 */
	p1_lim = p1->p_limit;
	if (!p1_lim->pl_writeable) {
		lim_addref(p1_lim);
		p2->p_limit = p1_lim;
	} else {
		p2->p_limit = lim_copy(p1_lim);
	}

	if (flags & FORK_PPWAIT) {
		/* Mark ourselves as waiting for a child. */
		l1->l_pflag |= LP_VFORKWAIT;
		p2->p_lflag = PL_PPWAIT;
		p2->p_vforklwp = l1;
	} else {
		p2->p_lflag = 0;
	}
	p2->p_sflag = 0;
	p2->p_slflag = 0;
	/* With FORK_NOWAIT the child is parented to initproc, not to p1. */
	parent = (flags & FORK_NOWAIT) ? initproc : p1;
	p2->p_pptr = parent;
	p2->p_ppid = parent->p_pid;
	LIST_INIT(&p2->p_children);

	p2->p_aio = NULL;

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		mutex_enter(&ktrace_lock);
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			ktradref(p2);
		mutex_exit(&ktrace_lock);
	}
#endif

	/*
	 * Create signal actions for the child process.
	 */
	p2->p_sigacts = sigactsinit(p1, flags & FORK_SHARESIGS);
	mutex_enter(p1->p_lock);
	p2->p_sflag |=
	    (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
	sched_proc_fork(p1, p2);
	mutex_exit(p1->p_lock);

	p2->p_stflag = p1->p_stflag;

	/*
	 * p_stats.
	 * Copy parts of p_stats, and zero out the rest.
	 */
	p2->p_stats = pstatscopy(p1->p_stats);

	/*
	 * Set up the new process address space.
	 */
	uvm_proc_fork(p1, p2, (flags & FORK_SHAREVM) ? true : false);

	/*
	 * Finish creating the child process.
	 * It will return through a different path later.
	 */
	lwp_create(l1, p2, uaddr, (flags & FORK_PPWAIT) ? LWP_VFORK : 0,
	    stack, stacksize, (func != NULL) ? func : child_return, arg, &l2,
	    l1->l_class);

	/*
	 * Inherit l_private from the parent.
	 * Note that we cannot use lwp_setprivate() here since that
	 * also sets the CPU TLS register, which is incorrect if the
	 * process has changed that without letting the kernel know.
	 */
	l2->l_private = l1->l_private;

	/*
	 * If emulation has a process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, l1, flags);

	/*
	 * ...and finally, any other random fork hooks that subsystems
	 * might have registered.
	 */
	doforkhooks(p2, p1);

	SDT_PROBE(proc,,,create, p2, p1, flags, 0, 0);

	/*
	 * It's now safe for the scheduler and other processes to see the
	 * child process.
	 */
	mutex_enter(proc_lock);

	if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
		p2->p_lflag |= PL_CONTROLT;

	LIST_INSERT_HEAD(&parent->p_children, p2, p_sibling);
	p2->p_exitsig = exitsig;		/* signal for parent on exit */

	/*
	 * We don't want to tracefork vfork()ed processes because they
	 * will not receive the SIGTRAP until it is too late.  The
	 * FORK_PPWAIT test has to be a bitwise AND: a logical AND is
	 * true for every non-zero flags value, which would suppress
	 * fork tracing whenever any flag at all is passed.
	 */
	tracefork = (p1->p_slflag & (PSL_TRACEFORK|PSL_TRACED)) ==
	    (PSL_TRACEFORK|PSL_TRACED) && (flags & FORK_PPWAIT) == 0;
	if (tracefork) {
		p2->p_slflag |= PSL_TRACED;
		p2->p_opptr = p2->p_pptr;
		if (p2->p_pptr != p1->p_pptr) {
			struct proc *parent1 = p2->p_pptr;

			/*
			 * Take parent1's lock (ordered by address relative
			 * to p2's) around the reparenting below.
			 */
			if (parent1->p_lock < p2->p_lock) {
				if (!mutex_tryenter(parent1->p_lock)) {
					mutex_exit(p2->p_lock);
					mutex_enter(parent1->p_lock);
				}
			} else if (parent1->p_lock > p2->p_lock) {
				mutex_enter(parent1->p_lock);
			}
			parent1->p_slflag |= PSL_CHTRACED;
			proc_reparent(p2, p1->p_pptr);
			if (parent1->p_lock != p2->p_lock)
				mutex_exit(parent1->p_lock);
		}

		/*
		 * Set ptrace status.
		 */
		p1->p_fpid = p2->p_pid;
		p2->p_fpid = p1->p_pid;
	}

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	LIST_INSERT_HEAD(&allproc, p2, p_list);

	p2->p_trace_enabled = trace_is_enabled(p2);
#ifdef __HAVE_SYSCALL_INTERN
	(*p2->p_emul->e_syscall_intern)(p2);
#endif

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	if (ktrpoint(KTR_EMUL))
		p2->p_traceflag |= KTRFAC_TRC_EMUL;

	/*
	 * Notify any interested parties about the new process.
	 */
	if (!SLIST_EMPTY(&p1->p_klist)) {
		/* proc_lock is dropped around the KNOTE() call. */
		mutex_exit(proc_lock);
		KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
		mutex_enter(proc_lock);
	}

	/*
	 * Make child runnable, set start time, and add to run queue except
	 * if the parent requested the child to start in SSTOP state.
	 */
	mutex_enter(p2->p_lock);

	/*
	 * Start profiling.
	 */
	if ((p2->p_stflag & PST_PROFIL) != 0) {
		mutex_spin_enter(&p2->p_stmutex);
		startprofclock(p2);
		mutex_spin_exit(&p2->p_stmutex);
	}

	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	lwp_lock(l2);
	KASSERT(p2->p_nrlwps == 1);
	if (p2->p_sflag & PS_STOPFORK) {
		/* Child starts stopped: account for it, do not enqueue. */
		struct schedstate_percpu *spc = &l2->l_cpu->ci_schedstate;
		p2->p_nrlwps = 0;
		p2->p_stat = SSTOP;
		p2->p_waited = 0;
		p1->p_nstopchild++;
		l2->l_stat = LSSTOP;
		KASSERT(l2->l_wchan == NULL);
		lwp_unlock_to(l2, spc->spc_lwplock);
	} else {
		p2->p_nrlwps = 1;
		p2->p_stat = SACTIVE;
		l2->l_stat = LSRUN;
		sched_enqueue(l2, false);
		lwp_unlock(l2);
	}

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	mutex_exit(p2->p_lock);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, sleep until it clears LP_VFORKWAIT.
	 */
#if 0
	while (l1->l_pflag & LP_VFORKWAIT) {
		cv_wait(&l1->l_waitcv, proc_lock);
	}
#else
	while (p2->p_lflag & PL_PPWAIT)
		cv_wait(&p1->p_waitcv, proc_lock);
#endif

	/*
	 * Let the parent know that we are tracing its child.
	 */
	if (tracefork) {
		ksiginfo_t ksi;

		KSI_INIT_EMPTY(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_lid = l1->l_lid;
		kpsignal(p1, &ksi, NULL);
	}
	mutex_exit(proc_lock);

	return 0;
}
Exemplo n.º 8
0
/*
 * setresuid system call: set the real, effective and saved user IDs of
 * process p independently of one another.  An argument of (uid_t)-1
 * leaves the corresponding ID untouched.
 *
 * Returns 0 on success (including the case where nothing would change),
 * or the error from suser() when a requested ID matches none of the
 * current real, effective or saved IDs and the caller is not privileged.
 * retval is not used.
 */
/* ARGSUSED */
int
sys_setresuid(struct proc *p, void *v, register_t *retval)
{
	struct sys_setresuid_args /* {
		syscallarg(uid_t) ruid;
		syscallarg(uid_t) euid;
		syscallarg(uid_t) suid;
	} */ *uap = v;
	struct pcred *pc = p->p_cred;
	uid_t ruid, euid, suid;
	int error;

	ruid = SCARG(uap, ruid);
	euid = SCARG(uap, euid);
	suid = SCARG(uap, suid);

	/*
	 * Nothing to do when every argument is either "leave alone" or
	 * already equal to the current value.  The sentinel is spelled
	 * (uid_t)-1, as in the rest of this function, so the comparison
	 * stays within the unsigned uid_t type.
	 */
	if ((ruid == (uid_t)-1 || ruid == pc->p_ruid) &&
	    (euid == (uid_t)-1 || euid == pc->pc_ucred->cr_uid) &&
	    (suid == (uid_t)-1 || suid == pc->p_svuid))
		return (0);			/* no change */

	/*
	 * Any of the real, effective, and saved uids may be changed
	 * to the current value of one of the three (root is not limited).
	 */
	if (ruid != (uid_t)-1 &&
	    ruid != pc->p_ruid &&
	    ruid != pc->pc_ucred->cr_uid &&
	    ruid != pc->p_svuid &&
	    (error = suser(p, 0)))
		return (error);

	if (euid != (uid_t)-1 &&
	    euid != pc->p_ruid &&
	    euid != pc->pc_ucred->cr_uid &&
	    euid != pc->p_svuid &&
	    (error = suser(p, 0)))
		return (error);

	if (suid != (uid_t)-1 &&
	    suid != pc->p_ruid &&
	    suid != pc->pc_ucred->cr_uid &&
	    suid != pc->p_svuid &&
	    (error = suser(p, 0)))
		return (error);

	/*
	 * Note that unlike the other set*uid() calls, each
	 * uid type is set independently of the others.
	 */
	if (ruid != (uid_t)-1 && ruid != pc->p_ruid) {
		/*
		 * Transfer proc count to new user.  The old uid must be
		 * debited before p_ruid is overwritten.
		 */
		(void)chgproccnt(pc->p_ruid, -p->p_p->ps_refcnt);
		(void)chgproccnt(ruid, p->p_p->ps_refcnt);
		pc->p_ruid = ruid;
	}
	if (euid != (uid_t)-1 && euid != pc->pc_ucred->cr_uid) {
		/*
		 * Copy credentials so other references do not see our changes.
		 */
		pc->pc_ucred = crcopy(pc->pc_ucred);
		pc->pc_ucred->cr_uid = euid;
	}
	if (suid != (uid_t)-1 && suid != pc->p_svuid)
		pc->p_svuid = suid;

	/* Flag the process as having changed its IDs. */
	atomic_setbits_int(&p->p_p->ps_flags, PS_SUGID);
	return (0);
}
Exemplo n.º 9
0
/*
 * This function is called very early on in the Mach startup, from the
 * function start_kernel_threads() in osfmk/kern/startup.c.  It's called
 * in the context of the current (startup) task using a call to the
 * function kernel_thread_create() to jump into start_kernel_threads().
 * Internally, kernel_thread_create() calls thread_create_internal(),
 * which calls uthread_alloc().  The function of uthread_alloc() is
 * normally to allocate a uthread structure, and fill out the uu_sigmask,
 * uu_context fields.  It skips filling these out in the case of the "task"
 * being "kernel_task", because the order of operation is inverted.  To
 * account for that, we need to manually fill in at least the contents
 * of the uu_context.vc_ucred field so that the uthread structure can be
 * used like any other.
 */
void
bsd_init(void)
{
	struct uthread *ut;
	unsigned int i;
#if __i386__ || __x86_64__
	int error;
#endif	
	struct vfs_context context;
	kern_return_t	ret;
	struct ucred temp_cred;

#define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */

	kernel_flock = funnel_alloc(KERNEL_FUNNEL);
	if (kernel_flock == (funnel_t *)0 ) {
		panic("bsd_init: Failed to allocate kernel funnel");
	}
        
	printf(copyright);
	
	bsd_init_kprintf("calling kmeminit\n");
	kmeminit();
	
	bsd_init_kprintf("calling parse_bsd_args\n");
	parse_bsd_args();

	/* Initialize kauth subsystem before instancing the first credential */
	bsd_init_kprintf("calling kauth_init\n");
	kauth_init();

	/* Initialize process and pgrp structures. */
	bsd_init_kprintf("calling procinit\n");
	procinit();

	/* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
	tty_init();

	kernproc = &proc0;	/* implicitly bzero'ed */

	/* kernel_task->proc = kernproc; */
	set_bsdtask_info(kernel_task,(void *)kernproc);

	/* give kernproc a name */
	bsd_init_kprintf("calling process_name\n");
	process_name("kernel_task", kernproc);

	/* allocate proc lock group attribute and group */
	bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
	proc_lck_grp_attr= lck_grp_attr_alloc_init();

	proc_lck_grp = lck_grp_alloc_init("proc",  proc_lck_grp_attr);
#ifndef CONFIG_EMBEDDED
	proc_slock_grp = lck_grp_alloc_init("proc-slock",  proc_lck_grp_attr);
	proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock",  proc_lck_grp_attr);
	proc_mlock_grp = lck_grp_alloc_init("proc-mlock",  proc_lck_grp_attr);
#endif
	/* Allocate proc lock attribute */
	proc_lck_attr = lck_attr_alloc_init();
#if 0
#if __PROC_INTERNAL_DEBUG
	lck_attr_setdebug(proc_lck_attr);
#endif
#endif

#ifdef CONFIG_EMBEDDED
	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
#else	
	proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
#endif

	execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	execargs_cache_size = bsd_simul_execs;
	execargs_free_count = bsd_simul_execs;
	execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t));
	bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t));
	
	if (current_task() != kernel_task)
		printf("bsd_init: We have a problem, "
				"current task is not kernel task\n");
	
	bsd_init_kprintf("calling get_bsdthread_info\n");
	ut = (uthread_t)get_bsdthread_info(current_thread());

#if CONFIG_MACF
	/*
	 * Initialize the MAC Framework
	 */
	mac_policy_initbsd();
	kernproc->p_mac_enforce = 0;
#endif /* MAC */

	/*
	 * Create process 0.
	 */
	proc_list_lock();
	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
	kernproc->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);	
#else
	lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
#endif
	/* There is no other bsd thread this point and is safe without pgrp lock */
	LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
	kernproc->p_listflag |= P_LIST_INPGRP;
	kernproc->p_pgrpid = 0;

	pgrp0.pg_session = &session0;
	pgrp0.pg_membercnt = 1;

	session0.s_count = 1;
	session0.s_leader = kernproc;
	session0.s_listflags = 0;
#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
#else
	lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
#endif
	LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
	proc_list_unlock();

#if CONFIG_LCTX
	kernproc->p_lctx = NULL;
#endif

	kernproc->task = kernel_task;
	
	kernproc->p_stat = SRUN;
	kernproc->p_flag = P_SYSTEM;
	kernproc->p_nice = NZERO;
	kernproc->p_pptr = kernproc;

	TAILQ_INIT(&kernproc->p_uthlist);
	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
	
	kernproc->sigwait = FALSE;
	kernproc->sigwait_thread = THREAD_NULL;
	kernproc->exit_thread = THREAD_NULL;
	kernproc->p_csflags = CS_VALID;

	/*
	 * Create credential.  This also Initializes the audit information.
	 */
	bsd_init_kprintf("calling bzero\n");
	bzero(&temp_cred, sizeof(temp_cred));
	temp_cred.cr_ngroups = 1;

	temp_cred.cr_audit.as_aia_p = &audit_default_aia;
        /* XXX the following will go away with cr_au */
	temp_cred.cr_au.ai_auid = AU_DEFAUDITID;

	bsd_init_kprintf("calling kauth_cred_create\n");
	kernproc->p_ucred = kauth_cred_create(&temp_cred); 

	/* give the (already exisiting) initial thread a reference on it */
	bsd_init_kprintf("calling kauth_cred_ref\n");
	kauth_cred_ref(kernproc->p_ucred);
	ut->uu_context.vc_ucred = kernproc->p_ucred;
	ut->uu_context.vc_thread = current_thread();

	TAILQ_INIT(&kernproc->p_aio_activeq);
	TAILQ_INIT(&kernproc->p_aio_doneq);
	kernproc->p_aio_total_count = 0;
	kernproc->p_aio_active_count = 0;

	bsd_init_kprintf("calling file_lock_init\n");
	file_lock_init();

#if CONFIG_MACF
	mac_cred_label_associate_kernel(kernproc->p_ucred);
	mac_task_label_update_cred (kernproc->p_ucred, (struct task *) kernproc->task);
#endif

	/* Create the file descriptor table. */
	filedesc0.fd_refcnt = 1+1;	/* +1 so shutdown will not _FREE_ZONE */
	kernproc->p_fd = &filedesc0;
	filedesc0.fd_cmask = cmask;
	filedesc0.fd_knlistsize = -1;
	filedesc0.fd_knlist = NULL;
	filedesc0.fd_knhash = NULL;
	filedesc0.fd_knhashmask = 0;

	/* Create the limits structures. */
	kernproc->p_limit = &limit0;
	for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur = 
			limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
	limit0.pl_refcnt = 1;

	kernproc->p_stats = &pstats0;
	kernproc->p_sigacts = &sigacts0;

	/*
	 * Charge root for two  processes: init and mach_init.
	 */
	bsd_init_kprintf("calling chgproccnt\n");
	(void)chgproccnt(0, 1);

	/*
	 *	Allocate a kernel submap for pageable memory
	 *	for temporary copying (execve()).
	 */
	{
		vm_offset_t	minimum;

		bsd_init_kprintf("calling kmem_suballoc\n");
		ret = kmem_suballoc(kernel_map,
				&minimum,
				(vm_size_t)bsd_pageable_map_size,
				TRUE,
				VM_FLAGS_ANYWHERE,
				&bsd_pageable_map);
		if (ret != KERN_SUCCESS) 
			panic("bsd_init: Failed to allocate bsd pageable map");
	}

	/*
	 * Initialize buffers and hash links for buffers
	 *
	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
	 *		happen after a credential has been associated with
	 *		the kernel task.
	 */
	bsd_init_kprintf("calling bsd_bufferinit\n");
	bsd_bufferinit();

	/* Initialize the execve() semaphore */
	bsd_init_kprintf("calling semaphore_create\n");

	if (ret != KERN_SUCCESS)
		panic("bsd_init: Failed to create execve semaphore");

	/*
	 * Initialize the calendar.
	 */
	bsd_init_kprintf("calling IOKitInitializeTime\n");
	IOKitInitializeTime();

	if (turn_on_log_leaks && !new_nkdbufs)
		new_nkdbufs = 200000;
	start_kern_tracing(new_nkdbufs);
	if (turn_on_log_leaks)
		log_leaks = 1;

	bsd_init_kprintf("calling ubc_init\n");
	ubc_init();

	/* Initialize the file systems. */
	bsd_init_kprintf("calling vfsinit\n");
	vfsinit();

#if SOCKETS
	/* Initialize per-CPU cache allocator */
	mcache_init();

	/* Initialize mbuf's. */
	bsd_init_kprintf("calling mbinit\n");
	mbinit();
	net_str_id_init(); /* for mbuf tags */
#endif /* SOCKETS */

	/*
	 * Initializes security event auditing.
	 * XXX: Should/could this occur later?
	 */
#if CONFIG_AUDIT
	bsd_init_kprintf("calling audit_init\n");
 	audit_init();  
#endif

	/* Initialize kqueues */
	bsd_init_kprintf("calling knote_init\n");
	knote_init();

	/* Initialize for async IO */
	bsd_init_kprintf("calling aio_init\n");
	aio_init();

	/* Initialize pipes */
	bsd_init_kprintf("calling pipeinit\n");
	pipeinit();

	/* Initialize SysV shm subsystem locks; the subsystem proper is
	 * initialized through a sysctl.
	 */
#if SYSV_SHM
	bsd_init_kprintf("calling sysv_shm_lock_init\n");
	sysv_shm_lock_init();
#endif
#if SYSV_SEM
	bsd_init_kprintf("calling sysv_sem_lock_init\n");
	sysv_sem_lock_init();
#endif
#if SYSV_MSG
	bsd_init_kprintf("sysv_msg_lock_init\n");
	sysv_msg_lock_init();
#endif
	bsd_init_kprintf("calling pshm_lock_init\n");
	pshm_lock_init();
	bsd_init_kprintf("calling psem_lock_init\n");
	psem_lock_init();

	pthread_init();
	/* POSIX Shm and Sem */
	bsd_init_kprintf("calling pshm_cache_init\n");
	pshm_cache_init();
	bsd_init_kprintf("calling psem_cache_init\n");
	psem_cache_init();
	bsd_init_kprintf("calling time_zone_slock_init\n");
	time_zone_slock_init();

	/* Stack snapshot facility lock */
	stackshot_lock_init();
	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	bsd_init_kprintf("calling sysctl_register_fixed\n");
	sysctl_register_fixed(); 
	bsd_init_kprintf("calling sysctl_mib_init\n");
	sysctl_mib_init();
#if NETWORKING
	bsd_init_kprintf("calling dlil_init\n");
	dlil_init();
	bsd_init_kprintf("calling proto_kpi_init\n");
	proto_kpi_init();
#endif /* NETWORKING */
#if SOCKETS
	bsd_init_kprintf("calling socketinit\n");
	socketinit();
	bsd_init_kprintf("calling domaininit\n");
	domaininit();
#endif /* SOCKETS */

	kernproc->p_fd->fd_cdir = NULL;
	kernproc->p_fd->fd_rdir = NULL;

#if CONFIG_EMBEDDED
	/* Initialize kernel memory status notifications */
	bsd_init_kprintf("calling kern_memorystatus_init\n");
	kern_memorystatus_init();
#endif

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	/* kick off timeout driven events by calling first time */
	thread_wakeup(&lbolt);
	timeout(lightning_bolt, 0, hz);

	bsd_init_kprintf("calling bsd_autoconf\n");
	bsd_autoconf();

#if CONFIG_DTRACE
	dtrace_postinit();
#endif

	/*
	 * We attach the loopback interface *way* down here to ensure
	 * it happens after autoconf(), otherwise it becomes the
	 * "primary" interface.
	 */
#include <loop.h>
#if NLOOP > 0
	bsd_init_kprintf("calling loopattach\n");
	loopattach();			/* XXX */
#endif

#if PFLOG
	/* Initialize packet filter log interface */
	pfloginit();
#endif /* PFLOG */

#if NETHER > 0
	/* Register the built-in dlil ethernet interface family */
	bsd_init_kprintf("calling ether_family_init\n");
	ether_family_init();
#endif /* ETHER */

#if NETWORKING
	/* Call any kext code that wants to run just after network init */
	bsd_init_kprintf("calling net_init_run\n");
	net_init_run();
	
	/* register user tunnel kernel control handler */
	utun_register_control();
#endif /* NETWORKING */

	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
	vnode_pager_bootstrap();
#if 0
	/* XXX Hack for early debug stop */
	printf("\nabout to sleep for 10 seconds\n");
	IOSleep( 10 * 1000 );
	/* Debugger("hello"); */
#endif

	bsd_init_kprintf("calling inittodr\n");
	inittodr(0);

#if CONFIG_EMBEDDED
	{
		/* print out early VM statistics */
		kern_return_t kr1;
		vm_statistics_data_t stat;
		mach_msg_type_number_t count;

		count = HOST_VM_INFO_COUNT;
		kr1 = host_statistics(host_self(),
				      HOST_VM_INFO,
				      (host_info_t)&stat,
				      &count);
		kprintf("Mach Virtual Memory Statistics (page size of 4096) bytes\n"
			"Pages free:\t\t\t%u.\n"
			"Pages active:\t\t\t%u.\n"
			"Pages inactive:\t\t\t%u.\n"
			"Pages wired down:\t\t%u.\n"
			"\"Translation faults\":\t\t%u.\n"
			"Pages copy-on-write:\t\t%u.\n"
			"Pages zero filled:\t\t%u.\n"
			"Pages reactivated:\t\t%u.\n"
			"Pageins:\t\t\t%u.\n"
			"Pageouts:\t\t\t%u.\n"
			"Object cache: %u hits of %u lookups (%d%% hit rate)\n",

			stat.free_count,
			stat.active_count,
			stat.inactive_count,
			stat.wire_count,
			stat.faults,
			stat.cow_faults,
			stat.zero_fill_count,
			stat.reactivations,
			stat.pageins,
			stat.pageouts,
			stat.hits,
			stat.lookups,
			(stat.hits == 0) ? 100 :
			                   ((stat.lookups * 100) / stat.hits));
	}
#endif /* CONFIG_EMBEDDED */
	
	/* Mount the root file system. */
	while( TRUE) {
		int err;

		bsd_init_kprintf("calling setconf\n");
		setconf();

		bsd_init_kprintf("vfs_mountroot\n");
		if (0 == (err = vfs_mountroot()))
			break;
		rootdevice[0] = '\0';
#if NFSCLIENT
		if (mountroot == netboot_mountroot) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: failed to mount network root, error %d, %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
#endif
		printf("cannot mount root, errno = %d\n", err);
		boothowto |= RB_ASKNAME;
	}

	IOSecureBSDRoot(rootdevice);

	context.vc_thread = current_thread();
	context.vc_ucred = kernproc->p_ucred;
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;

	bsd_init_kprintf("calling VFS_ROOT\n");
	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
	if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context))
		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
	rootvnode->v_flag |= VROOT;
	(void)vnode_ref(rootvnode);
	(void)vnode_put(rootvnode);
	filedesc0.fd_cdir = rootvnode;

#if NFSCLIENT
	if (mountroot == netboot_mountroot) {
		int err;
		/* post mount setup */
		if ((err = netboot_setup()) != 0) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
	}
#endif
	

#if CONFIG_IMAGEBOOT
	/*
	 * See if a system disk image is present. If so, mount it and
	 * switch the root vnode to point to it
	 */ 
  
	if(imageboot_needed()) {
		int err;

		/* An image was found */
		if((err = imageboot_setup())) {
			/*
			 * this is not fatal. Keep trying to root
			 * off the original media
			 */
			printf("%s: imageboot could not find root, %d\n",
				__FUNCTION__, err);
		}
	}
#endif /* CONFIG_IMAGEBOOT */
  
	/* set initial time; all other resource data is  already zero'ed */
	microtime(&kernproc->p_start);
	kernproc->p_stats->p_start = kernproc->p_start;	/* for compat */

#if DEVFS
	{
	    char mounthere[] = "/dev";	/* !const because of internal casting */

	    bsd_init_kprintf("calling devfs_kernel_mount\n");
	    devfs_kernel_mount(mounthere);
	}
#endif /* DEVFS */
	
	/* Initialize signal state for process 0. */
	bsd_init_kprintf("calling siginit\n");
	siginit(kernproc);

	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
	bsd_utaskbootstrap();

#if defined(__LP64__)
	kernproc->p_flag |= P_LP64;
	printf("Kernel is LP64\n");
#endif
#if __i386__ || __x86_64__
	/* this should be done after the root filesystem is mounted */
	error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
	// 10/30/08 - gab: <rdar://problem/6324501>
	// if default 'translate' can't be found, see if the understudy is available
	if (ENOENT == error) {
		strlcpy(exec_archhandler_ppc.path, kRosettaStandIn_str, MAXPATHLEN);
		error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
	}
	if (error) /* XXX make more generic */
		exec_archhandler_ppc.path[0] = 0;
#endif	

	bsd_init_kprintf("calling mountroot_post_hook\n");

	/* invoke post-root-mount hook */
	if (mountroot_post_hook != NULL)
		mountroot_post_hook();

#if 0 /* not yet */
	consider_zone_gc(FALSE);
#endif

	bsd_init_kprintf("done\n");
}
Exemplo n.º 10
0
/*
 * fork1 --
 *	Create a new proc (p2) as a child of p1.
 *
 * Parameters:
 *	p1		parent; credentials, limits, files, signal state and
 *			the copied region of struct proc are taken from it.
 *	exitsig		stored in the child's p_exitsig.
 *	flags		FORK_* bits selecting what is shared or copied
 *			(FORK_THREAD, FORK_SHAREVM, FORK_SHAREFILES,
 *			FORK_CLEANFILES, FORK_SIGHAND, FORK_PPWAIT,
 *			FORK_NOZOMBIE, FORK_PTRACE) and which forkstat
 *			counter is bumped (FORK_FORK/FORK_VFORK/FORK_RFORK).
 *	stack,
 *	stacksize	handed unchanged to uvm_fork().
 *	func, arg	handed to uvm_fork(); when func is NULL child_return
 *			is used, and when arg is NULL p2 is used.
 *	retval		if non-NULL, retval[0] receives the child pid and
 *			retval[1] is set to 0.
 *	rnewprocp	if non-NULL, receives a pointer to the new proc.
 *
 * Returns 0 on success, EAGAIN when the global process limit or the
 * per-uid RLIMIT_NPROC limit would be exceeded, and ENOMEM when the
 * USPACE allocation fails.  After that allocation succeeds the function
 * no longer fails.
 */
int
fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc *p2;
	uid_t uid;
	struct vmspace *vm;
	int count;
	vaddr_t uaddr;
	int s;
	extern void endtsleep(void *);
	extern void realitexpire(void *);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create. We reserve
	 * the last 5 processes to root. The variable nprocs is the current
	 * number of processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 5 && uid != 0) || nprocs >= maxproc) {
		static struct timeval lasttfm;

		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc");
		return (EAGAIN);
	}
	nprocs++;

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		nprocs--;
		return (EAGAIN);
	}

	uaddr = uvm_km_alloc1(kernel_map, USPACE, USPACE_ALIGN, 1);
	if (uaddr == 0) {
		/* Undo both counters bumped above before bailing out. */
		chgproccnt(uid, -1);
		nprocs--;
		return (ENOMEM);
	}

	/*
	 * From now on, we're committed to the fork and cannot fail.
	 */

	/* Allocate new proc. */
	p2 = pool_get(&proc_pool, PR_WAITOK);

	p2->p_stat = SIDL;			/* protect against others */
	p2->p_exitsig = exitsig;
	p2->p_forw = p2->p_back = NULL;

	/*
	 * Attach the new proc to a process: a thread joins its parent's
	 * process, anything else gets a fresh one.  P_THREAD is NOT set
	 * here, because p_flag has not been initialized yet; it is set
	 * below, right after p_flag is reset.
	 */
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		p2->p_p = p1->p_p;
		TAILQ_INSERT_TAIL(&p2->p_p->ps_threads, p2, p_thr_link);
	} else {
		process_new(p2, p1);
	}
#else
	process_new(p2, p1);
#endif

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	/*
	 * Initialize the timeouts.
	 */
	timeout_set(&p2->p_sleep_to, endtsleep, p2);
	timeout_set(&p2->p_realit_to, realitexpire, p2);

#if defined(__HAVE_CPUINFO)
	p2->p_cpu = p1->p_cpu;
#endif

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = 0;
#ifdef RTHREADS
	/*
	 * Mark threads only after the reset above; setting P_THREAD any
	 * earlier would be wiped out by "p_flag = 0".
	 */
	if (flags & FORK_THREAD)
		atomic_setbits_int(&p2->p_flag, P_THREAD);
#endif
	p2->p_emul = p1->p_emul;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	atomic_setbits_int(&p2->p_flag, p1->p_flag & (P_SUGID | P_SUGIDEXEC));
	if (flags & FORK_PTRACE)
		atomic_setbits_int(&p2->p_flag, p1->p_flag & P_TRACED);
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing */
	} else
#endif
	{
		/* New process: give it its own copy of the parent's pcred. */
		p2->p_p->ps_cred = pool_get(&pcred_pool, PR_WAITOK);
		bcopy(p1->p_p->ps_cred, p2->p_p->ps_cred, sizeof(*p2->p_p->ps_cred));
		p2->p_p->ps_cred->p_refcnt = 1;
		crhold(p1->p_ucred);
	}

	TAILQ_INIT(&p2->p_selects);

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	if (flags & FORK_CLEANFILES)
		p2->p_fd = fdinit(p1);
	else if (flags & FORK_SHAREFILES)
		p2->p_fd = fdshare(p1);
	else
		p2->p_fd = fdcopy(p1);

	/*
	 * If ps_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing */
	} else
#endif
	{
		if (p1->p_p->ps_limit->p_lflags & PL_SHAREMOD)
			p2->p_p->ps_limit = limcopy(p1->p_p->ps_limit);
		else {
			p2->p_p->ps_limit = p1->p_p->ps_limit;
			p2->p_p->ps_limit->p_refcnt++;
		}
	}

	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		atomic_setbits_int(&p2->p_flag, P_CONTROLT);
	if (flags & FORK_PPWAIT)
		atomic_setbits_int(&p2->p_flag, P_PPWAIT);
	p2->p_pptr = p1;
	if (flags & FORK_NOZOMBIE)
		atomic_setbits_int(&p2->p_flag, P_NOZOMBIE);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * set priority of child to be that of parent
	 * XXX should move p_estcpu into the region of struct proc which gets
	 * copied.
	 */
	scheduler_fork_hook(p1, p2);

	/*
	 * Create signal actions for the child process.
	 */
	if (flags & FORK_SIGHAND)
		sigactsshare(p1, p2);
	else
		p2->p_sigacts = sigactsinit(p1);

	/*
	 * If emulation has process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, p1);

	p2->p_addr = (struct user *)uaddr;

	/*
	 * Finish creating the child process.  It will return through a
	 * different path later.
	 */
	uvm_fork(p1, p2, ((flags & FORK_SHAREVM) ? TRUE : FALSE), stack,
	    stacksize, func ? func : child_return, arg ? arg : p2);

	timeout_set(&p2->p_stats->p_virt_to, virttimer_trampoline, p2);
	timeout_set(&p2->p_stats->p_prof_to, proftimer_trampoline, p2);

	vm = p2->p_vmspace;

	/* Account the fork under exactly one forkstat category. */
	if (flags & FORK_FORK) {
		forkstat.cntfork++;
		forkstat.sizfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_VFORK) {
		forkstat.cntvfork++;
		forkstat.sizvfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_RFORK) {
		forkstat.cntrfork++;
		forkstat.sizrfork += vm->vm_dsize + vm->vm_ssize;
	} else {
		forkstat.cntkthread++;
		forkstat.sizkthread += vm->vm_dsize + vm->vm_ssize;
	}

	/* Find an unused pid satisfying 1 <= lastpid <= PID_MAX */
	do {
		lastpid = 1 + (randompid ? arc4random() : lastpid) % PID_MAX;
	} while (pidtaken(lastpid));
	p2->p_pid = lastpid;

	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	if (p2->p_flag & P_TRACED) {
		p2->p_oppid = p1->p_pid;
		if (p2->p_pptr != p1->p_pptr)
			proc_reparent(p2, p1->p_pptr);

		/*
		 * Set ptrace status.
		 */
		if (flags & FORK_FORK) {
			p2->p_ptstat = malloc(sizeof(*p2->p_ptstat),
			    M_SUBPROC, M_WAITOK);
			p1->p_ptstat->pe_report_event = PTRACE_FORK;
			p2->p_ptstat->pe_report_event = PTRACE_FORK;
			p1->p_ptstat->pe_other_pid = p2->p_pid;
			p2->p_ptstat->pe_other_pid = p1->p_pid;
		}
	}

#if NSYSTRACE > 0
	if (ISSET(p1->p_flag, P_SYSTRACE))
		systrace_fork(p1, p2);
#endif

	/*
	 * Make child runnable, set start time, and add to run queue.
	 */
	SCHED_LOCK(s);
	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	p2->p_stat = SRUN;
	setrunqueue(p2);
	SCHED_UNLOCK(s);

	/*
	 * Notify any interested parties about the new process.
	 */
	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	if (flags & FORK_PPWAIT)
		while (p2->p_flag & P_PPWAIT)
			tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * If we're tracing the child, alert the parent too.
	 */
	if ((flags & FORK_PTRACE) && (p1->p_flag & P_TRACED))
		psignal(p1, SIGTRAP);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	return (0);
}