Example #1
/*
 * Destroy a process structure that resulted from a call to forkproc(), but
 * which must be returned to the system because of a subsequent failure
 * preventing it from becoming active.
 *
 * Parameters:	p			The incomplete process from forkproc()
 *
 * Returns:	(void)
 *
 * Note:	This function should only be used in an error handler following
 *		a call to forkproc().
 *
 *		Operations occur in reverse order of those in forkproc().
 */
void
forkproc_free(proc_t p)
{

	/* We held the signal and transition locks; drop them */
	proc_signalend(p, 0);
	proc_transend(p, 0);

	/*
	 * If we have our own copy of the resource limits structure, we
	 * need to free it.  If it's a shared copy, we need to drop our
	 * reference on it.
	 */
	proc_limitdrop(p, 0);
	p->p_limit = NULL;

#if SYSV_SHM
	/* Need to drop references to the shared memory segment(s), if any */
	if (p->vm_shm) {
		/*
		 * Use shmexec(): we have no address space, so no mappings
		 *
		 * XXX Yes, the routine is badly named.
		 */
		shmexec(p);
	}
#endif

	/* Need to undo the effects of the fdcopy(), if any */
	fdfree(p);

	/*
	 * Drop the reference on a text vnode pointer, if any
	 * XXX This code is broken in forkproc(); see <rdar://4256419>;
	 * XXX if anyone ever uses this field, we will be extremely unhappy.
	 */
	if (p->p_textvp) {
		vnode_rele(p->p_textvp);
		p->p_textvp = NULL;
	}

	/* Stop the profiling clock */
	stopprofclock(p);

	/* Release the credential reference */
	kauth_cred_unref(&p->p_ucred);

	proc_list_lock();
	/* Decrement the count of processes in the system */
	nprocs--;
	proc_list_unlock();

	thread_call_free(p->p_rcall);

	/* Free allocated memory */
	FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS);
	FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS);
	proc_checkdeadrefs(p);
	FREE_ZONE(p, sizeof *p, M_PROC);
}
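
A minimal sketch of the only supported usage pattern described above: forkproc() succeeds, a later step fails, and forkproc_free() unwinds.  The later_step() name is hypothetical; cloneproc() in Example #3 shows the real case, where fork_create_child() is the step that can fail.

	proc_t child_proc;

	if ((child_proc = forkproc(parent_proc)) == NULL)
		return (ENOMEM);

	if (later_step(child_proc) != 0) {	/* hypothetical failure */
		forkproc_free(child_proc);	/* unwind in reverse order of forkproc() */
		return (EAGAIN);
	}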
Example #2
static int  
sd_callback2(proc_t p, void * args)
{
	struct sd_iterargs * sd = (struct sd_iterargs *)args;
	int signo = sd->signo;
	int setsdstate = sd->setsdstate;
	int countproc = sd->countproc;

	proc_lock(p);
	p->p_shutdownstate = setsdstate;
	if (p->p_stat != SZOMB) {
		proc_unlock(p);
		if (countproc != 0) {
			proc_list_lock();
			p->p_listflag |= P_LIST_EXITCOUNT;
			proc_shutdown_exitcount++;
			proc_list_unlock();
		}
		psignal(p, signo);
		if (countproc != 0)
			sd->activecount++;
	} else
		proc_unlock(p);

	return(PROC_RETURNED);
}
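
For context, the P_LIST_EXITCOUNT/proc_shutdown_exitcount bookkeeping above pairs with a timed wait in proc_shutdown() (Example #7); a condensed sketch of the waiting side, with ts a struct timespec:

	proc_list_lock();
	if (proc_shutdown_exitcount != 0) {
		ts.tv_sec = 30;
		ts.tv_nsec = 0;
		/* exiting procs clear P_LIST_EXITCOUNT and wake this channel */
		error = msleep(&proc_shutdown_exitcount, proc_list_mlock,
		    PWAIT, "shutdownwait", &ts);
	}
	proc_list_unlock();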
Example #3
/*
 * cloneproc
 *
 * Description: Create a new process from a specified process.
 *
 * Parameters:	parent_task		The parent task to be cloned, or
 *					TASK_NULL if the task characteristics
 *					(including the VM characteristics)
 *					are not to be inherited
 *		parent_coalitions	The parent's set of coalitions for
 *					the child to join, or NULL
 *		parent_proc		The parent process to be cloned
 *		inherit_memory		True if the child is to inherit
 *					memory from the parent; if this is
 *					true, then parent_task must also be
 *					non-NULL
 *		memstat_internal	Whether to track the process in the
 *					jetsam priority list (if configured)
 *
 * Returns:	!NULL			pointer to new child thread
 *		NULL			Failure (unspecified)
 *
 * Note:	On return, the newly created child process has the signal
 *		lock held to block delivery of signals to it if called with
 *		the lock set.  The fork() code must explicitly remove this
 *		lock before signals can be delivered.
 *
 *		In the case of bootstrap, this function can be called from
 *		bsd_utaskbootstrap() in order to bootstrap the first process;
 *		the net effect is to provide a uthread structure for the
 *		kernel process associated with the kernel task.
 *
 * XXX:		Tristating using the value parent_task as the major key
 *		and inherit_memory as the minor key is something we should
 *		refactor later; we owe the current semantics, ultimately,
 *		to the semantics of task_create_internal.  For now, we will
 *		live with this being somewhat awkward.
 */
thread_t
cloneproc(task_t parent_task, coalition_t *parent_coalitions, proc_t parent_proc, int inherit_memory, int memstat_internal)
{
#if !CONFIG_MEMORYSTATUS
#pragma unused(memstat_internal)
#endif
	task_t child_task;
	proc_t child_proc;
	thread_t child_thread = NULL;

	if ((child_proc = forkproc(parent_proc)) == NULL) {
		/* Failed to allocate new process */
		goto bad;
	}

	child_thread = fork_create_child(parent_task, parent_coalitions, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64));

	if (child_thread == NULL) {
		/*
		 * Failed to create thread; now we must deconstruct the new
		 * process previously obtained from forkproc().
		 */
		forkproc_free(child_proc);
		goto bad;
	}

	child_task = get_threadtask(child_thread);
	if (parent_proc->p_flag & P_LP64) {
		task_set_64bit(child_task, TRUE);
		OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag);
	} else {
		task_set_64bit(child_task, FALSE);
		OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag);
	}

#if CONFIG_MEMORYSTATUS
	if (memstat_internal) {
		proc_list_lock();
		child_proc->p_memstat_state |= P_MEMSTAT_INTERNAL;
		proc_list_unlock();
	}
#endif

	/* make child visible */
	pinsertchild(parent_proc, child_proc);

	/*
	 * Make child runnable, set start time.
	 */
	child_proc->p_stat = SRUN;
bad:
	return(child_thread);
}
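
Because cloneproc() returns the new child thread rather than the child process, a caller that needs the proc recovers it through the Mach task, as fork1() does in Example #5.  A minimal sketch (the argument values are illustrative only):

	thread_t child_thread;
	proc_t child_proc;

	child_thread = cloneproc(parent_proc->task, NULL, parent_proc, TRUE, FALSE);
	if (child_thread != NULL) {
		/* thread -> task -> BSD proc */
		child_proc = (proc_t)get_bsdtask_info(get_threadtask(child_thread));
	}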
Example #4
void
ux_handler_init(void)
{
	thread_t	thread = THREAD_NULL;

	ux_exception_port = MACH_PORT_NULL;
	(void) kernel_thread_start((thread_continue_t)ux_handler, NULL, &thread);
	thread_deallocate(thread);
	proc_list_lock();
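	/*
	 * Wait for ux_handler() to publish the exception port; it performs
	 * a thread_wakeup(&ux_exception_port) under this same lock once
	 * ipc_object_copyin() has produced the send right (see Example #10).
	 */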
	if (ux_exception_port == MACH_PORT_NULL)  {
		(void)msleep(&ux_exception_port, proc_list_mlock, 0, "ux_handler_wait", 0);
	}
	proc_list_unlock();
}
Example #5
/*
 * fork1
 *
 * Description:	common code used by all new process creation other than the
 *		bootstrap of the initial process on the system
 *
 * Parameters: parent_proc		parent process of the process being
 *					created
 *		child_threadp		pointer to location to receive the
 *					Mach thread_t of the child process
 *					created
 *		kind			kind of creation being requested
 *
 * Notes:	Permissible values for 'kind':
 *
 *		PROC_CREATE_FORK	Create a complete process which will
 *					return actively running in both the
 *					parent and the child; the child copies
 *					the parent address space.
 *		PROC_CREATE_SPAWN	Create a complete process which will
 *					return actively running in the parent
 *					only after returning actively running
 *					in the child; the child address space
 *					is newly created by an image activator,
 *					after which the child is run.
 *		PROC_CREATE_VFORK	Creates a partial process which will
 *					borrow the parent task, thread, and
 *					uthread to return running in the child;
 *					the child address space and other parts
 *					are lazily created at execve() time, or
 *					the child is terminated, and the parent
 *					does not actively run until that
 *					happens.
 *
 *		At first it may seem strange that we return the child thread
 *		address rather than process structure, since the process is
 *		the only part guaranteed to be "new"; however, since we do
 *		not actually adjust other references between Mach and BSD (see
 *		the block diagram above the implementation of vfork()), this
 *		is the only method which guarantees us the ability to get
 *		back to the other information.
 */
int
fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
{
	thread_t parent_thread = (thread_t)current_thread();
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
	proc_t child_proc = NULL;	/* set in switch, but compiler... */
	thread_t child_thread = NULL;
	uid_t uid;
	int count;
	int err = 0;
	int spawn = 0;

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit. The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = kauth_cred_get()->cr_ruid;
	proc_list_lock();
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		proc_list_unlock();
		tablefull("proc");
		return (EAGAIN);
	}
	proc_list_unlock();

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit, which is
	 * always less than what an rlim_t can hold.
	 * (locking protection is provided by list lock held in chgproccnt)
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 &&
	    (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
	    	err = EAGAIN;
		goto bad;
	}

#if CONFIG_MACF
	/*
	 * Determine if MAC policies applied to the process will allow
	 * it to fork.  This is an advisory-only check.
	 */
	err = mac_proc_check_fork(parent_proc);
	if (err  != 0) {
		goto bad;
	}
#endif

	switch(kind) {
	case PROC_CREATE_VFORK:
		/*
		 * Prevent a vfork while we are in vfork(); we should
		 * probably also prevent a fork here as well, and this
		 * check should then be outside the switch statement,
		 * since the proc struct contents will copy from the
		 * child and the task/thread/uthread from the parent in
		 * that case.  We do not support vfork() in vfork()
		 * because we don't have to; the same non-requirement
		 * is true of both fork() and posix_spawn() and any
		 * call other than execve() and _exit(), but we've
		 * been historically lenient, so we continue to be so
		 * (for now).
		 *
		 * <rdar://6640521> Probably a source of random panics
		 */
		if (parent_uthread->uu_flag & UT_VFORK) {
			printf("fork1 called within vfork by %s\n", parent_proc->p_comm);
			err = EINVAL;
			goto bad;
		}

		/*
		 * Flag us in progress; if we chose to support vfork() in
		 * vfork(), we would chain our parent at this point (in
		 * effect, a stack push).  We don't, since we actually want
		 * to disallow everything not specified in the standard.
		 */
		proc_vfork_begin(parent_proc);

		/* The newly created process comes with signal lock held */
		if ((child_proc = forkproc(parent_proc)) == NULL) {
			/* Failed to allocate new process */
			proc_vfork_end(parent_proc);
			err = ENOMEM;
			goto bad;
		}

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
// XXX END: wants to move to be common code (and safe)

		/*
		 * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD
		 *
		 * Note: this is where we would "push" state instead of setting
		 * it for nested vfork() support (see proc_vfork_end() for
		 * description of issues here).
		 */
		child_proc->task = parent_proc->task;

		child_proc->p_lflag  |= P_LINVFORK;
		child_proc->p_vforkact = parent_thread;
		child_proc->p_stat = SRUN;

		parent_uthread->uu_flag |= UT_VFORK;
		parent_uthread->uu_proc = child_proc;
		parent_uthread->uu_userstate = (void *)act_thread_csave();
		parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask;

		/* temporarily drop thread-set-id state */
		if (parent_uthread->uu_flag & UT_SETUID) {
			parent_uthread->uu_flag |= UT_WASSETUID;
			parent_uthread->uu_flag &= ~UT_SETUID;
		}

		/* blow thread state information */
		/* XXX is this actually necessary, given syscall return? */
		thread_set_child(parent_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

		/*
		 * Preserve synchronization semantics of vfork.  If
		 * waiting for child to exec or exit, set P_PPWAIT
		 * on child, and sleep on our proc (in case of exit).
		 */
		child_proc->p_lflag |= P_LPPWAIT;
		pinsertchild(parent_proc, child_proc);	/* set visible */

		break;

	case PROC_CREATE_SPAWN:
		/*
		 * A spawned process differs from a forked process in that
		 * the spawned process does not carry around the parent's
		 * baggage with regard to address space copying, dtrace,
		 * and so on.
		 */
		spawn = 1;

		/* FALLTHROUGH */

	case PROC_CREATE_FORK:
		/*
		 * When we clone the parent process, we are going to inherit
		 * its task attributes and memory, since when we fork, we
		 * will, in effect, create a duplicate of it, with only minor
		 * differences.  Contrarily, spawned processes do not inherit.
		 */
		if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) {
			/* Failed to create thread */
			err = EAGAIN;
			goto bad;
		}

		/* copy current thread state into the child thread (only for fork) */
		if (!spawn) {
			thread_dup(child_thread);
		}

		/* child_proc = child_thread->task->proc; */
		child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread)));

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
// XXX END: wants to move to be common code (and safe)

		/*
		 * Blow thread state information; this is what gives the child
		 * process its "return" value from a fork() call.
		 *
		 * Note: this should probably move to fork() proper, since it
		 * is not relevant to spawn, and the value won't matter
		 * until we resume the child there.  If you are in here
		 * refactoring code, consider doing this at the same time.
		 */
		thread_set_child(child_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

// <rdar://6598155> dtrace code cleanup needed
#if CONFIG_DTRACE
		/*
		 * This code applies to new processes who are copying the task
		 * and thread state and address spaces of their parent process.
		 */
		if (!spawn) {
// <rdar://6598155> call dtrace specific function here instead of all this...
			/*
			 * APPLE NOTE: Solaris does a sprlock() and drops the
			 * proc_lock here. We're cheating a bit and only taking
			 * the p_dtrace_sprlock lock. A full sprlock would
			 * task_suspend the parent.
			 */
			lck_mtx_lock(&parent_proc->p_dtrace_sprlock);

			/*
			 * Remove all DTrace tracepoints from the child process.
			 * We need to do this _before_ duplicating USDT providers
			 * since any associated probes may be immediately enabled.
			 */
			if (parent_proc->p_dtrace_count > 0) {
				dtrace_fasttrap_fork(parent_proc, child_proc);
			}

			lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);

			/*
			 * Duplicate any lazy dof(s). This must be done while NOT
			 * holding the parent sprlock! Lock ordering is
			 * dtrace_dof_mode_lock, then sprlock.  It is imperative we
			 * always call dtrace_lazy_dofs_duplicate, rather than null
			 * check and call if !NULL. If we NULL test, during lazy dof
			 * faulting we can race with the faulting code and proceed
			 * from here to beyond the helpers copy. The lazy dof
			 * faulting will then fail to copy the helpers to the child
			 * process.
			 */
			dtrace_lazy_dofs_duplicate(parent_proc, child_proc);

			/*
			 * Duplicate any helper actions and providers.  On
			 * Solaris, an SFORKING flag set at this point informs
			 * the code that enables USDT probes that sprlock()
			 * may fail because the child is being forked.
			 *
			 * APPLE NOTE: As best I can tell, Apple's sprlock()
			 * equivalent never fails to find the child.  We do not
			 * set SFORKING.
			 */
			if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
				(*dtrace_helpers_fork)(parent_proc, child_proc);
			}
		}
#endif	/* CONFIG_DTRACE */

		break;

	default:
		panic("fork1 called with unknown kind %d", kind);
		break;
	}


	/* return the thread pointer to the caller */
	*child_threadp = child_thread;

bad:
	/*
	 * In the error case, we return a 0 value for the returned pid (but
	 * it is ignored in the trampoline due to the error return); this
	 * is probably not necessary.
	 */
	if (err) {
		(void)chgproccnt(uid, -1);
	}

	return (err);
}
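
For orientation, a sketch of how the three creation kinds reach fork1(); in XNU these calls come from the fork(), posix_spawn(), and vfork() system call paths, and the surrounding argument and error handling is elided here:

	thread_t child_thread;
	int err;

	err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK);	/* fork() */
	err = fork1(parent_proc, &child_thread, PROC_CREATE_SPAWN);	/* posix_spawn() */
	err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK);	/* vfork() */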
Example #6
/*
 * forkproc
 *
 * Description:	Create a new process structure, given a parent process
 *		structure.
 *
 * Parameters:	parent_proc		The parent process
 *
 * Returns:	!NULL			The new process structure
 *		NULL			Error (insufficient free memory)
 *
 * Note:	When successful, the newly created process structure is
 *		partially initialized; if a caller needs to deconstruct the
 *		returned structure, they must call forkproc_free() to do so.
 */
proc_t
forkproc(proc_t parent_proc)
{
	proc_t child_proc;	/* Our new process */
	static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
	int error = 0;
	struct session *sessp;
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());

	MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK);
	if (child_proc == NULL) {
		printf("forkproc: M_PROC zone exhausted\n");
		goto bad;
	}
	/* zero it out as we need to insert in hash */
	bzero(child_proc, sizeof *child_proc);

	MALLOC_ZONE(child_proc->p_stats, struct pstats *,
			sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK);
	if (child_proc->p_stats == NULL) {
		printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n");
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}
	MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *,
			sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK);
	if (child_proc->p_sigacts == NULL) {
		printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n");
		FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}

	/* allocate a callout for use by interval timers */
	child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc);
	if (child_proc->p_rcall == NULL) {
		FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS);
		FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}


	/*
	 * Find an unused PID.  
	 */

	proc_list_lock();

	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidwrap = 1;
	}
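	/*
	 * After a wrap we must probe for collisions before reusing a PID:
	 * process group IDs and session IDs live in the same namespace as
	 * PIDs, so a candidate is rejected if any of the three is still
	 * in use.
	 */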
	if (pidwrap != 0) {

		/* the PID stays in the hash for both the zombie and running states */
		if (pfind_locked(nextpid) != PROC_NULL) {
			nextpid++;
			goto retry;
		}

		if (pgfind_internal(nextpid) != PGRP_NULL) {
			nextpid++;
			goto retry;
		}	
		if (session_find_internal(nextpid) != SESSION_NULL) {
			nextpid++;
			goto retry;
		}	
	}
	nprocs++;
	child_proc->p_pid = nextpid;
	child_proc->p_idversion = nextpidversion++;
#if 1
	if (child_proc->p_pid != 0) {
		if (pfind_locked(child_proc->p_pid) != PROC_NULL)
			panic("proc in the list already\n");
	}
#endif
	/* Insert in the hash */
	child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE);
	LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash);
	proc_list_unlock();


	/*
	 * We've identified the PID we are going to use; initialize the new
	 * process structure.
	 */
	child_proc->p_stat = SIDL;
	child_proc->p_pgrpid = PGRPID_DEAD;

	/*
	 * The proc structure was zeroed at allocation time because it had
	 * to be inserted in the hash before it was fully initialized.
	 * Copy the section that is to be copied directly from the parent.
	 */
	bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy,
	    (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy));

	/*
	 * Some flags are inherited from the parent.
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY));
	if (parent_proc->p_flag & P_PROFIL)
		startprofclock(child_proc);
	/*
	 * Note that if the current thread has an assumed identity, this
	 * credential will be granted to the new process.
	 */
	child_proc->p_ucred = kauth_cred_get_with_ref();

#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
#if CONFIG_DTRACE
	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
	lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
#else /* !CONFIG_EMBEDDED */
	lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
#if CONFIG_DTRACE
	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
	lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
#endif /* !CONFIG_EMBEDDED */
	klist_init(&child_proc->p_klist);

	if (child_proc->p_textvp != NULLVP) {
		/* bump references to the text vnode */
		/* Need to hold iocount across the ref call */
		if (vnode_getwithref(child_proc->p_textvp) == 0) {
			error = vnode_ref(child_proc->p_textvp);
			vnode_put(child_proc->p_textvp);
			if (error != 0)
				child_proc->p_textvp = NULLVP;
		}
	}

	/*
	 * Copy the parent's per-process open file table to the child; if
	 * there is a per-thread current working directory, set the child's
	 * per-process current working directory to that instead of the
	 * parent's.
	 *
	 * XXX may fail to copy descriptors to child
	 */
	child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);

#if SYSV_SHM
	if (parent_proc->vm_shm) {
		/* XXX may fail to attach shm to child */
		(void)shmfork(parent_proc, child_proc);
	}
#endif
	/*
	 * Inherit the resource limit structure from the parent.
	 */
	proc_limitfork(parent_proc, child_proc);

	if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
		uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur;
		child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur;
	}

	/* Initialize new process stats, including start time */
	/* <rdar://6640543> non-zeroed portion contains garbage AFAICT */
	bzero(&child_proc->p_stats->pstat_startzero,
	    (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero -
	    (caddr_t)&child_proc->p_stats->pstat_startzero));
	bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof));
	microtime(&child_proc->p_start);
	child_proc->p_stats->p_start = child_proc->p_start;     /* for compat */

	if (parent_proc->p_sigacts != NULL)
		(void)memcpy(child_proc->p_sigacts,
				parent_proc->p_sigacts, sizeof *child_proc->p_sigacts);
	else
		(void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts);

	sessp = proc_session(parent_proc);
	if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT)
		OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag);
	session_rele(sessp);

	/*
	 * Block all signals from reaching the process.
	 * No transition race should be occurring with the child yet,
	 * but indicate that the process is in (the creation) transition.
	 */
	proc_signalstart(child_proc, 0);
	proc_transstart(child_proc, 0);

	child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX;
	TAILQ_INIT(&child_proc->p_uthlist);
	TAILQ_INIT(&child_proc->p_aio_activeq);
	TAILQ_INIT(&child_proc->p_aio_doneq);

	/* Inherit the parent flags for code sign */
	child_proc->p_csflags = parent_proc->p_csflags;

	/*
	 * All processes have work queue locks; cleaned up by
	 * reap_child_locked()
	 */
	workqueue_init_lock(child_proc);

	/*
	 * Copy work queue information
	 *
	 * Note: This should probably only happen in the case where we are
	 *	creating a child that is a copy of the parent; since this
	 *	routine is also called in the non-duplication cases of
	 *	vfork() and posix_spawn(), this information should likely
	 *	not be duplicated in those cases.
	 *
	 * <rdar://6640553> Work queue pointers that no longer point to code
	 */
	child_proc->p_wqthread = parent_proc->p_wqthread;
	child_proc->p_threadstart = parent_proc->p_threadstart;
	child_proc->p_pthsize = parent_proc->p_pthsize;
	child_proc->p_targconc = parent_proc->p_targconc;
	if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
		child_proc->p_lflag |= P_LREGISTER;
	}
	child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
#if PSYNCH
	pth_proc_hashinit(child_proc);
#endif /* PSYNCH */

#if CONFIG_LCTX
	child_proc->p_lctx = NULL;
	/* Add new process to login context (if any). */
	if (parent_proc->p_lctx != NULL) {
		/*
		 * <rdar://6640564> This should probably be delayed in the
		 * vfork() or posix_spawn() cases.
		 */
		LCTX_LOCK(parent_proc->p_lctx);
		enterlctx(child_proc, parent_proc->p_lctx, 0);
	}
#endif

bad:
	return(child_proc);
}
Example #7
static void
proc_shutdown(void)
{
	vfs_context_t ctx = vfs_context_current();
	struct proc *p, *self;
	int delayterm = 0;
	struct sd_filterargs sfargs;
	struct sd_iterargs sdargs;
	int error = 0;
	struct timespec ts;

	/*
	 *	Kill as many procs as we can.  (Except ourself...)
	 */
	self = (struct proc *)current_proc();
	
	/*
	 * Signal init (pid 1) with SIGTERM so that it does not launch
	 * new processes.
	 */
	p = proc_find(1);
	if (p != PROC_NULL) {
		if (p != self) {
			psignal(p, SIGTERM);
		}
		proc_rele(p);
	}

	printf("Killing all processes ");

sigterm_loop:
	/*
	 * send SIGTERM to those procs interested in catching one
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 0;
	sdargs.signo = SIGTERM;
	sdargs.setsdstate = 1;
	sdargs.countproc = 1;
	sdargs.activecount = 0;

	error = 0;
	/* post a SIGTERM to all that catch SIGTERM and not marked for delay */
	proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);

	if (sdargs.activecount != 0 && proc_shutdown_exitcount != 0) {
		proc_list_lock();
		if (proc_shutdown_exitcount != 0) {
			/*
			 * Now wait for up to 30 seconds to allow those procs
			 * catching SIGTERM to digest it; as soon as these
			 * procs have exited, we'll continue on to the next
			 * step.
			 */
			ts.tv_sec = 30;
			ts.tv_nsec = 0;
			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
			if (error != 0) {
				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
			}
			
		}
		proc_list_unlock();
	}
	if (error == ETIMEDOUT) {
		/*
		 * log the names of the unresponsive tasks
		 */


		proc_list_lock();

		for (p = allproc.lh_first; p; p = p->p_list.le_next) {
			if (p->p_shutdownstate == 1) {
				printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
				sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
			}
		}

		proc_list_unlock();

		delay_for_interval(1000 * 5, 1000 * 1000);
	}

	/*
	 * send a SIGKILL to all the procs still hanging around
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 2;
	sdargs.signo = SIGKILL;
	sdargs.setsdstate = 2;
	sdargs.countproc = 1;
	sdargs.activecount = 0;

	/* post a SIGKILL to all procs still running and not marked for delay */
	proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);

	if (sdargs.activecount != 0 && proc_shutdown_exitcount != 0) {
		proc_list_lock();
		if (proc_shutdown_exitcount != 0) {
			/*
			 * Wait for up to 60 seconds to allow these procs to
			 * exit normally.
			 *
			 * History:	The delay interval was changed from 100 to 200
			 *		for NFS requests in particular.
			 */
			ts.tv_sec = 60;
			ts.tv_nsec = 0;
			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
			if (error != 0) {
				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
			}
		}
		proc_list_unlock();
	}

	/*
	 * if we still have procs that haven't exited, then brute force 'em
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 3;
	sdargs.signo = 0;
	sdargs.setsdstate = 3;
	sdargs.countproc = 0;
	sdargs.activecount = 0;

	/* final, brute-force pass over anything still running (sdargs.signo == 0, no signal posted) */
	proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
	printf("\n");

	/* Now start the termination of processes that are marked for delayed termination */
	if (delayterm == 0) {
		delayterm = 1;
		goto  sigterm_loop;
	}

	sd_closelog(ctx);

	/* drop the ref on initproc */
	proc_rele(initproc);
	printf("continuing\n");
}
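
The iterator argument blocks used above have roughly the following shape, reconstructed from their uses here and in sd_callback2() (Example #2); the actual definitions in kern_shutdown.c may differ in layout:

struct sd_filterargs {
	int delayterm;		/* consider procs marked for delayed termination */
	int shutdownstate;	/* p_shutdownstate value the filter matches against */
};

struct sd_iterargs {
	int signo;		/* signal to post, or 0 for the forced pass */
	int setsdstate;		/* new p_shutdownstate for visited procs */
	int countproc;		/* whether to count procs toward the exit wait */
	int activecount;	/* out: number of procs acted upon */
};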
Example #8
void
bsd_init(void)
{
	struct uthread *ut;
	unsigned int i;
	struct vfs_context context;
	kern_return_t	ret;
	struct ucred temp_cred;
	struct posix_cred temp_pcred;
#if NFSCLIENT || CONFIG_IMAGEBOOT
	boolean_t       netboot = FALSE;
#endif

#define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */

	throttle_init();

	printf(copyright);
	
	bsd_init_kprintf("calling kmeminit\n");
	kmeminit();
	
	bsd_init_kprintf("calling parse_bsd_args\n");
	parse_bsd_args();

#if CONFIG_DEV_KMEM
	bsd_init_kprintf("calling dev_kmem_init\n");
	dev_kmem_init();
#endif

	/* Initialize kauth subsystem before instancing the first credential */
	bsd_init_kprintf("calling kauth_init\n");
	kauth_init();

	/* Initialize process and pgrp structures. */
	bsd_init_kprintf("calling procinit\n");
	procinit();

	/* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
	tty_init();

	kernproc = &proc0;	/* implicitly bzero'ed */

	/* kernel_task->proc = kernproc; */
	set_bsdtask_info(kernel_task,(void *)kernproc);

	/* give kernproc a name */
	bsd_init_kprintf("calling process_name\n");
	process_name("kernel_task", kernproc);

	/* allocate proc lock group attribute and group */
	bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
	proc_lck_grp_attr = lck_grp_attr_alloc_init();

	proc_lck_grp = lck_grp_alloc_init("proc",  proc_lck_grp_attr);
#if CONFIG_FINE_LOCK_GROUPS
	proc_slock_grp = lck_grp_alloc_init("proc-slock",  proc_lck_grp_attr);
	proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock",  proc_lck_grp_attr);
	proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock",  proc_lck_grp_attr);
	proc_mlock_grp = lck_grp_alloc_init("proc-mlock",  proc_lck_grp_attr);
#endif
	/* Allocate proc lock attribute */
	proc_lck_attr = lck_attr_alloc_init();
#if 0
#if __PROC_INTERNAL_DEBUG
	lck_attr_setdebug(proc_lck_attr);
#endif
#endif

#if CONFIG_FINE_LOCK_GROUPS
	proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
#else
	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_ucred_mlock, proc_lck_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
#endif

	assert(bsd_simul_execs != 0);
	execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	execargs_cache_size = bsd_simul_execs;
	execargs_free_count = bsd_simul_execs;
	execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t));
	bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t));
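	/*
	 * The execargs cache bounds concurrent execve() argument buffers
	 * to bsd_simul_execs; execve() callers block once the free count
	 * reaches zero.
	 */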
	
	if (current_task() != kernel_task)
		printf("bsd_init: We have a problem, "
				"current task is not kernel task\n");
	
	bsd_init_kprintf("calling get_bsdthread_info\n");
	ut = (uthread_t)get_bsdthread_info(current_thread());

#if CONFIG_MACF
	/*
	 * Initialize the MAC Framework
	 */
	mac_policy_initbsd();
	kernproc->p_mac_enforce = 0;

#if defined (__i386__) || defined (__x86_64__)
	/*
	 * We currently only support this on i386/x86_64, as that is the
	 * only lock code we have instrumented so far.
	 */
	check_policy_init(policy_check_flags);
#endif
#endif /* MAC */

	/* Initialize System Override call */
	init_system_override();
	
	/*
	 * Create process 0.
	 */
	proc_list_lock();
	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
	kernproc->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
#ifdef CONFIG_FINE_LOCK_GROUPS
	lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
#else
	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);
#endif
	/* There is no other bsd thread at this point, so it is safe to proceed without the pgrp lock */
	LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
	kernproc->p_listflag |= P_LIST_INPGRP;
	kernproc->p_pgrpid = 0;
	kernproc->p_uniqueid = 0;

	pgrp0.pg_session = &session0;
	pgrp0.pg_membercnt = 1;

	session0.s_count = 1;
	session0.s_leader = kernproc;
	session0.s_listflags = 0;
#ifdef CONFIG_FINE_LOCK_GROUPS
	lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
#else
	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
#endif
	LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
	proc_list_unlock();

	kernproc->task = kernel_task;
	
	kernproc->p_stat = SRUN;
	kernproc->p_flag = P_SYSTEM;
	kernproc->p_lflag = 0;
	kernproc->p_ladvflag = 0;
	
#if DEVELOPMENT || DEBUG
	if (bootarg_disable_aslr)
		kernproc->p_flag |= P_DISABLE_ASLR;
#endif

	kernproc->p_nice = NZERO;
	kernproc->p_pptr = kernproc;

	TAILQ_INIT(&kernproc->p_uthlist);
	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
	
	kernproc->sigwait = FALSE;
	kernproc->sigwait_thread = THREAD_NULL;
	kernproc->exit_thread = THREAD_NULL;
	kernproc->p_csflags = CS_VALID;

	/*
	 * Create credential.  This also initializes the audit information.
	 */
	bsd_init_kprintf("calling bzero\n");
	bzero(&temp_cred, sizeof(temp_cred));
	bzero(&temp_pcred, sizeof(temp_pcred));
	temp_pcred.cr_ngroups = 1;
	/* kern_proc, shouldn't call up to DS for group membership */
	temp_pcred.cr_flags = CRF_NOMEMBERD;
	temp_cred.cr_audit.as_aia_p = audit_default_aia_p;
	
	bsd_init_kprintf("calling kauth_cred_create\n");
	/*
	 * We have to label the temp cred before we create from it to
	 * properly set cr_ngroups, or the create will fail.
	 */
	posix_cred_label(&temp_cred, &temp_pcred);
	kernproc->p_ucred = kauth_cred_create(&temp_cred); 

	/* update cred on proc */
	PROC_UPDATE_CREDS_ONPROC(kernproc);

	/* give the (already existing) initial thread a reference on it */
	bsd_init_kprintf("calling kauth_cred_ref\n");
	kauth_cred_ref(kernproc->p_ucred);
	ut->uu_context.vc_ucred = kernproc->p_ucred;
	ut->uu_context.vc_thread = current_thread();

	TAILQ_INIT(&kernproc->p_aio_activeq);
	TAILQ_INIT(&kernproc->p_aio_doneq);
	kernproc->p_aio_total_count = 0;
	kernproc->p_aio_active_count = 0;

	bsd_init_kprintf("calling file_lock_init\n");
	file_lock_init();

#if CONFIG_MACF
	mac_cred_label_associate_kernel(kernproc->p_ucred);
#endif

	/* Create the file descriptor table. */
	kernproc->p_fd = &filedesc0;
	filedesc0.fd_cmask = cmask;
	filedesc0.fd_knlistsize = -1;
	filedesc0.fd_knlist = NULL;
	filedesc0.fd_knhash = NULL;
	filedesc0.fd_knhashmask = 0;

	/* Create the limits structures. */
	kernproc->p_limit = &limit0;
	for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur = 
			limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
	limit0.pl_refcnt = 1;

	kernproc->p_stats = &pstats0;
	kernproc->p_sigacts = &sigacts0;

	/*
	 * Charge root for one process: launchd.
	 */
	bsd_init_kprintf("calling chgproccnt\n");
	(void)chgproccnt(0, 1);

	/*
	 *	Allocate a kernel submap for pageable memory
	 *	for temporary copying (execve()).
	 */
	{
		vm_offset_t	minimum;

		bsd_init_kprintf("calling kmem_suballoc\n");
		assert(bsd_pageable_map_size != 0);
		ret = kmem_suballoc(kernel_map,
				&minimum,
				(vm_size_t)bsd_pageable_map_size,
				TRUE,
				VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_BSD),
				&bsd_pageable_map);
		if (ret != KERN_SUCCESS) 
			panic("bsd_init: Failed to allocate bsd pageable map");
	}

	/*
	 * Initialize buffers and hash links for buffers
	 *
	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
	 *		happen after a credential has been associated with
	 *		the kernel task.
	 */
	bsd_init_kprintf("calling bsd_bufferinit\n");
	bsd_bufferinit();

	/*
	 * No execve() semaphore is created here; concurrent execve()
	 * activity is bounded by the execargs cache initialized above.
	 */

	/*
	 * Initialize the calendar.
	 */
	bsd_init_kprintf("calling IOKitInitializeTime\n");
	IOKitInitializeTime();

	bsd_init_kprintf("calling ubc_init\n");
	ubc_init();

	/*
	 * Initialize device-switches.
	 */
	bsd_init_kprintf("calling devsw_init() \n");
	devsw_init();

	/* Initialize the file systems. */
	bsd_init_kprintf("calling vfsinit\n");
	vfsinit();

#if CONFIG_PROC_UUID_POLICY
	/* Initialize the proc_uuid_policy subsystem */
	bsd_init_kprintf("calling proc_uuid_policy_init()\n");
	proc_uuid_policy_init();
#endif

#if SOCKETS
	/* Initialize per-CPU cache allocator */
	mcache_init();

	/* Initialize mbuf's. */
	bsd_init_kprintf("calling mbinit\n");
	mbinit();
	net_str_id_init(); /* for mbuf tags */
#endif /* SOCKETS */

	/*
	 * Initialize security event auditing.
	 * XXX: Should/could this occur later?
	 */
#if CONFIG_AUDIT
	bsd_init_kprintf("calling audit_init\n");
	audit_init();
#endif

	/* Initialize kqueues */
	bsd_init_kprintf("calling knote_init\n");
	knote_init();

	/* Initialize for async IO */
	bsd_init_kprintf("calling aio_init\n");
	aio_init();

	/* Initialize pipes */
	bsd_init_kprintf("calling pipeinit\n");
	pipeinit();

	/* Initialize SysV shm subsystem locks; the subsystem proper is
	 * initialized through a sysctl.
	 */
#if SYSV_SHM
	bsd_init_kprintf("calling sysv_shm_lock_init\n");
	sysv_shm_lock_init();
#endif
#if SYSV_SEM
	bsd_init_kprintf("calling sysv_sem_lock_init\n");
	sysv_sem_lock_init();
#endif
#if SYSV_MSG
	bsd_init_kprintf("sysv_msg_lock_init\n");
	sysv_msg_lock_init();
#endif
	bsd_init_kprintf("calling pshm_lock_init\n");
	pshm_lock_init();
	bsd_init_kprintf("calling psem_lock_init\n");
	psem_lock_init();

	pthread_init();
	/* POSIX Shm and Sem */
	bsd_init_kprintf("calling pshm_cache_init\n");
	pshm_cache_init();
	bsd_init_kprintf("calling psem_cache_init\n");
	psem_cache_init();
	bsd_init_kprintf("calling time_zone_slock_init\n");
	time_zone_slock_init();
	bsd_init_kprintf("calling select_waitq_init\n");
	select_waitq_init();

	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	bsd_init_kprintf("calling sysctl_register_fixed\n");
	sysctl_register_fixed(); 
	bsd_init_kprintf("calling sysctl_mib_init\n");
	sysctl_mib_init();
#if NETWORKING
	bsd_init_kprintf("calling dlil_init\n");
	dlil_init();
	bsd_init_kprintf("calling proto_kpi_init\n");
	proto_kpi_init();
#endif /* NETWORKING */
#if SOCKETS
	bsd_init_kprintf("calling socketinit\n");
	socketinit();
	bsd_init_kprintf("calling domaininit\n");
	domaininit();
	iptap_init();
#if FLOW_DIVERT
	flow_divert_init();
#endif	/* FLOW_DIVERT */
#endif /* SOCKETS */

	kernproc->p_fd->fd_cdir = NULL;
	kernproc->p_fd->fd_rdir = NULL;

#if CONFIG_FREEZE
#ifndef CONFIG_MEMORYSTATUS
    #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
#endif
	/* Initialize background freezing */
	bsd_init_kprintf("calling memorystatus_freeze_init\n");
	memorystatus_freeze_init();
#endif

#if CONFIG_MEMORYSTATUS
	/* Initialize kernel memory status notifications */
	bsd_init_kprintf("calling memorystatus_init\n");
	memorystatus_init();
#endif /* CONFIG_MEMORYSTATUS */

	bsd_init_kprintf("calling macx_init\n");
	macx_init();

	bsd_init_kprintf("calling acct_init\n");
	acct_init();

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	bsd_init_kprintf("calling bsd_autoconf\n");
	bsd_autoconf();

#if CONFIG_DTRACE
	dtrace_postinit();
#endif

	/*
	 * We attach the loopback interface *way* down here to ensure
	 * it happens after autoconf(), otherwise it becomes the
	 * "primary" interface.
	 */
#include <loop.h>
#if NLOOP > 0
	bsd_init_kprintf("calling loopattach\n");
	loopattach();			/* XXX */
#endif
#if NGIF
	/* Initialize gif interface (after lo0) */
	gif_init();
#endif

#if PFLOG
	/* Initialize packet filter log interface */
	pfloginit();
#endif /* PFLOG */

#if NETHER > 0
	/* Register the built-in dlil ethernet interface family */
	bsd_init_kprintf("calling ether_family_init\n");
	ether_family_init();
#endif /* ETHER */

#if NETWORKING
	/* Call any kext code that wants to run just after network init */
	bsd_init_kprintf("calling net_init_run\n");
	net_init_run();
	
#if CONTENT_FILTER
	cfil_init();
#endif

#if PACKET_MANGLER
	pkt_mnglr_init();
#endif	

#if NECP
	/* Initialize Network Extension Control Policies */
	necp_init();
#endif

	netagent_init();

	/* register user tunnel kernel control handler */
	utun_register_control();
#if IPSEC
	ipsec_register_control();
#endif /* IPSEC */
	netsrc_init();
	nstat_init();
	tcp_cc_init();
#if MPTCP
	mptcp_control_register();
#endif /* MPTCP */
#endif /* NETWORKING */

	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
	vnode_pager_bootstrap();

	bsd_init_kprintf("calling inittodr\n");
	inittodr(0);

	/* Mount the root file system. */
	while (TRUE) {
		int err;

		bsd_init_kprintf("calling setconf\n");
		setconf();
#if NFSCLIENT
		netboot = (mountroot == netboot_mountroot);
#endif

		bsd_init_kprintf("vfs_mountroot\n");
		if (0 == (err = vfs_mountroot()))
			break;
		rootdevice[0] = '\0';
#if NFSCLIENT
		if (netboot) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: failed to mount network root, error %d, %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
#endif
		printf("cannot mount root, errno = %d\n", err);
		boothowto |= RB_ASKNAME;
	}

	IOSecureBSDRoot(rootdevice);

	context.vc_thread = current_thread();
	context.vc_ucred = kernproc->p_ucred;
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;

	bsd_init_kprintf("calling VFS_ROOT\n");
	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
	if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context))
		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
	rootvnode->v_flag |= VROOT;
	(void)vnode_ref(rootvnode);
	(void)vnode_put(rootvnode);
	filedesc0.fd_cdir = rootvnode;

#if NFSCLIENT
	if (netboot) {
		int err;

		netboot = TRUE;
		/* post mount setup */
		if ((err = netboot_setup()) != 0) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
	}
#endif
	

#if CONFIG_IMAGEBOOT
	/*
	 * See if a system disk image is present. If so, mount it and
	 * switch the root vnode to point to it
	 */ 
	if (netboot == FALSE && imageboot_needed()) {
		/* 
		 * An image was found.  No turning back: we're booted
		 * with a kernel from the disk image.
		 */
		imageboot_setup(); 
	}
#endif /* CONFIG_IMAGEBOOT */
  
	/* set initial time; all other resource data is already zero'ed */
	microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);

#if DEVFS
	{
	    char mounthere[] = "/dev";	/* !const because of internal casting */

	    bsd_init_kprintf("calling devfs_kernel_mount\n");
	    devfs_kernel_mount(mounthere);
	}
#endif /* DEVFS */

	/* Initialize signal state for process 0. */
	bsd_init_kprintf("calling siginit\n");
	siginit(kernproc);

	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
	bsd_utaskbootstrap();

#if defined(__LP64__)
	kernproc->p_flag |= P_LP64;
#endif

	pal_kernel_announce();

	bsd_init_kprintf("calling mountroot_post_hook\n");

	/* invoke post-root-mount hook */
	if (mountroot_post_hook != NULL)
		mountroot_post_hook();

#if 0 /* not yet */
	consider_zone_gc(FALSE);
#endif


	bsd_init_kprintf("done\n");
}
Example #9
int
ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
{
	struct proc *t = current_proc();	/* target process */
	task_t		task;
	thread_t	th_act;
	struct uthread 	*ut;
	int tr_sigexc = 0;
	int error = 0;
	int stopped = 0;

	AUDIT_ARG(cmd, uap->req);
	AUDIT_ARG(pid, uap->pid);
	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(value32, uap->data);

	if (uap->req == PT_DENY_ATTACH) {
		proc_lock(p);
		if (ISSET(p->p_lflag, P_LTRACED)) {
			proc_unlock(p);
			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
					      p->p_pid, W_EXITCODE(ENOTSUP, 0), 4, 0, 0);
			exit1(p, W_EXITCODE(ENOTSUP, 0), retval);

			thread_exception_return();
			/* NOTREACHED */
		}
		SET(p->p_lflag, P_LNOATTACH);
		proc_unlock(p);

		return(0);
	}

	if (uap->req == PT_FORCEQUOTA) {
		if (kauth_cred_issuser(kauth_cred_get())) {
			OSBitOrAtomic(P_FORCEQUOTA, &t->p_flag);
			return (0);
		} else
			return (EPERM);
	}

	/*
	 *	Intercept and deal with "please trace me" request.
	 */	 
	if (uap->req == PT_TRACE_ME) {
retry_trace_me:;
		proc_t pproc = proc_parent(p);
		if (pproc == NULL)
			return (EINVAL);
#if CONFIG_MACF
		/*
		 * NB: Cannot call kauth_authorize_process(..., KAUTH_PROCESS_CANTRACE, ...)
		 *     since that assumes the process being checked is the current process
		 *     when, in this case, it is the current process's parent.
		 *     Most of the other checks in cantrace() don't apply either.
		 */
		if ((error = mac_proc_check_debug(pproc, p)) == 0) {
#endif
			proc_lock(p);
			/* Make sure the process wasn't re-parented. */
			if (p->p_ppid != pproc->p_pid) {
				proc_unlock(p);
				proc_rele(pproc);
				goto retry_trace_me;
			}
			SET(p->p_lflag, P_LTRACED);
			/* Non-attached case, our tracer is our parent. */
			p->p_oppid = p->p_ppid;
			proc_unlock(p);
			/* Child and parent will have to be able to run modified code. */
			cs_allow_invalid(p);
			cs_allow_invalid(pproc);
#if CONFIG_MACF
		}
#endif
		proc_rele(pproc);
		return (error);
	}
	if (uap->req == PT_SIGEXC) {
		proc_lock(p);
		if (ISSET(p->p_lflag, P_LTRACED)) {
			SET(p->p_lflag, P_LSIGEXC);
			proc_unlock(p);
			return(0);
		} else {
			proc_unlock(p);
			return(EINVAL);
		}
	}

	/*
	 * We do not want ptrace to do anything with the kernel or launchd
	 */
	if (uap->pid < 2) {
		return(EPERM);
	}

	/*
	 *	Locate victim, and make sure it is traceable.
	 */
	if ((t = proc_find(uap->pid)) == NULL)
		return (ESRCH);

	AUDIT_ARG(process, t);

	task = t->task;
	if (uap->req == PT_ATTACHEXC) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
		uap->req = PT_ATTACH;
		tr_sigexc = 1;
	}
	if (uap->req == PT_ATTACH) {
#pragma clang diagnostic pop
		int		err;


		if (kauth_authorize_process(proc_ucred(p), KAUTH_PROCESS_CANTRACE,
		    t, (uintptr_t)&err, 0, 0) == 0) {
			/* it's OK to attach */
			proc_lock(t);
			SET(t->p_lflag, P_LTRACED);
			if (tr_sigexc) 
				SET(t->p_lflag, P_LSIGEXC);
	
			t->p_oppid = t->p_ppid;
			/* Check whether child and parent are allowed to run modified
			 * code (they'll have to) */
			proc_unlock(t);
			cs_allow_invalid(t);
			cs_allow_invalid(p);
			if (t->p_pptr != p)
				proc_reparentlocked(t, p, 1, 0);
	
			proc_lock(t);
			if (get_task_userstop(task) > 0 ) {
				stopped = 1;
			}
			t->p_xstat = 0;
			proc_unlock(t);
			psignal(t, SIGSTOP);
			/*
			 * If the process was stopped, wake up and run through
			 * issignal() again to properly connect to the tracing
			 * process.
			 */
			if (stopped)
				task_resume(task);       
			error = 0;
			goto out;
		}
		else {
			/* not allowed to attach, proper error code returned by kauth_authorize_process */
			if (ISSET(t->p_lflag, P_LNOATTACH)) {
				psignal(p, SIGSEGV);
			}
			
			error = err;
			goto out;
		}
	}

	/*
	 * You can't do what you want to the process if:
	 *	(1) It's not being traced at all,
	 */
	proc_lock(t);
	if (!ISSET(t->p_lflag, P_LTRACED)) {
		proc_unlock(t);
		error = EPERM;
		goto out;
	}

	/*
	 *	(2) it's not being traced by _you_, or
	 */
	if (t->p_pptr != p) {
		proc_unlock(t);
		error = EBUSY;
		goto out;
	}

	/*
	 *	(3) it's not currently stopped.
	 */
	if (t->p_stat != SSTOP) {
		proc_unlock(t);
		error = EBUSY;
		goto out;
	}

	/*
	 *	Mach version of ptrace executes request directly here,
	 *	thus simplifying the interaction of ptrace and signals.
	 */
	/* proc lock is held here */
	switch (uap->req) {

	case PT_DETACH:
		if (t->p_oppid != t->p_ppid) {
			struct proc *pp;

			proc_unlock(t);
			pp = proc_find(t->p_oppid);
			if (pp != PROC_NULL) {
				proc_reparentlocked(t, pp, 1, 0);
				proc_rele(pp);
			} else {
				/* original parent exited while traced */
				proc_list_lock();
				t->p_listflag |= P_LIST_DEADPARENT;
				proc_list_unlock();
				proc_reparentlocked(t, initproc, 1, 0);
			}
			proc_lock(t);
		}

		t->p_oppid = 0;
		CLR(t->p_lflag, P_LTRACED);
		CLR(t->p_lflag, P_LSIGEXC);
		proc_unlock(t);
		goto resume;
		
	case PT_KILL:
		/*
		 *	Tell child process to kill itself after it
		 *	is resumed by adding NSIG to p_cursig. [see issig]
		 */
		proc_unlock(t);
#if CONFIG_MACF
		error = mac_proc_check_signal(p, t, SIGKILL);
		if (0 != error)
			goto resume;
#endif
		psignal(t, SIGKILL);
		goto resume;

	case PT_STEP:			/* single step the child */
	case PT_CONTINUE:		/* continue the child */
		proc_unlock(t);
		th_act = (thread_t)get_firstthread(task);
		if (th_act == THREAD_NULL) {
			error = EINVAL;
			goto out;
		}

		/* force use of Mach SPIs (and task_for_pid security checks) to adjust PC */
		if (uap->addr != (user_addr_t)1) {
			error = ENOTSUP;
			goto out;
		}

		if ((unsigned)uap->data >= NSIG) {
			error = EINVAL;
			goto out;
		}

		if (uap->data != 0) {
#if CONFIG_MACF
			error = mac_proc_check_signal(p, t, uap->data);
			if (0 != error)
				goto out;
#endif
			psignal(t, uap->data);
		}

		if (uap->req == PT_STEP) {
		        /*
			 * set trace bit 
			 * we use sending SIGSTOP as a comparable security check.
			 */
#if CONFIG_MACF
			error = mac_proc_check_signal(p, t, SIGSTOP);
			if (0 != error) {
				goto out;
			}
#endif
			if (thread_setsinglestep(th_act, 1) != KERN_SUCCESS) {
				error = ENOTSUP;
				goto out;
			}
		} else {
		        /*
			 * clear trace bit if on
			 * we use sending SIGCONT as a comparable security check.
			 */
#if CONFIG_MACF
			error = mac_proc_check_signal(p, t, SIGCONT);
			if (0 != error) {
				goto out;
			}
#endif
			if (thread_setsinglestep(th_act, 0) != KERN_SUCCESS) {
				error = ENOTSUP;
				goto out;
			}
		}	
	resume:
		proc_lock(t);
		t->p_xstat = uap->data;
		t->p_stat = SRUN;
		if (t->sigwait) {
			wakeup((caddr_t)&(t->sigwait));
			proc_unlock(t);
			if ((t->p_lflag & P_LSIGEXC) == 0) {
				task_resume(task);
			}
		} else
			proc_unlock(t);
			
		break;
		
	case PT_THUPDATE:  {
		proc_unlock(t);
		if ((unsigned)uap->data >= NSIG) {
			error = EINVAL;
			goto out;
		}
		th_act = port_name_to_thread(CAST_MACH_PORT_TO_NAME(uap->addr));
		if (th_act == THREAD_NULL) {
			error = ESRCH;
			goto out;
		}
		ut = (uthread_t)get_bsdthread_info(th_act);
		if (uap->data)
			ut->uu_siglist |= sigmask(uap->data);
		proc_lock(t);
		t->p_xstat = uap->data;
		t->p_stat = SRUN;
		proc_unlock(t);
		thread_deallocate(th_act);
		error = 0;
		}
		break;
	default:
		proc_unlock(t);
		error = EINVAL;
		goto out;
	}

	error = 0;
out:
	proc_rele(t);
	return(error);
}
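
For reference, the userland view of the two self-directed requests handled first above; an illustrative fragment, not kernel code:

#include <sys/types.h>
#include <sys/ptrace.h>

	/* refuse future attaches; see the PT_DENY_ATTACH handling above */
	(void)ptrace(PT_DENY_ATTACH, 0, 0, 0);

	/* or: ask to be traced by our parent (classic debugger child side) */
	(void)ptrace(PT_TRACE_ME, 0, 0, 0);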
Example #10
static
void
ux_handler(void)
{
    task_t		self = current_task();
    mach_port_name_t	exc_port_name;
    mach_port_name_t	exc_set_name;

    /* self->kernel_vm_space = TRUE; */
    ux_handler_self = self;


    /*
     *	Allocate a port set that we will receive on.
     */
    if (mach_port_allocate(get_task_ipcspace(ux_handler_self), MACH_PORT_RIGHT_PORT_SET,  &exc_set_name) != MACH_MSG_SUCCESS)
	    panic("ux_handler: port_set_allocate failed");

    /*
     *	Allocate an exception port and use object_copyin to
     *	translate it to the global name.  Put it into the set.
     */
    if (mach_port_allocate(get_task_ipcspace(ux_handler_self), MACH_PORT_RIGHT_RECEIVE, &exc_port_name) != MACH_MSG_SUCCESS)
	panic("ux_handler: port_allocate failed");
    if (mach_port_move_member(get_task_ipcspace(ux_handler_self),
    			exc_port_name,  exc_set_name) != MACH_MSG_SUCCESS)
	panic("ux_handler: port_set_add failed");

    if (ipc_object_copyin(get_task_ipcspace(self), exc_port_name,
			MACH_MSG_TYPE_MAKE_SEND, 
			(void *) &ux_exception_port) != MACH_MSG_SUCCESS)
		panic("ux_handler: object_copyin(ux_exception_port) failed");

    proc_list_lock();
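    /*
     * Publish the port: ux_handler_init() msleep()s on &ux_exception_port
     * under proc_list_mlock until this wakeup (see Example #4).
     */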
    thread_wakeup(&ux_exception_port);
    proc_list_unlock();

    /* Message handling loop. */

    for (;;) {
	struct rep_msg {
		mach_msg_header_t Head;
		NDR_record_t NDR;
		kern_return_t RetCode;
	} rep_msg;
	struct exc_msg {
		mach_msg_header_t Head;
		/* start of the kernel processed data */
		mach_msg_body_t msgh_body;
		mach_msg_port_descriptor_t thread;
		mach_msg_port_descriptor_t task;
		/* end of the kernel processed data */
		NDR_record_t NDR;
		exception_type_t exception;
		mach_msg_type_number_t codeCnt;
		mach_exception_data_t code;
		/* pad against occasional MACH_RCV_TOO_LARGE problems */
		char pad[512];
	} exc_msg;
	mach_port_name_t	reply_port;
	kern_return_t	 result;

	exc_msg.Head.msgh_local_port = CAST_MACH_NAME_TO_PORT(exc_set_name);
	exc_msg.Head.msgh_size = sizeof (exc_msg);
#if 0
	result = mach_msg_receive(&exc_msg.Head);
#else
	result = mach_msg_receive(&exc_msg.Head, MACH_RCV_MSG,
			     sizeof (exc_msg), exc_set_name,
			     MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL,
			     0);
#endif
	if (result == MACH_MSG_SUCCESS) {
	    reply_port = CAST_MACH_PORT_TO_NAME(exc_msg.Head.msgh_remote_port);

	    if (mach_exc_server(&exc_msg.Head, &rep_msg.Head)) {
		result = mach_msg_send(&rep_msg.Head, MACH_SEND_MSG,
			sizeof (rep_msg), MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
		if (reply_port != 0 && result != MACH_MSG_SUCCESS)
			mach_port_deallocate(get_task_ipcspace(ux_handler_self), reply_port);
	    }

	}
	else if (result == MACH_RCV_TOO_LARGE)
		/* ignore oversized messages */;
	else
		panic("exception_handler");
    }
}
Example #11
int
fasttrap_pid_probe(arm_saved_state_t *regs)
{
	proc_t *p = current_proc();
	user_addr_t new_pc = 0;
	fasttrap_bucket_t *bucket;
	lck_mtx_t *pid_mtx;
	fasttrap_tracepoint_t *tp, tp_local;
	pid_t pid;
	dtrace_icookie_t cookie;
	uint_t is_enabled = 0;
	int instr_size;
	int was_simulated = 1, retire_tp = 1;

	user_addr_t pc = regs->pc;

	uthread_t uthread = (uthread_t) get_bsdthread_info(current_thread());

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (uthread->t_dtrace_step) {
		ASSERT(uthread->t_dtrace_on);
		fasttrap_sigtrap(p, uthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	uthread->t_dtrace_ft = 0;
	uthread->t_dtrace_pc = 0;
	uthread->t_dtrace_npc = 0;
	uthread->t_dtrace_scrpc = 0;
	uthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	if (p->p_lflag & P_LINVFORK) {
		proc_list_lock();
		while (p->p_lflag & P_LINVFORK)
			p = p->p_pptr;
		proc_list_unlock();
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	lck_mtx_lock(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid,pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		lck_mtx_unlock(pid_mtx);
		return (-1);
	}

	/* Default to always execute */
	int condition_code = 0xE;
	if (tp->ftt_thumb) {
		uint32_t itstate = GETITSTATE(regs->cpsr);
		if (itstate != 0) {
			/* In IT block, make sure it's the last statement in the block */
			if (ISLASTINIT(itstate)) {
				condition_code = itstate >> 4;
			} else {
				printf("dtrace: fasttrap: Tried to trace instruction %08x at %08x but not at end of IT block\n",
				    (tp->ftt_thumb && dtrace_instr_size(tp->ftt_instr,tp->ftt_thumb) == 2) ? tp->ftt_instr1 : tp->ftt_instr, pc);

				fasttrap_tracepoint_remove(p, tp);
				lck_mtx_unlock(pid_mtx);
				return (-1);
			}
		}
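/*
 * [Illustrative sketch, not from the original source.]  The IT-block check
 * above relies on the GETITSTATE()/ISLASTINIT() macros.  Assuming the
 * standard ARM ITSTATE encoding (condition in bits [7:4], block mask in
 * bits [3:0]), minimal equivalents could look like this; the helper names
 * are hypothetical:
 */
#include <stdint.h>

/* Condition code governing the next instruction in the IT block. */
static inline uint32_t
it_condition(uint32_t itstate)
{
	return itstate >> 4;			/* ITSTATE[7:4] */
}

/* True when exactly one instruction remains in the block (mask == 0b1000). */
static inline int
it_is_last(uint32_t itstate)
{
	return (itstate & 0xFu) == 0x8u;
}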
Example #12
0
/*
 * This function is called very early on in the Mach startup, from the
 * function start_kernel_threads() in osfmk/kern/startup.c.  It's called
 * in the context of the current (startup) task using a call to the
 * function kernel_thread_create() to jump into bsd_init().
 * Internally, kernel_thread_create() calls thread_create_internal(),
 * which calls uthread_alloc().  The function of uthread_alloc() is
 * normally to allocate a uthread structure, and fill out the uu_sigmask,
 * uu_context fields.  It skips filling these out in the case of the "task"
 * being "kernel_task", because the order of operation is inverted.  To
 * account for that, we need to manually fill in at least the contents
 * of the uu_context.vc_ucred field so that the uthread structure can be
 * used like any other.
 */
void
bsd_init(void)
{
	struct uthread *ut;
	unsigned int i;
#if __i386__ || __x86_64__
	int error;
#endif	
	struct vfs_context context;
	kern_return_t	ret;
	struct ucred temp_cred;

#define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */

	kernel_flock = funnel_alloc(KERNEL_FUNNEL);
	if (kernel_flock == (funnel_t *)0 ) {
		panic("bsd_init: Failed to allocate kernel funnel");
	}
        
	printf(copyright);
	
	bsd_init_kprintf("calling kmeminit\n");
	kmeminit();
	
	bsd_init_kprintf("calling parse_bsd_args\n");
	parse_bsd_args();

	/* Initialize kauth subsystem before instancing the first credential */
	bsd_init_kprintf("calling kauth_init\n");
	kauth_init();

	/* Initialize process and pgrp structures. */
	bsd_init_kprintf("calling procinit\n");
	procinit();

	/* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
	tty_init();

	kernproc = &proc0;	/* implicitly bzero'ed */

	/* kernel_task->proc = kernproc; */
	set_bsdtask_info(kernel_task,(void *)kernproc);

	/* give kernproc a name */
	bsd_init_kprintf("calling process_name\n");
	process_name("kernel_task", kernproc);

	/* allocate proc lock group attribute and group */
	bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
	proc_lck_grp_attr= lck_grp_attr_alloc_init();

	proc_lck_grp = lck_grp_alloc_init("proc",  proc_lck_grp_attr);
#ifndef CONFIG_EMBEDDED
	proc_slock_grp = lck_grp_alloc_init("proc-slock",  proc_lck_grp_attr);
	proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock",  proc_lck_grp_attr);
	proc_mlock_grp = lck_grp_alloc_init("proc-mlock",  proc_lck_grp_attr);
#endif
	/* Allocate proc lock attribute */
	proc_lck_attr = lck_attr_alloc_init();
#if 0
#if __PROC_INTERNAL_DEBUG
	lck_attr_setdebug(proc_lck_attr);
#endif
#endif

#ifdef CONFIG_EMBEDDED
	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
#else	
	proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
#endif

	execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	execargs_cache_size = bsd_simul_execs;
	execargs_free_count = bsd_simul_execs;
	execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t));
	bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t));
	
	if (current_task() != kernel_task)
		printf("bsd_init: We have a problem, "
				"current task is not kernel task\n");
	
	bsd_init_kprintf("calling get_bsdthread_info\n");
	ut = (uthread_t)get_bsdthread_info(current_thread());

#if CONFIG_MACF
	/*
	 * Initialize the MAC Framework
	 */
	mac_policy_initbsd();
	kernproc->p_mac_enforce = 0;
#endif /* MAC */

	/*
	 * Create process 0.
	 */
	proc_list_lock();
	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
	kernproc->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);	
#else
	lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
#endif
	/* There is no other BSD thread at this point, so this is safe without the pgrp lock */
	LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
	kernproc->p_listflag |= P_LIST_INPGRP;
	kernproc->p_pgrpid = 0;

	pgrp0.pg_session = &session0;
	pgrp0.pg_membercnt = 1;

	session0.s_count = 1;
	session0.s_leader = kernproc;
	session0.s_listflags = 0;
#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
#else
	lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
#endif
	LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
	proc_list_unlock();

#if CONFIG_LCTX
	kernproc->p_lctx = NULL;
#endif

	kernproc->task = kernel_task;
	
	kernproc->p_stat = SRUN;
	kernproc->p_flag = P_SYSTEM;
	kernproc->p_nice = NZERO;
	kernproc->p_pptr = kernproc;

	TAILQ_INIT(&kernproc->p_uthlist);
	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
	
	kernproc->sigwait = FALSE;
	kernproc->sigwait_thread = THREAD_NULL;
	kernproc->exit_thread = THREAD_NULL;
	kernproc->p_csflags = CS_VALID;

	/*
	 * Create credential.  This also initializes the audit information.
	 */
	bsd_init_kprintf("calling bzero\n");
	bzero(&temp_cred, sizeof(temp_cred));
	temp_cred.cr_ngroups = 1;

	temp_cred.cr_audit.as_aia_p = &audit_default_aia;
	/* XXX the following will go away with cr_au */
	temp_cred.cr_au.ai_auid = AU_DEFAUDITID;

	bsd_init_kprintf("calling kauth_cred_create\n");
	kernproc->p_ucred = kauth_cred_create(&temp_cred); 

	/* give the (already existing) initial thread a reference on it */
	bsd_init_kprintf("calling kauth_cred_ref\n");
	kauth_cred_ref(kernproc->p_ucred);
	ut->uu_context.vc_ucred = kernproc->p_ucred;
	ut->uu_context.vc_thread = current_thread();

	TAILQ_INIT(&kernproc->p_aio_activeq);
	TAILQ_INIT(&kernproc->p_aio_doneq);
	kernproc->p_aio_total_count = 0;
	kernproc->p_aio_active_count = 0;

	bsd_init_kprintf("calling file_lock_init\n");
	file_lock_init();

#if CONFIG_MACF
	mac_cred_label_associate_kernel(kernproc->p_ucred);
	mac_task_label_update_cred (kernproc->p_ucred, (struct task *) kernproc->task);
#endif

	/* Create the file descriptor table. */
	filedesc0.fd_refcnt = 1+1;	/* +1 so shutdown will not _FREE_ZONE */
	kernproc->p_fd = &filedesc0;
	filedesc0.fd_cmask = cmask;
	filedesc0.fd_knlistsize = -1;
	filedesc0.fd_knlist = NULL;
	filedesc0.fd_knhash = NULL;
	filedesc0.fd_knhashmask = 0;

	/* Create the limits structures.  (A user-space view of these inherited limits is sketched after this function.) */
	kernproc->p_limit = &limit0;
	for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur = 
			limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
	limit0.pl_refcnt = 1;

	kernproc->p_stats = &pstats0;
	kernproc->p_sigacts = &sigacts0;

	/*
	 * Charge root for one process: the initial user process.
	 */
	bsd_init_kprintf("calling chgproccnt\n");
	(void)chgproccnt(0, 1);

	/*
	 *	Allocate a kernel submap for pageable memory
	 *	for temporary copying (execve()).
	 */
	{
		vm_offset_t	minimum;

		bsd_init_kprintf("calling kmem_suballoc\n");
		ret = kmem_suballoc(kernel_map,
				&minimum,
				(vm_size_t)bsd_pageable_map_size,
				TRUE,
				VM_FLAGS_ANYWHERE,
				&bsd_pageable_map);
		if (ret != KERN_SUCCESS) 
			panic("bsd_init: Failed to allocate bsd pageable map");
	}

	/*
	 * Initialize buffers and hash links for buffers
	 *
	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
	 *		happen after a credential has been associated with
	 *		the kernel task.
	 */
	bsd_init_kprintf("calling bsd_bufferinit\n");
	bsd_bufferinit();

	/*
	 * execve() argument buffers are managed through the execargs cache
	 * initialized above; no separate execve semaphore is created.
	 */

	/*
	 * Initialize the calendar.
	 */
	bsd_init_kprintf("calling IOKitInitializeTime\n");
	IOKitInitializeTime();

	if (turn_on_log_leaks && !new_nkdbufs)
		new_nkdbufs = 200000;
	start_kern_tracing(new_nkdbufs);
	if (turn_on_log_leaks)
		log_leaks = 1;

	bsd_init_kprintf("calling ubc_init\n");
	ubc_init();

	/* Initialize the file systems. */
	bsd_init_kprintf("calling vfsinit\n");
	vfsinit();

#if SOCKETS
	/* Initialize per-CPU cache allocator */
	mcache_init();

	/* Initialize mbuf's. */
	bsd_init_kprintf("calling mbinit\n");
	mbinit();
	net_str_id_init(); /* for mbuf tags */
#endif /* SOCKETS */

	/*
	 * Initializes security event auditing.
	 * XXX: Should/could this occur later?
	 */
#if CONFIG_AUDIT
	bsd_init_kprintf("calling audit_init\n");
 	audit_init();  
#endif

	/* Initialize kqueues */
	bsd_init_kprintf("calling knote_init\n");
	knote_init();

	/* Initialize for async IO */
	bsd_init_kprintf("calling aio_init\n");
	aio_init();

	/* Initialize pipes */
	bsd_init_kprintf("calling pipeinit\n");
	pipeinit();

	/* Initialize SysV shm subsystem locks; the subsystem proper is
	 * initialized through a sysctl.
	 */
#if SYSV_SHM
	bsd_init_kprintf("calling sysv_shm_lock_init\n");
	sysv_shm_lock_init();
#endif
#if SYSV_SEM
	bsd_init_kprintf("calling sysv_sem_lock_init\n");
	sysv_sem_lock_init();
#endif
#if SYSV_MSG
	bsd_init_kprintf("sysv_msg_lock_init\n");
	sysv_msg_lock_init();
#endif
	bsd_init_kprintf("calling pshm_lock_init\n");
	pshm_lock_init();
	bsd_init_kprintf("calling psem_lock_init\n");
	psem_lock_init();

	pthread_init();
	/* POSIX Shm and Sem */
	bsd_init_kprintf("calling pshm_cache_init\n");
	pshm_cache_init();
	bsd_init_kprintf("calling psem_cache_init\n");
	psem_cache_init();
	bsd_init_kprintf("calling time_zone_slock_init\n");
	time_zone_slock_init();

	/* Stack snapshot facility lock */
	stackshot_lock_init();
	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	bsd_init_kprintf("calling sysctl_register_fixed\n");
	sysctl_register_fixed(); 
	bsd_init_kprintf("calling sysctl_mib_init\n");
	sysctl_mib_init();
#if NETWORKING
	bsd_init_kprintf("calling dlil_init\n");
	dlil_init();
	bsd_init_kprintf("calling proto_kpi_init\n");
	proto_kpi_init();
#endif /* NETWORKING */
#if SOCKETS
	bsd_init_kprintf("calling socketinit\n");
	socketinit();
	bsd_init_kprintf("calling domaininit\n");
	domaininit();
#endif /* SOCKETS */

	kernproc->p_fd->fd_cdir = NULL;
	kernproc->p_fd->fd_rdir = NULL;

#if CONFIG_EMBEDDED
	/* Initialize kernel memory status notifications */
	bsd_init_kprintf("calling kern_memorystatus_init\n");
	kern_memorystatus_init();
#endif

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	/* kick off timeout driven events by calling first time */
	thread_wakeup(&lbolt);
	timeout(lightning_bolt, 0, hz);

	bsd_init_kprintf("calling bsd_autoconf\n");
	bsd_autoconf();

#if CONFIG_DTRACE
	dtrace_postinit();
#endif

	/*
	 * We attach the loopback interface *way* down here to ensure
	 * it happens after autoconf(), otherwise it becomes the
	 * "primary" interface.
	 */
#include <loop.h>
#if NLOOP > 0
	bsd_init_kprintf("calling loopattach\n");
	loopattach();			/* XXX */
#endif

#if PFLOG
	/* Initialize packet filter log interface */
	pfloginit();
#endif /* PFLOG */

#if NETHER > 0
	/* Register the built-in dlil ethernet interface family */
	bsd_init_kprintf("calling ether_family_init\n");
	ether_family_init();
#endif /* ETHER */

#if NETWORKING
	/* Call any kext code that wants to run just after network init */
	bsd_init_kprintf("calling net_init_run\n");
	net_init_run();
	
	/* register user tunnel kernel control handler */
	utun_register_control();
#endif /* NETWORKING */

	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
	vnode_pager_bootstrap();
#if 0
	/* XXX Hack for early debug stop */
	printf("\nabout to sleep for 10 seconds\n");
	IOSleep( 10 * 1000 );
	/* Debugger("hello"); */
#endif

	bsd_init_kprintf("calling inittodr\n");
	inittodr(0);

#if CONFIG_EMBEDDED
	{
		/* print out early VM statistics */
		kern_return_t kr1;
		vm_statistics_data_t stat;
		mach_msg_type_number_t count;

		count = HOST_VM_INFO_COUNT;
		kr1 = host_statistics(host_self(),
				      HOST_VM_INFO,
				      (host_info_t)&stat,
				      &count);
		kprintf("Mach Virtual Memory Statistics (page size of 4096) bytes\n"
			"Pages free:\t\t\t%u.\n"
			"Pages active:\t\t\t%u.\n"
			"Pages inactive:\t\t\t%u.\n"
			"Pages wired down:\t\t%u.\n"
			"\"Translation faults\":\t\t%u.\n"
			"Pages copy-on-write:\t\t%u.\n"
			"Pages zero filled:\t\t%u.\n"
			"Pages reactivated:\t\t%u.\n"
			"Pageins:\t\t\t%u.\n"
			"Pageouts:\t\t\t%u.\n"
			"Object cache: %u hits of %u lookups (%d%% hit rate)\n",

			stat.free_count,
			stat.active_count,
			stat.inactive_count,
			stat.wire_count,
			stat.faults,
			stat.cow_faults,
			stat.zero_fill_count,
			stat.reactivations,
			stat.pageins,
			stat.pageouts,
			stat.hits,
			stat.lookups,
			(stat.lookups == 0) ? 100 :
			                   ((stat.hits * 100) / stat.lookups));
	}
#endif /* CONFIG_EMBEDDED */
	
	/* Mount the root file system. */
	while (TRUE) {
		int err;

		bsd_init_kprintf("calling setconf\n");
		setconf();

		bsd_init_kprintf("vfs_mountroot\n");
		if (0 == (err = vfs_mountroot()))
			break;
		rootdevice[0] = '\0';
#if NFSCLIENT
		if (mountroot == netboot_mountroot) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: failed to mount network root, error %d, %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
#endif
		printf("cannot mount root, errno = %d\n", err);
		boothowto |= RB_ASKNAME;
	}

	IOSecureBSDRoot(rootdevice);

	context.vc_thread = current_thread();
	context.vc_ucred = kernproc->p_ucred;
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;

	bsd_init_kprintf("calling VFS_ROOT\n");
	/* Get the vnode for '/'.  Set filedesc0.fd_cdir to reference it. */
	if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context))
		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
	rootvnode->v_flag |= VROOT;
	(void)vnode_ref(rootvnode);
	(void)vnode_put(rootvnode);
	filedesc0.fd_cdir = rootvnode;

#if NFSCLIENT
	if (mountroot == netboot_mountroot) {
		int err;
		/* post mount setup */
		if ((err = netboot_setup()) != 0) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i=1; 1; i*=2) {
				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
	}
#endif
	

#if CONFIG_IMAGEBOOT
	/*
	 * See if a system disk image is present. If so, mount it and
	 * switch the root vnode to point to it
	 */ 
  
	if (imageboot_needed()) {
		int err;

		/* An image was found */
		if ((err = imageboot_setup())) {
			/*
			 * this is not fatal. Keep trying to root
			 * off the original media
			 */
			printf("%s: imageboot could not find root, %d\n",
				__FUNCTION__, err);
		}
	}
#endif /* CONFIG_IMAGEBOOT */
  
	/* set initial time; all other resource data is already zero'ed */
	microtime(&kernproc->p_start);
	kernproc->p_stats->p_start = kernproc->p_start;	/* for compat */

#if DEVFS
	{
	    char mounthere[] = "/dev";	/* !const because of internal casting */

	    bsd_init_kprintf("calling devfs_kernel_mount\n");
	    devfs_kernel_mount(mounthere);
	}
#endif /* DEVFS */
	
	/* Initialize signal state for process 0. */
	bsd_init_kprintf("calling siginit\n");
	siginit(kernproc);

	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
	bsd_utaskbootstrap();

#if defined(__LP64__)
	kernproc->p_flag |= P_LP64;
	printf("Kernel is LP64\n");
#endif
#if __i386__ || __x86_64__
	/* this should be done after the root filesystem is mounted */
	error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
	// 10/30/08 - gab: <rdar://problem/6324501>
	// if default 'translate' can't be found, see if the understudy is available
	if (ENOENT == error) {
		strlcpy(exec_archhandler_ppc.path, kRosettaStandIn_str, MAXPATHLEN);
		error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
	}
	if (error) /* XXX make more generic */
		exec_archhandler_ppc.path[0] = 0;
#endif	

	bsd_init_kprintf("calling mountroot_post_hook\n");

	/* invoke post-root-mount hook */
	if (mountroot_post_hook != NULL)
		mountroot_post_hook();

#if 0 /* not yet */
	consider_zone_gc(FALSE);
#endif

	bsd_init_kprintf("done\n");
}
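/*
 * [Illustrative sketch, not part of bsd_init().]  The rlimit table seeded
 * for process 0 above is inherited by every descendant process and is what
 * getrlimit(2) later reports, e.g.:
 */
#include <sys/resource.h>
#include <stdio.h>

int
main(void)
{
	struct rlimit rl;

	/* soft limit starts at the NOFILE value seeded in bsd_init() */
	if (getrlimit(RLIMIT_NOFILE, &rl) == 0)
		printf("open files: cur=%llu max=%llu\n",
		    (unsigned long long)rl.rlim_cur,
		    (unsigned long long)rl.rlim_max);

	/* stack limit starts at vm_initial_limit_stack */
	if (getrlimit(RLIMIT_STACK, &rl) == 0)
		printf("stack: cur=%llu max=%llu\n",
		    (unsigned long long)rl.rlim_cur,
		    (unsigned long long)rl.rlim_max);
	return 0;
}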