/*
 * Destroy a process structure that resulted from a call to forkproc(), but
 * which must be returned to the system because of a subsequent failure
 * preventing it from becoming active.
 *
 * Parameters:	p		The incomplete process from forkproc()
 *
 * Returns:	(void)
 *
 * Note:	This function should only be used in an error handler following
 *		a call to forkproc().
 *
 *		Operations occur in reverse order of those in forkproc().
 */
void
forkproc_free(proc_t p)
{
	/* We held signal and transition locks; drop them */
	proc_signalend(p, 0);
	proc_transend(p, 0);

	/*
	 * If we have our own copy of the resource limits structure, we
	 * need to free it.  If it's a shared copy, we need to drop our
	 * reference on it.
	 */
	proc_limitdrop(p, 0);
	p->p_limit = NULL;

#if SYSV_SHM
	/* Need to drop references to the shared memory segment(s), if any */
	if (p->vm_shm) {
		/*
		 * Use shmexec(): we have no address space, so no mappings
		 *
		 * XXX Yes, the routine is badly named.
		 */
		shmexec(p);
	}
#endif

	/* Need to undo the effects of the fdcopy(), if any */
	fdfree(p);

	/*
	 * Drop the reference on a text vnode pointer, if any
	 * XXX This code is broken in forkproc(); see <rdar://4256419>;
	 * XXX if anyone ever uses this field, we will be extremely unhappy.
	 */
	if (p->p_textvp) {
		vnode_rele(p->p_textvp);
		p->p_textvp = NULL;
	}

	/* Stop the profiling clock */
	stopprofclock(p);

	/* Release the credential reference */
	kauth_cred_unref(&p->p_ucred);

	proc_list_lock();
	/* Decrement the count of processes in the system */
	nprocs--;
	proc_list_unlock();

	thread_call_free(p->p_rcall);

	/* Free allocated memory */
	FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS);
	FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS);
	proc_checkdeadrefs(p);
	FREE_ZONE(p, sizeof *p, M_PROC);
}
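/*
 * Illustrative sketch (not part of the original source) of the error-handling
 * pattern forkproc_free() exists to serve: a caller that obtains a proc from
 * forkproc() and then fails a later step must tear it down this way.
 * "later_setup_step" is a hypothetical placeholder for any such step; compare
 * the fork_create_child() failure path in cloneproc() below.
 */
#if 0	/* example only */
static proc_t
forkproc_usage_sketch(proc_t parent)
{
	proc_t child;

	if ((child = forkproc(parent)) == NULL)
		return (NULL);			/* nothing to undo yet */
	if (later_setup_step(child) != 0) {	/* hypothetical failure point */
		forkproc_free(child);		/* undo, in reverse of forkproc() */
		return (NULL);
	}
	return (child);
}
#endif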
static int
sd_callback2(proc_t p, void * args)
{
	struct sd_iterargs * sd = (struct sd_iterargs *)args;
	int signo = sd->signo;
	int setsdstate = sd->setsdstate;
	int countproc = sd->countproc;

	proc_lock(p);
	p->p_shutdownstate = setsdstate;
	if (p->p_stat != SZOMB) {
		proc_unlock(p);
		if (countproc != 0) {
			proc_list_lock();
			p->p_listflag |= P_LIST_EXITCOUNT;
			proc_shutdown_exitcount++;
			proc_list_unlock();
		}
		psignal(p, signo);
		if (countproc != 0)
			sd->activecount++;
	} else
		proc_unlock(p);

	return (PROC_RETURNED);
}
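/*
 * Sketch of the iterator argument block consumed above; the field list is
 * inferred from the accesses in sd_callback2() and proc_shutdown(), and the
 * authoritative declaration lives elsewhere in this file.
 */
#if 0	/* reference only */
struct sd_iterargs {
	int signo;		/* signal to post to each matching proc */
	int setsdstate;		/* value recorded in p_shutdownstate */
	int countproc;		/* nonzero: count procs we expect to exit */
	int activecount;	/* out: number of procs actually signalled */
};
#endif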
/*
 * cloneproc
 *
 * Description:	Create a new process from a specified process.
 *
 * Parameters:	parent_task		The parent task to be cloned, or
 *					TASK_NULL if the new task is not to
 *					inherit the task and VM
 *					characteristics of the parent
 *		parent_coalitions	The coalitions the child should be
 *					placed in (passed through to
 *					fork_create_child())
 *		parent_proc		The parent process to be cloned
 *		inherit_memory		True if the child is to inherit
 *					memory from the parent; if this is
 *					true, then parent_task must also be
 *					non-NULL
 *		memstat_internal	Whether to track the process in the
 *					jetsam priority list (if configured)
 *
 * Returns:	!NULL			pointer to new child thread
 *		NULL			Failure (unspecified)
 *
 * Note:	On return, the newly created child process has its signal lock
 *		held to block delivery of signals to it if called with lock
 *		set.  fork() code needs to explicitly remove this lock before
 *		signals can be delivered.
 *
 *		In the case of bootstrap, this function can be called from
 *		bsd_utaskbootstrap() in order to bootstrap the first process;
 *		the net effect is to provide a uthread structure for the
 *		kernel process associated with the kernel task.
 *
 * XXX:		Tristating using the value parent_task as the major key
 *		and inherit_memory as the minor key is something we should
 *		refactor later; we owe the current semantics, ultimately,
 *		to the semantics of task_create_internal.  For now, we will
 *		live with this being somewhat awkward.
 */
thread_t
cloneproc(task_t parent_task, coalition_t *parent_coalitions, proc_t parent_proc, int inherit_memory, int memstat_internal)
{
#if !CONFIG_MEMORYSTATUS
#pragma unused(memstat_internal)
#endif
	task_t child_task;
	proc_t child_proc;
	thread_t child_thread = NULL;

	if ((child_proc = forkproc(parent_proc)) == NULL) {
		/* Failed to allocate new process */
		goto bad;
	}

	child_thread = fork_create_child(parent_task, parent_coalitions, child_proc, inherit_memory,
	    (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64));

	if (child_thread == NULL) {
		/*
		 * Failed to create thread; now we must deconstruct the new
		 * process previously obtained from forkproc().
		 */
		forkproc_free(child_proc);
		goto bad;
	}

	child_task = get_threadtask(child_thread);
	if (parent_proc->p_flag & P_LP64) {
		task_set_64bit(child_task, TRUE);
		OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag);
	} else {
		task_set_64bit(child_task, FALSE);
		OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag);
	}

#if CONFIG_MEMORYSTATUS
	if (memstat_internal) {
		proc_list_lock();
		child_proc->p_memstat_state |= P_MEMSTAT_INTERNAL;
		proc_list_unlock();
	}
#endif

	/* make child visible */
	pinsertchild(parent_proc, child_proc);

	/*
	 * Make child runnable, set start time.
	 */
	child_proc->p_stat = SRUN;
bad:
	return (child_thread);
}
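/*
 * Annotation (not original source): the (parent_task, inherit_memory)
 * tristate the XXX note above complains about decodes roughly as follows,
 * with the final semantics owed to task_create_internal():
 *
 *	parent_task	inherit_memory	resulting child task
 *	TASK_NULL	(ignored)	fresh task, fresh address space
 *	!TASK_NULL	FALSE		new task, no VM inheritance (spawn)
 *	!TASK_NULL	TRUE		new task, parent VM copied (fork)
 */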
void
ux_handler_init(void)
{
	thread_t	thread = THREAD_NULL;

	ux_exception_port = MACH_PORT_NULL;
	(void) kernel_thread_start((thread_continue_t)ux_handler, NULL, &thread);
	thread_deallocate(thread);
	proc_list_lock();
	if (ux_exception_port == MACH_PORT_NULL) {
		(void)msleep(&ux_exception_port, proc_list_mlock, 0, "ux_handler_wait", 0);
	}
	proc_list_unlock();
}
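/*
 * Annotation: ux_handler_init() parks on &ux_exception_port under
 * proc_list_mlock until ux_handler(), running in the kernel thread started
 * above, publishes the port and calls thread_wakeup(&ux_exception_port);
 * see the matching wakeup in ux_handler() below.
 */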
/*
 * fork1
 *
 * Description:	common code used by all new process creation other than the
 *		bootstrap of the initial process on the system
 *
 * Parameters:	parent_proc		parent process of the process being
 *					created
 *		child_threadp		pointer to location to receive the
 *					Mach thread_t of the child process
 *		kind			kind of creation being requested
 *
 * Notes:	Permissible values for 'kind':
 *
 *		PROC_CREATE_FORK	Create a complete process which will
 *					return actively running in both the
 *					parent and the child; the child copies
 *					the parent address space.
 *		PROC_CREATE_SPAWN	Create a complete process which will
 *					return actively running in the parent
 *					only after returning actively running
 *					in the child; the child address space
 *					is newly created by an image activator,
 *					after which the child is run.
 *		PROC_CREATE_VFORK	Create a partial process which will
 *					borrow the parent task, thread, and
 *					uthread to return running in the child;
 *					the child address space and other parts
 *					are lazily created at execve() time, or
 *					the child is terminated, and the parent
 *					does not actively run until that
 *					happens.
 *
 *		At first it may seem strange that we return the child thread
 *		address rather than process structure, since the process is
 *		the only part guaranteed to be "new"; however, since we do
 *		not actually adjust other references between Mach and BSD (see
 *		the block diagram above the implementation of vfork()), this
 *		is the only method which guarantees us the ability to get
 *		back to the other information.
 */
int
fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
{
	thread_t parent_thread = (thread_t)current_thread();
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
	proc_t child_proc = NULL;	/* set in switch, but compiler... */
	thread_t child_thread = NULL;
	uid_t uid;
	int count;
	int err = 0;
	int spawn = 0;

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit.  The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = kauth_cred_get()->cr_ruid;
	proc_list_lock();
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		proc_list_unlock();
		tablefull("proc");
		return (EAGAIN);
	}
	proc_list_unlock();

	/*
	 * Increment the count of procs running with this uid.  Don't allow
	 * a nonprivileged user to exceed their current limit, which is
	 * always less than what an rlim_t can hold.
	 * (locking protection is provided by list lock held in chgproccnt)
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 &&
	    (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		err = EAGAIN;
		goto bad;
	}

#if CONFIG_MACF
	/*
	 * Determine if MAC policies applied to the process will allow
	 * it to fork.  This is an advisory-only check.
	 */
	err = mac_proc_check_fork(parent_proc);
	if (err != 0) {
		goto bad;
	}
#endif

	switch (kind) {
	case PROC_CREATE_VFORK:
		/*
		 * Prevent a vfork while we are in vfork(); we should
		 * also likely prevent a fork here as well, and this
		 * check should then be outside the switch statement,
		 * since the proc struct contents will copy from the
		 * child and the task/thread/uthread from the parent in
		 * that case.  We do not support vfork() in vfork()
		 * because we don't have to; the same non-requirement
		 * is true of both fork() and posix_spawn() and any
		 * call other than execve() and _exit(), but we've
		 * been historically lenient, so we continue to be so
		 * (for now).
		 *
		 *	<rdar://6640521>	Probably a source of random panics
		 */
		if (parent_uthread->uu_flag & UT_VFORK) {
			printf("fork1 called within vfork by %s\n", parent_proc->p_comm);
			err = EINVAL;
			goto bad;
		}

		/*
		 * Flag us in progress; if we chose to support vfork() in
		 * vfork(), we would chain our parent at this point (in
		 * effect, a stack push).  We don't, since we actually want
		 * to disallow everything not specified in the standard
		 */
		proc_vfork_begin(parent_proc);

		/* The newly created process comes with signal lock held */
		if ((child_proc = forkproc(parent_proc)) == NULL) {
			/* Failed to allocate new process */
			proc_vfork_end(parent_proc);
			err = ENOMEM;
			goto bad;
		}

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
// XXX END: wants to move to be common code (and safe)

		/*
		 * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD
		 *
		 * Note: this is where we would "push" state instead of setting
		 * it for nested vfork() support (see proc_vfork_end() for
		 * description of issues here).
		 */
		child_proc->task = parent_proc->task;

		child_proc->p_lflag |= P_LINVFORK;
		child_proc->p_vforkact = parent_thread;
		child_proc->p_stat = SRUN;

		parent_uthread->uu_flag |= UT_VFORK;
		parent_uthread->uu_proc = child_proc;
		parent_uthread->uu_userstate = (void *)act_thread_csave();
		parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask;

		/* temporarily drop thread-set-id state */
		if (parent_uthread->uu_flag & UT_SETUID) {
			parent_uthread->uu_flag |= UT_WASSETUID;
			parent_uthread->uu_flag &= ~UT_SETUID;
		}

		/* blow thread state information */
		/* XXX is this actually necessary, given syscall return? */
		thread_set_child(parent_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

		/*
		 * Preserve synchronization semantics of vfork.  If
		 * waiting for child to exec or exit, set P_PPWAIT
		 * on child, and sleep on our proc (in case of exit).
		 */
		child_proc->p_lflag |= P_LPPWAIT;
		pinsertchild(parent_proc, child_proc);	/* set visible */

		break;

	case PROC_CREATE_SPAWN:
		/*
		 * A spawned process differs from a forked process in that
		 * the spawned process does not carry around the parent's
		 * baggage with regard to address space copying, dtrace,
		 * and so on.
		 */
		spawn = 1;

		/* FALLSTHROUGH */

	case PROC_CREATE_FORK:
		/*
		 * When we clone the parent process, we are going to inherit
		 * its task attributes and memory, since when we fork, we
		 * will, in effect, create a duplicate of it, with only minor
		 * differences.  Contrarily, spawned processes do not inherit.
		 */
		if ((child_thread = cloneproc(parent_proc->task,
				parent_proc,
				spawn ? FALSE : TRUE)) == NULL) {
			/* Failed to create thread */
			err = EAGAIN;
			goto bad;
		}

		/* copy current thread state into the child thread (only for fork) */
		if (!spawn) {
			thread_dup(child_thread);
		}

		/* child_proc = child_thread->task->proc; */
		child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread)));

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
// XXX END: wants to move to be common code (and safe)

		/*
		 * Blow thread state information; this is what gives the child
		 * process its "return" value from a fork() call.
		 *
		 * Note: this should probably move to fork() proper, since it
		 * is not relevant to spawn, and the value won't matter
		 * until we resume the child there.  If you are in here
		 * refactoring code, consider doing this at the same time.
		 */
		thread_set_child(child_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

// <rdar://6598155> dtrace code cleanup needed
#if CONFIG_DTRACE
		/*
		 * This code applies to new processes who are copying the task
		 * and thread state and address spaces of their parent process.
		 */
		if (!spawn) {
// <rdar://6598155> call dtrace specific function here instead of all this...
			/*
			 * APPLE NOTE: Solaris does a sprlock() and drops the
			 * proc_lock here.  We're cheating a bit and only taking
			 * the p_dtrace_sprlock lock.  A full sprlock would
			 * task_suspend the parent.
			 */
			lck_mtx_lock(&parent_proc->p_dtrace_sprlock);

			/*
			 * Remove all DTrace tracepoints from the child process.  We
			 * need to do this _before_ duplicating USDT providers since
			 * any associated probes may be immediately enabled.
			 */
			if (parent_proc->p_dtrace_count > 0) {
				dtrace_fasttrap_fork(parent_proc, child_proc);
			}

			lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);

			/*
			 * Duplicate any lazy dof(s).  This must be done while NOT
			 * holding the parent sprlock!  Lock ordering is
			 * dtrace_dof_mode_lock, then sprlock.  It is imperative we
			 * always call dtrace_lazy_dofs_duplicate, rather than null
			 * check and call if !NULL.  If we NULL test, during lazy dof
			 * faulting we can race with the faulting code and proceed
			 * from here to beyond the helpers copy.  The lazy dof
			 * faulting will then fail to copy the helpers to the child
			 * process.
			 */
			dtrace_lazy_dofs_duplicate(parent_proc, child_proc);

			/*
			 * Duplicate any helper actions and providers.  The SFORKING
			 * we set above informs the code to enable USDT probes that
			 * sprlock() may fail because the child is being forked.
			 */
			/*
			 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
			 * never fails to find the child.  We do not set SFORKING.
			 */
			if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
				(*dtrace_helpers_fork)(parent_proc, child_proc);
			}
		}
#endif	/* CONFIG_DTRACE */

		break;

	default:
		panic("fork1 called with unknown kind %d", kind);
		break;
	}

	/* return the thread pointer to the caller */
	*child_threadp = child_thread;

bad:
	/*
	 * In the error case, we return a 0 value for the returned pid (but
	 * it is ignored in the trampoline due to the error return); this
	 * is probably not necessary.
	 */
	if (err) {
		(void)chgproccnt(uid, -1);
	}

	return (err);
}
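/*
 * Minimal usage sketch (not original source) of how the three creation kinds
 * map onto the BSD entry points.  The real fork(), vfork() and posix_spawn()
 * wrappers do considerably more work around this call.
 */
#if 0	/* example only */
static int
fork1_usage_sketch(proc_t p, thread_t *tp, int is_spawn)
{
	/*
	 * fork() passes PROC_CREATE_FORK (child copies the parent address
	 * space); posix_spawn() passes PROC_CREATE_SPAWN; vfork() passes
	 * PROC_CREATE_VFORK, after which the child borrows the parent
	 * task/thread/uthread until execve() or _exit().
	 */
	return (fork1(p, tp, is_spawn ? PROC_CREATE_SPAWN : PROC_CREATE_FORK));
}
#endif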
/*
 * forkproc
 *
 * Description:	Create a new process structure, given a parent process
 *		structure.
 *
 * Parameters:	parent_proc		The parent process
 *
 * Returns:	!NULL			The new process structure
 *		NULL			Error (insufficient free memory)
 *
 * Note:	When successful, the newly created process structure is
 *		partially initialized; if a caller needs to deconstruct the
 *		returned structure, they must call forkproc_free() to do so.
 */
proc_t
forkproc(proc_t parent_proc)
{
	proc_t child_proc;	/* Our new process */
	static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
	int error = 0;
	struct session *sessp;
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());

	MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK);
	if (child_proc == NULL) {
		printf("forkproc: M_PROC zone exhausted\n");
		goto bad;
	}
	/* zero it out as we need to insert in hash */
	bzero(child_proc, sizeof *child_proc);

	MALLOC_ZONE(child_proc->p_stats, struct pstats *,
			sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK);
	if (child_proc->p_stats == NULL) {
		printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n");
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}
	MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *,
			sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK);
	if (child_proc->p_sigacts == NULL) {
		printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n");
		FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}

	/* allocate a callout for use by interval timers */
	child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc);
	if (child_proc->p_rcall == NULL) {
		FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS);
		FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
		FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
		child_proc = NULL;
		goto bad;
	}

	/*
	 * Find an unused PID.
	 */
	proc_list_lock();

	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidwrap = 1;
	}
	if (pidwrap != 0) {
		/* the pid stays in the hash both for zombie and running state */
		if (pfind_locked(nextpid) != PROC_NULL) {
			nextpid++;
			goto retry;
		}

		if (pgfind_internal(nextpid) != PGRP_NULL) {
			nextpid++;
			goto retry;
		}
		if (session_find_internal(nextpid) != SESSION_NULL) {
			nextpid++;
			goto retry;
		}
	}
	nprocs++;
	child_proc->p_pid = nextpid;
	child_proc->p_idversion = nextpidversion++;
#if 1
	if (child_proc->p_pid != 0) {
		if (pfind_locked(child_proc->p_pid) != PROC_NULL)
			panic("proc in the list already\n");
	}
#endif
	/* Insert in the hash */
	child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE);
	LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash);
	proc_list_unlock();

	/*
	 * We've identified the PID we are going to use; initialize the new
	 * process structure.
	 */
	child_proc->p_stat = SIDL;
	child_proc->p_pgrpid = PGRPID_DEAD;

	/*
	 * The zero'ing of the proc was at the allocation time due to need
	 * for insertion to hash.  Copy the section that is to be copied
	 * directly from the parent.
	 */
	bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy,
	    (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy));

	/*
	 * Some flags are inherited from the parent.
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY));
	if (parent_proc->p_flag & P_PROFIL)
		startprofclock(child_proc);
	/*
	 * Note that if the current thread has an assumed identity, this
	 * credential will be granted to the new process.
	 */
	child_proc->p_ucred = kauth_cred_get_with_ref();

#ifdef CONFIG_EMBEDDED
	lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
#if CONFIG_DTRACE
	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
	lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
#else /* !CONFIG_EMBEDDED */
	lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
#if CONFIG_DTRACE
	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
	lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
#endif /* !CONFIG_EMBEDDED */
	klist_init(&child_proc->p_klist);

	if (child_proc->p_textvp != NULLVP) {
		/* bump references to the text vnode */
		/* Need to hold iocount across the ref call */
		if (vnode_getwithref(child_proc->p_textvp) == 0) {
			error = vnode_ref(child_proc->p_textvp);
			vnode_put(child_proc->p_textvp);
			if (error != 0)
				child_proc->p_textvp = NULLVP;
		}
	}

	/*
	 * Copy the parent's per-process open file table to the child; if
	 * there is a per-thread current working directory, set the child's
	 * per-process current working directory to that instead of the
	 * parent's.
	 *
	 * XXX may fail to copy descriptors to child
	 */
	child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);

#if SYSV_SHM
	if (parent_proc->vm_shm) {
		/* XXX may fail to attach shm to child */
		(void)shmfork(parent_proc, child_proc);
	}
#endif
	/*
	 * inherit the limit structure to child
	 */
	proc_limitfork(parent_proc, child_proc);

	if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
		uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur;
		child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur;
	}

	/* Initialize new process stats, including start time */
	/* <rdar://6640543> non-zeroed portion contains garbage AFAICT */
	bzero(&child_proc->p_stats->pstat_startzero,
	    (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero -
	    (caddr_t)&child_proc->p_stats->pstat_startzero));
	bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof));
	microtime(&child_proc->p_start);
	child_proc->p_stats->p_start = child_proc->p_start;	/* for compat */

	if (parent_proc->p_sigacts != NULL)
		(void)memcpy(child_proc->p_sigacts,
				parent_proc->p_sigacts, sizeof *child_proc->p_sigacts);
	else
		(void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts);

	sessp = proc_session(parent_proc);
	if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT)
		OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag);
	session_rele(sessp);

	/*
	 * block all signals to reach the process.
	 * no transition race should be occurring with the child yet,
	 * but indicate that the process is in (the creation) transition.
	 */
	proc_signalstart(child_proc, 0);
	proc_transstart(child_proc, 0);

	child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX;
	TAILQ_INIT(&child_proc->p_uthlist);
	TAILQ_INIT(&child_proc->p_aio_activeq);
	TAILQ_INIT(&child_proc->p_aio_doneq);

	/* Inherit the parent flags for code sign */
	child_proc->p_csflags = parent_proc->p_csflags;

	/*
	 * All processes have work queue locks; cleaned up by
	 * reap_child_locked()
	 */
	workqueue_init_lock(child_proc);

	/*
	 * Copy work queue information
	 *
	 * Note: This should probably only happen in the case where we are
	 *	creating a child that is a copy of the parent; since this
	 *	routine is called in the non-duplication case of vfork()
	 *	or posix_spawn(), then this information should likely not
	 *	be duplicated.
	 *
	 * <rdar://6640553> Work queue pointers that no longer point to code
	 */
	child_proc->p_wqthread = parent_proc->p_wqthread;
	child_proc->p_threadstart = parent_proc->p_threadstart;
	child_proc->p_pthsize = parent_proc->p_pthsize;
	child_proc->p_targconc = parent_proc->p_targconc;
	if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
		child_proc->p_lflag |= P_LREGISTER;
	}
	child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
#if PSYNCH
	pth_proc_hashinit(child_proc);
#endif /* PSYNCH */

#if CONFIG_LCTX
	child_proc->p_lctx = NULL;
	/* Add new process to login context (if any). */
	if (parent_proc->p_lctx != NULL) {
		/*
		 * <rdar://6640564> This should probably be delayed in the
		 * vfork() or posix_spawn() cases.
		 */
		LCTX_LOCK(parent_proc->p_lctx);
		enterlctx(child_proc, parent_proc->p_lctx, 0);
	}
#endif

bad:
	return (child_proc);
}
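/*
 * Worked example of the PID allocation above (annotation): with PID_MAX at
 * its historical value of 99999, the allocation after pid 99998 resets
 * nextpid to 100 and sets pidwrap.  From then on every candidate is checked
 * against the proc hash, the process-group hash and the session hash before
 * being handed out, since a low-numbered PID may still name a long-lived
 * daemon, a zombie, a pgrp or a session.
 */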
static void
proc_shutdown(void)
{
	vfs_context_t ctx = vfs_context_current();
	struct proc *p, *self;
	int delayterm = 0;
	struct sd_filterargs sfargs;
	struct sd_iterargs sdargs;
	int error = 0;
	struct timespec ts;

	/*
	 * Kill as many procs as we can.  (Except ourself...)
	 */
	self = (struct proc *)current_proc();

	/*
	 * Signal init (pid 1) with SIGTERM so that it does not launch
	 * new processes
	 */
	p = proc_find(1);
	if (p && p != self) {
		psignal(p, SIGTERM);
	}
	if (p != NULL)
		proc_rele(p);

	printf("Killing all processes ");

sigterm_loop:
	/*
	 * send SIGTERM to those procs interested in catching one
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 0;
	sdargs.signo = SIGTERM;
	sdargs.setsdstate = 1;
	sdargs.countproc = 1;
	sdargs.activecount = 0;

	error = 0;
	/* post a SIGTERM to all that catch SIGTERM and are not marked for delay */
	proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);

	if (sdargs.activecount != 0 && proc_shutdown_exitcount != 0) {
		proc_list_lock();
		if (proc_shutdown_exitcount != 0) {
			/*
			 * now wait for up to 30 seconds to allow those procs catching SIGTERM
			 * to digest it
			 * as soon as these procs have exited, we'll continue on to the next step
			 */
			ts.tv_sec = 30;
			ts.tv_nsec = 0;
			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
			if (error != 0) {
				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
			}
		}
		proc_list_unlock();
	}
	if (error == ETIMEDOUT) {
		/*
		 * log the names of the unresponsive tasks
		 */
		proc_list_lock();
		for (p = allproc.lh_first; p; p = p->p_list.le_next) {
			if (p->p_shutdownstate == 1) {
				printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
				sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
			}
		}
		proc_list_unlock();

		delay_for_interval(1000 * 5, 1000 * 1000);
	}

	/*
	 * send a SIGKILL to all the procs still hanging around
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 2;
	sdargs.signo = SIGKILL;
	sdargs.setsdstate = 2;
	sdargs.countproc = 1;
	sdargs.activecount = 0;

	/* post a SIGKILL to all procs still running and not marked for delay */
	proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);

	if (sdargs.activecount != 0 && proc_shutdown_exitcount != 0) {
		proc_list_lock();
		if (proc_shutdown_exitcount != 0) {
			/*
			 * wait for up to 60 seconds to allow these procs to exit normally
			 *
			 * History:	The delay interval was changed from 100 to 200
			 *		for NFS requests in particular.
			 */
			ts.tv_sec = 60;
			ts.tv_nsec = 0;
			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
			if (error != 0) {
				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
			}
		}
		proc_list_unlock();
	}

	/*
	 * if we still have procs that haven't exited, then brute force 'em
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 3;
	sdargs.signo = 0;
	sdargs.setsdstate = 3;
	sdargs.countproc = 0;
	sdargs.activecount = 0;

	/* brute-force pass: no signal is posted (signo == 0); just mark and reap */
	proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
	printf("\n");

	/* Now start the termination of processes that are marked for delayed termination */
	if (delayterm == 0) {
		delayterm = 1;
		goto sigterm_loop;
	}

	sd_closelog(ctx);

	/* drop the ref on initproc */
	proc_rele(initproc);
	printf("continuing\n");
}
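/*
 * Summary (annotation): proc_shutdown() escalates in three passes, each
 * driven through proc_rebootscan():
 *
 *	1. SIGTERM, setsdstate = 1, wait up to 30s for catchers to exit
 *	2. SIGKILL, setsdstate = 2, wait up to 60s for stragglers
 *	3. signo 0, setsdstate = 3, brute-force reap of whatever is left
 *
 * and then repeats the whole sequence once with delayterm = 1 for processes
 * that registered for delayed termination.
 */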
void
bsd_init(void)
{
	struct uthread *ut;
	unsigned int i;
	struct vfs_context context;
	kern_return_t	ret;
	struct ucred temp_cred;
	struct posix_cred temp_pcred;
#if NFSCLIENT || CONFIG_IMAGEBOOT
	boolean_t	netboot = FALSE;
#endif

#define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */

	throttle_init();

	printf(copyright);

	bsd_init_kprintf("calling kmeminit\n");
	kmeminit();

	bsd_init_kprintf("calling parse_bsd_args\n");
	parse_bsd_args();

#if CONFIG_DEV_KMEM
	bsd_init_kprintf("calling dev_kmem_init\n");
	dev_kmem_init();
#endif

	/* Initialize kauth subsystem before instancing the first credential */
	bsd_init_kprintf("calling kauth_init\n");
	kauth_init();

	/* Initialize process and pgrp structures. */
	bsd_init_kprintf("calling procinit\n");
	procinit();

	/* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!) */
	tty_init();

	kernproc = &proc0;	/* implicitly bzero'ed */

	/* kernel_task->proc = kernproc; */
	set_bsdtask_info(kernel_task, (void *)kernproc);

	/* give kernproc a name */
	bsd_init_kprintf("calling process_name\n");
	process_name("kernel_task", kernproc);

	/* allocate proc lock group attribute and group */
	bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
	proc_lck_grp_attr = lck_grp_attr_alloc_init();

	proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr);
#if CONFIG_FINE_LOCK_GROUPS
	proc_slock_grp = lck_grp_alloc_init("proc-slock", proc_lck_grp_attr);
	proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr);
	proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock", proc_lck_grp_attr);
	proc_mlock_grp = lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr);
#endif
	/* Allocate proc lock attribute */
	proc_lck_attr = lck_attr_alloc_init();
#if 0
#if __PROC_INTERNAL_DEBUG
	lck_attr_setdebug(proc_lck_attr);
#endif
#endif

#if CONFIG_FINE_LOCK_GROUPS
	proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
#else
	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
	lck_mtx_init(&kernproc->p_ucred_mlock, proc_lck_grp, proc_lck_attr);
	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
#endif

	assert(bsd_simul_execs != 0);
	execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
	execargs_cache_size = bsd_simul_execs;
	execargs_free_count = bsd_simul_execs;
	execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t));
	bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t));

	if (current_task() != kernel_task)
		printf("bsd_init: We have a problem, "
				"current task is not kernel task\n");

	bsd_init_kprintf("calling get_bsdthread_info\n");
	ut = (uthread_t)get_bsdthread_info(current_thread());

#if CONFIG_MACF
	/*
	 * Initialize the MAC Framework
	 */
	mac_policy_initbsd();
	kernproc->p_mac_enforce = 0;

#if defined (__i386__) || defined (__x86_64__)
	/*
	 * We currently only support this on i386/x86_64, as that is the
	 * only lock code we have instrumented so far.
	 */
	check_policy_init(policy_check_flags);
#endif
#endif /* MAC */

	/* Initialize System Override call */
	init_system_override();

	/*
	 * Create process 0.
	 */
	proc_list_lock();
	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
	kernproc->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
#ifdef CONFIG_FINE_LOCK_GROUPS
	lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
#else
	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);
#endif
	/* There is no other bsd thread at this point, so this is safe without the pgrp lock */
	LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
	kernproc->p_listflag |= P_LIST_INPGRP;
	kernproc->p_pgrpid = 0;
	kernproc->p_uniqueid = 0;

	pgrp0.pg_session = &session0;
	pgrp0.pg_membercnt = 1;

	session0.s_count = 1;
	session0.s_leader = kernproc;
	session0.s_listflags = 0;
#ifdef CONFIG_FINE_LOCK_GROUPS
	lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
#else
	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
#endif
	LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
	proc_list_unlock();

	kernproc->task = kernel_task;

	kernproc->p_stat = SRUN;
	kernproc->p_flag = P_SYSTEM;
	kernproc->p_lflag = 0;
	kernproc->p_ladvflag = 0;

#if DEVELOPMENT || DEBUG
	if (bootarg_disable_aslr)
		kernproc->p_flag |= P_DISABLE_ASLR;
#endif

	kernproc->p_nice = NZERO;
	kernproc->p_pptr = kernproc;

	TAILQ_INIT(&kernproc->p_uthlist);
	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);

	kernproc->sigwait = FALSE;
	kernproc->sigwait_thread = THREAD_NULL;
	kernproc->exit_thread = THREAD_NULL;
	kernproc->p_csflags = CS_VALID;

	/*
	 * Create credential.  This also initializes the audit information.
	 */
	bsd_init_kprintf("calling bzero\n");
	bzero(&temp_cred, sizeof(temp_cred));
	bzero(&temp_pcred, sizeof(temp_pcred));
	temp_pcred.cr_ngroups = 1;
	/* kern_proc, shouldn't call up to DS for group membership */
	temp_pcred.cr_flags = CRF_NOMEMBERD;
	temp_cred.cr_audit.as_aia_p = audit_default_aia_p;

	bsd_init_kprintf("calling kauth_cred_create\n");
	/*
	 * We have to label the temp cred before we create from it to
	 * properly set cr_ngroups, or the create will fail.
	 */
	posix_cred_label(&temp_cred, &temp_pcred);
	kernproc->p_ucred = kauth_cred_create(&temp_cred);

	/* update cred on proc */
	PROC_UPDATE_CREDS_ONPROC(kernproc);

	/* give the (already existing) initial thread a reference on it */
	bsd_init_kprintf("calling kauth_cred_ref\n");
	kauth_cred_ref(kernproc->p_ucred);

	ut->uu_context.vc_ucred = kernproc->p_ucred;
	ut->uu_context.vc_thread = current_thread();

	TAILQ_INIT(&kernproc->p_aio_activeq);
	TAILQ_INIT(&kernproc->p_aio_doneq);
	kernproc->p_aio_total_count = 0;
	kernproc->p_aio_active_count = 0;

	bsd_init_kprintf("calling file_lock_init\n");
	file_lock_init();

#if CONFIG_MACF
	mac_cred_label_associate_kernel(kernproc->p_ucred);
#endif

	/* Create the file descriptor table. */
	kernproc->p_fd = &filedesc0;
	filedesc0.fd_cmask = cmask;
	filedesc0.fd_knlistsize = -1;
	filedesc0.fd_knlist = NULL;
	filedesc0.fd_knhash = NULL;
	filedesc0.fd_knhashmask = 0;

	/* Create the limits structures. */
	kernproc->p_limit = &limit0;
	for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur =
			limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
	limit0.pl_refcnt = 1;

	kernproc->p_stats = &pstats0;
	kernproc->p_sigacts = &sigacts0;

	/*
	 * Charge root for one process: launchd.
	 */
	bsd_init_kprintf("calling chgproccnt\n");
	(void)chgproccnt(0, 1);

	/*
	 * Allocate a kernel submap for pageable memory
	 * for temporary copying (execve()).
	 */
	{
		vm_offset_t	minimum;

		bsd_init_kprintf("calling kmem_suballoc\n");
		assert(bsd_pageable_map_size != 0);
		ret = kmem_suballoc(kernel_map,
				&minimum,
				(vm_size_t)bsd_pageable_map_size,
				TRUE,
				VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_BSD),
				&bsd_pageable_map);
		if (ret != KERN_SUCCESS)
			panic("bsd_init: Failed to allocate bsd pageable map");
	}

	/*
	 * Initialize buffers and hash links for buffers
	 *
	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
	 *		happen after a credential has been associated with
	 *		the kernel task.
	 */
	bsd_init_kprintf("calling bsd_bufferinit\n");
	bsd_bufferinit();

	/* Initialize the execve() semaphore */
	bsd_init_kprintf("calling semaphore_create\n");

	if (ret != KERN_SUCCESS)
		panic("bsd_init: Failed to create execve semaphore");

	/*
	 * Initialize the calendar.
	 */
	bsd_init_kprintf("calling IOKitInitializeTime\n");
	IOKitInitializeTime();

	bsd_init_kprintf("calling ubc_init\n");
	ubc_init();

	/*
	 * Initialize device-switches.
	 */
	bsd_init_kprintf("calling devsw_init() \n");
	devsw_init();

	/* Initialize the file systems. */
	bsd_init_kprintf("calling vfsinit\n");
	vfsinit();

#if CONFIG_PROC_UUID_POLICY
	/* Initialize the proc_uuid_policy subsystem */
	bsd_init_kprintf("calling proc_uuid_policy_init()\n");
	proc_uuid_policy_init();
#endif

#if SOCKETS
	/* Initialize per-CPU cache allocator */
	mcache_init();

	/* Initialize mbuf's. */
	bsd_init_kprintf("calling mbinit\n");
	mbinit();
	net_str_id_init(); /* for mbuf tags */
#endif /* SOCKETS */

	/*
	 * Initializes security event auditing.
	 * XXX: Should/could this occur later?
	 */
#if CONFIG_AUDIT
	bsd_init_kprintf("calling audit_init\n");
	audit_init();
#endif

	/* Initialize kqueues */
	bsd_init_kprintf("calling knote_init\n");
	knote_init();

	/* Initialize for async IO */
	bsd_init_kprintf("calling aio_init\n");
	aio_init();

	/* Initialize pipes */
	bsd_init_kprintf("calling pipeinit\n");
	pipeinit();

	/* Initialize SysV shm subsystem locks; the subsystem proper is
	 * initialized through a sysctl.
	 */
#if SYSV_SHM
	bsd_init_kprintf("calling sysv_shm_lock_init\n");
	sysv_shm_lock_init();
#endif
#if SYSV_SEM
	bsd_init_kprintf("calling sysv_sem_lock_init\n");
	sysv_sem_lock_init();
#endif
#if SYSV_MSG
	bsd_init_kprintf("sysv_msg_lock_init\n");
	sysv_msg_lock_init();
#endif
	bsd_init_kprintf("calling pshm_lock_init\n");
	pshm_lock_init();
	bsd_init_kprintf("calling psem_lock_init\n");
	psem_lock_init();

	pthread_init();
	/* POSIX Shm and Sem */
	bsd_init_kprintf("calling pshm_cache_init\n");
	pshm_cache_init();
	bsd_init_kprintf("calling psem_cache_init\n");
	psem_cache_init();
	bsd_init_kprintf("calling time_zone_slock_init\n");
	time_zone_slock_init();
	bsd_init_kprintf("calling select_waitq_init\n");
	select_waitq_init();

	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	bsd_init_kprintf("calling sysctl_register_fixed\n");
	sysctl_register_fixed();
	bsd_init_kprintf("calling sysctl_mib_init\n");
	sysctl_mib_init();
#if NETWORKING
	bsd_init_kprintf("calling dlil_init\n");
	dlil_init();
	bsd_init_kprintf("calling proto_kpi_init\n");
	proto_kpi_init();
#endif /* NETWORKING */
#if SOCKETS
	bsd_init_kprintf("calling socketinit\n");
	socketinit();
	bsd_init_kprintf("calling domaininit\n");
	domaininit();
	iptap_init();
#if FLOW_DIVERT
	flow_divert_init();
#endif /* FLOW_DIVERT */
#endif /* SOCKETS */

	kernproc->p_fd->fd_cdir = NULL;
	kernproc->p_fd->fd_rdir = NULL;

#if CONFIG_FREEZE
#ifndef CONFIG_MEMORYSTATUS
    #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
#endif
	/* Initialise background freezing */
	bsd_init_kprintf("calling memorystatus_freeze_init\n");
	memorystatus_freeze_init();
#endif

#if CONFIG_MEMORYSTATUS
	/* Initialize kernel memory status notifications */
	bsd_init_kprintf("calling memorystatus_init\n");
	memorystatus_init();
#endif /* CONFIG_MEMORYSTATUS */

	bsd_init_kprintf("calling macx_init\n");
	macx_init();

	bsd_init_kprintf("calling acct_init\n");
	acct_init();

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	bsd_init_kprintf("calling bsd_autoconf\n");
	bsd_autoconf();

#if CONFIG_DTRACE
	dtrace_postinit();
#endif

	/*
	 * We attach the loopback interface *way* down here to ensure
	 * it happens after autoconf(), otherwise it becomes the
	 * "primary" interface.
	 */
#include <loop.h>
#if NLOOP > 0
	bsd_init_kprintf("calling loopattach\n");
	loopattach();			/* XXX */
#endif
#if NGIF
	/* Initialize gif interface (after lo0) */
	gif_init();
#endif

#if PFLOG
	/* Initialize packet filter log interface */
	pfloginit();
#endif /* PFLOG */

#if NETHER > 0
	/* Register the built-in dlil ethernet interface family */
	bsd_init_kprintf("calling ether_family_init\n");
	ether_family_init();
#endif /* ETHER */

#if NETWORKING
	/* Call any kext code that wants to run just after network init */
	bsd_init_kprintf("calling net_init_run\n");
	net_init_run();

#if CONTENT_FILTER
	cfil_init();
#endif

#if PACKET_MANGLER
	pkt_mnglr_init();
#endif

#if NECP
	/* Initialize Network Extension Control Policies */
	necp_init();
#endif

	netagent_init();

	/* register user tunnel kernel control handler */
	utun_register_control();
#if IPSEC
	ipsec_register_control();
#endif /* IPSEC */
	netsrc_init();
	nstat_init();
	tcp_cc_init();
#if MPTCP
	mptcp_control_register();
#endif /* MPTCP */
#endif /* NETWORKING */

	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
	vnode_pager_bootstrap();

	bsd_init_kprintf("calling inittodr\n");
	inittodr(0);

	/* Mount the root file system. */
	while (TRUE) {
		int err;

		bsd_init_kprintf("calling setconf\n");
		setconf();
#if NFSCLIENT
		netboot = (mountroot == netboot_mountroot);
#endif

		bsd_init_kprintf("vfs_mountroot\n");
		if (0 == (err = vfs_mountroot()))
			break;
		rootdevice[0] = '\0';
#if NFSCLIENT
		if (netboot) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i = 1; 1; i *= 2) {
				printf("bsd_init: failed to mount network root, error %d, %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
#endif
		printf("cannot mount root, errno = %d\n", err);
		boothowto |= RB_ASKNAME;
	}

	IOSecureBSDRoot(rootdevice);

	context.vc_thread = current_thread();
	context.vc_ucred = kernproc->p_ucred;
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;

	bsd_init_kprintf("calling VFS_ROOT\n");
	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
	if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context))
		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
	rootvnode->v_flag |= VROOT;
	(void)vnode_ref(rootvnode);
	(void)vnode_put(rootvnode);
	filedesc0.fd_cdir = rootvnode;

#if NFSCLIENT
	if (netboot) {
		int err;

		netboot = TRUE;
		/* post mount setup */
		if ((err = netboot_setup()) != 0) {
			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i = 1; 1; i *= 2) {
				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
					err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i*60*1000);
			}
			/*NOTREACHED*/
		}
	}
#endif

#if CONFIG_IMAGEBOOT
	/*
	 * See if a system disk image is present.  If so, mount it and
	 * switch the root vnode to point to it
	 */
	if (netboot == FALSE && imageboot_needed()) {
		/*
		 * An image was found.  No turning back: we're booted
		 * with a kernel from the disk image.
		 */
		imageboot_setup();
	}
#endif /* CONFIG_IMAGEBOOT */

	/* set initial time; all other resource data is already zero'ed */
	microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);

#if DEVFS
	{
	    char mounthere[] = "/dev";	/* !const because of internal casting */

	    bsd_init_kprintf("calling devfs_kernel_mount\n");
	    devfs_kernel_mount(mounthere);
	}
#endif /* DEVFS */

	/* Initialize signal state for process 0. */
	bsd_init_kprintf("calling siginit\n");
	siginit(kernproc);

	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
	bsd_utaskbootstrap();

#if defined(__LP64__)
	kernproc->p_flag |= P_LP64;
#endif

	pal_kernel_announce();

	bsd_init_kprintf("calling mountroot_post_hook\n");

	/* invoke post-root-mount hook */
	if (mountroot_post_hook != NULL)
		mountroot_post_hook();

#if 0 /* not yet */
	consider_zone_gc(FALSE);
#endif

	bsd_init_kprintf("done\n");
}
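/*
 * Annotation: bsd_init_kprintf() compiles to nothing above.  To trace early
 * BSD bring-up on a debug build, the macro can be switched to the variant
 * shown in its own comment:
 *
 *	#define bsd_init_kprintf(x...) kprintf("bsd_init: " x)
 */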
int
ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
{
	struct proc *t = current_proc();	/* target process */
	task_t		task;
	thread_t	th_act;
	struct uthread	*ut;
	int tr_sigexc = 0;
	int error = 0;
	int stopped = 0;

	AUDIT_ARG(cmd, uap->req);
	AUDIT_ARG(pid, uap->pid);
	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(value32, uap->data);

	if (uap->req == PT_DENY_ATTACH) {
		proc_lock(p);
		if (ISSET(p->p_lflag, P_LTRACED)) {
			proc_unlock(p);
			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
					      p->p_pid, W_EXITCODE(ENOTSUP, 0), 4, 0, 0);
			exit1(p, W_EXITCODE(ENOTSUP, 0), retval);

			thread_exception_return();
			/* NOTREACHED */
		}
		SET(p->p_lflag, P_LNOATTACH);
		proc_unlock(p);

		return(0);
	}

	if (uap->req == PT_FORCEQUOTA) {
		if (kauth_cred_issuser(kauth_cred_get())) {
			OSBitOrAtomic(P_FORCEQUOTA, &t->p_flag);
			return (0);
		} else
			return (EPERM);
	}

	/*
	 *	Intercept and deal with "please trace me" request.
	 */
	if (uap->req == PT_TRACE_ME) {
retry_trace_me:;
		proc_t pproc = proc_parent(p);
		if (pproc == NULL)
			return (EINVAL);
#if CONFIG_MACF
		/*
		 * NB: Cannot call kauth_authorize_process(..., KAUTH_PROCESS_CANTRACE, ...)
		 *     since that assumes the process being checked is the current process
		 *     when, in this case, it is the current process's parent.
		 *     Most of the other checks in cantrace() don't apply either.
		 */
		if ((error = mac_proc_check_debug(pproc, p)) == 0) {
#endif
			proc_lock(p);
			/* Make sure the process wasn't re-parented. */
			if (p->p_ppid != pproc->p_pid) {
				proc_unlock(p);
				proc_rele(pproc);
				goto retry_trace_me;
			}
			SET(p->p_lflag, P_LTRACED);
			/* Non-attached case, our tracer is our parent. */
			p->p_oppid = p->p_ppid;
			proc_unlock(p);
			/* Child and parent will have to be able to run modified code. */
			cs_allow_invalid(p);
			cs_allow_invalid(pproc);
#if CONFIG_MACF
		}
#endif
		proc_rele(pproc);

		return (error);
	}
	if (uap->req == PT_SIGEXC) {
		proc_lock(p);
		if (ISSET(p->p_lflag, P_LTRACED)) {
			SET(p->p_lflag, P_LSIGEXC);
			proc_unlock(p);
			return(0);
		} else {
			proc_unlock(p);
			return(EINVAL);
		}
	}

	/*
	 * We do not want ptrace to do anything with kernel or launchd
	 */
	if (uap->pid < 2) {
		return(EPERM);
	}

	/*
	 *	Locate victim, and make sure it is traceable.
	 */
	if ((t = proc_find(uap->pid)) == NULL)
		return (ESRCH);

	AUDIT_ARG(process, t);

	task = t->task;
	if (uap->req == PT_ATTACHEXC) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
		uap->req = PT_ATTACH;
		tr_sigexc = 1;
	}
	if (uap->req == PT_ATTACH) {
#pragma clang diagnostic pop
		int		err;

		if ( kauth_authorize_process(proc_ucred(p), KAUTH_PROCESS_CANTRACE,
					     t, (uintptr_t)&err, 0, 0) == 0 ) {
			/* it's OK to attach */
			proc_lock(t);
			SET(t->p_lflag, P_LTRACED);
			if (tr_sigexc)
				SET(t->p_lflag, P_LSIGEXC);

			t->p_oppid = t->p_ppid;
			/* Check whether child and parent are allowed to run modified
			 * code (they'll have to) */
			proc_unlock(t);
			cs_allow_invalid(t);
			cs_allow_invalid(p);
			if (t->p_pptr != p)
				proc_reparentlocked(t, p, 1, 0);

			proc_lock(t);
			if (get_task_userstop(task) > 0 ) {
				stopped = 1;
			}
			t->p_xstat = 0;
			proc_unlock(t);
			psignal(t, SIGSTOP);
			/*
			 * If the process was stopped, wake up and run through
			 * issignal() again to properly connect to the tracing
			 * process.
			 */
			if (stopped)
				task_resume(task);
			error = 0;
			goto out;
		}
		else {
			/* not allowed to attach, proper error code returned by kauth_authorize_process */
			if (ISSET(t->p_lflag, P_LNOATTACH)) {
				psignal(p, SIGSEGV);
			}

			error = err;
			goto out;
		}
	}

	/*
	 * You can't do what you want to the process if:
	 *	(1) It's not being traced at all,
	 */
	proc_lock(t);
	if (!ISSET(t->p_lflag, P_LTRACED)) {
		proc_unlock(t);
		error = EPERM;
		goto out;
	}

	/*
	 *	(2) it's not being traced by _you_, or
	 */
	if (t->p_pptr != p) {
		proc_unlock(t);
		error = EBUSY;
		goto out;
	}

	/*
	 *	(3) it's not currently stopped.
	 */
	if (t->p_stat != SSTOP) {
		proc_unlock(t);
		error = EBUSY;
		goto out;
	}

	/*
	 *	Mach version of ptrace executes request directly here,
	 *	thus simplifying the interaction of ptrace and signals.
	 */
	/* proc lock is held here */
	switch (uap->req) {

	case PT_DETACH:
		if (t->p_oppid != t->p_ppid) {
			struct proc *pp;

			proc_unlock(t);
			pp = proc_find(t->p_oppid);
			if (pp != PROC_NULL) {
				proc_reparentlocked(t, pp, 1, 0);
				proc_rele(pp);
			} else {
				/* original parent exited while traced */
				proc_list_lock();
				t->p_listflag |= P_LIST_DEADPARENT;
				proc_list_unlock();
				proc_reparentlocked(t, initproc, 1, 0);
			}
			proc_lock(t);
		}

		t->p_oppid = 0;
		CLR(t->p_lflag, P_LTRACED);
		CLR(t->p_lflag, P_LSIGEXC);
		proc_unlock(t);
		goto resume;

	case PT_KILL:
		/*
		 *	Tell child process to kill itself after it
		 *	is resumed by adding NSIG to p_cursig. [see issig]
		 */
		proc_unlock(t);
#if CONFIG_MACF
		error = mac_proc_check_signal(p, t, SIGKILL);
		if (0 != error)
			goto resume;
#endif
		psignal(t, SIGKILL);
		goto resume;

	case PT_STEP:			/* single step the child */
	case PT_CONTINUE:		/* continue the child */
		proc_unlock(t);
		th_act = (thread_t)get_firstthread(task);
		if (th_act == THREAD_NULL) {
			error = EINVAL;
			goto out;
		}

		/* force use of Mach SPIs (and task_for_pid security checks) to adjust PC */
		if (uap->addr != (user_addr_t)1) {
			error = ENOTSUP;
			goto out;
		}

		if ((unsigned)uap->data >= NSIG) {
			error = EINVAL;
			goto out;
		}

		if (uap->data != 0) {
#if CONFIG_MACF
			error = mac_proc_check_signal(p, t, uap->data);
			if (0 != error)
				goto out;
#endif
			psignal(t, uap->data);
		}

		if (uap->req == PT_STEP) {
			/*
			 * set trace bit
			 * we use sending SIGSTOP as a comparable security check.
			 */
#if CONFIG_MACF
			error = mac_proc_check_signal(p, t, SIGSTOP);
			if (0 != error) {
				goto out;
			}
#endif
			if (thread_setsinglestep(th_act, 1) != KERN_SUCCESS) {
				error = ENOTSUP;
				goto out;
			}
		} else {
			/*
			 * clear trace bit if on
			 * we use sending SIGCONT as a comparable security check.
			 */
#if CONFIG_MACF
			error = mac_proc_check_signal(p, t, SIGCONT);
			if (0 != error) {
				goto out;
			}
#endif
			if (thread_setsinglestep(th_act, 0) != KERN_SUCCESS) {
				error = ENOTSUP;
				goto out;
			}
		}
	resume:
		proc_lock(t);
		t->p_xstat = uap->data;
		t->p_stat = SRUN;
		if (t->sigwait) {
			wakeup((caddr_t)&(t->sigwait));
			proc_unlock(t);
			if ((t->p_lflag & P_LSIGEXC) == 0) {
				task_resume(task);
			}
		} else
			proc_unlock(t);

		break;

	case PT_THUPDATE: {
		proc_unlock(t);
		if ((unsigned)uap->data >= NSIG) {
			error = EINVAL;
			goto out;
		}
		th_act = port_name_to_thread(CAST_MACH_PORT_TO_NAME(uap->addr));
		if (th_act == THREAD_NULL) {
			error = ESRCH;
			goto out;
		}
		ut = (uthread_t)get_bsdthread_info(th_act);
		if (uap->data)
			ut->uu_siglist |= sigmask(uap->data);
		proc_lock(t);
		t->p_xstat = uap->data;
		t->p_stat = SRUN;
		proc_unlock(t);
		thread_deallocate(th_act);
		error = 0;
		}
		break;

	default:
		proc_unlock(t);
		error = EINVAL;
		goto out;
	}

	error = 0;
out:
	proc_rele(t);
	return(error);
}
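/*
 * Userland-visible sketch (annotation): a process that wants to refuse
 * future debuggers issues ptrace(PT_DENY_ATTACH, 0, 0, 0) on itself; per
 * the handling at the top of ptrace() above, a process that is already
 * traced when it makes that call is instead terminated with ENOTSUP.
 */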
static void
ux_handler(void)
{
	task_t		self = current_task();
	mach_port_name_t	exc_port_name;
	mach_port_name_t	exc_set_name;

	/* self->kernel_vm_space = TRUE; */
	ux_handler_self = self;

	/*
	 * Allocate a port set that we will receive on.
	 */
	if (mach_port_allocate(get_task_ipcspace(ux_handler_self), MACH_PORT_RIGHT_PORT_SET, &exc_set_name) != MACH_MSG_SUCCESS)
		panic("ux_handler: port_set_allocate failed");

	/*
	 * Allocate an exception port and use object_copyin to
	 * translate it to the global name.  Put it into the set.
	 */
	if (mach_port_allocate(get_task_ipcspace(ux_handler_self), MACH_PORT_RIGHT_RECEIVE, &exc_port_name) != MACH_MSG_SUCCESS)
		panic("ux_handler: port_allocate failed");
	if (mach_port_move_member(get_task_ipcspace(ux_handler_self), exc_port_name, exc_set_name) != MACH_MSG_SUCCESS)
		panic("ux_handler: port_set_add failed");

	if (ipc_object_copyin(get_task_ipcspace(self), exc_port_name,
			MACH_MSG_TYPE_MAKE_SEND, (void *) &ux_exception_port) != MACH_MSG_SUCCESS)
		panic("ux_handler: object_copyin(ux_exception_port) failed");

	proc_list_lock();
	thread_wakeup(&ux_exception_port);
	proc_list_unlock();

	/* Message handling loop. */

	for (;;) {
		struct rep_msg {
			mach_msg_header_t Head;
			NDR_record_t NDR;
			kern_return_t RetCode;
		} rep_msg;
		struct exc_msg {
			mach_msg_header_t Head;
			/* start of the kernel processed data */
			mach_msg_body_t msgh_body;
			mach_msg_port_descriptor_t thread;
			mach_msg_port_descriptor_t task;
			/* end of the kernel processed data */
			NDR_record_t NDR;
			exception_type_t exception;
			mach_msg_type_number_t codeCnt;
			mach_exception_data_t code;
			/* sometimes MACH_RCV_TOO_LARGE problems */
			char pad[512];
		} exc_msg;
		mach_port_name_t reply_port;
		kern_return_t result;

		exc_msg.Head.msgh_local_port = CAST_MACH_NAME_TO_PORT(exc_set_name);
		exc_msg.Head.msgh_size = sizeof (exc_msg);
#if 0
		result = mach_msg_receive(&exc_msg.Head);
#else
		result = mach_msg(&exc_msg.Head, MACH_RCV_MSG,
				  0, sizeof (exc_msg), exc_set_name,
				  MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
#endif
		if (result == MACH_MSG_SUCCESS) {
			reply_port = CAST_MACH_PORT_TO_NAME(exc_msg.Head.msgh_remote_port);

			if (mach_exc_server(&exc_msg.Head, &rep_msg.Head)) {
				result = mach_msg_send(&rep_msg.Head, MACH_SEND_MSG,
						sizeof (rep_msg), MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
				if (reply_port != 0 && result != MACH_MSG_SUCCESS)
					mach_port_deallocate(get_task_ipcspace(ux_handler_self), reply_port);
			}
		}
		else if (result == MACH_RCV_TOO_LARGE)
			/* ignore oversized messages */;
		else
			panic("exception_handler");
	}
}
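/*
 * Annotation: the 512-byte pad in struct exc_msg absorbs trailers larger
 * than the declared layout; when a message still exceeds the buffer, the
 * loop above treats MACH_RCV_TOO_LARGE as ignorable rather than fatal.
 */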
int
fasttrap_pid_probe(arm_saved_state_t *regs)
{
	proc_t *p = current_proc();
	user_addr_t new_pc = 0;
	fasttrap_bucket_t *bucket;
	lck_mtx_t *pid_mtx;
	fasttrap_tracepoint_t *tp, tp_local;
	pid_t pid;
	dtrace_icookie_t cookie;
	uint_t is_enabled = 0;
	int instr_size;
	int was_simulated = 1, retire_tp = 1;

	user_addr_t pc = regs->pc;

	uthread_t uthread = (uthread_t) get_bsdthread_info(current_thread());

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap.  In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (uthread->t_dtrace_step) {
		ASSERT(uthread->t_dtrace_on);
		fasttrap_sigtrap(p, uthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	uthread->t_dtrace_ft = 0;
	uthread->t_dtrace_pc = 0;
	uthread->t_dtrace_npc = 0;
	uthread->t_dtrace_scrpc = 0;
	uthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent.  We know that there's only one thread of control in such a
	 * process: this one.
	 */
	if (p->p_lflag & P_LINVFORK) {
		proc_list_lock();
		while (p->p_lflag & P_LINVFORK)
			p = p->p_pptr;
		proc_list_unlock();
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	lck_mtx_lock(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		lck_mtx_unlock(pid_mtx);
		return (-1);
	}

	/* Default to always execute */
	int condition_code = 0xE;
	if (tp->ftt_thumb) {
		uint32_t itstate = GETITSTATE(regs->cpsr);
		if (itstate != 0) {
			/* In IT block, make sure it's the last statement in the block */
			if (ISLASTINIT(itstate)) {
				condition_code = itstate >> 4;
			} else {
				printf("dtrace: fasttrap: Tried to trace instruction %08x at %08x but not at end of IT block\n",
				    (tp->ftt_thumb && dtrace_instr_size(tp->ftt_instr, tp->ftt_thumb) == 2) ? tp->ftt_instr1 : tp->ftt_instr, pc);

				fasttrap_tracepoint_remove(p, tp);
				lck_mtx_unlock(pid_mtx);
				return (-1);
			}
		}
/* * This function is called very early on in the Mach startup, from the * function start_kernel_threads() in osfmk/kern/startup.c. It's called * in the context of the current (startup) task using a call to the * function kernel_thread_create() to jump into start_kernel_threads(). * Internally, kernel_thread_create() calls thread_create_internal(), * which calls uthread_alloc(). The function of uthread_alloc() is * normally to allocate a uthread structure, and fill out the uu_sigmask, * uu_context fields. It skips filling these out in the case of the "task" * being "kernel_task", because the order of operation is inverted. To * account for that, we need to manually fill in at least the contents * of the uu_context.vc_ucred field so that the uthread structure can be * used like any other. */ void bsd_init(void) { struct uthread *ut; unsigned int i; #if __i386__ || __x86_64__ int error; #endif struct vfs_context context; kern_return_t ret; struct ucred temp_cred; #define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */ kernel_flock = funnel_alloc(KERNEL_FUNNEL); if (kernel_flock == (funnel_t *)0 ) { panic("bsd_init: Failed to allocate kernel funnel"); } printf(copyright); bsd_init_kprintf("calling kmeminit\n"); kmeminit(); bsd_init_kprintf("calling parse_bsd_args\n"); parse_bsd_args(); /* Initialize kauth subsystem before instancing the first credential */ bsd_init_kprintf("calling kauth_init\n"); kauth_init(); /* Initialize process and pgrp structures. */ bsd_init_kprintf("calling procinit\n"); procinit(); /* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/ tty_init(); kernproc = &proc0; /* implicitly bzero'ed */ /* kernel_task->proc = kernproc; */ set_bsdtask_info(kernel_task,(void *)kernproc); /* give kernproc a name */ bsd_init_kprintf("calling process_name\n"); process_name("kernel_task", kernproc); /* allocate proc lock group attribute and group */ bsd_init_kprintf("calling lck_grp_attr_alloc_init\n"); proc_lck_grp_attr= lck_grp_attr_alloc_init(); proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr); #ifndef CONFIG_EMBEDDED proc_slock_grp = lck_grp_alloc_init("proc-slock", proc_lck_grp_attr); proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr); proc_mlock_grp = lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr); #endif /* Allocate proc lock attribute */ proc_lck_attr = lck_attr_alloc_init(); #if 0 #if __PROC_INTERNAL_DEBUG lck_attr_setdebug(proc_lck_attr); #endif #endif #ifdef CONFIG_EMBEDDED proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr); lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr); #else proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr); proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr); lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr); lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr); #endif execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); execargs_cache_size = bsd_simul_execs; execargs_free_count = bsd_simul_execs; execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t)); bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t)); if (current_task() != kernel_task) printf("bsd_init: We have a problem, " "current task is not 
kernel task\n"); bsd_init_kprintf("calling get_bsdthread_info\n"); ut = (uthread_t)get_bsdthread_info(current_thread()); #if CONFIG_MACF /* * Initialize the MAC Framework */ mac_policy_initbsd(); kernproc->p_mac_enforce = 0; #endif /* MAC */ /* * Create process 0. */ proc_list_lock(); LIST_INSERT_HEAD(&allproc, kernproc, p_list); kernproc->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); #ifdef CONFIG_EMBEDDED lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr); #else lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr); #endif /* There is no other bsd thread this point and is safe without pgrp lock */ LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist); kernproc->p_listflag |= P_LIST_INPGRP; kernproc->p_pgrpid = 0; pgrp0.pg_session = &session0; pgrp0.pg_membercnt = 1; session0.s_count = 1; session0.s_leader = kernproc; session0.s_listflags = 0; #ifdef CONFIG_EMBEDDED lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr); #else lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr); #endif LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash); proc_list_unlock(); #if CONFIG_LCTX kernproc->p_lctx = NULL; #endif kernproc->task = kernel_task; kernproc->p_stat = SRUN; kernproc->p_flag = P_SYSTEM; kernproc->p_nice = NZERO; kernproc->p_pptr = kernproc; TAILQ_INIT(&kernproc->p_uthlist); TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list); kernproc->sigwait = FALSE; kernproc->sigwait_thread = THREAD_NULL; kernproc->exit_thread = THREAD_NULL; kernproc->p_csflags = CS_VALID; /* * Create credential. This also Initializes the audit information. */ bsd_init_kprintf("calling bzero\n"); bzero(&temp_cred, sizeof(temp_cred)); temp_cred.cr_ngroups = 1; temp_cred.cr_audit.as_aia_p = &audit_default_aia; /* XXX the following will go away with cr_au */ temp_cred.cr_au.ai_auid = AU_DEFAUDITID; bsd_init_kprintf("calling kauth_cred_create\n"); kernproc->p_ucred = kauth_cred_create(&temp_cred); /* give the (already exisiting) initial thread a reference on it */ bsd_init_kprintf("calling kauth_cred_ref\n"); kauth_cred_ref(kernproc->p_ucred); ut->uu_context.vc_ucred = kernproc->p_ucred; ut->uu_context.vc_thread = current_thread(); TAILQ_INIT(&kernproc->p_aio_activeq); TAILQ_INIT(&kernproc->p_aio_doneq); kernproc->p_aio_total_count = 0; kernproc->p_aio_active_count = 0; bsd_init_kprintf("calling file_lock_init\n"); file_lock_init(); #if CONFIG_MACF mac_cred_label_associate_kernel(kernproc->p_ucred); mac_task_label_update_cred (kernproc->p_ucred, (struct task *) kernproc->task); #endif /* Create the file descriptor table. */ filedesc0.fd_refcnt = 1+1; /* +1 so shutdown will not _FREE_ZONE */ kernproc->p_fd = &filedesc0; filedesc0.fd_cmask = cmask; filedesc0.fd_knlistsize = -1; filedesc0.fd_knlist = NULL; filedesc0.fd_knhash = NULL; filedesc0.fd_knhashmask = 0; /* Create the limits structures. */ kernproc->p_limit = &limit0; for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++) limit0.pl_rlimit[i].rlim_cur = limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE; limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid; limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack; limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data; limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core; limit0.pl_refcnt = 1; kernproc->p_stats = &pstats0; kernproc->p_sigacts = &sigacts0; /* * Charge root for two processes: init and mach_init. 
*/ bsd_init_kprintf("calling chgproccnt\n"); (void)chgproccnt(0, 1); /* * Allocate a kernel submap for pageable memory * for temporary copying (execve()). */ { vm_offset_t minimum; bsd_init_kprintf("calling kmem_suballoc\n"); ret = kmem_suballoc(kernel_map, &minimum, (vm_size_t)bsd_pageable_map_size, TRUE, VM_FLAGS_ANYWHERE, &bsd_pageable_map); if (ret != KERN_SUCCESS) panic("bsd_init: Failed to allocate bsd pageable map"); } /* * Initialize buffers and hash links for buffers * * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must * happen after a credential has been associated with * the kernel task. */ bsd_init_kprintf("calling bsd_bufferinit\n"); bsd_bufferinit(); /* Initialize the execve() semaphore */ bsd_init_kprintf("calling semaphore_create\n"); if (ret != KERN_SUCCESS) panic("bsd_init: Failed to create execve semaphore"); /* * Initialize the calendar. */ bsd_init_kprintf("calling IOKitInitializeTime\n"); IOKitInitializeTime(); if (turn_on_log_leaks && !new_nkdbufs) new_nkdbufs = 200000; start_kern_tracing(new_nkdbufs); if (turn_on_log_leaks) log_leaks = 1; bsd_init_kprintf("calling ubc_init\n"); ubc_init(); /* Initialize the file systems. */ bsd_init_kprintf("calling vfsinit\n"); vfsinit(); #if SOCKETS /* Initialize per-CPU cache allocator */ mcache_init(); /* Initialize mbuf's. */ bsd_init_kprintf("calling mbinit\n"); mbinit(); net_str_id_init(); /* for mbuf tags */ #endif /* SOCKETS */ /* * Initializes security event auditing. * XXX: Should/could this occur later? */ #if CONFIG_AUDIT bsd_init_kprintf("calling audit_init\n"); audit_init(); #endif /* Initialize kqueues */ bsd_init_kprintf("calling knote_init\n"); knote_init(); /* Initialize for async IO */ bsd_init_kprintf("calling aio_init\n"); aio_init(); /* Initialize pipes */ bsd_init_kprintf("calling pipeinit\n"); pipeinit(); /* Initialize SysV shm subsystem locks; the subsystem proper is * initialized through a sysctl. */ #if SYSV_SHM bsd_init_kprintf("calling sysv_shm_lock_init\n"); sysv_shm_lock_init(); #endif #if SYSV_SEM bsd_init_kprintf("calling sysv_sem_lock_init\n"); sysv_sem_lock_init(); #endif #if SYSV_MSG bsd_init_kprintf("sysv_msg_lock_init\n"); sysv_msg_lock_init(); #endif bsd_init_kprintf("calling pshm_lock_init\n"); pshm_lock_init(); bsd_init_kprintf("calling psem_lock_init\n"); psem_lock_init(); pthread_init(); /* POSIX Shm and Sem */ bsd_init_kprintf("calling pshm_cache_init\n"); pshm_cache_init(); bsd_init_kprintf("calling psem_cache_init\n"); psem_cache_init(); bsd_init_kprintf("calling time_zone_slock_init\n"); time_zone_slock_init(); /* Stack snapshot facility lock */ stackshot_lock_init(); /* * Initialize protocols. Block reception of incoming packets * until everything is ready. */ bsd_init_kprintf("calling sysctl_register_fixed\n"); sysctl_register_fixed(); bsd_init_kprintf("calling sysctl_mib_init\n"); sysctl_mib_init(); #if NETWORKING bsd_init_kprintf("calling dlil_init\n"); dlil_init(); bsd_init_kprintf("calling proto_kpi_init\n"); proto_kpi_init(); #endif /* NETWORKING */ #if SOCKETS bsd_init_kprintf("calling socketinit\n"); socketinit(); bsd_init_kprintf("calling domaininit\n"); domaininit(); #endif /* SOCKETS */ kernproc->p_fd->fd_cdir = NULL; kernproc->p_fd->fd_rdir = NULL; #if CONFIG_EMBEDDED /* Initialize kernel memory status notifications */ bsd_init_kprintf("calling kern_memorystatus_init\n"); kern_memorystatus_init(); #endif #ifdef GPROF /* Initialize kernel profiling. 
#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	/* kick off timeout driven events by calling first time */
	thread_wakeup(&lbolt);
	timeout(lightning_bolt, 0, hz);

	bsd_init_kprintf("calling bsd_autoconf\n");
	bsd_autoconf();

#if CONFIG_DTRACE
	dtrace_postinit();
#endif

	/*
	 * We attach the loopback interface *way* down here to ensure
	 * it happens after autoconf(), otherwise it becomes the
	 * "primary" interface.
	 */
#include <loop.h>
#if NLOOP > 0
	bsd_init_kprintf("calling loopattach\n");
	loopattach();	/* XXX */
#endif

#if PFLOG
	/* Initialize packet filter log interface */
	pfloginit();
#endif /* PFLOG */

#if NETHER > 0
	/* Register the built-in dlil ethernet interface family */
	bsd_init_kprintf("calling ether_family_init\n");
	ether_family_init();
#endif /* ETHER */

#if NETWORKING
	/* Call any kext code that wants to run just after network init */
	bsd_init_kprintf("calling net_init_run\n");
	net_init_run();

	/* register user tunnel kernel control handler */
	utun_register_control();
#endif /* NETWORKING */

	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
	vnode_pager_bootstrap();
#if 0
	/* XXX Hack for early debug stop */
	printf("\nabout to sleep for 10 seconds\n");
	IOSleep(10 * 1000);
	/* Debugger("hello"); */
#endif

	bsd_init_kprintf("calling inittodr\n");
	inittodr(0);

#if CONFIG_EMBEDDED
	{
		/* print out early VM statistics */
		kern_return_t kr1;
		vm_statistics_data_t stat;
		mach_msg_type_number_t count;

		count = HOST_VM_INFO_COUNT;
		kr1 = host_statistics(host_self(),
		    HOST_VM_INFO,
		    (host_info_t)&stat,
		    &count);
		kprintf("Mach Virtual Memory Statistics (page size of 4096 bytes)\n"
		    "Pages free:\t\t\t%u.\n"
		    "Pages active:\t\t\t%u.\n"
		    "Pages inactive:\t\t\t%u.\n"
		    "Pages wired down:\t\t%u.\n"
		    "\"Translation faults\":\t\t%u.\n"
		    "Pages copy-on-write:\t\t%u.\n"
		    "Pages zero filled:\t\t%u.\n"
		    "Pages reactivated:\t\t%u.\n"
		    "Pageins:\t\t\t%u.\n"
		    "Pageouts:\t\t\t%u.\n"
		    "Object cache: %u hits of %u lookups (%d%% hit rate)\n",

		    stat.free_count,
		    stat.active_count,
		    stat.inactive_count,
		    stat.wire_count,
		    stat.faults,
		    stat.cow_faults,
		    stat.zero_fill_count,
		    stat.reactivations,
		    stat.pageins,
		    stat.pageouts,
		    stat.hits,
		    stat.lookups,
		    (stat.lookups == 0) ? 100 :
		    ((stat.hits * 100) / stat.lookups));	/* hit rate = hits/lookups */
	}
#endif /* CONFIG_EMBEDDED */

	/* Mount the root file system. */
	while (TRUE) {
		int err;

		bsd_init_kprintf("calling setconf\n");
		setconf();

		bsd_init_kprintf("vfs_mountroot\n");
		if (0 == (err = vfs_mountroot()))
			break;
		rootdevice[0] = '\0';
#if NFSCLIENT
		if (mountroot == netboot_mountroot) {
			PE_display_icon(0, "noroot");	/* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i = 1; 1; i *= 2) {
				printf("bsd_init: failed to mount network root, error %d, %s\n",
				    err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i * 60 * 1000);
			}
			/*NOTREACHED*/
		}
#endif
		printf("cannot mount root, errno = %d\n", err);
		boothowto |= RB_ASKNAME;
	}

	IOSecureBSDRoot(rootdevice);

	context.vc_thread = current_thread();
	context.vc_ucred = kernproc->p_ucred;
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
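	/*
	 * timeout(lightning_bolt, 0, hz) above arms the classic once-a-second
	 * BSD heartbeat. In outline the callback wakes lbolt sleepers and
	 * re-arms itself; a hedged sketch only -- the real body (defined
	 * elsewhere in this file) does additional periodic work:
	 */
#if 0	/* illustrative sketch only */
	static void
	lightning_bolt(void *dummy __unused)
	{
		thread_wakeup(&lbolt);		/* wake tsleep(&lbolt, ...) waiters */
		timeout(lightning_bolt, 0, hz);	/* re-arm one second out */
	}
#endif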
	bsd_init_kprintf("calling VFS_ROOT\n");
	/* Get the vnode for '/'. Set filedesc0.fd_cdir to reference it. */
	if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context))
		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
	rootvnode->v_flag |= VROOT;
	(void)vnode_ref(rootvnode);
	(void)vnode_put(rootvnode);
	filedesc0.fd_cdir = rootvnode;

#if NFSCLIENT
	if (mountroot == netboot_mountroot) {
		int err;
		/* post mount setup */
		if ((err = netboot_setup()) != 0) {
			PE_display_icon(0, "noroot");	/* XXX a netboot-specific icon would be nicer */
			vc_progress_set(FALSE, 0);
			for (i = 1; 1; i *= 2) {
				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
				    err, PE_boot_args());
				printf("We are hanging here...\n");
				IOSleep(i * 60 * 1000);
			}
			/*NOTREACHED*/
		}
	}
#endif

#if CONFIG_IMAGEBOOT
	/*
	 * See if a system disk image is present. If so, mount it and
	 * switch the root vnode to point to it
	 */
	if (imageboot_needed()) {
		int err;

		/* An image was found */
		if ((err = imageboot_setup())) {
			/*
			 * this is not fatal. Keep trying to root
			 * off the original media
			 */
			printf("%s: imageboot could not find root, %d\n",
			    __FUNCTION__, err);
		}
	}
#endif /* CONFIG_IMAGEBOOT */

	/* set initial time; all other resource data is already zero'ed */
	microtime(&kernproc->p_start);
	kernproc->p_stats->p_start = kernproc->p_start;	/* for compat */

#if DEVFS
	{
		char mounthere[] = "/dev";	/* !const because of internal casting */

		bsd_init_kprintf("calling devfs_kernel_mount\n");
		devfs_kernel_mount(mounthere);
	}
#endif /* DEVFS */

	/* Initialize signal state for process 0. */
	bsd_init_kprintf("calling siginit\n");
	siginit(kernproc);

	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
	bsd_utaskbootstrap();

#if defined(__LP64__)
	kernproc->p_flag |= P_LP64;
	printf("Kernel is LP64\n");
#endif

#if __i386__ || __x86_64__
	/* this should be done after the root filesystem is mounted */
	error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
	// 10/30/08 - gab: <rdar://problem/6324501>
	// if default 'translate' can't be found, see if the understudy is available
	if (ENOENT == error) {
		strlcpy(exec_archhandler_ppc.path, kRosettaStandIn_str, MAXPATHLEN);
		error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
	}
	if (error)	/* XXX make more generic */
		exec_archhandler_ppc.path[0] = 0;
#endif

	bsd_init_kprintf("calling mountroot_post_hook\n");

	/* invoke post-root-mount hook */
	if (mountroot_post_hook != NULL)
		mountroot_post_hook();

#if 0 /* not yet */
	consider_zone_gc(FALSE);
#endif

	bsd_init_kprintf("done\n");
}
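/*
 * mountroot_post_hook, invoked at the end of bsd_init() above, is a plain
 * function-pointer hook: platform code can register work to run once the
 * root file system is up. A minimal sketch of the registration side; the
 * registrant names here are hypothetical:
 */
#if 0	/* illustrative sketch only */
extern void (*mountroot_post_hook)(void);

static void
example_post_root_work(void)	/* hypothetical */
{
	printf("root mounted; running deferred setup\n");
}

/* Called from platform init code before bsd_init() reaches the hook. */
static void
example_register_hook(void)
{
	mountroot_post_hook = example_post_root_work;
}
#endif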