/* * fork1 * * Description: common code used by all new process creation other than the * bootstrap of the initial process on the system * * Parameters: parent_proc parent process of the process being * child_threadp pointer to location to receive the * Mach thread_t of the child process * breated * kind kind of creation being requested * * Notes: Permissable values for 'kind': * * PROC_CREATE_FORK Create a complete process which will * return actively running in both the * parent and the child; the child copies * the parent address space. * PROC_CREATE_SPAWN Create a complete process which will * return actively running in the parent * only after returning actively running * in the child; the child address space * is newly created by an image activator, * after which the child is run. * PROC_CREATE_VFORK Creates a partial process which will * borrow the parent task, thread, and * uthread to return running in the child; * the child address space and other parts * are lazily created at execve() time, or * the child is terminated, and the parent * does not actively run until that * happens. * * At first it may seem strange that we return the child thread * address rather than process structure, since the process is * the only part guaranteed to be "new"; however, since we do * not actualy adjust other references between Mach and BSD (see * the block diagram above the implementation of vfork()), this * is the only method which guarantees us the ability to get * back to the other information. */ int fork1(proc_t parent_proc, thread_t *child_threadp, int kind) { thread_t parent_thread = (thread_t)current_thread(); uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread); proc_t child_proc = NULL; /* set in switch, but compiler... */ thread_t child_thread = NULL; uid_t uid; int count; int err = 0; int spawn = 0; /* * Although process entries are dynamically created, we still keep * a global limit on the maximum number we will create. Don't allow * a nonprivileged user to use the last process; don't let root * exceed the limit. The variable nprocs is the current number of * processes, maxproc is the limit. */ uid = kauth_cred_get()->cr_ruid; proc_list_lock(); if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { proc_list_unlock(); tablefull("proc"); return (EAGAIN); } proc_list_unlock(); /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit, which is * always less than what an rlim_t can hold. * (locking protection is provided by list lock held in chgproccnt) */ count = chgproccnt(uid, 1); if (uid != 0 && (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) { err = EAGAIN; goto bad; } #if CONFIG_MACF /* * Determine if MAC policies applied to the process will allow * it to fork. This is an advisory-only check. */ err = mac_proc_check_fork(parent_proc); if (err != 0) { goto bad; } #endif switch(kind) { case PROC_CREATE_VFORK: /* * Prevent a vfork while we are in vfork(); we should * also likely preventing a fork here as well, and this * check should then be outside the switch statement, * since the proc struct contents will copy from the * child and the tash/thread/uthread from the parent in * that case. We do not support vfork() in vfork() * because we don't have to; the same non-requirement * is true of both fork() and posix_spawn() and any * call other than execve() amd _exit(), but we've * been historically lenient, so we continue to be so * (for now). * * <rdar://6640521> Probably a source of random panics */ if (parent_uthread->uu_flag & UT_VFORK) { printf("fork1 called within vfork by %s\n", parent_proc->p_comm); err = EINVAL; goto bad; } /* * Flag us in progress; if we chose to support vfork() in * vfork(), we would chain our parent at this point (in * effect, a stack push). We don't, since we actually want * to disallow everything not specified in the standard */ proc_vfork_begin(parent_proc); /* The newly created process comes with signal lock held */ if ((child_proc = forkproc(parent_proc)) == NULL) { /* Failed to allocate new process */ proc_vfork_end(parent_proc); err = ENOMEM; goto bad; } // XXX BEGIN: wants to move to be common code (and safe) #if CONFIG_MACF /* * allow policies to associate the credential/label that * we referenced from the parent ... with the child * JMM - this really isn't safe, as we can drop that * association without informing the policy in other * situations (keep long enough to get policies changed) */ mac_cred_label_associate_fork(child_proc->p_ucred, child_proc); #endif /* * Propogate change of PID - may get new cred if auditing. * * NOTE: This has no effect in the vfork case, since * child_proc->task != current_task(), but we duplicate it * because this is probably, ultimately, wrong, since we * will be running in the "child" which is the parent task * with the wrong token until we get to the execve() or * _exit() call; a lot of "undefined" can happen before * that. * * <rdar://6640530> disallow everything but exeve()/_exit()? */ set_security_token(child_proc); AUDIT_ARG(pid, child_proc->p_pid); AUDIT_SESSION_PROCNEW(child_proc->p_ucred); // XXX END: wants to move to be common code (and safe) /* * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD * * Note: this is where we would "push" state instead of setting * it for nested vfork() support (see proc_vfork_end() for * description if issues here). */ child_proc->task = parent_proc->task; child_proc->p_lflag |= P_LINVFORK; child_proc->p_vforkact = parent_thread; child_proc->p_stat = SRUN; parent_uthread->uu_flag |= UT_VFORK; parent_uthread->uu_proc = child_proc; parent_uthread->uu_userstate = (void *)act_thread_csave(); parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask; /* temporarily drop thread-set-id state */ if (parent_uthread->uu_flag & UT_SETUID) { parent_uthread->uu_flag |= UT_WASSETUID; parent_uthread->uu_flag &= ~UT_SETUID; } /* blow thread state information */ /* XXX is this actually necessary, given syscall return? */ thread_set_child(parent_thread, child_proc->p_pid); child_proc->p_acflag = AFORK; /* forked but not exec'ed */ /* * Preserve synchronization semantics of vfork. If * waiting for child to exec or exit, set P_PPWAIT * on child, and sleep on our proc (in case of exit). */ child_proc->p_lflag |= P_LPPWAIT; pinsertchild(parent_proc, child_proc); /* set visible */ break; case PROC_CREATE_SPAWN: /* * A spawned process differs from a forked process in that * the spawned process does not carry around the parents * baggage with regard to address space copying, dtrace, * and so on. */ spawn = 1; /* FALLSTHROUGH */ case PROC_CREATE_FORK: /* * When we clone the parent process, we are going to inherit * its task attributes and memory, since when we fork, we * will, in effect, create a duplicate of it, with only minor * differences. Contrarily, spawned processes do not inherit. */ if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) { /* Failed to create thread */ err = EAGAIN; goto bad; } /* copy current thread state into the child thread (only for fork) */ if (!spawn) { thread_dup(child_thread); } /* child_proc = child_thread->task->proc; */ child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread))); // XXX BEGIN: wants to move to be common code (and safe) #if CONFIG_MACF /* * allow policies to associate the credential/label that * we referenced from the parent ... with the child * JMM - this really isn't safe, as we can drop that * association without informing the policy in other * situations (keep long enough to get policies changed) */ mac_cred_label_associate_fork(child_proc->p_ucred, child_proc); #endif /* * Propogate change of PID - may get new cred if auditing. * * NOTE: This has no effect in the vfork case, since * child_proc->task != current_task(), but we duplicate it * because this is probably, ultimately, wrong, since we * will be running in the "child" which is the parent task * with the wrong token until we get to the execve() or * _exit() call; a lot of "undefined" can happen before * that. * * <rdar://6640530> disallow everything but exeve()/_exit()? */ set_security_token(child_proc); AUDIT_ARG(pid, child_proc->p_pid); AUDIT_SESSION_PROCNEW(child_proc->p_ucred); // XXX END: wants to move to be common code (and safe) /* * Blow thread state information; this is what gives the child * process its "return" value from a fork() call. * * Note: this should probably move to fork() proper, since it * is not relevent to spawn, and the value won't matter * until we resume the child there. If you are in here * refactoring code, consider doing this at the same time. */ thread_set_child(child_thread, child_proc->p_pid); child_proc->p_acflag = AFORK; /* forked but not exec'ed */ // <rdar://6598155> dtrace code cleanup needed #if CONFIG_DTRACE /* * This code applies to new processes who are copying the task * and thread state and address spaces of their parent process. */ if (!spawn) { // <rdar://6598155> call dtrace specific function here instead of all this... /* * APPLE NOTE: Solaris does a sprlock() and drops the * proc_lock here. We're cheating a bit and only taking * the p_dtrace_sprlock lock. A full sprlock would * task_suspend the parent. */ lck_mtx_lock(&parent_proc->p_dtrace_sprlock); /* * Remove all DTrace tracepoints from the child process. We * need to do this _before_ duplicating USDT providers since * any associated probes may be immediately enabled. */ if (parent_proc->p_dtrace_count > 0) { dtrace_fasttrap_fork(parent_proc, child_proc); } lck_mtx_unlock(&parent_proc->p_dtrace_sprlock); /* * Duplicate any lazy dof(s). This must be done while NOT * holding the parent sprlock! Lock ordering is * dtrace_dof_mode_lock, then sprlock. It is imperative we * always call dtrace_lazy_dofs_duplicate, rather than null * check and call if !NULL. If we NULL test, during lazy dof * faulting we can race with the faulting code and proceed * from here to beyond the helpers copy. The lazy dof * faulting will then fail to copy the helpers to the child * process. */ dtrace_lazy_dofs_duplicate(parent_proc, child_proc); /* * Duplicate any helper actions and providers. The SFORKING * we set above informs the code to enable USDT probes that * sprlock() may fail because the child is being forked. */ /* * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent * never fails to find the child. We do not set SFORKING. */ if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) { (*dtrace_helpers_fork)(parent_proc, child_proc); } } #endif /* CONFIG_DTRACE */ break; default: panic("fork1 called with unknown kind %d", kind); break; } /* return the thread pointer to the caller */ *child_threadp = child_thread; bad: /* * In the error case, we return a 0 value for the returned pid (but * it is ignored in the trampoline due to the error return); this * is probably not necessary. */ if (err) { (void)chgproccnt(uid, -1); } return (err); }
/* * forkproc * * Description: Create a new process structure, given a parent process * structure. * * Parameters: parent_proc The parent process * * Returns: !NULL The new process structure * NULL Error (insufficient free memory) * * Note: When successful, the newly created process structure is * partially initialized; if a caller needs to deconstruct the * returned structure, they must call forkproc_free() to do so. */ proc_t forkproc(proc_t parent_proc) { proc_t child_proc; /* Our new process */ static int nextpid = 0, pidwrap = 0, nextpidversion = 0; static uint64_t nextuniqueid = 0; int error = 0; struct session *sessp; uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread()); MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK); if (child_proc == NULL) { printf("forkproc: M_PROC zone exhausted\n"); goto bad; } /* zero it out as we need to insert in hash */ bzero(child_proc, sizeof *child_proc); MALLOC_ZONE(child_proc->p_stats, struct pstats *, sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK); if (child_proc->p_stats == NULL) { printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n"); FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); child_proc = NULL; goto bad; } MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *, sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK); if (child_proc->p_sigacts == NULL) { printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n"); FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS); FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); child_proc = NULL; goto bad; } /* allocate a callout for use by interval timers */ child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc); if (child_proc->p_rcall == NULL) { FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS); FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS); FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); child_proc = NULL; goto bad; } /* * Find an unused PID. */ proc_list_lock(); nextpid++; retry: /* * If the process ID prototype has wrapped around, * restart somewhat above 0, as the low-numbered procs * tend to include daemons that don't exit. */ if (nextpid >= PID_MAX) { nextpid = 100; pidwrap = 1; } if (pidwrap != 0) { /* if the pid stays in hash both for zombie and runniing state */ if (pfind_locked(nextpid) != PROC_NULL) { nextpid++; goto retry; } if (pgfind_internal(nextpid) != PGRP_NULL) { nextpid++; goto retry; } if (session_find_internal(nextpid) != SESSION_NULL) { nextpid++; goto retry; } } nprocs++; child_proc->p_pid = nextpid; child_proc->p_idversion = nextpidversion++; /* kernel process is handcrafted and not from fork, so start from 1 */ child_proc->p_uniqueid = ++nextuniqueid; #if 1 if (child_proc->p_pid != 0) { if (pfind_locked(child_proc->p_pid) != PROC_NULL) panic("proc in the list already\n"); } #endif /* Insert in the hash */ child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE); LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash); proc_list_unlock(); /* * We've identified the PID we are going to use; initialize the new * process structure. */ child_proc->p_stat = SIDL; child_proc->p_pgrpid = PGRPID_DEAD; /* * The zero'ing of the proc was at the allocation time due to need * for insertion to hash. Copy the section that is to be copied * directly from the parent. */ bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy, (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy)); /* * Some flags are inherited from the parent. * Duplicate sub-structures as needed. * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR)); if (parent_proc->p_flag & P_PROFIL) startprofclock(child_proc); /* * Note that if the current thread has an assumed identity, this * credential will be granted to the new process. */ child_proc->p_ucred = kauth_cred_get_with_ref(); /* update cred on proc */ PROC_UPDATE_CREDS_ONPROC(child_proc); /* update audit session proc count */ AUDIT_SESSION_PROCNEW(child_proc); #if CONFIG_FINE_LOCK_GROUPS lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr); lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr); #if CONFIG_DTRACE lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); #endif lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr); #else /* !CONFIG_FINE_LOCK_GROUPS */ lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr); lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr); #if CONFIG_DTRACE lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); #endif lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr); #endif /* !CONFIG_FINE_LOCK_GROUPS */ klist_init(&child_proc->p_klist); if (child_proc->p_textvp != NULLVP) { /* bump references to the text vnode */ /* Need to hold iocount across the ref call */ if (vnode_getwithref(child_proc->p_textvp) == 0) { error = vnode_ref(child_proc->p_textvp); vnode_put(child_proc->p_textvp); if (error != 0) child_proc->p_textvp = NULLVP; } } /* * Copy the parents per process open file table to the child; if * there is a per-thread current working directory, set the childs * per-process current working directory to that instead of the * parents. * * XXX may fail to copy descriptors to child */ child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir); #if SYSV_SHM if (parent_proc->vm_shm) { /* XXX may fail to attach shm to child */ (void)shmfork(parent_proc, child_proc); } #endif /* * inherit the limit structure to child */ proc_limitfork(parent_proc, child_proc); if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur; child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur; } /* Intialize new process stats, including start time */ /* <rdar://6640543> non-zeroed portion contains garbage AFAICT */ bzero(&child_proc->p_stats->pstat_startzero, (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero - (caddr_t)&child_proc->p_stats->pstat_startzero)); bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof)); microtime(&child_proc->p_start); child_proc->p_stats->p_start = child_proc->p_start; /* for compat */ if (parent_proc->p_sigacts != NULL) (void)memcpy(child_proc->p_sigacts, parent_proc->p_sigacts, sizeof *child_proc->p_sigacts); else (void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts); sessp = proc_session(parent_proc); if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT) OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag); session_rele(sessp); /* * block all signals to reach the process. * no transition race should be occuring with the child yet, * but indicate that the process is in (the creation) transition. */ proc_signalstart(child_proc, 0); proc_transstart(child_proc, 0); child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX; TAILQ_INIT(&child_proc->p_uthlist); TAILQ_INIT(&child_proc->p_aio_activeq); TAILQ_INIT(&child_proc->p_aio_doneq); /* Inherit the parent flags for code sign */ child_proc->p_csflags = (parent_proc->p_csflags & ~CS_KILLED); /* * All processes have work queue locks; cleaned up by * reap_child_locked() */ workqueue_init_lock(child_proc); /* * Copy work queue information * * Note: This should probably only happen in the case where we are * creating a child that is a copy of the parent; since this * routine is called in the non-duplication case of vfork() * or posix_spawn(), then this information should likely not * be duplicated. * * <rdar://6640553> Work queue pointers that no longer point to code */ child_proc->p_wqthread = parent_proc->p_wqthread; child_proc->p_threadstart = parent_proc->p_threadstart; child_proc->p_pthsize = parent_proc->p_pthsize; child_proc->p_targconc = parent_proc->p_targconc; if ((parent_proc->p_lflag & P_LREGISTER) != 0) { child_proc->p_lflag |= P_LREGISTER; } child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset; #if PSYNCH pth_proc_hashinit(child_proc); #endif /* PSYNCH */ #if CONFIG_LCTX child_proc->p_lctx = NULL; /* Add new process to login context (if any). */ if (parent_proc->p_lctx != NULL) { /* * <rdar://6640564> This should probably be delayed in the * vfork() or posix_spawn() cases. */ LCTX_LOCK(parent_proc->p_lctx); enterlctx(child_proc, parent_proc->p_lctx, 0); } #endif bad: return(child_proc); }