void bsdinit_task(void) { proc_t p = current_proc(); struct uthread *ut; thread_t thread; process_name("init", p); ux_handler_init(); thread = current_thread(); (void) host_set_exception_ports(host_priv_self(), EXC_MASK_ALL & ~(EXC_MASK_RPC_ALERT),//pilotfish (shark) needs this port (mach_port_t) ux_exception_port, EXCEPTION_DEFAULT| MACH_EXCEPTION_CODES, 0); ut = (uthread_t)get_bsdthread_info(thread); bsd_init_task = get_threadtask(thread); init_task_died = FALSE; #if CONFIG_MACF mac_cred_label_associate_user(p->p_ucred); #endif load_init_program(p); lock_trace = 1; }
/* * cloneproc * * Description: Create a new process from a specified process. * * Parameters: parent_task The parent task to be cloned, or * TASK_NULL is task characteristics * are not to be inherited * be cloned, or TASK_NULL if the new * task is not to inherit the VM * characteristics of the parent * parent_proc The parent process to be cloned * inherit_memory True if the child is to inherit * memory from the parent; if this is * non-NULL, then the parent_task must * also be non-NULL * memstat_internal Whether to track the process in the * jetsam priority list (if configured) * * Returns: !NULL pointer to new child thread * NULL Failure (unspecified) * * Note: On return newly created child process has signal lock held * to block delivery of signal to it if called with lock set. * fork() code needs to explicity remove this lock before * signals can be delivered * * In the case of bootstrap, this function can be called from * bsd_utaskbootstrap() in order to bootstrap the first process; * the net effect is to provide a uthread structure for the * kernel process associated with the kernel task. * * XXX: Tristating using the value parent_task as the major key * and inherit_memory as the minor key is something we should * refactor later; we owe the current semantics, ultimately, * to the semantics of task_create_internal. For now, we will * live with this being somewhat awkward. */ thread_t cloneproc(task_t parent_task, coalition_t *parent_coalitions, proc_t parent_proc, int inherit_memory, int memstat_internal) { #if !CONFIG_MEMORYSTATUS #pragma unused(memstat_internal) #endif task_t child_task; proc_t child_proc; thread_t child_thread = NULL; if ((child_proc = forkproc(parent_proc)) == NULL) { /* Failed to allocate new process */ goto bad; } child_thread = fork_create_child(parent_task, parent_coalitions, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64)); if (child_thread == NULL) { /* * Failed to create thread; now we must deconstruct the new * process previously obtained from forkproc(). */ forkproc_free(child_proc); goto bad; } child_task = get_threadtask(child_thread); if (parent_proc->p_flag & P_LP64) { task_set_64bit(child_task, TRUE); OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag); } else { task_set_64bit(child_task, FALSE); OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag); } #if CONFIG_MEMORYSTATUS if (memstat_internal) { proc_list_lock(); child_proc->p_memstat_state |= P_MEMSTAT_INTERNAL; proc_list_unlock(); } #endif /* make child visible */ pinsertchild(parent_proc, child_proc); /* * Make child runnable, set start time. */ child_proc->p_stat = SRUN; bad: return(child_thread); }
/* * cloneproc * * Description: Create a new process from a specified process. * * Parameters: parent_task The parent task to be cloned, or * TASK_NULL is task characteristics * are not to be inherited * be cloned, or TASK_NULL if the new * task is not to inherit the VM * characteristics of the parent * parent_proc The parent process to be cloned * inherit_memory True if the child is to inherit * memory from the parent; if this is * non-NULL, then the parent_task must * also be non-NULL * * Returns: !NULL pointer to new child thread * NULL Failure (unspecified) * * Note: On return newly created child process has signal lock held * to block delivery of signal to it if called with lock set. * fork() code needs to explicity remove this lock before * signals can be delivered * * In the case of bootstrap, this function can be called from * bsd_utaskbootstrap() in order to bootstrap the first process; * the net effect is to provide a uthread structure for the * kernel process associated with the kernel task. * * XXX: Tristating using the value parent_task as the major key * and inherit_memory as the minor key is something we should * refactor later; we owe the current semantics, ultimately, * to the semantics of task_create_internal. For now, we will * live with this being somewhat awkward. */ thread_t cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory) { task_t child_task; proc_t child_proc; thread_t child_thread = NULL; if ((child_proc = forkproc(parent_proc)) == NULL) { /* Failed to allocate new process */ goto bad; } child_thread = fork_create_child(parent_task, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64)); if (child_thread == NULL) { /* * Failed to create thread; now we must deconstruct the new * process previously obtained from forkproc(). */ forkproc_free(child_proc); goto bad; } child_task = get_threadtask(child_thread); if (parent_proc->p_flag & P_LP64) { task_set_64bit(child_task, TRUE); OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag); #ifdef __ppc__ /* * PPC51: ppc64 is limited to 51-bit addresses. * Memory above that limit is handled specially at * the pmap level. */ pmap_map_sharedpage(child_task, get_map_pmap(get_task_map(child_task))); #endif /* __ppc__ */ } else { task_set_64bit(child_task, FALSE); OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag); } /* make child visible */ pinsertchild(parent_proc, child_proc); /* * Make child runnable, set start time. */ child_proc->p_stat = SRUN; bad: return(child_thread); }
void kperf_on_cpu_internal(thread_t thread, thread_continue_t continuation, uintptr_t *starting_fp) { if (kperf_kdebug_cswitch) { /* trace the new thread's PID for Instruments */ int pid = task_pid(get_threadtask(thread)); BUF_DATA(PERF_TI_CSWITCH, thread_tid(thread), pid); } if (kperf_lightweight_pet_active) { kperf_pet_on_cpu(thread, continuation, starting_fp); } }
void kperf_ipi_handler(void *param) { struct kperf_context ctx; struct kperf_timer *timer = param; assert(timer != NULL); /* Always cut a tracepoint to show a sample event occurred */ BUF_DATA(PERF_TM_HNDLR | DBG_FUNC_START, 0); int ncpu = cpu_number(); struct kperf_sample *intbuf = kperf_intr_sample_buffer(); /* On a timer, we can see the "real" current thread */ ctx.cur_thread = current_thread(); ctx.cur_pid = task_pid(get_threadtask(ctx.cur_thread)); /* who fired */ ctx.trigger_type = TRIGGER_TYPE_TIMER; ctx.trigger_id = (unsigned int)(timer - kperf_timerv); if (ctx.trigger_id == pet_timer_id && ncpu < machine_info.logical_cpu_max) { kperf_thread_on_cpus[ncpu] = ctx.cur_thread; } /* make sure sampling is on */ unsigned int status = kperf_sampling_status(); if (status == KPERF_SAMPLING_OFF) { BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_OFF); return; } else if (status == KPERF_SAMPLING_SHUTDOWN) { BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_SHUTDOWN); return; } /* call the action -- kernel-only from interrupt, pend user */ int r = kperf_sample(intbuf, &ctx, timer->actionid, SAMPLE_FLAG_PEND_USER); /* end tracepoint is informational */ BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, r); #if defined(__x86_64__) (void)atomic_bit_clear(&(timer->pending_cpus), ncpu, __ATOMIC_RELAXED); #endif /* defined(__x86_64__) */ }
/* * fork * * Description: fork system call. * * Parameters: parent Parent process to fork * uap (void) [unused] * retval Return value * * Returns: 0 Success * EAGAIN Resource unavailable, try again * * Notes: Attempts to create a new child process which inherits state * from the parent process. If successful, the call returns * having created an initially suspended child process with an * extra Mach task and thread reference, for which the thread * is initially suspended. Until we resume the child process, * it is not yet running. * * The return information to the child is contained in the * thread state structure of the new child, and does not * become visible to the child through a normal return process, * since it never made the call into the kernel itself in the * first place. * * After resuming the thread, this function returns directly to * the parent process which invoked the fork() system call. * * Important: The child thread_resume occurs before the parent returns; * depending on scheduling latency, this means that it is not * deterministic as to whether the parent or child is scheduled * to run first. It is entirely possible that the child could * run to completion prior to the parent running. */ int fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) { thread_t child_thread; int err; retval[1] = 0; /* flag parent return for user space */ if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK, NULL)) == 0) { task_t child_task; proc_t child_proc; /* Return to the parent */ child_proc = (proc_t)get_bsdthreadtask_info(child_thread); retval[0] = child_proc->p_pid; /* * Drop the signal lock on the child which was taken on our * behalf by forkproc()/cloneproc() to prevent signals being * received by the child in a partially constructed state. */ proc_signalend(child_proc, 0); proc_transend(child_proc, 0); /* flag the fork has occurred */ proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid); DTRACE_PROC1(create, proc_t, child_proc); #if CONFIG_DTRACE if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) (*dtrace_proc_waitfor_hook)(child_proc); #endif /* "Return" to the child */ proc_clear_return_wait(child_proc, child_thread); /* drop the extra references we got during the creation */ if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) { task_deallocate(child_task); } thread_deallocate(child_thread); } return(err); }
/* * cloneproc * * Description: Create a new process from a specified process. * * Parameters: parent_task The parent task to be cloned, or * TASK_NULL is task characteristics * are not to be inherited * be cloned, or TASK_NULL if the new * task is not to inherit the VM * characteristics of the parent * parent_proc The parent process to be cloned * inherit_memory True if the child is to inherit * memory from the parent; if this is * non-NULL, then the parent_task must * also be non-NULL * * Returns: !NULL pointer to new child thread * NULL Failure (unspecified) * * Note: On return newly created child process has signal lock held * to block delivery of signal to it if called with lock set. * fork() code needs to explicity remove this lock before * signals can be delivered * * In the case of bootstrap, this function can be called from * bsd_utaskbootstrap() in order to bootstrap the first process; * the net effect is to provide a uthread structure for the * kernel process associated with the kernel task. * * XXX: Tristating using the value parent_task as the major key * and inherit_memory as the minor key is something we should * refactor later; we owe the current semantics, ultimately, * to the semantics of task_create_internal. For now, we will * live with this being somewhat awkward. */ thread_t cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory) { task_t child_task; proc_t child_proc; thread_t child_thread = NULL; if ((child_proc = forkproc(parent_proc)) == NULL) { /* Failed to allocate new process */ goto bad; } child_thread = fork_create_child(parent_task, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64)); if (child_thread == NULL) { /* * Failed to create thread; now we must deconstruct the new * process previously obtained from forkproc(). */ forkproc_free(child_proc); goto bad; } child_task = get_threadtask(child_thread); if (parent_proc->p_flag & P_LP64) { task_set_64bit(child_task, TRUE); OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag); } else { task_set_64bit(child_task, FALSE); OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag); } /* make child visible */ pinsertchild(parent_proc, child_proc); /* * Make child runnable, set start time. */ child_proc->p_stat = SRUN; bad: return(child_thread); }
void kperf_task_snapshot_sample(struct kperf_task_snapshot *tksn, struct kperf_context *ctx) { thread_t thread; task_t task; BUF_INFO(PERF_TK_SNAP_SAMPLE | DBG_FUNC_START); assert(tksn != NULL); assert(ctx != NULL); thread = ctx->cur_thread; task = get_threadtask(thread); tksn->kptksn_flags = 0; if (task->effective_policy.tep_darwinbg) { tksn->kptksn_flags |= KPERF_TASK_FLAG_DARWIN_BG; } if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) { tksn->kptksn_flags |= KPERF_TASK_FLAG_FOREGROUND; } if (task->requested_policy.trp_boosted == 1) { tksn->kptksn_flags |= KPERF_TASK_FLAG_BOOSTED; } #if CONFIG_MEMORYSTATUS if (memorystatus_proc_is_dirty_unsafe(task->bsd_info)) { tksn->kptksn_flags |= KPERF_TASK_FLAG_DIRTY; } #endif tksn->kptksn_suspend_count = task->suspend_count; tksn->kptksn_pageins = task->pageins; tksn->kptksn_user_time_in_terminated_threads = task->total_user_time; tksn->kptksn_system_time_in_terminated_threads = task->total_system_time; BUF_INFO(PERF_TK_SNAP_SAMPLE | DBG_FUNC_END); }
/* * vfork_return * * Description: "Return" to parent vfork thread() following execve/_exit; * this is done by reassociating the parent process structure * with the task, thread, and uthread. * * Refer to the ASCII art above vfork() to figure out the * state we're undoing. * * Parameters: child_proc Child process * retval System call return value array * rval Return value to present to parent * * Returns: void * * Notes: The caller resumes or exits the parent, as appropriate, after * calling this function. */ void vfork_return(proc_t child_proc, int32_t *retval, int rval) { task_t parent_task = get_threadtask(child_proc->p_vforkact); proc_t parent_proc = get_bsdtask_info(parent_task); thread_t th = current_thread(); uthread_t uth = get_bsdthread_info(th); act_thread_catt(uth->uu_userstate); /* clear vfork state in parent proc structure */ proc_vfork_end(parent_proc); /* REPATRIATE PARENT TASK, THREAD, UTHREAD */ uth->uu_userstate = 0; uth->uu_flag &= ~UT_VFORK; /* restore thread-set-id state */ if (uth->uu_flag & UT_WASSETUID) { uth->uu_flag |= UT_SETUID; uth->uu_flag &= UT_WASSETUID; } uth->uu_proc = 0; uth->uu_sigmask = uth->uu_vforkmask; proc_lock(child_proc); child_proc->p_lflag &= ~P_LINVFORK; child_proc->p_vforkact = 0; proc_unlock(child_proc); thread_set_parent(th, rval); if (retval) { retval[0] = rval; retval[1] = 0; /* mark parent */ } }
/* * fork1 * * Description: common code used by all new process creation other than the * bootstrap of the initial process on the system * * Parameters: parent_proc parent process of the process being * child_threadp pointer to location to receive the * Mach thread_t of the child process * breated * kind kind of creation being requested * * Notes: Permissable values for 'kind': * * PROC_CREATE_FORK Create a complete process which will * return actively running in both the * parent and the child; the child copies * the parent address space. * PROC_CREATE_SPAWN Create a complete process which will * return actively running in the parent * only after returning actively running * in the child; the child address space * is newly created by an image activator, * after which the child is run. * PROC_CREATE_VFORK Creates a partial process which will * borrow the parent task, thread, and * uthread to return running in the child; * the child address space and other parts * are lazily created at execve() time, or * the child is terminated, and the parent * does not actively run until that * happens. * * At first it may seem strange that we return the child thread * address rather than process structure, since the process is * the only part guaranteed to be "new"; however, since we do * not actualy adjust other references between Mach and BSD (see * the block diagram above the implementation of vfork()), this * is the only method which guarantees us the ability to get * back to the other information. */ int fork1(proc_t parent_proc, thread_t *child_threadp, int kind) { thread_t parent_thread = (thread_t)current_thread(); uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread); proc_t child_proc = NULL; /* set in switch, but compiler... */ thread_t child_thread = NULL; uid_t uid; int count; int err = 0; int spawn = 0; /* * Although process entries are dynamically created, we still keep * a global limit on the maximum number we will create. Don't allow * a nonprivileged user to use the last process; don't let root * exceed the limit. The variable nprocs is the current number of * processes, maxproc is the limit. */ uid = kauth_cred_get()->cr_ruid; proc_list_lock(); if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { proc_list_unlock(); tablefull("proc"); return (EAGAIN); } proc_list_unlock(); /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit, which is * always less than what an rlim_t can hold. * (locking protection is provided by list lock held in chgproccnt) */ count = chgproccnt(uid, 1); if (uid != 0 && (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) { err = EAGAIN; goto bad; } #if CONFIG_MACF /* * Determine if MAC policies applied to the process will allow * it to fork. This is an advisory-only check. */ err = mac_proc_check_fork(parent_proc); if (err != 0) { goto bad; } #endif switch(kind) { case PROC_CREATE_VFORK: /* * Prevent a vfork while we are in vfork(); we should * also likely preventing a fork here as well, and this * check should then be outside the switch statement, * since the proc struct contents will copy from the * child and the tash/thread/uthread from the parent in * that case. We do not support vfork() in vfork() * because we don't have to; the same non-requirement * is true of both fork() and posix_spawn() and any * call other than execve() amd _exit(), but we've * been historically lenient, so we continue to be so * (for now). * * <rdar://6640521> Probably a source of random panics */ if (parent_uthread->uu_flag & UT_VFORK) { printf("fork1 called within vfork by %s\n", parent_proc->p_comm); err = EINVAL; goto bad; } /* * Flag us in progress; if we chose to support vfork() in * vfork(), we would chain our parent at this point (in * effect, a stack push). We don't, since we actually want * to disallow everything not specified in the standard */ proc_vfork_begin(parent_proc); /* The newly created process comes with signal lock held */ if ((child_proc = forkproc(parent_proc)) == NULL) { /* Failed to allocate new process */ proc_vfork_end(parent_proc); err = ENOMEM; goto bad; } // XXX BEGIN: wants to move to be common code (and safe) #if CONFIG_MACF /* * allow policies to associate the credential/label that * we referenced from the parent ... with the child * JMM - this really isn't safe, as we can drop that * association without informing the policy in other * situations (keep long enough to get policies changed) */ mac_cred_label_associate_fork(child_proc->p_ucred, child_proc); #endif /* * Propogate change of PID - may get new cred if auditing. * * NOTE: This has no effect in the vfork case, since * child_proc->task != current_task(), but we duplicate it * because this is probably, ultimately, wrong, since we * will be running in the "child" which is the parent task * with the wrong token until we get to the execve() or * _exit() call; a lot of "undefined" can happen before * that. * * <rdar://6640530> disallow everything but exeve()/_exit()? */ set_security_token(child_proc); AUDIT_ARG(pid, child_proc->p_pid); AUDIT_SESSION_PROCNEW(child_proc->p_ucred); // XXX END: wants to move to be common code (and safe) /* * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD * * Note: this is where we would "push" state instead of setting * it for nested vfork() support (see proc_vfork_end() for * description if issues here). */ child_proc->task = parent_proc->task; child_proc->p_lflag |= P_LINVFORK; child_proc->p_vforkact = parent_thread; child_proc->p_stat = SRUN; parent_uthread->uu_flag |= UT_VFORK; parent_uthread->uu_proc = child_proc; parent_uthread->uu_userstate = (void *)act_thread_csave(); parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask; /* temporarily drop thread-set-id state */ if (parent_uthread->uu_flag & UT_SETUID) { parent_uthread->uu_flag |= UT_WASSETUID; parent_uthread->uu_flag &= ~UT_SETUID; } /* blow thread state information */ /* XXX is this actually necessary, given syscall return? */ thread_set_child(parent_thread, child_proc->p_pid); child_proc->p_acflag = AFORK; /* forked but not exec'ed */ /* * Preserve synchronization semantics of vfork. If * waiting for child to exec or exit, set P_PPWAIT * on child, and sleep on our proc (in case of exit). */ child_proc->p_lflag |= P_LPPWAIT; pinsertchild(parent_proc, child_proc); /* set visible */ break; case PROC_CREATE_SPAWN: /* * A spawned process differs from a forked process in that * the spawned process does not carry around the parents * baggage with regard to address space copying, dtrace, * and so on. */ spawn = 1; /* FALLSTHROUGH */ case PROC_CREATE_FORK: /* * When we clone the parent process, we are going to inherit * its task attributes and memory, since when we fork, we * will, in effect, create a duplicate of it, with only minor * differences. Contrarily, spawned processes do not inherit. */ if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) { /* Failed to create thread */ err = EAGAIN; goto bad; } /* copy current thread state into the child thread (only for fork) */ if (!spawn) { thread_dup(child_thread); } /* child_proc = child_thread->task->proc; */ child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread))); // XXX BEGIN: wants to move to be common code (and safe) #if CONFIG_MACF /* * allow policies to associate the credential/label that * we referenced from the parent ... with the child * JMM - this really isn't safe, as we can drop that * association without informing the policy in other * situations (keep long enough to get policies changed) */ mac_cred_label_associate_fork(child_proc->p_ucred, child_proc); #endif /* * Propogate change of PID - may get new cred if auditing. * * NOTE: This has no effect in the vfork case, since * child_proc->task != current_task(), but we duplicate it * because this is probably, ultimately, wrong, since we * will be running in the "child" which is the parent task * with the wrong token until we get to the execve() or * _exit() call; a lot of "undefined" can happen before * that. * * <rdar://6640530> disallow everything but exeve()/_exit()? */ set_security_token(child_proc); AUDIT_ARG(pid, child_proc->p_pid); AUDIT_SESSION_PROCNEW(child_proc->p_ucred); // XXX END: wants to move to be common code (and safe) /* * Blow thread state information; this is what gives the child * process its "return" value from a fork() call. * * Note: this should probably move to fork() proper, since it * is not relevent to spawn, and the value won't matter * until we resume the child there. If you are in here * refactoring code, consider doing this at the same time. */ thread_set_child(child_thread, child_proc->p_pid); child_proc->p_acflag = AFORK; /* forked but not exec'ed */ // <rdar://6598155> dtrace code cleanup needed #if CONFIG_DTRACE /* * This code applies to new processes who are copying the task * and thread state and address spaces of their parent process. */ if (!spawn) { // <rdar://6598155> call dtrace specific function here instead of all this... /* * APPLE NOTE: Solaris does a sprlock() and drops the * proc_lock here. We're cheating a bit and only taking * the p_dtrace_sprlock lock. A full sprlock would * task_suspend the parent. */ lck_mtx_lock(&parent_proc->p_dtrace_sprlock); /* * Remove all DTrace tracepoints from the child process. We * need to do this _before_ duplicating USDT providers since * any associated probes may be immediately enabled. */ if (parent_proc->p_dtrace_count > 0) { dtrace_fasttrap_fork(parent_proc, child_proc); } lck_mtx_unlock(&parent_proc->p_dtrace_sprlock); /* * Duplicate any lazy dof(s). This must be done while NOT * holding the parent sprlock! Lock ordering is * dtrace_dof_mode_lock, then sprlock. It is imperative we * always call dtrace_lazy_dofs_duplicate, rather than null * check and call if !NULL. If we NULL test, during lazy dof * faulting we can race with the faulting code and proceed * from here to beyond the helpers copy. The lazy dof * faulting will then fail to copy the helpers to the child * process. */ dtrace_lazy_dofs_duplicate(parent_proc, child_proc); /* * Duplicate any helper actions and providers. The SFORKING * we set above informs the code to enable USDT probes that * sprlock() may fail because the child is being forked. */ /* * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent * never fails to find the child. We do not set SFORKING. */ if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) { (*dtrace_helpers_fork)(parent_proc, child_proc); } } #endif /* CONFIG_DTRACE */ break; default: panic("fork1 called with unknown kind %d", kind); break; } /* return the thread pointer to the caller */ *child_threadp = child_thread; bad: /* * In the error case, we return a 0 value for the returned pid (but * it is ignored in the trampoline due to the error return); this * is probably not necessary. */ if (err) { (void)chgproccnt(uid, -1); } return (err); }
load_return_t load_machfile( struct image_params *imgp, struct mach_header *header, thread_t thread, vm_map_t new_map, load_result_t *result ) { struct vnode *vp = imgp->ip_vp; off_t file_offset = imgp->ip_arch_offset; off_t macho_size = imgp->ip_arch_size; off_t file_size = imgp->ip_vattr->va_data_size; pmap_t pmap = 0; /* protected by create_map */ vm_map_t map; vm_map_t old_map; task_t old_task = TASK_NULL; /* protected by create_map */ load_result_t myresult; load_return_t lret; boolean_t create_map = FALSE; int spawn = (imgp->ip_flags & IMGPF_SPAWN); task_t task = current_task(); proc_t p = current_proc(); mach_vm_offset_t aslr_offset = 0; kern_return_t kret; if (macho_size > file_size) { return(LOAD_BADMACHO); } if (new_map == VM_MAP_NULL) { create_map = TRUE; old_task = current_task(); } /* * If we are spawning, we have created backing objects for the process * already, which include non-lazily creating the task map. So we * are going to switch out the task map with one appropriate for the * bitness of the image being loaded. */ if (spawn) { create_map = TRUE; old_task = get_threadtask(thread); } if (create_map) { pmap = pmap_create(get_task_ledger(task), (vm_map_size_t) 0, (imgp->ip_flags & IMGPF_IS_64BIT)); map = vm_map_create(pmap, 0, vm_compute_max_offset((imgp->ip_flags & IMGPF_IS_64BIT)), TRUE); } else map = new_map; #ifndef CONFIG_ENFORCE_SIGNED_CODE /* This turns off faulting for executable pages, which allows to * circumvent Code Signing Enforcement */ if ( (header->flags & MH_ALLOW_STACK_EXECUTION) ) vm_map_disable_NX(map); #endif /* Forcibly disallow execution from data pages on even if the arch * normally permits it. */ if ((header->flags & MH_NO_HEAP_EXECUTION) && !(imgp->ip_flags & IMGPF_ALLOW_DATA_EXEC)) vm_map_disallow_data_exec(map); /* * Compute a random offset for ASLR. */ if (!(imgp->ip_flags & IMGPF_DISABLE_ASLR)) { aslr_offset = random(); aslr_offset %= 1 << ((imgp->ip_flags & IMGPF_IS_64BIT) ? 16 : 8); aslr_offset <<= PAGE_SHIFT; } if (!result) result = &myresult; *result = load_result_null; lret = parse_machfile(vp, map, thread, header, file_offset, macho_size, 0, (int64_t)aslr_offset, result); if (lret != LOAD_SUCCESS) { if (create_map) { vm_map_deallocate(map); /* will lose pmap reference too */ } return(lret); } #if CONFIG_EMBEDDED /* * Check to see if the page zero is enforced by the map->min_offset. */ if (vm_map_has_hard_pagezero(map, 0x1000) == FALSE) { if (create_map) { vm_map_deallocate(map); /* will lose pmap reference too */ } printf("Cannot enforce a hard page-zero for %s\n", imgp->ip_strings); psignal(vfs_context_proc(imgp->ip_vfs_context), SIGKILL); return (LOAD_BADMACHO); } #else /* * For 64-bit users, check for presence of a 4GB page zero * which will enable the kernel to share the user's address space * and hence avoid TLB flushes on kernel entry/exit */ if ((imgp->ip_flags & IMGPF_IS_64BIT) && vm_map_has_4GB_pagezero(map)) { vm_map_set_4GB_pagezero(map); } #endif /* * Commit to new map. * * Swap the new map for the old, which consumes our new map * reference but each leaves us responsible for the old_map reference. * That lets us get off the pmap associated with it, and * then we can release it. */ if (create_map) { /* * If this is an exec, then we are going to destroy the old * task, and it's correct to halt it; if it's spawn, the * task is not yet running, and it makes no sense. */ if (!spawn) { /* * Mark the task as halting and start the other * threads towards terminating themselves. Then * make sure any threads waiting for a process * transition get informed that we are committed to * this transition, and then finally complete the * task halting (wait for threads and then cleanup * task resources). * * NOTE: task_start_halt() makes sure that no new * threads are created in the task during the transition. * We need to mark the workqueue as exiting before we * wait for threads to terminate (at the end of which * we no longer have a prohibition on thread creation). * * Finally, clean up any lingering workqueue data structures * that may have been left behind by the workqueue threads * as they exited (and then clean up the work queue itself). */ kret = task_start_halt(task); if (kret != KERN_SUCCESS) { return(kret); } proc_transcommit(p, 0); workqueue_mark_exiting(p); task_complete_halt(task); workqueue_exit(p); } old_map = swap_task_map(old_task, thread, map, !spawn); vm_map_clear_4GB_pagezero(old_map); vm_map_deallocate(old_map); } return(LOAD_SUCCESS); }
__private_extern__ task_t chudxnu_task_for_thread(thread_t thread) { return get_threadtask(thread); }
static load_return_t load_thread( struct thread_command *tcp, thread_t thread, load_result_t *result ) { kern_return_t kret; load_return_t lret; task_t task; int customstack=0; task = get_threadtask(thread); /* if count is 0; same as thread */ if (result->thread_count != 0) { kret = thread_create(task, &thread); if (kret != KERN_SUCCESS) return(LOAD_RESOURCE); thread_deallocate(thread); } lret = load_threadstate(thread, (unsigned long *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command)); if (lret != LOAD_SUCCESS) return (lret); if (result->thread_count == 0) { lret = load_threadstack(thread, (unsigned long *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command), &result->user_stack, &customstack); if (customstack) result->customstack = 1; else result->customstack = 0; if (lret != LOAD_SUCCESS) return(lret); lret = load_threadentry(thread, (unsigned long *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command), &result->entry_point); if (lret != LOAD_SUCCESS) return(lret); } /* * Resume thread now, note that this means that the thread * commands should appear after all the load commands to * be sure they don't reference anything not yet mapped. */ else thread_resume(thread); result->thread_count++; return(LOAD_SUCCESS); }
/* * The file size of a mach-o file is limited to 32 bits; this is because * this is the limit on the kalloc() of enough bytes for a mach_header and * the contents of its sizeofcmds, which is currently constrained to 32 * bits in the file format itself. We read into the kernel buffer the * commands section, and then parse it in order to parse the mach-o file * format load_command segment(s). We are only interested in a subset of * the total set of possible commands. */ static load_return_t parse_machfile( struct vnode *vp, vm_map_t map, thread_t thread, struct mach_header *header, off_t file_offset, off_t macho_size, int depth, load_result_t *result ) { uint32_t ncmds; struct load_command *lcp; struct dylinker_command *dlp = 0; integer_t dlarchbits = 0; void * pager; load_return_t ret = LOAD_SUCCESS; caddr_t addr; void * kl_addr; vm_size_t size,kl_size; size_t offset; size_t oldoffset; /* for overflow check */ int pass; proc_t p = current_proc(); /* XXXX */ int error; int resid=0; task_t task; size_t mach_header_sz = sizeof(struct mach_header); boolean_t abi64; boolean_t got_code_signatures = FALSE; if (header->magic == MH_MAGIC_64 || header->magic == MH_CIGAM_64) { mach_header_sz = sizeof(struct mach_header_64); } /* * Break infinite recursion */ if (depth > 6) { return(LOAD_FAILURE); } task = (task_t)get_threadtask(thread); depth++; /* * Check to see if right machine type. */ if (((cpu_type_t)(header->cputype & ~CPU_ARCH_MASK) != cpu_type()) || !grade_binary(header->cputype, header->cpusubtype & ~CPU_SUBTYPE_MASK)) return(LOAD_BADARCH); abi64 = ((header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64); switch (header->filetype) { case MH_OBJECT: case MH_EXECUTE: case MH_PRELOAD: if (depth != 1) { return (LOAD_FAILURE); } break; case MH_FVMLIB: case MH_DYLIB: if (depth == 1) { return (LOAD_FAILURE); } break; case MH_DYLINKER: if (depth != 2) { return (LOAD_FAILURE); } break; default: return (LOAD_FAILURE); } /* * Get the pager for the file. */ pager = (void *) ubc_getpager(vp); /* * Map portion that must be accessible directly into * kernel's map. */ if ((mach_header_sz + header->sizeofcmds) > macho_size) return(LOAD_BADMACHO); /* * Round size of Mach-O commands up to page boundry. */ size = round_page(mach_header_sz + header->sizeofcmds); if (size <= 0) return(LOAD_BADMACHO); /* * Map the load commands into kernel memory. */ addr = 0; kl_size = size; kl_addr = kalloc(size); addr = (caddr_t)kl_addr; if (addr == NULL) return(LOAD_NOSPACE); error = vn_rdwr(UIO_READ, vp, addr, size, file_offset, UIO_SYSSPACE32, 0, kauth_cred_get(), &resid, p); if (error) { if (kl_addr ) kfree(kl_addr, kl_size); return(LOAD_IOERROR); } /* (void)ubc_map(vp, PROT_EXEC); */ /* NOT HERE */ /* * Scan through the commands, processing each one as necessary. */ for (pass = 1; pass <= 2; pass++) { /* * Loop through each of the load_commands indicated by the * Mach-O header; if an absurd value is provided, we just * run off the end of the reserved section by incrementing * the offset too far, so we are implicitly fail-safe. */ offset = mach_header_sz; ncmds = header->ncmds; while (ncmds--) { /* * Get a pointer to the command. */ lcp = (struct load_command *)(addr + offset); oldoffset = offset; offset += lcp->cmdsize; /* * Perform prevalidation of the struct load_command * before we attempt to use its contents. Invalid * values are ones which result in an overflow, or * which can not possibly be valid commands, or which * straddle or exist past the reserved section at the * start of the image. */ if (oldoffset > offset || lcp->cmdsize < sizeof(struct load_command) || offset > header->sizeofcmds + mach_header_sz) { ret = LOAD_BADMACHO; break; } /* * Act on struct load_command's for which kernel * intervention is required. */ switch(lcp->cmd) { case LC_SEGMENT_64: if (pass != 1) break; ret = load_segment_64( (struct segment_command_64 *)lcp, pager, file_offset, macho_size, ubc_getsize(vp), map, result); break; case LC_SEGMENT: if (pass != 1) break; ret = load_segment( (struct segment_command *) lcp, pager, file_offset, macho_size, ubc_getsize(vp), map, result); break; case LC_THREAD: if (pass != 2) break; ret = load_thread((struct thread_command *)lcp, thread, result); break; case LC_UNIXTHREAD: if (pass != 2) break; ret = load_unixthread( (struct thread_command *) lcp, thread, result); break; case LC_LOAD_DYLINKER: if (pass != 2) break; if ((depth == 1) && (dlp == 0)) { dlp = (struct dylinker_command *)lcp; dlarchbits = (header->cputype & CPU_ARCH_MASK); } else { ret = LOAD_FAILURE; } break; case LC_CODE_SIGNATURE: /* CODE SIGNING */ if (pass != 2) break; /* pager -> uip -> load signatures & store in uip set VM object "signed_pages" */ ret = load_code_signature( (struct linkedit_data_command *) lcp, vp, file_offset, macho_size, header->cputype, (depth == 1) ? result : NULL); if (ret != LOAD_SUCCESS) { printf("proc %d: load code signature error %d " "for file \"%s\"\n", p->p_pid, ret, vp->v_name); ret = LOAD_SUCCESS; /* ignore error */ } else { got_code_signatures = TRUE; } break; default: /* Other commands are ignored by the kernel */ ret = LOAD_SUCCESS; break; } if (ret != LOAD_SUCCESS) break; } if (ret != LOAD_SUCCESS) break; } if (ret == LOAD_SUCCESS) { if (! got_code_signatures) { struct cs_blob *blob; /* no embedded signatures: look for detached ones */ blob = ubc_cs_blob_get(vp, -1, file_offset); if (blob != NULL) { /* get flags to be applied to the process */ result->csflags |= blob->csb_flags; } } if (dlp != 0) ret = load_dylinker(dlp, dlarchbits, map, thread, depth, result, abi64); if(depth == 1) { if (result->thread_count == 0) { ret = LOAD_FAILURE; } else if ( abi64 ) { #ifdef __ppc__ /* Map in 64-bit commpage */ /* LP64todo - make this clean */ /* * PPC51: ppc64 is limited to 51-bit addresses. * Memory above that limit is handled specially * at the pmap level. */ pmap_map_sharedpage(current_task(), get_map_pmap(map)); #endif /* __ppc__ */ } } } if (kl_addr ) kfree(kl_addr, kl_size); if (ret == LOAD_SUCCESS) (void)ubc_map(vp, PROT_READ | PROT_EXEC); return(ret); }