/* * Destroy a process structure that resulted from a call to forkproc(), but * which must be returned to the system because of a subsequent failure * preventing it from becoming active. * * Parameters: p The incomplete process from forkproc() * * Returns: (void) * * Note: This function should only be used in an error handler following * a call to forkproc(). * * Operations occur in reverse order of those in forkproc(). */ void forkproc_free(proc_t p) { /* We held signal and a transition locks; drop them */ proc_signalend(p, 0); proc_transend(p, 0); /* * If we have our own copy of the resource limits structure, we * need to free it. If it's a shared copy, we need to drop our * reference on it. */ proc_limitdrop(p, 0); p->p_limit = NULL; #if SYSV_SHM /* Need to drop references to the shared memory segment(s), if any */ if (p->vm_shm) { /* * Use shmexec(): we have no address space, so no mappings * * XXX Yes, the routine is badly named. */ shmexec(p); } #endif /* Need to undo the effects of the fdcopy(), if any */ fdfree(p); /* * Drop the reference on a text vnode pointer, if any * XXX This code is broken in forkproc(); see <rdar://4256419>; * XXX if anyone ever uses this field, we will be extremely unhappy. */ if (p->p_textvp) { vnode_rele(p->p_textvp); p->p_textvp = NULL; } /* Stop the profiling clock */ stopprofclock(p); /* Release the credential reference */ kauth_cred_unref(&p->p_ucred); proc_list_lock(); /* Decrement the count of processes in the system */ nprocs--; proc_list_unlock(); thread_call_free(p->p_rcall); /* Free allocated memory */ FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS); FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS); proc_checkdeadrefs(p); FREE_ZONE(p, sizeof *p, M_PROC); }
/* * Much like before, but we can afford to take faults here. If the * update fails, we simply turn off profiling. */ void addupc_task(struct proc *p, u_long pc, u_int nticks) { struct uprof *prof; caddr_t addr; u_int i; u_short v; /* Testing P_PROFIL may be unnecessary, but is certainly safe. */ if ((p->p_flag & P_PROFIL) == 0 || nticks == 0) return; prof = &p->p_p->ps_prof; if (pc < prof->pr_off || (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) return; addr = prof->pr_base + i; if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) { v += nticks; if (copyout((caddr_t)&v, addr, sizeof(v)) == 0) return; } stopprofclock(p); }
/* ARGSUSED */ int sys_profil(struct proc *p, void *v, register_t *retval) { struct sys_profil_args /* { syscallarg(caddr_t) samples; syscallarg(size_t) size; syscallarg(u_long) offset; syscallarg(u_int) scale; } */ *uap = v; struct uprof *upp; int s; if (SCARG(uap, scale) > (1 << 16)) return (EINVAL); if (SCARG(uap, scale) == 0) { stopprofclock(p); return (0); } upp = &p->p_p->ps_prof; /* Block profile interrupts while changing state. */ s = splstatclock(); upp->pr_off = SCARG(uap, offset); upp->pr_scale = SCARG(uap, scale); upp->pr_base = (caddr_t)SCARG(uap, samples); upp->pr_size = SCARG(uap, size); startprofclock(p); splx(s); return (0); }
/* * sysctl helper routine for kern.profiling subtree. enables/disables * kernel profiling and gives out copies of the profiling data. */ static int sysctl_kern_profiling(SYSCTLFN_ARGS) { struct gmonparam *gp = &_gmonparam; int error; struct sysctlnode node; node = *rnode; switch (node.sysctl_num) { case GPROF_STATE: node.sysctl_data = &gp->state; break; case GPROF_COUNT: node.sysctl_data = gp->kcount; node.sysctl_size = gp->kcountsize; break; case GPROF_FROMS: node.sysctl_data = gp->froms; node.sysctl_size = gp->fromssize; break; case GPROF_TOS: node.sysctl_data = gp->tos; node.sysctl_size = gp->tossize; break; case GPROF_GMONPARAM: node.sysctl_data = gp; node.sysctl_size = sizeof(*gp); break; default: return (EOPNOTSUPP); } error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) return (error); if (node.sysctl_num == GPROF_STATE) { mutex_spin_enter(&proc0.p_stmutex); if (gp->state == GMON_PROF_OFF) stopprofclock(&proc0); else startprofclock(&proc0); mutex_spin_exit(&proc0.p_stmutex); } return (0); }
/* * Return kernel profiling information. */ int sysctl_doprof(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { struct gmonparam *gp = &_gmonparam; int error; /* all sysctl names at this level are terminal */ if (namelen != 1) return (ENOTDIR); /* overloaded */ switch (name[0]) { case GPROF_STATE: error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state); if (error) return (error); if (gp->state == GMON_PROF_OFF) stopprofclock(&proc0); else startprofclock(&proc0); return (0); case GPROF_COUNT: return (sysctl_struct(oldp, oldlenp, newp, newlen, gp->kcount, gp->kcountsize)); case GPROF_FROMS: return (sysctl_struct(oldp, oldlenp, newp, newlen, gp->froms, gp->fromssize)); case GPROF_TOS: return (sysctl_struct(oldp, oldlenp, newp, newlen, gp->tos, gp->tossize)); case GPROF_GMONPARAM: return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp)); default: return (EOPNOTSUPP); } /* NOTREACHED */ }
/* ARGSUSED */ int sys_execve(struct proc *p, void *v, register_t *retval) { struct sys_execve_args /* { syscallarg(const char *) path; syscallarg(char *const *) argp; syscallarg(char *const *) envp; } */ *uap = v; int error; struct exec_package pack; struct nameidata nid; struct vattr attr; struct ucred *cred = p->p_ucred; char *argp; char * const *cpp, *dp, *sp; #ifdef KTRACE char *env_start; #endif struct process *pr = p->p_p; long argc, envc; size_t len, sgap; #ifdef MACHINE_STACK_GROWS_UP size_t slen; #endif char *stack; struct ps_strings arginfo; struct vmspace *vm = pr->ps_vmspace; char **tmpfap; extern struct emul emul_native; #if NSYSTRACE > 0 int wassugid = ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC); size_t pathbuflen; #endif char *pathbuf = NULL; struct vnode *otvp; /* get other threads to stop */ if ((error = single_thread_set(p, SINGLE_UNWIND, 1))) return (error); /* * Cheap solution to complicated problems. * Mark this process as "leave me alone, I'm execing". */ atomic_setbits_int(&pr->ps_flags, PS_INEXEC); #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) { systrace_execve0(p); pathbuf = pool_get(&namei_pool, PR_WAITOK); error = copyinstr(SCARG(uap, path), pathbuf, MAXPATHLEN, &pathbuflen); if (error != 0) goto clrflag; } #endif if (pathbuf != NULL) { NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, p); } else { NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); } /* * initialize the fields of the exec package. */ if (pathbuf != NULL) pack.ep_name = pathbuf; else pack.ep_name = (char *)SCARG(uap, path); pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK); pack.ep_hdrlen = exec_maxhdrsz; pack.ep_hdrvalid = 0; pack.ep_ndp = &nid; pack.ep_interp = NULL; pack.ep_emul_arg = NULL; VMCMDSET_INIT(&pack.ep_vmcmds); pack.ep_vap = &attr; pack.ep_emul = &emul_native; pack.ep_flags = 0; /* see if we can run it. */ if ((error = check_exec(p, &pack)) != 0) { goto freehdr; } /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */ /* allocate an argument buffer */ argp = km_alloc(NCARGS, &kv_exec, &kp_pageable, &kd_waitok); #ifdef DIAGNOSTIC if (argp == NULL) panic("execve: argp == NULL"); #endif dp = argp; argc = 0; /* copy the fake args list, if there's one, freeing it as we go */ if (pack.ep_flags & EXEC_HASARGL) { tmpfap = pack.ep_fa; while (*tmpfap != NULL) { char *cp; cp = *tmpfap; while (*cp) *dp++ = *cp++; *dp++ = '\0'; free(*tmpfap, M_EXEC, 0); tmpfap++; argc++; } free(pack.ep_fa, M_EXEC, 0); pack.ep_flags &= ~EXEC_HASARGL; } /* Now get argv & environment */ if (!(cpp = SCARG(uap, argp))) { error = EFAULT; goto bad; } if (pack.ep_flags & EXEC_SKIPARG) cpp++; while (1) { len = argp + ARG_MAX - dp; if ((error = copyin(cpp, &sp, sizeof(sp))) != 0) goto bad; if (!sp) break; if ((error = copyinstr(sp, dp, len, &len)) != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto bad; } dp += len; cpp++; argc++; } /* must have at least one argument */ if (argc == 0) { error = EINVAL; goto bad; } #ifdef KTRACE if (KTRPOINT(p, KTR_EXECARGS)) ktrexec(p, KTR_EXECARGS, argp, dp - argp); #endif envc = 0; /* environment does not need to be there */ if ((cpp = SCARG(uap, envp)) != NULL ) { #ifdef KTRACE env_start = dp; #endif while (1) { len = argp + ARG_MAX - dp; if ((error = copyin(cpp, &sp, sizeof(sp))) != 0) goto bad; if (!sp) break; if ((error = copyinstr(sp, dp, len, &len)) != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto bad; } dp += len; cpp++; envc++; } #ifdef KTRACE if (KTRPOINT(p, KTR_EXECENV)) ktrexec(p, KTR_EXECENV, env_start, dp - env_start); #endif } dp = (char *)(((long)dp + _STACKALIGNBYTES) & ~_STACKALIGNBYTES); sgap = STACKGAPLEN; /* * If we have enabled random stackgap, the stack itself has already * been moved from a random location, but is still aligned to a page * boundary. Provide the lower bits of random placement now. */ if (stackgap_random != 0) { sgap += arc4random() & PAGE_MASK; sgap = (sgap + _STACKALIGNBYTES) & ~_STACKALIGNBYTES; } /* Now check if args & environ fit into new stack */ len = ((argc + envc + 2 + pack.ep_emul->e_arglen) * sizeof(char *) + sizeof(long) + dp + sgap + sizeof(struct ps_strings)) - argp; len = (len + _STACKALIGNBYTES) &~ _STACKALIGNBYTES; if (len > pack.ep_ssize) { /* in effect, compare to initial limit */ error = ENOMEM; goto bad; } /* adjust "active stack depth" for process VSZ */ pack.ep_ssize = len; /* maybe should go elsewhere, but... */ /* * we're committed: any further errors will kill the process, so * kill the other threads now. */ single_thread_set(p, SINGLE_EXIT, 0); /* * Prepare vmspace for remapping. Note that uvmspace_exec can replace * pr_vmspace! */ uvmspace_exec(p, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); vm = pr->ps_vmspace; /* Now map address space */ vm->vm_taddr = (char *)trunc_page(pack.ep_taddr); vm->vm_tsize = atop(round_page(pack.ep_taddr + pack.ep_tsize) - trunc_page(pack.ep_taddr)); vm->vm_daddr = (char *)trunc_page(pack.ep_daddr); vm->vm_dsize = atop(round_page(pack.ep_daddr + pack.ep_dsize) - trunc_page(pack.ep_daddr)); vm->vm_dused = 0; vm->vm_ssize = atop(round_page(pack.ep_ssize)); vm->vm_maxsaddr = (char *)pack.ep_maxsaddr; vm->vm_minsaddr = (char *)pack.ep_minsaddr; /* create the new process's VM space by running the vmcmds */ #ifdef DIAGNOSTIC if (pack.ep_vmcmds.evs_used == 0) panic("execve: no vmcmds"); #endif error = exec_process_vmcmds(p, &pack); /* if an error happened, deallocate and punt */ if (error) goto exec_abort; /* old "stackgap" is gone now */ pr->ps_stackgap = 0; #ifdef MACHINE_STACK_GROWS_UP pr->ps_strings = (vaddr_t)vm->vm_maxsaddr + sgap; if (uvm_map_protect(&vm->vm_map, (vaddr_t)vm->vm_maxsaddr, trunc_page(pr->ps_strings), PROT_NONE, TRUE)) goto exec_abort; #else pr->ps_strings = (vaddr_t)vm->vm_minsaddr - sizeof(arginfo) - sgap; if (uvm_map_protect(&vm->vm_map, round_page(pr->ps_strings + sizeof(arginfo)), (vaddr_t)vm->vm_minsaddr, PROT_NONE, TRUE)) goto exec_abort; #endif /* remember information about the process */ arginfo.ps_nargvstr = argc; arginfo.ps_nenvstr = envc; #ifdef MACHINE_STACK_GROWS_UP stack = (char *)vm->vm_maxsaddr + sizeof(arginfo) + sgap; slen = len - sizeof(arginfo) - sgap; #else stack = (char *)(vm->vm_minsaddr - len); #endif /* Now copy argc, args & environ to new stack */ if (!(*pack.ep_emul->e_copyargs)(&pack, &arginfo, stack, argp)) goto exec_abort; /* copy out the process's ps_strings structure */ if (copyout(&arginfo, (char *)pr->ps_strings, sizeof(arginfo))) goto exec_abort; stopprofclock(pr); /* stop profiling */ fdcloseexec(p); /* handle close on exec */ execsigs(p); /* reset caught signals */ TCB_SET(p, NULL); /* reset the TCB address */ pr->ps_kbind_addr = 0; /* reset the kbind bits */ pr->ps_kbind_cookie = 0; /* set command name & other accounting info */ memset(p->p_comm, 0, sizeof(p->p_comm)); len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN); memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, len); pr->ps_acflag &= ~AFORK; /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; vref(pack.ep_vp); pr->ps_textvp = pack.ep_vp; if (otvp) vrele(otvp); atomic_setbits_int(&pr->ps_flags, PS_EXEC); if (pr->ps_flags & PS_PPWAIT) { atomic_clearbits_int(&pr->ps_flags, PS_PPWAIT); atomic_clearbits_int(&pr->ps_pptr->ps_flags, PS_ISPWAIT); wakeup(pr->ps_pptr); } /* * If process does execve() while it has a mismatched real, * effective, or saved uid/gid, we set PS_SUGIDEXEC. */ if (cred->cr_uid != cred->cr_ruid || cred->cr_uid != cred->cr_svuid || cred->cr_gid != cred->cr_rgid || cred->cr_gid != cred->cr_svgid) atomic_setbits_int(&pr->ps_flags, PS_SUGIDEXEC); else atomic_clearbits_int(&pr->ps_flags, PS_SUGIDEXEC); atomic_clearbits_int(&pr->ps_flags, PS_TAMED); tame_dropwpaths(pr); /* * deal with set[ug]id. * MNT_NOEXEC has already been used to disable s[ug]id. */ if ((attr.va_mode & (VSUID | VSGID)) && proc_cansugid(p)) { int i; atomic_setbits_int(&pr->ps_flags, PS_SUGID|PS_SUGIDEXEC); #ifdef KTRACE /* * If process is being ktraced, turn off - unless * root set it. */ if (pr->ps_tracevp && !(pr->ps_traceflag & KTRFAC_ROOT)) ktrcleartrace(pr); #endif p->p_ucred = cred = crcopy(cred); if (attr.va_mode & VSUID) cred->cr_uid = attr.va_uid; if (attr.va_mode & VSGID) cred->cr_gid = attr.va_gid; /* * For set[ug]id processes, a few caveats apply to * stdin, stdout, and stderr. */ error = 0; fdplock(p->p_fd); for (i = 0; i < 3; i++) { struct file *fp = NULL; /* * NOTE - This will never return NULL because of * immature fds. The file descriptor table is not * shared because we're suid. */ fp = fd_getfile(p->p_fd, i); /* * Ensure that stdin, stdout, and stderr are already * allocated. We do not want userland to accidentally * allocate descriptors in this range which has implied * meaning to libc. */ if (fp == NULL) { short flags = FREAD | (i == 0 ? 0 : FWRITE); struct vnode *vp; int indx; if ((error = falloc(p, &fp, &indx)) != 0) break; #ifdef DIAGNOSTIC if (indx != i) panic("sys_execve: falloc indx != i"); #endif if ((error = cdevvp(getnulldev(), &vp)) != 0) { fdremove(p->p_fd, indx); closef(fp, p); break; } if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) { fdremove(p->p_fd, indx); closef(fp, p); vrele(vp); break; } if (flags & FWRITE) vp->v_writecount++; fp->f_flag = flags; fp->f_type = DTYPE_VNODE; fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; FILE_SET_MATURE(fp, p); } } fdpunlock(p->p_fd); if (error) goto exec_abort; } else atomic_clearbits_int(&pr->ps_flags, PS_SUGID); /* * Reset the saved ugids and update the process's copy of the * creds if the creds have been changed */ if (cred->cr_uid != cred->cr_svuid || cred->cr_gid != cred->cr_svgid) { /* make sure we have unshared ucreds */ p->p_ucred = cred = crcopy(cred); cred->cr_svuid = cred->cr_uid; cred->cr_svgid = cred->cr_gid; } if (pr->ps_ucred != cred) { struct ucred *ocred; ocred = pr->ps_ucred; crhold(cred); pr->ps_ucred = cred; crfree(ocred); } if (pr->ps_flags & PS_SUGIDEXEC) { int i, s = splclock(); timeout_del(&pr->ps_realit_to); for (i = 0; i < nitems(pr->ps_timer); i++) { timerclear(&pr->ps_timer[i].it_interval); timerclear(&pr->ps_timer[i].it_value); } splx(s); } /* reset CPU time usage for the thread, but not the process */ timespecclear(&p->p_tu.tu_runtime); p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0; km_free(argp, NCARGS, &kv_exec, &kp_pageable); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); vn_close(pack.ep_vp, FREAD, cred, p); /* * notify others that we exec'd */ KNOTE(&pr->ps_klist, NOTE_EXEC); /* setup new registers and do misc. setup. */ if (pack.ep_emul->e_fixup != NULL) { if ((*pack.ep_emul->e_fixup)(p, &pack) != 0) goto free_pack_abort; } #ifdef MACHINE_STACK_GROWS_UP (*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack + slen, retval); #else (*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack, retval); #endif /* map the process's signal trampoline code */ if (exec_sigcode_map(pr, pack.ep_emul)) goto free_pack_abort; #ifdef __HAVE_EXEC_MD_MAP /* perform md specific mappings that process might need */ if (exec_md_map(p, &pack)) goto free_pack_abort; #endif if (pr->ps_flags & PS_TRACED) psignal(p, SIGTRAP); free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); /* * Call emulation specific exec hook. This can setup per-process * p->p_emuldata or do any other per-process stuff an emulation needs. * * If we are executing process of different emulation than the * original forked process, call e_proc_exit() of the old emulation * first, then e_proc_exec() of new emulation. If the emulation is * same, the exec hook code should deallocate any old emulation * resources held previously by this process. */ if (pr->ps_emul && pr->ps_emul->e_proc_exit && pr->ps_emul != pack.ep_emul) (*pr->ps_emul->e_proc_exit)(p); p->p_descfd = 255; if ((pack.ep_flags & EXEC_HASFD) && pack.ep_fd < 255) p->p_descfd = pack.ep_fd; /* * Call exec hook. Emulation code may NOT store reference to anything * from &pack. */ if (pack.ep_emul->e_proc_exec) (*pack.ep_emul->e_proc_exec)(p, &pack); #if defined(KTRACE) && defined(COMPAT_LINUX) /* update ps_emul, but don't ktrace it if native-execing-native */ if (pr->ps_emul != pack.ep_emul || pack.ep_emul != &emul_native) { pr->ps_emul = pack.ep_emul; if (KTRPOINT(p, KTR_EMUL)) ktremul(p); } #else /* update ps_emul, the old value is no longer needed */ pr->ps_emul = pack.ep_emul; #endif atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); single_thread_clear(p, P_SUSPSIG); #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE) && wassugid && !ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC)) systrace_execve1(pathbuf, p); #endif if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); return (0); bad: /* free the vmspace-creation commands, and release their references */ kill_vmcmds(&pack.ep_vmcmds); /* kill any opened file descriptor, if necessary */ if (pack.ep_flags & EXEC_HASFD) { pack.ep_flags &= ~EXEC_HASFD; fdplock(p->p_fd); (void) fdrelease(p, pack.ep_fd); fdpunlock(p->p_fd); } if (pack.ep_interp != NULL) pool_put(&namei_pool, pack.ep_interp); if (pack.ep_emul_arg != NULL) free(pack.ep_emul_arg, M_TEMP, pack.ep_emul_argsize); /* close and put the exec'd file */ vn_close(pack.ep_vp, FREAD, cred, p); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); km_free(argp, NCARGS, &kv_exec, &kp_pageable); freehdr: free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); #if NSYSTRACE > 0 clrflag: #endif atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); single_thread_clear(p, P_SUSPSIG); if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); return (error); exec_abort: /* * the old process doesn't exist anymore. exit gracefully. * get rid of the (new) address space we have created, if any, get rid * of our namei data and vnode, and exit noting failure */ uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS); if (pack.ep_interp != NULL) pool_put(&namei_pool, pack.ep_interp); if (pack.ep_emul_arg != NULL) free(pack.ep_emul_arg, M_TEMP, pack.ep_emul_argsize); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); vn_close(pack.ep_vp, FREAD, cred, p); km_free(argp, NCARGS, &kv_exec, &kp_pageable); free_pack_abort: free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); exit1(p, W_EXITCODE(0, SIGABRT), EXIT_NORMAL); /* NOTREACHED */ atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); return (0); }
/* * Exit: deallocate address space and other resources, change proc state to * zombie, and unlink proc from allproc and parent's lists. Save exit status * and rusage for wait(). Check for child processes and orphan them. */ void exit1(struct thread *td, int rv) { struct proc *p, *nq, *q; struct vnode *vtmp; struct vnode *ttyvp = NULL; struct plimit *plim; mtx_assert(&Giant, MA_NOTOWNED); p = td->td_proc; /* * XXX in case we're rebooting we just let init die in order to * work around an unsolved stack overflow seen very late during * shutdown on sparc64 when the gmirror worker process exists. */ if (p == initproc && rebooting == 0) { printf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } /* * MUST abort all other threads before proceeding past here. */ PROC_LOCK(p); while (p->p_flag & P_HADTHREADS) { /* * First check if some other thread got here before us. * If so, act appropriately: exit or suspend. */ thread_suspend_check(0); /* * Kill off the other threads. This requires * some co-operation from other parts of the kernel * so it may not be instantaneous. With this state set * any thread entering the kernel from userspace will * thread_exit() in trap(). Any thread attempting to * sleep will return immediately with EINTR or EWOULDBLOCK * which will hopefully force them to back out to userland * freeing resources as they go. Any thread attempting * to return to userland will thread_exit() from userret(). * thread_exit() will unsuspend us when the last of the * other threads exits. * If there is already a thread singler after resumption, * calling thread_single will fail; in that case, we just * re-check all suspension request, the thread should * either be suspended there or exit. */ if (!thread_single(SINGLE_EXIT)) break; /* * All other activity in this process is now stopped. * Threading support has been turned off. */ } KASSERT(p->p_numthreads == 1, ("exit1: proc %p exiting with %d threads", p, p->p_numthreads)); racct_sub(p, RACCT_NTHR, 1); /* * Wakeup anyone in procfs' PIOCWAIT. They should have a hold * on our vmspace, so we should block below until they have * released their reference to us. Note that if they have * requested S_EXIT stops we will block here until they ack * via PIOCCONT. */ _STOPEVENT(p, S_EXIT, rv); /* * Ignore any pending request to stop due to a stop signal. * Once P_WEXIT is set, future requests will be ignored as * well. */ p->p_flag &= ~P_STOPPED_SIG; KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped")); /* * Note that we are exiting and do another wakeup of anyone in * PIOCWAIT in case they aren't listening for S_EXIT stops or * decided to wait again after we told them we are exiting. */ p->p_flag |= P_WEXIT; wakeup(&p->p_stype); /* * Wait for any processes that have a hold on our vmspace to * release their reference. */ while (p->p_lock > 0) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); p->p_xstat = rv; /* Let event handler change exit status */ PROC_UNLOCK(p); /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); #ifdef AUDIT /* * The Sun BSM exit token contains two components: an exit status as * passed to exit(), and a return value to indicate what sort of exit * it was. The exit status is WEXITSTATUS(rv), but it's not clear * what the return value is. */ AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0); AUDIT_SYSCALL_EXIT(0, td); #endif /* Are we a task leader? */ if (p == p->p_leader) { mtx_lock(&ppeers_lock); q = p->p_peers; while (q != NULL) { PROC_LOCK(q); kern_psignal(q, SIGKILL); PROC_UNLOCK(q); q = q->p_peers; } while (p->p_peers != NULL) msleep(p, &ppeers_lock, PWAIT, "exit1", 0); mtx_unlock(&ppeers_lock); } /* * Check if any loadable modules need anything done at process exit. * E.g. SYSV IPC stuff * XXX what if one of these generates an error? */ EVENTHANDLER_INVOKE(process_exit, p); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ PROC_LOCK(p); rv = p->p_xstat; /* Event handler could change exit status */ stopprofclock(p); p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE); /* * Stop the real interval timer. If the handler is currently * executing, prevent it from rearming itself and let it finish. */ if (timevalisset(&p->p_realtimer.it_value) && callout_stop(&p->p_itcallout) == 0) { timevalclear(&p->p_realtimer.it_interval); msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0); KASSERT(!timevalisset(&p->p_realtimer.it_value), ("realtime timer is still armed")); } PROC_UNLOCK(p); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pid. */ funsetownlst(&p->p_sigiolst); /* * If this process has an nlminfo data area (for lockd), release it */ if (nlminfo_release_p != NULL && p->p_nlminfo != NULL) (*nlminfo_release_p)(p); /* * Close open files and release open-file table. * This may block! */ fdescfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Remove ourself from our leader's peer list and wake our leader. */ mtx_lock(&ppeers_lock); if (p->p_leader->p_peers) { q = p->p_leader; while (q->p_peers != p) q = q->p_peers; q->p_peers = p->p_peers; wakeup(p->p_leader); } mtx_unlock(&ppeers_lock); vmspace_exit(td); sx_xlock(&proctree_lock); if (SESS_LEADER(p)) { struct session *sp = p->p_session; struct tty *tp; /* * s_ttyp is not zero'd; we use this to indicate that * the session once had a controlling terminal. (for * logging and informational purposes) */ SESS_LOCK(sp); ttyvp = sp->s_ttyvp; tp = sp->s_ttyp; sp->s_ttyvp = NULL; sp->s_ttydp = NULL; sp->s_leader = NULL; SESS_UNLOCK(sp); /* * Signal foreground pgrp and revoke access to * controlling terminal if it has not been revoked * already. * * Because the TTY may have been revoked in the mean * time and could already have a new session associated * with it, make sure we don't send a SIGHUP to a * foreground process group that does not belong to this * session. */ if (tp != NULL) { tty_lock(tp); if (tp->t_session == sp) tty_signal_pgrp(tp, SIGHUP); tty_unlock(tp); } if (ttyvp != NULL) { sx_xunlock(&proctree_lock); if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) { VOP_REVOKE(ttyvp, REVOKEALL); VOP_UNLOCK(ttyvp, 0); } sx_xlock(&proctree_lock); } } fixjobc(p, p->p_pgrp, 0); sx_xunlock(&proctree_lock); (void)acct_process(td); /* Release the TTY now we've unlocked everything. */ if (ttyvp != NULL) vrele(ttyvp); #ifdef KTRACE ktrprocexit(td); #endif /* * Release reference to text vnode */ if ((vtmp = p->p_textvp) != NULL) { p->p_textvp = NULL; vrele(vtmp); } /* * Release our limits structure. */ plim = p->p_limit; p->p_limit = NULL; lim_free(plim); tidhash_remove(td); /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); /* * Call machine-dependent code to release any * machine-dependent resources other than the address space. * The address space is released by "vmspace_exitfree(p)" in * vm_waitproc(). */ cpu_exit(td); WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid); /* * Reparent all of our children to init. */ sx_xlock(&proctree_lock); q = LIST_FIRST(&p->p_children); if (q != NULL) /* only need this if any child is S_ZOMB */ wakeup(initproc); for (; q != NULL; q = nq) { nq = LIST_NEXT(q, p_sibling); PROC_LOCK(q); proc_reparent(q, initproc); q->p_sigparent = SIGCHLD; /* * Traced processes are killed * since their existence means someone is screwing up. */ if (q->p_flag & P_TRACED) { struct thread *temp; /* * Since q was found on our children list, the * proc_reparent() call moved q to the orphan * list due to present P_TRACED flag. Clear * orphan link for q now while q is locked. */ clear_orphan(q); q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE); FOREACH_THREAD_IN_PROC(q, temp) temp->td_dbgflags &= ~TDB_SUSPEND; kern_psignal(q, SIGKILL); } PROC_UNLOCK(q); } /* * Also get rid of our orphans. */ while ((q = LIST_FIRST(&p->p_orphans)) != NULL) { PROC_LOCK(q); clear_orphan(q); PROC_UNLOCK(q); } /* Save exit status. */ PROC_LOCK(p); p->p_xthread = td; /* Tell the prison that we are gone. */ prison_proc_free(p->p_ucred->cr_prison); #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exit if it * has declared an interest. */ if (dtrace_fasttrap_exit) dtrace_fasttrap_exit(p); #endif /* * Notify interested parties of our demise. */ KNOTE_LOCKED(&p->p_klist, NOTE_EXIT); #ifdef KDTRACE_HOOKS int reason = CLD_EXITED; if (WCOREDUMP(rv)) reason = CLD_DUMPED; else if (WIFSIGNALED(rv)) reason = CLD_KILLED; SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0); #endif /* * Just delete all entries in the p_klist. At this point we won't * report any more events, and there are nasty race conditions that * can beat us if we don't. */ knlist_clear(&p->p_klist, 1); /* * If this is a process with a descriptor, we may not need to deliver * a signal to the parent. proctree_lock is held over * procdesc_exit() to serialize concurrent calls to close() and * exit(). */ if (p->p_procdesc == NULL || procdesc_exit(p)) { /* * Notify parent that we're gone. If parent has the * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN, * notify process 1 instead (and hope it will handle this * situation). */ PROC_LOCK(p->p_pptr); mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { struct proc *pp; mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); pp = p->p_pptr; PROC_UNLOCK(pp); proc_reparent(p, initproc); p->p_sigparent = SIGCHLD; PROC_LOCK(p->p_pptr); /* * Notify parent, so in case he was wait(2)ing or * executing waitpid(2) with our pid, he will * continue. */ wakeup(pp); } else mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr == initproc) kern_psignal(p->p_pptr, SIGCHLD); else if (p->p_sigparent != 0) { if (p->p_sigparent == SIGCHLD) childproc_exited(p); else /* LINUX thread */ kern_psignal(p->p_pptr, p->p_sigparent); } } else PROC_LOCK(p->p_pptr); sx_xunlock(&proctree_lock); /* * The state PRS_ZOMBIE prevents other proesses from sending * signal to the process, to avoid memory leak, we free memory * for signal queue at the time when the state is set. */ sigqueue_flush(&p->p_sigqueue); sigqueue_flush(&td->td_sigqueue); /* * We have to wait until after acquiring all locks before * changing p_state. We need to avoid all possible context * switches (including ones from blocking on a mutex) while * marked as a zombie. We also have to set the zombie state * before we release the parent process' proc lock to avoid * a lost wakeup. So, we first call wakeup, then we grab the * sched lock, update the state, and release the parent process' * proc lock. */ wakeup(p->p_pptr); cv_broadcast(&p->p_pwait); sched_exit(p->p_pptr, td); PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); /* * Hopefully no one will try to deliver a signal to the process this * late in the game. */ knlist_destroy(&p->p_klist); /* * Save our children's rusage information in our exit rusage. */ ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux); /* * Make sure the scheduler takes this thread out of its tables etc. * This will also release this thread's reference to the ucred. * Other thread parts to release include pcb bits and such. */ thread_exit(); }
/* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. */ static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p) { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *oldcred; struct uidinfo *euip = NULL; register_t *stack_base; int error, i; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct vnode *tracevp = NULL; struct ucred *tracecred = NULL; #endif struct vnode *oldtextvp = NULL, *newtextvp; int credential_changing; int textset; #ifdef MAC struct label *interpvplabel = NULL; int will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif /* * Translate the file name. namei() returns a vnode pointer * in ni_vp among other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. */ if (args->fname != NULL) { NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp); if (error) goto exec_fail; vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); /* * Set VV_TEXT now so no one can write to the executable while we're * activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ textset = VOP_IS_TEXT(imgp->vp); VOP_SET_TEXT(imgp->vp); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; imgp->proc->p_fctl0 = 0; /* * Implement image setuid/setgid. * * Determine new credentials before attempting image activators * so that it can be used by process_exec handlers to determine * credential/setid changes. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = 0; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp); credential_changing |= will_transition; #endif /* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */ if (credential_changing) imgp->proc->p_pdeathsig = 0; if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { imgp->credential_setid = true; VOP_UNLOCK(imgp->vp, 0); imgp->newcred = crdup(oldcred); if (attr.va_mode & S_ISUID) { euip = uifind(attr.va_uid); change_euid(imgp->newcred, euip); } vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (attr.va_mode & S_ISGID) change_egid(imgp->newcred, attr.va_gid); /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } else { /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { VOP_UNLOCK(imgp->vp, 0); imgp->newcred = crdup(oldcred); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } } /* The new credentials are installed into the process later. */ /* * Do the best to calculate the full path to the image file. */ if (args->fname != NULL && args->fname[0] == '/') imgp->execpath = args->fname; else { VOP_UNLOCK(imgp->vp, 0); if (vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); } /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) { if (textset == 0) VOP_UNSET_TEXT(imgp->vp); error = ENOEXEC; } goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * VV_TEXT needs to be unset for scripts. There is a short * period before we determine that something is a script where * VV_TEXT will be set. The vnode lock is held over this * entire period so nothing should illegitimately be blocked. */ VOP_UNSET_TEXT(imgp->vp); /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = 0; } vput(newtextvp); vm_object_deallocate(imgp->object); imgp->object = NULL; imgp->credential_setid = false; if (imgp->newcred != NULL) { crfree(imgp->newcred); imgp->newcred = NULL; } imgp->execpath = NULL; free(imgp->freepath, M_TEMP); imgp->freepath = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp, 0); if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : "<unresolved>"); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* ABI enforces the use of Capsicum. Switch into capabilities mode. */ if (SV_PROC_FLAG(p, SV_CAPSICUM)) sys_cap_enter(td, NULL); /* * Copy out strings (args and env) and initialize stack base. */ stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); /* * Stack setup. */ error = (*p->p_sysent->sv_fixup)(&stack_base, imgp); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } if (args->fdp != NULL) { /* Install a brand new file descriptor table. */ fdinstall_remapped(td, args->fdp); args->fdp = NULL; } else { /* * Keep on using the existing file descriptor table. For * security and other reasons, the file descriptor table * cannot be shared after an exec. */ fdunshare(td); /* close files on exec */ fdcloseexec(td); } /* * Malloc things before we need locks. */ i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. */ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); /* STOPs are no longer ignored, arrange for AST */ signotify(td); } /* * Implement image setuid/setgid installation. */ if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE if (p->p_tracecred != NULL && priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED)) ktrprocexec(p, &tracecred, &tracevp); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); fdsetugidsafety(td); error = fdcheckstd(td); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto exec_fail_dealloc; PROC_LOCK(p); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, imgp->newcred, imgp->vp, interpvplabel, imgp); } #endif } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; } /* * Set the new credentials. */ if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); crfree(oldcred); oldcred = NULL; } /* * Store the vp for use in procfs. This vnode was referenced by namei * or fgetvp_exec. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; PROC_UNLOCK(p); #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. */ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { VOP_UNLOCK(imgp->vp, 0); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } #endif /* Set values passed into the program in registers. */ (*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base); vfs_mark_atime(imgp->vp, td->td_ucred); SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (args->fname) NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp, 0); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { if (p->p_ptevents & PTRACE_EXEC) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_EXEC) td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); } /* * Stop the process here if its stop event mask has * the S_EXEC bit set. */ STOPEVENT(p, S_EXEC, 0); } else { exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); } if (imgp->newcred != NULL && oldcred != NULL) crfree(imgp->newcred); #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); /* * Handle deferred decrement of ref counts. */ if (oldtextvp != NULL) vrele(oldtextvp); #ifdef KTRACE if (tracevp != NULL) vrele(tracevp); if (tracecred != NULL) crfree(tracecred); #endif pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); if (euip != NULL) uifree(euip); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif /* * We don't want cpu_set_syscall_retval() to overwrite any of * the register values put in place by exec_setregs(). * Implementations of cpu_set_syscall_retval() will leave * registers unmodified when returning EJUSTRETURN. */ return (error == 0 ? EJUSTRETURN : error); }