/* ARGSUSED */ int sys_bind(struct proc *p, void *v, register_t *retval) { struct sys_bind_args /* { syscallarg(int) s; syscallarg(const struct sockaddr *) name; syscallarg(socklen_t) namelen; } */ *uap = v; struct file *fp; struct mbuf *nam; int error; if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) return (error); error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), MT_SONAME); if (error == 0) { #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen)); #endif error = sobind(fp->f_data, nam, p); m_freem(nam); } FRELE(fp, p); return (error); }
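For context, a minimal userland sketch of what feeds this handler: the sockaddr and namelen that sockargs() copies into an mbuf above originate from an ordinary bind(2) call. The port and address below are purely illustrative.

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <err.h>

int
example_bind_socket(void)
{
	struct sockaddr_in sin;
	int s;

	if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
		err(1, "socket");
	memset(&sin, 0, sizeof(sin));
	sin.sin_len = sizeof(sin);		/* BSD sockaddr convention */
	sin.sin_family = AF_INET;
	sin.sin_port = htons(8080);		/* illustrative port */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	/* kernel side: getsock() -> sockargs() -> sobind(), as above */
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "bind");
	return s;
}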
/* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. * * This code will return back into the fork trampoline code which then * runs doreti. * * NOTE: The mplock is not held at any point. */ void generic_lwp_return(struct lwp *lp, struct trapframe *frame) { struct proc *p = lp->lwp_proc; /* * Newly forked processes are given a kernel priority. We have to * adjust the priority to a normal user priority and fake entry * into the kernel (call userenter()) to install a passive release * function just in case userret() decides to stop the process. This * can occur when ^Z races a fork. If we do not install the passive * release function the current process designation will not be * released when the thread goes to sleep. */ lwkt_setpri_self(TDPRI_USER_NORM); userenter(lp->lwp_thread, p); userret(lp, frame, 0); #ifdef KTRACE if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) ktrsysret(lp, SYS_fork, 0, 0); #endif lp->lwp_flags |= LWP_PASSIVE_ACQ; userexit(lp); lp->lwp_flags &= ~LWP_PASSIVE_ACQ; }
int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) { struct socket *so; struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); AUDIT_ARG_SOCKADDR(td, dirfd, sa); error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_BIND), &fp, NULL, NULL); if (error != 0) return (error); so = fp->f_data; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif #ifdef MAC error = mac_socket_check_bind(td->td_ucred, so, sa); if (error == 0) { #endif if (dirfd == AT_FDCWD) error = sobind(so, sa, td); else error = sobindat(dirfd, so, sa, td); #ifdef MAC } #endif fdrop(fp, td); return (error); }
void netbsd32_ktrpsig(int sig, sig_t action, const sigset_t *mask, const ksiginfo_t *ksi) { struct ktrace_entry *kte; lwp_t *l = curlwp; struct { struct netbsd32_ktr_psig kp; siginfo32_t si; } *kbuf; if (!KTRPOINT(l->l_proc, KTR_PSIG)) return; if (ktealloc(&kte, (void *)&kbuf, l, KTR_PSIG, sizeof(*kbuf))) return; kbuf->kp.signo = (char)sig; NETBSD32PTR32(kbuf->kp.action, action); kbuf->kp.mask = *mask; if (ksi) { kbuf->kp.code = KSI_TRAPCODE(ksi); (void)memset(&kbuf->si, 0, sizeof(kbuf->si)); netbsd32_ksi_to_ksi32(&kbuf->si._info, &ksi->ksi_info); ktesethdrlen(kte, sizeof(*kbuf)); } else { kbuf->kp.code = 0; ktesethdrlen(kte, sizeof(struct netbsd32_ktr_psig)); } ktraddentry(l, kte, KTA_WAITOK); }
/* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. * * This code will return back into the fork trampoline code which then * runs doreti. */ void generic_lwp_return(struct lwp *lp, struct trapframe *frame) { struct proc *p = lp->lwp_proc; /* * Check for exit-race. If one lwp exits the process concurrent with * another lwp creating a new thread, the two operations may cross * each other resulting in the newly-created lwp not receiving a * KILL signal. */ if (p->p_flags & P_WEXIT) { lwpsignal(p, lp, SIGKILL); } /* * Newly forked processes are given a kernel priority. We have to * adjust the priority to a normal user priority and fake entry * into the kernel (call userenter()) to install a passive release * function just in case userret() decides to stop the process. This * can occur when ^Z races a fork. If we do not install the passive * release function the current process designation will not be * released when the thread goes to sleep. */ lwkt_setpri_self(TDPRI_USER_NORM); userenter(lp->lwp_thread, p); userret(lp, frame, 0); #ifdef KTRACE if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) ktrsysret(lp, SYS_fork, 0, 0); #endif lp->lwp_flags |= LWP_PASSIVE_ACQ; userexit(lp); lp->lwp_flags &= ~LWP_PASSIVE_ACQ; }
int sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap) { char *path; Fnv32_t fnv; int error; path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); error = copyinstr(uap->path, path, MAXPATHLEN, NULL); if (error) { free(path, M_TEMP); return (error); } #ifdef KTRACE if (KTRPOINT(curthread, KTR_NAMEI)) ktrnamei(path); #endif fnv = fnv_32_str(path, FNV1_32_INIT); sx_xlock(&shm_dict_lock); error = shm_remove(path, fnv, td->td_ucred); sx_xunlock(&shm_dict_lock); free(path, M_TEMP); return (error); }
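For context, a hedged userland sketch of the interface served above: the path copied in with copyinstr() comes from shm_unlink(3). The object name "/example" and the size are arbitrary.

#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <err.h>

int
example_shm_object(void)
{
	int fd;

	/* Create a named POSIX shared memory object... */
	if ((fd = shm_open("/example", O_RDWR | O_CREAT, 0600)) == -1)
		err(1, "shm_open");
	if (ftruncate(fd, 4096) == -1)
		err(1, "ftruncate");
	/* ...then remove its name; the kernel handler above does the
	 * FNV-hash lookup and permission check in shm_remove(). */
	if (shm_unlink("/example") == -1)
		err(1, "shm_unlink");
	return fd;	/* descriptor stays usable until closed */
}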
int pledge_fail(struct proc *p, int error, uint64_t code) { char *codes = ""; int i; struct sigaction sa; /* Print first matching pledge */ for (i = 0; code && pledgenames[i].bits != 0; i++) if (pledgenames[i].bits & code) { codes = pledgenames[i].name; break; } printf("%s(%d): syscall %d \"%s\"\n", p->p_comm, p->p_pid, p->p_pledge_syscall, codes); #ifdef KTRACE if (KTRPOINT(p, KTR_PLEDGE)) ktrpledge(p, error, code, p->p_pledge_syscall); #endif /* Send uncatchable SIGABRT for coredump */ memset(&sa, 0, sizeof sa); sa.sa_handler = SIG_DFL; setsigvec(p, SIGABRT, &sa); psignal(p, SIGABRT); p->p_p->ps_pledge = 0; /* Disable all PLEDGE_ flags */ return (error); }
/* * General sleep call. Suspends the current thread until a wakeup is * performed on the specified identifier. The thread will then be made * runnable with the specified priority. Sleeps at most sbt units of time * (0 means no timeout). If pri includes the PCATCH flag, let signals * interrupt the sleep, otherwise ignore them while sleeping. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal becomes pending, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). * * The lock argument is unlocked before the caller is suspended, and * re-locked before _sleep() returns. If priority includes the PDROP * flag the lock is not re-locked before returning. */ int _sleep(void *ident, struct lock_object *lock, int priority, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td; struct proc *p; struct lock_class *class; uintptr_t lock_state; int catch, pri, rval, sleepq_flags; WITNESS_SAVE_DECL(lock_witness); td = curthread; p = td->td_proc; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, wmesg); #endif WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Sleeping on \"%s\"", wmesg); KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL, ("sleeping without a lock")); KASSERT(p != NULL, ("msleep1")); KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); if (priority & PDROP) KASSERT(lock != NULL && lock != &Giant.lock_object, ("PDROP requires a non-Giant lock")); if (lock != NULL) class = LOCK_CLASS(lock); else
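The comment above spells out the sleep/wakeup contract; the sketch below shows how a typical consumer drives it through msleep(), the mutex-based wrapper over _sleep(). The softc, mutex and condition names are hypothetical.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>

struct example_softc {
	struct mtx	sc_mtx;
	int		sc_ready;
};

static int
example_wait(struct example_softc *sc)
{
	int error = 0;

	mtx_lock(&sc->sc_mtx);
	while (sc->sc_ready == 0 && error == 0) {
		/* PCATCH lets a pending signal end the sleep with
		 * EINTR or ERESTART, per the contract above. */
		error = msleep(&sc->sc_ready, &sc->sc_mtx, PCATCH,
		    "exwait", 0);
	}
	mtx_unlock(&sc->sc_mtx);
	return (error);
}

static void
example_signal(struct example_softc *sc)
{
	mtx_lock(&sc->sc_mtx);
	sc->sc_ready = 1;
	wakeup(&sc->sc_ready);	/* wakes every sleeper on the channel */
	mtx_unlock(&sc->sc_mtx);
}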
/* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. Giant is not held on entry, and must not * be held on return. This function is passed in to fork_exit() as the * first parameter and is called when returning to a new userland process. */ void fork_return(struct thread *td, struct trapframe *frame) { struct proc *p, *dbg; p = td->td_proc; if (td->td_dbgflags & TDB_STOPATFORK) { sx_xlock(&proctree_lock); PROC_LOCK(p); if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED | P_FOLLOWFORK)) { /* * If debugger still wants auto-attach for the * parent's children, do it now. */ dbg = p->p_pptr->p_pptr; p->p_flag |= P_TRACED; p->p_oppid = p->p_pptr->p_pid; CTR2(KTR_PTRACE, "fork_return: attaching to new child pid %d: oppid %d", p->p_pid, p->p_oppid); proc_reparent(p, dbg); sx_xunlock(&proctree_lock); td->td_dbgflags |= TDB_CHILD | TDB_SCX; ptracestop(td, SIGSTOP); td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX); } else { /* * ... otherwise clear the request. */ sx_xunlock(&proctree_lock); td->td_dbgflags &= ~TDB_STOPATFORK; cv_broadcast(&p->p_dbgwait); } PROC_UNLOCK(p); } else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) { /* * This is the start of a new thread in a traced * process. Report a system call exit event. */ PROC_LOCK(p); td->td_dbgflags |= TDB_SCX; _STOPEVENT(p, S_SCX, td->td_dbg_sc_code); if ((p->p_stops & S_PT_SCX) != 0 || (td->td_dbgflags & TDB_BORN) != 0) ptracestop(td, SIGTRAP); td->td_dbgflags &= ~(TDB_SCX | TDB_BORN); PROC_UNLOCK(p); } userret(td, frame); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(SYS_fork, 0, 0); #endif }
/* ARGSUSED */ int sys_connect(struct proc *p, void *v, register_t *retval) { struct sys_connect_args /* { syscallarg(int) s; syscallarg(const struct sockaddr *) name; syscallarg(socklen_t) namelen; } */ *uap = v; struct file *fp; struct socket *so; struct mbuf *nam = NULL; int error, s; if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0) return (error); so = fp->f_data; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { FRELE(fp, p); return (EALREADY); } error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), MT_SONAME); if (error) goto bad; #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen)); #endif error = soconnect(so, nam); if (error) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { FRELE(fp, p); m_freem(nam); return (EINPROGRESS); } s = splsoftnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0); if (error) break; } if (error == 0) { error = so->so_error; so->so_error = 0; } splx(s); bad: so->so_state &= ~SS_ISCONNECTING; FRELE(fp, p); if (nam) m_freem(nam); if (error == ERESTART) error = EINTR; return (error); }
/* * vm_fault: * * Handle a page fault occurring at the given address, * requiring the given permissions, in the map specified. * If successful, the page is inserted into the * associated physical map. * * NOTE: the given address should be truncated to the * proper page address. * * KERN_SUCCESS is returned if the page fault is handled; otherwise, * a standard error specifying why the fault is fatal is returned. * * The map in question must be referenced, and remains so. * Caller may hold no locks. */ int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags) { struct thread *td; int result; td = curthread; if ((td->td_pflags & TDP_NOFAULTING) != 0) return (KERN_PROTECTION_FAILURE); #ifdef KTRACE if (map != kernel_map && KTRPOINT(td, KTR_FAULT)) ktrfault(vaddr, fault_type); #endif result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags, NULL); #ifdef KTRACE if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND)) ktrfaultend(result); #endif return (result); }
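A hedged sketch of how a machine-dependent trap handler typically drives this entry point; the helper name and the signal mapping are illustrative, while VM_FAULT_NORMAL, KERN_SUCCESS and KERN_PROTECTION_FAILURE are the usual constants.

#include <sys/param.h>
#include <sys/signal.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

/* Hypothetical fragment of a page-fault handler (kernel context). */
static int
example_handle_user_fault(vm_map_t map, vm_offset_t va, vm_prot_t ftype)
{
	int rv;

	/* The comment above asks for a page-truncated address. */
	rv = vm_fault(map, trunc_page(va), ftype, VM_FAULT_NORMAL);
	if (rv == KERN_SUCCESS)
		return (0);
	/* One common mapping of the Mach-style error to a signal. */
	return (rv == KERN_PROTECTION_FAILURE ? SIGBUS : SIGSEGV);
}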
static inline int _cap_check(cap_rights_t have, cap_rights_t need, enum ktr_cap_fail_type type) { if ((need & ~have) != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(type, need, have); #endif return (ENOTCAPABLE); } return (0); }
static inline int _cap_check(const cap_rights_t *havep, const cap_rights_t *needp, enum ktr_cap_fail_type type) { if (!cap_rights_contains(havep, needp)) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(type, needp, havep); #endif return (ENOTCAPABLE); } return (0); }
void child_return(void *arg) { struct lwp *l = arg; struct proc *p = l->l_proc; userret(l, l->l_md.md_regs->tf_iioq_head, 0); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p, SYS_fork, 0, 0); #endif #ifdef DEBUG frame_sanity_check(l->l_md.md_regs, l); #endif /* DEBUG */ }
int sysarch(struct thread *td, struct sysarch_args *uap) { int error; #ifdef CAPABILITY_MODE /* * When adding new operations, add a new case statement here to * explicitly indicate whether or not the operation is safe to * perform in capability mode. */ if (IN_CAPABILITY_MODE(td)) { switch (uap->op) { case ARM_SYNC_ICACHE: case ARM_DRAIN_WRITEBUF: case ARM_SET_TP: case ARM_GET_TP: break; default: #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL); #endif return (ECAPMODE); } } #endif switch (uap->op) { case ARM_SYNC_ICACHE: error = arm32_sync_icache(td, uap->parms); break; case ARM_DRAIN_WRITEBUF: error = arm32_drain_writebuf(td, uap->parms); break; case ARM_SET_TP: error = arm32_set_tp(td, uap->parms); break; case ARM_GET_TP: error = arm32_get_tp(td, uap->parms); break; default: error = EINVAL; break; } return (error); }
/* * Process the tail end of a fork() for the child. */ void child_return(void *arg) { struct proc *p = arg; /* * Return values in the frame set by cpu_fork(). */ KERNEL_PROC_UNLOCK(p); userret(p); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { KERNEL_PROC_LOCK(p); ktrsysret(p, SYS_fork, 0, 0); KERNEL_PROC_UNLOCK(p); } #endif }
int sysarch(struct thread *td, struct sysarch_args *uap) { int error; #ifdef CAPABILITY_MODE /* * When adding new operations, add a new case statement here to * explicitly indicate whether or not the operation is safe to * perform in capability mode. */ if (IN_CAPABILITY_MODE(td)) { switch (uap->op) { case SPARC_SIGTRAMP_INSTALL: case SPARC_UTRAP_INSTALL: break; default: #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL); #endif return (ECAPMODE); } } #endif mtx_lock(&Giant); switch (uap->op) { case SPARC_SIGTRAMP_INSTALL: error = sparc_sigtramp_install(td, uap->parms); break; case SPARC_UTRAP_INSTALL: error = sparc_utrap_install(td, uap->parms); break; default: error = EINVAL; break; } mtx_unlock(&Giant); return (error); }
/* * System call to query the rights mask associated with a capability. */ int sys___cap_rights_get(struct thread *td, struct __cap_rights_get_args *uap) { struct filedesc *fdp; cap_rights_t rights; int error, fd, i, n; if (uap->version != CAP_RIGHTS_VERSION_00) return (EINVAL); fd = uap->fd; AUDIT_ARG_FD(fd); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); if (fget_locked(fdp, fd) == NULL) { FILEDESC_SUNLOCK(fdp); return (EBADF); } rights = *cap_rights(fdp, fd); FILEDESC_SUNLOCK(fdp); n = uap->version + 2; if (uap->version != CAPVER(&rights)) { /* * For older versions we need to check if the descriptor * doesn't contain rights not understood by the caller. * If it does, we have to return an error. */ for (i = n; i < CAPARSIZE(&rights); i++) { if ((rights.cr_rights[i] & ~(0x7FULL << 57)) != 0) return (EINVAL); } } error = copyout(&rights, uap->rightsp, sizeof(rights.cr_rights[0]) * n); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT)) ktrcaprights(&rights); #endif return (error); }
/* * System call to limit rights of the given capability. */ int sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { cap_rights_t rights; int error, version; cap_rights_init(&rights); error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0])); if (error != 0) return (error); version = CAPVER(&rights); if (version != CAP_RIGHTS_VERSION_00) return (EINVAL); error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0]) * CAPARSIZE(&rights)); if (error != 0) return (error); /* Check for race. */ if (CAPVER(&rights) != version) return (EINVAL); if (!cap_rights_is_valid(&rights)) return (EINVAL); if (version != CAP_RIGHTS_VERSION) { rights.cr_rights[0] &= ~(0x3ULL << 62); rights.cr_rights[0] |= ((uint64_t)CAP_RIGHTS_VERSION << 62); } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrcaprights(&rights); #endif AUDIT_ARG_FD(uap->fd); AUDIT_ARG_RIGHTS(&rights); return (kern_cap_rights_limit(td, uap->fd, &rights)); }
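Both versions of this handler (here and the inline variant further below) back the same userland call; a minimal sketch of that side, assuming the modern <sys/capsicum.h> header, with the file and the retained rights chosen only for illustration:

#include <sys/capsicum.h>
#include <fcntl.h>
#include <unistd.h>
#include <err.h>

int
example_limit_fd(const char *path)
{
	cap_rights_t rights;
	int fd;

	if ((fd = open(path, O_RDONLY)) == -1)
		err(1, "open");
	/* Keep only read and seek on this descriptor. */
	cap_rights_init(&rights, CAP_READ, CAP_SEEK);
	if (cap_rights_limit(fd, &rights) == -1)
		err(1, "cap_rights_limit");
	/* A later attempt to widen the rights fails with ENOTCAPABLE
	 * (recorded as CAPFAIL_INCREASE when KTR_CAPFAIL tracing is on). */
	return fd;
}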
void EMULNAME(syscall_fancy)(struct proc *p, u_int status, u_int cause, u_int opc) { struct frame *frame = (struct frame *)p->p_md.md_regs; register_t *args, copyargs[8]; register_t *rval; #if _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN register_t copyrval[2]; #endif mips_reg_t ov0; size_t code, numsys, nsaved, nargs; const struct sysent *callp; int error; uvmexp.syscalls++; if (DELAYBRANCH(cause)) frame->f_regs[PC] = MachEmulateBranch(frame, opc, 0, 0); else frame->f_regs[PC] = opc + sizeof(int); callp = p->p_emul->e_sysent; numsys = p->p_emul->e_nsysent; ov0 = code = frame->f_regs[V0] - SYSCALL_SHIFT; switch (code) { case SYS_syscall: case SYS___syscall: args = copyargs; if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = frame->f_regs[A0] - SYSCALL_SHIFT; args[0] = frame->f_regs[A1]; args[1] = frame->f_regs[A2]; args[2] = frame->f_regs[A3]; nsaved = 3; } else { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ code = frame->f_regs[A0 + _QUAD_LOWWORD] - SYSCALL_SHIFT; args[0] = frame->f_regs[A2]; args[1] = frame->f_regs[A3]; nsaved = 2; } if (code >= p->p_emul->e_nsysent) callp += p->p_emul->e_nosys; else callp += code; nargs = callp->sy_argsize / sizeof(register_t); if (nargs > nsaved) { error = copyin( ((register_t *)(vaddr_t)frame->f_regs[SP] + 4), (args + nsaved), (nargs - nsaved) * sizeof(register_t)); if (error) goto bad; } break; default: if (code >= p->p_emul->e_nsysent) callp += p->p_emul->e_nosys; else callp += code; nargs = callp->sy_narg; if (nargs < 5) { #if !defined(_MIPS_BSD_API) || _MIPS_BSD_API == _MIPS_BSD_API_LP32 args = (register_t *)&frame->f_regs[A0]; #elif _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN args = copyargs; args[0] = frame->f_regs[A0]; args[1] = frame->f_regs[A1]; args[2] = frame->f_regs[A2]; args[3] = frame->f_regs[A3]; #else # error syscall not implemented for current MIPS ABI #endif } else { args = copyargs; error = copyin( ((register_t *)(vaddr_t)frame->f_regs[SP] + 4), (&copyargs[4]), (nargs - 4) * sizeof(register_t)); if (error) goto bad; args[0] = frame->f_regs[A0]; args[1] = frame->f_regs[A1]; args[2] = frame->f_regs[A2]; args[3] = frame->f_regs[A3]; } break; } #ifdef SYSCALL_DEBUG scdebug_call(p, code, args); #endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p, code, callp->sy_argsize, args); #endif #if !defined(_MIPS_BSD_API) || _MIPS_BSD_API == _MIPS_BSD_API_LP32 rval = (register_t *)&frame->f_regs[V0]; rval[0] = 0; /* rval[1] already has V1 */ #elif _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN rval = copyrval; rval[0] = 0; rval[1] = frame->f_regs[V1]; #endif error = (*callp->sy_call)(p, args, rval); switch (error) { case 0: #if _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN frame->f_regs[V0] = rval[0]; frame->f_regs[V1] = rval[1]; #endif frame->f_regs[A3] = 0; break; case ERESTART: frame->f_regs[V0] = ov0; /* restore syscall code */ frame->f_regs[PC] = opc; break; case EJUSTRETURN: break; /* nothing to do */ default: bad: if (p->p_emul->e_errno) error = p->p_emul->e_errno[error]; frame->f_regs[V0] = error; frame->f_regs[A3] = 1; break; } #ifdef SYSCALL_DEBUG scdebug_ret(p, code, error, rval); #endif userret(p); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p, code, error, rval[0]); #endif }
/* * Process an asynchronous software trap. * This is relatively easy. * This function will return with preemption disabled. */ void ast(struct trapframe *framep) { struct thread *td; struct proc *p; int flags; int sig; td = curthread; p = td->td_proc; CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode")); WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode"); mtx_assert(&Giant, MA_NOTOWNED); THREAD_LOCK_ASSERT(td, MA_NOTOWNED); td->td_frame = framep; td->td_pticks = 0; /* * This updates the td_flag's for the checks below in one * "atomic" operation with turning off the astpending flag. * If another AST is triggered while we are handling the * AST's saved in flags, the astpending flag will be set and * ast() will be called again. */ thread_lock(td); flags = td->td_flags; td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK | TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND); thread_unlock(td); PCPU_INC(cnt.v_trap); if (td->td_ucred != p->p_ucred) cred_update_thread(td); if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) { addupc_task(td, td->td_profil_addr, td->td_profil_ticks); td->td_profil_ticks = 0; td->td_pflags &= ~TDP_OWEUPC; } #ifdef HWPMC_HOOKS /* Handle Software PMC callchain capture. */ if (PMC_IS_PENDING_CALLCHAIN(td)) PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_USER_CALLCHAIN_SOFT, (void *) framep); #endif if (flags & TDF_ALRMPEND) { PROC_LOCK(p); kern_psignal(p, SIGVTALRM); PROC_UNLOCK(p); } if (flags & TDF_PROFPEND) { PROC_LOCK(p); kern_psignal(p, SIGPROF); PROC_UNLOCK(p); } #ifdef MAC if (flags & TDF_MACPEND) mac_thread_userret(td); #endif if (flags & TDF_NEEDRESCHED) { #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 1, __func__); #endif thread_lock(td); sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL); thread_unlock(td); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 1, __func__); #endif } /* * Check for signals. Unlocked reads of p_pendingcnt or * p_siglist might cause process-directed signal to be handled * later. */ if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 || !SIGISEMPTY(p->p_siglist)) { PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) postsig(sig); mtx_unlock(&p->p_sigacts->ps_mtx); PROC_UNLOCK(p); } /* * We need to check to see if we have to exit or wait due to a * single threading requirement or some other STOP condition. */ if (flags & TDF_NEEDSUSPCHK) { PROC_LOCK(p); thread_suspend_check(0); PROC_UNLOCK(p); } if (td->td_pflags & TDP_OLDMASK) { td->td_pflags &= ~TDP_OLDMASK; kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0); } userret(td, framep); }
int sys_pledge(struct proc *p, void *v, register_t *retval) { struct sys_pledge_args /* { syscallarg(const char *)request; syscallarg(const char **)paths; } */ *uap = v; uint64_t flags = 0; int error; if (SCARG(uap, request)) { size_t rbuflen; char *rbuf, *rp, *pn; uint64_t f; rbuf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); error = copyinstr(SCARG(uap, request), rbuf, MAXPATHLEN, &rbuflen); if (error) { free(rbuf, M_TEMP, MAXPATHLEN); return (error); } #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrstruct(p, "pledgereq", rbuf, rbuflen-1); #endif for (rp = rbuf; rp && *rp && error == 0; rp = pn) { pn = strchr(rp, ' '); /* find terminator */ if (pn) { while (*pn == ' ') *pn++ = '\0'; } if ((f = pledgereq_flags(rp)) == 0) { free(rbuf, M_TEMP, MAXPATHLEN); return (EINVAL); } flags |= f; } free(rbuf, M_TEMP, MAXPATHLEN); /* * if we are already pledged, allow only promises reductions. * flags doesn't contain flags outside _USERSET: they will be * relearned. */ if (ISSET(p->p_p->ps_flags, PS_PLEDGE) && (((flags | p->p_p->ps_pledge) != p->p_p->ps_pledge))) return (EPERM); } if (SCARG(uap, paths)) { #if 1 return (EINVAL); #else const char **u = SCARG(uap, paths), *sp; struct whitepaths *wl; char *path, *rdir = NULL, *cwd = NULL; size_t pathlen, rdirlen, cwdlen; size_t maxargs = 0; int i, error; if (p->p_p->ps_pledgepaths) return (EPERM); /* Count paths */ for (i = 0; i < PLEDGE_MAXPATHS; i++) { if ((error = copyin(u + i, &sp, sizeof(sp))) != 0) return (error); if (sp == NULL) break; } if (i == PLEDGE_MAXPATHS) return (E2BIG); wl = malloc(sizeof *wl + sizeof(struct whitepath) * (i+1), M_TEMP, M_WAITOK | M_ZERO); wl->wl_size = sizeof *wl + sizeof(struct whitepath) * (i+1); wl->wl_count = i; wl->wl_ref = 1; path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); /* Copy in */ for (i = 0; i < wl->wl_count; i++) { char *resolved = NULL; size_t resolvedlen; if ((error = copyin(u + i, &sp, sizeof(sp))) != 0) break; if (sp == NULL) break; if ((error = copyinstr(sp, path, MAXPATHLEN, &pathlen)) != 0) break; #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrstruct(p, "pledgepath", path, pathlen-1); #endif error = resolvpath(p, &rdir, &rdirlen, &cwd, &cwdlen, path, pathlen, &resolved, &resolvedlen); if (error != 0) /* resolved is allocated only if !error */ break; maxargs += resolvedlen; if (maxargs > ARG_MAX) { error = E2BIG; free(resolved, M_TEMP, resolvedlen); break; } wl->wl_paths[i].name = resolved; wl->wl_paths[i].len = resolvedlen; } free(rdir, M_TEMP, rdirlen); free(cwd, M_TEMP, cwdlen); free(path, M_TEMP, MAXPATHLEN); if (error) { for (i = 0; i < wl->wl_count; i++) free(wl->wl_paths[i].name, M_TEMP, wl->wl_paths[i].len); free(wl, M_TEMP, wl->wl_size); return (error); } p->p_p->ps_pledgepaths = wl; #ifdef DEBUG_PLEDGE /* print paths registered as whilelisted (viewed as without chroot) */ DNPRINTF(1, "pledge: %s(%d): paths loaded:\n", p->p_comm, p->p_pid); for (i = 0; i < wl->wl_count; i++) if (wl->wl_paths[i].name) DNPRINTF(1, "pledge: %d=\"%s\" [%lld]\n", i, wl->wl_paths[i].name, (long long)wl->wl_paths[i].len); #endif #endif } if (SCARG(uap, request)) { p->p_p->ps_pledge = flags; p->p_p->ps_flags |= PS_PLEDGE; } return (0); }
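For context, a minimal userland sketch of the request string parsed above by pledgereq_flags(); the paths argument is left NULL, matching the #if 1 branch that rejects it with EINVAL. The chosen promises are only an example.

#include <unistd.h>
#include <err.h>

int
main(void)
{
	/* Keep stdio plus read-only filesystem access; drop the rest. */
	if (pledge("stdio rpath", NULL) == -1)
		err(1, "pledge");
	/* From here on, a syscall outside the promised set aborts the
	 * process via pledge_fail() (see above): SIGABRT and a core dump. */
	return 0;
}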
/* ARGSUSED */ int sys_execve(struct proc *p, void *v, register_t *retval) { struct sys_execve_args /* { syscallarg(const char *) path; syscallarg(char *const *) argp; syscallarg(char *const *) envp; } */ *uap = v; int error; struct exec_package pack; struct nameidata nid; struct vattr attr; struct ucred *cred = p->p_ucred; char *argp; char * const *cpp, *dp, *sp; #ifdef KTRACE char *env_start; #endif struct process *pr = p->p_p; long argc, envc; size_t len, sgap; #ifdef MACHINE_STACK_GROWS_UP size_t slen; #endif char *stack; struct ps_strings arginfo; struct vmspace *vm = pr->ps_vmspace; char **tmpfap; extern struct emul emul_native; #if NSYSTRACE > 0 int wassugid = ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC); size_t pathbuflen; #endif char *pathbuf = NULL; struct vnode *otvp; /* get other threads to stop */ if ((error = single_thread_set(p, SINGLE_UNWIND, 1))) return (error); /* * Cheap solution to complicated problems. * Mark this process as "leave me alone, I'm execing". */ atomic_setbits_int(&pr->ps_flags, PS_INEXEC); #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) { systrace_execve0(p); pathbuf = pool_get(&namei_pool, PR_WAITOK); error = copyinstr(SCARG(uap, path), pathbuf, MAXPATHLEN, &pathbuflen); if (error != 0) goto clrflag; } #endif if (pathbuf != NULL) { NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, p); } else { NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); } /* * initialize the fields of the exec package. */ if (pathbuf != NULL) pack.ep_name = pathbuf; else pack.ep_name = (char *)SCARG(uap, path); pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK); pack.ep_hdrlen = exec_maxhdrsz; pack.ep_hdrvalid = 0; pack.ep_ndp = &nid; pack.ep_interp = NULL; pack.ep_emul_arg = NULL; VMCMDSET_INIT(&pack.ep_vmcmds); pack.ep_vap = &attr; pack.ep_emul = &emul_native; pack.ep_flags = 0; /* see if we can run it. 
*/ if ((error = check_exec(p, &pack)) != 0) { goto freehdr; } /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */ /* allocate an argument buffer */ argp = km_alloc(NCARGS, &kv_exec, &kp_pageable, &kd_waitok); #ifdef DIAGNOSTIC if (argp == NULL) panic("execve: argp == NULL"); #endif dp = argp; argc = 0; /* copy the fake args list, if there's one, freeing it as we go */ if (pack.ep_flags & EXEC_HASARGL) { tmpfap = pack.ep_fa; while (*tmpfap != NULL) { char *cp; cp = *tmpfap; while (*cp) *dp++ = *cp++; *dp++ = '\0'; free(*tmpfap, M_EXEC, 0); tmpfap++; argc++; } free(pack.ep_fa, M_EXEC, 0); pack.ep_flags &= ~EXEC_HASARGL; } /* Now get argv & environment */ if (!(cpp = SCARG(uap, argp))) { error = EFAULT; goto bad; } if (pack.ep_flags & EXEC_SKIPARG) cpp++; while (1) { len = argp + ARG_MAX - dp; if ((error = copyin(cpp, &sp, sizeof(sp))) != 0) goto bad; if (!sp) break; if ((error = copyinstr(sp, dp, len, &len)) != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto bad; } dp += len; cpp++; argc++; } /* must have at least one argument */ if (argc == 0) { error = EINVAL; goto bad; } #ifdef KTRACE if (KTRPOINT(p, KTR_EXECARGS)) ktrexec(p, KTR_EXECARGS, argp, dp - argp); #endif envc = 0; /* environment does not need to be there */ if ((cpp = SCARG(uap, envp)) != NULL ) { #ifdef KTRACE env_start = dp; #endif while (1) { len = argp + ARG_MAX - dp; if ((error = copyin(cpp, &sp, sizeof(sp))) != 0) goto bad; if (!sp) break; if ((error = copyinstr(sp, dp, len, &len)) != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto bad; } dp += len; cpp++; envc++; } #ifdef KTRACE if (KTRPOINT(p, KTR_EXECENV)) ktrexec(p, KTR_EXECENV, env_start, dp - env_start); #endif } dp = (char *)(((long)dp + _STACKALIGNBYTES) & ~_STACKALIGNBYTES); sgap = STACKGAPLEN; /* * If we have enabled random stackgap, the stack itself has already * been moved from a random location, but is still aligned to a page * boundary. Provide the lower bits of random placement now. */ if (stackgap_random != 0) { sgap += arc4random() & PAGE_MASK; sgap = (sgap + _STACKALIGNBYTES) & ~_STACKALIGNBYTES; } /* Now check if args & environ fit into new stack */ len = ((argc + envc + 2 + pack.ep_emul->e_arglen) * sizeof(char *) + sizeof(long) + dp + sgap + sizeof(struct ps_strings)) - argp; len = (len + _STACKALIGNBYTES) &~ _STACKALIGNBYTES; if (len > pack.ep_ssize) { /* in effect, compare to initial limit */ error = ENOMEM; goto bad; } /* adjust "active stack depth" for process VSZ */ pack.ep_ssize = len; /* maybe should go elsewhere, but... */ /* * we're committed: any further errors will kill the process, so * kill the other threads now. */ single_thread_set(p, SINGLE_EXIT, 0); /* * Prepare vmspace for remapping. Note that uvmspace_exec can replace * pr_vmspace! 
*/ uvmspace_exec(p, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); vm = pr->ps_vmspace; /* Now map address space */ vm->vm_taddr = (char *)trunc_page(pack.ep_taddr); vm->vm_tsize = atop(round_page(pack.ep_taddr + pack.ep_tsize) - trunc_page(pack.ep_taddr)); vm->vm_daddr = (char *)trunc_page(pack.ep_daddr); vm->vm_dsize = atop(round_page(pack.ep_daddr + pack.ep_dsize) - trunc_page(pack.ep_daddr)); vm->vm_dused = 0; vm->vm_ssize = atop(round_page(pack.ep_ssize)); vm->vm_maxsaddr = (char *)pack.ep_maxsaddr; vm->vm_minsaddr = (char *)pack.ep_minsaddr; /* create the new process's VM space by running the vmcmds */ #ifdef DIAGNOSTIC if (pack.ep_vmcmds.evs_used == 0) panic("execve: no vmcmds"); #endif error = exec_process_vmcmds(p, &pack); /* if an error happened, deallocate and punt */ if (error) goto exec_abort; /* old "stackgap" is gone now */ pr->ps_stackgap = 0; #ifdef MACHINE_STACK_GROWS_UP pr->ps_strings = (vaddr_t)vm->vm_maxsaddr + sgap; if (uvm_map_protect(&vm->vm_map, (vaddr_t)vm->vm_maxsaddr, trunc_page(pr->ps_strings), PROT_NONE, TRUE)) goto exec_abort; #else pr->ps_strings = (vaddr_t)vm->vm_minsaddr - sizeof(arginfo) - sgap; if (uvm_map_protect(&vm->vm_map, round_page(pr->ps_strings + sizeof(arginfo)), (vaddr_t)vm->vm_minsaddr, PROT_NONE, TRUE)) goto exec_abort; #endif /* remember information about the process */ arginfo.ps_nargvstr = argc; arginfo.ps_nenvstr = envc; #ifdef MACHINE_STACK_GROWS_UP stack = (char *)vm->vm_maxsaddr + sizeof(arginfo) + sgap; slen = len - sizeof(arginfo) - sgap; #else stack = (char *)(vm->vm_minsaddr - len); #endif /* Now copy argc, args & environ to new stack */ if (!(*pack.ep_emul->e_copyargs)(&pack, &arginfo, stack, argp)) goto exec_abort; /* copy out the process's ps_strings structure */ if (copyout(&arginfo, (char *)pr->ps_strings, sizeof(arginfo))) goto exec_abort; stopprofclock(pr); /* stop profiling */ fdcloseexec(p); /* handle close on exec */ execsigs(p); /* reset caught signals */ TCB_SET(p, NULL); /* reset the TCB address */ pr->ps_kbind_addr = 0; /* reset the kbind bits */ pr->ps_kbind_cookie = 0; /* set command name & other accounting info */ memset(p->p_comm, 0, sizeof(p->p_comm)); len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN); memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, len); pr->ps_acflag &= ~AFORK; /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; vref(pack.ep_vp); pr->ps_textvp = pack.ep_vp; if (otvp) vrele(otvp); atomic_setbits_int(&pr->ps_flags, PS_EXEC); if (pr->ps_flags & PS_PPWAIT) { atomic_clearbits_int(&pr->ps_flags, PS_PPWAIT); atomic_clearbits_int(&pr->ps_pptr->ps_flags, PS_ISPWAIT); wakeup(pr->ps_pptr); } /* * If process does execve() while it has a mismatched real, * effective, or saved uid/gid, we set PS_SUGIDEXEC. */ if (cred->cr_uid != cred->cr_ruid || cred->cr_uid != cred->cr_svuid || cred->cr_gid != cred->cr_rgid || cred->cr_gid != cred->cr_svgid) atomic_setbits_int(&pr->ps_flags, PS_SUGIDEXEC); else atomic_clearbits_int(&pr->ps_flags, PS_SUGIDEXEC); atomic_clearbits_int(&pr->ps_flags, PS_TAMED); tame_dropwpaths(pr); /* * deal with set[ug]id. * MNT_NOEXEC has already been used to disable s[ug]id. */ if ((attr.va_mode & (VSUID | VSGID)) && proc_cansugid(p)) { int i; atomic_setbits_int(&pr->ps_flags, PS_SUGID|PS_SUGIDEXEC); #ifdef KTRACE /* * If process is being ktraced, turn off - unless * root set it. 
*/ if (pr->ps_tracevp && !(pr->ps_traceflag & KTRFAC_ROOT)) ktrcleartrace(pr); #endif p->p_ucred = cred = crcopy(cred); if (attr.va_mode & VSUID) cred->cr_uid = attr.va_uid; if (attr.va_mode & VSGID) cred->cr_gid = attr.va_gid; /* * For set[ug]id processes, a few caveats apply to * stdin, stdout, and stderr. */ error = 0; fdplock(p->p_fd); for (i = 0; i < 3; i++) { struct file *fp = NULL; /* * NOTE - This will never return NULL because of * immature fds. The file descriptor table is not * shared because we're suid. */ fp = fd_getfile(p->p_fd, i); /* * Ensure that stdin, stdout, and stderr are already * allocated. We do not want userland to accidentally * allocate descriptors in this range which has implied * meaning to libc. */ if (fp == NULL) { short flags = FREAD | (i == 0 ? 0 : FWRITE); struct vnode *vp; int indx; if ((error = falloc(p, &fp, &indx)) != 0) break; #ifdef DIAGNOSTIC if (indx != i) panic("sys_execve: falloc indx != i"); #endif if ((error = cdevvp(getnulldev(), &vp)) != 0) { fdremove(p->p_fd, indx); closef(fp, p); break; } if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) { fdremove(p->p_fd, indx); closef(fp, p); vrele(vp); break; } if (flags & FWRITE) vp->v_writecount++; fp->f_flag = flags; fp->f_type = DTYPE_VNODE; fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; FILE_SET_MATURE(fp, p); } } fdpunlock(p->p_fd); if (error) goto exec_abort; } else atomic_clearbits_int(&pr->ps_flags, PS_SUGID); /* * Reset the saved ugids and update the process's copy of the * creds if the creds have been changed */ if (cred->cr_uid != cred->cr_svuid || cred->cr_gid != cred->cr_svgid) { /* make sure we have unshared ucreds */ p->p_ucred = cred = crcopy(cred); cred->cr_svuid = cred->cr_uid; cred->cr_svgid = cred->cr_gid; } if (pr->ps_ucred != cred) { struct ucred *ocred; ocred = pr->ps_ucred; crhold(cred); pr->ps_ucred = cred; crfree(ocred); } if (pr->ps_flags & PS_SUGIDEXEC) { int i, s = splclock(); timeout_del(&pr->ps_realit_to); for (i = 0; i < nitems(pr->ps_timer); i++) { timerclear(&pr->ps_timer[i].it_interval); timerclear(&pr->ps_timer[i].it_value); } splx(s); } /* reset CPU time usage for the thread, but not the process */ timespecclear(&p->p_tu.tu_runtime); p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0; km_free(argp, NCARGS, &kv_exec, &kp_pageable); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); vn_close(pack.ep_vp, FREAD, cred, p); /* * notify others that we exec'd */ KNOTE(&pr->ps_klist, NOTE_EXEC); /* setup new registers and do misc. setup. */ if (pack.ep_emul->e_fixup != NULL) { if ((*pack.ep_emul->e_fixup)(p, &pack) != 0) goto free_pack_abort; } #ifdef MACHINE_STACK_GROWS_UP (*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack + slen, retval); #else (*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack, retval); #endif /* map the process's signal trampoline code */ if (exec_sigcode_map(pr, pack.ep_emul)) goto free_pack_abort; #ifdef __HAVE_EXEC_MD_MAP /* perform md specific mappings that process might need */ if (exec_md_map(p, &pack)) goto free_pack_abort; #endif if (pr->ps_flags & PS_TRACED) psignal(p, SIGTRAP); free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); /* * Call emulation specific exec hook. This can setup per-process * p->p_emuldata or do any other per-process stuff an emulation needs. * * If we are executing process of different emulation than the * original forked process, call e_proc_exit() of the old emulation * first, then e_proc_exec() of new emulation. 
If the emulation is * same, the exec hook code should deallocate any old emulation * resources held previously by this process. */ if (pr->ps_emul && pr->ps_emul->e_proc_exit && pr->ps_emul != pack.ep_emul) (*pr->ps_emul->e_proc_exit)(p); p->p_descfd = 255; if ((pack.ep_flags & EXEC_HASFD) && pack.ep_fd < 255) p->p_descfd = pack.ep_fd; /* * Call exec hook. Emulation code may NOT store reference to anything * from &pack. */ if (pack.ep_emul->e_proc_exec) (*pack.ep_emul->e_proc_exec)(p, &pack); #if defined(KTRACE) && defined(COMPAT_LINUX) /* update ps_emul, but don't ktrace it if native-execing-native */ if (pr->ps_emul != pack.ep_emul || pack.ep_emul != &emul_native) { pr->ps_emul = pack.ep_emul; if (KTRPOINT(p, KTR_EMUL)) ktremul(p); } #else /* update ps_emul, the old value is no longer needed */ pr->ps_emul = pack.ep_emul; #endif atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); single_thread_clear(p, P_SUSPSIG); #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE) && wassugid && !ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC)) systrace_execve1(pathbuf, p); #endif if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); return (0); bad: /* free the vmspace-creation commands, and release their references */ kill_vmcmds(&pack.ep_vmcmds); /* kill any opened file descriptor, if necessary */ if (pack.ep_flags & EXEC_HASFD) { pack.ep_flags &= ~EXEC_HASFD; fdplock(p->p_fd); (void) fdrelease(p, pack.ep_fd); fdpunlock(p->p_fd); } if (pack.ep_interp != NULL) pool_put(&namei_pool, pack.ep_interp); if (pack.ep_emul_arg != NULL) free(pack.ep_emul_arg, M_TEMP, pack.ep_emul_argsize); /* close and put the exec'd file */ vn_close(pack.ep_vp, FREAD, cred, p); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); km_free(argp, NCARGS, &kv_exec, &kp_pageable); freehdr: free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); #if NSYSTRACE > 0 clrflag: #endif atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); single_thread_clear(p, P_SUSPSIG); if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); return (error); exec_abort: /* * the old process doesn't exist anymore. exit gracefully. * get rid of the (new) address space we have created, if any, get rid * of our namei data and vnode, and exit noting failure */ uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS); if (pack.ep_interp != NULL) pool_put(&namei_pool, pack.ep_interp); if (pack.ep_emul_arg != NULL) free(pack.ep_emul_arg, M_TEMP, pack.ep_emul_argsize); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); vn_close(pack.ep_vp, FREAD, cred, p); km_free(argp, NCARGS, &kv_exec, &kp_pageable); free_pack_abort: free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); exit1(p, W_EXITCODE(0, SIGABRT), EXIT_NORMAL); /* NOTREACHED */ atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); return (0); }
void osf1_syscall_fancy(struct proc *p, u_int64_t code, struct trapframe *framep) { const struct sysent *callp; int error; u_int64_t rval[2]; u_int64_t *args, copyargs[10]; /* XXX */ u_int hidden, nargs; KERNEL_PROC_LOCK(p); uvmexp.syscalls++; p->p_md.md_tf = framep; callp = p->p_emul->e_sysent; switch (code) { case OSF1_SYS_syscall: /* OSF/1 syscall() */ code = framep->tf_regs[FRAME_A0]; hidden = 1; break; default: hidden = 0; break; } code &= (OSF1_SYS_NSYSENT - 1); callp += code; nargs = callp->sy_narg + hidden; switch (nargs) { default: error = copyin((caddr_t)alpha_pal_rdusp(), &copyargs[6], (nargs - 6) * sizeof(u_int64_t)); if (error) goto bad; case 6: copyargs[5] = framep->tf_regs[FRAME_A5]; case 5: copyargs[4] = framep->tf_regs[FRAME_A4]; case 4: copyargs[3] = framep->tf_regs[FRAME_A3]; copyargs[2] = framep->tf_regs[FRAME_A2]; copyargs[1] = framep->tf_regs[FRAME_A1]; copyargs[0] = framep->tf_regs[FRAME_A0]; args = copyargs; break; case 3: case 2: case 1: case 0: args = &framep->tf_regs[FRAME_A0]; break; } args += hidden; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p, code, callp->sy_argsize, args); #endif #ifdef SYSCALL_DEBUG scdebug_call(p, code, args); #endif rval[0] = 0; rval[1] = 0; error = (*callp->sy_call)(p, args, rval); switch (error) { case 0: framep->tf_regs[FRAME_V0] = rval[0]; framep->tf_regs[FRAME_A4] = rval[1]; framep->tf_regs[FRAME_A3] = 0; break; case ERESTART: framep->tf_regs[FRAME_PC] -= 4; break; case EJUSTRETURN: break; default: bad: error = native_to_osf1_errno[error]; framep->tf_regs[FRAME_V0] = error; framep->tf_regs[FRAME_A3] = 1; break; } #ifdef SYSCALL_DEBUG scdebug_ret(p, code, error, rval); #endif KERNEL_PROC_UNLOCK(p); userret(p); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { KERNEL_PROC_LOCK(p); ktrsysret(p, code, error, rval[0]); KERNEL_PROC_UNLOCK(p); } #endif }
/* Instruction pointers operate differently on mc88110 */ void m88110_syscall(register_t code, struct trapframe *tf) { int i, nsys, nap; struct sysent *callp; struct proc *p; int error; register_t args[11], rval[2], *ap; u_quad_t sticks; #ifdef DIAGNOSTIC extern struct pcb *curpcb; #endif uvmexp.syscalls++; p = curproc; callp = p->p_emul->e_sysent; nsys = p->p_emul->e_nsysent; #ifdef DIAGNOSTIC if (USERMODE(tf->tf_epsr) == 0) panic("syscall"); if (curpcb != &p->p_addr->u_pcb) panic("syscall curpcb/ppcb"); if (tf != (struct trapframe *)&curpcb->user_state) panic("syscall trapframe"); #endif sticks = p->p_sticks; p->p_md.md_tf = tf; /* * For 88k, all the arguments are passed in the registers (r2-r12) * For syscall (and __syscall), r2 (and r3) has the actual code. * __syscall takes a quad syscall number, so that other * arguments are at their natural alignments. */ ap = &tf->tf_r[2]; nap = 11; /* r2-r12 */ switch (code) { case SYS_syscall: code = *ap++; nap--; break; case SYS___syscall: if (callp != sysent) break; code = ap[_QUAD_LOWWORD]; ap += 2; nap -= 2; break; } /* Callp currently points to syscall, which returns ENOSYS. */ if (code < 0 || code >= nsys) callp += p->p_emul->e_nosys; else { callp += code; i = callp->sy_argsize / sizeof(register_t); if (i > nap) panic("syscall nargs"); /* * just copy them; syscall stub made sure all the * args are moved from user stack to registers. */ bcopy((caddr_t)ap, (caddr_t)args, i * sizeof(register_t)); } #ifdef SYSCALL_DEBUG scdebug_call(p, code, args); #endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p, code, callp->sy_argsize, args); #endif rval[0] = 0; rval[1] = tf->tf_r[3]; #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) error = systrace_redirect(code, p, args, rval); else #endif error = (*callp->sy_call)(p, args, rval); /* * system call will look like: * ld r10, r31, 32; r10,r11,r12 might be garbage. * ld r11, r31, 36 * ld r12, r31, 40 * or r13, r0, <code> * tb0 0, r0, <128> <- exip * br err <- enip * jmp r1 * err: or.u r3, r0, hi16(errno) * st r2, r3, lo16(errno) * subu r2, r0, 1 * jmp r1 * * So, when we take syscall trap, exip/enip will be as * shown above. * Given this, * 1. If the system call returned 0, need to jmp r1. * exip += 8 * 2. If the system call returned an errno > 0, increment * exip += 4 and plug the value in r2. This will have us * executing "br err" on return to user space. * 3. If the system call code returned ERESTART, * we need to rexecute the trap instruction. leave exip as is. * 4. If the system call returned EJUSTRETURN, just return. * exip += 4 */ switch (error) { case 0: /* * If fork succeeded and we are the child, our stack * has moved and the pointer tf is no longer valid, * and p is wrong. Compute the new trapframe pointer. * (The trap frame invariably resides at the * tippity-top of the u. area.) */ p = curproc; tf = (struct trapframe *)USER_REGS(p); tf->tf_r[2] = rval[0]; tf->tf_r[3] = rval[1]; tf->tf_epsr &= ~PSR_C; /* skip two instructions */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip + 4; else tf->tf_exip += 4 + 4; break; case ERESTART: /* * Reexecute the trap. * exip is already at the trap instruction, so * there is nothing to do. 
*/ tf->tf_epsr &= ~PSR_C; break; case EJUSTRETURN: tf->tf_epsr &= ~PSR_C; /* skip one instruction */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip; else tf->tf_exip += 4; break; default: if (p->p_emul->e_errno) error = p->p_emul->e_errno[error]; tf->tf_r[2] = error; tf->tf_epsr |= PSR_C; /* fail */ /* skip one instruction */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip; else tf->tf_exip += 4; break; } #ifdef SYSCALL_DEBUG scdebug_ret(p, code, error, rval); #endif userret(p, tf, sticks); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p, code, error, rval[0]); #endif }
/* * syscall2 - MP aware system call request C handler * * A system call is essentially treated as a trap except that the * MP lock is not held on entry or return. We are responsible for * obtaining the MP lock if necessary and for handling ASTs * (e.g. a task switch) prior to return. * * MPSAFE */ void syscall2(struct trapframe *frame) { struct thread *td = curthread; struct proc *p = td->td_proc; struct lwp *lp = td->td_lwp; struct sysent *callp; register_t orig_tf_rflags; int sticks; int error; int narg; #ifdef INVARIANTS int crit_count = td->td_critcount; #endif register_t *argp; u_int code; int regcnt, optimized_regcnt; union sysunion args; register_t *argsdst; mycpu->gd_cnt.v_syscall++; #ifdef DIAGNOSTIC if (ISPL(frame->tf_cs) != SEL_UPL) { panic("syscall"); /* NOT REACHED */ } #endif KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid, frame->tf_rax); userenter(td, p); /* lazy raise our priority */ regcnt = 6; optimized_regcnt = 6; /* * Misc */ sticks = (int)td->td_sticks; orig_tf_rflags = frame->tf_rflags; /* * Virtual kernel intercept - if a VM context managed by a virtual * kernel issues a system call the virtual kernel handles it, not us. * Restore the virtual kernel context and return from its system * call. The current frame is copied out to the virtual kernel. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { vkernel_trap(lp, frame); error = EJUSTRETURN; callp = NULL; code = 0; goto out; } /* * Get the system call parameters and account for time */ KASSERT(lp->lwp_md.md_regs == frame, ("Frame mismatch %p %p", lp->lwp_md.md_regs, frame)); code = (u_int)frame->tf_rax; if (code == SYS_syscall || code == SYS___syscall) { code = frame->tf_rdi; regcnt--; argp = &frame->tf_rdi + 1; } else { argp = &frame->tf_rdi; } if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg & SYF_ARGMASK; /* * On x86_64 we get up to six arguments in registers. The rest are * on the stack. The first six members of 'struct trapframe' happen * to be the registers used to pass arguments, in exactly the right * order. */ argsdst = (register_t *)(&args.nosys.sysmsg + 1); /* * Its easier to copy up to the highest number of syscall arguments * passed in registers, which is 6, than to conditionalize it. */ bcopy(argp, argsdst, sizeof(register_t) * optimized_regcnt); /* * Any arguments beyond available argument-passing registers must * be copyin()'d from the user stack. */ if (narg > regcnt) { caddr_t params; params = (caddr_t)frame->tf_rsp + sizeof(register_t); error = copyin(params, &argsdst[regcnt], (narg - regcnt) * sizeof(register_t)); if (error) { #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif /* * Default return value is 0 (will be copied to %rax). Double-value * returns use %rax and %rdx. %rdx is left unchanged for system * calls which return only one result. */ args.sysmsg_fds[0] = 0; args.sysmsg_fds[1] = frame->tf_rdx; /* * The syscall might manipulate the trap frame. If it does it * will probably return EJUSTRETURN. */ args.sysmsg_frame = frame; STOPEVENT(p, S_SCE, narg); /* MP aware */ /* * NOTE: All system calls run MPSAFE now. The system call itself * is responsible for getting the MP lock. 
*/ #ifdef SYSCALL_DEBUG tsc_uclock_t tscval = rdtsc(); #endif error = (*callp->sy_call)(&args); #ifdef SYSCALL_DEBUG tscval = rdtsc() - tscval; tscval = tscval * 1000000 / tsc_frequency; if (SysCallsWorstCase[code] < tscval) SysCallsWorstCase[code] = tscval; #endif out: /* * MP SAFE (we may or may not have the MP lock at this point) */ //kprintf("SYSMSG %d ", error); switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; lp = curthread->td_lwp; frame->tf_rax = args.sysmsg_fds[0]; frame->tf_rdx = args.sysmsg_fds[1]; frame->tf_rflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, we know that 'syscall' is 2 bytes. * We have to do a full context restore so that %r10 * (which was holding the value of %rcx) is restored for * the next iteration. */ if (frame->tf_err != 0 && frame->tf_err != 2) kprintf("lp %s:%d frame->tf_err is weird %ld\n", td->td_comm, lp->lwp_proc->p_pid, frame->tf_err); frame->tf_rip -= frame->tf_err; frame->tf_r10 = frame->tf_rcx; break; case EJUSTRETURN: break; case EASYNC: panic("Unexpected EASYNC return value (for now)"); default: bad: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } frame->tf_rax = error; frame->tf_rflags |= PSL_C; break; } /* * Traced syscall. trapsignal() should now be MP aware */ if (orig_tf_rflags & PSL_T) { frame->tf_rflags &= ~PSL_T; trapsignal(lp, SIGTRAP, TRAP_TRACE); } /* * Handle reschedule and other end-of-syscall issues */ userret(lp, frame, sticks); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { ktrsysret(lp, code, error, args.sysmsg_result); } #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); userexit(lp); KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); KASSERT(&td->td_toks_base == td->td_toks_stop, ("syscall: %ld extra tokens held after trap! syscall %p", td->td_toks_stop - &td->td_toks_base, callp->sy_call)); #endif }
/* * System call to limit rights of the given capability. */ int sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { struct filedesc *fdp; cap_rights_t rights; int error, fd, version; cap_rights_init(&rights); error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0])); if (error != 0) return (error); version = CAPVER(&rights); if (version != CAP_RIGHTS_VERSION_00) return (EINVAL); error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0]) * CAPARSIZE(&rights)); if (error != 0) return (error); /* Check for race. */ if (CAPVER(&rights) != version) return (EINVAL); if (!cap_rights_is_valid(&rights)) return (EINVAL); if (version != CAP_RIGHTS_VERSION) { rights.cr_rights[0] &= ~(0x3ULL << 62); rights.cr_rights[0] |= ((uint64_t)CAP_RIGHTS_VERSION << 62); } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrcaprights(&rights); #endif fd = uap->fd; AUDIT_ARG_FD(fd); AUDIT_ARG_RIGHTS(&rights); fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); if (fget_locked(fdp, fd) == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } error = _cap_check(cap_rights(fdp, fd), &rights, CAPFAIL_INCREASE); if (error == 0) { fdp->fd_ofiles[fd].fde_rights = rights; if (!cap_rights_is_set(&rights, CAP_IOCTL)) { free(fdp->fd_ofiles[fd].fde_ioctls, M_FILECAPS); fdp->fd_ofiles[fd].fde_ioctls = NULL; fdp->fd_ofiles[fd].fde_nioctls = 0; } if (!cap_rights_is_set(&rights, CAP_FCNTL)) fdp->fd_ofiles[fd].fde_fcntls = 0; } FILEDESC_XUNLOCK(fdp); return (error); }
/* * syscall2 - MP aware system call request C handler * * A system call is essentially treated as a trap. The MP lock is not * held on entry or return. We are responsible for handling ASTs * (e.g. a task switch) prior to return. * * MPSAFE */ void syscall2(struct trapframe *frame) { struct thread *td = curthread; struct proc *p = td->td_proc; struct lwp *lp = td->td_lwp; caddr_t params; struct sysent *callp; register_t orig_tf_eflags; int sticks; int error; int narg; #ifdef INVARIANTS int crit_count = td->td_critcount; #endif int have_mplock = 0; u_int code; union sysunion args; #ifdef DIAGNOSTIC if (ISPL(frame->tf_cs) != SEL_UPL) { get_mplock(); panic("syscall"); /* NOT REACHED */ } #endif KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid, frame->tf_eax); userenter(td, p); /* lazy raise our priority */ /* * Misc */ sticks = (int)td->td_sticks; orig_tf_eflags = frame->tf_eflags; /* * Virtual kernel intercept - if a VM context managed by a virtual * kernel issues a system call the virtual kernel handles it, not us. * Restore the virtual kernel context and return from its system * call. The current frame is copied out to the virtual kernel. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { vkernel_trap(lp, frame); error = EJUSTRETURN; callp = NULL; goto out; } /* * Get the system call parameters and account for time */ lp->lwp_md.md_regs = frame; params = (caddr_t)frame->tf_esp + sizeof(int); code = frame->tf_eax; if (p->p_sysent->sv_prepsyscall) { (*p->p_sysent->sv_prepsyscall)( frame, (int *)(&args.nosys.sysmsg + 1), &code, &params); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. * fuword is MP aware. */ if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = fuword(params); params += sizeof(int); } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ code = fuword(params); params += sizeof(quad_t); } } code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg & SYF_ARGMASK; #if 0 if (p->p_sysent->sv_name[0] == 'L') kprintf("Linux syscall, code = %d\n", code); #endif /* * copyin is MP aware, but the tracing code is not */ if (narg && params) { error = copyin(params, (caddr_t)(&args.nosys.sysmsg + 1), narg * sizeof(register_t)); if (error) { #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { MAKEMPSAFE(have_mplock); ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { MAKEMPSAFE(have_mplock); ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif /* * For traditional syscall code edx is left untouched when 32 bit * results are returned. Since edx is loaded from fds[1] when the * system call returns we pre-set it here. */ args.sysmsg_fds[0] = 0; args.sysmsg_fds[1] = frame->tf_edx; /* * The syscall might manipulate the trap frame. If it does it * will probably return EJUSTRETURN. */ args.sysmsg_frame = frame; STOPEVENT(p, S_SCE, narg); /* MP aware */ /* * NOTE: All system calls run MPSAFE now. The system call itself * is responsible for getting the MP lock. */ error = (*callp->sy_call)(&args); out: /* * MP SAFE (we may or may not have the MP lock at this point) */ switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. 
*/ p = curproc; lp = curthread->td_lwp; frame->tf_eax = args.sysmsg_fds[0]; frame->tf_edx = args.sysmsg_fds[1]; frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ frame->tf_eip -= frame->tf_err; break; case EJUSTRETURN: break; case EASYNC: panic("Unexpected EASYNC return value (for now)"); default: bad: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } frame->tf_eax = error; frame->tf_eflags |= PSL_C; break; } /* * Traced syscall. trapsignal() is not MP aware. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { MAKEMPSAFE(have_mplock); frame->tf_eflags &= ~PSL_T; trapsignal(lp, SIGTRAP, TRAP_TRACE); } /* * Handle reschedule and other end-of-syscall issues */ userret(lp, frame, sticks); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { MAKEMPSAFE(have_mplock); ktrsysret(lp, code, error, args.sysmsg_result); } #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); userexit(lp); /* * Release the MP lock if we had to get it */ if (have_mplock) rel_mplock(); KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); KASSERT(&td->td_toks_base == td->td_toks_stop, ("syscall: extra tokens held after trap! %zd", td->td_toks_stop - &td->td_toks_base)); #endif }
static inline int syscallenter(struct thread *td, struct syscall_args *sa) { struct proc *p; int error, traced; PCPU_INC(cnt.v_syscall); p = td->td_proc; td->td_pticks = 0; if (td->td_ucred != p->p_ucred) cred_update_thread(td); if (p->p_flag & P_TRACED) { traced = 1; PROC_LOCK(p); td->td_dbgflags &= ~TDB_USERWR; td->td_dbgflags |= TDB_SCE; PROC_UNLOCK(p); } else traced = 0; error = (p->p_sysent->sv_fetch_syscall_args)(td, sa); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(sa->code, sa->narg, sa->args); #endif CTR6(KTR_SYSC, "syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)", td, td->td_proc->p_pid, syscallname(p, sa->code), sa->args[0], sa->args[1], sa->args[2]); if (error == 0) { STOPEVENT(p, S_SCE, sa->narg); if (p->p_flag & P_TRACED && p->p_stops & S_PT_SCE) { PROC_LOCK(p); ptracestop((td), SIGTRAP); PROC_UNLOCK(p); } if (td->td_dbgflags & TDB_USERWR) { /* * Reread syscall number and arguments if * debugger modified registers or memory. */ error = (p->p_sysent->sv_fetch_syscall_args)(td, sa); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(sa->code, sa->narg, sa->args); #endif if (error != 0) goto retval; } #ifdef CAPABILITY_MODE /* * In capability mode, we only allow access to system calls * flagged with SYF_CAPENABLED. */ if (IN_CAPABILITY_MODE(td) && !(sa->callp->sy_flags & SYF_CAPENABLED)) { error = ECAPMODE; goto retval; } #endif error = syscall_thread_enter(td, sa->callp); if (error != 0) goto retval; #ifdef KDTRACE_HOOKS /* * If the systrace module has registered it's probe * callback and if there is a probe active for the * syscall 'entry', process the probe. */ if (systrace_probe_func != NULL && sa->callp->sy_entry != 0) (*systrace_probe_func)(sa->callp->sy_entry, sa->code, sa->callp, sa->args, 0); #endif AUDIT_SYSCALL_ENTER(sa->code, td); error = (sa->callp->sy_call)(td, sa->args); AUDIT_SYSCALL_EXIT(error, td); /* Save the latest error return value. */ td->td_errno = error; #ifdef KDTRACE_HOOKS /* * If the systrace module has registered it's probe * callback and if there is a probe active for the * syscall 'return', process the probe. */ if (systrace_probe_func != NULL && sa->callp->sy_return != 0) (*systrace_probe_func)(sa->callp->sy_return, sa->code, sa->callp, NULL, (error) ? -1 : td->td_retval[0]); #endif syscall_thread_exit(td, sa->callp); CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx", p, error, td->td_retval[0], td->td_retval[1]); } retval: if (traced) { PROC_LOCK(p); td->td_dbgflags &= ~TDB_SCE; PROC_UNLOCK(p); } (p->p_sysent->sv_set_syscall_retval)(td, error); return (error); }
/* * Convert a pathname into a pointer to a vnode. * * The FOLLOW flag is set when symbolic links are to be followed * when they occur at the end of the name translation process. * Symbolic links are always followed for all other pathname * components other than the last. * * If the LOCKLEAF flag is set, a locked vnode is returned. * * The segflg defines whether the name is to be copied from user * space or kernel space. * * Overall outline of namei: * * copy in name * get starting directory * while (!done && !error) { * call lookup to search path. * if symbolic link, massage name in buffer and continue * } */ int namei(struct nameidata *ndp) { struct filedesc *fdp; /* pointer to file descriptor state */ char *cp; /* pointer into pathname argument */ struct vnode *dp; /* the directory we are searching */ struct iovec aiov; /* uio for reading symbolic links */ struct uio auio; int error, linklen; struct componentname *cnp = &ndp->ni_cnd; struct proc *p = cnp->cn_proc; ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred; #ifdef DIAGNOSTIC if (!cnp->cn_cred || !cnp->cn_proc) panic ("namei: bad cred/proc"); if (cnp->cn_nameiop & (~OPMASK)) panic ("namei: nameiop contaminated with flags"); if (cnp->cn_flags & OPMASK) panic ("namei: flags contaminated with nameiops"); #endif fdp = cnp->cn_proc->p_fd; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. */ if ((cnp->cn_flags & HASBUF) == 0) cnp->cn_pnbuf = pool_get(&namei_pool, PR_WAITOK); if (ndp->ni_segflg == UIO_SYSSPACE) error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); else error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); /* * Fail on null pathnames */ if (error == 0 && ndp->ni_pathlen == 1) error = ENOENT; if (error) { pool_put(&namei_pool, cnp->cn_pnbuf); ndp->ni_vp = NULL; return (error); } #ifdef KTRACE if (KTRPOINT(cnp->cn_proc, KTR_NAMEI)) ktrnamei(cnp->cn_proc, cnp->cn_pnbuf); #endif #if NSYSTRACE > 0 if (ISSET(cnp->cn_proc->p_flag, P_SYSTRACE)) systrace_namei(ndp); #endif /* * Strip trailing slashes, as requested */ if (cnp->cn_flags & STRIPSLASHES) { char *end = cnp->cn_pnbuf + ndp->ni_pathlen - 2; cp = end; while (cp >= cnp->cn_pnbuf && (*cp == '/')) cp--; /* Still some remaining characters in the buffer */ if (cp >= cnp->cn_pnbuf) { ndp->ni_pathlen -= (end - cp); *(cp + 1) = '\0'; } } ndp->ni_loopcnt = 0; /* * Get starting point for the translation. */ if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL) ndp->ni_rootdir = rootvnode; /* * Check if starting from root directory or current directory. */ if (cnp->cn_pnbuf[0] == '/') { dp = ndp->ni_rootdir; vref(dp); } else { dp = fdp->fd_cdir; vref(dp); } for (;;) { if (!dp->v_mount) { /* Give up if the directory is no longer mounted */ pool_put(&namei_pool, cnp->cn_pnbuf); return (ENOENT); } cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_startdir = dp; if ((error = lookup(ndp)) != 0) { pool_put(&namei_pool, cnp->cn_pnbuf); return (error); } /* * If not a symbolic link, return search result. 
*/ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) pool_put(&namei_pool, cnp->cn_pnbuf); else cnp->cn_flags |= HASBUF; return (0); } if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) VOP_UNLOCK(ndp->ni_dvp, 0, p); if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; } if (ndp->ni_pathlen > 1) cp = pool_get(&namei_pool, PR_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = cnp->cn_proc; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink: if (ndp->ni_pathlen > 1) pool_put(&namei_pool, cp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink; } if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); pool_put(&namei_pool, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; vput(ndp->ni_vp); dp = ndp->ni_dvp; /* * Check if root directory should replace current directory. */ if (cnp->cn_pnbuf[0] == '/') { vrele(dp); dp = ndp->ni_rootdir; vref(dp); } } pool_put(&namei_pool, cnp->cn_pnbuf); vrele(ndp->ni_dvp); vput(ndp->ni_vp); ndp->ni_vp = NULL; return (error); }
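To round off the namei() walk above, a minimal kernel-style sketch of the usual caller pattern (the same NDINIT()/namei() pairing used in sys_execve() earlier); the helper name is hypothetical and error unwinding is trimmed.

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/vnode.h>

/* Translate a user-supplied path into a locked, referenced vnode. */
static int
example_user_path_to_vnode(struct proc *p, const char *upath,
    struct vnode **vpp)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, upath, p);
	if ((error = namei(&nd)) != 0)
		return (error);
	*vpp = nd.ni_vp;	/* caller releases it later with vput() */
	return (0);
}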