int
recvit32(struct lwp *l, int s, struct netbsd32_msghdr *mp, struct iovec *iov,
    void *namelenp, register_t *retsize)
{
	struct uio auio;
	int i, len, error, iovlen;
	struct mbuf *from = NULL, *control = NULL;
	struct socket *so;
	struct proc *p;
	struct iovec *ktriov = NULL;

	p = l->l_proc;
	/* fd_getsock() will use the descriptor for us */
	if ((error = fd_getsock(s, &so)) != 0)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = l->l_proc->p_vmspace;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
#if 0
		/* cannot happen: iov_len is unsigned */
		if (iov->iov_len < 0) {
			error = EINVAL;
			goto out1;
		}
#endif
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		auio.uio_resid += iov->iov_len;
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto out1;
		}
	}
	if (ktrpoint(KTR_GENIO)) {
		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((void *)ktriov, (void *)auio.uio_iov, iovlen);
	}
	len = auio.uio_resid;
	error = (*so->so_receive)(so, &from, &auio, NULL,
	    NETBSD32PTR64(mp->msg_control) ? &control : NULL,
	    &mp->msg_flags);
	if (error) {
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	if (ktriov != NULL) {
		ktrgeniov(s, UIO_READ, ktriov, len - auio.uio_resid, error);
		free(ktriov, M_TEMP);
	}
	if (error)
		goto out;
	*retsize = len - auio.uio_resid;
	if (NETBSD32PTR64(mp->msg_name)) {
		len = mp->msg_namelen;
		if (len <= 0 || from == NULL)
			len = 0;
		else {
			if (len > from->m_len)
				len = from->m_len;
			/* else if len < from->m_len ??? */
			error = copyout(mtod(from, void *),
			    (void *)NETBSD32PTR64(mp->msg_name),
			    (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout((void *)&len, namelenp, sizeof(int))))
			goto out;
	}
 out:
	if (from != NULL)
		m_freem(from);
	if (control != NULL)
		m_freem(control);
 out1:
	fd_putfile(s);
	return (error);
}
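/*
 * Example (a hedged sketch, not part of the original file): a single-buffer
 * 32-bit recvfrom() built on recvit32().  The wrapper name and argument
 * list are hypothetical; the netbsd32_msghdr fields and the
 * NETBSD32PTR32() store macro are assumed to behave as in the compat
 * headers.  Note that recvit32() takes the kernel iovec separately, so
 * msg_iov need not be filled in.
 */
#if 0	/* illustrative only */
static int
example_recvfrom32(struct lwp *l, int s, void *ubuf, size_t len,
    void *uname, void *unamelenp, register_t *retval)
{
	struct netbsd32_msghdr msg;
	struct iovec aiov;
	int error;

	if (unamelenp != NULL) {
		/* Fetch the size of the caller's name buffer. */
		error = copyin(unamelenp, &msg.msg_namelen,
		    sizeof(msg.msg_namelen));
		if (error)
			return error;
	} else
		msg.msg_namelen = 0;
	NETBSD32PTR32(msg.msg_name, uname);	/* 32-bit user pointer */
	aiov.iov_base = ubuf;			/* single user buffer */
	aiov.iov_len = len;
	msg.msg_iovlen = 1;
	NETBSD32PTR32(msg.msg_control, NULL);	/* no ancillary data */
	msg.msg_flags = 0;
	return recvit32(l, s, &msg, &aiov, unamelenp, retval);
}
#endif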
static int
do_sys_sendmsg_so(struct lwp *l, int s, struct socket *so, file_t *fp,
    struct msghdr *mp, int flags, register_t *retsize)
{
	struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
	struct mbuf *to, *control;
	struct uio auio;
	size_t len, iovsz;
	int i, error;

	ktrkuser("msghdr", mp, sizeof *mp);

	/* If the caller passed us stuff in mbufs, we must free them. */
	to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL;
	control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL;
	iovsz = mp->msg_iovlen * sizeof(struct iovec);

	if (mp->msg_flags & MSG_IOVUSRSPACE) {
		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
				error = EMSGSIZE;
				goto bad;
			}
			iov = kmem_alloc(iovsz, KM_SLEEP);
		}
		if (mp->msg_iovlen != 0) {
			error = copyin(mp->msg_iov, iov, iovsz);
			if (error)
				goto bad;
		}
		mp->msg_iov = iov;
	}

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_rw = UIO_WRITE;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	KASSERT(l == curlwp);
	auio.uio_vmspace = l->l_proc->p_vmspace;

	for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore, we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		auio.uio_resid += tiov->iov_len;
		if (tiov->iov_len > SSIZE_MAX ||
		    auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto bad;
		}
	}

	if (mp->msg_name && to == NULL) {
		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
		    MT_SONAME);
		if (error)
			goto bad;
	}

	if (mp->msg_control) {
		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
			error = EINVAL;
			goto bad;
		}
		if (control == NULL) {
			error = sockargs(&control, mp->msg_control,
			    mp->msg_controllen, MT_CONTROL);
			if (error)
				goto bad;
		}
	}

	if (ktrpoint(KTR_GENIO) && iovsz > 0) {
		ktriov = kmem_alloc(iovsz, KM_SLEEP);
		memcpy(ktriov, auio.uio_iov, iovsz);
	}

	if (mp->msg_name)
		MCLAIM(to, so->so_mowner);
	if (mp->msg_control)
		MCLAIM(control, so->so_mowner);

	len = auio.uio_resid;
	error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
	/* Protocol is responsible for freeing 'control' */
	control = NULL;

	if (error) {
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE && (fp->f_flag & FNOSIGPIPE) == 0 &&
		    (flags & MSG_NOSIGNAL) == 0) {
			mutex_enter(proc_lock);
			psignal(l->l_proc, SIGPIPE);
			mutex_exit(proc_lock);
		}
	}
	if (error == 0)
		*retsize = len - auio.uio_resid;

 bad:
	if (ktriov != NULL) {
		ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
		kmem_free(ktriov, iovsz);
	}

	if (iov != aiov)
		kmem_free(iov, iovsz);
	if (to)
		m_freem(to);
	if (control)
		m_freem(control);

	return error;
}
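/*
 * Example (hedged sketch): a caller of do_sys_sendmsg_so().  The socket
 * and its file_t must be looked up and held across the call; fd_getsock1()
 * is assumed here to behave like fd_getsock() above but to also return
 * the file_t needed for the SIGPIPE/FNOSIGPIPE check.
 */
#if 0	/* illustrative only */
static int
example_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
    register_t *retsize)
{
	struct socket *so;
	file_t *fp;
	int error;

	if ((error = fd_getsock1(s, &so, &fp)) != 0)
		return error;
	error = do_sys_sendmsg_so(l, s, so, fp, mp, flags, retsize);
	fd_putfile(s);		/* release the descriptor reference */
	return error;
}
#endif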
static int
do_sys_recvmsg_so(struct lwp *l, int s, struct socket *so, struct msghdr *mp,
    struct mbuf **from, struct mbuf **control, register_t *retsize)
{
	struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
	struct uio auio;
	size_t len, iovsz;
	int i, error;

	ktrkuser("msghdr", mp, sizeof *mp);

	*from = NULL;
	if (control != NULL)
		*control = NULL;

	iovsz = mp->msg_iovlen * sizeof(struct iovec);

	if (mp->msg_flags & MSG_IOVUSRSPACE) {
		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
				error = EMSGSIZE;
				goto out;
			}
			iov = kmem_alloc(iovsz, KM_SLEEP);
		}
		if (mp->msg_iovlen != 0) {
			error = copyin(mp->msg_iov, iov, iovsz);
			if (error)
				goto out;
		}
		auio.uio_iov = iov;
	} else
		auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_rw = UIO_READ;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	KASSERT(l == curlwp);
	auio.uio_vmspace = l->l_proc->p_vmspace;

	tiov = auio.uio_iov;
	for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		auio.uio_resid += tiov->iov_len;
		if (tiov->iov_len > SSIZE_MAX ||
		    auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto out;
		}
	}

	if (ktrpoint(KTR_GENIO) && iovsz > 0) {
		ktriov = kmem_alloc(iovsz, KM_SLEEP);
		memcpy(ktriov, auio.uio_iov, iovsz);
	}

	len = auio.uio_resid;
	mp->msg_flags &= MSG_USERFLAGS;
	error = (*so->so_receive)(so, from, &auio, NULL, control,
	    &mp->msg_flags);
	len -= auio.uio_resid;
	*retsize = len;
	if (error != 0 && len != 0 && (error == ERESTART ||
	    error == EINTR || error == EWOULDBLOCK))
		/* Some data transferred */
		error = 0;

	if (ktriov != NULL) {
		ktrgeniov(s, UIO_READ, ktriov, len, error);
		kmem_free(ktriov, iovsz);
	}

	if (error != 0) {
		m_freem(*from);
		*from = NULL;
		if (control != NULL) {
			free_control_mbuf(l, *control, *control);
			*control = NULL;
		}
	}
 out:
	if (iov != aiov)
		kmem_free(iov, iovsz);
	return error;
}
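/*
 * Example (hedged sketch): a caller of do_sys_recvmsg_so().  On success
 * the peer address and any control data come back as mbuf chains that
 * the caller must copy out and free; on error the function has already
 * freed them and cleared the pointers.  fd_getsock()/fd_putfile() are
 * used as in recvit32() above.
 */
#if 0	/* illustrative only */
static int
example_recvmsg(struct lwp *l, int s, struct msghdr *mp, register_t *retsize)
{
	struct socket *so;
	struct mbuf *from, *control;
	int error;

	if ((error = fd_getsock(s, &so)) != 0)
		return error;
	error = do_sys_recvmsg_so(l, s, so, mp, &from, &control, retsize);
	fd_putfile(s);
	if (error == 0) {
		/* ... copy 'from' and 'control' out to the user ... */
		m_freem(from);
		if (control != NULL)
			m_freem(control);
	}
	return error;
}
#endif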
/*
 * General fork call.  Note that another LWP in the process may call exec()
 * or exit() while we are forking.  It's safe to continue here, because
 * neither operation will complete until all LWPs have exited the process.
 */
int
fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc *p1, *p2, *parent;
	struct plimit *p1_lim;
	uid_t uid;
	struct lwp *l2;
	int count;
	vaddr_t uaddr;
	int tnprocs;
	int tracefork;
	int error = 0;

	p1 = l1->l_proc;
	uid = kauth_cred_getuid(l1->l_cred);
	tnprocs = atomic_inc_uint_nv(&nprocs);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.
	 */
	if (__predict_false(tnprocs >= maxproc))
		error = -1;
	else
		error = kauth_authorize_process(l1->l_cred,
		    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);

	if (error) {
		static struct timeval lasttfm;
		atomic_dec_uint(&nprocs);
		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc", "increase kern.maxproc or NPROC");
		if (forkfsleep)
			kpause("forkmx", false, forkfsleep, NULL);
		return EAGAIN;
	}

	/*
	 * Enforce limits.
	 */
	count = chgproccnt(uid, 1);
	if (__predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
		if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
		    p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
		    &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC))
		    != 0) {
			(void)chgproccnt(uid, -1);
			atomic_dec_uint(&nprocs);
			if (forkfsleep)
				kpause("forkulim", false, forkfsleep, NULL);
			return EAGAIN;
		}
	}

	/*
	 * Allocate virtual address space for the U-area now, while it
	 * is still easy to abort the fork operation if we're out of
	 * kernel virtual address space.
	 */
	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		(void)chgproccnt(uid, -1);
		atomic_dec_uint(&nprocs);
		return ENOMEM;
	}

	/*
	 * We are now committed to the fork.  From here on, we may
	 * block on resources, but resource allocation may NOT fail.
	 */

	/* Allocate new proc. */
	p2 = proc_alloc();

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	memset(&p2->p_startzero, 0,
	    (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
	memcpy(&p2->p_startcopy, &p1->p_startcopy,
	    (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));

	TAILQ_INIT(&p2->p_sigpend.sp_info);

	LIST_INIT(&p2->p_lwps);
	LIST_INIT(&p2->p_sigwaiters);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * Inherit flags we want to keep.  The flags related to SIGCHLD
	 * handling are important in order to keep a consistent behaviour
	 * for the child after the fork.  If we are a 32-bit process, the
	 * child will be too.
	 */
	p2->p_flag =
	    p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
	p2->p_emul = p1->p_emul;
	p2->p_execsw = p1->p_execsw;

	if (flags & FORK_SYSTEM) {
		/*
		 * Mark it as a system process.  Set P_NOCLDWAIT so that
		 * children are reparented to init(8) when they exit.
		 * init(8) can easily wait them out for us.
		 */
		p2->p_flag |= (PK_SYSTEM | PK_NOCLDWAIT);
	}

	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p2->p_reflock);
	cv_init(&p2->p_waitcv, "wait");
	cv_init(&p2->p_lwpcv, "lwpwait");

	/*
	 * Share a lock between the processes if they are to share signal
	 * state: we must synchronize access to it.
	 */
	if (flags & FORK_SHARESIGS) {
		p2->p_lock = p1->p_lock;
		mutex_obj_hold(p1->p_lock);
	} else
		p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	kauth_proc_fork(p1, p2);

	p2->p_raslist = NULL;
#if defined(__HAVE_RAS)
	ras_fork(p1, p2);
#endif

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		vref(p2->p_textvp);

	if (flags & FORK_SHAREFILES)
		fd_share(p2);
	else if (flags & FORK_CLEANFILES)
		p2->p_fd = fd_init(NULL);
	else
		p2->p_fd = fd_copy();

	/* XXX racy */
	p2->p_mqueue_cnt = p1->p_mqueue_cnt;

	if (flags & FORK_SHARECWD)
		cwdshare(p2);
	else
		p2->p_cwdi = cwdinit();

	/*
	 * Note: p_limit (rlimit stuff) is copy-on-write, so normally
	 * we just need to increase pl_refcnt.
	 */
	p1_lim = p1->p_limit;
	if (!p1_lim->pl_writeable) {
		lim_addref(p1_lim);
		p2->p_limit = p1_lim;
	} else {
		p2->p_limit = lim_copy(p1_lim);
	}

	if (flags & FORK_PPWAIT) {
		/* Mark ourselves as waiting for a child. */
		l1->l_pflag |= LP_VFORKWAIT;
		p2->p_lflag = PL_PPWAIT;
		p2->p_vforklwp = l1;
	} else {
		p2->p_lflag = 0;
	}
	p2->p_sflag = 0;
	p2->p_slflag = 0;
	parent = (flags & FORK_NOWAIT) ? initproc : p1;
	p2->p_pptr = parent;
	p2->p_ppid = parent->p_pid;
	LIST_INIT(&p2->p_children);

	p2->p_aio = NULL;

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		mutex_enter(&ktrace_lock);
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			ktradref(p2);
		mutex_exit(&ktrace_lock);
	}
#endif

	/*
	 * Create signal actions for the child process.
	 */
	p2->p_sigacts = sigactsinit(p1, flags & FORK_SHARESIGS);
	mutex_enter(p1->p_lock);
	p2->p_sflag |=
	    (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
	sched_proc_fork(p1, p2);
	mutex_exit(p1->p_lock);

	p2->p_stflag = p1->p_stflag;

	/*
	 * p_stats.
	 * Copy parts of p_stats, and zero out the rest.
	 */
	p2->p_stats = pstatscopy(p1->p_stats);

	/*
	 * Set up the new process address space.
	 */
	uvm_proc_fork(p1, p2, (flags & FORK_SHAREVM) ? true : false);

	/*
	 * Finish creating the child process.
	 * It will return through a different path later.
	 */
	lwp_create(l1, p2, uaddr, (flags & FORK_PPWAIT) ? LWP_VFORK : 0,
	    stack, stacksize, (func != NULL) ? func : child_return, arg, &l2,
	    l1->l_class);

	/*
	 * Inherit l_private from the parent.
	 * Note that we cannot use lwp_setprivate() here since that
	 * also sets the CPU TLS register, which is incorrect if the
	 * process has changed that without letting the kernel know.
	 */
	l2->l_private = l1->l_private;

	/*
	 * If emulation has a process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, l1, flags);

	/*
	 * ...and finally, any other random fork hooks that subsystems
	 * might have registered.
	 */
	doforkhooks(p2, p1);

	SDT_PROBE(proc,,,create, p2, p1, flags, 0, 0);

	/*
	 * It's now safe for the scheduler and other processes to see the
	 * child process.
	 */
	mutex_enter(proc_lock);

	if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
		p2->p_lflag |= PL_CONTROLT;

	LIST_INSERT_HEAD(&parent->p_children, p2, p_sibling);
	p2->p_exitsig = exitsig;		/* signal for parent on exit */

	/*
	 * We don't want to tracefork vfork()ed processes because they
	 * will not receive the SIGTRAP until it is too late.
	 */
	tracefork = (p1->p_slflag & (PSL_TRACEFORK|PSL_TRACED)) ==
	    (PSL_TRACEFORK|PSL_TRACED) && (flags & FORK_PPWAIT) == 0;
	if (tracefork) {
		p2->p_slflag |= PSL_TRACED;
		p2->p_opptr = p2->p_pptr;
		if (p2->p_pptr != p1->p_pptr) {
			struct proc *parent1 = p2->p_pptr;

			if (parent1->p_lock < p2->p_lock) {
				if (!mutex_tryenter(parent1->p_lock)) {
					mutex_exit(p2->p_lock);
					mutex_enter(parent1->p_lock);
				}
			} else if (parent1->p_lock > p2->p_lock) {
				mutex_enter(parent1->p_lock);
			}
			parent1->p_slflag |= PSL_CHTRACED;
			proc_reparent(p2, p1->p_pptr);
			if (parent1->p_lock != p2->p_lock)
				mutex_exit(parent1->p_lock);
		}

		/*
		 * Set ptrace status.
		 */
		p1->p_fpid = p2->p_pid;
		p2->p_fpid = p1->p_pid;
	}

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	LIST_INSERT_HEAD(&allproc, p2, p_list);

	p2->p_trace_enabled = trace_is_enabled(p2);
#ifdef __HAVE_SYSCALL_INTERN
	(*p2->p_emul->e_syscall_intern)(p2);
#endif

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	if (ktrpoint(KTR_EMUL))
		p2->p_traceflag |= KTRFAC_TRC_EMUL;

	/*
	 * Notify any interested parties about the new process.
	 */
	if (!SLIST_EMPTY(&p1->p_klist)) {
		mutex_exit(proc_lock);
		KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
		mutex_enter(proc_lock);
	}

	/*
	 * Make child runnable, set start time, and add to run queue except
	 * if the parent requested the child to start in SSTOP state.
	 */
	mutex_enter(p2->p_lock);

	/*
	 * Start profiling.
	 */
	if ((p2->p_stflag & PST_PROFIL) != 0) {
		mutex_spin_enter(&p2->p_stmutex);
		startprofclock(p2);
		mutex_spin_exit(&p2->p_stmutex);
	}

	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	lwp_lock(l2);
	KASSERT(p2->p_nrlwps == 1);
	if (p2->p_sflag & PS_STOPFORK) {
		struct schedstate_percpu *spc = &l2->l_cpu->ci_schedstate;
		p2->p_nrlwps = 0;
		p2->p_stat = SSTOP;
		p2->p_waited = 0;
		p1->p_nstopchild++;
		l2->l_stat = LSSTOP;
		KASSERT(l2->l_wchan == NULL);
		lwp_unlock_to(l2, spc->spc_lwplock);
	} else {
		p2->p_nrlwps = 1;
		p2->p_stat = SACTIVE;
		l2->l_stat = LSRUN;
		sched_enqueue(l2, false);
		lwp_unlock(l2);
	}

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	mutex_exit(p2->p_lock);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, sleep until it clears LP_VFORKWAIT.
	 */
#if 0
	while (l1->l_pflag & LP_VFORKWAIT) {
		cv_wait(&l1->l_waitcv, proc_lock);
	}
#else
	while (p2->p_lflag & PL_PPWAIT)
		cv_wait(&p1->p_waitcv, proc_lock);
#endif

	/*
	 * Let the parent know that we are tracing its child.
	 */
	if (tracefork) {
		ksiginfo_t ksi;

		KSI_INIT_EMPTY(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_lid = l1->l_lid;
		kpsignal(p1, &ksi, NULL);
	}
	mutex_exit(proc_lock);

	return 0;
}
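/*
 * Example (hedged sketch): fork(2) expressed in terms of fork1().  A
 * plain fork shares nothing with the parent (flags 0), delivers SIGCHLD
 * to the parent on exit, and lets fork1() pick the child's stack and
 * entry point (func NULL falls back to child_return).  The wrapper name
 * is illustrative; only the fork1() signature comes from the code above.
 */
#if 0	/* illustrative only */
static int
example_sys_fork(struct lwp *l, register_t *retval)
{
	return fork1(l, 0, SIGCHLD, NULL, 0, NULL, NULL, retval, NULL);
}
#endif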