/* * This is the protocol drain routine. * * No locks should be held when this is called. The drain routines have to * presently acquire some locks which raises the possibility of lock order * reversal. */ void mb_reclaim(void *junk) { #if 0 struct domain *dp; struct protosw *pr; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, "mb_reclaim()"); for (dp = domains; dp != NULL; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain != NULL) (*pr->pr_drain)(); #endif }
void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, vm_rendezvous_func_t func, void *arg) { int i; /* * Enforce that this function is called without any locks */ WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); restart: mtx_lock(&vm->rendezvous_mtx); if (vm->rendezvous_func != NULL) { /* * If a rendezvous is already in progress then we need to * call the rendezvous handler in case this 'vcpuid' is one * of the targets of the rendezvous. */ RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); mtx_unlock(&vm->rendezvous_mtx); vm_handle_rendezvous(vm, vcpuid); goto restart; } KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " "rendezvous is still in progress")); RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); vm->rendezvous_req_cpus = dest; CPU_ZERO(&vm->rendezvous_done_cpus); vm->rendezvous_arg = arg; vm_set_rendezvous_func(vm, func); mtx_unlock(&vm->rendezvous_mtx); /* * Wake up any sleeping vcpus and trigger a VM-exit in any running * vcpus so they handle the rendezvous as soon as possible. */ for (i = 0; i < VM_MAXCPU; i++) { if (CPU_ISSET(i, &dest)) vcpu_notify_event(vm, i, false); } vm_handle_rendezvous(vm, vcpuid); }
/* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling ureadc()"); again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_base; *iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); }
/* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { struct bus_dma_tag_common *tc; int error; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); if (parent == NULL) { error = bus_dma_bounce_impl.tag_create(parent, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } else { tc = (struct bus_dma_tag_common *)parent; error = tc->impl->tag_create(parent, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } return (error); }
/* * Define the code needed before returning to user mode, for trap and * syscall. */ void userret(struct thread *td, struct trapframe *frame) { struct proc *p = td->td_proc; CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid, td->td_name); KASSERT((p->p_flag & P_WEXIT) == 0, ("Exiting process returns to usermode")); #if 0 #ifdef DIAGNOSTIC /* Check that we called signotify() enough. */ PROC_LOCK(p); thread_lock(td); if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 || (td->td_flags & TDF_ASTPENDING) == 0)) printf("failed to set signal flags properly for ast()\n"); thread_unlock(td); PROC_UNLOCK(p); #endif #endif #ifdef KTRACE KTRUSERRET(td); #endif /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio); /* * Let the scheduler adjust our priority etc. */ sched_userret(td); #ifdef XEN PT_UPDATES_FLUSH(); #endif /* * Check for misbehavior. * * In case there is a callchain tracing ongoing because of * hwpmc(4), skip the scheduler pinning check. * hwpmc(4) subsystem, infact, will collect callchain informations * at ast() checkpoint, which is past userret(). */ WITNESS_WARN(WARN_PANIC, NULL, "userret: returning"); KASSERT(td->td_critnest == 0, ("userret: Returning in a critical section")); KASSERT(td->td_locks == 0, ("userret: Returning with %d locks held", td->td_locks)); KASSERT(td->td_rw_rlocks == 0, ("userret: Returning with %d rwlocks held in read mode", td->td_rw_rlocks)); KASSERT((td->td_pflags & TDP_NOFAULTING) == 0, ("userret: Returning with pagefaults disabled")); KASSERT(td->td_no_sleeping == 0, ("userret: Returning with sleep disabled")); KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, ("userret: Returning with with pinned thread")); KASSERT(td->td_vp_reserv == 0, ("userret: Returning while holding vnode reservation")); KASSERT((td->td_flags & TDF_SBDRY) == 0, ("userret: Returning with stop signals deferred")); #ifdef VIMAGE /* Unfortunately td_vnet_lpush needs VNET_DEBUG. */ VNET_ASSERT(curvnet == NULL, ("%s: Returning on td %p (pid %d, %s) with vnet %p set in %s", __func__, td, p->p_pid, td->td_name, curvnet, (td->td_vnet_lpush != NULL) ? td->td_vnet_lpush : "N/A")); #endif #ifdef RACCT PROC_LOCK(p); while (p->p_throttled == 1) msleep(p->p_racct, &p->p_mtx, 0, "racct", 0); PROC_UNLOCK(p); #endif }
/* * Process an asynchronous software trap. * This is relatively easy. * This function will return with preemption disabled. */ void ast(struct trapframe *framep) { struct thread *td; struct proc *p; int flags; int sig; td = curthread; p = td->td_proc; CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode")); WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode"); mtx_assert(&Giant, MA_NOTOWNED); THREAD_LOCK_ASSERT(td, MA_NOTOWNED); td->td_frame = framep; td->td_pticks = 0; /* * This updates the td_flag's for the checks below in one * "atomic" operation with turning off the astpending flag. * If another AST is triggered while we are handling the * AST's saved in flags, the astpending flag will be set and * ast() will be called again. */ thread_lock(td); flags = td->td_flags; td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK | TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND); thread_unlock(td); PCPU_INC(cnt.v_trap); if (td->td_ucred != p->p_ucred) cred_update_thread(td); if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) { addupc_task(td, td->td_profil_addr, td->td_profil_ticks); td->td_profil_ticks = 0; td->td_pflags &= ~TDP_OWEUPC; } #ifdef HWPMC_HOOKS /* Handle Software PMC callchain capture. */ if (PMC_IS_PENDING_CALLCHAIN(td)) PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_USER_CALLCHAIN_SOFT, (void *) framep); #endif if (flags & TDF_ALRMPEND) { PROC_LOCK(p); kern_psignal(p, SIGVTALRM); PROC_UNLOCK(p); } if (flags & TDF_PROFPEND) { PROC_LOCK(p); kern_psignal(p, SIGPROF); PROC_UNLOCK(p); } #ifdef MAC if (flags & TDF_MACPEND) mac_thread_userret(td); #endif if (flags & TDF_NEEDRESCHED) { #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 1, __func__); #endif thread_lock(td); sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL); thread_unlock(td); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 1, __func__); #endif } /* * Check for signals. Unlocked reads of p_pendingcnt or * p_siglist might cause process-directed signal to be handled * later. */ if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 || !SIGISEMPTY(p->p_siglist)) { PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) postsig(sig); mtx_unlock(&p->p_sigacts->ps_mtx); PROC_UNLOCK(p); } /* * We need to check to see if we have to exit or wait due to a * single threading requirement or some other STOP condition. */ if (flags & TDF_NEEDSUSPCHK) { PROC_LOCK(p); thread_suspend_check(0); PROC_UNLOCK(p); } if (td->td_pflags & TDP_OLDMASK) { td->td_pflags &= ~TDP_OLDMASK; kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0); } userret(td, framep); }
static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault) { #ifndef __rtems__ struct thread *td; #endif /* __rtems__ */ struct iovec *iov; u_int cnt; int error, newflags, save; #ifndef __rtems__ td = curthread; #endif /* __rtems__ */ error = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td, ("uiomove proc")); if (!nofault) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); #ifndef __rtems__ /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */ newflags = TDP_DEADLKTREAT; if (uio->uio_segflg == UIO_USERSPACE && nofault) { /* * Fail if a non-spurious page fault occurs. */ newflags |= TDP_NOFAULTING | TDP_RESETSPUR; } save = curthread_pflags_set(newflags); #else /* __rtems__ */ (void) newflags; (void) save; #endif /* __rtems__ */ while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: #ifndef __rtems__ if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); #endif /* __rtems__ */ if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: #ifndef __rtems__ curthread_pflags_restore(save); #endif /* __rtems__ */ return (error); }
/* * Exit: deallocate address space and other resources, change proc state to * zombie, and unlink proc from allproc and parent's lists. Save exit status * and rusage for wait(). Check for child processes and orphan them. */ void exit1(struct thread *td, int rv) { struct proc *p, *nq, *q; struct vnode *vtmp; struct vnode *ttyvp = NULL; struct plimit *plim; mtx_assert(&Giant, MA_NOTOWNED); p = td->td_proc; /* * XXX in case we're rebooting we just let init die in order to * work around an unsolved stack overflow seen very late during * shutdown on sparc64 when the gmirror worker process exists. */ if (p == initproc && rebooting == 0) { printf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } /* * MUST abort all other threads before proceeding past here. */ PROC_LOCK(p); while (p->p_flag & P_HADTHREADS) { /* * First check if some other thread got here before us. * If so, act appropriately: exit or suspend. */ thread_suspend_check(0); /* * Kill off the other threads. This requires * some co-operation from other parts of the kernel * so it may not be instantaneous. With this state set * any thread entering the kernel from userspace will * thread_exit() in trap(). Any thread attempting to * sleep will return immediately with EINTR or EWOULDBLOCK * which will hopefully force them to back out to userland * freeing resources as they go. Any thread attempting * to return to userland will thread_exit() from userret(). * thread_exit() will unsuspend us when the last of the * other threads exits. * If there is already a thread singler after resumption, * calling thread_single will fail; in that case, we just * re-check all suspension request, the thread should * either be suspended there or exit. */ if (!thread_single(SINGLE_EXIT)) break; /* * All other activity in this process is now stopped. * Threading support has been turned off. */ } KASSERT(p->p_numthreads == 1, ("exit1: proc %p exiting with %d threads", p, p->p_numthreads)); racct_sub(p, RACCT_NTHR, 1); /* * Wakeup anyone in procfs' PIOCWAIT. They should have a hold * on our vmspace, so we should block below until they have * released their reference to us. Note that if they have * requested S_EXIT stops we will block here until they ack * via PIOCCONT. */ _STOPEVENT(p, S_EXIT, rv); /* * Ignore any pending request to stop due to a stop signal. * Once P_WEXIT is set, future requests will be ignored as * well. */ p->p_flag &= ~P_STOPPED_SIG; KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped")); /* * Note that we are exiting and do another wakeup of anyone in * PIOCWAIT in case they aren't listening for S_EXIT stops or * decided to wait again after we told them we are exiting. */ p->p_flag |= P_WEXIT; wakeup(&p->p_stype); /* * Wait for any processes that have a hold on our vmspace to * release their reference. */ while (p->p_lock > 0) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); p->p_xstat = rv; /* Let event handler change exit status */ PROC_UNLOCK(p); /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); #ifdef AUDIT /* * The Sun BSM exit token contains two components: an exit status as * passed to exit(), and a return value to indicate what sort of exit * it was. The exit status is WEXITSTATUS(rv), but it's not clear * what the return value is. */ AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0); AUDIT_SYSCALL_EXIT(0, td); #endif /* Are we a task leader? */ if (p == p->p_leader) { mtx_lock(&ppeers_lock); q = p->p_peers; while (q != NULL) { PROC_LOCK(q); kern_psignal(q, SIGKILL); PROC_UNLOCK(q); q = q->p_peers; } while (p->p_peers != NULL) msleep(p, &ppeers_lock, PWAIT, "exit1", 0); mtx_unlock(&ppeers_lock); } /* * Check if any loadable modules need anything done at process exit. * E.g. SYSV IPC stuff * XXX what if one of these generates an error? */ EVENTHANDLER_INVOKE(process_exit, p); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ PROC_LOCK(p); rv = p->p_xstat; /* Event handler could change exit status */ stopprofclock(p); p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE); /* * Stop the real interval timer. If the handler is currently * executing, prevent it from rearming itself and let it finish. */ if (timevalisset(&p->p_realtimer.it_value) && callout_stop(&p->p_itcallout) == 0) { timevalclear(&p->p_realtimer.it_interval); msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0); KASSERT(!timevalisset(&p->p_realtimer.it_value), ("realtime timer is still armed")); } PROC_UNLOCK(p); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pid. */ funsetownlst(&p->p_sigiolst); /* * If this process has an nlminfo data area (for lockd), release it */ if (nlminfo_release_p != NULL && p->p_nlminfo != NULL) (*nlminfo_release_p)(p); /* * Close open files and release open-file table. * This may block! */ fdescfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Remove ourself from our leader's peer list and wake our leader. */ mtx_lock(&ppeers_lock); if (p->p_leader->p_peers) { q = p->p_leader; while (q->p_peers != p) q = q->p_peers; q->p_peers = p->p_peers; wakeup(p->p_leader); } mtx_unlock(&ppeers_lock); vmspace_exit(td); sx_xlock(&proctree_lock); if (SESS_LEADER(p)) { struct session *sp = p->p_session; struct tty *tp; /* * s_ttyp is not zero'd; we use this to indicate that * the session once had a controlling terminal. (for * logging and informational purposes) */ SESS_LOCK(sp); ttyvp = sp->s_ttyvp; tp = sp->s_ttyp; sp->s_ttyvp = NULL; sp->s_ttydp = NULL; sp->s_leader = NULL; SESS_UNLOCK(sp); /* * Signal foreground pgrp and revoke access to * controlling terminal if it has not been revoked * already. * * Because the TTY may have been revoked in the mean * time and could already have a new session associated * with it, make sure we don't send a SIGHUP to a * foreground process group that does not belong to this * session. */ if (tp != NULL) { tty_lock(tp); if (tp->t_session == sp) tty_signal_pgrp(tp, SIGHUP); tty_unlock(tp); } if (ttyvp != NULL) { sx_xunlock(&proctree_lock); if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) { VOP_REVOKE(ttyvp, REVOKEALL); VOP_UNLOCK(ttyvp, 0); } sx_xlock(&proctree_lock); } } fixjobc(p, p->p_pgrp, 0); sx_xunlock(&proctree_lock); (void)acct_process(td); /* Release the TTY now we've unlocked everything. */ if (ttyvp != NULL) vrele(ttyvp); #ifdef KTRACE ktrprocexit(td); #endif /* * Release reference to text vnode */ if ((vtmp = p->p_textvp) != NULL) { p->p_textvp = NULL; vrele(vtmp); } /* * Release our limits structure. */ plim = p->p_limit; p->p_limit = NULL; lim_free(plim); tidhash_remove(td); /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); /* * Call machine-dependent code to release any * machine-dependent resources other than the address space. * The address space is released by "vmspace_exitfree(p)" in * vm_waitproc(). */ cpu_exit(td); WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid); /* * Reparent all of our children to init. */ sx_xlock(&proctree_lock); q = LIST_FIRST(&p->p_children); if (q != NULL) /* only need this if any child is S_ZOMB */ wakeup(initproc); for (; q != NULL; q = nq) { nq = LIST_NEXT(q, p_sibling); PROC_LOCK(q); proc_reparent(q, initproc); q->p_sigparent = SIGCHLD; /* * Traced processes are killed * since their existence means someone is screwing up. */ if (q->p_flag & P_TRACED) { struct thread *temp; /* * Since q was found on our children list, the * proc_reparent() call moved q to the orphan * list due to present P_TRACED flag. Clear * orphan link for q now while q is locked. */ clear_orphan(q); q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE); FOREACH_THREAD_IN_PROC(q, temp) temp->td_dbgflags &= ~TDB_SUSPEND; kern_psignal(q, SIGKILL); } PROC_UNLOCK(q); } /* * Also get rid of our orphans. */ while ((q = LIST_FIRST(&p->p_orphans)) != NULL) { PROC_LOCK(q); clear_orphan(q); PROC_UNLOCK(q); } /* Save exit status. */ PROC_LOCK(p); p->p_xthread = td; /* Tell the prison that we are gone. */ prison_proc_free(p->p_ucred->cr_prison); #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exit if it * has declared an interest. */ if (dtrace_fasttrap_exit) dtrace_fasttrap_exit(p); #endif /* * Notify interested parties of our demise. */ KNOTE_LOCKED(&p->p_klist, NOTE_EXIT); #ifdef KDTRACE_HOOKS int reason = CLD_EXITED; if (WCOREDUMP(rv)) reason = CLD_DUMPED; else if (WIFSIGNALED(rv)) reason = CLD_KILLED; SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0); #endif /* * Just delete all entries in the p_klist. At this point we won't * report any more events, and there are nasty race conditions that * can beat us if we don't. */ knlist_clear(&p->p_klist, 1); /* * If this is a process with a descriptor, we may not need to deliver * a signal to the parent. proctree_lock is held over * procdesc_exit() to serialize concurrent calls to close() and * exit(). */ if (p->p_procdesc == NULL || procdesc_exit(p)) { /* * Notify parent that we're gone. If parent has the * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN, * notify process 1 instead (and hope it will handle this * situation). */ PROC_LOCK(p->p_pptr); mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { struct proc *pp; mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); pp = p->p_pptr; PROC_UNLOCK(pp); proc_reparent(p, initproc); p->p_sigparent = SIGCHLD; PROC_LOCK(p->p_pptr); /* * Notify parent, so in case he was wait(2)ing or * executing waitpid(2) with our pid, he will * continue. */ wakeup(pp); } else mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr == initproc) kern_psignal(p->p_pptr, SIGCHLD); else if (p->p_sigparent != 0) { if (p->p_sigparent == SIGCHLD) childproc_exited(p); else /* LINUX thread */ kern_psignal(p->p_pptr, p->p_sigparent); } } else PROC_LOCK(p->p_pptr); sx_xunlock(&proctree_lock); /* * The state PRS_ZOMBIE prevents other proesses from sending * signal to the process, to avoid memory leak, we free memory * for signal queue at the time when the state is set. */ sigqueue_flush(&p->p_sigqueue); sigqueue_flush(&td->td_sigqueue); /* * We have to wait until after acquiring all locks before * changing p_state. We need to avoid all possible context * switches (including ones from blocking on a mutex) while * marked as a zombie. We also have to set the zombie state * before we release the parent process' proc lock to avoid * a lost wakeup. So, we first call wakeup, then we grab the * sched lock, update the state, and release the parent process' * proc lock. */ wakeup(p->p_pptr); cv_broadcast(&p->p_pwait); sched_exit(p->p_pptr, td); PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); /* * Hopefully no one will try to deliver a signal to the process this * late in the game. */ knlist_destroy(&p->p_klist); /* * Save our children's rusage information in our exit rusage. */ ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux); /* * Make sure the scheduler takes this thread out of its tables etc. * This will also release this thread's reference to the ucred. * Other thread parts to release include pcb bits and such. */ thread_exit(); }
int uiomove(void *cp, int n, struct uio *uio) { struct thread *td = curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); #ifndef __rtems__ save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; #endif while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: #ifndef __rtems__ if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; #endif return (error); }
int uiofill(uint8_t val, int n, struct uio *uio) { struct thread *td; struct iovec *iov; size_t cnt; int error, save; #define UIOFILL_MAXBUF 256 uint8_t buf[UIOFILL_MAXBUF]; td = curthread; error = 0; KASSERT(uio->uio_rw == UIO_READ, ("uiofill: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td, ("uiofill proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiofill()"); /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */ save = curthread_pflags_set(TDP_DEADLKTREAT); if (uio->uio_segflg == UIO_USERSPACE) memset(buf, val, imax(UIOFILL_MAXBUF, n)); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: maybe_yield(); error = copyout(buf, iov->iov_base, cnt); if (error) goto out; break; case UIO_SYSSPACE: memset(iov->iov_base, val, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; n -= cnt; } out: curthread_pflags_restore(save); return (error); }
void ia32_syscall(struct trapframe *frame) { caddr_t params; int i; struct sysent *callp; struct thread *td = curthread; struct proc *p = td->td_proc; register_t orig_tf_rflags; int error; int narg; u_int32_t args[8]; u_int64_t args64[8]; u_int code; ksiginfo_t ksi; PCPU_INC(cnt.v_syscall); td->td_pticks = 0; td->td_frame = frame; if (td->td_ucred != p->p_ucred) cred_update_thread(td); params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t); code = frame->tf_rax; orig_tf_rflags = frame->tf_rflags; if (p->p_sysent->sv_prepsyscall) { /* * The prep code is MP aware. */ (*p->p_sysent->sv_prepsyscall)(frame, args, &code, ¶ms); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. * fuword is MP aware. */ if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = fuword32(params); params += sizeof(int); } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. * We use a 32-bit fetch in case params is not * aligned. */ code = fuword32(params); params += sizeof(quad_t); } } if (p->p_sysent->sv_mask) code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg; /* * copyin and the ktrsyscall()/ktrsysret() code is MP-aware */ if (params != NULL && narg != 0) error = copyin(params, (caddr_t)args, (u_int)(narg * sizeof(int))); else error = 0; for (i = 0; i < narg; i++) args64[i] = args[i]; #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(code, narg, args64); #endif CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td, td->td_proc->p_pid, td->td_proc->p_comm, code); if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; STOPEVENT(p, S_SCE, narg); PTRACESTOP_SC(p, td, S_PT_SCE); AUDIT_SYSCALL_ENTER(code, td); error = (*callp->sy_call)(td, args64); AUDIT_SYSCALL_EXIT(error, td); } switch (error) { case 0: frame->tf_rax = td->td_retval[0]; frame->tf_rdx = td->td_retval[1]; frame->tf_rflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ frame->tf_rip -= frame->tf_err; break; case EJUSTRETURN: break; default: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } frame->tf_rax = error; frame->tf_rflags |= PSL_C; break; } /* * Traced syscall. */ if (orig_tf_rflags & PSL_T) { frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_rip; trapsignal(td, &ksi); } /* * Check for misbehavior. */ WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???"); KASSERT(td->td_critnest == 0, ("System call %s returning in a critical section", (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???")); KASSERT(td->td_locks == 0, ("System call %s returning with %d locks held", (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???", td->td_locks)); /* * Handle reschedule and other end-of-syscall issues */ userret(td, frame); CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td, td->td_proc->p_pid, td->td_proc->p_comm, code); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); PTRACESTOP_SC(p, td, S_PT_SCX); }