/* * copied from amd64/amd64/machdep.c * * Send an interrupt to process. */ static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct l_rt_sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; caddr_t sp; struct trapframe *regs; int sig, code; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u", catcher, sig, mask, code); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe); } else sp = (caddr_t)regs->tf_rsp - sizeof(struct l_rt_sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul); mtx_unlock(&psp->ps_mtx); /* Translate the signal. */ sig = bsd_to_linux_signal(sig); /* Save user context. */ bzero(&sf, sizeof(sf)); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_sigmask); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_mcontext.sc_mask); sf.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); sf.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); sf.sf_sc.uc_mcontext.sc_rdi = regs->tf_rdi; sf.sf_sc.uc_mcontext.sc_rsi = regs->tf_rsi; sf.sf_sc.uc_mcontext.sc_rdx = regs->tf_rdx; sf.sf_sc.uc_mcontext.sc_rbp = regs->tf_rbp; sf.sf_sc.uc_mcontext.sc_rbx = regs->tf_rbx; sf.sf_sc.uc_mcontext.sc_rcx = regs->tf_rcx; sf.sf_sc.uc_mcontext.sc_rax = regs->tf_rax; sf.sf_sc.uc_mcontext.sc_rip = regs->tf_rip; sf.sf_sc.uc_mcontext.sc_rsp = regs->tf_rsp; sf.sf_sc.uc_mcontext.sc_r8 = regs->tf_r8; sf.sf_sc.uc_mcontext.sc_r9 = regs->tf_r9; sf.sf_sc.uc_mcontext.sc_r10 = regs->tf_r10; sf.sf_sc.uc_mcontext.sc_r11 = regs->tf_r11; sf.sf_sc.uc_mcontext.sc_r12 = regs->tf_r12; sf.sf_sc.uc_mcontext.sc_r13 = regs->tf_r13; sf.sf_sc.uc_mcontext.sc_r14 = regs->tf_r14; sf.sf_sc.uc_mcontext.sc_r15 = regs->tf_r15; sf.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; sf.sf_sc.uc_mcontext.sc_rflags = regs->tf_rflags; sf.sf_sc.uc_mcontext.sc_err = regs->tf_err; sf.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); sf.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rax = 0; regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ regs->tf_rdx = (register_t)&sfp->sf_sc; /* arg 3 in %rdx */ sf.sf_handler = catcher; /* Fill in POSIX parts */ ksiginfo_to_lsiginfo(ksi, &sf.sf_si, sig); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = linux_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); }
static void sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying) { struct sfxge_softc *sc; unsigned int index; struct sfxge_evq *evq; unsigned int batch; unsigned int rxfill; unsigned int mblksize; int ntodo; efsys_dma_addr_t addr[SFXGE_REFILL_BATCH]; sc = rxq->sc; index = rxq->index; evq = sc->evq[index]; prefetch_read_many(sc->enp); prefetch_read_many(rxq->common); mtx_assert(&evq->lock, MA_OWNED); if (rxq->init_state != SFXGE_RXQ_STARTED) return; rxfill = rxq->added - rxq->completed; KASSERT(rxfill <= EFX_RXQ_LIMIT(SFXGE_NDESCS), ("rxfill > EFX_RXQ_LIMIT(SFXGE_NDESCS)")); ntodo = min(EFX_RXQ_LIMIT(SFXGE_NDESCS) - rxfill, target); KASSERT(ntodo <= EFX_RXQ_LIMIT(SFXGE_NDESCS), ("ntodo > EFX_RQX_LIMIT(SFXGE_NDESCS)")); if (ntodo == 0) return; batch = 0; mblksize = sc->rx_buffer_size; while (ntodo-- > 0) { unsigned int id; struct sfxge_rx_sw_desc *rx_desc; bus_dma_segment_t seg; struct mbuf *m; id = (rxq->added + batch) & (SFXGE_NDESCS - 1); rx_desc = &rxq->queue[id]; KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL")); rx_desc->flags = EFX_DISCARD; m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc); if (m == NULL) break; sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg); addr[batch++] = seg.ds_addr; if (batch == SFXGE_REFILL_BATCH) { efx_rx_qpost(rxq->common, addr, mblksize, batch, rxq->completed, rxq->added); rxq->added += batch; batch = 0; } } if (ntodo != 0) sfxge_rx_schedule_refill(rxq, retrying); if (batch != 0) { efx_rx_qpost(rxq->common, addr, mblksize, batch, rxq->completed, rxq->added); rxq->added += batch; } /* Make the descriptors visible to the hardware */ bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map, BUS_DMASYNC_PREWRITE); efx_rx_qpush(rxq->common, rxq->added); }
/* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int oonstack, i; int sig, code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); #ifdef DEBUG if (ldebug(sendsig)) printf(ARGS(sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Build the argument list for the signal handler. */ if (p->p_sysent->sv_sigtbl) if (sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* * Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__bits[0]; frame.sf_sc.sc_gs = regs->tf_gs; frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_rdi; frame.sf_sc.sc_esi = regs->tf_rsi; frame.sf_sc.sc_ebp = regs->tf_rbp; frame.sf_sc.sc_ebx = regs->tf_rbx; frame.sf_sc.sc_edx = regs->tf_rdx; frame.sf_sc.sc_ecx = regs->tf_rcx; frame.sf_sc.sc_eax = regs->tf_rax; frame.sf_sc.sc_eip = regs->tf_rip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_rflags; frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) frame.sf_extramask[i] = lmask.__bits[i+1]; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = p->p_sysent->sv_sigcode_base; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); }
/* * Exit: deallocate address space and other resources, change proc state to * zombie, and unlink proc from allproc and parent's lists. Save exit status * and rusage for wait(). Check for child processes and orphan them. */ void exit1(struct thread *td, int rv) { struct proc *p, *nq, *q, *t; struct thread *tdt; struct vnode *ttyvp = NULL; mtx_assert(&Giant, MA_NOTOWNED); p = td->td_proc; /* * XXX in case we're rebooting we just let init die in order to * work around an unsolved stack overflow seen very late during * shutdown on sparc64 when the gmirror worker process exists. */ if (p == initproc && rebooting == 0) { printf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } /* * MUST abort all other threads before proceeding past here. */ PROC_LOCK(p); while (p->p_flag & P_HADTHREADS) { /* * First check if some other thread got here before us. * If so, act appropriately: exit or suspend. */ thread_suspend_check(0); /* * Kill off the other threads. This requires * some co-operation from other parts of the kernel * so it may not be instantaneous. With this state set * any thread entering the kernel from userspace will * thread_exit() in trap(). Any thread attempting to * sleep will return immediately with EINTR or EWOULDBLOCK * which will hopefully force them to back out to userland * freeing resources as they go. Any thread attempting * to return to userland will thread_exit() from userret(). * thread_exit() will unsuspend us when the last of the * other threads exits. * If there is already a thread singler after resumption, * calling thread_single will fail; in that case, we just * re-check all suspension request, the thread should * either be suspended there or exit. */ if (!thread_single(SINGLE_EXIT)) break; /* * All other activity in this process is now stopped. * Threading support has been turned off. */ } KASSERT(p->p_numthreads == 1, ("exit1: proc %p exiting with %d threads", p, p->p_numthreads)); racct_sub(p, RACCT_NTHR, 1); /* * Wakeup anyone in procfs' PIOCWAIT. They should have a hold * on our vmspace, so we should block below until they have * released their reference to us. Note that if they have * requested S_EXIT stops we will block here until they ack * via PIOCCONT. */ _STOPEVENT(p, S_EXIT, rv); /* * Ignore any pending request to stop due to a stop signal. * Once P_WEXIT is set, future requests will be ignored as * well. */ p->p_flag &= ~P_STOPPED_SIG; KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped")); /* * Note that we are exiting and do another wakeup of anyone in * PIOCWAIT in case they aren't listening for S_EXIT stops or * decided to wait again after we told them we are exiting. */ p->p_flag |= P_WEXIT; wakeup(&p->p_stype); /* * Wait for any processes that have a hold on our vmspace to * release their reference. */ while (p->p_lock > 0) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); p->p_xstat = rv; /* Let event handler change exit status */ PROC_UNLOCK(p); /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); #ifdef AUDIT /* * The Sun BSM exit token contains two components: an exit status as * passed to exit(), and a return value to indicate what sort of exit * it was. The exit status is WEXITSTATUS(rv), but it's not clear * what the return value is. */ AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0); AUDIT_SYSCALL_EXIT(0, td); #endif /* Are we a task leader with peers? */ if (p->p_peers != NULL && p == p->p_leader) { mtx_lock(&ppeers_lock); q = p->p_peers; while (q != NULL) { PROC_LOCK(q); kern_psignal(q, SIGKILL); PROC_UNLOCK(q); q = q->p_peers; } while (p->p_peers != NULL) msleep(p, &ppeers_lock, PWAIT, "exit1", 0); mtx_unlock(&ppeers_lock); } /* * Check if any loadable modules need anything done at process exit. * E.g. SYSV IPC stuff * XXX what if one of these generates an error? */ EVENTHANDLER_INVOKE(process_exit, p); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ PROC_LOCK(p); rv = p->p_xstat; /* Event handler could change exit status */ stopprofclock(p); p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE); /* * Stop the real interval timer. If the handler is currently * executing, prevent it from rearming itself and let it finish. */ if (timevalisset(&p->p_realtimer.it_value) && callout_stop(&p->p_itcallout) == 0) { timevalclear(&p->p_realtimer.it_interval); msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0); KASSERT(!timevalisset(&p->p_realtimer.it_value), ("realtime timer is still armed")); } PROC_UNLOCK(p); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pid. */ funsetownlst(&p->p_sigiolst); /* * If this process has an nlminfo data area (for lockd), release it */ if (nlminfo_release_p != NULL && p->p_nlminfo != NULL) (*nlminfo_release_p)(p); /* * Close open files and release open-file table. * This may block! */ fdescfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Remove ourself from our leader's peer list and wake our leader. */ if (p->p_leader->p_peers != NULL) { mtx_lock(&ppeers_lock); if (p->p_leader->p_peers != NULL) { q = p->p_leader; while (q->p_peers != p) q = q->p_peers; q->p_peers = p->p_peers; wakeup(p->p_leader); } mtx_unlock(&ppeers_lock); } vmspace_exit(td); sx_xlock(&proctree_lock); if (SESS_LEADER(p)) { struct session *sp = p->p_session; struct tty *tp; /* * s_ttyp is not zero'd; we use this to indicate that * the session once had a controlling terminal. (for * logging and informational purposes) */ SESS_LOCK(sp); ttyvp = sp->s_ttyvp; tp = sp->s_ttyp; sp->s_ttyvp = NULL; sp->s_ttydp = NULL; sp->s_leader = NULL; SESS_UNLOCK(sp); /* * Signal foreground pgrp and revoke access to * controlling terminal if it has not been revoked * already. * * Because the TTY may have been revoked in the mean * time and could already have a new session associated * with it, make sure we don't send a SIGHUP to a * foreground process group that does not belong to this * session. */ if (tp != NULL) { tty_lock(tp); if (tp->t_session == sp) tty_signal_pgrp(tp, SIGHUP); tty_unlock(tp); } if (ttyvp != NULL) { sx_xunlock(&proctree_lock); if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) { VOP_REVOKE(ttyvp, REVOKEALL); VOP_UNLOCK(ttyvp, 0); } sx_xlock(&proctree_lock); } } fixjobc(p, p->p_pgrp, 0); sx_xunlock(&proctree_lock); (void)acct_process(td); /* Release the TTY now we've unlocked everything. */ if (ttyvp != NULL) vrele(ttyvp); #ifdef KTRACE ktrprocexit(td); #endif /* * Release reference to text vnode */ if (p->p_textvp != NULL) { vrele(p->p_textvp); p->p_textvp = NULL; } /* * Release our limits structure. */ lim_free(p->p_limit); p->p_limit = NULL; tidhash_remove(td); /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); /* * Call machine-dependent code to release any * machine-dependent resources other than the address space. * The address space is released by "vmspace_exitfree(p)" in * vm_waitproc(). */ cpu_exit(td); WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid); /* * Reparent all children processes: * - traced ones to the original parent (or init if we are that parent) * - the rest to init */ sx_xlock(&proctree_lock); q = LIST_FIRST(&p->p_children); if (q != NULL) /* only need this if any child is S_ZOMB */ wakeup(initproc); for (; q != NULL; q = nq) { nq = LIST_NEXT(q, p_sibling); PROC_LOCK(q); q->p_sigparent = SIGCHLD; if (!(q->p_flag & P_TRACED)) { proc_reparent(q, initproc); } else { /* * Traced processes are killed since their existence * means someone is screwing up. */ t = proc_realparent(q); if (t == p) { proc_reparent(q, initproc); } else { PROC_LOCK(t); proc_reparent(q, t); PROC_UNLOCK(t); } /* * Since q was found on our children list, the * proc_reparent() call moved q to the orphan * list due to present P_TRACED flag. Clear * orphan link for q now while q is locked. */ clear_orphan(q); q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE); FOREACH_THREAD_IN_PROC(q, tdt) tdt->td_dbgflags &= ~TDB_SUSPEND; kern_psignal(q, SIGKILL); } PROC_UNLOCK(q); } /* * Also get rid of our orphans. */ while ((q = LIST_FIRST(&p->p_orphans)) != NULL) { PROC_LOCK(q); clear_orphan(q); PROC_UNLOCK(q); } /* Save exit status. */ PROC_LOCK(p); p->p_xthread = td; /* Tell the prison that we are gone. */ prison_proc_free(p->p_ucred->cr_prison); #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exit if it * has declared an interest. */ if (dtrace_fasttrap_exit) dtrace_fasttrap_exit(p); #endif /* * Notify interested parties of our demise. */ KNOTE_LOCKED(&p->p_klist, NOTE_EXIT); #ifdef KDTRACE_HOOKS int reason = CLD_EXITED; if (WCOREDUMP(rv)) reason = CLD_DUMPED; else if (WIFSIGNALED(rv)) reason = CLD_KILLED; SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0); #endif /* * Just delete all entries in the p_klist. At this point we won't * report any more events, and there are nasty race conditions that * can beat us if we don't. */ knlist_clear(&p->p_klist, 1); /* * If this is a process with a descriptor, we may not need to deliver * a signal to the parent. proctree_lock is held over * procdesc_exit() to serialize concurrent calls to close() and * exit(). */ if (p->p_procdesc == NULL || procdesc_exit(p)) { /* * Notify parent that we're gone. If parent has the * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN, * notify process 1 instead (and hope it will handle this * situation). */ PROC_LOCK(p->p_pptr); mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { struct proc *pp; mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); pp = p->p_pptr; PROC_UNLOCK(pp); proc_reparent(p, initproc); p->p_sigparent = SIGCHLD; PROC_LOCK(p->p_pptr); /* * Notify parent, so in case he was wait(2)ing or * executing waitpid(2) with our pid, he will * continue. */ wakeup(pp); } else mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr == initproc) kern_psignal(p->p_pptr, SIGCHLD); else if (p->p_sigparent != 0) { if (p->p_sigparent == SIGCHLD) childproc_exited(p); else /* LINUX thread */ kern_psignal(p->p_pptr, p->p_sigparent); } } else PROC_LOCK(p->p_pptr); sx_xunlock(&proctree_lock); /* * The state PRS_ZOMBIE prevents other proesses from sending * signal to the process, to avoid memory leak, we free memory * for signal queue at the time when the state is set. */ sigqueue_flush(&p->p_sigqueue); sigqueue_flush(&td->td_sigqueue); /* * We have to wait until after acquiring all locks before * changing p_state. We need to avoid all possible context * switches (including ones from blocking on a mutex) while * marked as a zombie. We also have to set the zombie state * before we release the parent process' proc lock to avoid * a lost wakeup. So, we first call wakeup, then we grab the * sched lock, update the state, and release the parent process' * proc lock. */ wakeup(p->p_pptr); cv_broadcast(&p->p_pwait); sched_exit(p->p_pptr, td); PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); /* * Hopefully no one will try to deliver a signal to the process this * late in the game. */ knlist_destroy(&p->p_klist); /* * Save our children's rusage information in our exit rusage. */ PROC_STATLOCK(p); ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux); PROC_STATUNLOCK(p); /* * Make sure the scheduler takes this thread out of its tables etc. * This will also release this thread's reference to the ucred. * Other thread parts to release include pcb bits and such. */ thread_exit(); }
static void cheriabi_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct trapframe *regs; struct cheri_frame *capreg; struct sigacts *psp; struct sigframe_c sf, *sfp; vm_offset_t sp; int cheri_is_sandboxed; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; capreg = &td->td_pcb->pcb_cheriframe; oonstack = sigonstack(regs->sp); /* * CHERI affects signal delivery in the following ways: * * (1) Additional capability-coprocessor state is exposed via * extensions to the context frame placed on the stack. * * (2) If the user $pcc doesn't include CHERI_PERM_SYSCALL, then we * consider user state to be 'sandboxed' and therefore to require * special delivery handling which includes a domain-switch to the * thread's context-switch domain. (This is done by * cheri_sendsig()). * * (3) If an alternative signal stack is not defined, and we are in a * 'sandboxed' state, then we have two choices: (a) if the signal * is of type SA_SANDBOX_UNWIND, we will automatically unwind the * trusted stack by one frame; (b) otherwise, we will terminate * the process unconditionally. */ cheri_is_sandboxed = cheri_signal_sandboxed(td); /* * We provide the ability to drop into the sandbox in two different * circumstances: (1) if the code running is sandboxed; and (2) if the * fault is a CHERI protection fault. Handle both here for the * non-unwind case. Do this before we rewrite any general-purpose or * capability register state for the thread. */ #if DDB if (cheri_is_sandboxed && security_cheri_debugger_on_sandbox_signal) kdb_enter(KDB_WHY_CHERI, "Signal delivery to CHERI sandbox"); else if (sig == SIGPROT && security_cheri_debugger_on_sigprot) kdb_enter(KDB_WHY_CHERI, "SIGPROT delivered outside sandbox"); #endif /* * If a thread is running sandboxed, we can't rely on $sp which may * not point at a valid stack in the ambient context, or even be * maliciously manipulated. We must therefore always use the * alternative stack. We are also therefore unable to tell whether we * are on the alternative stack, so must clear 'oonstack' here. * * XXXRW: This requires significant further thinking; however, the net * upshot is that it is not a good idea to do an object-capability * invoke() from a signal handler, as with so many other things in * life. */ if (cheri_is_sandboxed != 0) oonstack = 0; /* save user context */ bzero(&sf, sizeof(struct sigframe)); sf.sf_uc.uc_sigmask = *mask; #if 0 /* * XXX-BD: stack_t type differs and we can't just fake a capabilty. * We don't restore the value so what purpose does it serve? */ sf.sf_uc.uc_stack = td->td_sigstk; #endif sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs->pc; sf.sf_uc.uc_mcontext.mullo = regs->mullo; sf.sf_uc.uc_mcontext.mulhi = regs->mulhi; #if 0 /* XXX-BD: what actually makes sense here? */ sf.sf_uc.uc_mcontext.mc_tls = td->td_md.md_tls; #endif sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ bcopy((void *)®s->ast, (void *)&sf.sf_uc.uc_mcontext.mc_regs[1], sizeof(sf.sf_uc.uc_mcontext.mc_regs) - sizeof(register_t)); sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); bcopy((void *)&td->td_frame->f0, (void *)sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); } /* XXXRW: sf.sf_uc.uc_mcontext.sr seems never to be set? */ sf.sf_uc.uc_mcontext.cause = regs->cause; cheri_memcpy(&sf.sf_uc.uc_mcontext.mc_cheriframe, &td->td_pcb->pcb_cheriframe, sizeof(struct cheri_frame)); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (vm_offset_t)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size); } else { /* * Signals delivered when a CHERI sandbox is present must be * delivered on the alternative stack rather than a local one. * If an alternative stack isn't present, then terminate or * risk leaking capabilities (and control) to the sandbox (or * just crashing the sandbox). */ if (cheri_is_sandboxed) { mtx_unlock(&psp->ps_mtx); printf("pid %d, tid %d: signal in sandbox without " "alternative stack defined\n", td->td_proc->p_pid, td->td_tid); sigexit(td, SIGILL); /* NOTREACHED */ } sp = (vm_offset_t)regs->sp; } sp -= sizeof(struct sigframe_c); /* For CHERI, keep the stack pointer capability aligned. */ sp &= ~(CHERICAP_SIZE - 1); sfp = (void *)sp; /* Build the argument list for the signal handler. */ regs->a0 = sig; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ cheri_capability_set(&capreg->cf_c3, CHERI_CAP_USER_PERMS, CHERI_CAP_USER_OTYPE, (void *)(intptr_t)&sfp->sf_si, sizeof(sfp->sf_si), 0); cheri_capability_set(&capreg->cf_c4, CHERI_CAP_USER_PERMS, CHERI_CAP_USER_OTYPE, (void *)(intptr_t)&sfp->sf_uc, sizeof(sfp->sf_uc), 0); /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; /* * Write out badvaddr, but don't create a valid capability * since that might allow privlege amplification. * * XXX-BD: This probably isn't the right method. * XXX-BD: Do we want to set base or offset? */ *((uintptr_t *)&sf.sf_si.si_addr) = (uintptr_t)(void *)regs->badvaddr; } /* * XXX: No support for undocumented arguments to old style handlers. */ mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyoutcap(&sf, sfp, sizeof(sf)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); sigexit(td, SIGILL); /* NOTREACHED */ } /* * Install CHERI signal-delivery register state for handler to run * in. As we don't install this in the CHERI frame on the user stack, * it will be (genrally) be removed automatically on sigreturn(). */ /* XXX-BD: this isn't quite right */ cheri_sendsig(td); regs->pc = (register_t)(intptr_t)catcher; regs->sp = (register_t)(intptr_t)sfp; cheri_capability_copy(&capreg->cf_c12, &psp->ps_sigcap[_SIG_IDX(sig)]); cheri_capability_copy(&capreg->cf_c17, &td->td_pcb->pcb_cherisignal.csig_sigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); }
static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int oonstack; int sig; int code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); /* * Build the argument list for the signal handler. */ if (p->p_sysent->sv_sigtbl) if (sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; frame.sf_siginfo = PTROUT(&fp->sf_si); frame.sf_ucontext = PTROUT(&fp->sf_sc); /* Fill in POSIX parts */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* * Build the signal context to be used by sigreturn. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = 0; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); #endif if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), fp, oonstack); #endif PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); }
void cfcs_action(struct cam_sim *sim, union ccb *ccb) { struct cfcs_softc *softc; int err; softc = (struct cfcs_softc *)cam_sim_softc(sim); mtx_assert(&softc->lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_SCSI_IO: { union ctl_io *io; struct ccb_scsiio *csio; csio = &ccb->csio; /* * Catch CCB flags, like physical address flags, that * indicate situations we currently can't handle. */ if (ccb->ccb_h.flags & CFCS_BAD_CCB_FLAGS) { ccb->ccb_h.status = CAM_REQ_INVALID; printf("%s: bad CCB flags %#x (all flags %#x)\n", __func__, ccb->ccb_h.flags & CFCS_BAD_CCB_FLAGS, ccb->ccb_h.flags); xpt_done(ccb); return; } /* * If we aren't online, there are no devices to see. */ if (softc->online == 0) { ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } io = ctl_alloc_io(softc->port.ctl_pool_ref); if (io == NULL) { printf("%s: can't allocate ctl_io\n", __func__); ccb->ccb_h.status = CAM_BUSY | CAM_DEV_QFRZN; xpt_freeze_devq(ccb->ccb_h.path, 1); xpt_done(ccb); return; } ctl_zero_io(io); /* Save pointers on both sides */ io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = ccb; ccb->ccb_h.io_ptr = io; /* * Only SCSI I/O comes down this path, resets, etc. come * down via the XPT_RESET_BUS/LUN CCBs below. */ io->io_hdr.io_type = CTL_IO_SCSI; io->io_hdr.nexus.initid.id = 1; io->io_hdr.nexus.targ_port = softc->port.targ_port; /* * XXX KDM how do we handle target IDs? */ io->io_hdr.nexus.targ_target.id = ccb->ccb_h.target_id; io->io_hdr.nexus.targ_lun = ccb->ccb_h.target_lun; /* * This tag scheme isn't the best, since we could in theory * have a very long-lived I/O and tag collision, especially * in a high I/O environment. But it should work well * enough for now. Since we're using unsigned ints, * they'll just wrap around. */ io->scsiio.tag_num = softc->cur_tag_num++; csio->tag_id = io->scsiio.tag_num; switch (csio->tag_action) { case CAM_TAG_ACTION_NONE: io->scsiio.tag_type = CTL_TAG_UNTAGGED; break; case MSG_SIMPLE_TASK: io->scsiio.tag_type = CTL_TAG_SIMPLE; break; case MSG_HEAD_OF_QUEUE_TASK: io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; break; case MSG_ORDERED_TASK: io->scsiio.tag_type = CTL_TAG_ORDERED; break; case MSG_ACA_TASK: io->scsiio.tag_type = CTL_TAG_ACA; break; default: io->scsiio.tag_type = CTL_TAG_UNTAGGED; printf("%s: unhandled tag type %#x!!\n", __func__, csio->tag_action); break; } if (csio->cdb_len > sizeof(io->scsiio.cdb)) { printf("%s: WARNING: CDB len %d > ctl_io space %zd\n", __func__, csio->cdb_len, sizeof(io->scsiio.cdb)); } io->scsiio.cdb_len = min(csio->cdb_len, sizeof(io->scsiio.cdb)); bcopy(csio->cdb_io.cdb_bytes, io->scsiio.cdb, io->scsiio.cdb_len); err = ctl_queue(io); if (err != CTL_RETVAL_COMPLETE) { printf("%s: func %d: error %d returned by " "ctl_queue()!\n", __func__, ccb->ccb_h.func_code, err); ctl_free_io(io); } else { ccb->ccb_h.status |= CAM_SIM_QUEUED; } break; } case XPT_ABORT: { union ctl_io *io; union ccb *abort_ccb; abort_ccb = ccb->cab.abort_ccb; if (abort_ccb->ccb_h.func_code != XPT_SCSI_IO) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); } /* * If we aren't online, there are no devices to talk to. */ if (softc->online == 0) { ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } io = ctl_alloc_io(softc->port.ctl_pool_ref); if (io == NULL) { ccb->ccb_h.status = CAM_BUSY | CAM_DEV_QFRZN; xpt_freeze_devq(ccb->ccb_h.path, 1); xpt_done(ccb); return; } ctl_zero_io(io); /* Save pointers on both sides */ io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = ccb; ccb->ccb_h.io_ptr = io; io->io_hdr.io_type = CTL_IO_TASK; io->io_hdr.nexus.initid.id = 1; io->io_hdr.nexus.targ_port = softc->port.targ_port; io->io_hdr.nexus.targ_target.id = ccb->ccb_h.target_id; io->io_hdr.nexus.targ_lun = ccb->ccb_h.target_lun; io->taskio.task_action = CTL_TASK_ABORT_TASK; io->taskio.tag_num = abort_ccb->csio.tag_id; switch (abort_ccb->csio.tag_action) { case CAM_TAG_ACTION_NONE: io->taskio.tag_type = CTL_TAG_UNTAGGED; break; case MSG_SIMPLE_TASK: io->taskio.tag_type = CTL_TAG_SIMPLE; break; case MSG_HEAD_OF_QUEUE_TASK: io->taskio.tag_type = CTL_TAG_HEAD_OF_QUEUE; break; case MSG_ORDERED_TASK: io->taskio.tag_type = CTL_TAG_ORDERED; break; case MSG_ACA_TASK: io->taskio.tag_type = CTL_TAG_ACA; break; default: io->taskio.tag_type = CTL_TAG_UNTAGGED; printf("%s: unhandled tag type %#x!!\n", __func__, abort_ccb->csio.tag_action); break; } err = ctl_queue(io); if (err != CTL_RETVAL_COMPLETE) { printf("%s func %d: error %d returned by " "ctl_queue()!\n", __func__, ccb->ccb_h.func_code, err); ctl_free_io(io); } break; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts; struct ccb_trans_settings_scsi *scsi; struct ccb_trans_settings_fc *fc; cts = &ccb->cts; scsi = &cts->proto_specific.scsi; fc = &cts->xport_specific.fc; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; cts->transport = XPORT_FC; cts->transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; fc->valid = CTS_FC_VALID_SPEED; fc->bitrate = 800000; fc->wwnn = softc->wwnn; fc->wwpn = softc->wwpn; fc->port = softc->port.targ_port; fc->valid |= CTS_FC_VALID_WWNN | CTS_FC_VALID_WWPN | CTS_FC_VALID_PORT; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_SET_TRAN_SETTINGS: /* XXX KDM should we actually do something here? */ ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_RESET_BUS: case XPT_RESET_DEV: { union ctl_io *io; /* * If we aren't online, there are no devices to talk to. */ if (softc->online == 0) { ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } io = ctl_alloc_io(softc->port.ctl_pool_ref); if (io == NULL) { ccb->ccb_h.status = CAM_BUSY | CAM_DEV_QFRZN; xpt_freeze_devq(ccb->ccb_h.path, 1); xpt_done(ccb); return; } ctl_zero_io(io); /* Save pointers on both sides */ if (ccb->ccb_h.func_code == XPT_RESET_DEV) io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = ccb; ccb->ccb_h.io_ptr = io; io->io_hdr.io_type = CTL_IO_TASK; io->io_hdr.nexus.initid.id = 0; io->io_hdr.nexus.targ_port = softc->port.targ_port; io->io_hdr.nexus.targ_target.id = ccb->ccb_h.target_id; io->io_hdr.nexus.targ_lun = ccb->ccb_h.target_lun; if (ccb->ccb_h.func_code == XPT_RESET_BUS) io->taskio.task_action = CTL_TASK_BUS_RESET; else io->taskio.task_action = CTL_TASK_LUN_RESET; err = ctl_queue(io); if (err != CTL_RETVAL_COMPLETE) { printf("%s func %d: error %d returned by " "ctl_queue()!\n", __func__, ccb->ccb_h.func_code, err); ctl_free_io(io); } break; } case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); break; case XPT_PATH_INQ: { struct ccb_pathinq *cpi; cpi = &ccb->cpi; cpi->version_num = 0; cpi->hba_inquiry = PI_TAG_ABLE; cpi->target_sprt = 0; cpi->hba_misc = 0; cpi->hba_eng_cnt = 0; cpi->max_target = 1; cpi->max_lun = 1024; /* Do we really have a limit? */ cpi->maxio = 1024 * 1024; cpi->async_flags = 0; cpi->hpath_id = 0; cpi->initiator_id = 0; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "FreeBSD", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = 0; cpi->bus_id = 0; cpi->base_transfer_speed = 800000; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; /* * Pretend to be Fibre Channel. */ cpi->transport = XPORT_FC; cpi->transport_version = 0; cpi->xport_specific.fc.wwnn = softc->wwnn; cpi->xport_specific.fc.wwpn = softc->wwpn; cpi->xport_specific.fc.port = softc->port.targ_port; cpi->xport_specific.fc.bitrate = 8 * 1000 * 1000; cpi->ccb_h.status = CAM_REQ_CMP; break; } default: ccb->ccb_h.status = CAM_PROVIDE_FAIL; printf("%s: unsupported CCB type %#x\n", __func__, ccb->ccb_h.func_code); xpt_done(ccb); break; } }
struct tte * tsb_tte_enter(pmap_t pm, vm_page_t m, vm_offset_t va, u_long sz, u_long data) { struct tte *bucket; struct tte *rtp; struct tte *tp; vm_offset_t ova; int b0; int i; if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) { CTR5(KTR_SPARE2, "tsb_tte_enter: off colour va=%#lx pa=%#lx o=%p ot=%d pi=%#lx", va, VM_PAGE_TO_PHYS(m), m->object, m->object ? m->object->type : -1, m->pindex); if (pm == kernel_pmap) PMAP_STATS_INC(tsb_nenter_k_oc); else PMAP_STATS_INC(tsb_nenter_u_oc); } mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pm, MA_OWNED); if (pm == kernel_pmap) { PMAP_STATS_INC(tsb_nenter_k); tp = tsb_kvtotte(va); KASSERT((tp->tte_data & TD_V) == 0, ("tsb_tte_enter: replacing valid kernel mapping")); goto enter; } PMAP_STATS_INC(tsb_nenter_u); bucket = tsb_vtobucket(pm, sz, va); tp = NULL; rtp = NULL; b0 = rd(tick) & (TSB_BUCKET_SIZE - 1); i = b0; do { if ((bucket[i].tte_data & TD_V) == 0) { tp = &bucket[i]; break; } if (tp == NULL) { if ((bucket[i].tte_data & TD_REF) == 0) tp = &bucket[i]; else if (rtp == NULL) rtp = &bucket[i]; } } while ((i = (i + 1) & (TSB_BUCKET_SIZE - 1)) != b0); if (tp == NULL) tp = rtp; if ((tp->tte_data & TD_V) != 0) { PMAP_STATS_INC(tsb_nrepl); ova = TTE_GET_VA(tp); pmap_remove_tte(pm, NULL, tp, ova); tlb_page_demap(pm, ova); } enter: if ((m->flags & PG_FICTITIOUS) == 0) { data |= TD_CP; if ((m->flags & PG_UNMANAGED) == 0) { pm->pm_stats.resident_count++; data |= TD_PV; } if (pmap_cache_enter(m, va) != 0) data |= TD_CV; TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link); } else data |= TD_FAKE | TD_E; tp->tte_vpn = TV_VPN(va, sz); tp->tte_data = data; return (tp); }
/* * Find a cylinder to place a directory. * * The policy implemented by this algorithm is to allocate a * directory inode in the same cylinder group as its parent * directory, but also to reserve space for its files inodes * and data. Restrict the number of directories which may be * allocated one after another in the same cylinder group * without intervening allocation of files. * * If we allocate a first level directory then force allocation * in another cylinder group. * */ static u_long ext2_dirpref(struct inode *pip) { struct m_ext2fs *fs; int cg, prefcg, cgsize; u_int avgifree, avgbfree, avgndir, curdirsize; u_int minifree, minbfree, maxndir; u_int mincg, minndir; u_int dirsize, maxcontigdirs; mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); fs = pip->i_e2fs; avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount; avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount; avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; /* * Force allocation in another cg if creating a first level dir. */ ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); if (ITOV(pip)->v_vflag & VV_ROOT) { prefcg = arc4random() % fs->e2fs_gcount; mincg = prefcg; minndir = fs->e2fs_ipg; for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { mincg = cg; minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; } for (cg = 0; cg < prefcg; cg++) if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { mincg = cg; minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; } return (mincg); } /* * Count various limits which used for * optimal allocation of a directory inode. */ maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); minifree = avgifree - avgifree / 4; if (minifree < 1) minifree = 1; minbfree = avgbfree - avgbfree / 4; if (minbfree < 1) minbfree = 1; cgsize = fs->e2fs_fsize * fs->e2fs_fpg; dirsize = AVGDIRSIZE; curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; if (dirsize < curdirsize) dirsize = curdirsize; maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); if (maxcontigdirs == 0) maxcontigdirs = 1; /* * Limit number of dirs in one cg and reserve space for * regular files, but only if we have no deficit in * inodes or space. */ prefcg = ino_to_cg(fs, pip->i_number); for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { if (fs->e2fs_contigdirs[cg] < maxcontigdirs) return (cg); } for (cg = 0; cg < prefcg; cg++) if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { if (fs->e2fs_contigdirs[cg] < maxcontigdirs) return (cg); } /* * This is a backstop when we have deficit in space. */ for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) return (cg); for (cg = 0; cg < prefcg; cg++) if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) break; return (cg); }
/*********************************************************************** * Handle a request for action from CAM */ static void amr_cam_action(struct cam_sim *sim, union ccb *ccb) { struct amr_softc *sc = cam_sim_softc(sim); switch(ccb->ccb_h.func_code) { /* * Perform SCSI I/O to a physical device. */ case XPT_SCSI_IO: { struct ccb_hdr *ccbh = &ccb->ccb_h; struct ccb_scsiio *csio = &ccb->csio; /* Validate the CCB */ ccbh->status = CAM_REQ_INPROG; /* check the CDB length */ if (csio->cdb_len > AMR_MAX_EXTCDB_LEN) ccbh->status = CAM_REQ_INVALID; if ((csio->cdb_len > AMR_MAX_CDB_LEN) && (sc->support_ext_cdb == 0)) ccbh->status = CAM_REQ_INVALID; /* check that the CDB pointer is not to a physical address */ if ((ccbh->flags & CAM_CDB_POINTER) && (ccbh->flags & CAM_CDB_PHYS)) ccbh->status = CAM_REQ_INVALID; /* * if there is data transfer, it must be to/from a virtual * address */ if ((ccbh->flags & CAM_DIR_MASK) != CAM_DIR_NONE) { if ((ccbh->flags & CAM_DATA_MASK) != CAM_DATA_VADDR) /* we can't map it */ ccbh->status = CAM_REQ_INVALID; } /* * If the command is to a LUN other than 0, fail it. * This is probably incorrect, but during testing the * firmware did not seem to respect the LUN field, and thus * devices appear echoed. */ if (csio->ccb_h.target_lun != 0) ccbh->status = CAM_DEV_NOT_THERE; /* if we're happy with the request, queue it for attention */ if (ccbh->status == CAM_REQ_INPROG) { /* save the channel number in the ccb */ csio->ccb_h.sim_priv.entries[0].field= cam_sim_bus(sim); amr_enqueue_ccb(sc, ccb); amr_startio(sc); return; } break; } case XPT_CALC_GEOMETRY: { cam_calc_geometry(&ccb->ccg, /*extended*/1); break; } /* * Return path stats. Some of these should probably be amended. */ case XPT_PATH_INQ: { struct ccb_pathinq *cpi = & ccb->cpi; debug(3, "XPT_PATH_INQ"); cpi->version_num = 1; /* XXX??? */ cpi->hba_inquiry = PI_SDTR_ABLE|PI_TAG_ABLE|PI_WIDE_16; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET|PIM_SEQSCAN; cpi->hba_eng_cnt = 0; cpi->max_target = AMR_MAX_TARGETS; cpi->max_lun = 0 /* AMR_MAX_LUNS*/; cpi->initiator_id = 7; /* XXX variable? */ strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "LSI", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 132 * 1024; /* XXX */ cpi->transport = XPORT_SPI; cpi->transport_version = 2; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_2; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: { struct ccb_pathinq *cpi = & ccb->cpi; debug(1, "XPT_RESET_BUS"); cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_DEV: { debug(1, "XPT_RESET_DEV"); ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &(ccb->cts); debug(3, "XPT_GET_TRAN_SETTINGS"); struct ccb_trans_settings_scsi *scsi; struct ccb_trans_settings_spi *spi; scsi = &cts->proto_specific.scsi; spi = &cts->xport_specific.spi; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_2; cts->transport = XPORT_SPI; cts->transport_version = 2; if (cts->type == CTS_TYPE_USER_SETTINGS) { ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; } spi->flags = CTS_SPI_FLAGS_DISC_ENB; spi->bus_width = MSG_EXT_WDTR_BUS_32_BIT; spi->sync_period = 6; /* 40MHz how wide is this bus? */ spi->sync_offset = 31; /* How to extract this from board? */ spi->valid = CTS_SPI_VALID_SYNC_RATE | CTS_SPI_VALID_SYNC_OFFSET | CTS_SPI_VALID_BUS_WIDTH | CTS_SPI_VALID_DISC; scsi->valid = CTS_SCSI_VALID_TQ; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_SET_TRAN_SETTINGS: debug(3, "XPT_SET_TRAN_SETTINGS"); ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; /* * Reject anything else as unsupported. */ default: /* we can't do this */ ccb->ccb_h.status = CAM_REQ_INVALID; break; } mtx_assert(&sc->amr_list_lock, MA_OWNED); xpt_done(ccb); }
/* * ppb_MS_microseq() * * Interprete a microsequence. Some microinstructions are executed at adapter * level to avoid function call overhead between ppbus and the adapter */ int ppb_MS_microseq(device_t bus, device_t dev, struct ppb_microseq *msq, int *ret) { struct ppb_data *ppb = (struct ppb_data *)device_get_softc(bus); struct ppb_device *ppbdev = (struct ppb_device *)device_get_ivars(dev); struct ppb_microseq *mi; /* current microinstruction */ int error; struct ppb_xfer *xfer; /* microsequence executed to initialize the transfer */ struct ppb_microseq initxfer[] = { MS_PTR(MS_UNKNOWN), /* set ptr to buffer */ MS_SET(MS_UNKNOWN), /* set transfer size */ MS_RET(0) }; mtx_assert(ppb->ppc_lock, MA_OWNED); if (ppb->ppb_owner != dev) return (EACCES); #define INCR_PC (mi ++) mi = msq; for (;;) { switch (mi->opcode) { case MS_OP_PUT: case MS_OP_GET: /* attempt to choose the best mode for the device */ xfer = mode2xfer(bus, ppbdev, mi->opcode); /* figure out if we should use ieee1284 code */ if (!xfer->loop) { if (mi->opcode == MS_OP_PUT) { if ((error = PPBUS_WRITE( device_get_parent(bus), (char *)mi->arg[0].p, mi->arg[1].i, 0))) goto error; INCR_PC; goto next; } else panic("%s: IEEE1284 read not supported", __func__); } /* XXX should use ppb_MS_init_msq() */ initxfer[0].arg[0].p = mi->arg[0].p; initxfer[1].arg[0].i = mi->arg[1].i; /* initialize transfer */ ppb_MS_microseq(bus, dev, initxfer, &error); if (error) goto error; /* the xfer microsequence should not contain any * MS_OP_PUT or MS_OP_GET! */ ppb_MS_microseq(bus, dev, xfer->loop, &error); if (error) goto error; INCR_PC; break; case MS_OP_RET: if (ret) *ret = mi->arg[0].i; /* return code */ return (0); default: /* executing microinstructions at ppc level is * faster. This is the default if the microinstr * is unknown here */ if ((error = PPBUS_EXEC_MICROSEQ( device_get_parent(bus), &mi))) goto error; break; } next: continue; } error: return (error); }
void trap(struct trapframe *frame) { struct thread *td; struct proc *p; int sig, type, user; ksiginfo_t ksi; PCPU_INC(cnt.v_trap); td = curthread; p = td->td_proc; type = frame->exc; sig = 0; user = (frame->srr1 & PSL_PR) ? 1 : 0; CTR3(KTR_TRAP, "trap: %s type=%s (%s)", p->p_comm, trapname(type), user ? "user" : "kernel"); if (user) { td->td_frame = frame; if (td->td_ucred != p->p_ucred) cred_update_thread(td); /* User Mode Traps */ switch (type) { case EXC_DSI: case EXC_ISI: sig = trap_pfault(frame, 1); break; case EXC_SC: syscall(frame); break; case EXC_ALI: if (fix_unaligned(td, frame) != 0) sig = SIGBUS; else frame->srr0 += 4; break; case EXC_DEBUG: /* Single stepping */ mtspr(SPR_DBSR, mfspr(SPR_DBSR)); frame->srr1 &= ~PSL_DE; frame->cpu.booke.dbcr0 &= ~(DBCR0_IDM || DBCR0_IC); sig = SIGTRAP; break; case EXC_PGM: /* Program exception */ #ifdef FPU_EMU sig = fpu_emulate(frame, (struct fpreg *)&td->td_pcb->pcb_fpu); #else /* XXX SIGILL for non-trap instructions. */ sig = SIGTRAP; #endif break; default: trap_fatal(frame); } } else { /* Kernel Mode Traps */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case EXC_DEBUG: mtspr(SPR_DBSR, mfspr(SPR_DBSR)); kdb_trap(frame->exc, 0, frame); return; case EXC_DSI: if (trap_pfault(frame, 0) == 0) return; break; case EXC_MCHK: if (handle_onfault(frame)) return; break; #ifdef KDB case EXC_PGM: if (frame->cpu.booke.esr & ESR_PTR) kdb_trap(EXC_PGM, 0, frame); return; #endif default: break; } trap_fatal(frame); } if (sig != 0) { if (p->p_sysent->sv_transtrap != NULL) sig = (p->p_sysent->sv_transtrap)(sig, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = type; /* XXX, not POSIX */ /* ksi.ksi_addr = ? */ ksi.ksi_trapno = type; trapsignal(td, &ksi); } userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); }
/* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct trapframe *regs; #ifdef CPU_CHERI struct cheri_frame *cfp; #endif struct sigacts *psp; struct sigframe sf, *sfp; vm_offset_t sp; #ifdef CPU_CHERI size_t cp2_len; int cheri_is_sandboxed; #endif int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->sp); #ifdef CPU_CHERI /* * CHERI affects signal delivery in the following ways: * * (1) Additional capability-coprocessor state is exposed via * extensions to the context frame placed on the stack. * * (2) If the user $pcc doesn't include CHERI_PERM_SYSCALL, then we * consider user state to be 'sandboxed' and therefore to require * special delivery handling which includes a domain-switch to the * thread's context-switch domain. (This is done by * cheri_sendsig()). * * (3) If an alternative signal stack is not defined, and we are in a * 'sandboxed' state, then we have two choices: (a) if the signal * is of type SA_SANDBOX_UNWIND, we will automatically unwind the * trusted stack by one frame; (b) otherwise, we will terminate * the process unconditionally. */ cheri_is_sandboxed = cheri_signal_sandboxed(td); /* * We provide the ability to drop into the debugger in two different * circumstances: (1) if the code running is sandboxed; and (2) if the * fault is a CHERI protection fault. Handle both here for the * non-unwind case. Do this before we rewrite any general-purpose or * capability register state for the thread. */ #if DDB if (cheri_is_sandboxed && security_cheri_debugger_on_sandbox_signal) kdb_enter(KDB_WHY_CHERI, "Signal delivery to CHERI sandbox"); else if (sig == SIGPROT && security_cheri_debugger_on_sigprot) kdb_enter(KDB_WHY_CHERI, "SIGPROT delivered outside sandbox"); #endif /* * If a thread is running sandboxed, we can't rely on $sp which may * not point at a valid stack in the ambient context, or even be * maliciously manipulated. We must therefore always use the * alternative stack. We are also therefore unable to tell whether we * are on the alternative stack, so must clear 'oonstack' here. * * XXXRW: This requires significant further thinking; however, the net * upshot is that it is not a good idea to do an object-capability * invoke() from a signal handler, as with so many other things in * life. */ if (cheri_is_sandboxed != 0) oonstack = 0; #endif /* save user context */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs->pc; sf.sf_uc.uc_mcontext.mullo = regs->mullo; sf.sf_uc.uc_mcontext.mulhi = regs->mulhi; sf.sf_uc.uc_mcontext.mc_tls = td->td_md.md_tls; sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ bcopy((void *)®s->ast, (void *)&sf.sf_uc.uc_mcontext.mc_regs[1], sizeof(sf.sf_uc.uc_mcontext.mc_regs) - sizeof(register_t)); sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; #if defined(CPU_HAVEFPU) if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); bcopy((void *)&td->td_frame->f0, (void *)sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); } #endif /* XXXRW: sf.sf_uc.uc_mcontext.sr seems never to be set? */ sf.sf_uc.uc_mcontext.cause = regs->cause; /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (vm_offset_t)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); } else { #ifdef CPU_CHERI /* * Signals delivered when a CHERI sandbox is present must be * delivered on the alternative stack rather than a local one. * If an alternative stack isn't present, then terminate or * risk leaking capabilities (and control) to the sandbox (or * just crashing the sandbox). */ if (cheri_is_sandboxed) { mtx_unlock(&psp->ps_mtx); printf("pid %d, tid %d: signal in sandbox without " "alternative stack defined\n", td->td_proc->p_pid, td->td_tid); sigexit(td, SIGILL); /* NOTREACHED */ } #endif sp = (vm_offset_t)regs->sp; } #ifdef CPU_CHERI cp2_len = sizeof(*cfp); sp -= cp2_len; sp &= ~(CHERICAP_SIZE - 1); sf.sf_uc.uc_mcontext.mc_cp2state = sp; sf.sf_uc.uc_mcontext.mc_cp2state_len = cp2_len; #endif sp -= sizeof(struct sigframe); #ifdef CPU_CHERI /* For CHERI, keep the stack pointer capability aligned. */ sp &= ~(CHERICAP_SIZE - 1); #else sp &= ~(sizeof(__int64_t) - 1); #endif sfp = (struct sigframe *)sp; /* Build the argument list for the signal handler. */ regs->a0 = sig; regs->a2 = (register_t)(intptr_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->a1 = (register_t)(intptr_t)&sfp->sf_si; /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = (void*)(intptr_t)regs->badvaddr; } else { /* Old FreeBSD-style arguments. */ regs->a1 = ksi->ksi_code; regs->a3 = regs->badvaddr; /* sf.sf_ahu.sf_handler = catcher; */ } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ #ifdef CPU_CHERI cfp = malloc(sizeof(*cfp), M_TEMP, M_WAITOK); cheri_trapframe_to_cheriframe(&td->td_pcb->pcb_regs, cfp); if (copyoutcap(cfp, (void *)sf.sf_uc.uc_mcontext.mc_cp2state, cp2_len) != 0) { free(cfp, M_TEMP); PROC_LOCK(p); printf("pid %d, tid %d: could not copy out cheriframe\n", td->td_proc->p_pid, td->td_tid); sigexit(td, SIGILL); /* NOTREACHED */ } free(cfp, M_TEMP); #endif if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); printf("pid %d, tid %d: could not copy out sigframe\n", td->td_proc->p_pid, td->td_tid); sigexit(td, SIGILL); /* NOTREACHED */ } #ifdef CPU_CHERI /* * Install CHERI signal-delivery register state for handler to run * in. As we don't install this in the CHERI frame on the user stack, * it will be (genrally) be removed automatically on sigreturn(). */ cheri_sendsig(td); #endif regs->pc = (register_t)(intptr_t)catcher; regs->t9 = (register_t)(intptr_t)catcher; regs->sp = (register_t)(intptr_t)sfp; /* * Signal trampoline code is at base of user stack. */ regs->ra = (register_t)(intptr_t)PS_STRINGS - *(p->p_sysent->sv_szsigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); }