static __inline void
userret(struct lwp *l, register_t pc, u_quad_t oticks)
{
	struct proc *p = l->l_proc;
	int sig;

	/* take pending signals */
	while ((sig = CURSIG(l)) != 0)
		postsig(sig);

	l->l_priority = l->l_usrpri;
	if (want_resched) {
		/*
		 * We're being preempted.
		 */
		preempt(0);
		while ((sig = CURSIG(l)) != 0)
			postsig(sig);
	}

	/*
	 * If profiling, charge recent system time to the trapped pc.
	 */
	if (l->l_flag & P_PROFIL) {
		extern int psratio;

		addupc_task(p, pc, (int)(p->p_sticks - oticks) * psratio);
	}

	curcpu()->ci_schedstate.spc_curpriority = l->l_priority;
}
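/*
 * A minimal standalone sketch of the profiling charge above: oticks is
 * the value of p_sticks sampled on kernel entry, so (p_sticks - oticks)
 * is the number of statclock ticks this trap spent in the kernel, and
 * psratio (roughly profhz / stathz) rescales statclock ticks into
 * profclock ticks for addupc_task().  All values below are illustrative,
 * not taken from a real kernel.
 */
#include <stdio.h>

int
main(void)
{
	unsigned long long oticks = 1000;	/* p_sticks at kernel entry */
	unsigned long long p_sticks = 1003;	/* p_sticks on return to user */
	int psratio = 8;			/* e.g. profhz 1024, stathz 128 */

	/* The delta is tiny for a single trap, so truncating to int is safe. */
	int charge = (int)(p_sticks - oticks) * psratio;

	printf("charge %d profiling ticks to the trapped pc\n", charge);
	return 0;
}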
/*
 * trap and syscall both need the following work done before returning
 * to user mode.
 */
static inline void
userret(struct lwp *l, struct frame *fp, u_quad_t oticks, u_int faultaddr,
    int fromtrap)
{
	struct proc *p = l->l_proc;
#ifdef M68040
	int sig;
	int beenhere = 0;

again:
#endif
	/* Invoke MI userret code */
	mi_userret(l);

	/*
	 * If profiling, charge system time to the trapped pc.
	 */
	if (p->p_stflag & PST_PROFIL) {
		extern int psratio;

		addupc_task(l, fp->f_pc,
		    (int)(p->p_sticks - oticks) * psratio);
	}
#ifdef M68040
	/*
	 * Deal with user mode writebacks (from trap, or from sigreturn).
	 * If any writeback fails, go back and attempt signal delivery,
	 * unless we have already been here and attempted the writeback
	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
	 * we just return to the user without successfully completing
	 * the writebacks.  Maybe we should just drop the sucker?
	 */
	if (
#if defined(M68030) || defined(M68060)
	    cputype == CPU_68040 &&
#endif
	    fp->f_format == FMT7) {
		if (beenhere) {
#ifdef DEBUG
			if (mmudebug & MDB_WBFAILED)
				printf(fromtrap ?
			"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
			"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
#endif
		} else if ((sig = writeback(fp, fromtrap))) {
			ksiginfo_t ksi;

			beenhere = 1;
			oticks = p->p_sticks;
			(void)memset(&ksi, 0, sizeof(ksi));
			ksi.ksi_signo = sig;
			ksi.ksi_addr = (void *)faultaddr;
			ksi.ksi_code = BUS_OBJERR;
			trapsignal(l, &ksi);
			goto again;
		}
	}
#endif
}
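/*
 * A minimal standalone sketch of the retry guard used above, with all
 * names illustrative rather than kernel symbols: a failed writeback posts
 * a signal and loops back so delivery can happen, but a "beenhere" flag
 * makes the second pass give up instead of spinning forever (e.g. a bad
 * address with the user ignoring SIGSEGV).
 */
#include <stdio.h>

/* Pretend the hardware writeback always fails with a fault. */
static int
writeback_sketch(void)
{
	return -1;
}

static void
userret_sketch(void)
{
	int beenhere = 0;

again:
	/* Signal delivery would run here, as mi_userret() does above. */
	if (beenhere) {
		printf("writeback aborted\n");
		return;
	}
	if (writeback_sketch() != 0) {
		beenhere = 1;
		printf("writeback failed, posting signal and retrying\n");
		goto again;
	}
}

int
main(void)
{
	userret_sketch();
	return 0;
}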
static inline void
userret(struct proc *p, struct trapframe *frame, u_quad_t oticks)
{
	int sig;

	/* take pending signals */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);

	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * We're being preempted.
		 */
		preempt(NULL);
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}

	/*
	 * If profiling, charge recent system time to the trapped pc.
	 */
	if (p->p_flag & P_PROFIL) {
		extern int psratio;

		addupc_task(p, frame->tf_sxip & XIP_ADDR,
		    (int)(p->p_sticks - oticks) * psratio);
	}

	curpriority = p->p_priority;
}
void
userret(struct proc *p, u_int32_t pc, quad_t oticks)
{
	int sig;

	/* Take pending signals. */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);

	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * We're being preempted.
		 */
		preempt(NULL);
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}

	/*
	 * If profiling, charge recent system time to the trapped pc.
	 */
	if (p->p_flag & P_PROFIL) {
		extern int psratio;

		addupc_task(p, pc, (int)(p->p_sticks - oticks) * psratio);
	}

	curpriority = p->p_priority;
}
/*
 * Define the code needed before returning to user mode, for trap and
 * syscall.
 */
void
userret(struct thread *td, struct trapframe *frame)
{
	struct proc *p = td->td_proc;

	CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
	    td->td_name);
	KASSERT((p->p_flag & P_WEXIT) == 0,
	    ("Exiting process returns to usermode"));
#if 0
#ifdef DIAGNOSTIC
	/* Check that we called signotify() enough. */
	PROC_LOCK(p);
	thread_lock(td);
	if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 ||
	    (td->td_flags & TDF_ASTPENDING) == 0))
		printf("failed to set signal flags properly for ast()\n");
	thread_unlock(td);
	PROC_UNLOCK(p);
#endif
#endif
#ifdef KTRACE
	KTRUSERRET(td);
#endif
	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland.
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Charge system time if profiling.
	 */
	if (p->p_flag & P_PROFIL)
		addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio);
	/*
	 * Let the scheduler adjust our priority etc.
	 */
	sched_userret(td);
	KASSERT(td->td_locks == 0,
	    ("userret: Returning with %d locks held.", td->td_locks));
#ifdef VIMAGE
	/* Unfortunately td_vnet_lpush needs VNET_DEBUG. */
	VNET_ASSERT(curvnet == NULL,
	    ("%s: Returning on td %p (pid %d, %s) with vnet %p set in %s",
	    __func__, td, p->p_pid, td->td_name, curvnet,
	    (td->td_vnet_lpush != NULL) ? td->td_vnet_lpush : "N/A"));
#endif
#ifdef XEN
	PT_UPDATES_FLUSH();
#endif
}
/*
 * trap and syscall both need the following work done before
 * returning to user mode.
 */
static void
userret(struct lwp *l, struct trapframe *tf, u_quad_t oticks)
{
	struct proc *p = l->l_proc;

	/* Invoke MI userret code */
	mi_userret(l);

	/*
	 * If profiling, charge system time to the trapped pc.
	 */
	if (p->p_stflag & PST_PROFIL) {
		extern int psratio;

		addupc_task(l, tf->tf_pc,
		    (int)(p->p_sticks - oticks) * psratio);
	}
}
/*
 * Define the code needed before returning to user mode, for
 * trap, mem_access_fault, and syscall.
 */
static inline void
userret(struct proc *p, int pc, u_quad_t oticks)
{
	int sig;

	/* take pending signals */
	while ((sig = CURSIG(p)) != 0)
		psig(sig);

	p->p_pri = p->p_usrpri;
	if (want_ast) {
		want_ast = 0;
		if (p->p_flag & SOWEUPC) {
			p->p_flag &= ~SOWEUPC;
			ADDUPROF(p);
		}
	}
	if (want_resched) {
		/*
		 * Since we are curproc, a clock interrupt could
		 * change our priority without changing run queues
		 * (the running process is not kept on a run queue).
		 * If this happened after we setrq ourselves but
		 * before we swtch()'ed, we might not be on the queue
		 * indicated by our priority.
		 */
		(void) splstatclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		(void) spl0();
		while ((sig = CURSIG(p)) != 0)
			psig(sig);
	}

	/*
	 * If profiling, charge recent system time to the trapped pc.
	 */
	if (p->p_flag & SPROFIL)
		addupc_task(p, pc, (int)(p->p_sticks - oticks));

	curpri = p->p_pri;
}
/*
 * Define the code needed before returning to user mode, for
 * trap, mem_access_fault, and syscall.
 */
static inline void
userret(struct proc *p, int pc, u_quad_t oticks)
{
	int sig;

	/* take pending signals */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);

	p->p_priority = p->p_usrpri;
	if (want_ast) {
		want_ast = 0;
		if (p->p_flag & P_OWEUPC) {
			p->p_flag &= ~P_OWEUPC;
			ADDUPROF(p);
		}
	}
	if (want_resched) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we put ourselves on the run queue
		 * but before we switched, we might not be on the queue
		 * indicated by our priority.
		 */
		(void) splstatclock();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		(void) spl0();
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}

	/*
	 * If profiling, charge recent system time to the trapped pc.
	 */
	if (p->p_flag & P_PROFIL)
		addupc_task(p, pc, (int)(p->p_sticks - oticks));

	curpriority = p->p_priority;
}
void
userret(struct proc *p, register_t pc, u_quad_t oticks)
{
	int sig;

	/* take pending signals */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);

	p->p_priority = p->p_usrpri;
	if (astpending) {
		astpending = 0;
		if (p->p_flag & P_OWEUPC) {
			ADDUPROF(p);
		}
	}
	if (want_resched) {
		/*
		 * We're being preempted.
		 */
		preempt(NULL);
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}

	/*
	 * If profiling, charge recent system time to the trapped pc.
	 */
	if (p->p_flag & P_PROFIL) {
		extern int psratio;

		addupc_task(p, pc, (int)(p->p_sticks - oticks) * psratio);
	}

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority;
}
/* TBD: locking for user profiling is not resolved yet */
void
bsd_uprofil(struct time_value *syst, user_addr_t pc)
{
	struct proc *p = current_proc();
	int ticks;
	struct timeval *tv;
	struct timeval st;

	if (p == NULL)
		return;
	if (!(p->p_flag & P_PROFIL))
		return;

	st.tv_sec = syst->seconds;
	st.tv_usec = syst->microseconds;

	tv = &(p->p_stats->p_ru.ru_stime);

	ticks = ((tv->tv_sec - st.tv_sec) * 1000 +
	    (tv->tv_usec - st.tv_usec) / 1000) / (tick / 1000);
	if (ticks)
		addupc_task(p, pc, ticks);
}
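/*
 * A minimal standalone sketch of the tick conversion above: the elapsed
 * system time (ru_stime minus the system time recorded at entry) is held
 * as seconds/microseconds, reduced to milliseconds on both sides, and
 * divided by the clock tick period to get a profiling tick count.  The
 * names below (start_us, now_us, tick_us) are illustrative; the kernel's
 * `tick` variable is the tick period in microseconds.
 */
#include <stdio.h>

int
main(void)
{
	long start_us = 1200000;	/* syst at entry: 1.2 s, in microseconds */
	long now_us = 1250000;		/* ru_stime now: 1.25 s */
	long tick_us = 10000;		/* 10 ms tick, i.e. hz = 100 */

	/* Same computation as above, with both sides reduced to milliseconds. */
	long elapsed_ms = (now_us - start_us) / 1000;
	long ticks = elapsed_ms / (tick_us / 1000);

	printf("%ld ticks of system time to charge\n", ticks);	/* prints 5 */
	return 0;
}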
/*
 * Define the code needed before returning to user mode, for trap and
 * syscall.
 */
void
userret(struct thread *td, struct trapframe *frame)
{
	struct proc *p = td->td_proc;

	CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
	    td->td_name);
	KASSERT((p->p_flag & P_WEXIT) == 0,
	    ("Exiting process returns to usermode"));
#if 0
#ifdef DIAGNOSTIC
	/* Check that we called signotify() enough. */
	PROC_LOCK(p);
	thread_lock(td);
	if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 ||
	    (td->td_flags & TDF_ASTPENDING) == 0))
		printf("failed to set signal flags properly for ast()\n");
	thread_unlock(td);
	PROC_UNLOCK(p);
#endif
#endif
#ifdef KTRACE
	KTRUSERRET(td);
#endif
	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland.
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Charge system time if profiling.
	 */
	if (p->p_flag & P_PROFIL)
		addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio);
	/*
	 * Let the scheduler adjust our priority etc.
	 */
	sched_userret(td);
#ifdef XEN
	PT_UPDATES_FLUSH();
#endif

	/*
	 * Check for misbehavior.
	 *
	 * In case there is a callchain tracing ongoing because of
	 * hwpmc(4), skip the scheduler pinning check.
	 * The hwpmc(4) subsystem, in fact, will collect callchain
	 * information at the ast() checkpoint, which is past userret().
	 */
	WITNESS_WARN(WARN_PANIC, NULL, "userret: returning");
	KASSERT(td->td_critnest == 0,
	    ("userret: Returning in a critical section"));
	KASSERT(td->td_locks == 0,
	    ("userret: Returning with %d locks held", td->td_locks));
	KASSERT(td->td_rw_rlocks == 0,
	    ("userret: Returning with %d rwlocks held in read mode",
	    td->td_rw_rlocks));
	KASSERT((td->td_pflags & TDP_NOFAULTING) == 0,
	    ("userret: Returning with pagefaults disabled"));
	KASSERT(td->td_no_sleeping == 0,
	    ("userret: Returning with sleep disabled"));
	KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0,
	    ("userret: Returning with pinned thread"));
	KASSERT(td->td_vp_reserv == 0,
	    ("userret: Returning while holding vnode reservation"));
	KASSERT((td->td_flags & TDF_SBDRY) == 0,
	    ("userret: Returning with stop signals deferred"));
#ifdef VIMAGE
	/* Unfortunately td_vnet_lpush needs VNET_DEBUG. */
	VNET_ASSERT(curvnet == NULL,
	    ("%s: Returning on td %p (pid %d, %s) with vnet %p set in %s",
	    __func__, td, p->p_pid, td->td_name, curvnet,
	    (td->td_vnet_lpush != NULL) ? td->td_vnet_lpush : "N/A"));
#endif
#ifdef RACCT
	PROC_LOCK(p);
	while (p->p_throttled == 1)
		msleep(p->p_racct, &p->p_mtx, 0, "racct", 0);
	PROC_UNLOCK(p);
#endif
}
/*
 * Process an asynchronous software trap.
 * This is relatively easy.
 * This function will return with preemption disabled.
 */
void
ast(struct trapframe *framep)
{
	struct thread *td;
	struct proc *p;
	int flags;
	int sig;

	td = curthread;
	p = td->td_proc;

	CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
	    p->p_comm);
	KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
	WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode");
	mtx_assert(&Giant, MA_NOTOWNED);
	THREAD_LOCK_ASSERT(td, MA_NOTOWNED);
	td->td_frame = framep;
	td->td_pticks = 0;

	/*
	 * This updates the td_flags for the checks below in one
	 * "atomic" operation with turning off the astpending flag.
	 * If another AST is triggered while we are handling the
	 * ASTs saved in flags, the astpending flag will be set and
	 * ast() will be called again.
	 */
	thread_lock(td);
	flags = td->td_flags;
	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK |
	    TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
	thread_unlock(td);
	PCPU_INC(cnt.v_trap);

	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) {
		addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
		td->td_profil_ticks = 0;
		td->td_pflags &= ~TDP_OWEUPC;
	}
#ifdef HWPMC_HOOKS
	/* Handle Software PMC callchain capture. */
	if (PMC_IS_PENDING_CALLCHAIN(td))
		PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_USER_CALLCHAIN_SOFT,
		    (void *) framep);
#endif
	if (flags & TDF_ALRMPEND) {
		PROC_LOCK(p);
		kern_psignal(p, SIGVTALRM);
		PROC_UNLOCK(p);
	}
	if (flags & TDF_PROFPEND) {
		PROC_LOCK(p);
		kern_psignal(p, SIGPROF);
		PROC_UNLOCK(p);
	}
#ifdef MAC
	if (flags & TDF_MACPEND)
		mac_thread_userret(td);
#endif
	if (flags & TDF_NEEDRESCHED) {
#ifdef KTRACE
		if (KTRPOINT(td, KTR_CSW))
			ktrcsw(1, 1, __func__);
#endif
		thread_lock(td);
		sched_prio(td, td->td_user_pri);
		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
		thread_unlock(td);
#ifdef KTRACE
		if (KTRPOINT(td, KTR_CSW))
			ktrcsw(0, 1, __func__);
#endif
	}

	/*
	 * Check for signals.  Unlocked reads of p_pendingcnt or
	 * p_siglist might cause process-directed signal to be handled
	 * later.
	 */
	if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
	    !SIGISEMPTY(p->p_siglist)) {
		PROC_LOCK(p);
		mtx_lock(&p->p_sigacts->ps_mtx);
		while ((sig = cursig(td)) != 0)
			postsig(sig);
		mtx_unlock(&p->p_sigacts->ps_mtx);
		PROC_UNLOCK(p);
	}
	/*
	 * We need to check to see if we have to exit or wait due to a
	 * single threading requirement or some other STOP condition.
	 */
	if (flags & TDF_NEEDSUSPCHK) {
		PROC_LOCK(p);
		thread_suspend_check(0);
		PROC_UNLOCK(p);
	}

	if (td->td_pflags & TDP_OLDMASK) {
		td->td_pflags &= ~TDP_OLDMASK;
		kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0);
	}

	userret(td, framep);
}
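/*
 * A minimal standalone sketch of the snapshot-and-clear pattern used
 * above: the pending-work flags are read and cleared as one unit, so
 * work posted while we drain the snapshot simply re-arms the "pending"
 * bit and triggers another ast() pass.  This version uses C11 atomics in
 * place of thread_lock(); all names below are illustrative, not kernel
 * symbols.
 */
#include <stdatomic.h>
#include <stdio.h>

#define WORK_PENDING	0x1
#define WORK_RESCHED	0x2
#define WORK_SIGCHECK	0x4

static _Atomic unsigned work_flags;

static void
drain_asts(void)
{
	/* Snapshot and clear every flag in one atomic operation. */
	unsigned flags = atomic_fetch_and(&work_flags,
	    ~(WORK_PENDING | WORK_RESCHED | WORK_SIGCHECK));

	if (flags & WORK_RESCHED)
		printf("would call mi_switch()\n");
	if (flags & WORK_SIGCHECK)
		printf("would post signals\n");
}

int
main(void)
{
	atomic_fetch_or(&work_flags, WORK_PENDING | WORK_SIGCHECK);
	drain_asts();
	/* Work posted after the snapshot re-arms WORK_PENDING for a new pass. */
	return 0;
}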
/*
 * Exception, fault, and trap interface to the kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * This function is also called from doreti in an interlock to handle ASTs.
 * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap
 *
 * NOTE! We have to retrieve the fault address prior to potentially
 *	 blocking, including blocking on any token.
 *
 * NOTE! NMI and kernel DBG traps remain on their respective pcpu IST
 *	 stacks if taken from a kernel RPL.  trap() cannot block in this
 *	 situation.  DDB entry or a direct report-and-return is ok.
 *
 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from
 * panicking if an attempt is made to switch from a fast interrupt or IPI.
 */
void
trap(struct trapframe *frame)
{
	static struct krate sscpubugrate = { 1 };
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;
	clear_quickret();

#ifdef DDB
	/*
	 * We need to allow T_DNA faults when the debugger is active since
	 * some dumping paths do large bcopy() which use the floating
	 * point registers for faster copying.
	 */
	if (db_active && frame->tf_trapno != T_DNA) {
		eva = (frame->tf_trapno == T_PAGEFLT ? frame->tf_addr : 0);
		++gd->gd_trap_nesting_level;
		trap_fatal(frame, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	eva = 0;

	if ((frame->tf_rflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled interrupts
		 * and then trapped.  Enabling interrupts now is wrong, but
		 * it is better than running with interrupts disabled until
		 * they are accidentally enabled later.
		 */
		type = frame->tf_trapno;
		if (ISPL(frame->tf_cs) == SEL_UPL) {
			/* JG curproc can be NULL */
			kprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curproc->p_comm, type);
		} else if ((type == T_STKFLT || type == T_PROTFLT ||
			    type == T_SEGNPFLT) &&
			   frame->tf_rip == (long)doreti_iret) {
			/*
			 * iretq fault from kernel mode during return to
			 * userland.
			 *
			 * This situation is expected, don't complain.
			 */
		} else if (type != T_NMI && type != T_BPTFLT &&
			   type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			kprintf("kernel trap %d (%s @ 0x%016jx) with "
				"interrupts disabled\n",
				type, td->td_comm, frame->tf_rip);
		}
		cpu_enable_intr();
	}

	type = frame->tf_trapno;
	code = frame->tf_err;

	if (ISPL(frame->tf_cs) == SEL_UPL) {
		/* user trap */

		KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid,
			frame->tf_trapno, eva);

		userenter(td, p);

		sticks = (int)td->td_sticks;
		KASSERT(lp->lwp_md.md_regs == frame,
			("Frame mismatch %p %p", lp->lwp_md.md_regs, frame));

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			mycpu->gd_cnt.v_soft++;
			if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
				atomic_clear_int(&mycpu->gd_reqflags,
						 RQF_AST_OWEUPC);
				addupc_task(p, p->p_prof.pr_addr,
					    p->p_prof.pr_ticks);
			}
			goto out;

		case T_PROTFLT:		/* general protection fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_STKFLT:		/* stack fault */
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;

		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(frame, TRUE);
#ifdef DDB
			if (frame->tf_rip == 0) {
				/* used for kernel debugging only */
				while (freeze_on_seg_fault)
					tsleep(p, 0, "freeze", hz * 20);
			}
#endif
			if (i == -1 || i == 0)
				goto out;
			if (i == SIGSEGV) {
				ucode = SEGV_MAPERR;
			} else {
				i = SIGSEGV;
				ucode = SEGV_ACCERR;
			}
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/*
			 * Virtual kernel intercept - pass the DNA exception
			 * to the virtual kernel if it asked to handle it.
			 * This occurs when the virtual kernel is holding
			 * onto the FP context for a different emulated
			 * process than the one currently running.
			 *
			 * We must still call npxdna() since we may have
			 * saved FP state that the virtual kernel needs
			 * to hand over to a different emulated process.
			 */
			if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
			    (td->td_pcb->pcb_flags & FP_VIRTFP)) {
				npxdna();
				break;
			}

			/*
			 * The kernel may have switched out the FP unit's
			 * state, causing the user process to take a fault
			 * when it tries to use the FP unit.  Restore the
			 * state here.
			 */
			if (npxdna()) {
				gd->gd_cnt.v_trap++;
				goto out;
			}
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = 0; /* XXX */
			i = SIGFPE;
			break;
		}
	} else {
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:		/* page fault */
			trap_pfault(frame, FALSE);
			goto out2;

		case T_DNA:
			/*
			 * The kernel is apparently using fpu for copying.
			 * XXX this should be fatal unless the kernel has
			 * registered such use.
			 */
			if (npxdna()) {
				gd->gd_cnt.v_trap++;
				goto out2;
			}
			break;

		case T_STKFLT:		/* stack fault */
		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %rip's and %rsp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (mycpu->gd_intr_nesting_level == 0) {
				/*
				 * NOTE: in 64-bit mode traps push rsp/ss
				 *	 even if no ring change occurs.
				 */
				if (td->td_pcb->pcb_onfault &&
				    td->td_pcb->pcb_onfault_sp ==
				    frame->tf_rsp) {
					frame->tf_rip = (register_t)
						td->td_pcb->pcb_onfault;
					goto out2;
				}

				/*
				 * If the iretq in doreti faults during
				 * return to user, it will be special-cased
				 * in IDTVEC(prot) to get here.  We want
				 * to 'return' to doreti_iret_fault in
				 * ipl.s in approximately the same state we
				 * were in at the iretq.
				 */
				if (frame->tf_rip == (long)doreti_iret) {
					frame->tf_rip = (long)doreti_iret_fault;
					goto out2;
				}
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_rflags & PSL_NT) {
				frame->tf_rflags &= ~PSL_NT;
#if 0
				/* do we need this? */
				if (frame->tf_rip == (long)doreti_iret)
					frame->tf_rip = (long)doreti_iret_fault;
#endif
				goto out2;
			}
			break;

		case T_TRCTRAP:		/* trace trap */
			/*
			 * Detect historical CPU artifact on syscall or int $3
			 * entry (if not shortcutted in exception.s via
			 * DIRECT_DISALLOW_SS_CPUBUG).
			 */
			gd->gd_cnt.v_trap++;
			if (frame->tf_rip == (register_t)IDTVEC(fast_syscall)) {
				krateprintf(&sscpubugrate,
				    "Caught #DB at syscall cpu artifact\n");
				goto out2;
			}
			if (frame->tf_rip == (register_t)IDTVEC(bpt)) {
				krateprintf(&sscpubugrate,
				    "Caught #DB at int $N cpu artifact\n");
				goto out2;
			}

			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't.
				 */
				load_dr6(rdr6() & ~0xf);
				goto out2;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
			ucode = TRAP_BRKPT;
#ifdef DDB
			if (kdb_trap(type, 0, frame))
				goto out2;
#endif
			break;

#if NISA > 0
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi == 0)
				goto out2;
			/* FALL THROUGH */
#endif /* NISA > 0 */
		}
		trap_fatal(frame, 0);
		goto out2;
	}

	/*
	 * Fault from user mode, virtual kernel intercept.
	 *
	 * If the fault is directly related to a VM context managed by a
	 * virtual kernel then let the virtual kernel handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	gd->gd_cnt.v_trap++;
	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", frame->tf_addr);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2:	;
	if (p != NULL && lp != NULL)
		KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}
/*
 * Handle signals, upcalls, profiling, and other AST's and/or tasks that
 * must be completed before we can return to or try to return to userland.
 *
 * Note that td_sticks is a 64 bit quantity, but there's no point doing
 * 64 bit arithmetic on the delta calculation so the absolute tick values
 * are truncated to an integer.
 */
static void
userret(struct lwp *lp, struct trapframe *frame, int sticks)
{
	struct proc *p = lp->lwp_proc;
	int sig;
	int ptok;

	/*
	 * Charge system time if profiling.  Note: times are in microseconds.
	 * This may do a copyout and block, so do it first even though it
	 * means some system time will be charged as user time.
	 */
	if (p->p_flags & P_PROFIL) {
		addupc_task(p, frame->tf_rip,
			    (u_int)((int)lp->lwp_thread->td_sticks - sticks));
	}

recheck:
	/*
	 * Specific on-return-to-usermode checks (LWP_MP_WEXIT,
	 * LWP_MP_VNLRU, etc).
	 */
	if (lp->lwp_mpflags & LWP_MP_URETMASK)
		lwpuserret(lp);

	/*
	 * Block here if we are in a stopped state.
	 */
	if (STOPLWP(p, lp)) {
		lwkt_gettoken(&p->p_token);
		tstop();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}
	while (dump_stop_usertds) {
		tsleep(&dump_stop_usertds, 0, "dumpstp", 0);
	}

	/*
	 * Post any pending upcalls.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the upcall.
	 */
	if (p->p_flags & (P_SIGVTALRM | P_SIGPROF)) {
		lwkt_gettoken(&p->p_token);
		if (p->p_flags & P_SIGVTALRM) {
			p->p_flags &= ~P_SIGVTALRM;
			ksignal(p, SIGVTALRM);
		}
		if (p->p_flags & P_SIGPROF) {
			p->p_flags &= ~P_SIGPROF;
			ksignal(p, SIGPROF);
		}
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending signals.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the signal.
	 *
	 * WARNING!  postsig() can exit and not return.
	 */
	if ((sig = CURSIG_LCK_TRACE(lp, &ptok)) != 0) {
		postsig(sig, ptok);
		goto recheck;
	}

	/*
	 * Block here if we are swapped out, but still process signals
	 * (such as SIGKILL).  proc0 (the swapin scheduler) is already
	 * aware of our situation, we do not have to wake it up.
	 */
	if (p->p_flags & P_SWAPPEDOUT) {
		lwkt_gettoken(&p->p_token);
		p->p_flags |= P_SWAPWAIT;
		swapin_request();
		if (p->p_flags & P_SWAPWAIT)
			tsleep(p, PCATCH, "SWOUT", 0);
		p->p_flags &= ~P_SWAPWAIT;
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * In a multi-threaded program it is possible for a thread to change
	 * signal state during a system call which temporarily changes the
	 * signal mask.  In this case postsig() might not be run and we
	 * have to restore the mask ourselves.
	 */
	if (lp->lwp_flags & LWP_OLDMASK) {
		lp->lwp_flags &= ~LWP_OLDMASK;
		lp->lwp_sigmask = lp->lwp_oldsigmask;
		goto recheck;
	}
}
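/*
 * A minimal standalone sketch of the "recheck" pattern above: each
 * pending condition is handled and then the whole chain is re-evaluated
 * from the top, because handling one condition (e.g. posting a signal)
 * can block and allow new conditions to appear.  The function only falls
 * through to return when a full pass finds nothing left to do.  All
 * names below are illustrative, not kernel symbols.
 */
#include <stdbool.h>
#include <stdio.h>

static bool stop_requested = true;
static int pending_signals = 2;

static void
userret_sketch(void)
{
recheck:
	if (stop_requested) {
		stop_requested = false;	/* tstop() would block here */
		printf("handled stop request\n");
		goto recheck;		/* state may have changed while stopped */
	}
	if (pending_signals > 0) {
		pending_signals--;	/* postsig() may block or even exit */
		printf("posted a signal\n");
		goto recheck;
	}
	/* Nothing pending on a full pass: safe to return to user mode. */
}

int
main(void)
{
	userret_sketch();
	return 0;
}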
void
trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;
	int have_mplock = 0;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;
#ifdef DDB
	/*
	 * We need to allow T_DNA faults when the debugger is active since
	 * some dumping paths do large bcopy() which use the floating
	 * point registers for faster copying.
	 */
	if (db_active && frame->tf_trapno != T_DNA) {
		eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0);
		++gd->gd_trap_nesting_level;
		MAKEMPSAFE(have_mplock);
		trap_fatal(frame, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	eva = 0;
	++gd->gd_trap_nesting_level;
	if (frame->tf_trapno == T_PAGEFLT) {
		/*
		 * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
		 * This problem is worked around by using an interrupt
		 * gate for the pagefault handler.  We are finally ready
		 * to read %cr2 and then must reenable interrupts.
		 *
		 * XXX this should be in the switch statement, but the
		 * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
		 * flow of control too much for this to be obviously
		 * correct.
		 */
		eva = rcr2();
		cpu_enable_intr();
	}
	--gd->gd_trap_nesting_level;

	if (!(frame->tf_eflags & PSL_I)) {
		/*
		 * Buggy application or kernel code has disabled interrupts
		 * and then trapped.  Enabling interrupts now is wrong, but
		 * it is better than running with interrupts disabled until
		 * they are accidentally enabled later.
		 */
		type = frame->tf_trapno;
		if (ISPL(frame->tf_cs) == SEL_UPL ||
		    (frame->tf_eflags & PSL_VM)) {
			MAKEMPSAFE(have_mplock);
			kprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curproc->p_comm, type);
		} else if (type != T_BPTFLT && type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			MAKEMPSAFE(have_mplock);
			kprintf("kernel trap %d with interrupts disabled\n",
			    type);
		}
		cpu_enable_intr();
	}

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
	type = frame->tf_trapno;
	code = frame->tf_err;

	if (in_vm86call) {
		if (frame->tf_eflags & PSL_VM &&
		    (type == T_PROTFLT || type == T_STKFLT)) {
			KKASSERT(get_mplock_count(curthread) > 0);
			i = vm86_emulate((struct vm86frame *)frame);
			KKASSERT(get_mplock_count(curthread) > 0);
			if (i != 0) {
				/*
				 * returns to original process
				 */
				vm86_trap((struct vm86frame *)frame,
					  have_mplock);
				KKASSERT(0); /* NOT REACHED */
			}
			goto out2;
		}
		switch (type) {
		/*
		 * these traps want either a process context, or
		 * assume a normal userspace trap.
		 */
		case T_PROTFLT:
		case T_SEGNPFLT:
			trap_fatal(frame, eva);
			goto out2;
		case T_TRCTRAP:
			type = T_BPTFLT;	/* kernel breakpoint */
			/* FALL THROUGH */
		}
		goto kernel_trap;	/* normal kernel trap handling */
	}

	if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
		/* user trap */

		KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid,
			frame->tf_trapno, eva);

		userenter(td, p);

		sticks = (int)td->td_sticks;
		lp->lwp_md.md_regs = frame;

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame->tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			mycpu->gd_cnt.v_soft++;
			if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
				atomic_clear_int(&mycpu->gd_reqflags,
						 RQF_AST_OWEUPC);
				addupc_task(p, p->p_prof.pr_addr,
					    p->p_prof.pr_ticks);
			}
			goto out;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i == 0)
					goto out;
				break;
			}
			i = SIGBUS;
			ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
			break;

		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;

		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(frame, TRUE, eva);
			if (i == -1)
				goto out;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			if (i == -2)
				goto restart;
#endif
			if (i == 0)
				goto out;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				i = SIGSEGV;
				ucode = SEGV_ACCERR;
			}
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
			MAKEMPSAFE(have_mplock);
#ifdef POWERFAIL_NMI
			goto handle_powerfail;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/*
			 * Virtual kernel intercept - pass the DNA exception
			 * to the virtual kernel if it asked to handle it.
			 * This occurs when the virtual kernel is holding
			 * onto the FP context for a different emulated
			 * process than the one currently running.
			 *
			 * We must still call npxdna() since we may have
			 * saved FP state that the virtual kernel needs
			 * to hand over to a different emulated process.
			 */
			if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
			    (td->td_pcb->pcb_flags & FP_VIRTFP)) {
				npxdna();
				break;
			}

#if NNPX > 0
			/*
			 * The kernel may have switched out the FP unit's
			 * state, causing the user process to take a fault
			 * when it tries to use the FP unit.  Restore the
			 * state here.
			 */
			if (npxdna())
				goto out;
#endif
			if (!pmath_emulate) {
				i = SIGFPE;
				ucode = FPE_FPU_NP_TRAP;
				break;
			}
			i = (*pmath_emulate)(frame);
			if (i == 0) {
				if (!(frame->tf_eflags & PSL_T))
					goto out2;
				frame->tf_eflags &= ~PSL_T;
				i = SIGTRAP;
			}
			/* else ucode = emulator_only_knows() XXX */
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = 0; /* XXX */
			i = SIGFPE;
			break;
		}
	} else {
kernel_trap:
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:		/* page fault */
			trap_pfault(frame, FALSE, eva);
			goto out2;

		case T_DNA:
#if NNPX > 0
			/*
			 * The kernel may be using npx for copying or other
			 * purposes.
			 */
			if (npxdna())
				goto out2;
#endif
			break;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
#define MAYBE_DORETI_FAULT(where, whereto)				\
	do {								\
		if (frame->tf_eip == (int)where) {			\
			frame->tf_eip = (int)whereto;			\
			goto out2;					\
		}							\
	} while (0)
			if (mycpu->gd_intr_nesting_level == 0) {
				/*
				 * Invalid %fs's and %gs's can be created using
				 * procfs or PT_SETREGS or by invalidating the
				 * underlying LDT entry.  This causes a fault
				 * in kernel mode when the kernel attempts to
				 * switch contexts.  Lose the bad context
				 * (XXX) so that we can continue, and generate
				 * a signal.
				 */
				MAYBE_DORETI_FAULT(doreti_iret,
						   doreti_iret_fault);
				MAYBE_DORETI_FAULT(doreti_popl_ds,
						   doreti_popl_ds_fault);
				MAYBE_DORETI_FAULT(doreti_popl_es,
						   doreti_popl_es_fault);
				MAYBE_DORETI_FAULT(doreti_popl_fs,
						   doreti_popl_fs_fault);
				MAYBE_DORETI_FAULT(doreti_popl_gs,
						   doreti_popl_gs_fault);
				/*
				 * NOTE: cpu doesn't push esp on kernel trap
				 */
				if (td->td_pcb->pcb_onfault &&
				    td->td_pcb->pcb_onfault_sp ==
				    (int)&frame->tf_esp) {
					frame->tf_eip =
					    (register_t)td->td_pcb->pcb_onfault;
					goto out2;
				}
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_eflags & PSL_NT) {
				frame->tf_eflags &= ~PSL_NT;
				goto out2;
			}
			break;

		case T_TRCTRAP:		/* trace trap */
			if (frame->tf_eip == (int)IDTVEC(syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				goto out2;
			}
			if (frame->tf_eip == (int)IDTVEC(syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame->tf_eflags &= ~PSL_T;
				goto out2;
			}
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't.
				 */
				load_dr6(rdr6() & 0xfffffff0);
				goto out2;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
			ucode = TRAP_BRKPT;
#ifdef DDB
			MAKEMPSAFE(have_mplock);
			if (kdb_trap(type, 0, frame))
				goto out2;
#endif
			break;

#if NISA > 0
		case T_NMI:
			MAKEMPSAFE(have_mplock);
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
#define TIMER_FREQ	1193182
#endif
handle_powerfail:
			{
				static unsigned lastalert = 0;

				if (time_uptime - lastalert > 10) {
					log(LOG_WARNING, "NMI: power fail\n");
					sysbeep(TIMER_FREQ/880, hz);
					lastalert = time_uptime;
				}
				/* YYY mp count */
				goto out2;
			}
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi == 0)
				goto out2;
			/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
		}
		MAKEMPSAFE(have_mplock);
		trap_fatal(frame, eva);
		goto out2;
	}

	/*
	 * Virtual kernel intercept - if the fault is directly related to a
	 * VM context managed by a virtual kernel then let the virtual kernel
	 * handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	MAKEMPSAFE(have_mplock);
	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2:	;
	if (have_mplock)
		rel_mplock();
	if (p != NULL && lp != NULL)
		KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %zd/%zd",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}
void
user_trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;
	int have_mplock = 0;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;

	/*
	 * This is a bad kludge to avoid changing the various trapframe
	 * structures.  Because we are enabled as a virtual kernel,
	 * the original tf_err field will be passed to us shifted 16
	 * over in the tf_trapno field for T_PAGEFLT.
	 */
	if (frame->tf_trapno == T_PAGEFLT)
		eva = frame->tf_err;
	else
		eva = 0;
#if 0
	kprintf("USER_TRAP AT %08x xflags %d trapno %d eva %08x\n",
		frame->tf_eip, frame->tf_xflags, frame->tf_trapno, eva);
#endif

	/*
	 * Everything coming from user mode runs through user_trap,
	 * including system calls.
	 */
	if (frame->tf_trapno == T_SYSCALL80) {
		syscall2(frame);
		return;
	}

	KTR_LOG(kernentry_trap, lp->lwp_proc->p_pid, lp->lwp_tid,
		frame->tf_trapno, eva);

#ifdef DDB
	if (db_active) {
		eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0);
		++gd->gd_trap_nesting_level;
		MAKEMPSAFE(have_mplock);
		trap_fatal(frame, TRUE, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
	type = frame->tf_trapno;
	code = frame->tf_err;

	userenter(td, p);

	sticks = (int)td->td_sticks;
	lp->lwp_md.md_regs = frame;

	switch (type) {
	case T_PRIVINFLT:	/* privileged instruction fault */
		i = SIGILL;
		ucode = ILL_PRVOPC;
		break;

	case T_BPTFLT:		/* bpt instruction fault */
	case T_TRCTRAP:		/* trace trap */
		frame->tf_eflags &= ~PSL_T;
		i = SIGTRAP;
		ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
		break;

	case T_ARITHTRAP:	/* arithmetic trap */
		ucode = code;
		i = SIGFPE;
		break;

	case T_ASTFLT:		/* Allow process switch */
		mycpu->gd_cnt.v_soft++;
		if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
			atomic_clear_int(&mycpu->gd_reqflags,
					 RQF_AST_OWEUPC);
			addupc_task(p, p->p_prof.pr_addr,
				    p->p_prof.pr_ticks);
		}
		goto out;

		/*
		 * The following two traps can happen in
		 * vm86 mode, and, if so, we want to handle
		 * them specially.
		 */
	case T_PROTFLT:		/* general protection fault */
	case T_STKFLT:		/* stack fault */
#if 0
		if (frame->tf_eflags & PSL_VM) {
			i = vm86_emulate((struct vm86frame *)frame);
			if (i == 0)
				goto out;
			break;
		}
#endif
		i = SIGBUS;
		ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
		break;

	case T_SEGNPFLT:	/* segment not present fault */
		i = SIGBUS;
		ucode = BUS_ADRERR;
		break;

	case T_TSSFLT:		/* invalid TSS fault */
	case T_DOUBLEFLT:	/* double fault */
	default:
		i = SIGBUS;
		ucode = BUS_OBJERR;
		break;

	case T_PAGEFLT:		/* page fault */
		MAKEMPSAFE(have_mplock);
		i = trap_pfault(frame, TRUE, eva);
		if (i == -1)
			goto out;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
		if (i == -2)
			goto restart;
#endif
		if (i == 0)
			goto out;

		if (i == SIGSEGV)
			ucode = SEGV_MAPERR;
		else {
			i = SIGSEGV;
			ucode = SEGV_ACCERR;
		}
		break;

	case T_DIVIDE:		/* integer divide fault */
		ucode = FPE_INTDIV;
		i = SIGFPE;
		break;

#if NISA > 0
	case T_NMI:
		MAKEMPSAFE(have_mplock);
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi(code) == 0) {
#ifdef DDB
			/*
			 * NMI can be hooked up to a pushbutton
			 * for debugging.
			 */
			if (ddb_on_nmi) {
				kprintf("NMI ... going to debugger\n");
				kdb_trap(type, 0, frame);
			}
#endif /* DDB */
			goto out2;
		} else if (panic_on_nmi)
			panic("NMI indicates hardware failure");
		break;
#endif /* NISA > 0 */

	case T_OFLOW:		/* integer overflow fault */
		ucode = FPE_INTOVF;
		i = SIGFPE;
		break;

	case T_BOUND:		/* bounds check fault */
		ucode = FPE_FLTSUB;
		i = SIGFPE;
		break;

	case T_DNA:
		/*
		 * Virtual kernel intercept - pass the DNA exception
		 * to the (emulated) virtual kernel if it asked to handle
		 * it.  This occurs when the virtual kernel is holding
		 * onto the FP context for a different emulated
		 * process than the one currently running.
		 *
		 * We must still call npxdna() since we may have
		 * saved FP state that the (emulated) virtual kernel
		 * needs to hand over to a different emulated process.
		 */
		if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
		    (td->td_pcb->pcb_flags & FP_VIRTFP)) {
			npxdna(frame);
			break;
		}

#if NNPX > 0
		/*
		 * The kernel may have switched out the FP unit's
		 * state, causing the user process to take a fault
		 * when it tries to use the FP unit.  Restore the
		 * state here.
		 */
		if (npxdna(frame))
			goto out;
#endif
		if (!pmath_emulate) {
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
			break;
		}
		i = (*pmath_emulate)(frame);
		if (i == 0) {
			if (!(frame->tf_eflags & PSL_T))
				goto out2;
			frame->tf_eflags &= ~PSL_T;
			i = SIGTRAP;
		}
		/* else ucode = emulator_only_knows() XXX */
		break;

	case T_FPOPFLT:		/* FPU operand fetch fault */
		ucode = ILL_COPROC;
		i = SIGILL;
		break;

	case T_XMMFLT:		/* SIMD floating-point exception */
		ucode = 0; /* XXX */
		i = SIGFPE;
		break;
	}

	/*
	 * Virtual kernel intercept - if the fault is directly related to a
	 * VM context managed by a virtual kernel then let the virtual kernel
	 * handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/*
	 * Translate fault for emulators (e.g. Linux)
	 */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	MAKEMPSAFE(have_mplock);
	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2:	;
	if (have_mplock)
		rel_mplock();
	KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %zd/%zd",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}
/*
 * Handle signals, upcalls, profiling, and other AST's and/or tasks that
 * must be completed before we can return to or try to return to userland.
 *
 * Note that td_sticks is a 64 bit quantity, but there's no point doing
 * 64 bit arithmetic on the delta calculation so the absolute tick values
 * are truncated to an integer.
 */
static void
userret(struct lwp *lp, struct trapframe *frame, int sticks)
{
	struct proc *p = lp->lwp_proc;
	void (*hook)(void);
	int sig;

	if (p->p_userret != NULL) {
		hook = p->p_userret;
		p->p_userret = NULL;
		(*hook)();
	}

	/*
	 * Charge system time if profiling.  Note: times are in microseconds.
	 * This may do a copyout and block, so do it first even though it
	 * means some system time will be charged as user time.
	 */
	if (p->p_flags & P_PROFIL) {
		addupc_task(p, frame->tf_eip,
			    (u_int)((int)lp->lwp_thread->td_sticks - sticks));
	}

recheck:
	/*
	 * If the jungle wants us dead, so be it.
	 */
	if (lp->lwp_mpflags & LWP_MP_WEXIT) {
		lwkt_gettoken(&p->p_token);
		lwp_exit(0);
		lwkt_reltoken(&p->p_token);	/* NOT REACHED */
	}

	/*
	 * Block here if we are in a stopped state.
	 */
	if (p->p_stat == SSTOP || dump_stop_usertds) {
		lwkt_gettoken(&p->p_token);
		tstop();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending upcalls.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the upcall.
	 */
	if (p->p_flags & (P_SIGVTALRM | P_SIGPROF | P_UPCALLPEND)) {
		lwkt_gettoken(&p->p_token);
		if (p->p_flags & P_SIGVTALRM) {
			p->p_flags &= ~P_SIGVTALRM;
			ksignal(p, SIGVTALRM);
		}
		if (p->p_flags & P_SIGPROF) {
			p->p_flags &= ~P_SIGPROF;
			ksignal(p, SIGPROF);
		}
		if (p->p_flags & P_UPCALLPEND) {
			p->p_flags &= ~P_UPCALLPEND;
			postupcall(lp);
		}
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending signals.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the signal.
	 *
	 * WARNING!  postsig() can exit and not return.
	 */
	if ((sig = CURSIG_TRACE(lp)) != 0) {
		lwkt_gettoken(&p->p_token);
		postsig(sig);
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Block here if we are swapped out, but still process signals
	 * (such as SIGKILL).  proc0 (the swapin scheduler) is already
	 * aware of our situation, we do not have to wake it up.
	 */
	if (p->p_flags & P_SWAPPEDOUT) {
		lwkt_gettoken(&p->p_token);
		get_mplock();
		p->p_flags |= P_SWAPWAIT;
		swapin_request();
		if (p->p_flags & P_SWAPWAIT)
			tsleep(p, PCATCH, "SWOUT", 0);
		p->p_flags &= ~P_SWAPWAIT;
		rel_mplock();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * In a multi-threaded program it is possible for a thread to change
	 * signal state during a system call which temporarily changes the
	 * signal mask.  In this case postsig() might not be run and we
	 * have to restore the mask ourselves.
	 */
	if (lp->lwp_flags & LWP_OLDMASK) {
		lp->lwp_flags &= ~LWP_OLDMASK;
		lp->lwp_sigmask = lp->lwp_oldsigmask;
		goto recheck;
	}
}