static int db_ss(struct db_variable *vp, db_expr_t *valuep, int op) { if (kdb_frame == NULL) return (0); if (op == DB_VAR_GET) *valuep = (ISPL(kdb_frame->tf_cs)) ? kdb_frame->tf_ss : rss(); else if (ISPL(kdb_frame->tf_cs)) kdb_frame->tf_ss = *valuep; return (1); }
static int db_rsp(struct db_variable *vp, db_expr_t *valuep, int op) { if (kdb_frame == NULL) return (0); if (op == DB_VAR_GET) *valuep = get_rsp(kdb_frame); else if (ISPL(kdb_frame->tf_cs)) kdb_frame->tf_rsp = *valuep; return (1); }
void _test_frame_enter(struct trapframe *frame) { thread_t td = curthread; if (ISPL(frame->tf_cs) == SEL_UPL) { KKASSERT(td->td_lwp); KASSERT(td->td_lwp->lwp_md.md_regs == frame, ("_test_frame_exit: Frame mismatch %p %p", td->td_lwp->lwp_md.md_regs, frame)); td->td_lwp->lwp_saveusp = (void *)frame->tf_rsp; td->td_lwp->lwp_saveupc = (void *)frame->tf_rip; } if ((char *)frame < td->td_kstack || (char *)frame > td->td_kstack + td->td_kstack_size) { panic("_test_frame_exit: frame not on kstack %p kstack=%p\n", frame, td->td_kstack); } }
void _test_frame_exit(struct trapframe *frame) { thread_t td = curthread; if (ISPL(frame->tf_cs) == SEL_UPL) { KKASSERT(td->td_lwp); KASSERT(td->td_lwp->lwp_md.md_regs == frame, ("_test_frame_exit: Frame mismatch %p %p", td->td_lwp->lwp_md.md_regs, frame)); if (td->td_lwp->lwp_saveusp != (void *)frame->tf_rsp) { kprintf("_test_frame_exit: %s:%d usp mismatch %p/%p\n", td->td_comm, td->td_proc->p_pid, td->td_lwp->lwp_saveusp, (void *)frame->tf_rsp); } if (td->td_lwp->lwp_saveupc != (void *)frame->tf_rip) { kprintf("_test_frame_exit: %s:%d upc mismatch %p/%p\n", td->td_comm, td->td_proc->p_pid, td->td_lwp->lwp_saveupc, (void *)frame->tf_rip); } /* * adulterate the fields to catch entries that * don't run through test_frame_enter */ td->td_lwp->lwp_saveusp = (void *)~(intptr_t)td->td_lwp->lwp_saveusp; td->td_lwp->lwp_saveupc = (void *)~(intptr_t)td->td_lwp->lwp_saveupc; } if ((char *)frame < td->td_kstack || (char *)frame > td->td_kstack + td->td_kstack_size) { panic("_test_frame_exit: frame not on kstack %p kstack=%p\n", frame, td->td_kstack); } }
static int fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr) { proc_t *p = curproc; #ifdef __i386__ struct segment_descriptor *desc; #else struct user_segment_descriptor *desc; #endif uint16_t sel = 0, ndx, type; uintptr_t limit; switch (tp->ftt_segment) { case FASTTRAP_SEG_CS: sel = rp->r_cs; break; case FASTTRAP_SEG_DS: sel = rp->r_ds; break; case FASTTRAP_SEG_ES: sel = rp->r_es; break; case FASTTRAP_SEG_FS: sel = rp->r_fs; break; case FASTTRAP_SEG_GS: sel = rp->r_gs; break; case FASTTRAP_SEG_SS: sel = rp->r_ss; break; } /* * Make sure the given segment register specifies a user priority * selector rather than a kernel selector. */ if (ISPL(sel) != SEL_UPL) return (-1); ndx = IDXSEL(sel); /* * Check the bounds and grab the descriptor out of the specified * descriptor table. */ if (ISLDT(sel)) { #ifdef __i386__ if (ndx > p->p_md.md_ldt->ldt_len) return (-1); desc = (struct segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #else if (ndx > max_ldt_segment) return (-1); desc = (struct user_segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #endif } else { if (ndx >= NGDT) return (-1); #ifdef __i386__ desc = &gdt[ndx].sd; #else desc = &gdt[ndx]; #endif } /* * The descriptor must have user privilege level and it must be * present in memory. */ if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1) return (-1); type = desc->sd_type; /* * If the S bit in the type field is not set, this descriptor can * only be used in system context. */ if ((type & 0x10) != 0x10) return (-1); limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1); if (tp->ftt_segment == FASTTRAP_SEG_CS) { /* * The code/data bit and readable bit must both be set. */ if ((type & 0xa) != 0xa) return (-1); if (*addr > limit) return (-1); } else { /* * The code/data bit must be clear. */ if ((type & 0x8) != 0) return (-1); /* * If the expand-down bit is clear, we just check the limit as * it would naturally be applied. Otherwise, we need to check * that the address is the range [limit + 1 .. 0xffff] or * [limit + 1 ... 0xffffffff] depending on if the default * operand size bit is set. */ if ((type & 0x4) == 0) { if (*addr > limit) return (-1); } else if (desc->sd_def32) { if (*addr < limit + 1 || 0xffff < *addr) return (-1); } else { if (*addr < limit + 1 || 0xffffffff < *addr) return (-1); } } *addr += USD_GETBASE(desc); return (0); }
static __inline long get_rsp(struct trapframe *tf) { return ((ISPL(tf->tf_cs)) ? tf->tf_rsp : (db_expr_t)tf + offsetof(struct trapframe, tf_rsp)); }
static void trap_fatal(struct trapframe *frame, vm_offset_t eva) { int code, ss; u_int type; long rsp; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); /* three separate prints in case of a trap on an unmapped page */ kprintf("cpuid = %d; ", mycpu->gd_cpuid); if (lapic_usable) kprintf("lapic id = %u\n", LAPIC_READID); if (type == T_PAGEFLT) { kprintf("fault virtual address = 0x%lx\n", eva); kprintf("fault code = %s %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_P ? "protection violation" : "page not present"); } kprintf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); if (ISPL(frame->tf_cs) == SEL_UPL) { ss = frame->tf_ss & 0xffff; rsp = frame->tf_rsp; } else { /* * NOTE: in 64-bit mode traps push rsp/ss even if no ring * change occurs. */ ss = GSEL(GDATA_SEL, SEL_KPL); rsp = frame->tf_rsp; } kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp); kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); kprintf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); kprintf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); kprintf("processor eflags = "); if (frame->tf_rflags & PSL_T) kprintf("trace trap, "); if (frame->tf_rflags & PSL_I) kprintf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) kprintf("nested task, "); if (frame->tf_rflags & PSL_RF) kprintf("resume, "); kprintf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); kprintf("current process = "); if (curproc) { kprintf("%lu\n", (u_long)curproc->p_pid); } else { kprintf("Idle\n"); } kprintf("current thread = pri %d ", curthread->td_pri); if (curthread->td_critcount) kprintf("(CRIT)"); kprintf("\n"); #ifdef DDB if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) return; #endif kprintf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); }
/* * Exception, fault, and trap interface to the kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. * * This function is also called from doreti in an interlock to handle ASTs. * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap * * NOTE! We have to retrieve the fault address prior to potentially * blocking, including blocking on any token. * * NOTE! NMI and kernel DBG traps remain on their respective pcpu IST * stacks if taken from a kernel RPL. trap() cannot block in this * situation. DDB entry or a direct report-and-return is ok. * * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing * if an attempt is made to switch from a fast interrupt or IPI. */ void trap(struct trapframe *frame) { static struct krate sscpubugrate = { 1 }; struct globaldata *gd = mycpu; struct thread *td = gd->gd_curthread; struct lwp *lp = td->td_lwp; struct proc *p; int sticks = 0; int i = 0, ucode = 0, type, code; #ifdef INVARIANTS int crit_count = td->td_critcount; lwkt_tokref_t curstop = td->td_toks_stop; #endif vm_offset_t eva; p = td->td_proc; clear_quickret(); #ifdef DDB /* * We need to allow T_DNA faults when the debugger is active since * some dumping paths do large bcopy() which use the floating * point registers for faster copying. */ if (db_active && frame->tf_trapno != T_DNA) { eva = (frame->tf_trapno == T_PAGEFLT ? frame->tf_addr : 0); ++gd->gd_trap_nesting_level; trap_fatal(frame, eva); --gd->gd_trap_nesting_level; goto out2; } #endif eva = 0; if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled interrupts * and then trapped. Enabling interrupts now is wrong, but * it is better than running with interrupts disabled until * they are accidentally enabled later. */ type = frame->tf_trapno; if (ISPL(frame->tf_cs) == SEL_UPL) { /* JG curproc can be NULL */ kprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curproc->p_comm, type); } else if ((type == T_STKFLT || type == T_PROTFLT || type == T_SEGNPFLT) && frame->tf_rip == (long)doreti_iret) { /* * iretq fault from kernel mode during return to * userland. * * This situation is expected, don't complain. */ } else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ kprintf("kernel trap %d (%s @ 0x%016jx) with " "interrupts disabled\n", type, td->td_comm, frame->tf_rip); } cpu_enable_intr(); } type = frame->tf_trapno; code = frame->tf_err; if (ISPL(frame->tf_cs) == SEL_UPL) { /* user trap */ KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid, frame->tf_trapno, eva); userenter(td, p); sticks = (int)td->td_sticks; KASSERT(lp->lwp_md.md_regs == frame, ("Frame mismatch %p %p", lp->lwp_md.md_regs, frame)); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ frame->tf_rflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = code; i = SIGFPE; break; case T_ASTFLT: /* Allow process switch */ mycpu->gd_cnt.v_soft++; if (mycpu->gd_reqflags & RQF_AST_OWEUPC) { atomic_clear_int(&mycpu->gd_reqflags, RQF_AST_OWEUPC); addupc_task(p, p->p_prof.pr_addr, p->p_prof.pr_ticks); } goto out; case T_PROTFLT: /* general protection fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ i = trap_pfault(frame, TRUE); #ifdef DDB if (frame->tf_rip == 0) { /* used for kernel debugging only */ while (freeze_on_seg_fault) tsleep(p, 0, "freeze", hz * 20); } #endif if (i == -1 || i == 0) goto out; if (i == SIGSEGV) { ucode = SEGV_MAPERR; } else { i = SIGSEGV; ucode = SEGV_ACCERR; } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #if NISA > 0 case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef DDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (ddb_on_nmi) { kprintf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* DDB */ goto out2; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; #endif /* NISA > 0 */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: /* * Virtual kernel intercept - pass the DNA exception * to the virtual kernel if it asked to handle it. * This occurs when the virtual kernel is holding * onto the FP context for a different emulated * process then the one currently running. * * We must still call npxdna() since we may have * saved FP state that the virtual kernel needs * to hand over to a different emulated process. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve && (td->td_pcb->pcb_flags & FP_VIRTFP) ) { npxdna(); break; } /* * The kernel may have switched out the FP unit's * state, causing the user process to take a fault * when it tries to use the FP unit. Restore the * state here */ if (npxdna()) { gd->gd_cnt.v_trap++; goto out; } i = SIGFPE; ucode = FPE_FPU_NP_TRAP; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = 0; /* XXX */ i = SIGFPE; break; } } else { /* kernel trap */ switch (type) { case T_PAGEFLT: /* page fault */ trap_pfault(frame, FALSE); goto out2; case T_DNA: /* * The kernel is apparently using fpu for copying. * XXX this should be fatal unless the kernel has * registered such use. */ if (npxdna()) { gd->gd_cnt.v_trap++; goto out2; } break; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (mycpu->gd_intr_nesting_level == 0) { /* * NOTE: in 64-bit mode traps push rsp/ss * even if no ring change occurs. */ if (td->td_pcb->pcb_onfault && td->td_pcb->pcb_onfault_sp == frame->tf_rsp) { frame->tf_rip = (register_t) td->td_pcb->pcb_onfault; goto out2; } /* * If the iretq in doreti faults during * return to user, it will be special-cased * in IDTVEC(prot) to get here. We want * to 'return' to doreti_iret_fault in * ipl.s in approximately the same state we * were in at the iretq. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; goto out2; } } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; #if 0 /* do we need this? */ if (frame->tf_rip == (long)doreti_iret) frame->tf_rip = (long)doreti_iret_fault; #endif goto out2; } break; case T_TRCTRAP: /* trace trap */ /* * Detect historical CPU artifact on syscall or int $3 * entry (if not shortcutted in exception.s via * DIRECT_DISALLOW_SS_CPUBUG). */ gd->gd_cnt.v_trap++; if (frame->tf_rip == (register_t)IDTVEC(fast_syscall)) { krateprintf(&sscpubugrate, "Caught #DB at syscall cpu artifact\n"); goto out2; } if (frame->tf_rip == (register_t)IDTVEC(bpt)) { krateprintf(&sscpubugrate, "Caught #DB at int $N cpu artifact\n"); goto out2; } /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap()) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & ~0xf); goto out2; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If DDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ ucode = TRAP_BRKPT; #ifdef DDB if (kdb_trap(type, 0, frame)) goto out2; #endif break; #if NISA > 0 case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef DDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (ddb_on_nmi) { kprintf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* DDB */ goto out2; } else if (panic_on_nmi == 0) goto out2; /* FALL THROUGH */ #endif /* NISA > 0 */ } trap_fatal(frame, 0); goto out2; } /* * Fault from user mode, virtual kernel interecept. * * If the fault is directly related to a VM context managed by a * virtual kernel then let the virtual kernel handle it. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { vkernel_trap(lp, frame); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); gd->gd_cnt.v_trap++; trapsignal(lp, i, ucode); #ifdef DEBUG if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%lx", frame->tf_addr); uprintf("\n"); } #endif out: userret(lp, frame, sticks); userexit(lp); out2: ; if (p != NULL && lp != NULL) KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, ("trap: critical section count mismatch! %d/%d", crit_count, td->td_pri)); KASSERT(curstop == td->td_toks_stop, ("trap: extra tokens held after trap! %ld/%ld", curstop - &td->td_toks_base, td->td_toks_stop - &td->td_toks_base)); #endif }
void trap(struct trapframe *frame) { struct thread *td = curthread; struct proc *p = td->td_proc; int i = 0, ucode = 0, code; u_int type; register_t addr = 0; ksiginfo_t ksi; PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP #ifdef STOP_NMI /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } #endif /* STOP_NMI */ #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A return value of '1' from the handler means that * the NMI was handled by it and we can return immediately. */ if (type == T_NMI && pmc_intr && (*pmc_intr)(PCPU_GET(cpuid), frame)) goto out; #endif if (type == T_MCHK) { if (!mca_intr()) trap_fatal(frame, 0); goto out; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in it's per-cpu flags to indicate that it doesn't * want to fault. On returning from the the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * If the DTrace kernel module has registered a trap handler, * call it and if it returns non-zero, assume that it has * handled the trap and modified the trap frame so that this * function can return normally. */ if (dtrace_trap_func != NULL) if ((*dtrace_trap_func)(frame, type)) goto out; #endif if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (ISPL(frame->tf_cs) == SEL_UPL) printf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curproc->p_comm, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock or servicing an NMI. */ if (type != T_NMI && td->td_md.md_spinlock_count == 0) enable_intr(); } } code = frame->tf_err; if (type == T_PAGEFLT) { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) trap_fatal(frame, frame->tf_addr); } if (ISPL(frame->tf_cs) == SEL_UPL) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_ucred != p->p_ucred) cred_update_thread(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); frame->tf_rflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap(); if (ucode == -1) goto userout; i = SIGFPE; break; case T_PROTFLT: /* general protection fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ addr = frame->tf_addr; #ifdef KSE if (td->td_pflags & TDP_SA) thread_user_enter(td); #endif i = trap_pfault(frame, TRUE); if (i == -1) goto userout; if (i == 0) goto user; if (i == SIGSEGV) ucode = SEGV_MAPERR; else { if (prot_fault_translation == 0) { /* * Autodetect. * This check also covers the images * without the ABI-tag ELF note. */ if ((curproc->p_sysent == &elf64_freebsd_sysvec #ifdef COMPAT_IA32 || curproc->p_sysent == &ia32_freebsd_sysvec #endif ) && p->p_osrel >= 700004) { i = SIGSEGV; ucode = SEGV_ACCERR; } else { i = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ i = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ i = SIGSEGV; ucode = SEGV_ACCERR; } } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #ifdef DEV_ISA case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ /* XXX Giant */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto userout; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ fpudna(); goto userout; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = 0; /* XXX */ i = SIGFPE; break; } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); goto out; case T_DNA: /* * The kernel is apparently using fpu for copying. * XXX this should be fatal unless the kernel has * registered such use. */ fpudna(); printf("fpudna in kernel mode!\n"); goto out; case T_STKFLT: /* stack fault */ break; case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; goto out; } if (PCPU_GET(curpcb)->pcb_onfault != NULL) { frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault; goto out; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; goto out; } break; case T_TRCTRAP: /* trace trap */ /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap()) { /* * Reset breakpoint bits because the * processor doesn't */ /* XXX check upper bits here */ load_dr6(rdr6() & 0xfffffff0); goto out; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, 0, frame)) goto out; #endif break; #ifdef DEV_ISA case T_NMI: /* XXX Giant */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto out; } else if (panic_on_nmi == 0) goto out; /* FALLTHROUGH */ #endif /* DEV_ISA */ } trap_fatal(frame, 0); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; trapsignal(td, &ksi); #ifdef DEBUG if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%lx", frame->tf_addr); uprintf("\n"); } #endif user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); userout: out: return; }
static int trap_pfault(struct trapframe *frame, int usermode) { vm_offset_t va; struct vmspace *vm = NULL; vm_map_t map; int rv = 0; int fault_flags; vm_prot_t ftype; thread_t td = curthread; struct lwp *lp = td->td_lwp; struct proc *p; va = trunc_page(frame->tf_addr); if (va >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) { fault_flags = -1; ftype = -1; goto nogo; } map = &kernel_map; } else { /* * This is a fault on non-kernel virtual memory. * vm is initialized above to NULL. If curproc is NULL * or curproc->p_vmspace is NULL the fault is fatal. */ if (lp != NULL) vm = lp->lwp_vmspace; if (vm == NULL) { fault_flags = -1; ftype = -1; goto nogo; } /* * Debugging, try to catch kernel faults on the user address space when not inside * on onfault (e.g. copyin/copyout) routine. */ if (usermode == 0 && (td->td_pcb == NULL || td->td_pcb->pcb_onfault == NULL)) { #ifdef DDB if (freeze_on_seg_fault) { kprintf("trap_pfault: user address fault from kernel mode " "%016lx\n", (long)frame->tf_addr); while (freeze_on_seg_fault) tsleep(&freeze_on_seg_fault, 0, "frzseg", hz * 20); } #endif } map = &vm->vm_map; } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; #if JG else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; #endif else ftype = VM_PROT_READ; if (map != &kernel_map) { /* * Keep swapout from messing with us during this * critical time. */ PHOLD(lp->lwp_proc); /* * Issue fault */ fault_flags = 0; if (usermode) fault_flags |= VM_FAULT_BURST; if (ftype & VM_PROT_WRITE) fault_flags |= VM_FAULT_DIRTY; else fault_flags |= VM_FAULT_NORMAL; rv = vm_fault(map, va, ftype, fault_flags); PRELE(lp->lwp_proc); } else { /* * Don't have to worry about process locking or stacks in the * kernel. */ fault_flags = VM_FAULT_NORMAL; rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); } if (rv == KERN_SUCCESS) return (0); nogo: if (!usermode) { if (td->td_gd->gd_intr_nesting_level == 0 && td->td_pcb->pcb_onfault) { frame->tf_rip = (register_t)td->td_pcb->pcb_onfault; return (0); } trap_fatal(frame, frame->tf_addr); return (-1); } /* * NOTE: on x86_64 we have a tf_addr field in the trapframe, no * kludge is needed to pass the fault address to signal handlers. */ p = td->td_proc; if (td->td_lwp->lwp_vkernel == NULL) { #ifdef DDB if (bootverbose || freeze_on_seg_fault || ddb_on_seg_fault) { #else if (bootverbose) { #endif kprintf("seg-fault ft=%04x ff=%04x addr=%p rip=%p " "pid=%d cpu=%d p_comm=%s\n", ftype, fault_flags, (void *)frame->tf_addr, (void *)frame->tf_rip, p->p_pid, mycpu->gd_cpuid, p->p_comm); } #ifdef DDB while (freeze_on_seg_fault) { tsleep(p, 0, "freeze", hz * 20); } if (ddb_on_seg_fault) Debugger("ddb_on_seg_fault"); #endif } return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(struct trapframe *frame, vm_offset_t eva) { int code, ss; u_int type; long rsp; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* three separate prints in case of a trap on an unmapped page */ kprintf("cpuid = %d; ", mycpu->gd_cpuid); kprintf("lapic->id = %08x\n", lapic->id); #endif if (type == T_PAGEFLT) { kprintf("fault virtual address = 0x%lx\n", eva); kprintf("fault code = %s %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_P ? "protection violation" : "page not present"); } kprintf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); if (ISPL(frame->tf_cs) == SEL_UPL) { ss = frame->tf_ss & 0xffff; rsp = frame->tf_rsp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); rsp = (long)&frame->tf_rsp; } kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp); kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); kprintf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); kprintf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); kprintf("processor eflags = "); if (frame->tf_rflags & PSL_T) kprintf("trace trap, "); if (frame->tf_rflags & PSL_I) kprintf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) kprintf("nested task, "); if (frame->tf_rflags & PSL_RF) kprintf("resume, "); kprintf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); kprintf("current process = "); if (curproc) { kprintf("%lu\n", (u_long)curproc->p_pid); } else { kprintf("Idle\n"); } kprintf("current thread = pri %d ", curthread->td_pri); if (curthread->td_critcount) kprintf("(CRIT)"); kprintf("\n"); #ifdef DDB if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) return; #endif kprintf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); }
void trap(struct trapframe *frame) { struct globaldata *gd = mycpu; struct thread *td = gd->gd_curthread; struct lwp *lp = td->td_lwp; struct proc *p; int sticks = 0; int i = 0, ucode = 0, type, code; int have_mplock = 0; #ifdef INVARIANTS int crit_count = td->td_critcount; lwkt_tokref_t curstop = td->td_toks_stop; #endif vm_offset_t eva; p = td->td_proc; #ifdef DDB /* * We need to allow T_DNA faults when the debugger is active since * some dumping paths do large bcopy() which use the floating * point registers for faster copying. */ if (db_active && frame->tf_trapno != T_DNA) { eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0); ++gd->gd_trap_nesting_level; MAKEMPSAFE(have_mplock); trap_fatal(frame, eva); --gd->gd_trap_nesting_level; goto out2; } #endif eva = 0; ++gd->gd_trap_nesting_level; if (frame->tf_trapno == T_PAGEFLT) { /* * For some Cyrix CPUs, %cr2 is clobbered by interrupts. * This problem is worked around by using an interrupt * gate for the pagefault handler. We are finally ready * to read %cr2 and then must reenable interrupts. * * XXX this should be in the switch statement, but the * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the * flow of control too much for this to be obviously * correct. */ eva = rcr2(); cpu_enable_intr(); } --gd->gd_trap_nesting_level; if (!(frame->tf_eflags & PSL_I)) { /* * Buggy application or kernel code has disabled interrupts * and then trapped. Enabling interrupts now is wrong, but * it is better than running with interrupts disabled until * they are accidentally enabled later. */ type = frame->tf_trapno; if (ISPL(frame->tf_cs)==SEL_UPL || (frame->tf_eflags & PSL_VM)) { MAKEMPSAFE(have_mplock); kprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curproc->p_comm, type); } else if (type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ MAKEMPSAFE(have_mplock); kprintf("kernel trap %d with interrupts disabled\n", type); } cpu_enable_intr(); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) restart: #endif type = frame->tf_trapno; code = frame->tf_err; if (in_vm86call) { if (frame->tf_eflags & PSL_VM && (type == T_PROTFLT || type == T_STKFLT)) { KKASSERT(get_mplock_count(curthread) > 0); i = vm86_emulate((struct vm86frame *)frame); KKASSERT(get_mplock_count(curthread) > 0); if (i != 0) { /* * returns to original process */ vm86_trap((struct vm86frame *)frame, have_mplock); KKASSERT(0); /* NOT REACHED */ } goto out2; } switch (type) { /* * these traps want either a process context, or * assume a normal userspace trap. */ case T_PROTFLT: case T_SEGNPFLT: trap_fatal(frame, eva); goto out2; case T_TRCTRAP: type = T_BPTFLT; /* kernel breakpoint */ /* FALL THROUGH */ } goto kernel_trap; /* normal kernel trap handling */ } if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { /* user trap */ KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid, frame->tf_trapno, eva); userenter(td, p); sticks = (int)td->td_sticks; lp->lwp_md.md_regs = frame; switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ frame->tf_eflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = code; i = SIGFPE; break; case T_ASTFLT: /* Allow process switch */ mycpu->gd_cnt.v_soft++; if (mycpu->gd_reqflags & RQF_AST_OWEUPC) { atomic_clear_int(&mycpu->gd_reqflags, RQF_AST_OWEUPC); addupc_task(p, p->p_prof.pr_addr, p->p_prof.pr_ticks); } goto out; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)frame); if (i == 0) goto out; break; } i = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ i = trap_pfault(frame, TRUE, eva); if (i == -1) goto out; #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) goto restart; #endif if (i == 0) goto out; if (i == SIGSEGV) ucode = SEGV_MAPERR; else { i = SIGSEGV; ucode = SEGV_ACCERR; } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #if NISA > 0 case T_NMI: MAKEMPSAFE(have_mplock); #ifdef POWERFAIL_NMI goto handle_powerfail; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef DDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (ddb_on_nmi) { kprintf ("NMI ... going to debugger\n"); kdb_trap (type, 0, frame); } #endif /* DDB */ goto out2; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: /* * Virtual kernel intercept - pass the DNA exception * to the virtual kernel if it asked to handle it. * This occurs when the virtual kernel is holding * onto the FP context for a different emulated * process then the one currently running. * * We must still call npxdna() since we may have * saved FP state that the virtual kernel needs * to hand over to a different emulated process. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve && (td->td_pcb->pcb_flags & FP_VIRTFP) ) { npxdna(); break; } #if NNPX > 0 /* * The kernel may have switched out the FP unit's * state, causing the user process to take a fault * when it tries to use the FP unit. Restore the * state here */ if (npxdna()) goto out; #endif if (!pmath_emulate) { i = SIGFPE; ucode = FPE_FPU_NP_TRAP; break; } i = (*pmath_emulate)(frame); if (i == 0) { if (!(frame->tf_eflags & PSL_T)) goto out2; frame->tf_eflags &= ~PSL_T; i = SIGTRAP; } /* else ucode = emulator_only_knows() XXX */ break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = 0; /* XXX */ i = SIGFPE; break; } } else { kernel_trap: /* kernel trap */ switch (type) { case T_PAGEFLT: /* page fault */ trap_pfault(frame, FALSE, eva); goto out2; case T_DNA: #if NNPX > 0 /* * The kernel may be using npx for copying or other * purposes. */ if (npxdna()) goto out2; #endif break; case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ #define MAYBE_DORETI_FAULT(where, whereto) \ do { \ if (frame->tf_eip == (int)where) { \ frame->tf_eip = (int)whereto; \ goto out2; \ } \ } while (0) if (mycpu->gd_intr_nesting_level == 0) { /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ MAYBE_DORETI_FAULT(doreti_iret, doreti_iret_fault); MAYBE_DORETI_FAULT(doreti_popl_ds, doreti_popl_ds_fault); MAYBE_DORETI_FAULT(doreti_popl_es, doreti_popl_es_fault); MAYBE_DORETI_FAULT(doreti_popl_fs, doreti_popl_fs_fault); MAYBE_DORETI_FAULT(doreti_popl_gs, doreti_popl_gs_fault); /* * NOTE: cpu doesn't push esp on kernel trap */ if (td->td_pcb->pcb_onfault && td->td_pcb->pcb_onfault_sp == (int)&frame->tf_esp) { frame->tf_eip = (register_t)td->td_pcb->pcb_onfault; goto out2; } } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; goto out2; } break; case T_TRCTRAP: /* trace trap */ if (frame->tf_eip == (int)IDTVEC(syscall)) { /* * We've just entered system mode via the * syscall lcall. Continue single stepping * silently until the syscall handler has * saved the flags. */ goto out2; } if (frame->tf_eip == (int)IDTVEC(syscall) + 1) { /* * The syscall handler has now saved the * flags. Stop single stepping it. */ frame->tf_eflags &= ~PSL_T; goto out2; } /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap()) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & 0xfffffff0); goto out2; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If DDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ ucode = TRAP_BRKPT; #ifdef DDB MAKEMPSAFE(have_mplock); if (kdb_trap (type, 0, frame)) goto out2; #endif break; #if NISA > 0 case T_NMI: MAKEMPSAFE(have_mplock); #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif handle_powerfail: { static unsigned lastalert = 0; if (time_uptime - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(TIMER_FREQ/880, hz); lastalert = time_uptime; } /* YYY mp count */ goto out2; } #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef DDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (ddb_on_nmi) { kprintf ("NMI ... going to debugger\n"); kdb_trap (type, 0, frame); } #endif /* DDB */ goto out2; } else if (panic_on_nmi == 0) goto out2; /* FALL THROUGH */ #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ } MAKEMPSAFE(have_mplock); trap_fatal(frame, eva); goto out2; } /* * Virtual kernel intercept - if the fault is directly related to a * VM context managed by a virtual kernel then let the virtual kernel * handle it. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { vkernel_trap(lp, frame); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); MAKEMPSAFE(have_mplock); trapsignal(lp, i, ucode); #ifdef DEBUG if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%lx", (u_long)eva); uprintf("\n"); } #endif out: userret(lp, frame, sticks); userexit(lp); out2: ; if (have_mplock) rel_mplock(); if (p != NULL && lp != NULL) KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, ("trap: critical section count mismatch! %d/%d", crit_count, td->td_pri)); KASSERT(curstop == td->td_toks_stop, ("trap: extra tokens held after trap! %zd/%zd", curstop - &td->td_toks_base, td->td_toks_stop - &td->td_toks_base)); #endif }
/* * syscall2 - MP aware system call request C handler * * A system call is essentially treated as a trap. The MP lock is not * held on entry or return. We are responsible for handling ASTs * (e.g. a task switch) prior to return. * * MPSAFE */ void syscall2(struct trapframe *frame) { struct thread *td = curthread; struct proc *p = td->td_proc; struct lwp *lp = td->td_lwp; caddr_t params; struct sysent *callp; register_t orig_tf_eflags; int sticks; int error; int narg; #ifdef INVARIANTS int crit_count = td->td_critcount; #endif int have_mplock = 0; u_int code; union sysunion args; #ifdef DIAGNOSTIC if (ISPL(frame->tf_cs) != SEL_UPL) { get_mplock(); panic("syscall"); /* NOT REACHED */ } #endif KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid, frame->tf_eax); userenter(td, p); /* lazy raise our priority */ /* * Misc */ sticks = (int)td->td_sticks; orig_tf_eflags = frame->tf_eflags; /* * Virtual kernel intercept - if a VM context managed by a virtual * kernel issues a system call the virtual kernel handles it, not us. * Restore the virtual kernel context and return from its system * call. The current frame is copied out to the virtual kernel. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { vkernel_trap(lp, frame); error = EJUSTRETURN; callp = NULL; goto out; } /* * Get the system call parameters and account for time */ lp->lwp_md.md_regs = frame; params = (caddr_t)frame->tf_esp + sizeof(int); code = frame->tf_eax; if (p->p_sysent->sv_prepsyscall) { (*p->p_sysent->sv_prepsyscall)( frame, (int *)(&args.nosys.sysmsg + 1), &code, ¶ms); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. * fuword is MP aware. */ if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = fuword(params); params += sizeof(int); } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ code = fuword(params); params += sizeof(quad_t); } } code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg & SYF_ARGMASK; #if 0 if (p->p_sysent->sv_name[0] == 'L') kprintf("Linux syscall, code = %d\n", code); #endif /* * copyin is MP aware, but the tracing code is not */ if (narg && params) { error = copyin(params, (caddr_t)(&args.nosys.sysmsg + 1), narg * sizeof(register_t)); if (error) { #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { MAKEMPSAFE(have_mplock); ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { MAKEMPSAFE(have_mplock); ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif /* * For traditional syscall code edx is left untouched when 32 bit * results are returned. Since edx is loaded from fds[1] when the * system call returns we pre-set it here. */ args.sysmsg_fds[0] = 0; args.sysmsg_fds[1] = frame->tf_edx; /* * The syscall might manipulate the trap frame. If it does it * will probably return EJUSTRETURN. */ args.sysmsg_frame = frame; STOPEVENT(p, S_SCE, narg); /* MP aware */ /* * NOTE: All system calls run MPSAFE now. The system call itself * is responsible for getting the MP lock. */ error = (*callp->sy_call)(&args); out: /* * MP SAFE (we may or may not have the MP lock at this point) */ switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; lp = curthread->td_lwp; frame->tf_eax = args.sysmsg_fds[0]; frame->tf_edx = args.sysmsg_fds[1]; frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ frame->tf_eip -= frame->tf_err; break; case EJUSTRETURN: break; case EASYNC: panic("Unexpected EASYNC return value (for now)"); default: bad: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } frame->tf_eax = error; frame->tf_eflags |= PSL_C; break; } /* * Traced syscall. trapsignal() is not MP aware. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { MAKEMPSAFE(have_mplock); frame->tf_eflags &= ~PSL_T; trapsignal(lp, SIGTRAP, TRAP_TRACE); } /* * Handle reschedule and other end-of-syscall issues */ userret(lp, frame, sticks); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { MAKEMPSAFE(have_mplock); ktrsysret(lp, code, error, args.sysmsg_result); } #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); userexit(lp); /* * Release the MP lock if we had to get it */ if (have_mplock) rel_mplock(); KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); KASSERT(&td->td_toks_base == td->td_toks_stop, ("syscall: extra tokens held after trap! %zd", td->td_toks_stop - &td->td_toks_base)); #endif }
static void trap_fatal(struct trapframe *frame, vm_offset_t eva) { int code, type, ss, esp; struct soft_segment_descriptor softseg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[mycpu->gd_cpuid * NGDT + IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); if (type <= MAX_TRAP_MSG) kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg[type], frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); /* three separate prints in case of a trap on an unmapped page */ kprintf("cpuid = %d; ", mycpu->gd_cpuid); kprintf("lapic.id = %08x\n", lapic->id); if (type == T_PAGEFLT) { kprintf("fault virtual address = %p\n", (void *)eva); kprintf("fault code = %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_P ? "protection violation" : "page not present"); } kprintf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } kprintf("stack pointer = 0x%x:0x%x\n", ss, esp); kprintf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); kprintf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); kprintf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); kprintf("processor eflags = "); if (frame->tf_eflags & PSL_T) kprintf("trace trap, "); if (frame->tf_eflags & PSL_I) kprintf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) kprintf("nested task, "); if (frame->tf_eflags & PSL_RF) kprintf("resume, "); if (frame->tf_eflags & PSL_VM) kprintf("vm86, "); kprintf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); kprintf("current process = "); if (curproc) { kprintf("%lu (%s)\n", (u_long)curproc->p_pid, curproc->p_comm ? curproc->p_comm : ""); } else { kprintf("Idle\n"); } kprintf("current thread = pri %d ", curthread->td_pri); if (curthread->td_critcount) kprintf("(CRIT)"); kprintf("\n"); /** * XXX FIXME: * we probably SHOULD have stopped the other CPUs before now! * another CPU COULD have been touching cpl at this moment... */ kprintf(" <- SMP: XXX"); kprintf("\n"); #ifdef KDB if (kdb_trap(&psl)) return; #endif #ifdef DDB if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) return; #endif kprintf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); }
/* * kdb_trap - field a TRACE or BPT trap */ int kdb_trap(int type, int code, struct i386_saved_state *regs) { volatile int ddb_mode = !(boothowto & RB_GDB); /* * XXX try to do nothing if the console is in graphics mode. * Handle trace traps (and hardware breakpoints...) by ignoring * them except for forgetting about them. Return 0 for other * traps to say that we haven't done anything. The trap handler * will usually panic. We should handle breakpoint traps for * our breakpoints by disarming our breakpoints and fixing up * %eip. */ if (cons_unavail && ddb_mode) { if (type == T_TRCTRAP) { regs->tf_eflags &= ~PSL_T; return (1); } return (0); } switch (type) { case T_BPTFLT: /* breakpoint */ case T_TRCTRAP: /* debug exception */ break; default: /* * XXX this is almost useless now. In most cases, * trap_fatal() has already printed a much more verbose * message. However, it is dangerous to print things in * trap_fatal() - kprintf() might be reentered and trap. * The debugger should be given control first. */ if (ddb_mode) db_printf("kernel: type %d trap, code=%x\n", type, code); if (db_nofault) { jmp_buf *no_fault = db_nofault; db_nofault = NULL; longjmp(*no_fault, 1); } } /* * This handles unexpected traps in ddb commands, including calls to * non-ddb functions. db_nofault only applies to memory accesses by * internal ddb commands. */ if (db_global_jmpbuf_valid) longjmp(db_global_jmpbuf, 1); /* * XXX We really should switch to a local stack here. */ ddb_regs = *regs; /* * If in kernel mode, esp and ss are not saved, so dummy them up. */ if (ISPL(regs->tf_cs) == 0) { ddb_regs.tf_esp = (int)®s->tf_esp; ddb_regs.tf_ss = rss(); } crit_enter(); #ifdef SMP db_printf("\nCPU%d stopping CPUs: 0x%08x\n", mycpu->gd_cpuid, mycpu->gd_other_cpus); /* We stop all CPUs except ourselves (obviously) */ stop_cpus(mycpu->gd_other_cpus); db_printf(" stopped\n"); #endif /* SMP */ setjmp(db_global_jmpbuf); db_global_jmpbuf_valid = TRUE; db_active++; if (ddb_mode) { cndbctl(TRUE); db_trap(type, code); cndbctl(FALSE); } else gdb_handle_exception(&ddb_regs, type, code); db_active--; db_global_jmpbuf_valid = FALSE; #ifdef SMP db_printf("\nCPU%d restarting CPUs: 0x%08x\n", mycpu->gd_cpuid, stopped_cpus); /* Restart all the CPUs we previously stopped */ if (stopped_cpus != mycpu->gd_other_cpus) { db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n", mycpu->gd_other_cpus, stopped_cpus); panic("stop_cpus() failed"); } restart_cpus(stopped_cpus); db_printf(" restarted\n"); #endif /* SMP */ crit_exit(); regs->tf_eip = ddb_regs.tf_eip; regs->tf_eflags = ddb_regs.tf_eflags; regs->tf_eax = ddb_regs.tf_eax; regs->tf_ecx = ddb_regs.tf_ecx; regs->tf_edx = ddb_regs.tf_edx; regs->tf_ebx = ddb_regs.tf_ebx; /* * If in user mode, the saved ESP and SS were valid, restore them. */ if (ISPL(regs->tf_cs)) { regs->tf_esp = ddb_regs.tf_esp; regs->tf_ss = ddb_regs.tf_ss & 0xffff; } regs->tf_ebp = ddb_regs.tf_ebp; regs->tf_esi = ddb_regs.tf_esi; regs->tf_edi = ddb_regs.tf_edi; regs->tf_es = ddb_regs.tf_es & 0xffff; regs->tf_fs = ddb_regs.tf_fs & 0xffff; regs->tf_gs = ddb_regs.tf_gs & 0xffff; regs->tf_cs = ddb_regs.tf_cs & 0xffff; regs->tf_ds = ddb_regs.tf_ds & 0xffff; return (1); }
void trap(struct trapframe *frame) { struct thread *td = curthread; struct proc *p = td->td_proc; int i = 0, ucode = 0, code; u_int type; register_t addr = 0; vm_offset_t eva; ksiginfo_t ksi; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); goto out; } #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A return value of '1' from the * hook means that the NMI was handled by it and that we can * return immediately. */ if (type == T_NMI && pmc_intr && (*pmc_intr)(PCPU_GET(cpuid), frame)) goto out; #endif if (type == T_MCHK) { if (!mca_intr()) trap_fatal(frame, 0); goto out; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in it's per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * If the DTrace kernel module has registered a trap handler, * call it and if it returns non-zero, assume that it has * handled the trap and modified the trap frame so that this * function can return normally. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL) if ((*dtrace_trap_func)(frame, type)) goto out; if (type == T_DTRACE_PROBE || type == T_DTRACE_RET || type == T_BPTFLT) { struct reg regs; fill_frame_regs(frame, ®s); if (type == T_DTRACE_PROBE && dtrace_fasttrap_probe_ptr != NULL && dtrace_fasttrap_probe_ptr(®s) == 0) goto out; if (type == T_BPTFLT && dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(®s) == 0) goto out; if (type == T_DTRACE_RET && dtrace_return_probe_ptr != NULL && dtrace_return_probe_ptr(®s) == 0) goto out; } #endif if ((frame->tf_eflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM)) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_BPTFLT && type != T_TRCTRAP && frame->tf_eip != (int)cpu_switch_load_gs) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * Page faults need interrupts disabled until later, * and we shouldn't enable interrupts while holding * a spin lock or if servicing an NMI. */ if (type != T_NMI && type != T_PAGEFLT && td->td_md.md_spinlock_count == 0) enable_intr(); } } eva = 0; code = frame->tf_err; if (type == T_PAGEFLT) { /* * For some Cyrix CPUs, %cr2 is clobbered by * interrupts. This problem is worked around by using * an interrupt gate for the pagefault handler. We * are finally ready to read %cr2 and then must * reenable interrupts. * * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ eva = rcr2(); if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) trap_fatal(frame, eva); else enable_intr(); } if ((ISPL(frame->tf_cs) == SEL_UPL) || ((frame->tf_eflags & PSL_VM) && !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_ucred != p->p_ucred) cred_update_thread(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); frame->tf_eflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ #ifdef DEV_NPX ucode = npxtrap(); if (ucode == -1) goto userout; #else ucode = 0; #endif i = SIGFPE; break; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)frame); if (i == 0) goto user; break; } i = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ i = trap_pfault(frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; /* Proceed as in that case. */ ucode = ILL_PRVOPC; i = SIGILL; break; } #endif if (i == -1) goto userout; if (i == 0) goto user; if (i == SIGSEGV) ucode = SEGV_MAPERR; else { if (prot_fault_translation == 0) { /* * Autodetect. * This check also covers the images * without the ABI-tag ELF note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { i = SIGSEGV; ucode = SEGV_ACCERR; } else { i = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ i = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ i = SIGSEGV; ucode = SEGV_ACCERR; } } addr = eva; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } goto userout; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto userout; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: #ifdef DEV_NPX KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) goto userout; #endif uprintf("pid %d killed due to lack of floating point\n", p->p_pid); i = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = 0; /* XXX */ i = SIGFPE; break; } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE, eva); goto out; case T_DNA: #ifdef DEV_NPX KASSERT(!PCB_USER_FPU(td->td_pcb), ("Unregistered use of FPU in kernel")); if (npxdna()) goto out; #endif break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); goto out; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)frame); if (i != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); goto out; } if (type == T_STKFLT) break; /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { PCPU_GET(curpcb)->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif goto out; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_eip == (int)doreti_iret) { frame->tf_eip = (int)doreti_iret_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_ds) { frame->tf_eip = (int)doreti_popl_ds_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_es) { frame->tf_eip = (int)doreti_popl_es_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_fs) { frame->tf_eip = (int)doreti_popl_fs_fault; goto out; } if (PCPU_GET(curpcb)->pcb_onfault != NULL) { frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault; goto out; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; goto out; } break; case T_TRCTRAP: /* trace trap */ if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) { /* * We've just entered system mode via the * syscall lcall. Continue single stepping * silently until the syscall handler has * saved the flags. */ goto out; } if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) { /* * The syscall handler has now saved the * flags. Stop single stepping it. */ frame->tf_eflags &= ~PSL_T; goto out; } /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap() && !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & 0xfffffff0); goto out; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, 0, frame)) goto out; #endif break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } goto out; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto out; } else if (panic_on_nmi == 0) goto out; /* FALLTHROUGH */ #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } trap_fatal(frame, eva); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; trapsignal(td, &ksi); #ifdef DEBUG if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%lx", (u_long)eva); uprintf("\n"); } #endif user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); userout: out: return; }
/* * syscall2 - MP aware system call request C handler * * A system call is essentially treated as a trap except that the * MP lock is not held on entry or return. We are responsible for * obtaining the MP lock if necessary and for handling ASTs * (e.g. a task switch) prior to return. * * MPSAFE */ void syscall2(struct trapframe *frame) { struct thread *td = curthread; struct proc *p = td->td_proc; struct lwp *lp = td->td_lwp; struct sysent *callp; register_t orig_tf_rflags; int sticks; int error; int narg; #ifdef INVARIANTS int crit_count = td->td_critcount; #endif register_t *argp; u_int code; int regcnt, optimized_regcnt; union sysunion args; register_t *argsdst; mycpu->gd_cnt.v_syscall++; #ifdef DIAGNOSTIC if (ISPL(frame->tf_cs) != SEL_UPL) { panic("syscall"); /* NOT REACHED */ } #endif KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid, frame->tf_rax); userenter(td, p); /* lazy raise our priority */ regcnt = 6; optimized_regcnt = 6; /* * Misc */ sticks = (int)td->td_sticks; orig_tf_rflags = frame->tf_rflags; /* * Virtual kernel intercept - if a VM context managed by a virtual * kernel issues a system call the virtual kernel handles it, not us. * Restore the virtual kernel context and return from its system * call. The current frame is copied out to the virtual kernel. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { vkernel_trap(lp, frame); error = EJUSTRETURN; callp = NULL; code = 0; goto out; } /* * Get the system call parameters and account for time */ KASSERT(lp->lwp_md.md_regs == frame, ("Frame mismatch %p %p", lp->lwp_md.md_regs, frame)); code = (u_int)frame->tf_rax; if (code == SYS_syscall || code == SYS___syscall) { code = frame->tf_rdi; regcnt--; argp = &frame->tf_rdi + 1; } else { argp = &frame->tf_rdi; } if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg & SYF_ARGMASK; /* * On x86_64 we get up to six arguments in registers. The rest are * on the stack. The first six members of 'struct trapframe' happen * to be the registers used to pass arguments, in exactly the right * order. */ argsdst = (register_t *)(&args.nosys.sysmsg + 1); /* * Its easier to copy up to the highest number of syscall arguments * passed in registers, which is 6, than to conditionalize it. */ bcopy(argp, argsdst, sizeof(register_t) * optimized_regcnt); /* * Any arguments beyond available argument-passing registers must * be copyin()'d from the user stack. */ if (narg > regcnt) { caddr_t params; params = (caddr_t)frame->tf_rsp + sizeof(register_t); error = copyin(params, &argsdst[regcnt], (narg - regcnt) * sizeof(register_t)); if (error) { #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif /* * Default return value is 0 (will be copied to %rax). Double-value * returns use %rax and %rdx. %rdx is left unchanged for system * calls which return only one result. */ args.sysmsg_fds[0] = 0; args.sysmsg_fds[1] = frame->tf_rdx; /* * The syscall might manipulate the trap frame. If it does it * will probably return EJUSTRETURN. */ args.sysmsg_frame = frame; STOPEVENT(p, S_SCE, narg); /* MP aware */ /* * NOTE: All system calls run MPSAFE now. The system call itself * is responsible for getting the MP lock. */ #ifdef SYSCALL_DEBUG tsc_uclock_t tscval = rdtsc(); #endif error = (*callp->sy_call)(&args); #ifdef SYSCALL_DEBUG tscval = rdtsc() - tscval; tscval = tscval * 1000000 / tsc_frequency; if (SysCallsWorstCase[code] < tscval) SysCallsWorstCase[code] = tscval; #endif out: /* * MP SAFE (we may or may not have the MP lock at this point) */ //kprintf("SYSMSG %d ", error); switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; lp = curthread->td_lwp; frame->tf_rax = args.sysmsg_fds[0]; frame->tf_rdx = args.sysmsg_fds[1]; frame->tf_rflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, we know that 'syscall' is 2 bytes. * We have to do a full context restore so that %r10 * (which was holding the value of %rcx) is restored for * the next iteration. */ if (frame->tf_err != 0 && frame->tf_err != 2) kprintf("lp %s:%d frame->tf_err is weird %ld\n", td->td_comm, lp->lwp_proc->p_pid, frame->tf_err); frame->tf_rip -= frame->tf_err; frame->tf_r10 = frame->tf_rcx; break; case EJUSTRETURN: break; case EASYNC: panic("Unexpected EASYNC return value (for now)"); default: bad: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } frame->tf_rax = error; frame->tf_rflags |= PSL_C; break; } /* * Traced syscall. trapsignal() should now be MP aware */ if (orig_tf_rflags & PSL_T) { frame->tf_rflags &= ~PSL_T; trapsignal(lp, SIGTRAP, TRAP_TRACE); } /* * Handle reschedule and other end-of-syscall issues */ userret(lp, frame, sticks); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { ktrsysret(lp, code, error, args.sysmsg_result); } #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); userexit(lp); KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); KASSERT(&td->td_toks_base == td->td_toks_stop, ("syscall: %ld extra tokens held after trap! syscall %p", td->td_toks_stop - &td->td_toks_base, callp->sy_call)); #endif }
void trap(struct trapframe *frame) { #ifdef KDTRACE_HOOKS struct reg regs; #endif struct thread *td = curthread; struct proc *p = td->td_proc; int i = 0, ucode = 0, code; u_int type; register_t addr = 0; ksiginfo_t ksi; PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); goto out; } #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A return value of '1' from the handler means that * the NMI was handled by it and we can return immediately. */ if (type == T_NMI && pmc_intr && (*pmc_intr)(PCPU_GET(cpuid), frame)) goto out; #endif if (type == T_MCHK) { mca_intr(); goto out; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) goto out; #endif if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (ISPL(frame->tf_cs) == SEL_UPL) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock. */ if (td->td_md.md_spinlock_count == 0) enable_intr(); } } code = frame->tf_err; if (ISPL(frame->tf_cs) == SEL_UPL) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_ucred != p->p_ucred) cred_update_thread(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); #ifdef KDTRACE_HOOKS if (type == T_BPTFLT) { fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(®s) == 0) goto out; } #endif frame->tf_rflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) goto userout; i = SIGFPE; break; case T_PROTFLT: /* general protection fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ addr = frame->tf_addr; i = trap_pfault(frame, TRUE); if (i == -1) goto userout; if (i == 0) goto user; if (i == SIGSEGV) ucode = SEGV_MAPERR; else { if (prot_fault_translation == 0) { /* * Autodetect. * This check also covers the images * without the ABI-tag ELF note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { i = SIGSEGV; ucode = SEGV_ACCERR; } else { i = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ i = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ i = SIGSEGV; ucode = SEGV_ACCERR; } } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #ifdef DEV_ISA case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto userout; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); goto userout; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) goto userout; i = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); fill_frame_regs(frame, ®s); if (dtrace_return_probe_ptr != NULL && dtrace_return_probe_ptr(®s) == 0) goto out; break; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); goto out; case T_DNA: KASSERT(!PCB_USER_FPU(td->td_pcb), ("Unregistered use of FPU in kernel")); fpudna(); goto out; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); goto out; case T_STKFLT: /* stack fault */ break; case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; goto out; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; goto out; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; goto out; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; goto out; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; goto out; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; goto out; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; goto out; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; goto out; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; goto out; } break; case T_TRCTRAP: /* trace trap */ /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap()) { /* * Reset breakpoint bits because the * processor doesn't */ /* XXX check upper bits here */ load_dr6(rdr6() & 0xfffffff0); goto out; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, 0, frame)) goto out; #endif break; #ifdef DEV_ISA case T_NMI: /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton * for debugging. */ if (kdb_on_nmi) { printf ("NMI ... going to debugger\n"); kdb_trap(type, 0, frame); } #endif /* KDB */ goto out; } else if (panic_on_nmi == 0) goto out; /* FALLTHROUGH */ #endif /* DEV_ISA */ } trap_fatal(frame, 0); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), fubyte((void *)(frame->tf_rip + 3)), fubyte((void *)(frame->tf_rip + 4)), fubyte((void *)(frame->tf_rip + 5)), fubyte((void *)(frame->tf_rip + 6)), fubyte((void *)(frame->tf_rip + 7))); } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); userout: out: return; }
static __inline int get_esp(struct trapframe *tf) { return ((ISPL(tf->tf_cs)) ? tf->tf_esp : (db_expr_t)tf + (uintptr_t)DB_OFFSET(tf_esp)); }