/*
 * Process a system call.
 */
void
syscall(register_t code, struct frame frame)
{
	struct lwp *l;
	struct proc *p;
	u_quad_t sticks;

	uvmexp.syscalls++;

	if (!USERMODE(frame.f_sr))
		panic("syscall");

	l = curlwp;
	p = l->l_proc;
	sticks = p->p_sticks;
	l->l_md.md_regs = frame.f_regs;
	LWP_CACHE_CREDS(l, p);

#ifdef KERN_SA
	if (__predict_false((l->l_savp)
	    && (l->l_savp->savp_pflags & SAVP_FLAG_DELIVERING)))
		l->l_savp->savp_pflags &= ~SAVP_FLAG_DELIVERING;
#endif

	(p->p_md.md_syscall)(code, l, &frame);

	machine_userret(l, &frame, sticks);
}
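The indirect call through p_md.md_syscall lets each emulation install its own
syscall entry path at exec time. A minimal userland sketch of that dispatch
pattern (all names here are illustrative stand-ins, not the kernel's types):

#include <stdio.h>

struct frame { long f_regs[16]; };

/* per-"process" machine-dependent state with a pluggable syscall entry */
struct mdproc {
	void (*md_syscall)(long code, struct frame *f);
};

static void
native_syscall(long code, struct frame *f)
{
	printf("native dispatch: code %ld, frame %p\n", code, (void *)f);
}

static void
emul_syscall(long code, struct frame *f)
{
	printf("emulation dispatch: code %ld, frame %p\n", code, (void *)f);
}

int
main(void)
{
	struct frame fr = { { 0 } };
	struct mdproc p = { .md_syscall = native_syscall };

	(p.md_syscall)(4, &fr);	/* dispatch as in (p->p_md.md_syscall)(...) */

	p.md_syscall = emul_syscall;	/* exec of an emulated binary swaps the hook */
	(p.md_syscall)(4, &fr);
	return 0;
}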
/*
 * rump_schedule: ensure that the calling host thread has a valid lwp context,
 * i.e. ensure that curlwp != NULL.  Also, ensure that there is
 * a 1:1 mapping between the lwp and rump kernel cpu.
 */
void
rump_schedule()
{
	struct lwp *l;

	/*
	 * If there is no dedicated lwp, allocate a temp one and
	 * set it to be free'd upon unschedule().  Use lwp0 context
	 * for reserving the necessary resources.  Don't optimize
	 * for this case -- anyone who cares about performance will
	 * start a real thread.
	 */
	if (__predict_true((l = curlwp) != NULL)) {
		rump_schedule_cpu(l);
		LWP_CACHE_CREDS(l, l->l_proc);
	} else {
		lwp0busy();

		/* schedule cpu and use lwp0 */
		rump_schedule_cpu(&lwp0);
		rump_lwproc_curlwp_set(&lwp0);

		/* allocate thread, switch to it, and release lwp0 */
		l = rump__lwproc_alloclwp(initproc);
		rump_lwproc_switch(l);
		lwp0rele();

		/*
		 * mark new thread dead-on-unschedule.  this
		 * means that we'll be running with l_refcnt == 0.
		 * relax, it's fine.
		 */
		rump_lwproc_releaselwp();
	}
}
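__predict_true() steers the compiler toward the hot path where the host thread
already owns an lwp. A small standalone sketch of how such hints are commonly
built on __builtin_expect (the macro bodies here are an assumption for
illustration; they mirror but are not copied from the kernel headers):

#include <stdio.h>

#define __predict_true(exp)	__builtin_expect((exp) != 0, 1)
#define __predict_false(exp)	__builtin_expect((exp) != 0, 0)

struct lwp { int dummy; };

static __thread struct lwp *curlwp;	/* per-host-thread context pointer */

static void
schedule(void)
{
	struct lwp *l;

	if (__predict_true((l = curlwp) != NULL)) {
		/* fast path: thread already has a context */
		printf("fast path, l=%p\n", (void *)l);
	} else {
		/* slow path: borrow lwp0, allocate a real context */
		printf("slow path\n");
	}
}

int
main(void)
{
	static struct lwp l0;

	schedule();		/* slow path: no context yet */
	curlwp = &l0;
	schedule();		/* fast path */
	return 0;
}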
void
data_abort_handler(struct trapframe *tf)
{
	vaddr_t pc, va;
	vsize_t asize;
	struct proc *p;
	struct lwp *l;
	vm_prot_t atype;
	bool usrmode, twopages;
	struct vm_map *map;

	/*
	 * Data aborts in kernel mode are possible (copyout etc), so
	 * we hope the compiler (or programmer) has ensured that
	 * R14_svc gets saved.
	 *
	 * We may need to fix up an STM or LDM instruction.  This
	 * involves seeing if the base was being written back, and if
	 * so resetting it (by counting the number of registers being
	 * transferred) before retrying (ARM 2 ds pp 10 & 33).
	 */

	/* Enable interrupts if they were enabled before the trap. */
	if ((tf->tf_r15 & R15_IRQ_DISABLE) == 0)
		int_on();

	uvmexp.traps++;
	l = curlwp;
	if (l == NULL)
		l = &lwp0;
	p = l->l_proc;

	if ((tf->tf_r15 & R15_MODE) == R15_MODE_USR) {
		l->l_addr->u_pcb.pcb_tf = tf;
		LWP_CACHE_CREDS(l, p);
	}

	pc = tf->tf_r15 & R15_PC;
	data_abort_fixup(tf);
	va = data_abort_address(tf, &asize);
	atype = data_abort_atype(tf);
	usrmode = data_abort_usrmode(tf);
	twopages = (trunc_page(va) != round_page(va + asize) - PAGE_SIZE);

	if (!usrmode && va >= VM_MIN_KERNEL_ADDRESS)
		map = kernel_map;
	else
		map = &p->p_vmspace->vm_map;

	do_fault(tf, l, map, va, atype);
	if (twopages)
		do_fault(tf, l, map, va + asize - 4, atype);

	if ((tf->tf_r15 & R15_MODE) == R15_MODE_USR)
		userret(l);
}
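The twopages test decides whether the aborted access straddles a page
boundary, in which case each page is faulted separately. A userland sketch of
the same page arithmetic (the page size and the sample access are assumed
values, not taken from the handler):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define trunc_page(va)	((va) & ~(PAGE_SIZE - 1))
#define round_page(va)	(((va) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int
main(void)
{
	/* hypothetical access: 8 bytes ending just past a page boundary */
	uintptr_t va = 0x12345ffc;
	size_t asize = 8;

	/*
	 * The access spans two pages iff the first byte's page differs
	 * from the last byte's page; round_page(va + asize) - PAGE_SIZE
	 * is the base of the page holding the last byte.
	 */
	bool twopages = trunc_page(va) != round_page(va + asize) - PAGE_SIZE;

	printf("va=%#lx asize=%zu -> %s\n", (unsigned long)va, asize,
	    twopages ? "two pages" : "one page");
	return 0;
}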
void
prefetch_abort_handler(struct trapframe *tf)
{
	vaddr_t pc;
	struct proc *p;
	struct lwp *l;

	/* Enable interrupts if they were enabled before the trap. */
	if ((tf->tf_r15 & R15_IRQ_DISABLE) == 0)
		int_on();

	/*
	 * XXX Not done yet:
	 * Check if the page being requested is already present.  If
	 * so, call the undefined instruction handler instead (ARM3 ds
	 * p15).
	 */

	uvmexp.traps++;
	l = curlwp;
	if (l == NULL)
		l = &lwp0;
	p = l->l_proc;

	if ((tf->tf_r15 & R15_MODE) == R15_MODE_USR) {
		l->l_addr->u_pcb.pcb_tf = tf;
		LWP_CACHE_CREDS(l, p);
	}

	if ((tf->tf_r15 & R15_MODE) != R15_MODE_USR) {
#ifdef DDB
		db_printf("Prefetch abort in kernel mode\n");
		kdb_trap(T_FAULT, tf);
#else
#ifdef DEBUG
		printf("Prefetch abort:\n");
		printregs(tf);
#endif
		panic("prefetch abort in kernel mode");
#endif
	}

	/* User-mode prefetch abort */
	pc = tf->tf_r15 & R15_PC;
	do_fault(tf, l, &p->p_vmspace->vm_map, pc, VM_PROT_EXECUTE);
	userret(l);
}
void
netbsd32_syscall(struct trapframe *frame)
{
	char *params;
	const struct sysent *callp;
	struct proc *p;
	struct lwp *l;
	int error;
	int i;
	register32_t code, args[2 + SYS_MAXSYSARGS];
	register_t rval[2];
	register_t args64[SYS_MAXSYSARGS];

	l = curlwp;
	p = l->l_proc;

	code = frame->tf_rax & (SYS_NSYSENT - 1);
	callp = p->p_emul->e_sysent + code;

	LWP_CACHE_CREDS(l, p);

	SYSCALL_COUNT(syscall_counts, code);
	SYSCALL_TIME_SYS_ENTRY(l, syscall_times, code);

	params = (char *)frame->tf_rsp + sizeof(int);

	if (callp->sy_argsize) {
		error = copyin(params, args, callp->sy_argsize);
		if (__predict_false(error != 0))
			goto bad;
		/* Recover 'code' - not in a register */
		code = frame->tf_rax & (SYS_NSYSENT - 1);
	}

	if (__predict_false(p->p_trace_enabled)
	    && !__predict_false(callp->sy_flags & SYCALL_INDIRECT)) {
		int narg = callp->sy_argsize >> 2;
		for (i = 0; i < narg; i++)
			args64[i] = args[i];
		error = trace_enter(code, args64, narg);
		if (__predict_false(error != 0))
			goto out;
	}
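Before calling trace_enter() the handler widens each 32-bit argument word into
a 64-bit slot. A standalone sketch of that widening step (the typedefs and
sample values are assumptions for illustration):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t register32_t;	/* 32-bit syscall argument word */
typedef int64_t  register_t;	/* native (64-bit) register */

int
main(void)
{
	/* hypothetical raw argument words copied in from the user stack */
	register32_t args[3] = { 7, 0xffffffff, 42 };
	register_t args64[3];

	/*
	 * Widen each 32-bit word into a 64-bit slot before handing the
	 * array to the tracer; with an unsigned source type this is a
	 * zero extension, so 0xffffffff stays 4294967295 rather than -1.
	 */
	for (int i = 0; i < 3; i++)
		args64[i] = args[i];

	for (int i = 0; i < 3; i++)
		printf("args64[%d] = %lld\n", i, (long long)args64[i]);
	return 0;
}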
void
prefetch_abort_handler(struct trapframe *tf)
{
	struct lwp * const l = curlwp;
	struct proc * const p = l->l_proc;

	/* Enable interrupts if they were enabled before the trap. */
	if ((tf->tf_r15 & R15_IRQ_DISABLE) == 0)
		int_on();

	/*
	 * XXX Not done yet:
	 * Check if the page being requested is already present.  If
	 * so, call the undefined instruction handler instead (ARM3 ds
	 * p15).
	 */

	curcpu()->ci_data.cpu_ntrap++;

	if (TRAP_USERMODE(tf)) {
		lwp_settrapframe(l, tf);
		LWP_CACHE_CREDS(l, p);
	} else {
#ifdef DDB
		db_printf("Prefetch abort in kernel mode\n");
		kdb_trap(T_FAULT, tf);
#else
#ifdef DEBUG
		printf("Prefetch abort:\n");
		printregs(tf);
#endif
		panic("prefetch abort in kernel mode");
#endif
	}

	/* User-mode prefetch abort */
	vaddr_t pc = tf->tf_r15 & R15_PC;

	do_fault(tf, l, &p->p_vmspace->vm_map, pc, VM_PROT_EXECUTE);
	userret(l);
}
/*
 * Process a system call.
 */
void
syscall(register_t code, struct frame frame)
{
	struct lwp *l;
	struct proc *p;
	u_quad_t sticks;

	curcpu()->ci_data.cpu_nsyscall++;

	if (!USERMODE(frame.f_sr))
		panic("syscall");

	l = curlwp;
	p = l->l_proc;
	sticks = p->p_sticks;
	l->l_md.md_regs = frame.f_regs;
	LWP_CACHE_CREDS(l, p);

	(p->p_md.md_syscall)(code, l, &frame);

	machine_userret(l, &frame, sticks);
}
void
undefinedinstruction(trapframe_t *tf)
{
	struct lwp *l;
	vaddr_t fault_pc;
	int fault_instruction;
	int fault_code;
	int coprocessor;
	int user;
	struct undefined_handler *uh;
#ifdef VERBOSE_ARM32
	int s;
#endif

	curcpu()->ci_und_ev.ev_count++;

#ifdef KDTRACE_HOOKS
	if ((tf->tf_spsr & PSR_MODE) != PSR_USR32_MODE) {
		tf->tf_pc -= INSN_SIZE;
		if (dtrace_trapper(tf->tf_pc, tf) == 0)
			return;
		tf->tf_pc += INSN_SIZE;	/* Reset for the rest of the code */
	}
#endif

	/* Enable interrupts if they were enabled before the exception. */
#ifdef acorn26
	if ((tf->tf_r15 & R15_IRQ_DISABLE) == 0)
		int_on();
#else
	restore_interrupts(tf->tf_spsr & IF32_bits);
#endif

#ifndef acorn26
#ifdef THUMB_CODE
	if (tf->tf_spsr & PSR_T_bit)
		tf->tf_pc -= THUMB_INSN_SIZE;
	else
#endif
	{
		tf->tf_pc -= INSN_SIZE;
	}
#endif

#ifdef __PROG26
	fault_pc = tf->tf_r15 & R15_PC;
#else
	fault_pc = tf->tf_pc;
#endif

	/* Get the current lwp/proc structure or lwp0/proc0 if there is none. */
	l = curlwp;

#ifdef __PROG26
	if ((tf->tf_r15 & R15_MODE) == R15_MODE_USR) {
#else
	if ((tf->tf_spsr & PSR_MODE) == PSR_USR32_MODE) {
#endif
		user = 1;
		LWP_CACHE_CREDS(l, l->l_proc);
	} else
		user = 0;

#ifdef THUMB_CODE
	if (tf->tf_spsr & PSR_T_bit) {
		fault_instruction = read_thumb_insn(fault_pc, user);
		if (fault_instruction >= 0xe000) {
			fault_instruction = (fault_instruction << 16)
			    | read_thumb_insn(fault_pc + 2, user);
		}
	}
	else
#endif
	{
		/*
		 * Make sure the program counter is correctly aligned so we
		 * don't take an alignment fault trying to read the opcode.
		 */
		if (__predict_false((fault_pc & 3) != 0)) {
			ksiginfo_t ksi;

			/* Give the user an illegal instruction signal. */
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGILL;
			ksi.ksi_code = ILL_ILLOPC;
			ksi.ksi_addr = (uint32_t *)(intptr_t)fault_pc;
			trapsignal(l, &ksi);
			userret(l);
			return;
		}

		/*
		 * Should use fuword() here .. but in the interests of
		 * squeezing every bit of speed we will just use
		 * ReadWord().  We know the instruction can be read
		 * as it was just executed, so this will never fail unless
		 * the kernel is screwed up, in which case it does
		 * not really matter, does it?
		 */
		fault_instruction = read_insn(fault_pc, user);
	}

	/* Update vmmeter statistics */
	curcpu()->ci_data.cpu_ntrap++;

#ifdef THUMB_CODE
	if ((tf->tf_spsr & PSR_T_bit) && !CPU_IS_ARMV7_P()) {
		coprocessor = THUMB_UNKNOWN_HANDLER;
	}
	else
#endif
	{
		/* Check for coprocessor instruction */

		/*
		 * According to the datasheets you only need to look at
		 * bit 27 of the instruction to tell the difference
		 * between an undefined instruction and a coprocessor
		 * instruction following an undefined instruction trap.
		 *
		 * ARMv5 adds undefined instructions in the NV space,
		 * even when bit 27 is set.
		 */
		if ((fault_instruction & (1 << 27)) != 0
		    && (fault_instruction & 0xf0000000) != 0xf0000000) {
			coprocessor = (fault_instruction >> 8) & 0x0f;
#ifdef THUMB_CODE
		} else if ((tf->tf_spsr & PSR_T_bit) && !CPU_IS_ARMV7_P()) {
			coprocessor = THUMB_UNKNOWN_HANDLER;
#endif
		} else {
			coprocessor = CORE_UNKNOWN_HANDLER;
		}
	}
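read_thumb_insn() is called twice because a 32-bit Thumb-2 encoding is
recognized by its first halfword; the handler above uses a >= 0xe000
threshold, modeled in this sketch. A standalone version with made-up sample
halfwords:

#include <stdint.h>
#include <stdio.h>

/*
 * Combine Thumb halfwords the way the handler does: if the first
 * halfword falls in the 32-bit encoding space, fetch the next halfword
 * and concatenate (first halfword in the high 16 bits).
 */
static uint32_t
fetch_thumb_insn(const uint16_t *mem)
{
	uint32_t insn = mem[0];

	if (insn >= 0xe000)		/* same threshold as the handler */
		insn = (insn << 16) | mem[1];
	return insn;
}

int
main(void)
{
	uint16_t prog[] = { 0x4770,		/* 16-bit: bx lr */
			    0xf000, 0xb800 };	/* 32-bit: b.w */

	printf("%#x\n", fetch_thumb_insn(&prog[0]));	/* 0x4770 */
	printf("%#x\n", fetch_thumb_insn(&prog[1]));	/* 0xf000b800 */
	return 0;
}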
/*ARGSUSED*/
void
trap(struct frame *fp, int type, unsigned code, unsigned v)
{
	extern char fubail[], subail[];
	struct lwp *l;
	struct proc *p;
	struct pcb *pcb;
	void *onfault;
	ksiginfo_t ksi;
	int s;
	int rv;
	u_quad_t sticks = 0 /* XXX initialiser works around compiler bug */;
	static int panicking __diagused;

	curcpu()->ci_data.cpu_ntrap++;
	l = curlwp;
	p = l->l_proc;
	pcb = lwp_getpcb(l);

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_trap = type & ~T_USER;

	if (USERMODE(fp->f_sr)) {
		type |= T_USER;
		sticks = p->p_sticks;
		l->l_md.md_regs = fp->f_regs;
		LWP_CACHE_CREDS(l, p);
	}

	switch (type) {
	default:
	dopanic:
		/*
		 * Let the kernel debugger see the trap frame that
		 * caused us to panic.  This is a convenience so
		 * one can see registers at the point of failure.
		 */
		s = splhigh();
		panicking = 1;
		printf("trap type %d, code = 0x%x, v = 0x%x\n", type, code, v);
		printf("%s program counter = 0x%x\n",
		    (type & T_USER) ? "user" : "kernel", fp->f_pc);
#ifdef KGDB
		/* If connected, step or cont returns 1 */
		if (kgdb_trap(type, (db_regs_t *)fp))
			goto kgdb_cont;
#endif
#ifdef DDB
		(void)kdb_trap(type, (db_regs_t *)fp);
#endif
#ifdef KGDB
	kgdb_cont:
#endif
		splx(s);
		if (panicstr) {
			printf("trap during panic!\n");
#ifdef DEBUG
			/* XXX should be a machine-dependent hook */
			printf("(press a key)\n");
			(void)cngetc();
#endif
		}
		regdump((struct trapframe *)fp, 128);
		type &= ~T_USER;
		if ((u_int)type < trap_types)
			panic(trap_type[type]);
		panic("trap");

	case T_BUSERR:		/* kernel bus error */
		onfault = pcb->pcb_onfault;
		if (onfault == NULL)
			goto dopanic;
		rv = EFAULT;
		/* FALLTHROUGH */

	copyfault:
		/*
		 * If we have arranged to catch this fault in any of the
		 * copy to/from user space routines, set PC to return to
		 * indicated location and set flag informing buserror code
		 * that it may need to clean up stack frame.
		 */
		fp->f_stackadj = exframesize[fp->f_format];
		fp->f_format = fp->f_vector = 0;
		fp->f_pc = (int)onfault;
		fp->f_regs[D0] = rv;
		return;

	case T_BUSERR|T_USER:	/* bus error */
	case T_ADDRERR|T_USER:	/* address error */
		ksi.ksi_addr = (void *)v;
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = (type == (T_BUSERR|T_USER)) ?
		    BUS_OBJERR : BUS_ADRERR;
		break;

	case T_COPERR:		/* kernel coprocessor violation */
	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
	case T_FMTERR:		/* ...just in case... */
		/*
		 * The user has most likely trashed the RTE or FP state info
		 * in the stack frame of a signal handler.
		 */
		printf("pid %d: kernel %s exception\n", p->p_pid,
		    type == T_COPERR ? "coprocessor" : "format");
		type |= T_USER;
		mutex_enter(p->p_lock);
		SIGACTION(p, SIGILL).sa_handler = SIG_DFL;
		sigdelset(&p->p_sigctx.ps_sigignore, SIGILL);
		sigdelset(&p->p_sigctx.ps_sigcatch, SIGILL);
		sigdelset(&l->l_sigmask, SIGILL);
		mutex_exit(p->p_lock);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_addr = (void *)(int)fp->f_format;
		    /* XXX was ILL_RESAD_FAULT */
		ksi.ksi_code = (type == T_COPERR) ?
		    ILL_COPROC : ILL_ILLOPC;
		break;

	case T_COPERR|T_USER:	/* user coprocessor violation */
		/* What is a proper response here? */
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_code = FPE_FLTINV;
		break;

	case T_FPERR|T_USER:	/* 68881 exceptions */
		/*
		 * We pass along the 68881 status register which locore
		 * stashed in code for us.
		 */
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_code = fpsr2siginfocode(code);
		break;

#ifdef M68040
	case T_FPEMULI|T_USER:	/* unimplemented FP instruction */
	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
		/* XXX need to FSAVE */
		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
		    p->p_pid, p->p_comm,
		    fp->f_format == 2 ? "instruction" : "data type",
		    fp->f_pc, fp->f_fmt2.f_iaddr);
		/* XXX need to FRESTORE */
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_code = FPE_FLTINV;
		break;
#endif

	case T_ILLINST|T_USER:	/* illegal instruction fault */
	case T_PRIVINST|T_USER:	/* privileged instruction fault */
		ksi.ksi_addr = (void *)(int)fp->f_format;
		    /* XXX was ILL_PRIVIN_FAULT */
		ksi.ksi_signo = SIGILL;
		ksi.ksi_code = (type == (T_PRIVINST|T_USER)) ?
		    ILL_PRVOPC : ILL_ILLOPC;
		break;

	case T_ZERODIV|T_USER:	/* Divide by zero */
		ksi.ksi_addr = (void *)(int)fp->f_format;
		    /* XXX was FPE_INTDIV_TRAP */
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_code = FPE_FLTDIV;
		break;

	case T_CHKINST|T_USER:	/* CHK instruction trap */
		ksi.ksi_addr = (void *)(int)fp->f_format;
		    /* XXX was FPE_SUBRNG_TRAP */
		ksi.ksi_signo = SIGFPE;
		break;

	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
		ksi.ksi_addr = (void *)(int)fp->f_format;
		    /* XXX was FPE_INTOVF_TRAP */
		ksi.ksi_signo = SIGFPE;
		break;

	/*
	 * XXX: Trace traps are a nightmare.
	 *
	 * HP-UX uses trap #1 for breakpoints,
	 * NetBSD/m68k uses trap #2,
	 * SUN 3.x uses trap #15,
	 * DDB and KGDB use trap #15 (for kernel breakpoints;
	 * handled elsewhere).
	 *
	 * NetBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
	 * supported yet.
	 *
	 * XXX: We should never get kernel-mode T_TRAP15
	 * XXX: because locore.s now gives them special treatment.
	 */
	case T_TRAP15:		/* kernel breakpoint */
#ifdef DEBUG
		printf("unexpected kernel trace trap, type = %d\n", type);
		printf("program counter = 0x%x\n", fp->f_pc);
#endif
		fp->f_sr &= ~PSL_T;
		return;

	case T_TRACE|T_USER:	/* user trace trap */
#ifdef COMPAT_SUNOS
		/*
		 * SunOS uses Trap #2 for a "CPU cache flush".
		 * Just flush the on-chip caches and return.
		 */
		if (p->p_emul == &emul_sunos) {
			ICIA();
			DCIU();
			return;
		}
#endif
		/* FALLTHROUGH */
	case T_TRACE:		/* tracing a trap instruction */
	case T_TRAP15|T_USER:	/* SUN user trace trap */
		fp->f_sr &= ~PSL_T;
		ksi.ksi_signo = SIGTRAP;
		break;

	case T_ASTFLT:		/* system async trap, cannot happen */
		goto dopanic;

	case T_ASTFLT|T_USER:	/* user async trap */
		astpending = 0;
		/*
		 * We check for software interrupts first.  This is because
		 * they are at a higher level than ASTs, and on a VAX would
		 * interrupt the AST.  We assume that if we are processing
		 * an AST that we must be at IPL0 so we don't bother to
		 * check.  Note that we ensure that we are at least at SIR
		 * IPL while processing the SIR.
		 */
		spl1();
		/* fall into... */

	case T_SSIR:		/* software interrupt */
	case T_SSIR|T_USER:
		/*
		 * If this was not an AST trap, we are all done.
		 */
		if (type != (T_ASTFLT|T_USER)) {
			curcpu()->ci_data.cpu_ntrap--;
			return;
		}
		spl0();
		if (l->l_pflag & LP_OWEUPC) {
			l->l_pflag &= ~LP_OWEUPC;
			ADDUPROF(l);
		}
		if (curcpu()->ci_want_resched)
			preempt();
		goto out;

	case T_MMUFLT:		/* kernel mode page fault */
		/*
		 * If we were doing profiling ticks or other user mode
		 * stuff from interrupt code, Just Say No.
		 */
		onfault = pcb->pcb_onfault;
		if (onfault == fubail || onfault == subail) {
			rv = EFAULT;
			goto copyfault;
		}
		/* fall into ... */

	case T_MMUFLT|T_USER:	/* page fault */
	    {
		vaddr_t va;
		struct vmspace *vm = p->p_vmspace;
		struct vm_map *map;
		vm_prot_t ftype;
		extern struct vm_map *kernel_map;

		onfault = pcb->pcb_onfault;

#ifdef DEBUG
		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
			printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
			    p->p_pid, code, v, fp->f_pc, fp->f_sr);
#endif
		/*
		 * It is only a kernel address space fault iff:
		 *	1. (type & T_USER) == 0 and
		 *	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space data fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 */
		if ((type & T_USER) == 0 &&
		    (onfault == NULL || KDFAULT(code)))
			map = kernel_map;
		else {
			map = vm ? &vm->vm_map : kernel_map;
		}

		if (WRFAULT(code))
			ftype = VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		va = trunc_page((vaddr_t)v);

		if (map == kernel_map && va == 0) {
			printf("trap: bad kernel %s access at 0x%x\n",
			    (ftype & VM_PROT_WRITE) ? "read/write" :
			    "read", v);
			goto dopanic;
		}

#ifdef DIAGNOSTIC
		if (interrupt_depth && !panicking) {
			printf("trap: calling uvm_fault() from interrupt!\n");
			goto dopanic;
		}
#endif

		pcb->pcb_onfault = NULL;
		rv = uvm_fault(map, va, ftype);
		pcb->pcb_onfault = onfault;
#ifdef DEBUG
		if (rv && MDB_ISPID(p->p_pid))
			printf("uvm_fault(%p, 0x%lx, 0x%x) -> 0x%x\n",
			    map, va, ftype, rv);
#endif
		/*
		 * If this was a stack access we keep track of the maximum
		 * accessed stack size.  Also, if vm_fault gets a protection
		 * failure it is due to accessing the stack region outside
		 * the current limit and we need to reflect that as an access
		 * error.
		 */
		if (rv == 0) {
			if (map != kernel_map &&
			    (void *)va >= vm->vm_maxsaddr)
				uvm_grow(p, va);

			if (type == T_MMUFLT) {
				if (ucas_ras_check(&fp->F_t)) {
					return;
				}
#ifdef M68040
				if (cputype == CPU_68040)
					(void)writeback(fp, 1);
#endif
				return;
			}
			goto out;
		}
		if (rv == EACCES) {
			ksi.ksi_code = SEGV_ACCERR;
			rv = EFAULT;
		} else
			ksi.ksi_code = SEGV_MAPERR;
		if (type == T_MMUFLT) {
			if (onfault)
				goto copyfault;
			printf("uvm_fault(%p, 0x%lx, 0x%x) -> 0x%x\n",
			    map, va, ftype, rv);
			printf("  type %x, code [mmu,,ssw]: %x\n",
			    type, code);
			goto dopanic;
		}
		ksi.ksi_addr = (void *)v;
		switch (rv) {
		case ENOMEM:
			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
			    p->p_pid, p->p_comm,
			    l->l_cred ?
			    kauth_cred_geteuid(l->l_cred) : -1);
			ksi.ksi_signo = SIGKILL;
			break;
		case EINVAL:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRERR;
			break;
		case EACCES:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			break;
		default:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_MAPERR;
			break;
		}
		break;
	    }
	}
	trapsignal(l, &ksi);
	if ((type & T_USER) == 0)
		return;
out:
	userret(l, fp, sticks, v, 1);
}
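The switch on rv at the end of the fault path maps UVM error codes onto
signals. A compilable sketch of that mapping, using the standard siginfo
si_code constants:

#include <errno.h>
#include <signal.h>
#include <stdio.h>

struct fault_sig { int signo; int code; };

/*
 * Map a uvm_fault()-style error to the signal the handler above posts:
 * ENOMEM kills outright, EINVAL is a bus error, EACCES is a protection
 * violation, anything else is an unmapped address.
 */
static struct fault_sig
fault_to_signal(int error)
{
	switch (error) {
	case ENOMEM:	return (struct fault_sig){ SIGKILL, 0 };
	case EINVAL:	return (struct fault_sig){ SIGBUS, BUS_ADRERR };
	case EACCES:	return (struct fault_sig){ SIGSEGV, SEGV_ACCERR };
	default:	return (struct fault_sig){ SIGSEGV, SEGV_MAPERR };
	}
}

int
main(void)
{
	int errors[] = { ENOMEM, EINVAL, EACCES, EFAULT };

	for (unsigned i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
		struct fault_sig fs = fault_to_signal(errors[i]);
		printf("error %d -> signal %d, si_code %d\n",
		    errors[i], fs.signo, fs.code);
	}
	return 0;
}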
/*
 * trap(frame): exception, fault, and trap interface to BSD kernel.
 *
 * This common code is called from assembly language IDT gate entry routines
 * that prepare a suitable stack frame, and restore this frame after the
 * exception has been processed.  Note that the effect is as if the arguments
 * were passed call by reference.
 */
void
trap(struct trapframe *frame)
{
	struct lwp *l = curlwp;
	struct proc *p;
	struct pcb *pcb;
	extern char fusubail[], kcopy_fault[], return_address_fault[],
	    IDTVEC(osyscall)[];
	struct trapframe *vframe;
	ksiginfo_t ksi;
	void *onfault;
	int type, error;
	uint32_t cr2;
	bool pfail;

	if (__predict_true(l != NULL)) {
		pcb = lwp_getpcb(l);
		p = l->l_proc;
	} else {
		/*
		 * This can happen, e.g., on breakpoints early in boot.
		 */
		pcb = NULL;
		p = NULL;
	}
	type = frame->tf_trapno;

#ifdef DEBUG
	if (trapdebug) {
		trap_print(frame, l);
	}
#endif
	if (type != T_NMI && !KERNELMODE(frame->tf_cs, frame->tf_eflags)) {
		type |= T_USER;
		l->l_md.md_regs = frame;
		pcb->pcb_cr2 = 0;
		LWP_CACHE_CREDS(l, p);
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe.  Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault.  On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * If the DTrace kernel module has registered a trap handler,
	 * call it and if it returns non-zero, assume that it has
	 * handled the trap and modified the trap frame so that this
	 * function can return normally.
	 */
	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
	    dtrace_trap_func != NULL) {
		if ((*dtrace_trap_func)(frame, type)) {
			return;
		}
	}
#endif

	switch (type) {

	case T_ASTFLT:
		/*FALLTHROUGH*/

	default:
	we_re_toast:
		if (type == T_TRCTRAP)
			check_dr0();
		else
			trap_print(frame, l);
		if (kdb_trap(type, 0, frame))
			return;
		if (kgdb_trap(type, frame))
			return;
		/*
		 * If this is a breakpoint, don't panic if we're not connected.
		 */
		if (type == T_BPTFLT && kgdb_disconnected()) {
			printf("kgdb: ignored %s\n", trap_type[type]);
			return;
		}
		panic("trap");
		/*NOTREACHED*/

	case T_PROTFLT:
	case T_SEGNPFLT:
	case T_ALIGNFLT:
	case T_TSSFLT:
		if (p == NULL)
			goto we_re_toast;
		/* Check for copyin/copyout fault. */
		onfault = onfault_handler(pcb, frame);
		if (onfault != NULL) {
copyefault:
			error = EFAULT;
copyfault:
			frame->tf_eip = (uintptr_t)onfault;
			frame->tf_eax = error;
			return;
		}

		/*
		 * Check for failure during return to user mode.
		 * This can happen loading invalid values into the segment
		 * registers, or during the 'iret' itself.
		 *
		 * We do this by looking at the instruction we faulted on.
		 * The specific instructions we recognize only happen when
		 * returning from a trap, syscall, or interrupt.
		 */

kernelfault:
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGSEGV;
		ksi.ksi_code = SEGV_ACCERR;
		ksi.ksi_trap = type;

		switch (*(u_char *)frame->tf_eip) {
		case 0xcf:	/* iret */
			/*
			 * The 'iret' instruction faulted, so we have the
			 * 'user' registers saved after the kernel
			 * %eip:%cs:%fl of the 'iret' and below that the
			 * user %eip:%cs:%fl the 'iret' was processing.
			 * We must delete the 3 words of kernel return
			 * address from the stack to generate a normal
			 * stack frame (eg for sending a SIGSEGV).
			 */
			vframe = (void *)((int *)frame + 3);
			if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
				goto we_re_toast;
			memmove(vframe, frame,
			    offsetof(struct trapframe, tf_eip));
			/* Set the faulting address to the user %eip */
			ksi.ksi_addr = (void *)vframe->tf_eip;
			break;
		case 0x8e:
			switch (*(uint32_t *)frame->tf_eip) {
			case 0x8e242c8e:	/* mov (%esp,%gs), then */
			case 0x0424648e:	/* mov 0x4(%esp),%fs */
			case 0x0824448e:	/* mov 0x8(%esp),%es */
			case 0x0c245c8e:	/* mov 0xc(%esp),%ds */
				break;
			default:
				goto we_re_toast;
			}
			/*
			 * We faulted loading one of the user segment
			 * registers.  The stack frame containing the user
			 * registers is still valid and is just below the
			 * %eip:%cs:%fl of the kernel fault frame.
			 */
			vframe = (void *)(&frame->tf_eflags + 1);
			if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
				goto we_re_toast;
			/* There is no valid address for the fault */
			break;
		default:
			goto we_re_toast;
		}
		/*
		 * We might have faulted trying to execute the
		 * trampoline for a local (nested) signal handler.
		 * Only generate SIGSEGV if the user %cs isn't changed.
		 * (This is only strictly necessary in the 'iret' case.)
		 */
		if (!pmap_exec_fixup(&p->p_vmspace->vm_map, vframe, pcb)) {
			/* Save outer frame for any signal return */
			l->l_md.md_regs = vframe;
			(*p->p_emul->e_trapsignal)(l, &ksi);
		}
		/* Return to user by reloading the user frame */
		trap_return_fault_return(vframe);
		/* NOTREACHED */

	case T_PROTFLT|T_USER:		/* protection fault */
	case T_TSSFLT|T_USER:
	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:
	case T_ALIGNFLT|T_USER:
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_addr = (void *)rcr2();
		switch (type) {
		case T_SEGNPFLT|T_USER:
		case T_STKFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRERR;
			break;
		case T_TSSFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			break;
		case T_ALIGNFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRALN;
			break;
		case T_PROTFLT|T_USER:
#ifdef VM86
			if (frame->tf_eflags & PSL_VM) {
				vm86_gpfault(l, type & ~T_USER);
				goto out;
			}
#endif
			/*
			 * If pmap_exec_fixup does something,
			 * let's retry the trap.
			 */
			if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame,
			    pcb)) {
				goto out;
			}
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			break;
		default:
			KASSERT(0);
			break;
		}
		goto trapsignal;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_addr = (void *)frame->tf_eip;
		switch (type) {
		case T_PRIVINFLT|T_USER:
			ksi.ksi_code = ILL_PRVOPC;
			break;
		case T_FPOPFLT|T_USER:
			ksi.ksi_code = ILL_COPROC;
			break;
		default:
			ksi.ksi_code = 0;
			break;
		}
		goto trapsignal;

	case T_ASTFLT|T_USER:
		/* Allow process switch. */
		//curcpu()->ci_data.cpu_nast++;
		if (l->l_pflag & LP_OWEUPC) {
			l->l_pflag &= ~LP_OWEUPC;
			ADDUPROF(l);
		}
		/* Allow a forced task switch. */
		if (curcpu()->ci_want_resched) {
			preempt();
		}
		goto out;

	case T_BOUND|T_USER:
	case T_OFLOW|T_USER:
	case T_DIVIDE|T_USER:
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_addr = (void *)frame->tf_eip;
		switch (type) {
		case T_BOUND|T_USER:
			ksi.ksi_code = FPE_FLTSUB;
			break;
		case T_OFLOW|T_USER:
			ksi.ksi_code = FPE_INTOVF;
			break;
		case T_DIVIDE|T_USER:
			ksi.ksi_code = FPE_INTDIV;
			break;
		default:
			ksi.ksi_code = 0;
			break;
		}
		goto trapsignal;

	case T_PAGEFLT:
		/* Allow page faults in kernel mode. */
		if (__predict_false(l == NULL))
			goto we_re_toast;

		/*
		 * fusubail is used by [fs]uswintr() to prevent page
		 * faulting from inside the profiling interrupt.
		 */
		onfault = pcb->pcb_onfault;
		if (onfault == fusubail || onfault == return_address_fault) {
			goto copyefault;
		}
		if (cpu_intr_p() || (l->l_pflag & LP_INTR) != 0) {
			goto we_re_toast;
		}

		cr2 = rcr2();
		goto faultcommon;

	case T_PAGEFLT|T_USER: {	/* page fault */
		register vaddr_t va;
		register struct vmspace *vm;
		register struct vm_map *map;
		vm_prot_t ftype;
		extern struct vm_map *kernel_map;

		cr2 = rcr2();
faultcommon:
		vm = p->p_vmspace;
		if (__predict_false(vm == NULL)) {
			goto we_re_toast;
		}
		pcb->pcb_cr2 = cr2;
		va = trunc_page((vaddr_t)cr2);
		/*
		 * It is only a kernel address space fault iff:
		 *	1. (type & T_USER) == 0 and
		 *	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 */
		if (type == T_PAGEFLT && va >= KERNBASE)
			map = kernel_map;
		else
			map = &vm->vm_map;
		if (frame->tf_err & PGEX_W)
			ftype = VM_PROT_WRITE;
		else if (frame->tf_err & PGEX_X)
			ftype = VM_PROT_EXECUTE;
		else
			ftype = VM_PROT_READ;

#ifdef DIAGNOSTIC
		if (map == kernel_map && va == 0) {
			printf("trap: bad kernel access at %lx\n", va);
			goto we_re_toast;
		}
#endif
		/* Fault the original page in. */
		onfault = pcb->pcb_onfault;
		pcb->pcb_onfault = NULL;
		error = uvm_fault(map, va, ftype);
		pcb->pcb_onfault = onfault;
		if (error == 0) {
			if (map != kernel_map &&
			    (void *)va >= vm->vm_maxsaddr)
				uvm_grow(p, va);

			pfail = false;
			while (type == T_PAGEFLT) {
				/*
				 * we need to switch pmap now if we're in
				 * the middle of copyin/out.
				 *
				 * but we don't need to do so for kcopy as
				 * it never touches userspace.
				 */
				kpreempt_disable();
				if (curcpu()->ci_want_pmapload) {
					onfault = onfault_handler(pcb, frame);
					if (onfault != kcopy_fault) {
						pmap_load();
					}
				}
				/*
				 * We need to keep the pmap loaded and
				 * so avoid being preempted until back
				 * into the copy functions.  Disable
				 * interrupts at the hardware level before
				 * re-enabling preemption.  Interrupts
				 * will be re-enabled by 'iret' when
				 * returning back out of the trap stub.
				 * They'll only be re-enabled when the
				 * program counter is once again in
				 * the copy functions, and so visible
				 * to cpu_kpreempt_exit().
				 */
#ifndef XEN
				x86_disable_intr();
#endif
				l->l_nopreempt--;
				if (l->l_nopreempt > 0 || !l->l_dopreempt ||
				    pfail) {
					return;
				}
#ifndef XEN
				x86_enable_intr();
#endif
				/*
				 * If preemption fails for some reason,
				 * don't retry it.  The conditions won't
				 * change under our nose.
				 */
				pfail = kpreempt(0);
			}
			goto out;
		}

		if (type == T_PAGEFLT) {
			onfault = onfault_handler(pcb, frame);
			if (onfault != NULL)
				goto copyfault;
			printf("uvm_fault(%p, %#lx, %d) -> %#x\n",
			    map, va, ftype, error);
			goto kernelfault;
		}

		KSI_INIT_TRAP(&ksi);
		ksi.ksi_trap = type & ~T_USER;
		ksi.ksi_addr = (void *)cr2;
		switch (error) {
		case EINVAL:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRERR;
			break;
		case EACCES:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			error = EFAULT;
			break;
		case ENOMEM:
			ksi.ksi_signo = SIGKILL;
			printf("UVM: pid %d.%d (%s), uid %d killed: "
			    "out of swap\n", p->p_pid, l->l_lid, p->p_comm,
			    l->l_cred ?
			    kauth_cred_geteuid(l->l_cred) : -1);
			break;
		default:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_MAPERR;
			break;
		}

#ifdef TRAP_SIGDEBUG
		printf("pid %d.%d (%s): signal %d at eip %x addr %lx "
		    "error %d\n", p->p_pid, l->l_lid, p->p_comm,
		    ksi.ksi_signo, frame->tf_eip, va, error);
#endif
		(*p->p_emul->e_trapsignal)(l, &ksi);
		break;
	}

	case T_TRCTRAP:
		/* Check whether they single-stepped into a lcall. */
		if (frame->tf_eip == (int)IDTVEC(osyscall))
			return;
		if (frame->tf_eip == (int)IDTVEC(osyscall) + 1) {
			frame->tf_eflags &= ~PSL_T;
			return;
		}
		goto we_re_toast;

	case T_BPTFLT|T_USER:		/* bpt instruction fault */
	case T_TRCTRAP|T_USER:		/* trace trap */
		/*
		 * Don't go single-stepping into a RAS.
		 */
		if (p->p_raslist == NULL ||
		    (ras_lookup(p, (void *)frame->tf_eip) == (void *)-1)) {
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGTRAP;
			ksi.ksi_trap = type & ~T_USER;
			if (type == (T_BPTFLT|T_USER))
				ksi.ksi_code = TRAP_BRKPT;
			else
				ksi.ksi_code = TRAP_TRACE;
			ksi.ksi_addr = (void *)frame->tf_eip;
			(*p->p_emul->e_trapsignal)(l, &ksi);
		}
		break;

	case T_NMI:
		if (nmi_dispatch(frame))
			return;
		/* NMI can be hooked up to a pushbutton for debugging */
		if (kgdb_trap(type, frame))
			return;
		if (kdb_trap(type, 0, frame))
			return;
		/* machine/parity/power fail/"kitchen sink" faults */
#if NMCA > 0
		mca_nmi();
#endif
		x86_nmi();
	}

	if ((type & T_USER) == 0)
		return;
out:
	userret(l);
	return;
trapsignal:
	ksi.ksi_trap = type & ~T_USER;
	(*p->p_emul->e_trapsignal)(l, &ksi);
	userret(l);
}
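The fault type fed to uvm_fault() is decoded from the hardware error code via
the PGEX_W and PGEX_X tests above. A userland sketch of that decoding (the
bit values follow the usual x86 layout but should be treated as assumptions
here):

#include <stdio.h>

/* x86 page-fault error-code bits (values as commonly defined) */
#define PGEX_P	0x01	/* protection violation (vs. not-present) */
#define PGEX_W	0x02	/* fault was a write */
#define PGEX_U	0x04	/* fault came from user mode */
#define PGEX_X	0x10	/* fault was an instruction fetch */

static const char *
fault_type(unsigned err)
{
	/* same priority as the handler: write, then execute, then read */
	if (err & PGEX_W)
		return "write";
	if (err & PGEX_X)
		return "execute";
	return "read";
}

int
main(void)
{
	unsigned codes[] = { PGEX_U | PGEX_W, PGEX_U | PGEX_X, PGEX_U };

	for (unsigned i = 0; i < 3; i++)
		printf("err=%#x -> %s fault\n", codes[i],
		    fault_type(codes[i]));
	return 0;
}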
void
trap(struct trapframe *tf)
{
	u_int sig = 0, type = tf->tf_trap, code = 0;
	u_int rv, addr;
	bool trapsig = true;
	const bool usermode = USERMODE_P(tf);
	struct lwp * const l = curlwp;
	struct proc * const p = l->l_proc;
	struct pcb * const pcb = lwp_getpcb(l);
	u_quad_t oticks = 0;
	struct vmspace *vm;
	struct vm_map *map;
	vm_prot_t ftype;
	void *onfault = pcb->pcb_onfault;

	KASSERT(p != NULL);
	curcpu()->ci_data.cpu_ntrap++;
	if (usermode) {
		type |= T_USER;
		oticks = p->p_sticks;
		l->l_md.md_utf = tf;
		LWP_CACHE_CREDS(l, p);
	}

	type &= ~(T_WRITE|T_PTEFETCH);

#ifdef TRAPDEBUG
	if (tf->tf_trap == 7)
		goto fram;
	if (faultdebug)
		printf("Trap: type %lx, code %lx, pc %lx, psl %lx\n",
		    tf->tf_trap, tf->tf_code, tf->tf_pc, tf->tf_psl);
fram:
#endif
	switch (type) {

	default:
#ifdef DDB
		kdb_trap(tf);
#endif
		panic("trap: type %x, code %x, pc %x, psl %x",
		    (u_int)tf->tf_trap, (u_int)tf->tf_code,
		    (u_int)tf->tf_pc, (u_int)tf->tf_psl);

	case T_KSPNOTVAL:
		panic("%d.%d (%s): KSP invalid %#x@%#x pcb %p fp %#x psl %#x)",
		    p->p_pid, l->l_lid, l->l_name ? l->l_name : "??",
		    mfpr(PR_KSP), (u_int)tf->tf_pc, pcb,
		    (u_int)tf->tf_fp, (u_int)tf->tf_psl);

	case T_TRANSFLT|T_USER:
	case T_TRANSFLT:
		/*
		 * BUG! BUG! BUG! BUG! BUG!
		 * Due to a hardware bug (in at least KA65x CPUs) a double
		 * page table fetch trap will cause a translation fault
		 * even if access in the SPT PTE entry specifies 'no access'.
		 * For example, section 6.4.2 of the VAX Architecture
		 * Reference Manual states that if a page is both invalid
		 * and has no access set, an 'access violation fault'
		 * occurs.  Therefore, we must fall through here...
		 */
#ifdef nohwbug
		panic("translation fault");
#endif

	case T_PTELEN|T_USER:	/* Page table length exceeded */
	case T_ACCFLT|T_USER:
		if (tf->tf_code < 0) {	/* Check for kernel space */
			sig = SIGSEGV;
			code = SEGV_ACCERR;
			break;
		}

	case T_PTELEN:
#ifndef MULTIPROCESSOR
		/*
		 * If we referred to an address beyond the end of the system
		 * page table, it may be due to a failed CAS
		 * restartable-atomic-sequence.  If it is, restart it at the
		 * beginning and restart.
		 */
		{
			extern const uint8_t cas32_ras_start[],
			    cas32_ras_end[];

			if (tf->tf_code == CASMAGIC &&
			    tf->tf_pc >= (uintptr_t)cas32_ras_start &&
			    tf->tf_pc < (uintptr_t)cas32_ras_end) {
				tf->tf_pc = (uintptr_t)cas32_ras_start;
				trapsig = false;
				break;
			}
		}
		/* FALLTHROUGH */
#endif

	case T_ACCFLT:
#ifdef TRAPDEBUG
		if (faultdebug)
			printf("trap accflt type %lx, code %lx, pc %lx, psl %lx\n",
			    tf->tf_trap, tf->tf_code, tf->tf_pc, tf->tf_psl);
#endif
#ifdef DIAGNOSTIC
		if (p == 0)
			panic("trap: access fault: addr %lx code %lx",
			    tf->tf_pc, tf->tf_code);
		if (tf->tf_psl & PSL_IS)
			panic("trap: pflt on IS");
#endif

		/*
		 * Page tables are allocated in pmap_enter().  We get
		 * info from below if it is a page table fault, but
		 * UVM may want to map in pages without faults, so
		 * because we must check for PTE pages anyway we don't
		 * bother doing it here.
		 */
		addr = trunc_page(tf->tf_code);
		if (!usermode && (tf->tf_code < 0)) {
			vm = NULL;
			map = kernel_map;
		} else {
			vm = p->p_vmspace;
			map = &vm->vm_map;
		}

		if (tf->tf_trap & T_WRITE)
			ftype = VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		pcb->pcb_onfault = NULL;
		rv = uvm_fault(map, addr, ftype);
		pcb->pcb_onfault = onfault;
		if (rv != 0) {
			if (!usermode) {
				if (onfault) {
					pcb->pcb_onfault = NULL;
					tf->tf_pc = (unsigned)onfault;
					tf->tf_psl &= ~PSL_FPD;
					tf->tf_r0 = rv;
					return;
				}
				printf("r0=%08lx r1=%08lx r2=%08lx r3=%08lx ",
				    tf->tf_r0, tf->tf_r1, tf->tf_r2,
				    tf->tf_r3);
				printf("r4=%08lx r5=%08lx r6=%08lx r7=%08lx\n",
				    tf->tf_r4, tf->tf_r5, tf->tf_r6,
				    tf->tf_r7);
				printf("r8=%08lx r9=%08lx r10=%08lx r11=%08lx\n",
				    tf->tf_r8, tf->tf_r9, tf->tf_r10,
				    tf->tf_r11);
				printf("ap=%08lx fp=%08lx sp=%08lx pc=%08lx\n",
				    tf->tf_ap, tf->tf_fp, tf->tf_sp,
				    tf->tf_pc);
				panic("SEGV in kernel mode: pc %#lx addr %#lx",
				    tf->tf_pc, tf->tf_code);
			}
			switch (rv) {
			case ENOMEM:
				printf("UVM: pid %d (%s), uid %d killed: "
				    "out of swap\n", p->p_pid, p->p_comm,
				    l->l_cred ?
				    kauth_cred_geteuid(l->l_cred) : -1);
				sig = SIGKILL;
				code = SI_NOINFO;
				break;
			case EINVAL:
				code = BUS_ADRERR;
				sig = SIGBUS;
				break;
			case EACCES:
				code = SEGV_ACCERR;
				sig = SIGSEGV;
				break;
			default:
				code = SEGV_MAPERR;
				sig = SIGSEGV;
				break;
			}
		} else {
			trapsig = false;
			if (map != kernel_map && addr > 0 &&
			    (void *)addr >= vm->vm_maxsaddr)
				uvm_grow(p, addr);
		}
		break;

	case T_BPTFLT|T_USER:
		sig = SIGTRAP;
		code = TRAP_BRKPT;
		break;

	case T_TRCTRAP|T_USER:
		sig = SIGTRAP;
		code = TRAP_TRACE;
		tf->tf_psl &= ~PSL_T;
		break;

	case T_PRIVINFLT|T_USER:
		sig = SIGILL;
		code = ILL_PRVOPC;
		break;

	case T_RESADFLT|T_USER:
		sig = SIGILL;
		code = ILL_ILLADR;
		break;

	case T_RESOPFLT|T_USER:
		sig = SIGILL;
		code = ILL_ILLOPC;
		break;

	case T_XFCFLT|T_USER:
		sig = SIGEMT;
		break;

	case T_ARITHFLT|T_USER:
		sig = SIGFPE;
		switch (tf->tf_code) {
		case ATRP_INTOVF:
			code = FPE_INTOVF;
			break;
		case ATRP_INTDIV:
			code = FPE_INTDIV;
			break;
		case ATRP_FLTOVF:
			code = FPE_FLTOVF;
			break;
		case ATRP_FLTDIV:
			code = FPE_FLTDIV;
			break;
		case ATRP_FLTUND:
			code = FPE_FLTUND;
			break;
		case ATRP_DECOVF:
			code = FPE_INTOVF;
			break;
		case ATRP_FLTSUB:
			code = FPE_FLTSUB;
			break;
		case AFLT_FLTDIV:
			code = FPE_FLTDIV;
			break;
		case AFLT_FLTUND:
			code = FPE_FLTUND;
			break;
		case AFLT_FLTOVF:
			code = FPE_FLTOVF;
			break;
		default:
			code = FPE_FLTINV;
			break;
		}
		break;

	case T_ASTFLT|T_USER:
		mtpr(AST_NO, PR_ASTLVL);
		trapsig = false;
		if (curcpu()->ci_want_resched)
			preempt();
		break;

#ifdef DDB
	case T_BPTFLT:		/* Kernel breakpoint */
	case T_KDBTRAP:
	case T_KDBTRAP|T_USER:
	case T_TRCTRAP:
		kdb_trap(tf);
		return;
#endif
	}

	if (trapsig) {
		ksiginfo_t ksi;

		if ((sig == SIGSEGV || sig == SIGILL) &&
		    cpu_printfataltraps &&
		    (p->p_slflag & PSL_TRACED) == 0 &&
		    !sigismember(&p->p_sigctx.ps_sigcatch, sig))
			printf("pid %d.%d (%s): sig %d: "
			    "type %lx, code %lx, pc %lx, psl %lx\n",
			    p->p_pid, l->l_lid, p->p_comm, sig,
			    tf->tf_trap, tf->tf_code, tf->tf_pc, tf->tf_psl);
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = sig;
		ksi.ksi_trap = tf->tf_trap;
		ksi.ksi_addr = (void *)tf->tf_code;
		ksi.ksi_code = code;

		/*
		 * Arithmetic exceptions can be of two kinds:
		 * - traps (codes 1..7), where pc points to the
		 *   next instruction to execute.
		 * - faults (codes 8..10), where pc points to the
		 *   faulting instruction.
		 * In the latter case, we need to advance pc by ourselves
		 * to prevent a signal loop.
		 *
		 * XXX this is gross -- miod
		 */
		if (type == (T_ARITHFLT | T_USER) && (tf->tf_code & 8))
			tf->tf_pc = skip_opcode(tf->tf_pc);

		trapsignal(l, &ksi);
	}

	if (!usermode)
		return;

	userret(l, tf, oticks);
}
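The (tf_code & 8) test relies on the code ranges named in the comment:
arithmetic traps use codes 1..7 (pc already past the instruction) and faults
use 8..10 (pc still at it), so bit 3 separates the two classes. A tiny sketch
of that classification:

#include <stdbool.h>
#include <stdio.h>

/*
 * VAX arithmetic exception codes 1..7 are traps; codes 8..10 are
 * faults whose saved pc must be advanced past the opcode before the
 * signal is delivered, or the instruction faults again forever.
 */
static bool
needs_pc_advance(unsigned code)
{
	return (code & 8) != 0;
}

int
main(void)
{
	for (unsigned code = 1; code <= 10; code++)
		printf("code %2u: %s\n", code,
		    needs_pc_advance(code) ? "fault, advance pc"
		    : "trap, pc already advanced");
	return 0;
}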
/*ARGSUSED*/
void
trap(struct trapframe *tf, int type, u_int code, u_int v)
{
	struct lwp *l;
	struct proc *p;
	struct pcb *pcb;
	ksiginfo_t ksi;
	int tmp;
	int rv;
	u_quad_t sticks;
	void *onfault;

	curcpu()->ci_data.cpu_ntrap++;

	l = curlwp;
	p = l->l_proc;
	pcb = lwp_getpcb(l);
	onfault = pcb->pcb_onfault;

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_trap = type & ~T_USER;

	KASSERT(pcb != NULL);

	if (USERMODE(tf->tf_sr)) {
		type |= T_USER;
		sticks = p->p_sticks;
		l->l_md.md_regs = tf->tf_regs;
		LWP_CACHE_CREDS(l, p);
	} else {
		sticks = 0;
		/* XXX: Detect trap recursion? */
	}

	switch (type) {
	default:
	dopanic:
		printf("trap type=0x%x, code=0x%x, v=0x%x\n", type, code, v);

		/*
		 * Let the kernel debugger see the trap frame that
		 * caused us to panic.  This is a convenience so
		 * one can see registers at the point of failure.
		 */
		tmp = splhigh();
#ifdef KGDB
		/* If connected, step or cont returns 1 */
		if (kgdb_trap(type, tf))
			goto kgdb_cont;
#endif
#ifdef DDB
		(void)kdb_trap(type, (db_regs_t *)tf);
#endif
#ifdef KGDB
	kgdb_cont:
#endif
		splx(tmp);
		if (panicstr) {
			/*
			 * Note: panic is smart enough to do:
			 *   boot(RB_AUTOBOOT | RB_NOSYNC, NULL)
			 * if we call it again.
			 */
			panic("trap during panic!");
		}
		regdump(tf, 128);
		type &= ~T_USER;
		if ((u_int)type < trap_types)
			panic(trap_type[type]);
		panic("trap type 0x%x", type);

	case T_BUSERR:		/* kernel bus error */
		if (onfault == NULL)
			goto dopanic;
		rv = EFAULT;
		/*FALLTHROUGH*/

	copyfault:
		/*
		 * If we have arranged to catch this fault in any of the
		 * copy to/from user space routines, set PC to return to
		 * indicated location and set flag informing buserror code
		 * that it may need to clean up stack frame.
		 */
		tf->tf_stackadj = exframesize[tf->tf_format];
		tf->tf_format = tf->tf_vector = 0;
		tf->tf_pc = (int)onfault;
		tf->tf_regs[D0] = rv;
		goto done;

	case T_BUSERR|T_USER:	/* bus error */
	case T_ADDRERR|T_USER:	/* address error */
		ksi.ksi_addr = (void *)v;
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = (type == (T_BUSERR|T_USER)) ?
		    BUS_OBJERR : BUS_ADRERR;
		break;

	case T_COPERR:		/* kernel coprocessor violation */
	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
	case T_FMTERR:		/* ...just in case... */
		/*
		 * The user has most likely trashed the RTE or FP state info
		 * in the stack frame of a signal handler.
		 */
		printf("pid %d: kernel %s exception\n", p->p_pid,
		    type == T_COPERR ? "coprocessor" : "format");
		type |= T_USER;
		mutex_enter(p->p_lock);
		SIGACTION(p, SIGILL).sa_handler = SIG_DFL;
		sigdelset(&p->p_sigctx.ps_sigignore, SIGILL);
		sigdelset(&p->p_sigctx.ps_sigcatch, SIGILL);
		sigdelset(&l->l_sigmask, SIGILL);
		mutex_exit(p->p_lock);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_addr = (void *)(int)tf->tf_format;
		ksi.ksi_code = (type == T_COPERR) ?
		    ILL_COPROC : ILL_ILLOPC;
		break;

	case T_COPERR|T_USER:	/* user coprocessor violation */
		/* What is a proper response here? */
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_code = FPE_FLTINV;
		break;

	case T_FPERR|T_USER:	/* 68881 exceptions */
		/*
		 * We pass along the 68881 status register which locore
		 * stashed in code for us.
		 */
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_code = fpsr2siginfocode(code);
		break;

	case T_FPEMULI:		/* FPU faults in supervisor mode */
	case T_FPEMULD:
		if (nofault)	/* Doing FPU probe? */
			longjmp(nofault);
		goto dopanic;

	case T_FPEMULI|T_USER:	/* unimplemented FP instruction */
	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
#ifdef FPU_EMULATE
		if (fpu_emulate(tf, &pcb->pcb_fpregs, &ksi) == 0)
			;	/* XXX - Deal with tracing? (tf->tf_sr & PSL_T) */
#else
		uprintf("pid %d killed: no floating point support\n",
		    p->p_pid);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_code = ILL_ILLOPC;
#endif
		break;

	case T_ILLINST|T_USER:	/* illegal instruction fault */
	case T_PRIVINST|T_USER:	/* privileged instruction fault */
		ksi.ksi_addr = (void *)(int)tf->tf_format;
		ksi.ksi_signo = SIGILL;
		ksi.ksi_code = (type == (T_PRIVINST|T_USER)) ?
		    ILL_PRVOPC : ILL_ILLOPC;
		break;

	case T_ZERODIV|T_USER:	/* Divide by zero */
		ksi.ksi_code = FPE_FLTDIV;
	case T_CHKINST|T_USER:	/* CHK instruction trap */
	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
		ksi.ksi_addr = (void *)(int)tf->tf_format;
		ksi.ksi_signo = SIGFPE;
		break;

	/*
	 * XXX: Trace traps are a nightmare.
	 *
	 * HP-UX uses trap #1 for breakpoints,
	 * NetBSD/m68k uses trap #2,
	 * SUN 3.x uses trap #15,
	 * DDB and KGDB use trap #15 (for kernel breakpoints;
	 * handled elsewhere).
	 *
	 * NetBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
	 * supported yet.
	 *
	 * XXX: We should never get kernel-mode T_TRAP15
	 * XXX: because locore.s now gives them special treatment.
	 */
	case T_TRAP15:		/* kernel breakpoint */
		tf->tf_sr &= ~PSL_T;
		goto done;

	case T_TRACE|T_USER:	/* user trace trap */
#ifdef COMPAT_SUNOS
		/*
		 * SunOS uses Trap #2 for a "CPU cache flush".
		 * Just flush the on-chip caches and return.
		 * XXX - Too bad NetBSD uses trap 2...
		 */
		if (p->p_emul == &emul_sunos) {
			/* get out fast */
			goto done;
		}
#endif
		/* FALLTHROUGH */
	case T_TRACE:		/* tracing a trap instruction */
	case T_TRAP15|T_USER:	/* SUN user trace trap */
		tf->tf_sr &= ~PSL_T;
		ksi.ksi_signo = SIGTRAP;
		break;

	case T_ASTFLT:		/* system async trap, cannot happen */
		goto dopanic;

	case T_ASTFLT|T_USER:	/* user async trap */
		astpending = 0;
		/* T_SSIR is not used on a Sun2. */
		if (l->l_pflag & LP_OWEUPC) {
			l->l_pflag &= ~LP_OWEUPC;
			ADDUPROF(l);
		}
		if (curcpu()->ci_want_resched)
			preempt();
		goto douret;

	case T_MMUFLT:		/* kernel mode page fault */
		/* Hacks to avoid calling VM code from debugger. */
#ifdef DDB
		if (db_recover != 0)
			goto dopanic;
#endif
#ifdef KGDB
		if (kgdb_recover != 0)
			goto dopanic;
#endif
		/*
		 * If we were doing profiling ticks or other user mode
		 * stuff from interrupt code, Just Say No.
		 */
		if (onfault == (void *)fubail || onfault == (void *)subail) {
#ifdef DEBUG
			if (mmudebug & MDB_CPFAULT) {
				printf("trap: copyfault fu/su bail\n");
				Debugger();
			}
#endif
			rv = EFAULT;
			goto copyfault;
		}
		/*FALLTHROUGH*/

	case T_MMUFLT|T_USER: {	/* page fault */
		vaddr_t va;
		struct vmspace *vm = p->p_vmspace;
		struct vm_map *map;
		vm_prot_t ftype;
		extern struct vm_map *kernel_map;

#ifdef DEBUG
		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
			printf("trap: T_MMUFLT pid=%d, code=0x%x, v=0x%x, pc=0x%x, sr=0x%x\n",
			    p->p_pid, code, v, tf->tf_pc, tf->tf_sr);
#endif

		/*
		 * It is only a kernel address space fault iff:
		 *	1. (type & T_USER) == 0 and: (2 or 3)
		 *	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space data fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 */
		map = &vm->vm_map;
		if ((type & T_USER) == 0) {
			/* supervisor mode fault */
			if (onfault == NULL || KDFAULT(code))
				map = kernel_map;
		}

		if (WRFAULT(code))
			ftype = VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		va = m68k_trunc_page((vaddr_t)v);

		/*
		 * Need to resolve the fault.
		 *
		 * We give the pmap code a chance to resolve faults by
		 * reloading translations that it was forced to unload.
		 * This function does that, and calls vm_fault if it
		 * could not resolve the fault by reloading the MMU.
		 * This function may also, for example, disallow any
		 * faults in the kernel text segment, etc.
		 */
		pcb->pcb_onfault = NULL;
		rv = _pmap_fault(map, va, ftype);
		pcb->pcb_onfault = onfault;

#ifdef DEBUG
		if (rv && MDB_ISPID(p->p_pid)) {
			printf("vm_fault(%p, 0x%lx, 0x%x) -> 0x%x\n",
			    map, va, ftype, rv);
			if (mmudebug & MDB_WBFAILED)
				Debugger();
		}
#endif /* DEBUG */

		/*
		 * If this was a stack access we keep track of the maximum
		 * accessed stack size.  Also, if vm_fault gets a protection
		 * failure it is due to accessing the stack region outside
		 * the current limit and we need to reflect that as an access
		 * error.
		 */
		if (rv == 0) {
			if (map != kernel_map &&
			    (void *)va >= vm->vm_maxsaddr)
				uvm_grow(p, va);

			if ((type & T_USER) == 0 &&
			    ucas_ras_check(tf)) {
				return;
			}
			goto finish;
		}
		if (rv == EACCES) {
			ksi.ksi_code = SEGV_ACCERR;
			rv = EFAULT;
		} else
			ksi.ksi_code = SEGV_MAPERR;
		if ((type & T_USER) == 0) {
			/* supervisor mode fault */
			if (onfault) {
#ifdef DEBUG
				if (mmudebug & MDB_CPFAULT) {
					printf("trap: copyfault pcb_onfault\n");
					Debugger();
				}
#endif
				goto copyfault;
			}
			printf("vm_fault(%p, 0x%lx, 0x%x) -> 0x%x\n",
			    map, va, ftype, rv);
			goto dopanic;
		}
		ksi.ksi_addr = (void *)v;
		switch (rv) {
		case ENOMEM:
			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
			    p->p_pid, p->p_comm,
			    l->l_cred ?
			    kauth_cred_geteuid(l->l_cred) : -1);
			ksi.ksi_signo = SIGKILL;
			break;
		case EINVAL:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRERR;
			break;
		case EACCES:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			break;
		default:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_MAPERR;
			break;
		}
		break;
	    }			/* T_MMUFLT */
	}			/* switch */

finish:
	/* If trap was from supervisor mode, just return. */
	if ((type & T_USER) == 0)
		goto done;
	/* Post a signal if necessary. */
	if (ksi.ksi_signo)
		trapsignal(l, &ksi);
douret:
	userret(l, tf, sticks);
done:
	;
	/* XXX: Detect trap recursion? */
}
void
linux_syscall_plain(struct lwp *l, u_int status, u_int cause, u_int opc)
{
	struct proc *p = l->l_proc;
	struct frame *frame = (struct frame *)l->l_md.md_regs;
	register_t *args, copyargs[8];
	register_t *rval = NULL;	/* XXX gcc */
	register_t copyrval[2];
	size_t nsaved, nargs;
	const struct sysent *callp;
	int error;
	u_int code;

	LWP_CACHE_CREDS(l, p);

	uvmexp.syscalls++;

	callp = p->p_emul->e_sysent;
	code = frame->f_regs[_R_R8];

#ifdef KERN_SA
	if (__predict_false((l->l_savp)
	    && (l->l_savp->savp_pflags & SAVP_FLAG_DELIVERING)))
		l->l_savp->savp_pflags &= ~SAVP_FLAG_DELIVERING;
#endif

	switch (code) {
	case SYS_syscall:
	case SYS___syscall:
		panic("linux_syscall_plain: SYS*syscall: not yet");
#if notyet
		args = copyargs;
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = frame->f_regs[_R_A0] - SYSCALL_SHIFT;
			args[0] = frame->f_regs[_R_A1];
			args[1] = frame->f_regs[_R_A2];
			args[2] = frame->f_regs[_R_A3];
			nsaved = 3;
		} else {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 */
			code = frame->f_regs[_R_A0 + _QUAD_LOWWORD]
			    - SYSCALL_SHIFT;
			args[0] = frame->f_regs[_R_A2];
			args[1] = frame->f_regs[_R_A3];
			nsaved = 2;
		}

		if (code >= p->p_emul->e_nsysent)
			callp += p->p_emul->e_nosys;
		else
			callp += code;

		nargs = callp->sy_argsize / sizeof(register_t);
		if (nargs > nsaved) {
			error = copyin(
			    ((register_t *)(vaddr_t)frame->f_regs[_R_SP] + 4),
			    (args + nsaved),
			    (nargs - nsaved) * sizeof(register_t));
			if (error)
				goto bad;
		}
#endif
		break;

	default:
		if (code >= p->p_emul->e_nsysent)
			callp += p->p_emul->e_nosys;
		else
			callp += code;

		nargs = callp->sy_narg;
		if (nargs < 5) {
			args = copyargs;
			args[0] = frame->f_regs[_R_R12];
			args[1] = frame->f_regs[_R_R11];
			args[2] = frame->f_regs[_R_R10];
			args[3] = frame->f_regs[_R_R9];
		} else {
			panic("linux_syscall_plain: nargs >= 5: notyet");
		}
		break;
	}

	rval = copyrval;
	rval[0] = 0;
	rval[1] = 0;

	error = sy_call(callp, l, args, rval);

	switch (error) {
	case 0:
		frame->f_regs[_R_R12] = rval[0];
		if (rval[0] != 0 && rval[1] != 0)
			panic("linux_syscall_plain: rval[1] != 0: notyet");
		break;
	case ERESTART:
		panic("linux_syscall_plain: ERESTART: notyet");
		break;
	case EJUSTRETURN:
		break;	/* nothing to do */
	default:
	bad:
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame->f_regs[_R_R12] = error;
		break;
	}

	userret(l);
}
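On error the handler translates the native errno through p_emul->e_errno
before handing it to the emulated process. A sketch of such a translation
table (the entries and the Linux errno values shown are a small illustrative
subset, not the kernel's table):

#include <errno.h>
#include <stdio.h>

/*
 * Illustrative native->Linux errno translation table in the style of
 * p_emul->e_errno: indexed by the native errno, yielding the value
 * returned to the emulated binary.
 */
static const int native_to_linux_errno[] = {
	[0]      = 0,
	[EPERM]  = 1,
	[ENOENT] = 2,
	[EINTR]  = 4,
	[ENOMEM] = 12,
	[EFAULT] = 14,
};

int
main(void)
{
	int error = EFAULT;	/* pretend sy_call() failed this way */

	/* same shape as: if (p->p_emul->e_errno) error = e_errno[error]; */
	error = native_to_linux_errno[error];
	printf("returned to emulated process: %d\n", error);
	return 0;
}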
/*
 * Trap is called from locore to handle most types of processor traps.
 */
void
trap(unsigned int status, unsigned int cause, vaddr_t vaddr, vaddr_t opc,
    struct trapframe *frame)
{
	int type;
	struct lwp *l = curlwp;
	struct proc *p = curproc;
	vm_prot_t ftype;
	ksiginfo_t ksi;
	struct frame *fp;
	extern void fswintrberr(void);

	KSI_INIT_TRAP(&ksi);

	uvmexp.traps++;

	if ((type = TRAPTYPE(cause)) >= LENGTH(trap_type))
		panic("trap: unknown trap type %d", type);

	if (USERMODE(status)) {
		type |= T_USER;
		LWP_CACHE_CREDS(l, p);
	}

	/* Enable interrupts just as they were before the trap. */
	_splset(status & AVR32_STATUS_IMx);

	switch (type) {
	default:
	dopanic:
		(void)splhigh();

		printf("trap: %s in %s mode\n",
		    trap_type[TRAPTYPE(cause)],
		    USERMODE(status) ? "user" : "kernel");
		printf("status=0x%x, cause=0x%x, epc=%#lx, vaddr=%#lx\n",
		    status, cause, opc, vaddr);
		if (curlwp != NULL) {
			fp = (struct frame *)l->l_md.md_regs;
			printf("pid=%d cmd=%s usp=0x%x ",
			    p->p_pid, p->p_comm, (int)fp->f_regs[_R_SP]);
		} else
			printf("curlwp == NULL ");
		printf("ksp=%p\n", &status);
#if defined(DDB)
		kdb_trap(type, (mips_reg_t *)frame);
		/* XXX force halt XXX */
#elif defined(KGDB)
		{
			struct frame *f = (struct frame *)&ddb_regs;
			extern mips_reg_t kgdb_cause, kgdb_vaddr;

			kgdb_cause = cause;
			kgdb_vaddr = vaddr;

			/*
			 * init global ddb_regs, used in db_interface.c
			 * routines shared between ddb and gdb.  Send
			 * ddb_regs to gdb so that db_machdep.h macros
			 * will work with it, and allow gdb to alter
			 * the PC.
			 */
			db_set_ddb_regs(type, (mips_reg_t *)frame);
			PC_BREAK_ADVANCE(f);
			if (kgdb_trap(type, &ddb_regs)) {
				((mips_reg_t *)frame)[21] = f->f_regs[_R_PC];
				return;
			}
		}
#else
		panic("trap: dopanic: notyet");
#endif
		/*NOTREACHED*/

	case T_TLB_MOD:
		panic("trap: T_TLB_MOD: notyet");
#if notyet
		if (KERNLAND(vaddr)) {
			pt_entry_t *pte;
			unsigned entry;
			paddr_t pa;

			pte = kvtopte(vaddr);
			entry = pte->pt_entry;
			if (!avr32_pte_v(entry)
			    /* || (entry & mips_pg_m_bit()) */) {
				panic("ktlbmod: invalid pte");
			}
			if (entry & avr32_pte_ropage_bit()) {
				/* write to read only page in the kernel */
				ftype = VM_PROT_WRITE;
				goto kernelfault;
			}
			entry |= mips_pg_m_bit();
			    /* XXXAVR32 Do it on tlbarlo/tlbarhi? */
			pte->pt_entry = entry;
			vaddr &= ~PGOFSET;
			MachTLBUpdate(vaddr, entry);
			pa = avr32_tlbpfn_to_paddr(entry);
			if (!IS_VM_PHYSADDR(pa)) {
				printf("ktlbmod: va %#lx pa %#llx\n",
				    vaddr, (long long)pa);
				panic("ktlbmod: unmanaged page");
			}
			pmap_set_modified(pa);
			return;	/* KERN */
		}
		/*FALLTHROUGH*/
#endif

	case T_TLB_MOD+T_USER:
		panic("trap: T_TLB_MOD+T_USER: notyet");
#if notyet
		{
			pt_entry_t *pte;
			unsigned entry;
			paddr_t pa;
			pmap_t pmap;

			pmap = p->p_vmspace->vm_map.pmap;
			if (!(pte = pmap_segmap(pmap, vaddr)))
				panic("utlbmod: invalid segmap");
			pte += (vaddr >> PGSHIFT) & (NPTEPG - 1);
			entry = pte->pt_entry;
			if (!avr32_pte_v(entry))
				panic("utlbmod: invalid pte");
			if (entry & avr32_pte_ropage_bit()) {
				/* write to read only page */
				ftype = VM_PROT_WRITE;
				goto pagefault;
			}
			/* entry |= mips_pg_m_bit();
			   XXXAVR32 Do it on tlbarlo/tlbarhi? */
			pte->pt_entry = entry;
			vaddr = (vaddr & ~PGOFSET) |
			    (pmap->pm_asid << AVR32_TLB_PID_SHIFT);
			MachTLBUpdate(vaddr, entry);
			pa = avr32_tlbpfn_to_paddr(entry);
			if (!IS_VM_PHYSADDR(pa)) {
				printf("utlbmod: va %#lx pa %#llx\n",
				    vaddr, (long long)pa);
				panic("utlbmod: unmanaged page");
			}
			pmap_set_modified(pa);
			if (type & T_USER)
				userret(l);
			return;	/* GEN */
		}
#endif

	case T_TLB_LD_MISS:
		panic("trap: T_TLB_LD_MISS: notyet");

	case T_TLB_ST_MISS:
		ftype = (type == T_TLB_LD_MISS) ?
		    VM_PROT_READ : VM_PROT_WRITE;
		if (KERNLAND(vaddr))
			goto kernelfault;
		panic("trap: T_TLB_ST_MISS: notyet");
#if notyet
		/*
		 * It is an error for the kernel to access user space except
		 * through the copyin/copyout routines.
		 */
		if (l == NULL || l->l_addr->u_pcb.pcb_onfault == NULL)
			goto dopanic;
		/* check for fuswintr() or suswintr() getting a page fault */
		if (l->l_addr->u_pcb.pcb_onfault == (void *)fswintrberr) {
			frame->tf_regs[TF_EPC] = (int)fswintrberr;
			return;	/* KERN */
		}
		goto pagefault;
#endif

	case T_TLB_LD_MISS+T_USER:
		panic("trap: T_TLB_LD_MISS+T_USER: notyet");
#if notyet
		ftype = VM_PROT_READ;
		goto pagefault;
#endif

	case T_TLB_ST_MISS+T_USER:
		panic("trap: T_TLB_ST_MISS+T_USER: notyet");
#if notyet
		ftype = VM_PROT_WRITE;
#endif
	pagefault:
	    {
		vaddr_t va;
		struct vmspace *vm;
		struct vm_map *map;
		int rv;

		vm = p->p_vmspace;
		map = &vm->vm_map;
		va = trunc_page(vaddr);

		if ((l->l_flag & LW_SA) && (~l->l_pflag & LP_SA_NOBLOCK)) {
			l->l_savp->savp_faultaddr = (vaddr_t)vaddr;
			l->l_pflag |= LP_SA_PAGEFAULT;
		}

		if (p->p_emul->e_fault)
			rv = (*p->p_emul->e_fault)(p, va, ftype);
		else
			rv = uvm_fault(map, va, ftype);

#ifdef VMFAULT_TRACE
		printf("uvm_fault(%p (pmap %p), %lx (0x%x), %d) -> %d at pc %p\n",
		    map, vm->vm_map.pmap, va, vaddr, ftype, rv, (void *)opc);
#endif
		/*
		 * If this was a stack access we keep track of the maximum
		 * accessed stack size.  Also, if vm_fault gets a protection
		 * failure it is due to accessing the stack region outside
		 * the current limit and we need to reflect that as an access
		 * error.
		 */
		if ((void *)va >= vm->vm_maxsaddr) {
			if (rv == 0) {
				uvm_grow(p, va);
			} else if (rv == EACCES)
				rv = EFAULT;
		}
		l->l_pflag &= ~LP_SA_PAGEFAULT;
		if (rv == 0) {
			if (type & T_USER) {
				userret(l);
			}
			return;	/* GEN */
		}
		if ((type & T_USER) == 0)
			goto copyfault;
		if (rv == ENOMEM) {
			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
			    p->p_pid, p->p_comm,
			    l->l_cred ?
			    kauth_cred_geteuid(l->l_cred) : (uid_t)-1);
			ksi.ksi_signo = SIGKILL;
			ksi.ksi_code = 0;
		} else {
			if (rv == EACCES) {
				ksi.ksi_signo = SIGBUS;
				ksi.ksi_code = BUS_OBJERR;
			} else {
				ksi.ksi_signo = SIGSEGV;
				ksi.ksi_code = SEGV_MAPERR;
			}
		}
		ksi.ksi_trap = type & ~T_USER;
		ksi.ksi_addr = (void *)vaddr;
		break;	/* SIGNAL */
	    }
	kernelfault:
	    {
		vaddr_t va;
		int rv;

		va = trunc_page(vaddr);
		rv = uvm_fault(kernel_map, va, ftype);
		if (rv == 0)
			return;	/* KERN */
		/*FALLTHROUGH*/
	    }

	case T_ADDR_ERR_LD:	/* misaligned access */
	case T_ADDR_ERR_ST:	/* misaligned access */
	case T_BUS_ERR_LD_ST:	/* BERR asserted to CPU */
	copyfault:
		panic("trap: copyfault: notyet");
#if notyet
		if (l == NULL || l->l_addr->u_pcb.pcb_onfault == NULL)
			goto dopanic;
		frame->tf_regs[TF_EPC] =
		    (intptr_t)l->l_addr->u_pcb.pcb_onfault;
		return;	/* KERN */
#endif

#if notyet
	case T_ADDR_ERR_LD+T_USER:	/* misaligned or kseg access */
	case T_ADDR_ERR_ST+T_USER:	/* misaligned or kseg access */
	case T_BUS_ERR_IFETCH+T_USER:	/* BERR asserted to CPU */
	case T_BUS_ERR_LD_ST+T_USER:	/* BERR asserted to CPU */
		ksi.ksi_trap = type & ~T_USER;
		ksi.ksi_signo = SIGSEGV;	/* XXX */
		ksi.ksi_addr = (void *)vaddr;
		ksi.ksi_code = SEGV_MAPERR;	/* XXX */
		break;	/* SIGNAL */

	case T_BREAK:
		panic("trap: T_BREAK: notyet");
#if defined(DDB)
		kdb_trap(type, (avr32_reg_t *)frame);
		return;	/* KERN */
#elif defined(KGDB)
		{
			struct frame *f = (struct frame *)&ddb_regs;
			extern avr32_reg_t kgdb_cause, kgdb_vaddr;

			kgdb_cause = cause;
			kgdb_vaddr = vaddr;

			/*
			 * init global ddb_regs, used in db_interface.c
			 * routines shared between ddb and gdb.  Send
			 * ddb_regs to gdb so that db_machdep.h macros
			 * will work with it, and allow gdb to alter
			 * the PC.
			 */
			db_set_ddb_regs(type, (avr32_reg_t *)frame);
			PC_BREAK_ADVANCE(f);
			if (!kgdb_trap(type, &ddb_regs))
				printf("kgdb: ignored %s\n",
				    trap_type[TRAPTYPE(cause)]);
			else
				((avr32_reg_t *)frame)[21] = f->f_regs[_R_PC];
			return;
		}
#else
		goto dopanic;
#endif

	case T_BREAK+T_USER:
	    {
		vaddr_t va;
		uint32_t instr;
		int rv;

		/* compute address of break instruction */
		va = (DELAYBRANCH(cause)) ? opc + sizeof(int) : opc;

		/* read break instruction */
		instr = fuiword((void *)va);

		if (l->l_md.md_ss_addr != va || instr != MIPS_BREAK_SSTEP) {
			ksi.ksi_trap = type & ~T_USER;
			ksi.ksi_signo = SIGTRAP;
			ksi.ksi_addr = (void *)va;
			ksi.ksi_code = TRAP_TRACE;
			break;
		}
		/*
		 * Restore original instruction and clear BP
		 */
		rv = suiword((void *)va, l->l_md.md_ss_instr);
		if (rv < 0) {
			vaddr_t sa, ea;

			sa = trunc_page(va);
			ea = round_page(va + sizeof(int) - 1);
			rv = uvm_map_protect(&p->p_vmspace->vm_map,
			    sa, ea, VM_PROT_ALL, false);
			if (rv == 0) {
				rv = suiword((void *)va,
				    l->l_md.md_ss_instr);
				(void)uvm_map_protect(&p->p_vmspace->vm_map,
				    sa, ea,
				    VM_PROT_READ|VM_PROT_EXECUTE, false);
			}
		}
		mips_icache_sync_all();		/* XXXJRT -- necessary? */
		mips_dcache_wbinv_all();	/* XXXJRT -- necessary? */
		if (rv < 0)
			printf("Warning: can't restore instruction at 0x%lx: 0x%x\n",
			    l->l_md.md_ss_addr, l->l_md.md_ss_instr);
		l->l_md.md_ss_addr = 0;
		ksi.ksi_trap = type & ~T_USER;
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_addr = (void *)va;
		ksi.ksi_code = TRAP_BRKPT;
		break;	/* SIGNAL */
	    }

	case T_RES_INST+T_USER:
	case T_COP_UNUSABLE+T_USER:
#if !defined(SOFTFLOAT) && !defined(NOFPU)
		if ((cause & MIPS_CR_COP_ERR) == 0x10000000) {
			struct frame *f;

			f = (struct frame *)l->l_md.md_regs;
			savefpregs(fpcurlwp);		/* yield FPA */
			loadfpregs(l);			/* load FPA */
			fpcurlwp = l;
			l->l_md.md_flags |= MDP_FPUSED;
			f->f_regs[_R_SR] |= MIPS_SR_COP_1_BIT;
		} else
#endif
		{
			MachEmulateInst(status, cause, opc, l->l_md.md_regs);
		}
		userret(l);
		return;	/* GEN */

	case T_FPE+T_USER:
		panic("trap: T_FPE+T_USER: notyet");
#if defined(SOFTFLOAT)
		MachEmulateInst(status, cause, opc, l->l_md.md_regs);
#elif !defined(NOFPU)
		MachFPTrap(status, cause, opc, l->l_md.md_regs);
#endif
		userret(l);
		return;	/* GEN */

	case T_OVFLOW+T_USER:
	case T_TRAP+T_USER:
		ksi.ksi_trap = type & ~T_USER;
		ksi.ksi_signo = SIGFPE;
		fp = (struct frame *)l->l_md.md_regs;
		ksi.ksi_addr = (void *)fp->f_regs[_R_PC];
		ksi.ksi_code = FPE_FLTOVF;	/* XXX */
		break;	/* SIGNAL */
#endif
	}
	panic("trap: post-switch: notyet");
#if notyet
	fp = (struct frame *)l->l_md.md_regs;
	fp->f_regs[_R_CAUSE] = cause;
	fp->f_regs[_R_BADVADDR] = vaddr;
	(*p->p_emul->e_trapsignal)(l, &ksi);
	if ((type & T_USER) == 0)
		panic("trapsignal");
	userret(l);
#endif
	return;
}
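Removing the single-step breakpoint may require temporarily widening the page
protection, as the uvm_map_protect() dance above does when the direct store
fails. A userland analogue using mmap/mprotect (the patched word is a made-up
placeholder instruction):

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/*
 * Userland sketch of the breakpoint-removal dance: if a direct store
 * to the code address would fail (the page is read/execute only),
 * temporarily widen the protection, rewrite the saved instruction,
 * then restore read/execute.
 */
int
main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	uint32_t *code = mmap(NULL, pagesz, PROT_READ | PROT_EXEC,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (code == MAP_FAILED)
		return 1;

	uint32_t saved_insn = 0xd4200000;	/* pretend original insn */

	/* widen protection, patch, then narrow it back */
	if (mprotect(code, pagesz, PROT_READ | PROT_WRITE) != 0)
		return 1;
	code[0] = saved_insn;
	if (mprotect(code, pagesz, PROT_READ | PROT_EXEC) != 0)
		return 1;

	printf("restored insn: %#x\n", code[0]);
	munmap(code, pagesz);
	return 0;
}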
void
EMULNAME(syscall)(struct trapframe *tf)
{
	struct lwp * const l = curlwp;
	struct proc * const p = l->l_proc;
	const struct sysent *callp;
	size_t argsize;
	register_t code;
	register_t realcode;
	register_t *params, rval[2];
	register_t args[10];
	int error;
	int n;

	LWP_CACHE_CREDS(l, p);

	curcpu()->ci_ev_scalls.ev_count++;

	code = tf->tf_fixreg[0];
	params = tf->tf_fixreg + FIRSTARG;
	n = NARGREG;
	realcode = code;

	{
		switch (code) {
		case EMULNAMEU(SYS_syscall):
			/*
			 * code is first argument,
			 * followed by actual args.
			 */
			code = *params++;
			n -= 1;
			break;
#if !defined(COMPAT_LINUX)
		case EMULNAMEU(SYS___syscall):
			params++;
			code = *params++;
			n -= 2;
			break;
#endif
		default:
			break;
		}

		code &= EMULNAMEU(SYS_NSYSENT) - 1;
		callp = p->p_emul->e_sysent + code;
		realcode = code;
	}

	argsize = callp->sy_argsize;

	if (argsize > n * sizeof(register_t)) {
		memcpy(args, params, n * sizeof(register_t));
		error = copyin(MOREARGS(tf->tf_fixreg[1]),
		    args + n, argsize - n * sizeof(register_t));
		if (error)
			goto bad;
		params = args;
	}

	error = sy_invoke(callp, l, params, rval, code);

	if (__predict_true(error == 0)) {
		tf->tf_fixreg[FIRSTARG] = rval[0];
		tf->tf_fixreg[FIRSTARG + 1] = rval[1];
		tf->tf_cr &= ~0x10000000;
	} else {
		switch (error) {
		case ERESTART:
			/*
			 * Set user's pc back to redo the system call.
			 */
			tf->tf_srr0 -= 4;
			break;
		case EJUSTRETURN:
			/* nothing to do */
			break;
		default:
		bad:
			if (p->p_emul->e_errno)
				error = p->p_emul->e_errno[error];
			tf->tf_fixreg[FIRSTARG] = error;
			tf->tf_cr |= 0x10000000;
			break;
		}
	}

	userret(l, tf);
}
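ERESTART is handled by rewinding the saved user pc one instruction so the sc
instruction re-executes on return to user mode. A minimal sketch of that
rewind (the structure and addresses are invented for illustration):

#include <stdint.h>
#include <stdio.h>

#define INSN_SIZE 4	/* fixed-width instructions, as on powerpc */

/* minimal stand-in for the saved user program counter */
struct cpu_state { uintptr_t pc; };

/*
 * On ERESTART the handler rewinds the saved pc by one instruction so
 * that, on return to user mode, the syscall instruction runs again.
 */
static void
handle_error(struct cpu_state *st, int restart)
{
	if (restart)
		st->pc -= INSN_SIZE;	/* same effect as tf_srr0 -= 4 */
}

int
main(void)
{
	struct cpu_state st = { .pc = 0x10001004 };	/* insn after 'sc' */

	handle_error(&st, 1);
	printf("resume at %#lx (the syscall instruction itself)\n",
	    (unsigned long)st.pc);
	return 0;
}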
void EMULNAME(syscall)(struct trapframe *tf) { struct lwp * const l = curlwp; struct proc * const p = l->l_proc; register_t *args = &tf->tf_a0; register_t retval[2]; const struct sysent *callp; int code, error; size_t i; #ifdef _LP64 const bool pk32_p = (p->p_flag & PK_32) != 0; register_t copyargs[EMULNAME(SYS_MAXSYSARGS)]; #endif LWP_CACHE_CREDS(l, p); curcpu()->ci_data.cpu_nsyscall++; tf->tf_pc += sizeof(uint32_t); callp = p->p_emul->e_sysent; code = tf->tf_t6 - SYSCALL_SHIFT; /* * Userland should have taken care of moving everything to its * usual place, so these codes should never reach the kernel. */ if (code == SYS_syscall || code == SYS___syscall) { error = ENOSYS; goto bad; } if (code >= p->p_emul->e_nsysent) callp += p->p_emul->e_nosys; else callp += code; const size_t nargs = callp->sy_narg; #ifdef _LP64 /* * Even if there are no 64bit arguments, we still need to "sanitize" * the 32-bit arguments in case stale upper halves try to slip * through in a 64-bit register. */ if (pk32_p) { size_t narg64 = SYCALL_NARGS64(callp); unsigned int arg64mask = SYCALL_ARG_64_MASK(callp); bool doing_arg64 = false; register_t *args32 = args; /* * All arguments are 32bits wide and if we have 64bit arguments * then use two 32bit registers to construct a 64bit argument. * We remarshall them into 64bit slots but we don't want to * disturb the original arguments in case we get restarted. */ if (SYCALL_NARGS64(callp) > 0) { args = copyargs; } /* * Copy all the arguments to copyargs, starting with the ones * in registers. Using the hints in the 64bit argmask, * we marshall the passed 32bit values into 64bit slots. If we * encounter a 64 bit argument, we grab two adjacent 32bit * values and synthesize the 64bit argument. */ for (i = 0; i < nargs + narg64; ) { register_t arg = *args32++; if (__predict_true((arg64mask & 1) == 0)) { /* * Just copy it with sign extension on */ args[i++] = (int32_t) arg; arg64mask >>= 1; continue; } /* * 64bit arg. grab the low 32 bits, discard the high. */ arg = (uint32_t)arg; if (!doing_arg64) { /* * Pick up the 1st word of a 64bit arg. * If lowword == 1 then highword == 0, * so this is the highword and thus * shifted left by 32, otherwise * lowword == 0 and highword == 1 so * it isn't shifted at all. Remember * we still need another word. */ doing_arg64 = true; args[i] = arg << (_QUAD_LOWWORD*32); narg64--; /* one less 64bit arg */ } else { /* * Pick up the 2nd word of a 64bit arg. * if highword == 1, it's shifted left * by 32, otherwise lowword == 1 and * highword == 0 so it isn't shifted at * all. And now head to the next argument. */ doing_arg64 = false; args[i++] |= arg << (_QUAD_HIGHWORD*32); arg64mask >>= 1; } } }
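/*
 * Illustrative sketch, not from the original source: the remarshalling
 * loop above merges two adjacent 32-bit words into one 64-bit slot,
 * choosing the shift by the platform's quad word order.  This
 * stand-alone demo assumes a little-endian layout (DEMO_QUAD_LOWWORD
 * plays the role of _QUAD_LOWWORD and is 0 here); swap the two macros
 * for big-endian.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_QUAD_LOWWORD	0	/* little-endian layout assumed */
#define DEMO_QUAD_HIGHWORD	1

static uint64_t
merge64(uint32_t w0, uint32_t w1)
{
	uint64_t arg;

	/* First word: shifted left by 32 only if it is the high word. */
	arg  = (uint64_t)w0 << (DEMO_QUAD_LOWWORD * 32);
	/* Second word: fills whichever half the first one did not. */
	arg |= (uint64_t)w1 << (DEMO_QUAD_HIGHWORD * 32);
	return arg;
}

int
main(void)
{
	/* 0x11223344 + 0x55667788 should rebuild 0x5566778811223344. */
	printf("0x%016" PRIx64 "\n", merge64(0x11223344, 0x55667788));
	return 0;
}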
void EMULNAME(syscall)(struct lwp *l, u_int status, u_int cause, vaddr_t pc) { struct proc *p = l->l_proc; struct trapframe *tf = l->l_md.md_utf; struct reg *reg = &tf->tf_registers; mips_reg_t *fargs = &reg->r_regs[_R_A0]; register_t *args = NULL; register_t copyargs[2+SYS_MAXSYSARGS]; vaddr_t usp; size_t nargs; const struct sysent *callp; int code, error; #if defined(__mips_o32) const int abi = _MIPS_BSD_API_O32; KASSERTMSG(p->p_md.md_abi == abi, "pid %d(%p): md_abi(%d) != abi(%d)", p->p_pid, p, p->p_md.md_abi, abi); size_t nregs = 4; #else const int abi = p->p_md.md_abi; size_t nregs = _MIPS_SIM_NEWABI_P(abi) ? 8 : 4; size_t i; #endif LWP_CACHE_CREDS(l, p); curcpu()->ci_data.cpu_nsyscall++; if (cause & MIPS_CR_BR_DELAY) reg->r_regs[_R_PC] = mips_emul_branch(tf, pc, 0, false); else reg->r_regs[_R_PC] = pc + sizeof(uint32_t); callp = p->p_emul->e_sysent; const mips_reg_t saved_v0 = reg->r_regs[_R_V0]; code = saved_v0 - SYSCALL_SHIFT; if (code == SYS_syscall || (code == SYS___syscall && abi != _MIPS_BSD_API_O32)) { /* * Code is first argument, followed by actual args. */ code = *fargs++ - SYSCALL_SHIFT; nregs--; } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ code = fargs[_QUAD_LOWWORD] - SYSCALL_SHIFT; fargs += 2; nregs -= 2; } if (code >= p->p_emul->e_nsysent) callp += p->p_emul->e_nosys; else callp += code; nargs = callp->sy_narg; #if !defined(__mips_o32) if (abi != _MIPS_BSD_API_O32) { #endif CTASSERT(sizeof(copyargs[0]) == sizeof(fargs[0])); if (nargs <= nregs) { /* * Just use the trapframe for the source of arguments */ args = fargs; } else { const size_t nsaved = _MIPS_SIM_NEWABI_P(abi) ? 0 : 4; KASSERT(nargs <= __arraycount(copyargs)); args = copyargs; /* * Copy the arguments passed via register from the * trapframe to our argument array */ memcpy(copyargs, fargs, nregs * sizeof(register_t)); /* * Start copying args, skipping the register slots * on the stack. */ usp = reg->r_regs[_R_SP] + nsaved*sizeof(register_t); error = copyin((register_t *)usp, &copyargs[nregs], (nargs - nregs) * sizeof(copyargs[0])); if (error) goto bad; } #if !defined(__mips_o32) } else do { /* * The only difference between O32 and N32 is the calling * sequence. If you make O32 */ int32_t copy32args[SYS_MAXSYSARGS]; int32_t *cargs = copy32args; unsigned int arg64mask = SYCALL_ARG_64_MASK(callp); bool doing_arg64; size_t narg64 = SYCALL_NARGS64(callp); /* * All arguments are 32bits wide and 64bit arguments use * two 32bit registers or stack slots. We need to remarshall * them into 64bit slots */ args = copyargs; CTASSERT(sizeof(copy32args[0]) != sizeof(fargs[0])); /* * If there are no 64bit arguments and all arguments were in * registers, just use the trapframe for the source of arguments */ if (nargs <= nregs && narg64 == 0) { args = fargs; break; } if (nregs <= nargs + narg64) { /* * Grab the non-register arguments from the stack * after skipping the slots for the 4 register passed * arguments. */ usp = reg->r_regs[_R_SP] + 4*sizeof(int32_t); error = copyin((int32_t *)usp, copy32args, (nargs + narg64 - nregs) * sizeof(copy32args[0])); if (error) goto bad; } /* * Copy all the arguments to copyargs, starting with the ones * in registers. Using the hints in the 64bit argmask, * we marshall the passed 32bit values into 64bit slots. If we * encounter a 64 bit argument, we grab two adjacent 32bit * values and synthesize the 64bit argument. 
*/ for (i = 0, doing_arg64 = false; i < nargs + narg64;) { register_t arg; if (nregs > 0) { arg = (int32_t) *fargs++; nregs--; } else { arg = *cargs++; } if (__predict_true((arg64mask & 1) == 0)) { /* * Just copy it with sign extension on */ copyargs[i++] = (int32_t) arg; arg64mask >>= 1; continue; } /* * 64bit arg. grab the low 32 bits, discard the high. */ arg = (uint32_t)arg; if (!doing_arg64) { /* * Pick up the 1st word of a 64bit arg. * If lowword == 1 then highword == 0, * so this is the highword and thus * shifted left by 32, otherwise * lowword == 0 and highword == 1 so * it isn't shifted at all. Remember * we still need another word. */ doing_arg64 = true; copyargs[i] = arg << (_QUAD_LOWWORD*32); narg64--; /* one less 64bit arg */ } else { /* * Pick up the 2nd word of a 64bit arg. * if highword == 1, it's shifted left * by 32, otherwise lowword == 1 and * highword == 0 so it isn't shifted at * all. And now head to the next argument. */ doing_arg64 = false; copyargs[i++] |= arg << (_QUAD_HIGHWORD*32); arg64mask >>= 1; } } } while (/*CONSTCOND*/ 0); /* avoid a goto */ #endif #ifdef MIPS_SYSCALL_DEBUG if (p->p_emul->e_syscallnames) printf("syscall %s:", p->p_emul->e_syscallnames[code]); else printf("syscall %u:", code); if (nargs == 0) printf(" <no args>"); else for (size_t j = 0; j < nargs; j++) { if (j == nregs) printf(" *"); printf(" [%s%zu]=%#"PRIxREGISTER, SYCALL_ARG_64_P(callp, j) ? "+" : "", j, args[j]); } printf("\n"); #endif error = sy_invoke(callp, l, args, &reg->r_regs[_R_V0], code); switch (error) { case 0: #if !defined(__mips_o32) if (abi == _MIPS_BSD_API_O32 && SYCALL_RET_64_P(callp)) { /* * If this is from O32 and it's a 64bit quantity, * split it into 2 32bit values in adjacent registers. */ mips_reg_t tmp = reg->r_regs[_R_V0]; reg->r_regs[_R_V0 + _QUAD_LOWWORD] = (int32_t) tmp; reg->r_regs[_R_V0 + _QUAD_HIGHWORD] = tmp >> 32; } #endif #ifdef MIPS_SYSCALL_DEBUG if (p->p_emul->e_syscallnames) printf("syscall %s:", p->p_emul->e_syscallnames[code]); else printf("syscall %u:", code); printf(" return v0=%#"PRIxREGISTER" v1=%#"PRIxREGISTER"\n", reg->r_regs[_R_V0], reg->r_regs[_R_V1]); #endif reg->r_regs[_R_A3] = 0; break; case ERESTART: reg->r_regs[_R_V0] = saved_v0; /* restore syscall code */ reg->r_regs[_R_PC] = pc; break; case EJUSTRETURN: break; /* nothing to do */ default: bad: if (p->p_emul->e_errno) error = p->p_emul->e_errno[error]; reg->r_regs[_R_V0] = error; reg->r_regs[_R_A3] = 1; #ifdef MIPS_SYSCALL_DEBUG if (p->p_emul->e_syscallnames) printf("syscall %s:", p->p_emul->e_syscallnames[code]); else printf("syscall %u:", code); printf(" return error=%d\n", error); #endif break; }
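/*
 * Illustrative sketch, not from the original source: the O32 case above
 * hands a 64-bit result back to a 32-bit ABI by splitting it across two
 * adjacent registers (v0/v1), ordered by the platform quad layout.
 * Plain C stand-in with demo macros, not the trapframe code itself.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_QUAD_LOWWORD	0	/* little-endian layout assumed */
#define DEMO_QUAD_HIGHWORD	1

int
main(void)
{
	int64_t tmp = INT64_C(0x0102030405060708);
	int32_t v[2];		/* v[0] = v0, v[1] = v1 */

	v[DEMO_QUAD_LOWWORD] = (int32_t)tmp;		/* low half */
	v[DEMO_QUAD_HIGHWORD] = (int32_t)(tmp >> 32);	/* high half */

	printf("v0=0x%08" PRIx32 " v1=0x%08" PRIx32 "\n",
	    (uint32_t)v[0], (uint32_t)v[1]);
	return 0;
}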
void syscall(void) { lwp_t *l = curlwp; const struct proc * const p = l->l_proc; const struct sysent *callp; struct pcb *pcb = lwp_getpcb(l); ucontext_t *ucp = &pcb->pcb_userret_ucp; register_t copyargs[2+SYS_MAXSYSARGS]; register_t *args; register_t rval[2]; uint32_t code, opcode; uint nargs, argsize; int error; /* system call accounting */ curcpu()->ci_data.cpu_nsyscall++; LWP_CACHE_CREDS(l, l->l_proc); /* XXX do we want to do emulation? */ md_syscall_get_opcode(ucp, &opcode); md_syscall_get_syscallnumber(ucp, &code); code &= (SYS_NSYSENT - 1); callp = p->p_emul->e_sysent + code; nargs = callp->sy_narg; argsize = callp->sy_argsize; args = copyargs; rval[0] = rval[1] = 0; error = md_syscall_getargs(l, ucp, nargs, argsize, args); #if 0 aprint_debug("syscall no. %d, ", code); aprint_debug("nargs %d, argsize %d => ", nargs, argsize); thunk_printf_debug("syscall no. %d, ", code); thunk_printf_debug("nargs %d, argsize %d => ", nargs, argsize); #endif /* * TODO change the pre and post printing into functions so they can be * easily adjusted and don't clobber this space */ if (!error) syscall_args_print(l, code, nargs, argsize, args); md_syscall_inc_pc(ucp, opcode); if (!error) { error = sy_invoke(callp, l, args, rval, code); } syscall_retvals_print(l, curlwp, code, nargs, args, error, rval); //out: switch (error) { default: /* fall through */ case 0: md_syscall_set_returnargs(l, ucp, error, rval); /* fall through */ case EJUSTRETURN: /* nothing to do */ break; case ERESTART: md_syscall_dec_pc(ucp, opcode); break; } //thunk_printf_debug("end of syscall : return to userland\n"); //if (code != 4) thunk_printf("userret() code %d\n", code); }
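/*
 * Illustrative sketch, not from the original source: the handler above
 * advances the PC past the syscall instruction up front and, on
 * ERESTART, simply backs it up so the same instruction re-executes.
 * pc and insn_size below are hypothetical stand-ins for the
 * md_syscall_{inc,dec}_pc() machinery.
 */
#include <errno.h>
#include <stdio.h>

#ifndef ERESTART
#define ERESTART	-1	/* kernel-internal pseudo-errno (assumed) */
#endif

int
main(void)
{
	unsigned long pc = 0x1000;	/* user PC at the syscall insn */
	const unsigned insn_size = 4;
	int error = ERESTART;		/* pretend the call was interrupted */

	pc += insn_size;		/* md_syscall_inc_pc() analogue */
	if (error == ERESTART)
		pc -= insn_size;	/* md_syscall_dec_pc() analogue */

	printf("pc=0x%lx (re-executes the syscall)\n", pc);
	return 0;
}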
void data_abort_handler(trapframe_t *tf) { struct vm_map *map; struct lwp * const l = curlwp; struct cpu_info * const ci = curcpu(); u_int far, fsr; vm_prot_t ftype; void *onfault; vaddr_t va; int error; ksiginfo_t ksi; UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); /* Grab FAR/FSR before enabling interrupts */ far = cpu_faultaddress(); fsr = cpu_faultstatus(); /* Update vmmeter statistics */ ci->ci_data.cpu_ntrap++; /* Re-enable interrupts if they were enabled previously */ KASSERT(!TRAP_USERMODE(tf) || (tf->tf_spsr & IF32_bits) == 0); if (__predict_true((tf->tf_spsr & IF32_bits) != IF32_bits)) restore_interrupts(tf->tf_spsr & IF32_bits); /* Get the current lwp structure */ UVMHIST_LOG(maphist, " (l=%#x, far=%#x, fsr=%#x", l, far, fsr, 0); UVMHIST_LOG(maphist, " tf=%#x, pc=%#x)", tf, tf->tf_pc, 0, 0); /* Data abort came from user mode? */ bool user = (TRAP_USERMODE(tf) != 0); if (user) LWP_CACHE_CREDS(l, l->l_proc); /* Grab the current pcb */ struct pcb * const pcb = lwp_getpcb(l); curcpu()->ci_abt_evs[fsr & FAULT_TYPE_MASK].ev_count++; /* Invoke the appropriate handler, if necessary */ if (__predict_false(data_aborts[fsr & FAULT_TYPE_MASK].func != NULL)) { #ifdef DIAGNOSTIC printf("%s: data_aborts fsr=0x%x far=0x%x\n", __func__, fsr, far); #endif if ((data_aborts[fsr & FAULT_TYPE_MASK].func)(tf, fsr, far, l, &ksi)) goto do_trapsignal; goto out; } /* * At this point, we're dealing with one of the following data aborts: * * FAULT_TRANS_S - Translation -- Section * FAULT_TRANS_P - Translation -- Page * FAULT_DOMAIN_S - Domain -- Section * FAULT_DOMAIN_P - Domain -- Page * FAULT_PERM_S - Permission -- Section * FAULT_PERM_P - Permission -- Page * * These are the main virtual memory-related faults signalled by * the MMU. */ /* fusubailout is used by [fs]uswintr to avoid page faulting */ if (__predict_false(pcb->pcb_onfault == fusubailout)) { tf->tf_r0 = EFAULT; tf->tf_pc = (intptr_t) pcb->pcb_onfault; return; } if (user) { lwp_settrapframe(l, tf); } /* * Make sure the Program Counter is sane. We could fall foul of * someone executing Thumb code, in which case the PC might not * be word-aligned. This would cause a kernel alignment fault * further down if we have to decode the current instruction. */ #ifdef THUMB_CODE /* * XXX: It would be nice to be able to support Thumb in the kernel * at some point. */ if (__predict_false(!user && (tf->tf_pc & 3) != 0)) { printf("\n%s: Misaligned Kernel-mode Program Counter\n", __func__); dab_fatal(tf, fsr, far, l, NULL); } #else if (__predict_false((tf->tf_pc & 3) != 0)) { if (user) { /* * Give the user an illegal instruction signal. */ /* Deliver a SIGILL to the process */ KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGILL; ksi.ksi_code = ILL_ILLOPC; ksi.ksi_addr = (uint32_t *)(intptr_t) far; ksi.ksi_trap = fsr; goto do_trapsignal; } /* * The kernel never executes Thumb code. */ printf("\n%s: Misaligned Kernel-mode Program Counter\n", __func__); dab_fatal(tf, fsr, far, l, NULL); } #endif /* See if the CPU state needs to be fixed up */ switch (data_abort_fixup(tf, fsr, far, l)) { case ABORT_FIXUP_RETURN: return; case ABORT_FIXUP_FAILED: /* Deliver a SIGILL to the process */ KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGILL; ksi.ksi_code = ILL_ILLOPC; ksi.ksi_addr = (uint32_t *)(intptr_t) far; ksi.ksi_trap = fsr; goto do_trapsignal; default: break; } va = trunc_page((vaddr_t)far); /* * It is only a kernel address space fault iff: * 1. user == 0 and * 2. pcb_onfault not set or * 3. pcb_onfault set and not LDRT/LDRBT/STRT/STRBT instruction. 
*/ if (!user && (va >= VM_MIN_KERNEL_ADDRESS || (va < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW)) && __predict_true((pcb->pcb_onfault == NULL || (read_insn(tf->tf_pc, false) & 0x05200000) != 0x04200000))) { map = kernel_map; /* Was the fault due to the FPE/IPKDB? */ if (__predict_false((tf->tf_spsr & PSR_MODE)==PSR_UND32_MODE)) { KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGSEGV; ksi.ksi_code = SEGV_ACCERR; ksi.ksi_addr = (uint32_t *)(intptr_t) far; ksi.ksi_trap = fsr; /* * Force exit via userret() * This is necessary as the FPE is an extension to * userland that actually runs in a privileged mode * but uses USR mode permissions for its accesses. */ user = true; goto do_trapsignal; } } else { map = &l->l_proc->p_vmspace->vm_map; } /* * We need to know whether the page should be mapped as R or R/W. * Before ARMv6, the MMU did not give us the info as to whether the * fault was caused by a read or a write. * * However, we know that a permission fault can only be the result of * a write to a read-only location, so we can deal with those quickly. * * Otherwise we need to disassemble the instruction responsible to * determine if it was a write. */ if (CPU_IS_ARMV6_P() || CPU_IS_ARMV7_P()) { ftype = (fsr & FAULT_WRITE) ? VM_PROT_WRITE : VM_PROT_READ; } else if (IS_PERMISSION_FAULT(fsr)) { ftype = VM_PROT_WRITE; } else { #ifdef THUMB_CODE /* Fast track the ARM case. */ if (__predict_false(tf->tf_spsr & PSR_T_bit)) { u_int insn = read_thumb_insn(tf->tf_pc, user); u_int insn_f8 = insn & 0xf800; u_int insn_fe = insn & 0xfe00; if (insn_f8 == 0x6000 || /* STR(1) */ insn_f8 == 0x7000 || /* STRB(1) */ insn_f8 == 0x8000 || /* STRH(1) */ insn_f8 == 0x9000 || /* STR(3) */ insn_f8 == 0xc000 || /* STM */ insn_fe == 0x5000 || /* STR(2) */ insn_fe == 0x5200 || /* STRH(2) */ insn_fe == 0x5400) /* STRB(2) */ ftype = VM_PROT_WRITE; else ftype = VM_PROT_READ; } else #endif { u_int insn = read_insn(tf->tf_pc, user); if (((insn & 0x0c100000) == 0x04000000) || /* STR[B] */ ((insn & 0x0e1000b0) == 0x000000b0) || /* STR[HD]*/ ((insn & 0x0a100000) == 0x08000000) || /* STM/CDT*/ ((insn & 0x0f9000f0) == 0x01800090)) /* STREX[BDH] */ ftype = VM_PROT_WRITE; else if ((insn & 0x0fb00ff0) == 0x01000090)/* SWP */ ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; } } /* * See if the fault is as a result of ref/mod emulation, * or domain mismatch. */ #ifdef DEBUG last_fault_code = fsr; #endif if (pmap_fault_fixup(map->pmap, va, ftype, user)) { UVMHIST_LOG(maphist, " <- ref/mod emul", 0, 0, 0, 0); goto out; } if (__predict_false(curcpu()->ci_intr_depth > 0)) { if (pcb->pcb_onfault) { tf->tf_r0 = EINVAL; tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault; return; } printf("\nNon-emulated page fault with intr_depth > 0\n"); dab_fatal(tf, fsr, far, l, NULL); } onfault = pcb->pcb_onfault; pcb->pcb_onfault = NULL; error = uvm_fault(map, va, ftype); pcb->pcb_onfault = onfault; if (__predict_true(error == 0)) { if (user) uvm_grow(l->l_proc, va); /* Record any stack growth */ else ucas_ras_check(tf); UVMHIST_LOG(maphist, " <- uvm", 0, 0, 0, 0); goto out; } if (user == 0) { if (pcb->pcb_onfault) { tf->tf_r0 = error; tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault; return; } printf("\nuvm_fault(%p, %lx, %x) -> %x\n", map, va, ftype, error); dab_fatal(tf, fsr, far, l, NULL); } KSI_INIT_TRAP(&ksi); if (error == ENOMEM) { printf("UVM: pid %d (%s), uid %d killed: " "out of swap\n", l->l_proc->p_pid, l->l_proc->p_comm, l->l_cred ? 
kauth_cred_geteuid(l->l_cred) : -1); ksi.ksi_signo = SIGKILL; } else ksi.ksi_signo = SIGSEGV; ksi.ksi_code = (error == EACCES) ? SEGV_ACCERR : SEGV_MAPERR; ksi.ksi_addr = (uint32_t *)(intptr_t) far; ksi.ksi_trap = fsr; UVMHIST_LOG(maphist, " <- error (%d)", error, 0, 0, 0); do_trapsignal: call_trapsignal(l, tf, &ksi); out: /* If returning to user mode, make sure to invoke userret() */ if (user) userret(l); }
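/*
 * Illustrative sketch, not from the original source: data_abort_handler
 * above saves pcb_onfault, clears it around uvm_fault() so a fault in
 * the fault handler cannot recurse, and on an unresolvable kernel fault
 * resumes at the recovery point with an error in r0.  This userland
 * analogue uses setjmp/longjmp in place of the trapframe rewrite
 * (tf->tf_pc = pcb->pcb_onfault); all names are demo-only.
 */
#include <setjmp.h>
#include <stdio.h>

static jmp_buf *onfault;	/* pcb->pcb_onfault analogue */

static void
demo_fault(int resolvable)
{
	jmp_buf *saved = onfault;

	onfault = NULL;		/* faults in the handler must not recurse */
	/* ... uvm_fault() would run here ... */
	onfault = saved;

	if (!resolvable && onfault != NULL)
		longjmp(*onfault, 14 /* EFAULT analogue */);
}

static int
demo_copyin(int resolvable)
{
	jmp_buf env;
	int error;

	if ((error = setjmp(env)) != 0)
		return error;	/* resumed here after the "fault" */
	onfault = &env;
	demo_fault(resolvable);
	onfault = NULL;
	return 0;
}

int
main(void)
{
	printf("good copy -> %d\n", demo_copyin(1));
	printf("bad copy  -> %d\n", demo_copyin(0));
	return 0;
}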
void swi_handler(trapframe_t *frame) { lwp_t *l = curlwp; uint32_t insn; /* * Enable interrupts if they were enabled before the exception. * Since all syscalls *should* come from user mode it will always * be safe to enable them, but check anyway. */ #ifdef acorn26 if ((frame->tf_r15 & R15_IRQ_DISABLE) == 0) int_on(); #else KASSERT((frame->tf_spsr & IF32_bits) == 0); restore_interrupts(frame->tf_spsr & IF32_bits); #endif #ifdef acorn26 frame->tf_pc += INSN_SIZE; #endif #ifdef KERN_SA if (__predict_false((l->l_savp) && (l->l_savp->savp_pflags & SAVP_FLAG_DELIVERING))) l->l_savp->savp_pflags &= ~SAVP_FLAG_DELIVERING; #endif #ifndef THUMB_CODE /* * Make sure the program counter is correctly aligned so we * don't take an alignment fault trying to read the opcode. */ if (__predict_false(((frame->tf_pc - INSN_SIZE) & 3) != 0)) { ksiginfo_t ksi; /* Give the user an illegal instruction signal. */ KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGILL; ksi.ksi_code = ILL_ILLOPC; ksi.ksi_addr = (uint32_t *)(intptr_t) (frame->tf_pc-INSN_SIZE); #if 0 /* maybe one day we'll do emulations */ (*l->l_proc->p_emul->e_trapsignal)(l, &ksi); #else trapsignal(l, &ksi); #endif userret(l); return; } #endif #ifdef THUMB_CODE if (frame->tf_spsr & PSR_T_bit) { /* Map a Thumb SWI onto the bottom 256 ARM SWIs. */ insn = fusword((void *)(frame->tf_pc - THUMB_INSN_SIZE)); if (insn & 0x00ff) insn = (insn & 0x00ff) | 0xef000000; else insn = frame->tf_ip | 0xef000000; } else #endif { /* XXX fuword? */ #ifdef __PROG32 insn = *(uint32_t *)(frame->tf_pc - INSN_SIZE); #else insn = *(uint32_t *)((frame->tf_r15 & R15_PC) - INSN_SIZE); #endif } l->l_addr->u_pcb.pcb_tf = frame; #ifdef CPU_ARM7 /* * This code is only needed if we are including support for the ARM7 * core. Other CPUs do not need it but it does not hurt. */ /* * ARM700/ARM710 match sticks and sellotape job ... * * I know this affects GPS/VLSI ARM700/ARM710 + various ARM7500. * * On occasion data aborts are mishandled and end up calling * the swi vector. * * If the instruction that caused the exception is not a SWI * then we hit the bug. */ if ((insn & 0x0f000000) != 0x0f000000) { frame->tf_pc -= INSN_SIZE; curcpu()->ci_arm700bugcount.ev_count++; userret(l); return; } #endif /* CPU_ARM7 */ uvmexp.syscalls++; LWP_CACHE_CREDS(l, l->l_proc); (*l->l_proc->p_md.md_syscall)(frame, l, insn); }
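/*
 * Illustrative sketch, not from the original source: the Thumb branch of
 * swi_handler above maps a 16-bit Thumb SWI onto the bottom 256 ARM SWI
 * encodings (condition AL, opcode 0xef), falling back to the value in
 * ip when the 8-bit immediate is zero.  The encodings follow the source;
 * the surrounding program is demo-only.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
thumb_swi_to_arm(uint16_t thumb_insn, uint32_t ip_fallback)
{
	if (thumb_insn & 0x00ff)
		return (thumb_insn & 0x00ff) | 0xef000000;
	/* Immediate 0: the real code was passed in ip instead. */
	return ip_fallback | 0xef000000;
}

int
main(void)
{
	printf("0x%08x\n", thumb_swi_to_arm(0xdf04, 0));	/* swi #4 */
	printf("0x%08x\n", thumb_swi_to_arm(0xdf00, 0x42));	/* via ip */
	return 0;
}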
/* * void prefetch_abort_handler(trapframe_t *tf) * * Abort handler called when instruction execution occurs at * a non-existent or restricted (access permissions) memory page. * If the address is invalid and we were in SVC mode then panic as * the kernel should never prefetch abort. * If the address is invalid and the page is mapped then the user process * does not have read permission so send it a signal. * Otherwise fault the page in and try again. */ void prefetch_abort_handler(trapframe_t *tf) { struct lwp *l; struct pcb *pcb __diagused; struct vm_map *map; vaddr_t fault_pc, va; ksiginfo_t ksi; int error, user; UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); /* Update vmmeter statistics */ curcpu()->ci_data.cpu_ntrap++; l = curlwp; pcb = lwp_getpcb(l); if ((user = TRAP_USERMODE(tf)) != 0) LWP_CACHE_CREDS(l, l->l_proc); /* * Enable IRQs (disabled by the abort). This always comes * from user mode, so we know interrupts were not disabled. * But we check anyway. */ KASSERT(!TRAP_USERMODE(tf) || (tf->tf_spsr & IF32_bits) == 0); if (__predict_true((tf->tf_spsr & IF32_bits) != IF32_bits)) restore_interrupts(tf->tf_spsr & IF32_bits); /* See if the CPU state needs to be fixed up */ switch (prefetch_abort_fixup(tf)) { case ABORT_FIXUP_RETURN: KASSERT(!TRAP_USERMODE(tf) || (tf->tf_spsr & IF32_bits) == 0); return; case ABORT_FIXUP_FAILED: /* Deliver a SIGILL to the process */ KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGILL; ksi.ksi_code = ILL_ILLOPC; ksi.ksi_addr = (uint32_t *)(intptr_t) tf->tf_pc; lwp_settrapframe(l, tf); goto do_trapsignal; default: break; } /* Prefetch aborts cannot happen in kernel mode */ if (__predict_false(!user)) dab_fatal(tf, 0, tf->tf_pc, NULL, NULL); /* Get fault address */ fault_pc = tf->tf_pc; lwp_settrapframe(l, tf); UVMHIST_LOG(maphist, " (pc=0x%x, l=0x%x, tf=0x%x)", fault_pc, l, tf, 0); /* Ok validate the address, can only execute in USER space */ if (__predict_false(fault_pc >= VM_MAXUSER_ADDRESS || (fault_pc < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW))) { KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGSEGV; ksi.ksi_code = SEGV_ACCERR; ksi.ksi_addr = (uint32_t *)(intptr_t) fault_pc; ksi.ksi_trap = fault_pc; goto do_trapsignal; } map = &l->l_proc->p_vmspace->vm_map; va = trunc_page(fault_pc); /* * See if the pmap can handle this fault on its own... */ #ifdef DEBUG last_fault_code = -1; #endif if (pmap_fault_fixup(map->pmap, va, VM_PROT_READ|VM_PROT_EXECUTE, 1)) { UVMHIST_LOG (maphist, " <- emulated", 0, 0, 0, 0); goto out; } #ifdef DIAGNOSTIC if (__predict_false(curcpu()->ci_intr_depth > 0)) { printf("\nNon-emulated prefetch abort with intr_depth > 0\n"); dab_fatal(tf, 0, tf->tf_pc, NULL, NULL); } #endif KASSERT(pcb->pcb_onfault == NULL); error = uvm_fault(map, va, VM_PROT_READ|VM_PROT_EXECUTE); if (__predict_true(error == 0)) { UVMHIST_LOG (maphist, " <- uvm", 0, 0, 0, 0); goto out; } KSI_INIT_TRAP(&ksi); UVMHIST_LOG (maphist, " <- fatal (%d)", error, 0, 0, 0); if (error == ENOMEM) { printf("UVM: pid %d (%s), uid %d killed: " "out of swap\n", l->l_proc->p_pid, l->l_proc->p_comm, l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1); ksi.ksi_signo = SIGKILL; } else ksi.ksi_signo = SIGSEGV; ksi.ksi_code = SEGV_MAPERR; ksi.ksi_addr = (uint32_t *)(intptr_t) fault_pc; ksi.ksi_trap = fault_pc; do_trapsignal: call_trapsignal(l, tf, &ksi); out: KASSERT(!TRAP_USERMODE(tf) || (tf->tf_spsr & IF32_bits) == 0); userret(l); }
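/*
 * Illustrative sketch, not from the original source: the validation step
 * above only services a user prefetch abort when the faulting PC lies
 * inside the user address range, excluding the vector page when it is
 * mapped low.  The bounds below are made-up demo constants, not the real
 * platform values.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_VM_MIN_ADDRESS	0x00001000UL
#define DEMO_VM_MAXUSER_ADDRESS	0x7f000000UL

static bool
demo_pc_is_faultable(unsigned long pc, bool vectors_low)
{
	if (pc >= DEMO_VM_MAXUSER_ADDRESS)
		return false;	/* kernel space: SEGV */
	if (pc < DEMO_VM_MIN_ADDRESS && vectors_low)
		return false;	/* vector page: SEGV */
	return true;
}

int
main(void)
{
	printf("%d\n", demo_pc_is_faultable(0x00008000UL, true));	/* 1 */
	printf("%d\n", demo_pc_is_faultable(0x00000020UL, true));	/* 0 */
	printf("%d\n", demo_pc_is_faultable(0x7fffff00UL, true));	/* 0 */
	return 0;
}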
/* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ static void linux_syscall(struct trapframe *frame) { const struct sysent *callp; struct proc *p; struct lwp *l; int error; register_t code, rval[2]; #define args (&frame->tf_rdi) l = curlwp; p = l->l_proc; code = frame->tf_rax; LWP_CACHE_CREDS(l, p); callp = p->p_emul->e_sysent; code &= (LINUX_SYS_NSYSENT - 1); callp += code; /* * Linux system calls have a maximum of 6 arguments, they are * already adjacent in the syscall trapframe. */ if (__predict_false(p->p_trace_enabled) && (error = trace_enter(code, args, callp->sy_narg)) != 0) goto out; rval[0] = 0; rval[1] = 0; error = sy_call(callp, l, args, rval); out: switch (error) { case 0: frame->tf_rax = rval[0]; break; case ERESTART: /* * The offset to adjust the PC by depends on whether we entered * the kernel through the trap or call gate. We pushed the * size of the instruction into tf_err on entry. */ frame->tf_rip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux_errno[error]; frame->tf_rax = error; break; } if (__predict_false(p->p_trace_enabled)) trace_exit(code, rval, error); userret(l); }
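/*
 * Illustrative sketch, not from the original source: the default error
 * path above maps NetBSD errno values through a per-emulation table
 * before handing them back to the Linux binary.  The table below holds
 * a few invented sample entries only; the real native_to_linux_errno
 * table is generated elsewhere and is much larger.
 */
#include <errno.h>
#include <stdio.h>

static const int demo_native_to_linux_errno[] = {
	[0] = 0,
	[EPERM] = 1,	/* happens to match on both systems */
	[ENOENT] = 2,
	[EAGAIN] = 11,	/* Linux value; the native value may differ */
};

int
main(void)
{
	int error = ENOENT;

	if (error >= 0 && error < (int)(sizeof(demo_native_to_linux_errno) /
	    sizeof(demo_native_to_linux_errno[0])))
		error = demo_native_to_linux_errno[error];
	printf("linux errno %d\n", error);
	return 0;
}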
void linux32_syscall(struct trapframe *frame) { const struct sysent *callp; struct proc *p; struct lwp *l; int error; size_t narg; register32_t code, args[6]; register_t rval[2]; size_t i; register_t args64[6]; l = curlwp; p = l->l_proc; code = frame->tf_rax; LWP_CACHE_CREDS(l, p); callp = p->p_emul->e_sysent; code &= (LINUX32_SYS_NSYSENT - 1); callp += code; /* * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in * increasing order. */ args[0] = frame->tf_rbx & 0xffffffff; args[1] = frame->tf_rcx & 0xffffffff; args[2] = frame->tf_rdx & 0xffffffff; args[3] = frame->tf_rsi & 0xffffffff; args[4] = frame->tf_rdi & 0xffffffff; args[5] = frame->tf_rbp & 0xffffffff; if (__predict_false(p->p_trace_enabled)) { narg = callp->sy_narg; if (__predict_false(narg > __arraycount(args))) panic("impossible syscall narg, code %u, narg %zu", code, narg); for (i = 0; i < narg; i++) args64[i] = args[i] & 0xffffffff; if ((error = trace_enter(code, args64, narg)) != 0) goto out; } rval[0] = 0; rval[1] = 0; error = sy_call(callp, l, args, rval); out: switch (error) { case 0: frame->tf_rax = rval[0]; frame->tf_rflags &= ~PSL_C; /* carry bit */ break; case ERESTART: /* * The offset to adjust the PC by depends on whether we entered * the kernel through the trap or call gate. We pushed the * size of the instruction into tf_err on entry. */ frame->tf_rip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux32_errno[error]; frame->tf_rax = error; frame->tf_rflags |= PSL_C; /* carry bit */ break; } if (__predict_false(p->p_trace_enabled)) trace_exit(code, rval, error); userret(l); }
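/*
 * Illustrative sketch, not from the original source: linux32_syscall
 * above masks each 64-bit register down to its low 32 bits before the
 * compat syscall sees it, so stale high halves left by userland cannot
 * leak through.  Demo values only.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t rbx = 0xdeadbeef00000007ULL;	/* garbage in the high half */
	uint32_t arg = rbx & 0xffffffff;	/* what the syscall receives */

	printf("raw=0x%016" PRIx64 " arg=%" PRIu32 "\n", rbx, arg);
	return 0;
}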
/* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ void linux_syscall(struct trapframe *frame) { register const struct sysent *callp; struct lwp *l; int error; register_t code, args[6], rval[2]; l = curlwp; LWP_CACHE_CREDS(l, l->l_proc); code = frame->tf_eax & (LINUX_SYS_NSYSENT - 1); callp = linux_sysent; callp += code; /* * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in * increasing order. */ args[0] = frame->tf_ebx; args[1] = frame->tf_ecx; args[2] = frame->tf_edx; args[3] = frame->tf_esi; args[4] = frame->tf_edi; args[5] = frame->tf_ebp; rval[0] = 0; rval[1] = 0; if (__predict_false(l->l_proc->p_trace_enabled)) { error = trace_enter(code, args, callp->sy_narg); if (__predict_true(error == 0)) { error = sy_call(callp, l, args, rval); code = frame->tf_eax & (LINUX_SYS_NSYSENT - 1); trace_exit(code, rval, error); } } else error = sy_call(callp, l, args, rval); if (__predict_true(error == 0)) { frame->tf_eax = rval[0]; /* * XXX The linux libc code I (dsl) looked at doesn't use the * carry bit. * Values above 0xfffff000 are assumed to be errno values and * not result codes! */ frame->tf_eflags &= ~PSL_C; /* carry bit */ } else { switch (error) { case ERESTART: /* * The offset to adjust the PC by depends on whether * we entered the kernel through the trap or call gate. * We save the instruction size in tf_err on entry. */ frame->tf_eip -= frame->tf_err; break; case EJUSTRETURN: /* nothing to do */ break; default: error = native_to_linux_errno[error]; frame->tf_eax = error; frame->tf_eflags |= PSL_C; /* carry bit */ break; } } userret(l); }
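/*
 * Illustrative sketch, not from the original source: the XXX comment
 * above notes that Linux libc ignores the carry bit and instead treats
 * raw return values above 0xfffff000 as errno values.  This hypothetical
 * userland-side check mirrors that convention; it is not libc source.
 */
#include <stdint.h>
#include <stdio.h>

static long
demo_syscall_return(uint32_t eax)
{
	if (eax > 0xfffff000u) {
		/* libc would do: errno = -(int32_t)eax; return -1; */
		return -1;
	}
	return (long)eax;	/* an ordinary result code */
}

int
main(void)
{
	printf("%ld\n", demo_syscall_return(3));		/* result */
	printf("%ld\n", demo_syscall_return((uint32_t)-2));	/* error */
	return 0;
}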
/*ARGSUSED*/ void trap(struct frame *fp, int type, u_int code, u_int v) { extern char fubail[], subail[]; struct lwp *l; struct proc *p; ksiginfo_t ksi; int s; u_quad_t sticks; uvmexp.traps++; l = curlwp; KSI_INIT_TRAP(&ksi); ksi.ksi_trap = type & ~T_USER; p = l->l_proc; if (USERMODE(fp->f_sr)) { type |= T_USER; sticks = p->p_sticks; l->l_md.md_regs = fp->f_regs; LWP_CACHE_CREDS(l, p); } else sticks = 0; #ifdef DIAGNOSTIC if (l->l_addr == NULL) panic("trap: type 0x%x, code 0x%x, v 0x%x -- no pcb", type, code, v); #endif switch (type) { default: dopanic: printf("trap type %d, code = 0x%x, v = 0x%x\n", type, code, v); printf("%s program counter = 0x%x\n", (type & T_USER) ? "user" : "kernel", fp->f_pc); /* * Let the kernel debugger see the trap frame that * caused us to panic. This is a convenience so * one can see registers at the point of failure. */ s = splhigh(); #ifdef KGDB /* If connected, step or cont returns 1 */ if (kgdb_trap(type, (db_regs_t *)fp)) goto kgdb_cont; #endif #ifdef DDB (void)kdb_trap(type, (db_regs_t *)fp); #endif #ifdef KGDB kgdb_cont: #endif splx(s); if (panicstr) { printf("trap during panic!\n"); #ifdef DEBUG /* XXX should be a machine-dependent hook */ printf("(press a key)\n"); (void)cngetc(); #endif } regdump((struct trapframe *)fp, 128); type &= ~T_USER; if ((u_int)type < trap_types) panic(trap_type[type]); panic("trap"); case T_BUSERR: /* Kernel bus error */ if (!l->l_addr->u_pcb.pcb_onfault) goto dopanic; /* * If we have arranged to catch this fault in any of the * copy to/from user space routines, set PC to return to * indicated location and set flag informing buserror code * that it may need to clean up stack frame. */ copyfault: fp->f_stackadj = exframesize[fp->f_format]; fp->f_format = fp->f_vector = 0; fp->f_pc = (int)l->l_addr->u_pcb.pcb_onfault; return; case T_BUSERR|T_USER: /* Bus error */ case T_ADDRERR|T_USER: /* Address error */ ksi.ksi_addr = (void *)v; ksi.ksi_signo = SIGBUS; ksi.ksi_code = (type == (T_BUSERR|T_USER)) ? BUS_OBJERR : BUS_ADRERR; break; case T_ILLINST|T_USER: /* Illegal instruction fault */ case T_PRIVINST|T_USER: /* Privileged instruction fault */ ksi.ksi_addr = (void *)(int)fp->f_format; /* XXX was ILL_PRIVIN_FAULT */ ksi.ksi_signo = SIGILL; ksi.ksi_code = (type == (T_PRIVINST|T_USER)) ? ILL_PRVOPC : ILL_ILLOPC; break; /* * divide by zero, CHK/TRAPV inst */ case T_ZERODIV|T_USER: /* Divide by zero trap */ ksi.ksi_code = FPE_FLTDIV; /* FALLTHROUGH */ case T_CHKINST|T_USER: /* CHK instruction trap */ case T_TRAPVINST|T_USER: /* TRAPV instruction trap */ ksi.ksi_addr = (void *)(int)fp->f_format; ksi.ksi_signo = SIGFPE; break; /* * User coprocessor violation */ case T_COPERR|T_USER: /* XXX What is a proper response here? */ ksi.ksi_signo = SIGFPE; ksi.ksi_code = FPE_FLTINV; break; /* * 6888x exceptions */ case T_FPERR|T_USER: /* * We pass along the 68881 status register which locore * stashed in code for us. Note that there is a * possibility that the bit pattern of this register * will conflict with one of the FPE_* codes defined * in signal.h. Fortunately for us, the only such * codes we use are all in the range 1-7 and the low * 3 bits of the status register are defined as 0 so * there is no clash. */ ksi.ksi_signo = SIGFPE; ksi.ksi_addr = (void *)code; break; /* * FPU faults in supervisor mode. */ case T_ILLINST: /* fnop generates this, apparently. */ case T_FPEMULI: case T_FPEMULD: { extern label_t *nofault; if (nofault) /* If we're probing. 
*/ longjmp(nofault); if (type == T_ILLINST) printf("Kernel Illegal Instruction trap.\n"); else printf("Kernel FPU trap.\n"); goto dopanic; } /* * Unimplemented FPU instructions/datatypes. */ case T_FPEMULI|T_USER: case T_FPEMULD|T_USER: #ifdef FPU_EMULATE if (fpu_emulate(fp, &l->l_addr->u_pcb.pcb_fpregs, &ksi) == 0) ; /* XXX - Deal with tracing? (fp->f_sr & PSL_T) */ #else uprintf("pid %d killed: no floating point support.\n", p->p_pid); ksi.ksi_signo = SIGILL; ksi.ksi_code = ILL_ILLOPC; #endif break; case T_COPERR: /* Kernel coprocessor violation */ case T_FMTERR: /* Kernel format error */ case T_FMTERR|T_USER: /* User format error */ /* * The user has most likely trashed the RTE or FP state info * in the stack frame of a signal handler. */ printf("pid %d: kernel %s exception\n", p->p_pid, type==T_COPERR ? "coprocessor" : "format"); type |= T_USER; mutex_enter(p->p_lock); SIGACTION(p, SIGILL).sa_handler = SIG_DFL; sigdelset(&p->p_sigctx.ps_sigignore, SIGILL); sigdelset(&p->p_sigctx.ps_sigcatch, SIGILL); sigdelset(&l->l_sigmask, SIGILL); mutex_exit(p->p_lock); ksi.ksi_signo = SIGILL; ksi.ksi_addr = (void *)(int)fp->f_format; /* XXX was ILL_RESAD_FAULT */ ksi.ksi_code = (type == T_COPERR) ? ILL_COPROC : ILL_ILLOPC; break; /* * XXX: Trace traps are a nightmare. * * HP-UX uses trap #1 for breakpoints, * NetBSD/m68k uses trap #2, * SUN 3.x uses trap #15, * DDB and KGDB use trap #15 (for kernel breakpoints; * handled elsewhere). * * NetBSD and HP-UX traps both get mapped by locore.s into T_TRACE. * SUN 3.x traps get passed through as T_TRAP15 and are not really * supported yet. * * XXX: We should never get kernel-mode T_TRAP15 because * XXX: locore.s now gives it special treatment. */ case T_TRAP15: /* SUN trace trap */ #ifdef DEBUG printf("unexpected kernel trace trap, type = %d\n", type); printf("program counter = 0x%x\n", fp->f_pc); #endif fp->f_sr &= ~PSL_T; ksi.ksi_signo = SIGTRAP; break; case T_TRACE|T_USER: /* user trace trap */ #ifdef COMPAT_SUNOS /* * SunOS uses Trap #2 for a "CPU cache flush". * Just flush the on-chip caches and return. */ if (p->p_emul == &emul_sunos) { ICIA(); DCIU(); return; } #endif /* FALLTHROUGH */ case T_TRACE: /* tracing a trap instruction */ case T_TRAP15|T_USER: /* SUN user trace trap */ fp->f_sr &= ~PSL_T; ksi.ksi_signo = SIGTRAP; break; case T_ASTFLT: /* System async trap, cannot happen */ goto dopanic; case T_ASTFLT|T_USER: /* User async trap. */ astpending = 0; /* * We check for software interrupts first. This is because * they are at a higher level than ASTs, and on a VAX would * interrupt the AST. We assume that if we are processing * an AST that we must be at IPL0 so we don't bother to * check. Note that we ensure that we are at least at SIR * IPL while processing the SIR. */ spl1(); /* fall into... */ case T_SSIR: /* Software interrupt */ case T_SSIR|T_USER: /* * If this was not an AST trap, we are all done. */ if (type != (T_ASTFLT|T_USER)) { uvmexp.traps--; return; } spl0(); if (l->l_pflag & LP_OWEUPC) { l->l_pflag &= ~LP_OWEUPC; ADDUPROF(l); } if (curcpu()->ci_want_resched) preempt(); goto out; case T_MMUFLT: /* Kernel mode page fault */ /* * If we were doing profiling ticks or other user mode * stuff from interrupt code, Just Say No. */ if (l->l_addr->u_pcb.pcb_onfault == fubail || l->l_addr->u_pcb.pcb_onfault == subail) goto copyfault; /* fall into... 
*/ case T_MMUFLT|T_USER: /* page fault */ { vaddr_t va; struct vmspace *vm = p->p_vmspace; struct vm_map *map; int rv; vm_prot_t ftype; extern struct vm_map *kernel_map; #ifdef DEBUG if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n", p->p_pid, code, v, fp->f_pc, fp->f_sr); #endif /* * It is only a kernel address space fault iff: * 1. (type & T_USER) == 0 and * 2. pcb_onfault not set or * 3. pcb_onfault set but supervisor data fault * The last can occur during an exec() copyin where the * argument space is lazy-allocated. */ if (type == T_MMUFLT && (!l->l_addr->u_pcb.pcb_onfault || KDFAULT(code))) map = kernel_map; else { map = vm ? &vm->vm_map : kernel_map; if ((l->l_flag & LW_SA) && (~l->l_pflag & LP_SA_NOBLOCK)) { l->l_savp->savp_faultaddr = (vaddr_t)v; l->l_pflag |= LP_SA_PAGEFAULT; } } if (WRFAULT(code)) ftype = VM_PROT_WRITE; else ftype = VM_PROT_READ; va = trunc_page((vaddr_t)v); #ifdef DEBUG if (map == kernel_map && va == 0) { printf("trap: bad kernel access at %x\n", v); goto dopanic; } #endif rv = uvm_fault(map, va, ftype); #ifdef DEBUG if (rv && MDB_ISPID(p->p_pid)) printf("uvm_fault(%p, 0x%lx, 0x%x) -> 0x%x\n", map, va, ftype, rv); #endif /* * If this was a stack access, we keep track of the maximum * accessed stack size. Also, if vm_fault gets a protection * failure, it is due to accessing the stack region outside * the current limit and we need to reflect that as an access * error. */ if (rv == 0) { if (map != kernel_map && (void *)va >= vm->vm_maxsaddr) uvm_grow(p, va); if (type == T_MMUFLT) { #if defined(M68040) if (mmutype == MMU_68040) (void)writeback(fp, 1); #endif return; } l->l_pflag &= ~LP_SA_PAGEFAULT; goto out; } if (rv == EACCES) { ksi.ksi_code = SEGV_ACCERR; rv = EFAULT; } else ksi.ksi_code = SEGV_MAPERR; if (type == T_MMUFLT) { if (l->l_addr->u_pcb.pcb_onfault) goto copyfault; printf("uvm_fault(%p, 0x%lx, 0x%x) -> 0x%x\n", map, va, ftype, rv); printf(" type %x, code [mmu,,ssw]: %x\n", type, code); goto dopanic; } l->l_pflag &= ~LP_SA_PAGEFAULT; ksi.ksi_addr = (void *)v; if (rv == ENOMEM) { printf("UVM: pid %d (%s), uid %d killed: out of swap\n", p->p_pid, p->p_comm, l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1); ksi.ksi_signo = SIGKILL; } else { ksi.ksi_signo = SIGSEGV; } break; } } if (ksi.ksi_signo) trapsignal(l, &ksi); if ((type & T_USER) == 0) return; out: userret(l, fp, sticks, v, 1); }
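/*
 * Illustrative sketch, not from the original source: when the resolved
 * user fault above lies in the stack region (at or above vm_maxsaddr
 * for a downward-growing stack), uvm_grow() records the new maximum
 * stack size.  All names and the growth rule below are demo stand-ins
 * for that bookkeeping, assuming a stack mapped just below vm_minsaddr.
 */
#include <stdio.h>

struct demo_vmspace {
	unsigned long vm_maxsaddr;	/* lowest possible stack address */
	unsigned long vm_minsaddr;	/* top of the stack */
	unsigned long vm_ssize;		/* current stack size, in pages */
};

#define DEMO_PAGE_SHIFT	12

static void
demo_uvm_grow(struct demo_vmspace *vm, unsigned long va)
{
	if (va < vm->vm_maxsaddr || va >= vm->vm_minsaddr)
		return;		/* not a stack address */
	unsigned long nss = (vm->vm_minsaddr - va) >> DEMO_PAGE_SHIFT;
	if (nss > vm->vm_ssize)
		vm->vm_ssize = nss;	/* record the new maximum */
}

int
main(void)
{
	struct demo_vmspace vm = {
		.vm_maxsaddr = 0x70000000UL,
		.vm_minsaddr = 0x80000000UL,
		.vm_ssize = 4,
	};

	demo_uvm_grow(&vm, 0x7fff8000UL);	/* 8 pages below the top */
	printf("stack size: %lu pages\n", vm.vm_ssize);
	return 0;
}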