/* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. * * This code will return back into the fork trampoline code which then * runs doreti. * * NOTE: The mplock is not held at any point. */ void generic_lwp_return(struct lwp *lp, struct trapframe *frame) { struct proc *p = lp->lwp_proc; /* * Newly forked processes are given a kernel priority. We have to * adjust the priority to a normal user priority and fake entry * into the kernel (call userenter()) to install a passive release * function just in case userret() decides to stop the process. This * can occur when ^Z races a fork. If we do not install the passive * release function the current process designation will not be * released when the thread goes to sleep. */ lwkt_setpri_self(TDPRI_USER_NORM); userenter(lp->lwp_thread, p); userret(lp, frame, 0); #ifdef KTRACE if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) ktrsysret(lp, SYS_fork, 0, 0); #endif lp->lwp_flags |= LWP_PASSIVE_ACQ; userexit(lp); lp->lwp_flags &= ~LWP_PASSIVE_ACQ; }
/* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. * * This code will return back into the fork trampoline code which then * runs doreti. */ void generic_lwp_return(struct lwp *lp, struct trapframe *frame) { struct proc *p = lp->lwp_proc; /* * Check for exit-race. If one lwp exits the process concurrent with * another lwp creating a new thread, the two operations may cross * each other resulting in the newly-created lwp not receiving a * KILL signal. */ if (p->p_flags & P_WEXIT) { lwpsignal(p, lp, SIGKILL); } /* * Newly forked processes are given a kernel priority. We have to * adjust the priority to a normal user priority and fake entry * into the kernel (call userenter()) to install a passive release * function just in case userret() decides to stop the process. This * can occur when ^Z races a fork. If we do not install the passive * release function the current process designation will not be * released when the thread goes to sleep. */ lwkt_setpri_self(TDPRI_USER_NORM); userenter(lp->lwp_thread, p); userret(lp, frame, 0); #ifdef KTRACE if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) ktrsysret(lp, SYS_fork, 0, 0); #endif lp->lwp_flags |= LWP_PASSIVE_ACQ; userexit(lp); lp->lwp_flags &= ~LWP_PASSIVE_ACQ; }
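/*
 * What the fork return path above buys userland: fork() appears to return
 * twice, once in the parent with the child's pid and once in the child --
 * via the fork trampoline and generic_lwp_return() -- with 0.
 * demo_fork_return() is an illustrative helper, not part of the kernel
 * source above:
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int
demo_fork_return(void)
{
	pid_t pid = fork();

	if (pid == -1)
		return (-1);		/* fork failed */
	if (pid == 0) {
		/*
		 * Child: it entered the kernel via the parent's fork()
		 * but returns to userland through the fork trampoline
		 * and the lwp-return path above, with 0 in hand.
		 */
		printf("child: fork() returned 0, my pid is %d\n",
		    (int)getpid());
		_exit(0);
	}
	/* Parent: the normal syscall return path yields the child pid. */
	printf("parent: fork() returned %d\n", (int)pid);
	waitpid(pid, NULL, 0);
	return (0);
}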
/* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. Giant is not held on entry, and must not * be held on return. This function is passed in to fork_exit() as the * first parameter and is called when returning to a new userland process. */ void fork_return(struct thread *td, struct trapframe *frame) { struct proc *p, *dbg; p = td->td_proc; if (td->td_dbgflags & TDB_STOPATFORK) { sx_xlock(&proctree_lock); PROC_LOCK(p); if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED | P_FOLLOWFORK)) { /* * If debugger still wants auto-attach for the * parent's children, do it now. */ dbg = p->p_pptr->p_pptr; p->p_flag |= P_TRACED; p->p_oppid = p->p_pptr->p_pid; CTR2(KTR_PTRACE, "fork_return: attaching to new child pid %d: oppid %d", p->p_pid, p->p_oppid); proc_reparent(p, dbg); sx_xunlock(&proctree_lock); td->td_dbgflags |= TDB_CHILD | TDB_SCX; ptracestop(td, SIGSTOP); td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX); } else { /* * ... otherwise clear the request. */ sx_xunlock(&proctree_lock); td->td_dbgflags &= ~TDB_STOPATFORK; cv_broadcast(&p->p_dbgwait); } PROC_UNLOCK(p); } else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) { /* * This is the start of a new thread in a traced * process. Report a system call exit event. */ PROC_LOCK(p); td->td_dbgflags |= TDB_SCX; _STOPEVENT(p, S_SCX, td->td_dbg_sc_code); if ((p->p_stops & S_PT_SCX) != 0 || (td->td_dbgflags & TDB_BORN) != 0) ptracestop(td, SIGTRAP); td->td_dbgflags &= ~(TDB_SCX | TDB_BORN); PROC_UNLOCK(p); } userret(td, frame); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(SYS_fork, 0, 0); #endif }
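/*
 * Debugger-side counterpart of the TDB_STOPATFORK handling above, as a
 * hedged sketch against FreeBSD's ptrace(2). Assumes `pid` is already
 * attached (PT_ATTACH) and stopped; error handling is elided and
 * demo_follow_fork() is an illustrative name.
 */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

void
demo_follow_fork(pid_t pid)
{
	int status;

	/*
	 * Ask the kernel to auto-attach to children of `pid`. When the
	 * traced process forks, fork_return() in the new child reparents
	 * it to the debugger and stops it with SIGSTOP before it runs.
	 */
	ptrace(PT_FOLLOW_FORK, pid, (caddr_t)0, 1);
	ptrace(PT_CONTINUE, pid, (caddr_t)1, 0);

	/* The next stop may be the new child reporting in. */
	waitpid(-1, &status, 0);
}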
void unix_syscall_return(int error) { thread_act_t thread; volatile int *rval; struct i386_saved_state *regs; struct proc *p; struct proc *current_proc(); unsigned short code; vm_offset_t params; struct sysent *callp; extern int nsysent; thread = current_act(); rval = (int *)get_bsduthreadrval(thread); p = current_proc(); regs = USER_REGS(thread); /* reconstruct code for tracing before blasting eax */ code = regs->eax; params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; if (callp == sysent) { code = fuword(params); } if (error == ERESTART) { regs->eip -= 7; } else if (error != EJUSTRETURN) { if (error) { regs->eax = error; regs->efl |= EFL_CF; /* carry bit */ } else { /* (not error) */ regs->eax = rval[0]; regs->edx = rval[1]; regs->efl &= ~EFL_CF; } } ktrsysret(p, code, error, rval[0], callp->sy_funnel); KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, error, rval[0], rval[1], 0, 0); if (callp->sy_funnel != NO_FUNNEL) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); thread_exception_return(); /* NOTREACHED */ }
void child_return(void *arg) { struct lwp * const l = arg; struct trapframe * const tf = l->l_md.md_utf; tf->tf_fixreg[FIRSTARG] = 0; tf->tf_fixreg[FIRSTARG + 1] = 1; tf->tf_cr &= ~0x10000000; /* clear the error (cr0 SO) bit */ tf->tf_srr1 &= ~(PSL_FP|PSL_VEC); /* Disable FP & AltiVec; first use will fault and reload state lazily. */ ktrsysret(SYS_fork, 0, 0); /* Profiling? XXX */ }
void child_return(void *arg) { struct lwp *l = arg; /* See cpu_lwp_fork() */ struct frame *f = (struct frame *)l->l_md.md_regs; f->f_regs[D0] = 0; f->f_sr &= ~PSL_C; f->f_format = FMT0; machine_userret(l, f, 0); ktrsysret(SYS_fork, 0, 0); }
void child_return(void *arg) { struct lwp *l = arg; struct proc *p = l->l_proc; userret(l, l->l_md.md_regs->tf_iioq_head, 0); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p, SYS_fork, 0, 0); #endif #ifdef DEBUG frame_sanity_check(l->l_md.md_regs, l); #endif /* DEBUG */ }
void child_return(void *arg) { lwp_t *l = arg; struct trapframe *frame = l->l_addr->u_pcb.pcb_tf; frame->tf_r0 = 0; #ifdef __PROG32 frame->tf_spsr &= ~PSR_C_bit; /* carry bit */ #else frame->tf_r15 &= ~R15_FLAG_C; /* carry bit */ #endif userret(l); ktrsysret(SYS_fork, 0, 0); }
void child_return(void *arg) { lwp_t *l = arg; register_t rval[2]; struct pcb *pcb = lwp_getpcb(l); ucontext_t *ucp = &pcb->pcb_userret_ucp; /* return value zero */ rval[0] = 0; rval[1] = 0; md_syscall_set_returnargs(l, ucp, 0, rval); aprint_debug("child return! lwp %p\n", l); userret(l); ktrsysret(SYS_fork, 0, 0); }
/* * Process the tail end of a fork() for the child. */ void child_return(void *arg) { struct proc *p = arg; /* * Return values in the frame set by cpu_fork(). */ KERNEL_PROC_UNLOCK(p); userret(p); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { KERNEL_PROC_LOCK(p); ktrsysret(p, SYS_fork, 0, 0); KERNEL_PROC_UNLOCK(p); } #endif }
void EMULNAME(syscall_fancy)(struct proc *p, u_int status, u_int cause, u_int opc) { struct frame *frame = (struct frame *)p->p_md.md_regs; register_t *args, copyargs[8]; register_t *rval; #if _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN register_t copyrval[2]; #endif mips_reg_t ov0; size_t code, numsys, nsaved, nargs; const struct sysent *callp; int error; uvmexp.syscalls++; if (DELAYBRANCH(cause)) frame->f_regs[PC] = MachEmulateBranch(frame, opc, 0, 0); else frame->f_regs[PC] = opc + sizeof(int); callp = p->p_emul->e_sysent; numsys = p->p_emul->e_nsysent; ov0 = code = frame->f_regs[V0] - SYSCALL_SHIFT; switch (code) { case SYS_syscall: case SYS___syscall: args = copyargs; if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = frame->f_regs[A0] - SYSCALL_SHIFT; args[0] = frame->f_regs[A1]; args[1] = frame->f_regs[A2]; args[2] = frame->f_regs[A3]; nsaved = 3; } else { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ code = frame->f_regs[A0 + _QUAD_LOWWORD] - SYSCALL_SHIFT; args[0] = frame->f_regs[A2]; args[1] = frame->f_regs[A3]; nsaved = 2; } if (code >= p->p_emul->e_nsysent) callp += p->p_emul->e_nosys; else callp += code; nargs = callp->sy_argsize / sizeof(register_t); if (nargs > nsaved) { error = copyin( ((register_t *)(vaddr_t)frame->f_regs[SP] + 4), (args + nsaved), (nargs - nsaved) * sizeof(register_t)); if (error) goto bad; } break; default: if (code >= p->p_emul->e_nsysent) callp += p->p_emul->e_nosys; else callp += code; nargs = callp->sy_narg; if (nargs < 5) { #if !defined(_MIPS_BSD_API) || _MIPS_BSD_API == _MIPS_BSD_API_LP32 args = (register_t *)&frame->f_regs[A0]; #elif _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN args = copyargs; args[0] = frame->f_regs[A0]; args[1] = frame->f_regs[A1]; args[2] = frame->f_regs[A2]; args[3] = frame->f_regs[A3]; #else # error syscall not implemented for current MIPS ABI #endif } else { args = copyargs; error = copyin( ((register_t *)(vaddr_t)frame->f_regs[SP] + 4), (&copyargs[4]), (nargs - 4) * sizeof(register_t)); if (error) goto bad; args[0] = frame->f_regs[A0]; args[1] = frame->f_regs[A1]; args[2] = frame->f_regs[A2]; args[3] = frame->f_regs[A3]; } break; } #ifdef SYSCALL_DEBUG scdebug_call(p, code, args); #endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p, code, callp->sy_argsize, args); #endif #if !defined(_MIPS_BSD_API) || _MIPS_BSD_API == _MIPS_BSD_API_LP32 rval = (register_t *)&frame->f_regs[V0]; rval[0] = 0; /* rval[1] already has V1 */ #elif _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN rval = copyrval; rval[0] = 0; rval[1] = frame->f_regs[V1]; #endif error = (*callp->sy_call)(p, args, rval); switch (error) { case 0: #if _MIPS_BSD_API == _MIPS_BSD_API_LP32_64CLEAN frame->f_regs[V0] = rval[0]; frame->f_regs[V1] = rval[1]; #endif frame->f_regs[A3] = 0; break; case ERESTART: frame->f_regs[V0] = ov0; /* restore syscall code */ frame->f_regs[PC] = opc; break; case EJUSTRETURN: break; /* nothing to do */ default: bad: if (p->p_emul->e_errno) error = p->p_emul->e_errno[error]; frame->f_regs[V0] = error; frame->f_regs[A3] = 1; break; } #ifdef SYSCALL_DEBUG scdebug_ret(p, code, error, rval); #endif userret(p); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p, code, error, rval[0]); #endif }
int main(int argc, char **argv) { int ch, col, ktrlen, size; pid_t do_pid = -1; void *m; int trpoints = ALL_POINTS; char *cp; (void) setlocale(LC_CTYPE, ""); while ((ch = getopt(argc,argv,"f:djlm:np:RTt:")) != -1) switch((char)ch) { case 'f': tracefile = optarg; break; case 'j': fixedformat = 1; break; case 'd': decimal = 1; break; case 'l': tail = 1; break; case 'm': maxdata = atoi(optarg); break; case 'n': fancy = 0; break; case 'p': do_pid = strtoul(optarg, &cp, 0); if (*cp != 0) errx(1,"invalid number %s", optarg); break; case 'R': timestamp = 2; /* relative timestamp */ break; case 'T': timestamp = 1; break; case 't': trpoints = getpoints(optarg); if (trpoints < 0) errx(1, "unknown trace point in %s", optarg); break; default: usage(); } if (argc > optind) usage(); m = (void *)malloc(size = 1025); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); if (!freopen(tracefile, "r", stdin)) err(1, "%s", tracefile); while (fread_tail(&ktr_header, sizeof(struct ktr_header), 1)) { if (trpoints & (1 << ktr_header.ktr_type) && (do_pid == -1 || ktr_header.ktr_pid == do_pid)) col = dumpheader(&ktr_header); else col = -1; if ((ktrlen = ktr_header.ktr_len) < 0) errx(1, "bogus length 0x%x", ktrlen); if (ktrlen > size) { m = (void *)realloc(m, ktrlen+1); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); size = ktrlen; } if (ktrlen && fread_tail(m, ktrlen, 1) == 0) errx(1, "data too short"); if ((trpoints & (1<<ktr_header.ktr_type)) == 0) continue; if (col == -1) continue; switch (ktr_header.ktr_type) { case KTR_SYSCALL: ktrsyscall((struct ktr_syscall *)m); break; case KTR_SYSRET: ktrsysret((struct ktr_sysret *)m); break; case KTR_NAMEI: ktrnamei(m, ktrlen); break; case KTR_GENIO: ktrgenio((struct ktr_genio *)m, ktrlen); break; case KTR_PSIG: ktrpsig((struct ktr_psig *)m); break; case KTR_CSW: ktrcsw((struct ktr_csw *)m); break; case KTR_USER: ktruser(ktrlen, m); break; } if (tail) (void)fflush(stdout); } exit(0); }
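/*
 * Minimal standalone reader for the ktrace.out stream that main() above
 * consumes: a sequence of struct ktr_header records, each followed by
 * ktr_len bytes of payload. Field names follow the classic 4.4BSD-style
 * <sys/ktrace.h>; exact members vary between the BSDs, so treat
 * demo_dump_types() as an illustrative sketch rather than portable code.
 */
#include <sys/param.h>
#include <sys/time.h>
#include <sys/ktrace.h>
#include <stdio.h>

int
demo_dump_types(const char *file)
{
	struct ktr_header kth;
	FILE *fp;

	if ((fp = fopen(file, "r")) == NULL)
		return (-1);
	while (fread(&kth, sizeof(kth), 1, fp) == 1) {
		printf("pid %d type %d len %d\n",
		    (int)kth.ktr_pid, kth.ktr_type, kth.ktr_len);
		/* Skip the payload; kdump would decode it instead. */
		if (kth.ktr_len > 0 &&
		    fseek(fp, kth.ktr_len, SEEK_CUR) != 0)
			break;
	}
	fclose(fp);
	return (0);
}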
/* * Handle an exception. * In the case of a kernel trap, we return the pc where to resume if * pcb_onfault is set, otherwise, return old pc. */ void trap(struct trap_frame *trapframe) { struct cpu_info *ci = curcpu(); int type, i; unsigned ucode = 0; struct proc *p = ci->ci_curproc; vm_prot_t ftype; extern vaddr_t onfault_table[]; int onfault; int typ = 0; union sigval sv; trapdebug_enter(ci, trapframe, -1); type = (trapframe->cause & CR_EXC_CODE) >> CR_EXC_CODE_SHIFT; if (USERMODE(trapframe->sr)) { type |= T_USER; } /* * Enable hardware interrupts if they were on before the trap; * enable IPI interrupts only otherwise. */ if (type != T_BREAK) { if (ISSET(trapframe->sr, SR_INT_ENAB)) enableintr(); else { #ifdef MULTIPROCESSOR ENABLEIPI(); #endif } } switch (type) { case T_TLB_MOD: /* check for kernel address */ if (trapframe->badvaddr < 0) { pt_entry_t *pte, entry; paddr_t pa; vm_page_t pg; pte = kvtopte(trapframe->badvaddr); entry = *pte; #ifdef DIAGNOSTIC if (!(entry & PG_V) || (entry & PG_M)) panic("trap: ktlbmod: invalid pte"); #endif if (pmap_is_page_ro(pmap_kernel(), trunc_page(trapframe->badvaddr), entry)) { /* write to read only page in the kernel */ ftype = VM_PROT_WRITE; goto kernel_fault; } entry |= PG_M; *pte = entry; KERNEL_LOCK(); pmap_update_kernel_page(trapframe->badvaddr & ~PGOFSET, entry); pa = pfn_to_pad(entry); pg = PHYS_TO_VM_PAGE(pa); if (pg == NULL) panic("trap: ktlbmod: unmanaged page"); pmap_set_modify(pg); KERNEL_UNLOCK(); return; } /* FALLTHROUGH */ case T_TLB_MOD+T_USER: { pt_entry_t *pte, entry; paddr_t pa; vm_page_t pg; pmap_t pmap = p->p_vmspace->vm_map.pmap; if (!(pte = pmap_segmap(pmap, trapframe->badvaddr))) panic("trap: utlbmod: invalid segmap"); pte += uvtopte(trapframe->badvaddr); entry = *pte; #ifdef DIAGNOSTIC if (!(entry & PG_V) || (entry & PG_M)) panic("trap: utlbmod: invalid pte"); #endif if (pmap_is_page_ro(pmap, trunc_page(trapframe->badvaddr), entry)) { /* write to read only page */ ftype = VM_PROT_WRITE; goto fault_common; } entry |= PG_M; *pte = entry; KERNEL_LOCK(); pmap_update_user_page(pmap, (trapframe->badvaddr & ~PGOFSET), entry); pa = pfn_to_pad(entry); pg = PHYS_TO_VM_PAGE(pa); if (pg == NULL) panic("trap: utlbmod: unmanaged page"); pmap_set_modify(pg); KERNEL_UNLOCK(); if (!USERMODE(trapframe->sr)) return; goto out; } case T_TLB_LD_MISS: case T_TLB_ST_MISS: ftype = (type == T_TLB_ST_MISS) ? VM_PROT_WRITE : VM_PROT_READ; /* check for kernel address */ if (trapframe->badvaddr < 0) { vaddr_t va; int rv; kernel_fault: va = trunc_page((vaddr_t)trapframe->badvaddr); onfault = p->p_addr->u_pcb.pcb_onfault; p->p_addr->u_pcb.pcb_onfault = 0; KERNEL_LOCK(); rv = uvm_fault(kernel_map, trunc_page(va), 0, ftype); KERNEL_UNLOCK(); p->p_addr->u_pcb.pcb_onfault = onfault; if (rv == 0) return; if (onfault != 0) { p->p_addr->u_pcb.pcb_onfault = 0; trapframe->pc = onfault_table[onfault]; return; } goto err; } /* * It is an error for the kernel to access user space except * through the copyin/copyout routines. */ if (p->p_addr->u_pcb.pcb_onfault != 0) { /* * We want to resolve the TLB fault before invoking * pcb_onfault if necessary. 
*/ goto fault_common; } else { goto err; } case T_TLB_LD_MISS+T_USER: ftype = VM_PROT_READ; goto fault_common; case T_TLB_ST_MISS+T_USER: ftype = VM_PROT_WRITE; fault_common: { vaddr_t va; struct vmspace *vm; vm_map_t map; int rv; vm = p->p_vmspace; map = &vm->vm_map; va = trunc_page((vaddr_t)trapframe->badvaddr); onfault = p->p_addr->u_pcb.pcb_onfault; p->p_addr->u_pcb.pcb_onfault = 0; KERNEL_LOCK(); rv = uvm_fault(map, trunc_page(va), 0, ftype); p->p_addr->u_pcb.pcb_onfault = onfault; /* * If this was a stack access we keep track of the maximum * accessed stack size. Also, if vm_fault gets a protection * failure it is due to accessing the stack region outside * the current limit and we need to reflect that as an access * error. */ if ((caddr_t)va >= vm->vm_maxsaddr) { if (rv == 0) uvm_grow(p, va); else if (rv == EACCES) rv = EFAULT; } KERNEL_UNLOCK(); if (rv == 0) { if (!USERMODE(trapframe->sr)) return; goto out; } if (!USERMODE(trapframe->sr)) { if (onfault != 0) { p->p_addr->u_pcb.pcb_onfault = 0; trapframe->pc = onfault_table[onfault]; return; } goto err; } #ifdef ADEBUG printf("SIG-SEGV @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapframe->ra); #endif ucode = ftype; i = SIGSEGV; typ = SEGV_MAPERR; break; } case T_ADDR_ERR_LD+T_USER: /* misaligned or kseg access */ case T_ADDR_ERR_ST+T_USER: /* misaligned or kseg access */ ucode = 0; /* XXX should be VM_PROT_something */ i = SIGBUS; typ = BUS_ADRALN; #ifdef ADEBUG printf("SIG-BUSA @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapframe->ra); #endif break; case T_BUS_ERR_IFETCH+T_USER: /* BERR asserted to cpu */ case T_BUS_ERR_LD_ST+T_USER: /* BERR asserted to cpu */ ucode = 0; /* XXX should be VM_PROT_something */ i = SIGBUS; typ = BUS_OBJERR; #ifdef ADEBUG printf("SIG-BUSB @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapframe->ra); #endif break; case T_SYSCALL+T_USER: { struct trap_frame *locr0 = p->p_md.md_regs; struct sysent *callp; unsigned int code; unsigned long tpc; int numsys; struct args { register_t i[8]; } args; register_t rval[2]; uvmexp.syscalls++; /* compute next PC after syscall instruction */ tpc = trapframe->pc; /* Remember if restart */ if (trapframe->cause & CR_BR_DELAY) locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); else locr0->pc += 4; callp = p->p_emul->e_sysent; numsys = p->p_emul->e_nsysent; code = locr0->v0; switch (code) { case SYS_syscall: /* * Code is first argument, followed by actual args. */ code = locr0->a0; if (code >= numsys) callp += p->p_emul->e_nosys; /* (illegal) */ else callp += code; i = callp->sy_argsize / sizeof(register_t); args.i[0] = locr0->a1; args.i[1] = locr0->a2; args.i[2] = locr0->a3; if (i > 3) { args.i[3] = locr0->a4; args.i[4] = locr0->a5; args.i[5] = locr0->a6; args.i[6] = locr0->a7; i = copyin((void *)locr0->sp, &args.i[7], sizeof(register_t)); } break; case SYS___syscall: /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. 
*/ code = locr0->a0; args.i[0] = locr0->a1; args.i[1] = locr0->a2; args.i[2] = locr0->a3; if (code >= numsys) callp += p->p_emul->e_nosys; /* (illegal) */ else callp += code; i = callp->sy_argsize / sizeof(int); if (i > 3) { args.i[3] = locr0->a4; args.i[4] = locr0->a5; args.i[5] = locr0->a6; args.i[6] = locr0->a7; i = copyin((void *)locr0->sp, &args.i[7], sizeof(register_t)); } break; default: if (code >= numsys) callp += p->p_emul->e_nosys; /* (illegal) */ else callp += code; i = callp->sy_narg; args.i[0] = locr0->a0; args.i[1] = locr0->a1; args.i[2] = locr0->a2; args.i[3] = locr0->a3; if (i > 4) { args.i[4] = locr0->a4; args.i[5] = locr0->a5; args.i[6] = locr0->a6; args.i[7] = locr0->a7; } } #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_call(p, code, args.i); KERNEL_UNLOCK(); #endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) { KERNEL_LOCK(); ktrsyscall(p, code, callp->sy_argsize, args.i); KERNEL_UNLOCK(); } #endif rval[0] = 0; rval[1] = locr0->v1; #if defined(DDB) || defined(DEBUG) trapdebug[TRAPSIZE * ci->ci_cpuid + (trppos[ci->ci_cpuid] == 0 ? TRAPSIZE : trppos[ci->ci_cpuid]) - 1].code = code; #endif #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) { KERNEL_LOCK(); i = systrace_redirect(code, p, args.i, rval); KERNEL_UNLOCK(); } else #endif { int nolock = (callp->sy_flags & SY_NOLOCK); if (!nolock) KERNEL_LOCK(); i = (*callp->sy_call)(p, &args, rval); if (!nolock) KERNEL_UNLOCK(); } switch (i) { case 0: locr0->v0 = rval[0]; locr0->v1 = rval[1]; locr0->a3 = 0; break; case ERESTART: locr0->pc = tpc; break; case EJUSTRETURN: break; /* nothing to do */ default: locr0->v0 = i; locr0->a3 = 1; } #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_ret(p, code, i, rval); KERNEL_UNLOCK(); #endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { KERNEL_LOCK(); ktrsysret(p, code, i, rval[0]); KERNEL_UNLOCK(); } #endif goto out; } case T_BREAK: #ifdef DDB kdb_trap(type, trapframe); #endif /* Reenable interrupts if necessary */ if (trapframe->sr & SR_INT_ENAB) { enableintr(); } return; case T_BREAK+T_USER: { caddr_t va; u_int32_t instr; struct trap_frame *locr0 = p->p_md.md_regs; /* compute address of break instruction */ va = (caddr_t)trapframe->pc; if (trapframe->cause & CR_BR_DELAY) va += 4; /* read break instruction */ copyin(va, &instr, sizeof(int32_t)); switch ((instr & BREAK_VAL_MASK) >> BREAK_VAL_SHIFT) { case 6: /* gcc range error */ i = SIGFPE; typ = FPE_FLTSUB; /* skip instruction */ if (trapframe->cause & CR_BR_DELAY) locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); else locr0->pc += 4; break; case 7: /* gcc3 divide by zero */ i = SIGFPE; typ = FPE_INTDIV; /* skip instruction */ if (trapframe->cause & CR_BR_DELAY) locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); else locr0->pc += 4; break; #ifdef PTRACE case BREAK_SSTEP_VAL: if (p->p_md.md_ss_addr == (long)va) { #ifdef DEBUG printf("trap: %s (%d): breakpoint at %p " "(insn %08x)\n", p->p_comm, p->p_pid, p->p_md.md_ss_addr, p->p_md.md_ss_instr); #endif /* Restore original instruction and clear BP */ process_sstep(p, 0); typ = TRAP_BRKPT; } else { typ = TRAP_TRACE; } i = SIGTRAP; break; #endif #ifdef FPUEMUL case BREAK_FPUEMUL_VAL: /* * If this is a genuine FP emulation break, * resume execution to our branch destination. */ if ((p->p_md.md_flags & MDP_FPUSED) != 0 && p->p_md.md_fppgva + 4 == (vaddr_t)va) { struct vm_map *map = &p->p_vmspace->vm_map; p->p_md.md_flags &= ~MDP_FPUSED; locr0->pc = p->p_md.md_fpbranchva; /* * Prevent access to the relocation page. 
* XXX needs to be fixed to work with rthreads */ uvm_fault_unwire(map, p->p_md.md_fppgva, p->p_md.md_fppgva + PAGE_SIZE); (void)uvm_map_protect(map, p->p_md.md_fppgva, p->p_md.md_fppgva + PAGE_SIZE, UVM_PROT_NONE, FALSE); goto out; } /* FALLTHROUGH */ #endif default: typ = TRAP_TRACE; i = SIGTRAP; break; } break; } case T_IWATCH+T_USER: case T_DWATCH+T_USER: { caddr_t va; /* compute address of trapped instruction */ va = (caddr_t)trapframe->pc; if (trapframe->cause & CR_BR_DELAY) va += 4; printf("watch exception @ %p\n", va); #ifdef RM7K_PERFCNTR if (rm7k_watchintr(trapframe)) { /* Return to user, don't add any more overhead */ goto out; } #endif i = SIGTRAP; typ = TRAP_BRKPT; break; } case T_TRAP+T_USER: { caddr_t va; u_int32_t instr; struct trap_frame *locr0 = p->p_md.md_regs; /* compute address of trap instruction */ va = (caddr_t)trapframe->pc; if (trapframe->cause & CR_BR_DELAY) va += 4; /* read break instruction */ copyin(va, &instr, sizeof(int32_t)); if (trapframe->cause & CR_BR_DELAY) locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); else locr0->pc += 4; #ifdef RM7K_PERFCNTR if (instr == 0x040c0000) { /* Performance cntr trap */ int result; result = rm7k_perfcntr(trapframe->a0, trapframe->a1, trapframe->a2, trapframe->a3); locr0->v0 = -result; /* Return to user, don't add any more overhead */ goto out; } else #endif /* * GCC 4 uses teq with code 7 to signal divide by * zero at runtime. This is one instruction shorter * than the BEQ + BREAK combination used by gcc 3. */ if ((instr & 0xfc00003f) == 0x00000034 /* teq */ && (instr & 0x001fffc0) == ((ZERO << 16) | (7 << 6))) { i = SIGFPE; typ = FPE_INTDIV; } else { i = SIGEMT; /* Stuff it with something for now */ typ = 0; } break; } case T_RES_INST+T_USER: i = SIGILL; typ = ILL_ILLOPC; break; case T_COP_UNUSABLE+T_USER: /* * Note MIPS IV COP1X instructions issued with FPU * disabled correctly report coprocessor 1 as the * unusable coprocessor number. */ if ((trapframe->cause & CR_COP_ERR) != 0x10000000) { i = SIGILL; /* only FPU instructions allowed */ typ = ILL_ILLOPC; break; } #ifdef FPUEMUL MipsFPTrap(trapframe); #else enable_fpu(p); #endif goto out; case T_FPE: printf("FPU Trap: PC %x CR %x SR %x\n", trapframe->pc, trapframe->cause, trapframe->sr); goto err; case T_FPE+T_USER: MipsFPTrap(trapframe); goto out; case T_OVFLOW+T_USER: i = SIGFPE; typ = FPE_FLTOVF; break; case T_ADDR_ERR_LD: /* misaligned access */ case T_ADDR_ERR_ST: /* misaligned access */ case T_BUS_ERR_LD_ST: /* BERR asserted to cpu */ if ((onfault = p->p_addr->u_pcb.pcb_onfault) != 0) { p->p_addr->u_pcb.pcb_onfault = 0; trapframe->pc = onfault_table[onfault]; return; } goto err; default: err: disableintr(); #if !defined(DDB) && defined(DEBUG) trapDump("trap"); #endif printf("\nTrap cause = %d Frame %p\n", type, trapframe); printf("Trap PC %p RA %p fault %p\n", trapframe->pc, trapframe->ra, trapframe->badvaddr); #ifdef DDB stacktrace(!USERMODE(trapframe->sr) ? trapframe : p->p_md.md_regs); kdb_trap(type, trapframe); #endif panic("trap"); } #ifdef FPUEMUL /* * If a relocated delay slot causes an exception, blame the * original delay slot address - userland is not supposed to * know anything about emulation bowels. 
*/ if ((p->p_md.md_flags & MDP_FPUSED) != 0 && trapframe->badvaddr == p->p_md.md_fppgva) trapframe->badvaddr = p->p_md.md_fpslotva; #endif p->p_md.md_regs->pc = trapframe->pc; p->p_md.md_regs->cause = trapframe->cause; p->p_md.md_regs->badvaddr = trapframe->badvaddr; sv.sival_ptr = (void *)trapframe->badvaddr; KERNEL_LOCK(); trapsignal(p, i, ucode, typ, sv); KERNEL_UNLOCK(); out: /* * Note: we should only get here if returning to user mode. */ userret(p); }
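/*
 * Userland-visible consequence of the ERESTART handling above (the kernel
 * rewinding the PC to the syscall instruction): whether an interrupted
 * slow syscall is re-executed is controlled by SA_RESTART.
 * demo_erestart() is a hypothetical helper with error handling elided.
 * Run with use_sa_restart=0 to see EINTR; with use_sa_restart=1 the
 * read() silently resumes blocking (this demo would hang), which is
 * exactly the restarted syscall at work.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void
on_alarm(int sig)
{
	(void)sig;	/* exists only to interrupt the blocking read() */
}

int
demo_erestart(int use_sa_restart)
{
	struct sigaction sa;
	int fds[2];
	char c;
	ssize_t n;

	if (pipe(fds) == -1)
		return (-1);
	sigemptyset(&sa.sa_mask);
	sa.sa_handler = on_alarm;
	sa.sa_flags = use_sa_restart ? SA_RESTART : 0;
	sigaction(SIGALRM, &sa, NULL);

	alarm(1);
	n = read(fds[0], &c, 1);	/* blocks until SIGALRM arrives */
	if (n == -1 && errno == EINTR)
		printf("read: EINTR (syscall was not restarted)\n");
	return (0);
}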
/* * syscall2 - MP aware system call request C handler * * A system call is essentially treated as a trap except that the * MP lock is not held on entry or return. We are responsible for * obtaining the MP lock if necessary and for handling ASTs * (e.g. a task switch) prior to return. * * MPSAFE */ void syscall2(struct trapframe *frame) { struct thread *td = curthread; struct proc *p = td->td_proc; struct lwp *lp = td->td_lwp; struct sysent *callp; register_t orig_tf_rflags; int sticks; int error; int narg; #ifdef INVARIANTS int crit_count = td->td_critcount; #endif register_t *argp; u_int code; int regcnt, optimized_regcnt; union sysunion args; register_t *argsdst; mycpu->gd_cnt.v_syscall++; #ifdef DIAGNOSTIC if (ISPL(frame->tf_cs) != SEL_UPL) { panic("syscall"); /* NOT REACHED */ } #endif KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid, frame->tf_rax); userenter(td, p); /* lazy raise our priority */ regcnt = 6; optimized_regcnt = 6; /* * Misc */ sticks = (int)td->td_sticks; orig_tf_rflags = frame->tf_rflags; /* * Virtual kernel intercept - if a VM context managed by a virtual * kernel issues a system call the virtual kernel handles it, not us. * Restore the virtual kernel context and return from its system * call. The current frame is copied out to the virtual kernel. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { vkernel_trap(lp, frame); error = EJUSTRETURN; callp = NULL; code = 0; goto out; } /* * Get the system call parameters and account for time */ KASSERT(lp->lwp_md.md_regs == frame, ("Frame mismatch %p %p", lp->lwp_md.md_regs, frame)); code = (u_int)frame->tf_rax; if (code == SYS_syscall || code == SYS___syscall) { code = frame->tf_rdi; regcnt--; argp = &frame->tf_rdi + 1; } else { argp = &frame->tf_rdi; } if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg & SYF_ARGMASK; /* * On x86_64 we get up to six arguments in registers. The rest are * on the stack. The first six members of 'struct trapframe' happen * to be the registers used to pass arguments, in exactly the right * order. */ argsdst = (register_t *)(&args.nosys.sysmsg + 1); /* * It's easier to copy up to the highest number of syscall arguments * passed in registers, which is 6, than to conditionalize it. */ bcopy(argp, argsdst, sizeof(register_t) * optimized_regcnt); /* * Any arguments beyond available argument-passing registers must * be copyin()'d from the user stack. */ if (narg > regcnt) { caddr_t params; params = (caddr_t)frame->tf_rsp + sizeof(register_t); error = copyin(params, &argsdst[regcnt], (narg - regcnt) * sizeof(register_t)); if (error) { #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif /* * Default return value is 0 (will be copied to %rax). Double-value * returns use %rax and %rdx. %rdx is left unchanged for system * calls which return only one result. */ args.sysmsg_fds[0] = 0; args.sysmsg_fds[1] = frame->tf_rdx; /* * The syscall might manipulate the trap frame. If it does it * will probably return EJUSTRETURN. */ args.sysmsg_frame = frame; STOPEVENT(p, S_SCE, narg); /* MP aware */ /* * NOTE: All system calls run MPSAFE now. The system call itself * is responsible for getting the MP lock. 
*/ #ifdef SYSCALL_DEBUG tsc_uclock_t tscval = rdtsc(); #endif error = (*callp->sy_call)(&args); #ifdef SYSCALL_DEBUG tscval = rdtsc() - tscval; tscval = tscval * 1000000 / tsc_frequency; if (SysCallsWorstCase[code] < tscval) SysCallsWorstCase[code] = tscval; #endif out: /* * MP SAFE (we may or may not have the MP lock at this point) */ //kprintf("SYSMSG %d ", error); switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; lp = curthread->td_lwp; frame->tf_rax = args.sysmsg_fds[0]; frame->tf_rdx = args.sysmsg_fds[1]; frame->tf_rflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, we know that 'syscall' is 2 bytes. * We have to do a full context restore so that %r10 * (which was holding the value of %rcx) is restored for * the next iteration. */ if (frame->tf_err != 0 && frame->tf_err != 2) kprintf("lp %s:%d frame->tf_err is weird %ld\n", td->td_comm, lp->lwp_proc->p_pid, frame->tf_err); frame->tf_rip -= frame->tf_err; frame->tf_r10 = frame->tf_rcx; break; case EJUSTRETURN: break; case EASYNC: panic("Unexpected EASYNC return value (for now)"); default: bad: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } frame->tf_rax = error; frame->tf_rflags |= PSL_C; break; } /* * Traced syscall. trapsignal() should now be MP aware */ if (orig_tf_rflags & PSL_T) { frame->tf_rflags &= ~PSL_T; trapsignal(lp, SIGTRAP, TRAP_TRACE); } /* * Handle reschedule and other end-of-syscall issues */ userret(lp, frame, sticks); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { ktrsysret(lp, code, error, args.sysmsg_result); } #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); userexit(lp); KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); KASSERT(&td->td_toks_base == td->td_toks_stop, ("syscall: %ld extra tokens held after trap! syscall %p", td->td_toks_stop - &td->td_toks_base, callp->sy_call)); #endif }
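/*
 * Userland view of the SYS_syscall/SYS___syscall indirection handled
 * above. On the BSDs the libc syscall(2) stub typically traps with code
 * 0 (SYS_syscall) and passes the real syscall number as the first
 * register argument, so the kernel shifts the remaining arguments down
 * by one -- the `code = frame->tf_rdi; regcnt--;` logic above.
 * demo_indirect_syscall() is an illustrative helper:
 */
#include <sys/syscall.h>
#include <stdio.h>
#include <unistd.h>

int
demo_indirect_syscall(void)
{
	/*
	 * Same effect as getpid(), but enters the kernel through the
	 * indirect path: syscall number SYS_syscall in %rax, with
	 * SYS_getpid as the first register argument.
	 */
	long pid = syscall(SYS_getpid);

	printf("getpid via syscall(2): %ld\n", pid);
	return (pid > 0 ? 0 : -1);
}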
void m88100_syscall(register_t code, struct trapframe *tf) { int i, nsys, nap; struct sysent *callp; struct proc *p = curproc; int error; register_t args[8], rval[2], *ap; int nolock; uvmexp.syscalls++; callp = p->p_emul->e_sysent; nsys = p->p_emul->e_nsysent; p->p_md.md_tf = tf; /* * For 88k, all the arguments are passed in the registers (r2-r9), * and further arguments (if any) on stack. * For syscall (and __syscall), r2 (and r3) has the actual code. * __syscall takes a quad syscall number, so that other * arguments are at their natural alignments. */ ap = &tf->tf_r[2]; nap = 8; /* r2-r9 */ switch (code) { case SYS_syscall: code = *ap++; nap--; break; case SYS___syscall: if (callp != sysent) break; code = ap[_QUAD_LOWWORD]; ap += 2; nap -= 2; break; } if (code < 0 || code >= nsys) callp += p->p_emul->e_nosys; else callp += code; i = callp->sy_argsize / sizeof(register_t); if (i > sizeof(args) / sizeof(register_t)) panic("syscall nargs"); if (i > nap) { bcopy((caddr_t)ap, (caddr_t)args, nap * sizeof(register_t)); error = copyin((caddr_t)tf->tf_r[31], (caddr_t)(args + nap), (i - nap) * sizeof(register_t)); } else { bcopy((caddr_t)ap, (caddr_t)args, i * sizeof(register_t)); error = 0; } if (error != 0) goto bad; #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_call(p, code, args); KERNEL_UNLOCK(); #endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) { KERNEL_LOCK(); ktrsyscall(p, code, callp->sy_argsize, args); KERNEL_UNLOCK(); } #endif rval[0] = 0; rval[1] = tf->tf_r[3]; #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) { KERNEL_LOCK(); error = systrace_redirect(code, p, args, rval); KERNEL_UNLOCK(); } else #endif { nolock = (callp->sy_flags & SY_NOLOCK); if (!nolock) KERNEL_LOCK(); error = (*callp->sy_call)(p, args, rval); if (!nolock) KERNEL_UNLOCK(); } /* * system call will look like: * or r13, r0, <code> * tb0 0, r0, <128> <- sxip * br err <- snip * jmp r1 <- sfip * err: or.u r3, r0, hi16(errno) * st r2, r3, lo16(errno) * subu r2, r0, 1 * jmp r1 * * So, when we take syscall trap, sxip/snip/sfip will be as * shown above. * Given this, * 1. If the system call returned 0, need to skip nip. * nip = fip, fip += 4 * (doesn't matter what fip + 4 will be but we will never * execute this since jmp r1 at nip will change the execution flow.) * 2. If the system call returned an errno > 0, plug the value * in r2, and leave nip and fip unchanged. This will have us * executing "br err" on return to user space. * 3. If the system call code returned ERESTART, * we need to re-execute the trap instruction. Back up the * pipeline. * fip = nip, nip = xip * 4. If the system call returned EJUSTRETURN, don't need to adjust * any pointers. */ switch (error) { case 0: tf->tf_r[2] = rval[0]; tf->tf_r[3] = rval[1]; tf->tf_epsr &= ~PSR_C; tf->tf_snip = tf->tf_sfip & ~NIP_E; tf->tf_sfip = tf->tf_snip + 4; break; case ERESTART: tf->tf_epsr &= ~PSR_C; tf->tf_sfip = tf->tf_snip & ~FIP_E; tf->tf_snip = tf->tf_sxip & ~NIP_E; break; case EJUSTRETURN: tf->tf_epsr &= ~PSR_C; break; default: bad: if (p->p_emul->e_errno) error = p->p_emul->e_errno[error]; tf->tf_r[2] = error; tf->tf_epsr |= PSR_C; /* fail */ tf->tf_snip = tf->tf_snip & ~NIP_E; tf->tf_sfip = tf->tf_sfip & ~FIP_E; break; } #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_ret(p, code, error, rval); KERNEL_UNLOCK(); #endif userret(p); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { KERNEL_LOCK(); ktrsysret(p, code, error, rval[0]); KERNEL_UNLOCK(); } #endif }
void unix_syscall(struct i386_saved_state *regs) { thread_act_t thread; void *vt; unsigned short code; struct sysent *callp; int nargs, error; volatile int *rval; int funnel_type; vm_offset_t params; extern int nsysent; struct proc *p; struct proc *current_proc(); thread = current_act(); p = current_proc(); rval = (int *)get_bsduthreadrval(thread); //printf("[scall : eax %x]", regs->eax); code = regs->eax; params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; if (callp == sysent) { code = fuword(params); params += sizeof (int); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; } vt = get_bsduthreadarg(thread); if ((nargs = (callp->sy_narg * sizeof (int))) && (error = copyin((char *) params, (char *)vt , nargs)) != 0) { regs->eax = error; regs->efl |= EFL_CF; thread_exception_return(); /* NOTREACHED */ } rval[0] = 0; rval[1] = regs->edx; funnel_type = callp->sy_funnel; if(funnel_type == KERNEL_FUNNEL) (void) thread_funnel_set(kernel_flock, TRUE); else if (funnel_type == NETWORK_FUNNEL) (void) thread_funnel_set(network_flock, TRUE); set_bsduthreadargs(thread, regs, NULL); if (callp->sy_narg > 8) panic("unix_syscall max arg count exceeded (%d)", callp->sy_narg); ktrsyscall(p, code, callp->sy_narg, vt, funnel_type); { int *ip = (int *)vt; KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, *ip, *(ip+1), *(ip+2), *(ip+3), 0); } error = (*(callp->sy_call))(p, (void *) vt, (int *) &rval[0]); #if 0 /* May be needed with vfork changes */ regs = USER_REGS(thread); #endif if (error == ERESTART) { regs->eip -= 7; } else if (error != EJUSTRETURN) { if (error) { regs->eax = error; regs->efl |= EFL_CF; /* carry bit */ } else { /* (not error) */ regs->eax = rval[0]; regs->edx = rval[1]; regs->efl &= ~EFL_CF; } } ktrsysret(p, code, error, rval[0], funnel_type); KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, error, rval[0], rval[1], 0, 0); if(funnel_type != NO_FUNNEL) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); thread_exception_return(); /* NOTREACHED */ }
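/*
 * What the error path above (error number in %eax plus EFL_CF set) looks
 * like from C: the libc stub tests the carry flag, stores the value into
 * errno, and returns -1. demo_carry_errno() is an illustrative,
 * hypothetical helper:
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int
demo_carry_errno(void)
{
	int fd = open("/nonexistent-path-for-demo", O_RDONLY);

	if (fd == -1) {
		/*
		 * The kernel returned ENOENT in %eax with the carry
		 * flag set; the stub turned that into errno and -1.
		 */
		printf("open failed: %s\n", strerror(errno));
		return (0);
	}
	return (-1);	/* unexpectedly succeeded */
}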
/* Instruction pointers operate differently on mc88110 */ void m88110_syscall(register_t code, struct trapframe *tf) { int i, nsys, nap; struct sysent *callp; struct proc *p; int error; register_t args[8], rval[2], *ap; uvmexp.syscalls++; p = curproc; callp = p->p_emul->e_sysent; nsys = p->p_emul->e_nsysent; p->p_md.md_tf = tf; /* * For 88k, all the arguments are passed in the registers (r2-r9), * and further arguments (if any) on stack. * For syscall (and __syscall), r2 (and r3) has the actual code. * __syscall takes a quad syscall number, so that other * arguments are at their natural alignments. */ ap = &tf->tf_r[2]; nap = 8; /* r2-r9 */ switch (code) { case SYS_syscall: code = *ap++; nap--; break; case SYS___syscall: if (callp != sysent) break; code = ap[_QUAD_LOWWORD]; ap += 2; nap -= 2; break; } if (code < 0 || code >= nsys) callp += p->p_emul->e_nosys; else callp += code; i = callp->sy_argsize / sizeof(register_t); if (i > sizeof(args) / sizeof(register_t)) panic("syscall nargs"); if (i > nap) { bcopy((caddr_t)ap, (caddr_t)args, nap * sizeof(register_t)); error = copyin((caddr_t)tf->tf_r[31], (caddr_t)(args + nap), (i - nap) * sizeof(register_t)); } else { bcopy((caddr_t)ap, (caddr_t)args, i * sizeof(register_t)); error = 0; } if (error != 0) goto bad; KERNEL_PROC_LOCK(p); #ifdef SYSCALL_DEBUG scdebug_call(p, code, args); #endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p, code, callp->sy_argsize, args); #endif rval[0] = 0; rval[1] = tf->tf_r[3]; #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) error = systrace_redirect(code, p, args, rval); else #endif error = (*callp->sy_call)(p, args, rval); /* * system call will look like: * or r13, r0, <code> * tb0 0, r0, <128> <- exip * br err <- enip * jmp r1 * err: or.u r3, r0, hi16(errno) * st r2, r3, lo16(errno) * subu r2, r0, 1 * jmp r1 * * So, when we take syscall trap, exip/enip will be as * shown above. * Given this, * 1. If the system call returned 0, need to jmp r1. * exip += 8 * 2. If the system call returned an errno > 0, increment * exip += 4 and plug the value in r2. This will have us * executing "br err" on return to user space. * 3. If the system call code returned ERESTART, * we need to re-execute the trap instruction. leave exip as is. * 4. If the system call returned EJUSTRETURN, just return. * exip += 4 */ KERNEL_PROC_UNLOCK(p); switch (error) { case 0: tf->tf_r[2] = rval[0]; tf->tf_r[3] = rval[1]; tf->tf_epsr &= ~PSR_C; /* skip two instructions */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip + 4; else tf->tf_exip += 4 + 4; break; case ERESTART: /* * Reexecute the trap. * exip is already at the trap instruction, so * there is nothing to do. */ tf->tf_epsr &= ~PSR_C; break; case EJUSTRETURN: tf->tf_epsr &= ~PSR_C; /* skip one instruction */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip; else tf->tf_exip += 4; break; default: bad: if (p->p_emul->e_errno) error = p->p_emul->e_errno[error]; tf->tf_r[2] = error; tf->tf_epsr |= PSR_C; /* fail */ /* skip one instruction */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip; else tf->tf_exip += 4; break; } #ifdef SYSCALL_DEBUG KERNEL_PROC_LOCK(p); scdebug_ret(p, code, error, rval); KERNEL_PROC_UNLOCK(p); #endif userret(p); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { KERNEL_PROC_LOCK(p); ktrsysret(p, code, error, rval[0]); KERNEL_PROC_UNLOCK(p); } #endif }
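/*
 * Hypothetical helper condensing the exip arithmetic repeated in the
 * switch above: on mc88110, a set low bit in tf_exip means the valid
 * next-instruction pointer lives in tf_enip. The struct here is a
 * stand-in for illustration, not the real trapframe.
 */
struct m88110_ip {
	unsigned long exip;	/* exception instruction pointer */
	unsigned long enip;	/* exception next instruction pointer */
};

/*
 * Advance past n instructions: n == 2 for a successful syscall, n == 1
 * for error or EJUSTRETURN; ERESTART leaves the pointers untouched so
 * the trap instruction re-executes.
 */
static void
m88110_skip_insns(struct m88110_ip *ip, int n)
{
	if (ip->exip & 1)
		ip->exip = ip->enip + (n - 1) * 4;
	else
		ip->exip += n * 4;
}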
void osf1_syscall_fancy(struct proc *p, u_int64_t code, struct trapframe *framep) { const struct sysent *callp; int error; u_int64_t rval[2]; u_int64_t *args, copyargs[10]; /* XXX */ u_int hidden, nargs; KERNEL_PROC_LOCK(p); uvmexp.syscalls++; p->p_md.md_tf = framep; callp = p->p_emul->e_sysent; switch (code) { case OSF1_SYS_syscall: /* OSF/1 syscall() */ code = framep->tf_regs[FRAME_A0]; hidden = 1; break; default: hidden = 0; break; } code &= (OSF1_SYS_NSYSENT - 1); callp += code; nargs = callp->sy_narg + hidden; switch (nargs) { default: error = copyin((caddr_t)alpha_pal_rdusp(), ©args[6], (nargs - 6) * sizeof(u_int64_t)); if (error) goto bad; case 6: copyargs[5] = framep->tf_regs[FRAME_A5]; case 5: copyargs[4] = framep->tf_regs[FRAME_A4]; case 4: copyargs[3] = framep->tf_regs[FRAME_A3]; copyargs[2] = framep->tf_regs[FRAME_A2]; copyargs[1] = framep->tf_regs[FRAME_A1]; copyargs[0] = framep->tf_regs[FRAME_A0]; args = copyargs; break; case 3: case 2: case 1: case 0: args = &framep->tf_regs[FRAME_A0]; break; } args += hidden; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p, code, callp->sy_argsize, args); #endif #ifdef SYSCALL_DEBUG scdebug_call(p, code, args); #endif rval[0] = 0; rval[1] = 0; error = (*callp->sy_call)(p, args, rval); switch (error) { case 0: framep->tf_regs[FRAME_V0] = rval[0]; framep->tf_regs[FRAME_A4] = rval[1]; framep->tf_regs[FRAME_A3] = 0; break; case ERESTART: framep->tf_regs[FRAME_PC] -= 4; break; case EJUSTRETURN: break; default: bad: error = native_to_osf1_errno[error]; framep->tf_regs[FRAME_V0] = error; framep->tf_regs[FRAME_A3] = 1; break; } #ifdef SYSCALL_DEBUG scdebug_ret(p, code, error, rval); #endif KERNEL_PROC_UNLOCK(p); userret(p); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { KERNEL_PROC_LOCK(p); ktrsysret(p, code, error, rval[0]); KERNEL_PROC_UNLOCK(p); } #endif }
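/*
 * The argument-gathering switch above relies on deliberate fallthrough:
 * the default case copyin()s the stack arguments, then falls through the
 * numbered cases that pick up register arguments one at a time. A
 * self-contained sketch of the same pattern for a hypothetical
 * 4-register ABI (gather_args and NREGARGS are illustrative names):
 */
#include <string.h>

#define NREGARGS	4

static void
gather_args(unsigned long *dst, const unsigned long *regs,
    const unsigned long *stack, unsigned nargs)
{
	switch (nargs) {
	default:
		/* Arguments beyond the registers live on the stack. */
		memcpy(dst + NREGARGS, stack,
		    (nargs - NREGARGS) * sizeof(*dst));
		/* FALLTHROUGH */
	case 4:
		dst[3] = regs[3];
		/* FALLTHROUGH */
	case 3:
		dst[2] = regs[2];
		/* FALLTHROUGH */
	case 2:
		dst[1] = regs[1];
		/* FALLTHROUGH */
	case 1:
		dst[0] = regs[0];
		/* FALLTHROUGH */
	case 0:
		break;
	}
}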
/* * syscall2 - MP aware system call request C handler * * A system call is essentially treated as a trap. The MP lock is not * held on entry or return. We are responsible for handling ASTs * (e.g. a task switch) prior to return. * * MPSAFE */ void syscall2(struct trapframe *frame) { struct thread *td = curthread; struct proc *p = td->td_proc; struct lwp *lp = td->td_lwp; caddr_t params; struct sysent *callp; register_t orig_tf_eflags; int sticks; int error; int narg; #ifdef INVARIANTS int crit_count = td->td_critcount; #endif int have_mplock = 0; u_int code; union sysunion args; #ifdef DIAGNOSTIC if (ISPL(frame->tf_cs) != SEL_UPL) { get_mplock(); panic("syscall"); /* NOT REACHED */ } #endif KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid, frame->tf_eax); userenter(td, p); /* lazy raise our priority */ /* * Misc */ sticks = (int)td->td_sticks; orig_tf_eflags = frame->tf_eflags; /* * Virtual kernel intercept - if a VM context managed by a virtual * kernel issues a system call the virtual kernel handles it, not us. * Restore the virtual kernel context and return from its system * call. The current frame is copied out to the virtual kernel. */ if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { vkernel_trap(lp, frame); error = EJUSTRETURN; callp = NULL; goto out; } /* * Get the system call parameters and account for time */ lp->lwp_md.md_regs = frame; params = (caddr_t)frame->tf_esp + sizeof(int); code = frame->tf_eax; if (p->p_sysent->sv_prepsyscall) { (*p->p_sysent->sv_prepsyscall)( frame, (int *)(&args.nosys.sysmsg + 1), &code, ¶ms); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. * fuword is MP aware. */ if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = fuword(params); params += sizeof(int); } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ code = fuword(params); params += sizeof(quad_t); } } code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg & SYF_ARGMASK; #if 0 if (p->p_sysent->sv_name[0] == 'L') kprintf("Linux syscall, code = %d\n", code); #endif /* * copyin is MP aware, but the tracing code is not */ if (narg && params) { error = copyin(params, (caddr_t)(&args.nosys.sysmsg + 1), narg * sizeof(register_t)); if (error) { #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { MAKEMPSAFE(have_mplock); ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { MAKEMPSAFE(have_mplock); ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); } #endif /* * For traditional syscall code edx is left untouched when 32 bit * results are returned. Since edx is loaded from fds[1] when the * system call returns we pre-set it here. */ args.sysmsg_fds[0] = 0; args.sysmsg_fds[1] = frame->tf_edx; /* * The syscall might manipulate the trap frame. If it does it * will probably return EJUSTRETURN. */ args.sysmsg_frame = frame; STOPEVENT(p, S_SCE, narg); /* MP aware */ /* * NOTE: All system calls run MPSAFE now. The system call itself * is responsible for getting the MP lock. */ error = (*callp->sy_call)(&args); out: /* * MP SAFE (we may or may not have the MP lock at this point) */ switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. 
*/ p = curproc; lp = curthread->td_lwp; frame->tf_eax = args.sysmsg_fds[0]; frame->tf_edx = args.sysmsg_fds[1]; frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ frame->tf_eip -= frame->tf_err; break; case EJUSTRETURN: break; case EASYNC: panic("Unexpected EASYNC return value (for now)"); default: bad: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } frame->tf_eax = error; frame->tf_eflags |= PSL_C; break; } /* * Traced syscall. trapsignal() is not MP aware. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { MAKEMPSAFE(have_mplock); frame->tf_eflags &= ~PSL_T; trapsignal(lp, SIGTRAP, TRAP_TRACE); } /* * Handle reschedule and other end-of-syscall issues */ userret(lp, frame, sticks); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { MAKEMPSAFE(have_mplock); ktrsysret(lp, code, error, args.sysmsg_result); } #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); userexit(lp); /* * Release the MP lock if we had to get it */ if (have_mplock) rel_mplock(); KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); KASSERT(&td->td_toks_base == td->td_toks_stop, ("syscall: extra tokens held after trap! %zd", td->td_toks_stop - &td->td_toks_base)); #endif }
void ia32_syscall(struct trapframe *frame) { caddr_t params; int i; struct sysent *callp; struct thread *td = curthread; struct proc *p = td->td_proc; register_t orig_tf_rflags; int error; int narg; u_int32_t args[8]; u_int64_t args64[8]; u_int code; ksiginfo_t ksi; PCPU_INC(cnt.v_syscall); td->td_pticks = 0; td->td_frame = frame; if (td->td_ucred != p->p_ucred) cred_update_thread(td); params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t); code = frame->tf_rax; orig_tf_rflags = frame->tf_rflags; if (p->p_sysent->sv_prepsyscall) { /* * The prep code is MP aware. */ (*p->p_sysent->sv_prepsyscall)(frame, args, &code, ¶ms); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. * fuword is MP aware. */ if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = fuword32(params); params += sizeof(int); } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. * We use a 32-bit fetch in case params is not * aligned. */ code = fuword32(params); params += sizeof(quad_t); } } if (p->p_sysent->sv_mask) code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg; /* * copyin and the ktrsyscall()/ktrsysret() code is MP-aware */ if (params != NULL && narg != 0) error = copyin(params, (caddr_t)args, (u_int)(narg * sizeof(int))); else error = 0; for (i = 0; i < narg; i++) args64[i] = args[i]; #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(code, narg, args64); #endif CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td, td->td_proc->p_pid, td->td_proc->p_comm, code); if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; STOPEVENT(p, S_SCE, narg); PTRACESTOP_SC(p, td, S_PT_SCE); AUDIT_SYSCALL_ENTER(code, td); error = (*callp->sy_call)(td, args64); AUDIT_SYSCALL_EXIT(error, td); } switch (error) { case 0: frame->tf_rax = td->td_retval[0]; frame->tf_rdx = td->td_retval[1]; frame->tf_rflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ frame->tf_rip -= frame->tf_err; break; case EJUSTRETURN: break; default: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } frame->tf_rax = error; frame->tf_rflags |= PSL_C; break; } /* * Traced syscall. */ if (orig_tf_rflags & PSL_T) { frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_rip; trapsignal(td, &ksi); } /* * Check for misbehavior. */ WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???"); KASSERT(td->td_critnest == 0, ("System call %s returning in a critical section", (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???")); KASSERT(td->td_locks == 0, ("System call %s returning with %d locks held", (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???", td->td_locks)); /* * Handle reschedule and other end-of-syscall issues */ userret(td, frame); CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td, td->td_proc->p_pid, td->td_proc->p_comm, code); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); #endif /* * This works because errno is findable through the * register set. 
If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); PTRACESTOP_SC(p, td, S_PT_SCX); }
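/*
 * The args[]/args64[] copy loop in ia32_syscall() above is the 32-to-64
 * compatibility widening: every argument arrives as 32 bits from the
 * 32-bit process and is zero-extended before the native 64-bit handler
 * sees it. In miniature (widen_args is an illustrative name):
 */
#include <stdint.h>

static void
widen_args(uint64_t *args64, const uint32_t *args32, int narg)
{
	int i;

	/*
	 * Plain assignment zero-extends; sign-sensitive arguments
	 * (e.g. negative offsets) need per-syscall fixups elsewhere.
	 */
	for (i = 0; i < narg; i++)
		args64[i] = args32[i];
}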
/* Instruction pointers operate differently on mc88110 */ void m88110_syscall(register_t code, struct trapframe *tf) { int i, nsys, nap; struct sysent *callp; struct proc *p; int error; register_t args[11], rval[2], *ap; u_quad_t sticks; #ifdef DIAGNOSTIC extern struct pcb *curpcb; #endif uvmexp.syscalls++; p = curproc; callp = p->p_emul->e_sysent; nsys = p->p_emul->e_nsysent; #ifdef DIAGNOSTIC if (USERMODE(tf->tf_epsr) == 0) panic("syscall"); if (curpcb != &p->p_addr->u_pcb) panic("syscall curpcb/ppcb"); if (tf != (struct trapframe *)&curpcb->user_state) panic("syscall trapframe"); #endif sticks = p->p_sticks; p->p_md.md_tf = tf; /* * For 88k, all the arguments are passed in the registers (r2-r12) * For syscall (and __syscall), r2 (and r3) has the actual code. * __syscall takes a quad syscall number, so that other * arguments are at their natural alignments. */ ap = &tf->tf_r[2]; nap = 11; /* r2-r12 */ switch (code) { case SYS_syscall: code = *ap++; nap--; break; case SYS___syscall: if (callp != sysent) break; code = ap[_QUAD_LOWWORD]; ap += 2; nap -= 2; break; } /* Callp currently points to syscall, which returns ENOSYS. */ if (code < 0 || code >= nsys) callp += p->p_emul->e_nosys; else { callp += code; i = callp->sy_argsize / sizeof(register_t); if (i > nap) panic("syscall nargs"); /* * just copy them; syscall stub made sure all the * args are moved from user stack to registers. */ bcopy((caddr_t)ap, (caddr_t)args, i * sizeof(register_t)); } #ifdef SYSCALL_DEBUG scdebug_call(p, code, args); #endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p, code, callp->sy_argsize, args); #endif rval[0] = 0; rval[1] = tf->tf_r[3]; #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) error = systrace_redirect(code, p, args, rval); else #endif error = (*callp->sy_call)(p, args, rval); /* * system call will look like: * ld r10, r31, 32; r10,r11,r12 might be garbage. * ld r11, r31, 36 * ld r12, r31, 40 * or r13, r0, <code> * tb0 0, r0, <128> <- exip * br err <- enip * jmp r1 * err: or.u r3, r0, hi16(errno) * st r2, r3, lo16(errno) * subu r2, r0, 1 * jmp r1 * * So, when we take syscall trap, exip/enip will be as * shown above. * Given this, * 1. If the system call returned 0, need to jmp r1. * exip += 8 * 2. If the system call returned an errno > 0, increment * exip += 4 and plug the value in r2. This will have us * executing "br err" on return to user space. * 3. If the system call code returned ERESTART, * we need to re-execute the trap instruction. leave exip as is. * 4. If the system call returned EJUSTRETURN, just return. * exip += 4 */ switch (error) { case 0: /* * If fork succeeded and we are the child, our stack * has moved and the pointer tf is no longer valid, * and p is wrong. Compute the new trapframe pointer. * (The trap frame invariably resides at the * tippity-top of the u. area.) */ p = curproc; tf = (struct trapframe *)USER_REGS(p); tf->tf_r[2] = rval[0]; tf->tf_r[3] = rval[1]; tf->tf_epsr &= ~PSR_C; /* skip two instructions */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip + 4; else tf->tf_exip += 4 + 4; break; case ERESTART: /* * Reexecute the trap. * exip is already at the trap instruction, so * there is nothing to do. 
*/ tf->tf_epsr &= ~PSR_C; break; case EJUSTRETURN: tf->tf_epsr &= ~PSR_C; /* skip one instruction */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip; else tf->tf_exip += 4; break; default: if (p->p_emul->e_errno) error = p->p_emul->e_errno[error]; tf->tf_r[2] = error; tf->tf_epsr |= PSR_C; /* fail */ /* skip one instruction */ if (tf->tf_exip & 1) tf->tf_exip = tf->tf_enip; else tf->tf_exip += 4; break; } #ifdef SYSCALL_DEBUG scdebug_ret(p, code, error, rval); #endif userret(p, tf, sticks); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p, code, error, rval[0]); #endif }