void
machdep_syscall64(x86_saved_state_t *state)
{
    int                    trapno;
    const machdep_call_t   *entry;
    x86_saved_state64_t    *regs;

    assert(is_saved_state64(state));
    regs = saved_state64(state);

    trapno = (int)(regs->rax & SYSCALL_NUMBER_MASK);

    DEBUG_KPRINT_SYSCALL_MDEP(
        "machdep_syscall64: trapno=%d\n", trapno);

    if (trapno < 0 || trapno >= machdep_call_count) {
        regs->rax = (unsigned int)kern_invalid(NULL);

        thread_exception_return();
        /* NOTREACHED */
    }
    entry = &machdep_call_table64[trapno];

    switch (entry->nargs) {
    case 0:
        regs->rax = (*entry->routine.args_0)();
        break;
    case 1:
        regs->rax = (*entry->routine.args64_1)(regs->rdi);
        break;
    case 2:
        regs->rax = (*entry->routine.args64_2)(regs->rdi, regs->rsi);
        break;
    default:
        panic("machdep_syscall64: too many args");
    }

    DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%llu\n", regs->rax);

    throttle_lowpri_io(1);

    thread_exception_return();
    /* NOTREACHED */
}
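/*
 * The handler above picks a typed entry point by nargs and feeds it
 * arguments straight from the saved registers (rdi, rsi). Below is a
 * minimal user-space sketch of that table-driven dispatch pattern; the
 * demo_* names are invented stand-ins for the real machdep_call_t
 * machinery, not kernel API.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t (*demo_args_0_t)(void);
typedef uint64_t (*demo_args_1_t)(uint64_t);

typedef struct {
    int nargs;                    /* how many register args the routine takes */
    union {
        demo_args_0_t args_0;
        demo_args_1_t args_1;
    } routine;
} demo_call_t;

static uint64_t demo_nullary(void)       { return 42; }
static uint64_t demo_double(uint64_t a)  { return a * 2; }

static const demo_call_t demo_table[] = {
    { 0, { .args_0 = demo_nullary } },
    { 1, { .args_1 = demo_double  } },
};

int main(void)
{
    uint64_t rdi = 21;            /* would come from saved_state64(state)->rdi */
    const demo_call_t *entry = &demo_table[1];
    uint64_t rax = 0;

    switch (entry->nargs) {       /* same shape as machdep_syscall64's switch */
    case 0: rax = (*entry->routine.args_0)();    break;
    case 1: rax = (*entry->routine.args_1)(rdi); break;
    default: return 1;            /* "too many args" */
    }
    printf("retval=%llu\n", (unsigned long long)rax);
    return 0;
}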
/*
 * This routine frees all the BSD context in uthread except the credential.
 * It does not free the uthread structure itself.
 */
void
uthread_cleanup(task_t task, void *uthread, void * bsd_info)
{
    struct _select *sel;
    uthread_t uth = (uthread_t)uthread;
    proc_t p = (proc_t)bsd_info;

    if (uth->uu_lowpri_window || uth->uu_throttle_info) {
        /*
         * task is marked as a low priority I/O type
         * and we've somehow managed to not dismiss the throttle
         * through the normal exit paths back to user space...
         * no need to throttle this thread since it's going away,
         * but we do need to update our bookkeeping w/r to throttled threads
         *
         * Calling this routine will clean up any throttle info reference
         * still in use by the thread.
         */
        throttle_lowpri_io(FALSE);
    }
    /*
     * Per-thread audit state should never last beyond system
     * call return.  Since we don't audit the thread creation/
     * removal, the thread state pointer should never be
     * non-NULL when we get here.
     */
    assert(uth->uu_ar == NULL);

    sel = &uth->uu_select;
    /* cleanup the select bit space */
    if (sel->nbytes) {
        FREE(sel->ibits, M_TEMP);
        FREE(sel->obits, M_TEMP);
        sel->nbytes = 0;
    }

    if (uth->uu_cdir) {
        vnode_rele(uth->uu_cdir);
        uth->uu_cdir = NULLVP;
    }

    if (uth->uu_allocsize && uth->uu_wqset) {
        kfree(uth->uu_wqset, uth->uu_allocsize);
        sel->count = 0;
        uth->uu_allocsize = 0;
        uth->uu_wqset = 0;
        sel->wql = 0;
    }

    if (uth->pth_name != NULL) {
        kfree(uth->pth_name, MAXTHREADNAMESIZE);
        uth->pth_name = 0;
    }
    if ((task != kernel_task) && p) {
        if (((uth->uu_flag & UT_VFORK) == UT_VFORK) && (uth->uu_proc != PROC_NULL)) {
            vfork_exit_internal(uth->uu_proc, 0, 1);
        }
        /*
         * Remove the thread from the process list and
         * transfer [appropriate] pending signals to the process.
         */
        if (get_bsdtask_info(task) == p) {
            proc_lock(p);
            TAILQ_REMOVE(&p->p_uthlist, uth, uu_list);
            p->p_siglist |= (uth->uu_siglist & execmask & (~p->p_sigignore | sigcantmask));
            proc_unlock(p);
        }
#if CONFIG_DTRACE
        struct dtrace_ptss_page_entry *tmpptr = uth->t_dtrace_scratch;
        uth->t_dtrace_scratch = NULL;
        if (tmpptr != NULL) {
            dtrace_ptss_release_entry(p, tmpptr);
        }
#endif
    }
}
/*
 * Function:	unix_syscall
 *
 * Inputs:	regs	- pointer to i386 save area
 *
 * Outputs:	none
 */
void
unix_syscall(x86_saved_state_t *state)
{
    thread_t        thread;
    void            *vt;
    unsigned int    code;
    struct sysent   *callp;

    int             error;
    vm_offset_t     params;
    struct proc     *p;
    struct uthread  *uthread;
    x86_saved_state32_t *regs;
    boolean_t       is_vfork;

    assert(is_saved_state32(state));
    regs = saved_state32(state);
#if DEBUG
    if (regs->eax == 0x800)
        thread_exception_return();
#endif
    thread = current_thread();
    uthread = get_bsdthread_info(thread);

    /* Get the appropriate proc; may be different from task's for vfork() */
    is_vfork = uthread->uu_flag & UT_VFORK;
    if (__improbable(is_vfork != 0))
        p = current_proc();
    else
        p = (struct proc *)get_bsdtask_info(current_task());

    /* Verify that we are not being called from a task without a proc */
    if (__improbable(p == NULL)) {
        regs->eax = EPERM;
        regs->efl |= EFL_CF;
        task_terminate_internal(current_task());
        thread_exception_return();
        /* NOTREACHED */
    }

    code = regs->eax & I386_SYSCALL_NUMBER_MASK;
    DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
        code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
    params = (vm_offset_t) (regs->uesp + sizeof (int));

    regs->efl &= ~(EFL_CF);

    callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

    if (__improbable(callp == sysent)) {
        code = fuword(params);
        params += sizeof(int);
        callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
    }

    vt = (void *)uthread->uu_arg;

    if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
        sy_munge_t *mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
        uint32_t nargs;

        assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
        nargs = callp->sy_arg_bytes;
        error = copyin((user_addr_t) params, (char *) vt, nargs);
        if (error) {
            regs->eax = error;
            regs->efl |= EFL_CF;
            thread_exception_return();
            /* NOTREACHED */
        }

        if (__probable(code != 180)) {
            int *ip = (int *)vt;

            KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
                *ip, *(ip+1), *(ip+2), *(ip+3), 0);
        }

#if CONFIG_REQUIRES_U32_MUNGING
        mungerp = callp->sy_arg_munge32;

        if (mungerp != NULL)
            (*mungerp)(vt);
#endif
    } else
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
            0, 0, 0, 0, 0);

    /*
     * Delayed binding of thread credential to process credential, if we
     * are not running with an explicitly set thread credential.
     */
    kauth_cred_uthread_update(uthread, p);

    uthread->uu_rval[0] = 0;
    uthread->uu_rval[1] = 0;
    uthread->uu_flag |= UT_NOTCANCELPT;
    uthread->syscall_code = code;

#ifdef JOE_DEBUG
    uthread->uu_iocount = 0;
    uthread->uu_vpindex = 0;
#endif

    AUDIT_SYSCALL_ENTER(code, p, uthread);
    error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
    AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
    if (uthread->uu_iocount)
        printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
    uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

    if (__improbable(error == ERESTART)) {
        /*
         * Move the user's pc back to repeat the syscall:
         * 5 bytes for a sysenter, or 2 for an int 8x.
         * The SYSENTER_TF_CS covers single-stepping over a sysenter
         * - see debug trap handler in idt.s/idt64.s
         */
        pal_syscall_restart(thread, state);
    }
    else if (error != EJUSTRETURN) {
        if (__improbable(error)) {
            regs->eax = error;
            regs->efl |= EFL_CF;    /* carry bit */
        } else { /* (not error) */
            /*
             * We split retval across two registers, in case the
             * syscall had a 64-bit return value, in which case
             * eax/edx matches the function call ABI.
             */
            regs->eax = uthread->uu_rval[0];
            regs->edx = uthread->uu_rval[1];
        }
    }

    DEBUG_KPRINT_SYSCALL_UNIX(
        "unix_syscall: error=%d retval=(%u,%u)\n",
        error, regs->eax, regs->edx);

    uthread->uu_flag &= ~UT_NOTCANCELPT;

    if (__improbable(uthread->uu_lowpri_window)) {
        /*
         * task is marked as a low priority I/O type
         * and the I/O we issued while in this system call
         * collided with normal I/O operations... we'll
         * delay in order to mitigate the impact of this
         * task on the normal operation of the system
         */
        throttle_lowpri_io(1);
    }
    if (__probable(code != 180))
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
            error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

    if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
        pal_execve_return(thread);
    }

    thread_exception_return();
    /* NOTREACHED */
}
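/*
 * On the error path above, failure is signalled by setting the carry flag
 * with the errno in eax; on success a potentially 64-bit result is split
 * across eax/edx, matching the i386 function-call ABI. A small user-space
 * sketch of that split-and-reassemble convention follows; the variable
 * names are illustrative, not kernel API.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t  rval = 0x1122334455667788LL;   /* e.g. an off_t from lseek() */

    /* Split across two 32-bit "registers", as uu_rval[0]/uu_rval[1]. */
    uint32_t eax = (uint32_t)((uint64_t)rval & 0xffffffffu);  /* low half  */
    uint32_t edx = (uint32_t)((uint64_t)rval >> 32);          /* high half */

    /* User space reassembles the pair into one 64-bit result. */
    int64_t reassembled = (int64_t)(((uint64_t)edx << 32) | eax);

    printf("eax=0x%08x edx=0x%08x -> 0x%016llx\n",
           eax, edx, (unsigned long long)reassembled);
    return reassembled == rval ? 0 : 1;
}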
void
unix_syscall_return(int error)
{
    thread_t        thread;
    struct uthread  *uthread;
    struct proc     *p;
    unsigned int    code;
    struct sysent   *callp;

    thread = current_thread();
    uthread = get_bsdthread_info(thread);

    pal_register_cache_state(thread, DIRTY);

    p = current_proc();

    if (proc_is64bit(p)) {
        x86_saved_state64_t *regs;

        regs = saved_state64(find_user_regs(thread));

        code = uthread->syscall_code;
        callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
        if (callp->sy_call == dtrace_systrace_syscall)
            dtrace_systrace_syscall_return(code, error, uthread->uu_rval);
#endif /* CONFIG_DTRACE */
        AUDIT_SYSCALL_EXIT(code, p, uthread, error);

        if (error == ERESTART) {
            /*
             * repeat the syscall
             */
            pal_syscall_restart(thread, find_user_regs(thread));
        }
        else if (error != EJUSTRETURN) {
            if (error) {
                regs->rax = error;
                regs->isf.rflags |= EFL_CF;    /* carry bit */
            } else { /* (not error) */

                switch (callp->sy_return_type) {
                case _SYSCALL_RET_INT_T:
                    regs->rax = uthread->uu_rval[0];
                    regs->rdx = uthread->uu_rval[1];
                    break;
                case _SYSCALL_RET_UINT_T:
                    regs->rax = ((u_int)uthread->uu_rval[0]);
                    regs->rdx = ((u_int)uthread->uu_rval[1]);
                    break;
                case _SYSCALL_RET_OFF_T:
                case _SYSCALL_RET_ADDR_T:
                case _SYSCALL_RET_SIZE_T:
                case _SYSCALL_RET_SSIZE_T:
                case _SYSCALL_RET_UINT64_T:
                    regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
                    regs->rdx = 0;
                    break;
                case _SYSCALL_RET_NONE:
                    break;
                default:
                    panic("unix_syscall: unknown return type");
                    break;
                }
                regs->isf.rflags &= ~EFL_CF;
            }
        }
        DEBUG_KPRINT_SYSCALL_UNIX(
            "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
            error, regs->rax, regs->rdx);
    } else {
        x86_saved_state32_t *regs;

        regs = saved_state32(find_user_regs(thread));

        regs->efl &= ~(EFL_CF);

        code = uthread->syscall_code;
        callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
        if (callp->sy_call == dtrace_systrace_syscall)
            dtrace_systrace_syscall_return(code, error, uthread->uu_rval);
#endif /* CONFIG_DTRACE */
        AUDIT_SYSCALL_EXIT(code, p, uthread, error);

        if (error == ERESTART) {
            pal_syscall_restart(thread, find_user_regs(thread));
        }
        else if (error != EJUSTRETURN) {
            if (error) {
                regs->eax = error;
                regs->efl |= EFL_CF;    /* carry bit */
            } else { /* (not error) */
                regs->eax = uthread->uu_rval[0];
                regs->edx = uthread->uu_rval[1];
            }
        }
        DEBUG_KPRINT_SYSCALL_UNIX(
            "unix_syscall_return: error=%d retval=(%u,%u)\n",
            error, regs->eax, regs->edx);
    }

    uthread->uu_flag &= ~UT_NOTCANCELPT;

    if (uthread->uu_lowpri_window) {
        /*
         * task is marked as a low priority I/O type
         * and the I/O we issued while in this system call
         * collided with normal I/O operations... we'll
         * delay in order to mitigate the impact of this
         * task on the normal operation of the system
         */
        throttle_lowpri_io(1);
    }
    if (code != 180)
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
            error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

    thread_exception_return();
    /* NOTREACHED */
}
void
unix_syscall64(x86_saved_state_t *state)
{
    thread_t        thread;
    void            *vt;
    unsigned int    code;
    struct sysent   *callp;
    int             args_in_regs;
    boolean_t       args_start_at_rdi;
    int             error;
    struct proc     *p;
    struct uthread  *uthread;
    x86_saved_state64_t *regs;

    assert(is_saved_state64(state));
    regs = saved_state64(state);
#if DEBUG
    if (regs->rax == 0x2000800)
        thread_exception_return();
#endif
    thread = current_thread();
    uthread = get_bsdthread_info(thread);

    /* Get the appropriate proc; may be different from task's for vfork() */
    if (__probable(!(uthread->uu_flag & UT_VFORK)))
        p = (struct proc *)get_bsdtask_info(current_task());
    else
        p = current_proc();

    /* Verify that we are not being called from a task without a proc */
    if (__improbable(p == NULL)) {
        regs->rax = EPERM;
        regs->isf.rflags |= EFL_CF;
        task_terminate_internal(current_task());
        thread_exception_return();
        /* NOTREACHED */
    }

    code = regs->rax & SYSCALL_NUMBER_MASK;
    DEBUG_KPRINT_SYSCALL_UNIX(
        "unix_syscall64: code=%d(%s) rip=%llx\n",
        code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
    callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

    vt = (void *)uthread->uu_arg;

    if (__improbable(callp == sysent)) {
        /*
         * indirect system call... system call number
         * passed as 'arg0'
         */
        code = regs->rdi;
        callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
        args_start_at_rdi = FALSE;
        args_in_regs = 5;
    } else {
        args_start_at_rdi = TRUE;
        args_in_regs = 6;
    }

    if (callp->sy_narg != 0) {
        assert(callp->sy_narg <= 8); /* size of uu_arg */

        args_in_regs = MIN(args_in_regs, callp->sy_narg);
        memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));

        if (code != 180) {
            uint64_t *ip = (uint64_t *)vt;

            KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
                (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
        }

        if (__improbable(callp->sy_narg > args_in_regs)) {
            int copyin_count;

            copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);

            error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
            if (error) {
                regs->rax = error;
                regs->isf.rflags |= EFL_CF;
                thread_exception_return();
                /* NOTREACHED */
            }
        }
    } else
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
            0, 0, 0, 0, 0);

    /*
     * Delayed binding of thread credential to process credential, if we
     * are not running with an explicitly set thread credential.
     */
    kauth_cred_uthread_update(uthread, p);

    uthread->uu_rval[0] = 0;
    uthread->uu_rval[1] = 0;
    uthread->uu_flag |= UT_NOTCANCELPT;
    uthread->syscall_code = code;

#ifdef JOE_DEBUG
    uthread->uu_iocount = 0;
    uthread->uu_vpindex = 0;
#endif

    AUDIT_SYSCALL_ENTER(code, p, uthread);
    error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
    AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
    if (uthread->uu_iocount)
        printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
    uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

    if (__improbable(error == ERESTART)) {
        /*
         * all system calls come through via the syscall instruction
         * in 64 bit mode... it's 2 bytes in length;
         * move the user's pc back to repeat the syscall:
         */
        pal_syscall_restart(thread, state);
    }
    else if (error != EJUSTRETURN) {
        if (__improbable(error)) {
            regs->rax = error;
            regs->isf.rflags |= EFL_CF;    /* carry bit */
        } else { /* (not error) */

            switch (callp->sy_return_type) {
            case _SYSCALL_RET_INT_T:
                regs->rax = uthread->uu_rval[0];
                regs->rdx = uthread->uu_rval[1];
                break;
            case _SYSCALL_RET_UINT_T:
                regs->rax = ((u_int)uthread->uu_rval[0]);
                regs->rdx = ((u_int)uthread->uu_rval[1]);
                break;
            case _SYSCALL_RET_OFF_T:
            case _SYSCALL_RET_ADDR_T:
            case _SYSCALL_RET_SIZE_T:
            case _SYSCALL_RET_SSIZE_T:
            case _SYSCALL_RET_UINT64_T:
                regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
                regs->rdx = 0;
                break;
            case _SYSCALL_RET_NONE:
                break;
            default:
                panic("unix_syscall: unknown return type");
                break;
            }
            regs->isf.rflags &= ~EFL_CF;
        }
    }
    DEBUG_KPRINT_SYSCALL_UNIX(
        "unix_syscall64: error=%d retval=(%llu,%llu)\n",
        error, regs->rax, regs->rdx);

    uthread->uu_flag &= ~UT_NOTCANCELPT;

    if (__improbable(uthread->uu_lowpri_window)) {
        /*
         * task is marked as a low priority I/O type
         * and the I/O we issued while in this system call
         * collided with normal I/O operations... we'll
         * delay in order to mitigate the impact of this
         * task on the normal operation of the system
         */
        throttle_lowpri_io(1);
    }
    if (__probable(code != 180))
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
            error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

    thread_exception_return();
    /* NOTREACHED */
}
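/*
 * In the _SYSCALL_RET_OFF_T/_SYSCALL_RET_UINT64_T cases above, the two
 * adjacent 32-bit uu_rval slots are reinterpreted as a single 64-bit
 * value. A standalone sketch of that layout assumption (names here are
 * illustrative; the real code uses uthread->uu_rval and a direct cast):
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Two adjacent 32-bit return slots, as in uthread->uu_rval[2]. */
    int32_t rval[2];
    uint64_t wide = 0xdeadbeefcafef00dULL;

    /* A 64-bit syscall result is stored across both slots... */
    memcpy(rval, &wide, sizeof(wide));

    /* ...and read back as one 64-bit quantity, mirroring
     * regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
     * (memcpy is used here to stay clear of strict-aliasing rules). */
    uint64_t rax;
    memcpy(&rax, rval, sizeof(rax));

    assert(rax == wide);
    printf("rax=0x%016llx\n", (unsigned long long)rax);
    return 0;
}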
void
mach_call_munger64(x86_saved_state_t *state)
{
    int call_number;
    int argc;
    mach_call_t mach_call;
    struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    x86_saved_state64_t *regs;

#if PROC_REF_DEBUG
    struct uthread *ut = get_bsdthread_info(current_thread());

    uthread_reset_proc_refcount(ut);
#endif

    assert(is_saved_state64(state));
    regs = saved_state64(state);

    call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK);

    DEBUG_KPRINT_SYSCALL_MACH(
        "mach_call_munger64: code=%d(%s)\n",
        call_number, mach_syscall_name_table[call_number]);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_START,
        regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);

    if (call_number < 0 || call_number >= mach_trap_count) {
        i386_exception(EXC_SYSCALL, regs->rax, 1);
        /* NOTREACHED */
    }
    mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

    if (mach_call == (mach_call_t)kern_invalid) {
        i386_exception(EXC_SYSCALL, regs->rax, 1);
        /* NOTREACHED */
    }
    argc = mach_trap_table[call_number].mach_trap_arg_count;

    if (argc) {
        int args_in_regs = MIN(6, argc);

        memcpy(&args.arg1, &regs->rdi, args_in_regs * sizeof(syscall_arg_t));

        if (argc > 6) {
            int copyin_count;

            assert(argc <= 9);
            copyin_count = (argc - 6) * (int)sizeof(syscall_arg_t);

            if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&args.arg7, copyin_count)) {
                regs->rax = KERN_INVALID_ARGUMENT;

                thread_exception_return();
                /* NOTREACHED */
            }
        }
    }

#ifdef MACH_BSD
    mach_kauth_cred_uthread_update();
#endif

    regs->rax = (uint64_t)mach_call((void *)&args);

    DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
        regs->rax, 0, 0, 0, 0);

    throttle_lowpri_io(1);

#if PROC_REF_DEBUG
    if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
        panic("system call returned with uu_proc_refcount != 0");
    }
#endif

    thread_exception_return();
    /* NOTREACHED */
}
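/*
 * The argument-gathering above takes the first six arguments from the
 * saved registers and pulls any remainder (up to nine total) off the
 * user stack, just above the return address that rsp points at. A small
 * user-space sketch of the same two-stage copy; the gpregs/user_stack
 * arrays are invented stand-ins for the saved state and user memory.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    uint64_t args[9] = { 0 };                       /* struct mach_call_args shape */
    uint64_t gpregs[6] = { 1, 2, 3, 4, 5, 6 };      /* rdi, rsi, rdx, r10, r8, r9 */
    uint64_t user_stack[] = { 0 /* return addr slot */, 7, 8, 9 };
    int argc = 9;

    /* First six arguments come straight from the saved registers... */
    int args_in_regs = DEMO_MIN(6, argc);
    memcpy(&args[0], gpregs, args_in_regs * sizeof(uint64_t));

    /* ...the rest sit on the user stack past the return address, which is
     * what the copyin() from rsp + sizeof(user_addr_t) fetches. */
    if (argc > 6)
        memcpy(&args[6], &user_stack[1], (argc - 6) * sizeof(uint64_t));

    for (int i = 0; i < argc; i++)
        printf("arg%d=%llu\n", i + 1, (unsigned long long)args[i]);
    return 0;
}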
void
mach_call_munger(x86_saved_state_t *state)
{
    int argc;
    int call_number;
    mach_call_t mach_call;
    kern_return_t retval;
    struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    x86_saved_state32_t *regs;

#if PROC_REF_DEBUG
    struct uthread *ut = get_bsdthread_info(current_thread());

    uthread_reset_proc_refcount(ut);
#endif

    assert(is_saved_state32(state));
    regs = saved_state32(state);

    call_number = -(regs->eax);

    DEBUG_KPRINT_SYSCALL_MACH(
        "mach_call_munger: code=%d(%s)\n",
        call_number, mach_syscall_name_table[call_number]);
#if DEBUG_TRACE
    kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number);
#endif

    if (call_number < 0 || call_number >= mach_trap_count) {
        i386_exception(EXC_SYSCALL, call_number, 1);
        /* NOTREACHED */
    }
    mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

    if (mach_call == (mach_call_t)kern_invalid) {
        DEBUG_KPRINT_SYSCALL_MACH(
            "mach_call_munger: kern_invalid 0x%x\n", regs->eax);
        i386_exception(EXC_SYSCALL, call_number, 1);
        /* NOTREACHED */
    }

    argc = mach_trap_table[call_number].mach_trap_arg_count;
    if (argc) {
        retval = mach_call_arg_munger32(regs->uesp, &args, &mach_trap_table[call_number]);
        if (retval != KERN_SUCCESS) {
            regs->eax = retval;

            DEBUG_KPRINT_SYSCALL_MACH(
                "mach_call_munger: retval=0x%x\n", retval);

            thread_exception_return();
            /* NOTREACHED */
        }
    }

#ifdef MACH_BSD
    mach_kauth_cred_uthread_update();
#endif

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
        args.arg1, args.arg2, args.arg3, args.arg4, 0);

    retval = mach_call(&args);

    DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
        retval, 0, 0, 0, 0);

    regs->eax = retval;

    throttle_lowpri_io(1);

#if PROC_REF_DEBUG
    if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
        panic("system call returned with uu_proc_refcount != 0");
    }
#endif

    thread_exception_return();
    /* NOTREACHED */
}
void
machdep_syscall(x86_saved_state_t *state)
{
    int                    args[machdep_call_count];
    int                    trapno;
    int                    nargs;
    const machdep_call_t   *entry;
    x86_saved_state32_t    *regs;

    assert(is_saved_state32(state));
    regs = saved_state32(state);

    trapno = regs->eax;
#if DEBUG_TRACE
    kprintf("machdep_syscall(0x%08x) code=%d\n", regs, trapno);
#endif

    DEBUG_KPRINT_SYSCALL_MDEP(
        "machdep_syscall: trapno=%d\n", trapno);

    if (trapno < 0 || trapno >= machdep_call_count) {
        regs->eax = (unsigned int)kern_invalid(NULL);

        thread_exception_return();
        /* NOTREACHED */
    }
    entry = &machdep_call_table[trapno];
    nargs = entry->nargs;

    if (nargs != 0) {
        if (copyin((user_addr_t) regs->uesp + sizeof (int),
                (char *) args, (nargs * sizeof (int)))) {
            regs->eax = KERN_INVALID_ADDRESS;

            thread_exception_return();
            /* NOTREACHED */
        }
    }
    switch (nargs) {
    case 0:
        regs->eax = (*entry->routine.args_0)();
        break;
    case 1:
        regs->eax = (*entry->routine.args_1)(args[0]);
        break;
    case 2:
        regs->eax = (*entry->routine.args_2)(args[0], args[1]);
        break;
    case 3:
        if (!entry->bsd_style)
            regs->eax = (*entry->routine.args_3)(args[0], args[1], args[2]);
        else {
            int error;
            uint32_t rval;

            error = (*entry->routine.args_bsd_3)(&rval, args[0], args[1], args[2]);
            if (error) {
                regs->eax = error;
                regs->efl |= EFL_CF;    /* carry bit */
            } else {
                regs->eax = rval;
                regs->efl &= ~EFL_CF;
            }
        }
        break;
    case 4:
        regs->eax = (*entry->routine.args_4)(args[0], args[1], args[2], args[3]);
        break;

    default:
        panic("machdep_syscall: too many args");
    }

    DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs->eax);

    throttle_lowpri_io(1);

    thread_exception_return();
    /* NOTREACHED */
}
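/*
 * For bsd_style entries above, the routine returns an errno separately
 * from the result value, and the handler folds the pair back into the
 * eax/carry-flag convention. A hedged standalone sketch of that calling
 * shape; the demo_* names and DEMO_EFL_CF constant are invented, not
 * kernel API.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_EFL_CF 0x1    /* carry flag bit, as in the i386 EFLAGS register */

/* Invented BSD-style routine: the errno is the return value, and the
 * real result comes back through *rval, as with entry->routine.args_bsd_3. */
static int demo_bsd_routine(uint32_t *rval, int a, int b, int c)
{
    if (a < 0)
        return 22;             /* EINVAL-style failure */
    *rval = (uint32_t)(a + b + c);
    return 0;
}

int main(void)
{
    uint32_t eax = 0, efl = 0, rval = 0;

    int error = demo_bsd_routine(&rval, 1, 2, 3);
    if (error) {
        eax = (uint32_t)error;
        efl |= DEMO_EFL_CF;    /* carry set: eax holds an errno */
    } else {
        eax = rval;
        efl &= ~DEMO_EFL_CF;   /* carry clear: eax holds the result */
    }
    printf("eax=%u carry=%u\n", eax, efl & DEMO_EFL_CF);
    return 0;
}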
int
hfs_vnop_lookup(struct vnop_lookup_args *ap)
{
    struct vnode *dvp = ap->a_dvp;
    struct vnode *vp;
    struct cnode *cp;
    struct cnode *dcp;
    struct hfsmount *hfsmp;
    int error;
    struct vnode **vpp = ap->a_vpp;
    struct componentname *cnp = ap->a_cnp;
    struct proc *p = vfs_context_proc(ap->a_context);
    int flags = cnp->cn_flags;
    int force_casesensitive_lookup = proc_is_forcing_hfs_case_sensitivity(p);
    int cnode_locked;

    *vpp = NULL;
    dcp = VTOC(dvp);
    hfsmp = VTOHFS(dvp);

    /*
     * Lookup an entry in the cache
     *
     * If the lookup succeeds, the vnode is returned in *vpp,
     * and a status of -1 is returned.
     *
     * If the lookup determines that the name does not exist
     * (negative caching), a status of ENOENT is returned.
     *
     * If the lookup fails, a status of zero is returned.
     */
    error = cache_lookup(dvp, vpp, cnp);
    if (error != -1) {
        if ((error == ENOENT) && (cnp->cn_nameiop != CREATE))
            goto exit;    /* found a negative cache entry */
        goto lookup;      /* did not find it in the cache */
    }
    /*
     * We have a name that matched
     * cache_lookup returns the vp with an iocount reference already taken
     */
    error = 0;
    vp = *vpp;
    cp = VTOC(vp);

    /* We aren't allowed to vend out vp's via lookup to the hidden directory */
    if (cp->c_cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
        cp->c_cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
        /* Drop the iocount from cache_lookup */
        vnode_put (vp);
        error = ENOENT;
        goto exit;
    }

    /*
     * If this is a hard-link vnode then we need to update
     * the name (of the link), the parent ID, the cnid, the
     * text encoding and the catalog hint.  This enables
     * getattrlist calls to return the correct link info.
     */

    /*
     * Alternatively, if we are forcing a case-sensitive lookup
     * on a case-insensitive volume, the namecache entry
     * may have been for an incorrect case. Since we cannot
     * determine case vs. normalization, redrive the catalog
     * lookup based on any byte mismatch.
     */
    if (((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK))
        || (force_casesensitive_lookup && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE))) {
        int stale_link = 0;

        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        if ((cp->c_parentcnid != dcp->c_cnid) ||
            (cnp->cn_namelen != cp->c_desc.cd_namelen) ||
            (bcmp(cnp->cn_nameptr, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0)) {
            struct cat_desc desc;
            struct cat_attr lookup_attr;
            int lockflags;

            if (force_casesensitive_lookup && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) {
                /*
                 * Since the name in the cnode doesn't match our lookup
                 * string exactly, do a full lookup.
                 */
                hfs_unlock (cp);

                vnode_put(vp);
                goto lookup;
            }

            /*
             * Get an updated descriptor
             */
            desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
            desc.cd_namelen = cnp->cn_namelen;
            desc.cd_parentcnid = dcp->c_fileid;
            desc.cd_hint = dcp->c_childhint;
            desc.cd_encoding = 0;
            desc.cd_cnid = 0;
            desc.cd_flags = S_ISDIR(cp->c_mode) ? CD_ISDIR : 0;

            /*
             * Because lookups call replace_desc to put a new descriptor in
             * the cnode we are modifying, it is possible that this cnode's
             * descriptor is out of date for the parent ID / name that
             * we are trying to look up. (It may point to a different hardlink).
             *
             * We need to be cautious that when re-supplying the
             * descriptor below the results of the catalog lookup
             * still point to the same raw inode for the hardlink.  This would
             * not be the case if we found something in the cache above but
             * the vnode it returned no longer has a valid hardlink for the
             * parent ID/filename combo we are requesting.  (This is because
             * hfs_unlink does not directly trigger namecache removal).
             *
             * As a result, before vending out the vnode (and replacing
             * its descriptor) verify that the fileID is the same by comparing
             * the in-cnode attributes vs. the one returned from the lookup call
             * below.  If they do not match, treat this lookup as if we never hit
             * in the cache at all.
             */

            lockflags = hfs_systemfile_lock(VTOHFS(dvp), SFL_CATALOG, HFS_SHARED_LOCK);

            error = cat_lookup(VTOHFS(vp), &desc, 0, 0, &desc, &lookup_attr, NULL, NULL);

            hfs_systemfile_unlock(VTOHFS(dvp), lockflags);

            /*
             * Note that cat_lookup may fail to find something with the name provided in the
             * stack-based descriptor above. In that case, an ENOENT is a legitimate errno
             * to be placed in error, which will get returned in the fastpath below.
             */
            if (error == 0) {
                if (lookup_attr.ca_fileid == cp->c_attr.ca_fileid) {
                    /* It still points to the right raw inode.  Replacing the descriptor is fine */
                    replace_desc (cp, &desc);

                    /*
                     * Save the origin info for file and directory hardlinks.  Directory hardlinks
                     * need the origin for '..' lookups, and file hardlinks need it to ensure that
                     * competing lookups do not cause us to vend different hardlinks than the ones requested.
                     * We want to restrict saving the cache entries to LOOKUP namei operations, since
                     * we're really doing this to protect getattr.
                     */
                    if (cnp->cn_nameiop == LOOKUP) {
                        hfs_savelinkorigin(cp, dcp->c_fileid);
                    }
                }
                else {
                    /* If the fileID does not match then do NOT replace the descriptor! */
                    stale_link = 1;
                }
            }
        }
        hfs_unlock (cp);

        if (stale_link) {
            /*
             * If we had a stale_link, then we need to pretend as though
             * we never found this vnode and force a lookup through the
             * traditional path.  Drop the iocount acquired through
             * cache_lookup above and force a cat lookup / getnewvnode
             */
            vnode_put(vp);
            goto lookup;
        }

        if (error) {
            /*
             * If the cat_lookup failed then the caller will not expect
             * a vnode with an iocount on it.
             */
            vnode_put(vp);
        }
    }
    goto exit;

lookup:
    /*
     * The vnode was not in the name cache or it was stale.
     *
     * So we need to do a real lookup.
     */
    cnode_locked = 0;

    error = hfs_lookup(dvp, vpp, cnp, &cnode_locked, force_casesensitive_lookup);

    if (cnode_locked)
        hfs_unlock(VTOC(*vpp));
exit:
    {
        uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());

        /*
         * check to see if we issued any I/O while completing this lookup and
         * this thread/task is throttleable... if so, throttle now
         *
         * this allows us to throttle in between multiple meta data reads that
         * might result due to looking up a long pathname (since we'll have to
         * re-enter hfs_vnop_lookup for each component of the pathname not in
         * the VFS cache), instead of waiting until the entire path lookup has
         * completed and throttling at the system call return
         */
        if (__improbable(ut->uu_lowpri_window)) {
            throttle_lowpri_io(1);
        }
    }
    return (error);
}
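/*
 * The tri-state return of cache_lookup (-1 hit, ENOENT negative entry,
 * 0 miss) drives the branch structure at the top of hfs_vnop_lookup. A
 * minimal sketch of consuming that convention with a stubbed-out cache;
 * demo_cache_lookup is invented, and the CREATE special case from the
 * real code is omitted for brevity.
 */
#include <errno.h>
#include <stdio.h>

/* Invented stub mimicking cache_lookup()'s return convention:
 *   -1     -> positive hit, *vpp would hold the cached vnode
 *   ENOENT -> negative entry: the name is known not to exist
 *   0      -> miss: fall through to a real catalog lookup
 */
static int demo_cache_lookup(const char *name)
{
    if (name[0] == '+') return -1;
    if (name[0] == '-') return ENOENT;
    return 0;
}

static void demo_lookup(const char *name)
{
    int error = demo_cache_lookup(name);

    if (error != -1) {
        if (error == ENOENT) {              /* negative cache entry */
            printf("%s: ENOENT from cache\n", name);
            return;
        }
        printf("%s: miss, doing full lookup\n", name);  /* error == 0 */
        return;
    }
    printf("%s: cache hit\n", name);        /* error == -1: use cached vnode */
}

int main(void)
{
    demo_lookup("+exists");
    demo_lookup("-gone");
    demo_lookup("unknown");
    return 0;
}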
void
mach_call_munger64(x86_saved_state_t *state)
{
    int call_number;
    int argc;
    mach_call_t mach_call;
    x86_saved_state64_t *regs;

    assert(is_saved_state64(state));
    regs = saved_state64(state);

    call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK);

    DEBUG_KPRINT_SYSCALL_MACH(
        "mach_call_munger64: code=%d(%s)\n",
        call_number, mach_syscall_name_table[call_number]);

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,
        (call_number)) | DBG_FUNC_START,
        regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);

    if (call_number < 0 || call_number >= mach_trap_count) {
        i386_exception(EXC_SYSCALL, regs->rax, 1);
        /* NOTREACHED */
    }
    mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

    if (mach_call == (mach_call_t)kern_invalid) {
        i386_exception(EXC_SYSCALL, regs->rax, 1);
        /* NOTREACHED */
    }
    argc = mach_trap_table[call_number].mach_trap_arg_count;

    if (argc > 6) {
        int copyin_count;

        copyin_count = (argc - 6) * (int)sizeof(uint64_t);

        if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count)) {
            regs->rax = KERN_INVALID_ARGUMENT;

            thread_exception_return();
            /* NOTREACHED */
        }
    }

#ifdef MACH_BSD
    mach_kauth_cred_uthread_update();
#endif

    regs->rax = (uint64_t)mach_call((void *)(&regs->rdi));

    DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax);

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,
        (call_number)) | DBG_FUNC_END,
        regs->rax, 0, 0, 0, 0);

    throttle_lowpri_io(TRUE);

    thread_exception_return();
    /* NOTREACHED */
}
void
mach_call_munger(x86_saved_state_t *state)
{
    int argc;
    int call_number;
    mach_call_t mach_call;
    kern_return_t retval;
    struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    x86_saved_state32_t *regs;

    assert(is_saved_state32(state));
    regs = saved_state32(state);

    call_number = -(regs->eax);

    DEBUG_KPRINT_SYSCALL_MACH(
        "mach_call_munger: code=%d(%s)\n",
        call_number, mach_syscall_name_table[call_number]);
#if DEBUG_TRACE
    kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number);
#endif

    if (call_number < 0 || call_number >= mach_trap_count) {
        i386_exception(EXC_SYSCALL, call_number, 1);
        /* NOTREACHED */
    }
    mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

    if (mach_call == (mach_call_t)kern_invalid) {
        DEBUG_KPRINT_SYSCALL_MACH(
            "mach_call_munger: kern_invalid 0x%x\n", regs->eax);
        i386_exception(EXC_SYSCALL, call_number, 1);
        /* NOTREACHED */
    }

    argc = mach_trap_table[call_number].mach_trap_arg_count;
    if (argc) {
        retval = mach_call_arg_munger32(regs->uesp, argc, call_number, &args);
        if (retval != KERN_SUCCESS) {
            regs->eax = retval;

            DEBUG_KPRINT_SYSCALL_MACH(
                "mach_call_munger: retval=0x%x\n", retval);

            thread_exception_return();
            /* NOTREACHED */
        }
    }

#ifdef MACH_BSD
    mach_kauth_cred_uthread_update();
#endif

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,
        (call_number)) | DBG_FUNC_START,
        args.arg1, args.arg2, args.arg3, args.arg4, 0);

    retval = mach_call(&args);

    DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,
        (call_number)) | DBG_FUNC_END,
        retval, 0, 0, 0, 0);

    regs->eax = retval;

    throttle_lowpri_io(TRUE);

    thread_exception_return();
    /* NOTREACHED */
}
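/*
 * On 32-bit x86, Mach traps are distinguished from BSD syscalls by
 * carrying a negative number in eax; both versions of mach_call_munger
 * recover the trap index by negation. A trivial illustration of that
 * convention (the specific trap number shown is an example, not a
 * guaranteed assignment across XNU releases):
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Mach traps are invoked with a negative number in eax; BSD
     * syscalls use positive numbers. */
    int32_t eax = -31;                 /* e.g. a mach_msg-style trap */

    int call_number = -eax;            /* as in mach_call_munger */
    printf("eax=%d -> mach trap index %d\n", eax, call_number);
    return 0;
}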