static void profile_tick(void *arg) { profile_probe_t *prof = arg; #if defined(__x86_64__) x86_saved_state_t *kern_regs = find_kern_regs(current_thread()); if (NULL != kern_regs) { /* Kernel was interrupted. */ dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, 0, 0, 0); } else { pal_register_cache_state(current_thread(), VALID); /* Possibly a user interrupt */ x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); if (NULL == tagged_regs) { /* Too bad, so sad, no useful interrupt state. */ dtrace_probe(prof->prof_id, 0xcafebabe, 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */ } else if (is_saved_state64(tagged_regs)) { x86_saved_state64_t *regs = saved_state64(tagged_regs); dtrace_probe(prof->prof_id, 0x0, regs->isf.rip, 0, 0, 0); } else { x86_saved_state32_t *regs = saved_state32(tagged_regs); dtrace_probe(prof->prof_id, 0x0, regs->eip, 0, 0, 0); } } #else #error Unknown architecture #endif }
/*
 * rtclock_intr: real-time clock device interrupt.
 *
 * Extracts the interrupted program counter and privilege level from the
 * saved state (64-bit or 32-bit flavour) and forwards both to the generic
 * timer_intr().  Asserts that preemption is disabled and interrupts are
 * off on entry.
 */
void
rtclock_intr(
	x86_saved_state_t	*tregs)
{
	uint64_t	pc;
	boolean_t	from_user;

	assert(get_preemption_level() > 0);
	assert(!ml_get_interrupts_enabled());

	if (is_saved_state64(tregs) == TRUE) {
		x86_saved_state64_t *iss64 = saved_state64(tregs);

		/* Low two bits of the code selector are non-zero outside ring 0. */
		from_user = (iss64->isf.cs & 0x03) ? TRUE : FALSE;
		pc = iss64->isf.rip;
	} else {
		x86_saved_state32_t *iss32 = saved_state32(tregs);

		from_user = (iss32->cs & 0x03) ? TRUE : FALSE;
		pc = iss32->eip;
	}

	/* call the generic etimer */
	timer_intr(from_user, pc);
}
/*
 * pal_syscall_restart: wind the saved user pc back over the system call
 * instruction so that it is re-executed on return to user mode.
 *
 * 32-bit state: rewind 5 bytes when the saved cs is SYSENTER_CS or
 * SYSENTER_TF_CS, otherwise 2 bytes.
 * 64-bit state: always rewind 2 bytes.
 */
void
pal_syscall_restart(thread_t thread __unused, x86_saved_state_t *state)
{
	/* work out which flavour thread it is */
	if (!is_saved_state32(state)) {
		assert( is_saved_state64(state) );

		/* Only one instruction for 64-bit threads */
		saved_state64(state)->isf.rip -= 2;
		return;
	}

	x86_saved_state32_t *iss32 = saved_state32(state);
	const int rewind =
	    (iss32->cs == SYSENTER_CS || iss32->cs == SYSENTER_TF_CS) ? 5 : 2;

	iss32->eip -= rewind;
}
/* * thread_fast_set_cthread_self: Sets the machine kernel thread ID of the * current thread to the given thread ID; fast version for 32-bit processes * * Parameters: self Thread ID to set * * Returns: 0 Success * !0 Not success */ kern_return_t thread_fast_set_cthread_self(uint32_t self) { thread_t thread = current_thread(); pcb_t pcb = thread->machine.pcb; struct real_descriptor desc = { .limit_low = 1, .limit_high = 0, .base_low = self & 0xffff, .base_med = (self >> 16) & 0xff, .base_high = (self >> 24) & 0xff, .access = ACC_P|ACC_PL_U|ACC_DATA_W, .granularity = SZ_32|SZ_G, }; current_thread()->machine.pcb->cthread_self = (uint64_t) self; /* preserve old func too */ /* assign descriptor */ mp_disable_preemption(); pcb->cthread_desc = desc; *ldt_desc_p(USER_CTHREAD) = desc; saved_state32(pcb->iss)->gs = USER_CTHREAD; mp_enable_preemption(); return (USER_CTHREAD); } /* * thread_fast_set_cthread_self64: Sets the machine kernel thread ID of the * current thread to the given thread ID; fast version for 64-bit processes * * Parameters: self Thread ID * * Returns: 0 Success * !0 Not success */ kern_return_t thread_fast_set_cthread_self64(uint64_t self) { pcb_t pcb = current_thread()->machine.pcb; cpu_data_t *cdp; /* check for canonical address, set 0 otherwise */ if (!IS_USERADDR64_CANONICAL(self)) self = 0ULL; pcb->cthread_self = self; mp_disable_preemption(); cdp = current_cpu_datap(); #if defined(__x86_64__) if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) wrmsr64(MSR_IA32_KERNEL_GS_BASE, self); #endif cdp->cpu_uber.cu_user_gs_base = self; mp_enable_preemption(); return (USER_CTHREAD); }
/*
 * Function:	unix_syscall
 *
 * Purpose:	Dispatch a BSD system call issued from a 32-bit saved state:
 *		look up the sysent entry, copy in and munge the arguments,
 *		invoke the handler, and write the result (or error + carry
 *		flag) back into the saved registers before returning to user
 *		mode through thread_exception_return().
 *
 * Inputs:	state	- pointer to the saved state; asserted to be the
 *			  32-bit flavour
 *
 * Outputs:	none (does not return to its caller)
 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;
	unsigned int		code;
	struct sysent		*callp;

	int			error;
	vm_offset_t		params;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state32_t	*regs;
	boolean_t		is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if DEBUG
	/* Debug builds short-circuit the call when eax is 0x800. */
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else
		p = (struct proc *)get_bsdtask_info(current_task());

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->eax = EPERM;
		regs->efl |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);

	/* User arguments start one int above the user stack pointer. */
	params = (vm_offset_t) (regs->uesp + sizeof (int));

	/* Start with the carry (error) flag clear. */
	regs->efl &= ~(EFL_CF);

	/* Out-of-range codes are routed to sysent slot 63. */
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	/*
	 * Slot 0 is the indirect system call: the real code is the first
	 * word of the user arguments, and the remaining arguments follow it.
	 */
	if (__improbable(callp == sysent)) {
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}

	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	nargs;

		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		/* Despite the name, nargs is a byte count (sy_arg_bytes). */
		nargs = callp->sy_arg_bytes;
		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		/*
		 * NOTE(review): code 180 is excluded from tracing here and at
		 * exit; presumably it is the trace syscall itself - confirm.
		 */
		if (__probable(code != 180)) {
			int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

#if CONFIG_REQUIRES_U32_MUNGING
		/* Run the per-syscall 32-bit argument munger, if any, in place. */
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	/* Remembered so unix_syscall_return() can find the sysent entry. */
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */

		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
			regs->eax = uthread->uu_rval[0];
			regs->edx = uthread->uu_rval[1];
		}
	}
	/* EJUSTRETURN: the saved registers are left exactly as they are. */

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	/* A successful, non-vfork execve gets platform-specific post-processing. */
	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

	thread_exception_return();
	/* NOTREACHED */
}
void unix_syscall_return(int error) { thread_t thread; struct uthread *uthread; struct proc *p; unsigned int code; struct sysent *callp; thread = current_thread(); uthread = get_bsdthread_info(thread); pal_register_cache_state(thread, DIRTY); p = current_proc(); if (proc_is64bit(p)) { x86_saved_state64_t *regs; regs = saved_state64(find_user_regs(thread)); code = uthread->syscall_code; callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; #if CONFIG_DTRACE if (callp->sy_call == dtrace_systrace_syscall) dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); #endif /* CONFIG_DTRACE */ AUDIT_SYSCALL_EXIT(code, p, uthread, error); if (error == ERESTART) { /* * repeat the syscall */ pal_syscall_restart( thread, find_user_regs(thread) ); } else if (error != EJUSTRETURN) { if (error) { regs->rax = error; regs->isf.rflags |= EFL_CF; /* carry bit */ } else { /* (not error) */ switch (callp->sy_return_type) { case _SYSCALL_RET_INT_T: regs->rax = uthread->uu_rval[0]; regs->rdx = uthread->uu_rval[1]; break; case _SYSCALL_RET_UINT_T: regs->rax = ((u_int)uthread->uu_rval[0]); regs->rdx = ((u_int)uthread->uu_rval[1]); break; case _SYSCALL_RET_OFF_T: case _SYSCALL_RET_ADDR_T: case _SYSCALL_RET_SIZE_T: case _SYSCALL_RET_SSIZE_T: case _SYSCALL_RET_UINT64_T: regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); regs->rdx = 0; break; case _SYSCALL_RET_NONE: break; default: panic("unix_syscall: unknown return type"); break; } regs->isf.rflags &= ~EFL_CF; } } DEBUG_KPRINT_SYSCALL_UNIX( "unix_syscall_return: error=%d retval=(%llu,%llu)\n", error, regs->rax, regs->rdx); } else { x86_saved_state32_t *regs; regs = saved_state32(find_user_regs(thread)); regs->efl &= ~(EFL_CF); code = uthread->syscall_code; callp = (code >= NUM_SYSENT) ? 
&sysent[63] : &sysent[code]; #if CONFIG_DTRACE if (callp->sy_call == dtrace_systrace_syscall) dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); #endif /* CONFIG_DTRACE */ AUDIT_SYSCALL_EXIT(code, p, uthread, error); if (error == ERESTART) { pal_syscall_restart( thread, find_user_regs(thread) ); } else if (error != EJUSTRETURN) { if (error) { regs->eax = error; regs->efl |= EFL_CF; /* carry bit */ } else { /* (not error) */ regs->eax = uthread->uu_rval[0]; regs->edx = uthread->uu_rval[1]; } } DEBUG_KPRINT_SYSCALL_UNIX( "unix_syscall_return: error=%d retval=(%u,%u)\n", error, regs->eax, regs->edx); } uthread->uu_flag &= ~UT_NOTCANCELPT; if (uthread->uu_lowpri_window) { /* * task is marked as a low priority I/O type * and the I/O we issued while in this system call * collided with normal I/O operations... we'll * delay in order to mitigate the impact of this * task on the normal operation of the system */ throttle_lowpri_io(1); } if (code != 180) KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); thread_exception_return(); /* NOTREACHED */ }
/*
 * mach_call_munger: dispatch a Mach trap issued from a 32-bit saved state.
 *
 * The trap number is the negated value of the saved eax.  A number outside
 * [0, mach_trap_count) or one whose table entry is kern_invalid raises
 * EXC_SYSCALL through i386_exception().  Otherwise, if the trap takes
 * arguments, they are gathered from the user stack by
 * mach_call_arg_munger32(); a failure there is returned to the user in eax.
 * The trap result is stored in eax and the thread returns to user mode via
 * thread_exception_return(), so this function does not return to its
 * caller.
 *
 * Parameters:	state	saved user state; asserted to be the 32-bit flavour
 */
void
mach_call_munger(x86_saved_state_t *state)
{
	int argc;
	int call_number;
	mach_call_t mach_call;
	kern_return_t retval;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state32_t	*regs;

#if PROC_REF_DEBUG
	struct uthread *ut = get_bsdthread_info(current_thread());

	uthread_reset_proc_refcount(ut);
#endif

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	call_number = -(regs->eax);

	/*
	 * call_number comes straight from user space and has not been
	 * validated yet, so the name table is only indexed when the number
	 * is in range (assumes mach_syscall_name_table has at least
	 * mach_trap_count entries).
	 */
	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger: code=%d(%s)\n",
		call_number,
		(call_number >= 0 && call_number < mach_trap_count) ?
		    mach_syscall_name_table[call_number] : "invalid");
#if DEBUG_TRACE
	kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number);
#endif

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		DEBUG_KPRINT_SYSCALL_MACH(
			"mach_call_munger: kern_invalid 0x%x\n", regs->eax);
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}

	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		retval = mach_call_arg_munger32(regs->uesp, &args, &mach_trap_table[call_number]);
		if (retval != KERN_SUCCESS) {
			/* Argument fetch failed: hand the error back in eax. */
			regs->eax = retval;

			DEBUG_KPRINT_SYSCALL_MACH(
				"mach_call_munger: retval=0x%x\n", retval);

			thread_exception_return();
			/* NOTREACHED */
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
		args.arg1, args.arg2, args.arg3, args.arg4, 0);

	retval = mach_call(&args);

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
		retval, 0, 0, 0, 0);

	regs->eax = retval;

	throttle_lowpri_io(1);

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}
/*
 * machdep_syscall: dispatch a machine-dependent call issued from a 32-bit
 * saved state.
 *
 * The call number is the saved eax.  An out-of-range number yields the
 * result of kern_invalid(NULL) in eax.  Arguments (entry->nargs ints) are
 * copied in from just above the user stack pointer; a copyin failure
 * yields KERN_INVALID_ADDRESS.  The routine's result is stored in eax;
 * BSD-style three-argument routines additionally report failure through
 * the carry flag.  Returns to user mode via thread_exception_return().
 */
void
machdep_syscall(x86_saved_state_t *state)
{
	int			argv[machdep_call_count];
	int			callno;
	int			argc;
	const machdep_call_t	*call;
	x86_saved_state32_t	*iss;

	assert(is_saved_state32(state));
	iss = saved_state32(state);

	callno = iss->eax;
#if DEBUG_TRACE
	kprintf("machdep_syscall(0x%08x) code=%d\n", iss, callno);
#endif

	DEBUG_KPRINT_SYSCALL_MDEP(
		"machdep_syscall: trapno=%d\n", callno);

	if (callno < 0 || callno >= machdep_call_count) {
		iss->eax = (unsigned int)kern_invalid(NULL);

		thread_exception_return();
		/* NOTREACHED */
	}

	call = &machdep_call_table[callno];
	argc = call->nargs;

	/* Fetch the arguments, which sit one int above the user stack pointer. */
	if (argc != 0 &&
	    copyin((user_addr_t) iss->uesp + sizeof (int),
		   (char *) argv, (argc * sizeof (int)))) {
		iss->eax = KERN_INVALID_ADDRESS;

		thread_exception_return();
		/* NOTREACHED */
	}

	switch (argc) {
	case 0:
		iss->eax = (*call->routine.args_0)();
		break;
	case 1:
		iss->eax = (*call->routine.args_1)(argv[0]);
		break;
	case 2:
		iss->eax = (*call->routine.args_2)(argv[0],argv[1]);
		break;
	case 3:
		if (call->bsd_style) {
			int		err;
			uint32_t	result;

			err = (*call->routine.args_bsd_3)(&result, argv[0], argv[1], argv[2]);
			if (err) {
				iss->eax = err;
				iss->efl |= EFL_CF;	/* carry bit */
			} else {
				iss->eax = result;
				iss->efl &= ~EFL_CF;
			}
		} else {
			iss->eax = (*call->routine.args_3)(argv[0],argv[1],argv[2]);
		}
		break;
	case 4:
		iss->eax = (*call->routine.args_4)(argv[0], argv[1], argv[2], argv[3]);
		break;

	default:
		panic("machdep_syscall: too many args");
	}

	DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", iss->eax);

	throttle_lowpri_io(1);

	thread_exception_return();
	/* NOTREACHED */
}
static kern_return_t dtrace_machtrace_syscall(struct mach_call_args *args) { int code; /* The mach call number */ machtrace_sysent_t *sy; dtrace_id_t id; kern_return_t rval; #if 0 /* XXX */ proc_t *p; #endif syscall_arg_t *ip = (syscall_arg_t *)args; mach_call_t mach_call; #if defined (__x86_64__) { pal_register_cache_state(current_thread(), VALID); x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); if (is_saved_state64(tagged_regs)) { code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK; } else { code = -saved_state32(tagged_regs)->eax; } } #else #error Unknown Architecture #endif sy = &machtrace_sysent[code]; if ((id = sy->stsy_entry) != DTRACE_IDNONE) { uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (uthread) uthread->t_dtrace_syscall_args = (void *)ip; (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); if (uthread) uthread->t_dtrace_syscall_args = (void *)0; } #if 0 /* XXX */ /* * We want to explicitly allow DTrace consumers to stop a process * before it actually executes the meat of the syscall. */ p = ttoproc(curthread); mutex_enter(&p->p_lock); if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { curthread->t_dtrace_stop = 0; stop(PR_REQUESTED, 0); } mutex_exit(&p->p_lock); #endif mach_call = (mach_call_t)(*sy->stsy_underlying); rval = mach_call(args); if ((id = sy->stsy_return) != DTRACE_IDNONE) (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0); return (rval); }
/*
 * dtrace_systrace_syscall: DTrace interposer for a BSD system call.
 *
 * Recovers the system call number from the saved user registers
 * (following the indirect-syscall convention when the number is 0),
 * fires the entry probe if one is enabled, calls the underlying handler,
 * and then fires the return probe with a decoded form of the result.
 *
 * Parameters:	pp	process, passed through to the underlying handler
 *		uap	argument block; also handed to the entry probe
 *		rv	return value words filled in by the handler
 *
 * Returns:	the underlying handler's return value
 */
int32_t
dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
{
	unsigned short      code;	/* The system call number */

	systrace_sysent_t *sy;
	dtrace_id_t id;
	int32_t rval;
#if 0 /* XXX */
	proc_t *p;
#endif
	syscall_arg_t *ip = (syscall_arg_t *)uap;

#if defined (__x86_64__)
	{
		pal_register_cache_state(current_thread(), VALID);
		x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());

		if (is_saved_state64(tagged_regs)) {
			x86_saved_state64_t *regs = saved_state64(tagged_regs);
			code = regs->rax & SYSCALL_NUMBER_MASK;
			/*
			 * Check for indirect system call... system call number
			 * passed as 'arg0'
			 */
			if (code == 0) {
				code = regs->rdi;
			}
		} else {
			code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;

			/*
			 * Indirect system call from a 32-bit caller: the real
			 * number is the first word above the user stack pointer.
			 */
			if (code == 0) {
				vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
				code = fuword(params);
			}
		}
	}
#else
#error Unknown Architecture
#endif

	/* Bounds "check" the value of code a la unix_syscall: out-of-range codes use slot 63. */
	sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];

	if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
		uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
		/* Expose the argument block to the probe for its duration. */
		if (uthread)
			uthread->t_dtrace_syscall_args = (void *)ip;

		if (ip)
			(*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
		else
			(*systrace_probe)(id, 0, 0, 0, 0, 0);

		if (uthread)
			uthread->t_dtrace_syscall_args = (void *)0;
	}

#if 0 /* XXX */
	/*
	 * We want to explicitly allow DTrace consumers to stop a process
	 * before it actually executes the meat of the syscall.
	 */
	p = ttoproc(curthread);
	mutex_enter(&p->p_lock);
	if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
		curthread->t_dtrace_stop = 0;
		stop(PR_REQUESTED, 0);
	}
	mutex_exit(&p->p_lock);
#endif

	rval = (*sy->stsy_underlying)(pp, uap, rv);

	if ((id = sy->stsy_return) != DTRACE_IDNONE) {
		uint64_t munged_rv0, munged_rv1;
		uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

		if (uthread)
			uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */

		/*
		 * "Decode" rv for use in the call to dtrace_probe()
		 */
		if (rval == ERESTART) {
			munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
			munged_rv1 = -1LL;
		} else if (rval != EJUSTRETURN) {
			if (rval) {
				munged_rv0 = -1LL; /* Mimic what libc will do. */
				munged_rv1 = -1LL;
			} else {
				switch (sy->stsy_return_type) {
				case _SYSCALL_RET_INT_T:
					munged_rv0 = rv[0];
					munged_rv1 = rv[1];
					break;
				case _SYSCALL_RET_UINT_T:
					munged_rv0 = ((u_int)rv[0]);
					munged_rv1 = ((u_int)rv[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_UINT64_T:
					/* Both rv words form a single 64-bit value. */
					munged_rv0 = *(u_int64_t *)rv;
					munged_rv1 = 0LL;
					break;
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
					munged_rv0 = *(user_addr_t *)rv;
					munged_rv1 = 0LL;
					break;
				case _SYSCALL_RET_NONE:
					munged_rv0 = 0LL;
					munged_rv1 = 0LL;
					break;
				default:
					munged_rv0 = 0LL;
					munged_rv1 = 0LL;
					break;
				}
			}
		} else {
			/* EJUSTRETURN: report zeros. */
			munged_rv0 = 0LL;
			munged_rv1 = 0LL;
		}

		/*
		 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
		 *
		 * "This is a bit of an historical artifact. At first, the syscall provider just
		 * had its return value in arg0, and the fbt and pid providers had their return
		 * values in arg1 (so that we could use arg0 for the offset of the return site).
		 *
		 * We inevitably started writing scripts where we wanted to see the return
		 * values from probes in all three providers, and we made this script easier
		 * to write by replicating the syscall return values in arg1 to match fbt and
		 * pid. We debated briefly about removing the return value from arg0, but
		 * decided that it would be less confusing to have the same data in two places
		 * than to have some non-helpful, non-intuitive value in arg0.
		 *
		 * This change was made 4/23/2003 according to the DTrace project's putback log."
		 */
		/* Hence munged_rv0 is deliberately passed as both arg0 and arg1. */
		(*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
	}

	return (rval);
}
/*
 * dtrace_user_probe: give DTrace first refusal on a user-mode trap.
 *
 * Two trap numbers are handled:
 *   T_DTRACE_RET - return-probe trap; the user pc is set to the thread's
 *                  recorded t_dtrace_npc, after calling the return probe
 *                  hook when t_dtrace_ret is set.
 *   T_INT3       - breakpoint trap; offered to the pid probe hook, and
 *                  retried if the instruction is no longer an int 3.
 *
 * Parameters:	regs	saved user state (64-bit or 32-bit flavour)
 *
 * Returns:	KERN_SUCCESS when the trap was consumed here,
 *		KERN_FAILURE when the caller should handle it.
 */
kern_return_t
dtrace_user_probe(x86_saved_state_t *regs)
{
	x86_saved_state64_t *regs64;
	x86_saved_state32_t *regs32;
	int trapno;

	/*
	 * FIXME!
	 *
	 * The only call path into this method is always a user trap.
	 * We don't need to test for user trap, but should assert it.
	 */
	boolean_t user_mode = TRUE;

	/*
	 * Exactly one of regs64/regs32 is non-NULL after this block.
	 * Note that user_mode is never cleared anywhere in this function.
	 */
	if (is_saved_state64(regs) == TRUE) {
		regs64 = saved_state64(regs);
		regs32 = NULL;
		trapno = regs64->isf.trapno;
		user_mode = TRUE; /* By default, because xnu is 32 bit only */
	} else {
		regs64 = NULL;
		regs32 = saved_state32(regs);
		if (regs32->cs & 0x03) user_mode = TRUE;
		trapno = regs32->trapno;
	}

	lck_rw_t *rwp;
	struct proc *p = current_proc();

	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
	if (user_mode /*|| (rp->r_ps & PS_VM)*/) {
		/*
		 * DTrace accesses t_cred in probe context. t_cred
		 * must always be either NULL, or point to a valid,
		 * allocated cred structure.
		 */
		kauth_cred_uthread_update(uthread, p);
	}

	if (trapno == T_DTRACE_RET) {
		uint8_t step = uthread->t_dtrace_step;
		uint8_t ret = uthread->t_dtrace_ret;
		user_addr_t npc = uthread->t_dtrace_npc;

		if (uthread->t_dtrace_ast) {
			printf("dtrace_user_probe() should be calling aston()\n");
			/* aston(uthread); */
			/* uthread->t_sig_check = 1; */
		}

		/*
		 * Clear all user tracing flags.
		 */
		uthread->t_dtrace_ft = 0;

		/*
		 * If we weren't expecting to take a return probe trap, kill
		 * the process as though it had just executed an unassigned
		 * trap instruction.
		 */
		if (step == 0) {
			/*
			 * APPLE NOTE: We're returning KERN_FAILURE, which causes
			 * the generic signal handling code to take over, which will effectively
			 * deliver a EXC_BAD_INSTRUCTION to the user process.
			 */
			return KERN_FAILURE;
		}

		/*
		 * If we hit this trap unrelated to a return probe, we're
		 * just here to reset the AST flag since we deferred a signal
		 * until after we logically single-stepped the instruction we
		 * copied out.
		 */
		if (ret == 0) {
			if (regs64) {
				regs64->isf.rip = npc;
			} else {
				regs32->eip = npc;
			}
			return KERN_SUCCESS;
		}

		/*
		 * We need to wait until after we've called the
		 * dtrace_return_probe_ptr function pointer to set %pc.
		 */
		rwp = &CPU->cpu_ft_lock;
		lck_rw_lock_shared(rwp);

		if (dtrace_return_probe_ptr != NULL)
			(void) (*dtrace_return_probe_ptr)(regs);
		lck_rw_unlock_shared(rwp);

		if (regs64) {
			regs64->isf.rip = npc;
		} else {
			regs32->eip = npc;
		}

		return KERN_SUCCESS;
	} else if (trapno == T_INT3) {
		uint8_t instr;
		rwp = &CPU->cpu_ft_lock;

		/*
		 * The DTrace fasttrap provider uses the breakpoint trap
		 * (int 3). We let DTrace take the first crack at handling
		 * this trap; if it's not a probe that DTrace knows about,
		 * we call into the trap() routine to handle it like a
		 * breakpoint placed by a conventional debugger.
		 */

		/*
		 * APPLE NOTE: I believe the purpose of the reader/writers lock
		 * is thus: There are times which dtrace needs to prevent calling
		 * dtrace_pid_probe_ptr(). Sun's original impl grabbed a plain
		 * mutex here. However, that serialized all probe calls, and
		 * destroyed MP behavior. So now they use a RW lock, with probes
		 * as readers, and the top level synchronization as a writer.
		 */
		lck_rw_lock_shared(rwp);
		if (dtrace_pid_probe_ptr != NULL &&
		    (*dtrace_pid_probe_ptr)(regs) == 0) {
			lck_rw_unlock_shared(rwp);
			return KERN_SUCCESS;
		}
		lck_rw_unlock_shared(rwp);

		/*
		 * If the instruction that caused the breakpoint trap doesn't
		 * look like an int 3 anymore, it may be that this tracepoint
		 * was removed just after the user thread executed it. In
		 * that case, return to user land to retry the instruction.
		 */
		user_addr_t pc = (regs64) ? regs64->isf.rip : (user_addr_t)regs32->eip;
		/* The saved pc is one past the trapping byte, hence pc - 1. */
		if (fuword8(pc - 1, &instr) == 0 && instr != FASTTRAP_INSTR) {
			if (regs64) {
				regs64->isf.rip--;
			} else {
				regs32->eip--;
			}
			return KERN_SUCCESS;
		}
	}

	/* Any other trap number, or an int 3 DTrace did not claim. */
	return KERN_FAILURE;
}
/*
 * do_kernel_backtrace: walk the frame-pointer chain of a kernel thread
 * whose register state was saved in `regs`, appending program counters
 * to `frames`.
 *
 * Parameters:	thread		thread whose kernel stack is walked
 *		regs		saved kernel state; read with nofault reads
 *		frames		output array of program counters
 *		start_idx	in: first free slot in frames;
 *				out: one past the last slot written
 *		max_idx		capacity of frames
 *
 * Returns:	KERN_SUCCESS		walk finished
 *		KERN_RESOURCE_SHORTAGE	frames is full (or was on entry)
 *		KERN_FAILURE		initial pc/fp could not be read, or pc is 0
 */
static kern_return_t do_kernel_backtrace(
	thread_t thread,
	struct x86_kernel_state *regs,
	uint64_t *frames,
	mach_msg_type_number_t *start_idx,
	mach_msg_type_number_t max_idx)
{
	/* Frame pointers are only followed while they lie in this range. */
	uint64_t kernStackMin = (uint64_t)thread->kernel_stack;
	uint64_t kernStackMax = (uint64_t)kernStackMin + kernel_stack_size;
	mach_msg_type_number_t ct = *start_idx;
	kern_return_t kr = KERN_FAILURE;

#if __LP64__
	uint64_t currPC = 0ULL;
	uint64_t currFP = 0ULL;
	uint64_t prevPC = 0ULL;
	uint64_t prevFP = 0ULL;
	if(KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(regs->k_rip), sizeof(uint64_t))) {
		return KERN_FAILURE;
	}
	if(KERN_SUCCESS != chudxnu_kern_read(&currFP, (vm_offset_t)&(regs->k_rbp), sizeof(uint64_t))) {
		return KERN_FAILURE;
	}
#else
	uint32_t currPC = 0U;
	uint32_t currFP = 0U;
	uint32_t prevPC = 0U;
	uint32_t prevFP = 0U;
	if(KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(regs->k_eip), sizeof(uint32_t))) {
		return KERN_FAILURE;
	}
	if(KERN_SUCCESS != chudxnu_kern_read(&currFP, (vm_offset_t)&(regs->k_ebp), sizeof(uint32_t))) {
		return KERN_FAILURE;
	}
#endif

	if(*start_idx >= max_idx)
		return KERN_RESOURCE_SHORTAGE;	/* no frames traced */

	if(!currPC) {
		return KERN_FAILURE;
	}

	/* The saved pc itself is the first frame. */
	frames[ct++] = (uint64_t)currPC;

	/*
	 * build a backtrace of this kernel state
	 * (the loop's opening brace lives inside each preprocessor arm)
	 */
#if __LP64__
	while(VALID_STACK_ADDRESS64(TRUE, currFP, kernStackMin, kernStackMax)) {
		/* the caller's pc is stored one word above the frame pointer */
		uint64_t caller = currFP + sizeof(uint64_t);
#else
	while(VALID_STACK_ADDRESS(TRUE, currFP, kernStackMin, kernStackMax)) {
		uint32_t caller = (uint32_t)currFP + sizeof(uint32_t);
#endif

		if(!currFP || !currPC) {
			currPC = 0;
			break;
		}

		if(ct >= max_idx) {
			*start_idx = ct;
			return KERN_RESOURCE_SHORTAGE;
		}

		/* read our caller */
		kr = chudxnu_kern_read(&currPC, (vm_offset_t)caller, sizeof(currPC));

		if(kr != KERN_SUCCESS || !currPC) {
			currPC = 0UL;
			break;
		}

		/*
		 * retrieve contents of the frame pointer and advance to the next stack
		 * frame if it's valid
		 *
		 * The result of this read is not checked: prevFP is zeroed
		 * first, so a failed read leaves it 0 and the
		 * "prevFP <= currFP" test below ends the walk.
		 */
		prevFP = 0;
		kr = chudxnu_kern_read(&prevFP, (vm_offset_t)currFP, sizeof(currPC));

#if __LP64__
		if(VALID_STACK_ADDRESS64(TRUE, prevFP, kernStackMin, kernStackMax)) {
#else
		if(VALID_STACK_ADDRESS(TRUE, prevFP, kernStackMin, kernStackMax)) {
#endif
			frames[ct++] = (uint64_t)currPC;
			prevPC = currPC;
		}
		/* Only follow frame pointers that move strictly upward. */
		if(prevFP <= currFP) {
			break;
		} else {
			currFP = prevFP;
		}
	}

	*start_idx = ct;
	return KERN_SUCCESS;
}

/*
 * chudxnu_thread_get_callstack64: capture a callstack for `thread`.
 *
 * Chooses a register source (saved kernel state of a kernel thread, the
 * interrupted state at interrupt context, or the thread's user state),
 * backtraces from it, and then appends the word found at the stack
 * pointer if there is room left.
 *
 * Parameters:	thread		thread to trace
 *		callstack	output array of addresses
 *		count		in: capacity of callstack;
 *				out: number of entries written
 *		user_only	TRUE to accept only a user-mode callstack
 *
 * Returns:	the backtrace helper's result on the normal path;
 *		KERN_FAILURE when no suitable state or top-of-stack pc exists;
 *		KERN_RESOURCE_SHORTAGE when the capacity is 0.
 */
__private_extern__
kern_return_t chudxnu_thread_get_callstack64(
	thread_t		thread,
	uint64_t		*callstack,
	mach_msg_type_number_t	*count,
	boolean_t		user_only)
{
	kern_return_t kr = KERN_FAILURE;
	task_t task = thread->task;
	uint64_t currPC = 0ULL;
	boolean_t supervisor = FALSE;
	mach_msg_type_number_t bufferIndex = 0;
	mach_msg_type_number_t bufferMaxIndex = *count;
	x86_saved_state_t *tagged_regs = NULL;		/* kernel register state */
	x86_saved_state64_t *regs64 = NULL;
	x86_saved_state32_t *regs32 = NULL;
	x86_saved_state32_t *u_regs32 = NULL;
	x86_saved_state64_t *u_regs64 = NULL;
	struct x86_kernel_state *kregs = NULL;

	if(ml_at_interrupt_context()) {

		if(user_only) {
			/* can't backtrace user state on interrupt stack. */
			return KERN_FAILURE;
		}

		/* backtracing at interrupt context? */
		if(thread == current_thread() && current_cpu_datap()->cpu_int_state) {
			/*
			 * Locate the registers for the interrupted thread, assuming it is
			 * current_thread().
			 */
			tagged_regs = current_cpu_datap()->cpu_int_state;

			if(is_saved_state64(tagged_regs)) {
				/* 64 bit registers */
				regs64 = saved_state64(tagged_regs);
				supervisor = ((regs64->isf.cs & SEL_PL) != SEL_PL_U);
			} else {
				/* 32 bit registers */
				regs32 = saved_state32(tagged_regs);
				supervisor = ((regs32->cs & SEL_PL) != SEL_PL_U);
			}
		}
	}

	if(!ml_at_interrupt_context() && kernel_task == task) {

		if(!thread->kernel_stack) {
			return KERN_FAILURE;
		}

		/* Kernel thread not at interrupt context */
		kregs = (struct x86_kernel_state *)NULL;

		/* nofault read of the thread->kernel_stack pointer */
		if(KERN_SUCCESS != chudxnu_kern_read(&kregs, (vm_offset_t)&(thread->kernel_stack), sizeof(void *))) {
			return KERN_FAILURE;
		}

		/* Adjust to find the saved kernel state */
		kregs = STACK_IKS((vm_offset_t)(uintptr_t)kregs);

		supervisor = TRUE;
	} else if(!tagged_regs) {
		/*
		 * not at interrupt context, or tracing a different thread than
		 * current_thread() at interrupt context
		 */
		tagged_regs = USER_STATE(thread);
		if(is_saved_state64(tagged_regs)) {
			/* 64 bit registers */
			regs64 = saved_state64(tagged_regs);
			supervisor = ((regs64->isf.cs & SEL_PL) != SEL_PL_U);
		} else {
			/* 32 bit registers */
			regs32 = saved_state32(tagged_regs);
			supervisor = ((regs32->cs & SEL_PL) != SEL_PL_U);
		}
	}

	/* From here on every early return reports zero entries. */
	*count = 0;

	if(supervisor) {
		/* the caller only wants a user callstack. */
		if(user_only) {
			/* bail - we've only got kernel state */
			return KERN_FAILURE;
		}
	} else {
		/*
		 * regs32(64) is not in supervisor mode: reclassify whatever
		 * was found as user state.
		 */
		u_regs32 = regs32;
		u_regs64 = regs64;
		regs32 = NULL;
		regs64 = NULL;
	}

	if (user_only) {
		/* we only want to backtrace the user mode */
		if(!(u_regs32 || u_regs64)) {
			/* no user state to look at */
			return KERN_FAILURE;
		}
	}

	/*
	 * Order of preference for top of stack:
	 * saved kernel state of a kernel thread (kregs)
	 * 64 bit kernel state (not likely)
	 * 32 bit kernel state
	 * 64 bit user land state
	 * 32 bit user land state
	 */

	if(kregs) {
		/*
		 * nofault read of the registers from the kernel stack (as they can
		 * disappear on the fly).
		 */

#if __LP64__
		if(KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(kregs->k_rip), sizeof(uint64_t))) {
			return KERN_FAILURE;
		}
#else
		uint32_t tmp;
		if(KERN_SUCCESS != chudxnu_kern_read(&tmp, (vm_offset_t)&(kregs->k_eip), sizeof(uint32_t))) {
			return KERN_FAILURE;
		}
		currPC = (uint64_t)tmp;
#endif
	} else if(regs64) {
		currPC = regs64->isf.rip;
	} else if(regs32) {
		currPC = (uint64_t) regs32->eip;
	} else if(u_regs64) {
		currPC = u_regs64->isf.rip;
	} else if(u_regs32) {
		currPC = (uint64_t) u_regs32->eip;
	}

	if(!currPC) {
		/* no top of the stack, bail out */
		return KERN_FAILURE;
	}

	bufferIndex = 0;

	if(bufferMaxIndex < 1) {
		*count = 0;
		return KERN_RESOURCE_SHORTAGE;
	}

	/* backtrace kernel */
	if(kregs) {
		addr64_t address = 0ULL;
		size_t size = 0UL;

		/* do the backtrace */
		kr = do_kernel_backtrace(thread, kregs, callstack, &bufferIndex, bufferMaxIndex);

		/* and do a nofault read of (r|e)sp */
#if __LP64__
		uint64_t rsp = 0ULL;
		size = sizeof(uint64_t);

		if(KERN_SUCCESS != chudxnu_kern_read(&address, (vm_offset_t)&(kregs->k_rsp), size)) {
			address = 0ULL;
		}
#else
		uint32_t rsp = 0ULL, tmp = 0ULL;
		size = sizeof(uint32_t);

		if(KERN_SUCCESS != chudxnu_kern_read(&tmp, (vm_offset_t)&(kregs->k_esp), size)) {
			address = 0ULL;
		} else {
			address = (addr64_t)tmp;
		}
#endif

		/* Append the word at the saved stack pointer, if it fits. */
		if(address && KERN_SUCCESS == chudxnu_kern_read(&rsp, (vm_offset_t)address, size) && bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = (uint64_t)rsp;
		}
	} else if(regs64) {
		uint64_t rsp = 0ULL;

		/* backtrace the 64bit side. */
		kr = do_backtrace64(task, thread, regs64, callstack, &bufferIndex,
			bufferMaxIndex, TRUE);

		if(KERN_SUCCESS == chudxnu_kern_read(&rsp, (vm_offset_t) regs64->isf.rsp, sizeof(uint64_t)) &&
			bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = rsp;
		}

	} else if(regs32) {
		uint32_t esp = 0UL;

		/* backtrace the 32bit side. */
		kr = do_backtrace32(task, thread, regs32, callstack, &bufferIndex,
			bufferMaxIndex, TRUE);

		if(KERN_SUCCESS == chudxnu_kern_read(&esp, (vm_offset_t) regs32->uesp, sizeof(uint32_t)) &&
			bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = (uint64_t) esp;
		}
	} else if(u_regs64) {
		/* backtrace user land */
		uint64_t rsp = 0ULL;

		kr = do_backtrace64(task, thread, u_regs64, callstack, &bufferIndex,
			bufferMaxIndex, FALSE);

		/* User stacks are read through the task rather than the kernel map. */
		if(KERN_SUCCESS == chudxnu_task_read(task, &rsp, (addr64_t) u_regs64->isf.rsp, sizeof(uint64_t)) &&
			bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = rsp;
		}

	} else if(u_regs32) {
		uint32_t esp = 0UL;

		kr = do_backtrace32(task, thread, u_regs32, callstack, &bufferIndex,
			bufferMaxIndex, FALSE);

		if(KERN_SUCCESS == chudxnu_task_read(task, &esp, (addr64_t) u_regs32->uesp, sizeof(uint32_t)) &&
			bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = (uint64_t) esp;
		}
	}

	*count = bufferIndex;
	return kr;
}
/*
 * mach_call_munger: dispatch a Mach trap issued from a 32-bit saved state.
 *
 * The trap number is the negated value of the saved eax.  A number outside
 * [0, mach_trap_count) or one whose table entry is kern_invalid raises
 * EXC_SYSCALL through i386_exception().  Otherwise, if the trap takes
 * arguments, they are gathered from the user stack by
 * mach_call_arg_munger32(); a failure there is returned to the user in eax.
 * The trap result is stored in eax and the thread returns to user mode via
 * thread_exception_return(), so this function does not return to its
 * caller.
 *
 * Parameters:	state	saved user state; asserted to be the 32-bit flavour
 */
void
mach_call_munger(x86_saved_state_t *state)
{
	int argc;
	int call_number;
	mach_call_t mach_call;
	kern_return_t retval;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state32_t	*regs;

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	call_number = -(regs->eax);

	/*
	 * call_number comes straight from user space and has not been
	 * validated yet, so the name table is only indexed when the number
	 * is in range (assumes mach_syscall_name_table has at least
	 * mach_trap_count entries).
	 */
	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger: code=%d(%s)\n",
		call_number,
		(call_number >= 0 && call_number < mach_trap_count) ?
		    mach_syscall_name_table[call_number] : "invalid");
#if DEBUG_TRACE
	kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number);
#endif

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		DEBUG_KPRINT_SYSCALL_MACH(
			"mach_call_munger: kern_invalid 0x%x\n", regs->eax);
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}

	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		retval = mach_call_arg_munger32(regs->uesp, argc, call_number, &args);
		if (retval != KERN_SUCCESS) {
			/* Argument fetch failed: hand the error back in eax. */
			regs->eax = retval;

			DEBUG_KPRINT_SYSCALL_MACH(
				"mach_call_munger: retval=0x%x\n", retval);

			thread_exception_return();
			/* NOTREACHED */
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
			args.arg1, args.arg2, args.arg3, args.arg4, 0);

	retval = mach_call(&args);

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
			retval, 0, 0, 0, 0);
	regs->eax = retval;

	throttle_lowpri_io(TRUE);

	thread_exception_return();
	/* NOTREACHED */
}
/*
 * panic_64
 *
 * Panic with a dump of the control registers and of the register state
 * saved in 'sp'.
 *
 *   sp           saved state to dump
 *   pc           value handed to postcode()
 *   msg          text placed at the start of the panic string
 *   do_mca_dump  when true (and CONFIG_MCA is set) mca_dump() is called first
 *
 * On __i386__ builds the saved state may be either flavour, and the matching
 * layout is printed; on other builds the state is treated as 64-bit.
 */
void
panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boolean_t do_mca_dump)
{
	/* Set postcode (DEBUG only) */
	postcode(pc);

	/*
	 * If an I/O port read was requested, issue it now so that a logic
	 * analyzer can use the access as its trigger.
	 */
	panic_io_port_read();

	/*
	 * We may have got here recursively: break the kprintf lock
	 * (and record the originally faulted instruction address).
	 */
	kprintf_break_lock();

	if (do_mca_dump) {
#if CONFIG_MCA
		/* Dump whatever the machine check MSRs contain. */
		mca_dump();
#endif
	}

	/* Suffix appended to the register dump when 'virtualized' is set. */
	const char *vmm_suffix = virtualized ? " VMM" : "";

#ifdef __i386__
	/*
	 * Dump the interrupt stack frame at last kernel entry.
	 */
	if (!is_saved_state64(sp)) {
		x86_saved_state32_t *frame32 = saved_state32(sp);

		panic("%s at 0x%08x, trapno:0x%x, err:0x%x,"
		      "registers:\n"
		      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
		      "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
		      "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
		      "EFL: 0x%08x, EIP: 0x%08x%s\n",
		      msg,
		      frame32->eip,
		      frame32->trapno,
		      frame32->err,
		      (uint32_t)get_cr0(),
		      (uint32_t)get_cr2(),
		      (uint32_t)get_cr3(),
		      (uint32_t)get_cr4(),
		      frame32->eax,
		      frame32->ebx,
		      frame32->ecx,
		      frame32->edx,
		      frame32->uesp,
		      frame32->ebp,
		      frame32->esi,
		      frame32->edi,
		      frame32->efl,
		      frame32->eip,
		      vmm_suffix);
	} else {
		x86_saved_state64_t *frame64 = saved_state64(sp);

		panic("%s trapno:0x%x, err:0x%qx, "
		      "registers:\n"
		      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
		      "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n"
		      "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n"
		      "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n"
		      "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n"
		      "RFL: 0x%016qx, RIP: 0x%016qx, CR2: 0x%016qx%s\n",
		      msg,
		      frame64->isf.trapno,
		      frame64->isf.err,
		      (uint32_t)get_cr0(),
		      (uint32_t)get_cr2(),
		      (uint32_t)get_cr3(),
		      (uint32_t)get_cr4(),
		      frame64->rax,
		      frame64->rbx,
		      frame64->rcx,
		      frame64->rdx,
		      frame64->isf.rsp,
		      frame64->rbp,
		      frame64->rsi,
		      frame64->rdi,
		      frame64->r8,
		      frame64->r9,
		      frame64->r10,
		      frame64->r11,
		      frame64->r12,
		      frame64->r13,
		      frame64->r14,
		      frame64->r15,
		      frame64->isf.rflags,
		      frame64->isf.rip,
		      frame64->cr2,
		      vmm_suffix);
	}
#else
	x86_saved_state64_t *frame = saved_state64(sp);

	panic("%s at 0x%016llx, registers:\n"
	      "CR0: 0x%016lx, CR2: 0x%016lx, CR3: 0x%016lx, CR4: 0x%016lx\n"
	      "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n"
	      "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n"
	      "R8: 0x%016llx, R9: 0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
	      "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
	      "RFL: 0x%016llx, RIP: 0x%016llx, CS: 0x%016llx, SS: 0x%016llx\n"
	      "Error code: 0x%016llx%s\n",
	      msg,
	      frame->isf.rip,
	      get_cr0(),
	      get_cr2(),
	      get_cr3_raw(),
	      get_cr4(),
	      frame->rax,
	      frame->rbx,
	      frame->rcx,
	      frame->rdx,
	      frame->isf.rsp,
	      frame->rbp,
	      frame->rsi,
	      frame->rdi,
	      frame->r8,
	      frame->r9,
	      frame->r10,
	      frame->r11,
	      frame->r12,
	      frame->r13,
	      frame->r14,
	      frame->r15,
	      frame->isf.rflags,
	      frame->isf.rip,
	      frame->isf.cs & 0xFFFF,
	      frame->isf.ss & 0xFFFF,
	      frame->isf.err,
	      vmm_suffix);
#endif
}
/*
 * diagCall
 *
 * Handle a diagnostic system call issued by a thread with 32-bit saved
 * state.  The selector is read from the user stack at uesp + 4 and the
 * selector's argument from uesp + 8.
 *
 * dgRuptStat:
 *   - argument == 0: zero every CPU's cpu_hwIntCnt[] and remember the time
 *     of the clear in lastRuptClear.
 *   - otherwise the argument is used as a user address; written there are
 *     real_ncpus followed, for each CPU, by an 8-byte interval since the
 *     last clear and that CPU's 256 interrupt counters.
 *
 * Returns 1 on normal completion.  Returns 0 (which, per the comments
 * inherited from the original code, makes the caller raise an exception)
 * when diagnostic calls are disabled, when either stack word cannot be
 * read, or when the selector is unknown.
 */
int
diagCall(x86_saved_state_t * state)
{
	uint32_t	stk, curpos, i, j;
	uint32_t	selector, data;
	int		err;
	uint64_t	currNap, durNap;
	x86_saved_state32_t	*regs;

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	if (!(dgWork.dgFlags & enaDiagSCs))
		return 0;	/* If not enabled, cause an exception */

	stk = regs->uesp;	/* Point to the stack */
	err = copyin((user_addr_t) (stk + 4), (char *) &selector, sizeof(uint32_t));	/* Get the selector */
	if (err) {
		return 0;	/* Failed to fetch stack */
	}

	switch (selector) {	/* Select the routine */
	case dgRuptStat:	/* Suck Interruption statistics */
		err = copyin((user_addr_t) (stk + 8), (char *) &data, sizeof(uint32_t));	/* Get the argument */
		if (err) {
			/*
			 * Without this check 'data' would be used
			 * uninitialized, both as the clear/report switch and
			 * as the copyout destination address.
			 */
			return 0;	/* Failed to fetch stack */
		}

		if (data == 0) {	/* If the argument is 0, clear all counts */
			for (i = 0; i < real_ncpus; i++) {	/* Cycle through processors */
				for (j = 0; j < 256; j++)
					cpu_data_ptr[i]->cpu_hwIntCnt[j] = 0;
			}

			lastRuptClear = mach_absolute_time();	/* Get the time of clear */
			return 1;	/* Normal return */
		}

		/* Copy-out failures below are deliberately ignored (best effort). */
		(void) copyout((char *) &real_ncpus, data, sizeof(real_ncpus));	/* Copy out number of processors */

		currNap = mach_absolute_time();	/* Get the time now */
		durNap = currNap - lastRuptClear;	/* Get the last interval duration */
		if (durNap == 0)
			durNap = 1;	/* This is a very short time, make it bigger */

		curpos = data + sizeof(real_ncpus);	/* Point to the next available spot */

		for (i = 0; i < real_ncpus; i++) {	/* Move 'em all out */
			(void) copyout((char *) &durNap, curpos, 8);	/* Copy out the time since last clear */
			(void) copyout((char *) &cpu_data_ptr[i]->cpu_hwIntCnt, curpos + 8, 256 * sizeof(uint32_t));	/* Copy out interrupt data for this processor */
			curpos = curpos + (256 * sizeof(uint32_t) + 8);	/* Point to next output slot */
		}
		break;

	default:	/* Handle invalid ones */
		return 0;	/* Return an exception */
	}

	return 1;	/* Normal non-ast check return */
}