static void
kdebug_lookup(struct vnode *dp, struct componentname *cnp)
{
	unsigned int i;
	int code;
	int dbg_namelen;
	char *dbg_nameptr;
	long dbg_parms[NUMPARMS];

	/* Collect the pathname for tracing */
	dbg_namelen = (cnp->cn_nameptr - cnp->cn_pnbuf) + cnp->cn_namelen;
	dbg_nameptr = cnp->cn_nameptr + cnp->cn_namelen;

	if (dbg_namelen > (int)sizeof(dbg_parms))
		dbg_namelen = sizeof(dbg_parms);
	dbg_nameptr -= dbg_namelen;

	/* Copy the (possibly truncated) path itself */
	memcpy(dbg_parms, dbg_nameptr, dbg_namelen);

	/* Pad with '\0' or '>' */
	if (dbg_namelen < (int)sizeof(dbg_parms)) {
		memset((char *)dbg_parms + dbg_namelen,
		    *(cnp->cn_nameptr + cnp->cn_namelen) ? '>' : 0,
		    sizeof(dbg_parms) - dbg_namelen);
	}

	/*
	 * In the event that we collect multiple, consecutive pathname
	 * entries, we must mark the start of the path's string and the end.
	 */
	code = (FSDBG_CODE(DBG_FSRW, 36)) | DBG_FUNC_START;

	if (dbg_namelen <= 12)
		code |= DBG_FUNC_END;

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, dp,
	    dbg_parms[0], dbg_parms[1], dbg_parms[2], 0);

	code &= ~DBG_FUNC_START;

	for (i = 3, dbg_namelen -= 12; dbg_namelen > 0; i += 4, dbg_namelen -= 16) {
		if (dbg_namelen <= 16)
			code |= DBG_FUNC_END;

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code,
		    dbg_parms[i], dbg_parms[i + 1],
		    dbg_parms[i + 2], dbg_parms[i + 3], 0);
	}
}
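/*
 * Illustrative sketch (not kernel code) of the pad-or-truncate policy
 * above, assuming 4-byte longs and NUMPARMS == 10 as in this 32-bit
 * variant; pack_path() and more_follows are hypothetical names. It keeps
 * the tail of an over-long path and pads a short one with NULs, or with
 * '>' when more of the path follows the copied portion.
 */
#include <string.h>

#define SKETCH_NUMPARMS	10

static void
pack_path(const char *path, int more_follows, long dbg_parms[SKETCH_NUMPARMS])
{
	int len = (int)strlen(path);
	const char *start = path;

	if (len > (int)sizeof(long[SKETCH_NUMPARMS])) {
		/* keep only the trailing bytes, as kdebug_lookup() does */
		start += len - (int)sizeof(long[SKETCH_NUMPARMS]);
		len = (int)sizeof(long[SKETCH_NUMPARMS]);
	}
	memcpy(dbg_parms, start, len);

	if (len < (int)sizeof(long[SKETCH_NUMPARMS]))
		memset((char *)dbg_parms + len, more_follows ? '>' : 0,
		    sizeof(long[SKETCH_NUMPARMS]) - len);
}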
/*
 * Re-evaluate the outstanding deadlines and select the most proximate.
 *
 * Should be called at splclock.
 */
void
timer_resync_deadlines(void)
{
	uint64_t	deadline = EndOfAllTime;
	uint64_t	pmdeadline;
	rtclock_timer_t	*mytimer;
	spl_t		s = splclock();
	cpu_data_t	*pp;
	uint32_t	decr;

	pp = current_cpu_datap();
	if (!pp->cpu_running) {
		/* There's really nothing to do if this processor is down */
		splx(s);	/* restore the interrupt level raised above */
		return;
	}

	/*
	 * If we have a clock timer set, pick that.
	 */
	mytimer = &pp->rtclock_timer;
	if (!mytimer->has_expired &&
	    0 < mytimer->deadline && mytimer->deadline < EndOfAllTime)
		deadline = mytimer->deadline;

	/*
	 * If we have a power management deadline, see if that's earlier.
	 */
	pmdeadline = pmCPUGetDeadline(pp);
	if (0 < pmdeadline && pmdeadline < deadline)
		deadline = pmdeadline;

	/*
	 * Go and set the "pop" event.
	 */
	decr = (uint32_t)setPop(deadline);

	/* Record non-PM deadline for latency tool */
	if (decr != 0 && deadline != pmdeadline) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    DECR_SET_DEADLINE | DBG_FUNC_NONE,
		    decr, 2, deadline,
		    mytimer->queue.count, 0);
	}
	splx(s);
}
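/*
 * The selection policy above reduces to "take the earliest valid
 * deadline". A minimal standalone sketch, assuming 0 means unset and
 * UINT64_MAX stands in for EndOfAllTime; earliest_deadline() is a
 * hypothetical helper, not kernel API.
 */
#include <stdint.h>

static uint64_t
earliest_deadline(uint64_t timer_deadline, uint64_t pm_deadline)
{
	uint64_t deadline = UINT64_MAX;		/* "EndOfAllTime" sentinel */

	if (0 < timer_deadline && timer_deadline < UINT64_MAX)
		deadline = timer_deadline;	/* clock timer, if set */
	if (0 < pm_deadline && pm_deadline < deadline)
		deadline = pm_deadline;		/* PM deadline, if earlier */
	return deadline;
}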
/*
 * Complete the shutdown and place the processor offline.
 *
 * Called at splsched in the shutdown context.
 * This performs a minimal thread_invoke() to the idle thread,
 * so it needs to be kept in sync with what thread_invoke() does.
 *
 * The onlining half of this is done in load_context().
 */
void
processor_offline(
	processor_t	processor)
{
	assert(processor == current_processor());
	assert(processor->active_thread == current_thread());

	thread_t old_thread = processor->active_thread;
	thread_t new_thread = processor->idle_thread;

	processor->active_thread = new_thread;
	processor->current_pri = IDLEPRI;
	processor->current_thmode = TH_MODE_NONE;
	processor->starting_pri = IDLEPRI;
	processor->current_sfi_class = SFI_CLASS_KERNEL;
	processor->deadline = UINT64_MAX;
	new_thread->last_processor = processor;

	uint64_t ctime = mach_absolute_time();

	processor->last_dispatch = ctime;
	old_thread->last_run_time = ctime;

	/* Update processor->thread_timer and ->kernel_timer to point to the new thread */
	thread_timer_event(ctime, &new_thread->system_timer);
	PROCESSOR_DATA(processor, kernel_timer) = &new_thread->system_timer;

	timer_stop(PROCESSOR_DATA(processor, current_state), ctime);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED) | DBG_FUNC_NONE,
	    old_thread->reason, (uintptr_t)thread_tid(new_thread),
	    old_thread->sched_pri, new_thread->sched_pri, 0);

	machine_set_current_thread(new_thread);

	thread_dispatch(old_thread, new_thread);

	PMAP_DEACTIVATE_KERNEL(processor->cpu_id);

	cpu_sleep();
	panic("zombie processor");
	/*NOTREACHED*/
}
/*
 * timer_queue_migrate_cpu() is called from the Power-Management kext
 * when a logical processor goes idle (in a deep C-state) with a distant
 * deadline so that its timer queue can be moved to another processor.
 * This target processor should be the least idle (most busy) --
 * currently this is the primary processor for the calling thread's package.
 * Locking restrictions demand that the target cpu must be the boot cpu.
 */
uint32_t
timer_queue_migrate_cpu(int target_cpu)
{
	cpu_data_t	*target_cdp = cpu_datap(target_cpu);
	cpu_data_t	*cdp = current_cpu_datap();
	int		ntimers_moved;

	assert(!ml_get_interrupts_enabled());
	assert(target_cpu != cdp->cpu_number);
	assert(target_cpu == master_cpu);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    DECR_TIMER_MIGRATE | DBG_FUNC_START,
	    target_cpu,
	    cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >> 32),
	    0, 0);

	/*
	 * Move timer requests from the local queue to the target processor's.
	 * The return value is the number of requests moved. If this is 0,
	 * it indicates that the first (i.e. earliest) timer is earlier than
	 * the earliest for the target processor. Since this would force a
	 * resync, the move of this and all later requests is aborted.
	 */
	ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue,
	    &target_cdp->rtclock_timer.queue);

	/*
	 * Assuming we moved stuff, clear local deadline.
	 */
	if (ntimers_moved > 0) {
		cdp->rtclock_timer.deadline = EndOfAllTime;
		setPop(EndOfAllTime);
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    DECR_TIMER_MIGRATE | DBG_FUNC_END,
	    target_cpu, ntimers_moved, 0, 0, 0);

	return ntimers_moved;
}
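/*
 * The abort rule in the comment above can be stated as a one-line
 * predicate. A sketch under the assumption that both queues keep their
 * earliest deadline at the head; should_migrate() is a hypothetical
 * helper, not kernel API.
 */
#include <stdint.h>

static int
should_migrate(uint64_t local_earliest, uint64_t target_earliest)
{
	/*
	 * Moving a timer that is earlier than the target's current head
	 * would force the target to resync its hardware deadline, so the
	 * migration is refused (timer_queue_migrate() then returns 0).
	 */
	return local_earliest >= target_earliest;
}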
/*
 * Create a new thread.
 * Doesn't start the thread running.
 */
static kern_return_t
thread_create_internal(
	task_t			parent_task,
	integer_t		priority,
	thread_continue_t	continuation,
	int			options,
#define TH_OPTION_NONE		0x00
#define TH_OPTION_NOCRED	0x01
#define TH_OPTION_NOSUSP	0x02
	thread_t		*out_thread)
{
	thread_t	new_thread;
	static thread_t	first_thread = THREAD_NULL;

	/*
	 * Allocate a thread and initialize static fields
	 */
	if (first_thread == THREAD_NULL)
		new_thread = first_thread = current_thread();
	else
		new_thread = (thread_t)zalloc(thread_zone);
	if (new_thread == THREAD_NULL)
		return (KERN_RESOURCE_SHORTAGE);

	if (new_thread != first_thread)
		*new_thread = thread_template;

#ifdef MACH_BSD
	new_thread->uthread = uthread_alloc(parent_task, new_thread,
	    (options & TH_OPTION_NOCRED) != 0);
	if (new_thread->uthread == NULL) {
		zfree(thread_zone, new_thread);
		return (KERN_RESOURCE_SHORTAGE);
	}
#endif /* MACH_BSD */

	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
		void *ut = new_thread->uthread;

		new_thread->uthread = NULL;
		/* cred free may not be necessary */
		uthread_cleanup(parent_task, ut, parent_task->bsd_info);
		uthread_cred_free(ut);
		uthread_zone_free(ut);
#endif /* MACH_BSD */
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	new_thread->task = parent_task;

	thread_lock_init(new_thread);
	wake_lock_init(new_thread);

	lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);

	ipc_thread_init(new_thread);
	queue_init(&new_thread->held_ulocks);
	new_thread->continuation = continuation;

	lck_mtx_lock(&tasks_threads_lock);
	task_lock(parent_task);

	if (!parent_task->active || parent_task->halting ||
	    ((options & TH_OPTION_NOSUSP) != 0 &&
	     parent_task->suspend_count > 0) ||
	    (parent_task->thread_count >= task_threadmax &&
	     parent_task != kernel_task)) {
		task_unlock(parent_task);
		lck_mtx_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			/* cred free may not be necessary */
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif /* MACH_BSD */
		ipc_thread_disable(new_thread);
		ipc_thread_terminate(new_thread);
		lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
		machine_thread_destroy(new_thread);
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	/* New threads inherit any default state on the task */
	machine_thread_inherit_taskwide(new_thread, parent_task);

	task_reference_internal(parent_task);

	if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
		/*
		 * This task has a per-thread CPU limit; make sure this new thread
		 * gets its limit set too, before it gets out of the kernel.
		 */
		set_astledger(new_thread);
	}

	new_thread->t_threadledger = LEDGER_NULL;	/* per thread ledger is not inherited */
	new_thread->t_ledger = new_thread->task->ledger;
	if (new_thread->t_ledger)
		ledger_reference(new_thread->t_ledger);

	/* Cache the task's map */
	new_thread->map = parent_task->map;

	/* Chain the thread onto the task's list */
	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	parent_task->thread_count++;

	/* So terminating threads don't need to take the task lock to decrement */
	hw_atomic_add(&parent_task->active_thread_count, 1);

	/* Protected by the tasks_threads_lock */
	new_thread->thread_id = ++thread_unique_id;

	queue_enter(&threads, new_thread, thread_t, threads);
	threads_count++;

	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

#if CONFIG_COUNTERS
	/*
	 * If the parent task has any reservations, they need to be propagated
	 * to this thread.
	 */
	new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ?
		THREAD_PMC_FLAG : 0U;
#endif

	/* Set the thread's scheduling parameters */
	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
	new_thread->sched_flags = 0;
	new_thread->max_priority = parent_task->max_priority;
	new_thread->task_priority = parent_task->priority;
	new_thread->priority = (priority < 0) ? parent_task->priority : priority;
	if (new_thread->priority > new_thread->max_priority)
		new_thread->priority = new_thread->max_priority;
#if CONFIG_EMBEDDED
	if (new_thread->priority < MAXPRI_THROTTLE) {
		new_thread->priority = MAXPRI_THROTTLE;
	}
#endif /* CONFIG_EMBEDDED */
	new_thread->importance = new_thread->priority - new_thread->task_priority;
#if CONFIG_EMBEDDED
	new_thread->saved_importance = new_thread->importance;
	/* apple ios daemon starts all threads in darwin background */
	if (parent_task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) {
		/* Cannot use generic routines here so apply darwin background directly */
		new_thread->policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL;
		/* set thread self backgrounding */
		new_thread->appliedstate.hw_bg = new_thread->policystate.hw_bg;
		/* priority will get recomputed suitably a bit later */
		new_thread->importance = INT_MIN;
		/* to avoid changes to many pri compute routines, set the effect of those here */
		new_thread->priority = MAXPRI_THROTTLE;
	}
#endif /* CONFIG_EMBEDDED */

#if defined(CONFIG_SCHED_TRADITIONAL)
	new_thread->sched_stamp = sched_tick;
	new_thread->pri_shift = sched_pri_shift;
#endif

	SCHED(compute_priority)(new_thread, FALSE);

	new_thread->active = TRUE;

	*out_thread = new_thread;

	{
		long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

		kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
		    (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0);

		kdbg_trace_string(parent_task->bsd_info,
		    &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
		    dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
	}

	DTRACE_PROC1(lwp__create, thread_t, *out_thread);

	return (KERN_SUCCESS);
}
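/*
 * The priority/importance assignments above condense to the following
 * arithmetic. A sketch with plain ints and without the CONFIG_EMBEDDED
 * throttling clamps; derive_importance() is a hypothetical helper, not
 * kernel API.
 */
static int
derive_importance(int requested_pri, int task_pri, int max_pri, int *out_pri)
{
	/* a negative request means "inherit the task's priority" */
	int pri = (requested_pri < 0) ? task_pri : requested_pri;

	if (pri > max_pri)
		pri = max_pri;		/* clamp to the task's ceiling */
	*out_pri = pri;
	/* importance is the thread's offset from its task's base priority */
	return pri - task_pri;
}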
/*
 * Event timer interrupt.
 *
 * XXX a drawback of this implementation is that events serviced earlier must not set deadlines
 *     that occur before the entire chain completes.
 *
 * XXX a better implementation would use a set of generic callouts and iterate over them
 */
void
timer_intr(int user_mode, uint64_t rip)
{
	uint64_t	abstime;
	rtclock_timer_t	*mytimer;
	cpu_data_t	*pp;
	int64_t		latency;
	uint64_t	pmdeadline;
	boolean_t	timer_processed = FALSE;

	pp = current_cpu_datap();

	SCHED_STATS_TIMER_POP(current_processor());

	abstime = mach_absolute_time();		/* Get the time now */

	/* has a pending clock timer expired? */
	mytimer = &pp->rtclock_timer;		/* Point to the event timer */

	if ((timer_processed = ((mytimer->deadline <= abstime) ||
	    (abstime >= (mytimer->queue.earliest_soft_deadline))))) {
		/*
		 * Log interrupt service latency (-ve value expected by tool)
		 * a non-PM event is expected next.
		 * The requested deadline may be earlier than when it was set
		 * - use MAX to avoid reporting bogus latencies.
		 */
		latency = (int64_t)(abstime - MAX(mytimer->deadline,
		    mytimer->when_set));
		/* Log zero timer latencies when opportunistically processing
		 * coalesced timers.
		 */
		if (latency < 0) {
			TCOAL_DEBUG(0xEEEE0000, abstime,
			    mytimer->queue.earliest_soft_deadline,
			    abstime - mytimer->queue.earliest_soft_deadline, 0, 0);
			latency = 0;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    DECR_TRAP_LATENCY | DBG_FUNC_NONE,
		    -latency,
		    ((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)),
		    user_mode, 0, 0);

		mytimer->has_expired = TRUE;	/* Remember that we popped */
		mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
		mytimer->has_expired = FALSE;

		/* Get the time again since we ran a bit */
		abstime = mach_absolute_time();
		mytimer->when_set = abstime;
	}

	/* is it time for power management state change? */
	if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    DECR_PM_DEADLINE | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		pmCPUDeadline(pp);
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    DECR_PM_DEADLINE | DBG_FUNC_END,
		    0, 0, 0, 0, 0);
		timer_processed = TRUE;
	}

	/* schedule our next deadline */
	x86_lcpu()->rtcDeadline = EndOfAllTime;
	timer_resync_deadlines();

	if (__improbable(timer_processed == FALSE))
		spurious_timers++;
}
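/*
 * The latency computation above, extracted as a standalone sketch.
 * pop_latency() is a hypothetical helper; the kernel logs the negated
 * value because the tracing tool expects a negative number.
 */
#include <stdint.h>

static int64_t
pop_latency(uint64_t now, uint64_t deadline, uint64_t when_set)
{
	/* the requested deadline may predate when it was (re)set */
	uint64_t basis = (deadline > when_set) ? deadline : when_set;
	int64_t latency = (int64_t)(now - basis);

	/*
	 * A negative value means we fired early to opportunistically
	 * service a coalesced (soft) deadline; report zero instead.
	 */
	return (latency < 0) ? 0 : latency;
}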
/*
 * Function:	unix_syscall
 *
 * Inputs:	regs	- pointer to i386 save area
 *
 * Outputs:	none
 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;
	unsigned int		code;
	struct sysent		*callp;

	int			error;
	vm_offset_t		params;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state32_t	*regs;
	boolean_t		is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if DEBUG
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else
		p = (struct proc *)get_bsdtask_info(current_task());

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->eax = EPERM;
		regs->efl |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
	    code, syscallnames[code >= NUM_SYSENT ? 63 : code],
	    (uint32_t)regs->eip);
	params = (vm_offset_t)(regs->uesp + sizeof(int));

	regs->efl &= ~(EFL_CF);

	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	if (__improbable(callp == sysent)) {
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}

	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	nargs;

		assert((unsigned)callp->sy_arg_bytes <= sizeof(uthread->uu_arg));
		nargs = callp->sy_arg_bytes;
		error = copyin((user_addr_t)params, (char *)vt, nargs);
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		if (__probable(code != 180)) {
			int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			    BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			    *ip, *(ip + 1), *(ip + 2), *(ip + 3), 0);
		}

#if CONFIG_REQUIRES_U32_MUNGING
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
		    0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *)p, (void *)vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */
		pal_syscall_restart(thread, state);
	} else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else {	/* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
			regs->eax = uthread->uu_rval[0];
			regs->edx = uthread->uu_rval[1];
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
	    "unix_syscall: error=%d retval=(%u,%u)\n",
	    error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		    error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

	thread_exception_return();
	/* NOTREACHED */
}
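/*
 * The eax/edx split mentioned above follows the i386 function call ABI
 * for 64-bit return values: low half in eax, high half in edx. A sketch,
 * assuming a little-endian split; split_retval64() is a hypothetical
 * helper, not kernel API.
 */
#include <stdint.h>

static void
split_retval64(uint64_t rv, uint32_t *eax, uint32_t *edx)
{
	*eax = (uint32_t)(rv & 0xffffffffu);	/* low 32 bits */
	*edx = (uint32_t)(rv >> 32);		/* high 32 bits */
}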
void
unix_syscall_return(int error)
{
	thread_t	thread;
	struct uthread	*uthread;
	struct proc	*p;
	unsigned int	code;
	struct sysent	*callp;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	pal_register_cache_state(thread, DIRTY);

	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(find_user_regs(thread));

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return(code, error, uthread->uu_rval);
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			/*
			 * repeat the syscall
			 */
			pal_syscall_restart(thread, find_user_regs(thread));
		} else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else {	/* (not error) */
				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
				case _SYSCALL_RET_UINT64_T:
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					regs->rdx = 0;
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
					break;
				}
				regs->isf.rflags &= ~EFL_CF;
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
		    "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
		    error, regs->rax, regs->rdx);
	} else {
		x86_saved_state32_t *regs;

		regs = saved_state32(find_user_regs(thread));

		regs->efl &= ~(EFL_CF);

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return(code, error, uthread->uu_rval);
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			pal_syscall_restart(thread, find_user_regs(thread));
		} else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else {	/* (not error) */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
		    "unix_syscall_return: error=%d retval=(%u,%u)\n",
		    error, regs->eax, regs->edx);
	}

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (uthread->uu_lowpri_window) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (code != 180)
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		    error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
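/*
 * The 64-bit switch above implements widening rules for the rax register:
 * _SYSCALL_RET_INT_T sign-extends, _SYSCALL_RET_UINT_T zero-extends, and
 * the 64-bit types reinterpret uu_rval[0..1] as a single value. A sketch
 * with a hypothetical enum standing in for the _SYSCALL_RET_* constants.
 */
#include <stdint.h>
#include <string.h>

enum sketch_ret_type { SKETCH_RET_INT, SKETCH_RET_UINT, SKETCH_RET_UINT64 };

static uint64_t
widen_retval(enum sketch_ret_type t, const int32_t rval[2])
{
	uint64_t rv64;

	switch (t) {
	case SKETCH_RET_INT:
		return (uint64_t)(int64_t)rval[0];	/* sign-extend */
	case SKETCH_RET_UINT:
		return (uint64_t)(uint32_t)rval[0];	/* zero-extend */
	case SKETCH_RET_UINT64:
		memcpy(&rv64, rval, sizeof(rv64));	/* both halves as one value */
		return rv64;
	}
	return 0;
}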
void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t	thread;
	void		*vt;
	unsigned int	code;
	struct sysent	*callp;
	int		args_in_regs;
	boolean_t	args_start_at_rdi;
	int		error;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state64_t *regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
#if DEBUG
	if (regs->rax == 0x2000800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
	    "unix_syscall64: code=%d(%s) rip=%llx\n",
	    code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	vt = (void *)uthread->uu_arg;

	if (__improbable(callp == sysent)) {
		/*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		args_start_at_rdi = FALSE;
		args_in_regs = 5;
	} else {
		args_start_at_rdi = TRUE;
		args_in_regs = 6;
	}

	if (callp->sy_narg != 0) {
		assert(callp->sy_narg <= 8);	/* size of uu_arg */

		args_in_regs = MIN(args_in_regs, callp->sy_narg);
		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi,
		    args_in_regs * sizeof(syscall_arg_t));

		if (code != 180) {
			uint64_t *ip = (uint64_t *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			    BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			    (int)(*ip), (int)(*(ip + 1)),
			    (int)(*(ip + 2)), (int)(*(ip + 3)), 0);
		}

		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			copyin_count = (callp->sy_narg - args_in_regs) *
			    sizeof(syscall_arg_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)),
			    (char *)&uthread->uu_arg[args_in_regs], copyin_count);
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
		    0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *)p, vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * all system calls come through via the syscall instruction
		 * in 64 bit mode... it's 2 bytes in length
		 * move the user's pc back to repeat the syscall:
		 */
		pal_syscall_restart(thread, state);
	} else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else {	/* (not error) */
			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;
		}
	}
	DEBUG_KPRINT_SYSCALL_UNIX(
	    "unix_syscall64: error=%d retval=(%llu,%llu)\n",
	    error, regs->rax, regs->rdx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		    error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
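/*
 * The register/stack argument split above can be summarized as follows.
 * A sketch assuming the System V AMD64 convention used here (arguments in
 * rdi, rsi, rdx, r10, r8, r9); arg_plan() is a hypothetical helper, not
 * kernel API.
 */
static void
arg_plan(int narg, int indirect, int *from_regs, int *from_stack)
{
	/* an indirect call burns rdi on the syscall number, leaving 5 */
	int in_regs = indirect ? 5 : 6;

	if (in_regs > narg)
		in_regs = narg;
	*from_regs = in_regs;
	*from_stack = narg - in_regs;	/* remainder copied in from the user stack */
}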
void
mach_call_munger64(x86_saved_state_t *state)
{
	int call_number;
	int argc;
	mach_call_t mach_call;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state64_t *regs;

#if PROC_REF_DEBUG
	struct uthread *ut = get_bsdthread_info(current_thread());

	uthread_reset_proc_refcount(ut);
#endif

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK);

	DEBUG_KPRINT_SYSCALL_MACH(
	    "mach_call_munger64: code=%d(%s)\n",
	    call_number, mach_syscall_name_table[call_number]);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
	    regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		int args_in_regs = MIN(6, argc);

		memcpy(&args.arg1, &regs->rdi, args_in_regs * sizeof(syscall_arg_t));

		if (argc > 6) {
			int copyin_count;

			assert(argc <= 9);
			copyin_count = (argc - 6) * (int)sizeof(syscall_arg_t);

			if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)),
			    (char *)&args.arg7, copyin_count)) {
				regs->rax = KERN_INVALID_ARGUMENT;

				thread_exception_return();
				/* NOTREACHED */
			}
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	regs->rax = (uint64_t)mach_call((void *)&args);

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger64: retval=0x%llx\n", regs->rax);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_END,
	    regs->rax, 0, 0, 0, 0);

	throttle_lowpri_io(1);

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}
void
mach_call_munger(x86_saved_state_t *state)
{
	int argc;
	int call_number;
	mach_call_t mach_call;
	kern_return_t retval;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state32_t *regs;

#if PROC_REF_DEBUG
	struct uthread *ut = get_bsdthread_info(current_thread());

	uthread_reset_proc_refcount(ut);
#endif

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	call_number = -(regs->eax);

	DEBUG_KPRINT_SYSCALL_MACH(
	    "mach_call_munger: code=%d(%s)\n",
	    call_number, mach_syscall_name_table[call_number]);
#if DEBUG_TRACE
	kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number);
#endif

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		DEBUG_KPRINT_SYSCALL_MACH(
		    "mach_call_munger: kern_invalid 0x%x\n", regs->eax);
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}

	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		retval = mach_call_arg_munger32(regs->uesp, &args,
		    &mach_trap_table[call_number]);
		if (retval != KERN_SUCCESS) {
			regs->eax = retval;

			DEBUG_KPRINT_SYSCALL_MACH(
			    "mach_call_munger: retval=0x%x\n", retval);

			thread_exception_return();
			/* NOTREACHED */
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
	    args.arg1, args.arg2, args.arg3, args.arg4, 0);

	retval = mach_call(&args);

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_END,
	    retval, 0, 0, 0, 0);

	regs->eax = retval;

	throttle_lowpri_io(1);

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}
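/*
 * The negation above reflects how 32-bit system call numbers are encoded:
 * Mach traps arrive with a negative number in eax, BSD syscalls with a
 * positive one. A sketch; decode_call_number() is a hypothetical helper,
 * not kernel API.
 */
#include <stdint.h>

static int
decode_call_number(int32_t eax, int *is_mach_trap)
{
	*is_mach_trap = (eax < 0);
	return *is_mach_trap ? -eax : eax;	/* table index is the magnitude */
}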
uint64_t
timer_queue_expire(
	mpqueue_head_t	*queue,
	uint64_t	deadline)
{
	timer_call_t	call;

	DBG("timer_queue_expire(%p,)\n", queue);

	timer_call_lock_spin(queue);

	while (!queue_empty(&queue->head)) {
		call = TIMER_CALL(queue_first(&queue->head));

		if (call->soft_deadline <= deadline) {
			timer_call_func_t	func;
			timer_call_param_t	param0, param1;

			if (!simple_lock_try(&call->lock)) {
				/* case (2b) lock inversion, dequeue and skip */
				timer_queue_expire_lock_skips++;
				(void)remque(qe(call));
				call->async_dequeue = TRUE;
				continue;
			}

			timer_call_entry_dequeue(call);

			func = CE(call)->func;
			param0 = CE(call)->param0;
			param1 = CE(call)->param1;

			simple_unlock(&call->lock);
			timer_call_unlock(queue);

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			    DECR_TIMER_CALLOUT | DBG_FUNC_START,
			    VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);

#if CONFIG_DTRACE && (DEVELOPMENT || DEBUG)
			DTRACE_TMR3(callout__start, timer_call_func_t, func,
			    timer_call_param_t, param0,
			    timer_call_param_t, param1);
#endif

			(*func)(param0, param1);

#if CONFIG_DTRACE && (DEVELOPMENT || DEBUG)
			DTRACE_TMR3(callout__end, timer_call_func_t, func,
			    timer_call_param_t, param0,
			    timer_call_param_t, param1);
#endif

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			    DECR_TIMER_CALLOUT | DBG_FUNC_END,
			    VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);

			timer_call_lock_spin(queue);
		} else
			break;
	}

	if (!queue_empty(&queue->head))
		deadline = CE(call)->deadline;
	else
		deadline = UINT64_MAX;

	timer_call_unlock(queue);

	return (deadline);
}
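/*
 * The function above follows a classic pattern: dequeue under the lock,
 * drop the lock to run the callout, retake the lock before touching the
 * queue again. A userland sketch with a pthread mutex standing in for the
 * queue spinlock and a hypothetical singly-linked node type.
 */
#include <pthread.h>

typedef void (*callout_fn_t)(void *);

struct sketch_node {
	struct sketch_node	*next;
	callout_fn_t		fn;
	void			*arg;
};

static void
drain_callouts(struct sketch_node **head, pthread_mutex_t *lock)
{
	pthread_mutex_lock(lock);
	while (*head != NULL) {
		struct sketch_node *n = *head;

		*head = n->next;		/* dequeue while locked */
		pthread_mutex_unlock(lock);	/* never call out with the lock held */
		n->fn(n->arg);
		pthread_mutex_lock(lock);	/* retake before touching the list */
	}
	pthread_mutex_unlock(lock);
}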