void
pmap_destroy_pcid_sync(pmap_t p)
{
    int i;

    pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
    for (i = 0; i < PMAP_PCID_MAX_CPUS; i++) {
        if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID) {
            pmap_pcid_deallocate_pcid(i, p);
        }
    }
}
static void
mca_save_state(mca_state_t *mca_state)
{
    mca_mci_bank_t *bank;
    unsigned int   i;

    assert(!ml_get_interrupts_enabled() || get_preemption_level() > 0);

    if (mca_state == NULL)
        return;

    mca_state->mca_mcg_ctl = mca_control_MSR_present ?
        rdmsr64(IA32_MCG_CTL) : 0ULL;
    mca_state->mca_mcg_status.u64 = rdmsr64(IA32_MCG_STATUS);

    bank = (mca_mci_bank_t *) &mca_state->mca_error_bank[0];
    for (i = 0; i < mca_error_bank_count; i++, bank++) {
        bank->mca_mci_ctl        = rdmsr64(IA32_MCi_CTL(i));
        bank->mca_mci_status.u64 = rdmsr64(IA32_MCi_STATUS(i));
        if (!bank->mca_mci_status.bits.val)
            continue;
        bank->mca_mci_misc = (bank->mca_mci_status.bits.miscv) ?
            rdmsr64(IA32_MCi_MISC(i)) : 0ULL;
        bank->mca_mci_addr = (bank->mca_mci_status.bits.addrv) ?
            rdmsr64(IA32_MCi_ADDR(i)) : 0ULL;
    }

    /*
     * If we're the first thread with MCA state, point our package to it
     * and don't care about races
     */
    if (x86_package()->mca_state == NULL)
        x86_package()->mca_state = mca_state;
}
/*
 * Real-time clock device interrupt.
 */
void
rtclock_intr(
    x86_saved_state_t   *tregs)
{
    uint64_t    rip;
    boolean_t   user_mode = FALSE;

    assert(get_preemption_level() > 0);
    assert(!ml_get_interrupts_enabled());

    if (is_saved_state64(tregs) == TRUE) {
        x86_saved_state64_t *regs;

        regs = saved_state64(tregs);

        if (regs->isf.cs & 0x03)    /* low 2 bits of CS are the RPL: nonzero => user mode */
            user_mode = TRUE;
        rip = regs->isf.rip;
    } else {
        x86_saved_state32_t *regs;

        regs = saved_state32(tregs);

        if (regs->cs & 0x03)
            user_mode = TRUE;
        rip = regs->eip;
    }

    /* call the generic etimer */
    timer_intr(user_mode, rip);
}
struct kperf_sample *
kperf_intr_sample_buffer(void)
{
    unsigned ncpu = cpu_number();

    assert(ml_get_interrupts_enabled() == FALSE);
    assert(ncpu < intr_samplec);

    return &(intr_samplev[ncpu]);
}
/*
 * Invoked from power management to correct the SFLM TSC entry drift problem:
 * a small delta is added to the tsc_base.  This is equivalent to nudging time
 * backwards.  We require this to be on the order of a TSC quantum, which won't
 * cause callers of mach_absolute_time() to see time going backwards!
 */
void
rtc_clock_adjust(uint64_t tsc_base_delta)
{
    pal_rtc_nanotime_t  *rntp = &pal_rtc_nanotime_info;

    assert(!ml_get_interrupts_enabled());
    assert(tsc_base_delta < 100ULL);    /* i.e. it's small */

    _rtc_nanotime_adjust(tsc_base_delta, rntp);
    rtc_nanotime_set_commpage(rntp);
}
/*
 * AST_URGENT was detected while in kernel mode
 * Called with interrupts disabled, returns the same way
 * Must return to caller
 */
void
ast_taken_kernel(void)
{
    assert(ml_get_interrupts_enabled() == FALSE);

    thread_t thread = current_thread();

    /* Idle threads handle preemption themselves */
    if ((thread->state & TH_IDLE)) {
        ast_off(AST_PREEMPTION);
        return;
    }

    /*
     * It's possible for this to be called after AST_URGENT
     * has already been handled, due to races in enable_preemption
     */
    if (ast_peek(AST_URGENT) != AST_URGENT)
        return;

    /*
     * Don't preempt if the thread is already preparing to block.
     * TODO: the thread can cheese this with clear_wait()
     */
    if (waitq_wait_possible(thread) == FALSE) {
        /* Consume AST_URGENT or the interrupt will call us again */
        ast_consume(AST_URGENT);
        return;
    }

    /* TODO: Should we csw_check again to notice if conditions have changed? */

    ast_t urgent_reason = ast_consume(AST_PREEMPTION);

    assert(urgent_reason & AST_PREEMPT);

    counter(c_ast_taken_block++);

    thread_block_reason(THREAD_CONTINUE_NULL, NULL, urgent_reason);

    assert(ml_get_interrupts_enabled() == FALSE);
}
void
act_set_kperf(
    thread_t    thread)
{
    /* safety check */
    if (thread != current_thread())
        if (!ml_get_interrupts_enabled())
            panic("unsafe act_set_kperf operation");

    act_set_ast(thread, AST_KPERF);
}
void
clock_delay_until(
    uint64_t    deadline)
{
    uint64_t    now = mach_absolute_time();

    if (now >= deadline)
        return;

    if ((deadline - now) < (8 * sched_cswtime) ||
        get_preemption_level() != 0 ||
        ml_get_interrupts_enabled() == FALSE)
        machine_delay_until(deadline);
    else {
        assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline - sched_cswtime);

        thread_block(THREAD_CONTINUE_NULL);
    }
}
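/*
 * Hypothetical caller sketch (not part of the original source; the 100us value
 * and the function name are made up): callers typically convert a nanosecond
 * interval to absolute time units and pass clock_delay_until() a deadline
 * relative to "now".
 */
static void
delay_100us_example(void)
{
    uint64_t interval;

    /* nanoseconds_to_absolutetime() converts ns to mach_absolute_time() units */
    nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &interval);
    clock_delay_until(mach_absolute_time() + interval);
}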
/*
 * Preserve the original precise interval that the client
 * requested for comparison to the spin threshold.
 */
void
_clock_delay_until_deadline(
    uint64_t    interval,
    uint64_t    deadline)
{
    if (interval == 0)
        return;

    if (ml_delay_should_spin(interval) ||
        get_preemption_level() != 0 ||
        ml_get_interrupts_enabled() == FALSE) {
        machine_delay_until(deadline);
    } else {
        assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);

        thread_block(THREAD_CONTINUE_NULL);
    }
}
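/*
 * Hypothetical wrapper sketch (an illustration, not the original source):
 * the caller computes the deadline from "now" plus the requested interval and
 * hands the original interval along unchanged, so ml_delay_should_spin() above
 * judges the precise interval the client asked for rather than the remaining
 * distance to the deadline.
 */
static void
delay_for_abstime_example(uint64_t interval)
{
    _clock_delay_until_deadline(interval, mach_absolute_time() + interval);
}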
void
thread_tell_urgency(int urgency,
    uint64_t rt_period,
    uint64_t rt_deadline,
    thread_t nthread)
{
    uint64_t    urgency_notification_time_start, delta;
    boolean_t   urgency_assert = (urgency_notification_assert_abstime_threshold != 0);

    assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
#if DEBUG
    urgency_stats[cpu_number() % 64][urgency]++;
#endif
    if (!pmInitDone ||
        pmDispatch == NULL ||
        pmDispatch->pmThreadTellUrgency == NULL)
        return;

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, 0, 0);

    if (__improbable((urgency_assert == TRUE)))
        urgency_notification_time_start = mach_absolute_time();

    current_cpu_datap()->cpu_nthread = nthread;
    pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);

    if (__improbable((urgency_assert == TRUE))) {
        delta = mach_absolute_time() - urgency_notification_time_start;

        if (__improbable(delta > urgency_notification_max_recorded)) {
            /*
             * This is not synchronized, but it doesn't matter
             * if we (rarely) miss an event, as it is statistically
             * unlikely that it will never recur.
             */
            urgency_notification_max_recorded = delta;

            if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
                panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
        }
    }

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}
/*
 * timer_queue_migrate_cpu() is called from the Power-Management kext
 * when a logical processor goes idle (in a deep C-state) with a distant
 * deadline so that its timer queue can be moved to another processor.
 * This target processor should be the least idle (most busy) --
 * currently this is the primary processor for the calling thread's package.
 * Locking restrictions demand that the target cpu must be the boot cpu.
 */
uint32_t
timer_queue_migrate_cpu(int target_cpu)
{
    cpu_data_t  *target_cdp = cpu_datap(target_cpu);
    cpu_data_t  *cdp = current_cpu_datap();
    int         ntimers_moved;

    assert(!ml_get_interrupts_enabled());
    assert(target_cpu != cdp->cpu_number);
    assert(target_cpu == master_cpu);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        DECR_TIMER_MIGRATE | DBG_FUNC_START,
        target_cpu,
        cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >> 32),
        0, 0);

    /*
     * Move timer requests from the local queue to the target processor's.
     * The return value is the number of requests moved.  If this is 0,
     * it indicates that the first (i.e. earliest) timer is earlier than
     * the earliest for the target processor.  Since this would force a
     * resync, the move of this and all later requests is aborted.
     */
    ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue,
                                        &target_cdp->rtclock_timer.queue);

    /*
     * Assuming we moved stuff, clear local deadline.
     */
    if (ntimers_moved > 0) {
        cdp->rtclock_timer.deadline = EndOfAllTime;
        setPop(EndOfAllTime);
    }

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        DECR_TIMER_MIGRATE | DBG_FUNC_END,
        target_cpu, ntimers_moved, 0, 0, 0);

    return ntimers_moved;
}
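/*
 * Hypothetical caller sketch (an illustration, not the Power-Management kext's
 * actual code; pm_deep_idle_example is a made-up name): a non-boot processor
 * about to enter a deep C-state disables interrupts and hands its timer queue
 * to the boot processor, the only legal target per the locking restriction
 * noted above.
 */
static void
pm_deep_idle_example(void)
{
    boolean_t istate = ml_set_interrupts_enabled(FALSE);

    (void) timer_queue_migrate_cpu(master_cpu);

    (void) ml_set_interrupts_enabled(istate);
}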
void
timer_queue_expire_rescan(
    __unused void   *arg)
{
    rtclock_timer_t *mytimer;
    uint64_t        abstime;
    cpu_data_t      *pp;

    assert(ml_get_interrupts_enabled() == FALSE);
    pp = current_cpu_datap();

    mytimer = &pp->rtclock_timer;
    abstime = mach_absolute_time();

    mytimer->has_expired = TRUE;
    mytimer->deadline = timer_queue_expire_with_options(&mytimer->queue, abstime, TRUE);
    mytimer->has_expired = FALSE;
    mytimer->when_set = mach_absolute_time();

    timer_resync_deadlines();
}
/*
 * rtc_clock_napped:
 *
 * Invoked from power management when we exit from a low C-State (>= C4)
 * and the TSC has stopped counting.  The nanotime data is updated according
 * to the provided value which represents the new value for nanotime.
 */
void
rtc_clock_napped(uint64_t base, uint64_t tsc_base)
{
    pal_rtc_nanotime_t  *rntp = &pal_rtc_nanotime_info;
    uint64_t    oldnsecs;
    uint64_t    newnsecs;
    uint64_t    tsc;

    assert(!ml_get_interrupts_enabled());
    tsc = rdtsc64();
    oldnsecs = rntp->ns_base + _rtc_tsc_to_nanoseconds(tsc - rntp->tsc_base, rntp);
    newnsecs = base + _rtc_tsc_to_nanoseconds(tsc - tsc_base, rntp);

    /*
     * Only update the base values if time using the new base values
     * is later than the time using the old base values.
     */
    if (oldnsecs < newnsecs) {
        _pal_rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
        rtc_nanotime_set_commpage(rntp);
    }
}
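/*
 * Minimal sketch of the conversion being compared above (assumed from the
 * surrounding code, not copied from _rtc_tsc_to_nanoseconds): each candidate
 * reading is "base nanoseconds plus TSC ticks since that base, scaled to
 * nanoseconds".  The new (base, tsc_base) pair is adopted only when it yields
 * the later reading, so mach_absolute_time() remains monotonic across the nap.
 */
static inline uint64_t
rtc_nanotime_sketch(uint64_t tsc, uint64_t tsc_base, uint64_t ns_base, uint32_t scale)
{
    /* widen before multiplying so the 64x32 product does not overflow */
    return ns_base + (uint64_t)(((__uint128_t)(tsc - tsc_base) * scale) >> 32);
}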
/*
 * Initialize the real-time clock device.
 * In addition, various variables used to support the clock are initialized.
 */
int
rtclock_init(void)
{
    uint64_t    cycles;

    assert(!ml_get_interrupts_enabled());

    if (cpu_number() == master_cpu) {

        assert(tscFreq);
        rtc_set_timescale(tscFreq);

        /*
         * Adjust and set the exported cpu speed.
         */
        cycles = rtc_export_speed(tscFreq);

        /*
         * Set min/max to actual.
         * ACPI may update these later if speed-stepping is detected.
         */
        gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles;
        gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles;

        rtc_timer_init();
        clock_timebase_init();
        ml_init_lock_timeout();
        ml_init_delay_spin_threshold(10);
    }

    /* Set fixed configuration for lapic timers */
    rtc_timer->config();
    rtc_timer_start();

    return (1);
}
/*
 * timer_queue_migrate() is called by etimer_queue_migrate()
 * to move timer requests from the local processor (queue_from)
 * to a target processor's (queue_to).  It returns the number of
 * timers moved, or a negative code when the migration is aborted.
 */
int
timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
{
    timer_call_t    call;
    timer_call_t    head_to;
    int             timers_migrated = 0;

    DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);

    assert(!ml_get_interrupts_enabled());
    assert(queue_from != queue_to);

    if (serverperfmode) {
        /*
         * If we're running a high-end server,
         * avoid migrations... they add latency
         * and don't save us power under typical
         * server workloads
         */
        return -4;
    }

    /*
     * Take both local (from) and target (to) timer queue locks while
     * moving the timers from the local queue to the target processor.
     * We assume that the target is always the boot processor.
     * But only move if all of the following is true:
     *  - the target queue is non-empty
     *  - the local queue is non-empty
     *  - the local queue's first deadline is later than the target's
     *  - the local queue contains no non-migratable "local" call
     * so that we need not have the target resync.
     */

    timer_call_lock_spin(queue_to);

    head_to = TIMER_CALL(queue_first(&queue_to->head));
    if (queue_empty(&queue_to->head)) {
        timers_migrated = -1;
        goto abort1;
    }

    timer_call_lock_spin(queue_from);

    if (queue_empty(&queue_from->head)) {
        timers_migrated = -2;
        goto abort2;
    }

    call = TIMER_CALL(queue_first(&queue_from->head));
    if (CE(call)->deadline < CE(head_to)->deadline) {
        timers_migrated = 0;
        goto abort2;
    }

    /* perform scan for non-migratable timers */
    do {
        if (call->flags & TIMER_CALL_LOCAL) {
            timers_migrated = -3;
            goto abort2;
        }
        call = TIMER_CALL(queue_next(qe(call)));
    } while (!queue_end(&queue_from->head, qe(call)));

    /* migration loop itself -- both queues are locked */
    while (!queue_empty(&queue_from->head)) {
        call = TIMER_CALL(queue_first(&queue_from->head));
        if (!simple_lock_try(&call->lock)) {
            /* case (2b) lock order inversion, dequeue only */
            timer_queue_migrate_lock_skips++;
            (void) remque(qe(call));
            call->async_dequeue = TRUE;
            continue;
        }
        timer_call_entry_dequeue(call);
        timer_call_entry_enqueue_deadline(
            call, queue_to, CE(call)->deadline);
        timers_migrated++;
        simple_unlock(&call->lock);
    }

abort2:
    timer_call_unlock(queue_from);
abort1:
    timer_call_unlock(queue_to);

    return timers_migrated;
}
#define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)

extern zone_t   iss_zone;   /* zone for saved_state area */
extern zone_t   ids_zone;   /* zone for debug_state area */

void
act_machine_switch_pcb(__unused thread_t old, thread_t new)
{
    pcb_t                   pcb = THREAD_TO_PCB(new);
    cpu_data_t              *cdp = current_cpu_datap();
    struct real_descriptor  *ldtp;
    mach_vm_offset_t        pcb_stack_top;

    assert(new->kernel_stack != 0);
    assert(ml_get_interrupts_enabled() == FALSE);
#if DIRECTION_FLAG_DEBUG
    if (x86_get_flags() & EFL_DF) {
        panic("Direction flag detected: 0x%lx", x86_get_flags());
    }
#endif

    /*
     * Clear segment state
     * unconditionally for DS/ES/FS but more carefully for GS whose
     * cached state we track.
     */
    set_ds(NULL_SEG);
    set_es(NULL_SEG);
    set_fs(NULL_SEG);
    if (get_gs() != NULL_SEG) {
void
pmap_pcid_configure(void)
{
    int ccpu = cpu_number();
    uintptr_t cr4 = get_cr4();
    boolean_t pcid_present = FALSE;

    pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu);
    pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
    pmap_assert(cpu_mode_is64bit());

    if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof (pmap_pcid_disabled))) {
        pmap_pcid_log("PMAP: PCID feature disabled\n");
        printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled);
        kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled);
    }
    /* no_shared_cr3+PCID is currently unsupported */

#if DEBUG
    if (pmap_pcid_disabled == FALSE)
        no_shared_cr3 = FALSE;
    else
        no_shared_cr3 = TRUE;
#else
    if (no_shared_cr3)
        pmap_pcid_disabled = TRUE;
#endif
    if (pmap_pcid_disabled || no_shared_cr3) {
        unsigned i;
        /*
         * Reset PCID status, as we may have picked up
         * strays if discovered prior to platform
         * expert initialization.
         */
        for (i = 0; i < real_ncpus; i++) {
            if (cpu_datap(i)) {
                cpu_datap(i)->cpu_pmap_pcid_enabled = FALSE;
            }
            pmap_pcid_ncpus = 0;
        }
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
        return;
    }
    /*
     * DRKTODO: assert if features haven't been discovered yet. Redundant
     * invocation of cpu_mode_init and descendants masks this for now.
     */
    if ((cpuid_features() & CPUID_FEATURE_PCID))
        pcid_present = TRUE;
    else {
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
        pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu);
        return;
    }
    if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE | CR4_PGE)) {
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
        pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu);
        return;
    }
    if (pcid_present == TRUE) {
        pmap_pcid_log("Pre-PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n",
            get_cr0(), get_cr3_raw(), ccpu, cr4);

        if (cpu_number() >= PMAP_PCID_MAX_CPUS) {
            panic("PMAP_PCID_MAX_CPUS %d\n", cpu_number());
        }
        if ((get_cr4() & CR4_PGE) == 0) {
            set_cr4(get_cr4() | CR4_PGE);
            pmap_pcid_log("Toggled PGE ON (CPU: %d)\n", ccpu);
        }
        set_cr4(get_cr4() | CR4_PCIDE);
        pmap_pcid_log("Post PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n",
            get_cr0(), get_cr3_raw(), ccpu, get_cr4());
        tlb_flush_global();
        cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;

        if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) {
            pmap_pcid_log("All PCIDs enabled: real_ncpus: %d, pmap_pcid_ncpus: %d\n",
                real_ncpus, pmap_pcid_ncpus);
        }
        cpu_datap(ccpu)->cpu_pmap_pcid_coherentp =
            cpu_datap(ccpu)->cpu_pmap_pcid_coherentp_kernel =
            &(kernel_pmap->pmap_pcid_coherency_vector[ccpu]);
        cpu_datap(ccpu)->cpu_pcid_refcounts[0] = 1;
    }
}
__private_extern__ boolean_t
chudxnu_get_interrupts_enabled(void)
{
    return ml_get_interrupts_enabled();
}
/*
 * Now running in a thread.  Kick off other services,
 * invoke user bootstrap, enter pageout loop.
 */
static void
kernel_bootstrap_thread(void)
{
    processor_t processor = current_processor();

#define kernel_bootstrap_thread_kprintf(x...) /* kprintf("kernel_bootstrap_thread: " x) */
    kernel_bootstrap_thread_log("idle_thread_create");
    /*
     * Create the idle processor thread.
     */
    idle_thread_create(processor);

    /*
     * N.B. Do not stick anything else
     * before this point.
     *
     * Start up the scheduler services.
     */
    kernel_bootstrap_thread_log("sched_startup");
    sched_startup();

    /*
     * Thread lifecycle maintenance (teardown, stack allocation)
     */
    kernel_bootstrap_thread_log("thread_daemon_init");
    thread_daemon_init();

    /* Create kernel map entry reserve */
    vm_kernel_reserved_entry_init();

    /*
     * Thread callout service.
     */
    kernel_bootstrap_thread_log("thread_call_initialize");
    thread_call_initialize();

    /*
     * Remain on current processor as
     * additional processors come online.
     */
    kernel_bootstrap_thread_log("thread_bind");
    thread_bind(processor);

    /*
     * Initialize ipc thread call support.
     */
    kernel_bootstrap_thread_log("ipc_thread_call_init");
    ipc_thread_call_init();

    /*
     * Kick off memory mapping adjustments.
     */
    kernel_bootstrap_thread_log("mapping_adjust");
    mapping_adjust();

    /*
     * Create the clock service.
     */
    kernel_bootstrap_thread_log("clock_service_create");
    clock_service_create();

    /*
     * Create the device service.
     */
    device_service_create();

    kth_started = 1;

#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0
    /*
     * Create and initialize the physical copy window for processor 0
     * This is required before kicking off IOKit.
     */
    cpu_physwindow_init(0);
#endif

#if MACH_KDP
    kernel_bootstrap_log("kdp_init");
    kdp_init();
#endif

#if ALTERNATE_DEBUGGER
    alternate_debugger_init();
#endif

#if KPC
    kpc_init();
#endif

#if CONFIG_ECC_LOGGING
    ecc_log_init();
#endif

#if KPERF
    kperf_bootstrap();
#endif

#if HYPERVISOR
    hv_support_init();
#endif

#if CONFIG_TELEMETRY
    kernel_bootstrap_log("bootprofile_init");
    bootprofile_init();
#endif

#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX
    vmx_init();
#endif

#if (defined(__i386__) || defined(__x86_64__))
    if (kdebug_serial) {
        new_nkdbufs = 1;
        if (trace_typefilter == 0)
            trace_typefilter = 1;
    }
    if (turn_on_log_leaks && !new_nkdbufs)
        new_nkdbufs = 200000;
    if (trace_typefilter)
        start_kern_tracing_with_typefilter(new_nkdbufs,
                                           FALSE,
                                           trace_typefilter);
    else
        start_kern_tracing(new_nkdbufs, FALSE);
    if (turn_on_log_leaks)
        log_leaks = 1;
#endif

    kernel_bootstrap_log("prng_init");
    prng_cpu_init(master_cpu);

#ifdef IOKIT
    PE_init_iokit();
#endif

    assert(ml_get_interrupts_enabled() == FALSE);
    (void) spllo();     /* Allow interruptions */

#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0
    /*
     * Create and initialize the copy window for processor 0
     * This also allocates window space for all other processors.
     * However, this is dependent on the number of processors - so this call
     * must be after IOKit has been started because IOKit performs processor
     * discovery.
     */
    cpu_userwindow_init(0);
#endif

#if (!defined(__i386__) && !defined(__x86_64__))
    if (turn_on_log_leaks && !new_nkdbufs)
        new_nkdbufs = 200000;
    if (trace_typefilter)
        start_kern_tracing_with_typefilter(new_nkdbufs,
                                           FALSE,
                                           trace_typefilter);
    else
        start_kern_tracing(new_nkdbufs, FALSE);
    if (turn_on_log_leaks)
        log_leaks = 1;
#endif

    /*
     * Initialize the shared region module.
     */
    vm_shared_region_init();
    vm_commpage_init();
    vm_commpage_text_init();

#if CONFIG_MACF
    kernel_bootstrap_log("mac_policy_initmach");
    mac_policy_initmach();
#endif

#if CONFIG_SCHED_SFI
    kernel_bootstrap_log("sfi_init");
    sfi_init();
#endif

    /*
     * Initialize the globals used for permuting kernel
     * addresses that may be exported to userland as tokens
     * using VM_KERNEL_ADDRPERM()/VM_KERNEL_ADDRPERM_EXTERNAL().
     * Force the random number to be odd to avoid mapping a non-zero
     * word-aligned address to zero via addition.
     * Note: at this stage we can use the cryptographically secure PRNG
     * rather than early_random().
     */
    read_random(&vm_kernel_addrperm, sizeof(vm_kernel_addrperm));
    vm_kernel_addrperm |= 1;
    read_random(&buf_kernel_addrperm, sizeof(buf_kernel_addrperm));
    buf_kernel_addrperm |= 1;
    read_random(&vm_kernel_addrperm_ext, sizeof(vm_kernel_addrperm_ext));
    vm_kernel_addrperm_ext |= 1;

    vm_set_restrictions();

    /*
     * Start the user bootstrap.
     */
#ifdef MACH_BSD
    bsd_init();
#endif

    /*
     * Get rid of segments used to bootstrap kext loading.  This removes
     * the KLD, PRELINK symtab, LINKEDIT, and symtab segments/load commands.
     */
    OSKextRemoveKextBootstrap();

    serial_keyboard_init();     /* Start serial keyboard if wanted */

    vm_page_init_local_q();

    thread_bind(PROCESSOR_NULL);

    /*
     * Become the pageout daemon.
     */
    vm_pageout();
    /*NOTREACHED*/
}
/*
 * An AST flag was set while returning to user mode
 * Called with interrupts disabled, returns with interrupts enabled
 * May call continuation instead of returning
 */
void
ast_taken_user(void)
{
    assert(ml_get_interrupts_enabled() == FALSE);

    thread_t thread = current_thread();

    /* We are about to return to userspace, there must not be a pending wait */
    assert(waitq_wait_possible(thread));
    assert((thread->state & TH_IDLE) == 0);

    /* TODO: Add more 'return to userspace' assertions here */

    /*
     * If this thread was urgently preempted in userspace,
     * take the preemption before processing the ASTs.
     * The trap handler will call us again if we have more ASTs, so it's
     * safe to block in a continuation here.
     */
    if (ast_peek(AST_URGENT) == AST_URGENT) {
        ast_t urgent_reason = ast_consume(AST_PREEMPTION);

        assert(urgent_reason & AST_PREEMPT);

        /* TODO: Should we csw_check again to notice if conditions have changed? */
        thread_block_reason(thread_preempted, NULL, urgent_reason);
        /* NOTREACHED */
    }

    /*
     * AST_KEVENT does not send an IPI when setting the ast for a thread running in parallel
     * on a different processor. Only the ast bit on the thread will be set.
     *
     * Force a propagate for concurrent updates without an IPI.
     */
    ast_propagate(thread);

    /*
     * Consume all non-preemption processor ASTs matching reasons
     * because we're handling them here.
     *
     * If one of the AST handlers blocks in a continuation,
     * we'll reinstate the unserviced thread-level AST flags
     * from the thread to the processor on context switch.
     * If one of the AST handlers sets another AST,
     * the trap handler will call ast_taken_user again.
     *
     * We expect the AST handlers not to thread_exception_return
     * without an ast_propagate or context switch to reinstate
     * the per-processor ASTs.
     *
     * TODO: Why are AST_DTRACE and AST_KPERF not per-thread ASTs?
     */
    ast_t reasons = ast_consume(AST_PER_THREAD | AST_KPERF | AST_DTRACE);

    ml_set_interrupts_enabled(TRUE);

#if CONFIG_DTRACE
    if (reasons & AST_DTRACE) {
        dtrace_ast();
    }
#endif

#ifdef MACH_BSD
    if (reasons & AST_BSD) {
        thread_ast_clear(thread, AST_BSD);
        bsd_ast(thread);
    }
#endif

#if CONFIG_MACF
    if (reasons & AST_MACF) {
        thread_ast_clear(thread, AST_MACF);
        mac_thread_userret(thread);
    }
#endif

    if (reasons & AST_APC) {
        thread_ast_clear(thread, AST_APC);
        thread_apc_ast(thread);
    }

    if (reasons & AST_GUARD) {
        thread_ast_clear(thread, AST_GUARD);
        guard_ast(thread);
    }

    if (reasons & AST_LEDGER) {
        thread_ast_clear(thread, AST_LEDGER);
        ledger_ast(thread);
    }

    if (reasons & AST_KPERF) {
        thread_ast_clear(thread, AST_KPERF);
        kperf_kpc_thread_ast(thread);
    }

    if (reasons & AST_KEVENT) {
        thread_ast_clear(thread, AST_KEVENT);
        uint16_t bits = atomic_exchange(&thread->kevent_ast_bits, 0);
        if (bits) kevent_ast(thread, bits);
    }

#if CONFIG_TELEMETRY
    if (reasons & AST_TELEMETRY_ALL) {
        ast_t telemetry_reasons = reasons & AST_TELEMETRY_ALL;
        thread_ast_clear(thread, AST_TELEMETRY_ALL);
        telemetry_ast(thread, telemetry_reasons);
    }
#endif

    spl_t s = splsched();

#if CONFIG_SCHED_SFI
    /*
     * SFI is currently a per-processor AST, not a per-thread AST
     * TODO: SFI should be a per-thread AST
     */
    if (ast_consume(AST_SFI) == AST_SFI) {
        sfi_ast(thread);
    }
#endif

    /* We are about to return to userspace, there must not be a pending wait */
    assert(waitq_wait_possible(thread));

    /*
     * We've handled all per-thread ASTs, time to handle non-urgent preemption.
     *
     * We delay reading the preemption bits until now in case the thread
     * blocks while handling per-thread ASTs.
     *
     * If one of the AST handlers had managed to set a new AST bit,
     * thread_exception_return will call ast_taken again.
     */
    ast_t preemption_reasons = ast_consume(AST_PREEMPTION);

    if (preemption_reasons & AST_PREEMPT) {
        /* Conditions may have changed from when the AST_PREEMPT was originally set, so re-check. */

        thread_lock(thread);
        preemption_reasons = csw_check(current_processor(), (preemption_reasons & AST_QUANTUM));
        thread_unlock(thread);

#if CONFIG_SCHED_SFI
        /* csw_check might tell us that SFI is needed */
        if (preemption_reasons & AST_SFI) {
            sfi_ast(thread);
        }
#endif

        if (preemption_reasons & AST_PREEMPT) {
            counter(c_ast_taken_block++);
            /* switching to a continuation implicitly re-enables interrupts */
            thread_block_reason(thread_preempted, NULL, preemption_reasons);
            /* NOTREACHED */
        }
    }

    splx(s);
}