/*
 * Event timer interrupt.
 *
 * XXX a drawback of this implementation is that events serviced earlier
 *     must not set deadlines that occur before the entire chain completes.
 *
 * XXX a better implementation would use a set of generic callouts and
 *     iterate over them
 */
void
timer_intr(int user_mode, uint64_t rip)
{
    uint64_t        abstime;
    rtclock_timer_t *mytimer;
    cpu_data_t      *pp;
    int64_t         latency;
    uint64_t        pmdeadline;
    boolean_t       timer_processed = FALSE;

    pp = current_cpu_datap();

    SCHED_STATS_TIMER_POP(current_processor());

    abstime = mach_absolute_time();     /* Get the time now */

    /* has a pending clock timer expired? */
    mytimer = &pp->rtclock_timer;       /* Point to the event timer */

    if ((timer_processed = ((mytimer->deadline <= abstime) ||
        (abstime >= (mytimer->queue.earliest_soft_deadline))))) {
        /*
         * Log interrupt service latency (-ve value expected by tool)
         * a non-PM event is expected next.
         * The requested deadline may be earlier than when it was set
         * - use MAX to avoid reporting bogus latencies.
         */
        latency = (int64_t) (abstime - MAX(mytimer->deadline,
                                           mytimer->when_set));
        /* Log zero timer latencies when opportunistically processing
         * coalesced timers.
         */
        if (latency < 0) {
            TCOAL_DEBUG(0xEEEE0000, abstime,
                mytimer->queue.earliest_soft_deadline,
                abstime - mytimer->queue.earliest_soft_deadline, 0, 0);
            latency = 0;
        }

        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            DECR_TRAP_LATENCY | DBG_FUNC_NONE,
            -latency,
            ((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)),
            user_mode, 0, 0);

        mytimer->has_expired = TRUE;    /* Remember that we popped */
        mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
        mytimer->has_expired = FALSE;

        /* Get the time again since we ran a bit */
        abstime = mach_absolute_time();
        mytimer->when_set = abstime;
    }

    /* is it time for power management state change? */
    if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            DECR_PM_DEADLINE | DBG_FUNC_START,
            0, 0, 0, 0, 0);
        pmCPUDeadline(pp);
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            DECR_PM_DEADLINE | DBG_FUNC_END,
            0, 0, 0, 0, 0);
        timer_processed = TRUE;
    }

    /* schedule our next deadline */
    x86_lcpu()->rtcDeadline = EndOfAllTime;
    timer_resync_deadlines();

    if (__improbable(timer_processed == FALSE))
        spurious_timers++;
}
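/*
 * Illustrative sketch (not part of the original source): the latency
 * bookkeeping in timer_intr() above reduces to the computation below.
 * The helper name and its standalone form are assumptions added for
 * clarity only; timer_intr() performs this inline against the per-CPU
 * rtclock_timer rather than calling a helper.
 */
static inline int64_t
timer_intr_latency_sketch(uint64_t now, uint64_t deadline, uint64_t when_set)
{
    /* The requested deadline may be earlier than the time it was (re)set,
     * so measure against the later of the two to avoid bogus latencies. */
    uint64_t reference = (deadline > when_set) ? deadline : when_set;
    int64_t latency = (int64_t)(now - reference);

    /* Opportunistic processing of coalesced timers can run before the
     * hard deadline; report that as zero latency rather than negative. */
    return (latency < 0) ? 0 : latency;
}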
uint64_t
timer_queue_expire_with_options(
    mpqueue_head_t  *queue,
    uint64_t        deadline,
    boolean_t       rescan)
{
    timer_call_t    call = NULL;
    uint32_t        tc_iterations = 0;
    DBG("timer_queue_expire(%p,)\n", queue);

    uint64_t cur_deadline = deadline;
    timer_queue_lock_spin(queue);

    while (!queue_empty(&queue->head)) {
        /* Upon processing one or more timer calls, refresh the
         * deadline to account for time elapsed in the callout
         */
        if (++tc_iterations > 1)
            cur_deadline = mach_absolute_time();

        if (call == NULL)
            call = TIMER_CALL(queue_first(&queue->head));

        if (call->soft_deadline <= cur_deadline) {
            timer_call_func_t   func;
            timer_call_param_t  param0, param1;

            TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline,
                call->soft_deadline, 0, 0, 0);
            TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
                call,
                call->soft_deadline,
                CE(call)->deadline,
                CE(call)->entry_time, 0);

            /* Bit 0 of the "soft" deadline indicates that
             * this particular timer call is rate-limited
             * and hence shouldn't be processed before its
             * hard deadline.
             */
            if ((call->soft_deadline & 0x1) &&
                (CE(call)->deadline > cur_deadline)) {
                if (rescan == FALSE)
                    break;
            }

            if (!simple_lock_try(&call->lock)) {
                /* case (2b) lock inversion, dequeue and skip */
                timer_queue_expire_lock_skips++;
                timer_call_entry_dequeue_async(call);
                call = NULL;
                continue;
            }

            timer_call_entry_dequeue(call);

            func = CE(call)->func;
            param0 = CE(call)->param0;
            param1 = CE(call)->param1;

            simple_unlock(&call->lock);
            timer_queue_unlock(queue);

            TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                DECR_TIMER_CALLOUT | DBG_FUNC_START,
                call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);

#if CONFIG_DTRACE
            DTRACE_TMR7(callout__start, timer_call_func_t, func,
                timer_call_param_t, param0, unsigned, call->flags,
                0, (call->ttd >> 32),
                (unsigned) (call->ttd & 0xFFFFFFFF), call);
#endif
            /* Maintain time-to-deadline in per-processor data
             * structure for thread wakeup deadline statistics.
             */
            uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd));
            *ttdp = call->ttd;
            (*func)(param0, param1);
            *ttdp = 0;
#if CONFIG_DTRACE
            DTRACE_TMR4(callout__end, timer_call_func_t, func,
                param0, param1, call);
#endif

            TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                DECR_TIMER_CALLOUT | DBG_FUNC_END,
                call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);
            call = NULL;
            timer_queue_lock_spin(queue);
        } else {
            if (__probable(rescan == FALSE)) {
/*
 * Called when the CPU is idle.  It calls into the power management kext
 * to determine the best way to idle the CPU.
 */
void
machine_idle(void)
{
    cpu_data_t          *my_cpu = current_cpu_datap();
    __unused uint32_t   cnum = my_cpu->cpu_number;
    uint64_t            ctime, rtime, itime;
#if CST_DEMOTION_DEBUG
    processor_t         cproc = my_cpu->cpu_processor;
    uint64_t            cwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);
#endif /* CST_DEMOTION_DEBUG */
    uint64_t            esdeadline, ehdeadline;
    boolean_t           do_process_pending_timers = FALSE;

    ctime = mach_absolute_time();
    esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
    ehdeadline = my_cpu->rtclock_timer.deadline;

    /* Determine if pending timers exist */
    if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
        ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
        idle_pending_timers_processed++;
        do_process_pending_timers = TRUE;
        goto machine_idle_exit;
    } else {
        TCOAL_DEBUG(0xCCCC0000, ctime,
            my_cpu->rtclock_timer.queue.earliest_soft_deadline,
            my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
    }

    my_cpu->lcpu.state = LCPU_IDLE;
    DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
    MARK_CPU_IDLE(cnum);

    rtime = ctime - my_cpu->cpu_ixtime;

    my_cpu->cpu_rtime_total += rtime;
    machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0],
        &cpu_rtime_bins[0], CPU_RTIME_BINS);

#if CST_DEMOTION_DEBUG
    uint32_t cl = 0, ch = 0;
    uint64_t c3res, c6res, c7res;
    rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
    c3res = ((uint64_t)ch << 32) | cl;
    rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
    c6res = ((uint64_t)ch << 32) | cl;
    rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
    c7res = ((uint64_t)ch << 32) | cl;
#endif

    if (pmInitDone) {
        /*
         * Handle the case where ml_set_maxbusdelay() or ml_set_maxintdelay()
         * were called prior to the CPU PM kext being registered.  We do this
         * here since we know the values are first used at this point: idle
         * is where the decisions based on them are made.
         */
        if (earlyMaxBusDelay != DELAY_UNSET)
            ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));

        if (earlyMaxIntDelay != DELAY_UNSET)
            ml_set_maxintdelay(earlyMaxIntDelay);
    }

    if (pmInitDone
        && pmDispatch != NULL
        && pmDispatch->MachineIdle != NULL)
        (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
    else {
        /*
         * If no power management, re-enable interrupts and halt.
         * This will keep the CPU from spinning through the scheduler
         * and will allow at least some minimal power savings (but it
         * can cause problems in some MP configurations w.r.t. the APIC
         * stopping during a GV3 transition).
         */
        pal_hlt();

        /* Once woken, re-disable interrupts. */
        pal_cli();
    }

    /*
     * Mark the CPU as running again.
     */
    MARK_CPU_ACTIVE(cnum);
    DBGLOG(cpu_handle, cnum, MP_UNIDLE);
    my_cpu->lcpu.state = LCPU_RUN;

    uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
    itime = ixtime - ctime;
    my_cpu->cpu_idle_exits++;
    my_cpu->cpu_itime_total += itime;
    machine_classify_interval(itime, &my_cpu->cpu_itimes[0],
        &cpu_itime_bins[0], CPU_ITIME_BINS);

#if CST_DEMOTION_DEBUG
    cl = ch = 0;
    rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
    c3res = (((uint64_t)ch << 32) | cl) - c3res;
    rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
    c6res = (((uint64_t)ch << 32) | cl) - c6res;
    rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
    c7res = (((uint64_t)ch << 32) | cl) - c7res;

    uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
    KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
    if ((itime > 1000000) && (ndelta > 250000))
        KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
#endif

machine_idle_exit:
    /*
     * Re-enable interrupts.
     */
    pal_sti();

    if (do_process_pending_timers) {
        TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline,
            ehdeadline, idle_pending_timers_processed, 0);

        /* Adjust to reflect that this isn't truly a package idle exit */
        __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
        lapic_timer_swi();      /* Trigger software timer interrupt */
        __sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);

        TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline,
            idle_pending_timers_processed, 0, 0);
    }

#if CST_DEMOTION_DEBUG
    uint64_t nwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);

    if ((nwakeups == cwakeups) &&
        (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
        KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
    }
#endif
}
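/*
 * Illustrative sketch (not part of the original source): the idle-entry
 * test near the top of machine_idle() asks whether a soft deadline has
 * already passed while the hard deadline is still close enough that it
 * is cheaper to process pending timers now than to enter and immediately
 * exit idle.  The helper below restates that predicate; its name and
 * standalone form are assumptions added for clarity only.
 */
static inline boolean_t
idle_entry_should_process_timers_sketch(uint64_t now,
    uint64_t soft_deadline, uint64_t hard_deadline, uint64_t threshold)
{
    /* A soft deadline has expired, the hard deadline has not, and the
     * hard deadline is within the configured processing threshold. */
    return (now >= soft_deadline) &&
        (now < hard_deadline) &&
        ((hard_deadline - now) < threshold);
}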