/*
 * cpu_powerstats -- snapshot per-CPU power/performance counters into the
 * calling CPU's cpu_data.  Invoked on every CPU (via mp_cpus_call from the
 * dgPowerStat diagnostics path) so each CPU samples its own MSRs.
 *
 * Records: APERF/MPERF, accumulated run time since the last idle-exit
 * timestamp, core C3/C6/C7 residency, and (when diagnostics enabled the
 * fixed PMCs) retired instructions and core/reference cycle counts.
 */
void
cpu_powerstats(__unused void *arg)
{
	cpu_data_t *cdp = current_cpu_datap();
	__unused int cnum = cdp->cpu_number;
	uint32_t lo = 0, hi = 0, m_lo = 0, m_hi = 0, a_lo = 0, a_hi = 0;

	/* Read MPERF/APERF back-to-back so the frequency ratio is coherent. */
	rdmsr_carefully(MSR_IA32_MPERF, &m_lo, &m_hi);
	rdmsr_carefully(MSR_IA32_APERF, &a_lo, &a_hi);
	cdp->cpu_mperf = (((uint64_t)m_hi) << 32) | m_lo;
	cdp->cpu_aperf = (((uint64_t)a_hi) << 32) | a_lo;

	/* Fold the interval since the last idle-exit timestamp into the
	 * running total, and restart the interval at "now". */
	uint64_t now = mach_absolute_time();
	cdp->cpu_rtime_total += now - cdp->cpu_ixtime;
	cdp->cpu_ixtime = now;

	/* Core-level C-state residency counters. */
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &lo, &hi);
	cdp->cpu_c3res = (((uint64_t)hi) << 32) | lo;

	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &lo, &hi);
	cdp->cpu_c6res = (((uint64_t)hi) << 32) | lo;

	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &lo, &hi);
	cdp->cpu_c7res = (((uint64_t)hi) << 32) | lo;

	/* Fixed-function PMCs are only valid once dgEnaPMC has enabled them. */
	if (diag_pmc_enabled) {
		uint64_t retired = read_pmc(FIXED_PMC0);
		uint64_t core_cycles = read_pmc(FIXED_PMC1);
		uint64_t ref_cycles = read_pmc(FIXED_PMC2);
		cdp->cpu_cur_insns = retired;
		cdp->cpu_cur_ucc = core_cycles;
		cdp->cpu_cur_urc = ref_cycles;
	}
}
/*
 * diagCall64 -- dispatcher for the 64-bit kernel diagnostics system call.
 *
 * regs->rdi selects the diagnostic routine; regs->rsi carries a selector-
 * specific argument (typically a user-space destination address for
 * copyout).  The result (1 = handled, 0 = unknown selector) is both
 * returned and stored in regs->rax.
 *
 * NOTE(review): some cases (dgGzallocTest, dgPermCheck) are gated by
 * 'diagflag' (enaDiagSCs in dgWork.dgFlags); others run unconditionally.
 * copyout return values are deliberately ignored throughout.
 */
int diagCall64(x86_saved_state_t * state)
{
	uint64_t curpos, i, j;
	uint64_t selector, data;
	uint64_t currNap, durNap;
	x86_saved_state64_t *regs;
	boolean_t diagflag;
	uint32_t rval = 0;

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	/* Capture the diagnostics-syscalls-enabled flag once, up front. */
	diagflag = ((dgWork.dgFlags & enaDiagSCs) != 0);
	selector = regs->rdi;

	switch (selector) {	/* Select the routine */
	case dgRuptStat:	/* Interruption statistics */
		(void) ml_set_interrupts_enabled(TRUE);
		data = regs->rsi;	/* User buffer address (0 => clear counts) */

		if (data == 0) {
			/* No destination specified: clear all per-CPU
			 * hardware interrupt counts and note the clear time. */
			for (i = 0; i < real_ncpus; i++) {	/* Cycle through processors */
				for (j = 0; j < 256; j++)
					cpu_data_ptr[i]->cpu_hwIntCnt[j] = 0;
			}

			lastRuptClear = mach_absolute_time();	/* Get the time of clear */
			rval = 1;	/* Normal return */
			break;
		}

		(void) copyout((char *) &real_ncpus, data, sizeof(real_ncpus));	/* Copy out number of processors */
		currNap = mach_absolute_time();	/* Get the time now */
		durNap = currNap - lastRuptClear;	/* Interval since the last clear */
		if (durNap == 0)
			durNap = 1;	/* Very short interval; avoid a zero duration */

		curpos = data + sizeof(real_ncpus);	/* Point to the next available spot */

		/* Per CPU: 8-byte interval followed by 256 uint32_t interrupt
		 * counts, packed back to back in the user buffer. */
		for (i = 0; i < real_ncpus; i++) {	/* Move 'em all out */
			(void) copyout((char *) &durNap, curpos, 8);	/* Time since last clear */
			(void) copyout((char *) &cpu_data_ptr[i]->cpu_hwIntCnt, curpos + 8, 256 * sizeof(uint32_t));	/* Interrupt data for this processor */
			curpos = curpos + (256 * sizeof(uint32_t) + 8);	/* Point to next output slot */
		}
		rval = 1;
		break;

	case dgPowerStat:
	{
		uint32_t c2l = 0, c2h = 0, c3l = 0, c3h = 0, c6l = 0, c6h = 0, c7l = 0, c7h = 0;
		uint32_t pkg_unit_l = 0, pkg_unit_h = 0, pkg_ecl = 0, pkg_ech = 0;

		pkg_energy_statistics_t pkes;
		core_energy_stat_t cest;

		bzero(&pkes, sizeof(pkes));
		bzero(&cest, sizeof(cest));

		pkes.pkes_version = 1ULL;

		/* Package C-state residency counters (split 32-bit reads,
		 * recombined below). */
		rdmsr_carefully(MSR_IA32_PKG_C2_RESIDENCY, &c2l, &c2h);
		rdmsr_carefully(MSR_IA32_PKG_C3_RESIDENCY, &c3l, &c3h);
		rdmsr_carefully(MSR_IA32_PKG_C6_RESIDENCY, &c6l, &c6h);
		rdmsr_carefully(MSR_IA32_PKG_C7_RESIDENCY, &c7l, &c7h);

		pkes.pkg_cres[0][0] = ((uint64_t)c2h << 32) | c2l;
		pkes.pkg_cres[0][1] = ((uint64_t)c3h << 32) | c3l;
		pkes.pkg_cres[0][2] = ((uint64_t)c6h << 32) | c6l;
		pkes.pkg_cres[0][3] = ((uint64_t)c7h << 32) | c7l;

		/* C8-C10 residency MSRs only exist on certain models;
		 * everything else reports ~0 (unavailable sentinel). */
		uint32_t cpumodel = cpuid_info()->cpuid_model;
		boolean_t c8avail;
		switch (cpumodel) {
		case CPUID_MODEL_HASWELL_ULT:
			c8avail = TRUE;
			break;
		default:
			c8avail = FALSE;
			break;
		}
		uint64_t c8r = ~0ULL, c9r = ~0ULL, c10r = ~0ULL;

		if (c8avail) {
			rdmsr64_carefully(MSR_IA32_PKG_C8_RESIDENCY, &c8r);
			rdmsr64_carefully(MSR_IA32_PKG_C9_RESIDENCY, &c9r);
			rdmsr64_carefully(MSR_IA32_PKG_C10_RESIDENCY, &c10r);
		}

		pkes.pkg_cres[0][4] = c8r;
		pkes.pkg_cres[0][5] = c9r;
		pkes.pkg_cres[0][6] = c10r;

		/* Each field is pre-set to ~0 so a faulting/unimplemented MSR
		 * (rdmsr_carefully tolerates #GP) leaves the sentinel intact. */
		pkes.ddr_energy = ~0ULL;
		rdmsr64_carefully(MSR_IA32_DDR_ENERGY_STATUS, &pkes.ddr_energy);
		pkes.llc_flushed_cycles = ~0ULL;
		rdmsr64_carefully(MSR_IA32_LLC_FLUSHED_RESIDENCY_TIMER, &pkes.llc_flushed_cycles);
		pkes.ring_ratio_instantaneous = ~0ULL;
		rdmsr64_carefully(MSR_IA32_RING_PERF_STATUS, &pkes.ring_ratio_instantaneous);
		pkes.IA_frequency_clipping_cause = ~0ULL;
		rdmsr64_carefully(MSR_IA32_IA_PERF_LIMIT_REASONS, &pkes.IA_frequency_clipping_cause);
		pkes.GT_frequency_clipping_cause = ~0ULL;
		rdmsr64_carefully(MSR_IA32_GT_PERF_LIMIT_REASONS, &pkes.GT_frequency_clipping_cause);

		/* RAPL energy-status counters. */
		rdmsr_carefully(MSR_IA32_PKG_POWER_SKU_UNIT, &pkg_unit_l, &pkg_unit_h);
		rdmsr_carefully(MSR_IA32_PKG_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
		pkes.pkg_power_unit = ((uint64_t)pkg_unit_h << 32) | pkg_unit_l;
		pkes.pkg_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;

		rdmsr_carefully(MSR_IA32_PP0_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
		pkes.pp0_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;
		rdmsr_carefully(MSR_IA32_PP1_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
		pkes.pp1_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;
		pkes.pkg_idle_exits = current_cpu_datap()->lcpu.package->package_idle_exits;
		pkes.ncpus = real_ncpus;

		(void) ml_set_interrupts_enabled(TRUE);

		copyout(&pkes, regs->rsi, sizeof(pkes));
		curpos = regs->rsi + sizeof(pkes);

		/* Ask every CPU to refresh its own counters, then collect. */
		mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_powerstats, NULL);

		for (i = 0; i < real_ncpus; i++) {
			/* Interrupts disabled while snapshotting the per-CPU
			 * fields, re-enabled for the (possibly faulting)
			 * copyout. */
			(void) ml_set_interrupts_enabled(FALSE);
			cest.caperf = cpu_data_ptr[i]->cpu_aperf;
			cest.cmperf = cpu_data_ptr[i]->cpu_mperf;
			cest.ccres[0] = cpu_data_ptr[i]->cpu_c3res;
			cest.ccres[1] = cpu_data_ptr[i]->cpu_c6res;
			cest.ccres[2] = cpu_data_ptr[i]->cpu_c7res;

			bcopy(&cpu_data_ptr[i]->cpu_rtimes[0], &cest.crtimes[0], sizeof(cest.crtimes));
			bcopy(&cpu_data_ptr[i]->cpu_itimes[0], &cest.citimes[0], sizeof(cest.citimes));

			cest.citime_total = cpu_data_ptr[i]->cpu_itime_total;
			cest.crtime_total = cpu_data_ptr[i]->cpu_rtime_total;
			cest.cpu_idle_exits = cpu_data_ptr[i]->cpu_idle_exits;
			cest.cpu_insns = cpu_data_ptr[i]->cpu_cur_insns;
			cest.cpu_ucc = cpu_data_ptr[i]->cpu_cur_ucc;
			cest.cpu_urc = cpu_data_ptr[i]->cpu_cur_urc;
			(void) ml_set_interrupts_enabled(TRUE);

			copyout(&cest, curpos, sizeof(cest));
			curpos += sizeof(cest);
		}
		rval = 1;
	}
		break;
	case dgEnaPMC:
	{
		boolean_t enable = TRUE;
		uint32_t cpuinfo[4];
		/* Require architectural PMC v2 or higher, corresponding to
		 * Merom+, or equivalent virtualised facility. */
		do_cpuid(0xA, &cpuinfo[0]);

		if ((cpuinfo[0] & 0xFF) >= 2) {
			mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_pmc_control, &enable);
			diag_pmc_enabled = TRUE;
		}
		rval = 1;
	}
		break;
#if DEBUG
	case dgGzallocTest:
	{
		(void) ml_set_interrupts_enabled(TRUE);
		if (diagflag) {
			unsigned *ptr = (unsigned *)kalloc(1024);
			kfree(ptr, 1024);
			/* Deliberate write-after-free: exercises the guarded
			 * allocator's use-after-free detection (DEBUG only). */
			*ptr = 0x42;
		}
	}
		break;
#endif
#if PERMIT_PERMCHECK
	case dgPermCheck:
	{
		(void) ml_set_interrupts_enabled(TRUE);
		if (diagflag)
			rval = pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL);
	}
		break;
#endif /* PERMIT_PERMCHECK */
	default:		/* Handle invalid ones */
		rval = 0;	/* Return an exception */
	}

	regs->rax = rval;
	return rval;
}
/*
 * Called when the CPU is idle.  It calls into the power management kext
 * to determine the best way to idle the CPU.
 *
 * Entered with interrupts disabled (NOTE(review): implied by the
 * pal_sti() on every exit path -- confirm against callers).  Side
 * effects: updates the per-CPU run/idle time accounting and lcpu state,
 * and always re-enables interrupts before returning.
 */
void
machine_idle(void)
{
	cpu_data_t *my_cpu = current_cpu_datap();
	__unused uint32_t cnum = my_cpu->cpu_number;
	uint64_t ctime, rtime, itime;
#if CST_DEMOTION_DEBUG
	processor_t cproc = my_cpu->cpu_processor;
	uint64_t cwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);
#endif /* CST_DEMOTION_DEBUG */
	uint64_t esdeadline, ehdeadline;
	boolean_t do_process_pending_timers = FALSE;

	ctime = mach_absolute_time();
	esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
	ehdeadline = my_cpu->rtclock_timer.deadline;
	/*
	 * Fast path: if a soft timer deadline has already passed and the hard
	 * deadline is close, skip the idle entry entirely and go service the
	 * pending timers instead of halting.
	 */
	if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
	    ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
		idle_pending_timers_processed++;
		do_process_pending_timers = TRUE;
		goto machine_idle_exit;
	} else {
		TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
	}

	my_cpu->lcpu.state = LCPU_IDLE;
	DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
	MARK_CPU_IDLE(cnum);

	/* Account the run interval that ends now (since the last idle exit). */
	rtime = ctime - my_cpu->cpu_ixtime;

	my_cpu->cpu_rtime_total += rtime;
	machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
#if CST_DEMOTION_DEBUG
	/* Snapshot core C-state residency before idling; deltas are taken
	 * after wakeup below. */
	uint32_t cl = 0, ch = 0;
	uint64_t c3res, c6res, c7res;

	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = ((uint64_t)ch << 32) | cl;
#endif

	if (pmInitDone) {
		/*
		 * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
		 * were called prior to the CPU PM kext being registered.  We do
		 * this here since we know at this point the values will be first
		 * used since idle is where the decisions using these values is made.
		 */
		if (earlyMaxBusDelay != DELAY_UNSET)
			ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
		if (earlyMaxIntDelay != DELAY_UNSET)
			ml_set_maxintdelay(earlyMaxIntDelay);
	}

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->MachineIdle != NULL)
		(*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
	else {
		/*
		 * If no power management, re-enable interrupts and halt.
		 * This will keep the CPU from spinning through the scheduler
		 * and will allow at least some minimal power savings (but it
		 * cause problems in some MP configurations w.r.t. the APIC
		 * stopping during a GV3 transition).
		 */
		pal_hlt();
		/* Once woken, re-disable interrupts. */
		pal_cli();
	}

	/*
	 * Mark the CPU as running again.
	 */
	MARK_CPU_ACTIVE(cnum);
	DBGLOG(cpu_handle, cnum, MP_UNIDLE);
	my_cpu->lcpu.state = LCPU_RUN;

	/* Account the idle interval that just ended; cpu_ixtime becomes the
	 * start of the next run interval. */
	uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
	itime = ixtime - ctime;
	my_cpu->cpu_idle_exits++;
	my_cpu->cpu_itime_total += itime;
	machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);

#if CST_DEMOTION_DEBUG
	/* Convert the pre-idle snapshots into residency deltas over the idle
	 * period, and flag idle intervals with poor C-state coverage. */
	cl = ch = 0;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = (((uint64_t)ch << 32) | cl) - c3res;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = (((uint64_t)ch << 32) | cl) - c6res;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = (((uint64_t)ch << 32) | cl) - c7res;

	uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
	KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
	if ((itime > 1000000) && (ndelta > 250000))
		KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
#endif

machine_idle_exit:
	/*
	 * Re-enable interrupts.
	 */
	pal_sti();

	if (do_process_pending_timers) {
		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);

		/* Adjust to reflect that this isn't truly a package idle exit */
		__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
		lapic_timer_swi();	/* Trigger software timer interrupt */
		__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);

		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
	}
#if CST_DEMOTION_DEBUG
	uint64_t nwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);

	if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
		KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
	}
#endif
}