Example #1
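/*
 * Per-CPU callout: snapshot this CPU's APERF/MPERF counters, core C3/C6/C7
 * residencies, and (when diag_pmc_enabled is set) the fixed-function PMCs
 * into its cpu_data_t, and fold the elapsed run time into cpu_rtime_total.
 */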
void cpu_powerstats(__unused void *arg) {
	cpu_data_t *cdp = current_cpu_datap();
	__unused int cnum = cdp->cpu_number;
	uint32_t cl = 0, ch = 0, mpl = 0, mph = 0, apl = 0, aph = 0;

	rdmsr_carefully(MSR_IA32_MPERF, &mpl, &mph);
	rdmsr_carefully(MSR_IA32_APERF, &apl, &aph);

	cdp->cpu_mperf = ((uint64_t)mph << 32) | mpl;
	cdp->cpu_aperf = ((uint64_t)aph << 32) | apl;

	uint64_t ctime = mach_absolute_time();
	cdp->cpu_rtime_total += ctime - cdp->cpu_ixtime;
	cdp->cpu_ixtime = ctime;

	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	cdp->cpu_c3res = ((uint64_t)ch << 32) | cl;

	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	cdp->cpu_c6res = ((uint64_t)ch << 32) | cl;

	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	cdp->cpu_c7res = ((uint64_t)ch << 32) | cl;

	if (diag_pmc_enabled) {
		uint64_t insns = read_pmc(FIXED_PMC0);
		uint64_t ucc = read_pmc(FIXED_PMC1);
		uint64_t urc = read_pmc(FIXED_PMC2);
		cdp->cpu_cur_insns = insns;
		cdp->cpu_cur_ucc = ucc;
		cdp->cpu_cur_urc = urc;
	}
}
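For context, the APERF/MPERF pair snapshotted above is normally consumed as a delta ratio: between two snapshots, the change in APERF divided by the change in MPERF gives the core's average non-halted frequency relative to its base (TSC-rate) frequency. That interpretation comes from the Intel SDM definition of the two counters, not from the source above. A minimal, self-contained sketch of that consumer-side arithmetic, using invented snapshot values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Two APERF/MPERF snapshots; the values here are invented for illustration. */
	uint64_t mperf0 = 1000000, aperf0 =  800000;	/* earlier snapshot */
	uint64_t mperf1 = 5000000, aperf1 = 4000000;	/* later snapshot */

	/* Average non-halted frequency over the interval, as a fraction of the
	 * base (MPERF) frequency.  0.8 here means the core averaged 80% of its
	 * base clock while it was in C0. */
	double ratio = (double)(aperf1 - aperf0) / (double)(mperf1 - mperf0);
	printf("average APERF/MPERF ratio: %.3f\n", ratio);
	return 0;
}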
Example #2
int 
diagCall64(x86_saved_state_t * state)
{
	uint64_t	curpos, i, j;
	uint64_t	selector, data;
	uint64_t	currNap, durNap;
	x86_saved_state64_t	*regs;
	boolean_t 	diagflag;
	uint32_t	rval = 0;

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	diagflag = ((dgWork.dgFlags & enaDiagSCs) != 0);
	selector = regs->rdi;

	switch (selector) {	/* Select the routine */
	case dgRuptStat:	/* Suck Interruption statistics */
		(void) ml_set_interrupts_enabled(TRUE);
		data = regs->rsi; /* Get the number of processors */

		if (data == 0) { /* If no location is specified for data, clear all
				  * counts
				  */
			for (i = 0; i < real_ncpus; i++) {	/* Cycle through
								 * processors */
				for (j = 0; j < 256; j++)
					cpu_data_ptr[i]->cpu_hwIntCnt[j] = 0;
			}

			lastRuptClear = mach_absolute_time();	/* Get the time of clear */
			rval = 1;	/* Normal return */
			break;
		}

		(void) copyout((char *) &real_ncpus, data, sizeof(real_ncpus));	/* Copy out number of
										 * processors */
		currNap = mach_absolute_time();	/* Get the time now */
		durNap = currNap - lastRuptClear;	/* Get the last interval
							 * duration */
		if (durNap == 0)
			durNap = 1;	/* This is a very short time, make it
					 * bigger */

		curpos = data + sizeof(real_ncpus);	/* Point to the next
							 * available spot */

		for (i = 0; i < real_ncpus; i++) {	/* Move 'em all out */
			(void) copyout((char *) &durNap, curpos, 8);	/* Copy out the time
									 * since last clear */
			(void) copyout((char *) &cpu_data_ptr[i]->cpu_hwIntCnt, curpos + 8, 256 * sizeof(uint32_t));	/* Copy out interrupt
															 * data for this
															 * processor */
			curpos = curpos + (256 * sizeof(uint32_t) + 8);	/* Point to next output
									 * slot */
		}
		rval = 1;
		break;

	case dgPowerStat:
	{
		uint32_t c2l = 0, c2h = 0, c3l = 0, c3h = 0, c6l = 0, c6h = 0, c7l = 0, c7h = 0;
		uint32_t pkg_unit_l = 0, pkg_unit_h = 0, pkg_ecl = 0, pkg_ech = 0;

		pkg_energy_statistics_t pkes;
		core_energy_stat_t cest;

		bzero(&pkes, sizeof(pkes));
		bzero(&cest, sizeof(cest));

		pkes.pkes_version = 1ULL;
		rdmsr_carefully(MSR_IA32_PKG_C2_RESIDENCY, &c2l, &c2h);
		rdmsr_carefully(MSR_IA32_PKG_C3_RESIDENCY, &c3l, &c3h);
		rdmsr_carefully(MSR_IA32_PKG_C6_RESIDENCY, &c6l, &c6h);
		rdmsr_carefully(MSR_IA32_PKG_C7_RESIDENCY, &c7l, &c7h);

		pkes.pkg_cres[0][0] = ((uint64_t)c2h << 32) | c2l;
		pkes.pkg_cres[0][1] = ((uint64_t)c3h << 32) | c3l;
		pkes.pkg_cres[0][2] = ((uint64_t)c6h << 32) | c6l;
		pkes.pkg_cres[0][3] = ((uint64_t)c7h << 32) | c7l;

		uint32_t cpumodel = cpuid_info()->cpuid_model;
		boolean_t c8avail;
		switch (cpumodel) {
		case CPUID_MODEL_HASWELL_ULT:
			c8avail = TRUE;
			break;
		default:
			c8avail = FALSE;
			break;
		}
		uint64_t c8r = ~0ULL, c9r = ~0ULL, c10r = ~0ULL;

		if (c8avail) {
			rdmsr64_carefully(MSR_IA32_PKG_C8_RESIDENCY, &c8r);
			rdmsr64_carefully(MSR_IA32_PKG_C9_RESIDENCY, &c9r);
			rdmsr64_carefully(MSR_IA32_PKG_C10_RESIDENCY, &c10r);
		}

		pkes.pkg_cres[0][4] = c8r;
		pkes.pkg_cres[0][5] = c9r;
		pkes.pkg_cres[0][6] = c10r;

		pkes.ddr_energy = ~0ULL;
		rdmsr64_carefully(MSR_IA32_DDR_ENERGY_STATUS, &pkes.ddr_energy);
		pkes.llc_flushed_cycles = ~0ULL;
		rdmsr64_carefully(MSR_IA32_LLC_FLUSHED_RESIDENCY_TIMER, &pkes.llc_flushed_cycles);

		pkes.ring_ratio_instantaneous = ~0ULL;
		rdmsr64_carefully(MSR_IA32_RING_PERF_STATUS, &pkes.ring_ratio_instantaneous);

		pkes.IA_frequency_clipping_cause = ~0ULL;
		rdmsr64_carefully(MSR_IA32_IA_PERF_LIMIT_REASONS, &pkes.IA_frequency_clipping_cause);

		pkes.GT_frequency_clipping_cause = ~0ULL;
		rdmsr64_carefully(MSR_IA32_GT_PERF_LIMIT_REASONS, &pkes.GT_frequency_clipping_cause);

		rdmsr_carefully(MSR_IA32_PKG_POWER_SKU_UNIT, &pkg_unit_l, &pkg_unit_h);
		rdmsr_carefully(MSR_IA32_PKG_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
		pkes.pkg_power_unit = ((uint64_t)pkg_unit_h << 32) | pkg_unit_l;
		pkes.pkg_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;

		rdmsr_carefully(MSR_IA32_PP0_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
		pkes.pp0_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;

		rdmsr_carefully(MSR_IA32_PP1_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
		pkes.pp1_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;

		pkes.pkg_idle_exits = current_cpu_datap()->lcpu.package->package_idle_exits;
		pkes.ncpus = real_ncpus;

		(void) ml_set_interrupts_enabled(TRUE);

		copyout(&pkes, regs->rsi, sizeof(pkes));
		curpos = regs->rsi + sizeof(pkes);

		mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_powerstats, NULL);
		
		for (i = 0; i < real_ncpus; i++) {
			(void) ml_set_interrupts_enabled(FALSE);

			cest.caperf = cpu_data_ptr[i]->cpu_aperf;
			cest.cmperf = cpu_data_ptr[i]->cpu_mperf;
			cest.ccres[0] = cpu_data_ptr[i]->cpu_c3res;
			cest.ccres[1] = cpu_data_ptr[i]->cpu_c6res;
			cest.ccres[2] = cpu_data_ptr[i]->cpu_c7res;

			bcopy(&cpu_data_ptr[i]->cpu_rtimes[0], &cest.crtimes[0], sizeof(cest.crtimes));
			bcopy(&cpu_data_ptr[i]->cpu_itimes[0], &cest.citimes[0], sizeof(cest.citimes));

			cest.citime_total = cpu_data_ptr[i]->cpu_itime_total;
			cest.crtime_total = cpu_data_ptr[i]->cpu_rtime_total;
			cest.cpu_idle_exits = cpu_data_ptr[i]->cpu_idle_exits;
			cest.cpu_insns = cpu_data_ptr[i]->cpu_cur_insns;
			cest.cpu_ucc = cpu_data_ptr[i]->cpu_cur_ucc;
			cest.cpu_urc = cpu_data_ptr[i]->cpu_cur_urc;
			(void) ml_set_interrupts_enabled(TRUE);

			copyout(&cest, curpos, sizeof(cest));
			curpos += sizeof(cest);
		}
		rval = 1;
	}
		break;
	case dgEnaPMC:
	{
		boolean_t enable = TRUE;
		uint32_t cpuinfo[4];
		/* Require architectural PMC v2 or higher, corresponding to
		 * Merom+, or equivalent virtualised facility.
		 */
		do_cpuid(0xA, &cpuinfo[0]);
		if ((cpuinfo[0] & 0xFF) >= 2) {
			mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_pmc_control, &enable);
			diag_pmc_enabled = TRUE;
		}
		rval = 1;
	}
	break;
#if	DEBUG
	case dgGzallocTest:
	{
		(void) ml_set_interrupts_enabled(TRUE);
		if (diagflag) {
			unsigned *ptr = (unsigned *)kalloc(1024);
			kfree(ptr, 1024);
			*ptr = 0x42;
		}
	}
	break;
#endif

#if PERMIT_PERMCHECK	
	case	dgPermCheck:
	{
		(void) ml_set_interrupts_enabled(TRUE);
		if (diagflag)
			rval = pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL);
	}
		break;
#endif /* PERMIT_PERMCHECK */
	default:		/* Handle invalid ones */
		rval = 0;	/* Return an exception */
	}

	regs->rax = rval;

	return rval;
}
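The dgRuptStat buffer layout is fully determined by the copyout calls above: the processor count first, then one record per CPU consisting of the 8-byte interval since the last clear followed by 256 32-bit interrupt counters. A minimal user-side parsing sketch under those assumptions (it further assumes real_ncpus is a 4-byte unsigned int, and it does not show how diagCall64 itself is invoked):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* One per-CPU record as laid out by the dgRuptStat copyout loop:
 * 8-byte interval followed by 256 uint32_t counters (1032 bytes, no padding). */
struct rupt_record {
	uint64_t interval;		/* time since last clear, mach_absolute_time units */
	uint32_t hw_int_cnt[256];	/* per-vector hardware interrupt counts */
};

void
parse_rupt_stats(const uint8_t *buf)
{
	unsigned int ncpus;
	memcpy(&ncpus, buf, sizeof(ncpus));	/* first field: number of processors */

	const uint8_t *p = buf + sizeof(ncpus);
	for (unsigned int cpu = 0; cpu < ncpus; cpu++) {
		struct rupt_record rec;
		memcpy(&rec, p, sizeof(rec));
		p += sizeof(rec);
		printf("cpu %u: interval %llu, vector 0 count %u\n",
		    cpu, (unsigned long long)rec.interval, rec.hw_int_cnt[0]);
	}
}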
Example #3
/*
 * Called when the CPU is idle.  It calls into the power management kext
 * to determine the best way to idle the CPU.
 */
void
machine_idle(void)
{
	cpu_data_t		*my_cpu		= current_cpu_datap();
	__unused uint32_t	cnum = my_cpu->cpu_number;
	uint64_t		ctime, rtime, itime;
#if CST_DEMOTION_DEBUG
	processor_t		cproc = my_cpu->cpu_processor;
	uint64_t		cwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);
#endif /* CST_DEMOTION_DEBUG */
	uint64_t esdeadline, ehdeadline;
	boolean_t do_process_pending_timers = FALSE;

	ctime = mach_absolute_time();
	esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
	ehdeadline = my_cpu->rtclock_timer.deadline;
	/* Determine if pending timers exist */
	if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
	    ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
		idle_pending_timers_processed++;
		do_process_pending_timers = TRUE;
		goto machine_idle_exit;
	} else {
		TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
	}
    
	my_cpu->lcpu.state = LCPU_IDLE;
	DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
	MARK_CPU_IDLE(cnum);

	rtime = ctime - my_cpu->cpu_ixtime;

	my_cpu->cpu_rtime_total += rtime;
	machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
#if CST_DEMOTION_DEBUG
	uint32_t cl = 0, ch = 0;
	uint64_t c3res, c6res, c7res;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = ((uint64_t)ch << 32) | cl;
#endif

	if (pmInitDone) {
		/*
		 * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
		 * were called prior to the CPU PM kext being registered.  We do
		 * this here since we know at this point the values will be first
		 * used since idle is where the decisions using these values are made.
		 */
		if (earlyMaxBusDelay != DELAY_UNSET)
			ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
		if (earlyMaxIntDelay != DELAY_UNSET)
			ml_set_maxintdelay(earlyMaxIntDelay);
	}

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->MachineIdle != NULL)
		(*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
	else {
		/*
		 * If no power management, re-enable interrupts and halt.
		 * This will keep the CPU from spinning through the scheduler
		 * and will allow at least some minimal power savings (but it
		 * may cause problems in some MP configurations w.r.t. the APIC
		 * stopping during a GV3 transition).
		 */
		pal_hlt();
		/* Once woken, re-disable interrupts. */
		pal_cli();
	}

	/*
	 * Mark the CPU as running again.
	 */
	MARK_CPU_ACTIVE(cnum);
	DBGLOG(cpu_handle, cnum, MP_UNIDLE);
	my_cpu->lcpu.state = LCPU_RUN;
	uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
	itime = ixtime - ctime;
	my_cpu->cpu_idle_exits++;
	my_cpu->cpu_itime_total += itime;
	machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
#if CST_DEMOTION_DEBUG
	cl = ch = 0;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = (((uint64_t)ch << 32) | cl) - c3res;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = (((uint64_t)ch << 32) | cl) - c6res;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = (((uint64_t)ch << 32) | cl) - c7res;

	uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
	KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
	if ((itime > 1000000) && (ndelta > 250000))
		KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
#endif

	machine_idle_exit:
	/*
	 * Re-enable interrupts.
	 */

	pal_sti();

	if (do_process_pending_timers) {
		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);

		/* Adjust to reflect that this isn't truly a package idle exit */
		__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
		lapic_timer_swi(); /* Trigger software timer interrupt */
		__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);

		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
	}
#if CST_DEMOTION_DEBUG
	uint64_t nwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);

	if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
		KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
	}
#endif    
}
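The timekeeping pattern in machine_idle() is worth separating out: the span from the previous idle exit (cpu_ixtime) to the current idle entry is charged to run time, the span from entry to the new exit is charged to idle time, and the exit timestamp becomes the new cpu_ixtime. A minimal standalone sketch of that bookkeeping, using hypothetical names rather than the real kernel structures:

#include <stdint.h>

/* Hypothetical, simplified stand-in for the per-CPU fields machine_idle() updates. */
struct idle_accounting {
	uint64_t last_idle_exit;	/* plays the role of cpu_ixtime */
	uint64_t run_total;		/* cpu_rtime_total: accumulated run time */
	uint64_t idle_total;		/* cpu_itime_total: accumulated idle time */
	uint64_t idle_exits;		/* cpu_idle_exits */
};

/* entry_time/exit_time stand in for the two mach_absolute_time() reads
 * taken on the way into and out of the idle path. */
void
account_idle_period(struct idle_accounting *a, uint64_t entry_time, uint64_t exit_time)
{
	a->run_total += entry_time - a->last_idle_exit;	/* rtime: ran since the last idle exit */
	/* ... the CPU is idle between entry_time and exit_time ... */
	a->idle_total += exit_time - entry_time;	/* itime: length of this idle period */
	a->last_idle_exit = exit_time;			/* becomes cpu_ixtime for the next pass */
	a->idle_exits++;
}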