/* * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer */ static uint64_t measure_tsc_frequency(void) { uint64_t tscStart; uint64_t tscEnd; uint64_t tscDelta = 0xffffffffffffffffULL; unsigned long pollCount; uint64_t retval = 0; int i; /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT * counter 2. We run this loop 3 times to make sure the cache * is hot and we take the minimum delta from all of the runs. * That is to say that we're biased towards measuring the minimum * number of TSC ticks that occur while waiting for the timer to * expire. That theoretically helps avoid inconsistencies when * running under a VM if the TSC is not virtualized and the host * steals time. The TSC is normally virtualized for VMware. */ for(i = 0; i < 10; ++i) { enable_PIT2(); set_PIT2_mode0(CALIBRATE_LATCH); tscStart = rdtsc64(); pollCount = poll_PIT2_gate(); tscEnd = rdtsc64(); /* The poll loop must have run at least a few times for accuracy */ if (pollCount <= 1) continue; /* The TSC must increment at LEAST once every millisecond. * We should have waited exactly 30 msec so the TSC delta should * be >= 30. Anything less and the processor is way too slow. */ if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC) continue; // tscDelta = MIN(tscDelta, (tscEnd - tscStart)) if ( (tscEnd - tscStart) < tscDelta ) tscDelta = tscEnd - tscStart; } /* tscDelta is now the least number of TSC ticks the processor made in * a timespan of 0.03 s (e.g. 30 milliseconds) * Linux thus divides by 30 which gives the answer in kiloHertz because * 1 / ms = kHz. But we're xnu and most of the rest of the code uses * Hz so we need to convert our milliseconds to seconds. Since we're * dividing by the milliseconds, we simply multiply by 1000. */ /* Unlike linux, we're not limited to 32-bit, but we do need to take care * that we're going to multiply by 1000 first so we do need at least some * arithmetic headroom. For now, 32-bit should be enough. * Also unlike Linux, our compiler can do 64-bit integer arithmetic. */ if (tscDelta > (1ULL<<32)) retval = 0; else { retval = tscDelta * 1000 / 30; } disable_PIT2(); return retval; }
/* * Original comment/code: * "DFE: Measures the Max Performance Frequency in Hz (64-bit)" * * Measures the Actual Performance Frequency in Hz (64-bit) * (just a naming change, mperf --> aperf ) */ static uint64_t measure_aperf_frequency(void) { uint64_t aperfStart; uint64_t aperfEnd; uint64_t aperfDelta = 0xffffffffffffffffULL; unsigned long pollCount; uint64_t retval = 0; int i; /* Time how many APERF ticks elapse in 30 msec using the 8254 PIT * counter 2. We run this loop 3 times to make sure the cache * is hot and we take the minimum delta from all of the runs. * That is to say that we're biased towards measuring the minimum * number of APERF ticks that occur while waiting for the timer to * expire. */ for(i = 0; i < 10; ++i) { enable_PIT2(); set_PIT2_mode0(CALIBRATE_LATCH); aperfStart = rdmsr64(MSR_AMD_APERF); pollCount = poll_PIT2_gate(); aperfEnd = rdmsr64(MSR_AMD_APERF); /* The poll loop must have run at least a few times for accuracy */ if (pollCount <= 1) { continue; } /* The TSC must increment at LEAST once every millisecond. * We should have waited exactly 30 msec so the APERF delta should * be >= 30. Anything less and the processor is way too slow. */ if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC) { continue; } // tscDelta = MIN(tscDelta, (tscEnd - tscStart)) if ( (aperfEnd - aperfStart) < aperfDelta ) { aperfDelta = aperfEnd - aperfStart; } } /* mperfDelta is now the least number of MPERF ticks the processor made in * a timespan of 0.03 s (e.g. 30 milliseconds) */ if (aperfDelta > (1ULL<<32)) { retval = 0; } else { retval = aperfDelta * 1000 / 30; } disable_PIT2(); return retval; }
/* * timeRDTSC() * This routine sets up PIT counter 2 to count down 1/20 of a second. * It pauses until the value is latched in the counter * and then reads the time stamp counter to return to the caller. */ uint64_t timeRDTSC(void) { int attempts = 0; uint64_t latchTime; uint64_t saveTime,intermediate; unsigned int timerValue, lastValue; //boolean_t int_enabled; /* * Table of correction factors to account for * - timer counter quantization errors, and * - undercounts 0..5 */ #define SAMPLE_CLKS_EXACT (((double) CLKNUM) / 20.0) #define SAMPLE_CLKS_INT ((int) CLKNUM / 20) #define SAMPLE_NSECS (2000000000LL) #define SAMPLE_MULTIPLIER (((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT) #define ROUND64(x) ((uint64_t)((x) + 0.5)) uint64_t scale[6] = { ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)), ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)), ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)), ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)), ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)), ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5)) }; //int_enabled = ml_set_interrupts_enabled(FALSE); restart: if (attempts >= 9) // increase to up to 9 attempts. { // This will flash-reboot. TODO: Use tscPanic instead. printf("Timestamp counter calibation failed with %d attempts\n", attempts); } attempts++; enable_PIT2(); // turn on PIT2 set_PIT2(0); // reset timer 2 to be zero latchTime = rdtsc64(); // get the time stamp to time latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes set_PIT2(SAMPLE_CLKS_INT); // set up the timer for (almost) 1/20th a second saveTime = rdtsc64(); // now time how long a 20th a second is... get_PIT2(&lastValue); get_PIT2(&lastValue); // read twice, first value may be unreliable do { intermediate = get_PIT2(&timerValue); if (timerValue > lastValue) { // Timer wrapped set_PIT2(0); disable_PIT2(); goto restart; } lastValue = timerValue; } while (timerValue > 5); printf("timerValue %d\n",timerValue); printf("intermediate 0x%016llx\n",intermediate); printf("saveTime 0x%016llx\n",saveTime); intermediate -= saveTime; // raw count for about 1/20 second intermediate *= scale[timerValue]; // rescale measured time spent intermediate /= SAMPLE_NSECS; // so its exactly 1/20 a second intermediate += latchTime; // add on our save fudge set_PIT2(0); // reset timer 2 to be zero disable_PIT2(); // turn off PIT 2 //ml_set_interrupts_enabled(int_enabled); return intermediate; }