// @return ratio between max/min time required to access one node's // memory from each processor. static double MeasureRelativeDistance() { const size_t size = 32*MiB; void* mem = vm::Allocate(size); ASSUME_ALIGNED(mem, pageSize); const uintptr_t previousProcessorMask = os_cpu_SetThreadAffinityMask(os_cpu_ProcessorMask()); double minTime = 1e10, maxTime = 0.0; for(size_t node = 0; node < numa_NumNodes(); node++) { const uintptr_t processorMask = numa_ProcessorMaskFromNode(node); os_cpu_SetThreadAffinityMask(processorMask); const double startTime = timer_Time(); memset(mem, 0, size); const double elapsedTime = timer_Time() - startTime; minTime = std::min(minTime, elapsedTime); maxTime = std::max(maxTime, elapsedTime); } (void)os_cpu_SetThreadAffinityMask(previousProcessorMask); vm::Free(mem, size); return maxTime / minTime; }
// Builds the list of NUMA nodes.
//
// PopulateNodes() runs first. On x86/x64 builds, if an ACPI "SRAT" table
// is present and AreApicIdsReliable() holds, nodes are additionally
// populated from the proximity domains extracted from that table.
// If no node exists afterwards, a single fallback node is created.
//
// @return INFO::OK (always).
static Status InitTopology()
{
	PopulateNodes();

#if ARCH_X86_X64
	const SRAT* srat = (const SRAT*)acpi_GetTable("SRAT");
	if(srat)
	{
		// only consulted when the table exists (same order as a
		// short-circuiting &&)
		if(AreApicIdsReliable())
		{
			const ProximityDomains domains = ExtractProximityDomainsFromSRAT(srat);
			PopulateNodesFromProximityDomains(domains);
		}
	}
#endif

	// at least one node was found: nothing left to do
	if(numNodes != 0)
		return INFO::OK;

	// neither OS nor ACPI information is available:
	// add dummy node that contains all system processors
	Node* fallback = AddNode();
	fallback->processorMask = os_cpu_ProcessorMask();
	fallback->proximityDomainNumber = 0;
	fallback->nodeNumber = 0;

	return INFO::OK;
}
// Collects the descriptor bytes reported by CPUID function 2.
//
// The thread is temporarily pinned to a single processor so that all
// CPUID invocations are answered by the same CPU; the affinity mask
// returned by the pinning call is restored on every exit path.
//
// @return the accumulated descriptors, or an empty Descriptors if the
// initial CPUID call fails.
static Descriptors GetDescriptors()
{
	// ensure consistency by pinning to a CPU.
	// (don't use a hard-coded mask because process affinity may be restricted)
	const uintptr_t allProcessors = os_cpu_ProcessorMask();
	// isolate the lowest set bit. done in unsigned arithmetic because
	// negating the signed equivalent overflows when only the top bit is set.
	const uintptr_t firstProcessor = allProcessors & (~allProcessors + 1);
	const uintptr_t prevAffinityMask = os_cpu_SetThreadAffinityMask(firstProcessor);

	x86_x64::CpuidRegs regs = { 0 };
	regs.eax = 2;
	if(!x86_x64::cpuid(&regs))
	{
		// don't leave the thread pinned when bailing out early
		(void)os_cpu_SetThreadAffinityMask(prevAffinityMask);
		return Descriptors();
	}

	Descriptors descriptors;
	// low byte of EAX: number of times CPUID must be invoked in total
	size_t iterations = bits(regs.eax, 0, 7);
	for(;;)	// abort mid-loop (invoke CPUID exactly <iterations> times)
	{
		AppendDescriptors(bits(regs.eax, 8, 31), descriptors);
		AppendDescriptors(regs.ebx, descriptors);
		AppendDescriptors(regs.ecx, descriptors);
		AppendDescriptors(regs.edx, descriptors);

		// a reported count of 0 is treated like 1; decrementing it would
		// wrap around and keep the loop running practically forever.
		if(iterations <= 1)
			break;
		--iterations;

		regs.eax = 2;
		const bool ok = x86_x64::cpuid(&regs);
		ENSURE(ok);
	}

	(void)os_cpu_SetThreadAffinityMask(prevAffinityMask);

	return descriptors;
}
uintptr_t os_cpu_SetThreadAffinityMask(uintptr_t UNUSED(processorMask)) { // not yet implemented return os_cpu_ProcessorMask(); }
uintptr_t os_cpu_SetThreadAffinityMask(uintptr_t UNUSED(processorMask)) { // not yet implemented. when doing so, see http://developer.apple.com/releasenotes/Performance/RN-AffinityAPI/ return os_cpu_ProcessorMask(); }