windows_topology() { // {{{ std::size_t const num_of_cores = hardware_concurrency(); numa_node_numbers_.reserve(num_of_cores); numa_node_affinity_masks_.reserve(num_of_cores); ns_numa_node_affinity_masks_.reserve(num_of_cores); thread_affinity_masks_.reserve(num_of_cores); ns_thread_affinity_masks_.reserve(num_of_cores); // Initialize each set of data entirely, as some of the initialization // routines rely on access to other pieces of topology data. The // compiler will optimize the loops where possible anyways. for (std::size_t i = 0; i < num_of_cores; ++i) numa_node_numbers_.push_back(init_numa_node_number(i)); for (std::size_t i = 0; i < num_of_cores; ++i) { numa_node_affinity_masks_.push_back( init_numa_node_affinity_mask(i, false)); ns_numa_node_affinity_masks_.push_back( init_numa_node_affinity_mask(i, true)); } for (std::size_t i = 0; i < num_of_cores; ++i) { thread_affinity_masks_.push_back( init_thread_affinity_mask(i, false)); ns_thread_affinity_masks_.push_back( init_thread_affinity_mask(i, true)); } } // }}}
void set_thread_affinity( std::size_t num_thread , bool numa_sensitive , error_code& ec = throws ) const { cpu_set_t cpu; CPU_ZERO(&cpu); CPU_SET(num_thread % hardware_concurrency(), &cpu); #if defined(HPX_HAVE_PTHREAD_SETAFFINITY_NP) if (0 == pthread_setaffinity_np(pthread_self(), sizeof(cpu), &cpu)) sleep(0); // Allow the OS to pick up the change. #else if (0 == sched_setaffinity(syscall(SYS_gettid), sizeof(cpu), &cpu)) sleep(0); // Allow the OS to pick up the change. #endif else { HPX_THROWS_IF(ec, kernel_error , "hpx::threads::set_thread_affinity" , "failed to set thread affinity"); } if (ec) return; else if (&ec != &throws) ec = make_success_code(); }
// Compute the affinity mask (single bit) identifying the core thread
// 'num_thread' should be bound to. When 'numa_sensitive' is true
// threads are distributed round-robin across NUMA nodes first;
// otherwise a thread stays on the node reported for it and is placed
// within that node. Throws kernel_error if the node mask can't be
// queried.
mask_type init_thread_affinity_mask(
    std::size_t num_thread
  , bool numa_sensitive
    )
{ // {{{
    std::size_t num_of_cores = hardware_concurrency();
    // Wrap around if more threads than cores were requested.
    std::size_t affinity = num_thread % num_of_cores;

    // GetNumaHighestNodeNumber yields the highest node *index*; +1
    // turns it into a node count. Defaults to 1 node if the call fails.
    ULONG numa_nodes = 1;
    if (GetNumaHighestNodeNumber(&numa_nodes))
        ++numa_nodes;

    // NOTE(review): assumes cores are spread evenly over nodes —
    // asymmetric machines would make this an approximation.
    std::size_t num_of_cores_per_numa_node = num_of_cores / numa_nodes;

    ULONGLONG node_affinity_mask = 0;
    ULONGLONG mask = 0x01LL;

    if (numa_sensitive) {
        // Round-robin over nodes: consecutive threads land on
        // different NUMA nodes.
        UCHAR numa_node = UCHAR(affinity % numa_nodes);

        if (!GetNumaNodeProcessorMask(numa_node, &node_affinity_mask))
        {
            HPX_THROW_EXCEPTION(kernel_error
              , "hpx::threads::windows_topology::init_thread_affinity_mask"
              , boost::str(boost::format(
                    "failed to initialize thread %1% affinity mask")
                    % num_thread));
        }

        // Start from the node's lowest core, offset by how many
        // times we've cycled through the nodes.
        mask = least_significant_bit(node_affinity_mask) <<
            (affinity / numa_nodes);
    }
    else {
        // Fill a node before moving on: use the node this thread was
        // already assigned to.
        UCHAR numa_node = UCHAR(get_numa_node_number(num_thread));

        if (!GetNumaNodeProcessorMask(numa_node, &node_affinity_mask))
        {
            HPX_THROW_EXCEPTION(kernel_error
              , "hpx::threads::windows_topology::init_thread_affinity_mask"
              , boost::str(boost::format(
                    "failed to initialize thread %1% affinity mask")
                    % num_thread));
        }

        // Offset within the node by the thread's position inside it.
        mask = least_significant_bit(node_affinity_mask) <<
            (affinity % num_of_cores_per_numa_node);
    }

    // The computed bit may fall on a core not actually present in the
    // node's mask (holes in the mask); scan upward — wrapping at bit 64
    // — until we hit a core that belongs to the node.
    while (!(mask & node_affinity_mask)) {
        mask <<= 1LL;
        if (0 == mask)
            mask = 0x01LL;
    }

    return static_cast<mask_type>(mask);
} // }}}
// Return the affinity mask covering the whole machine, built as the
// union of the per-core NUMA node masks.
mask_type get_machine_affinity_mask(
    error_code& ec = throws
    ) const
{
    if (&ec != &throws)
        ec = make_success_code();

    // The machine mask is the bit-or of all masks in the system.
    std::size_t const core_count = hardware_concurrency();
    mask_type machine_mask = 0;
    for (std::size_t core = 0; core != core_count; ++core)
        machine_mask |= numa_node_affinity_masks_[core];

    return machine_mask;
}
// Compute the processor mask of the NUMA node associated with thread
// 'num_thread'. When 'numa_sensitive' is true the node is picked
// round-robin over all nodes; otherwise the thread's pre-assigned node
// is used. Throws kernel_error if the node mask can't be queried.
//
// The original duplicated the query-and-throw sequence verbatim in both
// branches; only the node-number computation differs, so it is factored
// out here.
mask_type init_numa_node_affinity_mask(
    std::size_t num_thread
  , bool numa_sensitive
    )
{ // {{{
    std::size_t num_of_cores = hardware_concurrency();
    UCHAR affinity = UCHAR(num_thread % num_of_cores);

    // GetNumaHighestNodeNumber yields the highest node *index*; +1
    // turns it into a node count. Defaults to 1 node if the call fails.
    ULONG numa_nodes = 1;
    if (GetNumaHighestNodeNumber(&numa_nodes))
        ++numa_nodes;

    // numa_sensitive: spread consecutive threads over nodes;
    // otherwise: keep the thread on its already-assigned node.
    UCHAR numa_node = numa_sensitive ?
        UCHAR(affinity % numa_nodes) :
        UCHAR(get_numa_node_number(num_thread));

    ULONGLONG mask = 0;
    if (!GetNumaNodeProcessorMask(numa_node, &mask))
    {
        HPX_THROW_EXCEPTION(kernel_error
          , "hpx::threads::windows_topology::init_numa_node_affinity_mask"
          , boost::str(boost::format(
                "failed to initialize NUMA node affinity mask for "
                "thread %1%") % num_thread));
    }

    return static_cast<mask_type>(mask);
} // }}}
std::size_t init_numa_node_number( std::size_t num_thread ) { // {{{ if (std::size_t(-1) == num_thread) return std::size_t(-1); UCHAR node_number = 0; if (GetNumaProcessorNode(UCHAR(num_thread), &node_number)) return node_number; std::size_t num_of_cores = hardware_concurrency(); if (0 == num_of_cores) num_of_cores = 1; // assume one core std::size_t num_of_numa_cores = num_of_cores; ULONG numa_nodes = 0; if (GetNumaHighestNodeNumber(&numa_nodes) && 0 != numa_nodes) num_of_numa_cores = num_of_cores / (numa_nodes + 1); return num_thread / num_of_numa_cores; } // }}}