mask_cref_type affinity_data::get_pu_mask(topology const& topology,
    std::size_t num_thread, bool numa_sensitive) const
{
    // if we have individual, predefined affinity masks, return those
    if (!affinity_masks_.empty())
        return affinity_masks_[num_thread];

    // otherwise return mask based on affinity domain
    std::size_t pu_num = get_pu_num(num_thread);
    if (0 == std::string("pu").find(affinity_domain_)) {
        // The affinity domain is 'processing unit', just convert the
        // pu-number into a bit-mask.
        return topology.get_thread_affinity_mask(pu_num, numa_sensitive);
    }
    if (0 == std::string("core").find(affinity_domain_)) {
        // The affinity domain is 'core', return a bit mask corresponding
        // to all processing units of the core containing the given
        // pu_num.
        return topology.get_core_affinity_mask(pu_num, numa_sensitive);
    }
    if (0 == std::string("numa").find(affinity_domain_)) {
        // The affinity domain is 'numa', return a bit mask corresponding
        // to all processing units of the NUMA domain containing the
        // given pu_num.
        return topology.get_numa_node_affinity_mask(pu_num, numa_sensitive);
    }

    // The affinity domain is 'machine', return a bit mask corresponding
    // to all processing units of the machine.
    BOOST_ASSERT(0 == std::string("machine").find(affinity_domain_));
    return topology.get_machine_affinity_mask();
}
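// ---------------------------------------------------------------------------
// Illustrative sketch (not part of HPX): the branches above test
// std::string("pu").find(affinity_domain_) == 0, which succeeds whenever
// affinity_domain_ is a prefix of the domain name, so abbreviations such as
// "p", "nu", or "mach" select a domain. The hypothetical helper below
// demonstrates these prefix-match semantics in isolation.
#include <iostream>
#include <string>

// returns true when 'domain' is a (possibly abbreviated) prefix of 'full';
// note that an empty 'domain' matches everything, since find("") returns 0
bool matches_domain(std::string const& full, std::string const& domain)
{
    return 0 == full.find(domain);
}

int main()
{
    std::cout << matches_domain("machine", "mach") << '\n';    // 1: prefix
    std::cout << matches_domain("numa", "numa") << '\n';       // 1: exact
    std::cout << matches_domain("core", "machine") << '\n';    // 0: no match
}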
void on_start_thread(std::size_t num_thread)
{
    if (nullptr == queues_[num_thread])
    {
        queues_[num_thread] =
            new thread_queue_type(max_queue_thread_count_);

        if (num_thread < high_priority_queues_.size())
        {
            high_priority_queues_[num_thread] =
                new thread_queue_type(max_queue_thread_count_);
        }
    }

    // forward this call to all queues etc.
    if (num_thread < high_priority_queues_.size())
        high_priority_queues_[num_thread]->on_start_thread(num_thread);

    if (num_thread == queues_.size() - 1)
        low_priority_queue_.on_start_thread(num_thread);

    queues_[num_thread]->on_start_thread(num_thread);

    std::size_t num_threads = queues_.size();

    // get numa domain masks of all queues...
    std::vector<mask_type> numa_masks(num_threads);
    std::vector<mask_type> core_masks(num_threads);
    for (std::size_t i = 0; i != num_threads; ++i)
    {
        std::size_t num_pu = get_pu_num(i);
        numa_masks[i] =
            topology_.get_numa_node_affinity_mask(num_pu, numa_sensitive_ != 0);
        core_masks[i] =
            topology_.get_core_affinity_mask(num_pu, numa_sensitive_ != 0);
    }

    // iterate over the number of threads again to determine where to
    // steal from
    std::ptrdiff_t radius =
        static_cast<std::ptrdiff_t>((num_threads / 2.0) + 0.5);
    victim_threads_[num_thread].reserve(num_threads);

    std::size_t num_pu = get_pu_num(num_thread);
    mask_cref_type pu_mask =
        topology_.get_thread_affinity_mask(num_pu, numa_sensitive_ != 0);
    mask_cref_type numa_mask = numa_masks[num_thread];
    mask_cref_type core_mask = core_masks[num_thread];

    // the thread bound to the first PU of the NUMA domain (i.e. on the
    // domain boundary) is allowed to steal across domains
    mask_type first_mask = mask_type();
    resize(first_mask, mask_size(pu_mask));

    std::size_t first = find_first(numa_mask);
    if (first != std::size_t(-1))
        set(first_mask, first);
    else
        first_mask = pu_mask;

    auto iterate = [&](hpx::util::function_nonser<bool(std::size_t)> f)
    {
        // check our neighbors in a radial fashion (left and right
        // alternating, increasing distance each iteration)
        int i = 1;
        for (/**/; i < radius; ++i)
        {
            std::ptrdiff_t left =
                (static_cast<std::ptrdiff_t>(num_thread) - i) %
                    static_cast<std::ptrdiff_t>(num_threads);
            if (left < 0)
                left = num_threads + left;

            if (f(std::size_t(left)))
            {
                victim_threads_[num_thread].push_back(
                    static_cast<std::size_t>(left));
            }

            std::size_t right = (num_thread + i) % num_threads;
            if (f(right))
            {
                victim_threads_[num_thread].push_back(right);
            }
        }

        // for an even thread count the diametrically opposite thread is
        // the same distance away on both sides, so visit it only once
        if ((num_threads % 2) == 0)
        {
            std::size_t right = (num_thread + i) % num_threads;
            if (f(right))
            {
                victim_threads_[num_thread].push_back(right);
            }
        }
    };

    // check for threads which share the same core...
    iterate(
        [&](std::size_t other_num_thread)
        {
            return any(core_mask & core_masks[other_num_thread]);
        }
    );

    // check for threads which share the same numa domain...
    iterate(
        [&](std::size_t other_num_thread)
        {
            return !any(core_mask & core_masks[other_num_thread]) &&
                any(numa_mask & numa_masks[other_num_thread]);
        }
    );

    // finally, if NUMA-aware stealing is enabled and this thread sits on
    // the NUMA domain boundary, also consider threads in other NUMA domains
    if (numa_sensitive_ != 2 && any(first_mask & pu_mask))
    {
        iterate(
            [&](std::size_t other_num_thread)
            {
                return !any(numa_mask & numa_masks[other_num_thread]);
            }
        );
    }
}
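// ---------------------------------------------------------------------------
// Illustrative sketch (not part of HPX): the iterate() lambda above visits
// potential steal victims radially -- left neighbor, then right neighbor, at
// increasing distance -- and, for an even thread count, visits the
// diametrically opposite thread exactly once at the end. The standalone
// function below (hypothetical name radial_order) reproduces just that
// visitation order, without the per-victim filtering.
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<std::size_t> radial_order(
    std::size_t num_thread, std::size_t num_threads)
{
    std::vector<std::size_t> order;

    // same radius computation as above: ceil(num_threads / 2)
    std::ptrdiff_t radius =
        static_cast<std::ptrdiff_t>((num_threads / 2.0) + 0.5);

    std::ptrdiff_t i = 1;
    for (/**/; i < radius; ++i)
    {
        std::ptrdiff_t left =
            (static_cast<std::ptrdiff_t>(num_thread) - i) %
                static_cast<std::ptrdiff_t>(num_threads);
        if (left < 0)
            left += static_cast<std::ptrdiff_t>(num_threads);

        order.push_back(static_cast<std::size_t>(left));
        order.push_back((num_thread + i) % num_threads);
    }

    // even thread count: the opposite thread is equidistant on both sides,
    // so it is appended only once
    if ((num_threads % 2) == 0)
        order.push_back((num_thread + i) % num_threads);

    return order;
}

int main()
{
    // for thread 0 of 8 the expected order is: 7 1 6 2 5 3 4
    for (std::size_t victim : radial_order(0, 8))
        std::cout << victim << ' ';
    std::cout << '\n';
}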