void decode_scatter_distribution(hwloc_topology& t, std::vector<mask_type>& affinities, std::size_t used_cores, std::size_t max_cores, std::vector<std::size_t>& num_pus, error_code& ec) { std::size_t num_threads = affinities.size(); std::size_t num_cores = (std::min)(max_cores, t.get_number_of_cores()); std::vector<std::size_t> num_pus_cores(num_cores, 0); num_pus.resize(num_threads); for (std::size_t num_thread = 0; num_thread != num_threads; /**/) { for(std::size_t num_core = 0; num_core != num_cores; ++num_core) { if (any(affinities[num_thread])) { HPX_THROWS_IF(ec, bad_parameter, "decode_scatter_distribution", boost::str(boost::format("affinity mask for thread %1% has " "already been set") % num_thread)); return; } num_pus[num_thread] = t.get_pu_number(num_core + used_cores, num_pus_cores[num_core]); affinities[num_thread] = t.init_thread_affinity_mask( num_core + used_cores, num_pus_cores[num_core]++); if(++num_thread == num_threads) return; } } }
void decode_balanced_distribution(hwloc_topology& t, std::vector<mask_type>& affinities, std::size_t used_cores, std::size_t max_cores, std::vector<std::size_t>& num_pus, error_code& ec) { std::size_t num_threads = affinities.size(); std::size_t num_cores = (std::min)(max_cores, t.get_number_of_cores()); std::vector<std::size_t> num_pus_cores(num_cores, 0); num_pus.resize(num_threads); // At first, calculate the number of used pus per core. // This needs to be done to make sure that we occupy all the available // cores for (std::size_t num_thread = 0; num_thread != num_threads; /**/) { for(std::size_t num_core = 0; num_core != num_cores; ++num_core) { num_pus_cores[num_core]++; if(++num_thread == num_threads) break; } } // Iterate over the cores and assigned pus per core. this additional // loop is needed so that we have consecutive worker thread numbers std::size_t num_thread = 0; for(std::size_t num_core = 0; num_core != num_cores; ++num_core) { for(std::size_t num_pu = 0; num_pu != num_pus_cores[num_core]; ++num_pu) { if (any(affinities[num_thread])) { HPX_THROWS_IF(ec, bad_parameter, "decode_balanced_distribution", boost::str(boost::format( "affinity mask for thread %1% has " "already been set" ) % num_thread)); return; } num_pus[num_thread] = t.get_pu_number(num_core + used_cores, num_pu); affinities[num_thread] = t.init_thread_affinity_mask( num_core + used_cores, num_pu); ++num_thread; } } }