void decode_scatter_distribution(hwloc_topology& t,
    std::vector<mask_type>& affinities, error_code& ec)
{
    std::size_t num_threads = affinities.size();
    std::size_t num_cores = t.get_number_of_cores();
    std::vector<std::size_t> num_pus_cores(num_cores, 0);

    for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
    {
        for (std::size_t num_core = 0; num_core != num_cores; ++num_core)
        {
            if (any(affinities[num_thread]))
            {
                HPX_THROWS_IF(ec, bad_parameter,
                    "decode_scatter_distribution",
                    boost::str(boost::format(
                        "affinity mask for thread %1% has "
                        "already been set") % num_thread));
                return;
            }

            // Check if we exceed the number of PUs on the current core.
            // If yes, we need to proceed with the next one.
            std::size_t num_pus_core = t.get_number_of_core_pus(num_core);
            if (num_pus_cores[num_core] == num_pus_core)
                continue;

            affinities[num_thread] = t.init_thread_affinity_mask(
                num_core, num_pus_cores[num_core]++);

            if (++num_thread == num_threads)
                return;
        }
    }
}
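// A minimal standalone sketch (not HPX code) of the scatter policy above:
// threads are handed out round-robin across cores, so consecutive worker
// threads land on different cores, with a per-core capacity check mirroring
// the one in the function. The names (scatter_sketch, pus_per_core) and the
// topology are hypothetical.
#include <cstddef>
#include <utility>
#include <vector>

std::vector<std::pair<std::size_t, std::size_t>>    // (core, pu) per thread
scatter_sketch(std::size_t num_threads,
    std::vector<std::size_t> const& pus_per_core)
{
    std::size_t const num_cores = pus_per_core.size();
    std::vector<std::size_t> used(num_cores, 0);    // PUs used per core
    std::vector<std::pair<std::size_t, std::size_t>> placement;

    for (std::size_t thread = 0; thread != num_threads; /**/)
    {
        for (std::size_t core = 0; core != num_cores; ++core)
        {
            if (used[core] == pus_per_core[core])
                continue;    // this core is full, try the next one
            placement.emplace_back(core, used[core]++);
            if (++thread == num_threads)
                return placement;
        }
    }
    return placement;
}

// For 4 threads on 2 cores with 2 PUs each this yields
// (0,0), (1,0), (0,1), (1,1): cores first, then hyperthreads. As in the
// original, the caller must not request more threads than there are PUs.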
void decode_scatter_distribution(hwloc_topology& t,
    std::vector<mask_type>& affinities, std::size_t used_cores,
    std::size_t max_cores, std::vector<std::size_t>& num_pus,
    error_code& ec)
{
    std::size_t num_threads = affinities.size();
    std::size_t num_cores = (std::min)(max_cores, t.get_number_of_cores());
    std::vector<std::size_t> num_pus_cores(num_cores, 0);

    num_pus.resize(num_threads);

    for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
    {
        for (std::size_t num_core = 0; num_core != num_cores; ++num_core)
        {
            if (any(affinities[num_thread]))
            {
                HPX_THROWS_IF(ec, bad_parameter,
                    "decode_scatter_distribution",
                    boost::str(boost::format(
                        "affinity mask for thread %1% has "
                        "already been set") % num_thread));
                return;
            }

            num_pus[num_thread] = t.get_pu_number(
                num_core + used_cores, num_pus_cores[num_core]);
            affinities[num_thread] = t.init_thread_affinity_mask(
                num_core + used_cores, num_pus_cores[num_core]++);

            if (++num_thread == num_threads)
                return;
        }
    }
}
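// Note on the overload above: it performs the same round-robin walk over the
// cores, but every core index is shifted by `used_cores` (presumably cores
// already reserved elsewhere, e.g. by other localities on the node) and the
// walk is capped at `max_cores`; it also records the selected PU number for
// each thread in `num_pus`. Unlike the first overload it carries no per-core
// PU-exhaustion check, so the caller is expected to size `max_cores` and
// the number of threads consistently.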
void decode_balanced_distribution(hwloc_topology& t,
    std::vector<mask_type>& affinities, std::size_t used_cores,
    std::size_t max_cores, std::vector<std::size_t>& num_pus,
    error_code& ec)
{
    std::size_t num_threads = affinities.size();
    std::size_t num_cores = (std::min)(max_cores, t.get_number_of_cores());
    std::vector<std::size_t> num_pus_cores(num_cores, 0);

    num_pus.resize(num_threads);

    // First, calculate the number of used PUs per core. This needs to be
    // done to make sure that we occupy all of the available cores.
    for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
    {
        for (std::size_t num_core = 0; num_core != num_cores; ++num_core)
        {
            num_pus_cores[num_core]++;
            if (++num_thread == num_threads)
                break;
        }
    }

    // Iterate over the cores and assign the PUs per core. This additional
    // loop is needed so that we end up with consecutive worker thread
    // numbers.
    std::size_t num_thread = 0;
    for (std::size_t num_core = 0; num_core != num_cores; ++num_core)
    {
        for (std::size_t num_pu = 0; num_pu != num_pus_cores[num_core];
            ++num_pu)
        {
            if (any(affinities[num_thread]))
            {
                HPX_THROWS_IF(ec, bad_parameter,
                    "decode_balanced_distribution",
                    boost::str(boost::format(
                        "affinity mask for thread %1% has "
                        "already been set") % num_thread));
                return;
            }

            num_pus[num_thread] = t.get_pu_number(
                num_core + used_cores, num_pu);
            affinities[num_thread] = t.init_thread_affinity_mask(
                num_core + used_cores, num_pu);
            ++num_thread;
        }
    }
}
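// A minimal standalone sketch (not HPX code) of the first pass above: the
// balanced policy first decides how many threads each core receives (the
// counts differ by at most one), and only then assigns consecutive thread
// numbers core by core. Like the overload above, this sketch ignores
// per-core PU capacity. The helper name is hypothetical.
#include <cstddef>
#include <vector>

std::vector<std::size_t> balanced_pus_per_core(
    std::size_t num_threads, std::size_t num_cores)
{
    std::vector<std::size_t> counts(num_cores, 0);
    for (std::size_t thread = 0; thread != num_threads; /**/)
    {
        for (std::size_t core = 0; core != num_cores; ++core)
        {
            ++counts[core];
            if (++thread == num_threads)
                break;
        }
    }
    return counts;    // e.g. 6 threads on 4 cores -> {2, 2, 1, 1}
}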
void decode_compact_distribution(hwloc_topology& t,
    std::vector<mask_type>& affinities, error_code& ec)
{
    std::size_t num_threads = affinities.size();
    for (std::size_t i = 0; i != num_threads; ++i)
    {
        if (any(affinities[i]))
        {
            HPX_THROWS_IF(ec, bad_parameter,
                "decode_compact_distribution",
                boost::str(boost::format(
                    "affinity mask for thread %1% has "
                    "already been set") % i));
            return;
        }

        affinities[i] = t.init_thread_affinity_mask(i);
    }
}
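// A minimal standalone sketch (not HPX code): compact placement binds worker
// thread i to the i-th PU in topology order, so the hyperthreads of a core
// fill up before the next core is used. This assumes a hypothetical machine
// with a uniform number of PUs per core; the helper name is made up.
#include <cstddef>
#include <utility>

std::pair<std::size_t, std::size_t>    // (core, pu-on-core) for a thread
compact_sketch(std::size_t thread, std::size_t pus_per_core)
{
    return {thread / pus_per_core, thread % pus_per_core};
}

// With 2 PUs per core, threads 0..5 map to cores 0, 0, 1, 1, 2, 2.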
void decode_balanced_distribution(hwloc_topology& t,
    std::vector<mask_type>& affinities, error_code& ec)
{
    std::size_t num_threads = affinities.size();
    std::size_t num_cores = t.get_number_of_cores();
    std::vector<std::size_t> num_pus_cores(num_cores, 0);

    // First, calculate the number of used PUs per core. This needs to be
    // done to make sure that we occupy all of the available cores.
    for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
    {
        for (std::size_t num_core = 0; num_core != num_cores; ++num_core)
        {
            // Check if we exceed the number of PUs on the current core.
            // If yes, we need to proceed with the next one.
            std::size_t num_pus_core = t.get_number_of_core_pus(num_core);
            if (num_pus_cores[num_core] == num_pus_core)
                continue;

            num_pus_cores[num_core]++;
            if (++num_thread == num_threads)
                break;
        }
    }

    // Iterate over the cores and assign the PUs per core. This additional
    // loop is needed so that we end up with consecutive worker thread
    // numbers.
    std::size_t num_thread = 0;
    for (std::size_t num_core = 0; num_core != num_cores; ++num_core)
    {
        for (std::size_t num_pu = 0; num_pu != num_pus_cores[num_core];
            ++num_pu)
        {
            if (any(affinities[num_thread]))
            {
                HPX_THROWS_IF(ec, bad_parameter,
                    "decode_balanced_distribution",
                    boost::str(boost::format(
                        "affinity mask for thread %1% has "
                        "already been set") % num_thread));
                return;
            }

            affinities[num_thread] = t.init_thread_affinity_mask(
                num_core, num_pu);
            ++num_thread;
        }
    }
}
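// Worked example for the capacity-aware first pass above: with three threads
// and per-core PU counts {1, 2}, core 0 is skipped once it is full and the
// resulting counts are {1, 2}. Note (assumption about the caller's
// contract): num_threads must not exceed the total number of PUs, since
// otherwise the first pass never advances num_thread and cannot terminate.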
mask_type decode_mapping_pu(hwloc_topology const& t, mapping_type& m,
    std::size_t size, mask_type mask, std::size_t pu_base_index,
    std::size_t thread_index, error_code& ec)
{
    bounds_type b = extract_bounds(m[2], size, ec);
    if (ec) return 0;

    std::size_t index = std::size_t(-1);
    if (b.size() > 1)
        index = thread_index;

    mask_type pu_mask = 0;
    std::size_t pu_index = 0;
    for (bounds_type::const_iterator it = b.begin(); it != b.end();
        ++it, ++pu_index)
    {
        if (index == std::size_t(-1) || pu_index == index)
            pu_mask |= t.init_thread_affinity_mask(*it + pu_base_index);
    }

    return mask & pu_mask;
}
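// A minimal standalone sketch (not HPX code) of the selection rule above: if
// the PU bounds list has at most one entry, every thread gets all of it;
// otherwise each thread picks only the entry matching its own index, and an
// out-of-range index selects nothing (the mask stays empty). The helper name
// is hypothetical.
#include <cstddef>
#include <vector>

std::vector<std::size_t> select_pus(
    std::vector<std::size_t> const& bounds, std::size_t thread_index)
{
    if (bounds.size() <= 1)
        return bounds;    // a single PU bound is shared by all threads
    if (thread_index < bounds.size())
        return {bounds[thread_index]};    // one PU per thread
    return {};    // no matching entry: empty selection
}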
std::vector<mask_info> extract_pu_masks(hwloc_topology const& t,
    spec_type const& s, std::size_t socket, std::size_t core,
    mask_cref_type core_mask, error_code& ec)
{
    std::vector<mask_info> masks;

    switch (s.type_)
    {
    case spec_type::pu:
        {
            std::size_t num_pus = 0;
            std::size_t socket_base = 0;
            if (std::size_t(-1) != socket)
            {
                // core number is relative to socket
                for (std::size_t i = 0; i != socket; ++i)
                    socket_base += t.get_number_of_socket_cores(i);
            }

            if (std::size_t(-1) != core)
            {
                num_pus = t.get_number_of_core_pus(core);
            }
            else
            {
                num_pus = t.get_number_of_pus();
            }

            bounds_type bounds = extract_bounds(s, num_pus, ec);
            if (ec) break;

            std::size_t num_cores = t.get_number_of_cores();
            for (std::int64_t index : bounds)
            {
                std::size_t base_core = socket_base;
                if (std::size_t(-1) != core)
                {
                    base_core += core;
                }
                else
                {
                    // find the core the given PU belongs to
                    std::size_t base = 0;
                    for (/**/; base_core < num_cores; ++base_core)
                    {
                        std::size_t num_core_pus =
                            t.get_number_of_core_pus(base_core);
                        if (base + num_core_pus > std::size_t(index))
                            break;
                        base += num_core_pus;
                    }
                }

                mask_type mask =
                    t.init_thread_affinity_mask(base_core, index);
                masks.push_back(util::make_tuple(index, mask & core_mask));
            }
        }
        break;

    case spec_type::unknown:
        {
            mask_type mask = extract_machine_mask(t, ec);
            masks.push_back(
                util::make_tuple(std::size_t(-1), mask & core_mask));
        }
        break;

    default:
        HPX_THROWS_IF(ec, bad_parameter, "extract_pu_mask",
            boost::str(boost::format(
                "unexpected specification type %s") %
                spec_type::type_name(s.type_)));
        break;
    }

    return masks;
}
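// A minimal standalone sketch (not HPX code) of the "find the core the given
// PU belongs to" loop above: walk the cores, accumulating their PU counts,
// until the running total passes the global PU index. The helper name and
// the pus_per_core vector are hypothetical.
#include <cstddef>
#include <vector>

std::size_t core_of_pu(std::size_t pu,
    std::vector<std::size_t> const& pus_per_core)
{
    std::size_t base = 0;
    std::size_t core = 0;
    for (/**/; core < pus_per_core.size(); ++core)
    {
        if (base + pus_per_core[core] > pu)
            break;    // pu falls inside this core's PU range
        base += pus_per_core[core];
    }
    return core;
}

// With pus_per_core = {2, 2, 4}, PU 5 lives on core 2 (its range is [4, 8)).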