// Fill 'affinities' using a scatter scheme: the cores are visited
// round-robin and every visit hands the next unused processing unit (PU)
// index of that core to the next thread, so that consecutive threads are
// placed on different cores.
//
//   t           topology queried for the number of cores, the number of PUs
//               of each core and the mask for a (core, PU index) pair
//   affinities  in/out, one entry per thread; every entry has to be empty on
//               entry and is overwritten with the mask returned by
//               t.init_thread_affinity_mask(core, pu)
//   ec          error object handed to HPX_THROWS_IF
//
// Reports bad_parameter if an entry of 'affinities' is already set, or if a
// thread cannot be placed because every PU of every core is already taken
// (more threads than PUs, or no cores at all).
void decode_scatter_distribution(hwloc_topology& t,
    std::vector<mask_type>& affinities, error_code& ec)
{
    std::size_t const num_threads = affinities.size();
    std::size_t const num_cores = t.get_number_of_cores();

    // Number of PUs already handed out on each core.
    std::vector<std::size_t> num_pus_cores(num_cores, 0);

    for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
    {
        // Set as soon as this pass over the cores places at least one
        // thread. A pass without any placement means all cores are full;
        // without this check the outer loop would spin forever.
        bool placed_thread = false;

        for (std::size_t num_core = 0; num_core != num_cores; ++num_core)
        {
            if (any(affinities[num_thread]))
            {
                HPX_THROWS_IF(ec, bad_parameter,
                    "decode_scatter_distribution",
                    boost::str(boost::format("affinity mask for thread %1% has "
                        "already been set") % num_thread));
                return;
            }

            // Check if we exceed the number of PUs on the current core.
            // If yes, we need to proceed with the next one.
            std::size_t const num_pus_core =
                t.get_number_of_core_pus(num_core);
            if (num_pus_cores[num_core] == num_pus_core)
                continue;

            affinities[num_thread] = t.init_thread_affinity_mask(
                num_core, num_pus_cores[num_core]++);
            placed_thread = true;

            if (++num_thread == num_threads)
                return;
        }

        if (!placed_thread)
        {
            HPX_THROWS_IF(ec, bad_parameter,
                "decode_scatter_distribution",
                boost::str(boost::format("unable to assign an affinity mask "
                    "to thread %1%: all processing units are already in "
                    "use") % num_thread));
            return;
        }
    }
}
void decode_scatter_distribution(hwloc_topology& t, std::vector<mask_type>& affinities, std::size_t used_cores, std::size_t max_cores, std::vector<std::size_t>& num_pus, error_code& ec) { std::size_t num_threads = affinities.size(); std::size_t num_cores = (std::min)(max_cores, t.get_number_of_cores()); std::vector<std::size_t> num_pus_cores(num_cores, 0); num_pus.resize(num_threads); for (std::size_t num_thread = 0; num_thread != num_threads; /**/) { for(std::size_t num_core = 0; num_core != num_cores; ++num_core) { if (any(affinities[num_thread])) { HPX_THROWS_IF(ec, bad_parameter, "decode_scatter_distribution", boost::str(boost::format("affinity mask for thread %1% has " "already been set") % num_thread)); return; } num_pus[num_thread] = t.get_pu_number(num_core + used_cores, num_pus_cores[num_core]); affinities[num_thread] = t.init_thread_affinity_mask( num_core + used_cores, num_pus_cores[num_core]++); if(++num_thread == num_threads) return; } } }
// Decode a mapping whose first specification (m[0]) lists NUMA nodes.
//
// The NUMA node bounds are extracted from m[0]. The masks of the listed
// nodes are OR-ed together; if neither m[1] nor m[2] carries a specification
// and more than one node is listed, only the node at position
// 'thread_index' in the list contributes. The number of cores on all nodes
// preceding the starting node is then passed on as core base index to
// decode_mapping0_unknown(), which handles the remaining levels.
//
// Returns 0 if extracting the bounds reported an error through 'ec'.
mask_type decode_mapping_numanode(hwloc_topology const& t,
    mapping_type& m, std::size_t size, std::size_t thread_index,
    error_code& ec)
{
    std::size_t const unset = std::size_t(-1);

    bounds_type b = extract_bounds(m[0], size, ec);
    if (ec)
        return 0;

    // The thread index selects a single node only at the innermost given
    // level, i.e. when no core and no PU specification follows.
    bool const select_by_thread = m[1].type_ == spec_type::unknown &&
        m[2].type_ == spec_type::unknown && b.size() > 1;
    std::size_t const selected = select_by_thread ? thread_index : unset;

    mask_type mask = 0;
    std::size_t position = 0;
    for (auto const& node : b)
    {
        if (selected == unset || position == selected)
            mask |= t.init_numa_node_affinity_mask_from_numa_node(node);
        ++position;
    }

    // First node to take into account when computing the core offset.
    std::size_t first_node = *b.begin();
    if (thread_index != unset && b.size() > 1)
        first_node += thread_index;

    // Sum up the cores of all NUMA nodes in front of the first node.
    std::size_t core_offset = 0;
    for (std::size_t node = 0; node != first_node; ++node)
        core_offset += t.get_number_of_numa_node_cores(node);

    return decode_mapping0_unknown(
        t, m, size, mask, core_offset, thread_index, ec);
}
// Decode the core level (m[1]) of a mapping.
//
// The core bounds are extracted from m[1] and interpreted relative to
// 'core_base_index'. The masks of the listed cores are OR-ed together; if no
// PU specification follows (m[2] is unknown) and more than one core is
// listed, only the core at position 'thread_index' in the list contributes.
// The number of PUs on all cores preceding the starting core is passed on as
// PU base index to decode_mapping1_unknown(), together with the incoming
// 'mask' restricted to the selected cores.
//
// Returns 0 if extracting the bounds reported an error through 'ec'.
mask_type decode_mapping_core(hwloc_topology const& t,
    mapping_type& m, std::size_t size, mask_type mask,
    std::size_t core_base_index, std::size_t thread_index, error_code& ec)
{
    std::size_t const unset = std::size_t(-1);

    bounds_type b = extract_bounds(m[1], size, ec);
    if (ec)
        return 0;

    // We have to account for the thread index at this level if there are
    // no specifications related to processing units.
    bool const select_by_thread =
        m[2].type_ == spec_type::unknown && b.size() > 1;
    std::size_t const selected = select_by_thread ? thread_index : unset;

    mask_type core_mask = 0;
    std::size_t position = 0;
    for (auto const& core : b)
    {
        if (selected == unset || position == selected)
        {
            core_mask |= t.init_core_affinity_mask_from_core(
                core + core_base_index, 0);
        }
        ++position;
    }

    // Absolute index of the first core to take into account when computing
    // the PU offset.
    std::size_t first_core = core_base_index + *b.begin();
    if (thread_index != unset && b.size() > 1)
        first_core += thread_index;

    // Sum up the PUs of all cores in front of the first core.
    std::size_t pu_offset = 0;
    for (std::size_t core = 0; core != first_core; ++core)
        pu_offset += t.get_number_of_core_pus(core);

    return decode_mapping1_unknown(
        t, m, size, mask & core_mask, pu_offset, thread_index, ec);
}
std::vector<mask_info> extract_core_masks(hwloc_topology const& t, spec_type const& s, std::size_t socket, mask_cref_type socket_mask, error_code& ec) { std::vector<mask_info> masks; switch (s.type_) { case spec_type::core: { std::size_t base = 0; std::size_t num_cores = 0; if (socket != std::size_t(-1)) { for (std::size_t i = 0; i != socket; ++i) base += t.get_number_of_socket_cores(i); num_cores = t.get_number_of_socket_cores(socket); } else { num_cores = t.get_number_of_cores(); } bounds_type bounds = extract_bounds(s, num_cores, ec); if (ec) break; for (std::int64_t index : bounds) { mask_type mask = t.init_core_affinity_mask_from_core(index + base); masks.push_back(util::make_tuple(index, mask & socket_mask)); } } break; case spec_type::unknown: { mask_type mask = extract_machine_mask(t, ec); masks.push_back(util::make_tuple( std::size_t(-1), mask & socket_mask )); } break; default: HPX_THROWS_IF(ec, bad_parameter, "extract_core_mask", boost::str(boost::format( "unexpected specification type %s" ) % spec_type::type_name(s.type_))); break; } return masks; }
void decode_balanced_distribution(hwloc_topology& t, std::vector<mask_type>& affinities, std::size_t used_cores, std::size_t max_cores, std::vector<std::size_t>& num_pus, error_code& ec) { std::size_t num_threads = affinities.size(); std::size_t num_cores = (std::min)(max_cores, t.get_number_of_cores()); std::vector<std::size_t> num_pus_cores(num_cores, 0); num_pus.resize(num_threads); // At first, calculate the number of used pus per core. // This needs to be done to make sure that we occupy all the available // cores for (std::size_t num_thread = 0; num_thread != num_threads; /**/) { for(std::size_t num_core = 0; num_core != num_cores; ++num_core) { num_pus_cores[num_core]++; if(++num_thread == num_threads) break; } } // Iterate over the cores and assigned pus per core. this additional // loop is needed so that we have consecutive worker thread numbers std::size_t num_thread = 0; for(std::size_t num_core = 0; num_core != num_cores; ++num_core) { for(std::size_t num_pu = 0; num_pu != num_pus_cores[num_core]; ++num_pu) { if (any(affinities[num_thread])) { HPX_THROWS_IF(ec, bad_parameter, "decode_balanced_distribution", boost::str(boost::format( "affinity mask for thread %1% has " "already been set" ) % num_thread)); return; } num_pus[num_thread] = t.get_pu_number(num_core + used_cores, num_pu); affinities[num_thread] = t.init_thread_affinity_mask( num_core + used_cores, num_pu); ++num_thread; } } }
// Decode one mapping into an affinity mask by dispatching on the type of
// its top-level specification m[0]:
//
//   socket    -> decode_mapping_socket()
//   numanode  -> decode_mapping_numanode()
//   unknown   -> decode_mapping0_unknown(), starting from the machine mask
//                with a core base index of 0
//
// 'affinities' is only used for its size, which is forwarded as the upper
// limit for the bounds. Any other specification type reports bad_parameter
// through 'ec' and yields a default-constructed mask.
mask_type decode_mapping(hwloc_topology const& t,
    mapping_type& m, std::vector<mask_type>& affinities,
    std::size_t thread_index, error_code& ec)
{
    std::size_t const size = affinities.size();

    switch (m[0].type_)
    {
    case spec_type::socket:
        // requested top level is a socket
        return decode_mapping_socket(t, m, size, thread_index, ec);

    case spec_type::numanode:
        // requested top level is a NUMA node
        return decode_mapping_numanode(t, m, size, thread_index, ec);

    case spec_type::unknown:
        // no top level is requested
        return decode_mapping0_unknown(t, m, size,
            t.get_machine_affinity_mask(), 0, thread_index, ec);

    default:
        break;
    }

    {
        HPX_THROWS_IF(ec, bad_parameter, "decode_mapping",
            boost::str(boost::format("unexpected specification type at "
                "index zero: %x (%s)") %
                    static_cast<unsigned>(m[0].type_) %
                    spec_type::type_name(m[0].type_)));
    }
    return mask_type();
}
// Continue decoding a mapping below its top level by dispatching on the
// type of the second specification m[1]:
//
//   core     -> decode_mapping_core(), core indices relative to
//               'core_base_index'
//   unknown  -> decode_mapping1_unknown(); the PU base index passed on is
//               the number of PUs on all cores in front of 'core_base_index'
//
// Any other specification type reports bad_parameter through 'ec' and the
// incoming 'mask' is returned unchanged.
mask_type decode_mapping0_unknown(hwloc_topology const& t,
    mapping_type& m, std::size_t size, mask_type mask,
    std::size_t core_base_index, std::size_t thread_index, error_code& ec)
{
    if (m[1].type_ == spec_type::core)
    {
        return decode_mapping_core(
            t, m, size, mask, core_base_index, thread_index, ec);
    }

    if (m[1].type_ == spec_type::unknown)
    {
        // Translate the core offset into a PU offset.
        std::size_t pu_offset = 0;
        for (std::size_t core = 0; core != core_base_index; ++core)
            pu_offset += t.get_number_of_core_pus(core);

        return decode_mapping1_unknown(
            t, m, size, mask, pu_offset, thread_index, ec);
    }

    {
        HPX_THROWS_IF(ec, bad_parameter, "decode_mapping0_unknown",
            boost::str(boost::format("unexpected specification type at "
                "index one: %x (%s)") %
                    static_cast<unsigned>(m[1].type_) %
                    spec_type::type_name(m[1].type_)));
    }
    return mask;
}
void decode_balanced_distribution(hwloc_topology& t, std::vector<mask_type>& affinities, error_code& ec) { std::size_t num_threads = affinities.size(); std::size_t num_cores = t.get_number_of_cores(); std::vector<std::size_t> num_pus_cores(num_cores, 0); // At first, calculate the number of used pus per core. // This needs to be done to make sure that we occupy all the available cores for (std::size_t num_thread = 0; num_thread != num_threads; /**/) { for(std::size_t num_core = 0; num_core != num_cores; ++num_core) { // Check if we exceed the number of PUs on the current core. // If yes, we need to proceed with the next one. std::size_t num_pus_core = t.get_number_of_core_pus(num_core); if(num_pus_cores[num_core] == num_pus_core) continue; num_pus_cores[num_core]++; if(++num_thread == num_threads) break; } } // Iterate over the cores and assigned pus per core. this additional loop // is needed so that we have consecutive worker thread numbers std::size_t num_thread = 0; for(std::size_t num_core = 0; num_core != num_cores; ++num_core) { for(std::size_t num_pu = 0; num_pu != num_pus_cores[num_core]; ++num_pu) { if (any(affinities[num_thread])) { HPX_THROWS_IF(ec, bad_parameter, "decode_balanced_distribution", boost::str(boost::format("affinity mask for thread %1% has " "already been set") % num_thread)); return; } affinities[num_thread] = t.init_thread_affinity_mask( num_core, num_pu); ++num_thread; } } }
// Build the list of (index, mask) entries for a top-level specification.
//
//   s.type_ == socket    result of extract_socket_masks() for the bounds
//                        extracted from 's', limited by the number of sockets
//   s.type_ == numanode  result of extract_numanode_masks() for the bounds
//                        extracted from 's', limited by the number of NUMA
//                        nodes
//   s.type_ == unknown   a single entry with index std::size_t(-1) holding
//                        the machine mask
//   anything else        bad_parameter is reported through 'ec' and an empty
//                        list is returned
std::vector<mask_info> extract_socket_or_numanode_masks(
    hwloc_topology const& t, spec_type const& s, error_code& ec)
{
    if (s.type_ == spec_type::socket)
    {
        // requested top level is a socket
        std::size_t const socket_count = t.get_number_of_sockets();
        return extract_socket_masks(
            t, extract_bounds(s, socket_count, ec));
    }

    if (s.type_ == spec_type::numanode)
    {
        // requested top level is a NUMA node
        std::size_t const numanode_count = t.get_number_of_numa_nodes();
        return extract_numanode_masks(
            t, extract_bounds(s, numanode_count, ec));
    }

    if (s.type_ == spec_type::unknown)
    {
        // no top level is requested, use the whole machine
        std::vector<mask_info> masks;
        masks.push_back(util::make_tuple(
            std::size_t(-1), extract_machine_mask(t, ec)));
        return masks;
    }

    {
        HPX_THROWS_IF(ec, bad_parameter,
            "extract_socket_or_numanode_mask",
            boost::str(boost::format(
                "unexpected specification type %s"
            ) % spec_type::type_name(s.type_)));
    }
    return std::vector<mask_info>();
}
// Build one (index, mask) entry for every NUMA node index listed in 'b',
// in the order of 'b'. The mask of an entry is whatever
// t.init_numa_node_affinity_mask_from_numa_node() returns for that index.
std::vector<mask_info> extract_numanode_masks(hwloc_topology const& t,
    bounds_type const& b)
{
    std::vector<mask_info> masks;

    for (bounds_type::const_iterator it = b.begin(); it != b.end(); ++it)
    {
        std::int64_t index = *it;
        masks.push_back(util::make_tuple(
            index, t.init_numa_node_affinity_mask_from_numa_node(index)));
    }

    return masks;
}
// Fill 'affinities' using a compact scheme: thread i receives the mask
// returned by t.init_thread_affinity_mask(i).
//
// Every entry of 'affinities' has to be empty on entry; the first entry that
// is already set reports bad_parameter through 'ec' and stops processing,
// leaving the entries handled so far assigned.
void decode_compact_distribution(hwloc_topology& t,
    std::vector<mask_type>& affinities, error_code& ec)
{
    std::size_t thread = 0;
    for (mask_type& affinity : affinities)
    {
        if (any(affinity))
        {
            HPX_THROWS_IF(ec, bad_parameter, "decode_compact_distribution",
                boost::str(boost::format("affinity mask for thread %1% has "
                    "already been set") % thread));
            return;
        }

        affinity = t.init_thread_affinity_mask(thread);
        ++thread;
    }
}
// Decode the processing unit level (m[2]) of a mapping.
//
// The PU bounds are extracted from m[2] and interpreted relative to
// 'pu_base_index'. The masks of the listed PUs are OR-ed together; if more
// than one PU is listed, only the PU at position 'thread_index' in the list
// contributes. The result is the incoming 'mask' restricted to the selected
// PUs.
//
// Returns 0 if extracting the bounds reported an error through 'ec'.
mask_type decode_mapping_pu(hwloc_topology const& t,
    mapping_type& m, std::size_t size, mask_type mask,
    std::size_t pu_base_index, std::size_t thread_index, error_code& ec)
{
    std::size_t const unset = std::size_t(-1);

    bounds_type b = extract_bounds(m[2], size, ec);
    if (ec)
        return 0;

    // With several PUs listed each thread picks the one at its own position.
    std::size_t const selected = (b.size() > 1) ? thread_index : unset;

    mask_type pu_mask = 0;
    std::size_t position = 0;
    for (auto const& pu : b)
    {
        if (selected == unset || position == selected)
            pu_mask |= t.init_thread_affinity_mask(pu + pu_base_index);
        ++position;
    }

    return mask & pu_mask;
}
std::vector<mask_info> extract_pu_masks(hwloc_topology const& t, spec_type const& s, std::size_t socket, std::size_t core, mask_cref_type core_mask, error_code& ec) { std::vector<mask_info> masks; switch (s.type_) { case spec_type::pu: { std::size_t num_pus = 0; std::size_t socket_base = 0; if (std::size_t(-1) != socket) { // core number is relative to socket for (std::size_t i = 0; i != socket; ++i) socket_base += t.get_number_of_socket_cores(i); } if (std::size_t(-1) != core) { num_pus = t.get_number_of_core_pus(core); } else { num_pus = t.get_number_of_pus(); } bounds_type bounds = extract_bounds(s, num_pus, ec); if (ec) break; std::size_t num_cores = t.get_number_of_cores(); for (std::int64_t index : bounds) { std::size_t base_core = socket_base; if (std::size_t(-1) != core) { base_core += core; } else { // find core the given pu belongs to std::size_t base = 0; for (/**/; base_core < num_cores; ++base_core) { std::size_t num_core_pus = t.get_number_of_core_pus(base_core); if (base + num_core_pus > std::size_t(index)) break; base += num_core_pus; } } mask_type mask = t.init_thread_affinity_mask(base_core, index); masks.push_back(util::make_tuple(index, mask & core_mask)); } } break; case spec_type::unknown: { mask_type mask = extract_machine_mask(t, ec); masks.push_back(util::make_tuple( std::size_t(-1), mask & core_mask )); } break; default: HPX_THROWS_IF(ec, bad_parameter, "extract_pu_mask", boost::str(boost::format( "unexpected specification type %s" ) % spec_type::type_name(s.type_))); break; } return masks; }
// Return the affinity mask the topology reports for the whole machine.
// 'ec' is forwarded unchanged to t.get_machine_affinity_mask().
mask_cref_type extract_machine_mask(hwloc_topology const& t, error_code& ec)
{
    mask_cref_type machine_mask = t.get_machine_affinity_mask(ec);
    return machine_mask;
}