Esempio n. 1
0
    // Assign one processing unit (PU) to every entry of 'affinities', sweeping
    // over the cores round-robin so that consecutive threads are placed on
    // different cores before a second PU of any core is used.
    //
    // t          - topology queried for the number of cores, the number of PUs
    //              of each core, and the affinity mask of a (core, PU) pair.
    // affinities - one slot per thread; a slot must still be empty
    //              (any() == false) when it is reached and receives the mask
    //              of the selected PU.
    // ec         - error channel handed to HPX_THROWS_IF.
    //
    // Reports bad_parameter if a slot has already been set, or if a complete
    // sweep over all cores could not place a single thread (i.e. there are
    // more threads than PUs, or there are no cores at all).
    void decode_scatter_distribution(hwloc_topology& t,
        std::vector<mask_type>& affinities, error_code& ec)
    {
        std::size_t num_threads = affinities.size();
        std::size_t num_cores = t.get_number_of_cores();

        // Number of PUs already handed out on each core.
        std::vector<std::size_t> num_pus_cores(num_cores, 0);
        for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
        {
            // Remember where this sweep started so that a sweep which makes
            // no progress can be detected below.
            std::size_t const first_thread_of_sweep = num_thread;

            for(std::size_t num_core = 0; num_core != num_cores; ++num_core)
            {
                if (any(affinities[num_thread]))
                {
                    HPX_THROWS_IF(ec, bad_parameter, "decode_scatter_distribution",
                        boost::str(boost::format("affinity mask for thread %1% has "
                            "already been set") % num_thread));
                    return;
                }

                // Check if we exceed the number of PUs on the current core.
                // If yes, we need to proceed with the next one.
                std::size_t num_pus_core = t.get_number_of_core_pus(num_core);
                if(num_pus_cores[num_core] == num_pus_core) continue;

                affinities[num_thread] = t.init_thread_affinity_mask(
                    num_core, num_pus_cores[num_core]++);

                if(++num_thread == num_threads)
                    return;
            }

            // Every core was skipped (or there are no cores): no further
            // sweep can ever place this thread. Without this check the outer
            // loop would never terminate.
            if (num_thread == first_thread_of_sweep)
            {
                HPX_THROWS_IF(ec, bad_parameter, "decode_scatter_distribution",
                    boost::str(boost::format("cannot bind thread %1%: all "
                        "processing units of the %2% available core(s) are "
                        "already in use") % num_thread % num_cores));
                return;
            }
        }
    }
Esempio n. 2
0
    // Assign one processing unit (PU) to every entry of 'affinities', sweeping
    // round-robin over a window of cores so that consecutive threads land on
    // different cores.
    //
    // t          - topology queried for the core count, PU numbers and masks.
    // affinities - one slot per thread; a slot must still be empty
    //              (any() == false) when it is reached and receives the mask
    //              of the selected PU.
    // used_cores - offset added to every core index before the topology is
    //              queried.
    // max_cores  - upper limit for the number of cores to sweep over; the
    //              smaller of this and t.get_number_of_cores() is used.
    // num_pus    - resized to the number of threads; entry i receives the
    //              value of t.get_pu_number() for the PU chosen for thread i.
    // ec         - error channel handed to HPX_THROWS_IF.
    //
    // Reports bad_parameter if a slot has already been set, or if threads are
    // requested while no core is available.
    //
    // Note that the per-core PU counter is not checked against the number of
    // PUs the core has; it simply keeps growing as the sweep wraps around.
    void decode_scatter_distribution(hwloc_topology& t,
        std::vector<mask_type>& affinities,
        std::size_t used_cores, std::size_t max_cores,
        std::vector<std::size_t>& num_pus, error_code& ec)
    {
        std::size_t num_threads = affinities.size();
        std::size_t num_cores = (std::min)(max_cores, t.get_number_of_cores());

        // Number of PUs already handed out on each core of the window.
        std::vector<std::size_t> num_pus_cores(num_cores, 0);
        num_pus.resize(num_threads);

        // With an empty core window the inner loop below never executes and
        // the thread counter never advances, so the outer loop would not
        // terminate. Report the problem instead.
        if (num_threads != 0 && num_cores == 0)
        {
            HPX_THROWS_IF(ec, bad_parameter, "decode_scatter_distribution",
                boost::str(boost::format("cannot bind %1% thread(s): no "
                    "cores are available") % num_threads));
            return;
        }

        for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
        {
            for(std::size_t num_core = 0; num_core != num_cores; ++num_core)
            {
                if (any(affinities[num_thread]))
                {
                    HPX_THROWS_IF(ec, bad_parameter, "decode_scatter_distribution",
                        boost::str(boost::format("affinity mask for thread %1% has "
                            "already been set") % num_thread));
                    return;
                }

                // Record the PU number first; the counter for this core is
                // advanced by the post-increment in the call that follows.
                num_pus[num_thread] = t.get_pu_number(num_core + used_cores,
                    num_pus_cores[num_core]);
                affinities[num_thread] = t.init_thread_affinity_mask(
                    num_core + used_cores, num_pus_cores[num_core]++);

                if(++num_thread == num_threads)
                    return;
            }
        }
    }
    // Resolve the NUMA-node level (m[0]) of a binding specification and hand
    // the result on to decode_mapping0_unknown() for the remaining levels.
    //
    // The node bounds are extracted from m[0]. If neither m[1] nor m[2] carry
    // a specification and more than one node is listed, only the node at
    // position 'thread_index' contributes to the mask; otherwise the masks of
    // all listed nodes are merged. Returns 0 if extracting the bounds failed.
    mask_type decode_mapping_numanode(hwloc_topology const& t,
        mapping_type& m, std::size_t size, std::size_t thread_index,
        error_code& ec)
    {
        std::size_t const no_index = std::size_t(-1);

        bounds_type b = extract_bounds(m[0], size, ec);
        if (ec) return 0;

        bool const several_nodes = b.size() > 1;

        // Position (within the bounds) of the single node to use, or
        // 'no_index' if all listed nodes are to be used.
        std::size_t const selected =
            (m[1].type_ == spec_type::unknown &&
             m[2].type_ == spec_type::unknown && several_nodes) ?
                thread_index : no_index;

        mask_type mask = 0;
        std::size_t position = 0;
        for (bounds_type::const_iterator node = b.begin(), last = b.end();
             node != last; ++node, ++position)
        {
            if (selected != no_index && position != selected)
                continue;

            mask |= t.init_numa_node_affinity_mask_from_numa_node(*node);
        }

        // Index of the node whose preceding nodes determine the core offset.
        std::size_t first_node = *b.begin();
        if (several_nodes && thread_index != no_index)
            first_node += thread_index;

        // Sum up the cores of all nodes in front of that node.
        std::size_t cores_before = 0;
        for (std::size_t node = 0; node != first_node; ++node)
            cores_before += t.get_number_of_numa_node_cores(node);

        return decode_mapping0_unknown(t, m, size, mask, cores_before,
            thread_index, ec);
    }
    // Resolve the core level (m[1]) of a binding specification, intersect the
    // resulting core mask with the incoming 'mask', and hand the result on to
    // decode_mapping1_unknown() for the PU level.
    //
    // Core numbers taken from the bounds are offset by 'core_base_index'.
    // Returns 0 if extracting the bounds failed.
    mask_type decode_mapping_core(hwloc_topology const& t,
        mapping_type& m, std::size_t size, mask_type mask,
        std::size_t core_base_index, std::size_t thread_index, error_code& ec)
    {
        std::size_t const no_index = std::size_t(-1);

        bounds_type b = extract_bounds(m[1], size, ec);
        if (ec) return 0;

        bool const several_cores = b.size() > 1;

        // The thread index has to be taken into account at this level if
        // nothing is specified for the processing units: in that case only
        // the core at position 'thread_index' is selected.
        std::size_t const selected =
            (m[2].type_ == spec_type::unknown && several_cores) ?
                thread_index : no_index;

        mask_type core_mask = 0;
        std::size_t position = 0;
        for (bounds_type::const_iterator core = b.begin(), last = b.end();
             core != last; ++core, ++position)
        {
            if (selected != no_index && position != selected)
                continue;

            core_mask |= t.init_core_affinity_mask_from_core(
                *core + core_base_index, 0);
        }

        // Index of the core whose preceding cores determine the PU offset.
        core_base_index += *b.begin();
        if (several_cores && thread_index != no_index)
            core_base_index += thread_index;

        // Sum up the PUs of all cores in front of that core.
        std::size_t pus_before = 0;
        for (std::size_t core = 0; core != core_base_index; ++core)
            pus_before += t.get_number_of_core_pus(core);

        return decode_mapping1_unknown(t, m, size, mask & core_mask,
            pus_before, thread_index, ec);
    }
    // Build the list of (core index, affinity mask) entries described by the
    // specification 's', each mask being restricted to 'socket_mask'.
    //
    // - spec_type::core:    one entry per index in the extracted bounds. If
    //                       'socket' is given (not std::size_t(-1)) the
    //                       indices are offset by the number of cores of all
    //                       preceding sockets.
    // - spec_type::unknown: a single entry with index std::size_t(-1) holding
    //                       the machine mask restricted to 'socket_mask'.
    // - anything else:      bad_parameter is reported.
    //
    // If an error is flagged in 'ec' the entries collected so far (possibly
    // none) are returned.
    std::vector<mask_info>
    extract_core_masks(hwloc_topology const& t, spec_type const& s,
        std::size_t socket, mask_cref_type socket_mask, error_code& ec)
    {
        std::vector<mask_info> masks;

        if (s.type_ == spec_type::unknown)
        {
            mask_type mask = extract_machine_mask(t, ec);
            masks.push_back(util::make_tuple(
                std::size_t(-1), mask & socket_mask
            ));
            return masks;
        }

        if (s.type_ != spec_type::core)
        {
            HPX_THROWS_IF(ec, bad_parameter, "extract_core_mask",
                boost::str(boost::format(
                    "unexpected specification type %s"
                ) % spec_type::type_name(s.type_)));
            return masks;
        }

        // Offset of the first core to consider and the number of cores the
        // bounds are validated against.
        std::size_t first_core = 0;
        std::size_t core_count = 0;

        if (socket == std::size_t(-1))
        {
            core_count = t.get_number_of_cores();
        }
        else
        {
            for (std::size_t prev = 0; prev != socket; ++prev)
                first_core += t.get_number_of_socket_cores(prev);
            core_count = t.get_number_of_socket_cores(socket);
        }

        bounds_type bounds = extract_bounds(s, core_count, ec);
        if (ec) return masks;

        for (std::int64_t index : bounds)
        {
            mask_type mask =
                t.init_core_affinity_mask_from_core(index + first_core);
            masks.push_back(util::make_tuple(index, mask & socket_mask));
        }

        return masks;
    }
    // Distribute the threads evenly over a window of cores while keeping
    // consecutive thread numbers on the same core.
    //
    // t          - topology queried for the core count, PU numbers and masks.
    // affinities - one slot per thread; a slot must still be empty
    //              (any() == false) when it is reached and receives the mask
    //              of the selected PU.
    // used_cores - offset added to every core index before the topology is
    //              queried.
    // max_cores  - upper limit for the number of cores to use; the smaller of
    //              this and t.get_number_of_cores() is used.
    // num_pus    - resized to the number of threads; entry i receives the
    //              value of t.get_pu_number() for the PU chosen for thread i.
    // ec         - error channel handed to HPX_THROWS_IF.
    //
    // Reports bad_parameter if a slot has already been set, or if threads are
    // requested while no core is available.
    void decode_balanced_distribution(hwloc_topology& t,
        std::vector<mask_type>& affinities,
        std::size_t used_cores, std::size_t max_cores,
        std::vector<std::size_t>& num_pus, error_code& ec)
    {
        std::size_t num_threads = affinities.size();
        std::size_t num_cores = (std::min)(max_cores, t.get_number_of_cores());

        std::vector<std::size_t> num_pus_cores(num_cores, 0);
        num_pus.resize(num_threads);

        // With an empty core window the counting loop below never advances
        // the thread counter and would not terminate. Report the problem
        // instead.
        if (num_threads != 0 && num_cores == 0)
        {
            HPX_THROWS_IF(ec, bad_parameter,
                "decode_balanced_distribution",
                boost::str(boost::format(
                    "cannot bind %1% thread(s): no cores are available"
                ) % num_threads));
            return;
        }

        // At first, calculate the number of used pus per core.
        // This needs to be done to make sure that we occupy all the available
        // cores
        for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
        {
            for(std::size_t num_core = 0; num_core != num_cores; ++num_core)
            {
                num_pus_cores[num_core]++;
                if(++num_thread == num_threads)
                    break;
            }
        }

        // Iterate over the cores and assigned pus per core. this additional
        // loop is needed so that we have consecutive worker thread numbers
        std::size_t num_thread = 0;
        for(std::size_t num_core = 0; num_core != num_cores; ++num_core)
        {
            for(std::size_t num_pu = 0; num_pu != num_pus_cores[num_core]; ++num_pu)
            {
                if (any(affinities[num_thread]))
                {
                    HPX_THROWS_IF(ec, bad_parameter,
                        "decode_balanced_distribution",
                        boost::str(boost::format(
                            "affinity mask for thread %1% has "
                            "already been set"
                        ) % num_thread));
                    return;
                }
                num_pus[num_thread] = t.get_pu_number(num_core + used_cores, num_pu);
                affinities[num_thread] = t.init_thread_affinity_mask(
                    num_core + used_cores, num_pu);
                ++num_thread;
            }
        }
    }
Esempio n. 7
0
    // Decode one binding specification 'm' into an affinity mask by
    // dispatching on the type of its top-level entry m[0].
    //
    // The number of entries in 'affinities' is passed on as the 'size'
    // argument of the level-specific decoders. An unexpected top-level type
    // is reported as bad_parameter and yields a default-constructed mask.
    mask_type decode_mapping(hwloc_topology const& t,
        mapping_type& m, std::vector<mask_type>& affinities,
        std::size_t thread_index, error_code& ec)
    {
        std::size_t const num_entries = affinities.size();

        switch (m[0].type_) {
        case spec_type::socket:
            // requested top level is a socket
            return decode_mapping_socket(t, m, num_entries, thread_index, ec);

        case spec_type::numanode:
            // requested top level is a NUMA node
            return decode_mapping_numanode(t, m, num_entries, thread_index,
                ec);

        case spec_type::unknown:
            // no top level is requested, start from the whole machine
            return decode_mapping0_unknown(t, m, num_entries,
                t.get_machine_affinity_mask(), 0, thread_index, ec);

        default:
            break;
        }

        HPX_THROWS_IF(ec, bad_parameter, "decode_mapping",
            boost::str(boost::format("unexpected specification type at "
                "index zero: %x (%s)") %
                    static_cast<unsigned>(m[0].type_) %
                    spec_type::type_name(m[0].type_)));
        return mask_type();
    }
Esempio n. 8
0
    // Continue decoding a binding specification at its second level (m[1]),
    // starting from the mask produced for the top level.
    //
    // - spec_type::core:    delegate to decode_mapping_core().
    // - spec_type::unknown: convert 'core_base_index' into a PU offset by
    //                       summing the PUs of all preceding cores and
    //                       delegate to decode_mapping1_unknown().
    // - anything else:      report bad_parameter and return 'mask' unchanged.
    mask_type decode_mapping0_unknown(hwloc_topology const& t,
        mapping_type& m, std::size_t size, mask_type mask,
        std::size_t core_base_index, std::size_t thread_index, error_code& ec)
    {
        if (m[1].type_ == spec_type::core)
        {
            return decode_mapping_core(t, m, size, mask, core_base_index,
                thread_index, ec);
        }

        if (m[1].type_ == spec_type::unknown)
        {
            std::size_t pus_before = 0;
            for (std::size_t core = 0; core != core_base_index; ++core)
                pus_before += t.get_number_of_core_pus(core);

            return decode_mapping1_unknown(t, m, size, mask, pus_before,
                thread_index, ec);
        }

        HPX_THROWS_IF(ec, bad_parameter, "decode_mapping0_unknown",
            boost::str(boost::format("unexpected specification type at "
                "index one: %x (%s)") %
                    static_cast<unsigned>(m[1].type_) %
                    spec_type::type_name(m[1].type_)));
        return mask;
    }
Esempio n. 9
0
    // Distribute the threads evenly over all cores while keeping consecutive
    // thread numbers on the same core.
    //
    // t          - topology queried for the number of cores, the number of PUs
    //              of each core, and the affinity mask of a (core, PU) pair.
    // affinities - one slot per thread; a slot must still be empty
    //              (any() == false) when it is reached and receives the mask
    //              of the selected PU.
    // ec         - error channel handed to HPX_THROWS_IF.
    //
    // Reports bad_parameter if a slot has already been set, or if the threads
    // cannot all be placed (more threads than PUs, or no cores at all).
    void decode_balanced_distribution(hwloc_topology& t,
        std::vector<mask_type>& affinities, error_code& ec)
    {
        std::size_t num_threads = affinities.size();
        std::size_t num_cores = t.get_number_of_cores();

        std::vector<std::size_t> num_pus_cores(num_cores, 0);
        // At first, calculate the number of used pus per core.
        // This needs to be done to make sure that we occupy all the available cores
        for (std::size_t num_thread = 0; num_thread != num_threads; /**/)
        {
            // Remember where this sweep started so that a sweep which makes
            // no progress can be detected below.
            std::size_t const first_thread_of_sweep = num_thread;

            for(std::size_t num_core = 0; num_core != num_cores; ++num_core)
            {
                // Check if we exceed the number of PUs on the current core.
                // If yes, we need to proceed with the next one.
                std::size_t num_pus_core = t.get_number_of_core_pus(num_core);
                if(num_pus_cores[num_core] == num_pus_core) continue;

                num_pus_cores[num_core]++;
                if(++num_thread == num_threads)
                    break;
            }

            // Every core was skipped (or there are no cores): no further
            // sweep can ever account for this thread. Without this check the
            // outer loop would never terminate.
            if (num_thread == first_thread_of_sweep)
            {
                HPX_THROWS_IF(ec, bad_parameter, "decode_balanced_distribution",
                    boost::str(boost::format("cannot bind thread %1%: all "
                        "processing units of the %2% available core(s) are "
                        "already in use") % num_thread % num_cores));
                return;
            }
        }
        // Iterate over the cores and assigned pus per core. this additional loop
        // is needed so that we have consecutive worker thread numbers
        std::size_t num_thread = 0;
        for(std::size_t num_core = 0; num_core != num_cores; ++num_core)
        {
            for(std::size_t num_pu = 0; num_pu != num_pus_cores[num_core]; ++num_pu)
            {
                if (any(affinities[num_thread]))
                {
                    HPX_THROWS_IF(ec, bad_parameter, "decode_balanced_distribution",
                        boost::str(boost::format("affinity mask for thread %1% has "
                            "already been set") % num_thread));
                    return;
                }
                affinities[num_thread] = t.init_thread_affinity_mask(
                    num_core, num_pu);
                ++num_thread;
            }
        }
    }
Esempio n. 10
0
    // Build the list of (index, affinity mask) entries for the top level of a
    // binding specification.
    //
    // - spec_type::socket:   masks for the sockets listed in the bounds of
    //                        's', validated against the number of sockets.
    // - spec_type::numanode: masks for the NUMA nodes listed in the bounds of
    //                        's', validated against the number of NUMA nodes.
    // - spec_type::unknown:  a single entry with index std::size_t(-1) holding
    //                        the machine mask.
    // - anything else:       bad_parameter is reported and an empty list is
    //                        returned.
    std::vector<mask_info>
    extract_socket_or_numanode_masks(hwloc_topology const& t,
        spec_type const& s, error_code& ec)
    {
        if (s.type_ == spec_type::socket)
        {
            // requested top level is a socket
            std::size_t socket_count = t.get_number_of_sockets();
            return extract_socket_masks(
                t, extract_bounds(s, socket_count, ec));
        }

        if (s.type_ == spec_type::numanode)
        {
            // requested top level is a NUMA node
            std::size_t node_count = t.get_number_of_numa_nodes();
            return extract_numanode_masks(
                t, extract_bounds(s, node_count, ec));
        }

        if (s.type_ == spec_type::unknown)
        {
            // no top level requested: use the whole machine
            std::vector<mask_info> whole_machine;
            whole_machine.push_back(util::make_tuple(
                std::size_t(-1), extract_machine_mask(t, ec)
            ));
            return whole_machine;
        }

        HPX_THROWS_IF(ec, bad_parameter, "extract_socket_or_numanode_mask",
            boost::str(boost::format(
                "unexpected specification type %s"
            ) % spec_type::type_name(s.type_)));

        return std::vector<mask_info>();
    }
Esempio n. 11
0
 // Create one (index, affinity mask) entry for every NUMA node index listed
 // in 'b', in the order in which the indices appear.
 std::vector<mask_info>
 extract_numanode_masks(hwloc_topology const& t, bounds_type const& b)
 {
     std::vector<mask_info> node_masks;

     bounds_type::const_iterator const last = b.end();
     for (bounds_type::const_iterator pos = b.begin(); pos != last; ++pos)
     {
         std::int64_t const node = *pos;
         node_masks.push_back(util::make_tuple(
             node, t.init_numa_node_affinity_mask_from_numa_node(node)
         ));
     }

     return node_masks;
 }
Esempio n. 12
0
 // Bind thread i to the affinity mask the topology returns for index i.
 //
 // Every slot of 'affinities' must still be empty (any() == false) when it
 // is reached; otherwise bad_parameter is reported and the remaining slots
 // are left untouched.
 void decode_compact_distribution(hwloc_topology& t,
     std::vector<mask_type>& affinities, error_code& ec)
 {
     std::size_t const thread_count = affinities.size();

     std::size_t thread = 0;
     while (thread != thread_count)
     {
         mask_type& slot = affinities[thread];
         if (any(slot))
         {
             HPX_THROWS_IF(ec, bad_parameter, "decode_compact_distribution",
                 boost::str(boost::format("affinity mask for thread %1% has "
                     "already been set") % thread));
             return;
         }

         slot = t.init_thread_affinity_mask(thread);
         ++thread;
     }
 }
Esempio n. 13
0
    // Resolve the processing-unit level (m[2]) of a binding specification and
    // intersect the resulting PU mask with the incoming 'mask'.
    //
    // PU numbers taken from the bounds are offset by 'pu_base_index'. If more
    // than one PU is listed, only the PU at position 'thread_index' is used
    // (all of them if 'thread_index' is std::size_t(-1)). Returns 0 if
    // extracting the bounds failed.
    mask_type decode_mapping_pu(hwloc_topology const& t,
        mapping_type& m, std::size_t size, mask_type mask,
        std::size_t pu_base_index, std::size_t thread_index, error_code& ec)
    {
        std::size_t const no_index = std::size_t(-1);

        bounds_type b = extract_bounds(m[2], size, ec);
        if (ec) return 0;

        // Position (within the bounds) of the single PU to use, or 'no_index'
        // if all listed PUs are to be used.
        std::size_t const selected = (b.size() > 1) ? thread_index : no_index;

        mask_type pu_mask = 0;
        std::size_t position = 0;
        for (bounds_type::const_iterator pu = b.begin(), last = b.end();
             pu != last; ++pu, ++position)
        {
            if (selected != no_index && position != selected)
                continue;

            pu_mask |= t.init_thread_affinity_mask(*pu + pu_base_index);
        }

        return mask & pu_mask;
    }
Esempio n. 14
0
    // Build the list of (PU index, affinity mask) entries described by the
    // specification 's', each mask being restricted to 'core_mask'.
    //
    // - spec_type::pu:      one entry per index in the extracted bounds. The
    //                       bounds are validated against the PUs of 'core' if
    //                       a core is given, otherwise against all PUs.
    // - spec_type::unknown: a single entry with index std::size_t(-1) holding
    //                       the machine mask restricted to 'core_mask'.
    // - anything else:      bad_parameter is reported.
    //
    // 'socket' and 'core' use std::size_t(-1) to mean "not specified". If an
    // error is flagged in 'ec' the entries collected so far (possibly none)
    // are returned.
    std::vector<mask_info>
    extract_pu_masks(hwloc_topology const& t, spec_type const& s,
        std::size_t socket, std::size_t core, mask_cref_type core_mask,
        error_code& ec)
    {
        std::size_t const unspecified = std::size_t(-1);
        std::vector<mask_info> masks;

        if (s.type_ == spec_type::unknown)
        {
            mask_type mask = extract_machine_mask(t, ec);
            masks.push_back(util::make_tuple(
                std::size_t(-1), mask & core_mask
            ));
            return masks;
        }

        if (s.type_ != spec_type::pu)
        {
            HPX_THROWS_IF(ec, bad_parameter, "extract_pu_mask",
                boost::str(boost::format(
                    "unexpected specification type %s"
                ) % spec_type::type_name(s.type_)));
            return masks;
        }

        bool const core_given = unspecified != core;

        // core number is relative to socket: count the cores of all sockets
        // in front of the requested one
        std::size_t socket_base = 0;
        if (unspecified != socket)
        {
            for (std::size_t prev = 0; prev != socket; ++prev)
                socket_base += t.get_number_of_socket_cores(prev);
        }

        std::size_t const num_pus = core_given ?
            t.get_number_of_core_pus(core) : t.get_number_of_pus();

        bounds_type bounds = extract_bounds(s, num_pus, ec);
        if (ec) return masks;

        std::size_t const num_cores = t.get_number_of_cores();
        for (std::int64_t index : bounds)
        {
            std::size_t target_core = socket_base;
            if (core_given)
            {
                target_core += core;
            }
            else
            {
                // find core the given pu belongs to: walk the cores, starting
                // at the socket's first core, until the running PU count
                // exceeds the requested index
                std::size_t pus_before = 0;
                while (target_core < num_cores)
                {
                    std::size_t const pus_here =
                        t.get_number_of_core_pus(target_core);
                    if (pus_before + pus_here > std::size_t(index))
                        break;

                    pus_before += pus_here;
                    ++target_core;
                }
            }

            mask_type mask = t.init_thread_affinity_mask(target_core, index);
            masks.push_back(util::make_tuple(index, mask & core_mask));
        }

        return masks;
    }
Esempio n. 15
0
 // Return the affinity mask the topology reports for the whole machine; any
 // error is reported through 'ec' by the topology call itself.
 mask_cref_type extract_machine_mask(hwloc_topology const& t, error_code& ec)
 {
     mask_cref_type machine_mask = t.get_machine_affinity_mask(ec);
     return machine_mask;
 }