Example #1
    mask_type decode_mapping(hwloc_topology const& t,
        mapping_type& m, std::vector<mask_type>& affinities,
        std::size_t thread_index, error_code& ec)
    {
        std::size_t size = affinities.size();
        mask_type mask;
        switch (m[0].type_) {
        case spec_type::socket:
            // requested top level is a socket
            mask = decode_mapping_socket(t, m, size, thread_index, ec);
            break;

        case spec_type::numanode:
            // requested top level is a NUMA node
            mask = decode_mapping_numanode(t, m, size, thread_index, ec);
            break;

        case spec_type::unknown:
            // no top level is requested
            mask = decode_mapping0_unknown(t, m, size,
                t.get_machine_affinity_mask(), 0, thread_index, ec);
            break;

        default:
            HPX_THROWS_IF(ec, bad_parameter, "decode_mapping",
                boost::str(boost::format("unexpected specification type at "
                    "index zero: %x (%s)") %
                        static_cast<unsigned>(m[0].type_) %
                        spec_type::type_name(m[0].type_)));
            return mask_type();
        }
        return mask;
    }
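For orientation, decode_mapping is the top of a three-level decode: m[0] names the socket/NUMA level, m[1] the core level, m[2] the processing-unit level, and the switch above decides which decoder handles the top level. A minimal self-contained sketch of that dispatch shape, with a plain 64-bit integer standing in for mask_type and every name a hypothetical stand-in rather than an HPX type:

    // Hypothetical stand-ins for spec_type / mapping_type / mask_type; only
    // the dispatch-on-top-level shape mirrors the code above.
    #include <array>
    #include <cstdint>
    #include <iostream>
    #include <stdexcept>

    enum class level { socket, numanode, core, pu, unknown };
    struct spec { level type_ = level::unknown; };
    using mapping = std::array<spec, 3>;   // [0] socket/NUMA, [1] core, [2] PU

    std::uint64_t decode(mapping const& m)
    {
        switch (m[0].type_) {
        case level::socket:   return 0x0Fu;   // pretend: PUs of socket 0
        case level::numanode: return 0x03u;   // pretend: PUs of NUMA node 0
        case level::unknown:  return 0xFFu;   // no top level: whole machine
        default:
            throw std::invalid_argument(
                "unexpected specification type at index zero");
        }
    }

    int main()
    {
        mapping m;
        m[0].type_ = level::numanode;
        std::cout << std::hex << decode(m) << '\n';   // prints 3
    }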
Example #2
    mask_type decode_mapping_core(hwloc_topology const& t,
        mapping_type& m, std::size_t size, mask_type mask,
        std::size_t core_base_index, std::size_t thread_index, error_code& ec)
    {
        bounds_type b = extract_bounds(m[1], size, ec);
        if (ec) return mask_type();

        // We have to account for the thread index at this level if there are
        // no specifications related to processing units.
        std::size_t index = std::size_t(-1);
        if (m[2].type_ == spec_type::unknown && b.size() > 1)
            index = thread_index;

        mask_type core_mask = mask_type();
        resize(core_mask, size);

        std::size_t core_index = 0;
        for (bounds_type::const_iterator it = b.begin(); it != b.end();
            ++it, ++core_index)
        {
            if (index == std::size_t(-1) || core_index == index)
            {
                core_mask |= t.init_core_affinity_mask_from_core(
                    std::size_t(*it+core_base_index), mask_type());
            }
        }

        core_base_index += std::size_t(*b.begin());
        if (thread_index != std::size_t(-1) && b.size() > 1)
            core_base_index += thread_index;

        std::size_t base_index = 0;
        for (std::size_t i = 0; i != core_base_index; ++i)
            base_index += t.get_number_of_core_pus(i);

        return decode_mapping1_unknown(t, m, size, mask & core_mask,
            base_index, thread_index, ec);
    }
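The interesting rule here is the index filter: when the PU level (m[2]) is unspecified and the core range names more than one core, each OS thread keeps only the core matching its own index; otherwise all named cores survive and a later level disambiguates. A self-contained sketch of just that rule on plain integers (select_cores is an illustrative helper, not HPX API):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    std::vector<std::size_t> select_cores(std::vector<std::size_t> const& bounds,
        std::size_t thread_index, bool pu_level_given)
    {
        // if no PU spec follows and more than one core is named, each thread
        // picks the single core matching its own index
        std::size_t index = std::size_t(-1);
        if (!pu_level_given && bounds.size() > 1)
            index = thread_index;

        std::vector<std::size_t> result;
        std::size_t core_index = 0;
        for (std::size_t b : bounds)
        {
            if (index == std::size_t(-1) || core_index == index)
                result.push_back(b);
            ++core_index;
        }
        return result;
    }

    int main()
    {
        // cores 2..5 named, no PU level given: thread 1 binds to core 3 only
        for (std::size_t c : select_cores({2, 3, 4, 5}, 1, false))
            std::cout << c << ' ';
        std::cout << '\n';   // prints: 3
    }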
Example #3
    mask_type decode_mapping_pu(hwloc_topology const& t,
        mapping_type& m, std::size_t size, mask_type mask,
        std::size_t pu_base_index, std::size_t thread_index, error_code& ec)
    {
        bounds_type b = extract_bounds(m[2], size, ec);
        if (ec) return mask_type();

        // We have to account for the thread index at this level if more than
        // one processing unit is requested.
        std::size_t index = std::size_t(-1);
        if (b.size() > 1)
            index = thread_index;

        mask_type pu_mask = mask_type();
        resize(pu_mask, size);

        std::size_t pu_index = 0;
        for (bounds_type::const_iterator it = b.begin(); it != b.end();
            ++it, ++pu_index)
        {
            if (index == std::size_t(-1) || pu_index == index)
            {
                pu_mask |= t.init_thread_affinity_mask(
                    std::size_t(*it + pu_base_index));
            }
        }

        return mask & pu_mask;
    }
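The return value is the intersection mask & pu_mask, so a PU that falls outside the mask inherited from the enclosing core/socket level is silently dropped. A toy illustration with std::bitset standing in for mask_type (the PU numbers are made up):

    #include <bitset>
    #include <iostream>

    int main()
    {
        std::bitset<8> enclosing("00001111");   // PUs 0-3: the enclosing core(s)
        std::bitset<8> pu_mask;
        pu_mask.set(2);   // requested PU 2 (inside the enclosing mask, kept)
        pu_mask.set(6);   // requested PU 6 (outside, dropped)
        std::cout << (enclosing & pu_mask) << '\n';   // prints 00000100
    }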
Example #4
    mask_type decode_mapping_numanode(hwloc_topology const& t,
        mapping_type& m, std::size_t size, std::size_t thread_index,
        error_code& ec)
    {
        bounds_type b = extract_bounds(m[0], size, ec);
        if (ec) return mask_type();

        // We have to account for the thread index at this level if there are
        // no specifications related to cores or processing units.
        std::size_t index = std::size_t(-1);
        if (m[1].type_ == spec_type::unknown &&
            m[2].type_ == spec_type::unknown &&
            b.size() > 1)
        {
            index = thread_index;
        }

        mask_type mask = mask_type();
        resize(mask, size);

        std::size_t node_index = 0;
        for (bounds_type::const_iterator it = b.begin(); it != b.end();
            ++it, ++node_index)
        {
            if (index == std::size_t(-1) || node_index == index)
            {
                mask |= t.init_numa_node_affinity_mask_from_numa_node(
                    std::size_t(*it));
            }
        }

        std::size_t node_base_index = std::size_t(*b.begin());
        if (thread_index != std::size_t(-1) && b.size() > 1)
            node_base_index += thread_index;

        std::size_t base_index = 0;
        for (std::size_t i = 0; i != node_base_index; ++i)
            base_index += t.get_number_of_numa_node_cores(i);

        return decode_mapping0_unknown(t, m, size, mask, base_index,
            thread_index, ec);
    }
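The closing loop converts a NUMA-node-relative position into a machine-global core index by summing the core counts of all preceding nodes; decode_mapping0_unknown then continues at the core level from that offset. The same arithmetic with made-up node sizes (cores_per_node stands in for t.get_number_of_numa_node_cores):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<std::size_t> cores_per_node{8, 8, 4};   // 3 NUMA nodes

        std::size_t node_base_index = 2;   // everything relative to node 2
        std::size_t base_index = 0;
        for (std::size_t i = 0; i != node_base_index; ++i)
            base_index += cores_per_node[i];

        std::cout << base_index << '\n';   // prints 16: node 2 starts at core 16
    }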
Example #5
namespace hpx { namespace threads
{
    mask_type noop_topology::empty_mask =
        mask_type(noop_topology::hardware_concurrency());
}}

        void on_start_thread(std::size_t num_thread)
        {
            if (nullptr == queues_[num_thread])
            {
                queues_[num_thread] =
                    new thread_queue_type(max_queue_thread_count_);

                if (num_thread < high_priority_queues_.size())
                {
                    high_priority_queues_[num_thread] =
                        new thread_queue_type(max_queue_thread_count_);
                }
            }

            // forward this call to all queues etc.
            if (num_thread < high_priority_queues_.size())
                high_priority_queues_[num_thread]->on_start_thread(num_thread);
            if (num_thread == queues_.size()-1)
                low_priority_queue_.on_start_thread(num_thread);

            queues_[num_thread]->on_start_thread(num_thread);

            std::size_t num_threads = queues_.size();
            // get numa domain masks of all queues...
            std::vector<mask_type> numa_masks(num_threads);
            std::vector<mask_type> core_masks(num_threads);
            for (std::size_t i = 0; i != num_threads; ++i)
            {
                std::size_t num_pu = get_pu_num(i);
                numa_masks[i] =
                    topology_.get_numa_node_affinity_mask(num_pu, numa_sensitive_ != 0);
                core_masks[i] =
                    topology_.get_core_affinity_mask(num_pu, numa_sensitive_ != 0);
            }

            // iterate over the number of threads again to determine where to
            // steal from
            std::ptrdiff_t radius =
                static_cast<std::ptrdiff_t>((num_threads / 2.0) + 0.5);
            victim_threads_[num_thread].reserve(num_threads);
            std::size_t num_pu = get_pu_num(num_thread);
            mask_cref_type pu_mask =
                topology_.get_thread_affinity_mask(num_pu, numa_sensitive_ != 0);
            mask_cref_type numa_mask = numa_masks[num_thread];
            mask_cref_type core_mask = core_masks[num_thread];

            // we allow the thread on the boundary of the NUMA domain to steal
            mask_type first_mask = mask_type();
            resize(first_mask, mask_size(pu_mask));

            std::size_t first = find_first(numa_mask);
            if (first != std::size_t(-1))
                set(first_mask, first);
            else
                first_mask = pu_mask;

            auto iterate = [&](hpx::util::function_nonser<bool(std::size_t)> f)
            {
                // check our neighbors in a radial fashion (left and right
                // alternating, increasing distance each iteration)
                int i = 1;
                for (/**/; i < radius; ++i)
                {
                    std::ptrdiff_t left =
                        (static_cast<std::ptrdiff_t>(num_thread) - i) %
                            static_cast<std::ptrdiff_t>(num_threads);
                    if (left < 0)
                        left = num_threads + left;

                    if (f(std::size_t(left)))
                    {
                        victim_threads_[num_thread].push_back(
                            static_cast<std::size_t>(left));
                    }

                    std::size_t right = (num_thread + i) % num_threads;
                    if (f(right))
                    {
                        victim_threads_[num_thread].push_back(right);
                    }
                }
                if ((num_threads % 2) == 0)
                {
                    std::size_t right = (num_thread + i) % num_threads;
                    if (f(right))
                    {
                        victim_threads_[num_thread].push_back(right);
                    }
                }
            };

            // check for threads which share the same core...
            iterate(
                [&](std::size_t other_num_thread)
                {
                    return any(core_mask & core_masks[other_num_thread]);
                }
            );

            // check for threads which share the same numa domain...
            iterate(
                [&](std::size_t other_num_thread)
                {
                    return
                        !any(core_mask & core_masks[other_num_thread])
                        && any(numa_mask & numa_masks[other_num_thread]);
                }
            );

            // check for the remaining threads on other NUMA domains, but only
            // if stealing across domains is allowed (numa_sensitive_ != 2) and
            // this thread sits on its domain's boundary (see first_mask above)
            if (numa_sensitive_ != 2 && any(first_mask & pu_mask))
            {
                iterate(
                    [&](std::size_t other_num_thread)
                    {
                        return !any(numa_mask & numa_masks[other_num_thread]);
                    }
                );
            }
        }
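The victim ordering produced by iterate is purely positional: distance 1, 2, ... around a ring of num_threads workers, left neighbor before right at each distance, plus one extra step when the count is even (at the farthest distance the left and right neighbors coincide). A self-contained sketch of just that walk (radial_order is illustrative, not HPX API; the scheduler additionally filters victims by the core/NUMA masks shown above):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    std::vector<std::size_t> radial_order(std::size_t self, std::size_t n)
    {
        std::vector<std::size_t> order;
        std::ptrdiff_t radius = static_cast<std::ptrdiff_t>((n / 2.0) + 0.5);

        std::ptrdiff_t i = 1;
        for (/**/; i < radius; ++i)
        {
            std::ptrdiff_t left = (static_cast<std::ptrdiff_t>(self) - i) %
                static_cast<std::ptrdiff_t>(n);
            if (left < 0)
                left += static_cast<std::ptrdiff_t>(n);
            order.push_back(static_cast<std::size_t>(left));
            order.push_back((self + i) % n);
        }
        if ((n % 2) == 0)
            order.push_back((self + i) % n);   // single farthest neighbor

        return order;
    }

    int main()
    {
        for (std::size_t v : radial_order(0, 6))
            std::cout << v << ' ';
        std::cout << '\n';   // prints: 5 1 4 2 3
    }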