Example #1
0
    // Return the affinity mask to be used for the thread 'num_thread'.
    //
    // If individual affinity masks have been predefined, the entry stored
    // for 'num_thread' is returned unchanged (the index is not range
    // checked here). Otherwise the mask is requested from 'topology' for the
    // processing unit reported by get_pu_num(num_thread), at the granularity
    // named by affinity_domain_.
    //
    // A domain is selected when affinity_domain_ is found at position 0 of
    // its full name, so leading abbreviations are accepted. The names are
    // tried in the order "pu", "core", "numa"; anything else is expected to
    // denote "machine" (verified by an assertion only).
    mask_cref_type affinity_data::get_pu_mask(topology const& topology,
        std::size_t num_thread, bool numa_sensitive) const
    {
        // explicitly configured masks take precedence over any domain
        if (!affinity_masks_.empty())
        {
            return affinity_masks_[num_thread];
        }

        // true if the configured affinity domain sits at the very start of
        // the given full domain name
        auto const domain_selects = [&](char const* domain_name)
        {
            return 0 == std::string(domain_name).find(affinity_domain_);
        };

        std::size_t const pu_index = get_pu_num(num_thread);

        // 'processing unit': the mask covers just this one pu
        if (domain_selects("pu"))
        {
            return topology.get_thread_affinity_mask(pu_index, numa_sensitive);
        }

        // 'core': the mask covers all processing units of the core which
        // contains this pu
        if (domain_selects("core"))
        {
            return topology.get_core_affinity_mask(pu_index, numa_sensitive);
        }

        // 'numa': the mask covers all processing units of the NUMA domain
        // which contains this pu
        if (domain_selects("numa"))
        {
            return topology.get_numa_node_affinity_mask(
                pu_index, numa_sensitive);
        }

        // 'machine': the mask covers all processing units of the machine
        BOOST_ASSERT(domain_selects("machine"));
        return topology.get_machine_affinity_mask();
    }
        void on_start_thread(std::size_t num_thread)
        {
            if (nullptr == queues_[num_thread])
            {
                queues_[num_thread] =
                    new thread_queue_type(max_queue_thread_count_);

                if (num_thread < high_priority_queues_.size())
                {
                    high_priority_queues_[num_thread] =
                        new thread_queue_type(max_queue_thread_count_);
                }
            }

            // forward this call to all queues etc.
            if (num_thread < high_priority_queues_.size())
                high_priority_queues_[num_thread]->on_start_thread(num_thread);
            if (num_thread == queues_.size()-1)
                low_priority_queue_.on_start_thread(num_thread);

            queues_[num_thread]->on_start_thread(num_thread);

            std::size_t num_threads = queues_.size();
            // get numa domain masks of all queues...
            std::vector<mask_type> numa_masks(num_threads);
            std::vector<mask_type> core_masks(num_threads);
            for (std::size_t i = 0; i != num_threads; ++i)
            {
                std::size_t num_pu = get_pu_num(i);
                numa_masks[i] =
                    topology_.get_numa_node_affinity_mask(num_pu, numa_sensitive_ != 0);
                core_masks[i] =
                    topology_.get_core_affinity_mask(num_pu, numa_sensitive_ != 0);
            }

            // iterate over the number of threads again to determine where to
            // steal from
            std::ptrdiff_t radius =
                static_cast<std::ptrdiff_t>((num_threads / 2.0) + 0.5);
            victim_threads_[num_thread].reserve(num_threads);
            std::size_t num_pu = get_pu_num(num_thread);
            mask_cref_type pu_mask =
                topology_.get_thread_affinity_mask(num_pu, numa_sensitive_ != 0);
            mask_cref_type numa_mask = numa_masks[num_thread];
            mask_cref_type core_mask = core_masks[num_thread];

            // we allow the thread on the boundary of the NUMA domain to steal
            mask_type first_mask = mask_type();
            resize(first_mask, mask_size(pu_mask));

            std::size_t first = find_first(numa_mask);
            if (first != std::size_t(-1))
                set(first_mask, first);
            else
                first_mask = pu_mask;

            auto iterate = [&](hpx::util::function_nonser<bool(std::size_t)> f)
            {
                // check our neighbors in a radial fashion (left and right
                // alternating, increasing distance each iteration)
                int i = 1;
                for (/**/; i < radius; ++i)
                {
                    std::ptrdiff_t left =
                        (static_cast<std::ptrdiff_t>(num_thread) - i) %
                            static_cast<std::ptrdiff_t>(num_threads);
                    if (left < 0)
                        left = num_threads + left;

                    if (f(std::size_t(left)))
                    {
                        victim_threads_[num_thread].push_back(
                            static_cast<std::size_t>(left));
                    }

                    std::size_t right = (num_thread + i) % num_threads;
                    if (f(right))
                    {
                        victim_threads_[num_thread].push_back(right);
                    }
                }
                if ((num_threads % 2) == 0)
                {
                    std::size_t right = (num_thread + i) % num_threads;
                    if (f(right))
                    {
                        victim_threads_[num_thread].push_back(right);
                    }
                }
            };

            // check for threads which share the same core...
            iterate(
                [&](std::size_t other_num_thread)
                {
                    return any(core_mask & core_masks[other_num_thread]);
                }
            );

            // check for threads which share the same numa domain...
            iterate(
                [&](std::size_t other_num_thread)
                {
                    return
                        !any(core_mask & core_masks[other_num_thread])
                        && any(numa_mask & numa_masks[other_num_thread]);
                }
            );

            // check for the rest and if we are numa aware
            if (numa_sensitive_ != 2 && any(first_mask & pu_mask))
            {
                iterate(
                    [&](std::size_t other_num_thread)
                    {
                        return !any(numa_mask & numa_masks[other_num_thread]);
                    }
                );
            }
        }