Beispiel #1
0
    windows_topology()
    { // {{{
        // Every lookup table below gets exactly one entry per core.
        std::size_t const core_count = hardware_concurrency();

        // Size all tables up front so the fill passes never reallocate.
        numa_node_numbers_.reserve(core_count);
        numa_node_affinity_masks_.reserve(core_count);
        ns_numa_node_affinity_masks_.reserve(core_count);
        thread_affinity_masks_.reserve(core_count);
        ns_thread_affinity_masks_.reserve(core_count);

        // The tables are deliberately filled in separate passes rather than
        // in one fused loop: some init_* routines read topology data produced
        // by an earlier pass, so each table has to be complete before the
        // next one is started.

        // Pass 1: NUMA node number of every core.
        std::size_t core = 0;
        while (core != core_count)
        {
            numa_node_numbers_.push_back(init_numa_node_number(core));
            ++core;
        }

        // Pass 2: NUMA node affinity masks, first the plain variant and then
        // the NUMA-sensitive ("ns_") variant for the same core.
        core = 0;
        while (core != core_count)
        {
            numa_node_affinity_masks_.push_back(
                init_numa_node_affinity_mask(core, false));
            ns_numa_node_affinity_masks_.push_back(
                init_numa_node_affinity_mask(core, true));
            ++core;
        }

        // Pass 3: per-thread affinity masks, again plain followed by the
        // NUMA-sensitive variant.
        core = 0;
        while (core != core_count)
        {
            thread_affinity_masks_.push_back(
                init_thread_affinity_mask(core, false));
            ns_thread_affinity_masks_.push_back(
                init_thread_affinity_mask(core, true));
            ++core;
        }
    } // }}}
Beispiel #2
0
    /// Pin the calling thread to a single processing unit.
    ///
    /// \param num_thread      Index of the thread; it is reduced modulo the
    ///                        number of cores to select the target CPU.
    /// \param numa_sensitive  Not consulted by this implementation.
    /// \param ec              Error reporting channel. On failure the error is
    ///                        reported through HPX_THROWS_IF; on success a
    ///                        caller-supplied \a ec is reset to the success
    ///                        code, even if it held an error on entry.
    void set_thread_affinity(
        std::size_t num_thread
      , bool numa_sensitive
      , error_code& ec = throws
        ) const
    {
        // Protect the modulo below against a platform reporting zero cores.
        std::size_t num_of_cores = hardware_concurrency();
        if (0 == num_of_cores)
            num_of_cores = 1;     // assume one core

        cpu_set_t cpu;
        CPU_ZERO(&cpu);
        CPU_SET(num_thread % num_of_cores, &cpu);

        // Only the system call differs between the two configurations; both
        // calls yield 0 on success. Capturing the result first keeps the
        // if/else below out of the preprocessor conditional.
        #if defined(HPX_HAVE_PTHREAD_SETAFFINITY_NP)
            int const result =
                pthread_setaffinity_np(pthread_self(), sizeof(cpu), &cpu);
        #else
            int const result =
                sched_setaffinity(syscall(SYS_gettid), sizeof(cpu), &cpu);
        #endif

        if (0 != result)
        {
            // Presumably throws when ec is 'throws' and otherwise stores the
            // error in ec; either way there is nothing left to do here.
            HPX_THROWS_IF(ec, kernel_error
              , "hpx::threads::set_thread_affinity"
              , "failed to set thread affinity");
            return;
        }

        sleep(0); // Allow the OS to pick up the change.

        // Always report success explicitly so that an error code left over
        // from an earlier call cannot be mistaken for a failure of this one.
        if (&ec != &throws)
            ec = make_success_code();
    }
Beispiel #3
0
    /// Compute the single-bit affinity mask for one thread.
    ///
    /// \param num_thread      Index of the thread; reduced modulo the number
    ///                        of cores before it is used.
    /// \param numa_sensitive  If true, thread indices are distributed
    ///                        round-robin over the NUMA nodes
    ///                        (node = index % nodes, slot = index / nodes).
    ///                        If false, the node comes from
    ///                        get_numa_node_number() and the slot is
    ///                        index % cores-per-node.
    ///
    /// \returns A mask with one bit set that lies inside the processor mask
    ///          of the selected NUMA node.
    ///
    /// \throws kernel_error (via HPX_THROW_EXCEPTION) if the processor mask
    ///         of the selected node cannot be queried or is empty.
    mask_type init_thread_affinity_mask(
        std::size_t num_thread
      , bool numa_sensitive
        )
    { // {{{
        std::size_t num_of_cores = hardware_concurrency();
        if (0 == num_of_cores)
            num_of_cores = 1;     // assume one core
        std::size_t const affinity = num_thread % num_of_cores;

        // GetNumaHighestNodeNumber yields the highest node *number*; adding
        // one turns it into a node count. If the call fails a single node is
        // assumed.
        ULONG numa_nodes = 1;
        if (GetNumaHighestNodeNumber(&numa_nodes))
            ++numa_nodes;

        // With more nodes than cores the integer division yields 0, which
        // would be used as a modulus below; clamp it to one core per node.
        std::size_t num_of_cores_per_numa_node = num_of_cores / numa_nodes;
        if (0 == num_of_cores_per_numa_node)
            num_of_cores_per_numa_node = 1;

        UCHAR const numa_node = numa_sensitive
            ? UCHAR(affinity % numa_nodes)
            : UCHAR(get_numa_node_number(num_thread));

        ULONGLONG node_affinity_mask = 0;
        if (!GetNumaNodeProcessorMask(numa_node, &node_affinity_mask))
        {
            HPX_THROW_EXCEPTION(kernel_error
              , "hpx::threads::windows_topology::init_thread_affinity_mask"
              , boost::str(boost::format(
                    "failed to initialize thread %1% affinity mask")
                    % num_thread));
        }

        // A node without processors is reported with an all-zero mask. The
        // search loop below could never find a matching bit in that case and
        // would spin forever, so fail loudly instead.
        if (0 == node_affinity_mask)
        {
            HPX_THROW_EXCEPTION(kernel_error
              , "hpx::threads::windows_topology::init_thread_affinity_mask"
              , boost::str(boost::format(
                    "failed to initialize thread %1% affinity mask: "
                    "NUMA node %2% has no processors")
                    % num_thread % static_cast<unsigned int>(numa_node)));
        }

        // Position of this thread's core relative to the node's first core.
        std::size_t const shift = numa_sensitive
            ? affinity / numa_nodes
            : affinity % num_of_cores_per_numa_node;

        ULONGLONG mask = least_significant_bit(node_affinity_mask) << shift;

        // The shifted bit may have left the node's mask (or the word
        // entirely); walk it upwards, wrapping around to bit 0, until it
        // lands on a processor that belongs to the node.
        while (!(mask & node_affinity_mask)) {
            mask <<= 1LL;
            if (0 == mask)
                mask = 0x01LL;
        }

        return static_cast<mask_type>(mask);
    } // }}}
Beispiel #4
0
    /// Return the affinity mask covering the whole machine.
    ///
    /// \param ec  Error reporting channel; a caller-supplied \a ec is reset
    ///            to the success code before the mask is computed.
    ///
    /// \returns The bitwise OR of the stored NUMA node affinity masks of all
    ///          cores.
    mask_type get_machine_affinity_mask(
        error_code& ec = throws
        ) const
    {
        bool const caller_supplied_ec = (&ec != &throws);
        if (caller_supplied_ec)
        {
            ec = make_success_code();
        }

        // Fold the per-core NUMA node masks into a single machine-wide mask.
        std::size_t const core_count = hardware_concurrency();
        mask_type machine_mask = 0;
        std::size_t core = 0;
        while (core != core_count)
        {
            machine_mask |= numa_node_affinity_masks_[core];
            ++core;
        }

        return machine_mask;
    }
Beispiel #5
0
    /// Query the processor mask of the NUMA node a thread is associated with.
    ///
    /// \param num_thread      Index of the thread; reduced modulo the number
    ///                        of cores before it is used.
    /// \param numa_sensitive  If true, the node is chosen round-robin
    ///                        (index % number of nodes); if false, the node
    ///                        comes from get_numa_node_number().
    ///
    /// \returns The processor mask reported by GetNumaNodeProcessorMask for
    ///          the selected node.
    ///
    /// \throws kernel_error (via HPX_THROW_EXCEPTION) if the mask cannot be
    ///         queried.
    mask_type init_numa_node_affinity_mask(
        std::size_t num_thread
      , bool numa_sensitive
        )
    { // {{{
        std::size_t num_of_cores = hardware_concurrency();
        if (0 == num_of_cores)
            num_of_cores = 1;     // assume one core

        // Keep the full-width index: narrowing it to UCHAR before taking the
        // remainder would select a different node than
        // init_thread_affinity_mask() does once the index reaches 256.
        std::size_t const affinity = num_thread % num_of_cores;

        // GetNumaHighestNodeNumber yields the highest node *number*; adding
        // one turns it into a node count. If the call fails a single node is
        // assumed.
        ULONG numa_nodes = 1;
        if (GetNumaHighestNodeNumber(&numa_nodes))
            ++numa_nodes;

        UCHAR const numa_node = numa_sensitive
            ? UCHAR(affinity % numa_nodes)
            : UCHAR(get_numa_node_number(num_thread));

        ULONGLONG mask = 0;
        if (!GetNumaNodeProcessorMask(numa_node, &mask))
        {
            HPX_THROW_EXCEPTION(kernel_error
              , "hpx::threads::windows_topology::init_numa_node_affinity_mask"
              , boost::str(boost::format(
                    "failed to initialize NUMA node affinity mask for "
                    "thread %1%")
                    % num_thread));
        }

        return static_cast<mask_type>(mask);
    } // }}}
Beispiel #6
0
    /// Determine the NUMA node number for a thread/processor index.
    ///
    /// \param num_thread  Processor index; std::size_t(-1) is passed through
    ///                    unchanged.
    ///
    /// \returns The node reported by GetNumaProcessorNode if that call
    ///          succeeds; otherwise an estimate that assumes the cores are
    ///          spread evenly over the NUMA nodes.
    std::size_t init_numa_node_number(
        std::size_t num_thread
        )
    { // {{{
        if (std::size_t(-1) == num_thread)
             return std::size_t(-1);

        // NOTE(review): the API takes a UCHAR, so indices >= 256 are
        // truncated here - confirm whether such indices can occur.
        UCHAR node_number = 0;
        if (GetNumaProcessorNode(UCHAR(num_thread), &node_number))
            return node_number;

        // Fallback: estimate the node from the core and node counts.
        std::size_t num_of_cores = hardware_concurrency();
        if (0 == num_of_cores)
            num_of_cores = 1;     // assume one core

        // GetNumaHighestNodeNumber yields the highest node *number*, hence
        // the "+ 1" to obtain the node count.
        std::size_t num_of_numa_cores = num_of_cores;
        ULONG numa_nodes = 0;
        if (GetNumaHighestNodeNumber(&numa_nodes) && 0 != numa_nodes)
            num_of_numa_cores = num_of_cores / (numa_nodes + 1);

        // With fewer cores than nodes (e.g. the assumed single core above)
        // the integer division yields 0; clamp it so the division below is
        // well defined.
        if (0 == num_of_numa_cores)
            num_of_numa_cores = 1;

        return num_thread / num_of_numa_cores;
    } // }}}