示例#1
0
    std::size_t init_numa_node_number(
        std::size_t num_thread
        )
    { // {{{
        if (std::size_t(-1) == num_thread)
             return std::size_t(-1);

        UCHAR node_number = 0;
        if (GetNumaProcessorNode(UCHAR(num_thread), &node_number))
            return node_number;

        std::size_t num_of_cores = hardware_concurrency();
        if (0 == num_of_cores)
            num_of_cores = 1;     // assume one core

        std::size_t num_of_numa_cores = num_of_cores;
        ULONG numa_nodes = 0;
        if (GetNumaHighestNodeNumber(&numa_nodes) && 0 != numa_nodes)
            num_of_numa_cores = num_of_cores / (numa_nodes + 1);

        return num_thread / num_of_numa_cores;
    } // }}}
示例#2
0
/* Create the set of worker thread pools for an encoder instance.
 *
 * The logical CPUs are counted per NUMA node, the counts are optionally
 * restricted by p->numaPools, and one or more pools of at most
 * MAX_POOL_THREADS threads are created for every node that still has cores.
 *
 * p->numaPools is a comma separated list with one entry per node:
 *   '-'     use no cores of this node
 *   '+'     use all detected cores of this node
 *   '*'     use all detected cores of this node and of every following node
 *   number  use at most that many cores of this node
 * Nodes that have no entry because the list ended get no cores.
 *
 * p        encoder parameters; numaPools, frameNumThreads and logLevel are read
 * numPools receives the number of pools in the returned array, 0 on failure
 *
 * Returns an array allocated with new[] holding numPools pools, or NULL when
 * no pool is needed or a pool could not be created. */
ThreadPool* ThreadPool::allocThreadPools(x265_param* p, int& numPools)
{
    enum { MAX_NODE_NUM = 127 };
    int cpusPerNode[MAX_NODE_NUM + 1];

    memset(cpusPerNode, 0, sizeof(cpusPerNode));
    int numNumaNodes = X265_MIN(getNumaNodeCount(), MAX_NODE_NUM);
    int cpuCount = getCpuCount();
    bool bNumaSupport = false;

#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 
    bNumaSupport = true;
#elif HAVE_LIBNUMA
    bNumaSupport = numa_available() >= 0;
#endif


    /* count the logical CPUs that belong to each node */
    for (int i = 0; i < cpuCount; i++)
    {
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 
        /* NOTE(review): the CPU index is narrowed to UCHAR here, so indices
         * above 255 wrap around - confirm whether that can occur */
        UCHAR node;
        if (GetNumaProcessorNode((UCHAR)i, &node))
            cpusPerNode[X265_MIN(node, (UCHAR)MAX_NODE_NUM)]++;
        else
#elif HAVE_LIBNUMA
        /* only query libnuma when it reported itself as available, and never
         * index the table with a negative (failed) lookup result */
        int node = bNumaSupport ? numa_node_of_cpu(i) : -1;
        if (node >= 0)
            cpusPerNode[X265_MIN(node, MAX_NODE_NUM)]++;
        else
#endif
            cpusPerNode[0]++;
    }

    if (bNumaSupport && p->logLevel >= X265_LOG_DEBUG)
        for (int i = 0; i < numNumaNodes; i++)
            x265_log(p, X265_LOG_DEBUG, "detected NUMA node %d with %d logical cores\n", i, cpusPerNode[i]);

    /* limit nodes based on param->numaPools */
    if (p->numaPools && *p->numaPools)
    {
        const char *nodeStr = p->numaPools;
        for (int i = 0; i < numNumaNodes; i++)
        {
            if (!*nodeStr)
            {
                /* list exhausted: remaining nodes are not used */
                cpusPerNode[i] = 0;
                continue;
            }
            else if (*nodeStr == '-')
                cpusPerNode[i] = 0;
            else if (*nodeStr == '*')
                break;
            else if (*nodeStr == '+')
                ;
            else
            {
                int count = atoi(nodeStr);
                cpusPerNode[i] = X265_MIN(count, cpusPerNode[i]);
            }

            /* consume current node string, comma, and white-space */
            while (*nodeStr && *nodeStr != ',')
               ++nodeStr;
            if (*nodeStr == ',')
               ++nodeStr;
            while (*nodeStr == ' ')
               ++nodeStr;
        }
    }

    // In the case that numa is disabled and we have more CPUs than MAX_POOL_THREADS,
    // spawn the last pool only if the # threads in that pool is > 1/2 max (heuristic).
    // Without the "more than one full pool" check a machine with fewer than
    // MAX_POOL_THREADS / 2 cores would have all of its cores removed here.
    if ((numNumaNodes == 1) && (cpusPerNode[0] > MAX_POOL_THREADS) &&
        (cpusPerNode[0] % MAX_POOL_THREADS < (MAX_POOL_THREADS / 2)))
    {
        cpusPerNode[0] -= (cpusPerNode[0] % MAX_POOL_THREADS);
        x265_log(p, X265_LOG_DEBUG, "Creating only %d worker threads to prevent asymmetry in pools; may not use all HW contexts\n", cpusPerNode[0]);
    }

    /* every node needs ceil(cores / MAX_POOL_THREADS) pools */
    numPools = 0;
    for (int i = 0; i < numNumaNodes; i++)
    {
        if (bNumaSupport)
            x265_log(p, X265_LOG_DEBUG, "NUMA node %d may use %d logical cores\n", i, cpusPerNode[i]);
        if (cpusPerNode[i])
            numPools += (cpusPerNode[i] + MAX_POOL_THREADS - 1) / MAX_POOL_THREADS;
    }

    if (!numPools)
        return NULL;

    if (numPools > p->frameNumThreads)
    {
        x265_log(p, X265_LOG_DEBUG, "Reducing number of thread pools for frame thread count\n");
        numPools = X265_MAX(p->frameNumThreads / 2, 1);
    }

    ThreadPool *pools = new ThreadPool[numPools];
    if (pools)
    {
        int maxProviders = (p->frameNumThreads + numPools - 1) / numPools + 1; /* +1 is Lookahead, always assigned to threadpool 0 */
        int node = 0;
        for (int i = 0; i < numPools; i++)
        {
            /* advance to the next node that still has unassigned cores */
            while (!cpusPerNode[node])
                node++;
            int cores = X265_MIN(MAX_POOL_THREADS, cpusPerNode[node]);
            if (!pools[i].create(cores, maxProviders, node))
            {
                /* the array came from new[], so it must be released with
                 * delete[] (which also runs the ThreadPool destructors) */
                delete[] pools;
                numPools = 0;
                return NULL;
            }
            if (numNumaNodes > 1)
                x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads on NUMA node %d\n", i, cores, node);
            else
                x265_log(p, X265_LOG_INFO, "Thread pool created using %d threads\n", cores);
            cpusPerNode[node] -= cores;
        }
    }
    else
        numPools = 0;
    return pools;
}