예제 #1
0
// Binds the calling thread to a logical processor.
//
//   pContext      - context whose threadInfo settings decide whether any
//                   binding is performed at all.
//   threadId      - index of the logical processor to bind to.
//   procGroupId   - Windows processor group to bind within (not used by the
//                   Linux path).
//   bindProcGroup - when true, binding is still performed even though
//                   MAX_WORKER_THREADS is set; on Windows the thread is then
//                   bound with an empty processor mask instead of a single
//                   hardware thread.
void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
{
    const auto& threadSettings = pContext->threadInfo;

    // Single-threaded mode never binds.
    if (threadSettings.SINGLE_THREADED)
    {
        return;
    }

    // Only bind threads when MAX_WORKER_THREADS isn't set, unless the caller
    // explicitly asked for processor-group binding.
    if (threadSettings.MAX_WORKER_THREADS && !bindProcGroup)
    {
        return;
    }

#if defined(_WIN32)

    GROUP_AFFINITY groupAffinity = {};
    groupAffinity.Group = procGroupId;

#if !defined(_WIN64)
    if (threadId >= 32)
    {
        // Hopefully we don't get here.  Logic in CreateThreadPool should prevent this.
        SWR_INVALID("Shouldn't get here");

        // A 32-bit Windows process cannot address logical processors 32-63
        // of a processor group, so leave the mask empty and let the system
        // pick the processor.
        groupAffinity.Mask = 0;
    }
    else
#endif
    {
        // If MAX_WORKER_THREADS is set (or group binding was requested),
        // bind to the proc group only, not to an individual HW thread.
        const bool pinToHwThread = !bindProcGroup && !threadSettings.MAX_WORKER_THREADS;
        groupAffinity.Mask = pinToHwThread ? (KAFFINITY(1) << threadId) : KAFFINITY(0);
    }

    const BOOL affinityApplied = SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, nullptr);
    if (!affinityApplied)
    {
        SWR_INVALID("Failed to set Thread Affinity");
    }

#elif defined(__linux__) || defined(__gnu_linux__)

    // Build an affinity set containing only the requested logical processor.
    cpu_set_t affinitySet;
    CPU_ZERO(&affinitySet);
    CPU_SET(threadId, &affinitySet);

    const int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &affinitySet);
    if (rc != 0)
    {
        // Failure is reported but not fatal; the thread simply stays unbound.
        fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(rc));
    }

#endif
}
예제 #2
0
파일: backend.cpp 프로젝트: chemecse/mesa
// Back-end work item that stores the hot tile of one macrotile/attachment.
//
//   pDC        - draw context; supplies the SWR context and the draw id.
//   workerId   - id of the worker running this item (not referenced directly
//                in this body).
//   macroTile  - macrotile id, decoded into x/y tile indices.
//   pDesc      - store description: clear rect and the state the hot tile
//                should be left in after the store.
//   attachment - render target attachment whose hot tile is processed.
//
// Nothing is stored when GetHotTileNoLoad returns no hot tile.
void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, STORE_TILES_DESC* pDesc,
    SWR_RENDERTARGET_ATTACHMENT attachment)
{
    SWR_CONTEXT *pContext = pDC->pContext;

    AR_BEGIN(BEStoreTiles, pDC->drawId);

    // Select the hot tile format for the attachment class.  Colour is the
    // starting value and also what an unknown attachment falls back to.
    SWR_FORMAT hotTileFormat = KNOB_COLOR_HOT_TILE_FORMAT;
    switch (attachment)
    {
    case SWR_ATTACHMENT_DEPTH:
        hotTileFormat = KNOB_DEPTH_HOT_TILE_FORMAT;
        break;
    case SWR_ATTACHMENT_STENCIL:
        hotTileFormat = KNOB_STENCIL_HOT_TILE_FORMAT;
        break;
    case SWR_ATTACHMENT_COLOR0:
    case SWR_ATTACHMENT_COLOR1:
    case SWR_ATTACHMENT_COLOR2:
    case SWR_ATTACHMENT_COLOR3:
    case SWR_ATTACHMENT_COLOR4:
    case SWR_ATTACHMENT_COLOR5:
    case SWR_ATTACHMENT_COLOR6:
    case SWR_ATTACHMENT_COLOR7:
        break;
    default:
        SWR_INVALID("Unknown attachment: %d", attachment);
        break;
    }

    uint32_t tileX, tileY;
    MacroTileMgr::getTileIndices(macroTile, tileX, tileY);

    // Re-evaluated at each use so the descriptor is read at the same points
    // as the checks that depend on it.
    auto dirtyRequestedAfterStore = [pDesc]() {
        return pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY;
    };

    // Only need to store the hottile if it's been rendered to...
    HOTTILE *pTile = pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, attachment, false);
    if (pTile != nullptr)
    {
        // A pending clear has to be applied to the tile before it can be stored.
        if (pTile->state == HOTTILE_CLEAR)
        {
            PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[hotTileFormat];
            SWR_ASSERT(pfnClearTiles != nullptr);

            pfnClearTiles(pDC, attachment, macroTile, pTile->renderTargetArrayIndex, pTile->clearData, pDesc->rect);
        }

        // Write the tile out when it is dirty, or when the caller wants it
        // left dirty after the store.
        if (pTile->state == HOTTILE_DIRTY || dirtyRequestedAfterStore())
        {
            const int32_t dstX = KNOB_MACROTILE_X_DIM * tileX;
            const int32_t dstY = KNOB_MACROTILE_Y_DIM * tileY;

            pContext->pfnStoreTile(GetPrivateState(pDC), hotTileFormat,
                attachment, dstX, dstY, pTile->renderTargetArrayIndex, pTile->pBuffer);
        }

        // Move the tile to the requested post-store state.  A resolved tile
        // is never moved back to dirty.
        const bool tileIsDirty    = (pTile->state == HOTTILE_DIRTY);
        const bool tileIsResolved = (pTile->state == HOTTILE_RESOLVED);
        if (tileIsDirty || (tileIsResolved && !dirtyRequestedAfterStore()))
        {
            pTile->state = (HOTTILE_STATE)pDesc->postStoreTileState;
        }
    }
    AR_END(BEStoreTiles, 1);
}
예제 #3
0
// Discovers the processor topology of the machine.
//
//   out_nodes                  - cleared, then filled with one entry per NUMA
//                                node (Windows) / physical package (Linux,
//                                macOS); each entry lists its cores and the
//                                logical processor ids belonging to each core.
//                                Cores without threads and nodes without cores
//                                are removed before returning.
//   out_numThreadsPerProcGroup - reset to 0, then set to the number of logical
//                                processors found in processor group 0
//                                (Windows) or in total (Linux, macOS).
//
// If the platform query fails, the outputs are left empty / zero.
void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThreadsPerProcGroup)
{
    out_nodes.clear();
    out_numThreadsPerProcGroup = 0;

#if defined(_WIN32)

    std::vector<KAFFINITY> threadMaskPerProcGroup;

    // Concurrent callers are serialized for the whole query.
    static std::mutex m;
    std::lock_guard<std::mutex> l(m);

    // The first call only reports the buffer size that is required.
    DWORD bufSize = 0;

    BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufSize);
    SWR_ASSERT(ret == FALSE && GetLastError() == ERROR_INSUFFICIENT_BUFFER);

    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBufferMem = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
    SWR_ASSERT(pBufferMem);
    if (pBufferMem == nullptr)
    {
        // Don't dereference a failed allocation when asserts are compiled out.
        return;
    }

    ret = GetLogicalProcessorInformationEx(RelationProcessorCore, pBufferMem, &bufSize);
    SWR_ASSERT(ret != FALSE, "Failed to get Processor Topology Information");
    if (ret == FALSE)
    {
        free(pBufferMem);
        return;
    }

    // The returned records are variable-sized, so walk the buffer by byte
    // offset instead of assuming every record has the size of the first one.
    DWORD bytesProcessed = 0;
    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = pBufferMem;

    while (bytesProcessed < bufSize && pBuffer->Size != 0)
    {
        SWR_ASSERT(pBuffer->Relationship == RelationProcessorCore);
        for (uint32_t g = 0; g < pBuffer->Processor.GroupCount; ++g)
        {
            auto& gmask = pBuffer->Processor.GroupMask[g];
            uint32_t threadId = 0;
            uint32_t procGroup = gmask.Group;

            // All HW threads found in this group mask belong to one core.
            Core* pCore = nullptr;

            while (BitScanForwardSizeT((unsigned long*)&threadId, gmask.Mask))
            {
                // clear mask
                KAFFINITY threadMask = KAFFINITY(1) << threadId;
                gmask.Mask &= ~threadMask;

                if (procGroup >= threadMaskPerProcGroup.size())
                {
                    threadMaskPerProcGroup.resize(procGroup + 1);
                }

                if (threadMaskPerProcGroup[procGroup] & threadMask)
                {
                    // Already seen this mask.  This means that we are in 32-bit mode and
                    // have seen more than 32 HW threads for this procGroup
                    // Don't use it
#if defined(_WIN64)
                    SWR_INVALID("Shouldn't get here in 64-bit mode");
#endif
                    continue;
                }

                threadMaskPerProcGroup[procGroup] |= threadMask;

                // Find Numa Node.  The API writes a USHORT, so hand it a real
                // USHORT rather than a reinterpreted 32-bit integer.
                USHORT numaNodeNumber = 0;
                PROCESSOR_NUMBER procNum = {};
                procNum.Group = WORD(procGroup);
                procNum.Number = UCHAR(threadId);

                ret = GetNumaProcessorNodeEx(&procNum, &numaNodeNumber);
                SWR_ASSERT(ret);
                if (!ret)
                {
                    // On failure the reported node number is not usable;
                    // fall back to node 0 instead of indexing with it.
                    numaNodeNumber = 0;
                }
                const uint32_t numaId = numaNodeNumber;

                // Store data
                if (out_nodes.size() <= numaId)
                {
                    out_nodes.resize(numaId + 1);
                }
                auto& numaNode = out_nodes[numaId];
                numaNode.numaId = numaId;

                if (nullptr == pCore)
                {
                    numaNode.cores.push_back(Core());
                    pCore = &numaNode.cores.back();
                    pCore->procGroup = procGroup;
                }
                pCore->threadIds.push_back(threadId);
                if (procGroup == 0)
                {
                    out_numThreadsPerProcGroup++;
                }
            }
        }
        bytesProcessed += pBuffer->Size;
        pBuffer = PtrAdd(pBuffer, pBuffer->Size);
    }

    free(pBufferMem);


#elif defined(__linux__) || defined (__gnu_linux__)

    // Parse /proc/cpuinfo to get full topology.  The file is a sequence of
    // "key : value" records, one per logical processor, separated by blank
    // lines.
    std::ifstream input("/proc/cpuinfo");
    std::string line;

    const uint32_t kUnknownId = uint32_t(-1);
    uint32_t procId = kUnknownId;
    uint32_t coreId = kUnknownId;
    uint32_t physId = kUnknownId;

    // If 'text' starts with 'key', parses the number after ": " into 'value'
    // (left untouched when there is no parsable number) and returns true.
    // Matching at the start of the line keeps values that merely contain the
    // key text (e.g. a model name) from being mistaken for the field.
    auto parseField = [](const std::string& text, const std::string& key, uint32_t& value) -> bool {
        if (text.compare(0, key.size(), key) != 0)
        {
            return false;
        }
        const auto separator = text.find(": ");
        if (separator != std::string::npos)
        {
            const char* begin = text.c_str() + separator + 2;
            char* end = nullptr;
            const unsigned long parsed = std::strtoul(begin, &end, 10);
            if (end != begin)
            {
                value = static_cast<uint32_t>(parsed);
            }
        }
        return true;
    };

    // Adds the record collected so far to out_nodes and resets the per-record
    // state.  Records without a processor id are dropped.  Not every kernel /
    // architecture reports "physical id" and "core id"; a missing package id
    // maps to node 0 and a missing core id makes the logical processor its
    // own core, instead of indexing the vectors with the "unknown" sentinel.
    auto commitRecord = [&]() {
        if (procId != kUnknownId)
        {
            const uint32_t nodeIdx = (physId != kUnknownId) ? physId : 0;
            const uint32_t coreIdx = (coreId != kUnknownId) ? coreId : procId;

            if (nodeIdx >= out_nodes.size())
                out_nodes.resize(size_t(nodeIdx) + 1);
            auto& numaNode = out_nodes[nodeIdx];
            numaNode.numaId = nodeIdx;

            if (coreIdx >= numaNode.cores.size())
                numaNode.cores.resize(size_t(coreIdx) + 1);
            auto& core = numaNode.cores[coreIdx];
            core.procGroup = coreIdx;
            core.threadIds.push_back(procId);
        }
        procId = kUnknownId;
        coreId = kUnknownId;
        physId = kUnknownId;
    };

    while (std::getline(input, line))
    {
        if (line.length() == 0)
        {
            // Blank line terminates the current processor record.
            commitRecord();
            continue;
        }
        if (parseField(line, "processor", procId))
        {
            continue;
        }
        if (parseField(line, "core id", coreId))
        {
            continue;
        }
        if (parseField(line, "physical id", physId))
        {
            continue;
        }
    }

    // Pick up a final record that is not followed by a blank line.
    commitRecord();

    out_numThreadsPerProcGroup = 0;
    for (auto &node : out_nodes)
    {
        for (auto &core : node.cores)
        {
            out_numThreadsPerProcGroup += static_cast<uint32_t>(core.threadIds.size());
        }
    }

#elif defined(__APPLE__)

    // Reads an integer sysctl value; yields 0 when the query fails or the
    // reported value is not positive, so the loops below always terminate.
    auto queryCount = [](const char* name) -> int {
        int value = 0;
        size_t size = sizeof(value);
        int result = sysctlbyname(name, &value, &size, NULL, 0);
        SWR_ASSERT(result == 0);
        return (result == 0 && value > 0) ? value : 0;
    };

    const int numPhysicalIds = queryCount("hw.packages");
    const int numProcessors  = queryCount("hw.logicalcpu");
    const int numCores       = queryCount("hw.physicalcpu");

    out_nodes.resize(numPhysicalIds);

    // NOTE(review): as before, every package is given the full range of
    // logical processor ids - confirm this is intended for multi-package
    // machines.
    for (int physId = 0; physId < numPhysicalIds; ++physId)
    {
        auto &numaNode = out_nodes[physId];
        numaNode.numaId = static_cast<uint32_t>(physId);
        numaNode.cores.resize(numCores);

        if (numCores == 0)
        {
            // No cores to distribute threads over; node is pruned below.
            continue;
        }

        // Distribute logical processors round-robin over the cores, never
        // handing out an id beyond the reported logical processor count.
        for (int procId = 0; procId < numProcessors; ++procId)
        {
            const int coreId = procId % numCores;
            auto &core = numaNode.cores[coreId];

            core.procGroup = static_cast<uint32_t>(coreId);
            core.threadIds.push_back(static_cast<uint32_t>(procId));
        }
    }

    out_numThreadsPerProcGroup = 0;

    for (auto &node : out_nodes)
    {
        for (auto &core : node.cores)
        {
            out_numThreadsPerProcGroup += static_cast<uint32_t>(core.threadIds.size());
        }
    }

#else

#error Unsupported platform

#endif

    // Prune empty cores and numa nodes
    for (auto node_it = out_nodes.begin(); node_it != out_nodes.end(); )
    {
        // Erase empty cores (first)
        for (auto core_it = node_it->cores.begin(); core_it != node_it->cores.end(); )
        {
            if (core_it->threadIds.size() == 0)
            {
                core_it = node_it->cores.erase(core_it);
            }
            else
            {
                ++core_it;
            }
        }

        // Erase empty numa nodes (second)
        if (node_it->cores.size() == 0)
        {
            node_it = out_nodes.erase(node_it);
        }
        else
        {
            ++node_it;
        }
    }
}
예제 #4
0
// Returns the hot tile for a macrotile/attachment pair, optionally creating
// it, and makes sure it holds the requested render target array slice.
//
//   pContext               - SWR context (NUMA settings, load/store callbacks).
//   pDC                    - draw context passed on to the load/store callbacks.
//   hWorkerPrivateData     - worker private data passed on to the callbacks.
//   macroID                - macrotile id, decoded into x/y tile indices.
//   attachment             - attachment whose hot tile is requested.
//   create                 - allocate the tile buffer if it does not exist yet.
//   numSamples             - sample count the tile buffer must be able to hold.
//   renderTargetArrayIndex - array slice that must be resident in the tile.
//
// Returns NULL only when the tile has no buffer and 'create' is false.
HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT*                pContext,
                                DRAW_CONTEXT*               pDC,
                                HANDLE                      hWorkerPrivateData,
                                uint32_t                    macroID,
                                SWR_RENDERTARGET_ATTACHMENT attachment,
                                bool                        create,
                                uint32_t                    numSamples,
                                uint32_t                    renderTargetArrayIndex)
{
    uint32_t x, y;
    MacroTileMgr::getTileIndices(macroID, x, y);

    SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
    SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);

    HOTTILE& hotTile = mHotTiles[x][y].Attachment[attachment];

    // (Re)allocates the tile buffer for the requested sample count and marks
    // the tile contents as invalid.
    auto allocateTileBuffer = [&]() {
        const uint32_t bufferSize = numSamples * mHotTileSize[attachment];
        const uint32_t nodeOffset = ((x ^ y) & pContext->threadPool.numaMask);
        hotTile.pBuffer =
            (uint8_t*)AllocHotTileMem(bufferSize, 64, nodeOffset + pContext->threadInfo.BASE_NUMA_NODE);
        hotTile.state      = HOTTILE_INVALID;
        hotTile.numSamples = numSamples;
    };

    // Tile has never been allocated: create it on request, otherwise report
    // that there is none.
    if (hotTile.pBuffer == nullptr)
    {
        if (!create)
        {
            return nullptr;
        }

        allocateTileBuffer();
        hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
        return &hotTile;
    }

    // Existing buffer is too small for the requested sample count: free the
    // old tile and create a new one with enough space to hold all samples.
    if (numSamples > hotTile.numSamples)
    {
        // tile should be either uninitialized or resolved if we're deleting and switching to a
        // new sample count
        SWR_ASSERT((hotTile.state == HOTTILE_INVALID) || (hotTile.state == HOTTILE_RESOLVED) ||
                   (hotTile.state == HOTTILE_CLEAR));
        FreeHotTileMem(hotTile.pBuffer);
        allocateTileBuffer();
    }

    // Requested array slice is already resident: nothing more to do.
    if (renderTargetArrayIndex == hotTile.renderTargetArrayIndex)
    {
        return &hotTile;
    }

    // A different array slice is resident: store out the current hottile and
    // load the requested slice.  Colour is the starting format and also the
    // fallback for an unknown attachment.
    SWR_FORMAT format = KNOB_COLOR_HOT_TILE_FORMAT;
    switch (attachment)
    {
    case SWR_ATTACHMENT_DEPTH:
        format = KNOB_DEPTH_HOT_TILE_FORMAT;
        break;
    case SWR_ATTACHMENT_STENCIL:
        format = KNOB_STENCIL_HOT_TILE_FORMAT;
        break;
    case SWR_ATTACHMENT_COLOR0:
    case SWR_ATTACHMENT_COLOR1:
    case SWR_ATTACHMENT_COLOR2:
    case SWR_ATTACHMENT_COLOR3:
    case SWR_ATTACHMENT_COLOR4:
    case SWR_ATTACHMENT_COLOR5:
    case SWR_ATTACHMENT_COLOR6:
    case SWR_ATTACHMENT_COLOR7:
        break;
    default:
        SWR_INVALID("Unknown attachment: %d", attachment);
        break;
    }

    // A pending clear is applied first; the tile is then dirty and gets
    // stored like any other dirty tile.
    if (hotTile.state == HOTTILE_CLEAR)
    {
        switch (attachment)
        {
        case SWR_ATTACHMENT_STENCIL:
            ClearStencilHotTile(&hotTile);
            break;
        case SWR_ATTACHMENT_DEPTH:
            ClearDepthHotTile(&hotTile);
            break;
        default:
            ClearColorHotTile(&hotTile);
            break;
        }

        hotTile.state = HOTTILE_DIRTY;
    }

    const uint32_t originX = x * KNOB_MACROTILE_X_DIM;
    const uint32_t originY = y * KNOB_MACROTILE_Y_DIM;

    if (hotTile.state == HOTTILE_DIRTY)
    {
        pContext->pfnStoreTile(GetPrivateState(pDC),
                               hWorkerPrivateData,
                               format,
                               attachment,
                               originX,
                               originY,
                               hotTile.renderTargetArrayIndex,
                               hotTile.pBuffer);
    }

    pContext->pfnLoadTile(GetPrivateState(pDC),
                          hWorkerPrivateData,
                          format,
                          attachment,
                          originX,
                          originY,
                          renderTargetArrayIndex,
                          hotTile.pBuffer);

    // The freshly loaded slice is recorded as resident and marked dirty.
    hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
    hotTile.state                  = HOTTILE_DIRTY;

    return &hotTile;
}