Ejemplo n.º 1
0
//////////////////////////////////////////////////////////////////////////
/// @brief Moves curDrawBE forward past every draw whose work is reported
///        complete, calling CompleteDrawContextInl for each draw passed.
/// @param pContext - pointer to SWR context.
/// @param workerId - worker ID forwarded to CompleteDrawContextInl.
/// @param curDrawBE - in/out draw id cursor; incremented once per completed draw.
/// @param drawEnqueued - out; receives the value returned by GetEnqueuedDraw.
/// @return Result of IDComparesLess(curDrawBE, drawEnqueued) after the scan,
///         i.e. whether the cursor stopped before the enqueued draw id.
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE, uint32_t& drawEnqueued)
{
    // Snapshot the enqueued draw id once; the scan below runs against this snapshot.
    drawEnqueued = GetEnqueuedDraw(pContext);

    while (IDComparesLess(curDrawBE, drawEnqueued))
    {
        DRAW_CONTEXT *pDraw = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT];

        // A non-compute draw whose FE has not finished stops the scan.
        if (!pDraw->doneFE && !pDraw->isCompute)
        {
            break;
        }

        // Compute draws report completion through the dispatch object,
        // everything else through the tile manager.
        bool finished;
        if (pDraw->isCompute)
        {
            finished = pDraw->pDispatch->isWorkComplete();
        }
        else
        {
            finished = pDraw->pTileMgr->isWorkComplete();
        }

        if (!finished)
        {
            break;
        }

        // The cursor is advanced before the draw context is completed.
        curDrawBE++;
        CompleteDrawContextInl(pContext, workerId, pDraw);
    }

    // False when the cursor has caught up with the enqueued draw id.
    return IDComparesLess(curDrawBE, drawEnqueued);
}
Ejemplo n.º 2
0
//////////////////////////////////////////////////////////////////////////
/// @brief Moves curDrawBE forward past every draw whose work is reported
///        complete.
/// @param pContext - pointer to SWR context.
/// @param curDrawBE - in/out draw id cursor; incremented once per completed draw.
/// @return true if curDrawBE is still below the enqueued draw id sampled at
///         entry, false otherwise.
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, volatile uint64_t& curDrawBE)
{
    // Sample the enqueued draw id once; the scan below runs against this snapshot.
    uint64_t enqueuedDraw = GetEnqueuedDraw(pContext);

    while (curDrawBE < enqueuedDraw)
    {
        DRAW_CONTEXT *pDraw = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];

        // A non-compute draw whose FE has not finished stops the scan.
        if (!pDraw->doneFE && !pDraw->isCompute)
        {
            break;
        }

        // Compute draws report completion through the dispatch object,
        // everything else through the tile manager.
        bool finished;
        if (pDraw->isCompute)
        {
            finished = pDraw->pDispatch->isWorkComplete();
        }
        else
        {
            finished = pDraw->pTileMgr->isWorkComplete();
        }

        if (!finished)
        {
            break;
        }

        curDrawBE++;
    }

    // An incomplete draw remains only while the cursor is below the snapshot.
    return curDrawBE < enqueuedDraw;
}
Ejemplo n.º 3
0
//////////////////////////////////////////////////////////////////////////
/// @brief Runs front-end (FE) work for draws in the ring.
///        First advances curDrawFE past leading draws that are compute or
///        already have doneFE set, then scans the remaining enqueued draws
///        and runs the FE work of each one whose FeLock it manages to take.
/// @param pContext - pointer to SWR context.
/// @param workerId - worker ID forwarded to the FE work function and the
///                   completion helpers.
/// @param curDrawFE - in/out draw id cursor; incremented once per leading
///                    draw that needs no FE work.
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
{
    uint32_t enqueuedDraw = GetEnqueuedDraw(pContext);

    // Phase 1: step the cursor over leading draws that are compute or FE-done.
    while (IDComparesLess(curDrawFE, enqueuedDraw))
    {
        uint32_t headSlot = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pHead = &pContext->dcRing[headSlot];

        if (!(pHead->isCompute || pHead->doneFE))
        {
            break;
        }

        CompleteDrawContextInl(pContext, workerId, pHead);
        curDrawFE++;
    }

    // Phase 2: look for FE work in the draws from the cursor onwards.
    uint32_t lastRetiredFE = curDrawFE - 1;
    for (uint32_t drawId = curDrawFE; IDComparesLess(drawId, enqueuedDraw); ++drawId)
    {
        uint32_t slot = drawId % pContext->MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pCandidate = &pContext->dcRing[slot];

        // Compute draws and draws whose FeLock is already set are skipped.
        if (pCandidate->isCompute || pCandidate->FeLock)
        {
            continue;
        }

        // A true result from the dependency check ends the whole pass.
        if (CheckDependencyFE(pContext, pCandidate, lastRetiredFE))
        {
            return;
        }

        // Try to flip FeLock from 0 to 1; a previous value of 0 means the swap succeeded.
        uint32_t previousLock = InterlockedCompareExchange((volatile uint32_t*)&pCandidate->FeLock, 1, 0);
        if (previousLock != 0)
        {
            continue;
        }

        // This worker owns the draw's FE: run it, then mark the FE complete.
        pCandidate->FeWork.pfnWork(pContext, pCandidate, workerId, &pCandidate->FeWork.desc);

        CompleteDrawFE(pContext, workerId, pCandidate);
    }
}
Ejemplo n.º 4
0
//////////////////////////////////////////////////////////////////////////
/// @brief Runs front-end (FE) work for draws in the ring.
///        First advances curDrawFE past leading draws that are compute,
///        have doneFE set, or have FeLock set, then scans the remaining
///        enqueued draws and runs the FE work of each one whose FeLock it
///        manages to take.
/// @param pContext - pointer to SWR context.
/// @param workerId - worker ID forwarded to the FE work function.
/// @param curDrawFE - in/out draw id cursor; incremented once per leading
///                    draw that is skipped.
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE)
{
    uint64_t enqueuedDraw = GetEnqueuedDraw(pContext);

    // Phase 1: step the cursor over leading draws this worker has no FE work for.
    // NOTE(review): a set FeLock (FE possibly still running elsewhere) also counts
    // as "skip" here and triggers CompleteDrawContext - confirm this is intended.
    while (curDrawFE < enqueuedDraw)
    {
        uint32_t headSlot = curDrawFE % KNOB_MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pHead = &pContext->dcRing[headSlot];

        if (!(pHead->isCompute || pHead->doneFE || pHead->FeLock))
        {
            break;
        }

        CompleteDrawContext(pContext, pHead);
        curDrawFE++;
    }

    // Phase 2: look for FE work in the draws from the cursor onwards.
    for (uint64_t drawId = curDrawFE; drawId < enqueuedDraw; ++drawId)
    {
        uint32_t slot = drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pCandidate = &pContext->dcRing[slot];

        // Compute draws and draws whose FeLock is already set are skipped.
        if (pCandidate->isCompute || pCandidate->FeLock)
        {
            continue;
        }

        // Try to flip FeLock from 0 to 1; a previous value of 0 means the swap succeeded.
        uint32_t previousLock = InterlockedCompareExchange((volatile uint32_t*)&pCandidate->FeLock, 1, 0);
        if (previousLock != 0)
        {
            continue;
        }

        // This worker owns the draw's FE: run it.
        pCandidate->FeWork.pfnWork(pContext, pCandidate, workerId, &pCandidate->FeWork.desc);

        // The barrier sits between the FE work and the doneFE store.
        _ReadWriteBarrier();
        pCandidate->doneFE = true;
    }
}
Ejemplo n.º 5
0
//////////////////////////////////////////////////////////////////////////
/// @brief If there is any BE work then go work on it.
///        Walks the draws from curDrawBE up to the enqueued draw id and, for each
///        draw whose FE is done, drains every dirty macrotile it manages to lock.
/// @param pContext - pointer to SWR context.
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread
///                    has its own curDrawBE counter and this ensures that each worker processes all the
///                    draws in order.
/// @param lockedTiles - This is the set of tiles locked by other threads. Each thread maintains its
///                      own set and each time it fails to lock a macrotile, because it's already locked,
///                      then it will add that tile to the lockedTiles set. As a worker begins to work
///                      on future draws the lockedTiles ensure that it doesn't work on tiles that may
///                      still have work pending in a previous draw. Additionally, the lockedTiles is a
///                      heuristic that can steer a worker back to the same macrotile that it had been
///                      working on in a previous draw.
void WorkOnFifoBE(
    SWR_CONTEXT *pContext,
    uint32_t workerId,
    volatile uint64_t &curDrawBE,
    std::unordered_set<uint32_t>& lockedTiles)
{
    // Find the first incomplete draw that has pending work. If no such draw is found then
    // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
    if (FindFirstIncompleteDraw(pContext, curDrawBE) == false)
    {
        return;
    }

    // Draw id immediately before the draw stored in curDrawBE's ring slot; passed to
    // CheckDependency below and bumped whenever this thread advances curDrawBE.
    uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;

    // Reset our history for locked tiles. We'll have to re-learn which tiles are locked.
    lockedTiles.clear();

    // Try to work on each draw in order of the available draws in flight.
    //   1. If we're on curDrawBE, we can work on any macrotile that is available.
    //   2. If we're trying to work on draws after curDrawBE, we are restricted to
    //      working on those macrotiles that are known to be complete in the prior draw to
    //      maintain order. The locked tiles provide the history to ensure this.
    // Note: GetEnqueuedDraw is re-evaluated on every iteration, so the upper bound can
    // change while this loop is running.
    for (uint64_t i = curDrawBE; i < GetEnqueuedDraw(pContext); ++i)
    {
        DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];

        if (pDC->isCompute) return; // We don't look at compute work.

        // First wait for FE to be finished with this draw. This keeps threading model simple
        // but if there are lots of bubbles between draws then serializing FE and BE may
        // need to be revisited.
        // (Nothing follows the draw loop, so this break ends the function just like a return.)
        if (!pDC->doneFE) break;
        
        // If this draw is dependent on a previous draw then we need to bail.
        if (CheckDependency(pContext, pDC, lastRetiredDraw))
        {
            return;
        }

        // Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it.
        std::vector<uint32_t> &macroTiles = pDC->pTileMgr->getDirtyTiles();

        for (uint32_t tileID : macroTiles)
        {
            MacroTileQueue &tile = pDC->pTileMgr->getMacroTileQueue(tileID);
            
            // can only work on this draw if it's not in use by other threads
            if (lockedTiles.find(tileID) == lockedTiles.end())
            {
                // Cheap pre-check before attempting the lock; the count is read again
                // once the lock is held.
                if (tile.getNumQueued())
                {
                    // NOTE(review): no matching unlock call is visible in this function;
                    // presumably markTileComplete (or the tile manager) releases the
                    // lock - confirm.
                    if (tile.tryLock())
                    {
                        BE_WORK *pWork;

                        RDTSC_START(WorkerFoundWork);

                        // Item count sampled under the lock; also reported to RDTSC_STOP below.
                        uint32_t numWorkItems = tile.getNumQueued();

                        if (numWorkItems != 0)
                        {
                            // Hot tiles are initialized only when the first queued
                            // item is of type DRAW.
                            pWork = tile.peek();
                            SWR_ASSERT(pWork);
                            if (pWork->type == DRAW)
                            {
                                InitializeHotTiles(pContext, pDC, tileID, (const TRIANGLE_WORK_DESC*)&pWork->desc);
                            }
                        }

                        // Drain the queue: run each work item, then dequeue it, until
                        // peek() returns nullptr.
                        while ((pWork = tile.peek()) != nullptr)
                        {
                            pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
                            tile.dequeue();
                        }
                        RDTSC_STOP(WorkerFoundWork, numWorkItems, pDC->drawId);

                        // Barrier placed between draining the tile and marking it complete.
                        // NOTE(review): presumably this stops the compiler from reordering
                        // the two - confirm against the intrinsic's documentation.
                        _ReadWriteBarrier();

                        pDC->pTileMgr->markTileComplete(tileID);

                        // Optimization: If the draw is complete and we're the last one to have worked on it then
                        // we can reset the locked list as we know that all previous draws before the next are guaranteed to be complete.
                        if ((curDrawBE == i) && pDC->pTileMgr->isWorkComplete())
                        {
                            // We can increment the current BE and safely move to next draw since we know this draw is complete.
                            curDrawBE++;
                            lastRetiredDraw++;

                            lockedTiles.clear();
                            // Leaves only the tile loop; the draw loop's ++i then makes i
                            // equal to the just-advanced curDrawBE.
                            break;
                        }
                    }
                    else
                    {
                        // This tile is already locked. So let's add it to our locked tiles set. This way we don't try locking this one again.
                        lockedTiles.insert(tileID);
                    }
                }
            }
        }
    }
}