//////////////////////////////////////////////////////////////////////////
/// @brief Advances curDrawBE past every leading draw whose work is complete,
///        calling CompleteDrawContextInl on each draw it steps over.
/// @param pContext - pointer to SWR context.
/// @param workerId - ID of the calling worker; forwarded to CompleteDrawContextInl.
/// @param curDrawBE - in/out draw id. Incremented once for every completed draw.
/// @param drawEnqueued - out. Receives the value returned by GetEnqueuedDraw.
/// @return true when curDrawBE still compares less than drawEnqueued (an
///         incomplete draw remains), false when there are none left.
INLINE bool FindFirstIncompleteDraw(
    SWR_CONTEXT* pContext,
    uint32_t     workerId,
    uint32_t&    curDrawBE,
    uint32_t&    drawEnqueued)
{
    drawEnqueued = GetEnqueuedDraw(pContext);

    while (IDComparesLess(curDrawBE, drawEnqueued))
    {
        DRAW_CONTEXT* pDraw = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT];

        // A non-compute draw whose FE has not finished yet stops the scan.
        if (!(pDraw->doneFE || pDraw->isCompute))
        {
            break;
        }

        // Compute draws report completion through the dispatch object,
        // everything else through the tile manager.
        bool finished;
        if (pDraw->isCompute)
        {
            finished = pDraw->pDispatch->isWorkComplete();
        }
        else
        {
            finished = pDraw->pTileMgr->isWorkComplete();
        }

        if (!finished)
        {
            break;
        }

        // Step past the draw first, then complete its context (same order as before).
        curDrawBE++;
        CompleteDrawContextInl(pContext, workerId, pDraw);
    }

    // Anything left between curDrawBE and drawEnqueued is still incomplete.
    return IDComparesLess(curDrawBE, drawEnqueued);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Advances curDrawBE to the first draw that still has work outstanding.
/// @param pContext - pointer to SWR context.
/// @param curDrawBE - in/out draw id. Incremented once for every draw found complete.
/// @return true when curDrawBE is still below the enqueued draw count sampled
///         at entry (an incomplete draw remains), false otherwise.
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, volatile uint64_t& curDrawBE)
{
    // Sampled once; draws enqueued after this point are not considered by this call.
    uint64_t drawEnqueued = GetEnqueuedDraw(pContext);

    while (curDrawBE < drawEnqueued)
    {
        DRAW_CONTEXT* pDraw = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];

        // A non-compute draw whose FE has not finished yet stops the scan.
        if (!(pDraw->doneFE || pDraw->isCompute))
        {
            break;
        }

        // Compute draws report completion through the dispatch object,
        // everything else through the tile manager.
        bool finished;
        if (pDraw->isCompute)
        {
            finished = pDraw->pDispatch->isWorkComplete();
        }
        else
        {
            finished = pDraw->pTileMgr->isWorkComplete();
        }

        if (!finished)
        {
            break;
        }

        curDrawBE++;
    }

    // True while there is still an incomplete draw ahead of us.
    return curDrawBE < drawEnqueued;
}
//////////////////////////////////////////////////////////////////////////
/// @brief If there is any FE work available then claim it and run it.
/// @param pContext - pointer to SWR context.
/// @param workerId - ID of the calling worker; forwarded to the FE work
///                   function and to the completion helpers.
/// @param curDrawFE - in/out draw id. Advanced past every leading draw that
///                    is compute or already has its FE done.
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
{
    uint32_t drawEnqueued = GetEnqueuedDraw(pContext);

    // Pass 1: step over leading draws that need no FE work from us, completing
    // the draw context of each one before moving on.
    for (; IDComparesLess(curDrawFE, drawEnqueued); curDrawFE++)
    {
        uint32_t      slot  = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pDraw = &pContext->dcRing[slot];

        if (!pDraw->isCompute && !pDraw->doneFE)
        {
            break;
        }

        CompleteDrawContextInl(pContext, workerId, pDraw);
    }

    // The draw immediately before curDrawFE; handed to the dependency check below.
    uint32_t lastRetiredFE = curDrawFE - 1;

    // Pass 2: walk the remaining draws and try to claim the FE of each one.
    for (uint32_t draw = curDrawFE; IDComparesLess(draw, drawEnqueued); draw++)
    {
        uint32_t      slot  = draw % pContext->MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pDraw = &pContext->dcRing[slot];

        // Compute draws and draws whose FE lock is already set are skipped.
        if (pDraw->isCompute || pDraw->FeLock)
        {
            continue;
        }

        // A reported dependency ends the whole call, not just this iteration.
        if (CheckDependencyFE(pContext, pDraw, lastRetiredFE))
        {
            return;
        }

        // Atomically flip FeLock 0 -> 1; a non-zero previous value means someone else won.
        uint32_t previous = InterlockedCompareExchange((volatile uint32_t*)&pDraw->FeLock, 1, 0);
        if (previous != 0)
        {
            continue;
        }

        // We own this draw's FE: run it, then signal FE completion.
        pDraw->FeWork.pfnWork(pContext, pDraw, workerId, &pDraw->FeWork.desc);
        CompleteDrawFE(pContext, workerId, pDraw);
    }
}
//////////////////////////////////////////////////////////////////////////
/// @brief If there is any FE work available then claim it and run it.
/// @param pContext - pointer to SWR context.
/// @param workerId - ID of the calling worker; forwarded to the FE work function.
/// @param curDrawFE - in/out draw id. Advanced past every leading draw that is
///                    compute, already has its FE done, or has its FE lock set.
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE)
{
    uint64_t drawEnqueued = GetEnqueuedDraw(pContext);

    // Pass 1: step over leading draws that offer no FE work to this thread,
    // calling CompleteDrawContext on each one before moving on.
    for (; curDrawFE < drawEnqueued; curDrawFE++)
    {
        uint32_t      slot  = curDrawFE % KNOB_MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pDraw = &pContext->dcRing[slot];

        if (!pDraw->isCompute && !pDraw->doneFE && !pDraw->FeLock)
        {
            break;
        }

        CompleteDrawContext(pContext, pDraw);
    }

    // Pass 2: walk the remaining draws and try to claim the FE of each one.
    for (uint64_t draw = curDrawFE; draw < drawEnqueued; draw++)
    {
        uint32_t      slot  = draw % KNOB_MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pDraw = &pContext->dcRing[slot];

        // Compute draws and draws whose FE lock is already set are skipped.
        if (pDraw->isCompute || pDraw->FeLock)
        {
            continue;
        }

        // Atomically flip FeLock 0 -> 1; a non-zero previous value means someone else won.
        uint32_t previous = InterlockedCompareExchange((volatile uint32_t*)&pDraw->FeLock, 1, 0);
        if (previous != 0)
        {
            continue;
        }

        // We own this draw's FE: run it.
        pDraw->FeWork.pfnWork(pContext, pDraw, workerId, &pDraw->FeWork.desc);

        // Keep the compiler from moving the doneFE store ahead of the FE work.
        _ReadWriteBarrier();
        pDraw->doneFE = true;
    }
}
////////////////////////////////////////////////////////////////////////// /// @brief If there is any BE work then go work on it. /// @param pContext - pointer to SWR context. /// @param workerId - The unique worker ID that is assigned to this thread. /// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread /// has its own curDrawBE counter and this ensures that each worker processes all the /// draws in order. /// @param lockedTiles - This is the set of tiles locked by other threads. Each thread maintains its /// own set and each time it fails to lock a macrotile, because its already locked, /// then it will add that tile to the lockedTiles set. As a worker begins to work /// on future draws the lockedTiles ensure that it doesn't work on tiles that may /// still have work pending in a previous draw. Additionally, the lockedTiles is /// hueristic that can steer a worker back to the same macrotile that it had been /// working on in a previous draw. void WorkOnFifoBE( SWR_CONTEXT *pContext, uint32_t workerId, volatile uint64_t &curDrawBE, std::unordered_set<uint32_t>& lockedTiles) { // Find the first incomplete draw that has pending work. If no such draw is found then // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE. if (FindFirstIncompleteDraw(pContext, curDrawBE) == false) { return; } uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1; // Reset our history for locked tiles. We'll have to re-learn which tiles are locked. lockedTiles.clear(); // Try to work on each draw in order of the available draws in flight. // 1. If we're on curDrawBE, we can work on any macrotile that is available. // 2. If we're trying to work on draws after curDrawBE, we are restricted to // working on those macrotiles that are known to be complete in the prior draw to // maintain order. The locked tiles provides the history to ensures this. 
for (uint64_t i = curDrawBE; i < GetEnqueuedDraw(pContext); ++i) { DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT]; if (pDC->isCompute) return; // We don't look at compute work. // First wait for FE to be finished with this draw. This keeps threading model simple // but if there are lots of bubbles between draws then serializing FE and BE may // need to be revisited. if (!pDC->doneFE) break; // If this draw is dependent on a previous draw then we need to bail. if (CheckDependency(pContext, pDC, lastRetiredDraw)) { return; } // Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it. std::vector<uint32_t> ¯oTiles = pDC->pTileMgr->getDirtyTiles(); for (uint32_t tileID : macroTiles) { MacroTileQueue &tile = pDC->pTileMgr->getMacroTileQueue(tileID); // can only work on this draw if it's not in use by other threads if (lockedTiles.find(tileID) == lockedTiles.end()) { if (tile.getNumQueued()) { if (tile.tryLock()) { BE_WORK *pWork; RDTSC_START(WorkerFoundWork); uint32_t numWorkItems = tile.getNumQueued(); if (numWorkItems != 0) { pWork = tile.peek(); SWR_ASSERT(pWork); if (pWork->type == DRAW) { InitializeHotTiles(pContext, pDC, tileID, (const TRIANGLE_WORK_DESC*)&pWork->desc); } } while ((pWork = tile.peek()) != nullptr) { pWork->pfnWork(pDC, workerId, tileID, &pWork->desc); tile.dequeue(); } RDTSC_STOP(WorkerFoundWork, numWorkItems, pDC->drawId); _ReadWriteBarrier(); pDC->pTileMgr->markTileComplete(tileID); // Optimization: If the draw is complete and we're the last one to have worked on it then // we can reset the locked list as we know that all previous draws before the next are guaranteed to be complete. if ((curDrawBE == i) && pDC->pTileMgr->isWorkComplete()) { // We can increment the current BE and safely move to next draw since we know this draw is complete. curDrawBE++; lastRetiredDraw++; lockedTiles.clear(); break; } } else { // This tile is already locked. So let's add it to our locked tiles set. 
This way we don't try locking this one again. lockedTiles.insert(tileID); } } } } } }