////////////////////////////////////////////////////////////////////////// /// @brief InitializeHotTiles /// for draw calls, we initialize the active hot tiles and perform deferred /// load on them if tile is in invalid state. we do this in the outer thread /// loop instead of inside the draw routine itself mainly for performance, /// to avoid unnecessary setup every triangle /// @todo support deferred clear /// @param pCreateInfo - pointer to creation info. void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID) { const API_STATE& state = GetApiState(pDC); HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; uint32_t x, y; MacroTileMgr::getTileIndices(macroID, x, y); x *= KNOB_MACROTILE_X_DIM; y *= KNOB_MACROTILE_Y_DIM; uint32_t numSamples = GetNumSamples(state.rastState.sampleCount); // check RT if enabled unsigned long rtSlot = 0; uint32_t colorHottileEnableMask = state.colorHottileEnable; while (_BitScanForward(&rtSlot, colorHottileEnableMask)) { HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. 
ClearColorHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } colorHottileEnableMask &= ~(1 << rtSlot); } // check depth if enabled if (state.depthHottileEnable) { HOTTILE* pHotTile = GetHotTile( pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. ClearDepthHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } } // check stencil if enabled if (state.stencilHottileEnable) { HOTTILE* pHotTile = GetHotTile( pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. ClearStencilHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } } }
//////////////////////////////////////////////////////////////////////////
/// @brief GetHotTile
/// Returns the hot tile backing one attachment of a macrotile.
///  - If the tile has no buffer yet, one is allocated when @p create is
///    true (state HOTTILE_INVALID, requested sample count and array
///    index recorded); otherwise nullptr is returned.
///  - If the tile exists but was allocated for fewer samples than
///    requested, its buffer is freed and reallocated, and the tile is
///    marked HOTTILE_INVALID.
///  - If the tile currently holds a different render target array index,
///    a pending clear is applied, a dirty tile is written out through
///    pfnStoreTile for the old index, the requested index is read in
///    through pfnLoadTile, and the tile is left HOTTILE_DIRTY.
/// @param pContext               - SWR context; supplies NUMA info and the
///                                 load/store tile callbacks.
/// @param pDC                    - draw context passed to GetPrivateState.
/// @param hWorkerPrivateData     - worker handle forwarded to the callbacks.
/// @param macroID                - identifier of the macrotile.
/// @param attachment             - attachment whose hot tile is requested.
/// @param create                 - allocate the tile if it does not exist.
/// @param numSamples             - sample count the tile must be able to hold.
/// @param renderTargetArrayIndex - array slice the tile must contain.
/// @return pointer to the hot tile, or nullptr if it does not exist and
///         @p create is false.
HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT*                pContext,
                                DRAW_CONTEXT*               pDC,
                                HANDLE                      hWorkerPrivateData,
                                uint32_t                    macroID,
                                SWR_RENDERTARGET_ATTACHMENT attachment,
                                bool                        create,
                                uint32_t                    numSamples,
                                uint32_t                    renderTargetArrayIndex)
{
    uint32_t tileX, tileY;
    MacroTileMgr::getTileIndices(macroID, tileX, tileY);

    SWR_ASSERT(tileX < KNOB_NUM_HOT_TILES_X);
    SWR_ASSERT(tileY < KNOB_NUM_HOT_TILES_Y);

    HOTTILE& hotTile = mHotTiles[tileX][tileY].Attachment[attachment];

    // Allocates a buffer sized for the requested sample count on a NUMA node
    // derived from the tile indices, and marks the tile contents invalid.
    auto allocateForSamples = [&]() {
        const uint32_t byteCount = numSamples * mHotTileSize[attachment];
        const uint32_t numaNode  = ((tileX ^ tileY) & pContext->threadPool.numaMask);
        hotTile.pBuffer          = static_cast<uint8_t*>(
            AllocHotTileMem(byteCount, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE));
        hotTile.state      = HOTTILE_INVALID;
        hotTile.numSamples = numSamples;
    };

    // Tile has never been allocated: create it on request, otherwise report absence.
    if (hotTile.pBuffer == nullptr)
    {
        if (!create)
        {
            return nullptr;
        }

        allocateForSamples();
        hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
        return &hotTile;
    }

    // Existing buffer is too small for the requested sample count: replace it
    // with one large enough to hold all samples.
    if (numSamples > hotTile.numSamples)
    {
        // The tile should be uninitialized, resolved or pending clear when we
        // delete it and switch to a new sample count.
        SWR_ASSERT((hotTile.state == HOTTILE_INVALID) || (hotTile.state == HOTTILE_RESOLVED) ||
                   (hotTile.state == HOTTILE_CLEAR));

        FreeHotTileMem(hotTile.pBuffer);
        allocateForSamples();
    }

    // Requested array slice is already the one held by the tile: nothing more to do.
    if (renderTargetArrayIndex == hotTile.renderTargetArrayIndex)
    {
        return &hotTile;
    }

    // A different array slice is requested: store out the current contents
    // (if any) and load the requested slice.
    SWR_FORMAT format;
    switch (attachment)
    {
    case SWR_ATTACHMENT_DEPTH:
        format = KNOB_DEPTH_HOT_TILE_FORMAT;
        break;
    case SWR_ATTACHMENT_STENCIL:
        format = KNOB_STENCIL_HOT_TILE_FORMAT;
        break;
    case SWR_ATTACHMENT_COLOR0:
    case SWR_ATTACHMENT_COLOR1:
    case SWR_ATTACHMENT_COLOR2:
    case SWR_ATTACHMENT_COLOR3:
    case SWR_ATTACHMENT_COLOR4:
    case SWR_ATTACHMENT_COLOR5:
    case SWR_ATTACHMENT_COLOR6:
    case SWR_ATTACHMENT_COLOR7:
        format = KNOB_COLOR_HOT_TILE_FORMAT;
        break;
    default:
        SWR_INVALID("Unknown attachment: %d", attachment);
        format = KNOB_COLOR_HOT_TILE_FORMAT;
        break;
    }

    const uint32_t originX = tileX * KNOB_MACROTILE_X_DIM;
    const uint32_t originY = tileY * KNOB_MACROTILE_Y_DIM;

    // A pending clear belongs to the slice currently held; apply it so that
    // the store below writes the cleared contents out.
    if (hotTile.state == HOTTILE_CLEAR)
    {
        switch (attachment)
        {
        case SWR_ATTACHMENT_STENCIL:
            ClearStencilHotTile(&hotTile);
            break;
        case SWR_ATTACHMENT_DEPTH:
            ClearDepthHotTile(&hotTile);
            break;
        default:
            ClearColorHotTile(&hotTile);
            break;
        }

        hotTile.state = HOTTILE_DIRTY;
    }

    // Write the old slice out before its buffer is overwritten.
    if (hotTile.state == HOTTILE_DIRTY)
    {
        pContext->pfnStoreTile(GetPrivateState(pDC),
                               hWorkerPrivateData,
                               format,
                               attachment,
                               originX,
                               originY,
                               hotTile.renderTargetArrayIndex,
                               hotTile.pBuffer);
    }

    pContext->pfnLoadTile(GetPrivateState(pDC),
                          hWorkerPrivateData,
                          format,
                          attachment,
                          originX,
                          originY,
                          renderTargetArrayIndex,
                          hotTile.pBuffer);

    hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
    hotTile.state                  = HOTTILE_DIRTY;

    return &hotTile;
}
// for draw calls, we initialize the active hot tiles and perform deferred // load on them if tile is in invalid state. we do this in the outer thread loop instead of inside // the draw routine itself mainly for performance, to avoid unnecessary setup // every triangle // @todo support deferred clear INLINE void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, const TRIANGLE_WORK_DESC* pWork) { const API_STATE& state = GetApiState(pDC); HotTileMgr *pHotTileMgr = pContext->pHotTileMgr; const SWR_PS_STATE& psState = state.psState; uint32_t numRTs = psState.maxRTSlotUsed + 1; uint32_t x, y; MacroTileMgr::getTileIndices(macroID, x, y); x *= KNOB_MACROTILE_X_DIM; y *= KNOB_MACROTILE_Y_DIM; uint32_t numSamples = GetNumSamples(state.rastState.sampleCount); // check RT if enabled if (state.psState.pfnPixelShader != nullptr) { for (uint32_t rt = 0; rt < numRTs; ++rt) { HOTTILE* pHotTile = pHotTileMgr->GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rt), true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_START(BELoadTiles); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rt), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_STOP(BELoadTiles, 0, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { RDTSC_START(BELoadTiles); // Clear the tile. 
ClearColorHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; RDTSC_STOP(BELoadTiles, 0, 0); } } } // check depth if enabled if (state.depthStencilState.depthTestEnable || state.depthStencilState.depthWriteEnable) { HOTTILE* pHotTile = pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_START(BELoadTiles); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_STOP(BELoadTiles, 0, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { RDTSC_START(BELoadTiles); // Clear the tile. ClearDepthHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; RDTSC_STOP(BELoadTiles, 0, 0); } } // check stencil if enabled if (state.depthStencilState.stencilTestEnable || state.depthStencilState.stencilWriteEnable) { HOTTILE* pHotTile = pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_START(BELoadTiles); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_STOP(BELoadTiles, 0, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { RDTSC_START(BELoadTiles); // Clear the tile. ClearStencilHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; RDTSC_STOP(BELoadTiles, 0, 0); } } }