int main(int argc, char* argv[]) { #ifdef _MSC_VER _CrtSetDbgFlag( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF ); #endif if (argc < 4) { fprintf (stdout, "usage: dParcelGenerator [GrammarFile] [scannerClasName] [OutputFileName]\n"); fprintf (stdout, "[GrammarFile] name of the file containing a Yacc like Grammar file\n"); fprintf (stdout, "[ScannerClass] Name of the Scanner Class Generated by dLexGenerator\n"); fprintf (stdout, "[OutputFileName] name of the file cpp output file\n"); exit (0); } const char* const inputRulesFileName = argv[1]; const char* const scannerClassName = argv[2]; const char* const outputFileName = argv[3]; if (!CheckDependency (outputFileName, inputRulesFileName)) { FILE* const rules = fopen (inputRulesFileName, "rb"); if (!rules) { fprintf (stdout, "Rule file \"%s\" not found\n", inputRulesFileName); exit (0); } dString buffer; buffer.LoadFile(rules); fclose (rules); dParserCompiler parcel (buffer, outputFileName, scannerClassName); } return 0; }
////////////////////////////////////////////////////////////////////////// /// @brief If there is any compute work then go work on it. /// @param pContext - pointer to SWR context. /// @param workerId - The unique worker ID that is assigned to this thread. /// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread /// has its own curDrawBE counter and this ensures that each worker processes all the /// draws in order. void WorkOnCompute( SWR_CONTEXT *pContext, uint32_t workerId, volatile uint64_t& curDrawBE) { if (FindFirstIncompleteDraw(pContext, curDrawBE) == false) { return; } uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1; DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT]; if (pDC->isCompute == false) return; // check dependencies if (CheckDependency(pContext, pDC, lastRetiredDraw)) { return; } SWR_ASSERT(pDC->pDispatch != nullptr); DispatchQueue& queue = *pDC->pDispatch; // Is there any work remaining? if (queue.getNumQueued() > 0) { bool lastToComplete = false; uint32_t threadGroupId = 0; while (queue.getWork(threadGroupId)) { ProcessComputeBE(pDC, workerId, threadGroupId); lastToComplete = queue.finishedWork(); } _ReadWriteBarrier(); if (lastToComplete) { SWR_ASSERT(queue.isWorkComplete() == true); pDC->doneCompute = true; } } }
////////////////////////////////////////////////////////////////////////// /// @brief If there is any compute work then go work on it. /// @param pContext - pointer to SWR context. /// @param workerId - The unique worker ID that is assigned to this thread. /// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread /// has its own curDrawBE counter and this ensures that each worker processes all the /// draws in order. void WorkOnCompute( SWR_CONTEXT *pContext, uint32_t workerId, uint32_t& curDrawBE) { uint32_t drawEnqueued = 0; if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false) { return; } uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1; for (uint64_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i) { DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT]; if (pDC->isCompute == false) return; // check dependencies if (CheckDependency(pContext, pDC, lastRetiredDraw)) { return; } SWR_ASSERT(pDC->pDispatch != nullptr); DispatchQueue& queue = *pDC->pDispatch; // Is there any work remaining? if (queue.getNumQueued() > 0) { void* pSpillFillBuffer = nullptr; void* pScratchSpace = nullptr; uint32_t threadGroupId = 0; while (queue.getWork(threadGroupId)) { queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace); queue.finishedWork(); } // Ensure all streaming writes are globally visible before moving onto the next draw _mm_mfence(); } } }
////////////////////////////////////////////////////////////////////////// /// @brief If there is any compute work then go work on it. /// @param pContext - pointer to SWR context. /// @param workerId - The unique worker ID that is assigned to this thread. /// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread /// has its own curDrawBE counter and this ensures that each worker processes all the /// draws in order. void WorkOnCompute( SWR_CONTEXT *pContext, uint32_t workerId, uint64_t& curDrawBE) { uint64_t drawEnqueued = 0; if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false) { return; } uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1; for (uint64_t i = curDrawBE; curDrawBE < drawEnqueued; ++i) { DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT]; if (pDC->isCompute == false) return; // check dependencies if (CheckDependency(pContext, pDC, lastRetiredDraw)) { return; } SWR_ASSERT(pDC->pDispatch != nullptr); DispatchQueue& queue = *pDC->pDispatch; // Is there any work remaining? if (queue.getNumQueued() > 0) { void* pSpillFillBuffer = nullptr; uint32_t threadGroupId = 0; while (queue.getWork(threadGroupId)) { ProcessComputeBE(pDC, workerId, threadGroupId, pSpillFillBuffer); queue.finishedWork(); } } } }
////////////////////////////////////////////////////////////////////////// /// @brief If there is any BE work then go work on it. /// @param pContext - pointer to SWR context. /// @param workerId - The unique worker ID that is assigned to this thread. /// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread /// has its own curDrawBE counter and this ensures that each worker processes all the /// draws in order. /// @param lockedTiles - This is the set of tiles locked by other threads. Each thread maintains its /// own set and each time it fails to lock a macrotile, because its already locked, /// then it will add that tile to the lockedTiles set. As a worker begins to work /// on future draws the lockedTiles ensure that it doesn't work on tiles that may /// still have work pending in a previous draw. Additionally, the lockedTiles is /// hueristic that can steer a worker back to the same macrotile that it had been /// working on in a previous draw. /// @returns true if worker thread should shutdown bool WorkOnFifoBE( SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE, TileSet& lockedTiles, uint32_t numaNode, uint32_t numaMask) { bool bShutdown = false; // Find the first incomplete draw that has pending work. If no such draw is found then // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE. uint32_t drawEnqueued = 0; if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false) { return false; } uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1; // Reset our history for locked tiles. We'll have to re-learn which tiles are locked. lockedTiles.clear(); // Try to work on each draw in order of the available draws in flight. // 1. If we're on curDrawBE, we can work on any macrotile that is available. // 2. 
If we're trying to work on draws after curDrawBE, we are restricted to // working on those macrotiles that are known to be complete in the prior draw to // maintain order. The locked tiles provides the history to ensures this. for (uint32_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i) { DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT]; if (pDC->isCompute) return false; // We don't look at compute work. // First wait for FE to be finished with this draw. This keeps threading model simple // but if there are lots of bubbles between draws then serializing FE and BE may // need to be revisited. if (!pDC->doneFE) return false; // If this draw is dependent on a previous draw then we need to bail. if (CheckDependency(pContext, pDC, lastRetiredDraw)) { return false; } // Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it. auto ¯oTiles = pDC->pTileMgr->getDirtyTiles(); for (auto tile : macroTiles) { uint32_t tileID = tile->mId; // Only work on tiles for this numa node uint32_t x, y; pDC->pTileMgr->getTileIndices(tileID, x, y); if (((x ^ y) & numaMask) != numaNode) { continue; } if (!tile->getNumQueued()) { continue; } // can only work on this draw if it's not in use by other threads if (lockedTiles.find(tileID) != lockedTiles.end()) { continue; } if (tile->tryLock()) { BE_WORK *pWork; RDTSC_BEGIN(WorkerFoundWork, pDC->drawId); uint32_t numWorkItems = tile->getNumQueued(); SWR_ASSERT(numWorkItems); pWork = tile->peek(); SWR_ASSERT(pWork); if (pWork->type == DRAW) { pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, workerId, tileID); } else if (pWork->type == SHUTDOWN) { bShutdown = true; } while ((pWork = tile->peek()) != nullptr) { pWork->pfnWork(pDC, workerId, tileID, &pWork->desc); tile->dequeue(); } RDTSC_END(WorkerFoundWork, numWorkItems); _ReadWriteBarrier(); pDC->pTileMgr->markTileComplete(tileID); // Optimization: If the draw is complete and we're the last one to have worked on it then // we can 
reset the locked list as we know that all previous draws before the next are guaranteed to be complete. if ((curDrawBE == i) && (bShutdown || pDC->pTileMgr->isWorkComplete())) { // We can increment the current BE and safely move to next draw since we know this draw is complete. curDrawBE++; CompleteDrawContextInl(pContext, workerId, pDC); lastRetiredDraw++; lockedTiles.clear(); break; } if (bShutdown) { break; } } else { // This tile is already locked. So let's add it to our locked tiles set. This way we don't try locking this one again. lockedTiles.insert(tileID); } } } return bShutdown; }
INT8
StartMultiThreadBuild (
  BUILD_ITEM  **BuildList,
  UINT32      ThreadNumber,
  INT8        *BuildDir
  )
/*++

Routine Description:

  Start multi-thread build for a specified build list.

  The list is taken over from the caller (*BuildList is set to NULL) and is
  handed back through RestoreBuildList() on every exit path except the
  empty-list early return. Child threads are created with ThreadProc; this
  routine then repeatedly moves every pending item for which
  CheckDependency() returns non-zero onto mWaitingList and releases the
  semaphore once per moved item, until no item is pending or mError is set.

Arguments:

  BuildList     - build list for multi-thread build
  ThreadNumber  - thread number for multi-thread build
  BuildDir      - build dir; the log directory "<BuildDir>\Log" is created in it

Returns:

  0             - Successfully finished the multi-thread build
                  (also returned immediately when the list is empty)
  other value   - Build failure (1 when a semaphore, event or allocation
                  cannot be obtained, otherwise the value of mError)

--*/
{
  UINT32        Index;
  UINT32        Count;
  BUILD_ITEM    *PreviousBuildItem;
  BUILD_ITEM    *CurrentBuildItem;
  BUILD_ITEM    *NextBuildItem;
  HANDLE        *ThreadHandle;
  INT8          Cmd[MAX_PATH];

  mError        = 0;
  mDone         = 0;
  mThreadNumber = ThreadNumber;
  mBuildDir     = BuildDir;
  mPendingList  = *BuildList;
  *BuildList    = NULL;
  mWaitingList  = NULL;
  mBuildingList = NULL;
  mDoneList     = NULL;

  //
  // Do nothing when mPendingList is empty
  //
  if (mPendingList == NULL) {
    return 0;
  }

  //
  // Get build item count of mPendingList
  //
  Count = 0;
  CurrentBuildItem = mPendingList;
  while (CurrentBuildItem != NULL) {
    Count++;
    CurrentBuildItem = CurrentBuildItem->Next;
  }

  //
  // The semaphore is also used to wake up child threads for exit,
  // so need to make sure "maximum count" >= "thread number".
  //
  if (Count < ThreadNumber) {
    Count = ThreadNumber;
  }

  //
  // Init mSemaphoreHandle
  //
  mSemaphoreHandle = CreateSemaphore (
                       NULL,       // default security attributes
                       0,          // initial count
                       Count,      // maximum count
                       NULL        // unnamed semaphore
                       );
  if (mSemaphoreHandle == NULL) {
    Error (NULL, 0, 0, NULL, "failed to create semaphore");
    RestoreBuildList (BuildList);
    return 1;
  }

  //
  // Init mEventHandle
  //
  mEventHandle = CreateEvent(
                   NULL,     // default security attributes
                   FALSE,    // auto-reset event
                   TRUE,     // initial state is signaled
                   NULL      // object not named
                   );
  if (mEventHandle == NULL) {
    Error (NULL, 0, 0, NULL, "failed to create event");
    CloseHandle (mSemaphoreHandle);
    RestoreBuildList (BuildList);
    return 1;
  }

  //
  // Init mCriticalSection
  //
  InitializeCriticalSection (&mCriticalSection);

  //
  // Create build item log dir
  //
  sprintf (mLogDir, "%s\\Log", mBuildDir);
  _mkdir (mLogDir);

  //
  // Create child threads for multi-thread build
  //
  ThreadHandle = malloc (ThreadNumber * sizeof (HANDLE));
  if (ThreadHandle == NULL) {
    Error (NULL, 0, 0, NULL, "failed to allocate memory");
    //
    // The critical section is already initialized at this point, so it has
    // to be released together with the two handles.
    //
    DeleteCriticalSection (&mCriticalSection);
    CloseHandle (mSemaphoreHandle);
    CloseHandle (mEventHandle);
    RestoreBuildList (BuildList);
    return 1;
  }
  for (Index = 0; Index < ThreadNumber; Index++) {
    ThreadHandle[Index] = CreateThread (
                            NULL,           // default security attributes
                            0,              // use default stack size
                            ThreadProc,     // thread function
                            (LPVOID)Index,  // argument to thread function: use Index as thread id
                            0,              // use default creation flags
                            NULL            // thread identifier not needed
                            );
    if (ThreadHandle[Index] == NULL) {
      Error (NULL, 0, 0, NULL, "failed to create Thread_%d", Index);
      mError       = 1;
      //
      // Only the threads created so far are waited on and closed below
      //
      ThreadNumber = Index;
      //
      // Make sure to wake up every child thread for exit
      //
      ReleaseSemaphore (mSemaphoreHandle, ThreadNumber, NULL);
      break;
    }
  }

  //
  // Loop until error occurred or no more build items pending for build
  //
  for (;;) {
    //
    // The event starts out signaled, so the first pass runs immediately
    //
    WaitForSingleObject (mEventHandle, INFINITE);
    if (mError) {
      break;
    }
    Count = 0;

    EnterCriticalSection (&mCriticalSection);
    PreviousBuildItem = NULL;
    CurrentBuildItem  = mPendingList;
    while (CurrentBuildItem != NULL) {
      //
      // Remember the successor first: the item may be unlinked below
      //
      NextBuildItem = CurrentBuildItem->Next;
      if (CheckDependency (CurrentBuildItem->DependencyList)) {
        //
        // Move the current build item from mPendingList
        //
        if (PreviousBuildItem != NULL) {
          PreviousBuildItem->Next = NextBuildItem;
        } else {
          mPendingList = NextBuildItem;
        }
        //
        // Add the current build item to the head of mWaitingList
        //
        CurrentBuildItem->Next = mWaitingList;
        mWaitingList           = CurrentBuildItem;
        Count++;
      } else {
        PreviousBuildItem = CurrentBuildItem;
      }
      CurrentBuildItem = NextBuildItem;
    }
    LeaveCriticalSection (&mCriticalSection);

    //
    // One semaphore count per item moved to mWaitingList in this pass
    //
    ReleaseSemaphore (mSemaphoreHandle, Count, NULL);
    if (mPendingList == NULL) {
      break;
    }
  }

  //
  // Wait until all threads have terminated
  //
  WaitForMultipleObjects (ThreadNumber, ThreadHandle, TRUE, INFINITE);

  if (mError && (mBuildingList != NULL)) {
    //
    // Dump build failure log of the first build item which doesn't finish the build
    //
    printf ("\tnmake -nologo -f %s all\n", mBuildingList->Makefile);

    //
    // NOTE(review): Cmd is MAX_PATH bytes and is filled with an unbounded
    // sprintf -- confirm the log path components can never exceed it.
    //
    sprintf (
      Cmd,
      "type %s\\%s_%s_%d.txt 2>NUL",
      mLogDir,
      mBuildingList->BaseName,
      mBuildingList->Processor,
      mBuildingList->Index
      );
    _flushall ();
    if (system (Cmd)) {
      Error (NULL, 0, 0, NULL, "failed to run \"%s\"", Cmd);
    }
  }

  DeleteCriticalSection (&mCriticalSection);
  for (Index = 0; Index < ThreadNumber; Index++) {
    CloseHandle (ThreadHandle[Index]);
  }
  free (ThreadHandle);
  CloseHandle (mSemaphoreHandle);
  CloseHandle (mEventHandle);
  RestoreBuildList (BuildList);
  return mError;
}
////////////////////////////////////////////////////////////////////////// /// @brief If there is any BE work then go work on it. /// @param pContext - pointer to SWR context. /// @param workerId - The unique worker ID that is assigned to this thread. /// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread /// has its own curDrawBE counter and this ensures that each worker processes all the /// draws in order. /// @param lockedTiles - This is the set of tiles locked by other threads. Each thread maintains its /// own set and each time it fails to lock a macrotile, because its already locked, /// then it will add that tile to the lockedTiles set. As a worker begins to work /// on future draws the lockedTiles ensure that it doesn't work on tiles that may /// still have work pending in a previous draw. Additionally, the lockedTiles is /// hueristic that can steer a worker back to the same macrotile that it had been /// working on in a previous draw. void WorkOnFifoBE( SWR_CONTEXT *pContext, uint32_t workerId, volatile uint64_t &curDrawBE, std::unordered_set<uint32_t>& lockedTiles) { // Find the first incomplete draw that has pending work. If no such draw is found then // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE. if (FindFirstIncompleteDraw(pContext, curDrawBE) == false) { return; } uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1; // Reset our history for locked tiles. We'll have to re-learn which tiles are locked. lockedTiles.clear(); // Try to work on each draw in order of the available draws in flight. // 1. If we're on curDrawBE, we can work on any macrotile that is available. // 2. If we're trying to work on draws after curDrawBE, we are restricted to // working on those macrotiles that are known to be complete in the prior draw to // maintain order. The locked tiles provides the history to ensures this. 
for (uint64_t i = curDrawBE; i < GetEnqueuedDraw(pContext); ++i) { DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT]; if (pDC->isCompute) return; // We don't look at compute work. // First wait for FE to be finished with this draw. This keeps threading model simple // but if there are lots of bubbles between draws then serializing FE and BE may // need to be revisited. if (!pDC->doneFE) break; // If this draw is dependent on a previous draw then we need to bail. if (CheckDependency(pContext, pDC, lastRetiredDraw)) { return; } // Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it. std::vector<uint32_t> ¯oTiles = pDC->pTileMgr->getDirtyTiles(); for (uint32_t tileID : macroTiles) { MacroTileQueue &tile = pDC->pTileMgr->getMacroTileQueue(tileID); // can only work on this draw if it's not in use by other threads if (lockedTiles.find(tileID) == lockedTiles.end()) { if (tile.getNumQueued()) { if (tile.tryLock()) { BE_WORK *pWork; RDTSC_START(WorkerFoundWork); uint32_t numWorkItems = tile.getNumQueued(); if (numWorkItems != 0) { pWork = tile.peek(); SWR_ASSERT(pWork); if (pWork->type == DRAW) { InitializeHotTiles(pContext, pDC, tileID, (const TRIANGLE_WORK_DESC*)&pWork->desc); } } while ((pWork = tile.peek()) != nullptr) { pWork->pfnWork(pDC, workerId, tileID, &pWork->desc); tile.dequeue(); } RDTSC_STOP(WorkerFoundWork, numWorkItems, pDC->drawId); _ReadWriteBarrier(); pDC->pTileMgr->markTileComplete(tileID); // Optimization: If the draw is complete and we're the last one to have worked on it then // we can reset the locked list as we know that all previous draws before the next are guaranteed to be complete. if ((curDrawBE == i) && pDC->pTileMgr->isWorkComplete()) { // We can increment the current BE and safely move to next draw since we know this draw is complete. curDrawBE++; lastRetiredDraw++; lockedTiles.clear(); break; } } else { // This tile is already locked. So let's add it to our locked tiles set. 
This way we don't try locking this one again. lockedTiles.insert(tileID); } } } } } }