//----------------------------------------------------------------------------- /// Return GPU-time in text format, to be parsed by the Client and displayed as its own timeline. /// \return A line-delimited, ASCII-encoded, version of the GPU Trace data. //----------------------------------------------------------------------------- std::string VktTraceAnalyzerLayer::GetGPUTraceTXT() { gtASCIIString appendString = ""; VktFrameProfilerLayer* pProfilerLayer = VktFrameProfilerLayer::Instance(); WaitAndFetchResults(pProfilerLayer); // During QueueSubmit we stored ProfilerResults in mEntriesWithProfilingResults. Form a response using it here. ProfilerResultsMap& profiledCmdBufResultsMap = pProfilerLayer->GetCmdBufProfilerResultsMap(); // Gather all profiler results if (!profiledCmdBufResultsMap.empty()) { std::vector<ProfilerResult*> flatResults; for (ProfilerResultsMap::iterator profIt = profiledCmdBufResultsMap.begin(); profIt != profiledCmdBufResultsMap.end(); ++profIt) { QueueWrapperToProfilingResultsMap& resultsPerThread = profIt->second; for (QueueWrapperToProfilingResultsMap::iterator queuesIt = resultsPerThread.begin(); queuesIt != resultsPerThread.end(); ++queuesIt) { // This structure holds all of the profiler results that were collected at QueueSubmit. The form is LinkId->ProfilerResult. const SampleIdToProfilerResultMap* pResults = queuesIt->second; for (SampleIdToProfilerResultMap::const_iterator sampleIdIt = pResults->begin(); sampleIdIt != pResults->end(); ++sampleIdIt) { ProfilerResult* pResult = sampleIdIt->second; pResult->measurementInfo.idInfo.pWrappedQueue = queuesIt->first; flatResults.push_back(pResult); } } } const UINT numResults = (UINT)flatResults.size(); // We'll need to insert the GPU Trace section header before the response data, even if there aren't any results. appendString += "//==GPU Trace=="; appendString += "\n"; appendString += "//API="; appendString += GetAPIString(); appendString += "\n"; appendString += "//CommandBufEventCount="; appendString += IntToString((INT)numResults); appendString += "\n"; #ifdef WIN32 sort(flatResults.begin(), flatResults.end(), SortByStartTime); for (UINT i = 0; i < numResults; i++) { ProfilerResultToStr(flatResults[i], appendString); } #else ProfilerResult* pFlatResults = new ProfilerResult[numResults]; for (UINT i = 0; i < numResults; i++) { pFlatResults[i] = *(flatResults[i]); } qsort(pFlatResults, numResults, sizeof(ProfilerResult), (compfn)SortByStartTime); for (UINT i = 0; i < numResults; i++) { ProfilerResultToStr(&pFlatResults[i], appendString); } delete[] pFlatResults; pFlatResults = nullptr; #endif } else { appendString += "NODATA"; } return appendString.asCharArray(); }
//----------------------------------------------------------------------------- /// Submit command buffers and gather results. /// \param queue The queue issued work to. /// \param submitCount The number of submits. /// \param pSubmits The submit info structures. /// \param fence The fence wrapping this submit. //----------------------------------------------------------------------------- VkResult VktWrappedQueue::QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) { m_executionID++; VkResult result = VK_INCOMPLETE; VktTraceAnalyzerLayer* pTraceAnalyzer = VktTraceAnalyzerLayer::Instance(); VktFrameProfilerLayer* pFrameProfiler = VktFrameProfilerLayer::Instance(); // Use this calibration timestamp structure to convert GPU events to the CPU timeline. CalibrationTimestampPair calibrationTimestamps = {}; calibrationTimestamps.mQueueCanBeTimestamped = true; VkFence fenceToWaitOn = fence; bool usingInternalFence = false; std::vector<VktWrappedCmdBuf*> wrappedCmdBufs; GatherWrappedCommandBufs(submitCount, pSubmits, wrappedCmdBufs); for (UINT i = 0; i < wrappedCmdBufs.size(); i++) { wrappedCmdBufs[i]->SetProfilerExecutionId(m_executionID); wrappedCmdBufs[i]->IncrementSubmitCount(); } // Surround the execution of CommandBuffers with timestamps so we can determine when the GPU work occurred in the CPU timeline. if (pTraceAnalyzer->ShouldCollectTrace() && pFrameProfiler->ShouldCollectGPUTime()) { // Collect calibration timestamps in case we need to align GPU events against the CPU timeline. if (calibrationTimestamps.mQueueCanBeTimestamped) { pFrameProfiler->CollectCalibrationTimestamps(this, &calibrationTimestamps); } else { Log(logTRACE, "Did not collect calibration timestamps for Queue '0x%p'\n", this); } // Inject our own fence if the app did not supply one if (fenceToWaitOn == VK_NULL_HANDLE) { // Create internal fence VkFenceCreateInfo fenceCreateInfo = {}; VkResult fenceResult = VK_INCOMPLETE; fenceResult = device_dispatch_table(queue)->CreateFence(m_createInfo.device, &fenceCreateInfo, nullptr, &fenceToWaitOn); VKT_ASSERT(fenceResult == VK_SUCCESS); usingInternalFence = true; } } // Invoke the real call to execute on the GPU result = QueueSubmit_ICD(queue, submitCount, pSubmits, fenceToWaitOn); if (pTraceAnalyzer->ShouldCollectTrace() && pFrameProfiler->ShouldCollectGPUTime()) { // Collect the CPU and GPU frequency to convert timestamps. QueryPerformanceFrequency(&calibrationTimestamps.cpuFrequency); #if GATHER_PROFILER_RESULTS_WITH_WORKERS SpawnWorker(&calibrationTimestamps, this, fenceToWaitOn, usingInternalFence, wrappedCmdBufs); #else VkResult waitResult = VK_TIMEOUT; #if GPU_FENCES_FOR_PROFILER_WAIT do { waitResult = device_dispatch_table(m_createInfo.device)->WaitForFences(m_createInfo.device, 1, &fenceToWaitOn, VK_TRUE, GPU_FENCE_TIMEOUT_TIME); } while (waitResult == VK_TIMEOUT); #else waitResult = device_dispatch_table(queue)->QueueWaitIdle(queue); #endif if (calibrationTimestamps.mQueueCanBeTimestamped) { // Put all results into thread ID 0 bucket const UINT32 threadID = 0; std::vector<ProfilerResult> results; for (UINT i = 0; i < wrappedCmdBufs.size(); i++) { ProfilerResultCode getResultsResult = PROFILER_FAIL; getResultsResult = wrappedCmdBufs[i]->GetCmdBufResultsST(results); VKT_ASSERT(getResultsResult != PROFILER_FAIL); } pFrameProfiler->VerifyAlignAndStoreResults(this, results, &calibrationTimestamps, threadID, VktTraceAnalyzerLayer::Instance()->GetFrameStartTime()); // Free the fence we created earlier if (usingInternalFence) { device_dispatch_table(m_createInfo.device)->DestroyFence(m_createInfo.device, fenceToWaitOn, nullptr); } } else { Log(logTRACE, "Didn't collect calibration timestamps for Queue '0x%p'.\n", this); } #endif } #if GATHER_PROFILER_RESULTS_WITH_WORKERS == 0 for (UINT i = 0; i < wrappedCmdBufs.size(); i++) { wrappedCmdBufs[i]->DestroyDynamicProfilers(); } #endif return result; }
//----------------------------------------------------------------------------- /// Return GPU-time in text format, to be parsed by the Client and displayed as its own timeline. /// \return A line-delimited, ASCII-encoded, version of the GPU Trace data. //----------------------------------------------------------------------------- std::string VktTraceAnalyzerLayer::GetGPUTraceTXT() { gtASCIIString appendString = ""; VktFrameProfilerLayer* pProfilerLayer = VktFrameProfilerLayer::Instance(); WaitAndFetchResults(pProfilerLayer); // Query the CPU clock frequency so we can accurately convert to wall clock time. GPS_TIMESTAMP cpuClockFrequency; QueryPerformanceFrequency(&cpuClockFrequency); // During QueueSubmit we stored ProfilerResults in mEntriesWithProfilingResults. Form a response using it here. ProfilerResultsMap& profiledCmdBufResultsMap = pProfilerLayer->GetCmdBufProfilerResultsMap(); // During QueueSubmit we stored ProfilerResults in mEntriesWithProfilingResults. Form a response using it here. if (!profiledCmdBufResultsMap.empty()) { // Keep a count of the number of lines that we'll write in the response string. UINT numResponseLines = 0; gtASCIIString profiledCommandsLinesStr; ProfilerResultsMap::iterator profIt; for (profIt = profiledCmdBufResultsMap.begin(); profIt != profiledCmdBufResultsMap.end(); ++profIt) { QueueWrapperToProfilingResultsMap::iterator queuesWithProfilingResults; QueueWrapperToProfilingResultsMap& resultsPerThread = profIt->second; for (queuesWithProfilingResults = resultsPerThread.begin(); queuesWithProfilingResults != resultsPerThread.end(); ++queuesWithProfilingResults) { VktWrappedQueue* pWrappedQueue = queuesWithProfilingResults->first; const double timestampFrequency = pWrappedQueue->GetTimestampFrequency(); // This structure holds all of the profiler results that were collected at QueueSubmit. The form is LinkId->ProfilerResult. const SampleIdToProfilerResultMap* pQueueResults = queuesWithProfilingResults->second; SampleIdToProfilerResultMap::const_iterator queueResultsIterInner; for (queueResultsIterInner = pQueueResults->begin(); queueResultsIterInner != pQueueResults->end(); ++queueResultsIterInner) { UINT64 sampleId = queueResultsIterInner->first; const ProfilerResult* pResult = queueResultsIterInner->second; // Convert timestamps to milliseconds by using the clock frequency. #ifndef CODEXL_GRAPHICS double preStartTimestamp = (pResult->timestampResult.rawClocks.preStart / timestampFrequency) * 1000.0; #endif double startTimestamp = (pResult->timestampResult.rawClocks.start / timestampFrequency) * 1000.0; double endTimestamp = (pResult->timestampResult.rawClocks.end / timestampFrequency) * 1000.0; FuncId funcId = (FuncId)pResult->measurementInfo.idInfo.funcId; gtASCIIString funcName = GetFunctionNameFromId(funcId); gtASCIIString retVal = "void"; gtASCIIString params = ""; if (pResult->measurementInfo.idInfo.funcId != FuncId_WholeCmdBuf) { VktAPIEntry* pResultEntry = pProfilerLayer->FindInvocationBySampleId(sampleId); if (pResultEntry != nullptr) { // Convert the functionID and return values from integers into full strings that we can use in the response. funcName = GetFunctionNameFromId(pResultEntry->mFunctionId); retVal = (pResultEntry->m_returnValue != -1) ? VktUtil::WriteResultCodeEnumAsString(pResultEntry->m_returnValue) : "void"; params = pResultEntry->mParameters.asCharArray(); } } UINT queueIndex = pWrappedQueue->GetQueueIndex(); #ifndef CODEXL_GRAPHICS std::string queueInfo = ""; #else std::string queueInfo = "0"; #endif // Vulkan Response line format: // CommandQueuePtr CommandBufferType CommandBufferPtr APIType FuncId Vulkan_FuncName(Args) = ReturnValue StartTime EndTime SampleId profiledCommandsLinesStr += "0x"; profiledCommandsLinesStr += IntToString(queueIndex); profiledCommandsLinesStr += " "; profiledCommandsLinesStr += queueInfo.c_str(); profiledCommandsLinesStr += " "; profiledCommandsLinesStr += "0x"; profiledCommandsLinesStr += UINT64ToHexString((UINT64)pResult->measurementInfo.idInfo.pWrappedCmdBuf->AppHandle()); profiledCommandsLinesStr += " "; profiledCommandsLinesStr += IntToString(VktTraceAnalyzerLayer::Instance()->GetAPIGroupFromAPI(funcId)); profiledCommandsLinesStr += " "; profiledCommandsLinesStr += IntToString(funcId); profiledCommandsLinesStr += " "; profiledCommandsLinesStr += "Vulkan_"; profiledCommandsLinesStr += funcName; profiledCommandsLinesStr += "("; profiledCommandsLinesStr += params; profiledCommandsLinesStr += ") = "; profiledCommandsLinesStr += retVal; #ifndef CODEXL_GRAPHICS profiledCommandsLinesStr += " "; profiledCommandsLinesStr += DoubleToString(preStartTimestamp); #endif profiledCommandsLinesStr += " "; profiledCommandsLinesStr += DoubleToString(startTimestamp); profiledCommandsLinesStr += " "; profiledCommandsLinesStr += DoubleToString(endTimestamp); profiledCommandsLinesStr += " "; profiledCommandsLinesStr += UINT64ToString(pResult->measurementInfo.idInfo.sampleId); profiledCommandsLinesStr += "\n"; // We just added another line to our response buffer. Increment the count that the client will read. numResponseLines++; } } } // We'll need to insert the GPU Trace section header before the response data, even if there aren't any results. appendString += "//==GPU Trace=="; appendString += "\n"; appendString += "//CommandBufEventCount="; appendString += DWORDToString(numResponseLines); appendString += "\n"; // Include the response lines after the section header. if (numResponseLines > 0) { appendString += profiledCommandsLinesStr; } } else { appendString += "NODATA"; } return appendString.asCharArray(); }