//-----------------------------------------------------------------------------
/// Return GPU-time in text format, to be parsed by the Client and displayed as its own timeline.
/// \return A line-delimited, ASCII-encoded, version of the GPU Trace data.
//-----------------------------------------------------------------------------
std::string VktTraceAnalyzerLayer::GetGPUTraceTXT()
{
    gtASCIIString appendString = "";

    VktFrameProfilerLayer* pProfilerLayer = VktFrameProfilerLayer::Instance();

    WaitAndFetchResults(pProfilerLayer);

    // During QueueSubmit we stored ProfilerResults in mEntriesWithProfilingResults. Form a response using it here.
    ProfilerResultsMap& profiledCmdBufResultsMap = pProfilerLayer->GetCmdBufProfilerResultsMap();

    // Gather all profiler results
    if (!profiledCmdBufResultsMap.empty())
    {
        std::vector<ProfilerResult*> flatResults;

        for (ProfilerResultsMap::iterator profIt = profiledCmdBufResultsMap.begin(); profIt != profiledCmdBufResultsMap.end(); ++profIt)
        {
            QueueWrapperToProfilingResultsMap& resultsPerThread = profIt->second;

            for (QueueWrapperToProfilingResultsMap::iterator queuesIt = resultsPerThread.begin();
                 queuesIt != resultsPerThread.end();
                 ++queuesIt)
            {
                // This structure holds all of the profiler results that were collected at QueueSubmit. The form is LinkId->ProfilerResult.
                const SampleIdToProfilerResultMap* pResults = queuesIt->second;

                for (SampleIdToProfilerResultMap::const_iterator sampleIdIt = pResults->begin();
                     sampleIdIt != pResults->end();
                     ++sampleIdIt)
                {
                    ProfilerResult* pResult = sampleIdIt->second;
                    pResult->measurementInfo.idInfo.pWrappedQueue = queuesIt->first;

                    flatResults.push_back(pResult);
                }
            }
        }

        const UINT numResults = (UINT)flatResults.size();

        // We'll need to insert the GPU Trace section header before the response data, even if there aren't any results.
        appendString += "//==GPU Trace==";
        appendString += "\n";

        appendString += "//API=";
        appendString += GetAPIString();
        appendString += "\n";

        appendString += "//CommandBufEventCount=";
        appendString += IntToString((INT)numResults);
        appendString += "\n";

#ifdef WIN32
        sort(flatResults.begin(), flatResults.end(), SortByStartTime);

        for (UINT i = 0; i < numResults; i++)
        {
            ProfilerResultToStr(flatResults[i], appendString);
        }
#else
        ProfilerResult* pFlatResults = new ProfilerResult[numResults];

        for (UINT i = 0; i < numResults; i++)
        {
            pFlatResults[i] = *(flatResults[i]);
        }

        qsort(pFlatResults, numResults, sizeof(ProfilerResult), (compfn)SortByStartTime);

        for (UINT i = 0; i < numResults; i++)
        {
            ProfilerResultToStr(&pFlatResults[i], appendString);
        }

        delete[] pFlatResults;
        pFlatResults = nullptr;
#endif
    }
    else
    {
        appendString += "NODATA";
    }

    return appendString.asCharArray();
}
//-----------------------------------------------------------------------------
/// Submit command buffers and gather results.
/// \param queue The queue issued work to.
/// \param submitCount The number of submits.
/// \param pSubmits The submit info structures.
/// \param fence The fence wrapping this submit.
//-----------------------------------------------------------------------------
VkResult VktWrappedQueue::QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence)
{
    m_executionID++;

    VkResult result = VK_INCOMPLETE;

    VktTraceAnalyzerLayer* pTraceAnalyzer = VktTraceAnalyzerLayer::Instance();
    VktFrameProfilerLayer* pFrameProfiler = VktFrameProfilerLayer::Instance();

    // Use this calibration timestamp structure to convert GPU events to the CPU timeline.
    CalibrationTimestampPair calibrationTimestamps = {};
    calibrationTimestamps.mQueueCanBeTimestamped = true;

    VkFence fenceToWaitOn = fence;
    bool usingInternalFence = false;

    std::vector<VktWrappedCmdBuf*> wrappedCmdBufs;
    GatherWrappedCommandBufs(submitCount, pSubmits, wrappedCmdBufs);

    for (UINT i = 0; i < wrappedCmdBufs.size(); i++)
    {
        wrappedCmdBufs[i]->SetProfilerExecutionId(m_executionID);
        wrappedCmdBufs[i]->IncrementSubmitCount();
    }

    // Surround the execution of CommandBuffers with timestamps so we can determine when the GPU work occurred in the CPU timeline.
    if (pTraceAnalyzer->ShouldCollectTrace() && pFrameProfiler->ShouldCollectGPUTime())
    {
        // Collect calibration timestamps in case we need to align GPU events against the CPU timeline.
        if (calibrationTimestamps.mQueueCanBeTimestamped)
        {
            pFrameProfiler->CollectCalibrationTimestamps(this, &calibrationTimestamps);
        }
        else
        {
            Log(logTRACE, "Did not collect calibration timestamps for Queue '0x%p'\n", this);
        }

        // Inject our own fence if the app did not supply one
        if (fenceToWaitOn == VK_NULL_HANDLE)
        {
            // Create internal fence
            VkFenceCreateInfo fenceCreateInfo = {};
            VkResult fenceResult = VK_INCOMPLETE;
            fenceResult = device_dispatch_table(queue)->CreateFence(m_createInfo.device, &fenceCreateInfo, nullptr, &fenceToWaitOn);
            VKT_ASSERT(fenceResult == VK_SUCCESS);

            usingInternalFence = true;
        }
    }

    // Invoke the real call to execute on the GPU
    result = QueueSubmit_ICD(queue, submitCount, pSubmits, fenceToWaitOn);

    if (pTraceAnalyzer->ShouldCollectTrace() && pFrameProfiler->ShouldCollectGPUTime())
    {
        // Collect the CPU and GPU frequency to convert timestamps.
        QueryPerformanceFrequency(&calibrationTimestamps.cpuFrequency);

#if GATHER_PROFILER_RESULTS_WITH_WORKERS
        SpawnWorker(&calibrationTimestamps, this, fenceToWaitOn, usingInternalFence, wrappedCmdBufs);
#else
        VkResult waitResult = VK_TIMEOUT;

#if GPU_FENCES_FOR_PROFILER_WAIT
        do
        {
            waitResult = device_dispatch_table(m_createInfo.device)->WaitForFences(m_createInfo.device, 1, &fenceToWaitOn, VK_TRUE, GPU_FENCE_TIMEOUT_TIME);
        } while (waitResult == VK_TIMEOUT);
#else
        waitResult = device_dispatch_table(queue)->QueueWaitIdle(queue);
#endif

        if (calibrationTimestamps.mQueueCanBeTimestamped)
        {
            // Put all results into thread ID 0 bucket
            const UINT32 threadID = 0;
            std::vector<ProfilerResult> results;

            for (UINT i = 0; i < wrappedCmdBufs.size(); i++)
            {
                ProfilerResultCode getResultsResult = PROFILER_FAIL;
                getResultsResult = wrappedCmdBufs[i]->GetCmdBufResultsST(results);
                VKT_ASSERT(getResultsResult != PROFILER_FAIL);
            }

            pFrameProfiler->VerifyAlignAndStoreResults(this, results, &calibrationTimestamps, threadID, VktTraceAnalyzerLayer::Instance()->GetFrameStartTime());

            // Free the fence we created earlier
            if (usingInternalFence)
            {
                device_dispatch_table(m_createInfo.device)->DestroyFence(m_createInfo.device, fenceToWaitOn, nullptr);
            }
        }
        else
        {
            Log(logTRACE, "Didn't collect calibration timestamps for Queue '0x%p'.\n", this);
        }
#endif
    }

#if GATHER_PROFILER_RESULTS_WITH_WORKERS == 0
    for (UINT i = 0; i < wrappedCmdBufs.size(); i++)
    {
        wrappedCmdBufs[i]->DestroyDynamicProfilers();
    }
#endif

    return result;
}
//-----------------------------------------------------------------------------
/// Return GPU-time in text format, to be parsed by the Client and displayed as its own timeline.
/// \return A line-delimited, ASCII-encoded, version of the GPU Trace data.
//-----------------------------------------------------------------------------
std::string VktTraceAnalyzerLayer::GetGPUTraceTXT()
{
    gtASCIIString appendString = "";

    VktFrameProfilerLayer* pProfilerLayer = VktFrameProfilerLayer::Instance();

    WaitAndFetchResults(pProfilerLayer);

    // Query the CPU clock frequency so we can accurately convert to wall clock time.
    GPS_TIMESTAMP cpuClockFrequency;
    QueryPerformanceFrequency(&cpuClockFrequency);

    // During QueueSubmit we stored ProfilerResults in mEntriesWithProfilingResults. Form a response using it here.
    ProfilerResultsMap& profiledCmdBufResultsMap = pProfilerLayer->GetCmdBufProfilerResultsMap();

    // During QueueSubmit we stored ProfilerResults in mEntriesWithProfilingResults. Form a response using it here.
    if (!profiledCmdBufResultsMap.empty())
    {
        // Keep a count of the number of lines that we'll write in the response string.
        UINT numResponseLines = 0;

        gtASCIIString profiledCommandsLinesStr;

        ProfilerResultsMap::iterator profIt;

        for (profIt = profiledCmdBufResultsMap.begin(); profIt != profiledCmdBufResultsMap.end(); ++profIt)
        {
            QueueWrapperToProfilingResultsMap::iterator queuesWithProfilingResults;

            QueueWrapperToProfilingResultsMap& resultsPerThread = profIt->second;

            for (queuesWithProfilingResults = resultsPerThread.begin(); queuesWithProfilingResults != resultsPerThread.end(); ++queuesWithProfilingResults)
            {
                VktWrappedQueue* pWrappedQueue = queuesWithProfilingResults->first;

                const double timestampFrequency = pWrappedQueue->GetTimestampFrequency();

                // This structure holds all of the profiler results that were collected at QueueSubmit. The form is LinkId->ProfilerResult.
                const SampleIdToProfilerResultMap* pQueueResults = queuesWithProfilingResults->second;

                SampleIdToProfilerResultMap::const_iterator queueResultsIterInner;

                for (queueResultsIterInner = pQueueResults->begin(); queueResultsIterInner != pQueueResults->end(); ++queueResultsIterInner)
                {
                    UINT64 sampleId = queueResultsIterInner->first;
                    const ProfilerResult* pResult = queueResultsIterInner->second;

                    // Convert timestamps to milliseconds by using the clock frequency.
#ifndef CODEXL_GRAPHICS
                    double preStartTimestamp = (pResult->timestampResult.rawClocks.preStart / timestampFrequency) * 1000.0;
#endif
                    double startTimestamp = (pResult->timestampResult.rawClocks.start / timestampFrequency) * 1000.0;
                    double endTimestamp = (pResult->timestampResult.rawClocks.end / timestampFrequency) * 1000.0;

                    FuncId funcId = (FuncId)pResult->measurementInfo.idInfo.funcId;

                    gtASCIIString funcName = GetFunctionNameFromId(funcId);
                    gtASCIIString retVal = "void";
                    gtASCIIString params = "";

                    if (pResult->measurementInfo.idInfo.funcId != FuncId_WholeCmdBuf)
                    {
                        VktAPIEntry* pResultEntry = pProfilerLayer->FindInvocationBySampleId(sampleId);

                        if (pResultEntry != nullptr)
                        {
                            // Convert the functionID and return values from integers into full strings that we can use in the response.
                            funcName = GetFunctionNameFromId(pResultEntry->mFunctionId);
                            retVal = (pResultEntry->m_returnValue != -1) ? VktUtil::WriteResultCodeEnumAsString(pResultEntry->m_returnValue) : "void";
                            params = pResultEntry->mParameters.asCharArray();
                        }
                    }

                    UINT queueIndex = pWrappedQueue->GetQueueIndex();

#ifndef CODEXL_GRAPHICS
                    std::string queueInfo = "";
#else
                    std::string queueInfo = "0";
#endif

                    // Vulkan Response line format:
                    // CommandQueuePtr CommandBufferType CommandBufferPtr APIType FuncId Vulkan_FuncName(Args) = ReturnValue StartTime EndTime SampleId

                    profiledCommandsLinesStr += "0x";
                    profiledCommandsLinesStr += IntToString(queueIndex);
                    profiledCommandsLinesStr += " ";

                    profiledCommandsLinesStr += queueInfo.c_str();
                    profiledCommandsLinesStr += " ";

                    profiledCommandsLinesStr += "0x";
                    profiledCommandsLinesStr += UINT64ToHexString((UINT64)pResult->measurementInfo.idInfo.pWrappedCmdBuf->AppHandle());
                    profiledCommandsLinesStr += " ";

                    profiledCommandsLinesStr += IntToString(VktTraceAnalyzerLayer::Instance()->GetAPIGroupFromAPI(funcId));
                    profiledCommandsLinesStr += " ";

                    profiledCommandsLinesStr += IntToString(funcId);
                    profiledCommandsLinesStr += " ";

                    profiledCommandsLinesStr += "Vulkan_";
                    profiledCommandsLinesStr += funcName;

                    profiledCommandsLinesStr += "(";
                    profiledCommandsLinesStr += params;
                    profiledCommandsLinesStr += ") = ";

                    profiledCommandsLinesStr += retVal;

#ifndef CODEXL_GRAPHICS
                    profiledCommandsLinesStr += " ";
                    profiledCommandsLinesStr += DoubleToString(preStartTimestamp);
#endif

                    profiledCommandsLinesStr += " ";
                    profiledCommandsLinesStr += DoubleToString(startTimestamp);

                    profiledCommandsLinesStr += " ";
                    profiledCommandsLinesStr += DoubleToString(endTimestamp);

                    profiledCommandsLinesStr += " ";
                    profiledCommandsLinesStr += UINT64ToString(pResult->measurementInfo.idInfo.sampleId);

                    profiledCommandsLinesStr += "\n";

                    // We just added another line to our response buffer. Increment the count that the client will read.
                    numResponseLines++;
                }
            }
        }

        // We'll need to insert the GPU Trace section header before the response data, even if there aren't any results.
        appendString += "//==GPU Trace==";
        appendString += "\n";

        appendString += "//CommandBufEventCount=";
        appendString += DWORDToString(numResponseLines);
        appendString += "\n";

        // Include the response lines after the section header.
        if (numResponseLines > 0)
        {
            appendString += profiledCommandsLinesStr;
        }
    }
    else
    {
        appendString += "NODATA";
    }

    return appendString.asCharArray();
}