//-----------------------------------------------------------------------------
/// Forward a sparse-binding request to the device dispatch table, recording the call
/// in the API trace when trace collection is active.
/// \param queue The queue the sparse binding is submitted to.
/// \param bindInfoCount The number of elements in pBindInfo.
/// \param pBindInfo The array of sparse binding descriptions.
/// \param fence The fence handle forwarded with the call.
/// \returns The result code returned by the dispatch table's QueueBindSparse.
//-----------------------------------------------------------------------------
VkResult VktWrappedQueue::QueueBindSparse(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence)
{
    const FuncId funcId = FuncId_vkQueueBindSparse;

    VkResult result = VK_INCOMPLETE;

    if (m_createInfo.pInterceptMgr->ShouldCollectTrace())
    {
        char argumentsBuffer[ARGUMENTS_BUFFER_SIZE];

        // The array argument is already formatted text (c_str()), so it has to be emitted
        // with %s. Printing it with %p would log the address of the temporary buffer.
        sprintf_s(argumentsBuffer, ARGUMENTS_BUFFER_SIZE, "0x%p, %u, %s, 0x%p",
            queue,
            bindInfoCount,
            PrintArrayWithFormatter(bindInfoCount, pBindInfo, "0x%p").c_str(),
            fence);

        // Bracket the real call with Pre/PostCall so the trace entry carries its result.
        VktAPIEntry* pNewEntry = m_createInfo.pInterceptMgr->PreCall(funcId, argumentsBuffer);
        result = device_dispatch_table(queue)->QueueBindSparse(queue, bindInfoCount, pBindInfo, fence);
        m_createInfo.pInterceptMgr->PostCall(pNewEntry, result);
    }
    else
    {
        // Not tracing: pass the call straight through.
        result = device_dispatch_table(queue)->QueueBindSparse(queue, bindInfoCount, pBindInfo, fence);
    }

    return result;
}
// Example #2
// 0
/* Resolve a device-level entry point by name. Functions implemented by this layer are
 * matched first; anything else is forwarded to the device dispatch table's
 * GetDeviceProcAddr. Returns NULL when no device is given or no forwarder is available. */
VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(VkDevice device, const char *pName) {
    /* Entry points provided by this layer; these are answered even without a device. */
    if (strcmp(pName, "vkGetDeviceProcAddr") == 0) {
        return reinterpret_cast<PFN_vkVoidFunction>(vkGetDeviceProcAddr);
    }
    if (strcmp(pName, "vkDestroyDevice") == 0) {
        return reinterpret_cast<PFN_vkVoidFunction>(basic_DestroyDevice);
    }
    if (strcmp(pName, "vkLayerBasicEXT") == 0) {
        return reinterpret_cast<PFN_vkVoidFunction>(vkLayerBasicEXT);
    }

    /* Everything else needs a device to find the dispatch table. */
    if (device == NULL) {
        return NULL;
    }

    auto *pTable = device_dispatch_table(device);
    if (pTable->GetDeviceProcAddr == NULL) {
        return NULL;
    }

    return pTable->GetDeviceProcAddr(device, pName);
}
// Example #3
// 0
//-----------------------------------------------------------------------------
/// Initialize the profiler: copy the configuration, look up the dispatch tables,
/// query the physical device and reset the command buffer data.
/// \param config The profiler configuration; its contents are copied into the profiler.
/// \returns VK_SUCCESS when both the physical device and device handles are non-null, VK_INCOMPLETE otherwise.
//-----------------------------------------------------------------------------
VkResult VktCmdBufProfiler::Init(const VktCmdBufProfilerConfig& config)   ///< [in] Profiler configuration
{
    // Without both handles there is nothing to initialize against.
    if ((config.physicalDevice == VK_NULL_HANDLE) ||
        (config.device == VK_NULL_HANDLE))
    {
        return VK_INCOMPLETE;
    }

    // Keep a private copy of the caller's configuration.
    memcpy(&m_config, &config, sizeof(m_config));

    // Dispatch tables are keyed off the handles supplied in the configuration.
    m_pInstanceDT = instance_dispatch_table(config.physicalDevice);
    m_pDeviceDT = device_dispatch_table(config.device);

    // Cache the physical device memory properties and general properties.
    m_pInstanceDT->GetPhysicalDeviceMemoryProperties(config.physicalDevice, &m_memProps);
    m_pInstanceDT->GetPhysicalDeviceProperties(config.physicalDevice, &m_physicalDeviceProps);

    // Derive the timestamp frequency from the device's reported timestampPeriod.
    m_gpuTimestampFreq = 1000000000.0f / m_physicalDeviceProps.limits.timestampPeriod;

    // Each measurement consumes ProfilerTimestampsPerMeasurement queries.
    m_maxQueriesPerGroup = m_config.measurementsPerGroup * ProfilerTimestampsPerMeasurement;

    ClearCmdBufData();

    return VK_SUCCESS;
}
//-----------------------------------------------------------------------------
/// Kill all info retained by this thread.
//-----------------------------------------------------------------------------
void VktWrappedQueue::EndCollection()
{
    ScopeLock lock(&m_workerThreadInfoMutex);

    for (UINT i = 0; i < m_workerThreadInfo.size(); i++)
    {
        // Delete profiler memory
        for (UINT j = 0; j < m_workerThreadInfo[i]->m_inputs.cmdBufs.size(); j++)
        {
            VktWrappedCmdBuf* pCmdBuf = m_workerThreadInfo[i]->m_inputs.cmdBufs[j];

            if (pCmdBuf != nullptr)
            {
                pCmdBuf->DestroyDynamicProfilers();
            }
        }

        // Free the fence we created earlier
        if (m_workerThreadInfo[i]->m_inputs.internalFence)
        {
            device_dispatch_table(m_createInfo.device)->DestroyFence(m_createInfo.device, m_workerThreadInfo[i]->m_inputs.fenceToWaitOn, nullptr);
        }

        m_workerThreadInfo[i]->m_outputs.results.clear();

        CloseHandle(m_workerThreadInfo[i]->m_threadInfo.threadHandle);
        SAFE_DELETE(m_workerThreadInfo[i]);
    }

    m_workerThreadInfo.clear();
}
//-----------------------------------------------------------------------------
/// Worker thread entry point: wait for the submitted GPU work to complete, then pull
/// the profiler results of each command buffer into the worker's output.
/// \param lpParam A void pointer to the incoming VktWorkerInfo argument.
/// \returns Always 0.
//-----------------------------------------------------------------------------
DWORD WINAPI ThreadFunc(LPVOID lpParam)
{
    VktWorkerInfo* pInfo = static_cast<VktWorkerInfo*>(lpParam);
    auto& inputs = pInfo->m_inputs;

    // Record which thread is servicing this worker.
    pInfo->m_threadInfo.workerThreadID = osGetCurrentThreadId();

    VkResult waitResult = VK_TIMEOUT;

#if GPU_FENCES_FOR_PROFILER_WAIT
    // Keep waiting on the fence for as long as the wait times out.
    VkDevice device = inputs.pQueue->ParentDevice();

    while (waitResult == VK_TIMEOUT)
    {
        waitResult = device_dispatch_table(device)->WaitForFences(device, 1, &inputs.fenceToWaitOn, VK_TRUE, GPU_FENCE_TIMEOUT_TIME);
    }
#else
    // No fence wait configured: block until the whole queue is idle.
    VkQueue queue = inputs.pQueue->AppHandle();
    waitResult = device_dispatch_table(queue)->QueueWaitIdle(queue);
#endif

    // Results are only fetched when the timestamp pair marks the queue as timestampable.
    if (inputs.timestampPair.mQueueCanBeTimestamped)
    {
        for (VktWrappedCmdBuf* pCmdBuf : inputs.cmdBufs)
        {
            const ProfilerResultCode resultCode = pCmdBuf->GetCmdBufResultsMT(inputs.executionID, pInfo->m_outputs.results);

            if (resultCode == PROFILER_SUCCESS)
            {
                continue;
            }

            // Report that a problem occurred in retrieving full profiler results.
            const char* pErrorText = VktCmdBufProfiler::PrintProfilerResult(resultCode);

            Log(logERROR, "Failed to retrieve full profiler results: CmdBuf 0x%p, Queue 0x%p, ErrorCode %s\n",
                pCmdBuf, inputs.pQueue, pErrorText);
        }
    }

    // Set unconditionally once the wait above has returned, whatever its result.
    pInfo->m_outputs.bResultsGathered = true;

    return 0;
}
//-----------------------------------------------------------------------------
/// Wait for the queue to become idle, recording the call in the API trace when
/// trace collection is active.
/// \param queue The queue to wait on.
/// \returns The result code returned by the dispatch table's QueueWaitIdle.
//-----------------------------------------------------------------------------
VkResult VktWrappedQueue::QueueWaitIdle(VkQueue queue)
{
    // Not tracing: pass the call straight through.
    if (!m_createInfo.pInterceptMgr->ShouldCollectTrace())
    {
        return device_dispatch_table(queue)->QueueWaitIdle(queue);
    }

    // Tracing: format the arguments, then bracket the real call with Pre/PostCall.
    char argumentsBuffer[ARGUMENTS_BUFFER_SIZE];
    sprintf_s(argumentsBuffer, ARGUMENTS_BUFFER_SIZE, "0x%p", queue);

    VktAPIEntry* pTraceEntry = m_createInfo.pInterceptMgr->PreCall(FuncId_vkQueueWaitIdle, argumentsBuffer);
    const VkResult waitResult = device_dispatch_table(queue)->QueueWaitIdle(queue);
    m_createInfo.pInterceptMgr->PostCall(pTraceEntry, waitResult);

    return waitResult;
}
// Example #7
// 0
//-----------------------------------------------------------------------------
/// Worker thread entry point: wait for the submitted GPU work to complete, then pull
/// the profiler results for every recorded command buffer entry into the worker's output.
/// \param lpParam A void pointer to the incoming VktWorkerInfo argument.
/// \returns Always 0.
//-----------------------------------------------------------------------------
DWORD WINAPI ThreadFunc(LPVOID lpParam)
{
    VktWorkerInfo* pInfo = static_cast<VktWorkerInfo*>(lpParam);
    auto& inputs = pInfo->m_inputs;

    // Record which thread is servicing this worker.
    pInfo->m_threadInfo.workerThreadID = osGetCurrentThreadId();

    VkResult waitResult = VK_TIMEOUT;

#if GPU_FENCES_FOR_PROFILER_WAIT
    // Keep waiting on the fence for as long as the wait times out.
    VkDevice device = inputs.pQueue->ParentDevice();

    while (waitResult == VK_TIMEOUT)
    {
        waitResult = device_dispatch_table(device)->WaitForFences(device, 1, &inputs.fenceToWaitOn, VK_TRUE, GPU_FENCE_TIMEOUT_TIME);
    }
#else
    // No fence wait configured: block until the whole queue is idle.
    VkQueue queue = inputs.pQueue->AppHandle();
    waitResult = device_dispatch_table(queue)->QueueWaitIdle(queue);
#endif

    for (auto& entry : inputs.cmdBufData)
    {
        // Each entry carries the command buffer plus the fill ID and call count to fetch.
        VktWrappedCmdBuf* pCmdBuf = entry.pCmdBuf;
        const UINT64 fillId = entry.targetFillID;
        const UINT callCount = entry.profiledCallCount;

        const ProfilerResultCode resultCode = pCmdBuf->GetCmdBufResultsMT(fillId, callCount, pInfo->m_outputs.results);

        if (resultCode == PROFILER_SUCCESS)
        {
            continue;
        }

        // Report that a problem occurred in retrieving full profiler results.
        const char* pErrorText = VktCmdBufProfiler::PrintProfilerResult(resultCode);

        Log(logERROR, "Failed to retrieve full profiler results: CmdBuf 0x%p, Queue 0x%p, ErrorCode %s\n",
            entry.pCmdBuf, inputs.pQueue, pErrorText);
    }

    return 0;
}
//-----------------------------------------------------------------------------
/// Forward a present request directly to the device dispatch table, without any
/// additional work in this function.
/// \param queue The queue to present on.
/// \param pPresentInfo The presentation parameters.
/// \returns The result code returned by the dispatch table's QueuePresentKHR.
//-----------------------------------------------------------------------------
VkResult VktWrappedQueue::QueuePresentKHR_ICD(VkQueue queue, const VkPresentInfoKHR* pPresentInfo)
{
    const VkResult presentResult = device_dispatch_table(queue)->QueuePresentKHR(queue, pPresentInfo);

    return presentResult;
}
//-----------------------------------------------------------------------------
/// Submit command buffers and gather results.
/// When both trace collection and GPU time collection are enabled, calibration timestamps
/// are collected before the submit and the profiler results are gathered afterwards, either
/// by a worker (GATHER_PROFILER_RESULTS_WITH_WORKERS) or synchronously before returning.
/// \param queue The queue issued work to.
/// \param submitCount The number of submits.
/// \param pSubmits The submit info structures.
/// \param fence The fence wrapping this submit. When it is VK_NULL_HANDLE and profiling is
///        active, an internally created fence is submitted in its place.
/// \returns The result code of the real submission (QueueSubmit_ICD).
//-----------------------------------------------------------------------------
VkResult VktWrappedQueue::QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence)
{
    m_executionID++;

    VkResult result = VK_INCOMPLETE;

    VktTraceAnalyzerLayer* pTraceAnalyzer = VktTraceAnalyzerLayer::Instance();
    VktFrameProfilerLayer* pFrameProfiler = VktFrameProfilerLayer::Instance();

    // Use this calibration timestamp structure to convert GPU events to the CPU timeline.
    CalibrationTimestampPair calibrationTimestamps = {};
    calibrationTimestamps.mQueueCanBeTimestamped = true;

    VkFence fenceToWaitOn = fence;
    bool usingInternalFence = false;

    std::vector<VktWrappedCmdBuf*> wrappedCmdBufs;
    GatherWrappedCommandBufs(submitCount, pSubmits, wrappedCmdBufs);

    for (UINT i = 0; i < wrappedCmdBufs.size(); i++)
    {
        wrappedCmdBufs[i]->SetProfilerExecutionId(m_executionID);
        wrappedCmdBufs[i]->IncrementSubmitCount();
    }

    // Decide once whether this submit is profiled. The pre-submit setup and the post-submit
    // gathering must agree, otherwise an internal fence could be created and never waited on
    // or destroyed, or results could be gathered without a fence having been set up.
    bool profileSubmit = pTraceAnalyzer->ShouldCollectTrace() && pFrameProfiler->ShouldCollectGPUTime();

    // Surround the execution of CommandBuffers with timestamps so we can determine when the GPU work occurred in the CPU timeline.
    if (profileSubmit)
    {
        // Collect calibration timestamps in case we need to align GPU events against the CPU timeline.
        if (calibrationTimestamps.mQueueCanBeTimestamped)
        {
            pFrameProfiler->CollectCalibrationTimestamps(this, &calibrationTimestamps);
        }
        else
        {
            Log(logTRACE, "Did not collect calibration timestamps for Queue '0x%p'\n", this);
        }

        // Inject our own fence if the app did not supply one
        if (fenceToWaitOn == VK_NULL_HANDLE)
        {
            // Create internal fence. sType has to be set explicitly: zero-initialization leaves it
            // at 0, which is not VK_STRUCTURE_TYPE_FENCE_CREATE_INFO.
            VkFenceCreateInfo fenceCreateInfo = {};
            fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;

            const VkResult fenceResult = device_dispatch_table(queue)->CreateFence(m_createInfo.device, &fenceCreateInfo, nullptr, &fenceToWaitOn);
            VKT_ASSERT(fenceResult == VK_SUCCESS);

            if (fenceResult == VK_SUCCESS)
            {
                usingInternalFence = true;
            }
            else
            {
                // Without a fence there is nothing to wait on or destroy later, so submit exactly
                // as the app asked and skip profiling for this submission.
                fenceToWaitOn = VK_NULL_HANDLE;
                profileSubmit = false;

                Log(logERROR, "Failed to create internal fence for Queue '0x%p'; GPU time is not collected for this submit.\n", this);
            }
        }
    }

    // Invoke the real call to execute on the GPU
    result = QueueSubmit_ICD(queue, submitCount, pSubmits, fenceToWaitOn);

    if (profileSubmit && (result != VK_SUCCESS))
    {
        // A failed submission never signals the fence, so waiting on it would spin forever.
        // Release the internal fence here and skip result gathering.
        if (usingInternalFence)
        {
            device_dispatch_table(m_createInfo.device)->DestroyFence(m_createInfo.device, fenceToWaitOn, nullptr);
            usingInternalFence = false;
        }

        profileSubmit = false;
    }

    if (profileSubmit)
    {
        // Collect the CPU and GPU frequency to convert timestamps.
        QueryPerformanceFrequency(&calibrationTimestamps.cpuFrequency);

#if GATHER_PROFILER_RESULTS_WITH_WORKERS
        // The worker takes over the fence; it is released in EndCollection when flagged internal.
        SpawnWorker(&calibrationTimestamps, this, fenceToWaitOn, usingInternalFence, wrappedCmdBufs);
#else
        VkResult waitResult = VK_TIMEOUT;

#if GPU_FENCES_FOR_PROFILER_WAIT
        do
        {
            waitResult = device_dispatch_table(m_createInfo.device)->WaitForFences(m_createInfo.device, 1, &fenceToWaitOn, VK_TRUE, GPU_FENCE_TIMEOUT_TIME);
        } while (waitResult == VK_TIMEOUT);
#else
        waitResult = device_dispatch_table(queue)->QueueWaitIdle(queue);
#endif

        if (calibrationTimestamps.mQueueCanBeTimestamped)
        {
            // Put all results into thread ID 0 bucket
            const UINT32 threadID = 0;
            std::vector<ProfilerResult> results;

            for (UINT i = 0; i < wrappedCmdBufs.size(); i++)
            {
                ProfilerResultCode getResultsResult = PROFILER_FAIL;
                getResultsResult = wrappedCmdBufs[i]->GetCmdBufResultsST(results);
                VKT_ASSERT(getResultsResult != PROFILER_FAIL);
            }

            pFrameProfiler->VerifyAlignAndStoreResults(this, results, &calibrationTimestamps, threadID, VktTraceAnalyzerLayer::Instance()->GetFrameStartTime());
        }
        else
        {
            Log(logTRACE, "Didn't collect calibration timestamps for Queue '0x%p'.\n", this);
        }

        // Free the fence we created earlier. This happens whether or not results were stored,
        // so the internal fence cannot leak.
        if (usingInternalFence)
        {
            device_dispatch_table(m_createInfo.device)->DestroyFence(m_createInfo.device, fenceToWaitOn, nullptr);
        }
#endif
    }

#if GATHER_PROFILER_RESULTS_WITH_WORKERS == 0
    // In the synchronous configuration the profilers are no longer needed after this submit.
    for (UINT i = 0; i < wrappedCmdBufs.size(); i++)
    {
        wrappedCmdBufs[i]->DestroyDynamicProfilers();
    }
#endif

    return result;
}
//-----------------------------------------------------------------------------
/// Collect and store calibration timestamps from the CPU and GPU to align execution results in a single timeline.
/// A single timestamped command buffer is submitted and waited on; the GPU timestamp it produces,
/// the CPU counter read right after it (WIN32 only) and the queue's timestamp frequency are
/// written to pTimestamps. Only compiled in when MANUAL_TIMESTAMP_CALIBRATION is set.
/// \param pWrappedQueue The Queue responsible for work submission.
/// \param pTimestamps The timestamps structure used to hold timestamps occurring before and after workload execution.
/// \returns VK_INCOMPLETE when calibration is compiled out, an argument is null, or the timestamped
///          command buffer could not be created. Otherwise the CreateFence result if it failed, or
///          the result of submitting the timestamped command buffer.
//-----------------------------------------------------------------------------
VkResult VktFrameProfilerLayer::CollectCalibrationTimestamps(VktWrappedQueue* pWrappedQueue, CalibrationTimestampPair* pTimestamps)
{
    VkResult result = VK_INCOMPLETE;

#if MANUAL_TIMESTAMP_CALIBRATION
    if ((pWrappedQueue != nullptr) && (pTimestamps != nullptr))
    {
        VkQueue queue = pWrappedQueue->AppHandle();
        VkDevice device = pWrappedQueue->ParentDevice();

        TimestampedCmdBufConfig config = {};
        config.device           = device;
        config.physicalDevice   = pWrappedQueue->PhysicalDevice();
        config.mapTimestampMem  = false;
        config.pipelineLoc      = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
        config.queueFamilyIndex = pWrappedQueue->GetQueueFamilyIndex();

        VktTimestampedCmdBuf* pTimestampedCmdBuf = VktTimestampedCmdBuf::Create(config);

        if (pTimestampedCmdBuf != nullptr)
        {
            const VkCommandBuffer cmdBufs[] = { pTimestampedCmdBuf->CmdBufHandle() };

            VkSubmitInfo submitInfo = {};
            submitInfo.sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO;
            submitInfo.pNext                = nullptr;
            submitInfo.waitSemaphoreCount   = 0;
            submitInfo.pWaitSemaphores      = nullptr;
            submitInfo.pWaitDstStageMask    = nullptr;
            submitInfo.commandBufferCount   = 1;
            submitInfo.pCommandBuffers      = cmdBufs;
            submitInfo.signalSemaphoreCount = 0;
            submitInfo.pSignalSemaphores    = nullptr;

            // sType has to be set explicitly: zero-initialization leaves it at 0, which is not
            // VK_STRUCTURE_TYPE_FENCE_CREATE_INFO.
            VkFence fence = VK_NULL_HANDLE;
            VkFenceCreateInfo fenceCreateInfo = {};
            fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
            result = device_dispatch_table(queue)->CreateFence(device, &fenceCreateInfo, nullptr, &fence);

            if (result == VK_SUCCESS)
            {
                // Submit through the _ICD path so this internal submit bypasses the wrapped QueueSubmit.
                result = pWrappedQueue->QueueSubmit_ICD(queue, 1, &submitInfo, fence);
                VKT_ASSERT(result == VK_SUCCESS);

                // A failed submission never signals the fence, so only wait and read back on success.
                // Waiting regardless would spin forever on VK_TIMEOUT.
                if (result == VK_SUCCESS)
                {
                    VkResult waitResult = VK_TIMEOUT;
                    do
                    {
                        waitResult = device_dispatch_table(device)->WaitForFences(device, 1, &fence, VK_TRUE, GPU_FENCE_TIMEOUT_TIME);
                    } while (waitResult == VK_TIMEOUT);

                    // Fetch the GPU counter
                    pTimestampedCmdBuf->GetTimestampResult(&pTimestamps->mBeforeExecutionGPUTimestamp);

#ifdef WIN32
                    // Immediately after, fetch the CPU counter
                    LARGE_INTEGER largeInt = {};
                    QueryPerformanceCounter(&largeInt);

                    pTimestamps->mBeforeExecutionCPUTimestamp = largeInt.QuadPart;
#endif

                    pTimestamps->mQueueFrequency = (UINT64)pWrappedQueue->GetTimestampFrequency();
                }

                // The fence is released on both the success and the failed-submit path.
                device_dispatch_table(device)->DestroyFence(device, fence, nullptr);
            }

            delete pTimestampedCmdBuf;
            pTimestampedCmdBuf = nullptr;
        }
    }
#else
    UNREFERENCED_PARAMETER(pWrappedQueue);
    UNREFERENCED_PARAMETER(pTimestamps);
#endif

    return result;
}
// Example #11
// 0
/* Hook for DestroyDevice: forwards the call down the chain, then removes the device's
 * tableMap entry. */
VK_LAYER_EXPORT VKAPI_ATTR void VKAPI_CALL basic_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) {
    /* Read the key up front; it is used again after the device has been destroyed. */
    const dispatch_key deviceKey = get_dispatch_key(device);

    auto *pTable = device_dispatch_table(device);
    pTable->DestroyDevice(device, pAllocator);

    destroy_device_dispatch_table(deviceKey);
}