// Scoped GPU-zone marker (begin half). On construction it writes a GPU
// timestamp into the context's query pool and enqueues a
// GpuZoneBeginCallstack event pairing that query with the current CPU time,
// source location and thread; the matching end event is emitted by ~VkCtxScope.
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth )
    : m_cmdbuf( cmdbuf )
    , m_ctx( ctx )
#ifdef TRACY_ON_DEMAND
    , m_active( s_profiler.IsConnected() )  // record only while a client is connected
#endif
{
#ifdef TRACY_ON_DEMAND
    if( !m_active ) return;
#endif
    const auto thread = GetThreadHandle();

    // Allocate the next query slot and have the GPU stamp it at top-of-pipe.
    const auto queryId = ctx->NextQueryId();
    vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );

    // Publish the zone-begin event to the lock-free queue. All payload
    // MemWrite()s must complete before the release store below makes the
    // item visible to the consumer — do not reorder these statements.
    Magic magic;
    auto& token = s_token.ptr;
    auto& tail = token->get_tail_index();
    auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
    MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
    MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
    MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
    MemWrite( &item->gpuZoneBegin.thread, thread );
    MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
    MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
    tail.store( magic + 1, std::memory_order_release );

    // Capture a CPU callstack of the requested depth to attach to this zone.
    s_profiler.SendCallstack( depth, thread );
}
// Opens a named profiling section: registers it with the base profiler
// (supplying a callback that later resolves the GPU time from the query pool)
// and records the section's "begin" timestamp on the given command buffer.
//
// name - section label shown by the profiler (prefixed "VK ")
// cmd  - command buffer the begin timestamp is recorded into
// returns the SectionID to pass to endSection().
nvh::Profiler::SectionID ProfilerVK::beginSection(const char* name, VkCommandBuffer cmd)
{
  // Deferred provider: converts the section's two raw 64-bit timestamps into
  // a GPU duration once the queries are available for the given sub-frame.
  nvh::Profiler::gpuTimeProvider_fn fnProvider = [&](SectionID i, uint32_t queryFrame, double& gpuTime) {
    uint32_t idxBegin = getTimerIdx(i, queryFrame, true);
    // The end timestamp occupies the next query slot, so a single call with
    // queryCount == 2 fetches both values.
    uint64_t times[2];
    // BUGFIX: stride was 0, which made both query results land in times[0]
    // and left times[1] uninitialized; per the Vulkan spec, stride is the
    // byte distance between consecutive query results and must be
    // sizeof(uint64_t) here.
    VkResult result = vkGetQueryPoolResults(m_device, m_queryPool, idxBegin, 2, sizeof(uint64_t) * 2, times,
                                            sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
    if(result == VK_SUCCESS)
    {
      // m_frequency is the timestamp tick period; /1000 scales the product
      // into the profiler's time unit.
      gpuTime = (double(times[1] - times[0]) * double(m_frequency)) / double(1000);
      return true;
    }
    else
    {
      return false;
    }
  };

  SectionID slot = Profiler::beginSection(name, "VK ", fnProvider);

  // Grow the query pool if the profiler now needs more timer slots.
  if(getRequiredTimers() > m_queryPoolSize)
  {
    resize();
  }

  uint32_t idx = getTimerIdx(slot, getSubFrame(), true);
  // clear begin and end
  vkCmdResetQueryPool(cmd, m_queryPool, idx, 2);  // not ideal to do this per query
  vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_queryPool, idx);
  return slot;
}
// Closes a section opened by beginSection(): records the "end" timestamp for
// the current sub-frame, then finishes the CPU-side bookkeeping in the base
// profiler.
void ProfilerVK::endSection(SectionID slot, VkCommandBuffer cmd)
{
  const uint32_t endQueryIdx = getTimerIdx(slot, getSubFrame(), false);
  vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_queryPool, endQueryIdx);
  Profiler::endSection(slot);
}
// Acquires a timestamp query from the pool, records it on the provided
// command buffer, and binds the query object to that buffer's lifetime.
// The whole operation runs under the pool mutex.
VulkanQuery* VulkanQueryPool::beginTimerQuery(VulkanCmdBuffer* cb)
{
	Lock(mMutex);

	// Fetch an available timestamp query and mark it as taken.
	VulkanQuery* timerQuery = getQuery(VK_QUERY_TYPE_TIMESTAMP);
	timerQuery->mFree = false;

	// Reset the query's slot, then stamp it on the caller's command buffer.
	VkCommandBuffer cmdBufHandle = cb->getHandle();
	cb->resetQuery(timerQuery);
	vkCmdWriteTimestamp(cmdBufHandle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, timerQuery->mPool, timerQuery->mQueryIdx);

	// Note: Must happen only here because we need to check VulkanResource::isBound under the same mutex
	cb->registerResource(timerQuery, VulkanUseFlag::Write);

	return timerQuery;
}
//------------------------------------------------------------------------------ // method for GPUInterface //------------------------------------------------------------------------------ void RendererVk::TimerSetup(nv_helpers::Profiler::TimerIdx idx) { VkResult result = VK_ERROR_INITIALIZATION_FAILED; if(m_bValid == false) return; ::VkCommandBuffer timerCmd; timerCmd = nvk.vkAllocateCommandBuffer(m_cmdPool, true); nvk.vkBeginCommandBuffer(timerCmd, true); vkCmdResetQueryPool(timerCmd, m_timePool, idx, 1); // not ideal to do this per query vkCmdWriteTimestamp(timerCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_timePool, idx); nvk.vkEndCommandBuffer(timerCmd); nvk.vkQueueSubmit(NVK::VkSubmitInfo(0, NULL, NULL, 1, &timerCmd, 0, NULL), NULL); }
// Scoped GPU-zone marker (end half): writes the zone's closing GPU timestamp
// and enqueues the matching GpuZoneEnd event for the profiler.
tracy_force_inline ~VkCtxScope()
{
#ifdef TRACY_ON_DEMAND
    if( !m_active ) return;  // zone was never opened (no client connected)
#endif
    // Allocate the next query slot and stamp it; the profiler pairs this
    // query with the begin query emitted by the constructor.
    const auto queryId = m_ctx->NextQueryId();
    vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_ctx->m_query, queryId );

    // Publish the zone-end event. All payload MemWrite()s must complete
    // before the release store makes the item visible — do not reorder.
    Magic magic;
    auto& token = s_token.ptr;
    auto& tail = token->get_tail_index();
    auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
    MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
    MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
    MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
    MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
    tail.store( magic + 1, std::memory_order_release );
}
// GPU profiling context for a Vulkan queue. Creates the timestamp query pool,
// performs a one-shot CPU/GPU clock calibration (a paired CPU time and GPU
// timestamp), and announces the new context to the profiler with a
// GpuNewContext event. Each submit below is followed by vkQueueWaitIdle so
// the single provided command buffer can be reused synchronously.
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
    : m_device( device )
    , m_queue( queue )
    , m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )  // unique context id
    , m_head( 0 )
    , m_tail( 0 )
    , m_oldCnt( 0 )
{
    // Context ids are stored in 8 bits; 255 would overflow the id space.
    assert( m_context != 255 );

    // Timestamp tick duration (ns per tick), forwarded to the profiler so it
    // can convert raw query values.
    VkPhysicalDeviceProperties prop;
    vkGetPhysicalDeviceProperties( physdev, &prop );
    const float period = prop.limits.timestampPeriod;

    VkQueryPoolCreateInfo poolInfo = {};
    poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    poolInfo.queryCount = QueryCount;
    poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
    vkCreateQueryPool( device, &poolInfo, nullptr, &m_query );

    VkCommandBufferBeginInfo beginInfo = {};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

    VkSubmitInfo submitInfo = {};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &cmdbuf;

    // Reset the entire pool once up front.
    vkBeginCommandBuffer( cmdbuf, &beginInfo );
    vkCmdResetQueryPool( cmdbuf, m_query, 0, QueryCount );
    vkEndCommandBuffer( cmdbuf );
    vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
    vkQueueWaitIdle( queue );

    // Calibration: record a single GPU timestamp into slot 0...
    vkBeginCommandBuffer( cmdbuf, &beginInfo );
    vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
    vkEndCommandBuffer( cmdbuf );
    vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
    vkQueueWaitIdle( queue );

    // ...then sample CPU time once the queue is idle and read the GPU value
    // back (WAIT_BIT blocks until the result is available).
    int64_t tcpu = Profiler::GetTime();
    int64_t tgpu;
    vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );

    // Reset slot 0 so it is usable for normal zone queries again.
    vkBeginCommandBuffer( cmdbuf, &beginInfo );
    vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
    vkEndCommandBuffer( cmdbuf );
    vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
    vkQueueWaitIdle( queue );

    // Publish the GpuNewContext event. All payload MemWrite()s must complete
    // before the release store that makes the item visible — do not reorder.
    Magic magic;
    auto& token = s_token.ptr;
    auto& tail = token->get_tail_index();
    auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
    MemWrite( &item->hdr.type, QueueType::GpuNewContext );
    MemWrite( &item->gpuNewContext.cpuTime, tcpu );
    MemWrite( &item->gpuNewContext.gpuTime, tgpu );
    memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );  // context is not bound to a CPU thread
    MemWrite( &item->gpuNewContext.period, period );
    MemWrite( &item->gpuNewContext.context, m_context );
    MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
#ifdef TRACY_ON_DEMAND
    // Keep a copy so late-connecting clients also receive this context event.
    s_profiler.DeferItem( *item );
#endif
    tail.store( magic + 1, std::memory_order_release );
}
// Records the timestamp-write command into the given command buffer.
// Rvalue-ref qualified: the command object is consumed by invocation.
// (Parameter renamed so it no longer shadows the command_buffer type.)
void operator()(const command_buffer &cmd_buffer, command_recorder &) && override final {
	vkCmdWriteTimestamp(cmd_buffer,
	                    static_cast<VkPipelineStageFlagBits>(stage),
	                    pool,
	                    index);
}