/// Opens a profiling section and records its GPU begin-timestamp into `cmd`.
///
/// @param name  Label of the section; forwarded to Profiler::beginSection.
/// @param cmd   Command buffer that receives the query reset and the
///              begin-timestamp write.
/// @return      The section id handed out by Profiler::beginSection.
nvh::Profiler::SectionID ProfilerVK::beginSection(const char* name, VkCommandBuffer cmd)
{
  // Callback given to the base profiler so it can fetch the GPU duration of a
  // section. Only members of this object are used, hence the explicit [this]
  // capture (the previous [&] captured nothing else).
  // NOTE(review): the callback presumably outlives this call, so it must not
  // capture locals by reference - confirm against Profiler::beginSection.
  nvh::Profiler::gpuTimeProvider_fn fnProvider = [this](SectionID i, uint32_t queryFrame, double& gpuTime) {
    // The begin and end queries of a section are fetched as two consecutive
    // queries starting at the begin index.
    const uint32_t idxBegin = getTimerIdx(i, queryFrame, true);

    uint64_t times[2] = {0, 0};
    // The stride must be the size of one result. With a stride of 0 both
    // query results are written to offset 0 and times[1] is never filled in.
    const VkResult result = vkGetQueryPoolResults(m_device, m_queryPool, idxBegin, 2, sizeof(times), times,
                                                  sizeof(times[0]), VK_QUERY_RESULT_64_BIT);
    if(result != VK_SUCCESS)
    {
      // Results not available (or an error occurred); gpuTime is left untouched.
      return false;
    }

    // Tick delta scaled by m_frequency and divided by 1000.
    // NOTE(review): presumably m_frequency is nanoseconds per tick, which makes
    // this microseconds - confirm where m_frequency is initialised.
    gpuTime = (double(times[1] - times[0]) * double(m_frequency)) / double(1000);
    return true;
  };

  SectionID slot = Profiler::beginSection(name, "VK ", fnProvider);

  // Registering the section may have raised the number of timers needed.
  if(getRequiredTimers() > m_queryPoolSize)
  {
    resize();
  }

  uint32_t idx = getTimerIdx(slot, getSubFrame(), true);
  // clear begin and end
  // not ideal to do this per query
  vkCmdResetQueryPool(cmd, m_queryPool, idx, 2);
  vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_queryPool, idx);

  return slot;
}
// Starts a new timed section for the current frame and returns its slot.
//
// name  : label stored in the entry and used for the optional NVTX range.
// gpuif : optional GPU timer backend; when null an OpenGL timestamp query
//         is issued instead.
//
// The CPU start time is stored negated in the entry's delta for the current
// query frame.
inline Profiler::Slot Profiler::beginSection( const char* name, GPUInterface* gpuif )
{
  GLuint frameIdx = m_numFrames % FRAME_DELAY;
  Slot   id       = m_frameEntries++;

  // Out of entries: double the storage and let the GPU backend grow with it.
  if (id >= m_entries.size())
  {
    grow((unsigned int)(m_entries.size() * 2));
    if (gpuif)
    {
      gpuif->TimerGrow( getRequiredTimers() );
    }
  }

  // A slot that now describes a different section restarts the reset delay.
  const bool nameChanged    = m_entries[id].name != name;
  const bool backendChanged = m_entries[id].gpuif != gpuif;
  if (nameChanged || backendChanged)
  {
    m_entries[id].name  = name;
    m_entries[id].gpuif = gpuif;
    m_resetDelay = CONFIG_DELAY;
  }

  // Record the nesting depth of this section, then descend one level.
  int depth = m_level++;
  m_entries[id].level    = depth;
  m_entries[id].splitter = false;

#ifdef SUPPORT_NVTOOLSEXT
  {
    // Push an NVTX range whose colour varies with slot parity and depth.
    nvtxEventAttributes_t attrib = {0};
    attrib.version   = NVTX_VERSION;
    attrib.size      = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
    attrib.colorType = NVTX_COLOR_ARGB;

    unsigned char channels[4];
    channels[0] = 255;
    channels[1] = 0;
    channels[2] = id % 2 ? 127 : 255;
    channels[3] = 255;

    channels[2] -= depth * 16;
    channels[3] -= depth * 16;

    attrib.color         = *(uint32_t*)(channels);
    attrib.messageType   = NVTX_MESSAGE_TYPE_ASCII;
    attrib.message.ascii = name;
    nvtxRangePushEx(&attrib);
  }
#endif

  // Issue the GPU begin timestamp through whichever path is active.
  if (!gpuif)
  {
    glQueryCounter(m_entries[id].queries[frameIdx], GL_TIMESTAMP);
  }
  else
  {
    gpuif->TimerSetup( getTimerIdx(id, frameIdx, true) );
  }

  m_entries[id].deltas[frameIdx] = -getMicroSeconds();

  return id;
}
void ProfilerVK::resize() { if (getRequiredTimers() < m_queryPoolSize) return; if (m_queryPool) { // not exactly efficient, but when timers changed a lot, we have a slow frame anyway // cleaner would be allocating more pools vkDeviceWaitIdle(m_device); vkDestroyQueryPool(m_device, m_queryPool, m_allocator); } VkQueryPoolCreateInfo create_info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; create_info.queryType = VK_QUERY_TYPE_TIMESTAMP; create_info.queryCount = getRequiredTimers(); m_queryPoolSize = create_info.queryCount; VkResult res = vkCreateQueryPool(m_device, &create_info, m_allocator, &m_queryPool); assert(res == VK_SUCCESS); }