// Opens a profiler section called `name` and records its GPU begin timestamp
// into the command buffer `cmd`.
//
// name : label forwarded to the base profiler's beginSection.
// cmd  : command buffer that receives the query reset and timestamp commands.
//
// Returns the SectionID handed out by the base profiler.
//
// NOTE(review): resize() may destroy and recreate m_queryPool while `cmd` is
// being recorded; commands recorded earlier into the same command buffer would
// then still reference the old pool — confirm callers tolerate this.
nvh::Profiler::SectionID ProfilerVK::beginSection(const char* name, VkCommandBuffer cmd)
{
  // Callback passed to the base profiler to fetch the GPU duration of a
  // section. It only touches members, so capture `this` explicitly instead of
  // capturing everything by reference: the callback is handed to the base
  // profiler and must not refer to this function's locals.
  nvh::Profiler::gpuTimeProvider_fn fnProvider = [this](SectionID i, uint32_t queryFrame, double& gpuTime) {
    // The end timestamp is read as the query directly following the begin
    // timestamp (the same pairing the 2-query reset below relies on), so one
    // call fetches both values.
    const uint32_t idxBegin = getTimerIdx(i, queryFrame, true);

    uint64_t times[2] = {0, 0};
    // The stride must be the size of one 64-bit result; with a stride of 0
    // both results would be written to times[0] and times[1] would stay unset.
    const VkResult result = vkGetQueryPoolResults(m_device, m_queryPool, idxBegin, 2, sizeof(times), times,
                                                  sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);

    if(result != VK_SUCCESS)
    {
      // Results are not available (or the query failed); gpuTime is left untouched.
      return false;
    }

    // Tick delta scaled by m_frequency and divided by 1000.
    // NOTE(review): presumably m_frequency is nanoseconds per tick, making
    // this microseconds — confirm against where m_frequency is set.
    gpuTime = (double(times[1] - times[0]) * double(m_frequency)) / double(1000);
    return true;
  };


  SectionID slot = Profiler::beginSection(name, "VK ", fnProvider);

  // Registering the section may have raised the number of timers needed.
  if (getRequiredTimers() > m_queryPoolSize) {
    resize();
  }

  uint32_t idx = getTimerIdx(slot, getSubFrame(), true);
  // clear begin and end
  vkCmdResetQueryPool(cmd, m_queryPool, idx, 2);  // not ideal to do this per query
  vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_queryPool, idx);

  return slot;
}
Beispiel #2
0
  // Starts a new timed section and returns the slot that identifies it.
  //
  // name  : label stored in the slot's entry (also used as the NVTX range
  //         message when SUPPORT_NVTOOLSEXT is defined).
  // gpuif : optional GPU timer backend. When non-null its TimerSetup is used
  //         for the begin timestamp; otherwise an OpenGL timestamp query is
  //         issued.
  //
  // NOTE(review): storage is grown to twice the current entry count; this
  // presumably relies on m_entries never being empty — confirm at the
  // initialisation site.
  inline Profiler::Slot Profiler::beginSection( const char* name, GPUInterface* gpuif )
  {
    // Index into the per-frame query/delta arrays for the current frame.
    const GLuint frameIdx = m_numFrames % FRAME_DELAY;

    // Take the next slot of this frame and enlarge the storage if it is used up.
    const Slot slot = m_frameEntries++;
    if (m_entries.size() <= slot){
      const unsigned int doubled = (unsigned int)(m_entries.size() * 2);
      grow(doubled);
      if (gpuif){
        gpuif->TimerGrow( getRequiredTimers() );
      }
    }

    // A slot that now describes a different section (other name or backend)
    // is re-labelled and the reset delay is re-armed.
    const bool slotChanged = (m_entries[slot].name != name) ||
                             (m_entries[slot].gpuif != gpuif);
    if (slotChanged)
    {
      m_entries[slot].name = name;
      m_entries[slot].gpuif = gpuif;
      m_resetDelay = CONFIG_DELAY;
    }

    // Record the nesting depth of this section, then step one level deeper.
    const int depth = m_level++;
    m_entries[slot].level = depth;
    m_entries[slot].splitter = false;

#ifdef SUPPORT_NVTOOLSEXT
    {
      // Push an NVTX range whose colour alternates with the slot parity and
      // is shifted by the nesting depth.
      unsigned char color[4];
      color[0] = 255;
      color[1] = 0;
      color[2] = slot % 2 ? 127 : 255;
      color[3] = 255;

      color[2] -= depth * 16;
      color[3] -= depth * 16;

      nvtxEventAttributes_t eventAttrib = {0};
      eventAttrib.version = NVTX_VERSION;
      eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
      eventAttrib.colorType = NVTX_COLOR_ARGB;
      eventAttrib.color = *(uint32_t*)(color);
      eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
      eventAttrib.message.ascii = name;
      nvtxRangePushEx(&eventAttrib);
    }
#endif

    // Issue the GPU begin timestamp through the backend if there is one,
    // otherwise through an OpenGL timestamp query.
    if (!gpuif){
      glQueryCounter(m_entries[slot].queries[frameIdx],GL_TIMESTAMP);
    }
    else{
      gpuif->TimerSetup( getTimerIdx(slot,frameIdx,true) );
    }

    // Store the negated start time.
    // NOTE(review): presumably the matching end call adds the end time to
    // obtain the elapsed duration — confirm in endSection.
    m_entries[slot].deltas[frameIdx] = -getMicroSeconds();

    return slot;
  }
void ProfilerVK::resize()
{
  if (getRequiredTimers() < m_queryPoolSize) return;

  if (m_queryPool) {
    // not exactly efficient, but when timers changed a lot, we have a slow frame anyway
    // cleaner would be allocating more pools
    vkDeviceWaitIdle(m_device);
    vkDestroyQueryPool(m_device, m_queryPool, m_allocator);
  }

  VkQueryPoolCreateInfo create_info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
  create_info.queryType = VK_QUERY_TYPE_TIMESTAMP;
  create_info.queryCount = getRequiredTimers();
  m_queryPoolSize = create_info.queryCount;

  VkResult res = vkCreateQueryPool(m_device, &create_info, m_allocator, &m_queryPool);
  assert(res == VK_SUCCESS);
}