Example #1
VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount,
                                      const VkSubmitInfo *pSubmits, VkFence fence)
{
  SCOPED_DBG_SINK();

  size_t tempmemSize = sizeof(VkSubmitInfo) * submitCount;

  // count how much extra space the unwrapped semaphore and command buffer arrays will need
  for(uint32_t i = 0; i < submitCount; i++)
  {
    tempmemSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
    tempmemSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
    tempmemSize += pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
  }

  byte *memory = GetTempMemory(tempmemSize);

  // carve up the scratch memory: the unwrapped VkSubmitInfo array comes first,
  // followed by the unwrapped semaphore/command buffer handle arrays for each submit
  VkSubmitInfo *unwrappedSubmits = (VkSubmitInfo *)memory;
  VkSemaphore *unwrappedWaitSems = (VkSemaphore *)(unwrappedSubmits + submitCount);

  for(uint32_t i = 0; i < submitCount; i++)
  {
    RDCASSERT(pSubmits[i].sType == VK_STRUCTURE_TYPE_SUBMIT_INFO && pSubmits[i].pNext == NULL);
    unwrappedSubmits[i] = pSubmits[i];

    unwrappedSubmits[i].pWaitSemaphores =
        unwrappedSubmits[i].waitSemaphoreCount ? unwrappedWaitSems : NULL;
    for(uint32_t o = 0; o < unwrappedSubmits[i].waitSemaphoreCount; o++)
      unwrappedWaitSems[o] = Unwrap(pSubmits[i].pWaitSemaphores[o]);
    unwrappedWaitSems += unwrappedSubmits[i].waitSemaphoreCount;

    VkCommandBuffer *unwrappedCommandBuffers = (VkCommandBuffer *)unwrappedWaitSems;

    unwrappedSubmits[i].pCommandBuffers =
        unwrappedSubmits[i].commandBufferCount ? unwrappedCommandBuffers : NULL;
    for(uint32_t o = 0; o < unwrappedSubmits[i].commandBufferCount; o++)
      unwrappedCommandBuffers[o] = Unwrap(pSubmits[i].pCommandBuffers[o]);
    unwrappedCommandBuffers += unwrappedSubmits[i].commandBufferCount;

    VkSemaphore *unwrappedSignalSems = (VkSemaphore *)unwrappedCommandBuffers;

    unwrappedSubmits[i].pSignalSemaphores =
        unwrappedSubmits[i].signalSemaphoreCount ? unwrappedSignalSems : NULL;
    for(uint32_t o = 0; o < unwrappedSubmits[i].signalSemaphoreCount; o++)
      unwrappedSignalSems[o] = Unwrap(pSubmits[i].pSignalSemaphores[o]);
  }

  VkResult ret =
      ObjDisp(queue)->QueueSubmit(Unwrap(queue), submitCount, unwrappedSubmits, Unwrap(fence));

  bool capframe = false;
  set<ResourceId> refdIDs;

  for(uint32_t s = 0; s < submitCount; s++)
  {
    for(uint32_t i = 0; i < pSubmits[s].commandBufferCount; i++)
    {
      ResourceId cmd = GetResID(pSubmits[s].pCommandBuffers[i]);

      VkResourceRecord *record = GetRecord(pSubmits[s].pCommandBuffers[i]);

      {
        SCOPED_LOCK(m_ImageLayoutsLock);
        GetResourceManager()->ApplyBarriers(record->bakedCommands->cmdInfo->imgbarriers,
                                            m_ImageLayouts);
      }

      // need to lock this whole section, not just the check on m_State:
      // otherwise we could check the state, start marking dirty resources,
      // and have the state become capframe while we're doing so.
      // the later sections that mark resources referenced and add the submit
      // chunk to the frame record don't have to be protected - only the
      // decision of whether we're in-frame or not, and the dirty marking
      // (a standalone sketch of this pattern follows after this function).
      {
        SCOPED_LOCK(m_CapTransitionLock);
        if(m_State == WRITING_CAPFRAME)
        {
          for(auto it = record->bakedCommands->cmdInfo->dirtied.begin();
              it != record->bakedCommands->cmdInfo->dirtied.end(); ++it)
            GetResourceManager()->MarkPendingDirty(*it);

          capframe = true;
        }
        else
        {
          for(auto it = record->bakedCommands->cmdInfo->dirtied.begin();
              it != record->bakedCommands->cmdInfo->dirtied.end(); ++it)
            GetResourceManager()->MarkDirtyResource(*it);
        }
      }

      if(capframe)
      {
        // for each bound descriptor set, mark it referenced as well as all resources currently
        // bound to it
        for(auto it = record->bakedCommands->cmdInfo->boundDescSets.begin();
            it != record->bakedCommands->cmdInfo->boundDescSets.end(); ++it)
        {
          GetResourceManager()->MarkResourceFrameReferenced(GetResID(*it), eFrameRef_Read);

          VkResourceRecord *setrecord = GetRecord(*it);

          for(auto refit = setrecord->descInfo->bindFrameRefs.begin();
              refit != setrecord->descInfo->bindFrameRefs.end(); ++refit)
          {
            refdIDs.insert(refit->first);
            GetResourceManager()->MarkResourceFrameReferenced(refit->first, refit->second.second);

            if(refit->second.first & DescriptorSetData::SPARSE_REF_BIT)
            {
              VkResourceRecord *sparserecord = GetResourceManager()->GetResourceRecord(refit->first);

              GetResourceManager()->MarkSparseMapReferenced(sparserecord->sparseInfo);
            }
          }
        }

        for(auto it = record->bakedCommands->cmdInfo->sparse.begin();
            it != record->bakedCommands->cmdInfo->sparse.end(); ++it)
          GetResourceManager()->MarkSparseMapReferenced(*it);

        // pull in frame refs from this baked command buffer
        record->bakedCommands->AddResourceReferences(GetResourceManager());
        record->bakedCommands->AddReferencedIDs(refdIDs);

        // ref the parent command buffer by itself, this will pull in the cmd buffer pool
        GetResourceManager()->MarkResourceFrameReferenced(record->GetResourceID(), eFrameRef_Read);

        for(size_t sub = 0; sub < record->bakedCommands->cmdInfo->subcmds.size(); sub++)
        {
          record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddResourceReferences(
              GetResourceManager());
          record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddReferencedIDs(refdIDs);
          GetResourceManager()->MarkResourceFrameReferenced(
              record->bakedCommands->cmdInfo->subcmds[sub]->GetResourceID(), eFrameRef_Read);

          record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddRef();
        }

        GetResourceManager()->MarkResourceFrameReferenced(GetResID(queue), eFrameRef_Read);

        if(fence != VK_NULL_HANDLE)
          GetResourceManager()->MarkResourceFrameReferenced(GetResID(fence), eFrameRef_Read);

        {
          SCOPED_LOCK(m_CmdBufferRecordsLock);
          m_CmdBufferRecords.push_back(record->bakedCommands);
          for(size_t sub = 0; sub < record->bakedCommands->cmdInfo->subcmds.size(); sub++)
            m_CmdBufferRecords.push_back(record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands);
        }

        record->bakedCommands->AddRef();
      }

      record->cmdInfo->dirtied.clear();
    }
  }

  if(capframe)
  {
    vector<VkResourceRecord *> maps;
    {
      SCOPED_LOCK(m_CoherentMapsLock);
      maps = m_CoherentMaps;
    }

    for(auto it = maps.begin(); it != maps.end(); ++it)
    {
      VkResourceRecord *record = *it;
      MemMapState &state = *record->memMapState;

      // potential persistent map
      if(state.mapCoherent && state.mappedPtr && !state.mapFlushed)
      {
        // only need to flush memory that could affect this submitted batch of work
        if(refdIDs.find(record->GetResourceID()) == refdIDs.end())
        {
          RDCDEBUG("Map of memory %llu not referenced in this queue - not flushing",
                   record->GetResourceID());
          continue;
        }

        size_t diffStart = 0, diffEnd = 0;
        bool found = true;

// enabled because it's necessary for programs with very large coherent mappings
// (> 1GB): otherwise more than a couple of vkQueueSubmit calls leads to vast
// memory allocation. There might still be bugs lurking in here, though.
#if 1
        // this causes vkFlushMappedMemoryRanges call to allocate and copy to refData
        // from serialised buffer. We want to copy *precisely* the serialised data,
        // otherwise there is a gap in time between serialising out a snapshot of
        // the buffer and whenever we then copy into the ref data, e.g. below.
        // during this time, data could be written to the buffer and it won't have
        // been caught in the serialised snapshot, and if it doesn't change then
        // it *also* won't be caught in any future FindDiffRange() calls.
        //
        // Likewise once refData is allocated, the call below will also update it
        // with the data serialised out for the same reason.
        //
        // Note: it's still possible that data is being written to by the
        // application while it's being serialised out in the snapshot below. That
        // is OK, since the application is responsible for ensuring it's not writing
        // data that would be needed by the GPU in this submit. As long as the
        // refdata we use for future use is identical to what was serialised, we
        // shouldn't miss anything
        state.needRefData = true;

        // if we have a previous snapshot of the data, diff against it to narrow
        // the flushed range (a minimal sketch of such a diff follows after this
        // function); otherwise just serialise the whole mapping
        if(state.refData)
          found = FindDiffRange((byte *)state.mappedPtr, state.refData, (size_t)state.mapSize,
                                diffStart, diffEnd);
        else
#endif
          diffEnd = (size_t)state.mapSize;

        if(found)
        {
          // MULTIDEVICE: should find the device for this queue, and only
          // flush maps associated with this queue
          VkDevice dev = GetDev();

          {
            RDCLOG("Persistent map flush forced for %llu (%llu -> %llu)", record->GetResourceID(),
                   (uint64_t)diffStart, (uint64_t)diffEnd);
            VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL,
                                         (VkDeviceMemory)(uint64_t)record->Resource,
                                         state.mapOffset + diffStart, diffEnd - diffStart};
            vkFlushMappedMemoryRanges(dev, 1, &range);
            state.mapFlushed = false;
          }

          GetResourceManager()->MarkPendingDirty(record->GetResourceID());
        }
        else
        {
          RDCDEBUG("Persistent map flush not needed for %llu", record->GetResourceID());
        }
      }
    }

    {
      CACHE_THREAD_SERIALISER();

      // serialise one QUEUE_SUBMIT chunk per VkSubmitInfo, and reference the
      // semaphores each submit waits on and signals
      for(uint32_t s = 0; s < submitCount; s++)
      {
        SCOPED_SERIALISE_CONTEXT(QUEUE_SUBMIT);
        Serialise_vkQueueSubmit(localSerialiser, queue, 1, &pSubmits[s], fence);

        m_FrameCaptureRecord->AddChunk(scope.Get());

        for(uint32_t sem = 0; sem < pSubmits[s].waitSemaphoreCount; sem++)
          GetResourceManager()->MarkResourceFrameReferenced(
              GetResID(pSubmits[s].pWaitSemaphores[sem]), eFrameRef_Read);

        for(uint32_t sem = 0; sem < pSubmits[s].signalSemaphoreCount; sem++)
          GetResourceManager()->MarkResourceFrameReferenced(
              GetResID(pSubmits[s].pSignalSemaphores[sem]), eFrameRef_Read);
      }
    }
  }

  return ret;
}
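
// The locking comment inside vkQueueSubmit above is the crux of the dirty
// marking: the m_State check and the marking itself must be one atomic unit
// with respect to the capture transition. Below is a minimal standalone sketch
// of that pattern; the type and containers here are hypothetical stand-ins for
// the resource manager used above, not its real API.
#include <mutex>
#include <set>
#include <vector>

struct CaptureStateSketch
{
  enum class State { Idle, Capturing };

  std::mutex capTransitionLock;    // also held while 'state' flips to Capturing
  State state = State::Idle;

  std::set<int> pendingDirty;      // resources to re-mark dirty once the capture ends
  std::set<int> dirty;             // resources tracked as dirty outside a capture

  // Returns true if a capture was in progress for this submission.
  bool MarkDirtied(const std::vector<int> &dirtied)
  {
    // If the state could flip between the check and the marking, resources
    // would land in the wrong list and be missed by the capture - hence the
    // single lock around both.
    std::lock_guard<std::mutex> lock(capTransitionLock);

    if(state == State::Capturing)
    {
      pendingDirty.insert(dirtied.begin(), dirtied.end());
      return true;
    }

    dirty.insert(dirtied.begin(), dirtied.end());
    return false;
  }
};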
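
// The coherent-map handling above leans on FindDiffRange() to shrink the
// flushed region to just the bytes that changed since the previous snapshot.
// Below is a plausible minimal version of such a byte-range diff - an
// illustration of the idea only, not the actual FindDiffRange() used above,
// which presumably compares in larger word-sized chunks for speed.
#include <cstddef>

// Find the first and last differing bytes between the live mapping and the
// reference copy. Returns false if the buffers are identical; on success,
// [diffStart, diffEnd) covers every changed byte.
bool FindDiffRangeSketch(const unsigned char *data, const unsigned char *ref, size_t size,
                         size_t &diffStart, size_t &diffEnd)
{
  diffStart = size;
  diffEnd = 0;

  // scan forward for the first differing byte
  for(size_t i = 0; i < size; i++)
  {
    if(data[i] != ref[i])
    {
      diffStart = i;
      break;
    }
  }

  // identical buffers - nothing to flush
  if(diffStart == size)
    return false;

  // scan backward for the last differing byte; diffEnd is one past it
  for(size_t i = size; i > diffStart; i--)
  {
    if(data[i - 1] != ref[i - 1])
    {
      diffEnd = i;
      break;
    }
  }

  return true;
}
// Only the resulting range is then passed to vkFlushMappedMemoryRanges, which
// keeps both the flush and the serialised snapshot small for very large
// (multi-GB) coherent mappings.
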
void STDMETHODCALLTYPE WrappedID3D12CommandQueue::ExecuteCommandLists(
    UINT NumCommandLists, ID3D12CommandList *const *ppCommandLists)
{
  ID3D12CommandList **unwrapped = m_pDevice->GetTempArray<ID3D12CommandList *>(NumCommandLists);
  for(UINT i = 0; i < NumCommandLists; i++)
    unwrapped[i] = Unwrap(ppCommandLists[i]);

  m_pReal->ExecuteCommandLists(NumCommandLists, unwrapped);

  if(m_State >= WRITING)
  {
    SCOPED_LOCK(m_Lock);
    SCOPED_LOCK(m_pDevice->GetCapTransitionLock());

    bool capframe = (m_State == WRITING_CAPFRAME);
    set<ResourceId> refdIDs;

    for(UINT i = 0; i < NumCommandLists; i++)
    {
      D3D12ResourceRecord *record = GetRecord(ppCommandLists[i]);

      if(record->ContainsExecuteIndirect)
        m_QueueRecord->ContainsExecuteIndirect = true;

      m_pDevice->ApplyBarriers(record->bakedCommands->cmdInfo->barriers);

      // need to lock this whole section, not just the check on m_State:
      // otherwise we could check the state, start marking dirty resources,
      // and have the state become capframe while we're doing so.
      // the later sections that mark resources referenced and add the
      // submit chunk to the frame record don't have to be protected - only
      // the decision of whether we're in-frame or not, and the dirty marking
      // (the same pattern is sketched after vkQueueSubmit above).
      if(capframe)
      {
        for(auto it = record->bakedCommands->cmdInfo->dirtied.begin();
            it != record->bakedCommands->cmdInfo->dirtied.end(); ++it)
          GetResourceManager()->MarkPendingDirty(*it);
      }
      else
      {
        for(auto it = record->bakedCommands->cmdInfo->dirtied.begin();
            it != record->bakedCommands->cmdInfo->dirtied.end(); ++it)
          GetResourceManager()->MarkDirtyResource(*it);
      }

      if(capframe)
      {
        // any descriptor copies or writes could reference new resources that aren't
        // in the bound descriptors list yet, so gather all dynamically referenced
        // descriptors and include them when deciding what we need to flush
        std::vector<D3D12Descriptor> dynDescRefs;
        m_pDevice->GetDynamicDescriptorReferences(dynDescRefs);

        for(size_t d = 0; d < dynDescRefs.size(); d++)
        {
          ResourceId id, id2;
          FrameRefType ref = eFrameRef_Read;

          dynDescRefs[d].GetRefIDs(id, id2, ref);

          if(id != ResourceId())
          {
            refdIDs.insert(id);
            GetResourceManager()->MarkResourceFrameReferenced(id, ref);
          }

          if(id2 != ResourceId())
          {
            refdIDs.insert(id2);
            GetResourceManager()->MarkResourceFrameReferenced(id2, ref);
          }
        }

        // for each bound descriptor table, mark it referenced as well as all resources currently
        // bound to it
        for(auto it = record->bakedCommands->cmdInfo->boundDescs.begin();
            it != record->bakedCommands->cmdInfo->boundDescs.end(); ++it)
        {
          D3D12Descriptor *desc = *it;

          ResourceId id, id2;
          FrameRefType ref = eFrameRef_Read;

          desc->GetRefIDs(id, id2, ref);

          if(id != ResourceId())
          {
            refdIDs.insert(id);
            GetResourceManager()->MarkResourceFrameReferenced(id, ref);
          }

          if(id2 != ResourceId())
          {
            refdIDs.insert(id2);
            GetResourceManager()->MarkResourceFrameReferenced(id2, ref);
          }
        }

        // pull in frame refs from this baked command list
        record->bakedCommands->AddResourceReferences(GetResourceManager());
        record->bakedCommands->AddReferencedIDs(refdIDs);

        // reference all executed bundles as well
        for(size_t b = 0; b < record->bakedCommands->cmdInfo->bundles.size(); b++)
        {
          record->bakedCommands->cmdInfo->bundles[b]->bakedCommands->AddResourceReferences(
              GetResourceManager());
          record->bakedCommands->cmdInfo->bundles[b]->bakedCommands->AddReferencedIDs(refdIDs);
          GetResourceManager()->MarkResourceFrameReferenced(
              record->bakedCommands->cmdInfo->bundles[b]->GetResourceID(), eFrameRef_Read);

          record->bakedCommands->cmdInfo->bundles[b]->bakedCommands->AddRef();
        }

        {
          m_CmdListRecords.push_back(record->bakedCommands);
          for(size_t sub = 0; sub < record->bakedCommands->cmdInfo->bundles.size(); sub++)
            m_CmdListRecords.push_back(record->bakedCommands->cmdInfo->bundles[sub]->bakedCommands);
        }

        record->bakedCommands->AddRef();
      }

      record->cmdInfo->dirtied.clear();
    }

    if(capframe)
    {
      vector<MapState> maps = m_pDevice->GetMaps();

      for(auto it = maps.begin(); it != maps.end(); ++it)
      {
        WrappedID3D12Resource *res = it->res;
        UINT subres = it->subres;
        size_t size = (size_t)it->totalSize;

        // only need to flush memory that could affect this submitted batch of work
        if(refdIDs.find(res->GetResourceID()) == refdIDs.end())
        {
          RDCDEBUG("Map of memory %llu not referenced in this queue - not flushing",
                   res->GetResourceID());
          continue;
        }

        size_t diffStart = 0, diffEnd = 0;
        bool found = true;

        byte *ref = res->GetShadow(subres);
        byte *data = res->GetMap(subres);

        if(ref)
          found = FindDiffRange(data, ref, size, diffStart, diffEnd);
        else
          diffEnd = size;

        if(found)
        {
          RDCLOG("Persistent map flush forced for %llu (%llu -> %llu)", res->GetResourceID(),
                 (uint64_t)diffStart, (uint64_t)diffEnd);

          D3D12_RANGE range = {diffStart, diffEnd};

          m_pDevice->MapDataWrite(res, subres, data, range);

          if(ref == NULL)
          {
            res->AllocShadow(subres, size);

            ref = res->GetShadow(subres);
          }

          // update the comparison shadow so the next diff is relative to this
          // flush (see the sketch after this function)
          memcpy(ref, res->GetMap(subres), size);

          GetResourceManager()->MarkPendingDirty(res->GetResourceID());
        }
        else
        {
          RDCDEBUG("Persistent map flush not needed for %llu", res->GetResourceID());
        }
      }

      // serialise one EXECUTE_CMD_LISTS chunk per command list
      for(UINT i = 0; i < NumCommandLists; i++)
      {
        SCOPED_SERIALISE_CONTEXT(EXECUTE_CMD_LISTS);
        Serialise_ExecuteCommandLists(1, ppCommandLists + i);

        m_QueueRecord->AddChunk(scope.Get());
      }
    }
  }
}
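
// The D3D12 path above follows the same diff-then-flush idea, but keeps an
// explicit shadow copy per mapped subresource to diff against on the next
// ExecuteCommandLists. A compact sketch of that update cycle follows; the
// MappedSubresourceSketch type and the writeBack callback are hypothetical
// stand-ins for the wrapped resource's map/shadow accessors and for
// m_pDevice->MapDataWrite(), and FindDiffRangeSketch() is the diff sketch
// given after the Vulkan function.
#include <cstddef>
#include <cstring>
#include <vector>

bool FindDiffRangeSketch(const unsigned char *data, const unsigned char *ref, size_t size,
                         size_t &diffStart, size_t &diffEnd);

struct MappedSubresourceSketch
{
  std::vector<unsigned char> shadow;   // reference copy taken at the last flush (may be empty)
  unsigned char *mapped = nullptr;     // live CPU-visible pointer
  size_t size = 0;
};

// Returns true if any bytes changed and a write-back was issued.
template <typename WriteBackFn>
bool FlushIfChanged(MappedSubresourceSketch &sub, WriteBackFn writeBack)
{
  size_t diffStart = 0, diffEnd = sub.size;
  bool found = true;

  // narrow the range to the bytes that actually changed; with no shadow yet,
  // treat the whole subresource as changed
  if(!sub.shadow.empty())
    found = FindDiffRangeSketch(sub.mapped, sub.shadow.data(), sub.size, diffStart, diffEnd);

  if(!found)
    return false;

  // write back / serialise only the changed range
  writeBack(sub.mapped, diffStart, diffEnd);

  // allocate the shadow on first use, then refresh it so the next diff is
  // relative to what was just captured
  if(sub.shadow.empty())
    sub.shadow.resize(sub.size);
  std::memcpy(sub.shadow.data(), sub.mapped, sub.size);

  return true;
}
// usage sketch:
//   FlushIfChanged(sub, [&](unsigned char *ptr, size_t start, size_t end) {
//     /* write back ptr[start, end), as MapDataWrite() does with a D3D12_RANGE above */
//   });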