VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence) { SCOPED_DBG_SINK(); size_t tempmemSize = sizeof(VkSubmitInfo) * submitCount; // need to count how many semaphore and command buffer arrays to allocate for for(uint32_t i = 0; i < submitCount; i++) { tempmemSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer); tempmemSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore); tempmemSize += pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore); } byte *memory = GetTempMemory(tempmemSize); VkSubmitInfo *unwrappedSubmits = (VkSubmitInfo *)memory; VkSemaphore *unwrappedWaitSems = (VkSemaphore *)(unwrappedSubmits + submitCount); for(uint32_t i = 0; i < submitCount; i++) { RDCASSERT(pSubmits[i].sType == VK_STRUCTURE_TYPE_SUBMIT_INFO && pSubmits[i].pNext == NULL); unwrappedSubmits[i] = pSubmits[i]; unwrappedSubmits[i].pWaitSemaphores = unwrappedSubmits[i].waitSemaphoreCount ? unwrappedWaitSems : NULL; for(uint32_t o = 0; o < unwrappedSubmits[i].waitSemaphoreCount; o++) unwrappedWaitSems[o] = Unwrap(pSubmits[i].pWaitSemaphores[o]); unwrappedWaitSems += unwrappedSubmits[i].waitSemaphoreCount; VkCommandBuffer *unwrappedCommandBuffers = (VkCommandBuffer *)unwrappedWaitSems; unwrappedSubmits[i].pCommandBuffers = unwrappedSubmits[i].commandBufferCount ? unwrappedCommandBuffers : NULL; for(uint32_t o = 0; o < unwrappedSubmits[i].commandBufferCount; o++) unwrappedCommandBuffers[o] = Unwrap(pSubmits[i].pCommandBuffers[o]); unwrappedCommandBuffers += unwrappedSubmits[i].commandBufferCount; VkSemaphore *unwrappedSignalSems = (VkSemaphore *)unwrappedCommandBuffers; unwrappedSubmits[i].pSignalSemaphores = unwrappedSubmits[i].signalSemaphoreCount ? unwrappedSignalSems : NULL; for(uint32_t o = 0; o < unwrappedSubmits[i].signalSemaphoreCount; o++) unwrappedSignalSems[o] = Unwrap(pSubmits[i].pSignalSemaphores[o]); } VkResult ret = ObjDisp(queue)->QueueSubmit(Unwrap(queue), submitCount, unwrappedSubmits, Unwrap(fence)); bool capframe = false; set<ResourceId> refdIDs; for(uint32_t s = 0; s < submitCount; s++) { for(uint32_t i = 0; i < pSubmits[s].commandBufferCount; i++) { ResourceId cmd = GetResID(pSubmits[s].pCommandBuffers[i]); VkResourceRecord *record = GetRecord(pSubmits[s].pCommandBuffers[i]); { SCOPED_LOCK(m_ImageLayoutsLock); GetResourceManager()->ApplyBarriers(record->bakedCommands->cmdInfo->imgbarriers, m_ImageLayouts); } // need to lock the whole section of code, not just the check on // m_State, as we also need to make sure we don't check the state, // start marking dirty resources then while we're doing so the // state becomes capframe. // the next sections where we mark resources referenced and add // the submit chunk to the frame record don't have to be protected. // Only the decision of whether we're inframe or not, and marking // dirty. { SCOPED_LOCK(m_CapTransitionLock); if(m_State == WRITING_CAPFRAME) { for(auto it = record->bakedCommands->cmdInfo->dirtied.begin(); it != record->bakedCommands->cmdInfo->dirtied.end(); ++it) GetResourceManager()->MarkPendingDirty(*it); capframe = true; } else { for(auto it = record->bakedCommands->cmdInfo->dirtied.begin(); it != record->bakedCommands->cmdInfo->dirtied.end(); ++it) GetResourceManager()->MarkDirtyResource(*it); } } if(capframe) { // for each bound descriptor set, mark it referenced as well as all resources currently // bound to it for(auto it = record->bakedCommands->cmdInfo->boundDescSets.begin(); it != record->bakedCommands->cmdInfo->boundDescSets.end(); ++it) { GetResourceManager()->MarkResourceFrameReferenced(GetResID(*it), eFrameRef_Read); VkResourceRecord *setrecord = GetRecord(*it); for(auto refit = setrecord->descInfo->bindFrameRefs.begin(); refit != setrecord->descInfo->bindFrameRefs.end(); ++refit) { refdIDs.insert(refit->first); GetResourceManager()->MarkResourceFrameReferenced(refit->first, refit->second.second); if(refit->second.first & DescriptorSetData::SPARSE_REF_BIT) { VkResourceRecord *sparserecord = GetResourceManager()->GetResourceRecord(refit->first); GetResourceManager()->MarkSparseMapReferenced(sparserecord->sparseInfo); } } } for(auto it = record->bakedCommands->cmdInfo->sparse.begin(); it != record->bakedCommands->cmdInfo->sparse.end(); ++it) GetResourceManager()->MarkSparseMapReferenced(*it); // pull in frame refs from this baked command buffer record->bakedCommands->AddResourceReferences(GetResourceManager()); record->bakedCommands->AddReferencedIDs(refdIDs); // ref the parent command buffer by itself, this will pull in the cmd buffer pool GetResourceManager()->MarkResourceFrameReferenced(record->GetResourceID(), eFrameRef_Read); for(size_t sub = 0; sub < record->bakedCommands->cmdInfo->subcmds.size(); sub++) { record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddResourceReferences( GetResourceManager()); record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddReferencedIDs(refdIDs); GetResourceManager()->MarkResourceFrameReferenced( record->bakedCommands->cmdInfo->subcmds[sub]->GetResourceID(), eFrameRef_Read); record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddRef(); } GetResourceManager()->MarkResourceFrameReferenced(GetResID(queue), eFrameRef_Read); if(fence != VK_NULL_HANDLE) GetResourceManager()->MarkResourceFrameReferenced(GetResID(fence), eFrameRef_Read); { SCOPED_LOCK(m_CmdBufferRecordsLock); m_CmdBufferRecords.push_back(record->bakedCommands); for(size_t sub = 0; sub < record->bakedCommands->cmdInfo->subcmds.size(); sub++) m_CmdBufferRecords.push_back(record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands); } record->bakedCommands->AddRef(); } record->cmdInfo->dirtied.clear(); } } if(capframe) { vector<VkResourceRecord *> maps; { SCOPED_LOCK(m_CoherentMapsLock); maps = m_CoherentMaps; } for(auto it = maps.begin(); it != maps.end(); ++it) { VkResourceRecord *record = *it; MemMapState &state = *record->memMapState; // potential persistent map if(state.mapCoherent && state.mappedPtr && !state.mapFlushed) { // only need to flush memory that could affect this submitted batch of work if(refdIDs.find(record->GetResourceID()) == refdIDs.end()) { RDCDEBUG("Map of memory %llu not referenced in this queue - not flushing", record->GetResourceID()); continue; } size_t diffStart = 0, diffEnd = 0; bool found = true; // enabled as this is necessary for programs with very large coherent mappings // (> 1GB) as otherwise more than a couple of vkQueueSubmit calls leads to vast // memory allocation. There might still be bugs lurking in here though #if 1 // this causes vkFlushMappedMemoryRanges call to allocate and copy to refData // from serialised buffer. We want to copy *precisely* the serialised data, // otherwise there is a gap in time between serialising out a snapshot of // the buffer and whenever we then copy into the ref data, e.g. below. // during this time, data could be written to the buffer and it won't have // been caught in the serialised snapshot, and if it doesn't change then // it *also* won't be caught in any future FindDiffRange() calls. // // Likewise once refData is allocated, the call below will also update it // with the data serialised out for the same reason. // // Note: it's still possible that data is being written to by the // application while it's being serialised out in the snapshot below. That // is OK, since the application is responsible for ensuring it's not writing // data that would be needed by the GPU in this submit. As long as the // refdata we use for future use is identical to what was serialised, we // shouldn't miss anything state.needRefData = true; // if we have a previous set of data, compare. // otherwise just serialise it all if(state.refData) found = FindDiffRange((byte *)state.mappedPtr, state.refData, (size_t)state.mapSize, diffStart, diffEnd); else #endif diffEnd = (size_t)state.mapSize; if(found) { // MULTIDEVICE should find the device for this queue. // MULTIDEVICE only want to flush maps associated with this queue VkDevice dev = GetDev(); { RDCLOG("Persistent map flush forced for %llu (%llu -> %llu)", record->GetResourceID(), (uint64_t)diffStart, (uint64_t)diffEnd); VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, (VkDeviceMemory)(uint64_t)record->Resource, state.mapOffset + diffStart, diffEnd - diffStart}; vkFlushMappedMemoryRanges(dev, 1, &range); state.mapFlushed = false; } GetResourceManager()->MarkPendingDirty(record->GetResourceID()); } else { RDCDEBUG("Persistent map flush not needed for %llu", record->GetResourceID()); } } } { CACHE_THREAD_SERIALISER(); for(uint32_t s = 0; s < submitCount; s++) { SCOPED_SERIALISE_CONTEXT(QUEUE_SUBMIT); Serialise_vkQueueSubmit(localSerialiser, queue, 1, &pSubmits[s], fence); m_FrameCaptureRecord->AddChunk(scope.Get()); for(uint32_t sem = 0; sem < pSubmits[s].waitSemaphoreCount; sem++) GetResourceManager()->MarkResourceFrameReferenced( GetResID(pSubmits[s].pWaitSemaphores[sem]), eFrameRef_Read); for(uint32_t sem = 0; sem < pSubmits[s].signalSemaphoreCount; sem++) GetResourceManager()->MarkResourceFrameReferenced( GetResID(pSubmits[s].pSignalSemaphores[sem]), eFrameRef_Read); } } } return ret; }
void STDMETHODCALLTYPE WrappedID3D12CommandQueue::ExecuteCommandLists( UINT NumCommandLists, ID3D12CommandList *const *ppCommandLists) { ID3D12CommandList **unwrapped = m_pDevice->GetTempArray<ID3D12CommandList *>(NumCommandLists); for(UINT i = 0; i < NumCommandLists; i++) unwrapped[i] = Unwrap(ppCommandLists[i]); m_pReal->ExecuteCommandLists(NumCommandLists, unwrapped); if(m_State >= WRITING) { SCOPED_LOCK(m_Lock); SCOPED_LOCK(m_pDevice->GetCapTransitionLock()); bool capframe = (m_State == WRITING_CAPFRAME); set<ResourceId> refdIDs; for(UINT i = 0; i < NumCommandLists; i++) { D3D12ResourceRecord *record = GetRecord(ppCommandLists[i]); if(record->ContainsExecuteIndirect) m_QueueRecord->ContainsExecuteIndirect = true; m_pDevice->ApplyBarriers(record->bakedCommands->cmdInfo->barriers); // need to lock the whole section of code, not just the check on // m_State, as we also need to make sure we don't check the state, // start marking dirty resources then while we're doing so the // state becomes capframe. // the next sections where we mark resources referenced and add // the submit chunk to the frame record don't have to be protected. // Only the decision of whether we're inframe or not, and marking // dirty. if(capframe) { for(auto it = record->bakedCommands->cmdInfo->dirtied.begin(); it != record->bakedCommands->cmdInfo->dirtied.end(); ++it) GetResourceManager()->MarkPendingDirty(*it); } else { for(auto it = record->bakedCommands->cmdInfo->dirtied.begin(); it != record->bakedCommands->cmdInfo->dirtied.end(); ++it) GetResourceManager()->MarkDirtyResource(*it); } if(capframe) { // any descriptor copies or writes could reference new resources not in the // bound descs list yet. So we take all of those referenced descriptors and // include them to see if we need to flush std::vector<D3D12Descriptor> dynDescRefs; m_pDevice->GetDynamicDescriptorReferences(dynDescRefs); for(size_t d = 0; d < dynDescRefs.size(); d++) { ResourceId id, id2; FrameRefType ref = eFrameRef_Read; dynDescRefs[d].GetRefIDs(id, id2, ref); if(id != ResourceId()) { refdIDs.insert(id); GetResourceManager()->MarkResourceFrameReferenced(id, ref); } if(id2 != ResourceId()) { refdIDs.insert(id2); GetResourceManager()->MarkResourceFrameReferenced(id2, ref); } } // for each bound descriptor table, mark it referenced as well as all resources currently // bound to it for(auto it = record->bakedCommands->cmdInfo->boundDescs.begin(); it != record->bakedCommands->cmdInfo->boundDescs.end(); ++it) { D3D12Descriptor *desc = *it; ResourceId id, id2; FrameRefType ref = eFrameRef_Read; desc->GetRefIDs(id, id2, ref); if(id != ResourceId()) { refdIDs.insert(id); GetResourceManager()->MarkResourceFrameReferenced(id, ref); } if(id2 != ResourceId()) { refdIDs.insert(id2); GetResourceManager()->MarkResourceFrameReferenced(id2, ref); } } // pull in frame refs from this baked command list record->bakedCommands->AddResourceReferences(GetResourceManager()); record->bakedCommands->AddReferencedIDs(refdIDs); // reference all executed bundles as well for(size_t b = 0; b < record->bakedCommands->cmdInfo->bundles.size(); b++) { record->bakedCommands->cmdInfo->bundles[b]->bakedCommands->AddResourceReferences( GetResourceManager()); record->bakedCommands->cmdInfo->bundles[b]->bakedCommands->AddReferencedIDs(refdIDs); GetResourceManager()->MarkResourceFrameReferenced( record->bakedCommands->cmdInfo->bundles[b]->GetResourceID(), eFrameRef_Read); record->bakedCommands->cmdInfo->bundles[b]->bakedCommands->AddRef(); } { m_CmdListRecords.push_back(record->bakedCommands); for(size_t sub = 0; sub < record->bakedCommands->cmdInfo->bundles.size(); sub++) m_CmdListRecords.push_back(record->bakedCommands->cmdInfo->bundles[sub]->bakedCommands); } record->bakedCommands->AddRef(); } record->cmdInfo->dirtied.clear(); } if(capframe) { vector<MapState> maps = m_pDevice->GetMaps(); for(auto it = maps.begin(); it != maps.end(); ++it) { WrappedID3D12Resource *res = it->res; UINT subres = it->subres; size_t size = (size_t)it->totalSize; // only need to flush memory that could affect this submitted batch of work if(refdIDs.find(res->GetResourceID()) == refdIDs.end()) { RDCDEBUG("Map of memory %llu not referenced in this queue - not flushing", res->GetResourceID()); continue; } size_t diffStart = 0, diffEnd = 0; bool found = true; byte *ref = res->GetShadow(subres); byte *data = res->GetMap(subres); if(ref) found = FindDiffRange(data, ref, size, diffStart, diffEnd); else diffEnd = size; if(found) { RDCLOG("Persistent map flush forced for %llu (%llu -> %llu)", res->GetResourceID(), (uint64_t)diffStart, (uint64_t)diffEnd); D3D12_RANGE range = {diffStart, diffEnd}; m_pDevice->MapDataWrite(res, subres, data, range); if(ref == NULL) { res->AllocShadow(subres, size); ref = res->GetShadow(subres); } // update comparison shadow for next time memcpy(ref, res->GetMap(subres), size); GetResourceManager()->MarkPendingDirty(res->GetResourceID()); } else { RDCDEBUG("Persistent map flush not needed for %llu", res->GetResourceID()); } } for(UINT i = 0; i < NumCommandLists; i++) { SCOPED_SERIALISE_CONTEXT(EXECUTE_CMD_LISTS); Serialise_ExecuteCommandLists(1, ppCommandLists + i); m_QueueRecord->AddChunk(scope.Get()); } } } }