// Buffer overload: fetches the memory requirements of 'buf' from the driver and forwards them
// to the requirements-based overload with the buffer flag set.
MemoryAllocation WrappedVulkan::AllocateMemoryForResource(VkBuffer buf, MemoryScope scope,
                                                          MemoryType type)
{
  VkDevice device = GetDev();

  // zero-initialised so the struct is well defined before the driver fills it in
  VkMemoryRequirements requirements = {};
  ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), Unwrap(buf), &requirements);

  const bool isBuffer = true;
  return AllocateMemoryForResource(isBuffer, requirements, scope, type);
}
// Image overload: fetches the memory requirements of 'im' from the driver and forwards them
// to the requirements-based overload with the buffer flag cleared.
MemoryAllocation WrappedVulkan::AllocateMemoryForResource(VkImage im, MemoryScope scope,
                                                          MemoryType type)
{
  VkDevice device = GetDev();

  // zero-initialised so the struct is well defined before the driver fills it in
  VkMemoryRequirements requirements = {};
  ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(im), &requirements);

  const bool isBuffer = false;
  return AllocateMemoryForResource(isBuffer, requirements, scope, type);
}
// Frees every memory block tracked for the given scope and empties that scope's block list.
// For each block the device memory is freed first, then its wrapper is released.
void WrappedVulkan::FreeAllMemory(MemoryScope scope)
{
  VkDevice device = GetDev();

  std::vector<MemoryAllocation> &blocks = m_MemoryBlocks[(size_t)scope];

  for(size_t idx = 0; idx < blocks.size(); idx++)
  {
    // work on a copy of the entry, the list itself is only modified by the clear() below
    MemoryAllocation block = blocks[idx];

    ObjDisp(device)->FreeMemory(Unwrap(device), Unwrap(block.mem), NULL);
    GetResourceManager()->ReleaseWrappedResource(block.mem);
  }

  blocks.clear();
}
bool WrappedVulkan::Serialise_vkCmdWaitEvents( Serialiser* localSerialiser, VkCommandBuffer cmdBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers) { SERIALISE_ELEMENT(ResourceId, cmdid, GetResID(cmdBuffer)); SERIALISE_ELEMENT(VkPipelineStageFlagBits, srcStages, (VkPipelineStageFlagBits)srcStageMask); SERIALISE_ELEMENT(VkPipelineStageFlagBits, destStages, (VkPipelineStageFlagBits)dstStageMask); // we don't serialise the original events as we are going to replace this // with our own SERIALISE_ELEMENT(uint32_t, memCount, memoryBarrierCount); SERIALISE_ELEMENT(uint32_t, bufCount, bufferMemoryBarrierCount); SERIALISE_ELEMENT(uint32_t, imgCount, imageMemoryBarrierCount); // we keep the original memory barriers SERIALISE_ELEMENT_ARR(VkMemoryBarrier, memBarriers, pMemoryBarriers, memCount); SERIALISE_ELEMENT_ARR(VkBufferMemoryBarrier, bufMemBarriers, pBufferMemoryBarriers, bufCount); SERIALISE_ELEMENT_ARR(VkImageMemoryBarrier, imgMemBarriers, pImageMemoryBarriers, imgCount); vector<VkImageMemoryBarrier> imgBarriers; vector<VkBufferMemoryBarrier> bufBarriers; // it's possible for buffer or image to be NULL if it refers to a resource that is otherwise // not in the log (barriers do not mark resources referenced). If the resource in question does // not exist, then it's safe to skip this barrier. 
if(m_State < WRITING) { for(uint32_t i=0; i < bufCount; i++) if(bufMemBarriers[i].buffer != VK_NULL_HANDLE) bufBarriers.push_back(bufMemBarriers[i]); for(uint32_t i=0; i < imgCount; i++) { if(imgMemBarriers[i].image != VK_NULL_HANDLE) { imgBarriers.push_back(imgMemBarriers[i]); ReplacePresentableImageLayout(imgBarriers.back().oldLayout); ReplacePresentableImageLayout(imgBarriers.back().newLayout); } } } SAFE_DELETE_ARRAY(bufMemBarriers); SAFE_DELETE_ARRAY(imgMemBarriers); // see top of this file for current event/fence handling if(m_State == EXECUTING) { if(ShouldRerecordCmd(cmdid) && InRerecordRange()) { cmdBuffer = RerecordCmdBuf(cmdid); VkEventCreateInfo evInfo = { VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, NULL, 0, }; VkEvent ev = VK_NULL_HANDLE; ObjDisp(cmdBuffer)->CreateEvent(Unwrap(GetDev()), &evInfo, NULL, &ev); // don't wrap this event ObjDisp(cmdBuffer)->ResetEvent(Unwrap(GetDev()), ev); ObjDisp(cmdBuffer)->CmdSetEvent(Unwrap(cmdBuffer), ev, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); ObjDisp(cmdBuffer)->CmdWaitEvents(Unwrap(cmdBuffer), 1, &ev, (VkPipelineStageFlags)srcStages, (VkPipelineStageFlags)destStages, memCount, memBarriers, (uint32_t)bufBarriers.size(), &bufBarriers[0], (uint32_t)imgBarriers.size(), &imgBarriers[0]); // register to clean this event up once we're done replaying this section of the log m_CleanupEvents.push_back(ev); ResourceId cmd = GetResID(RerecordCmdBuf(cmdid)); GetResourceManager()->RecordBarriers(m_BakedCmdBufferInfo[cmd].imgbarriers, m_ImageLayouts, (uint32_t)imgBarriers.size(), &imgBarriers[0]); } } else if(m_State == READING) { cmdBuffer = GetResourceManager()->GetLiveHandle<VkCommandBuffer>(cmdid); VkEventCreateInfo evInfo = { VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, NULL, 0, }; VkEvent ev = VK_NULL_HANDLE; ObjDisp(cmdBuffer)->CreateEvent(Unwrap(GetDev()), &evInfo, NULL, &ev); // don't wrap this event ObjDisp(cmdBuffer)->ResetEvent(Unwrap(GetDev()), ev); ObjDisp(cmdBuffer)->CmdSetEvent(Unwrap(cmdBuffer), ev, 
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); ObjDisp(cmdBuffer)->CmdWaitEvents(Unwrap(cmdBuffer), 1, &ev, (VkPipelineStageFlags)srcStages, (VkPipelineStageFlags)destStages, memCount, memBarriers, (uint32_t)bufBarriers.size(), &bufBarriers[0], (uint32_t)imgBarriers.size(), &imgBarriers[0]); // register to clean this event up once we're done replaying this section of the log m_CleanupEvents.push_back(ev); ResourceId cmd = GetResID(cmdBuffer); GetResourceManager()->RecordBarriers(m_BakedCmdBufferInfo[cmd].imgbarriers, m_ImageLayouts, (uint32_t)imgBarriers.size(), &imgBarriers[0]); } SAFE_DELETE_ARRAY(memBarriers); return true; }
// Wrapped vkQueueSubmit.
//
// Builds unwrapped copies of all VkSubmitInfo structs (and the handle arrays they point to) in
// temporary memory, forwards the submission to the driver, and then performs capture-side
// book-keeping for every submitted command buffer: applying recorded image barriers, marking
// dirtied resources, and - while a frame is being captured - marking referenced resources,
// flushing coherent persistent maps that the submission references, and serialising one
// QUEUE_SUBMIT chunk per VkSubmitInfo.
//
// Returns the VkResult of the driver's QueueSubmit.
VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount,
                                      const VkSubmitInfo *pSubmits, VkFence fence)
{
  SCOPED_DBG_SINK();

  size_t tempmemSize = sizeof(VkSubmitInfo) * submitCount;

  // need to count how many semaphore and command buffer arrays to allocate for
  for(uint32_t i = 0; i < submitCount; i++)
  {
    tempmemSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
    tempmemSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
    tempmemSize += pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
  }

  byte *memory = GetTempMemory(tempmemSize);

  // layout: [all VkSubmitInfo][wait sems 0][cmd buffers 0][signal sems 0][wait sems 1]...
  VkSubmitInfo *unwrappedSubmits = (VkSubmitInfo *)memory;
  VkSemaphore *unwrappedWaitSems = (VkSemaphore *)(unwrappedSubmits + submitCount);

  for(uint32_t i = 0; i < submitCount; i++)
  {
    RDCASSERT(pSubmits[i].sType == VK_STRUCTURE_TYPE_SUBMIT_INFO && pSubmits[i].pNext == NULL);
    unwrappedSubmits[i] = pSubmits[i];

    unwrappedSubmits[i].pWaitSemaphores =
        unwrappedSubmits[i].waitSemaphoreCount ? unwrappedWaitSems : NULL;
    for(uint32_t o = 0; o < unwrappedSubmits[i].waitSemaphoreCount; o++)
      unwrappedWaitSems[o] = Unwrap(pSubmits[i].pWaitSemaphores[o]);
    unwrappedWaitSems += unwrappedSubmits[i].waitSemaphoreCount;

    VkCommandBuffer *unwrappedCommandBuffers = (VkCommandBuffer *)unwrappedWaitSems;

    unwrappedSubmits[i].pCommandBuffers =
        unwrappedSubmits[i].commandBufferCount ? unwrappedCommandBuffers : NULL;
    for(uint32_t o = 0; o < unwrappedSubmits[i].commandBufferCount; o++)
      unwrappedCommandBuffers[o] = Unwrap(pSubmits[i].pCommandBuffers[o]);
    unwrappedCommandBuffers += unwrappedSubmits[i].commandBufferCount;

    VkSemaphore *unwrappedSignalSems = (VkSemaphore *)unwrappedCommandBuffers;

    unwrappedSubmits[i].pSignalSemaphores =
        unwrappedSubmits[i].signalSemaphoreCount ? unwrappedSignalSems : NULL;
    for(uint32_t o = 0; o < unwrappedSubmits[i].signalSemaphoreCount; o++)
      unwrappedSignalSems[o] = Unwrap(pSubmits[i].pSignalSemaphores[o]);

    // move the cursor past everything written for this submit. Without this the next submit's
    // wait semaphores would be written over this submit's command buffers and signal semaphores.
    unwrappedWaitSems = unwrappedSignalSems + unwrappedSubmits[i].signalSemaphoreCount;
  }

  VkResult ret =
      ObjDisp(queue)->QueueSubmit(Unwrap(queue), submitCount, unwrappedSubmits, Unwrap(fence));

  bool capframe = false;
  set<ResourceId> refdIDs;

  for(uint32_t s = 0; s < submitCount; s++)
  {
    for(uint32_t i = 0; i < pSubmits[s].commandBufferCount; i++)
    {
      ResourceId cmd = GetResID(pSubmits[s].pCommandBuffers[i]);

      VkResourceRecord *record = GetRecord(pSubmits[s].pCommandBuffers[i]);

      {
        SCOPED_LOCK(m_ImageLayoutsLock);
        GetResourceManager()->ApplyBarriers(record->bakedCommands->cmdInfo->imgbarriers,
                                            m_ImageLayouts);
      }

      // need to lock the whole section of code, not just the check on
      // m_State, as we also need to make sure we don't check the state,
      // start marking dirty resources then while we're doing so the
      // state becomes capframe.
      // the next sections where we mark resources referenced and add
      // the submit chunk to the frame record don't have to be protected.
      // Only the decision of whether we're inframe or not, and marking
      // dirty.
      {
        SCOPED_LOCK(m_CapTransitionLock);
        if(m_State == WRITING_CAPFRAME)
        {
          for(auto it = record->bakedCommands->cmdInfo->dirtied.begin();
              it != record->bakedCommands->cmdInfo->dirtied.end(); ++it)
            GetResourceManager()->MarkPendingDirty(*it);

          capframe = true;
        }
        else
        {
          for(auto it = record->bakedCommands->cmdInfo->dirtied.begin();
              it != record->bakedCommands->cmdInfo->dirtied.end(); ++it)
            GetResourceManager()->MarkDirtyResource(*it);
        }
      }

      if(capframe)
      {
        // for each bound descriptor set, mark it referenced as well as all resources currently
        // bound to it
        for(auto it = record->bakedCommands->cmdInfo->boundDescSets.begin();
            it != record->bakedCommands->cmdInfo->boundDescSets.end(); ++it)
        {
          GetResourceManager()->MarkResourceFrameReferenced(GetResID(*it), eFrameRef_Read);

          VkResourceRecord *setrecord = GetRecord(*it);

          for(auto refit = setrecord->descInfo->bindFrameRefs.begin();
              refit != setrecord->descInfo->bindFrameRefs.end(); ++refit)
          {
            refdIDs.insert(refit->first);
            GetResourceManager()->MarkResourceFrameReferenced(refit->first, refit->second.second);

            if(refit->second.first & DescriptorSetData::SPARSE_REF_BIT)
            {
              VkResourceRecord *sparserecord =
                  GetResourceManager()->GetResourceRecord(refit->first);

              GetResourceManager()->MarkSparseMapReferenced(sparserecord->sparseInfo);
            }
          }
        }

        for(auto it = record->bakedCommands->cmdInfo->sparse.begin();
            it != record->bakedCommands->cmdInfo->sparse.end(); ++it)
          GetResourceManager()->MarkSparseMapReferenced(*it);

        // pull in frame refs from this baked command buffer
        record->bakedCommands->AddResourceReferences(GetResourceManager());
        record->bakedCommands->AddReferencedIDs(refdIDs);

        // ref the parent command buffer by itself, this will pull in the cmd buffer pool
        GetResourceManager()->MarkResourceFrameReferenced(record->GetResourceID(), eFrameRef_Read);

        for(size_t sub = 0; sub < record->bakedCommands->cmdInfo->subcmds.size(); sub++)
        {
          record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddResourceReferences(
              GetResourceManager());
          record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddReferencedIDs(refdIDs);
          GetResourceManager()->MarkResourceFrameReferenced(
              record->bakedCommands->cmdInfo->subcmds[sub]->GetResourceID(), eFrameRef_Read);

          record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands->AddRef();
        }

        GetResourceManager()->MarkResourceFrameReferenced(GetResID(queue), eFrameRef_Read);

        if(fence != VK_NULL_HANDLE)
          GetResourceManager()->MarkResourceFrameReferenced(GetResID(fence), eFrameRef_Read);

        {
          SCOPED_LOCK(m_CmdBufferRecordsLock);
          m_CmdBufferRecords.push_back(record->bakedCommands);
          for(size_t sub = 0; sub < record->bakedCommands->cmdInfo->subcmds.size(); sub++)
            m_CmdBufferRecords.push_back(
                record->bakedCommands->cmdInfo->subcmds[sub]->bakedCommands);
        }

        record->bakedCommands->AddRef();
      }

      record->cmdInfo->dirtied.clear();
    }
  }

  if(capframe)
  {
    // take a snapshot of the coherent map list under the lock, then work on the copy
    vector<VkResourceRecord *> maps;
    {
      SCOPED_LOCK(m_CoherentMapsLock);
      maps = m_CoherentMaps;
    }

    for(auto it = maps.begin(); it != maps.end(); ++it)
    {
      VkResourceRecord *record = *it;
      MemMapState &state = *record->memMapState;

      // potential persistent map
      if(state.mapCoherent && state.mappedPtr && !state.mapFlushed)
      {
        // only need to flush memory that could affect this submitted batch of work
        if(refdIDs.find(record->GetResourceID()) == refdIDs.end())
        {
          RDCDEBUG("Map of memory %llu not referenced in this queue - not flushing",
                   record->GetResourceID());
          continue;
        }

        size_t diffStart = 0, diffEnd = 0;
        bool found = true;

// enabled as this is necessary for programs with very large coherent mappings
// (> 1GB) as otherwise more than a couple of vkQueueSubmit calls leads to vast
// memory allocation. There might still be bugs lurking in here though
#if 1
        // this causes vkFlushMappedMemoryRanges call to allocate and copy to refData
        // from serialised buffer. We want to copy *precisely* the serialised data,
        // otherwise there is a gap in time between serialising out a snapshot of
        // the buffer and whenever we then copy into the ref data, e.g. below.
        // during this time, data could be written to the buffer and it won't have
        // been caught in the serialised snapshot, and if it doesn't change then
        // it *also* won't be caught in any future FindDiffRange() calls.
        //
        // Likewise once refData is allocated, the call below will also update it
        // with the data serialised out for the same reason.
        //
        // Note: it's still possible that data is being written to by the
        // application while it's being serialised out in the snapshot below. That
        // is OK, since the application is responsible for ensuring it's not writing
        // data that would be needed by the GPU in this submit. As long as the
        // refdata we use for future use is identical to what was serialised, we
        // shouldn't miss anything
        state.needRefData = true;

        // if we have a previous set of data, compare.
        // otherwise just serialise it all
        if(state.refData)
          found = FindDiffRange((byte *)state.mappedPtr, state.refData, (size_t)state.mapSize,
                                diffStart, diffEnd);
        else
#endif
          diffEnd = (size_t)state.mapSize;

        if(found)
        {
          // MULTIDEVICE should find the device for this queue.
          // MULTIDEVICE only want to flush maps associated with this queue
          VkDevice dev = GetDev();

          {
            RDCLOG("Persistent map flush forced for %llu (%llu -> %llu)", record->GetResourceID(),
                   (uint64_t)diffStart, (uint64_t)diffEnd);
            VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL,
                                         (VkDeviceMemory)(uint64_t)record->Resource,
                                         state.mapOffset + diffStart, diffEnd - diffStart};
            vkFlushMappedMemoryRanges(dev, 1, &range);
            // reset the flag after the forced flush above
            state.mapFlushed = false;
          }

          GetResourceManager()->MarkPendingDirty(record->GetResourceID());
        }
        else
        {
          RDCDEBUG("Persistent map flush not needed for %llu", record->GetResourceID());
        }
      }
    }

    {
      CACHE_THREAD_SERIALISER();

      // each VkSubmitInfo is serialised as its own single-submit chunk
      for(uint32_t s = 0; s < submitCount; s++)
      {
        SCOPED_SERIALISE_CONTEXT(QUEUE_SUBMIT);
        Serialise_vkQueueSubmit(localSerialiser, queue, 1, &pSubmits[s], fence);

        m_FrameCaptureRecord->AddChunk(scope.Get());

        for(uint32_t sem = 0; sem < pSubmits[s].waitSemaphoreCount; sem++)
          GetResourceManager()->MarkResourceFrameReferenced(
              GetResID(pSubmits[s].pWaitSemaphores[sem]), eFrameRef_Read);

        for(uint32_t sem = 0; sem < pSubmits[s].signalSemaphoreCount; sem++)
          GetResourceManager()->MarkResourceFrameReferenced(
              GetResID(pSubmits[s].pSignalSemaphores[sem]), eFrameRef_Read);
      }
    }
  }

  return ret;
}
VkResult WrappedVulkan::vkQueuePresentKHR( VkQueue queue, const VkPresentInfoKHR* pPresentInfo) { if(m_State == WRITING_IDLE) { RenderDoc::Inst().Tick(); GetResourceManager()->FlushPendingDirty(); } m_FrameCounter++; // first present becomes frame #1, this function is at the end of the frame if(pPresentInfo->swapchainCount > 1 && (m_FrameCounter % 100) == 0) { RDCWARN("Presenting multiple swapchains at once - only first will be processed"); } vector<VkSwapchainKHR> unwrappedSwaps; vector<VkSemaphore> unwrappedSems; VkPresentInfoKHR unwrappedInfo = *pPresentInfo; for(uint32_t i=0; i < unwrappedInfo.swapchainCount; i++) unwrappedSwaps.push_back(Unwrap(unwrappedInfo.pSwapchains[i])); for(uint32_t i=0; i < unwrappedInfo.waitSemaphoreCount; i++) unwrappedSems.push_back(Unwrap(unwrappedInfo.pWaitSemaphores[i])); unwrappedInfo.pSwapchains = unwrappedInfo.swapchainCount ? &unwrappedSwaps[0] : NULL; unwrappedInfo.pWaitSemaphores = unwrappedInfo.waitSemaphoreCount ? &unwrappedSems[0] : NULL; // Don't support any extensions for present info RDCASSERT(pPresentInfo->pNext == NULL); VkResourceRecord *swaprecord = GetRecord(pPresentInfo->pSwapchains[0]); RDCASSERT(swaprecord->swapInfo); SwapchainInfo &swapInfo = *swaprecord->swapInfo; bool activeWindow = RenderDoc::Inst().IsActiveWindow(LayerDisp(m_Instance), swapInfo.wndHandle); // need to record which image was last flipped so we can get the correct backbuffer // for a thumbnail in EndFrameCapture swapInfo.lastPresent = pPresentInfo->pImageIndices[0]; m_LastSwap = swaprecord->GetResourceID(); VkImage backbuffer = swapInfo.images[pPresentInfo->pImageIndices[0]].im; if(m_State == WRITING_IDLE) { m_FrameTimes.push_back(m_FrameTimer.GetMilliseconds()); m_TotalTime += m_FrameTimes.back(); m_FrameTimer.Restart(); // update every second if(m_TotalTime > 1000.0) { m_MinFrametime = 10000.0; m_MaxFrametime = 0.0; m_AvgFrametime = 0.0; m_TotalTime = 0.0; for(size_t i=0; i < m_FrameTimes.size(); i++) { m_AvgFrametime += m_FrameTimes[i]; 
if(m_FrameTimes[i] < m_MinFrametime) m_MinFrametime = m_FrameTimes[i]; if(m_FrameTimes[i] > m_MaxFrametime) m_MaxFrametime = m_FrameTimes[i]; } m_AvgFrametime /= double(m_FrameTimes.size()); m_FrameTimes.clear(); } uint32_t overlay = RenderDoc::Inst().GetOverlayBits(); if(overlay & eRENDERDOC_Overlay_Enabled) { VkRenderPass rp = swapInfo.rp; VkImage im = swapInfo.images[pPresentInfo->pImageIndices[0]].im; VkFramebuffer fb = swapInfo.images[pPresentInfo->pImageIndices[0]].fb; VkLayerDispatchTable *vt = ObjDisp(GetDev()); TextPrintState textstate = { GetNextCmd(), rp, fb, swapInfo.extent.width, swapInfo.extent.height }; VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT }; VkResult vkr = vt->BeginCommandBuffer(Unwrap(textstate.cmd), &beginInfo); RDCASSERTEQUAL(vkr, VK_SUCCESS); VkImageMemoryBarrier bbBarrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, NULL, 0, 0, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, Unwrap(im), { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 } }; bbBarrier.srcAccessMask = VK_ACCESS_ALL_READ_BITS; bbBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; DoPipelineBarrier(textstate.cmd, 1, &bbBarrier); GetDebugManager()->BeginText(textstate); if(activeWindow) { vector<RENDERDOC_InputButton> keys = RenderDoc::Inst().GetCaptureKeys(); string overlayText = "Vulkan. "; for(size_t i=0; i < keys.size(); i++) { if(i > 0) overlayText += ", "; overlayText += ToStr::Get(keys[i]); } if(!keys.empty()) overlayText += " to capture."; if(overlay & eRENDERDOC_Overlay_FrameNumber) { overlayText += StringFormat::Fmt(" Frame: %d.", m_FrameCounter); } if(overlay & eRENDERDOC_Overlay_FrameRate) { overlayText += StringFormat::Fmt(" %.2lf ms (%.2lf .. 
%.2lf) (%.0lf FPS)", m_AvgFrametime, m_MinFrametime, m_MaxFrametime, 1000.0f/m_AvgFrametime); } float y=0.0f; if(!overlayText.empty()) { GetDebugManager()->RenderText(textstate, 0.0f, y, overlayText.c_str()); y += 1.0f; } if(overlay & eRENDERDOC_Overlay_CaptureList) { GetDebugManager()->RenderText(textstate, 0.0f, y, "%d Captures saved.\n", (uint32_t)m_FrameRecord.size()); y += 1.0f; uint64_t now = Timing::GetUnixTimestamp(); for(size_t i=0; i < m_FrameRecord.size(); i++) { if(now - m_FrameRecord[i].frameInfo.captureTime < 20) { GetDebugManager()->RenderText(textstate, 0.0f, y, "Captured frame %d.\n", m_FrameRecord[i].frameInfo.frameNumber); y += 1.0f; } } } #if !defined(RELEASE) GetDebugManager()->RenderText(textstate, 0.0f, y, "%llu chunks - %.2f MB", Chunk::NumLiveChunks(), float(Chunk::TotalMem())/1024.0f/1024.0f); y += 1.0f; #endif } else { vector<RENDERDOC_InputButton> keys = RenderDoc::Inst().GetFocusKeys(); string str = "Vulkan. Inactive swapchain."; for(size_t i=0; i < keys.size(); i++) { if(i == 0) str += " "; else str += ", "; str += ToStr::Get(keys[i]); } if(!keys.empty()) str += " to cycle between swapchains"; GetDebugManager()->RenderText(textstate, 0.0f, 0.0f, str.c_str()); } GetDebugManager()->EndText(textstate); std::swap(bbBarrier.oldLayout, bbBarrier.newLayout); bbBarrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; bbBarrier.dstAccessMask = VK_ACCESS_ALL_READ_BITS; DoPipelineBarrier(textstate.cmd, 1, &bbBarrier); ObjDisp(textstate.cmd)->EndCommandBuffer(Unwrap(textstate.cmd)); SubmitCmds(); FlushQ(); } } VkResult vkr = ObjDisp(queue)->QueuePresentKHR(Unwrap(queue), &unwrappedInfo); if(!activeWindow) return vkr; RenderDoc::Inst().SetCurrentDriver(RDC_Vulkan); // kill any current capture that isn't application defined if(m_State == WRITING_CAPFRAME && !m_AppControlledCapture) RenderDoc::Inst().EndFrameCapture(LayerDisp(m_Instance), swapInfo.wndHandle); if(RenderDoc::Inst().ShouldTriggerCapture(m_FrameCounter) && m_State == WRITING_IDLE) { 
RenderDoc::Inst().StartFrameCapture(LayerDisp(m_Instance), swapInfo.wndHandle); m_AppControlledCapture = false; } return vkr; }
// Releases a wrapped Vulkan resource, dispatching on the type identified from the pointer.
//
// - Surfaces/swapchains and unknown types are only reported as errors.
// - Command buffers, descriptor sets, physical devices and queues only have their wrappers
//   released, and only on replay (m_State < WRITING).
// - Devices and instances only have their book-keeping removed, and only on replay.
// - Every other type has its wrapper released and the underlying API object destroyed.
//
// A NULL 'res' is accepted and ignored. Always returns true.
bool WrappedVulkan::ReleaseResource(WrappedVkRes *res)
{
  if(res == NULL)
    return true;

  // MULTIDEVICE need to get the actual device that created this object
  VkDevice dev = GetDev();
  const VkLayerDispatchTable *vt = ObjDisp(dev);

  // the same pointer viewed as each wrapper flavour. Which view is valid depends on the
  // resource type switched on below.
  WrappedVkNonDispRes *nondisp = (WrappedVkNonDispRes *)res;
  WrappedVkDispRes *disp = (WrappedVkDispRes *)res;
  uint64_t handle = (uint64_t)nondisp;

  switch(IdentifyTypeByPtr(res))
  {
    case eResSurface:
    case eResSwapchain:
      if(m_State >= WRITING)
        RDCERR("Swapchain/surface object is leaking");
      else
        RDCERR("Should be no swapchain/surface objects created on replay");
      break;

    case eResUnknown: RDCERR("Unknown resource type!"); break;

    case eResCommandBuffer:
      // special case here, on replay we don't have the tracking
      // to remove these with the parent object so do it here.
      // This ensures we clean up after ourselves with a well-
      // behaved application.
      if(m_State < WRITING)
        GetResourceManager()->ReleaseWrappedResource((VkCommandBuffer)res);
      break;
    case eResDescriptorSet:
      if(m_State < WRITING)
        GetResourceManager()->ReleaseWrappedResource(VkDescriptorSet(handle));
      break;
    case eResPhysicalDevice:
      if(m_State < WRITING)
        GetResourceManager()->ReleaseWrappedResource((VkPhysicalDevice)disp);
      break;
    case eResQueue:
      if(m_State < WRITING)
        GetResourceManager()->ReleaseWrappedResource((VkQueue)disp);
      break;

    case eResDevice:
      // these are explicitly released elsewhere, do not need to destroy
      // any API objects.
      // On replay though we do need to tidy up book-keeping for these.
      if(m_State < WRITING)
      {
        GetResourceManager()->ReleaseCurrentResource(disp->id);
        GetResourceManager()->RemoveWrapper(ToTypedHandle(disp->real.As<VkDevice>()));
      }
      break;
    case eResInstance:
      if(m_State < WRITING)
      {
        GetResourceManager()->ReleaseCurrentResource(disp->id);
        GetResourceManager()->RemoveWrapper(ToTypedHandle(disp->real.As<VkInstance>()));
      }
      break;

    // for all remaining types the real handle is read out *before* the wrapper is released,
    // and the API object is destroyed afterwards using that saved handle.
    case eResDeviceMemory:
    {
      VkDeviceMemory real = nondisp->real.As<VkDeviceMemory>();
      GetResourceManager()->ReleaseWrappedResource(VkDeviceMemory(handle));
      vt->FreeMemory(Unwrap(dev), real, NULL);
      break;
    }
    case eResBuffer:
    {
      VkBuffer real = nondisp->real.As<VkBuffer>();
      GetResourceManager()->ReleaseWrappedResource(VkBuffer(handle));
      vt->DestroyBuffer(Unwrap(dev), real, NULL);
      break;
    }
    case eResBufferView:
    {
      VkBufferView real = nondisp->real.As<VkBufferView>();
      GetResourceManager()->ReleaseWrappedResource(VkBufferView(handle));
      vt->DestroyBufferView(Unwrap(dev), real, NULL);
      break;
    }
    case eResImage:
    {
      VkImage real = nondisp->real.As<VkImage>();
      GetResourceManager()->ReleaseWrappedResource(VkImage(handle));
      vt->DestroyImage(Unwrap(dev), real, NULL);
      break;
    }
    case eResImageView:
    {
      VkImageView real = nondisp->real.As<VkImageView>();
      GetResourceManager()->ReleaseWrappedResource(VkImageView(handle));
      vt->DestroyImageView(Unwrap(dev), real, NULL);
      break;
    }
    case eResFramebuffer:
    {
      VkFramebuffer real = nondisp->real.As<VkFramebuffer>();
      GetResourceManager()->ReleaseWrappedResource(VkFramebuffer(handle));
      vt->DestroyFramebuffer(Unwrap(dev), real, NULL);
      break;
    }
    case eResRenderPass:
    {
      VkRenderPass real = nondisp->real.As<VkRenderPass>();
      GetResourceManager()->ReleaseWrappedResource(VkRenderPass(handle));
      vt->DestroyRenderPass(Unwrap(dev), real, NULL);
      break;
    }
    case eResShaderModule:
    {
      VkShaderModule real = nondisp->real.As<VkShaderModule>();
      GetResourceManager()->ReleaseWrappedResource(VkShaderModule(handle));
      vt->DestroyShaderModule(Unwrap(dev), real, NULL);
      break;
    }
    case eResPipelineCache:
    {
      VkPipelineCache real = nondisp->real.As<VkPipelineCache>();
      GetResourceManager()->ReleaseWrappedResource(VkPipelineCache(handle));
      vt->DestroyPipelineCache(Unwrap(dev), real, NULL);
      break;
    }
    case eResPipelineLayout:
    {
      VkPipelineLayout real = nondisp->real.As<VkPipelineLayout>();
      GetResourceManager()->ReleaseWrappedResource(VkPipelineLayout(handle));
      vt->DestroyPipelineLayout(Unwrap(dev), real, NULL);
      break;
    }
    case eResPipeline:
    {
      VkPipeline real = nondisp->real.As<VkPipeline>();
      GetResourceManager()->ReleaseWrappedResource(VkPipeline(handle));
      vt->DestroyPipeline(Unwrap(dev), real, NULL);
      break;
    }
    case eResSampler:
    {
      VkSampler real = nondisp->real.As<VkSampler>();
      GetResourceManager()->ReleaseWrappedResource(VkSampler(handle));
      vt->DestroySampler(Unwrap(dev), real, NULL);
      break;
    }
    case eResDescriptorPool:
    {
      VkDescriptorPool real = nondisp->real.As<VkDescriptorPool>();
      GetResourceManager()->ReleaseWrappedResource(VkDescriptorPool(handle));
      vt->DestroyDescriptorPool(Unwrap(dev), real, NULL);
      break;
    }
    case eResDescriptorSetLayout:
    {
      VkDescriptorSetLayout real = nondisp->real.As<VkDescriptorSetLayout>();
      GetResourceManager()->ReleaseWrappedResource(VkDescriptorSetLayout(handle));
      vt->DestroyDescriptorSetLayout(Unwrap(dev), real, NULL);
      break;
    }
    case eResCommandPool:
    {
      VkCommandPool real = nondisp->real.As<VkCommandPool>();
      GetResourceManager()->ReleaseWrappedResource(VkCommandPool(handle));
      vt->DestroyCommandPool(Unwrap(dev), real, NULL);
      break;
    }
    case eResFence:
    {
      VkFence real = nondisp->real.As<VkFence>();
      GetResourceManager()->ReleaseWrappedResource(VkFence(handle));
      vt->DestroyFence(Unwrap(dev), real, NULL);
      break;
    }
    case eResEvent:
    {
      VkEvent real = nondisp->real.As<VkEvent>();
      GetResourceManager()->ReleaseWrappedResource(VkEvent(handle));
      vt->DestroyEvent(Unwrap(dev), real, NULL);
      break;
    }
    case eResQueryPool:
    {
      VkQueryPool real = nondisp->real.As<VkQueryPool>();
      GetResourceManager()->ReleaseWrappedResource(VkQueryPool(handle));
      vt->DestroyQueryPool(Unwrap(dev), real, NULL);
      break;
    }
    case eResSemaphore:
    {
      VkSemaphore real = nondisp->real.As<VkSemaphore>();
      GetResourceManager()->ReleaseWrappedResource(VkSemaphore(handle));
      vt->DestroySemaphore(Unwrap(dev), real, NULL);
      break;
    }
  }

  return true;
}
// Sub-allocates memory for a resource from the per-scope list of memory blocks, creating a new
// block when no existing one can satisfy the request.
//
//   buffer - true if the resource is a buffer, false if it is an image
//   mrq    - the resource's memory requirements (size, alignment, allowed memory type bits)
//   scope  - selects which block list (and block-size counter) is used
//   type   - the kind of memory wanted (upload, GPU local or readback)
//
// Returns an allocation describing the memory object, the offset within it and the (aligned)
// size reserved for the resource.
MemoryAllocation WrappedVulkan::AllocateMemoryForResource(bool buffer, VkMemoryRequirements mrq,
                                                          MemoryScope scope, MemoryType type)
{
  MemoryAllocation ret;
  ret.scope = scope;
  ret.type = type;
  ret.buffer = buffer;
  ret.size = AlignUp(mrq.size, mrq.alignment);

  RDCDEBUG("Allocating 0x%llx with alignment 0x%llx in 0x%x for a %s (%s in %s)", ret.size,
           mrq.alignment, mrq.memoryTypeBits, buffer ? "buffer" : "image", ToStr(type).c_str(),
           ToStr(scope).c_str());

  std::vector<MemoryAllocation> &blockList = m_MemoryBlocks[(size_t)scope];

  // first try to find a match
  int i = 0;
  for(MemoryAllocation &block : blockList)
  {
    RDCDEBUG(
        "Considering block %d: memory type %u and type %s. Total size 0x%llx, current offset "
        "0x%llx, last alloc was %s",
        i, block.memoryTypeIndex, ToStr(block.type).c_str(), block.size, block.offs,
        block.buffer ? "buffer" : "image");
    i++;

    // skip this block if it's not the memory type we want.
    // the mask is built with an unsigned 1 so that testing bit 31 is well defined.
    if(ret.type != block.type || (mrq.memoryTypeBits & (1U << block.memoryTypeIndex)) == 0)
    {
      RDCDEBUG("block type %d or memory type %d is incompatible", block.type,
               block.memoryTypeIndex);
      continue;
    }

    // offs is where we can put our next sub-allocation
    VkDeviceSize offs = block.offs;

    // if we are on a buffer/image, account for any alignment we might have to do
    if(ret.buffer != block.buffer)
      offs = AlignUp(offs, m_PhysicalDeviceData.props.limits.bufferImageGranularity);

    // align as required by the resource
    offs = AlignUp(offs, mrq.alignment);

    if(offs > block.size)
    {
      RDCDEBUG("Next offset 0x%llx would be off the end of the memory (size 0x%llx).", offs,
               block.size);
      continue;
    }

    VkDeviceSize avail = block.size - offs;

    RDCDEBUG("At next offset 0x%llx, there's 0x%llx bytes available for 0x%llx bytes requested",
             offs, avail, ret.size);

    // if the allocation will fit, we've found our candidate.
    if(ret.size <= avail)
    {
      // update the block offset and buffer/image bit
      block.offs = offs + ret.size;
      block.buffer = ret.buffer;

      // update our return value
      ret.offs = offs;
      ret.mem = block.mem;

      RDCDEBUG("Allocating using this block: 0x%llx -> 0x%llx", ret.offs, block.offs);

      // stop searching
      break;
    }
  }

  if(ret.mem == VK_NULL_HANDLE)
  {
    RDCDEBUG("No available block found - allocating new block");

    // per-scope block size in MB, remembered between calls
    VkDeviceSize &allocSize = m_MemoryBlockSize[(size_t)scope];

    // we start allocating 32M, then increment each time we need a new block.
    switch(allocSize)
    {
      case 0: allocSize = 32; break;
      case 32: allocSize = 64; break;
      case 64: allocSize = 128; break;
      case 128:
      case 256: allocSize = 256; break;
      default:
        RDCDEBUG("Unexpected previous allocation size 0x%llx bytes, allocating 256MB", allocSize);
        allocSize = 256;
        break;
    }

    uint32_t memoryTypeIndex = 0;

    switch(ret.type)
    {
      case MemoryType::Upload:
        memoryTypeIndex = GetUploadMemoryIndex(mrq.memoryTypeBits);
        break;
      case MemoryType::GPULocal:
        memoryTypeIndex = GetGPULocalMemoryIndex(mrq.memoryTypeBits);
        break;
      case MemoryType::Readback:
        memoryTypeIndex = GetReadbackMemoryIndex(mrq.memoryTypeBits);
        break;
    }

    VkMemoryAllocateInfo info = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL, allocSize * 1024 * 1024, memoryTypeIndex,
    };

    if(ret.size > info.allocationSize)
    {
      // if we get an over-sized allocation, first try to immediately jump to the largest block
      // size.
      allocSize = 256;
      info.allocationSize = allocSize * 1024 * 1024;

      // if it's still over-sized, just allocate precisely enough and give it a dedicated allocation
      if(ret.size > info.allocationSize)
      {
        RDCDEBUG("Over-sized allocation for 0x%llx bytes", ret.size);
        info.allocationSize = ret.size;
      }
    }

    RDCDEBUG("Creating new allocation of 0x%llx bytes", info.allocationSize);

    MemoryAllocation chunk;
    chunk.buffer = ret.buffer;
    chunk.memoryTypeIndex = memoryTypeIndex;
    chunk.scope = scope;
    chunk.type = type;
    chunk.size = info.allocationSize;

    // the offset starts immediately after this allocation
    chunk.offs = ret.size;

    VkDevice d = GetDev();

    // do the actual allocation
    VkResult vkr = ObjDisp(d)->AllocateMemory(Unwrap(d), &info, NULL, &chunk.mem);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    GetResourceManager()->WrapResource(Unwrap(d), chunk.mem);

    // push the new chunk
    blockList.push_back(chunk);

    // return the first bytes in the new chunk
    ret.offs = 0;
    ret.mem = chunk.mem;
  }

  return ret;
}
// Wrapped vkQueuePresentKHR.
//
// Bumps the frame counter, draws the overlay text on the presented backbuffer while idle,
// forwards the present to the driver using unwrapped handles, and for the active window
// ends/starts frame captures. Only the first swapchain in pPresentInfo is processed.
// Returns the VkResult of the driver's QueuePresentKHR.
VkResult WrappedVulkan::vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo)
{
  if(m_State == WRITING_IDLE)
  {
    RenderDoc::Inst().Tick();
    GetResourceManager()->FlushPendingDirty();
  }

  // first present becomes frame #1, this function is at the end of the frame
  m_FrameCounter++;

  const bool multipleSwapchains = pPresentInfo->swapchainCount > 1;
  if(multipleSwapchains && (m_FrameCounter % 100) == 0)
  {
    RDCWARN("Presenting multiple swapchains at once - only first will be processed");
  }

  // build a copy of the present info that refers to unwrapped handles
  VkPresentInfoKHR driverInfo = *pPresentInfo;

  vector<VkSwapchainKHR> realSwapchains;
  vector<VkSemaphore> realSemaphores;

  for(uint32_t sw = 0; sw < driverInfo.swapchainCount; sw++)
    realSwapchains.push_back(Unwrap(driverInfo.pSwapchains[sw]));

  for(uint32_t sem = 0; sem < driverInfo.waitSemaphoreCount; sem++)
    realSemaphores.push_back(Unwrap(driverInfo.pWaitSemaphores[sem]));

  if(driverInfo.swapchainCount)
    driverInfo.pSwapchains = &realSwapchains[0];
  else
    driverInfo.pSwapchains = NULL;

  if(driverInfo.waitSemaphoreCount)
    driverInfo.pWaitSemaphores = &realSemaphores[0];
  else
    driverInfo.pWaitSemaphores = NULL;

  // Don't support any extensions for present info
  RDCASSERT(pPresentInfo->pNext == NULL);

  VkResourceRecord *firstSwapRecord = GetRecord(pPresentInfo->pSwapchains[0]);
  RDCASSERT(firstSwapRecord->swapInfo);

  SwapchainInfo &swapInfo = *firstSwapRecord->swapInfo;

  bool activeWindow = RenderDoc::Inst().IsActiveWindow(LayerDisp(m_Instance), swapInfo.wndHandle);

  // need to record which image was last flipped so we can get the correct backbuffer
  // for a thumbnail in EndFrameCapture
  const uint32_t presentedIndex = pPresentInfo->pImageIndices[0];
  swapInfo.lastPresent = presentedIndex;
  m_LastSwap = firstSwapRecord->GetResourceID();

  // the overlay bits are only queried while idle, thanks to short-circuit evaluation
  if(m_State == WRITING_IDLE && (RenderDoc::Inst().GetOverlayBits() & eRENDERDOC_Overlay_Enabled))
  {
    VkRenderPass overlayPass = swapInfo.rp;
    VkImage backbufferImage = swapInfo.images[presentedIndex].im;
    VkFramebuffer backbufferFB = swapInfo.images[presentedIndex].fb;

    VkLayerDispatchTable *dispatch = ObjDisp(GetDev());

    TextPrintState textstate = {GetNextCmd(),
                                overlayPass,
                                backbufferFB,
                                swapInfo.extent.width,
                                swapInfo.extent.height,
                                swapInfo.format};

    VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
                                          VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};

    VkResult beginResult = dispatch->BeginCommandBuffer(Unwrap(textstate.cmd), &beginInfo);
    RDCASSERTEQUAL(beginResult, VK_SUCCESS);

    // present -> colour attachment, so the text can be rendered onto the backbuffer
    VkImageMemoryBarrier layoutBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                          NULL,
                                          0,
                                          0,
                                          VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
                                          VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                          VK_QUEUE_FAMILY_IGNORED,
                                          VK_QUEUE_FAMILY_IGNORED,
                                          Unwrap(backbufferImage),
                                          {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}};

    layoutBarrier.srcAccessMask = VK_ACCESS_ALL_READ_BITS;
    layoutBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;

    DoPipelineBarrier(textstate.cmd, 1, &layoutBarrier);

    GetDebugManager()->BeginText(textstate);

    int flags = 0;
    if(activeWindow)
      flags = RenderDoc::eOverlay_ActiveWindow;

    string overlayText = RenderDoc::Inst().GetOverlayText(RDC_Vulkan, m_FrameCounter, flags);

    if(!overlayText.empty())
      GetDebugManager()->RenderText(textstate, 0.0f, 0.0f, overlayText.c_str());

    GetDebugManager()->EndText(textstate);

    // colour attachment -> present, reversing the barrier above
    std::swap(layoutBarrier.oldLayout, layoutBarrier.newLayout);
    layoutBarrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    layoutBarrier.dstAccessMask = VK_ACCESS_ALL_READ_BITS;

    DoPipelineBarrier(textstate.cmd, 1, &layoutBarrier);

    ObjDisp(textstate.cmd)->EndCommandBuffer(Unwrap(textstate.cmd));

    SubmitCmds();
    FlushQ();
  }

  VkResult presentResult = ObjDisp(queue)->QueuePresentKHR(Unwrap(queue), &driverInfo);

  // capture start/stop is only driven by presents on the active window
  if(activeWindow)
  {
    RenderDoc::Inst().SetCurrentDriver(RDC_Vulkan);

    // kill any current capture that isn't application defined
    if(m_State == WRITING_CAPFRAME && !m_AppControlledCapture)
      RenderDoc::Inst().EndFrameCapture(LayerDisp(m_Instance), swapInfo.wndHandle);

    if(RenderDoc::Inst().ShouldTriggerCapture(m_FrameCounter) && m_State == WRITING_IDLE)
    {
      RenderDoc::Inst().StartFrameCapture(LayerDisp(m_Instance), swapInfo.wndHandle);

      m_AppControlledCapture = false;
    }
  }

  return presentResult;
}