// Walks the drawcall tree below drawnode depth-first (each child's subtree is processed before
// the child itself). For every child that has events, a new GPUTimer is appended to ctx.timers
// with two timestamp queries, a pipeline statistics query and an occlusion query, and the child
// is replayed so that the queries bracket only the draw portion of the replay.
// ctx.eventStart is advanced past each timed drawcall.
void D3D11Replay::FillTimers(D3D11CounterContext &ctx, const DrawcallDescription &drawnode)
{
  const D3D11_QUERY_DESC timestampDesc = {D3D11_QUERY_TIMESTAMP, 0};
  const D3D11_QUERY_DESC pipeStatsDesc = {D3D11_QUERY_PIPELINE_STATISTICS, 0};
  const D3D11_QUERY_DESC occlusionDesc = {D3D11_QUERY_OCCLUSION, 0};

  // an empty child list simply means the loop body never runs
  const size_t numChildren = drawnode.children.size();

  for(size_t childIdx = 0; childIdx < numChildren; ++childIdx)
  {
    const DrawcallDescription &child = drawnode.children[childIdx];

    // descend first, so nested drawcalls get their timers before this one
    FillTimers(ctx, child);

    // nothing to replay for a node without events
    if(child.events.empty())
      continue;

    // append a fresh timer and fill it in place
    ctx.timers.push_back(GPUTimer());
    GPUTimer &timer = ctx.timers.back();

    timer.eventId = child.eventId;
    timer.occlusion = NULL;
    timer.stats = NULL;
    timer.after = NULL;
    timer.before = NULL;

    HRESULT hr = m_pDevice->CreateQuery(&timestampDesc, &timer.before);
    RDCASSERTEQUAL(hr, S_OK);

    hr = m_pDevice->CreateQuery(&timestampDesc, &timer.after);
    RDCASSERTEQUAL(hr, S_OK);

    hr = m_pDevice->CreateQuery(&pipeStatsDesc, &timer.stats);
    RDCASSERTEQUAL(hr, S_OK);

    hr = m_pDevice->CreateQuery(&occlusionDesc, &timer.occlusion);
    RDCASSERTEQUAL(hr, S_OK);

    // replay everything up to (but not including) the draw itself
    m_pDevice->ReplayLog(ctx.eventStart, child.eventId, eReplay_WithoutDraw);

    SerializeImmediateContext();

    // timestamps are only issued when both queries were created
    const bool haveTimestamps = timer.before && timer.after;

    // open the queries: stats, then occlusion, then the leading timestamp
    if(timer.stats)
      m_pImmediateContext->Begin(timer.stats);
    if(timer.occlusion)
      m_pImmediateContext->Begin(timer.occlusion);
    if(haveTimestamps)
      m_pImmediateContext->End(timer.before);

    m_pDevice->ReplayLog(ctx.eventStart, child.eventId, eReplay_OnlyDraw);

    // close them again in the reverse order
    if(haveTimestamps)
      m_pImmediateContext->End(timer.after);
    if(timer.occlusion)
      m_pImmediateContext->End(timer.occlusion);
    if(timer.stats)
      m_pImmediateContext->End(timer.stats);

    ctx.eventStart = child.eventId + 1;
  }
}
// Walks the drawcall tree below drawnode depth-first (each child's subtree is processed before
// the child itself). For every child that has events, a GPUTimer is either created (when
// ctx.reuseIdx is -1) or taken from ctx.timers at ctx.reuseIdx (which is then incremented), and
// the child is replayed with timestamp queries issued immediately before and after the draw.
// ctx.eventStart is advanced past each timed drawcall.
void D3D11DebugManager::FillTimers(CounterContext &ctx, const DrawcallTreeNode &drawnode)
{
  const D3D11_QUERY_DESC timestampDesc = {D3D11_QUERY_TIMESTAMP, 0};

  // an empty child list simply means the loop body never runs
  const size_t numChildren = drawnode.children.size();

  for(size_t childIdx = 0; childIdx < numChildren; ++childIdx)
  {
    const DrawcallTreeNode &childNode = drawnode.children[childIdx];
    const FetchDrawcall &draw = childNode.draw;

    // descend first, so nested drawcalls get their timers before this one
    FillTimers(ctx, childNode);

    if(draw.events.count == 0)
      continue;

    GPUTimer *timer = NULL;
    HRESULT hr = S_OK;

    if(ctx.reuseIdx != -1)
    {
      // a previous pass already created the timers, walk through them in order
      timer = &ctx.timers[ctx.reuseIdx];
      ctx.reuseIdx++;
    }
    else
    {
      ctx.timers.push_back(GPUTimer());
      timer = &ctx.timers.back();

      timer->eventID = draw.eventID;
      timer->after = NULL;
      timer->before = NULL;

      hr = m_pDevice->CreateQuery(&timestampDesc, &timer->before);
      RDCASSERTEQUAL(hr, S_OK);

      hr = m_pDevice->CreateQuery(&timestampDesc, &timer->after);
      RDCASSERTEQUAL(hr, S_OK);
    }

    // replay everything up to (but not including) the draw itself
    m_WrappedDevice->ReplayLog(ctx.eventStart, draw.eventID, eReplay_WithoutDraw);

    m_pImmediateContext->Flush();

    // the draw is always replayed; the timestamps around it are only issued when both exist
    const bool haveTimestamps = timer->before && timer->after;

    if(haveTimestamps)
      m_pImmediateContext->End(timer->before);

    m_WrappedDevice->ReplayLog(ctx.eventStart, draw.eventID, eReplay_OnlyDraw);

    if(haveTimestamps)
      m_pImmediateContext->End(timer->after);

    ctx.eventStart = draw.eventID + 1;
  }
}
bool WrappedVulkan::Serialise_vkAllocateMemory( Serialiser* localSerialiser, VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory) { SERIALISE_ELEMENT(ResourceId, devId, GetResID(device)); SERIALISE_ELEMENT(VkMemoryAllocateInfo, info, *pAllocateInfo); SERIALISE_ELEMENT(ResourceId, id, GetResID(*pMemory)); if(m_State == READING) { VkDeviceMemory mem = VK_NULL_HANDLE; device = GetResourceManager()->GetLiveHandle<VkDevice>(devId); // serialised memory type index is non-remapped, so we remap now. // PORTABILITY may need to re-write info to change memory type index to the // appropriate index on replay info.memoryTypeIndex = m_PhysicalDeviceData.memIdxMap[info.memoryTypeIndex]; VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &info, NULL, &mem); if(ret != VK_SUCCESS) { RDCERR("Failed on resource serialise-creation, VkResult: 0x%08x", ret); } else { ResourceId live = GetResourceManager()->WrapResource(Unwrap(device), mem); GetResourceManager()->AddLiveResource(id, mem); m_CreationInfo.m_Memory[live].Init(GetResourceManager(), m_CreationInfo, &info); // create a buffer with the whole memory range bound, for copying to and from // conveniently (for initial state data) VkBuffer buf = VK_NULL_HANDLE; VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, NULL, 0, info.allocationSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT, }; ret = ObjDisp(device)->CreateBuffer(Unwrap(device), &bufInfo, NULL, &buf); RDCASSERTEQUAL(ret, VK_SUCCESS); ResourceId bufid = GetResourceManager()->WrapResource(Unwrap(device), buf); ObjDisp(device)->BindBufferMemory(Unwrap(device), Unwrap(buf), Unwrap(mem), 0); // register as a live-only resource, so it is cleaned up properly GetResourceManager()->AddLiveResource(bufid, buf); m_CreationInfo.m_Memory[live].wholeMemBuf = buf; } } return true; }
// Creates the VkSurfaceKHR for this output window from its Android native window handle (wnd),
// storing the result in 'surface'. The creation result is asserted to be VK_SUCCESS.
void VulkanReplay::OutputWindow::CreateSurface(VkInstance inst)
{
  // members in declaration order: sType, pNext, flags, window
  VkAndroidSurfaceCreateInfoKHR createInfo = {
      VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR, NULL, 0, wnd,
  };

  VkResult vkr = ObjDisp(inst)->CreateAndroidSurfaceKHR(Unwrap(inst), &createInfo, NULL, &surface);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);
}
// Creates the VkSurfaceKHR for this output window from its Win32 window handle (wnd), storing
// the result in 'surface'. The creation result is asserted to be VK_SUCCESS.
void VulkanReplay::OutputWindow::CreateSurface(VkInstance inst)
{
  // look up the module that contains dllLocator, without changing its reference count
  HMODULE module = NULL;
  GetModuleHandleExA(
      GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
      (const char *)&dllLocator, &module);

  // members in declaration order: sType, pNext, flags, hinstance, hwnd
  VkWin32SurfaceCreateInfoKHR createInfo = {
      VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, NULL, 0, (HINSTANCE)module, wnd,
  };

  VkResult vkr = ObjDisp(inst)->CreateWin32SurfaceKHR(Unwrap(inst), &createInfo, NULL, &surface);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);
}
void VulkanReplay::RenderMesh(uint32_t eventId, const vector<MeshFormat> &secondaryDraws, const MeshDisplay &cfg) { if(cfg.position.vertexResourceId == ResourceId() || cfg.position.numIndices == 0) return; auto it = m_OutputWindows.find(m_ActiveWinID); if(m_ActiveWinID == 0 || it == m_OutputWindows.end()) return; OutputWindow &outw = it->second; // if the swapchain failed to create, do nothing. We will try to recreate it // again in CheckResizeOutputWindow (once per render 'frame') if(outw.swap == VK_NULL_HANDLE) return; VkDevice dev = m_pDriver->GetDev(); VkCommandBuffer cmd = m_pDriver->GetNextCmd(); const VkLayerDispatchTable *vt = ObjDisp(dev); VkResult vkr = VK_SUCCESS; VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo); RDCASSERTEQUAL(vkr, VK_SUCCESS); VkRenderPassBeginInfo rpbegin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, NULL, Unwrap(outw.rpdepth), Unwrap(outw.fbdepth), {{ 0, 0, }, {m_DebugWidth, m_DebugHeight}}, 0, NULL, }; vt->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE); VkViewport viewport = {0.0f, 0.0f, (float)m_DebugWidth, (float)m_DebugHeight, 0.0f, 1.0f}; vt->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport); Matrix4f projMat = Matrix4f::Perspective(90.0f, 0.1f, 100000.0f, float(m_DebugWidth) / float(m_DebugHeight)); Matrix4f InvProj = projMat.Inverse(); Matrix4f camMat = cfg.cam ? ((Camera *)cfg.cam)->GetMatrix() : Matrix4f::Identity(); Matrix4f ModelViewProj = projMat.Mul(camMat); Matrix4f guessProjInv; if(cfg.position.unproject) { // the derivation of the projection matrix might not be right (hell, it could be an // orthographic projection). But it'll be close enough likely. Matrix4f guessProj = cfg.position.farPlane != FLT_MAX ? 
Matrix4f::Perspective(cfg.fov, cfg.position.nearPlane, cfg.position.farPlane, cfg.aspect) : Matrix4f::ReversePerspective(cfg.fov, cfg.position.nearPlane, cfg.aspect); if(cfg.ortho) { guessProj = Matrix4f::Orthographic(cfg.position.nearPlane, cfg.position.farPlane); } guessProjInv = guessProj.Inverse(); ModelViewProj = projMat.Mul(camMat.Mul(guessProjInv)); } if(!secondaryDraws.empty()) { size_t mapsUsed = 0; for(size_t i = 0; i < secondaryDraws.size(); i++) { const MeshFormat &fmt = secondaryDraws[i]; if(fmt.vertexResourceId != ResourceId()) { // TODO should move the color to a push constant so we don't have to map all the time uint32_t uboOffs = 0; MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); data->mvp = ModelViewProj; data->color = Vec4f(fmt.meshColor.x, fmt.meshColor.y, fmt.meshColor.z, fmt.meshColor.w); data->homogenousInput = cfg.position.unproject; data->pointSpriteSize = Vec2f(0.0f, 0.0f); data->displayFormat = MESHDISPLAY_SOLID; data->rawoutput = 0; m_MeshRender.UBO.Unmap(); mapsUsed++; if(mapsUsed + 1 >= m_MeshRender.UBO.GetRingCount()) { // flush and sync so we can use more maps vt->CmdEndRenderPass(Unwrap(cmd)); vkr = vt->EndCommandBuffer(Unwrap(cmd)); RDCASSERTEQUAL(vkr, VK_SUCCESS); m_pDriver->SubmitCmds(); m_pDriver->FlushQ(); mapsUsed = 0; cmd = m_pDriver->GetNextCmd(); vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo); RDCASSERTEQUAL(vkr, VK_SUCCESS); vt->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE); vt->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport); } MeshDisplayPipelines secondaryCache = GetDebugManager()->CacheMeshDisplayPipelines( m_MeshRender.PipeLayout, secondaryDraws[i], secondaryDraws[i]); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(secondaryCache.pipes[MeshDisplayPipelines::ePipe_WireDepth])); VkBuffer vb = 
m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(fmt.vertexResourceId); VkDeviceSize offs = fmt.vertexByteOffset; vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(vb), &offs); if(fmt.indexByteStride) { VkIndexType idxtype = VK_INDEX_TYPE_UINT16; if(fmt.indexByteStride == 4) idxtype = VK_INDEX_TYPE_UINT32; if(fmt.indexResourceId != ResourceId()) { VkBuffer ib = m_pDriver->GetResourceManager()->GetLiveHandle<VkBuffer>(fmt.indexResourceId); vt->CmdBindIndexBuffer(Unwrap(cmd), Unwrap(ib), fmt.indexByteOffset, idxtype); } vt->CmdDrawIndexed(Unwrap(cmd), fmt.numIndices, 1, 0, fmt.baseVertex, 0); } else { vt->CmdDraw(Unwrap(cmd), fmt.numIndices, 1, 0, 0); } } } { // flush and sync so we can use more maps vt->CmdEndRenderPass(Unwrap(cmd)); vkr = vt->EndCommandBuffer(Unwrap(cmd)); RDCASSERTEQUAL(vkr, VK_SUCCESS); m_pDriver->SubmitCmds(); m_pDriver->FlushQ(); cmd = m_pDriver->GetNextCmd(); vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo); RDCASSERTEQUAL(vkr, VK_SUCCESS); vt->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE); vt->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport); } } MeshDisplayPipelines cache = GetDebugManager()->CacheMeshDisplayPipelines( m_MeshRender.PipeLayout, cfg.position, cfg.second); if(cfg.position.vertexResourceId != ResourceId()) { VkBuffer vb = m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(cfg.position.vertexResourceId); VkDeviceSize offs = cfg.position.vertexByteOffset; // we source all data from the first instanced value in the instanced case, so make sure we // offset correctly here. 
if(cfg.position.instanced) offs += cfg.position.vertexByteStride * (cfg.curInstance / cfg.position.instStepRate); vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(vb), &offs); } SolidShade solidShadeMode = cfg.solidShadeMode; // can't support secondary shading without a buffer - no pipeline will have been created if(solidShadeMode == SolidShade::Secondary && cfg.second.vertexResourceId == ResourceId()) solidShadeMode = SolidShade::NoSolid; if(solidShadeMode == SolidShade::Secondary) { VkBuffer vb = m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(cfg.second.vertexResourceId); VkDeviceSize offs = cfg.second.vertexByteOffset; // we source all data from the first instanced value in the instanced case, so make sure we // offset correctly here. if(cfg.second.instanced) offs += cfg.second.vertexByteStride * (cfg.curInstance / cfg.second.instStepRate); vt->CmdBindVertexBuffers(Unwrap(cmd), 1, 1, UnwrapPtr(vb), &offs); } // solid render if(solidShadeMode != SolidShade::NoSolid && cfg.position.topology < Topology::PatchList) { VkPipeline pipe = VK_NULL_HANDLE; switch(solidShadeMode) { default: case SolidShade::Solid: pipe = cache.pipes[MeshDisplayPipelines::ePipe_SolidDepth]; break; case SolidShade::Lit: pipe = cache.pipes[MeshDisplayPipelines::ePipe_Lit]; break; case SolidShade::Secondary: pipe = cache.pipes[MeshDisplayPipelines::ePipe_Secondary]; break; } // can't support lit rendering without the pipeline - maybe geometry shader wasn't supported. 
if(solidShadeMode == SolidShade::Lit && pipe == VK_NULL_HANDLE) pipe = cache.pipes[MeshDisplayPipelines::ePipe_SolidDepth]; uint32_t uboOffs = 0; MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); if(solidShadeMode == SolidShade::Lit) data->invProj = projMat.Inverse(); data->mvp = ModelViewProj; data->color = Vec4f(0.8f, 0.8f, 0.0f, 1.0f); data->homogenousInput = cfg.position.unproject; data->pointSpriteSize = Vec2f(0.0f, 0.0f); data->displayFormat = (uint32_t)solidShadeMode; data->rawoutput = 0; if(solidShadeMode == SolidShade::Secondary && cfg.second.showAlpha) data->displayFormat = MESHDISPLAY_SECONDARY_ALPHA; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(pipe)); if(cfg.position.indexByteStride) { VkIndexType idxtype = VK_INDEX_TYPE_UINT16; if(cfg.position.indexByteStride == 4) idxtype = VK_INDEX_TYPE_UINT32; if(cfg.position.indexResourceId != ResourceId()) { VkBuffer ib = m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(cfg.position.indexResourceId); vt->CmdBindIndexBuffer(Unwrap(cmd), Unwrap(ib), cfg.position.indexByteOffset, idxtype); } vt->CmdDrawIndexed(Unwrap(cmd), cfg.position.numIndices, 1, 0, cfg.position.baseVertex, 0); } else { vt->CmdDraw(Unwrap(cmd), cfg.position.numIndices, 1, 0, 0); } } // wireframe render if(solidShadeMode == SolidShade::NoSolid || cfg.wireframeDraw || cfg.position.topology >= Topology::PatchList) { Vec4f wireCol = Vec4f(cfg.position.meshColor.x, cfg.position.meshColor.y, cfg.position.meshColor.z, 1.0f); uint32_t uboOffs = 0; MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); data->mvp = ModelViewProj; data->color = wireCol; data->displayFormat = (uint32_t)SolidShade::Solid; data->homogenousInput = cfg.position.unproject; data->pointSpriteSize = Vec2f(0.0f, 0.0f); data->rawoutput 
= 0; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_WireDepth])); if(cfg.position.indexByteStride) { VkIndexType idxtype = VK_INDEX_TYPE_UINT16; if(cfg.position.indexByteStride == 4) idxtype = VK_INDEX_TYPE_UINT32; if(cfg.position.indexResourceId != ResourceId()) { VkBuffer ib = m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(cfg.position.indexResourceId); vt->CmdBindIndexBuffer(Unwrap(cmd), Unwrap(ib), cfg.position.indexByteOffset, idxtype); } vt->CmdDrawIndexed(Unwrap(cmd), cfg.position.numIndices, 1, 0, cfg.position.baseVertex, 0); } else { vt->CmdDraw(Unwrap(cmd), cfg.position.numIndices, 1, 0, 0); } } MeshFormat helper; helper.indexByteStride = 2; helper.topology = Topology::LineList; helper.format.type = ResourceFormatType::Regular; helper.format.compByteWidth = 4; helper.format.compCount = 4; helper.format.compType = CompType::Float; helper.vertexByteStride = sizeof(Vec4f); // cache pipelines for use in drawing wireframe helpers cache = GetDebugManager()->CacheMeshDisplayPipelines(m_MeshRender.PipeLayout, helper, helper); if(cfg.showBBox) { Vec4f a = Vec4f(cfg.minBounds.x, cfg.minBounds.y, cfg.minBounds.z, cfg.minBounds.w); Vec4f b = Vec4f(cfg.maxBounds.x, cfg.maxBounds.y, cfg.maxBounds.z, cfg.maxBounds.w); Vec4f TLN = Vec4f(a.x, b.y, a.z, 1.0f); // TopLeftNear, etc... 
Vec4f TRN = Vec4f(b.x, b.y, a.z, 1.0f); Vec4f BLN = Vec4f(a.x, a.y, a.z, 1.0f); Vec4f BRN = Vec4f(b.x, a.y, a.z, 1.0f); Vec4f TLF = Vec4f(a.x, b.y, b.z, 1.0f); Vec4f TRF = Vec4f(b.x, b.y, b.z, 1.0f); Vec4f BLF = Vec4f(a.x, a.y, b.z, 1.0f); Vec4f BRF = Vec4f(b.x, a.y, b.z, 1.0f); // 12 frustum lines => 24 verts Vec4f bbox[24] = { TLN, TRN, TRN, BRN, BRN, BLN, BLN, TLN, TLN, TLF, TRN, TRF, BLN, BLF, BRN, BRF, TLF, TRF, TRF, BRF, BRF, BLF, BLF, TLF, }; VkDeviceSize vboffs = 0; Vec4f *ptr = (Vec4f *)m_MeshRender.BBoxVB.Map(vboffs); memcpy(ptr, bbox, sizeof(bbox)); m_MeshRender.BBoxVB.Unmap(); vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs); uint32_t uboOffs = 0; MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); data->mvp = ModelViewProj; data->color = Vec4f(0.2f, 0.2f, 1.0f, 1.0f); data->displayFormat = (uint32_t)SolidShade::Solid; data->homogenousInput = 0; data->pointSpriteSize = Vec2f(0.0f, 0.0f); data->rawoutput = 0; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_WireDepth])); vt->CmdDraw(Unwrap(cmd), 24, 1, 0, 0); } // draw axis helpers if(!cfg.position.unproject) { VkDeviceSize vboffs = 0; vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.AxisFrustumVB.buf), &vboffs); uint32_t uboOffs = 0; MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); data->mvp = ModelViewProj; data->color = Vec4f(1.0f, 0.0f, 0.0f, 1.0f); data->displayFormat = (uint32_t)SolidShade::Solid; data->homogenousInput = 0; data->pointSpriteSize = Vec2f(0.0f, 0.0f); data->rawoutput = 0; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, 
&uboOffs); vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_Wire])); vt->CmdDraw(Unwrap(cmd), 2, 1, 0, 0); // poke the color (this would be a good candidate for a push constant) data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); data->mvp = ModelViewProj; data->color = Vec4f(0.0f, 1.0f, 0.0f, 1.0f); data->displayFormat = (uint32_t)SolidShade::Solid; data->homogenousInput = 0; data->pointSpriteSize = Vec2f(0.0f, 0.0f); data->rawoutput = 0; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdDraw(Unwrap(cmd), 2, 1, 2, 0); data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); data->mvp = ModelViewProj; data->color = Vec4f(0.0f, 0.0f, 1.0f, 1.0f); data->displayFormat = (uint32_t)SolidShade::Solid; data->homogenousInput = 0; data->pointSpriteSize = Vec2f(0.0f, 0.0f); data->rawoutput = 0; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdDraw(Unwrap(cmd), 2, 1, 4, 0); } // 'fake' helper frustum if(cfg.position.unproject) { VkDeviceSize vboffs = sizeof(Vec4f) * 6; // skim the axis helpers vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.AxisFrustumVB.buf), &vboffs); uint32_t uboOffs = 0; MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); data->mvp = ModelViewProj; data->color = Vec4f(1.0f, 1.0f, 1.0f, 1.0f); data->displayFormat = (uint32_t)SolidShade::Solid; data->homogenousInput = 0; data->pointSpriteSize = Vec2f(0.0f, 0.0f); data->rawoutput = 0; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, 
Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_Wire])); vt->CmdDraw(Unwrap(cmd), 24, 1, 0, 0); } // show highlighted vertex if(cfg.highlightVert != ~0U) { { // need to end our cmd buffer, it might be submitted in GetBufferData when caching highlight // data vt->CmdEndRenderPass(Unwrap(cmd)); vkr = vt->EndCommandBuffer(Unwrap(cmd)); RDCASSERTEQUAL(vkr, VK_SUCCESS); #if ENABLED(SINGLE_FLUSH_VALIDATE) m_pDriver->SubmitCmds(); #endif } m_HighlightCache.CacheHighlightingData(eventId, cfg); { // get a new cmdbuffer and begin it cmd = m_pDriver->GetNextCmd(); vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo); RDCASSERTEQUAL(vkr, VK_SUCCESS); vt->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE); vt->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport); } Topology meshtopo = cfg.position.topology; /////////////////////////////////////////////////////////////// // vectors to be set from buffers, depending on topology // this vert (blue dot, required) FloatVector activeVertex; // primitive this vert is a part of (red prim, optional) vector<FloatVector> activePrim; // for patch lists, to show other verts in patch (green dots, optional) // for non-patch lists, we use the activePrim and adjacentPrimVertices // to show what other verts are related vector<FloatVector> inactiveVertices; // adjacency (line or tri, strips or lists) (green prims, optional) // will be N*M long, N adjacent prims of M verts each. 
M = primSize below vector<FloatVector> adjacentPrimVertices; helper.topology = Topology::TriangleList; uint32_t primSize = 3; // number of verts per primitive if(meshtopo == Topology::LineList || meshtopo == Topology::LineStrip || meshtopo == Topology::LineList_Adj || meshtopo == Topology::LineStrip_Adj) { primSize = 2; helper.topology = Topology::LineList; } else { // update the cache, as it's currently linelist helper.topology = Topology::TriangleList; cache = GetDebugManager()->CacheMeshDisplayPipelines(m_MeshRender.PipeLayout, helper, helper); } bool valid = m_HighlightCache.FetchHighlightPositions(cfg, activeVertex, activePrim, adjacentPrimVertices, inactiveVertices); if(valid) { //////////////////////////////////////////////////////////////// // prepare rendering (for both vertices & primitives) // if data is from post transform, it will be in clipspace if(cfg.position.unproject) ModelViewProj = projMat.Mul(camMat.Mul(guessProjInv)); else ModelViewProj = projMat.Mul(camMat); MeshUBOData uniforms = {}; uniforms.mvp = ModelViewProj; uniforms.color = Vec4f(1.0f, 1.0f, 1.0f, 1.0f); uniforms.displayFormat = (uint32_t)SolidShade::Solid; uniforms.homogenousInput = cfg.position.unproject; uniforms.pointSpriteSize = Vec2f(0.0f, 0.0f); uint32_t uboOffs = 0; MeshUBOData *ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); *ubodata = uniforms; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_Solid])); //////////////////////////////////////////////////////////////// // render primitives // Draw active primitive (red) uniforms.color = Vec4f(1.0f, 0.0f, 0.0f, 1.0f); // poke the color (this would be a good candidate for a push constant) ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); *ubodata = uniforms; 
m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); if(activePrim.size() >= primSize) { VkDeviceSize vboffs = 0; Vec4f *ptr = (Vec4f *)m_MeshRender.BBoxVB.Map(vboffs, sizeof(Vec4f) * primSize); memcpy(ptr, &activePrim[0], sizeof(Vec4f) * primSize); m_MeshRender.BBoxVB.Unmap(); vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs); vt->CmdDraw(Unwrap(cmd), primSize, 1, 0, 0); } // Draw adjacent primitives (green) uniforms.color = Vec4f(0.0f, 1.0f, 0.0f, 1.0f); // poke the color (this would be a good candidate for a push constant) ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); *ubodata = uniforms; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); if(adjacentPrimVertices.size() >= primSize && (adjacentPrimVertices.size() % primSize) == 0) { VkDeviceSize vboffs = 0; Vec4f *ptr = (Vec4f *)m_MeshRender.BBoxVB.Map(vboffs, sizeof(Vec4f) * adjacentPrimVertices.size()); memcpy(ptr, &adjacentPrimVertices[0], sizeof(Vec4f) * adjacentPrimVertices.size()); m_MeshRender.BBoxVB.Unmap(); vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs); vt->CmdDraw(Unwrap(cmd), (uint32_t)adjacentPrimVertices.size(), 1, 0, 0); } //////////////////////////////////////////////////////////////// // prepare to render dots float scale = 800.0f / float(m_DebugHeight); float asp = float(m_DebugWidth) / float(m_DebugHeight); uniforms.pointSpriteSize = Vec2f(scale / asp, scale); // Draw active vertex (blue) uniforms.color = Vec4f(0.0f, 0.0f, 1.0f, 1.0f); // poke the color (this would be a good candidate for a push constant) ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); *ubodata = uniforms; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), 
VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); // vertices are drawn with tri strips helper.topology = Topology::TriangleStrip; cache = GetDebugManager()->CacheMeshDisplayPipelines(m_MeshRender.PipeLayout, helper, helper); FloatVector vertSprite[4] = { activeVertex, activeVertex, activeVertex, activeVertex, }; vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_Solid])); { VkDeviceSize vboffs = 0; Vec4f *ptr = (Vec4f *)m_MeshRender.BBoxVB.Map(vboffs, sizeof(vertSprite)); memcpy(ptr, &vertSprite[0], sizeof(vertSprite)); m_MeshRender.BBoxVB.Unmap(); vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs); vt->CmdDraw(Unwrap(cmd), 4, 1, 0, 0); } // Draw inactive vertices (green) uniforms.color = Vec4f(0.0f, 1.0f, 0.0f, 1.0f); // poke the color (this would be a good candidate for a push constant) ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs); *ubodata = uniforms; m_MeshRender.UBO.Unmap(); vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_MeshRender.PipeLayout), 0, 1, UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs); if(!inactiveVertices.empty()) { VkDeviceSize vboffs = 0; FloatVector *ptr = (FloatVector *)m_MeshRender.BBoxVB.Map(vboffs, sizeof(vertSprite)); for(size_t i = 0; i < inactiveVertices.size(); i++) { *ptr++ = inactiveVertices[i]; *ptr++ = inactiveVertices[i]; *ptr++ = inactiveVertices[i]; *ptr++ = inactiveVertices[i]; } m_MeshRender.BBoxVB.Unmap(); for(size_t i = 0; i < inactiveVertices.size(); i++) { vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs); vt->CmdDraw(Unwrap(cmd), 4, 1, 0, 0); vboffs += sizeof(FloatVector) * 4; } } } } vt->CmdEndRenderPass(Unwrap(cmd)); 
vkr = vt->EndCommandBuffer(Unwrap(cmd)); RDCASSERTEQUAL(vkr, VK_SUCCESS); #if ENABLED(SINGLE_FLUSH_VALIDATE) m_pDriver->SubmitCmds(); #endif }
// Wrapped vkAllocateMemory. Forwards the allocation to the next layer/driver and wraps the
// resulting memory handle.
//
// When m_State >= WRITING the requested memory type index is remapped through the device
// record's memIdxMap before the real call, the call is serialised into a chunk using the
// application's original allocate info, and a resource record is created. A map state is only
// attached to the record when the memory type is host visible.
//
// Otherwise the memory is registered as a live resource and a buffer covering the whole
// allocation is created and bound to it, for copying initial state data.
//
// Returns the VkResult of the underlying AllocateMemory call.
VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo,
                                         const VkAllocationCallbacks *pAllocator,
                                         VkDeviceMemory *pMemory)
{
  VkMemoryAllocateInfo info = *pAllocateInfo;
  if(m_State >= WRITING)
    info.memoryTypeIndex = GetRecord(device)->memIdxMap[info.memoryTypeIndex];

  VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &info, pAllocator, pMemory);

  if(ret == VK_SUCCESS)
  {
    ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pMemory);

    if(m_State >= WRITING)
    {
      Chunk *chunk = NULL;

      {
        CACHE_THREAD_SERIALISER();

        // note the un-remapped pAllocateInfo is serialised, not 'info'
        SCOPED_SERIALISE_CONTEXT(ALLOC_MEM);
        Serialise_vkAllocateMemory(localSerialiser, device, pAllocateInfo, NULL, pMemory);

        chunk = scope.Get();
      }

      // create resource record for gpu memory
      VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pMemory);
      RDCASSERT(record);

      record->AddChunk(chunk);

      record->Length = pAllocateInfo->allocationSize;

      uint32_t memProps =
          m_PhysicalDeviceData.fakeMemProps->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags;

      // if memory is not host visible, so not mappable, don't create map state at all
      if((memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0)
      {
        record->memMapState = new MemMapState();
        record->memMapState->mapCoherent = (memProps & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
        record->memMapState->refData = NULL;
      }
    }
    else
    {
      GetResourceManager()->AddLiveResource(id, *pMemory);

      m_CreationInfo.m_Memory[id].Init(GetResourceManager(), m_CreationInfo, pAllocateInfo);

      // create a buffer with the whole memory range bound, for copying to and from
      // conveniently (for initial state data)
      VkBuffer buf = VK_NULL_HANDLE;

      VkBufferCreateInfo bufInfo = {
          VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
          NULL,
          0,
          info.allocationSize,
          VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      };

      // use a separate result for the helper buffer, so that the value returned to the caller
      // is always the result of the memory allocation itself
      VkResult vkr = ObjDisp(device)->CreateBuffer(Unwrap(device), &bufInfo, NULL, &buf);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);

      ResourceId bufid = GetResourceManager()->WrapResource(Unwrap(device), buf);

      // binding returns a VkResult as well, check it rather than silently dropping it
      vkr = ObjDisp(device)->BindBufferMemory(Unwrap(device), Unwrap(buf), Unwrap(*pMemory), 0);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);

      // register as a live-only resource, so it is cleaned up properly
      GetResourceManager()->AddLiveResource(bufid, buf);

      m_CreationInfo.m_Memory[id].wholeMemBuf = buf;
    }
  }

  return ret;
}
VkResult WrappedVulkan::vkCreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { VkDeviceCreateInfo createInfo = *pCreateInfo; uint32_t qCount = 0; VkResult vkr = VK_SUCCESS; ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, NULL); VkQueueFamilyProperties *props = new VkQueueFamilyProperties[qCount]; ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, props); // find a queue that supports all capabilities, and if one doesn't exist, add it. bool found = false; uint32_t qFamilyIdx = 0; VkQueueFlags search = (VK_QUEUE_GRAPHICS_BIT); // for queue priorities, if we need it float one = 1.0f; // if we need to change the requested queues, it will point to this VkDeviceQueueCreateInfo *modQueues = NULL; for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++) { uint32_t idx = createInfo.pQueueCreateInfos[i].queueFamilyIndex; RDCASSERT(idx < qCount); // this requested queue is one we can use too if((props[idx].queueFlags & search) == search && createInfo.pQueueCreateInfos[i].queueCount > 0) { qFamilyIdx = idx; found = true; break; } } // if we didn't find it, search for which queue family we should add a request for if(!found) { RDCDEBUG("App didn't request a queue family we can use - adding our own"); for(uint32_t i=0; i < qCount; i++) { if((props[i].queueFlags & search) == search) { qFamilyIdx = i; found = true; break; } } if(!found) { SAFE_DELETE_ARRAY(props); RDCERR("Can't add a queue with required properties for RenderDoc! 
Unsupported configuration"); return VK_ERROR_INITIALIZATION_FAILED; } // we found the queue family, add it modQueues = new VkDeviceQueueCreateInfo[createInfo.queueCreateInfoCount + 1]; for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++) modQueues[i] = createInfo.pQueueCreateInfos[i]; modQueues[createInfo.queueCreateInfoCount].queueFamilyIndex = qFamilyIdx; modQueues[createInfo.queueCreateInfoCount].queueCount = 1; modQueues[createInfo.queueCreateInfoCount].pQueuePriorities = &one; createInfo.pQueueCreateInfos = modQueues; createInfo.queueCreateInfoCount++; } SAFE_DELETE_ARRAY(props); m_QueueFamilies.resize(createInfo.queueCreateInfoCount); for(size_t i=0; i < createInfo.queueCreateInfoCount; i++) { uint32_t family = createInfo.pQueueCreateInfos[i].queueFamilyIndex; uint32_t count = createInfo.pQueueCreateInfos[i].queueCount; m_QueueFamilies.resize(RDCMAX(m_QueueFamilies.size(), size_t(family+1))); m_QueueFamilies[family] = new VkQueue[count]; for(uint32_t q=0; q < count; q++) m_QueueFamilies[family][q] = VK_NULL_HANDLE; } VkLayerDeviceCreateInfo *layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext; // step through the chain of pNext until we get to the link info while(layerCreateInfo && (layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || layerCreateInfo->function != VK_LAYER_LINK_INFO) ) { layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext; } RDCASSERT(layerCreateInfo); PFN_vkGetDeviceProcAddr gdpa = layerCreateInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr; PFN_vkGetInstanceProcAddr gipa = layerCreateInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr; // move chain on for next layer layerCreateInfo->u.pLayerInfo = layerCreateInfo->u.pLayerInfo->pNext; PFN_vkCreateDevice createFunc = (PFN_vkCreateDevice)gipa(VK_NULL_HANDLE, "vkCreateDevice"); // now search again through for the loader data callback (if it exists) layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext; // step through the chain of pNext 
while(layerCreateInfo && (layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || layerCreateInfo->function != VK_LOADER_DATA_CALLBACK) ) { layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext; } // if we found one (we might not - on old loaders), then store the func ptr for // use instead of SetDispatchTableOverMagicNumber if(layerCreateInfo) { RDCASSERT(m_SetDeviceLoaderData == layerCreateInfo->u.pfnSetDeviceLoaderData || m_SetDeviceLoaderData == NULL, m_SetDeviceLoaderData, layerCreateInfo->u.pfnSetDeviceLoaderData); m_SetDeviceLoaderData = layerCreateInfo->u.pfnSetDeviceLoaderData; } VkResult ret = createFunc(Unwrap(physicalDevice), &createInfo, pAllocator, pDevice); // don't serialise out any of the pNext stuff for layer initialisation // (note that we asserted above that there was nothing else in the chain) createInfo.pNext = NULL; if(ret == VK_SUCCESS) { InitDeviceTable(*pDevice, gdpa); ResourceId id = GetResourceManager()->WrapResource(*pDevice, *pDevice); if(m_State >= WRITING) { Chunk *chunk = NULL; { CACHE_THREAD_SERIALISER(); SCOPED_SERIALISE_CONTEXT(CREATE_DEVICE); Serialise_vkCreateDevice(localSerialiser, physicalDevice, &createInfo, NULL, pDevice); chunk = scope.Get(); } VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pDevice); RDCASSERT(record); record->AddChunk(chunk); record->memIdxMap = GetRecord(physicalDevice)->memIdxMap; record->instDevInfo = new InstanceDeviceInfo(); #undef CheckExt #define CheckExt(name) record->instDevInfo->name = GetRecord(m_Instance)->instDevInfo->name; // inherit extension enablement from instance, that way GetDeviceProcAddress can check // for enabled extensions for instance functions CheckInstanceExts(); #undef CheckExt #define CheckExt(name) if(!strcmp(createInfo.ppEnabledExtensionNames[i], STRINGIZE(name))) { record->instDevInfo->name = true; } for(uint32_t i=0; i < createInfo.enabledExtensionCount; i++) { CheckDeviceExts(); } InitDeviceExtensionTables(*pDevice); 
GetRecord(m_Instance)->AddParent(record); } else { GetResourceManager()->AddLiveResource(id, *pDevice); } VkDevice device = *pDevice; RDCASSERT(m_Device == VK_NULL_HANDLE); // MULTIDEVICE m_PhysicalDevice = physicalDevice; m_Device = device; m_QueueFamilyIdx = qFamilyIdx; if(m_InternalCmds.cmdpool == VK_NULL_HANDLE) { VkCommandPoolCreateInfo poolInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, NULL, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, qFamilyIdx }; vkr = ObjDisp(device)->CreateCommandPool(Unwrap(device), &poolInfo, NULL, &m_InternalCmds.cmdpool); RDCASSERTEQUAL(vkr, VK_SUCCESS); GetResourceManager()->WrapResource(Unwrap(device), m_InternalCmds.cmdpool); } ObjDisp(physicalDevice)->GetPhysicalDeviceProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.props); ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.memProps); ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures(Unwrap(physicalDevice), &m_PhysicalDeviceData.features); m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); m_PhysicalDeviceData.fakeMemProps = GetRecord(physicalDevice)->memProps; m_DebugManager = new VulkanDebugManager(this, device); } SAFE_DELETE_ARRAY(modQueues); return ret; }
// Layer entry point for vkEnumeratePhysicalDevices.
//
// Always enumerates every physical device from the next layer so that each one
// is wrapped (and, when capturing, gets a resource record and serialised
// chunk), then reports the results to the caller.
//
// pPhysicalDeviceCount - if pPhysicalDevices is NULL, receives the number of
//                        devices. Otherwise it is read as the capacity of
//                        pPhysicalDevices and updated to the number written.
// pPhysicalDevices     - optional array that receives the wrapped handles.
//
// Returns VK_SUCCESS, VK_INCOMPLETE if the caller's array was too small to hold
// every device, or the error returned by the next layer.
VkResult WrappedVulkan::vkEnumeratePhysicalDevices(
    VkInstance instance,
    uint32_t* pPhysicalDeviceCount,
    VkPhysicalDevice* pPhysicalDevices)
{
  uint32_t count = 0;

  VkResult vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, NULL);

  if(vkr != VK_SUCCESS)
    return vkr;

  VkPhysicalDevice *devices = new VkPhysicalDevice[count];

  vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, devices);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // on a real error the array contents are undefined, so don't try to wrap them
  if(vkr != VK_SUCCESS && vkr != VK_INCOMPLETE)
  {
    SAFE_DELETE_ARRAY(devices);
    return vkr;
  }

  m_PhysicalDevices.resize(count);

  for(uint32_t i=0; i < count; i++)
  {
    // it's perfectly valid for enumerate type functions to return the same handle
    // each time. If that happens, we will already have a wrapper created so just
    // return the wrapped object to the user and do nothing else
    if(m_PhysicalDevices[i] != VK_NULL_HANDLE)
    {
      GetWrapped(m_PhysicalDevices[i])->RewrapObject(devices[i]);
      devices[i] = m_PhysicalDevices[i];
    }
    else
    {
      GetResourceManager()->WrapResource(instance, devices[i]);

      if(m_State >= WRITING)
      {
        // add the record first since it's used in the serialise function below to fetch
        // the memory indices
        VkResourceRecord *record = GetResourceManager()->AddResourceRecord(devices[i]);
        RDCASSERT(record);

        record->memProps = new VkPhysicalDeviceMemoryProperties();

        ObjDisp(devices[i])->GetPhysicalDeviceMemoryProperties(Unwrap(devices[i]), record->memProps);

        m_PhysicalDevices[i] = devices[i];

        // we remap memory indices to discourage coherent maps as much as possible
        RemapMemoryIndices(record->memProps, &record->memIdxMap);

        {
          CACHE_THREAD_SERIALISER();

          SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS);
          Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]);

          record->AddChunk(scope.Get());
        }

        VkResourceRecord *instrecord = GetRecord(instance);

        instrecord->AddParent(record);

        // treat physical devices as pool members of the instance (ie. freed when the instance dies)
        {
          instrecord->LockChunks();
          instrecord->pooledChildren.push_back(record);
          instrecord->UnlockChunks();
        }
      }
    }
  }

  VkResult ret = VK_SUCCESS;

  if(pPhysicalDevices)
  {
    // when an output array is supplied, the incoming count is its capacity.
    // Never write more handles than the caller has room for.
    uint32_t writeCount = count;

    if(pPhysicalDeviceCount && *pPhysicalDeviceCount < count)
    {
      writeCount = *pPhysicalDeviceCount;
      ret = VK_INCOMPLETE;
    }

    memcpy(pPhysicalDevices, devices, writeCount*sizeof(VkPhysicalDevice));

    if(pPhysicalDeviceCount) *pPhysicalDeviceCount = writeCount;
  }
  else if(pPhysicalDeviceCount)
  {
    // pure count query
    *pPhysicalDeviceCount = count;
  }

  SAFE_DELETE_ARRAY(devices);

  return ret;
}
// Layer entry point for vkAllocateMemory.
//
// When capturing (m_State >= WRITING) the requested memory type index is
// translated through the device record's memIdxMap, and the allocation size may
// be rounded up so that a single buffer can cover the whole allocation. On
// success the memory is wrapped; when capturing a resource record and chunk are
// created (with map state for host-visible memory), otherwise the memory is
// registered as a live resource together with a buffer bound over its full range.
//
// Returns the result of the underlying AllocateMemory call. In the non-capturing
// path the return value is overwritten by the result of creating the
// whole-memory buffer.
VkResult WrappedVulkan::vkAllocateMemory(
    VkDevice device,
    const VkMemoryAllocateInfo* pAllocateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkDeviceMemory* pMemory)
{
  VkMemoryAllocateInfo info = *pAllocateInfo;
  if(m_State >= WRITING)
  {
    info.memoryTypeIndex = GetRecord(device)->memIdxMap[info.memoryTypeIndex];

    // we need to be able to allocate a buffer that covers the whole memory range. However
    // if the memory is e.g. 100 bytes (arbitrary example) and buffers have memory requirements
    // such that it must be bound to a multiple of 128 bytes, then we can't create a buffer
    // that entirely covers a 100 byte allocation.
    // To get around this, we create a buffer of the allocation's size with the properties we
    // want, check its required size, then bump up the allocation size to that as if the application
    // had requested more. We're assuming here no system will require something like "buffer of
    // size N must be bound to memory of size N+O for some value of O overhead bytes".
    //
    // this could be optimised as maybe we'll be creating buffers of multiple sizes, but allocation
    // in vulkan is already expensive and making it a little more expensive isn't a big deal.

    VkBufferCreateInfo bufInfo = {
      VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, NULL, 0,
      info.allocationSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT,
    };

    // since this is very short lived, it's not wrapped.
    // Initialised so that a failed create never leaves an indeterminate handle.
    VkBuffer buf = VK_NULL_HANDLE;

    VkResult vkr = ObjDisp(device)->CreateBuffer(Unwrap(device), &bufInfo, NULL, &buf);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    if(vkr == VK_SUCCESS && buf != VK_NULL_HANDLE)
    {
      VkMemoryRequirements mrq = { 0 };
      ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), buf, &mrq);

      RDCASSERTMSG("memory requirements less than desired size", mrq.size >= bufInfo.size, mrq.size, bufInfo.size);

      // round up allocation size to allow creation of buffers
      if(mrq.size >= bufInfo.size)
        info.allocationSize = mrq.size;

      // only destroy a buffer that was actually created
      ObjDisp(device)->DestroyBuffer(Unwrap(device), buf, NULL);
    }
  }

  VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &info, pAllocator, pMemory);

  // restore the memoryTypeIndex to the original, as that's what we want to serialise,
  // but maintain any potential modifications we made to info.allocationSize
  info.memoryTypeIndex = pAllocateInfo->memoryTypeIndex;

  if(ret == VK_SUCCESS)
  {
    ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pMemory);

    if(m_State >= WRITING)
    {
      Chunk *chunk = NULL;

      {
        CACHE_THREAD_SERIALISER();

        SCOPED_SERIALISE_CONTEXT(ALLOC_MEM);
        Serialise_vkAllocateMemory(localSerialiser, device, &info, NULL, pMemory);

        chunk = scope.Get();
      }

      // create resource record for gpu memory
      VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pMemory);
      RDCASSERT(record);

      record->AddChunk(chunk);

      record->Length = info.allocationSize;

      // look up the properties using the application-visible index, which was restored above
      uint32_t memProps = m_PhysicalDeviceData.fakeMemProps->memoryTypes[info.memoryTypeIndex].propertyFlags;

      // if memory is not host visible, so not mappable, don't create map state at all
      if((memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0)
      {
        record->memMapState = new MemMapState();
        record->memMapState->mapCoherent = (memProps & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
        record->memMapState->refData = NULL;
      }
    }
    else
    {
      GetResourceManager()->AddLiveResource(id, *pMemory);

      m_CreationInfo.m_Memory[id].Init(GetResourceManager(), m_CreationInfo, &info);

      // create a buffer with the whole memory range bound, for copying to and from
      // conveniently (for initial state data)
      VkBuffer buf = VK_NULL_HANDLE;

      VkBufferCreateInfo bufInfo = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, NULL, 0,
        info.allocationSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      };

      ret = ObjDisp(device)->CreateBuffer(Unwrap(device), &bufInfo, NULL, &buf);
      RDCASSERTEQUAL(ret, VK_SUCCESS);

      ResourceId bufid = GetResourceManager()->WrapResource(Unwrap(device), buf);

      ObjDisp(device)->BindBufferMemory(Unwrap(device), Unwrap(buf), Unwrap(*pMemory), 0);

      // register as a live-only resource, so it is cleaned up properly
      GetResourceManager()->AddLiveResource(bufid, buf);

      m_CreationInfo.m_Memory[id].wholeMemBuf = buf;
    }
  }

  return ret;
}
ResourceId D3D12Replay::RenderOverlay(ResourceId texid, CompType typeHint, DebugOverlay overlay, uint32_t eventId, const vector<uint32_t> &passEvents) { ID3D12Resource *resource = WrappedID3D12Resource::GetList()[texid]; if(resource == NULL) return ResourceId(); D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc(); std::vector<D3D12_RESOURCE_BARRIER> barriers; int resType = 0; GetDebugManager()->PrepareTextureSampling(resource, typeHint, resType, barriers); D3D12_RESOURCE_DESC overlayTexDesc; overlayTexDesc.Alignment = 0; overlayTexDesc.DepthOrArraySize = 1; overlayTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; overlayTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; overlayTexDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM; overlayTexDesc.Height = resourceDesc.Height; overlayTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; overlayTexDesc.MipLevels = 1; overlayTexDesc.SampleDesc = resourceDesc.SampleDesc; overlayTexDesc.Width = resourceDesc.Width; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; D3D12_RESOURCE_DESC currentOverlayDesc; RDCEraseEl(currentOverlayDesc); if(m_Overlay.Texture) currentOverlayDesc = m_Overlay.Texture->GetDesc(); WrappedID3D12Resource *wrappedCustomRenderTex = (WrappedID3D12Resource *)m_Overlay.Texture; // need to recreate backing custom render tex if(overlayTexDesc.Width != currentOverlayDesc.Width || overlayTexDesc.Height != currentOverlayDesc.Height || overlayTexDesc.Format != currentOverlayDesc.Format || overlayTexDesc.SampleDesc.Count != currentOverlayDesc.SampleDesc.Count || overlayTexDesc.SampleDesc.Quality != currentOverlayDesc.SampleDesc.Quality) { SAFE_RELEASE(m_Overlay.Texture); m_Overlay.resourceId = ResourceId(); ID3D12Resource *customRenderTex = NULL; HRESULT hr = m_pDevice->CreateCommittedResource( 
&heapProps, D3D12_HEAP_FLAG_NONE, &overlayTexDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, NULL, __uuidof(ID3D12Resource), (void **)&customRenderTex); if(FAILED(hr)) { RDCERR("Failed to create custom render tex HRESULT: %s", ToStr(hr).c_str()); return ResourceId(); } wrappedCustomRenderTex = (WrappedID3D12Resource *)customRenderTex; customRenderTex->SetName(L"customRenderTex"); m_Overlay.Texture = wrappedCustomRenderTex; m_Overlay.resourceId = wrappedCustomRenderTex->GetResourceID(); } D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState; ID3D12Resource *renderDepth = NULL; D3D12Descriptor *dsView = GetWrapped(rs.dsv); D3D12_RESOURCE_DESC depthTexDesc = {}; D3D12_DEPTH_STENCIL_VIEW_DESC dsViewDesc = {}; if(dsView) { ID3D12Resource *realDepth = dsView->nonsamp.resource; dsViewDesc = dsView->nonsamp.dsv; depthTexDesc = realDepth->GetDesc(); depthTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; depthTexDesc.Alignment = 0; HRESULT hr = S_OK; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc, D3D12_RESOURCE_STATE_COPY_DEST, NULL, __uuidof(ID3D12Resource), (void **)&renderDepth); if(FAILED(hr)) { RDCERR("Failed to create renderDepth HRESULT: %s", ToStr(hr).c_str()); return m_Overlay.resourceId; } renderDepth->SetName(L"Overlay renderDepth"); ID3D12GraphicsCommandList *list = m_pDevice->GetNewList(); const vector<D3D12_RESOURCE_STATES> &states = m_pDevice->GetSubresourceStates(GetResID(realDepth)); vector<D3D12_RESOURCE_BARRIER> depthBarriers; depthBarriers.reserve(states.size()); for(size_t i = 0; i < states.size(); i++) { D3D12_RESOURCE_BARRIER b; // skip unneeded barriers if(states[i] & D3D12_RESOURCE_STATE_COPY_SOURCE) continue; b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; b.Transition.pResource = realDepth; b.Transition.Subresource = (UINT)i; b.Transition.StateBefore = states[i]; b.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; 
depthBarriers.push_back(b); } if(!depthBarriers.empty()) list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]); list->CopyResource(renderDepth, realDepth); for(size_t i = 0; i < depthBarriers.size(); i++) std::swap(depthBarriers[i].Transition.StateBefore, depthBarriers[i].Transition.StateAfter); if(!depthBarriers.empty()) list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]); D3D12_RESOURCE_BARRIER b = {}; b.Transition.pResource = renderDepth; b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; // prepare tex resource for copying list->ResourceBarrier(1, &b); list->Close(); } D3D12_RENDER_TARGET_VIEW_DESC rtDesc = {}; rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rtDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM; rtDesc.Texture2D.MipSlice = 0; rtDesc.Texture2D.PlaneSlice = 0; if(overlayTexDesc.SampleDesc.Count > 1 || overlayTexDesc.SampleDesc.Quality > 0) rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; D3D12_CPU_DESCRIPTOR_HANDLE rtv = GetDebugManager()->GetCPUHandle(OVERLAY_RTV); m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv); ID3D12GraphicsCommandList *list = m_pDevice->GetNewList(); FLOAT black[] = {0.0f, 0.0f, 0.0f, 0.0f}; list->ClearRenderTargetView(rtv, black, 0, NULL); D3D12_CPU_DESCRIPTOR_HANDLE dsv = {}; if(renderDepth) { dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV); m_pDevice->CreateDepthStencilView( renderDepth, dsViewDesc.Format == DXGI_FORMAT_UNKNOWN ? 
NULL : &dsViewDesc, dsv); } D3D12_DEPTH_STENCIL_DESC dsDesc; dsDesc.BackFace.StencilFailOp = dsDesc.BackFace.StencilPassOp = dsDesc.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; dsDesc.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; dsDesc.FrontFace.StencilFailOp = dsDesc.FrontFace.StencilPassOp = dsDesc.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; dsDesc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; dsDesc.DepthEnable = TRUE; dsDesc.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; dsDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; dsDesc.StencilEnable = FALSE; dsDesc.StencilReadMask = dsDesc.StencilWriteMask = 0xff; WrappedID3D12PipelineState *pipe = NULL; if(rs.pipe != ResourceId()) pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe); if(overlay == DebugOverlay::NaN || overlay == DebugOverlay::Clipping) { // just need the basic texture } else if(overlay == DebugOverlay::Drawcall) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); float overlayConsts[4] = {0.8f, 0.1f, 0.8f, 1.0f}; ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts); psoDesc.PS.pShaderBytecode = ps->GetBufferPointer(); psoDesc.PS.BytecodeLength = ps->GetBufferSize(); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; 
psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; float clearColour[] = {0.0f, 0.0f, 0.0f, 0.5f}; list->ClearRenderTargetView(rtv, clearColour, 0, NULL); list->Close(); list = NULL; ID3D12PipelineState *pso = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pso); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(ps); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(pso); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = rtv; rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE(); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(pso); SAFE_RELEASE(ps); } } else if(overlay == DebugOverlay::BackfaceCull) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); D3D12_CULL_MODE origCull = psoDesc.RasterizerState.CullMode; float redCol[4] = {1.0f, 0.0f, 0.0f, 1.0f}; ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(redCol); float greenCol[4] = {0.0f, 1.0f, 0.0f, 1.0f}; ID3DBlob *green = m_pDevice->GetShaderCache()->MakeFixedColShader(greenCol); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; 
psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; psoDesc.PS.pShaderBytecode = red->GetBufferPointer(); psoDesc.PS.BytecodeLength = red->GetBufferSize(); list->Close(); list = NULL; ID3D12PipelineState *redPSO = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&redPSO); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(red); SAFE_RELEASE(green); return m_Overlay.resourceId; } psoDesc.RasterizerState.CullMode = origCull; psoDesc.PS.pShaderBytecode = green->GetBufferPointer(); psoDesc.PS.BytecodeLength = green->GetBufferSize(); ID3D12PipelineState *greenPSO = NULL; hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&greenPSO); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(red); SAFE_RELEASE(redPSO); SAFE_RELEASE(green); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(redPSO); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = 
rtv; rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE(); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs.pipe = GetResID(greenPSO); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(red); SAFE_RELEASE(green); SAFE_RELEASE(redPSO); SAFE_RELEASE(greenPSO); } } else if(overlay == DebugOverlay::Wireframe) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); float overlayConsts[] = {200.0f / 255.0f, 255.0f / 255.0f, 0.0f / 255.0f, 1.0f}; ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts); psoDesc.PS.pShaderBytecode = ps->GetBufferPointer(); psoDesc.PS.BytecodeLength = ps->GetBufferSize(); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_WIREFRAME; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; 
overlayConsts[3] = 0.0f; list->ClearRenderTargetView(rtv, overlayConsts, 0, NULL); list->Close(); list = NULL; ID3D12PipelineState *pso = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pso); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(ps); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(pso); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = rtv; rs.dsv = dsv; m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(pso); SAFE_RELEASE(ps); } } else if(overlay == DebugOverlay::ClearBeforePass || overlay == DebugOverlay::ClearBeforeDraw) { vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::ClearBeforeDraw) events.clear(); events.push_back(eventId); if(!events.empty()) { list->Close(); list = NULL; bool rtSingle = rs.rtSingle; std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> rts = rs.rts; if(overlay == DebugOverlay::ClearBeforePass) m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); list = m_pDevice->GetNewList(); for(size_t i = 0; i < rts.size(); i++) { D3D12Descriptor *desc = rtSingle ? 
GetWrapped(rts[0]) : GetWrapped(rts[i]); if(desc) { if(rtSingle) desc += i; Unwrap(list)->ClearRenderTargetView(UnwrapCPU(desc), black, 0, NULL); } } list->Close(); list = NULL; for(size_t i = 0; i < events.size(); i++) { m_pDevice->ReplayLog(events[i], events[i], eReplay_OnlyDraw); if(overlay == DebugOverlay::ClearBeforePass && i + 1 < events.size()) m_pDevice->ReplayLog(events[i] + 1, events[i + 1], eReplay_WithoutDraw); } } } else if(overlay == DebugOverlay::ViewportScissor) { if(pipe && pipe->IsGraphics() && !rs.views.empty()) { list->OMSetRenderTargets(1, &rtv, TRUE, NULL); D3D12_VIEWPORT viewport = rs.views[0]; list->RSSetViewports(1, &viewport); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); list->SetPipelineState(m_General.FixedColPipe); list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig); DebugPixelCBufferData pixelData = {0}; // border colour (dark, 2px, opaque) pixelData.WireframeColour = Vec3f(0.1f, 0.1f, 0.1f); // inner colour (light, transparent) pixelData.Channels = Vec4f(0.2f, 0.2f, 0.9f, 0.7f); pixelData.OutputDisplayFormat = 0; pixelData.RangeMinimum = viewport.TopLeftX; pixelData.InverseRangeSize = viewport.TopLeftY; pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f); D3D12_GPU_VIRTUAL_ADDRESS viewCB = GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData)); list->SetGraphicsRootConstantBufferView(0, viewCB); list->SetGraphicsRootConstantBufferView(1, viewCB); list->SetGraphicsRootConstantBufferView(2, viewCB); Vec4f dummy; list->SetGraphicsRoot32BitConstants(3, 4, &dummy.x, 0); float factor[4] = {1.0f, 1.0f, 1.0f, 1.0f}; list->OMSetBlendFactor(factor); list->DrawInstanced(3, 1, 0, 0); viewport.TopLeftX = (float)rs.scissors[0].left; viewport.TopLeftY = (float)rs.scissors[0].top; viewport.Width = (float)(rs.scissors[0].right - rs.scissors[0].left); viewport.Height = (float)(rs.scissors[0].bottom - 
rs.scissors[0].top); list->RSSetViewports(1, &viewport); pixelData.OutputDisplayFormat = 1; pixelData.RangeMinimum = viewport.TopLeftX; pixelData.InverseRangeSize = viewport.TopLeftY; pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f); D3D12_GPU_VIRTUAL_ADDRESS scissorCB = GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData)); list->SetGraphicsRootConstantBufferView(1, scissorCB); list->DrawInstanced(3, 1, 0, 0); } } else if(overlay == DebugOverlay::TriangleSizeDraw || overlay == DebugOverlay::TriangleSizePass) { if(pipe && pipe->IsGraphics()) { SCOPED_TIMER("Triangle size"); vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::TriangleSizeDraw) events.clear(); while(!events.empty()) { const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[0]); // remove any non-drawcalls, like the pass boundary. if(!(draw->flags & DrawFlags::Drawcall)) events.erase(events.begin()); else break; } events.push_back(eventId); D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = pipe->GetGraphicsDesc(); pipeDesc.pRootSignature = m_General.ConstOnlyRootSig; pipeDesc.SampleMask = 0xFFFFFFFF; pipeDesc.SampleDesc.Count = 1; pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; pipeDesc.NumRenderTargets = 1; RDCEraseEl(pipeDesc.RTVFormats); pipeDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; pipeDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; pipeDesc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; D3D12_INPUT_ELEMENT_DESC ia[2] = {}; 
ia[0].SemanticName = "pos"; ia[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT; ia[1].SemanticName = "sec"; ia[1].Format = DXGI_FORMAT_R32G32B32A32_FLOAT; ia[1].InputSlot = 1; ia[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; pipeDesc.InputLayout.NumElements = 2; pipeDesc.InputLayout.pInputElementDescs = ia; pipeDesc.VS.BytecodeLength = m_Overlay.MeshVS->GetBufferSize(); pipeDesc.VS.pShaderBytecode = m_Overlay.MeshVS->GetBufferPointer(); RDCEraseEl(pipeDesc.HS); RDCEraseEl(pipeDesc.DS); pipeDesc.GS.BytecodeLength = m_Overlay.TriangleSizeGS->GetBufferSize(); pipeDesc.GS.pShaderBytecode = m_Overlay.TriangleSizeGS->GetBufferPointer(); pipeDesc.PS.BytecodeLength = m_Overlay.TriangleSizePS->GetBufferSize(); pipeDesc.PS.pShaderBytecode = m_Overlay.TriangleSizePS->GetBufferPointer(); pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_GREATER) pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL; if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_LESS) pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; // enough for all primitive topology types ID3D12PipelineState *pipes[D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH + 1] = {}; DebugVertexCBuffer vertexData = {}; vertexData.LineStrip = 0; vertexData.ModelViewProj = Matrix4f::Identity(); vertexData.SpriteSize = Vec2f(); Vec4f viewport(rs.views[0].Width, rs.views[0].Height); if(rs.dsv.ptr) { D3D12_CPU_DESCRIPTOR_HANDLE realDSV = Unwrap(rs.dsv); list->OMSetRenderTargets(1, &rtv, TRUE, &realDSV); } list->RSSetViewports(1, &rs.views[0]); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig); list->SetGraphicsRootConstantBufferView( 0, GetDebugManager()->UploadConstants(&vertexData, sizeof(vertexData))); list->SetGraphicsRootConstantBufferView( 1, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, 
sizeof(overdrawRamp))); list->SetGraphicsRootConstantBufferView( 2, GetDebugManager()->UploadConstants(&viewport, sizeof(viewport))); list->SetGraphicsRoot32BitConstants(3, 4, &viewport.x, 0); for(size_t i = 0; i < events.size(); i++) { const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[i]); for(uint32_t inst = 0; draw && inst < RDCMAX(1U, draw->numInstances); inst++) { MeshFormat fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::GSOut); if(fmt.vertexResourceId == ResourceId()) fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::VSOut); if(fmt.vertexResourceId != ResourceId()) { D3D_PRIMITIVE_TOPOLOGY topo = MakeD3DPrimitiveTopology(fmt.topology); if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST || topo >= D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST) pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP || topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST || topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ || topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ) pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; else pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; list->IASetPrimitiveTopology(topo); if(pipes[pipeDesc.PrimitiveTopologyType] == NULL) { HRESULT hr = m_pDevice->CreateGraphicsPipelineState( &pipeDesc, __uuidof(ID3D12PipelineState), (void **)&pipes[pipeDesc.PrimitiveTopologyType]); RDCASSERTEQUAL(hr, S_OK); } ID3D12Resource *vb = m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.vertexResourceId); D3D12_VERTEX_BUFFER_VIEW vbView = {}; vbView.BufferLocation = vb->GetGPUVirtualAddress() + fmt.vertexByteOffset; vbView.StrideInBytes = fmt.vertexByteStride; vbView.SizeInBytes = UINT(vb->GetDesc().Width - fmt.vertexByteOffset); // second bind is just a dummy, so we don't have to make a shader // that doesn't accept the secondary stream list->IASetVertexBuffers(0, 1, &vbView); list->IASetVertexBuffers(1, 1, &vbView); 
list->SetPipelineState(pipes[pipeDesc.PrimitiveTopologyType]); if(fmt.indexByteStride && fmt.indexResourceId != ResourceId()) { ID3D12Resource *ib = m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.indexResourceId); D3D12_INDEX_BUFFER_VIEW view; view.BufferLocation = ib->GetGPUVirtualAddress() + fmt.indexByteOffset; view.SizeInBytes = UINT(ib->GetDesc().Width - fmt.indexByteOffset); view.Format = fmt.indexByteStride == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; list->IASetIndexBuffer(&view); list->DrawIndexedInstanced(fmt.numIndices, 1, 0, fmt.baseVertex, 0); } else { list->DrawInstanced(fmt.numIndices, 1, 0, 0); } } } } list->Close(); list = NULL; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); for(size_t i = 0; i < ARRAY_COUNT(pipes); i++) SAFE_RELEASE(pipes[i]); } // restore back to normal m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw); } else if(overlay == DebugOverlay::QuadOverdrawPass || overlay == DebugOverlay::QuadOverdrawDraw) { SCOPED_TIMER("Quad Overdraw"); vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::QuadOverdrawDraw) events.clear(); events.push_back(eventId); if(!events.empty()) { if(overlay == DebugOverlay::QuadOverdrawPass) { list->Close(); m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); list = m_pDevice->GetNewList(); } uint32_t width = uint32_t(resourceDesc.Width >> 1); uint32_t height = resourceDesc.Height >> 1; width = RDCMAX(1U, width); height = RDCMAX(1U, height); D3D12_RESOURCE_DESC uavTexDesc = {}; uavTexDesc.Alignment = 0; uavTexDesc.DepthOrArraySize = 4; uavTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; uavTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; uavTexDesc.Format = DXGI_FORMAT_R32_UINT; uavTexDesc.Height = height; uavTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; uavTexDesc.MipLevels = 1; uavTexDesc.SampleDesc.Count = 1; uavTexDesc.SampleDesc.Quality = 0; uavTexDesc.Width = width; ID3D12Resource *overdrawTex = NULL; HRESULT hr = 
m_pDevice->CreateCommittedResource( &heapProps, D3D12_HEAP_FLAG_NONE, &uavTexDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, __uuidof(ID3D12Resource), (void **)&overdrawTex); if(FAILED(hr)) { RDCERR("Failed to create overdrawTex HRESULT: %s", ToStr(hr).c_str()); list->Close(); list = NULL; return m_Overlay.resourceId; } m_pDevice->CreateShaderResourceView(overdrawTex, NULL, GetDebugManager()->GetCPUHandle(OVERDRAW_SRV)); m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL, GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)); m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL, GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV)); UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(OVERDRAW_UAV), GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV), overdrawTex, zeroes, 0, NULL); list->Close(); list = NULL; #if ENABLED(SINGLE_FLUSH_VALIDATE) m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); #endif m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); D3D12_SHADER_BYTECODE quadWrite; quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize(); quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer(); // declare callback struct here D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, events, ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV))); m_pDevice->ReplayLog(events.front(), events.back(), eReplay_Full); // resolve pass { list = m_pDevice->GetNewList(); D3D12_RESOURCE_BARRIER overdrawBarriers[2] = {}; // make sure UAV work is done then prepare for reading in PS overdrawBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; overdrawBarriers[0].UAV.pResource = overdrawTex; overdrawBarriers[1].Transition.pResource = overdrawTex; overdrawBarriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; overdrawBarriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; overdrawBarriers[1].Transition.StateAfter = 
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; // prepare tex resource for copying list->ResourceBarrier(2, overdrawBarriers); list->OMSetRenderTargets(1, &rtv, TRUE, NULL); list->RSSetViewports(1, &rs.views[0]); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); list->SetPipelineState(m_Overlay.QuadResolvePipe); list->SetGraphicsRootSignature(m_Overlay.QuadResolveRootSig); GetDebugManager()->SetDescriptorHeaps(list, true, false); list->SetGraphicsRootConstantBufferView( 0, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp))); list->SetGraphicsRootDescriptorTable(1, GetDebugManager()->GetGPUHandle(OVERDRAW_SRV)); list->DrawInstanced(3, 1, 0, 0); list->Close(); list = NULL; } m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); for(auto it = cb.m_PipelineCache.begin(); it != cb.m_PipelineCache.end(); ++it) { SAFE_RELEASE(it->second.pipe); SAFE_RELEASE(it->second.sig); } SAFE_RELEASE(overdrawTex); } if(overlay == DebugOverlay::QuadOverdrawPass) m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw); }
// Fetches the requested GPU counters for the captured frame.
//
// Counters for which IsAMDCounter() is true are forwarded to FetchCountersAMD() (only when
// m_pAMDCounters is set); their results come first in the returned vector. The remaining
// counters are measured with D3D11 queries: FillTimers() records a pair of timestamp queries,
// a pipeline statistics query and an occlusion query per event, and the results are read back
// here once the enclosing disjoint query has completed.
//
// counters: the counters to fetch. An empty list is reported as an error.
// Returns one CounterResult per (timed event, requested D3D counter). When an event's queries
// could not be created, the duration is reported as -1.0 and every other counter as
// 0xFFFFFFFFFFFFFFFF.
vector<CounterResult> D3D11Replay::FetchCounters(const vector<GPUCounter> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  SCOPED_TIMER("Fetch Counters, counters to fetch %u", counters.size());

  // everything that isn't an AMD counter is handled with D3D11 queries below
  vector<GPUCounter> d3dCounters;
  std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters),
               [](const GPUCounter &c) { return !IsAMDCounter(c); });

  if(m_pAMDCounters)
  {
    // Filter out the AMD counters
    vector<GPUCounter> amdCounters;
    std::copy_if(counters.begin(), counters.end(), std::back_inserter(amdCounters),
                 [](const GPUCounter &c) { return IsAMDCounter(c); });

    if(!amdCounters.empty())
    {
      ret = FetchCountersAMD(amdCounters);
    }
  }

  if(d3dCounters.empty())
  {
    return ret;
  }

  D3D11_QUERY_DESC disjointdesc = {D3D11_QUERY_TIMESTAMP_DISJOINT, 0};
  ID3D11Query *disjoint = NULL;

  D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};
  ID3D11Query *start = NULL;

  HRESULT hr = S_OK;

  hr = m_pDevice->CreateQuery(&disjointdesc, &disjoint);
  if(FAILED(hr))
  {
    RDCERR("Failed to create disjoint query HRESULT: %s", ToStr(hr).c_str());
    return ret;
  }

  hr = m_pDevice->CreateQuery(&qdesc, &start);
  if(FAILED(hr))
  {
    RDCERR("Failed to create start query HRESULT: %s", ToStr(hr).c_str());
    // the disjoint query was created successfully above, don't leak it on this early-out
    SAFE_RELEASE(disjoint);
    return ret;
  }

  D3D11CounterContext ctx;

  {
    // record all the queries, bracketed by the disjoint query
    {
      m_pImmediateContext->Begin(disjoint);

      m_pImmediateContext->End(start);

      ctx.eventStart = 0;
      FillTimers(ctx, m_pImmediateContext->GetRootDraw());

      m_pImmediateContext->End(disjoint);
    }

    // read the results back
    {
      D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
      // spin until the disjoint query's data is available
      do
      {
        hr = m_pImmediateContext->GetData(disjoint, &disjointData,
                                          sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0);
      } while(hr == S_FALSE);
      RDCASSERTEQUAL(hr, S_OK);

      RDCASSERT(!disjointData.Disjoint);

      // timestamp ticks are divided by this frequency to obtain the duration
      double ticksToSecs = double(disjointData.Frequency);

      // note the value read here is overwritten by each timer's 'before' timestamp below
      UINT64 a = 0;
      hr = m_pImmediateContext->GetData(start, &a, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      for(size_t i = 0; i < ctx.timers.size(); i++)
      {
        if(ctx.timers[i].before && ctx.timers[i].after && ctx.timers[i].stats &&
           ctx.timers[i].occlusion)
        {
          hr = m_pImmediateContext->GetData(ctx.timers[i].before, &a, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 b = 0;
          hr = m_pImmediateContext->GetData(ctx.timers[i].after, &b, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          double duration = (double(b - a) / ticksToSecs);

          a = b;

          D3D11_QUERY_DATA_PIPELINE_STATISTICS pipelineStats;
          hr = m_pImmediateContext->GetData(ctx.timers[i].stats, &pipelineStats,
                                            sizeof(D3D11_QUERY_DATA_PIPELINE_STATISTICS), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 occlusion = 0;
          hr = m_pImmediateContext->GetData(ctx.timers[i].occlusion, &occlusion, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          // emit one result per requested counter for this event
          for(size_t c = 0; c < d3dCounters.size(); c++)
          {
            switch(d3dCounters[c])
            {
              case GPUCounter::EventGPUDuration:
                ret.push_back(
                    CounterResult(ctx.timers[i].eventId, GPUCounter::EventGPUDuration, duration));
                break;
              case GPUCounter::InputVerticesRead:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::InputVerticesRead,
                                            pipelineStats.IAVertices));
                break;
              case GPUCounter::IAPrimitives:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::IAPrimitives,
                                            pipelineStats.IAPrimitives));
                break;
              case GPUCounter::VSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::VSInvocations,
                                            pipelineStats.VSInvocations));
                break;
              case GPUCounter::GSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::GSInvocations,
                                            pipelineStats.GSInvocations));
                break;
              case GPUCounter::GSPrimitives:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::GSPrimitives,
                                            pipelineStats.GSPrimitives));
                break;
              case GPUCounter::RasterizerInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId,
                                            GPUCounter::RasterizerInvocations,
                                            pipelineStats.CInvocations));
                break;
              case GPUCounter::RasterizedPrimitives:
                ret.push_back(CounterResult(ctx.timers[i].eventId,
                                            GPUCounter::RasterizedPrimitives,
                                            pipelineStats.CPrimitives));
                break;
              case GPUCounter::PSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::PSInvocations,
                                            pipelineStats.PSInvocations));
                break;
              case GPUCounter::HSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::HSInvocations,
                                            pipelineStats.HSInvocations));
                break;
              case GPUCounter::DSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::DSInvocations,
                                            pipelineStats.DSInvocations));
                break;
              case GPUCounter::CSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::CSInvocations,
                                            pipelineStats.CSInvocations));
                break;
              case GPUCounter::SamplesWritten:
                ret.push_back(
                    CounterResult(ctx.timers[i].eventId, GPUCounter::SamplesWritten, occlusion));
                break;
            }
          }
        }
        else
        {
          // at least one query is missing for this event, report sentinel values instead
          for(size_t c = 0; c < d3dCounters.size(); c++)
          {
            switch(d3dCounters[c])
            {
              case GPUCounter::EventGPUDuration:
                ret.push_back(
                    CounterResult(ctx.timers[i].eventId, GPUCounter::EventGPUDuration, -1.0));
                break;
              case GPUCounter::InputVerticesRead:
              case GPUCounter::IAPrimitives:
              case GPUCounter::GSPrimitives:
              case GPUCounter::RasterizerInvocations:
              case GPUCounter::RasterizedPrimitives:
              case GPUCounter::VSInvocations:
              case GPUCounter::HSInvocations:
              case GPUCounter::DSInvocations:
              case GPUCounter::GSInvocations:
              case GPUCounter::PSInvocations:
              case GPUCounter::CSInvocations:
              case GPUCounter::SamplesWritten:
                ret.push_back(
                    CounterResult(ctx.timers[i].eventId, d3dCounters[c], 0xFFFFFFFFFFFFFFFF));
                break;
            }
          }
        }
      }
    }
  }

  // release every per-event query, then the frame-level ones
  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    SAFE_RELEASE(ctx.timers[i].before);
    SAFE_RELEASE(ctx.timers[i].after);
    SAFE_RELEASE(ctx.timers[i].stats);
    SAFE_RELEASE(ctx.timers[i].occlusion);
  }

  SAFE_RELEASE(disjoint);
  SAFE_RELEASE(start);

  return ret;
}
void VulkanDebugManager::CopyDepthArrayToTex2DMS(VkImage destMS, VkImage srcArray, VkExtent3D extent, uint32_t layers, uint32_t samples, VkFormat fmt) { VkImageAspectFlags aspectFlags = VK_IMAGE_ASPECT_DEPTH_BIT; int pipeIndex = 0; switch(fmt) { case VK_FORMAT_D16_UNORM: pipeIndex = 0; break; case VK_FORMAT_D16_UNORM_S8_UINT: pipeIndex = 1; aspectFlags |= VK_IMAGE_ASPECT_STENCIL_BIT; break; case VK_FORMAT_X8_D24_UNORM_PACK32: pipeIndex = 2; break; case VK_FORMAT_D24_UNORM_S8_UINT: pipeIndex = 3; aspectFlags |= VK_IMAGE_ASPECT_STENCIL_BIT; break; case VK_FORMAT_D32_SFLOAT: pipeIndex = 4; break; case VK_FORMAT_D32_SFLOAT_S8_UINT: pipeIndex = 5; aspectFlags |= VK_IMAGE_ASPECT_STENCIL_BIT; break; default: RDCERR("Unexpected depth format: %d", fmt); return; } // 0-based from 2x MSAA uint32_t sampleIndex = SampleIndex((VkSampleCountFlagBits)samples) - 1; if(sampleIndex >= ARRAY_COUNT(m_DepthArray2MSPipe[0])) { RDCERR("Unsupported sample count %u", samples); return; } VkPipeline pipe = m_DepthArray2MSPipe[pipeIndex][sampleIndex]; if(pipe == VK_NULL_HANDLE) return; VkDevice dev = m_Device; VkResult vkr = VK_SUCCESS; VkImageView srcDepthView = VK_NULL_HANDLE, srcStencilView = VK_NULL_HANDLE; VkImageView *destView = new VkImageView[layers]; VkImageViewCreateInfo viewInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, NULL, 0, srcArray, VK_IMAGE_VIEW_TYPE_2D_ARRAY, fmt, {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO}, { VK_IMAGE_ASPECT_DEPTH_BIT, 0, VK_REMAINING_MIP_LEVELS, 0, VK_REMAINING_ARRAY_LAYERS, }, }; vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &srcDepthView); RDCASSERTEQUAL(vkr, VK_SUCCESS); if(aspectFlags & VK_IMAGE_ASPECT_STENCIL_BIT) { viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &srcStencilView); RDCASSERTEQUAL(vkr, VK_SUCCESS); } viewInfo.subresourceRange.aspectMask = aspectFlags; viewInfo.image = 
destMS; viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; viewInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; for(uint32_t i = 0; i < layers; i++) { viewInfo.subresourceRange.baseArrayLayer = i; viewInfo.subresourceRange.layerCount = 1; vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &destView[i]); RDCASSERTEQUAL(vkr, VK_SUCCESS); } VkDescriptorImageInfo srcdesc[2]; srcdesc[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; srcdesc[0].imageView = srcDepthView; srcdesc[0].sampler = Unwrap(m_ArrayMSSampler); // not used - we use texelFetch srcdesc[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; srcdesc[1].imageView = srcStencilView; srcdesc[1].sampler = Unwrap(m_ArrayMSSampler); // not used - we use texelFetch VkWriteDescriptorSet writeSet[] = { {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, Unwrap(m_ArrayMSDescSet), 0, 0, 1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &srcdesc[0], NULL, NULL}, {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, Unwrap(m_ArrayMSDescSet), 1, 0, 1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &srcdesc[1], NULL, NULL}, }; if(aspectFlags & VK_IMAGE_ASPECT_STENCIL_BIT) ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), 2, writeSet, 0, NULL); else ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), 1, writeSet, 0, NULL); // create a bespoke framebuffer and renderpass for rendering VkAttachmentDescription attDesc = {0, fmt, (VkSampleCountFlagBits)samples, VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL}; VkAttachmentReference attRef = {0, VK_IMAGE_LAYOUT_GENERAL}; VkSubpassDescription sub = {}; sub.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; sub.pDepthStencilAttachment = &attRef; VkRenderPassCreateInfo rpinfo = { VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, NULL, 0, 1, &attDesc, 
1, &sub, 0, NULL, // dependencies }; VkRenderPass rp = VK_NULL_HANDLE; ObjDisp(dev)->CreateRenderPass(Unwrap(dev), &rpinfo, NULL, &rp); VkFramebufferCreateInfo fbinfo = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, NULL, 0, rp, 1, NULL, extent.width, extent.height, 1, }; VkFramebuffer *fb = new VkFramebuffer[layers]; for(uint32_t i = 0; i < layers; i++) { fbinfo.pAttachments = destView + i; vkr = ObjDisp(dev)->CreateFramebuffer(Unwrap(dev), &fbinfo, NULL, &fb[i]); RDCASSERTEQUAL(vkr, VK_SUCCESS); } VkCommandBuffer cmd = m_pDriver->GetNextCmd(); VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; ObjDisp(cmd)->BeginCommandBuffer(Unwrap(cmd), &beginInfo); VkClearValue clearval = {}; VkRenderPassBeginInfo rpbegin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, NULL, rp, VK_NULL_HANDLE, {{0, 0}, {extent.width, extent.height}}, 1, &clearval, }; uint32_t numStencil = 1; if(aspectFlags & VK_IMAGE_ASPECT_STENCIL_BIT) numStencil = 256; Vec4u params; params.x = samples; params.y = 0; // currentSample; for(uint32_t i = 0; i < layers; i++) { rpbegin.framebuffer = fb[i]; ObjDisp(cmd)->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE); ObjDisp(cmd)->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(pipe)); ObjDisp(cmd)->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(m_ArrayMSPipeLayout), 0, 1, UnwrapPtr(m_ArrayMSDescSet), 0, NULL); VkViewport viewport = {0.0f, 0.0f, (float)extent.width, (float)extent.height, 0.0f, 1.0f}; ObjDisp(cmd)->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport); params.z = i; // currentSlice; for(uint32_t s = 0; s < numStencil; s++) { params.w = numStencil == 1 ? 
1000 : s; // currentStencil; ObjDisp(cmd)->CmdSetStencilReference(Unwrap(cmd), VK_STENCIL_FRONT_AND_BACK, s); ObjDisp(cmd)->CmdPushConstants(Unwrap(cmd), Unwrap(m_ArrayMSPipeLayout), VK_SHADER_STAGE_ALL, 0, sizeof(Vec4u), ¶ms); ObjDisp(cmd)->CmdDraw(Unwrap(cmd), 4, 1, 0, 0); } ObjDisp(cmd)->CmdEndRenderPass(Unwrap(cmd)); } ObjDisp(cmd)->EndCommandBuffer(Unwrap(cmd)); // submit cmds and wait for idle so we can readback m_pDriver->SubmitCmds(); m_pDriver->FlushQ(); for(uint32_t i = 0; i < layers; i++) ObjDisp(dev)->DestroyFramebuffer(Unwrap(dev), fb[i], NULL); ObjDisp(dev)->DestroyRenderPass(Unwrap(dev), rp, NULL); ObjDisp(dev)->DestroyImageView(Unwrap(dev), srcDepthView, NULL); if(srcStencilView != VK_NULL_HANDLE) ObjDisp(dev)->DestroyImageView(Unwrap(dev), srcStencilView, NULL); for(uint32_t i = 0; i < layers; i++) ObjDisp(dev)->DestroyImageView(Unwrap(dev), destView[i], NULL); SAFE_DELETE_ARRAY(destView); SAFE_DELETE_ARRAY(fb); }
void VulkanDebugManager::CopyArrayToTex2DMS(VkImage destMS, VkImage srcArray, VkExtent3D extent, uint32_t layers, uint32_t samples, VkFormat fmt) { if(!m_pDriver->GetDeviceFeatures().shaderStorageImageMultisample || !m_pDriver->GetDeviceFeatures().shaderStorageImageWriteWithoutFormat) return; if(m_Array2MSPipe == VK_NULL_HANDLE) return; if(IsDepthOrStencilFormat(fmt)) { CopyDepthArrayToTex2DMS(destMS, srcArray, extent, layers, samples, fmt); return; } VkDevice dev = m_Device; VkResult vkr = VK_SUCCESS; VkImageView srcView, destView; VkImageViewCreateInfo viewInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, NULL, 0, srcArray, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_FORMAT_UNDEFINED, {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}, { VK_IMAGE_ASPECT_COLOR_BIT, 0, VK_REMAINING_MIP_LEVELS, 0, VK_REMAINING_ARRAY_LAYERS, }, }; uint32_t bs = GetByteSize(1, 1, 1, fmt, 0); if(bs == 1) viewInfo.format = VK_FORMAT_R8_UINT; else if(bs == 2) viewInfo.format = VK_FORMAT_R16_UINT; else if(bs == 4) viewInfo.format = VK_FORMAT_R32_UINT; else if(bs == 8) viewInfo.format = VK_FORMAT_R32G32_UINT; else if(bs == 16) viewInfo.format = VK_FORMAT_R32G32B32A32_UINT; if(viewInfo.format == VK_FORMAT_UNDEFINED) { RDCERR("Can't copy Array to MS with format %s", ToStr(fmt).c_str()); return; } if(IsStencilOnlyFormat(fmt)) viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; else if(IsDepthOrStencilFormat(fmt)) viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &srcView); RDCASSERTEQUAL(vkr, VK_SUCCESS); viewInfo.image = destMS; viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &destView); RDCASSERTEQUAL(vkr, VK_SUCCESS); VkDescriptorImageInfo srcdesc = {0}; srcdesc.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; srcdesc.imageView = srcView; srcdesc.sampler = 
Unwrap(m_ArrayMSSampler); // not used - we use texelFetch VkDescriptorImageInfo destdesc = {0}; destdesc.imageLayout = VK_IMAGE_LAYOUT_GENERAL; destdesc.imageView = destView; VkWriteDescriptorSet writeSet[] = { {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, Unwrap(m_ArrayMSDescSet), 0, 0, 1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &srcdesc, NULL, NULL}, {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, Unwrap(m_ArrayMSDescSet), 2, 0, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &destdesc, NULL, NULL}, }; ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), ARRAY_COUNT(writeSet), writeSet, 0, NULL); VkCommandBuffer cmd = m_pDriver->GetNextCmd(); VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; ObjDisp(cmd)->BeginCommandBuffer(Unwrap(cmd), &beginInfo); ObjDisp(cmd)->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_COMPUTE, Unwrap(m_Array2MSPipe)); ObjDisp(cmd)->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_COMPUTE, Unwrap(m_ArrayMSPipeLayout), 0, 1, UnwrapPtr(m_ArrayMSDescSet), 0, NULL); Vec4u params = {samples, 0, 0, 0}; ObjDisp(cmd)->CmdPushConstants(Unwrap(cmd), Unwrap(m_ArrayMSPipeLayout), VK_SHADER_STAGE_ALL, 0, sizeof(Vec4u), ¶ms); ObjDisp(cmd)->CmdDispatch(Unwrap(cmd), extent.width, extent.height, layers * samples); ObjDisp(cmd)->EndCommandBuffer(Unwrap(cmd)); // submit cmds and wait for idle so we can readback m_pDriver->SubmitCmds(); m_pDriver->FlushQ(); ObjDisp(dev)->DestroyImageView(Unwrap(dev), srcView, NULL); ObjDisp(dev)->DestroyImageView(Unwrap(dev), destView, NULL); }
// Wrapped entry point for vkCreateRenderPass.
//
// Creates the render pass on the real device and wraps the returned handle. When m_State is
// WRITING or later, the creation is serialised into a chunk stored on the render pass's
// resource record. Otherwise the render pass is registered as a live resource, and a second
// render pass (rpinfo.loadRP) is created whose attachments all use LOAD load ops; it is stored
// with the creation info in m_CreationInfo.m_RenderPass.
//
// Returns the result of the real CreateRenderPass call; in the non-writing path this is
// overwritten by the result of creating the load render pass.
VkResult WrappedVulkan::vkCreateRenderPass(VkDevice device,
                                           const VkRenderPassCreateInfo *pCreateInfo,
                                           const VkAllocationCallbacks *pAllocator,
                                           VkRenderPass *pRenderPass)
{
  VkResult ret =
      ObjDisp(device)->CreateRenderPass(Unwrap(device), pCreateInfo, pAllocator, pRenderPass);

  if(ret == VK_SUCCESS)
  {
    ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pRenderPass);

    if(m_State >= WRITING)
    {
      Chunk *chunk = NULL;

      {
        CACHE_THREAD_SERIALISER();

        SCOPED_SERIALISE_CONTEXT(CREATE_RENDERPASS);
        Serialise_vkCreateRenderPass(localSerialiser, device, pCreateInfo, NULL, pRenderPass);

        chunk = scope.Get();
      }

      VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pRenderPass);
      record->AddChunk(chunk);
    }
    else
    {
      GetResourceManager()->AddLiveResource(id, *pRenderPass);

      VulkanCreationInfo::RenderPass rpinfo;
      rpinfo.Init(GetResourceManager(), m_CreationInfo, pCreateInfo);

      VkRenderPassCreateInfo info = *pCreateInfo;

      // size the patched attachment array from the create info rather than using a fixed-size
      // stack array, so that any attachment count is handled without overflowing
      VkAttachmentDescription *atts = new VkAttachmentDescription[info.attachmentCount];

      // make a version of the render pass that loads from its attachments,
      // so it can be used for replaying a single draw after a render pass
      // without doing a clear or a DONT_CARE load.
      for(uint32_t i = 0; i < info.attachmentCount; i++)
      {
        atts[i] = info.pAttachments[i];
        atts[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
        atts[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
      }

      info.pAttachments = atts;

      ret = ObjDisp(device)->CreateRenderPass(Unwrap(device), &info, NULL, &rpinfo.loadRP);
      RDCASSERTEQUAL(ret, VK_SUCCESS);

      // the patched descriptions are only needed for the create call above
      delete[] atts;

      ResourceId loadRPid = GetResourceManager()->WrapResource(Unwrap(device), rpinfo.loadRP);

      // register as a live-only resource, so it is cleaned up properly
      GetResourceManager()->AddLiveResource(loadRPid, rpinfo.loadRP);

      m_CreationInfo.m_RenderPass[id] = rpinfo;
    }
  }

  return ret;
}
bool WrappedVulkan::Serialise_vkCreateRenderPass( Serialiser* localSerialiser, VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass) { SERIALISE_ELEMENT(ResourceId, devId, GetResID(device)); SERIALISE_ELEMENT(VkRenderPassCreateInfo, info, *pCreateInfo); SERIALISE_ELEMENT(ResourceId, id, GetResID(*pRenderPass)); if(m_State == READING) { device = GetResourceManager()->GetLiveHandle<VkDevice>(devId); VkRenderPass rp = VK_NULL_HANDLE; VulkanCreationInfo::RenderPass rpinfo; rpinfo.Init(GetResourceManager(), m_CreationInfo, &info); // we want to store off the data so we can display it after the pass. // override any user-specified DONT_CARE. VkAttachmentDescription *att = (VkAttachmentDescription *)info.pAttachments; for(uint32_t i=0; i < info.attachmentCount; i++) { att[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; att[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; // renderpass can't start or end in presentable layout on replay ReplacePresentableImageLayout(att[i].initialLayout); ReplacePresentableImageLayout(att[i].finalLayout); } VkResult ret = ObjDisp(device)->CreateRenderPass(Unwrap(device), &info, NULL, &rp); if(ret != VK_SUCCESS) { RDCERR("Failed on resource serialise-creation, VkResult: 0x%08x", ret); } else { ResourceId live; if(GetResourceManager()->HasWrapper(ToTypedHandle(rp))) { live = GetResourceManager()->GetNonDispWrapper(rp)->id; // destroy this instance of the duplicate, as we must have matching create/destroy // calls and there won't be a wrapped resource hanging around to destroy this one. ObjDisp(device)->DestroyRenderPass(Unwrap(device), rp, NULL); // whenever the new ID is requested, return the old ID, via replacements. 
GetResourceManager()->ReplaceResource(id, GetResourceManager()->GetOriginalID(live)); } else { live = GetResourceManager()->WrapResource(Unwrap(device), rp); GetResourceManager()->AddLiveResource(id, rp); // make a version of the render pass that loads from its attachments, // so it can be used for replaying a single draw after a render pass // without doing a clear or a DONT_CARE load. for(uint32_t i=0; i < info.attachmentCount; i++) { att[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; att[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } ret = ObjDisp(device)->CreateRenderPass(Unwrap(device), &info, NULL, &rpinfo.loadRP); RDCASSERTEQUAL(ret, VK_SUCCESS); // handle the loadRP being a duplicate if(GetResourceManager()->HasWrapper(ToTypedHandle(rpinfo.loadRP))) { // just fetch the existing wrapped object rpinfo.loadRP = (VkRenderPass)(uint64_t)GetResourceManager()->GetNonDispWrapper(rpinfo.loadRP); // destroy this instance of the duplicate, as we must have matching create/destroy // calls and there won't be a wrapped resource hanging around to destroy this one. ObjDisp(device)->DestroyRenderPass(Unwrap(device), rpinfo.loadRP, NULL); // don't need to ReplaceResource as no IDs are involved } else { ResourceId loadRPid = GetResourceManager()->WrapResource(Unwrap(device), rpinfo.loadRP); // register as a live-only resource, so it is cleaned up properly GetResourceManager()->AddLiveResource(loadRPid, rpinfo.loadRP); } m_CreationInfo.m_RenderPass[live] = rpinfo; } } } return true; }
// Serialises a D3D12Descriptor: first its type, owning heap and index within the heap, then
// the payload that corresponds to the type (sampler desc, CBV desc, or resource plus view
// desc). UAVs additionally carry a counter resource and have their squeezed descriptor
// expanded for serialisation and re-packed afterwards.
void DoSerialise(SerialiserType &ser, D3D12Descriptor &el)
{
  D3D12DescriptorType descType = el.GetType();
  ser.Serialise("type", descType);

  ID3D12DescriptorHeap *heapPtr = (ID3D12DescriptorHeap *)el.samp.heap;
  ser.Serialise("heap", heapPtr);
  ser.Serialise("index", el.samp.idx);

  if(ser.IsReading())
  {
    el.samp.heap = (WrappedID3D12DescriptorHeap *)heapPtr;

    // for sampler types, this will be overwritten when serialising the sampler descriptor
    el.nonsamp.type = descType;
  }

  const bool isView = descType == D3D12DescriptorType::SRV ||
                      descType == D3D12DescriptorType::RTV ||
                      descType == D3D12DescriptorType::DSV ||
                      descType == D3D12DescriptorType::UAV;

  // every view type leads with the resource it refers to
  if(isView)
    ser.Serialise("Resource", el.nonsamp.resource);

  if(descType == D3D12DescriptorType::Sampler)
  {
    ser.Serialise("Descriptor", el.samp.desc);
    RDCASSERTEQUAL(el.GetType(), D3D12DescriptorType::Sampler);
  }
  else if(descType == D3D12DescriptorType::CBV)
  {
    ser.Serialise("Descriptor", el.nonsamp.cbv);
  }
  else if(descType == D3D12DescriptorType::SRV)
  {
    ser.Serialise("Descriptor", el.nonsamp.srv);
  }
  else if(descType == D3D12DescriptorType::RTV)
  {
    ser.Serialise("Descriptor", el.nonsamp.rtv);
  }
  else if(descType == D3D12DescriptorType::DSV)
  {
    ser.Serialise("Descriptor", el.nonsamp.dsv);
  }
  else if(descType == D3D12DescriptorType::UAV)
  {
    ser.Serialise("CounterResource", el.nonsamp.uav.counterResource);

    // special case because of extra resource and squeezed descriptor
    D3D12_UNORDERED_ACCESS_VIEW_DESC fullDesc = el.nonsamp.uav.desc.AsDesc();
    ser.Serialise("Descriptor", fullDesc);
    el.nonsamp.uav.desc.Init(fullDesc);
  }
  else if(descType == D3D12DescriptorType::Undefined)
  {
    // nothing further to serialise, just record the type
    el.nonsamp.type = descType;
  }
}
void PreDraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) { if(std::find(m_Events.begin(), m_Events.end(), eid) == m_Events.end()) return; // we customise the pipeline to disable framebuffer writes, but perform normal testing // and substitute our quad calculation fragment shader that writes to a storage image // that is bound in a new root signature element. D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState; m_PrevState = rs; // check cache first CachedPipeline cache = m_PipelineCache[rs.pipe]; // if we don't get a hit, create a modified pipeline if(cache.pipe == NULL) { HRESULT hr = S_OK; WrappedID3D12RootSignature *sig = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>( rs.graphics.rootsig); // need to be able to add a descriptor table with our UAV without hitting the 64 DWORD limit RDCASSERT(sig->sig.dwordLength < 64); D3D12RootSignature modsig = sig->sig; // make sure no other UAV tables overlap. We can't remove elements entirely because then the // root signature indices wouldn't match up as expected. // Instead move them into an unused space. 
for(size_t i = 0; i < modsig.params.size(); i++) { if(modsig.params[i].ShaderVisibility == D3D12_SHADER_VISIBILITY_PIXEL) { if(modsig.params[i].ParameterType == D3D12_ROOT_PARAMETER_TYPE_UAV) { modsig.params[i].Descriptor.RegisterSpace = modsig.numSpaces; } else if(modsig.params[i].ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) { for(size_t r = 0; r < modsig.params[i].ranges.size(); r++) { modsig.params[i].ranges[r].RegisterSpace = modsig.numSpaces; } } } } D3D12_DESCRIPTOR_RANGE1 range; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; range.NumDescriptors = 1; range.BaseShaderRegister = 0; range.RegisterSpace = 0; range.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; range.OffsetInDescriptorsFromTableStart = 0; modsig.params.push_back(D3D12RootSignatureParameter()); D3D12RootSignatureParameter ¶m = modsig.params.back(); param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; param.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; param.DescriptorTable.NumDescriptorRanges = 1; param.DescriptorTable.pDescriptorRanges = ⦥ cache.sigElem = uint32_t(modsig.params.size() - 1); std::vector<D3D12_ROOT_PARAMETER1> params; params.resize(modsig.params.size()); for(size_t i = 0; i < params.size(); i++) params[i] = modsig.params[i]; ID3DBlob *root = m_pDevice->GetShaderCache()->MakeRootSig(modsig); hr = m_pDevice->CreateRootSignature(0, root->GetBufferPointer(), root->GetBufferSize(), __uuidof(ID3D12RootSignature), (void **)&cache.sig); RDCASSERTEQUAL(hr, S_OK); SAFE_RELEASE(root); WrappedID3D12PipelineState *origPSO = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe); RDCASSERT(origPSO->IsGraphics()); D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = origPSO->GetGraphicsDesc(); for(size_t i = 0; i < ARRAY_COUNT(pipeDesc.BlendState.RenderTarget); i++) pipeDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = 0; // disable depth/stencil writes pipeDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; 
pipeDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; pipeDesc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; pipeDesc.DepthStencilState.StencilWriteMask = 0; pipeDesc.PS = m_QuadWritePS; pipeDesc.pRootSignature = cache.sig; hr = m_pDevice->CreateGraphicsPipelineState(&pipeDesc, __uuidof(ID3D12PipelineState), (void **)&cache.pipe); RDCASSERTEQUAL(hr, S_OK); m_PipelineCache[rs.pipe] = cache; } // modify state for first draw call rs.pipe = GetResID(cache.pipe); rs.graphics.rootsig = GetResID(cache.sig); if(rs.graphics.sigelems.size() <= cache.sigElem) rs.graphics.sigelems.resize(cache.sigElem + 1); PortableHandle uav = m_UAV; // if a CBV_SRV_UAV heap is already set, we need to copy our descriptor in // if we haven't already. Otherwise we can set our own heap. for(size_t i = 0; i < rs.heaps.size(); i++) { WrappedID3D12DescriptorHeap *h = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12DescriptorHeap>(rs.heaps[i]); if(h->GetDesc().Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { // use the last descriptor D3D12_CPU_DESCRIPTOR_HANDLE dst = h->GetCPUDescriptorHandleForHeapStart(); dst.ptr += (h->GetDesc().NumDescriptors - 1) * sizeof(D3D12Descriptor); if(m_CopiedHeaps.find(rs.heaps[i]) == m_CopiedHeaps.end()) { WrappedID3D12DescriptorHeap *h2 = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12DescriptorHeap>(m_UAV.heap); D3D12_CPU_DESCRIPTOR_HANDLE src = h2->GetCPUDescriptorHandleForHeapStart(); src.ptr += m_UAV.index * sizeof(D3D12Descriptor); // can't do a copy because the src heap is CPU write-only (shader visible). 
So instead, // create directly D3D12Descriptor *srcDesc = (D3D12Descriptor *)src.ptr; srcDesc->Create(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, m_pDevice, dst); m_CopiedHeaps.insert(rs.heaps[i]); } uav = ToPortableHandle(dst); break; } } if(uav.heap == m_UAV.heap) rs.heaps.push_back(m_UAV.heap); rs.graphics.sigelems[cache.sigElem] = D3D12RenderState::SignatureElement(eRootTable, uav.heap, uav.index); // as we're changing the root signature, we need to reapply all elements, // so just apply all state if(cmd) rs.ApplyState(cmd); }
bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices( Serialiser* localSerialiser, VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices) { SERIALISE_ELEMENT(ResourceId, inst, GetResID(instance)); SERIALISE_ELEMENT(uint32_t, physIndex, *pPhysicalDeviceCount); SERIALISE_ELEMENT(ResourceId, physId, GetResID(*pPhysicalDevices)); uint32_t memIdxMap[32] = {0}; if(m_State >= WRITING) memcpy(memIdxMap, GetRecord(*pPhysicalDevices)->memIdxMap, sizeof(memIdxMap)); localSerialiser->SerialisePODArray<32>("memIdxMap", memIdxMap); // not used at the moment but useful for reference and might be used // in the future VkPhysicalDeviceProperties physProps; VkPhysicalDeviceMemoryProperties memProps; VkPhysicalDeviceFeatures physFeatures; if(m_State >= WRITING) { ObjDisp(instance)->GetPhysicalDeviceProperties(Unwrap(*pPhysicalDevices), &physProps); ObjDisp(instance)->GetPhysicalDeviceMemoryProperties(Unwrap(*pPhysicalDevices), &memProps); ObjDisp(instance)->GetPhysicalDeviceFeatures(Unwrap(*pPhysicalDevices), &physFeatures); } localSerialiser->Serialise("physProps", physProps); localSerialiser->Serialise("memProps", memProps); localSerialiser->Serialise("physFeatures", physFeatures); VkPhysicalDevice pd = VK_NULL_HANDLE; if(m_State >= WRITING) { pd = *pPhysicalDevices; } else { uint32_t count; VkPhysicalDevice *devices; instance = GetResourceManager()->GetLiveHandle<VkInstance>(inst); VkResult vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, NULL); RDCASSERTEQUAL(vkr, VK_SUCCESS); RDCASSERT(count > physIndex); devices = new VkPhysicalDevice[count]; if(physIndex >= m_PhysicalDevices.size()) { m_PhysicalDevices.resize(physIndex+1); m_MemIdxMaps.resize(physIndex+1); } vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, devices); RDCASSERTEQUAL(vkr, VK_SUCCESS); // PORTABILITY match up physical devices to those available on replay pd = devices[physIndex]; for(size_t i=0; i < m_PhysicalDevices.size(); 
i++) { // physical devices might be re-created inside EnumeratePhysicalDevices every time, so // we need to re-wrap any previously enumerated physical devices if(m_PhysicalDevices[i] != VK_NULL_HANDLE) { RDCASSERTNOTEQUAL(i, physIndex); GetWrapped(m_PhysicalDevices[i])->RewrapObject(devices[i]); } } SAFE_DELETE_ARRAY(devices); GetResourceManager()->WrapResource(instance, pd); GetResourceManager()->AddLiveResource(physId, pd); m_PhysicalDevices[physIndex] = pd; uint32_t *storedMap = new uint32_t[32]; memcpy(storedMap, memIdxMap, sizeof(memIdxMap)); m_MemIdxMaps[physIndex] = storedMap; RDCLOG("Captured log describes physical device %u:", physIndex); RDCLOG(" - %s (ver %x) - %04x:%04x", physProps.deviceName, physProps.driverVersion, physProps.vendorID, physProps.deviceID); ObjDisp(pd)->GetPhysicalDeviceProperties(Unwrap(pd), &physProps); ObjDisp(pd)->GetPhysicalDeviceMemoryProperties(Unwrap(pd), &memProps); ObjDisp(pd)->GetPhysicalDeviceFeatures(Unwrap(pd), &physFeatures); RDCLOG("Replaying on physical device %u:", physIndex); RDCLOG(" - %s (ver %x) - %04x:%04x", physProps.deviceName, physProps.driverVersion, physProps.vendorID, physProps.deviceID); } return true; }
void D3D12Replay::InitPostVSBuffers(uint32_t eventId) { // go through any aliasing if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end()) eventId = m_PostVSAlias[eventId]; if(m_PostVSData.find(eventId) != m_PostVSData.end()) return; D3D12CommandData *cmd = m_pDevice->GetQueue()->GetCommandData(); const D3D12RenderState &rs = cmd->m_RenderState; if(rs.pipe == ResourceId()) return; WrappedID3D12PipelineState *origPSO = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe); if(!origPSO->IsGraphics()) return; D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = origPSO->GetGraphicsDesc(); if(psoDesc.VS.BytecodeLength == 0) return; WrappedID3D12Shader *vs = origPSO->VS(); D3D_PRIMITIVE_TOPOLOGY topo = rs.topo; const DrawcallDescription *drawcall = m_pDevice->GetDrawcall(eventId); if(drawcall->numIndices == 0) return; DXBC::DXBCFile *dxbcVS = vs->GetDXBC(); RDCASSERT(dxbcVS); DXBC::DXBCFile *dxbcGS = NULL; WrappedID3D12Shader *gs = origPSO->GS(); if(gs) { dxbcGS = gs->GetDXBC(); RDCASSERT(dxbcGS); } DXBC::DXBCFile *dxbcDS = NULL; WrappedID3D12Shader *ds = origPSO->DS(); if(ds) { dxbcDS = ds->GetDXBC(); RDCASSERT(dxbcDS); } ID3D12RootSignature *soSig = NULL; HRESULT hr = S_OK; { WrappedID3D12RootSignature *sig = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>(rs.graphics.rootsig); D3D12RootSignature rootsig = sig->sig; // create a root signature that allows stream out, if necessary if((rootsig.Flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT) == 0) { rootsig.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT; ID3DBlob *blob = m_pDevice->GetShaderCache()->MakeRootSig(rootsig); hr = m_pDevice->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), __uuidof(ID3D12RootSignature), (void **)&soSig); if(FAILED(hr)) { RDCERR("Couldn't enable stream-out in root signature: HRESULT: %s", ToStr(hr).c_str()); return; } SAFE_RELEASE(blob); } } vector<D3D12_SO_DECLARATION_ENTRY> sodecls; UINT stride = 0; int posidx 
= -1; int numPosComponents = 0; if(!dxbcVS->m_OutputSig.empty()) { for(const SigParameter &sign : dxbcVS->m_OutputSig) { D3D12_SO_DECLARATION_ENTRY decl; decl.Stream = 0; decl.OutputSlot = 0; decl.SemanticName = sign.semanticName.c_str(); decl.SemanticIndex = sign.semanticIndex; decl.StartComponent = 0; decl.ComponentCount = sign.compCount & 0xff; if(sign.systemValue == ShaderBuiltin::Position) { posidx = (int)sodecls.size(); numPosComponents = decl.ComponentCount = 4; } stride += decl.ComponentCount * sizeof(float); sodecls.push_back(decl); } if(stride == 0) { RDCERR("Didn't get valid stride! Setting to 4 bytes"); stride = 4; } // shift position attribute up to first, keeping order otherwise // the same if(posidx > 0) { D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx]; sodecls.erase(sodecls.begin() + posidx); sodecls.insert(sodecls.begin(), pos); } // set up stream output entries and buffers psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size(); psoDesc.StreamOutput.pSODeclaration = &sodecls[0]; psoDesc.StreamOutput.NumStrides = 1; psoDesc.StreamOutput.pBufferStrides = &stride; psoDesc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM; // disable all other shader stages psoDesc.HS.BytecodeLength = 0; psoDesc.HS.pShaderBytecode = NULL; psoDesc.DS.BytecodeLength = 0; psoDesc.DS.pShaderBytecode = NULL; psoDesc.GS.BytecodeLength = 0; psoDesc.GS.pShaderBytecode = NULL; psoDesc.PS.BytecodeLength = 0; psoDesc.PS.pShaderBytecode = NULL; // disable any rasterization/use of output targets psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; if(soSig) psoDesc.pRootSignature = soSig; // render as points psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; // disable outputs psoDesc.NumRenderTargets = 0; RDCEraseEl(psoDesc.RTVFormats); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; ID3D12PipelineState *pipe = NULL; hr = 
m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pipe); if(FAILED(hr)) { RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } ID3D12Resource *idxBuf = NULL; bool recreate = false; uint64_t outputSize = uint64_t(drawcall->numIndices) * drawcall->numInstances * stride; if(m_SOBufferSize < outputSize) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < outputSize) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output data", oldSize, m_SOBufferSize); recreate = true; } ID3D12GraphicsCommandList *list = NULL; if(!(drawcall->flags & DrawFlags::UseIBuffer)) { if(recreate) { m_pDevice->GPUSync(); CreateSOBuffers(); } list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } else // drawcall is indexed { bytebuf idxdata; GetBufferData(rs.ibuffer.buf, rs.ibuffer.offs + drawcall->indexOffset * rs.ibuffer.bytewidth, RDCMIN(drawcall->numIndices * rs.ibuffer.bytewidth, rs.ibuffer.size), idxdata); vector<uint32_t> indices; uint16_t *idx16 = (uint16_t *)&idxdata[0]; uint32_t *idx32 = (uint32_t *)&idxdata[0]; // only read as many indices as were available in the buffer uint32_t numIndices = RDCMIN(uint32_t(idxdata.size() / rs.ibuffer.bytewidth), drawcall->numIndices); uint32_t idxclamp = 0; if(drawcall->baseVertex < 0) idxclamp = uint32_t(-drawcall->baseVertex); // grab all unique vertex indices 
referenced for(uint32_t i = 0; i < numIndices; i++) { uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i]; // apply baseVertex but clamp to 0 (don't allow index to become negative) if(i32 < idxclamp) i32 = 0; else if(drawcall->baseVertex < 0) i32 -= idxclamp; else if(drawcall->baseVertex > 0) i32 += drawcall->baseVertex; auto it = std::lower_bound(indices.begin(), indices.end(), i32); if(it != indices.end() && *it == i32) continue; indices.insert(it, i32); } // if we read out of bounds, we'll also have a 0 index being referenced // (as 0 is read). Don't insert 0 if we already have 0 though if(numIndices < drawcall->numIndices && (indices.empty() || indices[0] != 0)) indices.insert(indices.begin(), 0); // An index buffer could be something like: 500, 501, 502, 501, 503, 502 // in which case we can't use the existing index buffer without filling 499 slots of vertex // data with padding. Instead we rebase the indices based on the smallest vertex so it becomes // 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer. // // Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512 // which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid. // We just stream-out a tightly packed list of unique indices, and then remap the index buffer // so that what did point to 500 points to 0 (accounting for rebasing), and what did point // to 510 now points to 3 (accounting for the unique sort). // we use a map here since the indices may be sparse. Especially considering if an index // is 'invalid' like 0xcccccccc then we don't want an array of 3.4 billion entries. 
map<uint32_t, size_t> indexRemap; for(size_t i = 0; i < indices.size(); i++) { // by definition, this index will only appear once in indices[] indexRemap[indices[i]] = i; } if(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t)) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t)) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for indices", oldSize, m_SOBufferSize); recreate = true; } if(recreate) { m_pDevice->GPUSync(); CreateSOBuffers(); } GetDebugManager()->FillBuffer(m_SOPatchedIndexBuffer, 0, &indices[0], indices.size() * sizeof(uint32_t)); D3D12_INDEX_BUFFER_VIEW patchedIB; patchedIB.BufferLocation = m_SOPatchedIndexBuffer->GetGPUVirtualAddress(); patchedIB.Format = DXGI_FORMAT_R32_UINT; patchedIB.SizeInBytes = UINT(indices.size() * sizeof(uint32_t)); list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); list->IASetIndexBuffer(&patchedIB); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); list->DrawIndexedInstanced((UINT)indices.size(), drawcall->numInstances, 0, 0, drawcall->instanceOffset); uint32_t stripCutValue = 0; if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF) stripCutValue = 0xffff; else if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF) stripCutValue = 0xffffffff; // rebase existing index buffer to point to the right elements in our stream-out'd // vertex buffer for(uint32_t i = 0; i < numIndices; i++) { uint32_t i32 = rs.ibuffer.bytewidth == 2 ? 
uint32_t(idx16[i]) : idx32[i]; // preserve primitive restart indices if(stripCutValue && i32 == stripCutValue) continue; // apply baseVertex but clamp to 0 (don't allow index to become negative) if(i32 < idxclamp) i32 = 0; else if(drawcall->baseVertex < 0) i32 -= idxclamp; else if(drawcall->baseVertex > 0) i32 += drawcall->baseVertex; if(rs.ibuffer.bytewidth == 2) idx16[i] = uint16_t(indexRemap[i32]); else idx32[i] = uint32_t(indexRemap[i32]); } idxBuf = NULL; if(!idxdata.empty()) { D3D12_RESOURCE_DESC idxBufDesc; idxBufDesc.Alignment = 0; idxBufDesc.DepthOrArraySize = 1; idxBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; idxBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; idxBufDesc.Format = DXGI_FORMAT_UNKNOWN; idxBufDesc.Height = 1; idxBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; idxBufDesc.MipLevels = 1; idxBufDesc.SampleDesc.Count = 1; idxBufDesc.SampleDesc.Quality = 0; idxBufDesc.Width = idxdata.size(); D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &idxBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&idxBuf); RDCASSERTEQUAL(hr, S_OK); SetObjName(idxBuf, StringFormat::Fmt("PostVS idxBuf for %u", eventId)); GetDebugManager()->FillBuffer(idxBuf, 0, &idxdata[0], idxdata.size()); } } D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; list->ResourceBarrier(1, &sobarr); list->CopyResource(m_SOStagingBuffer, m_SOBuffer); // we're done with this after the copy, so we can discard it and reset // the counter for the next stream-out sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; 
sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->DiscardResource(m_SOBuffer, NULL); list->ResourceBarrier(1, &sobarr); UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); SAFE_RELEASE(pipe); byte *byteData = NULL; D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize}; hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData); if(FAILED(hr)) { RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(idxBuf); SAFE_RELEASE(soSig); return; } range.End = 0; uint64_t numBytesWritten = *(uint64_t *)byteData; if(numBytesWritten == 0) { m_PostVSData[eventId] = D3D12PostVSData(); SAFE_RELEASE(idxBuf); SAFE_RELEASE(soSig); return; } // skip past the counter byteData += 64; uint64_t numPrims = numBytesWritten / stride; ID3D12Resource *vsoutBuffer = NULL; { D3D12_RESOURCE_DESC vertBufDesc; vertBufDesc.Alignment = 0; vertBufDesc.DepthOrArraySize = 1; vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; vertBufDesc.Format = DXGI_FORMAT_UNKNOWN; vertBufDesc.Height = 1; vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; vertBufDesc.MipLevels = 1; vertBufDesc.SampleDesc.Count = 1; vertBufDesc.SampleDesc.Quality = 0; vertBufDesc.Width = numBytesWritten; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&vsoutBuffer); 
RDCASSERTEQUAL(hr, S_OK); if(vsoutBuffer) { SetObjName(vsoutBuffer, StringFormat::Fmt("PostVS vsoutBuffer for %u", eventId)); GetDebugManager()->FillBuffer(vsoutBuffer, 0, byteData, (size_t)numBytesWritten); } } float nearp = 0.1f; float farp = 100.0f; Vec4f *pos0 = (Vec4f *)byteData; bool found = false; for(uint64_t i = 1; numPosComponents == 4 && i < numPrims; i++) { ////////////////////////////////////////////////////////////////////////////////// // derive near/far, assuming a standard perspective matrix // // the transformation from from pre-projection {Z,W} to post-projection {Z,W} // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1 // and we know Wpost = Zpre from the perspective matrix. // we can then see from the perspective matrix that // m = F/(F-N) // c = -(F*N)/(F-N) // // with re-arranging and substitution, we then get: // N = -c/m // F = c/(1-m) // // so if we can derive m and c then we can determine N and F. We can do this with // two points, and we pick them reasonably distinct on z to reduce floating-point // error Vec4f *pos = (Vec4f *)(byteData + i * stride); if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f) { Vec2f A(pos0->w, pos0->z); Vec2f B(pos->w, pos->z); float m = (B.y - A.y) / (B.x - A.x); float c = B.y - B.x * m; if(m == 1.0f) continue; nearp = -c / m; farp = c / (1 - m); found = true; break; } } // if we didn't find anything, all z's and w's were identical. 
// If the z is positive and w greater for the first element then // we detect this projection as reversed z with infinite far plane if(!found && pos0->z > 0.0f && pos0->w > pos0->z) { nearp = pos0->z; farp = FLT_MAX; } m_SOStagingBuffer->Unmap(0, &range); m_PostVSData[eventId].vsin.topo = topo; m_PostVSData[eventId].vsout.buf = vsoutBuffer; m_PostVSData[eventId].vsout.vertStride = stride; m_PostVSData[eventId].vsout.nearPlane = nearp; m_PostVSData[eventId].vsout.farPlane = farp; m_PostVSData[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::UseIBuffer); m_PostVSData[eventId].vsout.numVerts = drawcall->numIndices; m_PostVSData[eventId].vsout.instStride = 0; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].vsout.instStride = uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances)); m_PostVSData[eventId].vsout.idxBuf = NULL; if(m_PostVSData[eventId].vsout.useIndices && idxBuf) { m_PostVSData[eventId].vsout.idxBuf = idxBuf; m_PostVSData[eventId].vsout.idxFmt = rs.ibuffer.bytewidth == 2 ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; } m_PostVSData[eventId].vsout.hasPosOut = posidx >= 0; m_PostVSData[eventId].vsout.topo = topo; } else { // empty vertex output signature m_PostVSData[eventId].vsin.topo = topo; m_PostVSData[eventId].vsout.buf = NULL; m_PostVSData[eventId].vsout.instStride = 0; m_PostVSData[eventId].vsout.vertStride = 0; m_PostVSData[eventId].vsout.nearPlane = 0.0f; m_PostVSData[eventId].vsout.farPlane = 0.0f; m_PostVSData[eventId].vsout.useIndices = false; m_PostVSData[eventId].vsout.hasPosOut = false; m_PostVSData[eventId].vsout.idxBuf = NULL; m_PostVSData[eventId].vsout.topo = topo; } if(dxbcGS || dxbcDS) { stride = 0; posidx = -1; numPosComponents = 0; DXBC::DXBCFile *lastShader = dxbcGS; if(dxbcDS) lastShader = dxbcDS; sodecls.clear(); for(const SigParameter &sign : lastShader->m_OutputSig) { D3D12_SO_DECLARATION_ENTRY decl; // for now, skip streams that aren't stream 0 if(sign.stream != 0) continue; decl.Stream = 0; decl.OutputSlot = 0; decl.SemanticName = sign.semanticName.c_str(); decl.SemanticIndex = sign.semanticIndex; decl.StartComponent = 0; decl.ComponentCount = sign.compCount & 0xff; if(sign.systemValue == ShaderBuiltin::Position) { posidx = (int)sodecls.size(); numPosComponents = decl.ComponentCount = 4; } stride += decl.ComponentCount * sizeof(float); sodecls.push_back(decl); } // shift position attribute up to first, keeping order otherwise // the same if(posidx > 0) { D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx]; sodecls.erase(sodecls.begin() + posidx); sodecls.insert(sodecls.begin(), pos); } // enable the other shader stages again if(origPSO->DS()) psoDesc.DS = origPSO->DS()->GetDesc(); if(origPSO->HS()) psoDesc.HS = origPSO->HS()->GetDesc(); if(origPSO->GS()) psoDesc.GS = origPSO->GS()->GetDesc(); // configure new SO declarations psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size(); psoDesc.StreamOutput.pSODeclaration = &sodecls[0]; psoDesc.StreamOutput.NumStrides = 1; psoDesc.StreamOutput.pBufferStrides = 
&stride; // we're using the same topology this time psoDesc.PrimitiveTopologyType = origPSO->graphics->PrimitiveTopologyType; ID3D12PipelineState *pipe = NULL; hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pipe); if(FAILED(hr)) { RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; ID3D12GraphicsCommandList *list = NULL; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; // draws with multiple instances must be replayed one at a time so we can record the number of // primitives from each drawcall, as due to expansion this can vary per-instance. if(drawcall->numInstances > 1) { list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; // do a dummy draw to make sure we have enough space in the output buffer list->SOSetTargets(0, 1, &view); list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); // because the result is expanded we don't have to remap index buffers or anything if(drawcall->flags & DrawFlags::UseIBuffer) { list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1, m_SOStagingBuffer, 0); list->Close(); ID3D12CommandList 
*l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); // check that things are OK, and resize up if needed D3D12_RANGE range; range.Begin = 0; range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS); D3D12_QUERY_DATA_SO_STATISTICS *data; hr = m_SOStagingBuffer->Map(0, &range, (void **)&data); D3D12_QUERY_DATA_SO_STATISTICS result = *data; range.End = 0; m_SOStagingBuffer->Unmap(0, &range); if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize); CreateSOBuffers(); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; GetDebugManager()->ResetDebugAlloc(); // now do the actual stream out list = GetDebugManager()->ResetDebugList(); // first need to reset the counter byte values which may have either been written to above, or // are newly created { D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->ResourceBarrier(1, &sobarr); D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {}; counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; counterDesc.Format = DXGI_FORMAT_R32_UINT; counterDesc.Buffer.FirstElement = 0; counterDesc.Buffer.NumElements = 4; UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); std::swap(sobarr.Transition.StateBefore, sobarr.Transition.StateAfter); list->ResourceBarrier(1, &sobarr); } rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { 
list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } // reserve space for enough 'buffer filled size' locations view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + AlignUp(uint64_t(drawcall->numInstances * sizeof(UINT64)), 64ULL); // do incremental draws to get the output size. We have to do this O(N^2) style because // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... N instances // and count the total number of verts each time, then we can see from the difference how much // each instance wrote. for(uint32_t inst = 1; inst <= drawcall->numInstances; inst++) { if(drawcall->flags & DrawFlags::UseIBuffer) { view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64); list->SOSetTargets(0, 1, &view); list->DrawIndexedInstanced(drawcall->numIndices, inst, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64); list->SOSetTargets(0, 1, &view); list->DrawInstanced(drawcall->numIndices, inst, drawcall->vertexOffset, drawcall->instanceOffset); } } list->Close(); l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); // the last draw will have written the actual data we want into the buffer } else { // this only loops if we find from a query that we need to resize up while(true) { list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); // because the result is expanded we don't have to remap 
index buffers or anything if(drawcall->flags & DrawFlags::UseIBuffer) { list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1, m_SOStagingBuffer, 0); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); // check that things are OK, and resize up if needed D3D12_RANGE range; range.Begin = 0; range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS); D3D12_QUERY_DATA_SO_STATISTICS *data; hr = m_SOStagingBuffer->Map(0, &range, (void **)&data); if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize); CreateSOBuffers(); continue; } range.End = 0; m_SOStagingBuffer->Unmap(0, &range); GetDebugManager()->ResetDebugAlloc(); break; } } list = GetDebugManager()->ResetDebugList(); D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; list->ResourceBarrier(1, &sobarr); list->CopyResource(m_SOStagingBuffer, m_SOBuffer); // we're done with this after the copy, so we can discard it and reset // the counter for the next stream-out sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->DiscardResource(m_SOBuffer, NULL); list->ResourceBarrier(1, &sobarr); D3D12_UNORDERED_ACCESS_VIEW_DESC 
counterDesc = {}; counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; counterDesc.Format = DXGI_FORMAT_R32_UINT; counterDesc.Buffer.FirstElement = 0; counterDesc.Buffer.NumElements = 4; UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); SAFE_RELEASE(pipe); byte *byteData = NULL; D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize}; hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData); if(FAILED(hr)) { RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } range.End = 0; uint64_t *counters = (uint64_t *)byteData; uint64_t numBytesWritten = 0; std::vector<D3D12PostVSData::InstData> instData; if(drawcall->numInstances > 1) { uint64_t prevByteCount = 0; for(uint32_t inst = 0; inst < drawcall->numInstances; inst++) { uint64_t byteCount = counters[inst]; D3D12PostVSData::InstData d; d.numVerts = uint32_t((byteCount - prevByteCount) / stride); d.bufOffset = prevByteCount; prevByteCount = byteCount; instData.push_back(d); } numBytesWritten = prevByteCount; } else { numBytesWritten = counters[0]; } if(numBytesWritten == 0) { SAFE_RELEASE(soSig); return; } // skip past the counter(s) byteData += (view.BufferLocation - m_SOBuffer->GetGPUVirtualAddress()); uint64_t numVerts = numBytesWritten / stride; ID3D12Resource *gsoutBuffer = NULL; { D3D12_RESOURCE_DESC vertBufDesc; vertBufDesc.Alignment = 0; vertBufDesc.DepthOrArraySize = 1; vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; vertBufDesc.Format = DXGI_FORMAT_UNKNOWN; vertBufDesc.Height = 1; vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; vertBufDesc.MipLevels = 1; vertBufDesc.SampleDesc.Count = 1; 
vertBufDesc.SampleDesc.Quality = 0; vertBufDesc.Width = numBytesWritten; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&gsoutBuffer); RDCASSERTEQUAL(hr, S_OK); if(gsoutBuffer) { SetObjName(gsoutBuffer, StringFormat::Fmt("PostVS gsoutBuffer for %u", eventId)); GetDebugManager()->FillBuffer(gsoutBuffer, 0, byteData, (size_t)numBytesWritten); } } float nearp = 0.1f; float farp = 100.0f; Vec4f *pos0 = (Vec4f *)byteData; bool found = false; for(UINT64 i = 1; numPosComponents == 4 && i < numVerts; i++) { ////////////////////////////////////////////////////////////////////////////////// // derive near/far, assuming a standard perspective matrix // // the transformation from from pre-projection {Z,W} to post-projection {Z,W} // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1 // and we know Wpost = Zpre from the perspective matrix. // we can then see from the perspective matrix that // m = F/(F-N) // c = -(F*N)/(F-N) // // with re-arranging and substitution, we then get: // N = -c/m // F = c/(1-m) // // so if we can derive m and c then we can determine N and F. We can do this with // two points, and we pick them reasonably distinct on z to reduce floating-point // error Vec4f *pos = (Vec4f *)(byteData + i * stride); if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f) { Vec2f A(pos0->w, pos0->z); Vec2f B(pos->w, pos->z); float m = (B.y - A.y) / (B.x - A.x); float c = B.y - B.x * m; if(m == 1.0f) continue; nearp = -c / m; farp = c / (1 - m); found = true; break; } } // if we didn't find anything, all z's and w's were identical. 
// If the z is positive and w greater for the first element then // we detect this projection as reversed z with infinite far plane if(!found && pos0->z > 0.0f && pos0->w > pos0->z) { nearp = pos0->z; farp = FLT_MAX; } m_SOStagingBuffer->Unmap(0, &range); m_PostVSData[eventId].gsout.buf = gsoutBuffer; m_PostVSData[eventId].gsout.instStride = 0; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].gsout.instStride = uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances)); m_PostVSData[eventId].gsout.vertStride = stride; m_PostVSData[eventId].gsout.nearPlane = nearp; m_PostVSData[eventId].gsout.farPlane = farp; m_PostVSData[eventId].gsout.useIndices = false; m_PostVSData[eventId].gsout.hasPosOut = posidx >= 0; m_PostVSData[eventId].gsout.idxBuf = NULL; topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; if(lastShader == dxbcGS) { for(size_t i = 0; i < dxbcGS->GetNumDeclarations(); i++) { const DXBC::ASMDecl &decl = dxbcGS->GetDeclaration(i); if(decl.declaration == DXBC::OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY) { topo = decl.outTopology; break; } } } else if(lastShader == dxbcDS) { for(size_t i = 0; i < dxbcDS->GetNumDeclarations(); i++) { const DXBC::ASMDecl &decl = dxbcDS->GetDeclaration(i); if(decl.declaration == DXBC::OPCODE_DCL_TESS_DOMAIN) { if(decl.domain == DXBC::DOMAIN_ISOLINE) topo = D3D_PRIMITIVE_TOPOLOGY_LINELIST; else topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; } } } m_PostVSData[eventId].gsout.topo = topo; // streamout expands strips unfortunately if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ) m_PostVSData[eventId].gsout.topo = 
D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; m_PostVSData[eventId].gsout.numVerts = (uint32_t)numVerts; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].gsout.numVerts /= RDCMAX(1U, drawcall->numInstances); m_PostVSData[eventId].gsout.instData = instData; } SAFE_RELEASE(soSig); }
bool WrappedVulkan::Serialise_vkCreateDevice( Serialiser* localSerialiser, VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { SERIALISE_ELEMENT(ResourceId, physId, GetResID(physicalDevice)); SERIALISE_ELEMENT(VkDeviceCreateInfo, serCreateInfo, *pCreateInfo); SERIALISE_ELEMENT(ResourceId, devId, GetResID(*pDevice)); if(m_State == READING) { // we must make any modifications locally, so the free of pointers // in the serialised VkDeviceCreateInfo don't double-free VkDeviceCreateInfo createInfo = serCreateInfo; std::vector<string> Extensions; for(uint32_t i=0; i < createInfo.enabledExtensionCount; i++) { // don't include the debug marker extension if(strcmp(createInfo.ppEnabledExtensionNames[i], VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) Extensions.push_back(createInfo.ppEnabledExtensionNames[i]); } std::vector<string> Layers; for(uint32_t i=0; i < createInfo.enabledLayerCount; i++) Layers.push_back(createInfo.ppEnabledLayerNames[i]); StripUnwantedLayers(Layers); AddRequiredExtensions(false, Extensions); #if defined(FORCE_VALIDATION_LAYERS) Layers.push_back("VK_LAYER_LUNARG_standard_validation"); #endif createInfo.enabledLayerCount = (uint32_t)Layers.size(); const char **layerArray = NULL; if(!Layers.empty()) { layerArray = new const char *[createInfo.enabledLayerCount]; for(uint32_t i=0; i < createInfo.enabledLayerCount; i++) layerArray[i] = Layers[i].c_str(); createInfo.ppEnabledLayerNames = layerArray; } createInfo.enabledExtensionCount = (uint32_t)Extensions.size(); const char **extArray = NULL; if(!Extensions.empty()) { extArray = new const char *[createInfo.enabledExtensionCount]; for(uint32_t i=0; i < createInfo.enabledExtensionCount; i++) extArray[i] = Extensions[i].c_str(); createInfo.ppEnabledExtensionNames = extArray; } physicalDevice = GetResourceManager()->GetLiveHandle<VkPhysicalDevice>(physId); VkDevice device; uint32_t qCount = 0; 
ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, NULL); VkQueueFamilyProperties *props = new VkQueueFamilyProperties[qCount]; ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, props); bool found = false; uint32_t qFamilyIdx = 0; VkQueueFlags search = (VK_QUEUE_GRAPHICS_BIT); // for queue priorities, if we need it float one = 1.0f; // if we need to change the requested queues, it will point to this VkDeviceQueueCreateInfo *modQueues = NULL; for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++) { uint32_t idx = createInfo.pQueueCreateInfos[i].queueFamilyIndex; RDCASSERT(idx < qCount); // this requested queue is one we can use too if((props[idx].queueFlags & search) == search && createInfo.pQueueCreateInfos[i].queueCount > 0) { qFamilyIdx = idx; found = true; break; } } // if we didn't find it, search for which queue family we should add a request for if(!found) { RDCDEBUG("App didn't request a queue family we can use - adding our own"); for(uint32_t i=0; i < qCount; i++) { if((props[i].queueFlags & search) == search) { qFamilyIdx = i; found = true; break; } } if(!found) { SAFE_DELETE_ARRAY(props); RDCERR("Can't add a queue with required properties for RenderDoc! 
Unsupported configuration"); } else { // we found the queue family, add it modQueues = new VkDeviceQueueCreateInfo[createInfo.queueCreateInfoCount + 1]; for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++) modQueues[i] = createInfo.pQueueCreateInfos[i]; modQueues[createInfo.queueCreateInfoCount].queueFamilyIndex = qFamilyIdx; modQueues[createInfo.queueCreateInfoCount].queueCount = 1; modQueues[createInfo.queueCreateInfoCount].pQueuePriorities = &one; createInfo.pQueueCreateInfos = modQueues; createInfo.queueCreateInfoCount++; } } SAFE_DELETE_ARRAY(props); VkPhysicalDeviceFeatures enabledFeatures = {0}; if(createInfo.pEnabledFeatures != NULL) enabledFeatures = *createInfo.pEnabledFeatures; createInfo.pEnabledFeatures = &enabledFeatures; VkPhysicalDeviceFeatures availFeatures = {0}; ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures(Unwrap(physicalDevice), &availFeatures); if(availFeatures.fillModeNonSolid) enabledFeatures.fillModeNonSolid = true; else RDCWARN("fillModeNonSolid = false, wireframe overlay will be solid"); if(availFeatures.robustBufferAccess) enabledFeatures.robustBufferAccess = true; else RDCWARN("robustBufferAccess = false, out of bounds access due to bugs in application or RenderDoc may cause crashes"); if(availFeatures.vertexPipelineStoresAndAtomics) enabledFeatures.vertexPipelineStoresAndAtomics = true; else RDCWARN("vertexPipelineStoresAndAtomics = false, output mesh data will not be available"); uint32_t numExts = 0; VkResult vkr = ObjDisp(physicalDevice)->EnumerateDeviceExtensionProperties(Unwrap(physicalDevice), NULL, &numExts, NULL); RDCASSERTEQUAL(vkr, VK_SUCCESS); VkExtensionProperties *exts = new VkExtensionProperties[numExts]; vkr = ObjDisp(physicalDevice)->EnumerateDeviceExtensionProperties(Unwrap(physicalDevice), NULL, &numExts, exts); RDCASSERTEQUAL(vkr, VK_SUCCESS); for(uint32_t i=0; i < numExts; i++) RDCLOG("Ext %u: %s (%u)", i, exts[i].extensionName, exts[i].specVersion); SAFE_DELETE_ARRAY(exts); // PORTABILITY check that 
extensions and layers supported in capture (from createInfo) are supported in replay vkr = GetDeviceDispatchTable(NULL)->CreateDevice(Unwrap(physicalDevice), &createInfo, NULL, &device); RDCASSERTEQUAL(vkr, VK_SUCCESS); GetResourceManager()->WrapResource(device, device); GetResourceManager()->AddLiveResource(devId, device); InitDeviceReplayTables(Unwrap(device)); RDCASSERT(m_Device == VK_NULL_HANDLE); // MULTIDEVICE m_PhysicalDevice = physicalDevice; m_Device = device; m_QueueFamilyIdx = qFamilyIdx; if(m_InternalCmds.cmdpool == VK_NULL_HANDLE) { VkCommandPoolCreateInfo poolInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, NULL, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, qFamilyIdx }; vkr = ObjDisp(device)->CreateCommandPool(Unwrap(device), &poolInfo, NULL, &m_InternalCmds.cmdpool); RDCASSERTEQUAL(vkr, VK_SUCCESS); GetResourceManager()->WrapResource(Unwrap(device), m_InternalCmds.cmdpool); } ObjDisp(physicalDevice)->GetPhysicalDeviceProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.props); ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.memProps); ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures(Unwrap(physicalDevice), &m_PhysicalDeviceData.features); for(int i=VK_FORMAT_BEGIN_RANGE+1; i < VK_FORMAT_END_RANGE; i++) ObjDisp(physicalDevice)->GetPhysicalDeviceFormatProperties(Unwrap(physicalDevice), VkFormat(i), &m_PhysicalDeviceData.fmtprops[i]); m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); for(size_t i=0; i < m_PhysicalDevices.size(); i++) { if(physicalDevice == m_PhysicalDevices[i]) { m_PhysicalDeviceData.memIdxMap = m_MemIdxMaps[i]; break; } } 
m_DebugManager = new VulkanDebugManager(this, device); SAFE_DELETE_ARRAY(modQueues); SAFE_DELETE_ARRAY(layerArray); SAFE_DELETE_ARRAY(extArray); } return true; }
// Walks the frame capture section of the log, either reading it for the first time
// (readType == READING) or re-executing the events in [startEventID, endEventID]
// (readType == EXECUTING).
//
// partial is forwarded (negated) to Serialise_BeginCaptureFrame, and when executing it also
// decides whether the serialiser seeks straight to the first requested event.
//
// m_State is always left as READING on exit.
void WrappedID3D12CommandQueue::ReplayLog(LogState readType, uint32_t startEventID,
                                          uint32_t endEventID, bool partial)
{
  m_State = readType;

  // the capture section is expected to open with the header chunk
  D3D12ChunkType header = (D3D12ChunkType)m_pSerialiser->PushContext(NULL, NULL, 1, false);
  RDCASSERTEQUAL(header, CONTEXT_CAPTURE_HEADER);

  m_pDevice->Serialise_BeginCaptureFrame(!partial);

  if(readType == READING)
  {
    GetResourceManager()->ApplyInitialContents();

    m_pDevice->ExecuteLists();
    m_pDevice->FlushLists();
  }

  m_pSerialiser->PopContext(header);

  m_Cmd.m_RootEvents.clear();

  if(m_State == EXECUTING)
  {
    FetchAPIEvent firstEvent = GetEvent(startEventID);
    m_Cmd.m_RootEventID = firstEvent.eventID;

    // a non-partial replay has to run through the command list records as well, so we
    // may only jump to the first event's file offset when doing a partial replay
    if(partial)
      m_pSerialiser->SetOffset(firstEvent.fileOffset);

    m_Cmd.m_FirstEventID = startEventID;
    m_Cmd.m_LastEventID = endEventID;
  }
  else if(m_State == READING)
  {
    m_Cmd.m_RootEventID = 1;
    m_Cmd.m_RootDrawcallID = 1;
    m_Cmd.m_FirstEventID = 0;
    m_Cmd.m_LastEventID = ~0U;
  }

  while(true)
  {
    // once every requested event has been executed there is nothing left to do.
    // (command list events aren't 'real' events, they are simply run through)
    if(m_State == EXECUTING && m_Cmd.m_RootEventID > endEventID)
      break;

    uint64_t chunkOffset = m_pSerialiser->GetOffset();

    D3D12ChunkType chunk = (D3D12ChunkType)m_pSerialiser->PushContext(NULL, NULL, 1, false);

    m_Cmd.m_LastCmdListID = ResourceId();

    ProcessChunk(chunkOffset, chunk);

    RenderDoc::Inst().SetProgress(FileInitialRead,
                                  float(chunkOffset) / float(m_pSerialiser->GetSize()));

    // stop at the end of the capture scope (multiple frames aren't supported for now),
    // and also stop if only a single event was requested for execution
    const bool reachedFooter = (chunk == CONTEXT_CAPTURE_FOOTER);
    if(reachedFooter || (m_State == EXECUTING && startEventID == endEventID))
      break;

    // the root event ID advances if the chunk wasn't a command list event, OR if this is
    // a replay of a sub-section of the frame. In the latter case the caller is responsible
    // for staying inside a single command list (crossing command list boundaries would
    // make the event IDs stop matching up).
    const bool advanceRoot = (m_Cmd.m_LastCmdListID == ResourceId() || startEventID > 1);

    if(!advanceRoot)
    {
      m_Cmd.m_BakedCmdListInfo[m_Cmd.m_LastCmdListID].curEventID++;
      continue;
    }

    m_Cmd.m_RootEventID++;

    if(startEventID > 1)
      m_pSerialiser->SetOffset(GetEvent(m_Cmd.m_RootEventID).fileOffset);
  }

  if(m_State == READING)
  {
    // order the gathered events by event ID
    std::sort(m_Cmd.m_Events.begin(), m_Cmd.m_Events.end(),
              [](const FetchAPIEvent &a, const FetchAPIEvent &b) { return a.eventID < b.eventID; });
  }

  // release the partial replay results and any re-recorded command lists
  for(int partialIdx = 0; partialIdx < D3D12CommandData::ePartialNum; partialIdx++)
    SAFE_RELEASE(m_Cmd.m_Partial[partialIdx].resultPartialCmdList);

  for(auto &rerecorded : m_Cmd.m_RerecordCmds)
    SAFE_RELEASE(rerecorded.second);

  m_Cmd.m_RerecordCmds.clear();

  m_State = READING;
}
void WrappedVulkan::Initialise(VkInitParams ¶ms) { m_InitParams = params; params.AppName = string("RenderDoc @ ") + params.AppName; params.EngineName = string("RenderDoc @ ") + params.EngineName; // PORTABILITY verify that layers/extensions are available StripUnwantedLayers(params.Layers); #if defined(FORCE_VALIDATION_LAYERS) params.Layers.push_back("VK_LAYER_LUNARG_standard_validation"); params.Extensions.push_back("VK_EXT_debug_report"); #endif // strip out any WSI extensions. We'll add the ones we want for creating windows // on the current platforms below, and we don't replay any of the WSI functionality // directly so these extensions aren't needed for(auto it = params.Extensions.begin(); it != params.Extensions.end();) { if(*it == "VK_KHR_xlib_surface" || *it == "VK_KHR_xcb_surface" || *it == "VK_KHR_wayland_surface" || *it == "VK_KHR_mir_surface" || *it == "VK_KHR_android_surface" || *it == "VK_KHR_win32_surface") { it = params.Extensions.erase(it); } else { ++it; } } AddRequiredExtensions(true, params.Extensions); const char **layerscstr = new const char *[params.Layers.size()]; for(size_t i=0; i < params.Layers.size(); i++) layerscstr[i] = params.Layers[i].c_str(); const char **extscstr = new const char *[params.Extensions.size()]; for(size_t i=0; i < params.Extensions.size(); i++) extscstr[i] = params.Extensions[i].c_str(); VkApplicationInfo appinfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO, NULL, params.AppName.c_str(), params.AppVersion, params.EngineName.c_str(), params.EngineVersion, VK_API_VERSION_1_0, }; VkInstanceCreateInfo instinfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, NULL, 0, &appinfo, (uint32_t)params.Layers.size(), layerscstr, (uint32_t)params.Extensions.size(), extscstr, }; m_Instance = VK_NULL_HANDLE; VkResult ret = GetInstanceDispatchTable(NULL)->CreateInstance(&instinfo, NULL, &m_Instance); RDCASSERTEQUAL(ret, VK_SUCCESS); InitInstanceReplayTables(m_Instance); GetResourceManager()->WrapResource(m_Instance, m_Instance); 
GetResourceManager()->AddLiveResource(params.InstanceID, m_Instance); m_DbgMsgCallback = VK_NULL_HANDLE; m_PhysicalDevice = VK_NULL_HANDLE; m_Device = VK_NULL_HANDLE; m_QueueFamilyIdx = ~0U; m_Queue = VK_NULL_HANDLE; m_InternalCmds.Reset(); if(ObjDisp(m_Instance)->CreateDebugReportCallbackEXT) { VkDebugReportCallbackCreateInfoEXT debugInfo = {}; debugInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT; debugInfo.pNext = NULL; debugInfo.pfnCallback = &DebugCallbackStatic; debugInfo.pUserData = this; debugInfo.flags = VK_DEBUG_REPORT_WARNING_BIT_EXT|VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT|VK_DEBUG_REPORT_ERROR_BIT_EXT; ObjDisp(m_Instance)->CreateDebugReportCallbackEXT(Unwrap(m_Instance), &debugInfo, NULL, &m_DbgMsgCallback); } SAFE_DELETE_ARRAY(layerscstr); SAFE_DELETE_ARRAY(extscstr); }
void WrappedID3D11DeviceContext::ReplayLog(LogState readType, uint32_t startEventID, uint32_t endEventID, bool partial) { m_State = readType; m_DoStateVerify = true; D3D11ChunkType header = (D3D11ChunkType)m_pSerialiser->PushContext(NULL, NULL, 1, false); RDCASSERTEQUAL(header, CONTEXT_CAPTURE_HEADER); ResourceId id; m_pSerialiser->Serialise("context", id); WrappedID3D11DeviceContext *context = (WrappedID3D11DeviceContext *)m_pDevice->GetResourceManager()->GetLiveResource(id); RDCASSERT(WrappedID3D11DeviceContext::IsAlloc(context) && context == this); Serialise_BeginCaptureFrame(!partial); m_pSerialiser->PopContext(header); m_CurEvents.clear(); if(m_State == EXECUTING) { FetchAPIEvent ev = GetEvent(startEventID); m_CurEventID = ev.eventID; m_pSerialiser->SetOffset(ev.fileOffset); } else if(m_State == READING) { m_CurEventID = 1; } if(m_State == EXECUTING) { ClearMaps(); for(size_t i=0; i < m_pDevice->GetNumDeferredContexts(); i++) { WrappedID3D11DeviceContext *defcontext = m_pDevice->GetDeferredContext(i); defcontext->ClearMaps(); } } m_pDevice->GetResourceManager()->MarkInFrame(true); uint64_t startOffset = m_pSerialiser->GetOffset(); while(1) { if(m_State == EXECUTING && m_CurEventID > endEventID) { // we can just break out if we've done all the events desired. break; } uint64_t offset = m_pSerialiser->GetOffset(); D3D11ChunkType chunktype = (D3D11ChunkType)m_pSerialiser->PushContext(NULL, NULL, 1, false); ProcessChunk(offset, chunktype, false); RenderDoc::Inst().SetProgress(FrameEventsRead, float(offset - startOffset)/float(m_pSerialiser->GetSize())); // for now just abort after capture scope. Really we'd need to support multiple frames // but for now this will do. 
if(chunktype == CONTEXT_CAPTURE_FOOTER) break; m_CurEventID++; } if(m_State == READING) { m_pDevice->GetFrameRecord().back().drawcallList = m_ParentDrawcall.Bake(); m_pDevice->GetFrameRecord().back().frameInfo.debugMessages = m_pDevice->GetDebugMessages(); int initialSkips = 0; for(auto it=WrappedID3D11Buffer::m_BufferList.begin(); it != WrappedID3D11Buffer::m_BufferList.end(); ++it) m_ResourceUses[it->first]; for(auto it=WrappedID3D11Texture1D::m_TextureList.begin(); it != WrappedID3D11Texture1D::m_TextureList.end(); ++it) m_ResourceUses[it->first]; for(auto it=WrappedID3D11Texture2D::m_TextureList.begin(); it != WrappedID3D11Texture2D::m_TextureList.end(); ++it) m_ResourceUses[it->first]; for(auto it=WrappedID3D11Texture3D::m_TextureList.begin(); it != WrappedID3D11Texture3D::m_TextureList.end(); ++it) m_ResourceUses[it->first]; // it's easier to remove duplicate usages here than check it as we go. // this means if textures are bound in multiple places in the same draw // we don't have duplicate uses for(auto it = m_ResourceUses.begin(); it != m_ResourceUses.end(); ++it) { vector<EventUsage> &v = it->second; std::sort(v.begin(), v.end()); v.erase( std::unique(v.begin(), v.end()), v.end() ); #if 0 ResourceId resid = m_pDevice->GetResourceManager()->GetOriginalID(it->first); if(m_pDevice->GetResourceManager()->GetInitialContents(resid).resource == NULL) continue; // code disabled for now as skipping these initial states // doesn't seem to produce any measurable improvement in any case // I've checked RDCDEBUG("Resource %llu", resid); if(v.empty()) { RDCDEBUG("Never used!"); initialSkips++; } else { bool written = false; for(auto usit = v.begin(); usit != v.end(); ++usit) { ResourceUsage u = usit->usage; if(u == eUsage_SO || (u >= eUsage_VS_RWResource && u <= eUsage_CS_RWResource) || u == eUsage_DepthStencilTarget || u == eUsage_ColourTarget) { written = true; break; } } if(written) { RDCDEBUG("Written in frame - needs initial state"); } else { RDCDEBUG("Never 
written to in the frame"); initialSkips++; } } #endif } //RDCDEBUG("Can skip %d initial states.", initialSkips); } m_pDevice->GetResourceManager()->MarkInFrame(false); m_State = READING; m_DoStateVerify = false; }
bool WrappedVulkan::Serialise_vkCreateSwapchainKHR( Serialiser* localSerialiser, VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapChain) { SERIALISE_ELEMENT(ResourceId, devId, GetResID(device)); SERIALISE_ELEMENT(VkSwapchainCreateInfoKHR, info, *pCreateInfo); SERIALISE_ELEMENT(ResourceId, id, GetResID(*pSwapChain)); uint32_t numIms = 0; if(m_State >= WRITING) { VkResult vkr = VK_SUCCESS; vkr = ObjDisp(device)->GetSwapchainImagesKHR(Unwrap(device), Unwrap(*pSwapChain), &numIms, NULL); RDCASSERTEQUAL(vkr, VK_SUCCESS); } SERIALISE_ELEMENT(uint32_t, numSwapImages, numIms); SERIALISE_ELEMENT(VkSharingMode, sharingMode, pCreateInfo->imageSharingMode); if(m_State == READING) { // use original ID because we don't create a live version of the swapchain SwapchainInfo &swapinfo = m_CreationInfo.m_SwapChain[id]; swapinfo.format = info.imageFormat; swapinfo.extent = info.imageExtent; swapinfo.arraySize = info.imageArrayLayers; swapinfo.images.resize(numSwapImages); device = GetResourceManager()->GetLiveHandle<VkDevice>(devId); const VkImageCreateInfo imInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, NULL, 0, VK_IMAGE_TYPE_2D, info.imageFormat, { info.imageExtent.width, info.imageExtent.height, 1 }, 1, info.imageArrayLayers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT| VK_IMAGE_USAGE_TRANSFER_DST_BIT| VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT| VK_IMAGE_USAGE_SAMPLED_BIT, sharingMode, 0, NULL, VK_IMAGE_LAYOUT_UNDEFINED, }; for(uint32_t i=0; i < numSwapImages; i++) { VkDeviceMemory mem = VK_NULL_HANDLE; VkImage im = VK_NULL_HANDLE; VkResult vkr = ObjDisp(device)->CreateImage(Unwrap(device), &imInfo, NULL, &im); RDCASSERTEQUAL(vkr, VK_SUCCESS); ResourceId liveId = GetResourceManager()->WrapResource(Unwrap(device), im); VkMemoryRequirements mrq = {0}; ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(im), &mrq); VkMemoryAllocateInfo allocInfo = { 
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL, mrq.size, GetGPULocalMemoryIndex(mrq.memoryTypeBits), }; vkr = ObjDisp(device)->AllocateMemory(Unwrap(device), &allocInfo, NULL, &mem); RDCASSERTEQUAL(vkr, VK_SUCCESS); ResourceId memid = GetResourceManager()->WrapResource(Unwrap(device), mem); // register as a live-only resource, so it is cleaned up properly GetResourceManager()->AddLiveResource(memid, mem); vkr = ObjDisp(device)->BindImageMemory(Unwrap(device), Unwrap(im), Unwrap(mem), 0); RDCASSERTEQUAL(vkr, VK_SUCCESS); // image live ID will be assigned separately in Serialise_vkGetSwapChainInfoWSI // memory doesn't have a live ID swapinfo.images[i].im = im; // fill out image info so we track resource state barriers // sneaky-cheeky use of the swapchain's ID here (it's not a live ID because // we don't create a live swapchain). This will be picked up in // Serialise_vkGetSwapchainImagesKHR to set the data for the live IDs on the // swapchain images. VulkanCreationInfo::Image &iminfo = m_CreationInfo.m_Image[id]; iminfo.type = VK_IMAGE_TYPE_2D; iminfo.format = info.imageFormat; iminfo.extent.width = info.imageExtent.width; iminfo.extent.height = info.imageExtent.height; iminfo.extent.depth = 1; iminfo.mipLevels = 1; iminfo.arrayLayers = info.imageArrayLayers; iminfo.creationFlags = eTextureCreate_SRV|eTextureCreate_RTV|eTextureCreate_SwapBuffer; iminfo.cube = false; iminfo.samples = VK_SAMPLE_COUNT_1_BIT; m_CreationInfo.m_Names[liveId] = StringFormat::Fmt("Presentable Image %u", i); VkImageSubresourceRange range; range.baseMipLevel = range.baseArrayLayer = 0; range.levelCount = 1; range.layerCount = info.imageArrayLayers; range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; m_ImageLayouts[liveId].subresourceStates.clear(); m_ImageLayouts[liveId].subresourceStates.push_back(ImageRegionState(range, UNKNOWN_PREV_IMG_LAYOUT, VK_IMAGE_LAYOUT_UNDEFINED)); } } return true; }
// Wrapped vkCreateSwapchainKHR.
//
// Creates the real swapchain (with VK_IMAGE_USAGE_TRANSFER_SRC_BIT added to the image usage and
// the surface/old swapchain handles unwrapped) and wraps the result.
//
// While capturing (m_State >= WRITING) it also serialises the creation into a chunk on the
// swapchain's resource record, registers a frame capturer for the window handle, and creates a
// render pass plus an image view and framebuffer for each swapchain image. Otherwise the
// swapchain is only registered as a live resource.
//
// Returns the result of the underlying CreateSwapchainKHR call.
VkResult WrappedVulkan::vkCreateSwapchainKHR(VkDevice device,
                                             const VkSwapchainCreateInfoKHR *pCreateInfo,
                                             const VkAllocationCallbacks *pAllocator,
                                             VkSwapchainKHR *pSwapChain)
{
  VkSwapchainCreateInfoKHR createInfo = *pCreateInfo;

  // the screenshot is read back from the swapchain images, so they must be usable as a
  // transfer source
  createInfo.imageUsage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;

  createInfo.surface = Unwrap(createInfo.surface);
  createInfo.oldSwapchain = Unwrap(createInfo.oldSwapchain);

  VkResult ret =
      ObjDisp(device)->CreateSwapchainKHR(Unwrap(device), &createInfo, pAllocator, pSwapChain);

  // nothing to wrap or track if the creation failed
  if(ret != VK_SUCCESS)
    return ret;

  ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pSwapChain);

  if(m_State < WRITING)
  {
    GetResourceManager()->AddLiveResource(id, *pSwapChain);
    return ret;
  }

  Chunk *chunk = NULL;

  {
    CACHE_THREAD_SERIALISER();

    SCOPED_SERIALISE_CONTEXT(CREATE_SWAP_BUFFER);
    Serialise_vkCreateSwapchainKHR(localSerialiser, device, pCreateInfo, NULL, pSwapChain);

    chunk = scope.Get();
  }

  VkResourceRecord *swapRecord = GetResourceManager()->AddResourceRecord(*pSwapChain);
  swapRecord->AddChunk(chunk);

  swapRecord->swapInfo = new SwapchainInfo();

  SwapchainInfo &swapInfo = *swapRecord->swapInfo;

  // sneaky casting of window handle into record
  swapInfo.wndHandle = (RENDERDOC_WindowHandle)GetRecord(pCreateInfo->surface);

  {
    SCOPED_LOCK(m_SwapLookupLock);
    m_SwapLookup[swapInfo.wndHandle] = *pSwapChain;
  }

  RenderDoc::Inst().AddFrameCapturer(LayerDisp(m_Instance), swapInfo.wndHandle, this);

  swapInfo.format = pCreateInfo->imageFormat;
  swapInfo.extent = pCreateInfo->imageExtent;
  swapInfo.arraySize = pCreateInfo->imageArrayLayers;

  VkResult vkr = VK_SUCCESS;

  const VkLayerDispatchTable *dispatch = ObjDisp(device);

  // render pass with a single colour attachment in the swapchain's format
  {
    VkAttachmentDescription colourAttachment = {
        0,
        pCreateInfo->imageFormat,
        VK_SAMPLE_COUNT_1_BIT,
        VK_ATTACHMENT_LOAD_OP_LOAD,
        VK_ATTACHMENT_STORE_OP_STORE,
        VK_ATTACHMENT_LOAD_OP_DONT_CARE,
        VK_ATTACHMENT_STORE_OP_DONT_CARE,
        VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
    };

    VkAttachmentReference colourRef = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};

    VkSubpassDescription subpass = {
        0,    VK_PIPELINE_BIND_POINT_GRAPHICS,
        0,    NULL,          // inputs
        1,    &colourRef,    // color
        NULL,                // resolve
        NULL,                // depth-stencil
        0,    NULL,          // preserve
    };

    VkRenderPassCreateInfo passInfo = {
        VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        NULL,
        0,
        1,
        &colourAttachment,
        1,
        &subpass,
        0,
        NULL,    // dependencies
    };

    vkr = dispatch->CreateRenderPass(Unwrap(device), &passInfo, NULL, &swapInfo.rp);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    GetResourceManager()->WrapResource(Unwrap(device), swapInfo.rp);
  }

  // serialise out the swap chain images
  {
    uint32_t numSwapImages;
    VkResult ret = dispatch->GetSwapchainImagesKHR(Unwrap(device), Unwrap(*pSwapChain),
                                                   &numSwapImages, NULL);
    RDCASSERTEQUAL(ret, VK_SUCCESS);

    swapInfo.lastPresent = 0;
    swapInfo.images.resize(numSwapImages);

    // start every slot off with null handles
    for(uint32_t slotIdx = 0; slotIdx < numSwapImages; slotIdx++)
    {
      SwapchainInfo::SwapImage &emptySlot = swapInfo.images[slotIdx];
      emptySlot.im = VK_NULL_HANDLE;
      emptySlot.view = VK_NULL_HANDLE;
      emptySlot.fb = VK_NULL_HANDLE;
    }

    VkImage *swapImages = new VkImage[numSwapImages];

    // fetch the images via our own wrapped function, so that they are assigned IDs
    ret = vkGetSwapchainImagesKHR(device, *pSwapChain, &numSwapImages, swapImages);
    RDCASSERTEQUAL(ret, VK_SUCCESS);

    for(uint32_t slotIdx = 0; slotIdx < numSwapImages; slotIdx++)
    {
      SwapchainInfo::SwapImage &slot = swapInfo.images[slotIdx];

      // memory doesn't exist for genuine WSI created images
      slot.im = swapImages[slotIdx];

      ResourceId imageId = GetResID(swapImages[slotIdx]);

      // a single colour range covering mip 0 and every array layer
      VkImageSubresourceRange wholeImage;
      wholeImage.baseMipLevel = 0;
      wholeImage.baseArrayLayer = 0;
      wholeImage.levelCount = 1;
      wholeImage.layerCount = pCreateInfo->imageArrayLayers;
      wholeImage.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

      // fill out image info so we track resource state barriers
      {
        SCOPED_LOCK(m_ImageLayoutsLock);
        m_ImageLayouts[imageId].subresourceStates.clear();
        m_ImageLayouts[imageId].subresourceStates.push_back(
            ImageRegionState(wholeImage, UNKNOWN_PREV_IMG_LAYOUT, VK_IMAGE_LAYOUT_UNDEFINED));
      }

      // create a view of the image and a framebuffer that uses it with the render pass above
      {
        VkImageViewCreateInfo viewInfo = {
            VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            NULL,
            0,
            Unwrap(swapImages[slotIdx]),
            VK_IMAGE_VIEW_TYPE_2D,
            pCreateInfo->imageFormat,
            {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
             VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY},
            {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
        };

        vkr = dispatch->CreateImageView(Unwrap(device), &viewInfo, NULL, &slot.view);
        RDCASSERTEQUAL(vkr, VK_SUCCESS);

        GetResourceManager()->WrapResource(Unwrap(device), slot.view);

        VkFramebufferCreateInfo framebufferInfo = {
            VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
            NULL,
            0,
            Unwrap(swapInfo.rp),
            1,
            UnwrapPtr(slot.view),
            (uint32_t)pCreateInfo->imageExtent.width,
            (uint32_t)pCreateInfo->imageExtent.height,
            1,
        };

        vkr = dispatch->CreateFramebuffer(Unwrap(device), &framebufferInfo, NULL, &slot.fb);
        RDCASSERTEQUAL(vkr, VK_SUCCESS);

        GetResourceManager()->WrapResource(Unwrap(device), slot.fb);
      }
    }

    SAFE_DELETE_ARRAY(swapImages);
  }

  return ret;
}
MeshDisplayPipelines VulkanDebugManager::CacheMeshDisplayPipelines(VkPipelineLayout pipeLayout, const MeshFormat &primary, const MeshFormat &secondary) { // generate a key to look up the map uint64_t key = 0; uint64_t bit = 0; if(primary.indexByteStride == 4) key |= 1ULL << bit; bit++; RDCASSERT((uint32_t)primary.topology < 64); key |= uint64_t((uint32_t)primary.topology & 0x3f) << bit; bit += 6; VkFormat primaryFmt = MakeVkFormat(primary.format); VkFormat secondaryFmt = secondary.vertexResourceId == ResourceId() ? VK_FORMAT_UNDEFINED : MakeVkFormat(secondary.format); RDCCOMPILE_ASSERT(VK_FORMAT_RANGE_SIZE <= 255, "Mesh pipeline cache key needs an extra bit for format"); key |= uint64_t((uint32_t)primaryFmt & 0xff) << bit; bit += 8; key |= uint64_t((uint32_t)secondaryFmt & 0xff) << bit; bit += 8; RDCASSERT(primary.vertexByteStride <= 0xffff); key |= uint64_t((uint32_t)primary.vertexByteStride & 0xffff) << bit; bit += 16; if(secondary.vertexResourceId != ResourceId()) { RDCASSERT(secondary.vertexByteStride <= 0xffff); key |= uint64_t((uint32_t)secondary.vertexByteStride & 0xffff) << bit; } bit += 16; if(primary.instanced) key |= 1ULL << bit; bit++; if(secondary.instanced) key |= 1ULL << bit; bit++; // only 64 bits, make sure they all fit RDCASSERT(bit < 64); MeshDisplayPipelines &cache = m_CachedMeshPipelines[key]; if(cache.pipes[(uint32_t)SolidShade::NoSolid] != VK_NULL_HANDLE) return cache; const VkLayerDispatchTable *vt = ObjDisp(m_Device); VkResult vkr = VK_SUCCESS; // should we try and evict old pipelines from the cache here? // or just keep them forever VkVertexInputBindingDescription binds[] = { // primary {0, primary.vertexByteStride, primary.instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX}, // secondary {1, secondary.vertexByteStride, secondary.instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX}}; RDCASSERT(primaryFmt != VK_FORMAT_UNDEFINED); VkVertexInputAttributeDescription vertAttrs[] = { // primary { 0, 0, primaryFmt, 0, }, // secondary { 1, 0, primaryFmt, 0, }, }; VkPipelineVertexInputStateCreateInfo vi = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, NULL, 0, 1, binds, 2, vertAttrs, }; VkPipelineShaderStageCreateInfo stages[3] = { {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, NULL, 0, VK_SHADER_STAGE_ALL_GRAPHICS, VK_NULL_HANDLE, "main", NULL}, {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, NULL, 0, VK_SHADER_STAGE_ALL_GRAPHICS, VK_NULL_HANDLE, "main", NULL}, {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, NULL, 0, VK_SHADER_STAGE_ALL_GRAPHICS, VK_NULL_HANDLE, "main", NULL}, }; VkPipelineInputAssemblyStateCreateInfo ia = { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, NULL, 0, primary.topology >= Topology::PatchList ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : MakeVkPrimitiveTopology(primary.topology), false, }; VkRect2D scissor = {{0, 0}, {16384, 16384}}; VkPipelineViewportStateCreateInfo vp = { VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, NULL, 0, 1, NULL, 1, &scissor}; VkPipelineRasterizationStateCreateInfo rs = { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, NULL, 0, false, false, VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE, false, 0.0f, 0.0f, 0.0f, 1.0f, }; VkPipelineMultisampleStateCreateInfo msaa = { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, NULL, 0, VULKAN_MESH_VIEW_SAMPLES, false, 0.0f, NULL, false, false}; VkPipelineDepthStencilStateCreateInfo ds = { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, NULL, 0, true, true, VK_COMPARE_OP_LESS_OR_EQUAL, false, false, {VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS, 0, 0, 0}, {VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS, 0, 0, 0}, 0.0f, 1.0f, }; 
VkPipelineColorBlendAttachmentState attState = { false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, 0xf, }; VkPipelineColorBlendStateCreateInfo cb = { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, NULL, 0, false, VK_LOGIC_OP_NO_OP, 1, &attState, {1.0f, 1.0f, 1.0f, 1.0f}}; VkDynamicState dynstates[] = {VK_DYNAMIC_STATE_VIEWPORT}; VkPipelineDynamicStateCreateInfo dyn = { VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, NULL, 0, ARRAY_COUNT(dynstates), dynstates, }; VkRenderPass rp; // compatible render pass { VkAttachmentDescription attDesc[] = { {0, VK_FORMAT_R8G8B8A8_SRGB, VULKAN_MESH_VIEW_SAMPLES, VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}, {0, VK_FORMAT_D32_SFLOAT, VULKAN_MESH_VIEW_SAMPLES, VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}, }; VkAttachmentReference attRef = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; VkAttachmentReference dsRef = {1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; VkSubpassDescription sub = { 0, VK_PIPELINE_BIND_POINT_GRAPHICS, 0, NULL, // inputs 1, &attRef, // color NULL, // resolve &dsRef, // depth-stencil 0, NULL, // preserve }; VkRenderPassCreateInfo rpinfo = { VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, NULL, 0, 2, attDesc, 1, &sub, 0, NULL, // dependencies }; vt->CreateRenderPass(Unwrap(m_Device), &rpinfo, NULL, &rp); } VkGraphicsPipelineCreateInfo pipeInfo = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, NULL, 0, 2, stages, &vi, &ia, NULL, // tess &vp, &rs, &msaa, &ds, &cb, &dyn, Unwrap(pipeLayout), rp, 0, // sub pass VK_NULL_HANDLE, // base pipeline handle 0, // base pipeline index }; // wireframe 
pipeline stages[0].module = Unwrap(m_pDriver->GetShaderCache()->GetBuiltinModule(BuiltinShader::MeshVS)); stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; stages[1].module = Unwrap(m_pDriver->GetShaderCache()->GetBuiltinModule(BuiltinShader::MeshFS)); stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; rs.polygonMode = VK_POLYGON_MODE_LINE; rs.lineWidth = 1.0f; ds.depthTestEnable = false; vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL, &cache.pipes[MeshDisplayPipelines::ePipe_Wire]); RDCASSERTEQUAL(vkr, VK_SUCCESS); ds.depthTestEnable = true; vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL, &cache.pipes[MeshDisplayPipelines::ePipe_WireDepth]); RDCASSERTEQUAL(vkr, VK_SUCCESS); // solid shading pipeline rs.polygonMode = VK_POLYGON_MODE_FILL; ds.depthTestEnable = false; vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL, &cache.pipes[MeshDisplayPipelines::ePipe_Solid]); RDCASSERTEQUAL(vkr, VK_SUCCESS); ds.depthTestEnable = true; vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL, &cache.pipes[MeshDisplayPipelines::ePipe_SolidDepth]); RDCASSERTEQUAL(vkr, VK_SUCCESS); if(secondary.vertexResourceId != ResourceId()) { // pull secondary information from second vertex buffer vertAttrs[1].binding = 1; vertAttrs[1].format = secondaryFmt; RDCASSERT(secondaryFmt != VK_FORMAT_UNDEFINED); vi.vertexBindingDescriptionCount = 2; vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL, &cache.pipes[MeshDisplayPipelines::ePipe_Secondary]); RDCASSERTEQUAL(vkr, VK_SUCCESS); } vertAttrs[1].binding = 0; vi.vertexBindingDescriptionCount = 1; // flat lit pipeline, needs geometry shader to calculate face normals stages[2].module = Unwrap(m_pDriver->GetShaderCache()->GetBuiltinModule(BuiltinShader::MeshGS)); stages[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT; pipeInfo.stageCount = 3; if(stages[2].module != VK_NULL_HANDLE) { 
vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL, &cache.pipes[MeshDisplayPipelines::ePipe_Lit]); RDCASSERTEQUAL(vkr, VK_SUCCESS); } for(uint32_t i = 0; i < MeshDisplayPipelines::ePipe_Count; i++) if(cache.pipes[i] != VK_NULL_HANDLE) m_pDriver->GetResourceManager()->WrapResource(Unwrap(m_Device), cache.pipes[i]); vt->DestroyRenderPass(Unwrap(m_Device), rp, NULL); return cache; }
VkResult WrappedVulkan::vkQueuePresentKHR( VkQueue queue, const VkPresentInfoKHR* pPresentInfo) { if(m_State == WRITING_IDLE) { RenderDoc::Inst().Tick(); GetResourceManager()->FlushPendingDirty(); } m_FrameCounter++; // first present becomes frame #1, this function is at the end of the frame if(pPresentInfo->swapchainCount > 1 && (m_FrameCounter % 100) == 0) { RDCWARN("Presenting multiple swapchains at once - only first will be processed"); } vector<VkSwapchainKHR> unwrappedSwaps; vector<VkSemaphore> unwrappedSems; VkPresentInfoKHR unwrappedInfo = *pPresentInfo; for(uint32_t i=0; i < unwrappedInfo.swapchainCount; i++) unwrappedSwaps.push_back(Unwrap(unwrappedInfo.pSwapchains[i])); for(uint32_t i=0; i < unwrappedInfo.waitSemaphoreCount; i++) unwrappedSems.push_back(Unwrap(unwrappedInfo.pWaitSemaphores[i])); unwrappedInfo.pSwapchains = unwrappedInfo.swapchainCount ? &unwrappedSwaps[0] : NULL; unwrappedInfo.pWaitSemaphores = unwrappedInfo.waitSemaphoreCount ? &unwrappedSems[0] : NULL; // Don't support any extensions for present info RDCASSERT(pPresentInfo->pNext == NULL); VkResourceRecord *swaprecord = GetRecord(pPresentInfo->pSwapchains[0]); RDCASSERT(swaprecord->swapInfo); SwapchainInfo &swapInfo = *swaprecord->swapInfo; bool activeWindow = RenderDoc::Inst().IsActiveWindow(LayerDisp(m_Instance), swapInfo.wndHandle); // need to record which image was last flipped so we can get the correct backbuffer // for a thumbnail in EndFrameCapture swapInfo.lastPresent = pPresentInfo->pImageIndices[0]; m_LastSwap = swaprecord->GetResourceID(); VkImage backbuffer = swapInfo.images[pPresentInfo->pImageIndices[0]].im; if(m_State == WRITING_IDLE) { m_FrameTimes.push_back(m_FrameTimer.GetMilliseconds()); m_TotalTime += m_FrameTimes.back(); m_FrameTimer.Restart(); // update every second if(m_TotalTime > 1000.0) { m_MinFrametime = 10000.0; m_MaxFrametime = 0.0; m_AvgFrametime = 0.0; m_TotalTime = 0.0; for(size_t i=0; i < m_FrameTimes.size(); i++) { m_AvgFrametime += m_FrameTimes[i]; 
if(m_FrameTimes[i] < m_MinFrametime) m_MinFrametime = m_FrameTimes[i]; if(m_FrameTimes[i] > m_MaxFrametime) m_MaxFrametime = m_FrameTimes[i]; } m_AvgFrametime /= double(m_FrameTimes.size()); m_FrameTimes.clear(); } uint32_t overlay = RenderDoc::Inst().GetOverlayBits(); if(overlay & eRENDERDOC_Overlay_Enabled) { VkRenderPass rp = swapInfo.rp; VkImage im = swapInfo.images[pPresentInfo->pImageIndices[0]].im; VkFramebuffer fb = swapInfo.images[pPresentInfo->pImageIndices[0]].fb; VkLayerDispatchTable *vt = ObjDisp(GetDev()); TextPrintState textstate = { GetNextCmd(), rp, fb, swapInfo.extent.width, swapInfo.extent.height }; VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT }; VkResult vkr = vt->BeginCommandBuffer(Unwrap(textstate.cmd), &beginInfo); RDCASSERTEQUAL(vkr, VK_SUCCESS); VkImageMemoryBarrier bbBarrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, NULL, 0, 0, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, Unwrap(im), { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 } }; bbBarrier.srcAccessMask = VK_ACCESS_ALL_READ_BITS; bbBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; DoPipelineBarrier(textstate.cmd, 1, &bbBarrier); GetDebugManager()->BeginText(textstate); if(activeWindow) { vector<RENDERDOC_InputButton> keys = RenderDoc::Inst().GetCaptureKeys(); string overlayText = "Vulkan. "; for(size_t i=0; i < keys.size(); i++) { if(i > 0) overlayText += ", "; overlayText += ToStr::Get(keys[i]); } if(!keys.empty()) overlayText += " to capture."; if(overlay & eRENDERDOC_Overlay_FrameNumber) { overlayText += StringFormat::Fmt(" Frame: %d.", m_FrameCounter); } if(overlay & eRENDERDOC_Overlay_FrameRate) { overlayText += StringFormat::Fmt(" %.2lf ms (%.2lf .. 
%.2lf) (%.0lf FPS)", m_AvgFrametime, m_MinFrametime, m_MaxFrametime, 1000.0f/m_AvgFrametime); } float y=0.0f; if(!overlayText.empty()) { GetDebugManager()->RenderText(textstate, 0.0f, y, overlayText.c_str()); y += 1.0f; } if(overlay & eRENDERDOC_Overlay_CaptureList) { GetDebugManager()->RenderText(textstate, 0.0f, y, "%d Captures saved.\n", (uint32_t)m_FrameRecord.size()); y += 1.0f; uint64_t now = Timing::GetUnixTimestamp(); for(size_t i=0; i < m_FrameRecord.size(); i++) { if(now - m_FrameRecord[i].frameInfo.captureTime < 20) { GetDebugManager()->RenderText(textstate, 0.0f, y, "Captured frame %d.\n", m_FrameRecord[i].frameInfo.frameNumber); y += 1.0f; } } } #if !defined(RELEASE) GetDebugManager()->RenderText(textstate, 0.0f, y, "%llu chunks - %.2f MB", Chunk::NumLiveChunks(), float(Chunk::TotalMem())/1024.0f/1024.0f); y += 1.0f; #endif } else { vector<RENDERDOC_InputButton> keys = RenderDoc::Inst().GetFocusKeys(); string str = "Vulkan. Inactive swapchain."; for(size_t i=0; i < keys.size(); i++) { if(i == 0) str += " "; else str += ", "; str += ToStr::Get(keys[i]); } if(!keys.empty()) str += " to cycle between swapchains"; GetDebugManager()->RenderText(textstate, 0.0f, 0.0f, str.c_str()); } GetDebugManager()->EndText(textstate); std::swap(bbBarrier.oldLayout, bbBarrier.newLayout); bbBarrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; bbBarrier.dstAccessMask = VK_ACCESS_ALL_READ_BITS; DoPipelineBarrier(textstate.cmd, 1, &bbBarrier); ObjDisp(textstate.cmd)->EndCommandBuffer(Unwrap(textstate.cmd)); SubmitCmds(); FlushQ(); } } VkResult vkr = ObjDisp(queue)->QueuePresentKHR(Unwrap(queue), &unwrappedInfo); if(!activeWindow) return vkr; RenderDoc::Inst().SetCurrentDriver(RDC_Vulkan); // kill any current capture that isn't application defined if(m_State == WRITING_CAPFRAME && !m_AppControlledCapture) RenderDoc::Inst().EndFrameCapture(LayerDisp(m_Instance), swapInfo.wndHandle); if(RenderDoc::Inst().ShouldTriggerCapture(m_FrameCounter) && m_State == WRITING_IDLE) { 
RenderDoc::Inst().StartFrameCapture(LayerDisp(m_Instance), swapInfo.wndHandle); m_AppControlledCapture = false; } return vkr; }
vector<CounterResult> VulkanReplay::FetchCounters(const vector<GPUCounter> &counters) { uint32_t maxEID = m_pDriver->GetMaxEID(); VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures(); VkDevice dev = m_pDriver->GetDev(); VkQueryPoolCreateInfo timeStampPoolCreateInfo = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_TIMESTAMP, maxEID * 2, 0}; VkQueryPoolCreateInfo occlusionPoolCreateInfo = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_OCCLUSION, maxEID, 0}; VkQueryPipelineStatisticFlags pipeStatsFlags = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT; VkQueryPoolCreateInfo pipeStatsPoolCreateInfo = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_PIPELINE_STATISTICS, maxEID, pipeStatsFlags}; VkQueryPool timeStampPool; VkResult vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &timeStampPoolCreateInfo, NULL, &timeStampPool); RDCASSERTEQUAL(vkr, VK_SUCCESS); bool occlNeeded = false; bool statsNeeded = false; for(size_t c = 0; c < counters.size(); c++) { switch(counters[c]) { case GPUCounter::InputVerticesRead: case GPUCounter::IAPrimitives: case GPUCounter::GSPrimitives: case GPUCounter::RasterizerInvocations: case GPUCounter::RasterizedPrimitives: case GPUCounter::VSInvocations: case GPUCounter::TCSInvocations: case GPUCounter::TESInvocations: case 
GPUCounter::GSInvocations: case GPUCounter::PSInvocations: case GPUCounter::CSInvocations: statsNeeded = true; break; case GPUCounter::SamplesWritten: occlNeeded = true; break; default: break; } } VkQueryPool occlusionPool = VK_NULL_HANDLE; if(availableFeatures.occlusionQueryPrecise && occlNeeded) { vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &occlusionPoolCreateInfo, NULL, &occlusionPool); RDCASSERTEQUAL(vkr, VK_SUCCESS); } VkQueryPool pipeStatsPool = VK_NULL_HANDLE; if(availableFeatures.pipelineStatisticsQuery && statsNeeded) { vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &pipeStatsPoolCreateInfo, NULL, &pipeStatsPool); RDCASSERTEQUAL(vkr, VK_SUCCESS); } VkCommandBuffer cmd = m_pDriver->GetNextCmd(); VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo); RDCASSERTEQUAL(vkr, VK_SUCCESS); ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), timeStampPool, 0, maxEID * 2); if(occlusionPool != VK_NULL_HANDLE) ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), occlusionPool, 0, maxEID); if(pipeStatsPool != VK_NULL_HANDLE) ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), pipeStatsPool, 0, maxEID); vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd)); RDCASSERTEQUAL(vkr, VK_SUCCESS); #if ENABLED(SINGLE_FLUSH_VALIDATE) m_pDriver->SubmitCmds(); #endif VulkanGPUTimerCallback cb(m_pDriver, this, timeStampPool, occlusionPool, pipeStatsPool); // replay the events to perform all the queries m_pDriver->ReplayLog(0, maxEID, eReplay_Full); vector<uint64_t> m_TimeStampData; m_TimeStampData.resize(cb.m_Results.size() * 2); vkr = ObjDisp(dev)->GetQueryPoolResults( Unwrap(dev), timeStampPool, 0, (uint32_t)m_TimeStampData.size(), sizeof(uint64_t) * m_TimeStampData.size(), &m_TimeStampData[0], sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); RDCASSERTEQUAL(vkr, VK_SUCCESS); ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), timeStampPool, 
NULL); vector<uint64_t> m_OcclusionData; m_OcclusionData.resize(cb.m_Results.size()); if(occlusionPool != VK_NULL_HANDLE) { vkr = ObjDisp(dev)->GetQueryPoolResults( Unwrap(dev), occlusionPool, 0, (uint32_t)m_OcclusionData.size(), sizeof(uint64_t) * m_OcclusionData.size(), &m_OcclusionData[0], sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); RDCASSERTEQUAL(vkr, VK_SUCCESS); ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), occlusionPool, NULL); } vector<uint64_t> m_PipeStatsData; m_PipeStatsData.resize(cb.m_Results.size() * 11); if(pipeStatsPool != VK_NULL_HANDLE) { vkr = ObjDisp(dev)->GetQueryPoolResults( Unwrap(dev), pipeStatsPool, 0, (uint32_t)cb.m_Results.size(), sizeof(uint64_t) * m_PipeStatsData.size(), &m_PipeStatsData[0], sizeof(uint64_t) * 11, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); RDCASSERTEQUAL(vkr, VK_SUCCESS); ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), pipeStatsPool, NULL); } vector<CounterResult> ret; for(size_t i = 0; i < cb.m_Results.size(); i++) { for(size_t c = 0; c < counters.size(); c++) { CounterResult result; result.eventId = cb.m_Results[i]; result.counter = counters[c]; switch(counters[c]) { case GPUCounter::EventGPUDuration: { uint64_t delta = m_TimeStampData[i * 2 + 1] - m_TimeStampData[i * 2 + 0]; result.value.d = (double(m_pDriver->GetDeviceProps().limits.timestampPeriod) * double(delta)) // nanoseconds / (1000.0 * 1000.0 * 1000.0); // to seconds } break; case GPUCounter::InputVerticesRead: result.value.u64 = m_PipeStatsData[i * 11 + 0]; break; case GPUCounter::IAPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 1]; break; case GPUCounter::GSPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 4]; break; case GPUCounter::RasterizerInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 5]; break; case GPUCounter::RasterizedPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 6]; break; case GPUCounter::SamplesWritten: result.value.u64 = m_OcclusionData[i]; break; case GPUCounter::VSInvocations: 
result.value.u64 = m_PipeStatsData[i * 11 + 2]; break; case GPUCounter::TCSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 8]; break; case GPUCounter::TESInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 9]; break; case GPUCounter::GSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 3]; break; case GPUCounter::PSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 9]; break; case GPUCounter::CSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 10]; break; default: break; } ret.push_back(result); } } for(size_t i = 0; i < cb.m_AliasEvents.size(); i++) { for(size_t c = 0; c < counters.size(); c++) { CounterResult search; search.counter = counters[c]; search.eventId = cb.m_AliasEvents[i].first; // find the result we're aliasing auto it = std::find(ret.begin(), ret.end(), search); if(it != ret.end()) { // duplicate the result and append CounterResult aliased = *it; aliased.eventId = cb.m_AliasEvents[i].second; ret.push_back(aliased); } else { RDCERR("Expected to find alias-target result for EID %u counter %u, but didn't", search.eventId, search.counter); } } } // sort so that the alias results appear in the right places std::sort(ret.begin(), ret.end()); return ret; }
// Measures the GPU duration of each event using D3D11 timestamp queries.
//
// counters: must contain exactly one entry, eCounter_EventGPUDuration (asserted).
//           An empty list logs an error and returns an empty result.
//
// Returns one CounterResult per timer filled in by FillTimers, with the duration in
// seconds (timestamp delta divided by the disjoint query's Frequency), or 0.0 for
// timers that lack a before/after query. Returns an empty vector if either helper
// query cannot be created.
vector<CounterResult> D3D11DebugManager::FetchCounters(const vector<uint32_t> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  uint32_t counterID = counters[0];
  RDCASSERT(counters.size() == 1);
  RDCASSERT(counterID == eCounter_EventGPUDuration);

  SCOPED_TIMER("Fetch Counters for %u", counterID);

  D3D11_QUERY_DESC disjointdesc = {D3D11_QUERY_TIMESTAMP_DISJOINT, 0};
  ID3D11Query *disjoint = NULL;

  D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};
  ID3D11Query *start = NULL;

  HRESULT hr = S_OK;

  hr = m_pDevice->CreateQuery(&disjointdesc, &disjoint);
  if(FAILED(hr))
  {
    RDCERR("Failed to create disjoint query %08x", hr);
    return ret;
  }

  hr = m_pDevice->CreateQuery(&qdesc, &start);
  if(FAILED(hr))
  {
    RDCERR("Failed to create start query %08x", hr);
    // the disjoint query was already created above, don't leak it on this path
    SAFE_RELEASE(disjoint);
    return ret;
  }

  CounterContext ctx;

  // single pass; reuseIdx == -1 makes FillTimers create new timers rather than reuse
  for(int loop = 0; loop < 1; loop++)
  {
    {
      // bracket all the timestamp queries with the disjoint query
      m_pImmediateContext->Begin(disjoint);

      m_pImmediateContext->End(start);

      ctx.eventStart = 0;
      ctx.reuseIdx = loop == 0 ? -1 : 0;
      FillTimers(ctx, m_WrappedContext->GetRootDraw());

      m_pImmediateContext->End(disjoint);
    }

    {
      // spin until the disjoint query data is available
      D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
      do
      {
        hr = m_pImmediateContext->GetData(disjoint, &disjointData,
                                          sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0);
      } while(hr == S_FALSE);
      RDCASSERTEQUAL(hr, S_OK);

      RDCASSERT(!disjointData.Disjoint);

      // timestamp ticks per second; dividing a tick delta by this yields seconds
      double ticksPerSec = double(disjointData.Frequency);

      UINT64 a = 0;
      hr = m_pImmediateContext->GetData(start, &a, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      for(size_t i = 0; i < ctx.timers.size(); i++)
      {
        if(ctx.timers[i].before && ctx.timers[i].after)
        {
          hr = m_pImmediateContext->GetData(ctx.timers[i].before, &a, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 b = 0;
          hr = m_pImmediateContext->GetData(ctx.timers[i].after, &b, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          double duration = (double(b - a) / ticksPerSec);

          ret.push_back(CounterResult(ctx.timers[i].eventID, counterID, duration));

          a = b;
        }
        else
        {
          ret.push_back(CounterResult(ctx.timers[i].eventID, counterID, 0.0));
        }
      }
    }
  }

  // release every query created for this fetch
  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    SAFE_RELEASE(ctx.timers[i].before);
    SAFE_RELEASE(ctx.timers[i].after);
  }

  SAFE_RELEASE(disjoint);
  SAFE_RELEASE(start);

  return ret;
}