void AppTest::InitBundles() { CHK(Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_BUNDLE, IID_PPV_ARGS(CommandBundleAllocator.GetAddressOf()))); for (unsigned int i = 0; i < BundleCount; i++) { const unsigned int bundlesPerThread = BundleCount / ThreadCount; const unsigned int threadID = i / bundlesPerThread; CHK(Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_BUNDLE, CommandBundleAllocator.Get(), PSO.Get(), IID_PPV_ARGS(CommandBundleArray[i].GetAddressOf()))); ID3D12GraphicsCommandList* commandBundle = CommandBundleArray[i].Get(); const unsigned int offset = ObjectsPerBundle * i; commandBundle->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); commandBundle->SetGraphicsRootSignature(RootSignature.Get()); if (UseRootLevelCBV) { for (unsigned int command = 0; command < ObjectsPerBundle; command++) { commandBundle->SetGraphicsRootConstantBufferView(0, PerObjectConstantBuffers.GetGPUHandle(offset + command)); commandBundle->DrawIndexedInstanced(IndexCount, 1, 0, 0, 0); } } else { D3D12_GPU_DESCRIPTOR_HANDLE descriptorHandle; ID3D12DescriptorHeap* descriptorHeap = ConstantBufferDescriptorHeap->GetBaseHeap(); commandBundle->SetDescriptorHeaps(1, &descriptorHeap); for (unsigned int command = 0; command < ObjectsPerBundle; command++) { descriptorHandle.ptr = ConstantBufferDescriptorHeap->GetDescriptorGPUHandle(offset + command); commandBundle->SetGraphicsRootDescriptorTable(0, descriptorHandle); commandBundle->DrawIndexedInstanced(IndexCount, 1, 0, 0, 0); } } commandBundle->Close(); } }
// Records a non-indexed draw on the command list returned by
// deviceMan.GetCommandList().
//
//   pt            - primitive topology set on the input assembler before the draw
//   numVertices   - vertex count per instance
//   start         - index of the first vertex to draw
//   instanceCount - number of instances to draw
void afDraw(PrimitiveTopology pt, int numVertices, int start, int instanceCount)
{
    ID3D12GraphicsCommandList* const commandList = deviceMan.GetCommandList();

    commandList->IASetPrimitiveTopology(pt);

    // Arguments: VertexCountPerInstance, InstanceCount, StartVertexLocation,
    // StartInstanceLocation (always 0 here).
    commandList->DrawInstanced(
        numVertices,
        instanceCount,
        start,
        0);
}
void DXMultiAdapterRenderer::PopulateCommandLists() { // Primary rendering command list { ComPtr<ID3D12Resource> curPrimaryRenderTarget = mDXDevices[Device_Primary]->mRenderTargets[mCurrentFrameIndex]; // Reset allocator and command list for current render target ThrowIfFailed(mDXDevices[Device_Primary]->mCommandAllocator->Reset()); // Only do this when all command lists have finished executing ID3D12GraphicsCommandList* primaryCommandList = mCommandLists[Primary_CommandList_Scene].Get(); ThrowIfFailed(primaryCommandList->Reset(mDXDevices[Device_Primary]->mCommandAllocator.Get(), nullptr)); primaryCommandList->RSSetViewports(1, &mViewport); primaryCommandList->RSSetScissorRects(1, &mScissorRect); ID3D12DescriptorHeap* descHeaps[] = { mDXDevices[Device_Primary]->mCbvSrvUavHeap.Get() }; primaryCommandList->SetDescriptorHeaps(_countof(descHeaps), descHeaps); // Allow for rending to current render target primaryCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(curPrimaryRenderTarget.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(mDXDevices[Device_Primary]->mRtvHeap->GetCPUDescriptorHandleForHeapStart(), mCurrentFrameIndex, mDXDevices[Device_Primary]->mRtvDescriptorSize); primaryCommandList->OMSetRenderTargets(1, &rtvHandle, true, nullptr); primaryCommandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); primaryCommandList->IASetVertexBuffers(0, 1, mVertexBufferViews[Primary_Quad_Top]); // Render top portion of scene on primary GPU primaryCommandList->SetGraphicsRootSignature(mRootSignatures[Primary_RootSignature_Scene].Get()); primaryCommandList->SetGraphicsRootDescriptorTable(0, mTimePrimaryCbvHandle); primaryCommandList->SetPipelineState(mPipelineStates[Primary_PipelineState_Scene].Get()); primaryCommandList->DrawInstanced(4, 1, 0, 0); // Indicate that render target will be used for copy primaryCommandList->ResourceBarrier(1, 
&CD3DX12_RESOURCE_BARRIER::Transition(curPrimaryRenderTarget.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); ThrowIfFailed(mCommandLists[Primary_CommandList_Scene]->Close()); } // Copy command list - in DXCrossAdapterResources object { mCrossAdapterResources->PopulateCommandList(mCurrentFrameIndex); } // Reset command allocator for secondary device - used by all following command lists ThrowIfFailed(mDXDevices[Device_Secondary]->mCommandAllocator->Reset()); // Only do this when all command lists have finished executing // Secondary rendering command list { ID3D12GraphicsCommandList* secondaryCommandList = mCommandLists[Secondary_CommandList_Scene].Get(); ThrowIfFailed(secondaryCommandList->Reset(mDXDevices[Device_Secondary]->mCommandAllocator.Get(), nullptr)); secondaryCommandList->RSSetViewports(1, &mViewport); secondaryCommandList->RSSetScissorRects(1, &mScissorRect); ID3D12DescriptorHeap* descHeaps[] = { mDXDevices[Device_Secondary]->mCbvSrvUavHeap.Get() }; secondaryCommandList->SetDescriptorHeaps(_countof(descHeaps), descHeaps); secondaryCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(mTexture.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); secondaryCommandList->OMSetRenderTargets(1, &mTextureRtvHandle, true, nullptr); secondaryCommandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); secondaryCommandList->IASetVertexBuffers(0, 1, mVertexBufferViews[Secondary_Quad_Bottom]); // Quad geometry // Render bottom portion of scene on secondary GPU secondaryCommandList->SetGraphicsRootSignature(mRootSignatures[Secondary_RootSignature_Scene].Get()); secondaryCommandList->SetGraphicsRootDescriptorTable(0, mTimeSecondaryCbvHandle); secondaryCommandList->SetPipelineState(mPipelineStates[Secondary_PipelineState_Scene].Get()); secondaryCommandList->DrawInstanced(4, 1, 0, 0); secondaryCommandList->ResourceBarrier(1, 
&CD3DX12_RESOURCE_BARRIER::Transition(mTexture.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)); ThrowIfFailed(mCommandLists[Secondary_CommandList_Scene]->Close()); } // Overlay Command List { ID3D12GraphicsCommandList* overlayCommandList = mCommandLists[Secondary_CommandList_Combine_Scene].Get(); ComPtr<ID3D12Resource> curSecondaryRenderTarget = mDXDevices[Device_Secondary]->mRenderTargets[mCurrentFrameIndex]; // Get secondary render target for current frame ThrowIfFailed(overlayCommandList->Reset(mDXDevices[Device_Secondary]->mCommandAllocator.Get(), nullptr)); overlayCommandList->RSSetViewports(1, &mViewport); overlayCommandList->RSSetScissorRects(1, &mScissorRect); ID3D12DescriptorHeap* descHeaps[] = { mDXDevices[Device_Secondary]->mCbvSrvUavHeap.Get() }; overlayCommandList->SetDescriptorHeaps(_countof(descHeaps), descHeaps); overlayCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(curSecondaryRenderTarget.Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(mDXDevices[Device_Secondary]->mRtvHeap->GetCPUDescriptorHandleForHeapStart(), mCurrentFrameIndex, mDXDevices[Device_Secondary]->mRtvDescriptorSize); overlayCommandList->OMSetRenderTargets(1, &rtvHandle, true, nullptr); static const float clearColor[4] = { 0.0f, 1.0f, 0.0f, 1.0f }; overlayCommandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr); overlayCommandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); // Render top quad with cross-adapter texture overlayCommandList->IASetVertexBuffers(0, 1, mVertexBufferViews[Secondary_Quad_Combine_Top]); // Quad geometry CD3DX12_GPU_DESCRIPTOR_HANDLE crossAdapterSrvHandle(mDXDevices[Device_Secondary]->mCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart(), mCurrentFrameIndex, mDXDevices[Device_Secondary]->mCbvSrvUavDescriptorSize); 
overlayCommandList->SetGraphicsRootSignature(mRootSignatures[Secondary_RootSignature_CrossAdapter].Get()); overlayCommandList->SetPipelineState(mPipelineStates[Secondary_PipelineState_CrossAdapter].Get()); overlayCommandList->SetGraphicsRootDescriptorTable(0, mTransformTopCbvHandle); overlayCommandList->SetGraphicsRootDescriptorTable(1, crossAdapterSrvHandle); // Scale and translate quad to top of screen mConstantBufferTransformTopData.modelViewProjection = DirectX::XMMatrixIdentity(); mConstantBufferTransformTopData.modelViewProjection *= DirectX::XMMatrixTranslation(0.0, ((1.0f / mSharePercentage) - 1.0f) + 0.015f, 0.0); mConstantBufferTransformTopData.modelViewProjection *= DirectX::XMMatrixScaling(1.0, mSharePercentage, 1.0); UpdateConstantBuffer(mConstantBufferTransformTop, &mConstantBufferTransformTopData, sizeof(mConstantBufferTransformTopData)); overlayCommandList->DrawInstanced(4, 1, 0, 0); // Render bottom quad with texture from secondary GPU overlayCommandList->IASetVertexBuffers(0, 1, mVertexBufferViews[Secondary_Quad_Combine_Bottom]); // Quad geometry overlayCommandList->SetGraphicsRootDescriptorTable(0, mTransformBottomCbvHandle); overlayCommandList->SetGraphicsRootDescriptorTable(1, mTextureSrvHandle); // Scale and translate quad to bottom of screen mConstantBufferTransformBottomData.modelViewProjection = DirectX::XMMatrixIdentity(); mConstantBufferTransformBottomData.modelViewProjection *= DirectX::XMMatrixTranslation(0.0, -((1.0f / (1.0f - mSharePercentage)) - 1.0f) - 0.015f, 0.0); mConstantBufferTransformBottomData.modelViewProjection *= DirectX::XMMatrixScaling(1.0, (1.0f - mSharePercentage), 1.0); UpdateConstantBuffer(mConstantBufferTransformBottom, &mConstantBufferTransformBottomData, sizeof(mConstantBufferTransformBottomData)); overlayCommandList->DrawInstanced(4, 1, 0, 0); overlayCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(curSecondaryRenderTarget.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, 
D3D12_RESOURCE_STATE_PRESENT)); ThrowIfFailed(mCommandLists[Secondary_CommandList_Combine_Scene]->Close()); } }
void D3D12Replay::InitPostVSBuffers(uint32_t eventId) { // go through any aliasing if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end()) eventId = m_PostVSAlias[eventId]; if(m_PostVSData.find(eventId) != m_PostVSData.end()) return; D3D12CommandData *cmd = m_pDevice->GetQueue()->GetCommandData(); const D3D12RenderState &rs = cmd->m_RenderState; if(rs.pipe == ResourceId()) return; WrappedID3D12PipelineState *origPSO = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe); if(!origPSO->IsGraphics()) return; D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = origPSO->GetGraphicsDesc(); if(psoDesc.VS.BytecodeLength == 0) return; WrappedID3D12Shader *vs = origPSO->VS(); D3D_PRIMITIVE_TOPOLOGY topo = rs.topo; const DrawcallDescription *drawcall = m_pDevice->GetDrawcall(eventId); if(drawcall->numIndices == 0) return; DXBC::DXBCFile *dxbcVS = vs->GetDXBC(); RDCASSERT(dxbcVS); DXBC::DXBCFile *dxbcGS = NULL; WrappedID3D12Shader *gs = origPSO->GS(); if(gs) { dxbcGS = gs->GetDXBC(); RDCASSERT(dxbcGS); } DXBC::DXBCFile *dxbcDS = NULL; WrappedID3D12Shader *ds = origPSO->DS(); if(ds) { dxbcDS = ds->GetDXBC(); RDCASSERT(dxbcDS); } ID3D12RootSignature *soSig = NULL; HRESULT hr = S_OK; { WrappedID3D12RootSignature *sig = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>(rs.graphics.rootsig); D3D12RootSignature rootsig = sig->sig; // create a root signature that allows stream out, if necessary if((rootsig.Flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT) == 0) { rootsig.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT; ID3DBlob *blob = m_pDevice->GetShaderCache()->MakeRootSig(rootsig); hr = m_pDevice->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), __uuidof(ID3D12RootSignature), (void **)&soSig); if(FAILED(hr)) { RDCERR("Couldn't enable stream-out in root signature: HRESULT: %s", ToStr(hr).c_str()); return; } SAFE_RELEASE(blob); } } vector<D3D12_SO_DECLARATION_ENTRY> sodecls; UINT stride = 0; int posidx 
= -1; int numPosComponents = 0; if(!dxbcVS->m_OutputSig.empty()) { for(const SigParameter &sign : dxbcVS->m_OutputSig) { D3D12_SO_DECLARATION_ENTRY decl; decl.Stream = 0; decl.OutputSlot = 0; decl.SemanticName = sign.semanticName.c_str(); decl.SemanticIndex = sign.semanticIndex; decl.StartComponent = 0; decl.ComponentCount = sign.compCount & 0xff; if(sign.systemValue == ShaderBuiltin::Position) { posidx = (int)sodecls.size(); numPosComponents = decl.ComponentCount = 4; } stride += decl.ComponentCount * sizeof(float); sodecls.push_back(decl); } if(stride == 0) { RDCERR("Didn't get valid stride! Setting to 4 bytes"); stride = 4; } // shift position attribute up to first, keeping order otherwise // the same if(posidx > 0) { D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx]; sodecls.erase(sodecls.begin() + posidx); sodecls.insert(sodecls.begin(), pos); } // set up stream output entries and buffers psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size(); psoDesc.StreamOutput.pSODeclaration = &sodecls[0]; psoDesc.StreamOutput.NumStrides = 1; psoDesc.StreamOutput.pBufferStrides = &stride; psoDesc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM; // disable all other shader stages psoDesc.HS.BytecodeLength = 0; psoDesc.HS.pShaderBytecode = NULL; psoDesc.DS.BytecodeLength = 0; psoDesc.DS.pShaderBytecode = NULL; psoDesc.GS.BytecodeLength = 0; psoDesc.GS.pShaderBytecode = NULL; psoDesc.PS.BytecodeLength = 0; psoDesc.PS.pShaderBytecode = NULL; // disable any rasterization/use of output targets psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; if(soSig) psoDesc.pRootSignature = soSig; // render as points psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; // disable outputs psoDesc.NumRenderTargets = 0; RDCEraseEl(psoDesc.RTVFormats); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; ID3D12PipelineState *pipe = NULL; hr = 
m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pipe); if(FAILED(hr)) { RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } ID3D12Resource *idxBuf = NULL; bool recreate = false; uint64_t outputSize = uint64_t(drawcall->numIndices) * drawcall->numInstances * stride; if(m_SOBufferSize < outputSize) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < outputSize) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output data", oldSize, m_SOBufferSize); recreate = true; } ID3D12GraphicsCommandList *list = NULL; if(!(drawcall->flags & DrawFlags::UseIBuffer)) { if(recreate) { m_pDevice->GPUSync(); CreateSOBuffers(); } list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } else // drawcall is indexed { bytebuf idxdata; GetBufferData(rs.ibuffer.buf, rs.ibuffer.offs + drawcall->indexOffset * rs.ibuffer.bytewidth, RDCMIN(drawcall->numIndices * rs.ibuffer.bytewidth, rs.ibuffer.size), idxdata); vector<uint32_t> indices; uint16_t *idx16 = (uint16_t *)&idxdata[0]; uint32_t *idx32 = (uint32_t *)&idxdata[0]; // only read as many indices as were available in the buffer uint32_t numIndices = RDCMIN(uint32_t(idxdata.size() / rs.ibuffer.bytewidth), drawcall->numIndices); uint32_t idxclamp = 0; if(drawcall->baseVertex < 0) idxclamp = uint32_t(-drawcall->baseVertex); // grab all unique vertex indices 
referenced for(uint32_t i = 0; i < numIndices; i++) { uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i]; // apply baseVertex but clamp to 0 (don't allow index to become negative) if(i32 < idxclamp) i32 = 0; else if(drawcall->baseVertex < 0) i32 -= idxclamp; else if(drawcall->baseVertex > 0) i32 += drawcall->baseVertex; auto it = std::lower_bound(indices.begin(), indices.end(), i32); if(it != indices.end() && *it == i32) continue; indices.insert(it, i32); } // if we read out of bounds, we'll also have a 0 index being referenced // (as 0 is read). Don't insert 0 if we already have 0 though if(numIndices < drawcall->numIndices && (indices.empty() || indices[0] != 0)) indices.insert(indices.begin(), 0); // An index buffer could be something like: 500, 501, 502, 501, 503, 502 // in which case we can't use the existing index buffer without filling 499 slots of vertex // data with padding. Instead we rebase the indices based on the smallest vertex so it becomes // 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer. // // Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512 // which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid. // We just stream-out a tightly packed list of unique indices, and then remap the index buffer // so that what did point to 500 points to 0 (accounting for rebasing), and what did point // to 510 now points to 3 (accounting for the unique sort). // we use a map here since the indices may be sparse. Especially considering if an index // is 'invalid' like 0xcccccccc then we don't want an array of 3.4 billion entries. 
map<uint32_t, size_t> indexRemap; for(size_t i = 0; i < indices.size(); i++) { // by definition, this index will only appear once in indices[] indexRemap[indices[i]] = i; } if(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t)) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t)) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for indices", oldSize, m_SOBufferSize); recreate = true; } if(recreate) { m_pDevice->GPUSync(); CreateSOBuffers(); } GetDebugManager()->FillBuffer(m_SOPatchedIndexBuffer, 0, &indices[0], indices.size() * sizeof(uint32_t)); D3D12_INDEX_BUFFER_VIEW patchedIB; patchedIB.BufferLocation = m_SOPatchedIndexBuffer->GetGPUVirtualAddress(); patchedIB.Format = DXGI_FORMAT_R32_UINT; patchedIB.SizeInBytes = UINT(indices.size() * sizeof(uint32_t)); list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); list->IASetIndexBuffer(&patchedIB); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); list->DrawIndexedInstanced((UINT)indices.size(), drawcall->numInstances, 0, 0, drawcall->instanceOffset); uint32_t stripCutValue = 0; if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF) stripCutValue = 0xffff; else if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF) stripCutValue = 0xffffffff; // rebase existing index buffer to point to the right elements in our stream-out'd // vertex buffer for(uint32_t i = 0; i < numIndices; i++) { uint32_t i32 = rs.ibuffer.bytewidth == 2 ? 
uint32_t(idx16[i]) : idx32[i]; // preserve primitive restart indices if(stripCutValue && i32 == stripCutValue) continue; // apply baseVertex but clamp to 0 (don't allow index to become negative) if(i32 < idxclamp) i32 = 0; else if(drawcall->baseVertex < 0) i32 -= idxclamp; else if(drawcall->baseVertex > 0) i32 += drawcall->baseVertex; if(rs.ibuffer.bytewidth == 2) idx16[i] = uint16_t(indexRemap[i32]); else idx32[i] = uint32_t(indexRemap[i32]); } idxBuf = NULL; if(!idxdata.empty()) { D3D12_RESOURCE_DESC idxBufDesc; idxBufDesc.Alignment = 0; idxBufDesc.DepthOrArraySize = 1; idxBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; idxBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; idxBufDesc.Format = DXGI_FORMAT_UNKNOWN; idxBufDesc.Height = 1; idxBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; idxBufDesc.MipLevels = 1; idxBufDesc.SampleDesc.Count = 1; idxBufDesc.SampleDesc.Quality = 0; idxBufDesc.Width = idxdata.size(); D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &idxBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&idxBuf); RDCASSERTEQUAL(hr, S_OK); SetObjName(idxBuf, StringFormat::Fmt("PostVS idxBuf for %u", eventId)); GetDebugManager()->FillBuffer(idxBuf, 0, &idxdata[0], idxdata.size()); } } D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; list->ResourceBarrier(1, &sobarr); list->CopyResource(m_SOStagingBuffer, m_SOBuffer); // we're done with this after the copy, so we can discard it and reset // the counter for the next stream-out sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; 
sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->DiscardResource(m_SOBuffer, NULL); list->ResourceBarrier(1, &sobarr); UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); SAFE_RELEASE(pipe); byte *byteData = NULL; D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize}; hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData); if(FAILED(hr)) { RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(idxBuf); SAFE_RELEASE(soSig); return; } range.End = 0; uint64_t numBytesWritten = *(uint64_t *)byteData; if(numBytesWritten == 0) { m_PostVSData[eventId] = D3D12PostVSData(); SAFE_RELEASE(idxBuf); SAFE_RELEASE(soSig); return; } // skip past the counter byteData += 64; uint64_t numPrims = numBytesWritten / stride; ID3D12Resource *vsoutBuffer = NULL; { D3D12_RESOURCE_DESC vertBufDesc; vertBufDesc.Alignment = 0; vertBufDesc.DepthOrArraySize = 1; vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; vertBufDesc.Format = DXGI_FORMAT_UNKNOWN; vertBufDesc.Height = 1; vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; vertBufDesc.MipLevels = 1; vertBufDesc.SampleDesc.Count = 1; vertBufDesc.SampleDesc.Quality = 0; vertBufDesc.Width = numBytesWritten; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&vsoutBuffer); 
RDCASSERTEQUAL(hr, S_OK); if(vsoutBuffer) { SetObjName(vsoutBuffer, StringFormat::Fmt("PostVS vsoutBuffer for %u", eventId)); GetDebugManager()->FillBuffer(vsoutBuffer, 0, byteData, (size_t)numBytesWritten); } } float nearp = 0.1f; float farp = 100.0f; Vec4f *pos0 = (Vec4f *)byteData; bool found = false; for(uint64_t i = 1; numPosComponents == 4 && i < numPrims; i++) { ////////////////////////////////////////////////////////////////////////////////// // derive near/far, assuming a standard perspective matrix // // the transformation from from pre-projection {Z,W} to post-projection {Z,W} // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1 // and we know Wpost = Zpre from the perspective matrix. // we can then see from the perspective matrix that // m = F/(F-N) // c = -(F*N)/(F-N) // // with re-arranging and substitution, we then get: // N = -c/m // F = c/(1-m) // // so if we can derive m and c then we can determine N and F. We can do this with // two points, and we pick them reasonably distinct on z to reduce floating-point // error Vec4f *pos = (Vec4f *)(byteData + i * stride); if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f) { Vec2f A(pos0->w, pos0->z); Vec2f B(pos->w, pos->z); float m = (B.y - A.y) / (B.x - A.x); float c = B.y - B.x * m; if(m == 1.0f) continue; nearp = -c / m; farp = c / (1 - m); found = true; break; } } // if we didn't find anything, all z's and w's were identical. 
// If the z is positive and w greater for the first element then // we detect this projection as reversed z with infinite far plane if(!found && pos0->z > 0.0f && pos0->w > pos0->z) { nearp = pos0->z; farp = FLT_MAX; } m_SOStagingBuffer->Unmap(0, &range); m_PostVSData[eventId].vsin.topo = topo; m_PostVSData[eventId].vsout.buf = vsoutBuffer; m_PostVSData[eventId].vsout.vertStride = stride; m_PostVSData[eventId].vsout.nearPlane = nearp; m_PostVSData[eventId].vsout.farPlane = farp; m_PostVSData[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::UseIBuffer); m_PostVSData[eventId].vsout.numVerts = drawcall->numIndices; m_PostVSData[eventId].vsout.instStride = 0; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].vsout.instStride = uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances)); m_PostVSData[eventId].vsout.idxBuf = NULL; if(m_PostVSData[eventId].vsout.useIndices && idxBuf) { m_PostVSData[eventId].vsout.idxBuf = idxBuf; m_PostVSData[eventId].vsout.idxFmt = rs.ibuffer.bytewidth == 2 ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; } m_PostVSData[eventId].vsout.hasPosOut = posidx >= 0; m_PostVSData[eventId].vsout.topo = topo; } else { // empty vertex output signature m_PostVSData[eventId].vsin.topo = topo; m_PostVSData[eventId].vsout.buf = NULL; m_PostVSData[eventId].vsout.instStride = 0; m_PostVSData[eventId].vsout.vertStride = 0; m_PostVSData[eventId].vsout.nearPlane = 0.0f; m_PostVSData[eventId].vsout.farPlane = 0.0f; m_PostVSData[eventId].vsout.useIndices = false; m_PostVSData[eventId].vsout.hasPosOut = false; m_PostVSData[eventId].vsout.idxBuf = NULL; m_PostVSData[eventId].vsout.topo = topo; } if(dxbcGS || dxbcDS) { stride = 0; posidx = -1; numPosComponents = 0; DXBC::DXBCFile *lastShader = dxbcGS; if(dxbcDS) lastShader = dxbcDS; sodecls.clear(); for(const SigParameter &sign : lastShader->m_OutputSig) { D3D12_SO_DECLARATION_ENTRY decl; // for now, skip streams that aren't stream 0 if(sign.stream != 0) continue; decl.Stream = 0; decl.OutputSlot = 0; decl.SemanticName = sign.semanticName.c_str(); decl.SemanticIndex = sign.semanticIndex; decl.StartComponent = 0; decl.ComponentCount = sign.compCount & 0xff; if(sign.systemValue == ShaderBuiltin::Position) { posidx = (int)sodecls.size(); numPosComponents = decl.ComponentCount = 4; } stride += decl.ComponentCount * sizeof(float); sodecls.push_back(decl); } // shift position attribute up to first, keeping order otherwise // the same if(posidx > 0) { D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx]; sodecls.erase(sodecls.begin() + posidx); sodecls.insert(sodecls.begin(), pos); } // enable the other shader stages again if(origPSO->DS()) psoDesc.DS = origPSO->DS()->GetDesc(); if(origPSO->HS()) psoDesc.HS = origPSO->HS()->GetDesc(); if(origPSO->GS()) psoDesc.GS = origPSO->GS()->GetDesc(); // configure new SO declarations psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size(); psoDesc.StreamOutput.pSODeclaration = &sodecls[0]; psoDesc.StreamOutput.NumStrides = 1; psoDesc.StreamOutput.pBufferStrides = 
&stride; // we're using the same topology this time psoDesc.PrimitiveTopologyType = origPSO->graphics->PrimitiveTopologyType; ID3D12PipelineState *pipe = NULL; hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pipe); if(FAILED(hr)) { RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; ID3D12GraphicsCommandList *list = NULL; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; // draws with multiple instances must be replayed one at a time so we can record the number of // primitives from each drawcall, as due to expansion this can vary per-instance. if(drawcall->numInstances > 1) { list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; // do a dummy draw to make sure we have enough space in the output buffer list->SOSetTargets(0, 1, &view); list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); // because the result is expanded we don't have to remap index buffers or anything if(drawcall->flags & DrawFlags::UseIBuffer) { list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1, m_SOStagingBuffer, 0); list->Close(); ID3D12CommandList 
*l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); // check that things are OK, and resize up if needed D3D12_RANGE range; range.Begin = 0; range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS); D3D12_QUERY_DATA_SO_STATISTICS *data; hr = m_SOStagingBuffer->Map(0, &range, (void **)&data); D3D12_QUERY_DATA_SO_STATISTICS result = *data; range.End = 0; m_SOStagingBuffer->Unmap(0, &range); if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize); CreateSOBuffers(); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; GetDebugManager()->ResetDebugAlloc(); // now do the actual stream out list = GetDebugManager()->ResetDebugList(); // first need to reset the counter byte values which may have either been written to above, or // are newly created { D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->ResourceBarrier(1, &sobarr); D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {}; counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; counterDesc.Format = DXGI_FORMAT_R32_UINT; counterDesc.Buffer.FirstElement = 0; counterDesc.Buffer.NumElements = 4; UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); std::swap(sobarr.Transition.StateBefore, sobarr.Transition.StateAfter); list->ResourceBarrier(1, &sobarr); } rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { 
list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } // reserve space for enough 'buffer filled size' locations view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + AlignUp(uint64_t(drawcall->numInstances * sizeof(UINT64)), 64ULL); // do incremental draws to get the output size. We have to do this O(N^2) style because // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... N instances // and count the total number of verts each time, then we can see from the difference how much // each instance wrote. for(uint32_t inst = 1; inst <= drawcall->numInstances; inst++) { if(drawcall->flags & DrawFlags::UseIBuffer) { view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64); list->SOSetTargets(0, 1, &view); list->DrawIndexedInstanced(drawcall->numIndices, inst, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64); list->SOSetTargets(0, 1, &view); list->DrawInstanced(drawcall->numIndices, inst, drawcall->vertexOffset, drawcall->instanceOffset); } } list->Close(); l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); // the last draw will have written the actual data we want into the buffer } else { // this only loops if we find from a query that we need to resize up while(true) { list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); // because the result is expanded we don't have to remap 
index buffers or anything if(drawcall->flags & DrawFlags::UseIBuffer) { list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1, m_SOStagingBuffer, 0); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); // check that things are OK, and resize up if needed D3D12_RANGE range; range.Begin = 0; range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS); D3D12_QUERY_DATA_SO_STATISTICS *data; hr = m_SOStagingBuffer->Map(0, &range, (void **)&data); if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize); CreateSOBuffers(); continue; } range.End = 0; m_SOStagingBuffer->Unmap(0, &range); GetDebugManager()->ResetDebugAlloc(); break; } } list = GetDebugManager()->ResetDebugList(); D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; list->ResourceBarrier(1, &sobarr); list->CopyResource(m_SOStagingBuffer, m_SOBuffer); // we're done with this after the copy, so we can discard it and reset // the counter for the next stream-out sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->DiscardResource(m_SOBuffer, NULL); list->ResourceBarrier(1, &sobarr); D3D12_UNORDERED_ACCESS_VIEW_DESC 
counterDesc = {}; counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; counterDesc.Format = DXGI_FORMAT_R32_UINT; counterDesc.Buffer.FirstElement = 0; counterDesc.Buffer.NumElements = 4; UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); SAFE_RELEASE(pipe); byte *byteData = NULL; D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize}; hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData); if(FAILED(hr)) { RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } range.End = 0; uint64_t *counters = (uint64_t *)byteData; uint64_t numBytesWritten = 0; std::vector<D3D12PostVSData::InstData> instData; if(drawcall->numInstances > 1) { uint64_t prevByteCount = 0; for(uint32_t inst = 0; inst < drawcall->numInstances; inst++) { uint64_t byteCount = counters[inst]; D3D12PostVSData::InstData d; d.numVerts = uint32_t((byteCount - prevByteCount) / stride); d.bufOffset = prevByteCount; prevByteCount = byteCount; instData.push_back(d); } numBytesWritten = prevByteCount; } else { numBytesWritten = counters[0]; } if(numBytesWritten == 0) { SAFE_RELEASE(soSig); return; } // skip past the counter(s) byteData += (view.BufferLocation - m_SOBuffer->GetGPUVirtualAddress()); uint64_t numVerts = numBytesWritten / stride; ID3D12Resource *gsoutBuffer = NULL; { D3D12_RESOURCE_DESC vertBufDesc; vertBufDesc.Alignment = 0; vertBufDesc.DepthOrArraySize = 1; vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; vertBufDesc.Format = DXGI_FORMAT_UNKNOWN; vertBufDesc.Height = 1; vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; vertBufDesc.MipLevels = 1; vertBufDesc.SampleDesc.Count = 1; 
vertBufDesc.SampleDesc.Quality = 0; vertBufDesc.Width = numBytesWritten; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&gsoutBuffer); RDCASSERTEQUAL(hr, S_OK); if(gsoutBuffer) { SetObjName(gsoutBuffer, StringFormat::Fmt("PostVS gsoutBuffer for %u", eventId)); GetDebugManager()->FillBuffer(gsoutBuffer, 0, byteData, (size_t)numBytesWritten); } } float nearp = 0.1f; float farp = 100.0f; Vec4f *pos0 = (Vec4f *)byteData; bool found = false; for(UINT64 i = 1; numPosComponents == 4 && i < numVerts; i++) { ////////////////////////////////////////////////////////////////////////////////// // derive near/far, assuming a standard perspective matrix // // the transformation from from pre-projection {Z,W} to post-projection {Z,W} // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1 // and we know Wpost = Zpre from the perspective matrix. // we can then see from the perspective matrix that // m = F/(F-N) // c = -(F*N)/(F-N) // // with re-arranging and substitution, we then get: // N = -c/m // F = c/(1-m) // // so if we can derive m and c then we can determine N and F. We can do this with // two points, and we pick them reasonably distinct on z to reduce floating-point // error Vec4f *pos = (Vec4f *)(byteData + i * stride); if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f) { Vec2f A(pos0->w, pos0->z); Vec2f B(pos->w, pos->z); float m = (B.y - A.y) / (B.x - A.x); float c = B.y - B.x * m; if(m == 1.0f) continue; nearp = -c / m; farp = c / (1 - m); found = true; break; } } // if we didn't find anything, all z's and w's were identical. 
// If the z is positive and w greater for the first element then // we detect this projection as reversed z with infinite far plane if(!found && pos0->z > 0.0f && pos0->w > pos0->z) { nearp = pos0->z; farp = FLT_MAX; } m_SOStagingBuffer->Unmap(0, &range); m_PostVSData[eventId].gsout.buf = gsoutBuffer; m_PostVSData[eventId].gsout.instStride = 0; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].gsout.instStride = uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances)); m_PostVSData[eventId].gsout.vertStride = stride; m_PostVSData[eventId].gsout.nearPlane = nearp; m_PostVSData[eventId].gsout.farPlane = farp; m_PostVSData[eventId].gsout.useIndices = false; m_PostVSData[eventId].gsout.hasPosOut = posidx >= 0; m_PostVSData[eventId].gsout.idxBuf = NULL; topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; if(lastShader == dxbcGS) { for(size_t i = 0; i < dxbcGS->GetNumDeclarations(); i++) { const DXBC::ASMDecl &decl = dxbcGS->GetDeclaration(i); if(decl.declaration == DXBC::OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY) { topo = decl.outTopology; break; } } } else if(lastShader == dxbcDS) { for(size_t i = 0; i < dxbcDS->GetNumDeclarations(); i++) { const DXBC::ASMDecl &decl = dxbcDS->GetDeclaration(i); if(decl.declaration == DXBC::OPCODE_DCL_TESS_DOMAIN) { if(decl.domain == DXBC::DOMAIN_ISOLINE) topo = D3D_PRIMITIVE_TOPOLOGY_LINELIST; else topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; } } } m_PostVSData[eventId].gsout.topo = topo; // streamout expands strips unfortunately if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ) m_PostVSData[eventId].gsout.topo = 
D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; m_PostVSData[eventId].gsout.numVerts = (uint32_t)numVerts; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].gsout.numVerts /= RDCMAX(1U, drawcall->numInstances); m_PostVSData[eventId].gsout.instData = instData; } SAFE_RELEASE(soSig); }
ResourceId D3D12Replay::RenderOverlay(ResourceId texid, CompType typeHint, DebugOverlay overlay, uint32_t eventId, const vector<uint32_t> &passEvents) { ID3D12Resource *resource = WrappedID3D12Resource::GetList()[texid]; if(resource == NULL) return ResourceId(); D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc(); std::vector<D3D12_RESOURCE_BARRIER> barriers; int resType = 0; GetDebugManager()->PrepareTextureSampling(resource, typeHint, resType, barriers); D3D12_RESOURCE_DESC overlayTexDesc; overlayTexDesc.Alignment = 0; overlayTexDesc.DepthOrArraySize = 1; overlayTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; overlayTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; overlayTexDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM; overlayTexDesc.Height = resourceDesc.Height; overlayTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; overlayTexDesc.MipLevels = 1; overlayTexDesc.SampleDesc = resourceDesc.SampleDesc; overlayTexDesc.Width = resourceDesc.Width; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; D3D12_RESOURCE_DESC currentOverlayDesc; RDCEraseEl(currentOverlayDesc); if(m_Overlay.Texture) currentOverlayDesc = m_Overlay.Texture->GetDesc(); WrappedID3D12Resource *wrappedCustomRenderTex = (WrappedID3D12Resource *)m_Overlay.Texture; // need to recreate backing custom render tex if(overlayTexDesc.Width != currentOverlayDesc.Width || overlayTexDesc.Height != currentOverlayDesc.Height || overlayTexDesc.Format != currentOverlayDesc.Format || overlayTexDesc.SampleDesc.Count != currentOverlayDesc.SampleDesc.Count || overlayTexDesc.SampleDesc.Quality != currentOverlayDesc.SampleDesc.Quality) { SAFE_RELEASE(m_Overlay.Texture); m_Overlay.resourceId = ResourceId(); ID3D12Resource *customRenderTex = NULL; HRESULT hr = m_pDevice->CreateCommittedResource( 
&heapProps, D3D12_HEAP_FLAG_NONE, &overlayTexDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, NULL, __uuidof(ID3D12Resource), (void **)&customRenderTex); if(FAILED(hr)) { RDCERR("Failed to create custom render tex HRESULT: %s", ToStr(hr).c_str()); return ResourceId(); } wrappedCustomRenderTex = (WrappedID3D12Resource *)customRenderTex; customRenderTex->SetName(L"customRenderTex"); m_Overlay.Texture = wrappedCustomRenderTex; m_Overlay.resourceId = wrappedCustomRenderTex->GetResourceID(); } D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState; ID3D12Resource *renderDepth = NULL; D3D12Descriptor *dsView = GetWrapped(rs.dsv); D3D12_RESOURCE_DESC depthTexDesc = {}; D3D12_DEPTH_STENCIL_VIEW_DESC dsViewDesc = {}; if(dsView) { ID3D12Resource *realDepth = dsView->nonsamp.resource; dsViewDesc = dsView->nonsamp.dsv; depthTexDesc = realDepth->GetDesc(); depthTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; depthTexDesc.Alignment = 0; HRESULT hr = S_OK; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc, D3D12_RESOURCE_STATE_COPY_DEST, NULL, __uuidof(ID3D12Resource), (void **)&renderDepth); if(FAILED(hr)) { RDCERR("Failed to create renderDepth HRESULT: %s", ToStr(hr).c_str()); return m_Overlay.resourceId; } renderDepth->SetName(L"Overlay renderDepth"); ID3D12GraphicsCommandList *list = m_pDevice->GetNewList(); const vector<D3D12_RESOURCE_STATES> &states = m_pDevice->GetSubresourceStates(GetResID(realDepth)); vector<D3D12_RESOURCE_BARRIER> depthBarriers; depthBarriers.reserve(states.size()); for(size_t i = 0; i < states.size(); i++) { D3D12_RESOURCE_BARRIER b; // skip unneeded barriers if(states[i] & D3D12_RESOURCE_STATE_COPY_SOURCE) continue; b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; b.Transition.pResource = realDepth; b.Transition.Subresource = (UINT)i; b.Transition.StateBefore = states[i]; b.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; 
depthBarriers.push_back(b); } if(!depthBarriers.empty()) list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]); list->CopyResource(renderDepth, realDepth); for(size_t i = 0; i < depthBarriers.size(); i++) std::swap(depthBarriers[i].Transition.StateBefore, depthBarriers[i].Transition.StateAfter); if(!depthBarriers.empty()) list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]); D3D12_RESOURCE_BARRIER b = {}; b.Transition.pResource = renderDepth; b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; // prepare tex resource for copying list->ResourceBarrier(1, &b); list->Close(); } D3D12_RENDER_TARGET_VIEW_DESC rtDesc = {}; rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rtDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM; rtDesc.Texture2D.MipSlice = 0; rtDesc.Texture2D.PlaneSlice = 0; if(overlayTexDesc.SampleDesc.Count > 1 || overlayTexDesc.SampleDesc.Quality > 0) rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; D3D12_CPU_DESCRIPTOR_HANDLE rtv = GetDebugManager()->GetCPUHandle(OVERLAY_RTV); m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv); ID3D12GraphicsCommandList *list = m_pDevice->GetNewList(); FLOAT black[] = {0.0f, 0.0f, 0.0f, 0.0f}; list->ClearRenderTargetView(rtv, black, 0, NULL); D3D12_CPU_DESCRIPTOR_HANDLE dsv = {}; if(renderDepth) { dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV); m_pDevice->CreateDepthStencilView( renderDepth, dsViewDesc.Format == DXGI_FORMAT_UNKNOWN ? 
NULL : &dsViewDesc, dsv); } D3D12_DEPTH_STENCIL_DESC dsDesc; dsDesc.BackFace.StencilFailOp = dsDesc.BackFace.StencilPassOp = dsDesc.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; dsDesc.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; dsDesc.FrontFace.StencilFailOp = dsDesc.FrontFace.StencilPassOp = dsDesc.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; dsDesc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; dsDesc.DepthEnable = TRUE; dsDesc.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; dsDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; dsDesc.StencilEnable = FALSE; dsDesc.StencilReadMask = dsDesc.StencilWriteMask = 0xff; WrappedID3D12PipelineState *pipe = NULL; if(rs.pipe != ResourceId()) pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe); if(overlay == DebugOverlay::NaN || overlay == DebugOverlay::Clipping) { // just need the basic texture } else if(overlay == DebugOverlay::Drawcall) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); float overlayConsts[4] = {0.8f, 0.1f, 0.8f, 1.0f}; ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts); psoDesc.PS.pShaderBytecode = ps->GetBufferPointer(); psoDesc.PS.BytecodeLength = ps->GetBufferSize(); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; 
psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; float clearColour[] = {0.0f, 0.0f, 0.0f, 0.5f}; list->ClearRenderTargetView(rtv, clearColour, 0, NULL); list->Close(); list = NULL; ID3D12PipelineState *pso = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pso); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(ps); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(pso); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = rtv; rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE(); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(pso); SAFE_RELEASE(ps); } } else if(overlay == DebugOverlay::BackfaceCull) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); D3D12_CULL_MODE origCull = psoDesc.RasterizerState.CullMode; float redCol[4] = {1.0f, 0.0f, 0.0f, 1.0f}; ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(redCol); float greenCol[4] = {0.0f, 1.0f, 0.0f, 1.0f}; ID3DBlob *green = m_pDevice->GetShaderCache()->MakeFixedColShader(greenCol); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; 
psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; psoDesc.PS.pShaderBytecode = red->GetBufferPointer(); psoDesc.PS.BytecodeLength = red->GetBufferSize(); list->Close(); list = NULL; ID3D12PipelineState *redPSO = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&redPSO); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(red); SAFE_RELEASE(green); return m_Overlay.resourceId; } psoDesc.RasterizerState.CullMode = origCull; psoDesc.PS.pShaderBytecode = green->GetBufferPointer(); psoDesc.PS.BytecodeLength = green->GetBufferSize(); ID3D12PipelineState *greenPSO = NULL; hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&greenPSO); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(red); SAFE_RELEASE(redPSO); SAFE_RELEASE(green); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(redPSO); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = 
rtv; rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE(); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs.pipe = GetResID(greenPSO); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(red); SAFE_RELEASE(green); SAFE_RELEASE(redPSO); SAFE_RELEASE(greenPSO); } } else if(overlay == DebugOverlay::Wireframe) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); float overlayConsts[] = {200.0f / 255.0f, 255.0f / 255.0f, 0.0f / 255.0f, 1.0f}; ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts); psoDesc.PS.pShaderBytecode = ps->GetBufferPointer(); psoDesc.PS.BytecodeLength = ps->GetBufferSize(); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_WIREFRAME; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; 
overlayConsts[3] = 0.0f; list->ClearRenderTargetView(rtv, overlayConsts, 0, NULL); list->Close(); list = NULL; ID3D12PipelineState *pso = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pso); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(ps); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(pso); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = rtv; rs.dsv = dsv; m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(pso); SAFE_RELEASE(ps); } } else if(overlay == DebugOverlay::ClearBeforePass || overlay == DebugOverlay::ClearBeforeDraw) { vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::ClearBeforeDraw) events.clear(); events.push_back(eventId); if(!events.empty()) { list->Close(); list = NULL; bool rtSingle = rs.rtSingle; std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> rts = rs.rts; if(overlay == DebugOverlay::ClearBeforePass) m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); list = m_pDevice->GetNewList(); for(size_t i = 0; i < rts.size(); i++) { D3D12Descriptor *desc = rtSingle ? 
GetWrapped(rts[0]) : GetWrapped(rts[i]); if(desc) { if(rtSingle) desc += i; Unwrap(list)->ClearRenderTargetView(UnwrapCPU(desc), black, 0, NULL); } } list->Close(); list = NULL; for(size_t i = 0; i < events.size(); i++) { m_pDevice->ReplayLog(events[i], events[i], eReplay_OnlyDraw); if(overlay == DebugOverlay::ClearBeforePass && i + 1 < events.size()) m_pDevice->ReplayLog(events[i] + 1, events[i + 1], eReplay_WithoutDraw); } } } else if(overlay == DebugOverlay::ViewportScissor) { if(pipe && pipe->IsGraphics() && !rs.views.empty()) { list->OMSetRenderTargets(1, &rtv, TRUE, NULL); D3D12_VIEWPORT viewport = rs.views[0]; list->RSSetViewports(1, &viewport); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); list->SetPipelineState(m_General.FixedColPipe); list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig); DebugPixelCBufferData pixelData = {0}; // border colour (dark, 2px, opaque) pixelData.WireframeColour = Vec3f(0.1f, 0.1f, 0.1f); // inner colour (light, transparent) pixelData.Channels = Vec4f(0.2f, 0.2f, 0.9f, 0.7f); pixelData.OutputDisplayFormat = 0; pixelData.RangeMinimum = viewport.TopLeftX; pixelData.InverseRangeSize = viewport.TopLeftY; pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f); D3D12_GPU_VIRTUAL_ADDRESS viewCB = GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData)); list->SetGraphicsRootConstantBufferView(0, viewCB); list->SetGraphicsRootConstantBufferView(1, viewCB); list->SetGraphicsRootConstantBufferView(2, viewCB); Vec4f dummy; list->SetGraphicsRoot32BitConstants(3, 4, &dummy.x, 0); float factor[4] = {1.0f, 1.0f, 1.0f, 1.0f}; list->OMSetBlendFactor(factor); list->DrawInstanced(3, 1, 0, 0); viewport.TopLeftX = (float)rs.scissors[0].left; viewport.TopLeftY = (float)rs.scissors[0].top; viewport.Width = (float)(rs.scissors[0].right - rs.scissors[0].left); viewport.Height = (float)(rs.scissors[0].bottom - 
rs.scissors[0].top); list->RSSetViewports(1, &viewport); pixelData.OutputDisplayFormat = 1; pixelData.RangeMinimum = viewport.TopLeftX; pixelData.InverseRangeSize = viewport.TopLeftY; pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f); D3D12_GPU_VIRTUAL_ADDRESS scissorCB = GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData)); list->SetGraphicsRootConstantBufferView(1, scissorCB); list->DrawInstanced(3, 1, 0, 0); } } else if(overlay == DebugOverlay::TriangleSizeDraw || overlay == DebugOverlay::TriangleSizePass) { if(pipe && pipe->IsGraphics()) { SCOPED_TIMER("Triangle size"); vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::TriangleSizeDraw) events.clear(); while(!events.empty()) { const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[0]); // remove any non-drawcalls, like the pass boundary. if(!(draw->flags & DrawFlags::Drawcall)) events.erase(events.begin()); else break; } events.push_back(eventId); D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = pipe->GetGraphicsDesc(); pipeDesc.pRootSignature = m_General.ConstOnlyRootSig; pipeDesc.SampleMask = 0xFFFFFFFF; pipeDesc.SampleDesc.Count = 1; pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; pipeDesc.NumRenderTargets = 1; RDCEraseEl(pipeDesc.RTVFormats); pipeDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; pipeDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; pipeDesc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; D3D12_INPUT_ELEMENT_DESC ia[2] = {}; 
ia[0].SemanticName = "pos"; ia[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT; ia[1].SemanticName = "sec"; ia[1].Format = DXGI_FORMAT_R32G32B32A32_FLOAT; ia[1].InputSlot = 1; ia[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; pipeDesc.InputLayout.NumElements = 2; pipeDesc.InputLayout.pInputElementDescs = ia; pipeDesc.VS.BytecodeLength = m_Overlay.MeshVS->GetBufferSize(); pipeDesc.VS.pShaderBytecode = m_Overlay.MeshVS->GetBufferPointer(); RDCEraseEl(pipeDesc.HS); RDCEraseEl(pipeDesc.DS); pipeDesc.GS.BytecodeLength = m_Overlay.TriangleSizeGS->GetBufferSize(); pipeDesc.GS.pShaderBytecode = m_Overlay.TriangleSizeGS->GetBufferPointer(); pipeDesc.PS.BytecodeLength = m_Overlay.TriangleSizePS->GetBufferSize(); pipeDesc.PS.pShaderBytecode = m_Overlay.TriangleSizePS->GetBufferPointer(); pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_GREATER) pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL; if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_LESS) pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; // enough for all primitive topology types ID3D12PipelineState *pipes[D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH + 1] = {}; DebugVertexCBuffer vertexData = {}; vertexData.LineStrip = 0; vertexData.ModelViewProj = Matrix4f::Identity(); vertexData.SpriteSize = Vec2f(); Vec4f viewport(rs.views[0].Width, rs.views[0].Height); if(rs.dsv.ptr) { D3D12_CPU_DESCRIPTOR_HANDLE realDSV = Unwrap(rs.dsv); list->OMSetRenderTargets(1, &rtv, TRUE, &realDSV); } list->RSSetViewports(1, &rs.views[0]); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig); list->SetGraphicsRootConstantBufferView( 0, GetDebugManager()->UploadConstants(&vertexData, sizeof(vertexData))); list->SetGraphicsRootConstantBufferView( 1, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, 
sizeof(overdrawRamp))); list->SetGraphicsRootConstantBufferView( 2, GetDebugManager()->UploadConstants(&viewport, sizeof(viewport))); list->SetGraphicsRoot32BitConstants(3, 4, &viewport.x, 0); for(size_t i = 0; i < events.size(); i++) { const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[i]); for(uint32_t inst = 0; draw && inst < RDCMAX(1U, draw->numInstances); inst++) { MeshFormat fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::GSOut); if(fmt.vertexResourceId == ResourceId()) fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::VSOut); if(fmt.vertexResourceId != ResourceId()) { D3D_PRIMITIVE_TOPOLOGY topo = MakeD3DPrimitiveTopology(fmt.topology); if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST || topo >= D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST) pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP || topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST || topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ || topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ) pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; else pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; list->IASetPrimitiveTopology(topo); if(pipes[pipeDesc.PrimitiveTopologyType] == NULL) { HRESULT hr = m_pDevice->CreateGraphicsPipelineState( &pipeDesc, __uuidof(ID3D12PipelineState), (void **)&pipes[pipeDesc.PrimitiveTopologyType]); RDCASSERTEQUAL(hr, S_OK); } ID3D12Resource *vb = m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.vertexResourceId); D3D12_VERTEX_BUFFER_VIEW vbView = {}; vbView.BufferLocation = vb->GetGPUVirtualAddress() + fmt.vertexByteOffset; vbView.StrideInBytes = fmt.vertexByteStride; vbView.SizeInBytes = UINT(vb->GetDesc().Width - fmt.vertexByteOffset); // second bind is just a dummy, so we don't have to make a shader // that doesn't accept the secondary stream list->IASetVertexBuffers(0, 1, &vbView); list->IASetVertexBuffers(1, 1, &vbView); 
list->SetPipelineState(pipes[pipeDesc.PrimitiveTopologyType]); if(fmt.indexByteStride && fmt.indexResourceId != ResourceId()) { ID3D12Resource *ib = m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.indexResourceId); D3D12_INDEX_BUFFER_VIEW view; view.BufferLocation = ib->GetGPUVirtualAddress() + fmt.indexByteOffset; view.SizeInBytes = UINT(ib->GetDesc().Width - fmt.indexByteOffset); view.Format = fmt.indexByteStride == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; list->IASetIndexBuffer(&view); list->DrawIndexedInstanced(fmt.numIndices, 1, 0, fmt.baseVertex, 0); } else { list->DrawInstanced(fmt.numIndices, 1, 0, 0); } } } } list->Close(); list = NULL; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); for(size_t i = 0; i < ARRAY_COUNT(pipes); i++) SAFE_RELEASE(pipes[i]); } // restore back to normal m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw); } else if(overlay == DebugOverlay::QuadOverdrawPass || overlay == DebugOverlay::QuadOverdrawDraw) { SCOPED_TIMER("Quad Overdraw"); vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::QuadOverdrawDraw) events.clear(); events.push_back(eventId); if(!events.empty()) { if(overlay == DebugOverlay::QuadOverdrawPass) { list->Close(); m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); list = m_pDevice->GetNewList(); } uint32_t width = uint32_t(resourceDesc.Width >> 1); uint32_t height = resourceDesc.Height >> 1; width = RDCMAX(1U, width); height = RDCMAX(1U, height); D3D12_RESOURCE_DESC uavTexDesc = {}; uavTexDesc.Alignment = 0; uavTexDesc.DepthOrArraySize = 4; uavTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; uavTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; uavTexDesc.Format = DXGI_FORMAT_R32_UINT; uavTexDesc.Height = height; uavTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; uavTexDesc.MipLevels = 1; uavTexDesc.SampleDesc.Count = 1; uavTexDesc.SampleDesc.Quality = 0; uavTexDesc.Width = width; ID3D12Resource *overdrawTex = NULL; HRESULT hr = 
m_pDevice->CreateCommittedResource( &heapProps, D3D12_HEAP_FLAG_NONE, &uavTexDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, __uuidof(ID3D12Resource), (void **)&overdrawTex); if(FAILED(hr)) { RDCERR("Failed to create overdrawTex HRESULT: %s", ToStr(hr).c_str()); list->Close(); list = NULL; return m_Overlay.resourceId; } m_pDevice->CreateShaderResourceView(overdrawTex, NULL, GetDebugManager()->GetCPUHandle(OVERDRAW_SRV)); m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL, GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)); m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL, GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV)); UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(OVERDRAW_UAV), GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV), overdrawTex, zeroes, 0, NULL); list->Close(); list = NULL; #if ENABLED(SINGLE_FLUSH_VALIDATE) m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); #endif m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); D3D12_SHADER_BYTECODE quadWrite; quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize(); quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer(); // declare callback struct here D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, events, ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV))); m_pDevice->ReplayLog(events.front(), events.back(), eReplay_Full); // resolve pass { list = m_pDevice->GetNewList(); D3D12_RESOURCE_BARRIER overdrawBarriers[2] = {}; // make sure UAV work is done then prepare for reading in PS overdrawBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; overdrawBarriers[0].UAV.pResource = overdrawTex; overdrawBarriers[1].Transition.pResource = overdrawTex; overdrawBarriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; overdrawBarriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; overdrawBarriers[1].Transition.StateAfter = 
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; // prepare tex resource for copying list->ResourceBarrier(2, overdrawBarriers); list->OMSetRenderTargets(1, &rtv, TRUE, NULL); list->RSSetViewports(1, &rs.views[0]); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); list->SetPipelineState(m_Overlay.QuadResolvePipe); list->SetGraphicsRootSignature(m_Overlay.QuadResolveRootSig); GetDebugManager()->SetDescriptorHeaps(list, true, false); list->SetGraphicsRootConstantBufferView( 0, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp))); list->SetGraphicsRootDescriptorTable(1, GetDebugManager()->GetGPUHandle(OVERDRAW_SRV)); list->DrawInstanced(3, 1, 0, 0); list->Close(); list = NULL; } m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); for(auto it = cb.m_PipelineCache.begin(); it != cb.m_PipelineCache.end(); ++it) { SAFE_RELEASE(it->second.pipe); SAFE_RELEASE(it->second.sig); } SAFE_RELEASE(overdrawTex); } if(overlay == DebugOverlay::QuadOverdrawPass) m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw); }
//-------------------------------------------------------------------------- void VeRenderWindowD3D12::Record(VeUInt32 u32Index) noexcept { VeRendererD3D12& kRenderer = *VeMemberCast( &VeRendererD3D12::m_kRenderWindowList, m_kNode.get_list()); VE_ASSERT(u32Index < m_kRecorderList.size()); Recorder& kRecorder = m_kRecorderList[u32Index]; FrameCache& kFrame = m_akFrameCache[m_u32FrameIndex]; ID3D12GraphicsCommandList* pkGCL = kFrame.m_kDirectCommandList[kRecorder.m_u32CommandIndex]; VE_ASSERT_GE(pkGCL->Reset(kFrame.m_pkDirectAllocator, nullptr), S_OK); ID3D12DescriptorHeap* ppHeaps[] = { kRenderer.m_kSRVHeap.Get() }; pkGCL->SetDescriptorHeaps(1, ppHeaps); for (auto& task : kRecorder.m_kTaskList) { switch (task->m_eType) { case REC_BARRIER: { auto& list = ((RecordBarrier*)task)->m_akBarrierList[m_u32FrameIndex]; pkGCL->ResourceBarrier((VeUInt32)list.size(), &list.front()); } break; case REC_CLEAR_RTV: { RecordClearRTV& rec = *((RecordClearRTV*)task); pkGCL->ClearRenderTargetView(rec.m_ahHandle[m_u32FrameIndex], (const FLOAT*)&(rec.m_kColor), 0, nullptr); } break; case REC_CLEAR_DSV: { RecordClearDSV& rec = *((RecordClearDSV*)task); pkGCL->ClearDepthStencilView(rec.m_ahHandle[m_u32FrameIndex], rec.m_eFlags, rec.m_f32Depth, rec.m_u8Stencil, 0, nullptr); } break; case REC_RENDER_TARGET: { RecordRenderTarget& rec = *((RecordRenderTarget*)task); pkGCL->OMSetRenderTargets((VeUInt32)rec.m_akRTVList[m_u32FrameIndex].size(), &rec.m_akRTVList[m_u32FrameIndex].front(), FALSE, rec.m_ahDSV[m_u32FrameIndex].ptr ? 
&rec.m_ahDSV[m_u32FrameIndex] : nullptr); } break; case REC_VIEWPORT: { RecordViewport& rec = *((RecordViewport*)task); pkGCL->RSSetViewports((VeUInt32)rec.m_kViewportList.size(), &rec.m_kViewportList.front()); } break; case REC_SCISSOR_RECT: { RecordScissorRect& rec = *((RecordScissorRect*)task); pkGCL->RSSetScissorRects((VeUInt32)rec.m_kScissorRectList.size(), &rec.m_kScissorRectList.front()); } break; case REC_RENDER_QUAD: { RecordRenderQuad& rec = *((RecordRenderQuad*)task); pkGCL->SetPipelineState(rec.m_pkPipelineState); pkGCL->SetGraphicsRootSignature(rec.m_pkRootSignature); for (auto& itTab : rec.m_kTable) { pkGCL->SetGraphicsRootDescriptorTable(itTab.first, itTab.second); } pkGCL->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); pkGCL->IASetVertexBuffers(0, 1, &kRenderer.m_kQuadVBV); pkGCL->DrawInstanced(4, 1, 0, 0); } break; default: break; } } VE_ASSERT_GE(pkGCL->Close(), S_OK); }