void ConstructAABBPass::ConstructAABB(ID3D12GraphicsCommandList *pCommandList, SceneType sceneType, D3D12_GPU_VIRTUAL_ADDRESS outputVH, D3D12_GPU_VIRTUAL_ADDRESS scratchBuffer, D3D12_GPU_VIRTUAL_ADDRESS childNodesProcessedCountBuffer, D3D12_GPU_VIRTUAL_ADDRESS hierarchyBuffer, D3D12_GPU_VIRTUAL_ADDRESS outputAABBParentBuffer, D3D12_GPU_DESCRIPTOR_HANDLE globalDescriptorHeap, const bool prepareUpdate, const bool performUpdate, UINT numElements) { bool isEmptyAccelerationStructure = numElements == 0; Level level = (sceneType == SceneType::Triangles) ? Level::Bottom : Level::Top; InputConstants constants = {}; constants.NumberOfElements = numElements; constants.UpdateFlags = ((UINT) prepareUpdate) | (performUpdate << 1); pCommandList->SetComputeRootSignature(m_pRootSignature); pCommandList->SetComputeRoot32BitConstants(InputRootConstants, SizeOfInUint32(InputConstants), &constants, 0); pCommandList->SetComputeRootUnorderedAccessView(OutputBVHRootUAVParam, outputVH); if (!isEmptyAccelerationStructure) { pCommandList->SetComputeRootUnorderedAccessView(ScratchUAVParam, scratchBuffer); pCommandList->SetComputeRootUnorderedAccessView(ChildNodesProcessedCountBufferParam, childNodesProcessedCountBuffer); pCommandList->SetComputeRootUnorderedAccessView(HierarchyUAVParam, hierarchyBuffer); } if (prepareUpdate || performUpdate) { pCommandList->SetComputeRootUnorderedAccessView(AABBParentBufferParam, outputAABBParentBuffer); } if (level == Top) { pCommandList->SetComputeRootDescriptorTable(GlobalDescriptorHeap, globalDescriptorHeap); } // Only given the GPU VA not the resource itself so need to resort to doing an overarching UAV barrier const UINT dispatchWidth = isEmptyAccelerationStructure ? 
1 : DivideAndRoundUp<UINT>(numElements, THREAD_GROUP_1D_WIDTH); auto uavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr); pCommandList->SetPipelineState(m_pPrepareForComputeAABBs[level]); pCommandList->Dispatch(dispatchWidth, 1, 1); pCommandList->ResourceBarrier(1, &uavBarrier); if (isEmptyAccelerationStructure) return; // Build the AABBs from the bottom-up pCommandList->SetPipelineState(m_pComputeAABBs[level]); pCommandList->Dispatch(dispatchWidth, 1, 1); pCommandList->ResourceBarrier(1, &uavBarrier); }
void RearrangeElementsPass::Rearrange( ID3D12GraphicsCommandList *pCommandList, SceneType sceneType, UINT numTriangles, D3D12_GPU_VIRTUAL_ADDRESS inputElements, D3D12_GPU_VIRTUAL_ADDRESS inputMetadataBuffer, D3D12_GPU_VIRTUAL_ADDRESS indexBuffer, D3D12_GPU_VIRTUAL_ADDRESS outputTriangles, D3D12_GPU_VIRTUAL_ADDRESS outputMetadataBuffer, D3D12_GPU_VIRTUAL_ADDRESS outputIndexBuffer) { if (numTriangles == 0) return; bool updatesAllowed = outputIndexBuffer != 0; InputConstants constants = {}; constants.NumberOfTriangles = numTriangles; constants.UpdatesAllowed = (UINT) (updatesAllowed); pCommandList->SetComputeRootSignature(m_pRootSignature); switch (sceneType) { case SceneType::Triangles: pCommandList->SetPipelineState(m_pRearrangeTrianglesPSO); break; case SceneType::BottomLevelBVHs: pCommandList->SetPipelineState(m_pRearrangeBVHsPSO); break; default: assert(false); } pCommandList->SetComputeRoot32BitConstants(InputRootConstants, SizeOfInUint32(InputConstants), &constants, 0); pCommandList->SetComputeRootUnorderedAccessView(InputElements, inputElements); pCommandList->SetComputeRootUnorderedAccessView(IndexBuffer, indexBuffer); pCommandList->SetComputeRootUnorderedAccessView(OutputElements, outputTriangles); if (inputMetadataBuffer) { pCommandList->SetComputeRootUnorderedAccessView(InputMetadata, inputMetadataBuffer); pCommandList->SetComputeRootUnorderedAccessView(OutputMetadata, outputMetadataBuffer); } if (updatesAllowed) { pCommandList->SetComputeRootUnorderedAccessView(OutputIndexBuffer, outputIndexBuffer); } const UINT dispatchWidth = DivideAndRoundUp<UINT>(numTriangles, THREAD_GROUP_1D_WIDTH); pCommandList->Dispatch(dispatchWidth, 1, 1); auto uavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr); pCommandList->ResourceBarrier(1, &uavBarrier); }
// Builds the root signature and the two pipeline states used by the rearrange pass.
//
// The root signature consists of six root UAVs (input/output elements, input/output
// metadata, index buffer, output index buffer) plus one block of 32-bit root constants
// sized for InputConstants. One PSO is created per shader: g_pRearrangeTriangles and
// g_pRearrangeBVHs.
RearrangeElementsPass::RearrangeElementsPass(ID3D12Device *pDevice, UINT nodeMask)
{
    CD3DX12_ROOT_PARAMETER1 rootParams[NumParameters];

    // Root constants.
    rootParams[InputRootConstants].InitAsConstants(SizeOfInUint32(InputConstants), InputConstantsRegister);

    // Input buffers.
    rootParams[InputElements].InitAsUnorderedAccessView(InputElementBufferRegister);
    rootParams[InputMetadata].InitAsUnorderedAccessView(InputMetadataBufferRegister);
    rootParams[IndexBuffer].InitAsUnorderedAccessView(IndexBufferRegister);

    // Output buffers.
    rootParams[OutputElements].InitAsUnorderedAccessView(OutputElementBufferRegister);
    rootParams[OutputMetadata].InitAsUnorderedAccessView(OutputMetadataBufferRegister);
    rootParams[OutputIndexBuffer].InitAsUnorderedAccessView(OutputIndexBufferRegister);

    auto signatureDesc = CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(ARRAYSIZE(rootParams), rootParams);
    CreateRootSignatureHelper(pDevice, signatureDesc, &m_pRootSignature);

    CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pRearrangeTriangles), &m_pRearrangeTrianglesPSO);
    CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pRearrangeBVHs), &m_pRearrangeBVHsPSO);
}
void LoadInstancesPass::LoadInstances(ID3D12GraphicsCommandList *pCommandList, D3D12_GPU_VIRTUAL_ADDRESS outputBVH, D3D12_GPU_VIRTUAL_ADDRESS instancesDesc, D3D12_ELEMENTS_LAYOUT instanceDescLayout, UINT numElements, D3D12_GPU_DESCRIPTOR_HANDLE descriptorHeapBase, D3D12_GPU_VIRTUAL_ADDRESS cachedSortBuffer) { if (numElements == 0) return; const bool performUpdate = cachedSortBuffer != 0; pCommandList->SetComputeRootSignature(m_pRootSignature); ID3D12PipelineState *pLoadAABBPSO = nullptr; switch (instanceDescLayout) { case D3D12_ELEMENTS_LAYOUT_ARRAY: pLoadAABBPSO = m_pLoadAABBsFromArrayOfInstancesPSO; break; case D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS: pLoadAABBPSO = m_pLoadAABBsFromArrayOfPointersPSO; break; default: ThrowFailure(E_INVALIDARG, L"Unrecognized D3D12_ELEMENTS_LAYOUT provided"); } pCommandList->SetPipelineState(pLoadAABBPSO); LoadInstancesConstants constants = { numElements, (UINT) performUpdate }; pCommandList->SetComputeRoot32BitConstants(Constants, SizeOfInUint32(LoadInstancesConstants), &constants, 0); pCommandList->SetComputeRootDescriptorTable(GlobalDescriptorHeap, descriptorHeapBase); pCommandList->SetComputeRootShaderResourceView(InstanceDescsSRV, instancesDesc); pCommandList->SetComputeRootUnorderedAccessView(OutputBVHRootUAV, outputBVH); if (performUpdate) { pCommandList->SetComputeRootUnorderedAccessView(CachedSortBuffer, cachedSortBuffer); } const UINT dispatchWidth = DivideAndRoundUp<UINT>(numElements, THREAD_GROUP_1D_WIDTH); pCommandList->Dispatch(dispatchWidth, 1, 1); auto uavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr); pCommandList->ResourceBarrier(1, &uavBarrier); }
// Builds the root signature and the two pipeline states used by the load-instances pass.
//
// Root signature layout:
//   OutputBVHRootUAV     - root UAV
//   InstanceDescsSRV     - root SRV
//   GlobalDescriptorHeap - descriptor table with two ranges (UAV and SRV), each declared
//                          with (UINT)-1 descriptors at table offset 0 and flagged
//                          data-volatile and descriptors-volatile
//   CachedSortBuffer     - root UAV
//   Constants            - 32-bit root constants sized for LoadInstancesConstants
//
// One PSO is created per instance-desc layout (array of pointers / array of instances).
LoadInstancesPass::LoadInstancesPass(ID3D12Device *pDevice, UINT nodeMask)
{
    const auto volatileRangeFlags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE | D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE;

    D3D12_DESCRIPTOR_RANGE1 heapRanges[2];
    heapRanges[0] = CD3DX12_DESCRIPTOR_RANGE1(
        D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
        (UINT)-1,
        DescriptorHeapBufferRegister,
        DescriptorHeapBufferRegisterSpace,
        volatileRangeFlags,
        0);
    heapRanges[1] = CD3DX12_DESCRIPTOR_RANGE1(
        D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
        (UINT)-1,
        DescriptorHeapSRVBufferRegister,
        DescriptorHeapSRVBufferRegisterSpace,
        volatileRangeFlags,
        0);

    CD3DX12_ROOT_PARAMETER1 rootParams[RootParameterSlot::NumRootParameters];
    rootParams[Constants].InitAsConstants(SizeOfInUint32(LoadInstancesConstants), LoadInstancesConstantsRegister);
    rootParams[GlobalDescriptorHeap].InitAsDescriptorTable(ARRAYSIZE(heapRanges), heapRanges);
    rootParams[InstanceDescsSRV].InitAsShaderResourceView(InstanceDescsRegister);
    rootParams[OutputBVHRootUAV].InitAsUnorderedAccessView(OutputBVHRegister);
    rootParams[CachedSortBuffer].InitAsUnorderedAccessView(CachedSortBufferRegister);

    auto signatureDesc = CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(ARRAYSIZE(rootParams), rootParams);
    CreateRootSignatureHelper(pDevice, signatureDesc, &m_pRootSignature);

    CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pTopLevelLoadAABBsFromArrayOfPointers), &m_pLoadAABBsFromArrayOfPointersPSO);
    CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pTopLevelLoadAABBsFromArrayOfInstances), &m_pLoadAABBsFromArrayOfInstancesPSO);
}
// Builds the root signature and the four pipeline states used by the AABB construction pass.
//
// Root signature layout:
//   OutputBVHRootUAVParam, ScratchUAVParam, HierarchyUAVParam,
//   AABBParentBufferParam, ChildNodesProcessedCountBufferParam - root UAVs
//   InputRootConstants   - 32-bit root constants sized for InputConstants
//   GlobalDescriptorHeap - descriptor table with a single UAV range declared with
//                          (UINT)-1 descriptors, flagged data-volatile and
//                          descriptors-volatile
//
// A "prepare" PSO and a "compute" PSO are created for each of Level::Top and Level::Bottom.
ConstructAABBPass::ConstructAABBPass(ID3D12Device *pDevice, UINT nodeMask)
{
    const auto volatileRangeFlags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE | D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE;
    D3D12_DESCRIPTOR_RANGE1 heapUavRange = CD3DX12_DESCRIPTOR_RANGE1(
        D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
        (UINT)-1,
        GlobalDescriptorHeapRegister,
        GlobalDescriptorHeapRegisterSpace,
        volatileRangeFlags,
        0);

    CD3DX12_ROOT_PARAMETER1 params[NumRootParameters];
    params[InputRootConstants].InitAsConstants(SizeOfInUint32(InputConstants), InputConstantsRegister);
    params[GlobalDescriptorHeap].InitAsDescriptorTable(1, &heapUavRange);
    params[OutputBVHRootUAVParam].InitAsUnorderedAccessView(OutputBVHRegister);
    params[ScratchUAVParam].InitAsUnorderedAccessView(ScratchBufferRegister);
    params[HierarchyUAVParam].InitAsUnorderedAccessView(HierarchyBufferRegister);
    params[AABBParentBufferParam].InitAsUnorderedAccessView(AABBParentBufferRegister);
    params[ChildNodesProcessedCountBufferParam].InitAsUnorderedAccessView(ChildNodesProcessedBufferRegister);

    auto signatureDesc = CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(ARRAYSIZE(params), params);
    CreateRootSignatureHelper(pDevice, signatureDesc, &m_pRootSignature);

    // Top-level pipelines.
    CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pTopLevelComputeAABBs), &m_pComputeAABBs[Level::Top]);
    CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pTopLevelPrepareForComputeAABBs), &m_pPrepareForComputeAABBs[Level::Top]);

    // Bottom-level pipelines.
    CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pBottomLevelComputeAABBs), &m_pComputeAABBs[Level::Bottom]);
    CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pBottomLevelPrepareForComputeAABBs), &m_pPrepareForComputeAABBs[Level::Bottom]);
}
void BitonicSort::Sort( ID3D12GraphicsCommandList *pCommandList, D3D12_GPU_VIRTUAL_ADDRESS SortKeyBuffer, D3D12_GPU_VIRTUAL_ADDRESS IndexBuffer, UINT ElementCount, bool IsPartiallyPreSorted, bool SortAscending) { if (ElementCount == 0) return; const uint32_t AlignedNumElements = AlignPowerOfTwo(ElementCount); const uint32_t MaxIterations = Log2(std::max(2048u, AlignedNumElements)) - 10; pCommandList->SetComputeRootSignature(m_pRootSignature); struct InputConstants { UINT NullIndex; UINT ListCount; }; InputConstants constants { SortAscending ? 0xffffffff : 0, ElementCount }; pCommandList->SetComputeRoot32BitConstants(GenericConstants, SizeOfInUint32(InputConstants), &constants, 0); // Generate execute indirect arguments pCommandList->SetPipelineState(m_pBitonicIndirectArgsCS); auto argToUAVTransition = CD3DX12_RESOURCE_BARRIER::Transition(m_pDispatchArgs, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); pCommandList->ResourceBarrier(1, &argToUAVTransition); pCommandList->SetComputeRoot32BitConstant(ShaderSpecificConstants, MaxIterations, 0); pCommandList->SetComputeRootUnorderedAccessView(OutputUAV, m_pDispatchArgs->GetGPUVirtualAddress()); pCommandList->SetComputeRootUnorderedAccessView(IndexBufferUAV, IndexBuffer); pCommandList->Dispatch(1, 1, 1); // Pre-Sort the buffer up to k = 2048. This also pads the list with invalid indices // that will drift to the end of the sorted list. 
auto argToIndirectArgTransition = CD3DX12_RESOURCE_BARRIER::Transition(m_pDispatchArgs, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); pCommandList->ResourceBarrier(1, &argToIndirectArgTransition); pCommandList->SetComputeRootUnorderedAccessView(OutputUAV, SortKeyBuffer); auto uavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr); if (!IsPartiallyPreSorted) { pCommandList->SetPipelineState(m_pBitonicPreSortCS); pCommandList->ExecuteIndirect(m_pCommandSignature, 1, m_pDispatchArgs, 0, nullptr, 0); pCommandList->ResourceBarrier(1, &uavBarrier); } uint32_t IndirectArgsOffset = cIndirectArgStride; // We have already pre-sorted up through k = 2048 when first writing our list, so // we continue sorting with k = 4096. For unnecessarily large values of k, these // indirect dispatches will be skipped over with thread counts of 0. for (uint32_t k = 4096; k <= AlignedNumElements; k *= 2) { pCommandList->SetPipelineState(m_pBitonicOuterSortCS); for (uint32_t j = k / 2; j >= 2048; j /= 2) { struct OuterSortConstants { UINT k; UINT j; } constants { k, j }; pCommandList->SetComputeRoot32BitConstants(ShaderSpecificConstants, SizeOfInUint32(OuterSortConstants), &constants, 0); pCommandList->ExecuteIndirect(m_pCommandSignature, 1, m_pDispatchArgs, IndirectArgsOffset, nullptr, 0); pCommandList->ResourceBarrier(1, &uavBarrier); IndirectArgsOffset += cIndirectArgStride; } pCommandList->SetPipelineState(m_pBitonicInnerSortCS); pCommandList->ExecuteIndirect(m_pCommandSignature, 1, m_pDispatchArgs, IndirectArgsOffset, nullptr, 0); pCommandList->ResourceBarrier(1, &uavBarrier); IndirectArgsOffset += cIndirectArgStride; } }