// Records the compute work that builds the AABBs of an acceleration structure.
//
// Two pipeline states are dispatched back to back, each followed by a UAV
// barrier: first the "prepare" state, then the AABB computation itself. The
// pipeline states are selected per level: SceneType::Triangles maps to
// Level::Bottom, every other scene type maps to Level::Top.
//
// When numElements is 0 only the prepare dispatch is recorded (with a single
// thread group) and the scratch, child-count and hierarchy UAVs are not bound.
// The AABB parent buffer is bound only when prepareUpdate or performUpdate is
// set, and the descriptor table is bound only for the top level.
void ConstructAABBPass::ConstructAABB(ID3D12GraphicsCommandList *pCommandList,
        SceneType sceneType,
        D3D12_GPU_VIRTUAL_ADDRESS outputVH,
        D3D12_GPU_VIRTUAL_ADDRESS scratchBuffer,
        D3D12_GPU_VIRTUAL_ADDRESS childNodesProcessedCountBuffer,
        D3D12_GPU_VIRTUAL_ADDRESS hierarchyBuffer,
        D3D12_GPU_VIRTUAL_ADDRESS outputAABBParentBuffer,
        D3D12_GPU_DESCRIPTOR_HANDLE globalDescriptorHeap,
        const bool prepareUpdate,
        const bool performUpdate,
        UINT numElements)
    {
        const bool hasElements = (numElements != 0);
        const Level level = (sceneType == SceneType::Triangles) ? Level::Bottom : Level::Top;

        // Pack the update request into a bit field: bit 0 = prepare, bit 1 = perform.
        UINT updateFlags = 0;
        if (prepareUpdate)
        {
            updateFlags |= 1u;
        }
        if (performUpdate)
        {
            updateFlags |= 2u;
        }

        InputConstants rootConstants = {};
        rootConstants.NumberOfElements = numElements;
        rootConstants.UpdateFlags = updateFlags;

        pCommandList->SetComputeRootSignature(m_pRootSignature);
        pCommandList->SetComputeRoot32BitConstants(InputRootConstants, SizeOfInUint32(InputConstants), &rootConstants, 0);
        pCommandList->SetComputeRootUnorderedAccessView(OutputBVHRootUAVParam, outputVH);

        // The intermediate buffers are only bound when there is something to build.
        if (hasElements)
        {
            pCommandList->SetComputeRootUnorderedAccessView(ScratchUAVParam, scratchBuffer);
            pCommandList->SetComputeRootUnorderedAccessView(ChildNodesProcessedCountBufferParam, childNodesProcessedCountBuffer);
            pCommandList->SetComputeRootUnorderedAccessView(HierarchyUAVParam, hierarchyBuffer);
        }

        const bool touchesParentBuffer = prepareUpdate || performUpdate;
        if (touchesParentBuffer)
        {
            pCommandList->SetComputeRootUnorderedAccessView(AABBParentBufferParam, outputAABBParentBuffer);
        }

        if (level == Level::Top)
        {
            pCommandList->SetComputeRootDescriptorTable(GlobalDescriptorHeap, globalDescriptorHeap);
        }

        // An empty structure still gets one thread group for the prepare pass.
        const UINT threadGroupCount = hasElements
            ? DivideAndRoundUp<UINT>(numElements, THREAD_GROUP_1D_WIDTH)
            : 1;

        // Only GPU virtual addresses are available here, not the resources
        // themselves, so a global (null-resource) UAV barrier is the only option.
        auto globalUavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr);

        // Pass 1: prepare.
        pCommandList->SetPipelineState(m_pPrepareForComputeAABBs[level]);
        pCommandList->Dispatch(threadGroupCount, 1, 1);
        pCommandList->ResourceBarrier(1, &globalUavBarrier);

        // Pass 2: build the AABBs from the bottom up (skipped when empty).
        if (hasElements)
        {
            pCommandList->SetPipelineState(m_pComputeAABBs[level]);
            pCommandList->Dispatch(threadGroupCount, 1, 1);
            pCommandList->ResourceBarrier(1, &globalUavBarrier);
        }
    }
// --- Example no. 2 ---
    // Records one compute dispatch that rearranges elements, followed by a
    // global UAV barrier. Nothing is recorded when numTriangles is 0.
    //
    // The pipeline state is chosen from sceneType (triangles or bottom-level
    // BVHs); any other value trips an assert. The metadata buffers are bound
    // only when inputMetadataBuffer is non-zero, and the output index buffer
    // only when outputIndexBuffer is non-zero, which is also what is reported
    // to the shader through the UpdatesAllowed constant.
    void RearrangeElementsPass::Rearrange(
        ID3D12GraphicsCommandList *pCommandList,
        SceneType sceneType,
        UINT numTriangles,
        D3D12_GPU_VIRTUAL_ADDRESS inputElements,
        D3D12_GPU_VIRTUAL_ADDRESS inputMetadataBuffer,
        D3D12_GPU_VIRTUAL_ADDRESS indexBuffer,
        D3D12_GPU_VIRTUAL_ADDRESS outputTriangles,
        D3D12_GPU_VIRTUAL_ADDRESS outputMetadataBuffer,
        D3D12_GPU_VIRTUAL_ADDRESS outputIndexBuffer)
    {
        if (numTriangles == 0)
        {
            return;
        }

        // A non-null output index buffer signals that updates are allowed.
        const bool hasOutputIndexBuffer = (outputIndexBuffer != 0);
        const bool hasMetadata = (inputMetadataBuffer != 0);

        InputConstants rootConstants = {};
        rootConstants.NumberOfTriangles = numTriangles;
        rootConstants.UpdatesAllowed = static_cast<UINT>(hasOutputIndexBuffer);

        pCommandList->SetComputeRootSignature(m_pRootSignature);

        if (sceneType == SceneType::Triangles)
        {
            pCommandList->SetPipelineState(m_pRearrangeTrianglesPSO);
        }
        else if (sceneType == SceneType::BottomLevelBVHs)
        {
            pCommandList->SetPipelineState(m_pRearrangeBVHsPSO);
        }
        else
        {
            assert(false);
        }

        pCommandList->SetComputeRoot32BitConstants(InputRootConstants, SizeOfInUint32(InputConstants), &rootConstants, 0);

        pCommandList->SetComputeRootUnorderedAccessView(InputElements, inputElements);
        pCommandList->SetComputeRootUnorderedAccessView(IndexBuffer, indexBuffer);
        pCommandList->SetComputeRootUnorderedAccessView(OutputElements, outputTriangles);

        if (hasMetadata)
        {
            pCommandList->SetComputeRootUnorderedAccessView(InputMetadata, inputMetadataBuffer);
            pCommandList->SetComputeRootUnorderedAccessView(OutputMetadata, outputMetadataBuffer);
        }

        if (hasOutputIndexBuffer)
        {
            pCommandList->SetComputeRootUnorderedAccessView(OutputIndexBuffer, outputIndexBuffer);
        }

        const UINT threadGroupCount = DivideAndRoundUp<UINT>(numTriangles, THREAD_GROUP_1D_WIDTH);
        pCommandList->Dispatch(threadGroupCount, 1, 1);

        // Global UAV barrier (null resource) after the dispatch.
        auto globalUavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr);
        pCommandList->ResourceBarrier(1, &globalUavBarrier);
    }
// --- Example no. 3 ---
    // Builds the root signature shared by both rearrange shaders (six root
    // UAVs plus one block of root constants) and creates the two compute
    // pipeline states: one for triangles and one for bottom-level BVHs.
    RearrangeElementsPass::RearrangeElementsPass(ID3D12Device *pDevice, UINT nodeMask)
    {
        const auto numRootConstants = SizeOfInUint32(InputConstants);

        CD3DX12_ROOT_PARAMETER1 rootParams[NumParameters];

        // Input side.
        rootParams[InputElements].InitAsUnorderedAccessView(InputElementBufferRegister);
        rootParams[InputMetadata].InitAsUnorderedAccessView(InputMetadataBufferRegister);
        rootParams[IndexBuffer].InitAsUnorderedAccessView(IndexBufferRegister);

        // Output side.
        rootParams[OutputElements].InitAsUnorderedAccessView(OutputElementBufferRegister);
        rootParams[OutputMetadata].InitAsUnorderedAccessView(OutputMetadataBufferRegister);
        rootParams[OutputIndexBuffer].InitAsUnorderedAccessView(OutputIndexBufferRegister);

        // Root constants.
        rootParams[InputRootConstants].InitAsConstants(numRootConstants, InputConstantsRegister);

        CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC signatureDesc(ARRAYSIZE(rootParams), rootParams);
        CreateRootSignatureHelper(pDevice, signatureDesc, &m_pRootSignature);

        CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pRearrangeTriangles), &m_pRearrangeTrianglesPSO);
        CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pRearrangeBVHs), &m_pRearrangeBVHsPSO);
    }
    // Records one compute dispatch that loads instance AABBs into outputBVH,
    // followed by a global UAV barrier. Nothing is recorded when numElements
    // is 0.
    //
    // instanceDescLayout selects the pipeline state (array of instances vs.
    // array of pointers); any other value raises E_INVALIDARG via ThrowFailure.
    // A non-zero cachedSortBuffer marks this as an update: the flag is passed
    // to the shader and the buffer is bound as a root UAV.
    void LoadInstancesPass::LoadInstances(ID3D12GraphicsCommandList *pCommandList, 
        D3D12_GPU_VIRTUAL_ADDRESS outputBVH, 
        D3D12_GPU_VIRTUAL_ADDRESS instancesDesc, 
        D3D12_ELEMENTS_LAYOUT instanceDescLayout, 
        UINT numElements, 
        D3D12_GPU_DESCRIPTOR_HANDLE descriptorHeapBase,
        D3D12_GPU_VIRTUAL_ADDRESS cachedSortBuffer)
    {
        if (numElements == 0)
        {
            return;
        }

        const bool isUpdate = (cachedSortBuffer != 0);

        pCommandList->SetComputeRootSignature(m_pRootSignature);

        // Choose the shader variant that matches how the instance descs are laid out.
        ID3D12PipelineState *pSelectedPSO = nullptr;
        if (instanceDescLayout == D3D12_ELEMENTS_LAYOUT_ARRAY)
        {
            pSelectedPSO = m_pLoadAABBsFromArrayOfInstancesPSO;
        }
        else if (instanceDescLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
        {
            pSelectedPSO = m_pLoadAABBsFromArrayOfPointersPSO;
        }
        else
        {
            ThrowFailure(E_INVALIDARG, L"Unrecognized D3D12_ELEMENTS_LAYOUT provided");
        }
        pCommandList->SetPipelineState(pSelectedPSO);

        LoadInstancesConstants rootConstants = { numElements, static_cast<UINT>(isUpdate) };
        pCommandList->SetComputeRoot32BitConstants(Constants, SizeOfInUint32(LoadInstancesConstants), &rootConstants, 0);
        pCommandList->SetComputeRootDescriptorTable(GlobalDescriptorHeap, descriptorHeapBase);
        pCommandList->SetComputeRootShaderResourceView(InstanceDescsSRV, instancesDesc);
        pCommandList->SetComputeRootUnorderedAccessView(OutputBVHRootUAV, outputBVH);

        if (isUpdate)
        {
            pCommandList->SetComputeRootUnorderedAccessView(CachedSortBuffer, cachedSortBuffer);
        }

        const UINT threadGroupCount = DivideAndRoundUp<UINT>(numElements, THREAD_GROUP_1D_WIDTH);
        pCommandList->Dispatch(threadGroupCount, 1, 1);

        // Global UAV barrier (null resource) after the dispatch.
        auto globalUavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr);
        pCommandList->ResourceBarrier(1, &globalUavBarrier);
    }
    // Builds the root signature used by the instance-loading shaders and
    // creates one compute pipeline state per instance-desc layout (array of
    // pointers and array of instances).
    //
    // The descriptor table exposes two ranges, one UAV and one SRV. Both are
    // declared with a descriptor count of (UINT)-1, both start at table
    // offset 0, and both are flagged data- and descriptor-volatile.
    LoadInstancesPass::LoadInstancesPass(ID3D12Device *pDevice, UINT nodeMask)
    {
        const auto volatileRangeFlags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE | D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE;

        D3D12_DESCRIPTOR_RANGE1 heapRanges[] =
        {
            CD3DX12_DESCRIPTOR_RANGE1(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, (UINT)-1, DescriptorHeapBufferRegister, DescriptorHeapBufferRegisterSpace, volatileRangeFlags, 0),
            CD3DX12_DESCRIPTOR_RANGE1(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, (UINT)-1, DescriptorHeapSRVBufferRegister, DescriptorHeapSRVBufferRegisterSpace, volatileRangeFlags, 0),
        };

        CD3DX12_ROOT_PARAMETER1 rootParams[RootParameterSlot::NumRootParameters];
        rootParams[OutputBVHRootUAV].InitAsUnorderedAccessView(OutputBVHRegister);
        rootParams[InstanceDescsSRV].InitAsShaderResourceView(InstanceDescsRegister);
        rootParams[GlobalDescriptorHeap].InitAsDescriptorTable(ARRAYSIZE(heapRanges), heapRanges);
        rootParams[CachedSortBuffer].InitAsUnorderedAccessView(CachedSortBufferRegister);
        rootParams[Constants].InitAsConstants(SizeOfInUint32(LoadInstancesConstants), LoadInstancesConstantsRegister);

        CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC signatureDesc(ARRAYSIZE(rootParams), rootParams);
        CreateRootSignatureHelper(pDevice, signatureDesc, &m_pRootSignature);

        CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pTopLevelLoadAABBsFromArrayOfPointers), &m_pLoadAABBsFromArrayOfPointersPSO);
        CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pTopLevelLoadAABBsFromArrayOfInstances), &m_pLoadAABBsFromArrayOfInstancesPSO);
    }
    // Builds the root signature shared by the AABB construction shaders (five
    // root UAVs, one block of root constants and one UAV descriptor table) and
    // creates the "prepare" and "compute" pipeline states for both the top and
    // the bottom level.
    ConstructAABBPass::ConstructAABBPass(ID3D12Device *pDevice, UINT nodeMask)
    {
        // Single UAV range with a descriptor count of (UINT)-1, flagged data-
        // and descriptor-volatile.
        const auto volatileRangeFlags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE | D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE;
        const D3D12_DESCRIPTOR_RANGE1 heapUavRange = CD3DX12_DESCRIPTOR_RANGE1(
            D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
            (UINT)-1,
            GlobalDescriptorHeapRegister,
            GlobalDescriptorHeapRegisterSpace,
            volatileRangeFlags,
            0);

        CD3DX12_ROOT_PARAMETER1 params[NumRootParameters];
        params[OutputBVHRootUAVParam].InitAsUnorderedAccessView(OutputBVHRegister);
        params[ScratchUAVParam].InitAsUnorderedAccessView(ScratchBufferRegister);
        params[HierarchyUAVParam].InitAsUnorderedAccessView(HierarchyBufferRegister);
        params[AABBParentBufferParam].InitAsUnorderedAccessView(AABBParentBufferRegister);
        params[ChildNodesProcessedCountBufferParam].InitAsUnorderedAccessView(ChildNodesProcessedBufferRegister);
        params[InputRootConstants].InitAsConstants(SizeOfInUint32(InputConstants), InputConstantsRegister);
        params[GlobalDescriptorHeap].InitAsDescriptorTable(1, &heapUavRange);

        CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC signatureDesc(ARRAYSIZE(params), params);
        CreateRootSignatureHelper(pDevice, signatureDesc, &m_pRootSignature);

        // Top-level pipeline states.
        CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pTopLevelComputeAABBs), &m_pComputeAABBs[Level::Top]);
        CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pTopLevelPrepareForComputeAABBs), &m_pPrepareForComputeAABBs[Level::Top]);

        // Bottom-level pipeline states.
        CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pBottomLevelComputeAABBs), &m_pComputeAABBs[Level::Bottom]);
        CreatePSOHelper(pDevice, nodeMask, m_pRootSignature, COMPILED_SHADER(g_pBottomLevelPrepareForComputeAABBs), &m_pPrepareForComputeAABBs[Level::Bottom]);
    }
// --- Example no. 7 ---
// Records a GPU bitonic sort of ElementCount entries onto pCommandList.
// Nothing is recorded when ElementCount is 0.
//
// Recorded sequence:
//   1. A single-group dispatch that writes the indirect dispatch arguments
//      into m_pDispatchArgs (transitioned to UAV and back around it).
//   2. Unless IsPartiallyPreSorted is set, a pre-sort pass driven by the first
//      indirect argument entry.
//   3. For every k = 4096, 8192, ... up to the power-of-two-aligned element
//      count: one outer-sort pass per j = k/2 down to 2048, then one
//      inner-sort pass. Each pass consumes the next indirect argument entry.
// Every indirect pass is followed by a global UAV barrier.
//
// SortAscending selects the NullIndex constant handed to the shaders
// (0xffffffff when ascending, 0 otherwise).
void BitonicSort::Sort(
    ID3D12GraphicsCommandList *pCommandList,
    D3D12_GPU_VIRTUAL_ADDRESS SortKeyBuffer,
    D3D12_GPU_VIRTUAL_ADDRESS IndexBuffer,
    UINT ElementCount,
    bool IsPartiallyPreSorted,
    bool SortAscending)
{
    if (ElementCount == 0)
    {
        return;
    }

    const uint32_t paddedElementCount = AlignPowerOfTwo(ElementCount);
    const uint32_t maxIterations = Log2(std::max(2048u, paddedElementCount)) - 10;

    pCommandList->SetComputeRootSignature(m_pRootSignature);

    // Constants shared by every pass of the sort.
    struct SharedSortConstants
    {
        UINT NullIndex;
        UINT ListCount;
    };
    const UINT nullIndex = SortAscending ? 0xffffffffu : 0u;
    SharedSortConstants sharedConstants = { nullIndex, ElementCount };
    pCommandList->SetComputeRoot32BitConstants(GenericConstants, SizeOfInUint32(SharedSortConstants), &sharedConstants, 0);

    // Step 1: generate the execute-indirect arguments.
    pCommandList->SetPipelineState(m_pBitonicIndirectArgsCS);

    auto argsToUav = CD3DX12_RESOURCE_BARRIER::Transition(m_pDispatchArgs, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
    pCommandList->ResourceBarrier(1, &argsToUav);

    pCommandList->SetComputeRoot32BitConstant(ShaderSpecificConstants, maxIterations, 0);
    pCommandList->SetComputeRootUnorderedAccessView(OutputUAV, m_pDispatchArgs->GetGPUVirtualAddress());
    pCommandList->SetComputeRootUnorderedAccessView(IndexBufferUAV, IndexBuffer);
    pCommandList->Dispatch(1, 1, 1);

    auto argsToIndirect = CD3DX12_RESOURCE_BARRIER::Transition(m_pDispatchArgs, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
    pCommandList->ResourceBarrier(1, &argsToIndirect);
    pCommandList->SetComputeRootUnorderedAccessView(OutputUAV, SortKeyBuffer);

    auto globalUavBarrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr);

    // Step 2: pre-sort the buffer up to k = 2048. This also pads the list with
    // invalid indices that will drift to the end of the sorted list.
    if (!IsPartiallyPreSorted)
    {
        pCommandList->SetPipelineState(m_pBitonicPreSortCS);
        pCommandList->ExecuteIndirect(m_pCommandSignature, 1, m_pDispatchArgs, 0, nullptr, 0);
        pCommandList->ResourceBarrier(1, &globalUavBarrier);
    }

    // Byte offset of the next unused entry in the indirect argument buffer;
    // entry 0 belongs to the pre-sort pass.
    uint32_t nextArgsOffset = cIndirectArgStride;

    // Runs the currently bound pipeline state with the next indirect argument
    // entry, then inserts the UAV barrier and advances to the following entry.
    const auto runNextIndirectPass = [&]()
    {
        pCommandList->ExecuteIndirect(m_pCommandSignature, 1, m_pDispatchArgs, nextArgsOffset, nullptr, 0);
        pCommandList->ResourceBarrier(1, &globalUavBarrier);
        nextArgsOffset += cIndirectArgStride;
    };

    // Step 3: everything up through k = 2048 was handled by the pre-sort, so
    // sorting continues at k = 4096. For unnecessarily large values of k the
    // indirect dispatches are skipped over with thread counts of 0.
    uint32_t outerK = 4096;
    while (outerK <= paddedElementCount)
    {
        pCommandList->SetPipelineState(m_pBitonicOuterSortCS);

        uint32_t innerJ = outerK / 2;
        while (innerJ >= 2048)
        {
            struct OuterSortConstants
            {
                UINT k;
                UINT j;
            } outerConstants { outerK, innerJ };

            pCommandList->SetComputeRoot32BitConstants(ShaderSpecificConstants, SizeOfInUint32(OuterSortConstants), &outerConstants, 0);
            runNextIndirectPass();

            innerJ /= 2;
        }

        pCommandList->SetPipelineState(m_pBitonicInnerSortCS);
        runNextIndirectPass();

        outerK *= 2;
    }
}