// Binds the whole of buffer `id` as the vertex buffer for input slot 0 on the
// current command list. `stride` becomes the view's StrideInBytes.
void afSetVertexBuffer(VBOID id, int stride)
{
	ID3D12GraphicsCommandList* cmdList = deviceMan.GetCommandList();
	const D3D12_RESOURCE_DESC bufferDesc = id->GetDesc();

	D3D12_VERTEX_BUFFER_VIEW view;
	view.BufferLocation = id->GetGPUVirtualAddress();
	view.SizeInBytes = (UINT)bufferDesc.Width;	// the view covers the full buffer width
	view.StrideInBytes = (UINT)stride;

	cmdList->IASetVertexBuffers(0, 1, &view);
}
//--------------------------------------------------------------------------
void VeRenderWindowD3D12::RecordScene(VeUInt32 u32Thread) noexcept
{
	// For every stage of every camera: pick the direct command list at index
	// (stage + u32Thread) of the current frame, reset it on the frame's direct
	// allocator and close it again. Nothing is recorded between the two calls.
	for (auto& kCamera : m_kCameraList)
	{
		for (auto stage : kCamera.m_kStageList)
		{
			FrameCache& kCache = m_akFrameCache[m_u32FrameIndex];
			ID3D12GraphicsCommandList* pkList = kCache.m_kDirectCommandList[stage + u32Thread];

			VE_ASSERT_GE(pkList->Reset(kCache.m_pkDirectAllocator, nullptr), S_OK);
			VE_ASSERT_GE(pkList->Close(), S_OK);
		}
	}
}
Exemple #3
0
void Graphics::end() {
	D3D12_RESOURCE_BARRIER barrier;
	barrier.Transition.pResource = renderTarget;
	barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
	barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
	barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET;
	barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
	barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;

	ID3D12GraphicsCommandList* commandList = commandLists[currentBackBuffer];
	commandList->ResourceBarrier(1, &barrier);

	commandList->Close();

	ID3D12CommandList* commandLists[] = {commandList};
	commandQueue->ExecuteCommandLists(std::extent<decltype(commandLists)>::value, commandLists);
}
// Copies `buf` into subresource 0 of texture `id`.
// The data is placed in a buffer made by afCreateBuffer (sized by the total
// byte count GetCopyableFootprints reports) and copied with CopyTextureRegion.
// The texture is transitioned GENERIC_READ -> COPY_DEST before the copy and
// back afterwards, then deviceMan.Flush() is called.
// NOTE(review): `desc` is not read here; the layout comes from id->GetDesc().
// NOTE(review): presumably `buf` must already be laid out with the footprint's
// row pitch and hold at least the reported total size - confirm against
// afCreateBuffer and the callers.
void afWriteTexture(SRVID id, const TexDesc& desc, const void* buf)
{
	D3D12_RESOURCE_DESC textureDesc = id->GetDesc();

	D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
	UINT rowCount;
	UINT64 rowBytes, totalBytes;
	deviceMan.GetDevice()->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, &rowCount, &rowBytes, &totalBytes);

	ComPtr<ID3D12Resource> staging = afCreateBuffer((int)totalBytes, buf);

	D3D12_TEXTURE_COPY_LOCATION from = {};
	from.pResource = staging.Get();
	from.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
	from.PlacedFootprint = layout;

	D3D12_TEXTURE_COPY_LOCATION to = {};
	to.pResource = id.Get();
	to.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
	to.SubresourceIndex = 0;

	ID3D12GraphicsCommandList* cmdList = deviceMan.GetCommandList();

	// One barrier object, used once in each direction around the copy.
	D3D12_RESOURCE_BARRIER barrier = {};
	barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
	barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
	barrier.Transition.pResource = id.Get();
	barrier.Transition.Subresource = 0;
	barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ;
	barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
	cmdList->ResourceBarrier(1, &barrier);

	cmdList->CopyTextureRegion(&to, 0, 0, 0, &from, nullptr);

	barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
	barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ;
	cmdList->ResourceBarrier(1, &barrier);

	deviceMan.Flush();
}
Exemple #5
0
// Creates the bundle command allocator and records BundleCount bundles on it.
// Bundle i is created with PSO as its initial pipeline state and draws
// ObjectsPerBundle objects, starting at object index ObjectsPerBundle * i.
// Each object's constants are bound through root parameter 0, either as a
// root-level CBV or through a descriptor table, depending on UseRootLevelCBV.
void AppTest::InitBundles()
{
	CHK(Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_BUNDLE, IID_PPV_ARGS(CommandBundleAllocator.GetAddressOf())));

	for (unsigned int i = 0; i < BundleCount; i++)
	{
		CHK(Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_BUNDLE, CommandBundleAllocator.Get(), PSO.Get(), IID_PPV_ARGS(CommandBundleArray[i].GetAddressOf())));

		ID3D12GraphicsCommandList* commandBundle = CommandBundleArray[i].Get();

		// Index of the first object drawn by this bundle.
		const unsigned int offset = ObjectsPerBundle * i;

		commandBundle->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
		commandBundle->SetGraphicsRootSignature(RootSignature.Get());

		if (UseRootLevelCBV)
		{
			for (unsigned int command = 0; command < ObjectsPerBundle; command++)
			{
				commandBundle->SetGraphicsRootConstantBufferView(0, PerObjectConstantBuffers.GetGPUHandle(offset + command));
				commandBundle->DrawIndexedInstanced(IndexCount, 1, 0, 0, 0);
			}
		}
		else
		{
			D3D12_GPU_DESCRIPTOR_HANDLE descriptorHandle;

			ID3D12DescriptorHeap* descriptorHeap = ConstantBufferDescriptorHeap->GetBaseHeap();
			commandBundle->SetDescriptorHeaps(1, &descriptorHeap);

			for (unsigned int command = 0; command < ObjectsPerBundle; command++)
			{
				descriptorHandle.ptr = ConstantBufferDescriptorHeap->GetDescriptorGPUHandle(offset + command);
				commandBundle->SetGraphicsRootDescriptorTable(0, descriptorHandle);
				commandBundle->DrawIndexedInstanced(IndexCount, 1, 0, 0, 0);
			}
		}

		// Close() returns an HRESULT; check it the same way as the creation calls.
		CHK(commandBundle->Close());
	}
}
// Finishes a texture update: unmaps the upload resource, records a copy from
// it into subresource 0 of the texture behind `textureHandle`, closes the
// command list and submits it through s_D3D12->ExecuteCommandList, storing
// the returned value in s_D3D12FenceValue.
//   textureHandle  - cast to ID3D12Resource*; its Width/Height are asserted
//                    to equal textureWidth/textureHeight.
//   rowPitch, dataPtr - not used by this implementation.
// NOTE(review): the upload size is computed as width * height * 4, i.e. it
// presumably assumes a 4-byte-per-texel format - confirm against callers.
void RenderAPI_D3D12::EndModifyTexture(void* textureHandle, int textureWidth, int textureHeight, int rowPitch, void* dataPtr)
{
	ID3D12Device* device = s_D3D12->GetDevice();

	// Widen before multiplying so the product is computed in 64 bits rather
	// than in int.
	const UINT64 kDataSize = static_cast<UINT64>(textureWidth) * static_cast<UINT64>(textureHeight) * 4;
	ID3D12Resource* upload = GetUploadResource(kDataSize);
	upload->Unmap(0, NULL);

	ID3D12Resource* resource = (ID3D12Resource*)textureHandle;
	D3D12_RESOURCE_DESC desc = resource->GetDesc();
	assert(desc.Width == textureWidth);
	assert(desc.Height == textureHeight);

	// Source: the upload resource, addressed through the footprint of subresource 0.
	D3D12_TEXTURE_COPY_LOCATION srcLoc = {};
	srcLoc.pResource = upload;
	srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
	device->GetCopyableFootprints(&desc, 0, 1, 0, &srcLoc.PlacedFootprint, nullptr, nullptr, nullptr);

	// Destination: subresource 0 of the texture.
	D3D12_TEXTURE_COPY_LOCATION dstLoc = {};
	dstLoc.pResource = resource;
	dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
	dstLoc.SubresourceIndex = 0;

	// We inform Unity that we expect this resource to be in D3D12_RESOURCE_STATE_COPY_DEST state,
	// and because we do not barrier it ourselves, we tell Unity that no changes are done on our command list.
	UnityGraphicsD3D12ResourceState resourceState = {};
	resourceState.resource = resource;
	resourceState.expected = D3D12_RESOURCE_STATE_COPY_DEST;
	resourceState.current = D3D12_RESOURCE_STATE_COPY_DEST;

	// Queue data upload
	s_D3D12CmdList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr);

	// Execute the command list
	s_D3D12CmdList->Close();
	s_D3D12FenceValue = s_D3D12->ExecuteCommandList(s_D3D12CmdList, 1, &resourceState);
}
// Creates the direct command allocator and command list used for texture
// uploads, and resets the fence bookkeeping (fence value 0, fresh auto-reset
// event). The command list is closed right after creation.
// Failures are reported with OutputDebugStringA. After a failure the dependent
// steps are skipped, so a command list that was never created is not used.
void RenderAPI_D3D12::CreateResources()
{
	ID3D12Device* device = s_D3D12->GetDevice();

	// Command list
	HRESULT hr = device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&s_D3D12CmdAlloc));
	if (FAILED(hr))
	{
		OutputDebugStringA("Failed to CreateCommandAllocator.\n");
	}
	else
	{
		hr = device->CreateCommandList(kNodeMask, D3D12_COMMAND_LIST_TYPE_DIRECT, s_D3D12CmdAlloc, nullptr, IID_PPV_ARGS(&s_D3D12CmdList));
		if (FAILED(hr))
		{
			OutputDebugStringA("Failed to CreateCommandList.\n");
		}
		else
		{
			// Only touch the list once it is known to exist.
			s_D3D12CmdList->Close();
		}
	}

	// Fence
	s_D3D12FenceValue = 0;
	s_D3D12Event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
}
// Starts a texture update: waits for the previously submitted upload (if it
// has not completed), resets the allocator and command list, and maps the
// upload resource.
//   textureHandle - not used by this implementation.
//   outRowPitch   - receives textureWidth * 4.
// Returns the mapped pointer (NULL if Map left it unset).
// NOTE(review): EndModifyTexture copies using the footprint from
// GetCopyableFootprints. D3D12 aligns placed-footprint row pitches to 256
// bytes, so the pitch reported here presumably only matches when
// textureWidth * 4 is a multiple of 256 - confirm which widths callers use.
void* RenderAPI_D3D12::BeginModifyTexture(void* textureHandle, int textureWidth, int textureHeight, int* outRowPitch)
{
	ID3D12Fence* fence = s_D3D12->GetFrameFence();

	// Wait on the previous job (example only - simplifies resource management)
	if (fence->GetCompletedValue() < s_D3D12FenceValue)
	{
		fence->SetEventOnCompletion(s_D3D12FenceValue, s_D3D12Event);
		WaitForSingleObject(s_D3D12Event, INFINITE);
	}

	// Begin a command list
	s_D3D12CmdAlloc->Reset();
	s_D3D12CmdList->Reset(s_D3D12CmdAlloc, nullptr);

	// Fill data
	// Widen before multiplying so the product is computed in 64 bits rather
	// than in int.
	const UINT64 kDataSize = static_cast<UINT64>(textureWidth) * static_cast<UINT64>(textureHeight) * 4;
	ID3D12Resource* upload = GetUploadResource(kDataSize);
	void* mapped = NULL;
	upload->Map(0, NULL, &mapped);
	*outRowPitch = textureWidth * 4;
	return mapped;
}
Exemple #9
0
/// Checks whether the D3D12 device behind the given context is usable.
/// \param pContext context pointer; treated as an IUnknown that must expose ID3D12GraphicsCommandList
/// \param pHwInfo hardware info; only checked for null here
/// \return GPA_STATUS_OK on success, GPA_STATUS_ERROR_FAILED when an argument is null or a
///         D3D12 query fails, GPA_STATUS_ERROR_HARDWARE_NOT_SUPPORTED when the maximum
///         supported feature level is below 11_0
GPA_Status GPA_IMP_VerifyHWSupport(void* pContext, GPA_HWInfo* pHwInfo)
{
    if ((nullptr == pContext) || (nullptr == pHwInfo))
    {
        return GPA_STATUS_ERROR_FAILED;
    }

    ID3D12GraphicsCommandList* pCommandList = nullptr;
    HRESULT hr = static_cast<IUnknown*>(pContext)->QueryInterface(__uuidof(ID3D12GraphicsCommandList), reinterpret_cast<void**>(&pCommandList));

    if (S_OK != hr)
    {
        GPA_LogError("Failed to get command list from context");
        return GPA_STATUS_ERROR_FAILED;
    }

    GPA_Status result = GPA_STATUS_OK;

    ID3D12Device* pDevice = nullptr;
    hr = pCommandList->GetDevice(__uuidof(ID3D12Device), reinterpret_cast<void**>(&pDevice));

    if (S_OK != hr)
    {
        GPA_LogError("Failed to get D3D12 device");
        result = GPA_STATUS_ERROR_FAILED;
    }
    else
    {
        static const D3D_FEATURE_LEVEL requestedFeatureLevels[] =
        {
            D3D_FEATURE_LEVEL_11_0,
            D3D_FEATURE_LEVEL_11_1,
        };

        D3D12_FEATURE_DATA_FEATURE_LEVELS featureLevels;
        featureLevels.NumFeatureLevels = (sizeof(requestedFeatureLevels) / sizeof(D3D_FEATURE_LEVEL));
        featureLevels.pFeatureLevelsRequested = requestedFeatureLevels;
        featureLevels.MaxSupportedFeatureLevel = D3D_FEATURE_LEVEL_11_1;

        hr = pDevice->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &featureLevels, sizeof(featureLevels));

        if (S_OK != hr)
        {
            GPA_LogError("Failed to get D3D12 device feature levels");
            result = GPA_STATUS_ERROR_FAILED;
        }
        else if (D3D_FEATURE_LEVEL_11_0 > featureLevels.MaxSupportedFeatureLevel)
        {
            result = GPA_STATUS_ERROR_HARDWARE_NOT_SUPPORTED;
        }

        // TODO Once the DX12 performance extension is available, check
        //      that it is possible to create a HW counter

        pDevice->Release();
    }

    // The command list reference obtained from QueryInterface is released on every path past this point.
    pCommandList->Release();

    return result;
} // end of GPA_IMP_VerifyHwSupport
// Sets pipeline state `ps` and graphics root signature `rs` on the current command list.
void afSetPipeline(ComPtr<ID3D12PipelineState> ps, ComPtr<ID3D12RootSignature> rs)
{
	ID3D12GraphicsCommandList* cmdList = deviceMan.GetCommandList();

	ID3D12PipelineState* pipeline = ps.Get();
	cmdList->SetPipelineState(pipeline);

	ID3D12RootSignature* rootSignature = rs.Get();
	cmdList->SetGraphicsRootSignature(rootSignature);
}
// Issues a non-indexed draw on the current command list: sets topology `pt`,
// then draws `numVertices` vertices beginning at vertex `start`, repeated for
// `instanceCount` instances (first instance is always 0).
void afDraw(PrimitiveTopology pt, int numVertices, int start, int instanceCount)
{
	ID3D12GraphicsCommandList* cmdList = deviceMan.GetCommandList();

	cmdList->IASetPrimitiveTopology(pt);

	const int firstInstance = 0;
	cmdList->DrawInstanced(numVertices, instanceCount, start, firstInstance);
}
void DXMultiAdapterRenderer::PopulateCommandLists()
{
	// Primary rendering command list
	{
		ComPtr<ID3D12Resource> curPrimaryRenderTarget = mDXDevices[Device_Primary]->mRenderTargets[mCurrentFrameIndex];

		// Reset allocator and command list for current render target
		ThrowIfFailed(mDXDevices[Device_Primary]->mCommandAllocator->Reset()); // Only do this when all command lists have finished executing
		ID3D12GraphicsCommandList* primaryCommandList = mCommandLists[Primary_CommandList_Scene].Get();
		ThrowIfFailed(primaryCommandList->Reset(mDXDevices[Device_Primary]->mCommandAllocator.Get(), nullptr));

		primaryCommandList->RSSetViewports(1, &mViewport);
		primaryCommandList->RSSetScissorRects(1, &mScissorRect);

		ID3D12DescriptorHeap* descHeaps[] = { mDXDevices[Device_Primary]->mCbvSrvUavHeap.Get() };
		primaryCommandList->SetDescriptorHeaps(_countof(descHeaps), descHeaps);

		// Allow for rending to current render target
		primaryCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(curPrimaryRenderTarget.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET));

		CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(mDXDevices[Device_Primary]->mRtvHeap->GetCPUDescriptorHandleForHeapStart(), mCurrentFrameIndex, mDXDevices[Device_Primary]->mRtvDescriptorSize);
		primaryCommandList->OMSetRenderTargets(1, &rtvHandle, true, nullptr);
		primaryCommandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
		primaryCommandList->IASetVertexBuffers(0, 1, mVertexBufferViews[Primary_Quad_Top]);

		// Render top portion of scene on primary GPU
		primaryCommandList->SetGraphicsRootSignature(mRootSignatures[Primary_RootSignature_Scene].Get());
		primaryCommandList->SetGraphicsRootDescriptorTable(0, mTimePrimaryCbvHandle);
		primaryCommandList->SetPipelineState(mPipelineStates[Primary_PipelineState_Scene].Get());
		primaryCommandList->DrawInstanced(4, 1, 0, 0);

		// Indicate that render target will be used for copy 
		primaryCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(curPrimaryRenderTarget.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE));

		ThrowIfFailed(mCommandLists[Primary_CommandList_Scene]->Close());
	}

	// Copy command list - in DXCrossAdapterResources object
	{
		mCrossAdapterResources->PopulateCommandList(mCurrentFrameIndex);
	}

	// Reset command allocator for secondary device - used by all following command lists	
	ThrowIfFailed(mDXDevices[Device_Secondary]->mCommandAllocator->Reset()); // Only do this when all command lists have finished executing

	// Secondary rendering command list
	{
		ID3D12GraphicsCommandList* secondaryCommandList = mCommandLists[Secondary_CommandList_Scene].Get();
		ThrowIfFailed(secondaryCommandList->Reset(mDXDevices[Device_Secondary]->mCommandAllocator.Get(), nullptr));

		secondaryCommandList->RSSetViewports(1, &mViewport);
		secondaryCommandList->RSSetScissorRects(1, &mScissorRect);

		ID3D12DescriptorHeap* descHeaps[] = { mDXDevices[Device_Secondary]->mCbvSrvUavHeap.Get() };
		secondaryCommandList->SetDescriptorHeaps(_countof(descHeaps), descHeaps);

		secondaryCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(mTexture.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET));

		secondaryCommandList->OMSetRenderTargets(1, &mTextureRtvHandle, true, nullptr);
		secondaryCommandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
		secondaryCommandList->IASetVertexBuffers(0, 1, mVertexBufferViews[Secondary_Quad_Bottom]); // Quad geometry

		// Render bottom portion of scene on secondary GPU
		secondaryCommandList->SetGraphicsRootSignature(mRootSignatures[Secondary_RootSignature_Scene].Get());
		secondaryCommandList->SetGraphicsRootDescriptorTable(0, mTimeSecondaryCbvHandle);
		secondaryCommandList->SetPipelineState(mPipelineStates[Secondary_PipelineState_Scene].Get());
		secondaryCommandList->DrawInstanced(4, 1, 0, 0); 

		secondaryCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(mTexture.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE));

		ThrowIfFailed(mCommandLists[Secondary_CommandList_Scene]->Close());
	}

	// Overlay Command List
	{
		ID3D12GraphicsCommandList* overlayCommandList = mCommandLists[Secondary_CommandList_Combine_Scene].Get();
		ComPtr<ID3D12Resource> curSecondaryRenderTarget = mDXDevices[Device_Secondary]->mRenderTargets[mCurrentFrameIndex]; // Get secondary render target for current frame
		ThrowIfFailed(overlayCommandList->Reset(mDXDevices[Device_Secondary]->mCommandAllocator.Get(), nullptr));

		overlayCommandList->RSSetViewports(1, &mViewport);
		overlayCommandList->RSSetScissorRects(1, &mScissorRect);

		ID3D12DescriptorHeap* descHeaps[] = { mDXDevices[Device_Secondary]->mCbvSrvUavHeap.Get() };
		overlayCommandList->SetDescriptorHeaps(_countof(descHeaps), descHeaps);

		overlayCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(curSecondaryRenderTarget.Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET));

		CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(mDXDevices[Device_Secondary]->mRtvHeap->GetCPUDescriptorHandleForHeapStart(), mCurrentFrameIndex, mDXDevices[Device_Secondary]->mRtvDescriptorSize);
		overlayCommandList->OMSetRenderTargets(1, &rtvHandle, true, nullptr);
		static const float clearColor[4] = { 0.0f, 1.0f, 0.0f, 1.0f };
		overlayCommandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr);
		overlayCommandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

		// Render top quad with cross-adapter texture
		overlayCommandList->IASetVertexBuffers(0, 1, mVertexBufferViews[Secondary_Quad_Combine_Top]); // Quad geometry
		CD3DX12_GPU_DESCRIPTOR_HANDLE crossAdapterSrvHandle(mDXDevices[Device_Secondary]->mCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart(), mCurrentFrameIndex, mDXDevices[Device_Secondary]->mCbvSrvUavDescriptorSize);
		overlayCommandList->SetGraphicsRootSignature(mRootSignatures[Secondary_RootSignature_CrossAdapter].Get());
		overlayCommandList->SetPipelineState(mPipelineStates[Secondary_PipelineState_CrossAdapter].Get());
		overlayCommandList->SetGraphicsRootDescriptorTable(0, mTransformTopCbvHandle);
		overlayCommandList->SetGraphicsRootDescriptorTable(1, crossAdapterSrvHandle);
		// Scale and translate quad to top of screen
		mConstantBufferTransformTopData.modelViewProjection = DirectX::XMMatrixIdentity();
		mConstantBufferTransformTopData.modelViewProjection *= DirectX::XMMatrixTranslation(0.0, ((1.0f / mSharePercentage) - 1.0f) + 0.015f, 0.0);
		mConstantBufferTransformTopData.modelViewProjection *= DirectX::XMMatrixScaling(1.0, mSharePercentage, 1.0);
		UpdateConstantBuffer(mConstantBufferTransformTop, &mConstantBufferTransformTopData, sizeof(mConstantBufferTransformTopData));
		overlayCommandList->DrawInstanced(4, 1, 0, 0);  

		// Render bottom quad with texture from secondary GPU
		overlayCommandList->IASetVertexBuffers(0, 1, mVertexBufferViews[Secondary_Quad_Combine_Bottom]); // Quad geometry
		overlayCommandList->SetGraphicsRootDescriptorTable(0, mTransformBottomCbvHandle);
		overlayCommandList->SetGraphicsRootDescriptorTable(1, mTextureSrvHandle);
		// Scale and translate quad to bottom of screen
		mConstantBufferTransformBottomData.modelViewProjection = DirectX::XMMatrixIdentity();
		mConstantBufferTransformBottomData.modelViewProjection *= DirectX::XMMatrixTranslation(0.0, -((1.0f / (1.0f - mSharePercentage)) - 1.0f) - 0.015f, 0.0);
		mConstantBufferTransformBottomData.modelViewProjection *= DirectX::XMMatrixScaling(1.0, (1.0f - mSharePercentage), 1.0);
		UpdateConstantBuffer(mConstantBufferTransformBottom, &mConstantBufferTransformBottomData, sizeof(mConstantBufferTransformBottomData));
		overlayCommandList->DrawInstanced(4, 1, 0, 0); 

		overlayCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(curSecondaryRenderTarget.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT));

		ThrowIfFailed(mCommandLists[Secondary_CommandList_Combine_Scene]->Close());
	}

}
Exemple #13
0
// Captures the current contents of `res` and registers them via SetInitialContents.
//
// - Descriptor heaps: all descriptors are copied into a newly allocated aligned buffer.
// - Multisampled 2D textures: not implemented; a placeholder entry (num = 2) is stored.
// - Buffers: if already on a readback heap, only a marker entry (num = 1) is stored;
//   otherwise a readback copy is created and a CopyResource is recorded.
// - Other textures: a readback buffer sized by GetCopyableFootprints is created and every
//   subresource is copied into it, with barriers to and from COPY_SOURCE around the copies.
//
// Copies are recorded on the device's initial state list. For non-resident resources the
// resource is made resident first, and the lists are closed, executed and flushed before
// it is evicted again.
//
// Returns true for descriptor heaps and resources (even when creating the readback buffer
// failed - the stored resource is then NULL), false for any other type.
bool D3D12ResourceManager::Prepare_InitialState(ID3D12DeviceChild *res)
{
  ResourceId id = GetResID(res);
  D3D12ResourceType type = IdentifyTypeByPtr(res);

  if(type == Resource_DescriptorHeap)
  {
    WrappedID3D12DescriptorHeap *heap = (WrappedID3D12DescriptorHeap *)res;

    UINT numElems = heap->GetDesc().NumDescriptors;

    // snapshot the whole descriptor array into a separately allocated buffer
    D3D12Descriptor *descs =
        (D3D12Descriptor *)Serialiser::AllocAlignedBuffer(sizeof(D3D12Descriptor) * numElems);

    memcpy(descs, heap->GetDescriptors(), sizeof(D3D12Descriptor) * numElems);

    SetInitialContents(heap->GetResourceID(),
                       D3D12ResourceManager::InitialContentData(NULL, numElems, (byte *)descs));
    return true;
  }
  else if(type == Resource_Resource)
  {
    WrappedID3D12Resource *r = (WrappedID3D12Resource *)res;
    ID3D12Pageable *pageable = r;

    // remember whether we have to make the resource resident for the copy
    bool nonresident = false;
    if(!r->Resident())
      nonresident = true;

    D3D12_RESOURCE_DESC desc = r->GetDesc();

    if(desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D && desc.SampleDesc.Count > 1)
    {
      D3D12NOTIMP("Multisampled initial contents");

      // placeholder entry: no resource, num = 2
      SetInitialContents(GetResID(r), D3D12ResourceManager::InitialContentData(NULL, 2, NULL));
      return true;
    }
    else if(desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
    {
      D3D12_HEAP_PROPERTIES heapProps;
      r->GetHeapProperties(&heapProps, NULL);

      if(heapProps.Type == D3D12_HEAP_TYPE_READBACK)
      {
        // already on readback heap, just mark that we can map it directly and continue
        SetInitialContents(GetResID(r), D3D12ResourceManager::InitialContentData(NULL, 1, NULL));
        return true;
      }

      // reuse the queried heap properties, retargeted at a readback heap
      heapProps.Type = D3D12_HEAP_TYPE_READBACK;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      // the copy uses the source's description with all resource flags cleared
      desc.Flags = D3D12_RESOURCE_FLAG_NONE;

      ID3D12Resource *copyDst = NULL;
      HRESULT hr = m_Device->GetReal()->CreateCommittedResource(
          &heapProps, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COPY_DEST, NULL,
          __uuidof(ID3D12Resource), (void **)&copyDst);

      if(nonresident)
        m_Device->MakeResident(1, &pageable);

      if(SUCCEEDED(hr))
      {
        ID3D12GraphicsCommandList *list = Unwrap(m_Device->GetInitialStateList());

        list->CopyResource(copyDst, r->GetReal());
      }
      else
      {
        RDCERR("Couldn't create readback buffer: 0x%08x", hr);
      }

      if(nonresident)
      {
        // run the recorded copy now, before the resource is evicted again
        m_Device->CloseInitialStateList();

        m_Device->ExecuteLists();
        m_Device->FlushLists();

        m_Device->Evict(1, &pageable);
      }

      // copyDst is still NULL here if the readback buffer could not be created
      SetInitialContents(GetResID(r), D3D12ResourceManager::InitialContentData(copyDst, 0, NULL));
      return true;
    }
    else
    {
      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_READBACK;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      // description of the readback buffer; Width is filled in by
      // GetCopyableFootprints below
      D3D12_RESOURCE_DESC bufDesc;

      bufDesc.Alignment = 0;
      bufDesc.DepthOrArraySize = 1;
      bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      bufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      bufDesc.Format = DXGI_FORMAT_UNKNOWN;
      bufDesc.Height = 1;
      bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      bufDesc.MipLevels = 1;
      bufDesc.SampleDesc.Count = 1;
      bufDesc.SampleDesc.Quality = 0;
      bufDesc.Width = 1;

      // for non-3D textures each array slice has its own set of mips
      UINT numSubresources = desc.MipLevels;
      if(desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D)
        numSubresources *= desc.DepthOrArraySize;

      D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts =
          new D3D12_PLACED_SUBRESOURCE_FOOTPRINT[numSubresources];

      // fetch the placement of every subresource and the total size into bufDesc.Width
      m_Device->GetCopyableFootprints(&desc, 0, numSubresources, 0, layouts, NULL, NULL,
                                      &bufDesc.Width);

      ID3D12Resource *copyDst = NULL;
      HRESULT hr = m_Device->GetReal()->CreateCommittedResource(
          &heapProps, D3D12_HEAP_FLAG_NONE, &bufDesc, D3D12_RESOURCE_STATE_COPY_DEST, NULL,
          __uuidof(ID3D12Resource), (void **)&copyDst);

      if(nonresident)
        m_Device->MakeResident(1, &pageable);

      if(SUCCEEDED(hr))
      {
        ID3D12GraphicsCommandList *list = Unwrap(m_Device->GetInitialStateList());

        vector<D3D12_RESOURCE_BARRIER> barriers;

        const vector<D3D12_RESOURCE_STATES> &states = m_Device->GetSubresourceStates(GetResID(r));

        barriers.reserve(states.size());

        for(size_t i = 0; i < states.size(); i++)
        {
          // subresources that already include COPY_SOURCE need no barrier
          if(states[i] & D3D12_RESOURCE_STATE_COPY_SOURCE)
            continue;

          D3D12_RESOURCE_BARRIER barrier;
          barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
          barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
          barrier.Transition.pResource = r->GetReal();
          barrier.Transition.Subresource = (UINT)i;
          barrier.Transition.StateBefore = states[i];
          barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

          barriers.push_back(barrier);
        }

        // transition to copy source
        if(!barriers.empty())
          list->ResourceBarrier((UINT)barriers.size(), &barriers[0]);

        // copy each subresource into its placed footprint in the readback buffer
        for(UINT i = 0; i < numSubresources; i++)
        {
          D3D12_TEXTURE_COPY_LOCATION dst, src;

          src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
          src.pResource = r->GetReal();
          src.SubresourceIndex = i;

          dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
          dst.pResource = copyDst;
          dst.PlacedFootprint = layouts[i];

          list->CopyTextureRegion(&dst, 0, 0, 0, &src, NULL);
        }

        // transition back
        for(size_t i = 0; i < barriers.size(); i++)
          std::swap(barriers[i].Transition.StateBefore, barriers[i].Transition.StateAfter);

        if(!barriers.empty())
          list->ResourceBarrier((UINT)barriers.size(), &barriers[0]);
      }
      else
      {
        RDCERR("Couldn't create readback buffer: 0x%08x", hr);
      }

      if(nonresident)
      {
        // run the recorded copies now, before the resource is evicted again
        m_Device->CloseInitialStateList();

        m_Device->ExecuteLists();
        m_Device->FlushLists();

        m_Device->Evict(1, &pageable);
      }

      SAFE_DELETE_ARRAY(layouts);

      // copyDst is still NULL here if the readback buffer could not be created
      SetInitialContents(GetResID(r), D3D12ResourceManager::InitialContentData(copyDst, 0, NULL));
      return true;
    }
  }
  else
  {
    RDCERR("Unexpected type needing an initial state prepared: %d", type);
  }

  return false;
}
Exemple #14
0
// Restores the saved initial contents `data` into the live object `live`.
//
// - Descriptor heaps: if data.resource is set, every descriptor of that heap is copied
//   into the live heap.
// - Resources (requires data.num == 1 and a non-NULL data.resource):
//     * destination on an upload heap: both sides are mapped and copied on the CPU, either
//       as one block (buffers) or row by row per subresource (textures);
//     * otherwise: the copy is recorded on the device's initial state list, with barriers
//       to COPY_DEST before and back to the previous states afterwards.
//
// Anything else is reported through RDCERR and left untouched.
void D3D12ResourceManager::Apply_InitialState(ID3D12DeviceChild *live, InitialContentData data)
{
  D3D12ResourceType type = IdentifyTypeByPtr(live);

  if(type == Resource_DescriptorHeap)
  {
    ID3D12DescriptorHeap *dstheap = (ID3D12DescriptorHeap *)live;
    ID3D12DescriptorHeap *srcheap = (ID3D12DescriptorHeap *)data.resource;

    if(srcheap)
    {
      // copy the whole heap
      m_Device->CopyDescriptorsSimple(
          srcheap->GetDesc().NumDescriptors, dstheap->GetCPUDescriptorHandleForHeapStart(),
          srcheap->GetCPUDescriptorHandleForHeapStart(), srcheap->GetDesc().Type);
    }
  }
  else if(type == Resource_Resource)
  {
    if(data.num == 1 && data.resource)
    {
      ID3D12Resource *copyDst = Unwrap((ID3D12Resource *)live);
      ID3D12Resource *copySrc = (ID3D12Resource *)data.resource;

      D3D12_HEAP_PROPERTIES heapProps = {};
      copyDst->GetHeapProperties(&heapProps, NULL);

      // if destination is on the upload heap, it's impossible to copy via the device,
      // so we have to map both sides and CPU copy.
      if(heapProps.Type == D3D12_HEAP_TYPE_UPLOAD)
      {
        byte *src = NULL, *dst = NULL;

        HRESULT hr = S_OK;

        hr = copySrc->Map(0, NULL, (void **)&src);

        if(FAILED(hr))
        {
          RDCERR("Doing CPU-side copy, couldn't map source: 0x%08x", hr);
          src = NULL;
        }

        if(copyDst->GetDesc().Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
        {
          hr = copyDst->Map(0, NULL, (void **)&dst);

          if(FAILED(hr))
          {
            RDCERR("Doing CPU-side copy, couldn't map destination: 0x%08x", hr);
            dst = NULL;
          }

          if(src && dst)
          {
            memcpy(dst, src, (size_t)copySrc->GetDesc().Width);
          }

          if(dst)
            copyDst->Unmap(0, NULL);
        }
        else
        {
          D3D12_RESOURCE_DESC desc = copyDst->GetDesc();

          // for non-3D textures each array slice has its own set of mips
          UINT numSubresources = desc.MipLevels;
          if(desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D)
            numSubresources *= desc.DepthOrArraySize;

          D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts =
              new D3D12_PLACED_SUBRESOURCE_FOOTPRINT[numSubresources];
          UINT *numrows = new UINT[numSubresources];
          UINT64 *rowsizes = new UINT64[numSubresources];

          m_Device->GetCopyableFootprints(&desc, 0, numSubresources, 0, layouts, numrows, rowsizes,
                                          NULL);

          for(UINT i = 0; i < numSubresources; i++)
          {
            hr = copyDst->Map(i, NULL, (void **)&dst);

            if(FAILED(hr))
            {
              RDCERR("Doing CPU-side copy, couldn't map destination: 0x%08x", hr);
              dst = NULL;
            }

            if(src && dst)
            {
              // bufPtr walks the saved data (rows padded to RowPitch), texPtr walks the
              // mapped destination subresource (rows advanced by their unpadded size)
              byte *bufPtr = src + layouts[i].Offset;
              byte *texPtr = dst;

              for(UINT d = 0; d < layouts[i].Footprint.Depth; d++)
              {
                for(UINT r = 0; r < numrows[i]; r++)
                {
                  // restore: saved buffer -> live texture, same direction as the
                  // buffer path above
                  memcpy(texPtr, bufPtr, (size_t)rowsizes[i]);

                  bufPtr += layouts[i].Footprint.RowPitch;
                  texPtr += rowsizes[i];
                }
              }
            }

            // unmap the same subresource that was mapped for this iteration
            if(dst)
              copyDst->Unmap(i, NULL);
          }

          delete[] layouts;
          delete[] numrows;
          delete[] rowsizes;
        }

        if(src)
          copySrc->Unmap(0, NULL);
      }
      else
      {
        ID3D12GraphicsCommandList *list = Unwrap(m_Device->GetInitialStateList());

        vector<D3D12_RESOURCE_BARRIER> barriers;

        const vector<D3D12_RESOURCE_STATES> &states = m_Device->GetSubresourceStates(GetResID(live));

        barriers.reserve(states.size());

        for(size_t i = 0; i < states.size(); i++)
        {
          // subresources that already include COPY_DEST need no barrier
          if(states[i] & D3D12_RESOURCE_STATE_COPY_DEST)
            continue;

          D3D12_RESOURCE_BARRIER barrier;
          barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
          barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
          barrier.Transition.pResource = copyDst;
          barrier.Transition.Subresource = (UINT)i;
          barrier.Transition.StateBefore = states[i];
          barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;

          barriers.push_back(barrier);
        }

        // transition to copy dest
        if(!barriers.empty())
          list->ResourceBarrier((UINT)barriers.size(), &barriers[0]);

        if(copyDst->GetDesc().Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
        {
          list->CopyBufferRegion(copyDst, 0, copySrc, 0, copySrc->GetDesc().Width);
        }
        else
        {
          D3D12_RESOURCE_DESC desc = copyDst->GetDesc();

          UINT numSubresources = desc.MipLevels;
          if(desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D)
            numSubresources *= desc.DepthOrArraySize;

          D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts =
              new D3D12_PLACED_SUBRESOURCE_FOOTPRINT[numSubresources];

          m_Device->GetCopyableFootprints(&desc, 0, numSubresources, 0, layouts, NULL, NULL, NULL);

          // copy each placed footprint of the saved buffer into its subresource
          for(UINT i = 0; i < numSubresources; i++)
          {
            D3D12_TEXTURE_COPY_LOCATION dst, src;

            dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
            dst.pResource = copyDst;
            dst.SubresourceIndex = i;

            src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
            src.pResource = copySrc;
            src.PlacedFootprint = layouts[i];

            list->CopyTextureRegion(&dst, 0, 0, 0, &src, NULL);
          }

          delete[] layouts;
        }

        // transition back to whatever it was before
        for(size_t i = 0; i < barriers.size(); i++)
          std::swap(barriers[i].Transition.StateBefore, barriers[i].Transition.StateAfter);

        if(!barriers.empty())
          list->ResourceBarrier((UINT)barriers.size(), &barriers[0]);

#if ENABLED(SINGLE_FLUSH_VALIDATE)
        m_Device->CloseInitialStateList();
        m_Device->ExecuteLists();
        m_Device->FlushLists(true);
#endif
      }
    }
    else
    {
      RDCERR("Unexpected num or NULL resource: %d, %p", data.num, data.resource);
    }
  }
  else
  {
    RDCERR("Unexpected type needing an initial state created: %d", type);
  }
}
Exemple #15
0
/**
 * Loads a DDS texture and registers it in the texture store under `id`.
 *
 * The file is looked up in the pak file first and on the file system second. The DDS data
 * is parsed with CreateDDSTextureFromMemory (which also fills the SRV descriptor), then
 * copied to the GPU through an intermediate upload buffer. The call blocks on a sync point
 * until the copy has been executed, releases the upload buffer and resets the command list
 * so it can be recorded into again.
 *
 * @param filename  texture file name as passed to the pak / file system lookup
 * @param id        key under which the texture is stored in `textures`
 *
 * Failing D3D12 calls, and a file that yields no data, are reported through ThrowIfFailed.
 */
void TextureStore::loadTexture(wstring filename, string id)
{
	TextureLoadResult result;
	vector<byte> file_buffer;

	// Register (or reset) the store entry first. The local is only a template for the stored
	// instance; everything below works on the stored instance through `texture`.
	TextureInfo initialTexture;
	initialTexture.id = id;
	textures[id] = initialTexture;
	TextureInfo *texture = &textures[id];

	// Keep the requested name on the stored entry (not on the local template, where it would
	// be lost). For file system loads it is replaced by the resolved path below.
	// TODO check: field not needed? only in this method? --> remove
	texture->filename = filename;

	// find texture file, look in pak file first, try file system if not found in pak:
	PakEntry *pakFileEntry = xapp().findFileInPak(filename.c_str());
	if (pakFileEntry == nullptr) {
		texture->filename = xapp().findFile(filename.c_str(), XApp::TEXTURE);
		xapp().readFile(texture->filename.c_str(), file_buffer, XApp::FileCategory::TEXTURE);
	} else {
		xapp().readFile(pakFileEntry, file_buffer, XApp::FileCategory::TEXTURE);
	}

	// Without any data there is nothing to parse, and indexing an empty vector is undefined.
	if (file_buffer.empty()) {
		ThrowIfFailed(E_FAIL);
	}

	ID3D12GraphicsCommandList *cmdList = this->commandList.Get();

	// Describe and create a shader resource view (SRV) heap for the texture.
	// TODO adjust for one heap for multiple textures
	D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {};
	srvHeapDesc.NumDescriptors = 1;
	srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
	srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
	ThrowIfFailed(xapp().device->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(&texture->m_srvHeap)));

	CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(texture->m_srvHeap->GetCPUDescriptorHandleForHeapStart());

	CreateDDSTextureFromMemory(
		xapp().device.Get(),
		file_buffer.data(),
		file_buffer.size(),
		0,
		true,
		&texture->texSRV,
		srvHandle,
		result
	);

	// Size of the intermediate buffer needed to upload all subresources of the texture.
	UINT64 uploadBufferSize = GetRequiredIntermediateSize(texture->texSRV.Get(), 0, result.NumSubresources);

	D3D12_HEAP_PROPERTIES HeapProps;
	HeapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
	HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
	HeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
	HeapProps.CreationNodeMask = 1;
	HeapProps.VisibleNodeMask = 1;

	D3D12_RESOURCE_DESC BufferDesc;
	BufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
	BufferDesc.Alignment = 0;
	BufferDesc.Width = uploadBufferSize;
	BufferDesc.Height = 1;
	BufferDesc.DepthOrArraySize = 1;
	BufferDesc.MipLevels = 1;
	BufferDesc.Format = DXGI_FORMAT_UNKNOWN;
	BufferDesc.SampleDesc.Count = 1;
	BufferDesc.SampleDesc.Quality = 0;
	BufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
	BufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

	ThrowIfFailed(xapp().device->CreateCommittedResource(&HeapProps, D3D12_HEAP_FLAG_NONE,
		&BufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ,
		nullptr, IID_PPV_ARGS(&result.UploadBuffer)));

	// copy data to the intermediate upload heap and then schedule a copy from the upload heap
	// to the default texture. The barriers are named locals: taking the address of the
	// temporary returned by Transition() is not valid standard C++.
	const CD3DX12_RESOURCE_BARRIER toCopyDest = CD3DX12_RESOURCE_BARRIER::Transition(
		texture->texSRV.Get(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
	cmdList->ResourceBarrier(1, &toCopyDest);
	UpdateSubresources(cmdList, texture->texSRV.Get(), result.UploadBuffer, 0, 0, result.NumSubresources, result.initData.get());
	const CD3DX12_RESOURCE_BARRIER toShaderResource = CD3DX12_RESOURCE_BARRIER::Transition(
		texture->texSRV.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
	cmdList->ResourceBarrier(1, &toShaderResource);

	// Execute the command list and wait for it to finish so we can release the upload buffer
	ThrowIfFailed(cmdList->Close());
	ID3D12CommandList* ppCommandLists[] = { cmdList };
	xapp().commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);

	// Create synchronization objects and wait until assets have been uploaded to the GPU.
	EffectBase::createSyncPoint(updateFrameData, xapp().commandQueue);
	EffectBase::waitForSyncPoint(updateFrameData);

	result.UploadBuffer->Release();
	ThrowIfFailed(cmdList->Reset(commandAllocator.Get(), pipelineState.Get()));
}
Exemple #16
0
/// Opens a GPA context on a D3D12 command list.
///
/// \param pContext  IUnknown-derived object that is queried for ID3D12GraphicsCommandList
/// \return GPA_STATUS_ERROR_NULL_POINTER when pContext is null;
///         GPA_STATUS_ERROR_FAILED when the command list cannot be obtained or set, or when
///         any hardware id cannot be read; otherwise the status of GenerateCounters
GPA_Status GPA_IMP_OpenContext(void* pContext)
{
    if (nullptr == pContext)
    {
        GPA_LogError("Unable to open context. Parameter 'pContext' is NULL.");
        return GPA_STATUS_ERROR_NULL_POINTER;
    }

    ID3D12GraphicsCommandList* pD3DCommandList = nullptr;
    const HRESULT queryResult = static_cast<IUnknown*>(pContext)->QueryInterface(
                                    __uuidof(ID3D12GraphicsCommandList),
                                    reinterpret_cast<void**>(&pD3DCommandList));

    if (S_OK != queryResult)
    {
        GPA_LogError("Failed to get command list from context");
        return GPA_STATUS_ERROR_FAILED;
    }

    // From here on the reference obtained by QueryInterface is held; every path below
    // falls through to the single Release() at the bottom.
    GPA_Status status = GPA_STATUS_ERROR_FAILED;

    gpa_uint32 vendorId = 0;
    gpa_uint32 deviceId = 0;
    gpa_uint32 revisionId = 0;

    // Short-circuit evaluation: the first step that fails stops the chain.
    const bool haveHardwareIds =
        GetCurrentContext()->SetCommandList(pD3DCommandList) &&
        g_pCurrentContext->m_hwInfo.GetVendorID(vendorId) &&
        g_pCurrentContext->m_hwInfo.GetDeviceID(deviceId) &&
        g_pCurrentContext->m_hwInfo.GetRevisionID(revisionId);

    if (haveHardwareIds)
    {
        GPA_ICounterAccessor* pAccessor = nullptr;
        GPA_ICounterScheduler* pScheduler = nullptr;

        status = GenerateCounters(GPA_API_DIRECTX_12,
                                  vendorId,
                                  deviceId,
                                  revisionId,
                                  &pAccessor,
                                  &pScheduler);

        if (GPA_STATUS_OK == status)
        {
            g_pCurrentContext->m_pCounterAccessor = static_cast<GPA_CounterGeneratorBase*>(pAccessor);
            g_pCurrentContext->m_pCounterScheduler = pScheduler;
        }
    }

    pD3DCommandList->Release();

    return status;
} // end of GPA_IMP_OpenContext
// Measures the GPU duration of events by replaying the whole log with a timer callback
// installed, then resolving the timestamp queries into a readback buffer.
//
// Returns one eCounter_EventGPUDuration result per entry in the callback's result list
// (end timestamp minus begin timestamp, divided by the queue's timestamp frequency), plus
// a copy for every aliased event, sorted with std::sort. An empty vector is returned if
// the readback buffer or query heap can't be created, or the buffer can't be mapped.
//
// NOTE(review): the 'counters' argument is not consulted here - the GPU duration counter
// is always what gets produced.
vector<CounterResult> D3D12Replay::FetchCounters(const vector<uint32_t> &counters)
{
  uint32_t maxEID = m_pDevice->GetQueue()->GetMaxEID();

  vector<CounterResult> ret;

  D3D12_HEAP_PROPERTIES heapProps;
  heapProps.Type = D3D12_HEAP_TYPE_READBACK;
  heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
  heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
  heapProps.CreationNodeMask = 1;
  heapProps.VisibleNodeMask = 1;

  // two timestamps (begin/end) are reserved per event ID
  D3D12_RESOURCE_DESC bufDesc;
  bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
  bufDesc.Alignment = 0;
  bufDesc.Width = sizeof(uint64_t) * maxEID * 2;
  bufDesc.Height = 1;
  bufDesc.DepthOrArraySize = 1;
  bufDesc.MipLevels = 1;
  bufDesc.Format = DXGI_FORMAT_UNKNOWN;
  bufDesc.SampleDesc.Count = 1;
  bufDesc.SampleDesc.Quality = 0;
  bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
  bufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

  ID3D12Resource *readbackBuf = NULL;
  HRESULT hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &bufDesc,
                                                  D3D12_RESOURCE_STATE_COPY_DEST, NULL,
                                                  __uuidof(ID3D12Resource), (void **)&readbackBuf);
  if(FAILED(hr))
  {
    RDCERR("Failed to create query readback buffer %08x", hr);
    return ret;
  }

  D3D12_QUERY_HEAP_DESC queryDesc;
  queryDesc.Count = maxEID * 2;
  queryDesc.NodeMask = 1;
  queryDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
  ID3D12QueryHeap *queryHeap = NULL;
  hr = m_pDevice->CreateQueryHeap(&queryDesc, __uuidof(queryHeap), (void **)&queryHeap);
  if(FAILED(hr))
  {
    RDCERR("Failed to create query heap %08x", hr);
    // the readback buffer was already created above, don't leak it
    SAFE_RELEASE(readbackBuf);
    return ret;
  }

  m_pDevice->SetStablePowerState(TRUE);

  D3D12GPUTimerCallback cb(m_pDevice, this, queryHeap);

  // replay the events to perform all the queries
  m_pDevice->ReplayLog(0, maxEID, eReplay_Full);

  m_pDevice->SetStablePowerState(FALSE);

  ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

  list->ResolveQueryData(queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, maxEID * 2, readbackBuf, 0);

  list->Close();

  m_pDevice->ExecuteLists();
  m_pDevice->FlushLists();

  D3D12_RANGE range = {0, (SIZE_T)bufDesc.Width};
  void *data;
  hr = readbackBuf->Map(0, &range, &data);
  if(FAILED(hr))
  {
    RDCERR("Failed to map query readback buffer %08x", hr);
    SAFE_RELEASE(queryHeap);
    SAFE_RELEASE(readbackBuf);
    return ret;
  }

  uint64_t *timestamps = (uint64_t *)data;

  uint64_t freq;
  m_pDevice->GetQueue()->GetTimestampFrequency(&freq);

  for(size_t i = 0; i < cb.m_Results.size(); i++)
  {
    CounterResult result;

    uint64_t delta = timestamps[i * 2 + 1] - timestamps[i * 2 + 0];

    result.eventID = cb.m_Results[i];
    result.counterID = eCounter_EventGPUDuration;
    result.value.d = double(delta) / double(freq);

    ret.push_back(result);
  }

  // all timestamps have been consumed. The CPU wrote nothing, so pass an empty written
  // range to Unmap, then drop the GPU objects that were created for this fetch.
  D3D12_RANGE writtenRange = {0, 0};
  readbackBuf->Unmap(0, &writtenRange);

  SAFE_RELEASE(queryHeap);
  SAFE_RELEASE(readbackBuf);

  for(size_t i = 0; i < cb.m_AliasEvents.size(); i++)
  {
    CounterResult search;
    search.counterID = eCounter_EventGPUDuration;
    search.eventID = cb.m_AliasEvents[i].first;

    // find the result we're aliasing
    auto it = std::find(ret.begin(), ret.end(), search);
    RDCASSERT(it != ret.end());

    // never dereference end() if the aliased event has no result
    if(it == ret.end())
      continue;

    // duplicate the result and append
    CounterResult aliased = *it;
    aliased.eventID = cb.m_AliasEvents[i].second;
    ret.push_back(aliased);
  }

  // sort so that the alias results appear in the right places
  std::sort(ret.begin(), ret.end());

  return ret;
}
//--------------------------------------------------------------------------
// Records the task list of recorder u32Index into that recorder's direct command list of
// the current frame: the list is reset, the renderer's SRV heap is bound, every task is
// translated into the matching D3D12 call, and the list is closed again.
void VeRenderWindowD3D12::Record(VeUInt32 u32Index) noexcept
{
	VeRendererD3D12& kRenderer = *VeMemberCast(
		&VeRendererD3D12::m_kRenderWindowList, m_kNode.get_list());

	VE_ASSERT(u32Index < m_kRecorderList.size());
	Recorder& kRecorder = m_kRecorderList[u32Index];

	// Per-frame data is selected with the same frame index throughout the recording.
	const VeUInt32 u32Frame = m_u32FrameIndex;
	FrameCache& kFrame = m_akFrameCache[u32Frame];

	ID3D12GraphicsCommandList* pkGCL = kFrame.m_kDirectCommandList[kRecorder.m_u32CommandIndex];
	VE_ASSERT_GE(pkGCL->Reset(kFrame.m_pkDirectAllocator, nullptr), S_OK);

	ID3D12DescriptorHeap* ppHeaps[] = { kRenderer.m_kSRVHeap.Get() };
	pkGCL->SetDescriptorHeaps(1, ppHeaps);

	for (auto& pkTask : kRecorder.m_kTaskList)
	{
		switch (pkTask->m_eType)
		{
		case REC_BARRIER:
		{
			auto& kBarriers = ((RecordBarrier*)pkTask)->m_akBarrierList[u32Frame];
			pkGCL->ResourceBarrier((VeUInt32)kBarriers.size(), &kBarriers.front());
			break;
		}
		case REC_CLEAR_RTV:
		{
			RecordClearRTV* pkClear = (RecordClearRTV*)pkTask;
			pkGCL->ClearRenderTargetView(pkClear->m_ahHandle[u32Frame],
				(const FLOAT*)&(pkClear->m_kColor), 0, nullptr);
			break;
		}
		case REC_CLEAR_DSV:
		{
			RecordClearDSV* pkClear = (RecordClearDSV*)pkTask;
			pkGCL->ClearDepthStencilView(pkClear->m_ahHandle[u32Frame],
				pkClear->m_eFlags, pkClear->m_f32Depth, pkClear->m_u8Stencil, 0, nullptr);
			break;
		}
		case REC_RENDER_TARGET:
		{
			RecordRenderTarget* pkTarget = (RecordRenderTarget*)pkTask;
			auto& kRTVs = pkTarget->m_akRTVList[u32Frame];
			auto& hDSV = pkTarget->m_ahDSV[u32Frame];
			// A zero descriptor pointer means no depth-stencil view is bound.
			pkGCL->OMSetRenderTargets((VeUInt32)kRTVs.size(), &kRTVs.front(), FALSE,
				hDSV.ptr ? &hDSV : nullptr);
			break;
		}
		case REC_VIEWPORT:
		{
			auto& kViewports = ((RecordViewport*)pkTask)->m_kViewportList;
			pkGCL->RSSetViewports((VeUInt32)kViewports.size(), &kViewports.front());
			break;
		}
		case REC_SCISSOR_RECT:
		{
			auto& kRects = ((RecordScissorRect*)pkTask)->m_kScissorRectList;
			pkGCL->RSSetScissorRects((VeUInt32)kRects.size(), &kRects.front());
			break;
		}
		case REC_RENDER_QUAD:
		{
			RecordRenderQuad* pkQuad = (RecordRenderQuad*)pkTask;
			pkGCL->SetPipelineState(pkQuad->m_pkPipelineState);
			pkGCL->SetGraphicsRootSignature(pkQuad->m_pkRootSignature);
			for (auto& kEntry : pkQuad->m_kTable)
			{
				pkGCL->SetGraphicsRootDescriptorTable(kEntry.first, kEntry.second);
			}
			// The quad is drawn as a 4-vertex triangle strip from the renderer's quad vertex buffer.
			pkGCL->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
			pkGCL->IASetVertexBuffers(0, 1, &kRenderer.m_kQuadVBV);
			pkGCL->DrawInstanced(4, 1, 0, 0);
			break;
		}
		default:
			// Other task types record nothing.
			break;
		}
	}

	VE_ASSERT_GE(pkGCL->Close(), S_OK);
}
Exemple #19
0
void D3D12Replay::InitPostVSBuffers(uint32_t eventId)
{
  // go through any aliasing
  if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end())
    eventId = m_PostVSAlias[eventId];

  if(m_PostVSData.find(eventId) != m_PostVSData.end())
    return;

  D3D12CommandData *cmd = m_pDevice->GetQueue()->GetCommandData();
  const D3D12RenderState &rs = cmd->m_RenderState;

  if(rs.pipe == ResourceId())
    return;

  WrappedID3D12PipelineState *origPSO =
      m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(!origPSO->IsGraphics())
    return;

  D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = origPSO->GetGraphicsDesc();

  if(psoDesc.VS.BytecodeLength == 0)
    return;

  WrappedID3D12Shader *vs = origPSO->VS();

  D3D_PRIMITIVE_TOPOLOGY topo = rs.topo;

  const DrawcallDescription *drawcall = m_pDevice->GetDrawcall(eventId);

  if(drawcall->numIndices == 0)
    return;

  DXBC::DXBCFile *dxbcVS = vs->GetDXBC();

  RDCASSERT(dxbcVS);

  DXBC::DXBCFile *dxbcGS = NULL;

  WrappedID3D12Shader *gs = origPSO->GS();

  if(gs)
  {
    dxbcGS = gs->GetDXBC();

    RDCASSERT(dxbcGS);
  }

  DXBC::DXBCFile *dxbcDS = NULL;

  WrappedID3D12Shader *ds = origPSO->DS();

  if(ds)
  {
    dxbcDS = ds->GetDXBC();

    RDCASSERT(dxbcDS);
  }

  ID3D12RootSignature *soSig = NULL;

  HRESULT hr = S_OK;

  {
    WrappedID3D12RootSignature *sig =
        m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>(rs.graphics.rootsig);

    D3D12RootSignature rootsig = sig->sig;

    // create a root signature that allows stream out, if necessary
    if((rootsig.Flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT) == 0)
    {
      rootsig.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;

      ID3DBlob *blob = m_pDevice->GetShaderCache()->MakeRootSig(rootsig);

      hr = m_pDevice->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(),
                                          __uuidof(ID3D12RootSignature), (void **)&soSig);
      if(FAILED(hr))
      {
        RDCERR("Couldn't enable stream-out in root signature: HRESULT: %s", ToStr(hr).c_str());
        return;
      }

      SAFE_RELEASE(blob);
    }
  }

  vector<D3D12_SO_DECLARATION_ENTRY> sodecls;

  UINT stride = 0;
  int posidx = -1;
  int numPosComponents = 0;

  if(!dxbcVS->m_OutputSig.empty())
  {
    for(const SigParameter &sign : dxbcVS->m_OutputSig)
    {
      D3D12_SO_DECLARATION_ENTRY decl;

      decl.Stream = 0;
      decl.OutputSlot = 0;

      decl.SemanticName = sign.semanticName.c_str();
      decl.SemanticIndex = sign.semanticIndex;
      decl.StartComponent = 0;
      decl.ComponentCount = sign.compCount & 0xff;

      if(sign.systemValue == ShaderBuiltin::Position)
      {
        posidx = (int)sodecls.size();
        numPosComponents = decl.ComponentCount = 4;
      }

      stride += decl.ComponentCount * sizeof(float);
      sodecls.push_back(decl);
    }

    if(stride == 0)
    {
      RDCERR("Didn't get valid stride! Setting to 4 bytes");
      stride = 4;
    }

    // shift position attribute up to first, keeping order otherwise
    // the same
    if(posidx > 0)
    {
      D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx];
      sodecls.erase(sodecls.begin() + posidx);
      sodecls.insert(sodecls.begin(), pos);
    }

    // set up stream output entries and buffers
    psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size();
    psoDesc.StreamOutput.pSODeclaration = &sodecls[0];
    psoDesc.StreamOutput.NumStrides = 1;
    psoDesc.StreamOutput.pBufferStrides = &stride;
    psoDesc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;

    // disable all other shader stages
    psoDesc.HS.BytecodeLength = 0;
    psoDesc.HS.pShaderBytecode = NULL;
    psoDesc.DS.BytecodeLength = 0;
    psoDesc.DS.pShaderBytecode = NULL;
    psoDesc.GS.BytecodeLength = 0;
    psoDesc.GS.pShaderBytecode = NULL;
    psoDesc.PS.BytecodeLength = 0;
    psoDesc.PS.pShaderBytecode = NULL;

    // disable any rasterization/use of output targets
    psoDesc.DepthStencilState.DepthEnable = FALSE;
    psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
    psoDesc.DepthStencilState.StencilEnable = FALSE;

    if(soSig)
      psoDesc.pRootSignature = soSig;

    // render as points
    psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;

    // disable outputs
    psoDesc.NumRenderTargets = 0;
    RDCEraseEl(psoDesc.RTVFormats);
    psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

    ID3D12PipelineState *pipe = NULL;
    hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                (void **)&pipe);
    if(FAILED(hr))
    {
      RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    ID3D12Resource *idxBuf = NULL;

    bool recreate = false;
    uint64_t outputSize = uint64_t(drawcall->numIndices) * drawcall->numInstances * stride;

    if(m_SOBufferSize < outputSize)
    {
      uint64_t oldSize = m_SOBufferSize;
      while(m_SOBufferSize < outputSize)
        m_SOBufferSize *= 2;
      RDCWARN("Resizing stream-out buffer from %llu to %llu for output data", oldSize,
              m_SOBufferSize);
      recreate = true;
    }

    ID3D12GraphicsCommandList *list = NULL;

    if(!(drawcall->flags & DrawFlags::UseIBuffer))
    {
      if(recreate)
      {
        m_pDevice->GPUSync();

        CreateSOBuffers();
      }

      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      D3D12_STREAM_OUTPUT_BUFFER_VIEW view;
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;
      list->SOSetTargets(0, 1, &view);

      list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
      list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                          drawcall->instanceOffset);
    }
    else    // drawcall is indexed
    {
      bytebuf idxdata;
      GetBufferData(rs.ibuffer.buf, rs.ibuffer.offs + drawcall->indexOffset * rs.ibuffer.bytewidth,
                    RDCMIN(drawcall->numIndices * rs.ibuffer.bytewidth, rs.ibuffer.size), idxdata);

      vector<uint32_t> indices;

      uint16_t *idx16 = (uint16_t *)&idxdata[0];
      uint32_t *idx32 = (uint32_t *)&idxdata[0];

      // only read as many indices as were available in the buffer
      uint32_t numIndices =
          RDCMIN(uint32_t(idxdata.size() / rs.ibuffer.bytewidth), drawcall->numIndices);

      uint32_t idxclamp = 0;
      if(drawcall->baseVertex < 0)
        idxclamp = uint32_t(-drawcall->baseVertex);

      // grab all unique vertex indices referenced
      for(uint32_t i = 0; i < numIndices; i++)
      {
        uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i];

        // apply baseVertex but clamp to 0 (don't allow index to become negative)
        if(i32 < idxclamp)
          i32 = 0;
        else if(drawcall->baseVertex < 0)
          i32 -= idxclamp;
        else if(drawcall->baseVertex > 0)
          i32 += drawcall->baseVertex;

        auto it = std::lower_bound(indices.begin(), indices.end(), i32);

        if(it != indices.end() && *it == i32)
          continue;

        indices.insert(it, i32);
      }

      // if we read out of bounds, we'll also have a 0 index being referenced
      // (as 0 is read). Don't insert 0 if we already have 0 though
      if(numIndices < drawcall->numIndices && (indices.empty() || indices[0] != 0))
        indices.insert(indices.begin(), 0);

      // An index buffer could be something like: 500, 501, 502, 501, 503, 502
      // in which case we can't use the existing index buffer without filling 499 slots of vertex
      // data with padding. Instead we rebase the indices based on the smallest vertex so it becomes
      // 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer.
      //
      // Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512
      // which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid.
      // We just stream-out a tightly packed list of unique indices, and then remap the index buffer
      // so that what did point to 500 points to 0 (accounting for rebasing), and what did point
      // to 510 now points to 3 (accounting for the unique sort).

      // we use a map here since the indices may be sparse. Especially considering if an index
      // is 'invalid' like 0xcccccccc then we don't want an array of 3.4 billion entries.
      map<uint32_t, size_t> indexRemap;
      for(size_t i = 0; i < indices.size(); i++)
      {
        // by definition, this index will only appear once in indices[]
        indexRemap[indices[i]] = i;
      }

      if(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t))
      {
        uint64_t oldSize = m_SOBufferSize;
        while(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t))
          m_SOBufferSize *= 2;
        RDCWARN("Resizing stream-out buffer from %llu to %llu for indices", oldSize, m_SOBufferSize);
        recreate = true;
      }

      if(recreate)
      {
        m_pDevice->GPUSync();

        CreateSOBuffers();
      }

      GetDebugManager()->FillBuffer(m_SOPatchedIndexBuffer, 0, &indices[0],
                                    indices.size() * sizeof(uint32_t));

      D3D12_INDEX_BUFFER_VIEW patchedIB;

      patchedIB.BufferLocation = m_SOPatchedIndexBuffer->GetGPUVirtualAddress();
      patchedIB.Format = DXGI_FORMAT_R32_UINT;
      patchedIB.SizeInBytes = UINT(indices.size() * sizeof(uint32_t));

      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      list->IASetIndexBuffer(&patchedIB);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      D3D12_STREAM_OUTPUT_BUFFER_VIEW view;
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;
      list->SOSetTargets(0, 1, &view);

      list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);

      list->DrawIndexedInstanced((UINT)indices.size(), drawcall->numInstances, 0, 0,
                                 drawcall->instanceOffset);

      uint32_t stripCutValue = 0;
      if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF)
        stripCutValue = 0xffff;
      else if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF)
        stripCutValue = 0xffffffff;

      // rebase existing index buffer to point to the right elements in our stream-out'd
      // vertex buffer
      for(uint32_t i = 0; i < numIndices; i++)
      {
        uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i];

        // preserve primitive restart indices
        if(stripCutValue && i32 == stripCutValue)
          continue;

        // apply baseVertex but clamp to 0 (don't allow index to become negative)
        if(i32 < idxclamp)
          i32 = 0;
        else if(drawcall->baseVertex < 0)
          i32 -= idxclamp;
        else if(drawcall->baseVertex > 0)
          i32 += drawcall->baseVertex;

        if(rs.ibuffer.bytewidth == 2)
          idx16[i] = uint16_t(indexRemap[i32]);
        else
          idx32[i] = uint32_t(indexRemap[i32]);
      }

      idxBuf = NULL;

      if(!idxdata.empty())
      {
        D3D12_RESOURCE_DESC idxBufDesc;
        idxBufDesc.Alignment = 0;
        idxBufDesc.DepthOrArraySize = 1;
        idxBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
        idxBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
        idxBufDesc.Format = DXGI_FORMAT_UNKNOWN;
        idxBufDesc.Height = 1;
        idxBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
        idxBufDesc.MipLevels = 1;
        idxBufDesc.SampleDesc.Count = 1;
        idxBufDesc.SampleDesc.Quality = 0;
        idxBufDesc.Width = idxdata.size();

        D3D12_HEAP_PROPERTIES heapProps;
        heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
        heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
        heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
        heapProps.CreationNodeMask = 1;
        heapProps.VisibleNodeMask = 1;

        hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &idxBufDesc,
                                                D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                                __uuidof(ID3D12Resource), (void **)&idxBuf);
        RDCASSERTEQUAL(hr, S_OK);

        SetObjName(idxBuf, StringFormat::Fmt("PostVS idxBuf for %u", eventId));

        GetDebugManager()->FillBuffer(idxBuf, 0, &idxdata[0], idxdata.size());
      }
    }

    D3D12_RESOURCE_BARRIER sobarr = {};
    sobarr.Transition.pResource = m_SOBuffer;
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

    list->ResourceBarrier(1, &sobarr);

    list->CopyResource(m_SOStagingBuffer, m_SOBuffer);

    // we're done with this after the copy, so we can discard it and reset
    // the counter for the next stream-out
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    list->DiscardResource(m_SOBuffer, NULL);
    list->ResourceBarrier(1, &sobarr);

    UINT zeroes[4] = {0, 0, 0, 0};
    list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                       GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                       m_SOBuffer, zeroes, 0, NULL);

    list->Close();

    ID3D12CommandList *l = list;
    m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
    m_pDevice->GPUSync();

    GetDebugManager()->ResetDebugAlloc();

    SAFE_RELEASE(pipe);

    byte *byteData = NULL;
    D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize};
    hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData);
    if(FAILED(hr))
    {
      RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(idxBuf);
      SAFE_RELEASE(soSig);
      return;
    }

    range.End = 0;

    uint64_t numBytesWritten = *(uint64_t *)byteData;

    if(numBytesWritten == 0)
    {
      m_PostVSData[eventId] = D3D12PostVSData();
      SAFE_RELEASE(idxBuf);
      SAFE_RELEASE(soSig);
      return;
    }

    // skip past the counter
    byteData += 64;

    uint64_t numPrims = numBytesWritten / stride;

    ID3D12Resource *vsoutBuffer = NULL;

    {
      D3D12_RESOURCE_DESC vertBufDesc;
      vertBufDesc.Alignment = 0;
      vertBufDesc.DepthOrArraySize = 1;
      vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      vertBufDesc.Format = DXGI_FORMAT_UNKNOWN;
      vertBufDesc.Height = 1;
      vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      vertBufDesc.MipLevels = 1;
      vertBufDesc.SampleDesc.Count = 1;
      vertBufDesc.SampleDesc.Quality = 0;
      vertBufDesc.Width = numBytesWritten;

      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc,
                                              D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                              __uuidof(ID3D12Resource), (void **)&vsoutBuffer);
      RDCASSERTEQUAL(hr, S_OK);

      if(vsoutBuffer)
      {
        SetObjName(vsoutBuffer, StringFormat::Fmt("PostVS vsoutBuffer for %u", eventId));
        GetDebugManager()->FillBuffer(vsoutBuffer, 0, byteData, (size_t)numBytesWritten);
      }
    }

    float nearp = 0.1f;
    float farp = 100.0f;

    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(uint64_t i = 1; numPosComponents == 4 && i < numPrims; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + i * stride);

      if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        if(m == 1.0f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }

    m_SOStagingBuffer->Unmap(0, &range);

    m_PostVSData[eventId].vsin.topo = topo;
    m_PostVSData[eventId].vsout.buf = vsoutBuffer;
    m_PostVSData[eventId].vsout.vertStride = stride;
    m_PostVSData[eventId].vsout.nearPlane = nearp;
    m_PostVSData[eventId].vsout.farPlane = farp;

    m_PostVSData[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::UseIBuffer);
    m_PostVSData[eventId].vsout.numVerts = drawcall->numIndices;

    m_PostVSData[eventId].vsout.instStride = 0;
    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].vsout.instStride =
          uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances));

    m_PostVSData[eventId].vsout.idxBuf = NULL;
    if(m_PostVSData[eventId].vsout.useIndices && idxBuf)
    {
      m_PostVSData[eventId].vsout.idxBuf = idxBuf;
      m_PostVSData[eventId].vsout.idxFmt =
          rs.ibuffer.bytewidth == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
    }

    m_PostVSData[eventId].vsout.hasPosOut = posidx >= 0;

    m_PostVSData[eventId].vsout.topo = topo;
  }
  else
  {
    // empty vertex output signature
    m_PostVSData[eventId].vsin.topo = topo;
    m_PostVSData[eventId].vsout.buf = NULL;
    m_PostVSData[eventId].vsout.instStride = 0;
    m_PostVSData[eventId].vsout.vertStride = 0;
    m_PostVSData[eventId].vsout.nearPlane = 0.0f;
    m_PostVSData[eventId].vsout.farPlane = 0.0f;
    m_PostVSData[eventId].vsout.useIndices = false;
    m_PostVSData[eventId].vsout.hasPosOut = false;
    m_PostVSData[eventId].vsout.idxBuf = NULL;

    m_PostVSData[eventId].vsout.topo = topo;
  }

  if(dxbcGS || dxbcDS)
  {
    stride = 0;
    posidx = -1;
    numPosComponents = 0;

    DXBC::DXBCFile *lastShader = dxbcGS;
    if(dxbcDS)
      lastShader = dxbcDS;

    sodecls.clear();
    for(const SigParameter &sign : lastShader->m_OutputSig)
    {
      D3D12_SO_DECLARATION_ENTRY decl;

      // for now, skip streams that aren't stream 0
      if(sign.stream != 0)
        continue;

      decl.Stream = 0;
      decl.OutputSlot = 0;

      decl.SemanticName = sign.semanticName.c_str();
      decl.SemanticIndex = sign.semanticIndex;
      decl.StartComponent = 0;
      decl.ComponentCount = sign.compCount & 0xff;

      if(sign.systemValue == ShaderBuiltin::Position)
      {
        posidx = (int)sodecls.size();
        numPosComponents = decl.ComponentCount = 4;
      }

      stride += decl.ComponentCount * sizeof(float);
      sodecls.push_back(decl);
    }

    // shift position attribute up to first, keeping order otherwise
    // the same
    if(posidx > 0)
    {
      D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx];
      sodecls.erase(sodecls.begin() + posidx);
      sodecls.insert(sodecls.begin(), pos);
    }

    // enable the other shader stages again
    if(origPSO->DS())
      psoDesc.DS = origPSO->DS()->GetDesc();
    if(origPSO->HS())
      psoDesc.HS = origPSO->HS()->GetDesc();
    if(origPSO->GS())
      psoDesc.GS = origPSO->GS()->GetDesc();

    // configure new SO declarations
    psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size();
    psoDesc.StreamOutput.pSODeclaration = &sodecls[0];
    psoDesc.StreamOutput.NumStrides = 1;
    psoDesc.StreamOutput.pBufferStrides = &stride;

    // we're using the same topology this time
    psoDesc.PrimitiveTopologyType = origPSO->graphics->PrimitiveTopologyType;

    ID3D12PipelineState *pipe = NULL;
    hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                (void **)&pipe);
    if(FAILED(hr))
    {
      RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    D3D12_STREAM_OUTPUT_BUFFER_VIEW view;

    ID3D12GraphicsCommandList *list = NULL;

    view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
    view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
    view.SizeInBytes = m_SOBufferSize;
    // draws with multiple instances must be replayed one at a time so we can record the number of
    // primitives from each drawcall, as due to expansion this can vary per-instance.
    if(drawcall->numInstances > 1)
    {
      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;

      // do a dummy draw to make sure we have enough space in the output buffer
      list->SOSetTargets(0, 1, &view);

      list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

      // because the result is expanded we don't have to remap index buffers or anything
      if(drawcall->flags & DrawFlags::UseIBuffer)
      {
        list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances,
                                   drawcall->indexOffset, drawcall->baseVertex,
                                   drawcall->instanceOffset);
      }
      else
      {
        list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                            drawcall->instanceOffset);
      }

      list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

      list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1,
                             m_SOStagingBuffer, 0);

      list->Close();

      ID3D12CommandList *l = list;
      m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
      m_pDevice->GPUSync();

      // check that things are OK, and resize up if needed
      D3D12_RANGE range;
      range.Begin = 0;
      range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

      D3D12_QUERY_DATA_SO_STATISTICS *data;
      hr = m_SOStagingBuffer->Map(0, &range, (void **)&data);

      D3D12_QUERY_DATA_SO_STATISTICS result = *data;

      range.End = 0;
      m_SOStagingBuffer->Unmap(0, &range);

      if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
      {
        uint64_t oldSize = m_SOBufferSize;
        while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
          m_SOBufferSize *= 2;
        RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize);
        CreateSOBuffers();
      }

      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;

      GetDebugManager()->ResetDebugAlloc();

      // now do the actual stream out
      list = GetDebugManager()->ResetDebugList();

      // first need to reset the counter byte values which may have either been written to above, or
      // are newly created
      {
        D3D12_RESOURCE_BARRIER sobarr = {};
        sobarr.Transition.pResource = m_SOBuffer;
        sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
        sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;

        list->ResourceBarrier(1, &sobarr);

        D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {};
        counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
        counterDesc.Format = DXGI_FORMAT_R32_UINT;
        counterDesc.Buffer.FirstElement = 0;
        counterDesc.Buffer.NumElements = 4;

        UINT zeroes[4] = {0, 0, 0, 0};
        list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                           GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                           m_SOBuffer, zeroes, 0, NULL);

        std::swap(sobarr.Transition.StateBefore, sobarr.Transition.StateAfter);
        list->ResourceBarrier(1, &sobarr);
      }

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      // reserve space for enough 'buffer filled size' locations
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() +
                            AlignUp(uint64_t(drawcall->numInstances * sizeof(UINT64)), 64ULL);

      // do incremental draws to get the output size. We have to do this O(N^2) style because
      // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... N instances
      // and count the total number of verts each time, then we can see from the difference how much
      // each instance wrote.
      for(uint32_t inst = 1; inst <= drawcall->numInstances; inst++)
      {
        if(drawcall->flags & DrawFlags::UseIBuffer)
        {
          view.BufferFilledSizeLocation =
              m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64);
          list->SOSetTargets(0, 1, &view);
          list->DrawIndexedInstanced(drawcall->numIndices, inst, drawcall->indexOffset,
                                     drawcall->baseVertex, drawcall->instanceOffset);
        }
        else
        {
          view.BufferFilledSizeLocation =
              m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64);
          list->SOSetTargets(0, 1, &view);
          list->DrawInstanced(drawcall->numIndices, inst, drawcall->vertexOffset,
                              drawcall->instanceOffset);
        }
      }

      list->Close();

      l = list;
      m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
      m_pDevice->GPUSync();

      GetDebugManager()->ResetDebugAlloc();

      // the last draw will have written the actual data we want into the buffer
    }
    else
    {
      // this only loops if we find from a query that we need to resize up
      while(true)
      {
        list = GetDebugManager()->ResetDebugList();

        rs.ApplyState(list);

        list->SetPipelineState(pipe);

        if(soSig)
        {
          list->SetGraphicsRootSignature(soSig);
          rs.ApplyGraphicsRootElements(list);
        }

        view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
        view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
        view.SizeInBytes = m_SOBufferSize;

        list->SOSetTargets(0, 1, &view);

        list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

        // because the result is expanded we don't have to remap index buffers or anything
        if(drawcall->flags & DrawFlags::UseIBuffer)
        {
          list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances,
                                     drawcall->indexOffset, drawcall->baseVertex,
                                     drawcall->instanceOffset);
        }
        else
        {
          list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                              drawcall->instanceOffset);
        }

        list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

        list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1,
                               m_SOStagingBuffer, 0);

        list->Close();

        ID3D12CommandList *l = list;
        m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
        m_pDevice->GPUSync();

        // check that things are OK, and resize up if needed
        D3D12_RANGE range;
        range.Begin = 0;
        range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

        D3D12_QUERY_DATA_SO_STATISTICS *data;
        hr = m_SOStagingBuffer->Map(0, &range, (void **)&data);

        if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
        {
          uint64_t oldSize = m_SOBufferSize;
          while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
            m_SOBufferSize *= 2;
          RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize);
          CreateSOBuffers();

          continue;
        }

        range.End = 0;
        m_SOStagingBuffer->Unmap(0, &range);

        GetDebugManager()->ResetDebugAlloc();

        break;
      }
    }

    list = GetDebugManager()->ResetDebugList();

    D3D12_RESOURCE_BARRIER sobarr = {};
    sobarr.Transition.pResource = m_SOBuffer;
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

    list->ResourceBarrier(1, &sobarr);

    list->CopyResource(m_SOStagingBuffer, m_SOBuffer);

    // we're done with this after the copy, so we can discard it and reset
    // the counter for the next stream-out
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    list->DiscardResource(m_SOBuffer, NULL);
    list->ResourceBarrier(1, &sobarr);

    D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {};
    counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
    counterDesc.Format = DXGI_FORMAT_R32_UINT;
    counterDesc.Buffer.FirstElement = 0;
    counterDesc.Buffer.NumElements = 4;

    UINT zeroes[4] = {0, 0, 0, 0};
    list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                       GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                       m_SOBuffer, zeroes, 0, NULL);

    list->Close();

    ID3D12CommandList *l = list;
    m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
    m_pDevice->GPUSync();

    GetDebugManager()->ResetDebugAlloc();

    SAFE_RELEASE(pipe);

    byte *byteData = NULL;
    D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize};
    hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData);
    if(FAILED(hr))
    {
      RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    range.End = 0;

    uint64_t *counters = (uint64_t *)byteData;

    uint64_t numBytesWritten = 0;
    std::vector<D3D12PostVSData::InstData> instData;
    if(drawcall->numInstances > 1)
    {
      uint64_t prevByteCount = 0;

      for(uint32_t inst = 0; inst < drawcall->numInstances; inst++)
      {
        uint64_t byteCount = counters[inst];

        D3D12PostVSData::InstData d;
        d.numVerts = uint32_t((byteCount - prevByteCount) / stride);
        d.bufOffset = prevByteCount;
        prevByteCount = byteCount;

        instData.push_back(d);
      }

      numBytesWritten = prevByteCount;
    }
    else
    {
      numBytesWritten = counters[0];
    }

    if(numBytesWritten == 0)
    {
      SAFE_RELEASE(soSig);
      return;
    }

    // skip past the counter(s)
    byteData += (view.BufferLocation - m_SOBuffer->GetGPUVirtualAddress());

    uint64_t numVerts = numBytesWritten / stride;

    ID3D12Resource *gsoutBuffer = NULL;

    {
      D3D12_RESOURCE_DESC vertBufDesc;
      vertBufDesc.Alignment = 0;
      vertBufDesc.DepthOrArraySize = 1;
      vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      vertBufDesc.Format = DXGI_FORMAT_UNKNOWN;
      vertBufDesc.Height = 1;
      vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      vertBufDesc.MipLevels = 1;
      vertBufDesc.SampleDesc.Count = 1;
      vertBufDesc.SampleDesc.Quality = 0;
      vertBufDesc.Width = numBytesWritten;

      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc,
                                              D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                              __uuidof(ID3D12Resource), (void **)&gsoutBuffer);
      RDCASSERTEQUAL(hr, S_OK);

      if(gsoutBuffer)
      {
        SetObjName(gsoutBuffer, StringFormat::Fmt("PostVS gsoutBuffer for %u", eventId));
        GetDebugManager()->FillBuffer(gsoutBuffer, 0, byteData, (size_t)numBytesWritten);
      }
    }

    float nearp = 0.1f;
    float farp = 100.0f;

    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(UINT64 i = 1; numPosComponents == 4 && i < numVerts; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + i * stride);

      if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        if(m == 1.0f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }

    m_SOStagingBuffer->Unmap(0, &range);

    m_PostVSData[eventId].gsout.buf = gsoutBuffer;
    m_PostVSData[eventId].gsout.instStride = 0;
    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].gsout.instStride =
          uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances));
    m_PostVSData[eventId].gsout.vertStride = stride;
    m_PostVSData[eventId].gsout.nearPlane = nearp;
    m_PostVSData[eventId].gsout.farPlane = farp;
    m_PostVSData[eventId].gsout.useIndices = false;
    m_PostVSData[eventId].gsout.hasPosOut = posidx >= 0;
    m_PostVSData[eventId].gsout.idxBuf = NULL;

    topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;

    if(lastShader == dxbcGS)
    {
      for(size_t i = 0; i < dxbcGS->GetNumDeclarations(); i++)
      {
        const DXBC::ASMDecl &decl = dxbcGS->GetDeclaration(i);

        if(decl.declaration == DXBC::OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY)
        {
          topo = decl.outTopology;
          break;
        }
      }
    }
    else if(lastShader == dxbcDS)
    {
      for(size_t i = 0; i < dxbcDS->GetNumDeclarations(); i++)
      {
        const DXBC::ASMDecl &decl = dxbcDS->GetDeclaration(i);

        if(decl.declaration == DXBC::OPCODE_DCL_TESS_DOMAIN)
        {
          if(decl.domain == DXBC::DOMAIN_ISOLINE)
            topo = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
          else
            topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
          break;
        }
      }
    }

    m_PostVSData[eventId].gsout.topo = topo;

    // streamout expands strips unfortunately
    if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;

    m_PostVSData[eventId].gsout.numVerts = (uint32_t)numVerts;

    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].gsout.numVerts /= RDCMAX(1U, drawcall->numInstances);

    m_PostVSData[eventId].gsout.instData = instData;
  }

  SAFE_RELEASE(soSig);
}
Exemple #20
0
ResourceId D3D12Replay::RenderOverlay(ResourceId texid, CompType typeHint, DebugOverlay overlay,
                                      uint32_t eventId, const vector<uint32_t> &passEvents)
{
  ID3D12Resource *resource = WrappedID3D12Resource::GetList()[texid];

  if(resource == NULL)
    return ResourceId();

  D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc();

  std::vector<D3D12_RESOURCE_BARRIER> barriers;
  int resType = 0;
  GetDebugManager()->PrepareTextureSampling(resource, typeHint, resType, barriers);

  D3D12_RESOURCE_DESC overlayTexDesc;
  overlayTexDesc.Alignment = 0;
  overlayTexDesc.DepthOrArraySize = 1;
  overlayTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  overlayTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  overlayTexDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM;
  overlayTexDesc.Height = resourceDesc.Height;
  overlayTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  overlayTexDesc.MipLevels = 1;
  overlayTexDesc.SampleDesc = resourceDesc.SampleDesc;
  overlayTexDesc.Width = resourceDesc.Width;

  D3D12_HEAP_PROPERTIES heapProps;
  heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
  heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
  heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
  heapProps.CreationNodeMask = 1;
  heapProps.VisibleNodeMask = 1;

  D3D12_RESOURCE_DESC currentOverlayDesc;
  RDCEraseEl(currentOverlayDesc);
  if(m_Overlay.Texture)
    currentOverlayDesc = m_Overlay.Texture->GetDesc();

  WrappedID3D12Resource *wrappedCustomRenderTex = (WrappedID3D12Resource *)m_Overlay.Texture;

  // need to recreate backing custom render tex
  if(overlayTexDesc.Width != currentOverlayDesc.Width ||
     overlayTexDesc.Height != currentOverlayDesc.Height ||
     overlayTexDesc.Format != currentOverlayDesc.Format ||
     overlayTexDesc.SampleDesc.Count != currentOverlayDesc.SampleDesc.Count ||
     overlayTexDesc.SampleDesc.Quality != currentOverlayDesc.SampleDesc.Quality)
  {
    SAFE_RELEASE(m_Overlay.Texture);
    m_Overlay.resourceId = ResourceId();

    ID3D12Resource *customRenderTex = NULL;
    HRESULT hr = m_pDevice->CreateCommittedResource(
        &heapProps, D3D12_HEAP_FLAG_NONE, &overlayTexDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, NULL,
        __uuidof(ID3D12Resource), (void **)&customRenderTex);
    if(FAILED(hr))
    {
      RDCERR("Failed to create custom render tex HRESULT: %s", ToStr(hr).c_str());
      return ResourceId();
    }
    wrappedCustomRenderTex = (WrappedID3D12Resource *)customRenderTex;

    customRenderTex->SetName(L"customRenderTex");

    m_Overlay.Texture = wrappedCustomRenderTex;
    m_Overlay.resourceId = wrappedCustomRenderTex->GetResourceID();
  }

  D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState;

  ID3D12Resource *renderDepth = NULL;

  D3D12Descriptor *dsView = GetWrapped(rs.dsv);

  D3D12_RESOURCE_DESC depthTexDesc = {};
  D3D12_DEPTH_STENCIL_VIEW_DESC dsViewDesc = {};
  if(dsView)
  {
    ID3D12Resource *realDepth = dsView->nonsamp.resource;

    dsViewDesc = dsView->nonsamp.dsv;

    depthTexDesc = realDepth->GetDesc();
    depthTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
    depthTexDesc.Alignment = 0;

    HRESULT hr = S_OK;

    hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc,
                                            D3D12_RESOURCE_STATE_COPY_DEST, NULL,
                                            __uuidof(ID3D12Resource), (void **)&renderDepth);
    if(FAILED(hr))
    {
      RDCERR("Failed to create renderDepth HRESULT: %s", ToStr(hr).c_str());
      return m_Overlay.resourceId;
    }

    renderDepth->SetName(L"Overlay renderDepth");

    ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

    const vector<D3D12_RESOURCE_STATES> &states =
        m_pDevice->GetSubresourceStates(GetResID(realDepth));

    vector<D3D12_RESOURCE_BARRIER> depthBarriers;
    depthBarriers.reserve(states.size());
    for(size_t i = 0; i < states.size(); i++)
    {
      D3D12_RESOURCE_BARRIER b;

      // skip unneeded barriers
      if(states[i] & D3D12_RESOURCE_STATE_COPY_SOURCE)
        continue;

      b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
      b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      b.Transition.pResource = realDepth;
      b.Transition.Subresource = (UINT)i;
      b.Transition.StateBefore = states[i];
      b.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

      depthBarriers.push_back(b);
    }

    if(!depthBarriers.empty())
      list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]);

    list->CopyResource(renderDepth, realDepth);

    for(size_t i = 0; i < depthBarriers.size(); i++)
      std::swap(depthBarriers[i].Transition.StateBefore, depthBarriers[i].Transition.StateAfter);

    if(!depthBarriers.empty())
      list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]);

    D3D12_RESOURCE_BARRIER b = {};

    b.Transition.pResource = renderDepth;
    b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
    b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;

    // the copy above is recorded, so transition renderDepth from COPY_DEST to DEPTH_WRITE
    list->ResourceBarrier(1, &b);

    list->Close();
  }

  D3D12_RENDER_TARGET_VIEW_DESC rtDesc = {};
  rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
  rtDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM;
  rtDesc.Texture2D.MipSlice = 0;
  rtDesc.Texture2D.PlaneSlice = 0;

  if(overlayTexDesc.SampleDesc.Count > 1 || overlayTexDesc.SampleDesc.Quality > 0)
    rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS;

  D3D12_CPU_DESCRIPTOR_HANDLE rtv = GetDebugManager()->GetCPUHandle(OVERLAY_RTV);

  m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv);

  ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

  FLOAT black[] = {0.0f, 0.0f, 0.0f, 0.0f};
  list->ClearRenderTargetView(rtv, black, 0, NULL);

  D3D12_CPU_DESCRIPTOR_HANDLE dsv = {};

  if(renderDepth)
  {
    dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV);
    m_pDevice->CreateDepthStencilView(
        renderDepth, dsViewDesc.Format == DXGI_FORMAT_UNKNOWN ? NULL : &dsViewDesc, dsv);
  }

  D3D12_DEPTH_STENCIL_DESC dsDesc;

  dsDesc.BackFace.StencilFailOp = dsDesc.BackFace.StencilPassOp =
      dsDesc.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
  dsDesc.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  dsDesc.FrontFace.StencilFailOp = dsDesc.FrontFace.StencilPassOp =
      dsDesc.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
  dsDesc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  dsDesc.DepthEnable = TRUE;
  dsDesc.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
  dsDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
  dsDesc.StencilEnable = FALSE;
  dsDesc.StencilReadMask = dsDesc.StencilWriteMask = 0xff;

  WrappedID3D12PipelineState *pipe = NULL;

  if(rs.pipe != ResourceId())
    pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(overlay == DebugOverlay::NaN || overlay == DebugOverlay::Clipping)
  {
    // just need the basic texture
  }
  else if(overlay == DebugOverlay::Drawcall)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      float overlayConsts[4] = {0.8f, 0.1f, 0.8f, 1.0f};
      ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts);

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      float clearColour[] = {0.0f, 0.0f, 0.0f, 0.5f};
      list->ClearRenderTargetView(rtv, clearColour, 0, NULL);

      list->Close();
      list = NULL;

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE();

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::BackfaceCull)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      D3D12_CULL_MODE origCull = psoDesc.RasterizerState.CullMode;

      float redCol[4] = {1.0f, 0.0f, 0.0f, 1.0f};
      ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(redCol);

      float greenCol[4] = {0.0f, 1.0f, 0.0f, 1.0f};
      ID3DBlob *green = m_pDevice->GetShaderCache()->MakeFixedColShader(greenCol);

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      psoDesc.PS.pShaderBytecode = red->GetBufferPointer();
      psoDesc.PS.BytecodeLength = red->GetBufferSize();

      list->Close();
      list = NULL;

      ID3D12PipelineState *redPSO = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&redPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      psoDesc.RasterizerState.CullMode = origCull;
      psoDesc.PS.pShaderBytecode = green->GetBufferPointer();
      psoDesc.PS.BytecodeLength = green->GetBufferSize();

      ID3D12PipelineState *greenPSO = NULL;
      hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                  (void **)&greenPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(redPSO);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(redPSO);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE();

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs.pipe = GetResID(greenPSO);

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(red);
      SAFE_RELEASE(green);
      SAFE_RELEASE(redPSO);
      SAFE_RELEASE(greenPSO);
    }
  }
  else if(overlay == DebugOverlay::Wireframe)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      float overlayConsts[] = {200.0f / 255.0f, 255.0f / 255.0f, 0.0f / 255.0f, 1.0f};
      ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts);

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_WIREFRAME;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      overlayConsts[3] = 0.0f;
      list->ClearRenderTargetView(rtv, overlayConsts, 0, NULL);

      list->Close();
      list = NULL;

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = dsv;

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::ClearBeforePass || overlay == DebugOverlay::ClearBeforeDraw)
  {
    vector<uint32_t> events = passEvents;

    if(overlay == DebugOverlay::ClearBeforeDraw)
      events.clear();

    events.push_back(eventId);

    if(!events.empty())
    {
      list->Close();
      list = NULL;

      bool rtSingle = rs.rtSingle;
      std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> rts = rs.rts;

      if(overlay == DebugOverlay::ClearBeforePass)
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      list = m_pDevice->GetNewList();

      for(size_t i = 0; i < rts.size(); i++)
      {
        D3D12Descriptor *desc = rtSingle ? GetWrapped(rts[0]) : GetWrapped(rts[i]);

        if(desc)
        {
          if(rtSingle)
            desc += i;

          Unwrap(list)->ClearRenderTargetView(UnwrapCPU(desc), black, 0, NULL);
        }
      }

      list->Close();
      list = NULL;

      for(size_t i = 0; i < events.size(); i++)
      {
        m_pDevice->ReplayLog(events[i], events[i], eReplay_OnlyDraw);

        if(overlay == DebugOverlay::ClearBeforePass && i + 1 < events.size())
          m_pDevice->ReplayLog(events[i] + 1, events[i + 1], eReplay_WithoutDraw);
      }
    }
  }
  else if(overlay == DebugOverlay::ViewportScissor)
  {
    if(pipe && pipe->IsGraphics() && !rs.views.empty())
    {
      list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

      D3D12_VIEWPORT viewport = rs.views[0];
      list->RSSetViewports(1, &viewport);

      D3D12_RECT scissor = {0, 0, 16384, 16384};
      list->RSSetScissorRects(1, &scissor);

      list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

      list->SetPipelineState(m_General.FixedColPipe);

      list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig);

      DebugPixelCBufferData pixelData = {0};

      // border colour (dark, 2px, opaque)
      pixelData.WireframeColour = Vec3f(0.1f, 0.1f, 0.1f);
      // inner colour (light, transparent)
      pixelData.Channels = Vec4f(0.2f, 0.2f, 0.9f, 0.7f);
      pixelData.OutputDisplayFormat = 0;
      pixelData.RangeMinimum = viewport.TopLeftX;
      pixelData.InverseRangeSize = viewport.TopLeftY;
      pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f);

      D3D12_GPU_VIRTUAL_ADDRESS viewCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      list->SetGraphicsRootConstantBufferView(0, viewCB);
      list->SetGraphicsRootConstantBufferView(1, viewCB);
      list->SetGraphicsRootConstantBufferView(2, viewCB);

      Vec4f dummy;
      list->SetGraphicsRoot32BitConstants(3, 4, &dummy.x, 0);

      float factor[4] = {1.0f, 1.0f, 1.0f, 1.0f};
      list->OMSetBlendFactor(factor);

      list->DrawInstanced(3, 1, 0, 0);

      viewport.TopLeftX = (float)rs.scissors[0].left;
      viewport.TopLeftY = (float)rs.scissors[0].top;
      viewport.Width = (float)(rs.scissors[0].right - rs.scissors[0].left);
      viewport.Height = (float)(rs.scissors[0].bottom - rs.scissors[0].top);
      list->RSSetViewports(1, &viewport);

      pixelData.OutputDisplayFormat = 1;
      pixelData.RangeMinimum = viewport.TopLeftX;
      pixelData.InverseRangeSize = viewport.TopLeftY;
      pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f);

      D3D12_GPU_VIRTUAL_ADDRESS scissorCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      list->SetGraphicsRootConstantBufferView(1, scissorCB);

      list->DrawInstanced(3, 1, 0, 0);
    }
  }
  else if(overlay == DebugOverlay::TriangleSizeDraw || overlay == DebugOverlay::TriangleSizePass)
  {
    if(pipe && pipe->IsGraphics())
    {
      SCOPED_TIMER("Triangle size");

      vector<uint32_t> events = passEvents;

      if(overlay == DebugOverlay::TriangleSizeDraw)
        events.clear();

      while(!events.empty())
      {
        const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[0]);

        // remove any non-drawcalls, like the pass boundary.
        if(!(draw->flags & DrawFlags::Drawcall))
          events.erase(events.begin());
        else
          break;
      }

      events.push_back(eventId);

      D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = pipe->GetGraphicsDesc();
      pipeDesc.pRootSignature = m_General.ConstOnlyRootSig;
      pipeDesc.SampleMask = 0xFFFFFFFF;
      pipeDesc.SampleDesc.Count = 1;
      pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;

      pipeDesc.NumRenderTargets = 1;
      RDCEraseEl(pipeDesc.RTVFormats);
      pipeDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      pipeDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

      D3D12_INPUT_ELEMENT_DESC ia[2] = {};
      ia[0].SemanticName = "pos";
      ia[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].SemanticName = "sec";
      ia[1].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].InputSlot = 1;
      ia[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;

      pipeDesc.InputLayout.NumElements = 2;
      pipeDesc.InputLayout.pInputElementDescs = ia;

      pipeDesc.VS.BytecodeLength = m_Overlay.MeshVS->GetBufferSize();
      pipeDesc.VS.pShaderBytecode = m_Overlay.MeshVS->GetBufferPointer();
      RDCEraseEl(pipeDesc.HS);
      RDCEraseEl(pipeDesc.DS);
      pipeDesc.GS.BytecodeLength = m_Overlay.TriangleSizeGS->GetBufferSize();
      pipeDesc.GS.pShaderBytecode = m_Overlay.TriangleSizeGS->GetBufferPointer();
      pipeDesc.PS.BytecodeLength = m_Overlay.TriangleSizePS->GetBufferSize();
      pipeDesc.PS.pShaderBytecode = m_Overlay.TriangleSizePS->GetBufferPointer();

      pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;

      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_GREATER)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL;
      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_LESS)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;

      // enough for all primitive topology types
      ID3D12PipelineState *pipes[D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH + 1] = {};

      DebugVertexCBuffer vertexData = {};
      vertexData.LineStrip = 0;
      vertexData.ModelViewProj = Matrix4f::Identity();
      vertexData.SpriteSize = Vec2f();

      Vec4f viewport(rs.views[0].Width, rs.views[0].Height);

      if(rs.dsv.ptr)
      {
        D3D12_CPU_DESCRIPTOR_HANDLE realDSV = Unwrap(rs.dsv);

        list->OMSetRenderTargets(1, &rtv, TRUE, &realDSV);
      }

      list->RSSetViewports(1, &rs.views[0]);

      D3D12_RECT scissor = {0, 0, 16384, 16384};
      list->RSSetScissorRects(1, &scissor);

      list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig);

      list->SetGraphicsRootConstantBufferView(
          0, GetDebugManager()->UploadConstants(&vertexData, sizeof(vertexData)));
      list->SetGraphicsRootConstantBufferView(
          1, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp)));
      list->SetGraphicsRootConstantBufferView(
          2, GetDebugManager()->UploadConstants(&viewport, sizeof(viewport)));
      list->SetGraphicsRoot32BitConstants(3, 4, &viewport.x, 0);

      for(size_t i = 0; i < events.size(); i++)
      {
        const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[i]);

        for(uint32_t inst = 0; draw && inst < RDCMAX(1U, draw->numInstances); inst++)
        {
          MeshFormat fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::GSOut);
          if(fmt.vertexResourceId == ResourceId())
            fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::VSOut);

          if(fmt.vertexResourceId != ResourceId())
          {
            D3D_PRIMITIVE_TOPOLOGY topo = MakeD3DPrimitiveTopology(fmt.topology);

            if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST ||
               topo >= D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST)
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
            else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ)
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
            else
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

            list->IASetPrimitiveTopology(topo);

            if(pipes[pipeDesc.PrimitiveTopologyType] == NULL)
            {
              HRESULT hr = m_pDevice->CreateGraphicsPipelineState(
                  &pipeDesc, __uuidof(ID3D12PipelineState),
                  (void **)&pipes[pipeDesc.PrimitiveTopologyType]);
              RDCASSERTEQUAL(hr, S_OK);
            }

            ID3D12Resource *vb =
                m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.vertexResourceId);

            D3D12_VERTEX_BUFFER_VIEW vbView = {};
            vbView.BufferLocation = vb->GetGPUVirtualAddress() + fmt.vertexByteOffset;
            vbView.StrideInBytes = fmt.vertexByteStride;
            vbView.SizeInBytes = UINT(vb->GetDesc().Width - fmt.vertexByteOffset);

            // second bind is just a dummy, so we don't have to make a shader
            // that doesn't accept the secondary stream
            list->IASetVertexBuffers(0, 1, &vbView);
            list->IASetVertexBuffers(1, 1, &vbView);

            list->SetPipelineState(pipes[pipeDesc.PrimitiveTopologyType]);

            if(fmt.indexByteStride && fmt.indexResourceId != ResourceId())
            {
              ID3D12Resource *ib =
                  m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.indexResourceId);

              D3D12_INDEX_BUFFER_VIEW view;
              view.BufferLocation = ib->GetGPUVirtualAddress() + fmt.indexByteOffset;
              view.SizeInBytes = UINT(ib->GetDesc().Width - fmt.indexByteOffset);
              view.Format = fmt.indexByteStride == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
              list->IASetIndexBuffer(&view);

              list->DrawIndexedInstanced(fmt.numIndices, 1, 0, fmt.baseVertex, 0);
            }
            else
            {
              list->DrawInstanced(fmt.numIndices, 1, 0, 0);
            }
          }
        }
      }

      list->Close();
      list = NULL;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(size_t i = 0; i < ARRAY_COUNT(pipes); i++)
        SAFE_RELEASE(pipes[i]);
    }

    // restore back to normal
    m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
  else if(overlay == DebugOverlay::QuadOverdrawPass || overlay == DebugOverlay::QuadOverdrawDraw)
  {
    SCOPED_TIMER("Quad Overdraw");

    vector<uint32_t> events = passEvents;

    if(overlay == DebugOverlay::QuadOverdrawDraw)
      events.clear();

    events.push_back(eventId);

    if(!events.empty())
    {
      if(overlay == DebugOverlay::QuadOverdrawPass)
      {
        list->Close();
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);
        list = m_pDevice->GetNewList();
      }

      uint32_t width = uint32_t(resourceDesc.Width >> 1);
      uint32_t height = resourceDesc.Height >> 1;

      width = RDCMAX(1U, width);
      height = RDCMAX(1U, height);

      D3D12_RESOURCE_DESC uavTexDesc = {};
      uavTexDesc.Alignment = 0;
      uavTexDesc.DepthOrArraySize = 4;
      uavTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
      uavTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
      uavTexDesc.Format = DXGI_FORMAT_R32_UINT;
      uavTexDesc.Height = height;
      uavTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
      uavTexDesc.MipLevels = 1;
      uavTexDesc.SampleDesc.Count = 1;
      uavTexDesc.SampleDesc.Quality = 0;
      uavTexDesc.Width = width;

      ID3D12Resource *overdrawTex = NULL;
      HRESULT hr = m_pDevice->CreateCommittedResource(
          &heapProps, D3D12_HEAP_FLAG_NONE, &uavTexDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
          NULL, __uuidof(ID3D12Resource), (void **)&overdrawTex);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overdrawTex HRESULT: %s", ToStr(hr).c_str());
        list->Close();
        list = NULL;
        return m_Overlay.resourceId;
      }

      m_pDevice->CreateShaderResourceView(overdrawTex, NULL,
                                          GetDebugManager()->GetCPUHandle(OVERDRAW_SRV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetCPUHandle(OVERDRAW_UAV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV));

      UINT zeroes[4] = {0, 0, 0, 0};
      list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(OVERDRAW_UAV),
                                         GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV),
                                         overdrawTex, zeroes, 0, NULL);
      list->Close();
      list = NULL;

#if ENABLED(SINGLE_FLUSH_VALIDATE)
      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();
#endif

      m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      D3D12_SHADER_BYTECODE quadWrite;
      quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize();
      quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer();

      // declare callback struct here
      D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, events,
                                   ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)));

      m_pDevice->ReplayLog(events.front(), events.back(), eReplay_Full);

      // resolve pass
      {
        list = m_pDevice->GetNewList();

        D3D12_RESOURCE_BARRIER overdrawBarriers[2] = {};

        // make sure UAV work is done then prepare for reading in PS
        overdrawBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
        overdrawBarriers[0].UAV.pResource = overdrawTex;
        overdrawBarriers[1].Transition.pResource = overdrawTex;
        overdrawBarriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        overdrawBarriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
        overdrawBarriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;

        // prepare tex resource for copying
        list->ResourceBarrier(2, overdrawBarriers);

        list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

        list->RSSetViewports(1, &rs.views[0]);

        D3D12_RECT scissor = {0, 0, 16384, 16384};
        list->RSSetScissorRects(1, &scissor);

        list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

        list->SetPipelineState(m_Overlay.QuadResolvePipe);

        list->SetGraphicsRootSignature(m_Overlay.QuadResolveRootSig);

        GetDebugManager()->SetDescriptorHeaps(list, true, false);

        list->SetGraphicsRootConstantBufferView(
            0, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp)));
        list->SetGraphicsRootDescriptorTable(1, GetDebugManager()->GetGPUHandle(OVERDRAW_SRV));

        list->DrawInstanced(3, 1, 0, 0);

        list->Close();
        list = NULL;
      }

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(auto it = cb.m_PipelineCache.begin(); it != cb.m_PipelineCache.end(); ++it)
      {
        SAFE_RELEASE(it->second.pipe);
        SAFE_RELEASE(it->second.sig);
      }

      SAFE_RELEASE(overdrawTex);
    }

    if(overlay == DebugOverlay::QuadOverdrawPass)
      m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
// Worker thread body. workerIndex carries the worker's thread index, an
// integer in the range [0, NumContexts), packed into the pointer-sized
// thread argument.
//
// Unless SINGLETHREADED is defined, the body loops forever: it waits on
// m_workerBeginRenderFrame[threadIndex], records and closes this worker's
// shadow-pass command list, signals m_workerFinishShadowPass[threadIndex],
// records and closes this worker's scene-pass command list, and finally
// signals m_workerFinishedRenderFrame[threadIndex]. With SINGLETHREADED
// defined, the same recording work runs exactly once with no event traffic.
void D3D12Multithreading::WorkerThread(LPVOID workerIndex)
{
	// Convert through the pointer-sized INT_PTR before narrowing to int.
	// A direct reinterpret_cast<int> of a pointer is ill-formed when int is
	// narrower than a pointer (i.e. on 64-bit targets).
	const int threadIndex = static_cast<int>(reinterpret_cast<INT_PTR>(workerIndex));
	assert(threadIndex >= 0);
	assert(threadIndex < NumContexts);
#if !SINGLETHREADED

	// The range check doubles as a release-build guard: with asserts compiled
	// out, an out-of-range index skips the loop instead of indexing out of
	// bounds.
	while (threadIndex >= 0 && threadIndex < NumContexts)
	{
		// Wait for main thread to tell us to draw.

		WaitForSingleObject(m_workerBeginRenderFrame[threadIndex], INFINITE);

#endif
		ID3D12GraphicsCommandList* pShadowCommandList = m_pCurrentFrameResource->m_shadowCommandLists[threadIndex].Get();
		ID3D12GraphicsCommandList* pSceneCommandList = m_pCurrentFrameResource->m_sceneCommandLists[threadIndex].Get();

		//
		// Shadow pass
		//

		// Populate the command list.
		SetCommonPipelineState(pShadowCommandList);
		m_pCurrentFrameResource->Bind(pShadowCommandList, FALSE, nullptr, nullptr);	// No need to pass RTV or DSV descriptor heap.

		// Set null SRVs for the diffuse/normal textures.
		pShadowCommandList->SetGraphicsRootDescriptorTable(0, m_cbvSrvHeap->GetGPUDescriptorHandleForHeapStart());

		// Distribute objects over threads by drawing only 1/NumContexts
		// objects per worker (i.e. every object such that objectnum %
		// NumContexts == threadIndex).
		PIXBeginEvent(pShadowCommandList, 0, L"Worker drawing shadow pass...");

		for (int j = threadIndex; j < _countof(SampleAssets::Draws); j += NumContexts)
		{
			SampleAssets::DrawParameters drawArgs = SampleAssets::Draws[j];

			pShadowCommandList->DrawIndexedInstanced(drawArgs.IndexCount, 1, drawArgs.IndexStart, drawArgs.VertexBase, 0);
		}

		PIXEndEvent(pShadowCommandList);

		ThrowIfFailed(pShadowCommandList->Close());

#if !SINGLETHREADED
		// Submit shadow pass.
		SetEvent(m_workerFinishShadowPass[threadIndex]);
#endif

		//
		// Scene pass
		//

		// Populate the command list.  These can only be sent after the shadow
		// passes for this frame have been submitted.
		SetCommonPipelineState(pSceneCommandList);
		CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart(), m_frameIndex, m_rtvDescriptorSize);
		CD3DX12_CPU_DESCRIPTOR_HANDLE dsvHandle(m_dsvHeap->GetCPUDescriptorHandleForHeapStart());
		m_pCurrentFrameResource->Bind(pSceneCommandList, TRUE, &rtvHandle, &dsvHandle);

		PIXBeginEvent(pSceneCommandList, 0, L"Worker drawing scene pass...");

		D3D12_GPU_DESCRIPTOR_HANDLE cbvSrvHeapStart = m_cbvSrvHeap->GetGPUDescriptorHandleForHeapStart();
		const UINT cbvSrvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
		// Per-object texture descriptors are addressed past this many leading
		// heap entries (the null SRVs bound for the shadow pass above).
		const UINT nullSrvCount = 2;
		for (int j = threadIndex; j < _countof(SampleAssets::Draws); j += NumContexts)
		{
			SampleAssets::DrawParameters drawArgs = SampleAssets::Draws[j];

			// Set the diffuse and normal textures for the current object.
			CD3DX12_GPU_DESCRIPTOR_HANDLE cbvSrvHandle(cbvSrvHeapStart, nullSrvCount + drawArgs.DiffuseTextureIndex, cbvSrvDescriptorSize);
			pSceneCommandList->SetGraphicsRootDescriptorTable(0, cbvSrvHandle);

			pSceneCommandList->DrawIndexedInstanced(drawArgs.IndexCount, 1, drawArgs.IndexStart, drawArgs.VertexBase, 0);
		}

		PIXEndEvent(pSceneCommandList);
		ThrowIfFailed(pSceneCommandList->Close());

#if !SINGLETHREADED
		// Tell main thread that we are done.
		SetEvent(m_workerFinishedRenderFrame[threadIndex]);
	}
#endif
}
Exemple #22
0
/// Fills in pHwInfo with the hardware identity of the device behind a D3D12
/// command list.
///
/// \param pContext  Treated as an IUnknown* and queried for
///                  ID3D12GraphicsCommandList; must not be null.
/// \param pHwInfo   Receives vendor/device/revision ids, device name,
///                  hardware generation and timestamp frequency; must not be
///                  null. Only written when every step succeeds.
/// \return GPA_STATUS_OK on success;
///         GPA_STATUS_ERROR_NULL_POINTER if pContext, pHwInfo or
///         g_pCurrentContext is null;
///         GPA_STATUS_ERROR_FAILED if the command list, its device, or the
///         adapter description cannot be obtained;
///         otherwise the status returned by GetTimestampFrequency().
GPA_Status GPA_IMP_GetHWInfo(void* pContext, GPA_HWInfo* pHwInfo)
{
    GPA_Status result = GPA_STATUS_OK;

    if (nullptr == pContext)
    {
        GPA_LogError("Parameter 'pContext' is NULL.");
        result = GPA_STATUS_ERROR_NULL_POINTER;
    }
    else if (nullptr == pHwInfo)
    {
        GPA_LogError("Parameter 'pHwInfo' is NULL.");
        result = GPA_STATUS_ERROR_NULL_POINTER;
    }
    else
    {
        IUnknown* pUnknown = static_cast<IUnknown*>(pContext);

        ID3D12GraphicsCommandList* pCommandList = nullptr;
        HRESULT hr = pUnknown->QueryInterface(__uuidof(ID3D12GraphicsCommandList), reinterpret_cast<void**>(&pCommandList));

        if (S_OK != hr)
        {
            GPA_LogError("Failed to get command list from context");
            result = GPA_STATUS_ERROR_FAILED;
        }
        else
        {
            ID3D12Device* pDevice = nullptr;
            hr = pCommandList->GetDevice(__uuidof(ID3D12Device), reinterpret_cast<void**>(&pDevice));

            if (S_OK != hr)
            {
                GPA_LogError("Failed to get device from command list");
                result = GPA_STATUS_ERROR_FAILED;
            }
            else
            {
                DXGI_ADAPTER_DESC adapterDesc;
                result = DX12GetAdapterDesc(pDevice, adapterDesc);

                if (GPA_STATUS_OK != result)
                {
                    GPA_LogError("Could not get adapter description, hardware cannot be supported.");
                    result = GPA_STATUS_ERROR_FAILED;
                }
                else if (nullptr == g_pCurrentContext)
                {
                    // Record the error and fall through rather than returning
                    // here, so the device and command list references
                    // acquired above are still released below.
                    GPA_LogError("g_pCurrentContext is NULL.");
                    result = GPA_STATUS_ERROR_NULL_POINTER;
                }
                else
                {
                    //get Time stamp frequency
                    gpa_uint64 freq = 0ull;

                    GetCurrentContext()->SetCommandList(pCommandList);
                    result = GetCurrentContext()->GetTimestampFrequency(freq);

                    if (GPA_STATUS_OK != result)
                    {
                        GPA_LogError("GetTimestampFrequency() failed.");
                    }
                    else
                    {
                        // For now it is assumed that DX12 MGPU support is exposed to the app
                        // and the app always opens the device on the correct GPU.
                        // In case where MGPU support hides the GPU from the app, then
                        // we will need to use DX12 MGPU extension (and possibly ADL util)
                        // to get the correct HW info
                        pHwInfo->SetVendorID(adapterDesc.VendorId);

                        // TODO: To enable running on WARP driver, fake a Bonaire HW ID if the device is the WARP device
                        if (0x8c == adapterDesc.DeviceId && AMD_VENDOR_ID == adapterDesc.VendorId)
                        {
                            pHwInfo->SetDeviceID(0x665C);
                            pHwInfo->SetRevisionID(0);
                        }
                        else
                        {
                            pHwInfo->SetDeviceID(adapterDesc.DeviceId);
                            pHwInfo->SetRevisionID(adapterDesc.Revision);
                        }

                        // Element-wise narrowing of the wide description: each
                        // wchar_t is converted to a char, so characters outside
                        // the char range are not preserved.
                        std::wstring adapterNameW(adapterDesc.Description);
                        std::string adapterName(adapterNameW.begin(), adapterNameW.end());
                        pHwInfo->SetDeviceName(adapterName.c_str());

                        // Stays GDT_HW_GENERATION_NONE for vendors other than
                        // NVIDIA, Intel and AMD.
                        GDT_HW_GENERATION hwGen = GDT_HW_GENERATION_NONE;

                        if (NVIDIA_VENDOR_ID == adapterDesc.VendorId)
                        {
                            hwGen = GDT_HW_GENERATION_NVIDIA;
                        }
                        else if (INTEL_VENDOR_ID == adapterDesc.VendorId)
                        {
                            hwGen = GDT_HW_GENERATION_INTEL;
                        }
                        else if (AMD_VENDOR_ID == adapterDesc.VendorId)
                        {
                            AMDTDeviceInfoUtils::Instance()->GetHardwareGeneration(adapterDesc.DeviceId, hwGen);
                        }

                        pHwInfo->SetHWGeneration(hwGen);
                        pHwInfo->SetTimeStampFrequency(freq);
                    }
                }

                // Balances the reference taken by GetDevice().
                pDevice->Release();
            }

            // Balances the reference taken by QueryInterface().
            pCommandList->Release();
        }
    }

    return result;
}