Esempio n. 1
0
// Performs a multi-range descriptor copy on the real device, then mirrors the same copy into the
// wrapped D3D12Descriptor tracking data.
//
// The source and destination range lists are consumed in lock-step, one descriptor at a time:
// whenever the current source (or destination) range is exhausted we move on to the next one, and
// the copy ends as soon as either list runs out.
//
// pDestDescriptorRangeSizes / pSrcDescriptorRangeSizes are optional in the D3D12 API; a NULL array
// means every range in that list has a size of 1. The arrays are forwarded to the real device
// untouched and interpreted the same way when mirroring.
void WrappedID3D12Device::CopyDescriptors(
    UINT NumDestDescriptorRanges, const D3D12_CPU_DESCRIPTOR_HANDLE *pDestDescriptorRangeStarts,
    const UINT *pDestDescriptorRangeSizes, UINT NumSrcDescriptorRanges,
    const D3D12_CPU_DESCRIPTOR_HANDLE *pSrcDescriptorRangeStarts,
    const UINT *pSrcDescriptorRangeSizes, D3D12_DESCRIPTOR_HEAP_TYPE DescriptorHeapsType)
{
  // the real device needs unwrapped range starts, so build temporary unwrapped copies
  std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> dstStarts(NumDestDescriptorRanges);
  std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> srcStarts(NumSrcDescriptorRanges);

  for(UINT i = 0; i < NumDestDescriptorRanges; i++)
    dstStarts[i] = Unwrap(pDestDescriptorRangeStarts[i]);

  for(UINT i = 0; i < NumSrcDescriptorRanges; i++)
    srcStarts[i] = Unwrap(pSrcDescriptorRangeStarts[i]);

  m_pDevice->CopyDescriptors(NumDestDescriptorRanges, dstStarts.data(), pDestDescriptorRangeSizes,
                             NumSrcDescriptorRanges, srcStarts.data(), pSrcDescriptorRangeSizes,
                             DescriptorHeapsType);

  UINT srcRange = 0, dstRange = 0;
  UINT srcIdx = 0, dstIdx = 0;

  // wrapped descriptors at the start of the current ranges. NULL means "not yet fetched for the
  // current range", so we never touch the range start arrays when there are no ranges at all.
  D3D12Descriptor *src = NULL;
  D3D12Descriptor *dst = NULL;

  for(;;)
  {
    // move source onto the next range that still has descriptors left. This also steps over
    // zero-sized ranges instead of copying a descriptor out of them.
    while(srcRange < NumSrcDescriptorRanges &&
          srcIdx >= (pSrcDescriptorRangeSizes ? pSrcDescriptorRangeSizes[srcRange] : 1U))
    {
      srcRange++;
      srcIdx = 0;
      src = NULL;
    }

    // same for the destination
    while(dstRange < NumDestDescriptorRanges &&
          dstIdx >= (pDestDescriptorRangeSizes ? pDestDescriptorRangeSizes[dstRange] : 1U))
    {
      dstRange++;
      dstIdx = 0;
      dst = NULL;
    }

    // either list running out ends the copy
    if(srcRange >= NumSrcDescriptorRanges || dstRange >= NumDestDescriptorRanges)
      break;

    if(src == NULL)
      src = GetWrapped(pSrcDescriptorRangeStarts[srcRange]);

    if(dst == NULL)
      dst = GetWrapped(pDestDescriptorRangeStarts[dstRange]);

    dst[dstIdx].CopyFrom(src[srcIdx]);

    srcIdx++;
    dstIdx++;
  }
}
Esempio n. 2
0
// Copies a single contiguous run of descriptors: first on the real device (using unwrapped
// handles), then element by element in the wrapped D3D12Descriptor tracking data.
void WrappedID3D12Device::CopyDescriptorsSimple(UINT NumDescriptors,
                                                D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptorRangeStart,
                                                D3D12_CPU_DESCRIPTOR_HANDLE SrcDescriptorRangeStart,
                                                D3D12_DESCRIPTOR_HEAP_TYPE DescriptorHeapsType)
{
  m_pDevice->CopyDescriptorsSimple(NumDescriptors, Unwrap(DestDescriptorRangeStart),
                                   Unwrap(SrcDescriptorRangeStart), DescriptorHeapsType);

  D3D12Descriptor *from = GetWrapped(SrcDescriptorRangeStart);
  D3D12Descriptor *to = GetWrapped(DestDescriptorRangeStart);

  // walk both runs in step, mirroring each descriptor into its destination slot
  for(UINT remaining = NumDescriptors; remaining > 0; remaining--, from++, to++)
    to->CopyFrom(*from);
}
Esempio n. 3
0
// Converts a GPU descriptor handle into a PortableHandle. A handle whose ptr is 0 becomes
// PortableHandle(0); anything else is converted via its wrapped descriptor.
PortableHandle ToPortableHandle(D3D12_GPU_DESCRIPTOR_HANDLE handle)
{
  const bool hasDescriptor = (handle.ptr != 0);

  if(hasDescriptor)
    return ToPortableHandle(GetWrapped(handle));

  return PortableHandle(0);
}
Esempio n. 4
0
// Creates a display-plane surface on the real instance and, on success, wraps the returned
// surface. No resource record is allocated: the wrapper's record pointer is instead used to
// carry a synthesised window identifier.
VkResult WrappedVulkan::vkCreateDisplayPlaneSurfaceKHR(VkInstance instance,
                                                       const VkDisplaySurfaceCreateInfoKHR *pCreateInfo,
                                                       const VkAllocationCallbacks *pAllocator,
                                                       VkSurfaceKHR *pSurface)
{
  // this entry point is only expected while capturing, never on replay
  RDCASSERT(m_State >= WRITING);

  const VkResult result = ObjDisp(instance)->CreateDisplayPlaneSurfaceKHR(
      Unwrap(instance), pCreateInfo, pAllocator, pSurface);

  if(result != VK_SUCCESS)
    return result;

  // surfaces are wrapped for consistency with the rest of the surface handling, even though
  // there is no meaningful data to track for them.
  GetResourceManager()->WrapResource(Unwrap(instance), *pSurface);

  // there is no OS window handle for a display plane, so build a stand-in value from the
  // display mode, plane index and plane stack index that should be unique and hopefully does
  // not clash with real window handles in use.
  uintptr_t pseudoWindow = (uintptr_t)NON_DISP_TO_UINT64(pCreateInfo->displayMode);
  pseudoWindow += pCreateInfo->planeIndex;
  pseudoWindow += pCreateInfo->planeStackIndex << 4;

  // rather than allocating a full resource record just to hold the handle, the value is stored
  // directly in the record pointer. The record is not used for anything else.
  WrappedVkSurfaceKHR *wrappedSurface = GetWrapped(*pSurface);
  wrappedSurface->record = (VkResourceRecord *)pseudoWindow;

  return result;
}
Esempio n. 5
0
// Initialises the wrapped descriptor at DestDescriptor with the depth-stencil view parameters,
// then creates the view on the real device using the unwrapped resource and handle.
void WrappedID3D12Device::CreateDepthStencilView(ID3D12Resource *pResource,
                                                 const D3D12_DEPTH_STENCIL_VIEW_DESC *pDesc,
                                                 D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptor)
{
  D3D12Descriptor *tracked = GetWrapped(DestDescriptor);
  tracked->Init(pResource, pDesc);

  m_pDevice->CreateDepthStencilView(Unwrap(pResource), pDesc, Unwrap(DestDescriptor));
}
Esempio n. 6
0
// Initialises the wrapped descriptor at DestDescriptor with the render target view parameters,
// then creates the view on the real device using the unwrapped resource and handle.
void WrappedID3D12Device::CreateRenderTargetView(ID3D12Resource *pResource,
                                                 const D3D12_RENDER_TARGET_VIEW_DESC *pDesc,
                                                 D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptor)
{
  D3D12Descriptor *tracked = GetWrapped(DestDescriptor);
  tracked->Init(pResource, pDesc);

  m_pDevice->CreateRenderTargetView(Unwrap(pResource), pDesc, Unwrap(DestDescriptor));
}
Esempio n. 7
0
// Initialises the wrapped descriptor at DestDescriptor with the shader resource view parameters,
// then creates the view on the real device using the unwrapped resource and handle.
void WrappedID3D12Device::CreateShaderResourceView(ID3D12Resource *pResource,
                                                   const D3D12_SHADER_RESOURCE_VIEW_DESC *pDesc,
                                                   D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptor)
{
  D3D12Descriptor *tracked = GetWrapped(DestDescriptor);
  tracked->Init(pResource, pDesc);

  m_pDevice->CreateShaderResourceView(Unwrap(pResource), pDesc, Unwrap(DestDescriptor));
}
Esempio n. 8
0
// Creates a Win32 surface on the real instance and, on success, wraps it, stashes the HWND in
// the wrapper's record pointer and registers the window for keyboard input.
VkResult WrappedVulkan::vkCreateWin32SurfaceKHR(VkInstance instance,
                                                const VkWin32SurfaceCreateInfoKHR *pCreateInfo,
                                                const VkAllocationCallbacks *pAllocator,
                                                VkSurfaceKHR *pSurface)
{
  // this entry point is only expected while capturing, never on replay
  RDCASSERT(m_State >= WRITING);

  const VkResult result =
      ObjDisp(instance)->CreateWin32SurfaceKHR(Unwrap(instance), pCreateInfo, pAllocator, pSurface);

  if(result != VK_SUCCESS)
    return result;

  GetResourceManager()->WrapResource(Unwrap(instance), *pSurface);

  // rather than allocating a full resource record just to hold the window handle, the handle
  // is stored directly in the record pointer. The record is not used for anything else.
  WrappedVkSurfaceKHR *wrappedSurface = GetWrapped(*pSurface);
  wrappedSurface->record = (VkResourceRecord *)pCreateInfo->hwnd;

  Keyboard::AddInputWindow((void *)pCreateInfo->hwnd);

  return result;
}
Esempio n. 9
0
// Translates a wrapped GPU descriptor handle into the unwrapped GPU handle. A handle whose ptr
// is 0 is passed through unchanged.
D3D12_GPU_DESCRIPTOR_HANDLE Unwrap(D3D12_GPU_DESCRIPTOR_HANDLE handle)
{
  const bool hasDescriptor = (handle.ptr != 0);

  if(hasDescriptor)
    return UnwrapGPU(GetWrapped(handle));

  return handle;
}
Esempio n. 10
0
// Frees a device memory allocation: tidies up any capture-time map tracking for it, releases
// our wrapper, then frees the real allocation.
//
// memory may be VK_NULL_HANDLE (the Vulkan specification permits this), in which case there is
// nothing to do.
void WrappedVulkan::vkFreeMemory(
		VkDevice                                    device,
		VkDeviceMemory                              memory,
		const VkAllocationCallbacks*                pAllocator)
{
	// freeing a null handle is a valid no-op, and there is no wrapper to look up for it
	if(memory == VK_NULL_HANDLE)
		return;

	// we just need to clean up after ourselves on replay
	WrappedVkNonDispRes *wrapped = (WrappedVkNonDispRes *)GetWrapped(memory);

	// grab the real handle now, before the wrapper is released below
	VkDeviceMemory unwrappedMem = wrapped->real.As<VkDeviceMemory>();

	if(m_State >= WRITING)
	{
		// there is an implicit unmap on free, so make sure to tidy up
		if(wrapped->record->memMapState && wrapped->record->memMapState->refData)
		{
			Serialiser::FreeAlignedBuffer(wrapped->record->memMapState->refData);
			// don't leave a dangling pointer behind for anything that inspects the map state later
			wrapped->record->memMapState->refData = NULL;
		}

		{
			SCOPED_LOCK(m_CoherentMapsLock);

			// this allocation must no longer be considered a live coherent map
			auto it = std::find(m_CoherentMaps.begin(), m_CoherentMaps.end(), wrapped->record);
			if(it != m_CoherentMaps.end())
				m_CoherentMaps.erase(it);
		}
	}

	GetResourceManager()->ReleaseWrappedResource(memory);

	ObjDisp(device)->FreeMemory(Unwrap(device), unwrappedMem, pAllocator);
}
Esempio n. 11
0
// Initialises the wrapped descriptor at DestDescriptor with the UAV parameters (resource,
// counter resource and description), then creates the view on the real device using the
// unwrapped resources and handle.
void WrappedID3D12Device::CreateUnorderedAccessView(ID3D12Resource *pResource,
                                                    ID3D12Resource *pCounterResource,
                                                    const D3D12_UNORDERED_ACCESS_VIEW_DESC *pDesc,
                                                    D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptor)
{
  D3D12Descriptor *tracked = GetWrapped(DestDescriptor);
  tracked->Init(pResource, pCounterResource, pDesc);

  m_pDevice->CreateUnorderedAccessView(Unwrap(pResource), Unwrap(pCounterResource), pDesc,
                                       Unwrap(DestDescriptor));
}
Esempio n. 12
0
// needs to be separate since it's dispatchable
//
// Frees each command buffer individually: the real handle is read out of the wrapper, the
// wrapper is released, and then the real command buffer is freed from the pool.
//
// The Vulkan specification allows elements of pCommandBuffers to be NULL; such entries are
// skipped since they have no wrapper to look up or release.
void WrappedVulkan::vkFreeCommandBuffers(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers)
{
	for(uint32_t c=0; c < commandBufferCount; c++)
	{
		// NULL entries are legal and there is nothing to free for them
		if(pCommandBuffers[c] == VK_NULL_HANDLE)
			continue;

		WrappedVkDispRes *wrapped = (WrappedVkDispRes *)GetWrapped(pCommandBuffers[c]);

		// fetch the real handle before the wrapper is released
		VkCommandBuffer unwrapped = wrapped->real.As<VkCommandBuffer>();

		GetResourceManager()->ReleaseWrappedResource(pCommandBuffers[c]);

		ObjDisp(device)->FreeCommandBuffers(Unwrap(device), Unwrap(commandPool), 1, &unwrapped);
	}
}
Esempio n. 13
0
// Destroys a surface: unregisters the window identifier stored in the wrapper's record pointer
// from keyboard input, releases the wrapper, then destroys the real surface.
//
// surface may be VK_NULL_HANDLE (the Vulkan specification permits this), in which case there
// is nothing to do.
void WrappedVulkan::vkDestroySurfaceKHR(VkInstance instance, VkSurfaceKHR surface,
                                        const VkAllocationCallbacks *pAllocator)
{
  // destroying a null handle is a valid no-op, and there is no wrapper to look up for it
  if(surface == VK_NULL_HANDLE)
    return;

  WrappedVkSurfaceKHR *wrapper = GetWrapped(surface);

  // record pointer has window handle packed in
  if(wrapper->record)
    Keyboard::RemoveInputWindow((void *)wrapper->record);

  // now set record pointer back to NULL so no-one tries to delete it
  wrapper->record = NULL;

  // fetch the real handle before the wrapper is released
  VkSurfaceKHR unwrappedObj = wrapper->real.As<VkSurfaceKHR>();

  GetResourceManager()->ReleaseWrappedResource(surface, true);
  ObjDisp(instance)->DestroySurfaceKHR(Unwrap(instance), unwrappedObj, pAllocator);
}
Esempio n. 14
0
void WrappedID3D12Device::CreateSampler(const D3D12_SAMPLER_DESC *pDesc,
                                        D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptor)
{
  GetWrapped(DestDescriptor)->Init(pDesc);
  return m_pDevice->CreateSampler(pDesc, Unwrap(DestDescriptor));
}
Esempio n. 15
0
void WrappedID3D12Device::CreateConstantBufferView(const D3D12_CONSTANT_BUFFER_VIEW_DESC *pDesc,
                                                   D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptor)
{
  GetWrapped(DestDescriptor)->Init(pDesc);
  return m_pDevice->CreateConstantBufferView(pDesc, Unwrap(DestDescriptor));
}
Esempio n. 16
0
// Enumerates the physical devices of an instance, wrapping every handle before it is returned
// to the caller.
//
// The full list is always fetched from the driver, regardless of what the caller asked for, so
// that every physical device gets wrapped (and, while capturing, gets a resource record and a
// serialised ENUM_PHYSICALS chunk). Devices seen on a previous call are re-wrapped rather than
// wrapped again.
//
// Output follows the usual Vulkan enumeration contract:
//  - pPhysicalDevices == NULL: *pPhysicalDeviceCount receives the number of devices.
//  - otherwise: at most *pPhysicalDeviceCount handles are written, *pPhysicalDeviceCount receives
//    the number written, and VK_INCOMPLETE is returned if not all devices fitted.
// For compatibility with existing callers pPhysicalDeviceCount is still tolerated as NULL, in
// which case all handles are written.
VkResult WrappedVulkan::vkEnumeratePhysicalDevices(
		VkInstance                                  instance,
		uint32_t*                                   pPhysicalDeviceCount,
		VkPhysicalDevice*                           pPhysicalDevices)
{
	uint32_t count = 0;

	VkResult vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, NULL);

	if(vkr != VK_SUCCESS)
		return vkr;

	VkPhysicalDevice *devices = new VkPhysicalDevice[count];

	vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, devices);
	RDCASSERTEQUAL(vkr, VK_SUCCESS);

	// on a hard failure the array contents are undefined, so don't try to wrap them.
	// VK_INCOMPLETE still leaves 'count' valid handles in the array, so that can carry on.
	if(vkr != VK_SUCCESS && vkr != VK_INCOMPLETE)
	{
		SAFE_DELETE_ARRAY(devices);
		return vkr;
	}

	m_PhysicalDevices.resize(count);

	for(uint32_t i=0; i < count; i++)
	{
		// it's perfectly valid for enumerate type functions to return the same handle
		// each time. If that happens, we will already have a wrapper created so just
		// return the wrapped object to the user and do nothing else
		if(m_PhysicalDevices[i] != VK_NULL_HANDLE)
		{
			GetWrapped(m_PhysicalDevices[i])->RewrapObject(devices[i]);
			devices[i] = m_PhysicalDevices[i];
		}
		else
		{
			GetResourceManager()->WrapResource(instance, devices[i]);

			if(m_State >= WRITING)
			{
				// add the record first since it's used in the serialise function below to fetch
				// the memory indices
				VkResourceRecord *record = GetResourceManager()->AddResourceRecord(devices[i]);
				RDCASSERT(record);

				record->memProps = new VkPhysicalDeviceMemoryProperties();

				ObjDisp(devices[i])->GetPhysicalDeviceMemoryProperties(Unwrap(devices[i]), record->memProps);

				m_PhysicalDevices[i] = devices[i];

				// we remap memory indices to discourage coherent maps as much as possible
				RemapMemoryIndices(record->memProps, &record->memIdxMap);

				{
					CACHE_THREAD_SERIALISER();

					// the serialise function takes the index of this device in place of a count,
					// and a pointer to just this one device
					SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS);
					Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]);

					record->AddChunk(scope.Get());
				}

				VkResourceRecord *instrecord = GetRecord(instance);

				instrecord->AddParent(record);

				// treat physical devices as pool members of the instance (ie. freed when the instance dies)
				{
					instrecord->LockChunks();
					instrecord->pooledChildren.push_back(record);
					instrecord->UnlockChunks();
				}
			}
		}
	}

	VkResult ret = VK_SUCCESS;

	if(pPhysicalDevices)
	{
		uint32_t numToWrite = count;

		// when filling an array, the incoming count is the capacity of the caller's array - never
		// write past it
		if(pPhysicalDeviceCount && *pPhysicalDeviceCount < count)
		{
			numToWrite = *pPhysicalDeviceCount;
			ret = VK_INCOMPLETE;
		}

		if(numToWrite > 0)
			memcpy(pPhysicalDevices, devices, numToWrite*sizeof(VkPhysicalDevice));

		if(pPhysicalDeviceCount) *pPhysicalDeviceCount = numToWrite;
	}
	else if(pPhysicalDeviceCount)
	{
		*pPhysicalDeviceCount = count;
	}

	SAFE_DELETE_ARRAY(devices);

	return ret;
}
Esempio n. 17
0
// Serialises a single physical device.
//
// Despite the parameter names (which mirror vkEnumeratePhysicalDevices), this handles exactly one
// device per call: *pPhysicalDeviceCount is read as the INDEX of the device in the enumeration
// and *pPhysicalDevices as that one device's handle.
//
// When writing (m_State >= WRITING) the instance ID, device index, device ID, the record's
// memIdxMap and the device's properties / memory properties / features are serialised.
//
// Otherwise the same data is read back, the physical devices of the live instance are
// enumerated, the device at the serialised index is wrapped and registered as the live resource
// for the serialised ID, and a copy of the serialised memIdxMap is stored in m_MemIdxMaps.
//
// Always returns true.
bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices(
		Serialiser*                                 localSerialiser,
		VkInstance                                  instance,
		uint32_t*                                   pPhysicalDeviceCount,
		VkPhysicalDevice*                           pPhysicalDevices)
{
	// NOTE(review): the order of the serialise calls below presumably defines the layout of the
	// serialised chunk, so they should not be reordered - confirm against Serialiser.
	SERIALISE_ELEMENT(ResourceId, inst, GetResID(instance));
	SERIALISE_ELEMENT(uint32_t, physIndex, *pPhysicalDeviceCount);
	SERIALISE_ELEMENT(ResourceId, physId, GetResID(*pPhysicalDevices));

	// when writing, the map comes from the device's record; otherwise it starts zeroed and is
	// passed through the serialiser below
	uint32_t memIdxMap[32] = {0};
	if(m_State >= WRITING)
		memcpy(memIdxMap, GetRecord(*pPhysicalDevices)->memIdxMap, sizeof(memIdxMap));

	localSerialiser->SerialisePODArray<32>("memIdxMap", memIdxMap);

	// not used at the moment but useful for reference and might be used
	// in the future
	VkPhysicalDeviceProperties physProps;
	VkPhysicalDeviceMemoryProperties memProps;
	VkPhysicalDeviceFeatures physFeatures;
	
	// these are only filled in from the driver when writing
	if(m_State >= WRITING)
	{
		ObjDisp(instance)->GetPhysicalDeviceProperties(Unwrap(*pPhysicalDevices), &physProps);
		ObjDisp(instance)->GetPhysicalDeviceMemoryProperties(Unwrap(*pPhysicalDevices), &memProps);
		ObjDisp(instance)->GetPhysicalDeviceFeatures(Unwrap(*pPhysicalDevices), &physFeatures);
	}

	localSerialiser->Serialise("physProps", physProps);
	localSerialiser->Serialise("memProps", memProps);
	localSerialiser->Serialise("physFeatures", physFeatures);

	VkPhysicalDevice pd = VK_NULL_HANDLE;

	if(m_State >= WRITING)
	{
		pd = *pPhysicalDevices;
	}
	else
	{
		uint32_t count;
		VkPhysicalDevice *devices;

		// the instance parameter is replaced with the live instance for the serialised ID
		instance = GetResourceManager()->GetLiveHandle<VkInstance>(inst);
		VkResult vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, NULL);
		RDCASSERTEQUAL(vkr, VK_SUCCESS);

		// NOTE(review): this assert is the only guard before devices[physIndex] is read below.
		// If fewer devices are enumerated here than the serialised index requires, that read
		// would be out of bounds - confirm how this case should be handled.
		RDCASSERT(count > physIndex);
		devices = new VkPhysicalDevice[count];

		// make room for this index in both per-physical-device arrays
		if(physIndex >= m_PhysicalDevices.size())
		{
			m_PhysicalDevices.resize(physIndex+1);
			m_MemIdxMaps.resize(physIndex+1);
		}

		vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, devices);
		RDCASSERTEQUAL(vkr, VK_SUCCESS);

		// PORTABILITY match up physical devices to those available on replay

		pd = devices[physIndex];

		// NOTE(review): devices[] holds 'count' entries but this loop indexes it by
		// m_PhysicalDevices index - verify m_PhysicalDevices.size() cannot exceed count here.
		for(size_t i=0; i < m_PhysicalDevices.size(); i++)
		{
			// physical devices might be re-created inside EnumeratePhysicalDevices every time, so
			// we need to re-wrap any previously enumerated physical devices
			if(m_PhysicalDevices[i] != VK_NULL_HANDLE)
			{
				// the slot for the device being processed is expected to still be empty
				RDCASSERTNOTEQUAL(i, physIndex);
				GetWrapped(m_PhysicalDevices[i])->RewrapObject(devices[i]);
			}
		}

		SAFE_DELETE_ARRAY(devices);

		GetResourceManager()->WrapResource(instance, pd);
		GetResourceManager()->AddLiveResource(physId, pd);

		m_PhysicalDevices[physIndex] = pd;

		// keep a heap copy of the serialised map for this device index.
		// NOTE(review): the array is not freed in this function - presumably m_MemIdxMaps owns it
		// and releases it elsewhere; confirm.
		uint32_t *storedMap = new uint32_t[32];
		memcpy(storedMap, memIdxMap, sizeof(memIdxMap));
		m_MemIdxMaps[physIndex] = storedMap;

		// at this point the structs still hold the serialised (captured) values
		RDCLOG("Captured log describes physical device %u:", physIndex);
		RDCLOG("   - %s (ver %x) - %04x:%04x", physProps.deviceName, physProps.driverVersion, physProps.vendorID, physProps.deviceID);

		// overwrite them with the properties of the device chosen above, for logging
		ObjDisp(pd)->GetPhysicalDeviceProperties(Unwrap(pd), &physProps);
		ObjDisp(pd)->GetPhysicalDeviceMemoryProperties(Unwrap(pd), &memProps);
		ObjDisp(pd)->GetPhysicalDeviceFeatures(Unwrap(pd), &physFeatures);
		
		RDCLOG("Replaying on physical device %u:", physIndex);
		RDCLOG("   - %s (ver %x) - %04x:%04x", physProps.deviceName, physProps.driverVersion, physProps.vendorID, physProps.deviceID);

	}

	return true;
}
Esempio n. 18
0
// Tears down everything owned by this WrappedVulkan: internal command buffers and their pool,
// cleanup memory allocations, the debug manager, the debug report callback, the resource
// manager's wrapped objects, and finally the real device and instance.
//
// The steps are order-dependent: anything needed to destroy the real device and instance
// (unwrapped handles and dispatch tables) is captured BEFORE the wrappers are deleted, and the
// real objects are destroyed last.
void WrappedVulkan::Shutdown()
{
	// flush out any pending commands
	SubmitCmds();
	FlushQ();
	
	// since we didn't create proper registered resources for our command buffers,
	// they won't be taken down properly with the pool. So we release them (just our
	// data) here.
	for(size_t i=0; i < m_InternalCmds.freecmds.size(); i++)
		GetResourceManager()->ReleaseWrappedResource(m_InternalCmds.freecmds[i]);

	// destroy the pool
	ObjDisp(m_Device)->DestroyCommandPool(Unwrap(m_Device), Unwrap(m_InternalCmds.cmdpool), NULL);
	GetResourceManager()->ReleaseWrappedResource(m_InternalCmds.cmdpool);
	
	// we do more in Shutdown than the equivalent vkDestroyInstance since on replay there's
	// no explicit vkDestroyDevice, we destroy the device here then the instance

	// destroy any replay objects that aren't specifically to do with the frame capture
	for(size_t i=0; i < m_CleanupMems.size(); i++)
	{
		ObjDisp(m_Device)->FreeMemory(Unwrap(m_Device), Unwrap(m_CleanupMems[i]), NULL);
		GetResourceManager()->ReleaseWrappedResource(m_CleanupMems[i]);
	}
	m_CleanupMems.clear();

	// destroy debug manager and any objects it created
	SAFE_DELETE(m_DebugManager);

	// only destroy the debug report callback if the entry point is available and a callback
	// was actually stored
	if(ObjDisp(m_Instance)->DestroyDebugReportCallbackEXT && m_DbgMsgCallback != VK_NULL_HANDLE)
		ObjDisp(m_Instance)->DestroyDebugReportCallbackEXT(Unwrap(m_Instance), m_DbgMsgCallback, NULL);

	// need to store the unwrapped device and instance to destroy the
	// API object after resource manager shutdown
	VkInstance inst = Unwrap(m_Instance);
	VkDevice dev = Unwrap(m_Device);
	
	// likewise fetch the dispatch tables now, while the wrapped handles are still usable
	const VkLayerDispatchTable *vt = ObjDisp(m_Device);
	const VkLayerInstanceDispatchTable *vit = ObjDisp(m_Instance);

	// this destroys the wrapped objects for the devices and instances
	m_ResourceManager->Shutdown();

	// NOTE(review): the device and instance wrappers are deleted explicitly here, after the
	// resource manager shutdown - presumably the resource manager does not free them itself;
	// confirm.
	delete GetWrapped(m_Device);
	delete GetWrapped(m_Instance);
	
	// the wrapped handles are no longer valid, so clear them
	m_PhysicalDevice = VK_NULL_HANDLE;
	m_Device = VK_NULL_HANDLE;
	m_Instance = VK_NULL_HANDLE;

	m_PhysicalDevices.clear();

	// each entry of m_QueueFamilies is an array that is freed here
	for(size_t i=0; i < m_QueueFamilies.size(); i++)
		delete[] m_QueueFamilies[i];

	m_QueueFamilies.clear();

	// finally destroy device then instance
	vt->DestroyDevice(dev, NULL);
	vit->DestroyInstance(inst, NULL);
}
Esempio n. 19
0
ResourceId D3D12Replay::RenderOverlay(ResourceId texid, CompType typeHint, DebugOverlay overlay,
                                      uint32_t eventId, const vector<uint32_t> &passEvents)
{
  ID3D12Resource *resource = WrappedID3D12Resource::GetList()[texid];

  if(resource == NULL)
    return ResourceId();

  D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc();

  std::vector<D3D12_RESOURCE_BARRIER> barriers;
  int resType = 0;
  GetDebugManager()->PrepareTextureSampling(resource, typeHint, resType, barriers);

  D3D12_RESOURCE_DESC overlayTexDesc;
  overlayTexDesc.Alignment = 0;
  overlayTexDesc.DepthOrArraySize = 1;
  overlayTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  overlayTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  overlayTexDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM;
  overlayTexDesc.Height = resourceDesc.Height;
  overlayTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  overlayTexDesc.MipLevels = 1;
  overlayTexDesc.SampleDesc = resourceDesc.SampleDesc;
  overlayTexDesc.Width = resourceDesc.Width;

  D3D12_HEAP_PROPERTIES heapProps;
  heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
  heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
  heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
  heapProps.CreationNodeMask = 1;
  heapProps.VisibleNodeMask = 1;

  D3D12_RESOURCE_DESC currentOverlayDesc;
  RDCEraseEl(currentOverlayDesc);
  if(m_Overlay.Texture)
    currentOverlayDesc = m_Overlay.Texture->GetDesc();

  WrappedID3D12Resource *wrappedCustomRenderTex = (WrappedID3D12Resource *)m_Overlay.Texture;

  // need to recreate backing custom render tex
  if(overlayTexDesc.Width != currentOverlayDesc.Width ||
     overlayTexDesc.Height != currentOverlayDesc.Height ||
     overlayTexDesc.Format != currentOverlayDesc.Format ||
     overlayTexDesc.SampleDesc.Count != currentOverlayDesc.SampleDesc.Count ||
     overlayTexDesc.SampleDesc.Quality != currentOverlayDesc.SampleDesc.Quality)
  {
    SAFE_RELEASE(m_Overlay.Texture);
    m_Overlay.resourceId = ResourceId();

    ID3D12Resource *customRenderTex = NULL;
    HRESULT hr = m_pDevice->CreateCommittedResource(
        &heapProps, D3D12_HEAP_FLAG_NONE, &overlayTexDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, NULL,
        __uuidof(ID3D12Resource), (void **)&customRenderTex);
    if(FAILED(hr))
    {
      RDCERR("Failed to create custom render tex HRESULT: %s", ToStr(hr).c_str());
      return ResourceId();
    }
    wrappedCustomRenderTex = (WrappedID3D12Resource *)customRenderTex;

    customRenderTex->SetName(L"customRenderTex");

    m_Overlay.Texture = wrappedCustomRenderTex;
    m_Overlay.resourceId = wrappedCustomRenderTex->GetResourceID();
  }

  D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState;

  ID3D12Resource *renderDepth = NULL;

  D3D12Descriptor *dsView = GetWrapped(rs.dsv);

  D3D12_RESOURCE_DESC depthTexDesc = {};
  D3D12_DEPTH_STENCIL_VIEW_DESC dsViewDesc = {};
  if(dsView)
  {
    ID3D12Resource *realDepth = dsView->nonsamp.resource;

    dsViewDesc = dsView->nonsamp.dsv;

    depthTexDesc = realDepth->GetDesc();
    depthTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
    depthTexDesc.Alignment = 0;

    HRESULT hr = S_OK;

    hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc,
                                            D3D12_RESOURCE_STATE_COPY_DEST, NULL,
                                            __uuidof(ID3D12Resource), (void **)&renderDepth);
    if(FAILED(hr))
    {
      RDCERR("Failed to create renderDepth HRESULT: %s", ToStr(hr).c_str());
      return m_Overlay.resourceId;
    }

    renderDepth->SetName(L"Overlay renderDepth");

    ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

    const vector<D3D12_RESOURCE_STATES> &states =
        m_pDevice->GetSubresourceStates(GetResID(realDepth));

    vector<D3D12_RESOURCE_BARRIER> depthBarriers;
    depthBarriers.reserve(states.size());
    for(size_t i = 0; i < states.size(); i++)
    {
      D3D12_RESOURCE_BARRIER b;

      // skip unneeded barriers
      if(states[i] & D3D12_RESOURCE_STATE_COPY_SOURCE)
        continue;

      b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
      b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      b.Transition.pResource = realDepth;
      b.Transition.Subresource = (UINT)i;
      b.Transition.StateBefore = states[i];
      b.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

      depthBarriers.push_back(b);
    }

    if(!depthBarriers.empty())
      list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]);

    list->CopyResource(renderDepth, realDepth);

    for(size_t i = 0; i < depthBarriers.size(); i++)
      std::swap(depthBarriers[i].Transition.StateBefore, depthBarriers[i].Transition.StateAfter);

    if(!depthBarriers.empty())
      list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]);

    D3D12_RESOURCE_BARRIER b = {};

    b.Transition.pResource = renderDepth;
    b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
    b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;

    // prepare tex resource for copying
    list->ResourceBarrier(1, &b);

    list->Close();
  }

  D3D12_RENDER_TARGET_VIEW_DESC rtDesc = {};
  rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
  rtDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM;
  rtDesc.Texture2D.MipSlice = 0;
  rtDesc.Texture2D.PlaneSlice = 0;

  if(overlayTexDesc.SampleDesc.Count > 1 || overlayTexDesc.SampleDesc.Quality > 0)
    rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS;

  D3D12_CPU_DESCRIPTOR_HANDLE rtv = GetDebugManager()->GetCPUHandle(OVERLAY_RTV);

  m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv);

  ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

  FLOAT black[] = {0.0f, 0.0f, 0.0f, 0.0f};
  list->ClearRenderTargetView(rtv, black, 0, NULL);

  D3D12_CPU_DESCRIPTOR_HANDLE dsv = {};

  if(renderDepth)
  {
    dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV);
    m_pDevice->CreateDepthStencilView(
        renderDepth, dsViewDesc.Format == DXGI_FORMAT_UNKNOWN ? NULL : &dsViewDesc, dsv);
  }

  D3D12_DEPTH_STENCIL_DESC dsDesc;

  dsDesc.BackFace.StencilFailOp = dsDesc.BackFace.StencilPassOp =
      dsDesc.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
  dsDesc.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  dsDesc.FrontFace.StencilFailOp = dsDesc.FrontFace.StencilPassOp =
      dsDesc.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
  dsDesc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  dsDesc.DepthEnable = TRUE;
  dsDesc.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
  dsDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
  dsDesc.StencilEnable = FALSE;
  dsDesc.StencilReadMask = dsDesc.StencilWriteMask = 0xff;

  WrappedID3D12PipelineState *pipe = NULL;

  if(rs.pipe != ResourceId())
    pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(overlay == DebugOverlay::NaN || overlay == DebugOverlay::Clipping)
  {
    // just need the basic texture
  }
  else if(overlay == DebugOverlay::Drawcall)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      float overlayConsts[4] = {0.8f, 0.1f, 0.8f, 1.0f};
      ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts);

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      float clearColour[] = {0.0f, 0.0f, 0.0f, 0.5f};
      list->ClearRenderTargetView(rtv, clearColour, 0, NULL);

      list->Close();
      list = NULL;

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE();

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::BackfaceCull)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      D3D12_CULL_MODE origCull = psoDesc.RasterizerState.CullMode;

      float redCol[4] = {1.0f, 0.0f, 0.0f, 1.0f};
      ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(redCol);

      float greenCol[4] = {0.0f, 1.0f, 0.0f, 1.0f};
      ID3DBlob *green = m_pDevice->GetShaderCache()->MakeFixedColShader(greenCol);

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      psoDesc.PS.pShaderBytecode = red->GetBufferPointer();
      psoDesc.PS.BytecodeLength = red->GetBufferSize();

      list->Close();
      list = NULL;

      ID3D12PipelineState *redPSO = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&redPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      psoDesc.RasterizerState.CullMode = origCull;
      psoDesc.PS.pShaderBytecode = green->GetBufferPointer();
      psoDesc.PS.BytecodeLength = green->GetBufferSize();

      ID3D12PipelineState *greenPSO = NULL;
      hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                  (void **)&greenPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(redPSO);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(redPSO);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE();

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs.pipe = GetResID(greenPSO);

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(red);
      SAFE_RELEASE(green);
      SAFE_RELEASE(redPSO);
      SAFE_RELEASE(greenPSO);
    }
  }
  else if(overlay == DebugOverlay::Wireframe)
  {
    // Wireframe overlay: replays the current draw into the overlay target using a copy of its own
    // pipeline, switched to wireframe fill and a fixed-colour pixel shader.
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      float overlayConsts[] = {200.0f / 255.0f, 255.0f / 255.0f, 0.0f / 255.0f, 1.0f};
      ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts);

      // the blob is dereferenced immediately below, so bail out cleanly if it could not be
      // created. The list is closed before returning, as on the other failure paths.
      if(ps == NULL)
      {
        RDCERR("Failed to create wireframe overlay shader");
        list->Close();
        list = NULL;
        return m_Overlay.resourceId;
      }

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      // no depth or stencil testing in the overlay pipeline
      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      // plain opaque write of all channels to a single R16G16B16A16_UNORM target
      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      // wireframe fill with culling and depth clipping off; everything else at defaults
      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_WIREFRAME;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      // clear the overlay to the same RGB as the wireframe colour but with zero alpha
      overlayConsts[3] = 0.0f;
      list->ClearRenderTargetView(rtv, overlayConsts, 0, NULL);

      // the clear must be recorded and the list closed before the replay below
      list->Close();
      list = NULL;

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      // temporarily redirect the render state at the overlay pipeline and target, replay just
      // the draw, then put the original state back
      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = dsv;

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::ClearBeforePass || overlay == DebugOverlay::ClearBeforeDraw)
  {
    // Clear-before overlays: clear every bound render target to 'black', then replay either the
    // whole pass (ClearBeforePass) or only the current event (ClearBeforeDraw).
    const bool wholePass = (overlay == DebugOverlay::ClearBeforePass);

    // the current event is always replayed last; the pass events are only kept for ClearBeforePass
    vector<uint32_t> replayEvents;
    if(wholePass)
      replayEvents = passEvents;
    replayEvents.push_back(eventId);

    // finish the list recorded so far before any replay happens
    list->Close();
    list = NULL;

    // copy the render target bindings before the replay calls below
    const bool singleRange = rs.rtSingle;
    std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> targets = rs.rts;

    // for the pass variant, set up state at the first event without executing its draw
    if(wholePass)
      m_pDevice->ReplayLog(0, replayEvents[0], eReplay_WithoutDraw);

    list = m_pDevice->GetNewList();

    for(size_t rt = 0; rt < targets.size(); rt++)
    {
      // with rtSingle the targets are addressed as consecutive descriptors starting at the
      // first handle, otherwise each handle is looked up on its own
      D3D12Descriptor *desc = GetWrapped(targets[singleRange ? 0 : rt]);

      if(desc == NULL)
        continue;

      if(singleRange)
        desc += rt;

      Unwrap(list)->ClearRenderTargetView(UnwrapCPU(desc), black, 0, NULL);
    }

    list->Close();
    list = NULL;

    const size_t numEvents = replayEvents.size();

    for(size_t e = 0; e < numEvents; e++)
    {
      const uint32_t curEvent = replayEvents[e];

      // execute only the draw for this event
      m_pDevice->ReplayLog(curEvent, curEvent, eReplay_OnlyDraw);

      // then, within a pass, advance state up to (but not including) the next draw
      const bool hasNext = (e + 1 < numEvents);
      if(wholePass && hasNext)
        m_pDevice->ReplayLog(curEvent + 1, replayEvents[e + 1], eReplay_WithoutDraw);
    }
  }
  else if(overlay == DebugOverlay::ViewportScissor)
  {
    // Viewport/scissor overlay: draws one full-screen triangle restricted to the first viewport,
    // then a second one restricted to the first scissor rect, both with the fixed colour
    // pipeline.
    if(pipe && pipe->IsGraphics() && !rs.views.empty())
    {
      list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

      D3D12_VIEWPORT viewport = rs.views[0];
      list->RSSetViewports(1, &viewport);

      // scissor covering the whole target so that only the viewport restricts the draw
      D3D12_RECT scissor = {0, 0, 16384, 16384};
      list->RSSetScissorRects(1, &scissor);

      list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

      list->SetPipelineState(m_General.FixedColPipe);

      list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig);

      DebugPixelCBufferData pixelData = {0};

      // border colour (dark, 2px, opaque)
      pixelData.WireframeColour = Vec3f(0.1f, 0.1f, 0.1f);
      // inner colour (light, transparent)
      pixelData.Channels = Vec4f(0.2f, 0.2f, 0.9f, 0.7f);
      // NOTE(review): OutputDisplayFormat is 0 for the viewport pass and 1 for the scissor pass,
      // presumably selecting the mode in the pixel shader - confirm against the shader source.
      pixelData.OutputDisplayFormat = 0;
      // the rect origin and size are passed through these otherwise unrelated cbuffer fields
      pixelData.RangeMinimum = viewport.TopLeftX;
      pixelData.InverseRangeSize = viewport.TopLeftY;
      pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f);

      D3D12_GPU_VIRTUAL_ADDRESS viewCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      // the same buffer is bound to all three CBV slots of the root signature
      list->SetGraphicsRootConstantBufferView(0, viewCB);
      list->SetGraphicsRootConstantBufferView(1, viewCB);
      list->SetGraphicsRootConstantBufferView(2, viewCB);

      Vec4f dummy;
      list->SetGraphicsRoot32BitConstants(3, 4, &dummy.x, 0);

      float factor[4] = {1.0f, 1.0f, 1.0f, 1.0f};
      list->OMSetBlendFactor(factor);

      list->DrawInstanced(3, 1, 0, 0);

      // only draw the scissor rect if one is actually set - rs.scissors can be empty even
      // when rs.views is not, and indexing it unconditionally would read out of bounds
      if(!rs.scissors.empty())
      {
        // reuse the viewport to restrict the second draw to the scissor rect
        viewport.TopLeftX = (float)rs.scissors[0].left;
        viewport.TopLeftY = (float)rs.scissors[0].top;
        viewport.Width = (float)(rs.scissors[0].right - rs.scissors[0].left);
        viewport.Height = (float)(rs.scissors[0].bottom - rs.scissors[0].top);
        list->RSSetViewports(1, &viewport);

        pixelData.OutputDisplayFormat = 1;
        pixelData.RangeMinimum = viewport.TopLeftX;
        pixelData.InverseRangeSize = viewport.TopLeftY;
        pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f);

        D3D12_GPU_VIRTUAL_ADDRESS scissorCB =
            GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

        // only slot 1 is rebound for the scissor pass
        list->SetGraphicsRootConstantBufferView(1, scissorCB);

        list->DrawInstanced(3, 1, 0, 0);
      }
    }
  }
  else if(overlay == DebugOverlay::TriangleSizeDraw || overlay == DebugOverlay::TriangleSizePass)
  {
    // Triangle size overlay: re-draws the post-transform geometry of the current draw (or of
    // every draw in the pass) into the overlay target using the MeshVS / TriangleSizeGS /
    // TriangleSizePS shaders.
    //
    // rs.views[0] is read below, so at least one viewport is required in addition to a graphics
    // pipeline.
    if(pipe && pipe->IsGraphics() && !rs.views.empty())
    {
      SCOPED_TIMER("Triangle size");

      vector<uint32_t> events = passEvents;

      if(overlay == DebugOverlay::TriangleSizeDraw)
        events.clear();

      while(!events.empty())
      {
        const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[0]);

        // remove any non-drawcalls, like the pass boundary. An event with no drawcall
        // description at all is dropped too instead of being dereferenced.
        if(draw == NULL || !(draw->flags & DrawFlags::Drawcall))
          events.erase(events.begin());
        else
          break;
      }

      events.push_back(eventId);

      // start from the draw's own pipeline, then swap in the overlay root signature, a single
      // non-multisampled R16G16B16A16_UNORM target and the overlay shaders
      D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = pipe->GetGraphicsDesc();
      pipeDesc.pRootSignature = m_General.ConstOnlyRootSig;
      pipeDesc.SampleMask = 0xFFFFFFFF;
      pipeDesc.SampleDesc.Count = 1;
      pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;

      pipeDesc.NumRenderTargets = 1;
      RDCEraseEl(pipeDesc.RTVFormats);
      pipeDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      pipeDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

      // two float4 inputs: per-vertex "pos" in slot 0 and per-instance "sec" in slot 1
      D3D12_INPUT_ELEMENT_DESC ia[2] = {};
      ia[0].SemanticName = "pos";
      ia[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].SemanticName = "sec";
      ia[1].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].InputSlot = 1;
      ia[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;

      pipeDesc.InputLayout.NumElements = 2;
      pipeDesc.InputLayout.pInputElementDescs = ia;

      pipeDesc.VS.BytecodeLength = m_Overlay.MeshVS->GetBufferSize();
      pipeDesc.VS.pShaderBytecode = m_Overlay.MeshVS->GetBufferPointer();
      RDCEraseEl(pipeDesc.HS);
      RDCEraseEl(pipeDesc.DS);
      pipeDesc.GS.BytecodeLength = m_Overlay.TriangleSizeGS->GetBufferSize();
      pipeDesc.GS.pShaderBytecode = m_Overlay.TriangleSizeGS->GetBufferPointer();
      pipeDesc.PS.BytecodeLength = m_Overlay.TriangleSizePS->GetBufferSize();
      pipeDesc.PS.pShaderBytecode = m_Overlay.TriangleSizePS->GetBufferPointer();

      pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;

      // relax strict depth comparisons to their or-equal forms
      // NOTE(review): presumably so geometry re-drawn at the same depth as the original draw
      // still passes the depth test - confirm.
      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_GREATER)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL;
      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_LESS)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;

      // one lazily created pipeline per topology type, enough for all primitive topology types
      ID3D12PipelineState *pipes[D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH + 1] = {};

      DebugVertexCBuffer vertexData = {};
      vertexData.LineStrip = 0;
      vertexData.ModelViewProj = Matrix4f::Identity();
      vertexData.SpriteSize = Vec2f();

      Vec4f viewport(rs.views[0].Width, rs.views[0].Height);

      // NOTE(review): the overlay target is only bound here when a DSV is set; confirm that it
      // is bound elsewhere for the no-depth case.
      if(rs.dsv.ptr)
      {
        D3D12_CPU_DESCRIPTOR_HANDLE realDSV = Unwrap(rs.dsv);

        list->OMSetRenderTargets(1, &rtv, TRUE, &realDSV);
      }

      list->RSSetViewports(1, &rs.views[0]);

      D3D12_RECT scissor = {0, 0, 16384, 16384};
      list->RSSetScissorRects(1, &scissor);

      list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig);

      // slot 0: vertex constants, slot 1: overdraw colour ramp, slot 2 and the root constants:
      // viewport dimensions
      list->SetGraphicsRootConstantBufferView(
          0, GetDebugManager()->UploadConstants(&vertexData, sizeof(vertexData)));
      list->SetGraphicsRootConstantBufferView(
          1, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp)));
      list->SetGraphicsRootConstantBufferView(
          2, GetDebugManager()->UploadConstants(&viewport, sizeof(viewport)));
      list->SetGraphicsRoot32BitConstants(3, 4, &viewport.x, 0);

      for(size_t i = 0; i < events.size(); i++)
      {
        const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[i]);

        for(uint32_t inst = 0; draw && inst < RDCMAX(1U, draw->numInstances); inst++)
        {
          // prefer geometry shader output, falling back to vertex shader output
          MeshFormat fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::GSOut);
          if(fmt.vertexResourceId == ResourceId())
            fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::VSOut);

          if(fmt.vertexResourceId != ResourceId())
          {
            D3D_PRIMITIVE_TOPOLOGY topo = MakeD3DPrimitiveTopology(fmt.topology);

            // point lists and all patch lists use the point pipeline, line topologies the line
            // pipeline, everything else the triangle pipeline
            if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST ||
               topo >= D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST)
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
            else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ)
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
            else
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

            list->IASetPrimitiveTopology(topo);

            ID3D12PipelineState *&topoPipe = pipes[pipeDesc.PrimitiveTopologyType];

            if(topoPipe == NULL)
            {
              HRESULT hr = m_pDevice->CreateGraphicsPipelineState(
                  &pipeDesc, __uuidof(ID3D12PipelineState), (void **)&topoPipe);

              // without a pipeline there is nothing valid to draw with, so skip this instance
              // rather than binding a NULL pipeline state
              if(FAILED(hr) || topoPipe == NULL)
              {
                RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
                continue;
              }
            }

            ID3D12Resource *vb =
                m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.vertexResourceId);

            D3D12_VERTEX_BUFFER_VIEW vbView = {};
            vbView.BufferLocation = vb->GetGPUVirtualAddress() + fmt.vertexByteOffset;
            vbView.StrideInBytes = fmt.vertexByteStride;
            vbView.SizeInBytes = UINT(vb->GetDesc().Width - fmt.vertexByteOffset);

            // second bind is just a dummy, so we don't have to make a shader
            // that doesn't accept the secondary stream
            list->IASetVertexBuffers(0, 1, &vbView);
            list->IASetVertexBuffers(1, 1, &vbView);

            list->SetPipelineState(topoPipe);

            if(fmt.indexByteStride && fmt.indexResourceId != ResourceId())
            {
              ID3D12Resource *ib =
                  m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.indexResourceId);

              D3D12_INDEX_BUFFER_VIEW view;
              view.BufferLocation = ib->GetGPUVirtualAddress() + fmt.indexByteOffset;
              view.SizeInBytes = UINT(ib->GetDesc().Width - fmt.indexByteOffset);
              view.Format = fmt.indexByteStride == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
              list->IASetIndexBuffer(&view);

              list->DrawIndexedInstanced(fmt.numIndices, 1, 0, fmt.baseVertex, 0);
            }
            else
            {
              list->DrawInstanced(fmt.numIndices, 1, 0, 0);
            }
          }
        }
      }

      list->Close();
      list = NULL;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(size_t i = 0; i < ARRAY_COUNT(pipes); i++)
        SAFE_RELEASE(pipes[i]);
    }

    // restore back to normal
    m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
  else if(overlay == DebugOverlay::QuadOverdrawPass || overlay == DebugOverlay::QuadOverdrawDraw)
  {
    // Quad overdraw overlay: replays the selected event(s) with a callback object alive, using a
    // temporary R32_UINT UAV texture, then resolves that texture into the overlay target through
    // the overdraw colour ramp.
    SCOPED_TIMER("Quad Overdraw");

    vector<uint32_t> events = passEvents;

    // the per-draw variant only looks at the current event
    if(overlay == DebugOverlay::QuadOverdrawDraw)
      events.clear();

    events.push_back(eventId);

    // always true here, since eventId was just appended
    if(!events.empty())
    {
      if(overlay == DebugOverlay::QuadOverdrawPass)
      {
        // close the current list, rewind state to the first event of the pass without
        // executing its draw, then start a fresh list
        list->Close();
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);
        list = m_pDevice->GetNewList();
      }

      // the UAV texture is half the resource's width and height, but never smaller than 1x1
      uint32_t width = uint32_t(resourceDesc.Width >> 1);
      uint32_t height = resourceDesc.Height >> 1;

      width = RDCMAX(1U, width);
      height = RDCMAX(1U, height);

      // 2D texture array with 4 slices of R32_UINT, usable as a UAV
      D3D12_RESOURCE_DESC uavTexDesc = {};
      uavTexDesc.Alignment = 0;
      uavTexDesc.DepthOrArraySize = 4;
      uavTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
      uavTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
      uavTexDesc.Format = DXGI_FORMAT_R32_UINT;
      uavTexDesc.Height = height;
      uavTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
      uavTexDesc.MipLevels = 1;
      uavTexDesc.SampleDesc.Count = 1;
      uavTexDesc.SampleDesc.Quality = 0;
      uavTexDesc.Width = width;

      ID3D12Resource *overdrawTex = NULL;
      HRESULT hr = m_pDevice->CreateCommittedResource(
          &heapProps, D3D12_HEAP_FLAG_NONE, &uavTexDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
          NULL, __uuidof(ID3D12Resource), (void **)&overdrawTex);
      if(FAILED(hr))
      {
        // nothing else has been created yet, so just close the open list and give up
        RDCERR("Failed to create overdrawTex HRESULT: %s", ToStr(hr).c_str());
        list->Close();
        list = NULL;
        return m_Overlay.resourceId;
      }

      // create an SRV for the resolve pass, plus the UAV in two descriptor locations: the
      // handle whose GPU handle is used below, and the handle returned by GetUAVClearHandle
      m_pDevice->CreateShaderResourceView(overdrawTex, NULL,
                                          GetDebugManager()->GetCPUHandle(OVERDRAW_SRV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetCPUHandle(OVERDRAW_UAV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV));

      // zero the texture before anything is written to it
      UINT zeroes[4] = {0, 0, 0, 0};
      list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(OVERDRAW_UAV),
                                         GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV),
                                         overdrawTex, zeroes, 0, NULL);
      list->Close();
      list = NULL;

#if ENABLED(SINGLE_FLUSH_VALIDATE)
      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();
#endif

      // set up state at the first event without executing its draw
      m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      D3D12_SHADER_BYTECODE quadWrite;
      quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize();
      quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer();

      // declare callback struct here
      // NOTE(review): presumably the callback hooks the replay below so the listed events are
      // drawn with the quadWrite pixel shader writing into the UAV - confirm in
      // D3D12QuadOverdrawCallback. It must stay alive until its pipeline cache is released below.
      D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, events,
                                   ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)));

      // full replay across the whole range of selected events
      m_pDevice->ReplayLog(events.front(), events.back(), eReplay_Full);

      // resolve pass
      {
        list = m_pDevice->GetNewList();

        D3D12_RESOURCE_BARRIER overdrawBarriers[2] = {};

        // make sure UAV work is done then prepare for reading in PS
        // (overdrawBarriers[1].Type is left at its zero-initialised value)
        overdrawBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
        overdrawBarriers[0].UAV.pResource = overdrawTex;
        overdrawBarriers[1].Transition.pResource = overdrawTex;
        overdrawBarriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        overdrawBarriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
        overdrawBarriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;

        // issue both barriers before the resolve draw reads the texture
        list->ResourceBarrier(2, overdrawBarriers);

        list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

        // NOTE(review): rs.views[0] is read without checking that rs.views is non-empty
        list->RSSetViewports(1, &rs.views[0]);

        D3D12_RECT scissor = {0, 0, 16384, 16384};
        list->RSSetScissorRects(1, &scissor);

        list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

        list->SetPipelineState(m_Overlay.QuadResolvePipe);

        list->SetGraphicsRootSignature(m_Overlay.QuadResolveRootSig);

        GetDebugManager()->SetDescriptorHeaps(list, true, false);

        // root slot 0: overdraw colour ramp constants, root slot 1: table starting at the SRV
        list->SetGraphicsRootConstantBufferView(
            0, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp)));
        list->SetGraphicsRootDescriptorTable(1, GetDebugManager()->GetGPUHandle(OVERDRAW_SRV));

        // single full-screen triangle
        list->DrawInstanced(3, 1, 0, 0);

        list->Close();
        list = NULL;
      }

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      // release every pipeline and root signature held in the callback's cache, now that the
      // GPU work has been flushed
      for(auto it = cb.m_PipelineCache.begin(); it != cb.m_PipelineCache.end(); ++it)
      {
        SAFE_RELEASE(it->second.pipe);
        SAFE_RELEASE(it->second.sig);
      }

      SAFE_RELEASE(overdrawTex);
    }

    // after replaying a whole pass, bring state back to the current event without drawing it
    if(overlay == DebugOverlay::QuadOverdrawPass)
      m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }