Ejemplo n.º 1
0
// Walks the children of drawnode depth-first and, for every child that has at least one event,
// appends a GPUTimer to ctx.timers holding two timestamp queries, a pipeline statistics query and
// an occlusion query.
//
// For each such child the log is replayed from ctx.eventStart up to (but not including) the draw,
// the immediate context is serialised, and then the draw itself is replayed bracketed by the
// queries. ctx.eventStart is advanced past the child afterwards so the next replay continues from
// there.
//
// ctx      - counter context receiving the timers and tracking the replay start event.
// drawnode - node whose children (recursively) are instrumented.
void D3D11Replay::FillTimers(D3D11CounterContext &ctx, const DrawcallDescription &drawnode)
{
  const D3D11_QUERY_DESC timestampDesc = {D3D11_QUERY_TIMESTAMP, 0};
  const D3D11_QUERY_DESC pipeStatsDesc = {D3D11_QUERY_PIPELINE_STATISTICS, 0};
  const D3D11_QUERY_DESC occlusionDesc = {D3D11_QUERY_OCCLUSION, 0};

  // a node without children simply never enters the loop
  for(size_t childIdx = 0; childIdx < drawnode.children.size(); childIdx++)
  {
    const DrawcallDescription &child = drawnode.children[childIdx];

    // nested draws are processed before the child itself
    FillTimers(ctx, child);

    if(child.events.empty())
      continue;

    // the reference is taken after the recursion above, so no further push_back can invalidate it
    // while it is in use
    ctx.timers.push_back(GPUTimer());
    GPUTimer &timer = ctx.timers.back();

    timer.eventId = child.eventId;
    timer.occlusion = NULL;
    timer.stats = NULL;
    timer.after = NULL;
    timer.before = NULL;

    HRESULT hr = m_pDevice->CreateQuery(&timestampDesc, &timer.before);
    RDCASSERTEQUAL(hr, S_OK);
    hr = m_pDevice->CreateQuery(&timestampDesc, &timer.after);
    RDCASSERTEQUAL(hr, S_OK);
    hr = m_pDevice->CreateQuery(&pipeStatsDesc, &timer.stats);
    RDCASSERTEQUAL(hr, S_OK);
    hr = m_pDevice->CreateQuery(&occlusionDesc, &timer.occlusion);
    RDCASSERTEQUAL(hr, S_OK);

    // bring the pipeline up to the state just before the draw
    m_pDevice->ReplayLog(ctx.eventStart, child.eventId, eReplay_WithoutDraw);

    SerializeImmediateContext();

    // timestamps are only issued when both ends of the pair were created
    const bool haveTimestamps = (timer.before != NULL) && (timer.after != NULL);

    if(timer.stats)
      m_pImmediateContext->Begin(timer.stats);
    if(timer.occlusion)
      m_pImmediateContext->Begin(timer.occlusion);
    if(haveTimestamps)
      m_pImmediateContext->End(timer.before);

    m_pDevice->ReplayLog(ctx.eventStart, child.eventId, eReplay_OnlyDraw);

    // queries are closed in the reverse order they were opened
    if(haveTimestamps)
      m_pImmediateContext->End(timer.after);
    if(timer.occlusion)
      m_pImmediateContext->End(timer.occlusion);
    if(timer.stats)
      m_pImmediateContext->End(timer.stats);

    ctx.eventStart = child.eventId + 1;
  }
}
Ejemplo n.º 2
0
// Walks the children of drawnode depth-first and associates a GPUTimer with every child draw that
// has at least one event.
//
// When ctx.reuseIdx is -1 a new timer with a pair of timestamp queries is appended to ctx.timers;
// otherwise the timer at ctx.reuseIdx is reused and the index is advanced. The log is then
// replayed up to the draw, the immediate context is flushed, and the draw itself is replayed
// between the two timestamp queries (when both exist). ctx.eventStart is moved past the draw
// afterwards.
//
// ctx      - counter context holding the timers, the reuse index and the replay start event.
// drawnode - node whose children (recursively) are instrumented.
void D3D11DebugManager::FillTimers(CounterContext &ctx, const DrawcallTreeNode &drawnode)
{
  const D3D11_QUERY_DESC timestampDesc = {D3D11_QUERY_TIMESTAMP, 0};

  // a node without children simply never enters the loop
  for(size_t childIdx = 0; childIdx < drawnode.children.size(); childIdx++)
  {
    const FetchDrawcall &draw = drawnode.children[childIdx].draw;

    // nested draws are processed before the child itself
    FillTimers(ctx, drawnode.children[childIdx]);

    if(draw.events.count == 0)
      continue;

    GPUTimer *timer = NULL;

    if(ctx.reuseIdx != -1)
    {
      // a previous pass already created the queries, pick up the next one in order
      timer = &ctx.timers[ctx.reuseIdx++];
    }
    else
    {
      ctx.timers.push_back(GPUTimer());

      timer = &ctx.timers.back();
      timer->eventID = draw.eventID;
      timer->after = NULL;
      timer->before = NULL;

      HRESULT hr = m_pDevice->CreateQuery(&timestampDesc, &timer->before);
      RDCASSERTEQUAL(hr, S_OK);
      hr = m_pDevice->CreateQuery(&timestampDesc, &timer->after);
      RDCASSERTEQUAL(hr, S_OK);
    }

    // bring the pipeline up to the state just before the draw
    m_WrappedDevice->ReplayLog(ctx.eventStart, draw.eventID, eReplay_WithoutDraw);

    m_pImmediateContext->Flush();

    // the draw is always replayed; it is only bracketed by timestamps when both queries exist
    const bool haveTimestamps = (timer->before != NULL) && (timer->after != NULL);

    if(haveTimestamps)
      m_pImmediateContext->End(timer->before);

    m_WrappedDevice->ReplayLog(ctx.eventStart, draw.eventID, eReplay_OnlyDraw);

    if(haveTimestamps)
      m_pImmediateContext->End(timer->after);

    ctx.eventStart = draw.eventID + 1;
  }
}
Ejemplo n.º 3
0
// Serialises a vkAllocateMemory call: the device id, the allocation info and the id of the
// resulting memory object.
//
// When m_State is READING the allocation is re-created on the live device. The serialised memory
// type index is remapped through m_PhysicalDeviceData.memIdxMap first. On success the new memory
// is wrapped and registered against the serialised id, its creation info is initialised, and a
// transfer src/dst buffer covering the whole allocation is created, bound at offset 0 and stored
// in wholeMemBuf.
//
// localSerialiser - serialiser for this chunk (not referenced directly in this body;
//                   presumably consumed by the SERIALISE_ELEMENT macro - confirm against its
//                   definition).
// device          - device the memory was allocated on; replaced by the live handle when reading.
// pAllocateInfo   - allocation parameters to serialise.
// pAllocator      - unused here; NULL is always passed to the driver on replay.
// pMemory         - the allocated memory object whose id is serialised.
//
// Always returns true; a failed re-allocation is reported via RDCERR only.
bool WrappedVulkan::Serialise_vkAllocateMemory(
			Serialiser*                                 localSerialiser,
			VkDevice                                    device,
			const VkMemoryAllocateInfo*                 pAllocateInfo,
			const VkAllocationCallbacks*                pAllocator,
			VkDeviceMemory*                             pMemory)
{
	SERIALISE_ELEMENT(ResourceId, devId, GetResID(device));
	SERIALISE_ELEMENT(VkMemoryAllocateInfo, info, *pAllocateInfo);
	SERIALISE_ELEMENT(ResourceId, id, GetResID(*pMemory));

	if(m_State == READING)
	{
		VkDeviceMemory mem = VK_NULL_HANDLE;

		device = GetResourceManager()->GetLiveHandle<VkDevice>(devId);

		// serialised memory type index is non-remapped, so we remap now.
		// PORTABILITY may need to re-write info to change memory type index to the
		// appropriate index on replay
		info.memoryTypeIndex = m_PhysicalDeviceData.memIdxMap[info.memoryTypeIndex];

		VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &info, NULL, &mem);
		
		if(ret != VK_SUCCESS)
		{
			RDCERR("Failed on resource serialise-creation, VkResult: 0x%08x", ret);
		}
		else
		{
			ResourceId live = GetResourceManager()->WrapResource(Unwrap(device), mem);
			GetResourceManager()->AddLiveResource(id, mem);

			m_CreationInfo.m_Memory[live].Init(GetResourceManager(), m_CreationInfo, &info);

			// create a buffer with the whole memory range bound, for copying to and from
			// conveniently (for initial state data)
			VkBuffer buf = VK_NULL_HANDLE;

			VkBufferCreateInfo bufInfo = {
				VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, NULL, 0,
				info.allocationSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT,
			};

			ret = ObjDisp(device)->CreateBuffer(Unwrap(device), &bufInfo, NULL, &buf);
			RDCASSERTEQUAL(ret, VK_SUCCESS);

			ResourceId bufid = GetResourceManager()->WrapResource(Unwrap(device), buf);

			// binding can fail (it returns a VkResult), so check it the same way as the
			// buffer creation above rather than silently dropping the result
			ret = ObjDisp(device)->BindBufferMemory(Unwrap(device), Unwrap(buf), Unwrap(mem), 0);
			RDCASSERTEQUAL(ret, VK_SUCCESS);
			
			// register as a live-only resource, so it is cleaned up properly
			GetResourceManager()->AddLiveResource(bufid, buf);

			m_CreationInfo.m_Memory[live].wholeMemBuf = buf;
		}
	}

	return true;
}
Ejemplo n.º 4
0
// Creates the Vulkan surface for this output window from its Android native window handle (wnd)
// and stores it in the surface member.
//
// inst - instance the surface is created on.
void VulkanReplay::OutputWindow::CreateSurface(VkInstance inst)
{
  // fields in declaration order: sType, pNext, flags, window
  VkAndroidSurfaceCreateInfoKHR surfaceInfo = {
      VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR, NULL, 0, wnd,
  };

  VkResult result =
      ObjDisp(inst)->CreateAndroidSurfaceKHR(Unwrap(inst), &surfaceInfo, NULL, &surface);
  RDCASSERTEQUAL(result, VK_SUCCESS);
}
Ejemplo n.º 5
0
// Creates the Vulkan surface for this output window from its Win32 window handle (wnd) and stores
// it in the surface member.
//
// inst - instance the surface is created on.
void VulkanReplay::OutputWindow::CreateSurface(VkInstance inst)
{
  // fields in declaration order: sType, pNext, flags, hinstance, hwnd.
  // hinstance is filled in by the module lookup below.
  VkWin32SurfaceCreateInfoKHR surfaceInfo = {
      VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, NULL, 0, NULL, wnd,
  };

  // look up the module containing dllLocator without changing its reference count
  GetModuleHandleExA(
      GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
      (const char *)&dllLocator, (HMODULE *)&surfaceInfo.hinstance);

  VkResult result =
      ObjDisp(inst)->CreateWin32SurfaceKHR(Unwrap(inst), &surfaceInfo, NULL, &surface);
  RDCASSERTEQUAL(result, VK_SUCCESS);
}
Ejemplo n.º 6
0
// Records the commands that draw the mesh preview into the active output window.
//
// Does nothing if there is no position data, no active output window, or the window has no
// swapchain. Otherwise, in order:
//   1. every secondary draw with a vertex buffer, as wireframe in its own mesh colour
//   2. the main mesh, solid shaded and/or wireframe depending on cfg
//   3. the bounding box (cfg.showBBox)
//   4. axis helpers, or the helper frustum when cfg.position.unproject is set
//   5. the highlighted vertex with its primitive and adjacent/inactive vertices
//      (cfg.highlightVert != ~0U)
//
// The command buffer is ended and re-begun whenever work has to be flushed (running low on UBO
// ring entries, after the secondary draws, and around caching of highlight data). The final
// command buffer is ended before returning and is only submitted here when SINGLE_FLUSH_VALIDATE
// is enabled.
//
// eventId        - event passed through to the highlight cache.
// secondaryDraws - additional meshes rendered as wireframe before the main mesh.
// cfg            - main mesh description and display options.
void VulkanReplay::RenderMesh(uint32_t eventId, const vector<MeshFormat> &secondaryDraws,
                              const MeshDisplay &cfg)
{
  if(cfg.position.vertexResourceId == ResourceId() || cfg.position.numIndices == 0)
    return;

  auto it = m_OutputWindows.find(m_ActiveWinID);
  if(m_ActiveWinID == 0 || it == m_OutputWindows.end())
    return;

  OutputWindow &outw = it->second;

  // if the swapchain failed to create, do nothing. We will try to recreate it
  // again in CheckResizeOutputWindow (once per render 'frame')
  if(outw.swap == VK_NULL_HANDLE)
    return;

  VkDevice dev = m_pDriver->GetDev();
  VkCommandBuffer cmd = m_pDriver->GetNextCmd();
  const VkLayerDispatchTable *vt = ObjDisp(dev);

  VkResult vkr = VK_SUCCESS;

  VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
                                        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};

  vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  VkRenderPassBeginInfo rpbegin = {
      VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
      NULL,
      Unwrap(outw.rpdepth),
      Unwrap(outw.fbdepth),
      {{
           0, 0,
       },
       {m_DebugWidth, m_DebugHeight}},
      0,
      NULL,
  };
  vt->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE);

  VkViewport viewport = {0.0f, 0.0f, (float)m_DebugWidth, (float)m_DebugHeight, 0.0f, 1.0f};
  vt->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport);

  Matrix4f projMat =
      Matrix4f::Perspective(90.0f, 0.1f, 100000.0f, float(m_DebugWidth) / float(m_DebugHeight));
  // inverse of the viewing projection, consumed by the lit solid shading path below
  Matrix4f InvProj = projMat.Inverse();

  Matrix4f camMat = cfg.cam ? ((Camera *)cfg.cam)->GetMatrix() : Matrix4f::Identity();

  Matrix4f ModelViewProj = projMat.Mul(camMat);
  Matrix4f guessProjInv;

  if(cfg.position.unproject)
  {
    // the derivation of the projection matrix might not be right (hell, it could be an
    // orthographic projection). But it'll be close enough likely.
    Matrix4f guessProj =
        cfg.position.farPlane != FLT_MAX
            ? Matrix4f::Perspective(cfg.fov, cfg.position.nearPlane, cfg.position.farPlane, cfg.aspect)
            : Matrix4f::ReversePerspective(cfg.fov, cfg.position.nearPlane, cfg.aspect);

    if(cfg.ortho)
    {
      guessProj = Matrix4f::Orthographic(cfg.position.nearPlane, cfg.position.farPlane);
    }

    guessProjInv = guessProj.Inverse();

    ModelViewProj = projMat.Mul(camMat.Mul(guessProjInv));
  }

  if(!secondaryDraws.empty())
  {
    size_t mapsUsed = 0;

    for(size_t i = 0; i < secondaryDraws.size(); i++)
    {
      const MeshFormat &fmt = secondaryDraws[i];

      if(fmt.vertexResourceId != ResourceId())
      {
        // TODO should move the color to a push constant so we don't have to map all the time
        uint32_t uboOffs = 0;
        MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);

        data->mvp = ModelViewProj;
        data->color = Vec4f(fmt.meshColor.x, fmt.meshColor.y, fmt.meshColor.z, fmt.meshColor.w);
        data->homogenousInput = cfg.position.unproject;
        data->pointSpriteSize = Vec2f(0.0f, 0.0f);
        data->displayFormat = MESHDISPLAY_SOLID;
        data->rawoutput = 0;

        m_MeshRender.UBO.Unmap();

        mapsUsed++;

        if(mapsUsed + 1 >= m_MeshRender.UBO.GetRingCount())
        {
          // flush and sync so we can use more maps
          vt->CmdEndRenderPass(Unwrap(cmd));

          vkr = vt->EndCommandBuffer(Unwrap(cmd));
          RDCASSERTEQUAL(vkr, VK_SUCCESS);

          m_pDriver->SubmitCmds();
          m_pDriver->FlushQ();

          mapsUsed = 0;

          cmd = m_pDriver->GetNextCmd();

          vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
          RDCASSERTEQUAL(vkr, VK_SUCCESS);
          vt->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE);

          vt->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport);
        }

        MeshDisplayPipelines secondaryCache = GetDebugManager()->CacheMeshDisplayPipelines(
            m_MeshRender.PipeLayout, secondaryDraws[i], secondaryDraws[i]);

        vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                                  Unwrap(m_MeshRender.PipeLayout), 0, 1,
                                  UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

        vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                            Unwrap(secondaryCache.pipes[MeshDisplayPipelines::ePipe_WireDepth]));

        VkBuffer vb =
            m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(fmt.vertexResourceId);

        VkDeviceSize offs = fmt.vertexByteOffset;
        vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(vb), &offs);

        if(fmt.indexByteStride)
        {
          VkIndexType idxtype = VK_INDEX_TYPE_UINT16;
          if(fmt.indexByteStride == 4)
            idxtype = VK_INDEX_TYPE_UINT32;

          if(fmt.indexResourceId != ResourceId())
          {
            // look the index buffer up the same way as the vertex buffer above and as every
            // other buffer in this function
            VkBuffer ib =
                m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(fmt.indexResourceId);

            vt->CmdBindIndexBuffer(Unwrap(cmd), Unwrap(ib), fmt.indexByteOffset, idxtype);
          }
          vt->CmdDrawIndexed(Unwrap(cmd), fmt.numIndices, 1, 0, fmt.baseVertex, 0);
        }
        else
        {
          vt->CmdDraw(Unwrap(cmd), fmt.numIndices, 1, 0, 0);
        }
      }
    }

    {
      // flush and sync so we can use more maps
      vt->CmdEndRenderPass(Unwrap(cmd));

      vkr = vt->EndCommandBuffer(Unwrap(cmd));
      RDCASSERTEQUAL(vkr, VK_SUCCESS);

      m_pDriver->SubmitCmds();
      m_pDriver->FlushQ();

      cmd = m_pDriver->GetNextCmd();

      vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);
      vt->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE);

      vt->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport);
    }
  }

  MeshDisplayPipelines cache = GetDebugManager()->CacheMeshDisplayPipelines(
      m_MeshRender.PipeLayout, cfg.position, cfg.second);

  if(cfg.position.vertexResourceId != ResourceId())
  {
    VkBuffer vb =
        m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(cfg.position.vertexResourceId);

    VkDeviceSize offs = cfg.position.vertexByteOffset;

    // we source all data from the first instanced value in the instanced case, so make sure we
    // offset correctly here. A step rate of 0 would divide by zero, so no offset is applied in
    // that case.
    if(cfg.position.instanced && cfg.position.instStepRate != 0)
      offs += cfg.position.vertexByteStride * (cfg.curInstance / cfg.position.instStepRate);

    vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(vb), &offs);
  }

  SolidShade solidShadeMode = cfg.solidShadeMode;

  // can't support secondary shading without a buffer - no pipeline will have been created
  if(solidShadeMode == SolidShade::Secondary && cfg.second.vertexResourceId == ResourceId())
    solidShadeMode = SolidShade::NoSolid;

  if(solidShadeMode == SolidShade::Secondary)
  {
    VkBuffer vb =
        m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(cfg.second.vertexResourceId);

    VkDeviceSize offs = cfg.second.vertexByteOffset;

    // we source all data from the first instanced value in the instanced case, so make sure we
    // offset correctly here. A step rate of 0 would divide by zero, so no offset is applied in
    // that case.
    if(cfg.second.instanced && cfg.second.instStepRate != 0)
      offs += cfg.second.vertexByteStride * (cfg.curInstance / cfg.second.instStepRate);

    vt->CmdBindVertexBuffers(Unwrap(cmd), 1, 1, UnwrapPtr(vb), &offs);
  }

  // solid render
  if(solidShadeMode != SolidShade::NoSolid && cfg.position.topology < Topology::PatchList)
  {
    VkPipeline pipe = VK_NULL_HANDLE;
    switch(solidShadeMode)
    {
      default:
      case SolidShade::Solid: pipe = cache.pipes[MeshDisplayPipelines::ePipe_SolidDepth]; break;
      case SolidShade::Lit: pipe = cache.pipes[MeshDisplayPipelines::ePipe_Lit]; break;
      case SolidShade::Secondary: pipe = cache.pipes[MeshDisplayPipelines::ePipe_Secondary]; break;
    }

    // can't support lit rendering without the pipeline - maybe geometry shader wasn't supported.
    if(solidShadeMode == SolidShade::Lit && pipe == VK_NULL_HANDLE)
      pipe = cache.pipes[MeshDisplayPipelines::ePipe_SolidDepth];

    uint32_t uboOffs = 0;
    MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);

    // reuse the inverse computed once up front instead of inverting projMat again
    if(solidShadeMode == SolidShade::Lit)
      data->invProj = InvProj;

    data->mvp = ModelViewProj;
    data->color = Vec4f(0.8f, 0.8f, 0.0f, 1.0f);
    data->homogenousInput = cfg.position.unproject;
    data->pointSpriteSize = Vec2f(0.0f, 0.0f);
    data->displayFormat = (uint32_t)solidShadeMode;
    data->rawoutput = 0;

    if(solidShadeMode == SolidShade::Secondary && cfg.second.showAlpha)
      data->displayFormat = MESHDISPLAY_SECONDARY_ALPHA;

    m_MeshRender.UBO.Unmap();

    vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                              Unwrap(m_MeshRender.PipeLayout), 0, 1,
                              UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

    vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(pipe));

    if(cfg.position.indexByteStride)
    {
      VkIndexType idxtype = VK_INDEX_TYPE_UINT16;
      if(cfg.position.indexByteStride == 4)
        idxtype = VK_INDEX_TYPE_UINT32;

      if(cfg.position.indexResourceId != ResourceId())
      {
        VkBuffer ib =
            m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(cfg.position.indexResourceId);

        vt->CmdBindIndexBuffer(Unwrap(cmd), Unwrap(ib), cfg.position.indexByteOffset, idxtype);
      }
      vt->CmdDrawIndexed(Unwrap(cmd), cfg.position.numIndices, 1, 0, cfg.position.baseVertex, 0);
    }
    else
    {
      vt->CmdDraw(Unwrap(cmd), cfg.position.numIndices, 1, 0, 0);
    }
  }

  // wireframe render
  if(solidShadeMode == SolidShade::NoSolid || cfg.wireframeDraw ||
     cfg.position.topology >= Topology::PatchList)
  {
    Vec4f wireCol =
        Vec4f(cfg.position.meshColor.x, cfg.position.meshColor.y, cfg.position.meshColor.z, 1.0f);

    uint32_t uboOffs = 0;
    MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);

    data->mvp = ModelViewProj;
    data->color = wireCol;
    data->displayFormat = (uint32_t)SolidShade::Solid;
    data->homogenousInput = cfg.position.unproject;
    data->pointSpriteSize = Vec2f(0.0f, 0.0f);
    data->rawoutput = 0;

    m_MeshRender.UBO.Unmap();

    vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                              Unwrap(m_MeshRender.PipeLayout), 0, 1,
                              UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

    vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                        Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_WireDepth]));

    if(cfg.position.indexByteStride)
    {
      VkIndexType idxtype = VK_INDEX_TYPE_UINT16;
      if(cfg.position.indexByteStride == 4)
        idxtype = VK_INDEX_TYPE_UINT32;

      if(cfg.position.indexResourceId != ResourceId())
      {
        VkBuffer ib =
            m_pDriver->GetResourceManager()->GetCurrentHandle<VkBuffer>(cfg.position.indexResourceId);

        vt->CmdBindIndexBuffer(Unwrap(cmd), Unwrap(ib), cfg.position.indexByteOffset, idxtype);
      }
      vt->CmdDrawIndexed(Unwrap(cmd), cfg.position.numIndices, 1, 0, cfg.position.baseVertex, 0);
    }
    else
    {
      vt->CmdDraw(Unwrap(cmd), cfg.position.numIndices, 1, 0, 0);
    }
  }

  // description of the helper geometry (bounding box, axes, frustum, highlights): float4
  // positions, initially drawn as a line list
  MeshFormat helper;
  helper.indexByteStride = 2;
  helper.topology = Topology::LineList;

  helper.format.type = ResourceFormatType::Regular;
  helper.format.compByteWidth = 4;
  helper.format.compCount = 4;
  helper.format.compType = CompType::Float;

  helper.vertexByteStride = sizeof(Vec4f);

  // cache pipelines for use in drawing wireframe helpers
  cache = GetDebugManager()->CacheMeshDisplayPipelines(m_MeshRender.PipeLayout, helper, helper);

  if(cfg.showBBox)
  {
    Vec4f a = Vec4f(cfg.minBounds.x, cfg.minBounds.y, cfg.minBounds.z, cfg.minBounds.w);
    Vec4f b = Vec4f(cfg.maxBounds.x, cfg.maxBounds.y, cfg.maxBounds.z, cfg.maxBounds.w);

    Vec4f TLN = Vec4f(a.x, b.y, a.z, 1.0f);    // TopLeftNear, etc...
    Vec4f TRN = Vec4f(b.x, b.y, a.z, 1.0f);
    Vec4f BLN = Vec4f(a.x, a.y, a.z, 1.0f);
    Vec4f BRN = Vec4f(b.x, a.y, a.z, 1.0f);

    Vec4f TLF = Vec4f(a.x, b.y, b.z, 1.0f);
    Vec4f TRF = Vec4f(b.x, b.y, b.z, 1.0f);
    Vec4f BLF = Vec4f(a.x, a.y, b.z, 1.0f);
    Vec4f BRF = Vec4f(b.x, a.y, b.z, 1.0f);

    // 12 frustum lines => 24 verts
    Vec4f bbox[24] = {
        TLN, TRN, TRN, BRN, BRN, BLN, BLN, TLN,

        TLN, TLF, TRN, TRF, BLN, BLF, BRN, BRF,

        TLF, TRF, TRF, BRF, BRF, BLF, BLF, TLF,
    };

    VkDeviceSize vboffs = 0;
    Vec4f *ptr = (Vec4f *)m_MeshRender.BBoxVB.Map(vboffs);

    memcpy(ptr, bbox, sizeof(bbox));

    m_MeshRender.BBoxVB.Unmap();

    vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs);

    uint32_t uboOffs = 0;
    MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);

    data->mvp = ModelViewProj;
    data->color = Vec4f(0.2f, 0.2f, 1.0f, 1.0f);
    data->displayFormat = (uint32_t)SolidShade::Solid;
    data->homogenousInput = 0;
    data->pointSpriteSize = Vec2f(0.0f, 0.0f);
    data->rawoutput = 0;

    m_MeshRender.UBO.Unmap();

    vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                              Unwrap(m_MeshRender.PipeLayout), 0, 1,
                              UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

    vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                        Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_WireDepth]));

    vt->CmdDraw(Unwrap(cmd), 24, 1, 0, 0);
  }

  // draw axis helpers
  if(!cfg.position.unproject)
  {
    VkDeviceSize vboffs = 0;
    vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.AxisFrustumVB.buf), &vboffs);

    // first axis line (vertices 0-1), red
    uint32_t uboOffs = 0;
    MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);

    data->mvp = ModelViewProj;
    data->color = Vec4f(1.0f, 0.0f, 0.0f, 1.0f);
    data->displayFormat = (uint32_t)SolidShade::Solid;
    data->homogenousInput = 0;
    data->pointSpriteSize = Vec2f(0.0f, 0.0f);
    data->rawoutput = 0;

    m_MeshRender.UBO.Unmap();

    vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                              Unwrap(m_MeshRender.PipeLayout), 0, 1,
                              UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

    vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                        Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_Wire]));

    vt->CmdDraw(Unwrap(cmd), 2, 1, 0, 0);

    // second axis line (vertices 2-3), green
    // poke the color (this would be a good candidate for a push constant)
    data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);

    data->mvp = ModelViewProj;
    data->color = Vec4f(0.0f, 1.0f, 0.0f, 1.0f);
    data->displayFormat = (uint32_t)SolidShade::Solid;
    data->homogenousInput = 0;
    data->pointSpriteSize = Vec2f(0.0f, 0.0f);
    data->rawoutput = 0;

    m_MeshRender.UBO.Unmap();

    vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                              Unwrap(m_MeshRender.PipeLayout), 0, 1,
                              UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);
    vt->CmdDraw(Unwrap(cmd), 2, 1, 2, 0);

    // third axis line (vertices 4-5), blue
    data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);

    data->mvp = ModelViewProj;
    data->color = Vec4f(0.0f, 0.0f, 1.0f, 1.0f);
    data->displayFormat = (uint32_t)SolidShade::Solid;
    data->homogenousInput = 0;
    data->pointSpriteSize = Vec2f(0.0f, 0.0f);
    data->rawoutput = 0;

    m_MeshRender.UBO.Unmap();

    vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                              Unwrap(m_MeshRender.PipeLayout), 0, 1,
                              UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);
    vt->CmdDraw(Unwrap(cmd), 2, 1, 4, 0);
  }

  // 'fake' helper frustum
  if(cfg.position.unproject)
  {
    VkDeviceSize vboffs = sizeof(Vec4f) * 6;    // skim the axis helpers
    vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.AxisFrustumVB.buf), &vboffs);

    uint32_t uboOffs = 0;
    MeshUBOData *data = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);

    data->mvp = ModelViewProj;
    data->color = Vec4f(1.0f, 1.0f, 1.0f, 1.0f);
    data->displayFormat = (uint32_t)SolidShade::Solid;
    data->homogenousInput = 0;
    data->pointSpriteSize = Vec2f(0.0f, 0.0f);
    data->rawoutput = 0;

    m_MeshRender.UBO.Unmap();

    vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                              Unwrap(m_MeshRender.PipeLayout), 0, 1,
                              UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

    vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                        Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_Wire]));

    vt->CmdDraw(Unwrap(cmd), 24, 1, 0, 0);
  }

  // show highlighted vertex
  if(cfg.highlightVert != ~0U)
  {
    {
      // need to end our cmd buffer, it might be submitted in GetBufferData when caching highlight
      // data
      vt->CmdEndRenderPass(Unwrap(cmd));

      vkr = vt->EndCommandBuffer(Unwrap(cmd));
      RDCASSERTEQUAL(vkr, VK_SUCCESS);

#if ENABLED(SINGLE_FLUSH_VALIDATE)
      m_pDriver->SubmitCmds();
#endif
    }

    m_HighlightCache.CacheHighlightingData(eventId, cfg);

    {
      // get a new cmdbuffer and begin it
      cmd = m_pDriver->GetNextCmd();

      vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);
      vt->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE);

      vt->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport);
    }

    Topology meshtopo = cfg.position.topology;

    ///////////////////////////////////////////////////////////////
    // vectors to be set from buffers, depending on topology

    // this vert (blue dot, required)
    FloatVector activeVertex;

    // primitive this vert is a part of (red prim, optional)
    vector<FloatVector> activePrim;

    // for patch lists, to show other verts in patch (green dots, optional)
    // for non-patch lists, we use the activePrim and adjacentPrimVertices
    // to show what other verts are related
    vector<FloatVector> inactiveVertices;

    // adjacency (line or tri, strips or lists) (green prims, optional)
    // will be N*M long, N adjacent prims of M verts each. M = primSize below
    vector<FloatVector> adjacentPrimVertices;

    helper.topology = Topology::TriangleList;
    uint32_t primSize = 3;    // number of verts per primitive

    if(meshtopo == Topology::LineList || meshtopo == Topology::LineStrip ||
       meshtopo == Topology::LineList_Adj || meshtopo == Topology::LineStrip_Adj)
    {
      // the cached pipelines are already for a line list, so no refresh is needed here
      primSize = 2;
      helper.topology = Topology::LineList;
    }
    else
    {
      // update the cache, as it's currently linelist
      helper.topology = Topology::TriangleList;
      cache = GetDebugManager()->CacheMeshDisplayPipelines(m_MeshRender.PipeLayout, helper, helper);
    }

    bool valid = m_HighlightCache.FetchHighlightPositions(cfg, activeVertex, activePrim,
                                                          adjacentPrimVertices, inactiveVertices);

    if(valid)
    {
      ////////////////////////////////////////////////////////////////
      // prepare rendering (for both vertices & primitives)

      // if data is from post transform, it will be in clipspace
      if(cfg.position.unproject)
        ModelViewProj = projMat.Mul(camMat.Mul(guessProjInv));
      else
        ModelViewProj = projMat.Mul(camMat);

      MeshUBOData uniforms = {};
      uniforms.mvp = ModelViewProj;
      uniforms.color = Vec4f(1.0f, 1.0f, 1.0f, 1.0f);
      uniforms.displayFormat = (uint32_t)SolidShade::Solid;
      uniforms.homogenousInput = cfg.position.unproject;
      uniforms.pointSpriteSize = Vec2f(0.0f, 0.0f);

      uint32_t uboOffs = 0;
      MeshUBOData *ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);
      *ubodata = uniforms;
      m_MeshRender.UBO.Unmap();

      vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                                Unwrap(m_MeshRender.PipeLayout), 0, 1,
                                UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

      vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                          Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_Solid]));

      ////////////////////////////////////////////////////////////////
      // render primitives

      // Draw active primitive (red)
      uniforms.color = Vec4f(1.0f, 0.0f, 0.0f, 1.0f);
      // poke the color (this would be a good candidate for a push constant)
      ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);
      *ubodata = uniforms;
      m_MeshRender.UBO.Unmap();
      vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                                Unwrap(m_MeshRender.PipeLayout), 0, 1,
                                UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

      if(activePrim.size() >= primSize)
      {
        VkDeviceSize vboffs = 0;
        Vec4f *ptr = (Vec4f *)m_MeshRender.BBoxVB.Map(vboffs, sizeof(Vec4f) * primSize);

        memcpy(ptr, &activePrim[0], sizeof(Vec4f) * primSize);

        m_MeshRender.BBoxVB.Unmap();

        vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs);

        vt->CmdDraw(Unwrap(cmd), primSize, 1, 0, 0);
      }

      // Draw adjacent primitives (green)
      uniforms.color = Vec4f(0.0f, 1.0f, 0.0f, 1.0f);
      // poke the color (this would be a good candidate for a push constant)
      ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);
      *ubodata = uniforms;
      m_MeshRender.UBO.Unmap();
      vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                                Unwrap(m_MeshRender.PipeLayout), 0, 1,
                                UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

      if(adjacentPrimVertices.size() >= primSize && (adjacentPrimVertices.size() % primSize) == 0)
      {
        VkDeviceSize vboffs = 0;
        Vec4f *ptr =
            (Vec4f *)m_MeshRender.BBoxVB.Map(vboffs, sizeof(Vec4f) * adjacentPrimVertices.size());

        memcpy(ptr, &adjacentPrimVertices[0], sizeof(Vec4f) * adjacentPrimVertices.size());

        m_MeshRender.BBoxVB.Unmap();

        vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs);

        vt->CmdDraw(Unwrap(cmd), (uint32_t)adjacentPrimVertices.size(), 1, 0, 0);
      }

      ////////////////////////////////////////////////////////////////
      // prepare to render dots
      float scale = 800.0f / float(m_DebugHeight);
      float asp = float(m_DebugWidth) / float(m_DebugHeight);

      uniforms.pointSpriteSize = Vec2f(scale / asp, scale);

      // Draw active vertex (blue)
      uniforms.color = Vec4f(0.0f, 0.0f, 1.0f, 1.0f);
      // poke the color (this would be a good candidate for a push constant)
      ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);
      *ubodata = uniforms;
      m_MeshRender.UBO.Unmap();
      vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                                Unwrap(m_MeshRender.PipeLayout), 0, 1,
                                UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

      // vertices are drawn with tri strips
      helper.topology = Topology::TriangleStrip;
      cache = GetDebugManager()->CacheMeshDisplayPipelines(m_MeshRender.PipeLayout, helper, helper);

      // each dot is 4 copies of the same position, drawn as a strip with a non-zero
      // pointSpriteSize set in the uniforms
      FloatVector vertSprite[4] = {
          activeVertex, activeVertex, activeVertex, activeVertex,
      };

      vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                                Unwrap(m_MeshRender.PipeLayout), 0, 1,
                                UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

      vt->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                          Unwrap(cache.pipes[MeshDisplayPipelines::ePipe_Solid]));

      {
        VkDeviceSize vboffs = 0;
        Vec4f *ptr = (Vec4f *)m_MeshRender.BBoxVB.Map(vboffs, sizeof(vertSprite));

        memcpy(ptr, &vertSprite[0], sizeof(vertSprite));

        m_MeshRender.BBoxVB.Unmap();

        vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs);

        vt->CmdDraw(Unwrap(cmd), 4, 1, 0, 0);
      }

      // Draw inactive vertices (green)
      uniforms.color = Vec4f(0.0f, 1.0f, 0.0f, 1.0f);
      // poke the color (this would be a good candidate for a push constant)
      ubodata = (MeshUBOData *)m_MeshRender.UBO.Map(&uboOffs);
      *ubodata = uniforms;
      m_MeshRender.UBO.Unmap();
      vt->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                                Unwrap(m_MeshRender.PipeLayout), 0, 1,
                                UnwrapPtr(m_MeshRender.DescSet), 1, &uboOffs);

      if(!inactiveVertices.empty())
      {
        // one 4-vertex sprite is written per inactive vertex, so request room for all of them
        // rather than for a single sprite
        VkDeviceSize vboffs = 0;
        FloatVector *ptr = (FloatVector *)m_MeshRender.BBoxVB.Map(
            vboffs, sizeof(vertSprite) * inactiveVertices.size());

        for(size_t i = 0; i < inactiveVertices.size(); i++)
        {
          *ptr++ = inactiveVertices[i];
          *ptr++ = inactiveVertices[i];
          *ptr++ = inactiveVertices[i];
          *ptr++ = inactiveVertices[i];
        }

        m_MeshRender.BBoxVB.Unmap();

        // draw each sprite by stepping the vertex buffer offset along
        for(size_t i = 0; i < inactiveVertices.size(); i++)
        {
          vt->CmdBindVertexBuffers(Unwrap(cmd), 0, 1, UnwrapPtr(m_MeshRender.BBoxVB.buf), &vboffs);

          vt->CmdDraw(Unwrap(cmd), 4, 1, 0, 0);

          vboffs += sizeof(FloatVector) * 4;
        }
      }
    }
  }

  vt->CmdEndRenderPass(Unwrap(cmd));

  vkr = vt->EndCommandBuffer(Unwrap(cmd));
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

#if ENABLED(SINGLE_FLUSH_VALIDATE)
  m_pDriver->SubmitCmds();
#endif
}
Ejemplo n.º 7
0
// Wrapped vkAllocateMemory.
//
// Forwards the allocation to the next dispatch table entry, wraps the returned
// VkDeviceMemory, and then either:
//  - when m_State >= WRITING: serialises the call into a chunk, attaches it to a
//    new resource record and, for host-visible memory types, creates map state;
//  - otherwise: registers the memory as a live resource and creates a transfer
//    buffer bound over the whole allocation (stored as wholeMemBuf).
//
// Returns the VkResult of the allocation. Note that on the non-writing path the
// result is overwritten by the result of creating the whole-memory buffer.
VkResult WrappedVulkan::vkAllocateMemory(
			VkDevice                                    device,
			const VkMemoryAllocateInfo*                 pAllocateInfo,
			const VkAllocationCallbacks*                pAllocator,
			VkDeviceMemory*                             pMemory)
{
	// the driver is given a private copy, so the memory type index can be remapped
	// through the device record's memIdxMap without modifying the caller's struct
	VkMemoryAllocateInfo info = *pAllocateInfo;
	if(m_State >= WRITING)
		info.memoryTypeIndex = GetRecord(device)->memIdxMap[info.memoryTypeIndex];

	VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &info, pAllocator, pMemory);

	// nothing to wrap or track if the allocation itself failed
	if(ret != VK_SUCCESS)
		return ret;

	ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pMemory);

	if(m_State < WRITING)
	{
		GetResourceManager()->AddLiveResource(id, *pMemory);

		m_CreationInfo.m_Memory[id].Init(GetResourceManager(), m_CreationInfo, pAllocateInfo);

		// a buffer spanning the entire allocation makes it easy to copy to and from
		// the memory (used for initial state data)
		VkBufferCreateInfo wholeBufInfo = {
			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, NULL, 0,
			info.allocationSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT,
		};

		VkBuffer wholeBuf = VK_NULL_HANDLE;

		ret = ObjDisp(device)->CreateBuffer(Unwrap(device), &wholeBufInfo, NULL, &wholeBuf);
		RDCASSERTEQUAL(ret, VK_SUCCESS);

		ResourceId wholeBufId = GetResourceManager()->WrapResource(Unwrap(device), wholeBuf);

		ObjDisp(device)->BindBufferMemory(Unwrap(device), Unwrap(wholeBuf), Unwrap(*pMemory), 0);

		// registering it as a live-only resource means it is cleaned up properly
		GetResourceManager()->AddLiveResource(wholeBufId, wholeBuf);

		m_CreationInfo.m_Memory[id].wholeMemBuf = wholeBuf;

		return ret;
	}

	Chunk *allocChunk = NULL;

	{
		CACHE_THREAD_SERIALISER();

		SCOPED_SERIALISE_CONTEXT(ALLOC_MEM);
		Serialise_vkAllocateMemory(localSerialiser, device, pAllocateInfo, NULL, pMemory);

		allocChunk = scope.Get();
	}

	// the GPU memory gets its own resource record holding the allocation chunk
	VkResourceRecord *memRecord = GetResourceManager()->AddResourceRecord(*pMemory);
	RDCASSERT(memRecord);

	memRecord->AddChunk(allocChunk);

	memRecord->Length = pAllocateInfo->allocationSize;

	// property flags are looked up with the index the application passed in, not
	// the remapped index that was handed to the driver
	uint32_t typeFlags = m_PhysicalDeviceData.fakeMemProps->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags;

	const bool hostVisible = (typeFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
	const bool hostCoherent = (typeFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;

	// memory that isn't host visible can't be mapped, so it needs no map state
	if(hostVisible)
	{
		memRecord->memMapState = new MemMapState();
		memRecord->memMapState->mapCoherent = hostCoherent;
		memRecord->memMapState->refData = NULL;
	}

	return ret;
}
Ejemplo n.º 8
0
// Wrapped vkCreateDevice.
//
// Steps, in order:
//  1. Make sure the device is created with a queue from a family that has
//     VK_QUEUE_GRAPHICS_BIT, appending an extra queue request if the application
//     did not ask for one.
//  2. Allocate the per-family VkQueue arrays in m_QueueFamilies.
//  3. Walk the pNext chain for the layer link info (to find the next layer's
//     vkCreateDevice) and for the optional loader data callback.
//  4. Call down the chain and, on success, wrap the device, record/serialise it
//     when m_State >= WRITING, and initialise per-device state (internal command
//     pool, physical device properties, memory indices, debug manager).
//
// Returns the result of the next layer's vkCreateDevice, or
// VK_ERROR_INITIALIZATION_FAILED if no suitable queue family exists or the
// layer link info is missing from the pNext chain.
VkResult WrappedVulkan::vkCreateDevice(
		VkPhysicalDevice                            physicalDevice,
		const VkDeviceCreateInfo*                   pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkDevice*                                   pDevice)
{
	// local copy so the queue list can be modified without touching the app's struct
	VkDeviceCreateInfo createInfo = *pCreateInfo;

	uint32_t qCount = 0;
	VkResult vkr = VK_SUCCESS;
	
	ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, NULL);

	VkQueueFamilyProperties *props = new VkQueueFamilyProperties[qCount];
	ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, props);

	// find a queue that supports all capabilities, and if one doesn't exist, add it.
	bool found = false;
	uint32_t qFamilyIdx = 0;
	VkQueueFlags search = (VK_QUEUE_GRAPHICS_BIT);

	// for queue priorities, if we need it. This must stay alive until the create
	// call below since the added queue request points at it.
	float one = 1.0f;

	// if we need to change the requested queues, it will point to this
	VkDeviceQueueCreateInfo *modQueues = NULL;

	for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++)
	{
		uint32_t idx = createInfo.pQueueCreateInfos[i].queueFamilyIndex;
		RDCASSERT(idx < qCount);

		// this requested queue is one we can use too
		if((props[idx].queueFlags & search) == search && createInfo.pQueueCreateInfos[i].queueCount > 0)
		{
			qFamilyIdx = idx;
			found = true;
			break;
		}
	}

	// if we didn't find it, search for which queue family we should add a request for
	if(!found)
	{
		RDCDEBUG("App didn't request a queue family we can use - adding our own");

		for(uint32_t i=0; i < qCount; i++)
		{
			if((props[i].queueFlags & search) == search)
			{
				qFamilyIdx = i;
				found = true;
				break;
			}
		}

		if(!found)
		{
			SAFE_DELETE_ARRAY(props);
			RDCERR("Can't add a queue with required properties for RenderDoc! Unsupported configuration");
			return VK_ERROR_INITIALIZATION_FAILED;
		}

		// we found the queue family, add it
		modQueues = new VkDeviceQueueCreateInfo[createInfo.queueCreateInfoCount + 1];
		for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++)
			modQueues[i] = createInfo.pQueueCreateInfos[i];

		// build the extra request from a zeroed struct: the array from new[] is
		// uninitialised, and sType/pNext (and any other members) must hold valid
		// values when the struct is handed down the chain
		VkDeviceQueueCreateInfo extraQueue = {};
		extraQueue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
		extraQueue.queueFamilyIndex = qFamilyIdx;
		extraQueue.queueCount = 1;
		extraQueue.pQueuePriorities = &one;

		modQueues[createInfo.queueCreateInfoCount] = extraQueue;

		createInfo.pQueueCreateInfos = modQueues;
		createInfo.queueCreateInfoCount++;
	}

	SAFE_DELETE_ARRAY(props);

	// allocate an array of queue handles for every requested family, indexed by
	// family index. The outer vector is grown as needed to fit the largest index.
	m_QueueFamilies.resize(createInfo.queueCreateInfoCount);
	for(size_t i=0; i < createInfo.queueCreateInfoCount; i++)
	{
		uint32_t family = createInfo.pQueueCreateInfos[i].queueFamilyIndex;
		uint32_t count = createInfo.pQueueCreateInfos[i].queueCount;
		m_QueueFamilies.resize(RDCMAX(m_QueueFamilies.size(), size_t(family+1)));

		m_QueueFamilies[family] = new VkQueue[count];
		for(uint32_t q=0; q < count; q++)
			m_QueueFamilies[family][q] = VK_NULL_HANDLE;
	}

	VkLayerDeviceCreateInfo *layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext;

	// step through the chain of pNext until we get to the link info
	while(layerCreateInfo &&
				(layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || 
				 layerCreateInfo->function != VK_LAYER_LINK_INFO)
			)
	{
		layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext;
	}
	RDCASSERT(layerCreateInfo);

	// without the link info there is no way to reach the next layer, so fail
	// cleanly instead of dereferencing a NULL pointer below
	if(layerCreateInfo == NULL)
	{
		RDCERR("Couldn't find layer link info in vkCreateDevice pNext chain");
		SAFE_DELETE_ARRAY(modQueues);
		return VK_ERROR_INITIALIZATION_FAILED;
	}

	PFN_vkGetDeviceProcAddr gdpa = layerCreateInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr;
	PFN_vkGetInstanceProcAddr gipa = layerCreateInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr;
	// move chain on for next layer
	layerCreateInfo->u.pLayerInfo = layerCreateInfo->u.pLayerInfo->pNext;

	PFN_vkCreateDevice createFunc = (PFN_vkCreateDevice)gipa(VK_NULL_HANDLE, "vkCreateDevice");

	// now search again through for the loader data callback (if it exists)
	layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext;

	// step through the chain of pNext
	while(layerCreateInfo &&
				(layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || 
				 layerCreateInfo->function != VK_LOADER_DATA_CALLBACK)
			)
	{
		layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext;
	}

	// if we found one (we might not - on old loaders), then store the func ptr for
	// use instead of SetDispatchTableOverMagicNumber
	if(layerCreateInfo)
	{
		RDCASSERT(m_SetDeviceLoaderData == layerCreateInfo->u.pfnSetDeviceLoaderData || m_SetDeviceLoaderData == NULL,
		          m_SetDeviceLoaderData, layerCreateInfo->u.pfnSetDeviceLoaderData);
		m_SetDeviceLoaderData = layerCreateInfo->u.pfnSetDeviceLoaderData;
	}

	VkResult ret = createFunc(Unwrap(physicalDevice), &createInfo, pAllocator, pDevice);
	
	// don't serialise out any of the pNext stuff for layer initialisation.
	// NOTE(review): this drops the whole chain, so it assumes the chain only held
	// loader/layer structures - nothing in this function verifies that.
	createInfo.pNext = NULL;

	if(ret == VK_SUCCESS)
	{
		InitDeviceTable(*pDevice, gdpa);

		ResourceId id = GetResourceManager()->WrapResource(*pDevice, *pDevice);
		
		if(m_State >= WRITING)
		{
			Chunk *chunk = NULL;

			{
				CACHE_THREAD_SERIALISER();

				SCOPED_SERIALISE_CONTEXT(CREATE_DEVICE);
				Serialise_vkCreateDevice(localSerialiser, physicalDevice, &createInfo, NULL, pDevice);

				chunk = scope.Get();
			}

			VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pDevice);
			RDCASSERT(record);

			record->AddChunk(chunk);

			// the device shares the memory index remapping of its physical device
			record->memIdxMap = GetRecord(physicalDevice)->memIdxMap;

			record->instDevInfo = new InstanceDeviceInfo();
		
#undef CheckExt
#define CheckExt(name) record->instDevInfo->name = GetRecord(m_Instance)->instDevInfo->name;

			// inherit extension enablement from instance, that way GetDeviceProcAddress can check
			// for enabled extensions for instance functions
			CheckInstanceExts();

#undef CheckExt
#define CheckExt(name) if(!strcmp(createInfo.ppEnabledExtensionNames[i], STRINGIZE(name))) { record->instDevInfo->name = true; }

			// mark each device extension that was requested at creation as enabled
			for(uint32_t i=0; i < createInfo.enabledExtensionCount; i++)
			{
				CheckDeviceExts();
			}
		
			InitDeviceExtensionTables(*pDevice);

			GetRecord(m_Instance)->AddParent(record);
		}
		else
		{
			GetResourceManager()->AddLiveResource(id, *pDevice);
		}

		VkDevice device = *pDevice;

		RDCASSERT(m_Device == VK_NULL_HANDLE); // MULTIDEVICE

		m_PhysicalDevice = physicalDevice;
		m_Device = device;

		m_QueueFamilyIdx = qFamilyIdx;

		// create the command pool for internal command buffers on the chosen
		// queue family, if it doesn't exist yet
		if(m_InternalCmds.cmdpool == VK_NULL_HANDLE)
		{
			VkCommandPoolCreateInfo poolInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, NULL, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, qFamilyIdx };
			vkr = ObjDisp(device)->CreateCommandPool(Unwrap(device), &poolInfo, NULL, &m_InternalCmds.cmdpool);
			RDCASSERTEQUAL(vkr, VK_SUCCESS);

			GetResourceManager()->WrapResource(Unwrap(device), m_InternalCmds.cmdpool);
		}
		
		ObjDisp(physicalDevice)->GetPhysicalDeviceProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.props);
		
		ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.memProps);

		ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures(Unwrap(physicalDevice), &m_PhysicalDeviceData.features);

		m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
		m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
		m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

		m_PhysicalDeviceData.fakeMemProps = GetRecord(physicalDevice)->memProps;

		m_DebugManager = new VulkanDebugManager(this, device);
	}

	// safe whether or not the queue list was modified (NULL if it wasn't)
	SAFE_DELETE_ARRAY(modQueues);

	return ret;
}
Ejemplo n.º 9
0
// Wrapped vkEnumeratePhysicalDevices.
//
// Always enumerates the full set of physical devices from the next dispatch
// table entry and makes sure each has a wrapper (re-using the wrapper stored in
// m_PhysicalDevices if one exists). When m_State >= WRITING a resource record is
// created for each newly wrapped device, holding its memory properties, the
// remapped memory indices and the serialised enumeration chunk.
//
// Output follows the Vulkan two-call convention:
//  - pPhysicalDevices == NULL: *pPhysicalDeviceCount receives the total count.
//  - pPhysicalDevices != NULL: at most *pPhysicalDeviceCount wrapped handles are
//    written, *pPhysicalDeviceCount receives the number written, and
//    VK_INCOMPLETE is returned if that is fewer than the total.
VkResult WrappedVulkan::vkEnumeratePhysicalDevices(
		VkInstance                                  instance,
		uint32_t*                                   pPhysicalDeviceCount,
		VkPhysicalDevice*                           pPhysicalDevices)
{
	uint32_t count;

	VkResult vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, NULL);

	if(vkr != VK_SUCCESS)
		return vkr;

	VkPhysicalDevice *devices = new VkPhysicalDevice[count];

	vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, devices);
	RDCASSERTEQUAL(vkr, VK_SUCCESS);

	m_PhysicalDevices.resize(count);
	
	for(uint32_t i=0; i < count; i++)
	{
		// it's perfectly valid for enumerate type functions to return the same handle
		// each time. If that happens, we will already have a wrapper created so just
		// return the wrapped object to the user and do nothing else
		if(m_PhysicalDevices[i] != VK_NULL_HANDLE)
		{
			GetWrapped(m_PhysicalDevices[i])->RewrapObject(devices[i]);
			devices[i] = m_PhysicalDevices[i];
		}
		else
		{
			GetResourceManager()->WrapResource(instance, devices[i]);
			
			if(m_State >= WRITING)
			{
				// add the record first since it's used in the serialise function below to fetch
				// the memory indices
				VkResourceRecord *record = GetResourceManager()->AddResourceRecord(devices[i]);
				RDCASSERT(record);
				
				record->memProps = new VkPhysicalDeviceMemoryProperties();

				ObjDisp(devices[i])->GetPhysicalDeviceMemoryProperties(Unwrap(devices[i]), record->memProps);

				m_PhysicalDevices[i] = devices[i];

				// we remap memory indices to discourage coherent maps as much as possible
				RemapMemoryIndices(record->memProps, &record->memIdxMap);
				
				{
					CACHE_THREAD_SERIALISER();

					SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS);
					Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]);

					record->AddChunk(scope.Get());
				}

				VkResourceRecord *instrecord = GetRecord(instance);

				instrecord->AddParent(record);

				// treat physical devices as pool members of the instance (ie. freed when the instance dies)
				{
					instrecord->LockChunks();
					instrecord->pooledChildren.push_back(record);
					instrecord->UnlockChunks();
				}
			}
		}
	}

	VkResult ret = VK_SUCCESS;

	if(pPhysicalDevices)
	{
		// on input *pPhysicalDeviceCount is the capacity of the caller's array, so
		// never copy more handles than that - doing so would overrun their buffer
		uint32_t numToCopy = count;

		if(pPhysicalDeviceCount && *pPhysicalDeviceCount < count)
		{
			numToCopy = *pPhysicalDeviceCount;
			ret = VK_INCOMPLETE;
		}

		memcpy(pPhysicalDevices, devices, numToCopy*sizeof(VkPhysicalDevice));

		if(pPhysicalDeviceCount) *pPhysicalDeviceCount = numToCopy;
	}
	else if(pPhysicalDeviceCount)
	{
		// count-only query
		*pPhysicalDeviceCount = count;
	}

	SAFE_DELETE_ARRAY(devices);

	return ret;
}
Ejemplo n.º 10
0
// Wrapped vkAllocateMemory.
//
// When m_State >= WRITING the memory type index is remapped through the device
// record's memIdxMap and the allocation size is rounded up to the size required
// by a transfer buffer covering the allocation (see below). The allocation is
// then forwarded to the next dispatch table entry and the returned memory is
// wrapped. After that:
//  - when m_State >= WRITING: the call is serialised (with the possibly enlarged
//    size and the original memory type index) into a chunk on a new resource
//    record, and map state is created for host-visible memory types;
//  - otherwise: the memory is registered as a live resource and a transfer
//    buffer bound over the whole allocation is created (stored as wholeMemBuf).
//
// Returns the VkResult of the allocation. Note that on the non-writing path the
// result is overwritten by the result of creating the whole-memory buffer.
VkResult WrappedVulkan::vkAllocateMemory(
			VkDevice                                    device,
			const VkMemoryAllocateInfo*                 pAllocateInfo,
			const VkAllocationCallbacks*                pAllocator,
			VkDeviceMemory*                             pMemory)
{
	VkMemoryAllocateInfo info = *pAllocateInfo;
	if(m_State >= WRITING)
	{
		info.memoryTypeIndex = GetRecord(device)->memIdxMap[info.memoryTypeIndex];

		// we need to be able to allocate a buffer that covers the whole memory range. However
		// if the memory is e.g. 100 bytes (arbitrary example) and buffers have memory requirements
		// such that it must be bound to a multiple of 128 bytes, then we can't create a buffer
		// that entirely covers a 100 byte allocation.
		// To get around this, we create a buffer of the allocation's size with the properties we
		// want, check its required size, then bump up the allocation size to that as if the application
		// had requested more. We're assuming here no system will require something like "buffer of
		// size N must be bound to memory of size N+O for some value of O overhead bytes".
		//
		// this could be optimised as maybe we'll be creating buffers of multiple sizes, but allocation
		// in vulkan is already expensive and making it a little more expensive isn't a big deal.
	
		VkBufferCreateInfo bufInfo = {
			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, NULL, 0,
			info.allocationSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT,
		};

		// since this is very short lived, it's not wrapped. Initialise it so that a
		// failed creation can never leave an indeterminate handle behind.
		VkBuffer buf = VK_NULL_HANDLE;

		VkResult vkr = ObjDisp(device)->CreateBuffer(Unwrap(device), &bufInfo, NULL, &buf);
		RDCASSERTEQUAL(vkr, VK_SUCCESS);

		if(vkr == VK_SUCCESS && buf != VK_NULL_HANDLE)
		{
			VkMemoryRequirements mrq = { 0 };
			ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), buf, &mrq);

			RDCASSERTMSG("memory requirements less than desired size", mrq.size >= bufInfo.size, mrq.size, bufInfo.size);

			// round up allocation size to allow creation of buffers
			if(mrq.size >= bufInfo.size)
				info.allocationSize = mrq.size;

			// only destroy the buffer if it was actually created
			ObjDisp(device)->DestroyBuffer(Unwrap(device), buf, NULL);
		}
	}

	VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &info, pAllocator, pMemory);

	// restore the memoryTypeIndex to the original, as that's what we want to serialise,
	// but maintain any potential modifications we made to info.allocationSize
	info.memoryTypeIndex = pAllocateInfo->memoryTypeIndex;
	
	if(ret == VK_SUCCESS)
	{
		ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pMemory);

		if(m_State >= WRITING)
		{
			Chunk *chunk = NULL;

			{
				CACHE_THREAD_SERIALISER();
					
				SCOPED_SERIALISE_CONTEXT(ALLOC_MEM);
				Serialise_vkAllocateMemory(localSerialiser, device, &info, NULL, pMemory);

				chunk = scope.Get();
			}
			
			// create resource record for gpu memory
			VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pMemory);
			RDCASSERT(record);

			record->AddChunk(chunk);

			record->Length = info.allocationSize;

			// info.memoryTypeIndex was restored above, so this lookup uses the index
			// the application passed in rather than the remapped one
			uint32_t memProps = m_PhysicalDeviceData.fakeMemProps->memoryTypes[info.memoryTypeIndex].propertyFlags;

			// if memory is not host visible, so not mappable, don't create map state at all
			if((memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0)
			{
				record->memMapState = new MemMapState();
				record->memMapState->mapCoherent = (memProps & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
				record->memMapState->refData = NULL;
			}
		}
		else
		{
			GetResourceManager()->AddLiveResource(id, *pMemory);

			m_CreationInfo.m_Memory[id].Init(GetResourceManager(), m_CreationInfo, &info);

			// create a buffer with the whole memory range bound, for copying to and from
			// conveniently (for initial state data)
			VkBuffer buf = VK_NULL_HANDLE;

			VkBufferCreateInfo bufInfo = {
				VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, NULL, 0,
				info.allocationSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT,
			};

			ret = ObjDisp(device)->CreateBuffer(Unwrap(device), &bufInfo, NULL, &buf);
			RDCASSERTEQUAL(ret, VK_SUCCESS);

			ResourceId bufid = GetResourceManager()->WrapResource(Unwrap(device), buf);

			ObjDisp(device)->BindBufferMemory(Unwrap(device), Unwrap(buf), Unwrap(*pMemory), 0);
			
			// register as a live-only resource, so it is cleaned up properly
			GetResourceManager()->AddLiveResource(bufid, buf);

			m_CreationInfo.m_Memory[id].wholeMemBuf = buf;
		}
	}

	return ret;
}
Ejemplo n.º 11
0
ResourceId D3D12Replay::RenderOverlay(ResourceId texid, CompType typeHint, DebugOverlay overlay,
                                      uint32_t eventId, const vector<uint32_t> &passEvents)
{
  ID3D12Resource *resource = WrappedID3D12Resource::GetList()[texid];

  if(resource == NULL)
    return ResourceId();

  D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc();

  std::vector<D3D12_RESOURCE_BARRIER> barriers;
  int resType = 0;
  GetDebugManager()->PrepareTextureSampling(resource, typeHint, resType, barriers);

  D3D12_RESOURCE_DESC overlayTexDesc;
  overlayTexDesc.Alignment = 0;
  overlayTexDesc.DepthOrArraySize = 1;
  overlayTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  overlayTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  overlayTexDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM;
  overlayTexDesc.Height = resourceDesc.Height;
  overlayTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  overlayTexDesc.MipLevels = 1;
  overlayTexDesc.SampleDesc = resourceDesc.SampleDesc;
  overlayTexDesc.Width = resourceDesc.Width;

  D3D12_HEAP_PROPERTIES heapProps;
  heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
  heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
  heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
  heapProps.CreationNodeMask = 1;
  heapProps.VisibleNodeMask = 1;

  D3D12_RESOURCE_DESC currentOverlayDesc;
  RDCEraseEl(currentOverlayDesc);
  if(m_Overlay.Texture)
    currentOverlayDesc = m_Overlay.Texture->GetDesc();

  WrappedID3D12Resource *wrappedCustomRenderTex = (WrappedID3D12Resource *)m_Overlay.Texture;

  // need to recreate backing custom render tex
  if(overlayTexDesc.Width != currentOverlayDesc.Width ||
     overlayTexDesc.Height != currentOverlayDesc.Height ||
     overlayTexDesc.Format != currentOverlayDesc.Format ||
     overlayTexDesc.SampleDesc.Count != currentOverlayDesc.SampleDesc.Count ||
     overlayTexDesc.SampleDesc.Quality != currentOverlayDesc.SampleDesc.Quality)
  {
    SAFE_RELEASE(m_Overlay.Texture);
    m_Overlay.resourceId = ResourceId();

    ID3D12Resource *customRenderTex = NULL;
    HRESULT hr = m_pDevice->CreateCommittedResource(
        &heapProps, D3D12_HEAP_FLAG_NONE, &overlayTexDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, NULL,
        __uuidof(ID3D12Resource), (void **)&customRenderTex);
    if(FAILED(hr))
    {
      RDCERR("Failed to create custom render tex HRESULT: %s", ToStr(hr).c_str());
      return ResourceId();
    }
    wrappedCustomRenderTex = (WrappedID3D12Resource *)customRenderTex;

    customRenderTex->SetName(L"customRenderTex");

    m_Overlay.Texture = wrappedCustomRenderTex;
    m_Overlay.resourceId = wrappedCustomRenderTex->GetResourceID();
  }

  D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState;

  ID3D12Resource *renderDepth = NULL;

  D3D12Descriptor *dsView = GetWrapped(rs.dsv);

  D3D12_RESOURCE_DESC depthTexDesc = {};
  D3D12_DEPTH_STENCIL_VIEW_DESC dsViewDesc = {};
  if(dsView)
  {
    ID3D12Resource *realDepth = dsView->nonsamp.resource;

    dsViewDesc = dsView->nonsamp.dsv;

    depthTexDesc = realDepth->GetDesc();
    depthTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
    depthTexDesc.Alignment = 0;

    HRESULT hr = S_OK;

    hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc,
                                            D3D12_RESOURCE_STATE_COPY_DEST, NULL,
                                            __uuidof(ID3D12Resource), (void **)&renderDepth);
    if(FAILED(hr))
    {
      RDCERR("Failed to create renderDepth HRESULT: %s", ToStr(hr).c_str());
      return m_Overlay.resourceId;
    }

    renderDepth->SetName(L"Overlay renderDepth");

    ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

    const vector<D3D12_RESOURCE_STATES> &states =
        m_pDevice->GetSubresourceStates(GetResID(realDepth));

    vector<D3D12_RESOURCE_BARRIER> depthBarriers;
    depthBarriers.reserve(states.size());
    for(size_t i = 0; i < states.size(); i++)
    {
      D3D12_RESOURCE_BARRIER b;

      // skip unneeded barriers
      if(states[i] & D3D12_RESOURCE_STATE_COPY_SOURCE)
        continue;

      b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
      b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      b.Transition.pResource = realDepth;
      b.Transition.Subresource = (UINT)i;
      b.Transition.StateBefore = states[i];
      b.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

      depthBarriers.push_back(b);
    }

    if(!depthBarriers.empty())
      list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]);

    list->CopyResource(renderDepth, realDepth);

    for(size_t i = 0; i < depthBarriers.size(); i++)
      std::swap(depthBarriers[i].Transition.StateBefore, depthBarriers[i].Transition.StateAfter);

    if(!depthBarriers.empty())
      list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]);

    D3D12_RESOURCE_BARRIER b = {};

    b.Transition.pResource = renderDepth;
    b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
    b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;

    // prepare tex resource for copying
    list->ResourceBarrier(1, &b);

    list->Close();
  }

  D3D12_RENDER_TARGET_VIEW_DESC rtDesc = {};
  rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
  rtDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM;
  rtDesc.Texture2D.MipSlice = 0;
  rtDesc.Texture2D.PlaneSlice = 0;

  if(overlayTexDesc.SampleDesc.Count > 1 || overlayTexDesc.SampleDesc.Quality > 0)
    rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS;

  D3D12_CPU_DESCRIPTOR_HANDLE rtv = GetDebugManager()->GetCPUHandle(OVERLAY_RTV);

  m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv);

  ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

  FLOAT black[] = {0.0f, 0.0f, 0.0f, 0.0f};
  list->ClearRenderTargetView(rtv, black, 0, NULL);

  D3D12_CPU_DESCRIPTOR_HANDLE dsv = {};

  if(renderDepth)
  {
    dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV);
    m_pDevice->CreateDepthStencilView(
        renderDepth, dsViewDesc.Format == DXGI_FORMAT_UNKNOWN ? NULL : &dsViewDesc, dsv);
  }

  D3D12_DEPTH_STENCIL_DESC dsDesc;

  dsDesc.BackFace.StencilFailOp = dsDesc.BackFace.StencilPassOp =
      dsDesc.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
  dsDesc.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  dsDesc.FrontFace.StencilFailOp = dsDesc.FrontFace.StencilPassOp =
      dsDesc.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
  dsDesc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  dsDesc.DepthEnable = TRUE;
  dsDesc.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
  dsDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
  dsDesc.StencilEnable = FALSE;
  dsDesc.StencilReadMask = dsDesc.StencilWriteMask = 0xff;

  WrappedID3D12PipelineState *pipe = NULL;

  if(rs.pipe != ResourceId())
    pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(overlay == DebugOverlay::NaN || overlay == DebugOverlay::Clipping)
  {
    // just need the basic texture
  }
  else if(overlay == DebugOverlay::Drawcall)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      float overlayConsts[4] = {0.8f, 0.1f, 0.8f, 1.0f};
      ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts);

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      float clearColour[] = {0.0f, 0.0f, 0.0f, 0.5f};
      list->ClearRenderTargetView(rtv, clearColour, 0, NULL);

      list->Close();
      list = NULL;

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE();

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::BackfaceCull)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      D3D12_CULL_MODE origCull = psoDesc.RasterizerState.CullMode;

      float redCol[4] = {1.0f, 0.0f, 0.0f, 1.0f};
      ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(redCol);

      float greenCol[4] = {0.0f, 1.0f, 0.0f, 1.0f};
      ID3DBlob *green = m_pDevice->GetShaderCache()->MakeFixedColShader(greenCol);

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      psoDesc.PS.pShaderBytecode = red->GetBufferPointer();
      psoDesc.PS.BytecodeLength = red->GetBufferSize();

      list->Close();
      list = NULL;

      ID3D12PipelineState *redPSO = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&redPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      psoDesc.RasterizerState.CullMode = origCull;
      psoDesc.PS.pShaderBytecode = green->GetBufferPointer();
      psoDesc.PS.BytecodeLength = green->GetBufferSize();

      ID3D12PipelineState *greenPSO = NULL;
      hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                  (void **)&greenPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(redPSO);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(redPSO);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE();

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs.pipe = GetResID(greenPSO);

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(red);
      SAFE_RELEASE(green);
      SAFE_RELEASE(redPSO);
      SAFE_RELEASE(greenPSO);
    }
  }
  else if(overlay == DebugOverlay::Wireframe)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      float overlayConsts[] = {200.0f / 255.0f, 255.0f / 255.0f, 0.0f / 255.0f, 1.0f};
      ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts);

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_WIREFRAME;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      overlayConsts[3] = 0.0f;
      list->ClearRenderTargetView(rtv, overlayConsts, 0, NULL);

      list->Close();
      list = NULL;

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = dsv;

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::ClearBeforePass || overlay == DebugOverlay::ClearBeforeDraw)
  {
    vector<uint32_t> events = passEvents;

    if(overlay == DebugOverlay::ClearBeforeDraw)
      events.clear();

    events.push_back(eventId);

    if(!events.empty())
    {
      list->Close();
      list = NULL;

      bool rtSingle = rs.rtSingle;
      std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> rts = rs.rts;

      if(overlay == DebugOverlay::ClearBeforePass)
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      list = m_pDevice->GetNewList();

      for(size_t i = 0; i < rts.size(); i++)
      {
        D3D12Descriptor *desc = rtSingle ? GetWrapped(rts[0]) : GetWrapped(rts[i]);

        if(desc)
        {
          if(rtSingle)
            desc += i;

          Unwrap(list)->ClearRenderTargetView(UnwrapCPU(desc), black, 0, NULL);
        }
      }

      list->Close();
      list = NULL;

      for(size_t i = 0; i < events.size(); i++)
      {
        m_pDevice->ReplayLog(events[i], events[i], eReplay_OnlyDraw);

        if(overlay == DebugOverlay::ClearBeforePass && i + 1 < events.size())
          m_pDevice->ReplayLog(events[i] + 1, events[i + 1], eReplay_WithoutDraw);
      }
    }
  }
  else if(overlay == DebugOverlay::ViewportScissor)
  {
    if(pipe && pipe->IsGraphics() && !rs.views.empty())
    {
      list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

      D3D12_VIEWPORT viewport = rs.views[0];
      list->RSSetViewports(1, &viewport);

      D3D12_RECT scissor = {0, 0, 16384, 16384};
      list->RSSetScissorRects(1, &scissor);

      list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

      list->SetPipelineState(m_General.FixedColPipe);

      list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig);

      DebugPixelCBufferData pixelData = {0};

      // border colour (dark, 2px, opaque)
      pixelData.WireframeColour = Vec3f(0.1f, 0.1f, 0.1f);
      // inner colour (light, transparent)
      pixelData.Channels = Vec4f(0.2f, 0.2f, 0.9f, 0.7f);
      pixelData.OutputDisplayFormat = 0;
      pixelData.RangeMinimum = viewport.TopLeftX;
      pixelData.InverseRangeSize = viewport.TopLeftY;
      pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f);

      D3D12_GPU_VIRTUAL_ADDRESS viewCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      list->SetGraphicsRootConstantBufferView(0, viewCB);
      list->SetGraphicsRootConstantBufferView(1, viewCB);
      list->SetGraphicsRootConstantBufferView(2, viewCB);

      Vec4f dummy;
      list->SetGraphicsRoot32BitConstants(3, 4, &dummy.x, 0);

      float factor[4] = {1.0f, 1.0f, 1.0f, 1.0f};
      list->OMSetBlendFactor(factor);

      list->DrawInstanced(3, 1, 0, 0);

      viewport.TopLeftX = (float)rs.scissors[0].left;
      viewport.TopLeftY = (float)rs.scissors[0].top;
      viewport.Width = (float)(rs.scissors[0].right - rs.scissors[0].left);
      viewport.Height = (float)(rs.scissors[0].bottom - rs.scissors[0].top);
      list->RSSetViewports(1, &viewport);

      pixelData.OutputDisplayFormat = 1;
      pixelData.RangeMinimum = viewport.TopLeftX;
      pixelData.InverseRangeSize = viewport.TopLeftY;
      pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f);

      D3D12_GPU_VIRTUAL_ADDRESS scissorCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      list->SetGraphicsRootConstantBufferView(1, scissorCB);

      list->DrawInstanced(3, 1, 0, 0);
    }
  }
  else if(overlay == DebugOverlay::TriangleSizeDraw || overlay == DebugOverlay::TriangleSizePass)
  {
    if(pipe && pipe->IsGraphics())
    {
      SCOPED_TIMER("Triangle size");

      vector<uint32_t> events = passEvents;

      if(overlay == DebugOverlay::TriangleSizeDraw)
        events.clear();

      while(!events.empty())
      {
        const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[0]);

        // remove any non-drawcalls, like the pass boundary.
        if(!(draw->flags & DrawFlags::Drawcall))
          events.erase(events.begin());
        else
          break;
      }

      events.push_back(eventId);

      D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = pipe->GetGraphicsDesc();
      pipeDesc.pRootSignature = m_General.ConstOnlyRootSig;
      pipeDesc.SampleMask = 0xFFFFFFFF;
      pipeDesc.SampleDesc.Count = 1;
      pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;

      pipeDesc.NumRenderTargets = 1;
      RDCEraseEl(pipeDesc.RTVFormats);
      pipeDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      pipeDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

      D3D12_INPUT_ELEMENT_DESC ia[2] = {};
      ia[0].SemanticName = "pos";
      ia[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].SemanticName = "sec";
      ia[1].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].InputSlot = 1;
      ia[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;

      pipeDesc.InputLayout.NumElements = 2;
      pipeDesc.InputLayout.pInputElementDescs = ia;

      pipeDesc.VS.BytecodeLength = m_Overlay.MeshVS->GetBufferSize();
      pipeDesc.VS.pShaderBytecode = m_Overlay.MeshVS->GetBufferPointer();
      RDCEraseEl(pipeDesc.HS);
      RDCEraseEl(pipeDesc.DS);
      pipeDesc.GS.BytecodeLength = m_Overlay.TriangleSizeGS->GetBufferSize();
      pipeDesc.GS.pShaderBytecode = m_Overlay.TriangleSizeGS->GetBufferPointer();
      pipeDesc.PS.BytecodeLength = m_Overlay.TriangleSizePS->GetBufferSize();
      pipeDesc.PS.pShaderBytecode = m_Overlay.TriangleSizePS->GetBufferPointer();

      pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;

      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_GREATER)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL;
      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_LESS)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;

      // enough for all primitive topology types
      ID3D12PipelineState *pipes[D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH + 1] = {};

      DebugVertexCBuffer vertexData = {};
      vertexData.LineStrip = 0;
      vertexData.ModelViewProj = Matrix4f::Identity();
      vertexData.SpriteSize = Vec2f();

      Vec4f viewport(rs.views[0].Width, rs.views[0].Height);

      if(rs.dsv.ptr)
      {
        D3D12_CPU_DESCRIPTOR_HANDLE realDSV = Unwrap(rs.dsv);

        list->OMSetRenderTargets(1, &rtv, TRUE, &realDSV);
      }

      list->RSSetViewports(1, &rs.views[0]);

      D3D12_RECT scissor = {0, 0, 16384, 16384};
      list->RSSetScissorRects(1, &scissor);

      list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig);

      list->SetGraphicsRootConstantBufferView(
          0, GetDebugManager()->UploadConstants(&vertexData, sizeof(vertexData)));
      list->SetGraphicsRootConstantBufferView(
          1, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp)));
      list->SetGraphicsRootConstantBufferView(
          2, GetDebugManager()->UploadConstants(&viewport, sizeof(viewport)));
      list->SetGraphicsRoot32BitConstants(3, 4, &viewport.x, 0);

      for(size_t i = 0; i < events.size(); i++)
      {
        const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[i]);

        for(uint32_t inst = 0; draw && inst < RDCMAX(1U, draw->numInstances); inst++)
        {
          MeshFormat fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::GSOut);
          if(fmt.vertexResourceId == ResourceId())
            fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::VSOut);

          if(fmt.vertexResourceId != ResourceId())
          {
            D3D_PRIMITIVE_TOPOLOGY topo = MakeD3DPrimitiveTopology(fmt.topology);

            if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST ||
               topo >= D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST)
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
            else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ)
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
            else
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

            list->IASetPrimitiveTopology(topo);

            if(pipes[pipeDesc.PrimitiveTopologyType] == NULL)
            {
              HRESULT hr = m_pDevice->CreateGraphicsPipelineState(
                  &pipeDesc, __uuidof(ID3D12PipelineState),
                  (void **)&pipes[pipeDesc.PrimitiveTopologyType]);
              RDCASSERTEQUAL(hr, S_OK);
            }

            ID3D12Resource *vb =
                m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.vertexResourceId);

            D3D12_VERTEX_BUFFER_VIEW vbView = {};
            vbView.BufferLocation = vb->GetGPUVirtualAddress() + fmt.vertexByteOffset;
            vbView.StrideInBytes = fmt.vertexByteStride;
            vbView.SizeInBytes = UINT(vb->GetDesc().Width - fmt.vertexByteOffset);

            // second bind is just a dummy, so we don't have to make a shader
            // that doesn't accept the secondary stream
            list->IASetVertexBuffers(0, 1, &vbView);
            list->IASetVertexBuffers(1, 1, &vbView);

            list->SetPipelineState(pipes[pipeDesc.PrimitiveTopologyType]);

            if(fmt.indexByteStride && fmt.indexResourceId != ResourceId())
            {
              ID3D12Resource *ib =
                  m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.indexResourceId);

              D3D12_INDEX_BUFFER_VIEW view;
              view.BufferLocation = ib->GetGPUVirtualAddress() + fmt.indexByteOffset;
              view.SizeInBytes = UINT(ib->GetDesc().Width - fmt.indexByteOffset);
              view.Format = fmt.indexByteStride == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
              list->IASetIndexBuffer(&view);

              list->DrawIndexedInstanced(fmt.numIndices, 1, 0, fmt.baseVertex, 0);
            }
            else
            {
              list->DrawInstanced(fmt.numIndices, 1, 0, 0);
            }
          }
        }
      }

      list->Close();
      list = NULL;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(size_t i = 0; i < ARRAY_COUNT(pipes); i++)
        SAFE_RELEASE(pipes[i]);
    }

    // restore back to normal
    m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
  else if(overlay == DebugOverlay::QuadOverdrawPass || overlay == DebugOverlay::QuadOverdrawDraw)
  {
    SCOPED_TIMER("Quad Overdraw");

    vector<uint32_t> events = passEvents;

    if(overlay == DebugOverlay::QuadOverdrawDraw)
      events.clear();

    events.push_back(eventId);

    if(!events.empty())
    {
      if(overlay == DebugOverlay::QuadOverdrawPass)
      {
        list->Close();
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);
        list = m_pDevice->GetNewList();
      }

      uint32_t width = uint32_t(resourceDesc.Width >> 1);
      uint32_t height = resourceDesc.Height >> 1;

      width = RDCMAX(1U, width);
      height = RDCMAX(1U, height);

      D3D12_RESOURCE_DESC uavTexDesc = {};
      uavTexDesc.Alignment = 0;
      uavTexDesc.DepthOrArraySize = 4;
      uavTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
      uavTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
      uavTexDesc.Format = DXGI_FORMAT_R32_UINT;
      uavTexDesc.Height = height;
      uavTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
      uavTexDesc.MipLevels = 1;
      uavTexDesc.SampleDesc.Count = 1;
      uavTexDesc.SampleDesc.Quality = 0;
      uavTexDesc.Width = width;

      ID3D12Resource *overdrawTex = NULL;
      HRESULT hr = m_pDevice->CreateCommittedResource(
          &heapProps, D3D12_HEAP_FLAG_NONE, &uavTexDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
          NULL, __uuidof(ID3D12Resource), (void **)&overdrawTex);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overdrawTex HRESULT: %s", ToStr(hr).c_str());
        list->Close();
        list = NULL;
        return m_Overlay.resourceId;
      }

      m_pDevice->CreateShaderResourceView(overdrawTex, NULL,
                                          GetDebugManager()->GetCPUHandle(OVERDRAW_SRV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetCPUHandle(OVERDRAW_UAV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV));

      UINT zeroes[4] = {0, 0, 0, 0};
      list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(OVERDRAW_UAV),
                                         GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV),
                                         overdrawTex, zeroes, 0, NULL);
      list->Close();
      list = NULL;

#if ENABLED(SINGLE_FLUSH_VALIDATE)
      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();
#endif

      m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      D3D12_SHADER_BYTECODE quadWrite;
      quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize();
      quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer();

      // declare callback struct here
      D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, events,
                                   ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)));

      m_pDevice->ReplayLog(events.front(), events.back(), eReplay_Full);

      // resolve pass
      {
        list = m_pDevice->GetNewList();

        D3D12_RESOURCE_BARRIER overdrawBarriers[2] = {};

        // make sure UAV work is done then prepare for reading in PS
        overdrawBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
        overdrawBarriers[0].UAV.pResource = overdrawTex;
        overdrawBarriers[1].Transition.pResource = overdrawTex;
        overdrawBarriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        overdrawBarriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
        overdrawBarriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;

        // prepare tex resource for copying
        list->ResourceBarrier(2, overdrawBarriers);

        list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

        list->RSSetViewports(1, &rs.views[0]);

        D3D12_RECT scissor = {0, 0, 16384, 16384};
        list->RSSetScissorRects(1, &scissor);

        list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

        list->SetPipelineState(m_Overlay.QuadResolvePipe);

        list->SetGraphicsRootSignature(m_Overlay.QuadResolveRootSig);

        GetDebugManager()->SetDescriptorHeaps(list, true, false);

        list->SetGraphicsRootConstantBufferView(
            0, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp)));
        list->SetGraphicsRootDescriptorTable(1, GetDebugManager()->GetGPUHandle(OVERDRAW_SRV));

        list->DrawInstanced(3, 1, 0, 0);

        list->Close();
        list = NULL;
      }

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(auto it = cb.m_PipelineCache.begin(); it != cb.m_PipelineCache.end(); ++it)
      {
        SAFE_RELEASE(it->second.pipe);
        SAFE_RELEASE(it->second.sig);
      }

      SAFE_RELEASE(overdrawTex);
    }

    if(overlay == DebugOverlay::QuadOverdrawPass)
      m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
Ejemplo n.º 12
0
// Fetches the requested GPU counters for the captured frame.
//
// AMD-specific counters (as classified by IsAMDCounter) are forwarded to FetchCountersAMD()
// when m_pAMDCounters is available. All remaining counters are measured here with D3D11
// queries: FillTimers() walks the drawcall tree and fills ctx.timers with one entry per
// measured event, each holding before/after timestamp, pipeline statistics and occlusion
// queries.
//
// counters: the list of counters to fetch. An empty list is reported as an error.
//
// Returns the AMD results (if any) followed by one CounterResult per (timer, requested D3D
// counter) pair. If any of a timer's queries is missing, placeholder values are returned for
// that event instead: -1.0 for EventGPUDuration and 0xFFFFFFFFFFFFFFFF for all other counters.
vector<CounterResult> D3D11Replay::FetchCounters(const vector<GPUCounter> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  SCOPED_TIMER("Fetch Counters, counters to fetch %u", counters.size());

  // everything that isn't an AMD counter is handled by the query-based path below
  vector<GPUCounter> d3dCounters;
  std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters),
               [](const GPUCounter &c) { return !IsAMDCounter(c); });

  if(m_pAMDCounters)
  {
    // Filter out the AMD counters
    vector<GPUCounter> amdCounters;
    std::copy_if(counters.begin(), counters.end(), std::back_inserter(amdCounters),
                 [](const GPUCounter &c) { return IsAMDCounter(c); });

    if(!amdCounters.empty())
    {
      ret = FetchCountersAMD(amdCounters);
    }
  }

  if(d3dCounters.empty())
  {
    return ret;
  }

  D3D11_QUERY_DESC disjointdesc = {D3D11_QUERY_TIMESTAMP_DISJOINT, 0};
  ID3D11Query *disjoint = NULL;

  D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};
  ID3D11Query *start = NULL;

  HRESULT hr = S_OK;

  hr = m_pDevice->CreateQuery(&disjointdesc, &disjoint);
  if(FAILED(hr))
  {
    RDCERR("Failed to create disjoint query HRESULT: %s", ToStr(hr).c_str());
    return ret;
  }

  hr = m_pDevice->CreateQuery(&qdesc, &start);
  if(FAILED(hr))
  {
    RDCERR("Failed to create start query HRESULT: %s", ToStr(hr).c_str());
    // the disjoint query was already created above, don't leak it on this early-out
    SAFE_RELEASE(disjoint);
    return ret;
  }

  D3D11CounterContext ctx;

  {
    // issue all the queries, bracketed by the disjoint query
    {
      m_pImmediateContext->Begin(disjoint);

      m_pImmediateContext->End(start);

      ctx.eventStart = 0;
      FillTimers(ctx, m_pImmediateContext->GetRootDraw());

      m_pImmediateContext->End(disjoint);
    }

    // read the results back
    {
      // spin until the disjoint query (the last one issued) has data available
      D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
      do
      {
        hr = m_pImmediateContext->GetData(disjoint, &disjointData,
                                          sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0);
      } while(hr == S_FALSE);
      RDCASSERTEQUAL(hr, S_OK);

      RDCASSERT(!disjointData.Disjoint);

      // timestamp frequency in ticks per second, used to convert tick deltas to seconds
      const double ticksPerSecond = double(disjointData.Frequency);

      UINT64 a = 0;
      hr = m_pImmediateContext->GetData(start, &a, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      for(size_t i = 0; i < ctx.timers.size(); i++)
      {
        const auto &timer = ctx.timers[i];

        if(timer.before && timer.after && timer.stats && timer.occlusion)
        {
          hr = m_pImmediateContext->GetData(timer.before, &a, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 b = 0;
          hr = m_pImmediateContext->GetData(timer.after, &b, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          double duration = (double(b - a) / ticksPerSecond);

          a = b;

          D3D11_QUERY_DATA_PIPELINE_STATISTICS pipelineStats;
          hr = m_pImmediateContext->GetData(timer.stats, &pipelineStats,
                                            sizeof(D3D11_QUERY_DATA_PIPELINE_STATISTICS), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 occlusion = 0;
          hr = m_pImmediateContext->GetData(timer.occlusion, &occlusion, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          for(size_t c = 0; c < d3dCounters.size(); c++)
          {
            switch(d3dCounters[c])
            {
              case GPUCounter::EventGPUDuration:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::EventGPUDuration, duration));
                break;
              case GPUCounter::InputVerticesRead:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::InputVerticesRead,
                                            pipelineStats.IAVertices));
                break;
              case GPUCounter::IAPrimitives:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::IAPrimitives,
                                            pipelineStats.IAPrimitives));
                break;
              case GPUCounter::VSInvocations:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::VSInvocations,
                                            pipelineStats.VSInvocations));
                break;
              case GPUCounter::GSInvocations:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::GSInvocations,
                                            pipelineStats.GSInvocations));
                break;
              case GPUCounter::GSPrimitives:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::GSPrimitives,
                                            pipelineStats.GSPrimitives));
                break;
              case GPUCounter::RasterizerInvocations:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::RasterizerInvocations,
                                            pipelineStats.CInvocations));
                break;
              case GPUCounter::RasterizedPrimitives:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::RasterizedPrimitives,
                                            pipelineStats.CPrimitives));
                break;
              case GPUCounter::PSInvocations:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::PSInvocations,
                                            pipelineStats.PSInvocations));
                break;
              case GPUCounter::HSInvocations:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::HSInvocations,
                                            pipelineStats.HSInvocations));
                break;
              case GPUCounter::DSInvocations:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::DSInvocations,
                                            pipelineStats.DSInvocations));
                break;
              case GPUCounter::CSInvocations:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::CSInvocations,
                                            pipelineStats.CSInvocations));
                break;
              case GPUCounter::SamplesWritten:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::SamplesWritten, occlusion));
                break;
            }
          }
        }
        else
        {
          // at least one query is missing for this event, so report placeholder values
          for(size_t c = 0; c < d3dCounters.size(); c++)
          {
            switch(d3dCounters[c])
            {
              case GPUCounter::EventGPUDuration:
                ret.push_back(CounterResult(timer.eventId, GPUCounter::EventGPUDuration, -1.0));
                break;
              case GPUCounter::InputVerticesRead:
              case GPUCounter::IAPrimitives:
              case GPUCounter::GSPrimitives:
              case GPUCounter::RasterizerInvocations:
              case GPUCounter::RasterizedPrimitives:
              case GPUCounter::VSInvocations:
              case GPUCounter::HSInvocations:
              case GPUCounter::DSInvocations:
              case GPUCounter::GSInvocations:
              case GPUCounter::PSInvocations:
              case GPUCounter::CSInvocations:
              case GPUCounter::SamplesWritten:
                ret.push_back(CounterResult(timer.eventId, d3dCounters[c], 0xFFFFFFFFFFFFFFFF));
                break;
            }
          }
        }
      }
    }
  }

  // release every query created by FillTimers, then our own
  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    SAFE_RELEASE(ctx.timers[i].before);
    SAFE_RELEASE(ctx.timers[i].after);
    SAFE_RELEASE(ctx.timers[i].stats);
    SAFE_RELEASE(ctx.timers[i].occlusion);
  }

  SAFE_RELEASE(disjoint);
  SAFE_RELEASE(start);

  return ret;
}
Ejemplo n.º 13
0
// Renders the contents of a depth (or depth-stencil) 2D array image into a multisampled
// depth(-stencil) image, one destination array layer at a time.
//
// destMS:   image rendered into; one view and one framebuffer is created per array layer.
// srcArray: image read from, bound as a depth view and (for stencil formats) a stencil view.
// extent:   width/height used for the framebuffers, render area and viewport.
// layers:   number of array layers of destMS to render.
// samples:  sample count of destMS, used to pick the pipeline and passed to the shader.
// fmt:      depth format of both images. Unexpected formats log an error and return.
//
// For formats with a stencil aspect, each layer is drawn 256 times with the stencil reference
// set to each value in [0, 255]; formats without stencil are drawn once per layer.
// The function submits its command buffer and flushes the queue before destroying the
// temporary views, framebuffers and render pass.
void VulkanDebugManager::CopyDepthArrayToTex2DMS(VkImage destMS, VkImage srcArray, VkExtent3D extent,
                                                 uint32_t layers, uint32_t samples, VkFormat fmt)
{
  VkImageAspectFlags aspectFlags = VK_IMAGE_ASPECT_DEPTH_BIT;

  // first index into m_DepthArray2MSPipe, selected by format
  int pipeIndex = 0;
  switch(fmt)
  {
    case VK_FORMAT_D16_UNORM: pipeIndex = 0; break;
    case VK_FORMAT_D16_UNORM_S8_UINT:
      pipeIndex = 1;
      aspectFlags |= VK_IMAGE_ASPECT_STENCIL_BIT;
      break;
    case VK_FORMAT_X8_D24_UNORM_PACK32: pipeIndex = 2; break;
    case VK_FORMAT_D24_UNORM_S8_UINT:
      pipeIndex = 3;
      aspectFlags |= VK_IMAGE_ASPECT_STENCIL_BIT;
      break;
    case VK_FORMAT_D32_SFLOAT: pipeIndex = 4; break;
    case VK_FORMAT_D32_SFLOAT_S8_UINT:
      pipeIndex = 5;
      aspectFlags |= VK_IMAGE_ASPECT_STENCIL_BIT;
      break;
    default: RDCERR("Unexpected depth format: %d", fmt); return;
  }

  // 0-based from 2x MSAA
  uint32_t sampleIndex = SampleIndex((VkSampleCountFlagBits)samples) - 1;

  if(sampleIndex >= ARRAY_COUNT(m_DepthArray2MSPipe[0]))
  {
    RDCERR("Unsupported sample count %u", samples);
    return;
  }

  VkPipeline pipe = m_DepthArray2MSPipe[pipeIndex][sampleIndex];

  // nothing to do if there is no pipeline for this format/sample count combination
  if(pipe == VK_NULL_HANDLE)
    return;

  VkDevice dev = m_Device;

  VkResult vkr = VK_SUCCESS;

  VkImageView srcDepthView = VK_NULL_HANDLE, srcStencilView = VK_NULL_HANDLE;
  VkImageView *destView = new VkImageView[layers];

  VkImageViewCreateInfo viewInfo = {
      VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      NULL,
      0,
      srcArray,
      VK_IMAGE_VIEW_TYPE_2D_ARRAY,
      fmt,
      {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO,
       VK_COMPONENT_SWIZZLE_ZERO},
      {
          VK_IMAGE_ASPECT_DEPTH_BIT, 0, VK_REMAINING_MIP_LEVELS, 0, VK_REMAINING_ARRAY_LAYERS,
      },
  };

  vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &srcDepthView);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // stencil is read through its own view of the source, only for formats that have it
  if(aspectFlags & VK_IMAGE_ASPECT_STENCIL_BIT)
  {
    viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
    vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &srcStencilView);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  // re-use the create info for the destination views: all aspects, identity swizzle
  viewInfo.subresourceRange.aspectMask = aspectFlags;
  viewInfo.image = destMS;

  viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
  viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
  viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
  viewInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;

  // one single-layer view per destination array layer
  for(uint32_t i = 0; i < layers; i++)
  {
    viewInfo.subresourceRange.baseArrayLayer = i;
    viewInfo.subresourceRange.layerCount = 1;

    vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &destView[i]);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  VkDescriptorImageInfo srcdesc[2];
  srcdesc[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  srcdesc[0].imageView = srcDepthView;
  srcdesc[0].sampler = Unwrap(m_ArrayMSSampler);    // not used - we use texelFetch
  srcdesc[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  srcdesc[1].imageView = srcStencilView;
  srcdesc[1].sampler = Unwrap(m_ArrayMSSampler);    // not used - we use texelFetch

  VkWriteDescriptorSet writeSet[] = {
      {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, Unwrap(m_ArrayMSDescSet), 0, 0, 1,
       VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &srcdesc[0], NULL, NULL},
      {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, Unwrap(m_ArrayMSDescSet), 1, 0, 1,
       VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &srcdesc[1], NULL, NULL},
  };

  // only write the stencil binding when a stencil view was actually created
  if(aspectFlags & VK_IMAGE_ASPECT_STENCIL_BIT)
    ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), 2, writeSet, 0, NULL);
  else
    ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), 1, writeSet, 0, NULL);

  // create a bespoke framebuffer and renderpass for rendering
  VkAttachmentDescription attDesc = {0,
                                     fmt,
                                     (VkSampleCountFlagBits)samples,
                                     VK_ATTACHMENT_LOAD_OP_CLEAR,
                                     VK_ATTACHMENT_STORE_OP_STORE,
                                     VK_ATTACHMENT_LOAD_OP_CLEAR,
                                     VK_ATTACHMENT_STORE_OP_STORE,
                                     VK_IMAGE_LAYOUT_GENERAL,
                                     VK_IMAGE_LAYOUT_GENERAL};

  VkAttachmentReference attRef = {0, VK_IMAGE_LAYOUT_GENERAL};

  VkSubpassDescription sub = {};
  sub.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
  sub.pDepthStencilAttachment = &attRef;

  VkRenderPassCreateInfo rpinfo = {
      VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
      NULL,
      0,
      1,
      &attDesc,
      1,
      &sub,
      0,
      NULL,    // dependencies
  };

  VkRenderPass rp = VK_NULL_HANDLE;

  // check the result like every other object creation in this function
  vkr = ObjDisp(dev)->CreateRenderPass(Unwrap(dev), &rpinfo, NULL, &rp);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  VkFramebufferCreateInfo fbinfo = {
      VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
      NULL,
      0,
      rp,
      1,
      NULL,
      extent.width,
      extent.height,
      1,
  };

  VkFramebuffer *fb = new VkFramebuffer[layers];

  // one framebuffer per destination layer, each pointing at that layer's view
  for(uint32_t i = 0; i < layers; i++)
  {
    fbinfo.pAttachments = destView + i;

    vkr = ObjDisp(dev)->CreateFramebuffer(Unwrap(dev), &fbinfo, NULL, &fb[i]);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  VkCommandBuffer cmd = m_pDriver->GetNextCmd();

  VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
                                        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};

  vkr = ObjDisp(cmd)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  VkClearValue clearval = {};

  VkRenderPassBeginInfo rpbegin = {
      VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, NULL, rp,        VK_NULL_HANDLE,
      {{0, 0}, {extent.width, extent.height}},  1,    &clearval,
  };

  // number of stencil reference values to draw with: a single draw when there is no stencil
  // aspect, otherwise one draw per possible 8-bit stencil value
  uint32_t numStencil = 1;

  if(aspectFlags & VK_IMAGE_ASPECT_STENCIL_BIT)
    numStencil = 256;

  Vec4u params;
  params.x = samples;
  params.y = 0;    // currentSample;

  // the viewport is the same for every layer
  const VkViewport viewport = {0.0f, 0.0f, (float)extent.width, (float)extent.height, 0.0f, 1.0f};

  for(uint32_t i = 0; i < layers; i++)
  {
    rpbegin.framebuffer = fb[i];

    ObjDisp(cmd)->CmdBeginRenderPass(Unwrap(cmd), &rpbegin, VK_SUBPASS_CONTENTS_INLINE);

    ObjDisp(cmd)->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS, Unwrap(pipe));
    ObjDisp(cmd)->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_GRAPHICS,
                                        Unwrap(m_ArrayMSPipeLayout), 0, 1,
                                        UnwrapPtr(m_ArrayMSDescSet), 0, NULL);

    ObjDisp(cmd)->CmdSetViewport(Unwrap(cmd), 0, 1, &viewport);

    params.z = i;    // currentSlice;

    for(uint32_t s = 0; s < numStencil; s++)
    {
      // 1000 is passed when there is no stencil aspect
      // NOTE(review): presumably a sentinel the shader treats as "no stencil" - confirm
      params.w = numStencil == 1 ? 1000 : s;    // currentStencil;

      ObjDisp(cmd)->CmdSetStencilReference(Unwrap(cmd), VK_STENCIL_FRONT_AND_BACK, s);
      ObjDisp(cmd)->CmdPushConstants(Unwrap(cmd), Unwrap(m_ArrayMSPipeLayout), VK_SHADER_STAGE_ALL,
                                     0, sizeof(Vec4u), &params);
      ObjDisp(cmd)->CmdDraw(Unwrap(cmd), 4, 1, 0, 0);
    }

    ObjDisp(cmd)->CmdEndRenderPass(Unwrap(cmd));
  }

  vkr = ObjDisp(cmd)->EndCommandBuffer(Unwrap(cmd));
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // submit cmds and wait for idle so we can readback
  m_pDriver->SubmitCmds();
  m_pDriver->FlushQ();

  // the temporary objects are destroyed only after the flush above
  for(uint32_t i = 0; i < layers; i++)
    ObjDisp(dev)->DestroyFramebuffer(Unwrap(dev), fb[i], NULL);
  ObjDisp(dev)->DestroyRenderPass(Unwrap(dev), rp, NULL);

  ObjDisp(dev)->DestroyImageView(Unwrap(dev), srcDepthView, NULL);
  if(srcStencilView != VK_NULL_HANDLE)
    ObjDisp(dev)->DestroyImageView(Unwrap(dev), srcStencilView, NULL);
  for(uint32_t i = 0; i < layers; i++)
    ObjDisp(dev)->DestroyImageView(Unwrap(dev), destView[i], NULL);

  SAFE_DELETE_ARRAY(destView);
  SAFE_DELETE_ARRAY(fb);
}
Ejemplo n.º 14
0
// Copies the contents of a 2D array image into a multisampled image by dispatching the
// m_Array2MSPipe compute pipeline.
//
//   destMS   - image written by the shader through a storage-image view (descriptor binding 2)
//   srcArray - image read by the shader through a combined-image-sampler view (binding 0)
//   extent   - width/height are used as the X/Y dispatch dimensions
//   layers   - multiplied by samples to give the Z dispatch dimension
//   samples  - passed to the shader as the first push constant
//   fmt      - format of the images. Depth/stencil formats are forwarded to
//              CopyDepthArrayToTex2DMS instead of being handled here.
//
// The function returns without doing any work if the device does not report the
// shaderStorageImageMultisample and shaderStorageImageWriteWithoutFormat features, if the
// compute pipeline was never created, if no UINT view format matches the byte size reported by
// GetByteSize(), or if either temporary image view cannot be created.
void VulkanDebugManager::CopyArrayToTex2DMS(VkImage destMS, VkImage srcArray, VkExtent3D extent,
                                            uint32_t layers, uint32_t samples, VkFormat fmt)
{
  if(!m_pDriver->GetDeviceFeatures().shaderStorageImageMultisample ||
     !m_pDriver->GetDeviceFeatures().shaderStorageImageWriteWithoutFormat)
    return;

  if(m_Array2MSPipe == VK_NULL_HANDLE)
    return;

  if(IsDepthOrStencilFormat(fmt))
  {
    CopyDepthArrayToTex2DMS(destMS, srcArray, extent, layers, samples, fmt);
    return;
  }

  VkDevice dev = m_Device;

  VkResult vkr = VK_SUCCESS;

  // initialised so that a failed creation can never leave an indeterminate handle behind
  VkImageView srcView = VK_NULL_HANDLE, destView = VK_NULL_HANDLE;

  VkImageViewCreateInfo viewInfo = {
      VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      NULL,
      0,
      srcArray,
      VK_IMAGE_VIEW_TYPE_2D_ARRAY,
      VK_FORMAT_UNDEFINED,
      {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
       VK_COMPONENT_SWIZZLE_IDENTITY},
      {
          VK_IMAGE_ASPECT_COLOR_BIT, 0, VK_REMAINING_MIP_LEVELS, 0, VK_REMAINING_ARRAY_LAYERS,
      },
  };

  // pick a UINT view format with the same byte size, so the data is copied bit-for-bit
  // regardless of the real format.
  uint32_t bs = GetByteSize(1, 1, 1, fmt, 0);

  if(bs == 1)
    viewInfo.format = VK_FORMAT_R8_UINT;
  else if(bs == 2)
    viewInfo.format = VK_FORMAT_R16_UINT;
  else if(bs == 4)
    viewInfo.format = VK_FORMAT_R32_UINT;
  else if(bs == 8)
    viewInfo.format = VK_FORMAT_R32G32_UINT;
  else if(bs == 16)
    viewInfo.format = VK_FORMAT_R32G32B32A32_UINT;

  if(viewInfo.format == VK_FORMAT_UNDEFINED)
  {
    RDCERR("Can't copy Array to MS with format %s", ToStr(fmt).c_str());
    return;
  }

  // NOTE(review): depth/stencil formats were already forwarded to CopyDepthArrayToTex2DMS
  // above, so the else-if branch here looks unreachable - confirm before removing.
  if(IsStencilOnlyFormat(fmt))
    viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
  else if(IsDepthOrStencilFormat(fmt))
    viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;

  vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &srcView);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // without a valid source view there is nothing to bind, and nothing has been created yet
  if(vkr != VK_SUCCESS)
    return;

  viewInfo.image = destMS;
  viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;

  vkr = ObjDisp(dev)->CreateImageView(Unwrap(dev), &viewInfo, NULL, &destView);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // release the source view before bailing so a failed destination view doesn't leak it
  if(vkr != VK_SUCCESS)
  {
    ObjDisp(dev)->DestroyImageView(Unwrap(dev), srcView, NULL);
    return;
  }

  VkDescriptorImageInfo srcdesc = {0};
  srcdesc.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  srcdesc.imageView = srcView;
  srcdesc.sampler = Unwrap(m_ArrayMSSampler);    // not used - we use texelFetch

  VkDescriptorImageInfo destdesc = {0};
  destdesc.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
  destdesc.imageView = destView;

  // binding 0 is the source array, binding 2 is the multisampled destination
  VkWriteDescriptorSet writeSet[] = {
      {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, Unwrap(m_ArrayMSDescSet), 0, 0, 1,
       VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &srcdesc, NULL, NULL},
      {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, Unwrap(m_ArrayMSDescSet), 2, 0, 1,
       VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &destdesc, NULL, NULL},
  };

  ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), ARRAY_COUNT(writeSet), writeSet, 0, NULL);

  VkCommandBuffer cmd = m_pDriver->GetNextCmd();

  VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
                                        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};

  ObjDisp(cmd)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);

  ObjDisp(cmd)->CmdBindPipeline(Unwrap(cmd), VK_PIPELINE_BIND_POINT_COMPUTE, Unwrap(m_Array2MSPipe));
  ObjDisp(cmd)->CmdBindDescriptorSets(Unwrap(cmd), VK_PIPELINE_BIND_POINT_COMPUTE,
                                      Unwrap(m_ArrayMSPipeLayout), 0, 1,
                                      UnwrapPtr(m_ArrayMSDescSet), 0, NULL);

  // only the sample count is needed by the compute path, the other components stay zero
  Vec4u params = {samples, 0, 0, 0};

  ObjDisp(cmd)->CmdPushConstants(Unwrap(cmd), Unwrap(m_ArrayMSPipeLayout), VK_SHADER_STAGE_ALL, 0,
                                 sizeof(Vec4u), &params);

  ObjDisp(cmd)->CmdDispatch(Unwrap(cmd), extent.width, extent.height, layers * samples);

  ObjDisp(cmd)->EndCommandBuffer(Unwrap(cmd));

  // submit cmds and wait for idle, so the temporary views can be destroyed safely
  m_pDriver->SubmitCmds();
  m_pDriver->FlushQ();

  ObjDisp(dev)->DestroyImageView(Unwrap(dev), srcView, NULL);
  ObjDisp(dev)->DestroyImageView(Unwrap(dev), destView, NULL);
}
Ejemplo n.º 15
0
// Wrapped entry point for vkCreateRenderPass.
//
// Forwards the creation to the driver and, on success, wraps the returned handle.
//  - When m_State >= WRITING the call is serialised into a chunk which is attached to a newly
//    added resource record for the render pass.
//  - Otherwise the render pass is registered as a live resource, its creation info is stored in
//    m_CreationInfo, and a second "load" render pass is created whose attachments all use
//    VK_ATTACHMENT_LOAD_OP_LOAD.
//
// Returns the VkResult of the last CreateRenderPass call that was made (in the non-writing
// path that is the creation of the load render pass).
VkResult WrappedVulkan::vkCreateRenderPass(
			VkDevice                                    device,
			const VkRenderPassCreateInfo*               pCreateInfo,
			const VkAllocationCallbacks*                pAllocator,
			VkRenderPass*                               pRenderPass)
{
	VkResult ret = ObjDisp(device)->CreateRenderPass(Unwrap(device), pCreateInfo, pAllocator, pRenderPass);

	if(ret == VK_SUCCESS)
	{
		ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pRenderPass);
		
		if(m_State >= WRITING)
		{
			Chunk *chunk = NULL;

			{
				CACHE_THREAD_SERIALISER();

				SCOPED_SERIALISE_CONTEXT(CREATE_RENDERPASS);
				Serialise_vkCreateRenderPass(localSerialiser, device, pCreateInfo, NULL, pRenderPass);

				chunk = scope.Get();
			}

			VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pRenderPass);
			record->AddChunk(chunk);
		}
		else
		{
			GetResourceManager()->AddLiveResource(id, *pRenderPass);
			
			VulkanCreationInfo::RenderPass rpinfo;
			rpinfo.Init(GetResourceManager(), m_CreationInfo, pCreateInfo);

			VkRenderPassCreateInfo info = *pCreateInfo;

			// sized from the create info rather than using a fixed-size stack array, so a render
			// pass with many attachments cannot write past the end of the storage.
			std::vector<VkAttachmentDescription> atts(info.attachmentCount);

			// make a version of the render pass that loads from its attachments,
			// so it can be used for replaying a single draw after a render pass
			// without doing a clear or a DONT_CARE load.
			for(uint32_t i=0; i < info.attachmentCount; i++)
			{
				atts[i] = info.pAttachments[i];
				atts[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
				atts[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
			}
			
			info.pAttachments = atts.data();

			ret = ObjDisp(device)->CreateRenderPass(Unwrap(device), &info, NULL, &rpinfo.loadRP);
			RDCASSERTEQUAL(ret, VK_SUCCESS);

			ResourceId loadRPid = GetResourceManager()->WrapResource(Unwrap(device), rpinfo.loadRP);
			
			// register as a live-only resource, so it is cleaned up properly
			GetResourceManager()->AddLiveResource(loadRPid, rpinfo.loadRP);

			m_CreationInfo.m_RenderPass[id] = rpinfo;
		}
	}

	return ret;
}
Ejemplo n.º 16
0
// Serialises a vkCreateRenderPass call and, when m_State == READING, re-creates the render
// pass on the live device.
//
// On read the attachment descriptions are patched before creation: every store op is forced to
// STORE and presentable layouts are replaced. If the driver hands back a handle that is already
// wrapped, the new instance is destroyed and the serialised ID is redirected to the existing
// resource. Otherwise the render pass is wrapped and registered, and a second "load" render
// pass (all load ops set to LOAD) is created and stored in the creation info.
//
// Always returns true; a failed creation on read is only reported through RDCERR.
bool WrappedVulkan::Serialise_vkCreateRenderPass(
			Serialiser*                                 localSerialiser,
			VkDevice                                    device,
			const VkRenderPassCreateInfo*               pCreateInfo,
			const VkAllocationCallbacks*                pAllocator,
			VkRenderPass*                               pRenderPass)
{
	SERIALISE_ELEMENT(ResourceId, devId, GetResID(device));
	SERIALISE_ELEMENT(VkRenderPassCreateInfo, info, *pCreateInfo);
	SERIALISE_ELEMENT(ResourceId, id, GetResID(*pRenderPass));

	if(m_State == READING)
	{
		device = GetResourceManager()->GetLiveHandle<VkDevice>(devId);
		VkRenderPass rp = VK_NULL_HANDLE;

		// capture the creation info before the attachments are patched below
		VulkanCreationInfo::RenderPass rpinfo;
		rpinfo.Init(GetResourceManager(), m_CreationInfo, &info);

		// we want to store off the data so we can display it after the pass.
		// override any user-specified DONT_CARE.
		VkAttachmentDescription *att = (VkAttachmentDescription *)info.pAttachments;
		for(uint32_t i=0; i < info.attachmentCount; i++)
		{
			att[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
			att[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;

			// renderpass can't start or end in presentable layout on replay
			ReplacePresentableImageLayout(att[i].initialLayout);
			ReplacePresentableImageLayout(att[i].finalLayout);
		}

		VkResult ret = ObjDisp(device)->CreateRenderPass(Unwrap(device), &info, NULL, &rp);

		if(ret != VK_SUCCESS)
		{
			RDCERR("Failed on resource serialise-creation, VkResult: 0x%08x", ret);
		}
		else
		{
			ResourceId live;

			if(GetResourceManager()->HasWrapper(ToTypedHandle(rp)))
			{
				live = GetResourceManager()->GetNonDispWrapper(rp)->id;

				// destroy this instance of the duplicate, as we must have matching create/destroy
				// calls and there won't be a wrapped resource hanging around to destroy this one.
				ObjDisp(device)->DestroyRenderPass(Unwrap(device), rp, NULL);

				// whenever the new ID is requested, return the old ID, via replacements.
				GetResourceManager()->ReplaceResource(id, GetResourceManager()->GetOriginalID(live));
			}
			else
			{
				live = GetResourceManager()->WrapResource(Unwrap(device), rp);
				GetResourceManager()->AddLiveResource(id, rp);

				// make a version of the render pass that loads from its attachments,
				// so it can be used for replaying a single draw after a render pass
				// without doing a clear or a DONT_CARE load.
				for(uint32_t i=0; i < info.attachmentCount; i++)
				{
					att[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
					att[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
				}

				ret = ObjDisp(device)->CreateRenderPass(Unwrap(device), &info, NULL, &rpinfo.loadRP);
				RDCASSERTEQUAL(ret, VK_SUCCESS);
				
				// handle the loadRP being a duplicate
				if(GetResourceManager()->HasWrapper(ToTypedHandle(rpinfo.loadRP)))
				{
					// remember the handle the driver just returned: it is the one that has to be
					// passed back to the driver for destruction, not the wrapper fetched below.
					VkRenderPass dupLoadRP = rpinfo.loadRP;

					// just fetch the existing wrapped object
					rpinfo.loadRP = (VkRenderPass)(uint64_t)GetResourceManager()->GetNonDispWrapper(dupLoadRP);

					// destroy this instance of the duplicate, as we must have matching create/destroy
					// calls and there won't be a wrapped resource hanging around to destroy this one.
					ObjDisp(device)->DestroyRenderPass(Unwrap(device), dupLoadRP, NULL);

					// don't need to ReplaceResource as no IDs are involved
				}
				else
				{
					ResourceId loadRPid = GetResourceManager()->WrapResource(Unwrap(device), rpinfo.loadRP);

					// register as a live-only resource, so it is cleaned up properly
					GetResourceManager()->AddLiveResource(loadRPid, rpinfo.loadRP);
				}
				
				m_CreationInfo.m_RenderPass[live] = rpinfo;
			}
		}
	}

	return true;
}
Ejemplo n.º 17
0
// Serialises a D3D12Descriptor.
//
// The descriptor type, owning heap and index within the heap are always serialised first. The
// remaining payload depends on the type: samplers and CBVs serialise only their descriptor,
// SRV/RTV/DSV serialise a resource followed by the descriptor, and UAVs additionally serialise a
// counter resource and go through an unpacked copy of the view description.
void DoSerialise(SerialiserType &ser, D3D12Descriptor &el)
{
  D3D12DescriptorType descType = el.GetType();
  ser.Serialise("type", descType);

  ID3D12DescriptorHeap *heapPtr = (ID3D12DescriptorHeap *)el.samp.heap;

  ser.Serialise("heap", heapPtr);
  ser.Serialise("index", el.samp.idx);

  if(ser.IsReading())
  {
    el.samp.heap = (WrappedID3D12DescriptorHeap *)heapPtr;

    // for sampler types, this will be overwritten when serialising the sampler descriptor
    el.nonsamp.type = descType;
  }

  if(descType == D3D12DescriptorType::Sampler)
  {
    ser.Serialise("Descriptor", el.samp.desc);
    RDCASSERTEQUAL(el.GetType(), D3D12DescriptorType::Sampler);
  }
  else if(descType == D3D12DescriptorType::CBV)
  {
    ser.Serialise("Descriptor", el.nonsamp.cbv);
  }
  else if(descType == D3D12DescriptorType::SRV)
  {
    ser.Serialise("Resource", el.nonsamp.resource);
    ser.Serialise("Descriptor", el.nonsamp.srv);
  }
  else if(descType == D3D12DescriptorType::RTV)
  {
    ser.Serialise("Resource", el.nonsamp.resource);
    ser.Serialise("Descriptor", el.nonsamp.rtv);
  }
  else if(descType == D3D12DescriptorType::DSV)
  {
    ser.Serialise("Resource", el.nonsamp.resource);
    ser.Serialise("Descriptor", el.nonsamp.dsv);
  }
  else if(descType == D3D12DescriptorType::UAV)
  {
    ser.Serialise("Resource", el.nonsamp.resource);
    ser.Serialise("CounterResource", el.nonsamp.uav.counterResource);

    // the stored view description is squeezed, so round-trip it through the full API
    // structure: expand, serialise, then re-initialise the stored form from the result.
    D3D12_UNORDERED_ACCESS_VIEW_DESC fullDesc = el.nonsamp.uav.desc.AsDesc();
    ser.Serialise("Descriptor", fullDesc);
    el.nonsamp.uav.desc.Init(fullDesc);
  }
  else if(descType == D3D12DescriptorType::Undefined)
  {
    // nothing else to serialise, just make sure the type is recorded on the element
    el.nonsamp.type = descType;
  }
}
Ejemplo n.º 18
0
  // Called before a draw is replayed. If eid is one of the events in m_Events, the current
  // render state is saved in m_PrevState and then modified so the draw uses a patched pipeline:
  //  - a copy of the bound root signature with one extra pixel-visible descriptor table holding
  //    a single UAV at register 0, space 0
  //  - a copy of the bound graphics pipeline with all render target write masks zeroed,
  //    depth/stencil writes disabled and m_QuadWritePS as the pixel shader
  // Patched objects are cached in m_PipelineCache keyed on the original pipeline ID. The
  // modified state is applied to cmd if it is non-NULL.
  void PreDraw(uint32_t eid, ID3D12GraphicsCommandList *cmd)
  {
    if(std::find(m_Events.begin(), m_Events.end(), eid) == m_Events.end())
      return;

    // we customise the pipeline to disable framebuffer writes, but perform normal testing
    // and substitute our quad calculation fragment shader that writes to a storage image
    // that is bound in a new root signature element.

    D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState;
    m_PrevState = rs;

    // check cache first
    CachedPipeline cache = m_PipelineCache[rs.pipe];

    // if we don't get a hit, create a modified pipeline
    if(cache.pipe == NULL)
    {
      HRESULT hr = S_OK;

      WrappedID3D12RootSignature *sig =
          m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>(
              rs.graphics.rootsig);

      // need to be able to add a descriptor table with our UAV without hitting the 64 DWORD limit
      RDCASSERT(sig->sig.dwordLength < 64);

      D3D12RootSignature modsig = sig->sig;

      // make sure no other UAV tables overlap. We can't remove elements entirely because then the
      // root signature indices wouldn't match up as expected.
      // Instead move them into an unused space.
      for(size_t i = 0; i < modsig.params.size(); i++)
      {
        if(modsig.params[i].ShaderVisibility == D3D12_SHADER_VISIBILITY_PIXEL)
        {
          if(modsig.params[i].ParameterType == D3D12_ROOT_PARAMETER_TYPE_UAV)
          {
            modsig.params[i].Descriptor.RegisterSpace = modsig.numSpaces;
          }
          else if(modsig.params[i].ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE)
          {
            for(size_t r = 0; r < modsig.params[i].ranges.size(); r++)
            {
              modsig.params[i].ranges[r].RegisterSpace = modsig.numSpaces;
            }
          }
        }
      }

      // single UAV at register 0, space 0 for our own output
      D3D12_DESCRIPTOR_RANGE1 range;
      range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
      range.NumDescriptors = 1;
      range.BaseShaderRegister = 0;
      range.RegisterSpace = 0;
      range.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
      range.OffsetInDescriptorsFromTableStart = 0;

      // append the new table as the last root parameter, so existing indices are untouched
      modsig.params.push_back(D3D12RootSignatureParameter());
      D3D12RootSignatureParameter &param = modsig.params.back();
      param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
      param.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
      param.DescriptorTable.NumDescriptorRanges = 1;
      param.DescriptorTable.pDescriptorRanges = &range;

      cache.sigElem = uint32_t(modsig.params.size() - 1);

      ID3DBlob *root = m_pDevice->GetShaderCache()->MakeRootSig(modsig);

      hr = m_pDevice->CreateRootSignature(0, root->GetBufferPointer(), root->GetBufferSize(),
                                          __uuidof(ID3D12RootSignature), (void **)&cache.sig);
      RDCASSERTEQUAL(hr, S_OK);

      SAFE_RELEASE(root);

      WrappedID3D12PipelineState *origPSO =
          m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

      RDCASSERT(origPSO->IsGraphics());

      D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = origPSO->GetGraphicsDesc();

      // disable all colour writes
      for(size_t i = 0; i < ARRAY_COUNT(pipeDesc.BlendState.RenderTarget); i++)
        pipeDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = 0;

      // disable depth/stencil writes
      pipeDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      pipeDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
      pipeDesc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
      pipeDesc.DepthStencilState.StencilWriteMask = 0;

      pipeDesc.PS = m_QuadWritePS;

      pipeDesc.pRootSignature = cache.sig;

      hr = m_pDevice->CreateGraphicsPipelineState(&pipeDesc, __uuidof(ID3D12PipelineState),
                                                  (void **)&cache.pipe);
      RDCASSERTEQUAL(hr, S_OK);

      // store under the original pipeline ID, rs.pipe is only replaced below
      m_PipelineCache[rs.pipe] = cache;
    }

    // modify state for first draw call
    rs.pipe = GetResID(cache.pipe);
    rs.graphics.rootsig = GetResID(cache.sig);

    if(rs.graphics.sigelems.size() <= cache.sigElem)
      rs.graphics.sigelems.resize(cache.sigElem + 1);

    PortableHandle uav = m_UAV;

    // if a CBV_SRV_UAV heap is already set, we need to copy our descriptor in
    // if we haven't already. Otherwise we can set our own heap.
    for(size_t i = 0; i < rs.heaps.size(); i++)
    {
      WrappedID3D12DescriptorHeap *h =
          m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12DescriptorHeap>(rs.heaps[i]);
      if(h->GetDesc().Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
      {
        // use the last descriptor
        D3D12_CPU_DESCRIPTOR_HANDLE dst = h->GetCPUDescriptorHandleForHeapStart();
        dst.ptr += (h->GetDesc().NumDescriptors - 1) * sizeof(D3D12Descriptor);

        if(m_CopiedHeaps.find(rs.heaps[i]) == m_CopiedHeaps.end())
        {
          WrappedID3D12DescriptorHeap *h2 =
              m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12DescriptorHeap>(m_UAV.heap);
          D3D12_CPU_DESCRIPTOR_HANDLE src = h2->GetCPUDescriptorHandleForHeapStart();
          src.ptr += m_UAV.index * sizeof(D3D12Descriptor);

          // can't do a copy because the src heap is CPU write-only (shader visible). So instead,
          // create directly
          D3D12Descriptor *srcDesc = (D3D12Descriptor *)src.ptr;
          srcDesc->Create(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, m_pDevice, dst);

          m_CopiedHeaps.insert(rs.heaps[i]);
        }

        uav = ToPortableHandle(dst);

        break;
      }
    }

    // no existing CBV_SRV_UAV heap was substituted above, so bind our own heap as well
    if(uav.heap == m_UAV.heap)
      rs.heaps.push_back(m_UAV.heap);

    rs.graphics.sigelems[cache.sigElem] =
        D3D12RenderState::SignatureElement(eRootTable, uav.heap, uav.index);

    // as we're changing the root signature, we need to reapply all elements,
    // so just apply all state
    if(cmd)
      rs.ApplyState(cmd);
  }
Ejemplo n.º 19
0
// Serialises the enumeration of a single physical device.
//
// The instance ID, the device index (passed in through pPhysicalDeviceCount), the device ID, its
// memory index map and its properties/memory properties/features are serialised in that order.
// When not writing, the physical device with the same index is looked up on the live instance,
// wrapped and registered, and a copy of the serialised memory index map is stored for it.
//
// Always returns true.
bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices(
		Serialiser*                                 localSerialiser,
		VkInstance                                  instance,
		uint32_t*                                   pPhysicalDeviceCount,
		VkPhysicalDevice*                           pPhysicalDevices)
{
	SERIALISE_ELEMENT(ResourceId, inst, GetResID(instance));
	SERIALISE_ELEMENT(uint32_t, physIndex, *pPhysicalDeviceCount);
	SERIALISE_ELEMENT(ResourceId, physId, GetResID(*pPhysicalDevices));

	// when writing, the map comes from the device's record - otherwise it is filled by the
	// serialise call below
	uint32_t memIdxMap[32] = {0};
	if(m_State >= WRITING)
	{
		memcpy(memIdxMap, GetRecord(*pPhysicalDevices)->memIdxMap, sizeof(memIdxMap));
	}

	localSerialiser->SerialisePODArray<32>("memIdxMap", memIdxMap);

	// the serialised values are currently only logged, but are kept for reference and for
	// possible future use
	VkPhysicalDeviceProperties physProps;
	VkPhysicalDeviceMemoryProperties memProps;
	VkPhysicalDeviceFeatures physFeatures;
	
	if(m_State >= WRITING)
	{
		ObjDisp(instance)->GetPhysicalDeviceProperties(Unwrap(*pPhysicalDevices), &physProps);
		ObjDisp(instance)->GetPhysicalDeviceMemoryProperties(Unwrap(*pPhysicalDevices), &memProps);
		ObjDisp(instance)->GetPhysicalDeviceFeatures(Unwrap(*pPhysicalDevices), &physFeatures);
	}

	localSerialiser->Serialise("physProps", physProps);
	localSerialiser->Serialise("memProps", memProps);
	localSerialiser->Serialise("physFeatures", physFeatures);

	VkPhysicalDevice pd = VK_NULL_HANDLE;

	// nothing more to do while writing
	if(m_State >= WRITING)
	{
		pd = *pPhysicalDevices;
		return true;
	}

	uint32_t count;
	VkPhysicalDevice *enumerated;

	instance = GetResourceManager()->GetLiveHandle<VkInstance>(inst);

	// first query how many devices there are, then fetch them all
	VkResult vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, NULL);
	RDCASSERTEQUAL(vkr, VK_SUCCESS);

	RDCASSERT(count > physIndex);
	enumerated = new VkPhysicalDevice[count];

	// grow the per-device arrays so that physIndex is a valid slot
	if(physIndex >= m_PhysicalDevices.size())
	{
		m_PhysicalDevices.resize(physIndex+1);
		m_MemIdxMaps.resize(physIndex+1);
	}

	vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, enumerated);
	RDCASSERTEQUAL(vkr, VK_SUCCESS);

	// PORTABILITY match up physical devices to those available on replay

	pd = enumerated[physIndex];

	for(size_t i=0; i < m_PhysicalDevices.size(); i++)
	{
		// skip slots that have not been enumerated yet
		if(m_PhysicalDevices[i] == VK_NULL_HANDLE)
			continue;

		// physical devices might be re-created inside EnumeratePhysicalDevices every time, so
		// we need to re-wrap any previously enumerated physical devices
		RDCASSERTNOTEQUAL(i, physIndex);
		GetWrapped(m_PhysicalDevices[i])->RewrapObject(enumerated[i]);
	}

	SAFE_DELETE_ARRAY(enumerated);

	GetResourceManager()->WrapResource(instance, pd);
	GetResourceManager()->AddLiveResource(physId, pd);

	m_PhysicalDevices[physIndex] = pd;

	// keep a heap copy of the serialised memory index map for this device
	uint32_t *mapCopy = new uint32_t[32];
	memcpy(mapCopy, memIdxMap, sizeof(memIdxMap));
	m_MemIdxMaps[physIndex] = mapCopy;

	RDCLOG("Captured log describes physical device %u:", physIndex);
	RDCLOG("   - %s (ver %x) - %04x:%04x", physProps.deviceName, physProps.driverVersion, physProps.vendorID, physProps.deviceID);

	// overwrite the serialised values with those of the device actually used for replay
	ObjDisp(pd)->GetPhysicalDeviceProperties(Unwrap(pd), &physProps);
	ObjDisp(pd)->GetPhysicalDeviceMemoryProperties(Unwrap(pd), &memProps);
	ObjDisp(pd)->GetPhysicalDeviceFeatures(Unwrap(pd), &physFeatures);
	
	RDCLOG("Replaying on physical device %u:", physIndex);
	RDCLOG("   - %s (ver %x) - %04x:%04x", physProps.deviceName, physProps.driverVersion, physProps.vendorID, physProps.deviceID);

	return true;
}
Ejemplo n.º 20
0
void D3D12Replay::InitPostVSBuffers(uint32_t eventId)
{
  // go through any aliasing
  if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end())
    eventId = m_PostVSAlias[eventId];

  if(m_PostVSData.find(eventId) != m_PostVSData.end())
    return;

  D3D12CommandData *cmd = m_pDevice->GetQueue()->GetCommandData();
  const D3D12RenderState &rs = cmd->m_RenderState;

  if(rs.pipe == ResourceId())
    return;

  WrappedID3D12PipelineState *origPSO =
      m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(!origPSO->IsGraphics())
    return;

  D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = origPSO->GetGraphicsDesc();

  if(psoDesc.VS.BytecodeLength == 0)
    return;

  WrappedID3D12Shader *vs = origPSO->VS();

  D3D_PRIMITIVE_TOPOLOGY topo = rs.topo;

  const DrawcallDescription *drawcall = m_pDevice->GetDrawcall(eventId);

  if(drawcall->numIndices == 0)
    return;

  DXBC::DXBCFile *dxbcVS = vs->GetDXBC();

  RDCASSERT(dxbcVS);

  DXBC::DXBCFile *dxbcGS = NULL;

  WrappedID3D12Shader *gs = origPSO->GS();

  if(gs)
  {
    dxbcGS = gs->GetDXBC();

    RDCASSERT(dxbcGS);
  }

  DXBC::DXBCFile *dxbcDS = NULL;

  WrappedID3D12Shader *ds = origPSO->DS();

  if(ds)
  {
    dxbcDS = ds->GetDXBC();

    RDCASSERT(dxbcDS);
  }

  ID3D12RootSignature *soSig = NULL;

  HRESULT hr = S_OK;

  {
    WrappedID3D12RootSignature *sig =
        m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>(rs.graphics.rootsig);

    D3D12RootSignature rootsig = sig->sig;

    // create a root signature that allows stream out, if necessary
    if((rootsig.Flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT) == 0)
    {
      rootsig.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;

      ID3DBlob *blob = m_pDevice->GetShaderCache()->MakeRootSig(rootsig);

      hr = m_pDevice->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(),
                                          __uuidof(ID3D12RootSignature), (void **)&soSig);
      if(FAILED(hr))
      {
        RDCERR("Couldn't enable stream-out in root signature: HRESULT: %s", ToStr(hr).c_str());
        return;
      }

      SAFE_RELEASE(blob);
    }
  }

  vector<D3D12_SO_DECLARATION_ENTRY> sodecls;

  UINT stride = 0;
  int posidx = -1;
  int numPosComponents = 0;

  if(!dxbcVS->m_OutputSig.empty())
  {
    for(const SigParameter &sign : dxbcVS->m_OutputSig)
    {
      D3D12_SO_DECLARATION_ENTRY decl;

      decl.Stream = 0;
      decl.OutputSlot = 0;

      decl.SemanticName = sign.semanticName.c_str();
      decl.SemanticIndex = sign.semanticIndex;
      decl.StartComponent = 0;
      decl.ComponentCount = sign.compCount & 0xff;

      if(sign.systemValue == ShaderBuiltin::Position)
      {
        posidx = (int)sodecls.size();
        numPosComponents = decl.ComponentCount = 4;
      }

      stride += decl.ComponentCount * sizeof(float);
      sodecls.push_back(decl);
    }

    if(stride == 0)
    {
      RDCERR("Didn't get valid stride! Setting to 4 bytes");
      stride = 4;
    }

    // shift position attribute up to first, keeping order otherwise
    // the same
    if(posidx > 0)
    {
      D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx];
      sodecls.erase(sodecls.begin() + posidx);
      sodecls.insert(sodecls.begin(), pos);
    }

    // set up stream output entries and buffers
    psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size();
    psoDesc.StreamOutput.pSODeclaration = &sodecls[0];
    psoDesc.StreamOutput.NumStrides = 1;
    psoDesc.StreamOutput.pBufferStrides = &stride;
    psoDesc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;

    // disable all other shader stages
    psoDesc.HS.BytecodeLength = 0;
    psoDesc.HS.pShaderBytecode = NULL;
    psoDesc.DS.BytecodeLength = 0;
    psoDesc.DS.pShaderBytecode = NULL;
    psoDesc.GS.BytecodeLength = 0;
    psoDesc.GS.pShaderBytecode = NULL;
    psoDesc.PS.BytecodeLength = 0;
    psoDesc.PS.pShaderBytecode = NULL;

    // disable any rasterization/use of output targets
    psoDesc.DepthStencilState.DepthEnable = FALSE;
    psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
    psoDesc.DepthStencilState.StencilEnable = FALSE;

    if(soSig)
      psoDesc.pRootSignature = soSig;

    // render as points
    psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;

    // disable outputs
    psoDesc.NumRenderTargets = 0;
    RDCEraseEl(psoDesc.RTVFormats);
    psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

    ID3D12PipelineState *pipe = NULL;
    hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                (void **)&pipe);
    if(FAILED(hr))
    {
      RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    ID3D12Resource *idxBuf = NULL;

    bool recreate = false;
    uint64_t outputSize = uint64_t(drawcall->numIndices) * drawcall->numInstances * stride;

    if(m_SOBufferSize < outputSize)
    {
      uint64_t oldSize = m_SOBufferSize;
      while(m_SOBufferSize < outputSize)
        m_SOBufferSize *= 2;
      RDCWARN("Resizing stream-out buffer from %llu to %llu for output data", oldSize,
              m_SOBufferSize);
      recreate = true;
    }

    ID3D12GraphicsCommandList *list = NULL;

    if(!(drawcall->flags & DrawFlags::UseIBuffer))
    {
      if(recreate)
      {
        m_pDevice->GPUSync();

        CreateSOBuffers();
      }

      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      D3D12_STREAM_OUTPUT_BUFFER_VIEW view;
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;
      list->SOSetTargets(0, 1, &view);

      list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
      list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                          drawcall->instanceOffset);
    }
    else    // drawcall is indexed
    {
      bytebuf idxdata;
      GetBufferData(rs.ibuffer.buf, rs.ibuffer.offs + drawcall->indexOffset * rs.ibuffer.bytewidth,
                    RDCMIN(drawcall->numIndices * rs.ibuffer.bytewidth, rs.ibuffer.size), idxdata);

      vector<uint32_t> indices;

      uint16_t *idx16 = (uint16_t *)&idxdata[0];
      uint32_t *idx32 = (uint32_t *)&idxdata[0];

      // only read as many indices as were available in the buffer
      uint32_t numIndices =
          RDCMIN(uint32_t(idxdata.size() / rs.ibuffer.bytewidth), drawcall->numIndices);

      uint32_t idxclamp = 0;
      if(drawcall->baseVertex < 0)
        idxclamp = uint32_t(-drawcall->baseVertex);

      // grab all unique vertex indices referenced
      for(uint32_t i = 0; i < numIndices; i++)
      {
        uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i];

        // apply baseVertex but clamp to 0 (don't allow index to become negative)
        if(i32 < idxclamp)
          i32 = 0;
        else if(drawcall->baseVertex < 0)
          i32 -= idxclamp;
        else if(drawcall->baseVertex > 0)
          i32 += drawcall->baseVertex;

        auto it = std::lower_bound(indices.begin(), indices.end(), i32);

        if(it != indices.end() && *it == i32)
          continue;

        indices.insert(it, i32);
      }

      // if we read out of bounds, we'll also have a 0 index being referenced
      // (as 0 is read). Don't insert 0 if we already have 0 though
      if(numIndices < drawcall->numIndices && (indices.empty() || indices[0] != 0))
        indices.insert(indices.begin(), 0);

      // An index buffer could be something like: 500, 501, 502, 501, 503, 502
      // in which case we can't use the existing index buffer without filling 499 slots of vertex
      // data with padding. Instead we rebase the indices based on the smallest vertex so it becomes
      // 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer.
      //
      // Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512
      // which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid.
      // We just stream-out a tightly packed list of unique indices, and then remap the index buffer
      // so that what did point to 500 points to 0 (accounting for rebasing), and what did point
      // to 510 now points to 3 (accounting for the unique sort).

      // we use a map here since the indices may be sparse. Especially considering if an index
      // is 'invalid' like 0xcccccccc then we don't want an array of 3.4 billion entries.
      map<uint32_t, size_t> indexRemap;
      for(size_t i = 0; i < indices.size(); i++)
      {
        // by definition, this index will only appear once in indices[]
        indexRemap[indices[i]] = i;
      }

      if(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t))
      {
        uint64_t oldSize = m_SOBufferSize;
        while(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t))
          m_SOBufferSize *= 2;
        RDCWARN("Resizing stream-out buffer from %llu to %llu for indices", oldSize, m_SOBufferSize);
        recreate = true;
      }

      if(recreate)
      {
        m_pDevice->GPUSync();

        CreateSOBuffers();
      }

      GetDebugManager()->FillBuffer(m_SOPatchedIndexBuffer, 0, &indices[0],
                                    indices.size() * sizeof(uint32_t));

      D3D12_INDEX_BUFFER_VIEW patchedIB;

      patchedIB.BufferLocation = m_SOPatchedIndexBuffer->GetGPUVirtualAddress();
      patchedIB.Format = DXGI_FORMAT_R32_UINT;
      patchedIB.SizeInBytes = UINT(indices.size() * sizeof(uint32_t));

      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      list->IASetIndexBuffer(&patchedIB);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      D3D12_STREAM_OUTPUT_BUFFER_VIEW view;
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;
      list->SOSetTargets(0, 1, &view);

      list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);

      list->DrawIndexedInstanced((UINT)indices.size(), drawcall->numInstances, 0, 0,
                                 drawcall->instanceOffset);

      uint32_t stripCutValue = 0;
      if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF)
        stripCutValue = 0xffff;
      else if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF)
        stripCutValue = 0xffffffff;

      // rebase existing index buffer to point to the right elements in our stream-out'd
      // vertex buffer
      for(uint32_t i = 0; i < numIndices; i++)
      {
        uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i];

        // preserve primitive restart indices
        if(stripCutValue && i32 == stripCutValue)
          continue;

        // apply baseVertex but clamp to 0 (don't allow index to become negative)
        if(i32 < idxclamp)
          i32 = 0;
        else if(drawcall->baseVertex < 0)
          i32 -= idxclamp;
        else if(drawcall->baseVertex > 0)
          i32 += drawcall->baseVertex;

        if(rs.ibuffer.bytewidth == 2)
          idx16[i] = uint16_t(indexRemap[i32]);
        else
          idx32[i] = uint32_t(indexRemap[i32]);
      }

      idxBuf = NULL;

      if(!idxdata.empty())
      {
        D3D12_RESOURCE_DESC idxBufDesc;
        idxBufDesc.Alignment = 0;
        idxBufDesc.DepthOrArraySize = 1;
        idxBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
        idxBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
        idxBufDesc.Format = DXGI_FORMAT_UNKNOWN;
        idxBufDesc.Height = 1;
        idxBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
        idxBufDesc.MipLevels = 1;
        idxBufDesc.SampleDesc.Count = 1;
        idxBufDesc.SampleDesc.Quality = 0;
        idxBufDesc.Width = idxdata.size();

        D3D12_HEAP_PROPERTIES heapProps;
        heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
        heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
        heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
        heapProps.CreationNodeMask = 1;
        heapProps.VisibleNodeMask = 1;

        hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &idxBufDesc,
                                                D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                                __uuidof(ID3D12Resource), (void **)&idxBuf);
        RDCASSERTEQUAL(hr, S_OK);

        SetObjName(idxBuf, StringFormat::Fmt("PostVS idxBuf for %u", eventId));

        GetDebugManager()->FillBuffer(idxBuf, 0, &idxdata[0], idxdata.size());
      }
    }

    D3D12_RESOURCE_BARRIER sobarr = {};
    sobarr.Transition.pResource = m_SOBuffer;
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

    list->ResourceBarrier(1, &sobarr);

    list->CopyResource(m_SOStagingBuffer, m_SOBuffer);

    // we're done with this after the copy, so we can discard it and reset
    // the counter for the next stream-out
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    list->DiscardResource(m_SOBuffer, NULL);
    list->ResourceBarrier(1, &sobarr);

    UINT zeroes[4] = {0, 0, 0, 0};
    list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                       GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                       m_SOBuffer, zeroes, 0, NULL);

    list->Close();

    ID3D12CommandList *l = list;
    m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
    m_pDevice->GPUSync();

    GetDebugManager()->ResetDebugAlloc();

    SAFE_RELEASE(pipe);

    byte *byteData = NULL;
    D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize};
    hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData);
    if(FAILED(hr))
    {
      RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(idxBuf);
      SAFE_RELEASE(soSig);
      return;
    }

    range.End = 0;

    uint64_t numBytesWritten = *(uint64_t *)byteData;

    if(numBytesWritten == 0)
    {
      m_PostVSData[eventId] = D3D12PostVSData();
      SAFE_RELEASE(idxBuf);
      SAFE_RELEASE(soSig);
      return;
    }

    // skip past the counter
    byteData += 64;

    uint64_t numPrims = numBytesWritten / stride;

    ID3D12Resource *vsoutBuffer = NULL;

    {
      D3D12_RESOURCE_DESC vertBufDesc;
      vertBufDesc.Alignment = 0;
      vertBufDesc.DepthOrArraySize = 1;
      vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      vertBufDesc.Format = DXGI_FORMAT_UNKNOWN;
      vertBufDesc.Height = 1;
      vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      vertBufDesc.MipLevels = 1;
      vertBufDesc.SampleDesc.Count = 1;
      vertBufDesc.SampleDesc.Quality = 0;
      vertBufDesc.Width = numBytesWritten;

      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc,
                                              D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                              __uuidof(ID3D12Resource), (void **)&vsoutBuffer);
      RDCASSERTEQUAL(hr, S_OK);

      if(vsoutBuffer)
      {
        SetObjName(vsoutBuffer, StringFormat::Fmt("PostVS vsoutBuffer for %u", eventId));
        GetDebugManager()->FillBuffer(vsoutBuffer, 0, byteData, (size_t)numBytesWritten);
      }
    }

    float nearp = 0.1f;
    float farp = 100.0f;

    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(uint64_t i = 1; numPosComponents == 4 && i < numPrims; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + i * stride);

      if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        if(m == 1.0f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }

    m_SOStagingBuffer->Unmap(0, &range);

    m_PostVSData[eventId].vsin.topo = topo;
    m_PostVSData[eventId].vsout.buf = vsoutBuffer;
    m_PostVSData[eventId].vsout.vertStride = stride;
    m_PostVSData[eventId].vsout.nearPlane = nearp;
    m_PostVSData[eventId].vsout.farPlane = farp;

    m_PostVSData[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::UseIBuffer);
    m_PostVSData[eventId].vsout.numVerts = drawcall->numIndices;

    m_PostVSData[eventId].vsout.instStride = 0;
    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].vsout.instStride =
          uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances));

    m_PostVSData[eventId].vsout.idxBuf = NULL;
    if(m_PostVSData[eventId].vsout.useIndices && idxBuf)
    {
      m_PostVSData[eventId].vsout.idxBuf = idxBuf;
      m_PostVSData[eventId].vsout.idxFmt =
          rs.ibuffer.bytewidth == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
    }

    m_PostVSData[eventId].vsout.hasPosOut = posidx >= 0;

    m_PostVSData[eventId].vsout.topo = topo;
  }
  else
  {
    // empty vertex output signature
    m_PostVSData[eventId].vsin.topo = topo;
    m_PostVSData[eventId].vsout.buf = NULL;
    m_PostVSData[eventId].vsout.instStride = 0;
    m_PostVSData[eventId].vsout.vertStride = 0;
    m_PostVSData[eventId].vsout.nearPlane = 0.0f;
    m_PostVSData[eventId].vsout.farPlane = 0.0f;
    m_PostVSData[eventId].vsout.useIndices = false;
    m_PostVSData[eventId].vsout.hasPosOut = false;
    m_PostVSData[eventId].vsout.idxBuf = NULL;

    m_PostVSData[eventId].vsout.topo = topo;
  }

  if(dxbcGS || dxbcDS)
  {
    stride = 0;
    posidx = -1;
    numPosComponents = 0;

    DXBC::DXBCFile *lastShader = dxbcGS;
    if(dxbcDS)
      lastShader = dxbcDS;

    sodecls.clear();
    for(const SigParameter &sign : lastShader->m_OutputSig)
    {
      D3D12_SO_DECLARATION_ENTRY decl;

      // for now, skip streams that aren't stream 0
      if(sign.stream != 0)
        continue;

      decl.Stream = 0;
      decl.OutputSlot = 0;

      decl.SemanticName = sign.semanticName.c_str();
      decl.SemanticIndex = sign.semanticIndex;
      decl.StartComponent = 0;
      decl.ComponentCount = sign.compCount & 0xff;

      if(sign.systemValue == ShaderBuiltin::Position)
      {
        posidx = (int)sodecls.size();
        numPosComponents = decl.ComponentCount = 4;
      }

      stride += decl.ComponentCount * sizeof(float);
      sodecls.push_back(decl);
    }

    // shift position attribute up to first, keeping order otherwise
    // the same
    if(posidx > 0)
    {
      D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx];
      sodecls.erase(sodecls.begin() + posidx);
      sodecls.insert(sodecls.begin(), pos);
    }

    // enable the other shader stages again
    if(origPSO->DS())
      psoDesc.DS = origPSO->DS()->GetDesc();
    if(origPSO->HS())
      psoDesc.HS = origPSO->HS()->GetDesc();
    if(origPSO->GS())
      psoDesc.GS = origPSO->GS()->GetDesc();

    // configure new SO declarations
    psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size();
    psoDesc.StreamOutput.pSODeclaration = &sodecls[0];
    psoDesc.StreamOutput.NumStrides = 1;
    psoDesc.StreamOutput.pBufferStrides = &stride;

    // we're using the same topology this time
    psoDesc.PrimitiveTopologyType = origPSO->graphics->PrimitiveTopologyType;

    ID3D12PipelineState *pipe = NULL;
    hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                (void **)&pipe);
    if(FAILED(hr))
    {
      RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    D3D12_STREAM_OUTPUT_BUFFER_VIEW view;

    ID3D12GraphicsCommandList *list = NULL;

    view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
    view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
    view.SizeInBytes = m_SOBufferSize;
    // draws with multiple instances must be replayed one at a time so we can record the number of
    // primitives from each drawcall, as due to expansion this can vary per-instance.
    if(drawcall->numInstances > 1)
    {
      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;

      // do a dummy draw to make sure we have enough space in the output buffer
      list->SOSetTargets(0, 1, &view);

      list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

      // because the result is expanded we don't have to remap index buffers or anything
      if(drawcall->flags & DrawFlags::UseIBuffer)
      {
        list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances,
                                   drawcall->indexOffset, drawcall->baseVertex,
                                   drawcall->instanceOffset);
      }
      else
      {
        list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                            drawcall->instanceOffset);
      }

      list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

      list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1,
                             m_SOStagingBuffer, 0);

      list->Close();

      ID3D12CommandList *l = list;
      m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
      m_pDevice->GPUSync();

      // check that things are OK, and resize up if needed
      D3D12_RANGE range;
      range.Begin = 0;
      range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

      D3D12_QUERY_DATA_SO_STATISTICS *data;
      hr = m_SOStagingBuffer->Map(0, &range, (void **)&data);

      D3D12_QUERY_DATA_SO_STATISTICS result = *data;

      range.End = 0;
      m_SOStagingBuffer->Unmap(0, &range);

      if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
      {
        uint64_t oldSize = m_SOBufferSize;
        while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
          m_SOBufferSize *= 2;
        RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize);
        CreateSOBuffers();
      }

      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;

      GetDebugManager()->ResetDebugAlloc();

      // now do the actual stream out
      list = GetDebugManager()->ResetDebugList();

      // first need to reset the counter byte values which may have either been written to above, or
      // are newly created
      {
        D3D12_RESOURCE_BARRIER sobarr = {};
        sobarr.Transition.pResource = m_SOBuffer;
        sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
        sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;

        list->ResourceBarrier(1, &sobarr);

        D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {};
        counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
        counterDesc.Format = DXGI_FORMAT_R32_UINT;
        counterDesc.Buffer.FirstElement = 0;
        counterDesc.Buffer.NumElements = 4;

        UINT zeroes[4] = {0, 0, 0, 0};
        list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                           GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                           m_SOBuffer, zeroes, 0, NULL);

        std::swap(sobarr.Transition.StateBefore, sobarr.Transition.StateAfter);
        list->ResourceBarrier(1, &sobarr);
      }

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      // reserve space for enough 'buffer filled size' locations
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() +
                            AlignUp(uint64_t(drawcall->numInstances * sizeof(UINT64)), 64ULL);

      // do incremental draws to get the output size. We have to do this O(N^2) style because
      // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... N instances
      // and count the total number of verts each time, then we can see from the difference how much
      // each instance wrote.
      for(uint32_t inst = 1; inst <= drawcall->numInstances; inst++)
      {
        if(drawcall->flags & DrawFlags::UseIBuffer)
        {
          view.BufferFilledSizeLocation =
              m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64);
          list->SOSetTargets(0, 1, &view);
          list->DrawIndexedInstanced(drawcall->numIndices, inst, drawcall->indexOffset,
                                     drawcall->baseVertex, drawcall->instanceOffset);
        }
        else
        {
          view.BufferFilledSizeLocation =
              m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64);
          list->SOSetTargets(0, 1, &view);
          list->DrawInstanced(drawcall->numIndices, inst, drawcall->vertexOffset,
                              drawcall->instanceOffset);
        }
      }

      list->Close();

      l = list;
      m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
      m_pDevice->GPUSync();

      GetDebugManager()->ResetDebugAlloc();

      // the last draw will have written the actual data we want into the buffer
    }
    else
    {
      // this only loops if we find from a query that we need to resize up
      while(true)
      {
        list = GetDebugManager()->ResetDebugList();

        rs.ApplyState(list);

        list->SetPipelineState(pipe);

        if(soSig)
        {
          list->SetGraphicsRootSignature(soSig);
          rs.ApplyGraphicsRootElements(list);
        }

        view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
        view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
        view.SizeInBytes = m_SOBufferSize;

        list->SOSetTargets(0, 1, &view);

        list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

        // because the result is expanded we don't have to remap index buffers or anything
        if(drawcall->flags & DrawFlags::UseIBuffer)
        {
          list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances,
                                     drawcall->indexOffset, drawcall->baseVertex,
                                     drawcall->instanceOffset);
        }
        else
        {
          list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                              drawcall->instanceOffset);
        }

        list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

        list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1,
                               m_SOStagingBuffer, 0);

        list->Close();

        ID3D12CommandList *l = list;
        m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
        m_pDevice->GPUSync();

        // check that things are OK, and resize up if needed
        D3D12_RANGE range;
        range.Begin = 0;
        range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

        D3D12_QUERY_DATA_SO_STATISTICS *data;
        hr = m_SOStagingBuffer->Map(0, &range, (void **)&data);

        if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
        {
          uint64_t oldSize = m_SOBufferSize;
          while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
            m_SOBufferSize *= 2;
          RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize);
          CreateSOBuffers();

          continue;
        }

        range.End = 0;
        m_SOStagingBuffer->Unmap(0, &range);

        GetDebugManager()->ResetDebugAlloc();

        break;
      }
    }

    list = GetDebugManager()->ResetDebugList();

    D3D12_RESOURCE_BARRIER sobarr = {};
    sobarr.Transition.pResource = m_SOBuffer;
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

    list->ResourceBarrier(1, &sobarr);

    list->CopyResource(m_SOStagingBuffer, m_SOBuffer);

    // we're done with this after the copy, so we can discard it and reset
    // the counter for the next stream-out
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    list->DiscardResource(m_SOBuffer, NULL);
    list->ResourceBarrier(1, &sobarr);

    D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {};
    counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
    counterDesc.Format = DXGI_FORMAT_R32_UINT;
    counterDesc.Buffer.FirstElement = 0;
    counterDesc.Buffer.NumElements = 4;

    UINT zeroes[4] = {0, 0, 0, 0};
    list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                       GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                       m_SOBuffer, zeroes, 0, NULL);

    list->Close();

    ID3D12CommandList *l = list;
    m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
    m_pDevice->GPUSync();

    GetDebugManager()->ResetDebugAlloc();

    SAFE_RELEASE(pipe);

    byte *byteData = NULL;
    D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize};
    hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData);
    if(FAILED(hr))
    {
      RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    range.End = 0;

    uint64_t *counters = (uint64_t *)byteData;

    uint64_t numBytesWritten = 0;
    std::vector<D3D12PostVSData::InstData> instData;
    if(drawcall->numInstances > 1)
    {
      uint64_t prevByteCount = 0;

      for(uint32_t inst = 0; inst < drawcall->numInstances; inst++)
      {
        uint64_t byteCount = counters[inst];

        D3D12PostVSData::InstData d;
        d.numVerts = uint32_t((byteCount - prevByteCount) / stride);
        d.bufOffset = prevByteCount;
        prevByteCount = byteCount;

        instData.push_back(d);
      }

      numBytesWritten = prevByteCount;
    }
    else
    {
      numBytesWritten = counters[0];
    }

    if(numBytesWritten == 0)
    {
      SAFE_RELEASE(soSig);
      return;
    }

    // skip past the counter(s)
    byteData += (view.BufferLocation - m_SOBuffer->GetGPUVirtualAddress());

    uint64_t numVerts = numBytesWritten / stride;

    ID3D12Resource *gsoutBuffer = NULL;

    {
      D3D12_RESOURCE_DESC vertBufDesc;
      vertBufDesc.Alignment = 0;
      vertBufDesc.DepthOrArraySize = 1;
      vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      vertBufDesc.Format = DXGI_FORMAT_UNKNOWN;
      vertBufDesc.Height = 1;
      vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      vertBufDesc.MipLevels = 1;
      vertBufDesc.SampleDesc.Count = 1;
      vertBufDesc.SampleDesc.Quality = 0;
      vertBufDesc.Width = numBytesWritten;

      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc,
                                              D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                              __uuidof(ID3D12Resource), (void **)&gsoutBuffer);
      RDCASSERTEQUAL(hr, S_OK);

      if(gsoutBuffer)
      {
        SetObjName(gsoutBuffer, StringFormat::Fmt("PostVS gsoutBuffer for %u", eventId));
        GetDebugManager()->FillBuffer(gsoutBuffer, 0, byteData, (size_t)numBytesWritten);
      }
    }

    float nearp = 0.1f;
    float farp = 100.0f;

    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(UINT64 i = 1; numPosComponents == 4 && i < numVerts; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + i * stride);

      if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        if(m == 1.0f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }

    m_SOStagingBuffer->Unmap(0, &range);

    m_PostVSData[eventId].gsout.buf = gsoutBuffer;
    m_PostVSData[eventId].gsout.instStride = 0;
    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].gsout.instStride =
          uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances));
    m_PostVSData[eventId].gsout.vertStride = stride;
    m_PostVSData[eventId].gsout.nearPlane = nearp;
    m_PostVSData[eventId].gsout.farPlane = farp;
    m_PostVSData[eventId].gsout.useIndices = false;
    m_PostVSData[eventId].gsout.hasPosOut = posidx >= 0;
    m_PostVSData[eventId].gsout.idxBuf = NULL;

    topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;

    if(lastShader == dxbcGS)
    {
      for(size_t i = 0; i < dxbcGS->GetNumDeclarations(); i++)
      {
        const DXBC::ASMDecl &decl = dxbcGS->GetDeclaration(i);

        if(decl.declaration == DXBC::OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY)
        {
          topo = decl.outTopology;
          break;
        }
      }
    }
    else if(lastShader == dxbcDS)
    {
      for(size_t i = 0; i < dxbcDS->GetNumDeclarations(); i++)
      {
        const DXBC::ASMDecl &decl = dxbcDS->GetDeclaration(i);

        if(decl.declaration == DXBC::OPCODE_DCL_TESS_DOMAIN)
        {
          if(decl.domain == DXBC::DOMAIN_ISOLINE)
            topo = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
          else
            topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
          break;
        }
      }
    }

    m_PostVSData[eventId].gsout.topo = topo;

    // streamout expands strips unfortunately
    if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;

    m_PostVSData[eventId].gsout.numVerts = (uint32_t)numVerts;

    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].gsout.numVerts /= RDCMAX(1U, drawcall->numInstances);

    m_PostVSData[eventId].gsout.instData = instData;
  }

  SAFE_RELEASE(soSig);
}
Ejemplo n.º 21
0
// Serialises the arguments of vkCreateDevice and, when m_State is READING,
// creates the replay-side logical device from the serialised create info.
//
// On the READING path the serialised VkDeviceCreateInfo is copied and adjusted
// locally before the device is created:
//  - the debug marker extension is filtered out of the enabled extensions,
//  - the layer list is passed through StripUnwantedLayers() and the extension
//    list through AddRequiredExtensions(),
//  - a queue family with VK_QUEUE_GRAPHICS_BIT is located, and a request for it
//    is appended if the application did not already request a usable one,
//  - fillModeNonSolid, robustBufferAccess and vertexPipelineStoresAndAtomics
//    are enabled whenever the physical device reports them as available.
// After creation the device is registered with the resource manager, and the
// internal command pool, cached physical-device data and debug manager are set up.
//
// pAllocator is not used by this function. Always returns true.
bool WrappedVulkan::Serialise_vkCreateDevice(
		Serialiser*                                 localSerialiser,
		VkPhysicalDevice                            physicalDevice,
		const VkDeviceCreateInfo*                   pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
		VkDevice*                                   pDevice)
{
	SERIALISE_ELEMENT(ResourceId, physId, GetResID(physicalDevice));
	SERIALISE_ELEMENT(VkDeviceCreateInfo, serCreateInfo, *pCreateInfo);
	SERIALISE_ELEMENT(ResourceId, devId, GetResID(*pDevice));

	if(m_State == READING)
	{
		// we must make any modifications locally, so the free of pointers
		// in the serialised VkDeviceCreateInfo don't double-free
		VkDeviceCreateInfo createInfo = serCreateInfo;
		
		std::vector<string> Extensions;
		for(uint32_t i=0; i < createInfo.enabledExtensionCount; i++)
		{
			// don't include the debug marker extension
			// (strcmp returns non-zero when the names differ)
			if(strcmp(createInfo.ppEnabledExtensionNames[i], VK_EXT_DEBUG_MARKER_EXTENSION_NAME))
				Extensions.push_back(createInfo.ppEnabledExtensionNames[i]);
		}

		std::vector<string> Layers;
		for(uint32_t i=0; i < createInfo.enabledLayerCount; i++)
			Layers.push_back(createInfo.ppEnabledLayerNames[i]);

		StripUnwantedLayers(Layers);

		AddRequiredExtensions(false, Extensions);
		
#if defined(FORCE_VALIDATION_LAYERS)
		Layers.push_back("VK_LAYER_LUNARG_standard_validation");
#endif

		createInfo.enabledLayerCount = (uint32_t)Layers.size();

		// the arrays below point into the strings owned by Layers/Extensions,
		// which stay alive until the end of this scope
		const char **layerArray = NULL;
		if(!Layers.empty())
		{
			layerArray = new const char *[createInfo.enabledLayerCount];
			
			for(uint32_t i=0; i < createInfo.enabledLayerCount; i++)
				layerArray[i] = Layers[i].c_str();

			createInfo.ppEnabledLayerNames = layerArray;
		}

		createInfo.enabledExtensionCount = (uint32_t)Extensions.size();

		const char **extArray = NULL;
		if(!Extensions.empty())
		{
			extArray = new const char *[createInfo.enabledExtensionCount];
			
			for(uint32_t i=0; i < createInfo.enabledExtensionCount; i++)
				extArray[i] = Extensions[i].c_str();

			createInfo.ppEnabledExtensionNames = extArray;
		}

		physicalDevice = GetResourceManager()->GetLiveHandle<VkPhysicalDevice>(physId);

		VkDevice device;

		// standard two-call query: first the count, then the properties
		uint32_t qCount = 0;
		ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, NULL);

		VkQueueFamilyProperties *props = new VkQueueFamilyProperties[qCount];
		ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, props);

		bool found = false;
		uint32_t qFamilyIdx = 0;
		VkQueueFlags search = (VK_QUEUE_GRAPHICS_BIT);

		// for queue priorities, if we need it
		float one = 1.0f;

		// if we need to change the requested queues, it will point to this
		VkDeviceQueueCreateInfo *modQueues = NULL;

		for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++)
		{
			uint32_t idx = createInfo.pQueueCreateInfos[i].queueFamilyIndex;
			RDCASSERT(idx < qCount);

			// never index props out of bounds, even if the assert above is non-fatal
			if(idx >= qCount)
				continue;

			// this requested queue is one we can use too
			if((props[idx].queueFlags & search) == search && createInfo.pQueueCreateInfos[i].queueCount > 0)
			{
				qFamilyIdx = idx;
				found = true;
				break;
			}
		}

		// if we didn't find it, search for which queue family we should add a request for
		if(!found)
		{
			RDCDEBUG("App didn't request a queue family we can use - adding our own");

			for(uint32_t i=0; i < qCount; i++)
			{
				if((props[i].queueFlags & search) == search)
				{
					qFamilyIdx = i;
					found = true;
					break;
				}
			}

			if(!found)
			{
				// NOTE: execution continues with qFamilyIdx == 0 in this case
				RDCERR("Can't add a queue with required properties for RenderDoc! Unsupported configuration");
			}
			else
			{
				// we found the queue family, add it
				modQueues = new VkDeviceQueueCreateInfo[createInfo.queueCreateInfoCount + 1];
				for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++)
					modQueues[i] = createInfo.pQueueCreateInfos[i];

				// array-new leaves the extra entry indeterminate, so zero it and
				// fill in sType explicitly before setting the fields we care about
				VkDeviceQueueCreateInfo &extraQueue = modQueues[createInfo.queueCreateInfoCount];
				extraQueue = VkDeviceQueueCreateInfo();
				extraQueue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
				extraQueue.queueFamilyIndex = qFamilyIdx;
				extraQueue.queueCount = 1;
				extraQueue.pQueuePriorities = &one;

				createInfo.pQueueCreateInfos = modQueues;
				createInfo.queueCreateInfoCount++;
			}
		}
		
		SAFE_DELETE_ARRAY(props);

		// take a local copy of the requested features so we can switch on extras
		VkPhysicalDeviceFeatures enabledFeatures = {0};
		if(createInfo.pEnabledFeatures != NULL) enabledFeatures = *createInfo.pEnabledFeatures;
		createInfo.pEnabledFeatures = &enabledFeatures;

		VkPhysicalDeviceFeatures availFeatures = {0};
		ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures(Unwrap(physicalDevice), &availFeatures);

		if(availFeatures.fillModeNonSolid)
			enabledFeatures.fillModeNonSolid = true;
		else
			RDCWARN("fillModeNonSolid = false, wireframe overlay will be solid");
		
		if(availFeatures.robustBufferAccess)
			enabledFeatures.robustBufferAccess = true;
		else
			RDCWARN("robustBufferAccess = false, out of bounds access due to bugs in application or RenderDoc may cause crashes");

		if(availFeatures.vertexPipelineStoresAndAtomics)
			enabledFeatures.vertexPipelineStoresAndAtomics = true;
		else
			RDCWARN("vertexPipelineStoresAndAtomics = false, output mesh data will not be available");

		// log the device extensions available on the replay machine
		uint32_t numExts = 0;

		VkResult vkr = ObjDisp(physicalDevice)->EnumerateDeviceExtensionProperties(Unwrap(physicalDevice), NULL, &numExts, NULL);
		RDCASSERTEQUAL(vkr, VK_SUCCESS);

		VkExtensionProperties *exts = new VkExtensionProperties[numExts];

		vkr = ObjDisp(physicalDevice)->EnumerateDeviceExtensionProperties(Unwrap(physicalDevice), NULL, &numExts, exts);
		RDCASSERTEQUAL(vkr, VK_SUCCESS);

		for(uint32_t i=0; i < numExts; i++)
			RDCLOG("Ext %u: %s (%u)", i, exts[i].extensionName, exts[i].specVersion);

		SAFE_DELETE_ARRAY(exts);

		// PORTABILITY check that extensions and layers supported in capture (from createInfo) are supported in replay

		vkr = GetDeviceDispatchTable(NULL)->CreateDevice(Unwrap(physicalDevice), &createInfo, NULL, &device);
		RDCASSERTEQUAL(vkr, VK_SUCCESS);

		GetResourceManager()->WrapResource(device, device);
		GetResourceManager()->AddLiveResource(devId, device);
		
		InitDeviceReplayTables(Unwrap(device));

		RDCASSERT(m_Device == VK_NULL_HANDLE); // MULTIDEVICE
		
		m_PhysicalDevice = physicalDevice;
		m_Device = device;

		m_QueueFamilyIdx = qFamilyIdx;

		if(m_InternalCmds.cmdpool == VK_NULL_HANDLE)
		{
			VkCommandPoolCreateInfo poolInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, NULL, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, qFamilyIdx };
			vkr = ObjDisp(device)->CreateCommandPool(Unwrap(device), &poolInfo, NULL, &m_InternalCmds.cmdpool);
			RDCASSERTEQUAL(vkr, VK_SUCCESS);

			GetResourceManager()->WrapResource(Unwrap(device), m_InternalCmds.cmdpool);
		}
		
		// cache the physical device's properties, memory properties, features and format properties
		ObjDisp(physicalDevice)->GetPhysicalDeviceProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.props);
		
		ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.memProps);

		ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures(Unwrap(physicalDevice), &m_PhysicalDeviceData.features);

		for(int i=VK_FORMAT_BEGIN_RANGE+1; i < VK_FORMAT_END_RANGE; i++)
			ObjDisp(physicalDevice)->GetPhysicalDeviceFormatProperties(Unwrap(physicalDevice), VkFormat(i), &m_PhysicalDeviceData.fmtprops[i]);

		m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
		m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
		m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

		// pick up the memory index map that belongs to this physical device
		for(size_t i=0; i < m_PhysicalDevices.size(); i++)
		{
			if(physicalDevice == m_PhysicalDevices[i])
			{
				m_PhysicalDeviceData.memIdxMap = m_MemIdxMaps[i];
				break;
			}
		}

		m_DebugManager = new VulkanDebugManager(this, device);

		SAFE_DELETE_ARRAY(modQueues);
		SAFE_DELETE_ARRAY(layerArray);
		SAFE_DELETE_ARRAY(extArray);
	}

	return true;
}
Ejemplo n.º 22
0
// Walks the serialised capture stream for this queue, processing one chunk at a
// time via ProcessChunk().
//
// readType     - state to run in (READING or EXECUTING are the cases handled here).
// startEventID - first event of the range when executing.
// endEventID   - last event of the range when executing.
// partial      - forwarded (negated) to Serialise_BeginCaptureFrame(); when set
//                while executing, the stream is seeked straight to the first event.
//
// The loop stops at the capture footer chunk, once the root event ID passes
// endEventID while executing, or after a single chunk when executing exactly one
// event. m_State is reset to READING before returning.
void WrappedID3D12CommandQueue::ReplayLog(LogState readType, uint32_t startEventID,
                                          uint32_t endEventID, bool partial)
{
  // both are fixed for the whole call, so evaluate them once up front
  const bool singleEvent = (startEventID == endEventID);
  const bool subsectionReplay = (startEventID > 1);

  m_State = readType;

  D3D12ChunkType headerChunk = (D3D12ChunkType)m_pSerialiser->PushContext(NULL, NULL, 1, false);
  RDCASSERTEQUAL(headerChunk, CONTEXT_CAPTURE_HEADER);

  m_pDevice->Serialise_BeginCaptureFrame(!partial);

  if(readType == READING)
  {
    GetResourceManager()->ApplyInitialContents();

    m_pDevice->ExecuteLists();
    m_pDevice->FlushLists();
  }

  m_pSerialiser->PopContext(headerChunk);

  m_Cmd.m_RootEvents.clear();

  if(m_State == EXECUTING)
  {
    FetchAPIEvent firstEvent = GetEvent(startEventID);
    m_Cmd.m_RootEventID = firstEvent.eventID;

    // a non-partial replay has to run through the command list records,
    // so only jump to the first event's file offset for partial replays
    if(partial)
      m_pSerialiser->SetOffset(firstEvent.fileOffset);

    m_Cmd.m_FirstEventID = startEventID;
    m_Cmd.m_LastEventID = endEventID;
  }
  else if(m_State == READING)
  {
    m_Cmd.m_RootEventID = 1;
    m_Cmd.m_RootDrawcallID = 1;
    m_Cmd.m_FirstEventID = 0;
    m_Cmd.m_LastEventID = ~0U;
  }

  while(true)
  {
    // all requested events have been replayed. Command list events
    // aren't 'real' and are simply run through.
    if(m_State == EXECUTING && m_Cmd.m_RootEventID > endEventID)
      break;

    uint64_t chunkOffset = m_pSerialiser->GetOffset();

    D3D12ChunkType chunk = (D3D12ChunkType)m_pSerialiser->PushContext(NULL, NULL, 1, false);

    m_Cmd.m_LastCmdListID = ResourceId();

    ProcessChunk(chunkOffset, chunk);

    RenderDoc::Inst().SetProgress(FileInitialRead,
                                  float(chunkOffset) / float(m_pSerialiser->GetSize()));

    // only a single captured frame is supported for now, so stop at the footer
    if(chunk == CONTEXT_CAPTURE_FOOTER)
      break;

    // stop after one chunk if only a single event was requested
    if(m_State == EXECUTING && singleEvent)
      break;

    // The root event ID advances when the chunk just processed was not a command
    // list event, or when replaying a sub-section of the frame. In the latter case
    // the caller is responsible for staying inside one command list (crossing list
    // boundaries would make the event IDs stop matching up).
    const bool insideCmdList = !(m_Cmd.m_LastCmdListID == ResourceId());

    if(insideCmdList && !subsectionReplay)
    {
      m_Cmd.m_BakedCmdListInfo[m_Cmd.m_LastCmdListID].curEventID++;
    }
    else
    {
      m_Cmd.m_RootEventID++;

      if(subsectionReplay)
        m_pSerialiser->SetOffset(GetEvent(m_Cmd.m_RootEventID).fileOffset);
    }
  }

  if(m_State == READING)
  {
    // order the recorded events by ascending event ID
    struct EventIDLess
    {
      bool operator()(const FetchAPIEvent &lhs, const FetchAPIEvent &rhs)
      {
        return lhs.eventID < rhs.eventID;
      }
    };

    std::sort(m_Cmd.m_Events.begin(), m_Cmd.m_Events.end(), EventIDLess());
  }

  // release any command lists created for partial replay or re-recording
  for(int slot = 0; slot < D3D12CommandData::ePartialNum; slot++)
    SAFE_RELEASE(m_Cmd.m_Partial[slot].resultPartialCmdList);

  for(auto cmd = m_Cmd.m_RerecordCmds.begin(); cmd != m_Cmd.m_RerecordCmds.end(); ++cmd)
    SAFE_RELEASE(cmd->second);

  m_Cmd.m_RerecordCmds.clear();

  m_State = READING;
}
Ejemplo n.º 23
0
// Creates the replay VkInstance from the given init params.
//
// A copy of the unmodified params is stored in m_InitParams, after which params
// itself is edited in place: the app/engine names get a "RenderDoc @ " prefix,
// the layer list goes through StripUnwantedLayers(), the WSI surface extensions
// are removed and AddRequiredExtensions() adds whatever is needed. The instance
// is then created, registered with the resource manager, the per-device members
// are reset, and a debug report callback is installed if the entry point exists.
void WrappedVulkan::Initialise(VkInitParams &params)
{
	m_InitParams = params;

	params.AppName = string("RenderDoc @ ") + params.AppName;
	params.EngineName = string("RenderDoc @ ") + params.EngineName;

	// PORTABILITY verify that layers/extensions are available
	StripUnwantedLayers(params.Layers);

#if defined(FORCE_VALIDATION_LAYERS)
	params.Layers.push_back("VK_LAYER_LUNARG_standard_validation");

	params.Extensions.push_back("VK_EXT_debug_report");
#endif

	// WSI extensions are dropped here: none of the WSI functionality is replayed
	// directly, and the ones needed for creating windows on the current platform
	// get added again below.
	const char *wsiExtensions[] = {
		"VK_KHR_xlib_surface",
		"VK_KHR_xcb_surface",
		"VK_KHR_wayland_surface",
		"VK_KHR_mir_surface",
		"VK_KHR_android_surface",
		"VK_KHR_win32_surface",
	};
	const size_t numWsiExtensions = sizeof(wsiExtensions)/sizeof(wsiExtensions[0]);

	auto ext = params.Extensions.begin();
	while(ext != params.Extensions.end())
	{
		bool isWSI = false;
		for(size_t n=0; n < numWsiExtensions && !isWSI; n++)
			isWSI = (*ext == wsiExtensions[n]);

		if(isWSI)
			ext = params.Extensions.erase(ext);
		else
			++ext;
	}

	AddRequiredExtensions(true, params.Extensions);

	// flatten the string lists into the C-string arrays Vulkan expects
	const size_t numLayers = params.Layers.size();
	const char **layerscstr = new const char *[numLayers];
	for(size_t l=0; l < numLayers; l++)
		layerscstr[l] = params.Layers[l].c_str();

	const size_t numExtensions = params.Extensions.size();
	const char **extscstr = new const char *[numExtensions];
	for(size_t e=0; e < numExtensions; e++)
		extscstr[e] = params.Extensions[e].c_str();

	VkApplicationInfo appinfo = {};
	appinfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
	appinfo.pNext = NULL;
	appinfo.pApplicationName = params.AppName.c_str();
	appinfo.applicationVersion = params.AppVersion;
	appinfo.pEngineName = params.EngineName.c_str();
	appinfo.engineVersion = params.EngineVersion;
	appinfo.apiVersion = VK_API_VERSION_1_0;

	VkInstanceCreateInfo instinfo = {};
	instinfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
	instinfo.pNext = NULL;
	instinfo.flags = 0;
	instinfo.pApplicationInfo = &appinfo;
	instinfo.enabledLayerCount = (uint32_t)numLayers;
	instinfo.ppEnabledLayerNames = layerscstr;
	instinfo.enabledExtensionCount = (uint32_t)numExtensions;
	instinfo.ppEnabledExtensionNames = extscstr;

	m_Instance = VK_NULL_HANDLE;

	VkResult ret = GetInstanceDispatchTable(NULL)->CreateInstance(&instinfo, NULL, &m_Instance);
	RDCASSERTEQUAL(ret, VK_SUCCESS);

	InitInstanceReplayTables(m_Instance);

	GetResourceManager()->WrapResource(m_Instance, m_Instance);
	GetResourceManager()->AddLiveResource(params.InstanceID, m_Instance);

	// nothing device-related exists yet
	m_DbgMsgCallback = VK_NULL_HANDLE;
	m_PhysicalDevice = VK_NULL_HANDLE;
	m_Device = VK_NULL_HANDLE;
	m_QueueFamilyIdx = ~0U;
	m_Queue = VK_NULL_HANDLE;
	m_InternalCmds.Reset();

	// hook up the debug report callback only when the entry point was found
	if(ObjDisp(m_Instance)->CreateDebugReportCallbackEXT)
	{
		VkDebugReportCallbackCreateInfoEXT debugInfo = {};
		debugInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT;
		debugInfo.pNext = NULL;
		debugInfo.flags = VK_DEBUG_REPORT_WARNING_BIT_EXT|VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT|VK_DEBUG_REPORT_ERROR_BIT_EXT;
		debugInfo.pfnCallback = &DebugCallbackStatic;
		debugInfo.pUserData = this;

		ObjDisp(m_Instance)->CreateDebugReportCallbackEXT(Unwrap(m_Instance), &debugInfo, NULL, &m_DbgMsgCallback);
	}

	SAFE_DELETE_ARRAY(layerscstr);
	SAFE_DELETE_ARRAY(extscstr);
}
Ejemplo n.º 24
0
// Walks the serialised capture stream for this context, handing each chunk to
// ProcessChunk().
//
// readType     - state to run in (READING or EXECUTING are the cases handled here).
// startEventID - event to start from when executing; the stream is seeked to it.
// endEventID   - last event to replay when executing.
// partial      - forwarded (negated) to Serialise_BeginCaptureFrame().
//
// The loop ends at the capture footer chunk, or once m_CurEventID passes
// endEventID while executing. After a READING pass the drawcall list and debug
// messages are stored in the latest frame record and the per-resource usage lists
// are sorted and de-duplicated. m_State is reset to READING before returning.
void WrappedID3D11DeviceContext::ReplayLog(LogState readType, uint32_t startEventID, uint32_t endEventID, bool partial)
{
	m_State = readType;

	m_DoStateVerify = true;

	D3D11ChunkType headerChunk = (D3D11ChunkType)m_pSerialiser->PushContext(NULL, NULL, 1, false);
	RDCASSERTEQUAL(headerChunk, CONTEXT_CAPTURE_HEADER);

	// the header names the context that was captured; it is expected to be this one
	ResourceId contextId;
	m_pSerialiser->Serialise("context", contextId);

	WrappedID3D11DeviceContext *liveContext = (WrappedID3D11DeviceContext *)m_pDevice->GetResourceManager()->GetLiveResource(contextId);
	
	RDCASSERT(WrappedID3D11DeviceContext::IsAlloc(liveContext) && liveContext == this);

	Serialise_BeginCaptureFrame(!partial);

	m_pSerialiser->PopContext(headerChunk);

	m_CurEvents.clear();
	
	if(m_State == EXECUTING)
	{
		// jump straight to the first requested event
		FetchAPIEvent firstEvent = GetEvent(startEventID);
		m_CurEventID = firstEvent.eventID;
		m_pSerialiser->SetOffset(firstEvent.fileOffset);

		// clear the map state on this context and every deferred context
		ClearMaps();
		for(size_t c=0; c < m_pDevice->GetNumDeferredContexts(); c++)
		{
			WrappedID3D11DeviceContext *deferred = m_pDevice->GetDeferredContext(c);
			deferred->ClearMaps();
		}
	}
	else if(m_State == READING)
	{
		m_CurEventID = 1;
	}

	m_pDevice->GetResourceManager()->MarkInFrame(true);

	uint64_t firstOffset = m_pSerialiser->GetOffset();

	for(;;)
	{
		// everything that was asked for has been replayed
		if(m_State == EXECUTING && m_CurEventID > endEventID)
			break;

		uint64_t chunkOffset = m_pSerialiser->GetOffset();

		D3D11ChunkType chunk = (D3D11ChunkType)m_pSerialiser->PushContext(NULL, NULL, 1, false);

		ProcessChunk(chunkOffset, chunk, false);
		
		RenderDoc::Inst().SetProgress(FrameEventsRead, float(chunkOffset - firstOffset)/float(m_pSerialiser->GetSize()));
		
		// only a single captured frame is supported for now, so stop at the footer
		if(chunk == CONTEXT_CAPTURE_FOOTER)
			break;
		
		m_CurEventID++;
	}

	if(m_State == READING)
	{
		m_pDevice->GetFrameRecord().back().drawcallList = m_ParentDrawcall.Bake();
		m_pDevice->GetFrameRecord().back().frameInfo.debugMessages = m_pDevice->GetDebugMessages();

		int initialSkips = 0;

		// operator[] creates an (empty) usage entry for every known buffer and texture
		for(auto buf=WrappedID3D11Buffer::m_BufferList.begin(); buf != WrappedID3D11Buffer::m_BufferList.end(); ++buf)
		{
			m_ResourceUses[buf->first];
		}

		for(auto tex=WrappedID3D11Texture1D::m_TextureList.begin(); tex != WrappedID3D11Texture1D::m_TextureList.end(); ++tex)
		{
			m_ResourceUses[tex->first];
		}
		for(auto tex=WrappedID3D11Texture2D::m_TextureList.begin(); tex != WrappedID3D11Texture2D::m_TextureList.end(); ++tex)
		{
			m_ResourceUses[tex->first];
		}
		for(auto tex=WrappedID3D11Texture3D::m_TextureList.begin(); tex != WrappedID3D11Texture3D::m_TextureList.end(); ++tex)
		{
			m_ResourceUses[tex->first];
		}
		
		// Duplicate usages are stripped here in one pass instead of being checked
		// for as they are recorded, so a texture bound in several places within
		// the same draw only shows up once.
		for(auto res = m_ResourceUses.begin(); res != m_ResourceUses.end(); ++res)
		{
			vector<EventUsage> &uses = res->second;
			std::sort(uses.begin(), uses.end());
			uses.erase( std::unique(uses.begin(), uses.end()), uses.end() );
			
#if 0
			ResourceId resid = m_pDevice->GetResourceManager()->GetOriginalID(res->first);
			
			if(m_pDevice->GetResourceManager()->GetInitialContents(resid).resource == NULL)
				continue;
			
			// code disabled for now as skipping these initial states
			// doesn't seem to produce any measurable improvement in any case
			// I've checked
			RDCDEBUG("Resource %llu", resid);
			if(uses.empty())
			{
				RDCDEBUG("Never used!");
				initialSkips++;
			}
			else
			{
				bool written = false;

				for(auto usit = uses.begin(); usit != uses.end(); ++usit)
				{
					ResourceUsage u = usit->usage;

					if(u == eUsage_SO ||
						(u >= eUsage_VS_RWResource && u <= eUsage_CS_RWResource) ||
						u == eUsage_DepthStencilTarget || u == eUsage_ColourTarget)
					{
						written = true;
						break;
					}
				}

				if(written)
				{
					RDCDEBUG("Written in frame - needs initial state");
				}
				else
				{
					RDCDEBUG("Never written to in the frame");
					initialSkips++;
				}
			}
#endif
		}

		//RDCDEBUG("Can skip %d initial states.", initialSkips);
	}

	m_pDevice->GetResourceManager()->MarkInFrame(false);

	m_State = READING;

	m_DoStateVerify = false;
}
Ejemplo n.º 25
0
// Serialises the arguments of vkCreateSwapchainKHR together with the number of
// swapchain images and the image sharing mode.
//
// When m_State is READING no real swapchain is created. Instead, one ordinary
// 2D image per captured swapchain image is created, backed by freshly allocated
// memory, and recorded in m_CreationInfo.m_SwapChain under the swapchain's
// original ID. Creation info, a display name and an initial layout state are
// registered for each image.
//
// pAllocator is not used by this function. Always returns true.
bool WrappedVulkan::Serialise_vkCreateSwapchainKHR(
		Serialiser*                             localSerialiser,
		VkDevice                                device,
		const VkSwapchainCreateInfoKHR*         pCreateInfo,
    const VkAllocationCallbacks*            pAllocator,
		VkSwapchainKHR*                         pSwapChain)
{
	SERIALISE_ELEMENT(ResourceId, devId, GetResID(device));
	SERIALISE_ELEMENT(VkSwapchainCreateInfoKHR, info, *pCreateInfo);
	SERIALISE_ELEMENT(ResourceId, id, GetResID(*pSwapChain));

	uint32_t numIms = 0;

	// while capturing, ask the driver how many images the swapchain has
	if(m_State >= WRITING)
	{
		VkResult vkr = VK_SUCCESS;

		vkr = ObjDisp(device)->GetSwapchainImagesKHR(Unwrap(device), Unwrap(*pSwapChain), &numIms, NULL);
		RDCASSERTEQUAL(vkr, VK_SUCCESS);
	}

	SERIALISE_ELEMENT(uint32_t, numSwapImages, numIms);
	// still serialised so the stream layout is unchanged, but see imInfo below
	SERIALISE_ELEMENT(VkSharingMode, sharingMode, pCreateInfo->imageSharingMode);

	if(m_State == READING)
	{
		// use original ID because we don't create a live version of the swapchain
		SwapchainInfo &swapinfo = m_CreationInfo.m_SwapChain[id];

		swapinfo.format = info.imageFormat;
		swapinfo.extent = info.imageExtent;
		swapinfo.arraySize = info.imageArrayLayers;

		swapinfo.images.resize(numSwapImages);

		device = GetResourceManager()->GetLiveHandle<VkDevice>(devId);

		// The stand-in images are always created with exclusive sharing. No queue
		// family indices are supplied here (count 0, NULL), and
		// VK_SHARING_MODE_CONCURRENT is only valid with more than one queue family
		// index, so reusing the captured mode would be invalid usage.
		const VkImageCreateInfo imInfo = {
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, NULL, 0,
			VK_IMAGE_TYPE_2D, info.imageFormat,
			{ info.imageExtent.width, info.imageExtent.height, 1 },
			1, info.imageArrayLayers, VK_SAMPLE_COUNT_1_BIT,
			VK_IMAGE_TILING_OPTIMAL,
			VK_IMAGE_USAGE_TRANSFER_SRC_BIT|
			VK_IMAGE_USAGE_TRANSFER_DST_BIT|
			VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|
			VK_IMAGE_USAGE_SAMPLED_BIT,
			VK_SHARING_MODE_EXCLUSIVE, 0, NULL,
			VK_IMAGE_LAYOUT_UNDEFINED,
		};

		for(uint32_t i=0; i < numSwapImages; i++)
		{
			VkDeviceMemory mem = VK_NULL_HANDLE;
			VkImage im = VK_NULL_HANDLE;

			VkResult vkr = ObjDisp(device)->CreateImage(Unwrap(device), &imInfo, NULL, &im);
			RDCASSERTEQUAL(vkr, VK_SUCCESS);

			ResourceId liveId = GetResourceManager()->WrapResource(Unwrap(device), im);
			
			VkMemoryRequirements mrq = {0};

			ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(im), &mrq);
			
			VkMemoryAllocateInfo allocInfo = {
				VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL,
				mrq.size, GetGPULocalMemoryIndex(mrq.memoryTypeBits),
			};

			vkr = ObjDisp(device)->AllocateMemory(Unwrap(device), &allocInfo, NULL, &mem);
			RDCASSERTEQUAL(vkr, VK_SUCCESS);
			
			ResourceId memid = GetResourceManager()->WrapResource(Unwrap(device), mem);
			// register as a live-only resource, so it is cleaned up properly
			GetResourceManager()->AddLiveResource(memid, mem);

			vkr = ObjDisp(device)->BindImageMemory(Unwrap(device), Unwrap(im), Unwrap(mem), 0);
			RDCASSERTEQUAL(vkr, VK_SUCCESS);

			// image live ID will be assigned separately in Serialise_vkGetSwapchainImagesKHR
			// memory doesn't have a live ID

			swapinfo.images[i].im = im;

			// fill out image info so we track resource state barriers
			// sneaky-cheeky use of the swapchain's ID here (it's not a live ID because
			// we don't create a live swapchain). This will be picked up in
			// Serialise_vkGetSwapchainImagesKHR to set the data for the live IDs on the
			// swapchain images.
			VulkanCreationInfo::Image &iminfo = m_CreationInfo.m_Image[id];
			iminfo.type = VK_IMAGE_TYPE_2D;
			iminfo.format = info.imageFormat;
			iminfo.extent.width = info.imageExtent.width;
			iminfo.extent.height = info.imageExtent.height;
			iminfo.extent.depth = 1;
			iminfo.mipLevels = 1;
			iminfo.arrayLayers = info.imageArrayLayers;
			iminfo.creationFlags = eTextureCreate_SRV|eTextureCreate_RTV|eTextureCreate_SwapBuffer;
			iminfo.cube = false;
			iminfo.samples = VK_SAMPLE_COUNT_1_BIT;

			m_CreationInfo.m_Names[liveId] = StringFormat::Fmt("Presentable Image %u", i);

			// a single colour range covering mip 0 and every array layer
			VkImageSubresourceRange range;
			range.baseMipLevel = range.baseArrayLayer = 0;
			range.levelCount = 1;
			range.layerCount = info.imageArrayLayers;
			range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

			m_ImageLayouts[liveId].subresourceStates.clear();
			m_ImageLayouts[liveId].subresourceStates.push_back(ImageRegionState(range, UNKNOWN_PREV_IMG_LAYOUT, VK_IMAGE_LAYOUT_UNDEFINED));
		}
	}

	return true;
}
Ejemplo n.º 26
0
// Wrapped vkCreateSwapchainKHR.
//
// Creates the real swapchain from a copy of the create info in which the surface
// and old swapchain handles are unwrapped and VK_IMAGE_USAGE_TRANSFER_SRC_BIT is
// added to the image usage. On success the new handle is wrapped.
//
// When m_State >= WRITING the creation is serialised into a chunk on the
// swapchain's resource record, and a SwapchainInfo is filled out on that record:
// window handle, format/extent/array size, a render pass, and per image an image
// view and framebuffer. The swapchain is also registered in m_SwapLookup and as a
// frame capturer. Otherwise the swapchain is only registered as a live resource.
//
// Returns the VkResult of the underlying CreateSwapchainKHR call.
VkResult WrappedVulkan::vkCreateSwapchainKHR(
		VkDevice                                device,
		const VkSwapchainCreateInfoKHR*         pCreateInfo,
		const VkAllocationCallbacks*            pAllocator,
		VkSwapchainKHR*                         pSwapChain)
{
	VkSwapchainCreateInfoKHR createInfo = *pCreateInfo;

	// make sure we can readback to get the screenshot
	createInfo.imageUsage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
	createInfo.surface = Unwrap(createInfo.surface);
	createInfo.oldSwapchain = Unwrap(createInfo.oldSwapchain);

	VkResult ret = ObjDisp(device)->CreateSwapchainKHR(Unwrap(device), &createInfo, pAllocator, pSwapChain);
	
	if(ret == VK_SUCCESS)
	{
		ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pSwapChain);
		
		if(m_State >= WRITING)
		{
			Chunk *chunk = NULL;

			{
				CACHE_THREAD_SERIALISER();
		
				SCOPED_SERIALISE_CONTEXT(CREATE_SWAP_BUFFER);
				// the original (still wrapped) create info is what gets serialised
				Serialise_vkCreateSwapchainKHR(localSerialiser, device, pCreateInfo, NULL, pSwapChain);

				chunk = scope.Get();
			}

			VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pSwapChain);
			record->AddChunk(chunk);
			
			record->swapInfo = new SwapchainInfo();
			SwapchainInfo &swapInfo = *record->swapInfo;

			// sneaky casting of window handle into record
			swapInfo.wndHandle = (RENDERDOC_WindowHandle)GetRecord(pCreateInfo->surface);

			{
				SCOPED_LOCK(m_SwapLookupLock);
				m_SwapLookup[swapInfo.wndHandle] = *pSwapChain;
			}

			RenderDoc::Inst().AddFrameCapturer(LayerDisp(m_Instance), swapInfo.wndHandle, this);
			
			swapInfo.format = pCreateInfo->imageFormat;
			swapInfo.extent = pCreateInfo->imageExtent;
			swapInfo.arraySize = pCreateInfo->imageArrayLayers;

			// result of the internal helper calls below; kept separate from 'ret',
			// which is the value handed back to the application
			VkResult vkr = VK_SUCCESS;

			const VkLayerDispatchTable *vt = ObjDisp(device);

			// single-subpass render pass with one colour attachment in the swapchain format
			{
				VkAttachmentDescription attDesc = {
					0, pCreateInfo->imageFormat, VK_SAMPLE_COUNT_1_BIT,
					VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE,
					VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE,
					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
				};

				VkAttachmentReference attRef = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };

				VkSubpassDescription sub = {
					0, VK_PIPELINE_BIND_POINT_GRAPHICS,
					0, NULL, // inputs
					1, &attRef, // color
					NULL, // resolve
					NULL, // depth-stencil
					0, NULL, // preserve
				};

				VkRenderPassCreateInfo rpinfo = {
					VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, NULL, 0,
					1, &attDesc,
					1, &sub,
					0, NULL, // dependencies
				};

				vkr = vt->CreateRenderPass(Unwrap(device), &rpinfo, NULL, &swapInfo.rp);
				RDCASSERTEQUAL(vkr, VK_SUCCESS);

				GetResourceManager()->WrapResource(Unwrap(device), swapInfo.rp);
			}

			// serialise out the swap chain images
			{
				// start from zero so a failed count query can't leave an
				// indeterminate value to size the vector/array below
				uint32_t numSwapImages = 0;
				vkr = vt->GetSwapchainImagesKHR(Unwrap(device), Unwrap(*pSwapChain), &numSwapImages, NULL);
				RDCASSERTEQUAL(vkr, VK_SUCCESS);
				
				swapInfo.lastPresent = 0;
				swapInfo.images.resize(numSwapImages);
				for(uint32_t i=0; i < numSwapImages; i++)
				{
					swapInfo.images[i].im = VK_NULL_HANDLE;
					swapInfo.images[i].view = VK_NULL_HANDLE;
					swapInfo.images[i].fb = VK_NULL_HANDLE;
				}

				VkImage* images = new VkImage[numSwapImages];

				// go through our own function so we assign these images IDs
				vkr = vkGetSwapchainImagesKHR(device, *pSwapChain, &numSwapImages, images);
				RDCASSERTEQUAL(vkr, VK_SUCCESS);

				for(uint32_t i=0; i < numSwapImages; i++)
				{
					SwapchainInfo::SwapImage &swapImInfo = swapInfo.images[i];

					// memory doesn't exist for genuine WSI created images
					swapImInfo.im = images[i];

					ResourceId imid = GetResID(images[i]);

					VkImageSubresourceRange range;
					range.baseMipLevel = range.baseArrayLayer = 0;
					range.levelCount = 1;
					range.layerCount = pCreateInfo->imageArrayLayers;
					range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
					
					// fill out image info so we track resource state barriers
					{
						SCOPED_LOCK(m_ImageLayoutsLock);
						m_ImageLayouts[imid].subresourceStates.clear();
						m_ImageLayouts[imid].subresourceStates.push_back(ImageRegionState(range, UNKNOWN_PREV_IMG_LAYOUT, VK_IMAGE_LAYOUT_UNDEFINED));
					}

					// image view and framebuffer for this image, using swapInfo.rp
					{
						VkImageViewCreateInfo info = {
							VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, NULL, 0,
							Unwrap(images[i]), VK_IMAGE_VIEW_TYPE_2D,
							pCreateInfo->imageFormat,
							{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY },
							{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 },
						};

						vkr = vt->CreateImageView(Unwrap(device), &info, NULL, &swapImInfo.view);
						RDCASSERTEQUAL(vkr, VK_SUCCESS);

						GetResourceManager()->WrapResource(Unwrap(device), swapImInfo.view);

						VkFramebufferCreateInfo fbinfo = {
							VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, NULL, 0,
							Unwrap(swapInfo.rp),
							1, UnwrapPtr(swapImInfo.view),
							(uint32_t)pCreateInfo->imageExtent.width, (uint32_t)pCreateInfo->imageExtent.height, 1,
						};

						vkr = vt->CreateFramebuffer(Unwrap(device), &fbinfo, NULL, &swapImInfo.fb);
						RDCASSERTEQUAL(vkr, VK_SUCCESS);

						GetResourceManager()->WrapResource(Unwrap(device), swapImInfo.fb);
					}
				}

				SAFE_DELETE_ARRAY(images);
			}
		}
		else
		{
			GetResourceManager()->AddLiveResource(id, *pSwapChain);
		}
	}

	return ret;
}
Ejemplo n.º 27
0
MeshDisplayPipelines VulkanDebugManager::CacheMeshDisplayPipelines(VkPipelineLayout pipeLayout,
                                                                   const MeshFormat &primary,
                                                                   const MeshFormat &secondary)
{
  // generate a key to look up the map
  uint64_t key = 0;

  uint64_t bit = 0;

  if(primary.indexByteStride == 4)
    key |= 1ULL << bit;
  bit++;

  RDCASSERT((uint32_t)primary.topology < 64);
  key |= uint64_t((uint32_t)primary.topology & 0x3f) << bit;
  bit += 6;

  VkFormat primaryFmt = MakeVkFormat(primary.format);
  VkFormat secondaryFmt = secondary.vertexResourceId == ResourceId()
                              ? VK_FORMAT_UNDEFINED
                              : MakeVkFormat(secondary.format);

  RDCCOMPILE_ASSERT(VK_FORMAT_RANGE_SIZE <= 255,
                    "Mesh pipeline cache key needs an extra bit for format");

  key |= uint64_t((uint32_t)primaryFmt & 0xff) << bit;
  bit += 8;

  key |= uint64_t((uint32_t)secondaryFmt & 0xff) << bit;
  bit += 8;

  RDCASSERT(primary.vertexByteStride <= 0xffff);
  key |= uint64_t((uint32_t)primary.vertexByteStride & 0xffff) << bit;
  bit += 16;

  if(secondary.vertexResourceId != ResourceId())
  {
    RDCASSERT(secondary.vertexByteStride <= 0xffff);
    key |= uint64_t((uint32_t)secondary.vertexByteStride & 0xffff) << bit;
  }
  bit += 16;

  if(primary.instanced)
    key |= 1ULL << bit;
  bit++;

  if(secondary.instanced)
    key |= 1ULL << bit;
  bit++;

  // only 64 bits, make sure they all fit
  RDCASSERT(bit < 64);

  MeshDisplayPipelines &cache = m_CachedMeshPipelines[key];

  if(cache.pipes[(uint32_t)SolidShade::NoSolid] != VK_NULL_HANDLE)
    return cache;

  const VkLayerDispatchTable *vt = ObjDisp(m_Device);
  VkResult vkr = VK_SUCCESS;

  // should we try and evict old pipelines from the cache here?
  // or just keep them forever

  VkVertexInputBindingDescription binds[] = {
      // primary
      {0, primary.vertexByteStride,
       primary.instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX},
      // secondary
      {1, secondary.vertexByteStride,
       secondary.instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX}};

  RDCASSERT(primaryFmt != VK_FORMAT_UNDEFINED);

  VkVertexInputAttributeDescription vertAttrs[] = {
      // primary
      {
          0, 0, primaryFmt, 0,
      },
      // secondary
      {
          1, 0, primaryFmt, 0,
      },
  };

  VkPipelineVertexInputStateCreateInfo vi = {
      VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, NULL, 0, 1, binds, 2, vertAttrs,
  };

  VkPipelineShaderStageCreateInfo stages[3] = {
      {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, NULL, 0, VK_SHADER_STAGE_ALL_GRAPHICS,
       VK_NULL_HANDLE, "main", NULL},
      {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, NULL, 0, VK_SHADER_STAGE_ALL_GRAPHICS,
       VK_NULL_HANDLE, "main", NULL},
      {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, NULL, 0, VK_SHADER_STAGE_ALL_GRAPHICS,
       VK_NULL_HANDLE, "main", NULL},
  };

  VkPipelineInputAssemblyStateCreateInfo ia = {
      VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, NULL, 0,
      primary.topology >= Topology::PatchList ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST
                                              : MakeVkPrimitiveTopology(primary.topology),
      false,
  };

  VkRect2D scissor = {{0, 0}, {16384, 16384}};

  VkPipelineViewportStateCreateInfo vp = {
      VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, NULL, 0, 1, NULL, 1, &scissor};

  VkPipelineRasterizationStateCreateInfo rs = {
      VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
      NULL,
      0,
      false,
      false,
      VK_POLYGON_MODE_FILL,
      VK_CULL_MODE_NONE,
      VK_FRONT_FACE_CLOCKWISE,
      false,
      0.0f,
      0.0f,
      0.0f,
      1.0f,
  };

  VkPipelineMultisampleStateCreateInfo msaa = {
      VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
      NULL,
      0,
      VULKAN_MESH_VIEW_SAMPLES,
      false,
      0.0f,
      NULL,
      false,
      false};

  VkPipelineDepthStencilStateCreateInfo ds = {
      VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      NULL,
      0,
      true,
      true,
      VK_COMPARE_OP_LESS_OR_EQUAL,
      false,
      false,
      {VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS, 0, 0, 0},
      {VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS, 0, 0, 0},
      0.0f,
      1.0f,
  };

  VkPipelineColorBlendAttachmentState attState = {
      false,
      VK_BLEND_FACTOR_ONE,
      VK_BLEND_FACTOR_ZERO,
      VK_BLEND_OP_ADD,
      VK_BLEND_FACTOR_ONE,
      VK_BLEND_FACTOR_ZERO,
      VK_BLEND_OP_ADD,
      0xf,
  };

  VkPipelineColorBlendStateCreateInfo cb = {
      VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      NULL,
      0,
      false,
      VK_LOGIC_OP_NO_OP,
      1,
      &attState,
      {1.0f, 1.0f, 1.0f, 1.0f}};

  VkDynamicState dynstates[] = {VK_DYNAMIC_STATE_VIEWPORT};

  VkPipelineDynamicStateCreateInfo dyn = {
      VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
      NULL,
      0,
      ARRAY_COUNT(dynstates),
      dynstates,
  };

  VkRenderPass rp;    // compatible render pass

  {
    VkAttachmentDescription attDesc[] = {
        {0, VK_FORMAT_R8G8B8A8_SRGB, VULKAN_MESH_VIEW_SAMPLES, VK_ATTACHMENT_LOAD_OP_LOAD,
         VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
         VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL},
        {0, VK_FORMAT_D32_SFLOAT, VULKAN_MESH_VIEW_SAMPLES, VK_ATTACHMENT_LOAD_OP_LOAD,
         VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
         VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL},
    };

    VkAttachmentReference attRef = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
    VkAttachmentReference dsRef = {1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL};

    VkSubpassDescription sub = {
        0,      VK_PIPELINE_BIND_POINT_GRAPHICS,
        0,      NULL,       // inputs
        1,      &attRef,    // color
        NULL,               // resolve
        &dsRef,             // depth-stencil
        0,      NULL,       // preserve
    };

    VkRenderPassCreateInfo rpinfo = {
        VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        NULL,
        0,
        2,
        attDesc,
        1,
        &sub,
        0,
        NULL,    // dependencies
    };

    vt->CreateRenderPass(Unwrap(m_Device), &rpinfo, NULL, &rp);
  }

  VkGraphicsPipelineCreateInfo pipeInfo = {
      VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      NULL,
      0,
      2,
      stages,
      &vi,
      &ia,
      NULL,    // tess
      &vp,
      &rs,
      &msaa,
      &ds,
      &cb,
      &dyn,
      Unwrap(pipeLayout),
      rp,
      0,                 // sub pass
      VK_NULL_HANDLE,    // base pipeline handle
      0,                 // base pipeline index
  };

  // wireframe pipeline
  stages[0].module = Unwrap(m_pDriver->GetShaderCache()->GetBuiltinModule(BuiltinShader::MeshVS));
  stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
  stages[1].module = Unwrap(m_pDriver->GetShaderCache()->GetBuiltinModule(BuiltinShader::MeshFS));
  stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;

  rs.polygonMode = VK_POLYGON_MODE_LINE;
  rs.lineWidth = 1.0f;
  ds.depthTestEnable = false;

  vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL,
                                    &cache.pipes[MeshDisplayPipelines::ePipe_Wire]);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  ds.depthTestEnable = true;

  vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL,
                                    &cache.pipes[MeshDisplayPipelines::ePipe_WireDepth]);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // solid shading pipeline
  rs.polygonMode = VK_POLYGON_MODE_FILL;
  ds.depthTestEnable = false;

  vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL,
                                    &cache.pipes[MeshDisplayPipelines::ePipe_Solid]);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  ds.depthTestEnable = true;

  vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL,
                                    &cache.pipes[MeshDisplayPipelines::ePipe_SolidDepth]);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  if(secondary.vertexResourceId != ResourceId())
  {
    // pull secondary information from second vertex buffer
    vertAttrs[1].binding = 1;
    vertAttrs[1].format = secondaryFmt;
    RDCASSERT(secondaryFmt != VK_FORMAT_UNDEFINED);

    vi.vertexBindingDescriptionCount = 2;

    vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL,
                                      &cache.pipes[MeshDisplayPipelines::ePipe_Secondary]);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  vertAttrs[1].binding = 0;
  vi.vertexBindingDescriptionCount = 1;

  // flat lit pipeline, needs geometry shader to calculate face normals
  stages[2].module = Unwrap(m_pDriver->GetShaderCache()->GetBuiltinModule(BuiltinShader::MeshGS));
  stages[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
  pipeInfo.stageCount = 3;

  if(stages[2].module != VK_NULL_HANDLE)
  {
    vkr = vt->CreateGraphicsPipelines(Unwrap(m_Device), VK_NULL_HANDLE, 1, &pipeInfo, NULL,
                                      &cache.pipes[MeshDisplayPipelines::ePipe_Lit]);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  for(uint32_t i = 0; i < MeshDisplayPipelines::ePipe_Count; i++)
    if(cache.pipes[i] != VK_NULL_HANDLE)
      m_pDriver->GetResourceManager()->WrapResource(Unwrap(m_Device), cache.pipes[i]);

  vt->DestroyRenderPass(Unwrap(m_Device), rp, NULL);

  return cache;
}
Ejemplo n.º 28
0
// Wrapped implementation of vkQueuePresentKHR.
//
// In order, this function:
//  - ticks RenderDoc and flushes pending dirty resources while in WRITING_IDLE,
//  - bumps the frame counter,
//  - builds an unwrapped copy of the present info (swapchains + wait semaphores),
//  - records which image of the *first* swapchain was presented (used later for
//    the capture thumbnail),
//  - while idle, updates the frame-time statistics and, if the overlay is
//    enabled, records and submits a command buffer that draws the overlay text
//    onto the image about to be presented,
//  - forwards the present to the real driver,
//  - for the active window only, ends any non-application-controlled capture in
//    progress and starts a new one if a capture was triggered for this frame.
//
// Only the first swapchain in pPresentInfo is processed for overlay/capture
// purposes; pPresentInfo->pNext is asserted to be NULL.
//
// Returns the VkResult of the underlying QueuePresentKHR call.
VkResult WrappedVulkan::vkQueuePresentKHR(
			VkQueue                                      queue,
			const VkPresentInfoKHR*                      pPresentInfo)
{
	if(m_State == WRITING_IDLE)
	{
		RenderDoc::Inst().Tick();

		GetResourceManager()->FlushPendingDirty();
	}

	m_FrameCounter++; // first present becomes frame #1, this function is at the end of the frame

	// only nag every 100 frames so the log isn't flooded
	if(pPresentInfo->swapchainCount > 1 && (m_FrameCounter % 100) == 0)
	{
		RDCWARN("Presenting multiple swapchains at once - only first will be processed");
	}

	// the real driver must be handed unwrapped handles, so build parallel arrays
	// and point a copy of the present info at them
	vector<VkSwapchainKHR> unwrappedSwaps;
	vector<VkSemaphore> unwrappedSems;

	VkPresentInfoKHR unwrappedInfo = *pPresentInfo;

	for(uint32_t i=0; i < unwrappedInfo.swapchainCount; i++)
		unwrappedSwaps.push_back(Unwrap(unwrappedInfo.pSwapchains[i]));
	for(uint32_t i=0; i < unwrappedInfo.waitSemaphoreCount; i++)
		unwrappedSems.push_back(Unwrap(unwrappedInfo.pWaitSemaphores[i]));

	unwrappedInfo.pSwapchains = unwrappedInfo.swapchainCount ? &unwrappedSwaps[0] : NULL;
	unwrappedInfo.pWaitSemaphores = unwrappedInfo.waitSemaphoreCount ? &unwrappedSems[0] : NULL;

	// Don't support any extensions for present info
	RDCASSERT(pPresentInfo->pNext == NULL);

	VkResourceRecord *swaprecord = GetRecord(pPresentInfo->pSwapchains[0]);
	RDCASSERT(swaprecord->swapInfo);

	SwapchainInfo &swapInfo = *swaprecord->swapInfo;

	bool activeWindow = RenderDoc::Inst().IsActiveWindow(LayerDisp(m_Instance), swapInfo.wndHandle);

	// need to record which image was last flipped so we can get the correct backbuffer
	// for a thumbnail in EndFrameCapture
	swapInfo.lastPresent = pPresentInfo->pImageIndices[0];
	m_LastSwap = swaprecord->GetResourceID();

	if(m_State == WRITING_IDLE)
	{
		m_FrameTimes.push_back(m_FrameTimer.GetMilliseconds());
		m_TotalTime += m_FrameTimes.back();
		m_FrameTimer.Restart();

		// update every second
		if(m_TotalTime > 1000.0)
		{
			m_MinFrametime = 10000.0;
			m_MaxFrametime = 0.0;
			m_AvgFrametime = 0.0;

			m_TotalTime = 0.0;

			for(size_t i=0; i < m_FrameTimes.size(); i++)
			{
				m_AvgFrametime += m_FrameTimes[i];
				if(m_FrameTimes[i] < m_MinFrametime)
					m_MinFrametime = m_FrameTimes[i];
				if(m_FrameTimes[i] > m_MaxFrametime)
					m_MaxFrametime = m_FrameTimes[i];
			}

			// m_FrameTimes has at least the entry pushed above, so this never divides by zero
			m_AvgFrametime /= double(m_FrameTimes.size());

			m_FrameTimes.clear();
		}

		uint32_t overlay = RenderDoc::Inst().GetOverlayBits();

		if(overlay & eRENDERDOC_Overlay_Enabled)
		{
			VkRenderPass rp = swapInfo.rp;
			VkImage im = swapInfo.images[pPresentInfo->pImageIndices[0]].im;
			VkFramebuffer fb = swapInfo.images[pPresentInfo->pImageIndices[0]].fb;

			VkLayerDispatchTable *vt = ObjDisp(GetDev());

			TextPrintState textstate = { GetNextCmd(), rp, fb, swapInfo.extent.width, swapInfo.extent.height };

			VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };

			VkResult vkr = vt->BeginCommandBuffer(Unwrap(textstate.cmd), &beginInfo);
			RDCASSERTEQUAL(vkr, VK_SUCCESS);

			// transition the backbuffer from present layout to colour attachment so
			// the overlay text can be rendered into it
			VkImageMemoryBarrier bbBarrier = {
				VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, NULL,
				0, 0, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
				VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
				Unwrap(im),
				{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }
			};

			bbBarrier.srcAccessMask = VK_ACCESS_ALL_READ_BITS;
			bbBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;

			DoPipelineBarrier(textstate.cmd, 1, &bbBarrier);

			GetDebugManager()->BeginText(textstate);

			if(activeWindow)
			{
				vector<RENDERDOC_InputButton> keys = RenderDoc::Inst().GetCaptureKeys();

				string overlayText = "Vulkan. ";

				for(size_t i=0; i < keys.size(); i++)
				{
					if(i > 0)
						overlayText += ", ";

					overlayText += ToStr::Get(keys[i]);
				}

				if(!keys.empty())
					overlayText += " to capture.";

				if(overlay & eRENDERDOC_Overlay_FrameNumber)
				{
					overlayText += StringFormat::Fmt(" Frame: %d.", m_FrameCounter);
				}
				if(overlay & eRENDERDOC_Overlay_FrameRate)
				{
					overlayText += StringFormat::Fmt(" %.2lf ms (%.2lf .. %.2lf) (%.0lf FPS)",
						m_AvgFrametime, m_MinFrametime, m_MaxFrametime, 1000.0f/m_AvgFrametime);
				}

				// y is the text row; each rendered line advances it by one
				float y=0.0f;

				if(!overlayText.empty())
				{
					// RenderText takes a printf-style format, so pass the already-built
					// string as an argument rather than as the format itself
					GetDebugManager()->RenderText(textstate, 0.0f, y, "%s", overlayText.c_str());
					y += 1.0f;
				}

				if(overlay & eRENDERDOC_Overlay_CaptureList)
				{
					GetDebugManager()->RenderText(textstate, 0.0f, y, "%d Captures saved.\n", (uint32_t)m_FrameRecord.size());
					y += 1.0f;

					// list only captures whose timestamp is less than 20 units
					// (of Timing::GetUnixTimestamp) old
					uint64_t now = Timing::GetUnixTimestamp();
					for(size_t i=0; i < m_FrameRecord.size(); i++)
					{
						if(now - m_FrameRecord[i].frameInfo.captureTime < 20)
						{
							GetDebugManager()->RenderText(textstate, 0.0f, y, "Captured frame %d.\n", m_FrameRecord[i].frameInfo.frameNumber);
							y += 1.0f;
						}
					}
				}

#if !defined(RELEASE)
				GetDebugManager()->RenderText(textstate, 0.0f, y, "%llu chunks - %.2f MB", Chunk::NumLiveChunks(), float(Chunk::TotalMem())/1024.0f/1024.0f);
				y += 1.0f;
#endif
			}
			else
			{
				vector<RENDERDOC_InputButton> keys = RenderDoc::Inst().GetFocusKeys();

				string str = "Vulkan. Inactive swapchain.";

				for(size_t i=0; i < keys.size(); i++)
				{
					if(i == 0)
						str += " ";
					else
						str += ", ";

					str += ToStr::Get(keys[i]);
				}

				if(!keys.empty())
					str += " to cycle between swapchains";

				// as above: never use a runtime-built string as the format
				GetDebugManager()->RenderText(textstate, 0.0f, 0.0f, "%s", str.c_str());
			}

			GetDebugManager()->EndText(textstate);

			// reverse the earlier barrier: colour attachment back to present layout
			std::swap(bbBarrier.oldLayout, bbBarrier.newLayout);
			bbBarrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
			bbBarrier.dstAccessMask = VK_ACCESS_ALL_READ_BITS;

			DoPipelineBarrier(textstate.cmd, 1, &bbBarrier);

			ObjDisp(textstate.cmd)->EndCommandBuffer(Unwrap(textstate.cmd));

			SubmitCmds();

			FlushQ();
		}
	}

	VkResult vkr = ObjDisp(queue)->QueuePresentKHR(Unwrap(queue), &unwrappedInfo);

	// capture start/stop is only driven by presents on the active window
	if(!activeWindow)
		return vkr;

	RenderDoc::Inst().SetCurrentDriver(RDC_Vulkan);

	// kill any current capture that isn't application defined
	if(m_State == WRITING_CAPFRAME && !m_AppControlledCapture)
		RenderDoc::Inst().EndFrameCapture(LayerDisp(m_Instance), swapInfo.wndHandle);

	if(RenderDoc::Inst().ShouldTriggerCapture(m_FrameCounter) && m_State == WRITING_IDLE)
	{
		RenderDoc::Inst().StartFrameCapture(LayerDisp(m_Instance), swapInfo.wndHandle);

		m_AppControlledCapture = false;
	}

	return vkr;
}
Ejemplo n.º 29
0
// Replays the whole capture with GPU queries around the events and returns the
// requested counter values.
//
// counters: the list of GPUCounter values to fetch.
//
// A timestamp query pool is always created; occlusion and pipeline-statistics
// pools are only created when a requested counter needs them AND the device
// reports the matching feature (occlusionQueryPrecise / pipelineStatisticsQuery).
// When a pool was not created the corresponding counters are reported as 0.
//
// Returns one CounterResult per (timed event, requested counter) pair, plus a
// duplicated result for every alias event reported by the timer callback. The
// returned vector is sorted.
vector<CounterResult> VulkanReplay::FetchCounters(const vector<GPUCounter> &counters)
{
  uint32_t maxEID = m_pDriver->GetMaxEID();

  VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures();

  VkDevice dev = m_pDriver->GetDev();

  // two timestamps (before/after) per event
  VkQueryPoolCreateInfo timeStampPoolCreateInfo = {
      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_TIMESTAMP, maxEID * 2, 0};

  VkQueryPoolCreateInfo occlusionPoolCreateInfo = {
      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_OCCLUSION, maxEID, 0};

  // All 11 statistics are enabled. Results for one query are written as one
  // value per enabled bit, ordered from the least significant bit upwards, so
  // with every bit set the slot index equals the bit position:
  //   0 IA vertices        1 IA primitives       2 VS invocations
  //   3 GS invocations     4 GS primitives       5 clipping invocations
  //   6 clipping primitives 7 FS invocations     8 TCS patches
  //   9 TES invocations   10 CS invocations
  VkQueryPipelineStatisticFlags pipeStatsFlags =
      VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT |
      VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT |
      VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT |
      VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT |
      VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
      VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
      VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT |
      VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT |
      VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT |
      VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT |
      VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;

  VkQueryPoolCreateInfo pipeStatsPoolCreateInfo = {
      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL,   0,
      VK_QUERY_TYPE_PIPELINE_STATISTICS,        maxEID, pipeStatsFlags};

  VkQueryPool timeStampPool;
  VkResult vkr =
      ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &timeStampPoolCreateInfo, NULL, &timeStampPool);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // work out which optional query pools the requested counters actually need
  bool occlNeeded = false;
  bool statsNeeded = false;

  for(size_t c = 0; c < counters.size(); c++)
  {
    switch(counters[c])
    {
      case GPUCounter::InputVerticesRead:
      case GPUCounter::IAPrimitives:
      case GPUCounter::GSPrimitives:
      case GPUCounter::RasterizerInvocations:
      case GPUCounter::RasterizedPrimitives:
      case GPUCounter::VSInvocations:
      case GPUCounter::TCSInvocations:
      case GPUCounter::TESInvocations:
      case GPUCounter::GSInvocations:
      case GPUCounter::PSInvocations:
      case GPUCounter::CSInvocations: statsNeeded = true; break;
      case GPUCounter::SamplesWritten: occlNeeded = true; break;
      default: break;
    }
  }

  VkQueryPool occlusionPool = VK_NULL_HANDLE;
  if(availableFeatures.occlusionQueryPrecise && occlNeeded)
  {
    vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &occlusionPoolCreateInfo, NULL, &occlusionPool);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  VkQueryPool pipeStatsPool = VK_NULL_HANDLE;
  if(availableFeatures.pipelineStatisticsQuery && statsNeeded)
  {
    vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &pipeStatsPoolCreateInfo, NULL, &pipeStatsPool);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  // record a command buffer that resets every query in the pools we created
  VkCommandBuffer cmd = m_pDriver->GetNextCmd();

  VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
                                        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};

  vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), timeStampPool, 0, maxEID * 2);
  if(occlusionPool != VK_NULL_HANDLE)
    ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), occlusionPool, 0, maxEID);
  if(pipeStatsPool != VK_NULL_HANDLE)
    ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), pipeStatsPool, 0, maxEID);

  vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

#if ENABLED(SINGLE_FLUSH_VALIDATE)
  m_pDriver->SubmitCmds();
#endif

  VulkanGPUTimerCallback cb(m_pDriver, this, timeStampPool, occlusionPool, pipeStatsPool);

  // replay the events to perform all the queries
  m_pDriver->ReplayLog(0, maxEID, eReplay_Full);

  // Read back the results. Each readback is skipped when no events were timed:
  // there is nothing to fetch, and a zero-sized query readback / indexing an
  // empty vector must be avoided.
  vector<uint64_t> m_TimeStampData;
  m_TimeStampData.resize(cb.m_Results.size() * 2);

  if(!m_TimeStampData.empty())
  {
    vkr = ObjDisp(dev)->GetQueryPoolResults(
        Unwrap(dev), timeStampPool, 0, (uint32_t)m_TimeStampData.size(),
        sizeof(uint64_t) * m_TimeStampData.size(), m_TimeStampData.data(), sizeof(uint64_t),
        VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), timeStampPool, NULL);

  // stays zero-filled if the occlusion pool was never created
  vector<uint64_t> m_OcclusionData;
  m_OcclusionData.resize(cb.m_Results.size());
  if(occlusionPool != VK_NULL_HANDLE)
  {
    if(!m_OcclusionData.empty())
    {
      vkr = ObjDisp(dev)->GetQueryPoolResults(
          Unwrap(dev), occlusionPool, 0, (uint32_t)m_OcclusionData.size(),
          sizeof(uint64_t) * m_OcclusionData.size(), m_OcclusionData.data(), sizeof(uint64_t),
          VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);
    }

    ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), occlusionPool, NULL);
  }

  // 11 values per event (one per enabled statistic); stays zero-filled if the
  // pipeline statistics pool was never created
  vector<uint64_t> m_PipeStatsData;
  m_PipeStatsData.resize(cb.m_Results.size() * 11);
  if(pipeStatsPool != VK_NULL_HANDLE)
  {
    if(!m_PipeStatsData.empty())
    {
      vkr = ObjDisp(dev)->GetQueryPoolResults(
          Unwrap(dev), pipeStatsPool, 0, (uint32_t)cb.m_Results.size(),
          sizeof(uint64_t) * m_PipeStatsData.size(), m_PipeStatsData.data(),
          sizeof(uint64_t) * 11, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);
    }

    ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), pipeStatsPool, NULL);
  }

  vector<CounterResult> ret;

  for(size_t i = 0; i < cb.m_Results.size(); i++)
  {
    for(size_t c = 0; c < counters.size(); c++)
    {
      CounterResult result;

      result.eventId = cb.m_Results[i];
      result.counter = counters[c];

      switch(counters[c])
      {
        case GPUCounter::EventGPUDuration:
        {
          uint64_t delta = m_TimeStampData[i * 2 + 1] - m_TimeStampData[i * 2 + 0];
          result.value.d = (double(m_pDriver->GetDeviceProps().limits.timestampPeriod) *
                            double(delta))                  // nanoseconds
                           / (1000.0 * 1000.0 * 1000.0);    // to seconds
        }
        break;
        // pipeline statistics: slot index == bit position of the statistic flag
        case GPUCounter::InputVerticesRead: result.value.u64 = m_PipeStatsData[i * 11 + 0]; break;
        case GPUCounter::IAPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 1]; break;
        case GPUCounter::GSPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 4]; break;
        case GPUCounter::RasterizerInvocations:
          result.value.u64 = m_PipeStatsData[i * 11 + 5];
          break;
        case GPUCounter::RasterizedPrimitives:
          result.value.u64 = m_PipeStatsData[i * 11 + 6];
          break;
        case GPUCounter::SamplesWritten: result.value.u64 = m_OcclusionData[i]; break;
        case GPUCounter::VSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 2]; break;
        case GPUCounter::TCSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 8]; break;
        case GPUCounter::TESInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 9]; break;
        case GPUCounter::GSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 3]; break;
        // fragment shader invocations live in slot 7 (slot 9 is TES invocations)
        case GPUCounter::PSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 7]; break;
        case GPUCounter::CSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 10]; break;
        default: break;
      }
      ret.push_back(result);
    }
  }

  // each alias pair is (event that has a result, event that should share it)
  for(size_t i = 0; i < cb.m_AliasEvents.size(); i++)
  {
    for(size_t c = 0; c < counters.size(); c++)
    {
      CounterResult search;
      search.counter = counters[c];
      search.eventId = cb.m_AliasEvents[i].first;

      // find the result we're aliasing
      auto it = std::find(ret.begin(), ret.end(), search);
      if(it != ret.end())
      {
        // duplicate the result and append
        CounterResult aliased = *it;
        aliased.eventId = cb.m_AliasEvents[i].second;
        ret.push_back(aliased);
      }
      else
      {
        RDCERR("Expected to find alias-target result for EID %u counter %u, but didn't",
               search.eventId, search.counter);
      }
    }
  }

  // sort so that the alias results appear in the right places
  std::sort(ret.begin(), ret.end());

  return ret;
}
Ejemplo n.º 30
0
// Measures the GPU duration of events using D3D11 timestamp queries.
//
// counters: list of counter IDs to fetch. Only the first entry is used, and it
//           is asserted that there is exactly one entry and that it is
//           eCounter_EventGPUDuration.
//
// Returns one CounterResult per timer filled in by FillTimers; the value is the
// elapsed time between the timer's 'before' and 'after' timestamps divided by
// the disjoint query's reported frequency, or 0.0 when either query object is
// missing. Returns an empty vector if no counters were given or if a query
// object could not be created.
vector<CounterResult> D3D11DebugManager::FetchCounters(const vector<uint32_t> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  uint32_t counterID = counters[0];
  RDCASSERT(counters.size() == 1);
  RDCASSERT(counterID == eCounter_EventGPUDuration);

  SCOPED_TIMER("Fetch Counters for %u", counterID);

  D3D11_QUERY_DESC disjointdesc = {D3D11_QUERY_TIMESTAMP_DISJOINT, 0};
  ID3D11Query *disjoint = NULL;

  D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};
  ID3D11Query *start = NULL;

  HRESULT hr = S_OK;

  hr = m_pDevice->CreateQuery(&disjointdesc, &disjoint);
  if(FAILED(hr))
  {
    RDCERR("Failed to create disjoint query %08x", hr);
    return ret;
  }

  hr = m_pDevice->CreateQuery(&qdesc, &start);
  if(FAILED(hr))
  {
    RDCERR("Failed to create start query %08x", hr);
    // the disjoint query was created successfully above, don't leak it
    SAFE_RELEASE(disjoint);
    return ret;
  }

  CounterContext ctx;

  for(int loop = 0; loop < 1; loop++)
  {
    // issue all the queries, bracketed by the disjoint query
    {
      m_pImmediateContext->Begin(disjoint);

      m_pImmediateContext->End(start);

      ctx.eventStart = 0;
      ctx.reuseIdx = loop == 0 ? -1 : 0;
      FillTimers(ctx, m_WrappedContext->GetRootDraw());

      m_pImmediateContext->End(disjoint);
    }

    // read the results back
    {
      // spin until the disjoint query's data is available
      D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
      do
      {
        hr = m_pImmediateContext->GetData(disjoint, &disjointData,
                                          sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0);
      } while(hr == S_FALSE);
      RDCASSERTEQUAL(hr, S_OK);

      RDCASSERT(!disjointData.Disjoint);

      // timestamp ticks per second; dividing a tick delta by this gives seconds
      double ticksPerSecond = double(disjointData.Frequency);

      UINT64 a = 0;
      hr = m_pImmediateContext->GetData(start, &a, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      for(size_t i = 0; i < ctx.timers.size(); i++)
      {
        if(ctx.timers[i].before && ctx.timers[i].after)
        {
          hr = m_pImmediateContext->GetData(ctx.timers[i].before, &a, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 b = 0;
          hr = m_pImmediateContext->GetData(ctx.timers[i].after, &b, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          double duration = (double(b - a) / ticksPerSecond);

          ret.push_back(CounterResult(ctx.timers[i].eventID, counterID, duration));

          a = b;
        }
        else
        {
          // no usable query pair for this event, report a zero duration
          ret.push_back(CounterResult(ctx.timers[i].eventID, counterID, 0.0));
        }
      }
    }
  }

  // release every query object that was created for the timers
  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    SAFE_RELEASE(ctx.timers[i].before);
    SAFE_RELEASE(ctx.timers[i].after);
  }

  SAFE_RELEASE(disjoint);
  SAFE_RELEASE(start);

  return ret;
}