コード例 #1
0
ファイル: replay_output.cpp プロジェクト: EecheE/renderdoc
bool ReplayOutput::SetPixelContextLocation(uint32_t x, uint32_t y)
{
  m_ContextX = RDCMAX((float)x, 0.0f);
  m_ContextY = RDCMAX((float)y, 0.0f);

  DisplayContext();

  return true;
}
コード例 #2
0
ファイル: replay_output.cpp プロジェクト: DrChat/renderdoc
uint32_t ReplayOutput::PickVertex(uint32_t eventID, uint32_t x, uint32_t y, uint32_t *pickedInstance)
{
  FetchDrawcall *draw = m_pRenderer->GetDrawcallByEID(eventID);

  if(!draw)
    return ~0U;
  if(m_RenderData.meshDisplay.type == eMeshDataStage_Unknown)
    return ~0U;
  if((draw->flags & eDraw_Drawcall) == 0)
    return ~0U;

  MeshDisplay cfg = m_RenderData.meshDisplay;

  if(cfg.position.buf == ResourceId())
    return ~0U;

  cfg.position.buf = m_pDevice->GetLiveID(cfg.position.buf);
  cfg.position.idxbuf = m_pDevice->GetLiveID(cfg.position.idxbuf);
  cfg.second.buf = m_pDevice->GetLiveID(cfg.second.buf);
  cfg.second.idxbuf = m_pDevice->GetLiveID(cfg.second.idxbuf);

  *pickedInstance = 0;

  if(draw->flags & eDraw_Instanced)
  {
    uint32_t maxInst = 0;
    if(m_RenderData.meshDisplay.showPrevInstances)
      maxInst = RDCMAX(1U, m_RenderData.meshDisplay.curInstance);
    if(m_RenderData.meshDisplay.showAllInstances)
      maxInst = RDCMAX(1U, draw->numInstances);

    for(uint32_t inst = 0; inst < maxInst; inst++)
    {
      // get the 'most final' stage
      MeshFormat fmt = m_pDevice->GetPostVSBuffers(draw->eventID, inst, eMeshDataStage_GSOut);
      if(fmt.buf == ResourceId())
        fmt = m_pDevice->GetPostVSBuffers(draw->eventID, inst, eMeshDataStage_VSOut);

      cfg.position = fmt;

      uint32_t ret = m_pDevice->PickVertex(m_EventID, cfg, x, y);
      if(ret != ~0U)
      {
        *pickedInstance = inst;
        return ret;
      }
    }

    return ~0U;
  }
  else
  {
    return m_pDevice->PickVertex(m_EventID, cfg, x, y);
  }
}
コード例 #3
0
FetchDrawcall *ReplayRenderer::SetupDrawcallPointers(FetchFrameInfo frame, rdctype::array<FetchDrawcall> &draws, FetchDrawcall *parent, FetchDrawcall *previous)
{
	FetchDrawcall *ret = NULL;

	for(int32_t i=0; i < draws.count; i++)
	{
		FetchDrawcall *draw = &draws[i];

		draw->parent = parent ? parent->drawcallID : 0;

		if(draw->children.count > 0)
		{
			ret = previous = SetupDrawcallPointers(frame, draw->children, draw, previous);
		}
		else if(draw->flags & (eDraw_PushMarker|eDraw_SetMarker|eDraw_Present))
		{
			// don't want to set up previous/next links for markers
		}
		else
		{
			if(previous != NULL)
				previous->next = draw->drawcallID;
			draw->previous = previous ? previous->drawcallID : 0;

			RDCASSERT(m_Drawcalls.empty() || draw->eventID > m_Drawcalls.back()->eventID || draw->context != frame.immContextId);
			m_Drawcalls.resize(RDCMAX(m_Drawcalls.size(), size_t(draw->drawcallID+1)));
			m_Drawcalls[draw->drawcallID] = draw;

			ret = previous = draw;
		}
	}

	return ret;
}
コード例 #4
0
ファイル: vk_info.cpp プロジェクト: Tak/renderdoc
void VulkanCreationInfo::Image::Init(VulkanResourceManager *resourceMan, VulkanCreationInfo &info, const VkImageCreateInfo* pCreateInfo)
{
	view = VK_NULL_HANDLE;
	stencilView = VK_NULL_HANDLE;

	type = pCreateInfo->imageType;
	format = pCreateInfo->format;
	extent = pCreateInfo->extent;
	arrayLayers = pCreateInfo->arrayLayers;
	mipLevels = pCreateInfo->mipLevels;
	samples = RDCMAX(VK_SAMPLE_COUNT_1_BIT, pCreateInfo->samples);

	creationFlags = 0;

	if(pCreateInfo->usage & VK_IMAGE_USAGE_SAMPLED_BIT)
		creationFlags |= eTextureCreate_SRV;
	if(pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT))
		creationFlags |= eTextureCreate_RTV;
	if(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
		creationFlags |= eTextureCreate_DSV;
	if(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)
		creationFlags |= eTextureCreate_UAV;

	cube = (pCreateInfo->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) ? true : false;
}
コード例 #5
0
ファイル: gl_debug_funcs.cpp プロジェクト: cgmb/renderdoc
bool WrappedOpenGL::Serialise_glPopDebugGroup(SerialiserType &ser)
{
  if(IsReplayingAndReading())
  {
    GLMarkerRegion::End();
    m_ReplayEventCount = RDCMAX(0, m_ReplayEventCount - 1);

    if(IsLoading(m_State) && !m_CurEvents.empty())
    {
      DrawcallDescription draw;
      draw.name = "API Calls";
      draw.flags |= DrawFlags::SetMarker | DrawFlags::APICalls;

      AddDrawcall(draw, true);
    }
  }

  return true;
}
コード例 #6
0
ファイル: gl_resources.cpp プロジェクト: excalybure/renderdoc
int GetNumMips(const GLHookSet &gl, GLenum target, GLuint tex, GLuint w, GLuint h, GLuint d)
{
	int mips = 1;

	GLint immut = 0;
	gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_IMMUTABLE_FORMAT, &immut);

	if(immut)
		gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_IMMUTABLE_LEVELS, (GLint *)&mips);
	else
		mips = CalcNumMips(w, h, d);

	GLint maxLevel = 1000;
	gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_MAX_LEVEL, &maxLevel);
	mips = RDCMIN(mips, maxLevel+1);

	if(immut == 0)
	{
		// check to see if all mips are set, or clip the number of mips to those that are
		// set.
		if(target == eGL_TEXTURE_CUBE_MAP)
			target = eGL_TEXTURE_CUBE_MAP_POSITIVE_X;

		for(int i=0; i < mips; i++)
		{
			GLint width = 0;
			gl.glGetTextureLevelParameterivEXT(tex, target, i, eGL_TEXTURE_WIDTH, &width);
			if(width == 0)
			{
				mips = i;
				break;
			}
		}
	}

	return RDCMAX(1, mips);
}
コード例 #7
0
ファイル: dds_readwrite.cpp プロジェクト: cgmb/renderdoc
bool write_dds_to_file(FILE *f, const dds_data &data)
{
  if(!f)
    return false;

  uint32_t magic = dds_fourcc;
  DDS_HEADER header;
  DDS_HEADER_DXT10 headerDXT10;
  RDCEraseEl(header);
  RDCEraseEl(headerDXT10);

  header.dwSize = sizeof(DDS_HEADER);

  header.ddspf.dwSize = sizeof(DDS_PIXELFORMAT);

  header.dwWidth = data.width;
  header.dwHeight = data.height;
  header.dwDepth = data.depth;
  header.dwMipMapCount = data.mips;

  header.dwFlags = DDSD_CAPS | DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT;
  if(data.mips > 1)
    header.dwFlags |= DDSD_MIPMAPCOUNT;
  if(data.depth > 1)
    header.dwFlags |= DDSD_DEPTH;

  bool blockFormat = false;

  if(data.format.Special())
  {
    switch(data.format.type)
    {
      case ResourceFormatType::BC1:
      case ResourceFormatType::BC2:
      case ResourceFormatType::BC3:
      case ResourceFormatType::BC4:
      case ResourceFormatType::BC5:
      case ResourceFormatType::BC6:
      case ResourceFormatType::BC7: blockFormat = true; break;
      case ResourceFormatType::ETC2:
      case ResourceFormatType::EAC:
      case ResourceFormatType::ASTC:
      case ResourceFormatType::YUV:
        RDCERR("Unsupported file format, %u", data.format.type);
        return false;
      default: break;
    }
  }

  if(blockFormat)
    header.dwFlags |= DDSD_LINEARSIZE;
  else
    header.dwFlags |= DDSD_PITCH;

  header.dwCaps = DDSCAPS_TEXTURE;
  if(data.mips > 1)
    header.dwCaps |= DDSCAPS_MIPMAP;
  if(data.mips > 1 || data.slices > 1 || data.depth > 1)
    header.dwCaps |= DDSCAPS_COMPLEX;

  header.dwCaps2 = data.depth > 1 ? DDSCAPS2_VOLUME : 0;

  bool dx10Header = false;

  headerDXT10.dxgiFormat = ResourceFormat2DXGIFormat(data.format);
  headerDXT10.resourceDimension =
      data.depth > 1 ? D3D10_RESOURCE_DIMENSION_TEXTURE3D : D3D10_RESOURCE_DIMENSION_TEXTURE2D;
  headerDXT10.miscFlag = 0;
  headerDXT10.arraySize = data.slices;

  if(headerDXT10.dxgiFormat == DXGI_FORMAT_UNKNOWN)
  {
    RDCERR("Couldn't convert resource format to DXGI format");
    return false;
  }

  if(data.cubemap)
  {
    header.dwCaps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALLFACES;
    headerDXT10.miscFlag |= DDS_RESOURCE_MISC_TEXTURECUBE;
    headerDXT10.arraySize /= 6;
  }

  if(headerDXT10.arraySize > 1)
    dx10Header = true;    // need to specify dx10 header to give array size

  uint32_t bytesPerPixel = 1;

  if(blockFormat)
  {
    int blockSize =
        (data.format.type == ResourceFormatType::BC1 || data.format.type == ResourceFormatType::BC4)
            ? 8
            : 16;
    header.dwPitchOrLinearSize = RDCMAX(1U, ((header.dwWidth + 3) / 4)) * blockSize;
  }
  else
  {
    switch(data.format.type)
    {
      case ResourceFormatType::S8: bytesPerPixel = 1; break;
      case ResourceFormatType::R10G10B10A2:
      case ResourceFormatType::R9G9B9E5:
      case ResourceFormatType::R11G11B10:
      case ResourceFormatType::D24S8: bytesPerPixel = 4; break;
      case ResourceFormatType::R5G6B5:
      case ResourceFormatType::R5G5B5A1:
      case ResourceFormatType::R4G4B4A4: bytesPerPixel = 2; break;
      case ResourceFormatType::D32S8: bytesPerPixel = 8; break;
      case ResourceFormatType::D16S8:
      case ResourceFormatType::YUV:
      case ResourceFormatType::R4G4:
        RDCERR("Unsupported file format %u", data.format.type);
        return false;
      default: bytesPerPixel = data.format.compCount * data.format.compByteWidth;
    }

    header.dwPitchOrLinearSize = header.dwWidth * bytesPerPixel;
  }

  // special case a couple of formats to write out non-DX10 style, for
  // backwards compatibility
  if(data.format.compByteWidth == 1 && data.format.compCount == 4 &&
     data.format.compType == CompType::UNorm)
  {
    header.ddspf.dwFlags = DDPF_RGBA;
    header.ddspf.dwRGBBitCount = 32;
    header.ddspf.dwRBitMask = 0x000000ff;
    header.ddspf.dwGBitMask = 0x0000ff00;
    header.ddspf.dwBBitMask = 0x00ff0000;
    header.ddspf.dwABitMask = 0xff000000;

    if(data.format.bgraOrder)
      std::swap(header.ddspf.dwRBitMask, header.ddspf.dwBBitMask);
  }
  else if(data.format.type == ResourceFormatType::BC1)
  {
    header.ddspf.dwFlags = DDPF_FOURCC;
    header.ddspf.dwFourCC = MAKE_FOURCC('D', 'X', 'T', '1');
  }
  else if(data.format.type == ResourceFormatType::BC2)
  {
    header.ddspf.dwFlags = DDPF_FOURCC;
    header.ddspf.dwFourCC = MAKE_FOURCC('D', 'X', 'T', '3');
  }
  else if(data.format.type == ResourceFormatType::BC3)
  {
    header.ddspf.dwFlags = DDPF_FOURCC;
    header.ddspf.dwFourCC = MAKE_FOURCC('D', 'X', 'T', '5');
  }
  else if(data.format.type == ResourceFormatType::BC4 && data.format.compType == CompType::UNorm)
  {
    header.ddspf.dwFlags = DDPF_FOURCC;
    header.ddspf.dwFourCC = MAKE_FOURCC('B', 'C', '4', 'U');
  }
  else if(data.format.type == ResourceFormatType::BC4 && data.format.compType == CompType::SNorm)
  {
    header.ddspf.dwFlags = DDPF_FOURCC;
    header.ddspf.dwFourCC = MAKE_FOURCC('B', 'C', '4', 'S');
  }
  else if(data.format.type == ResourceFormatType::BC5 && data.format.compType == CompType::UNorm)
  {
    header.ddspf.dwFlags = DDPF_FOURCC;
    header.ddspf.dwFourCC = MAKE_FOURCC('A', 'T', 'I', '2');
  }
  else if(data.format.type == ResourceFormatType::BC5 && data.format.compType == CompType::SNorm)
  {
    header.ddspf.dwFlags = DDPF_FOURCC;
    header.ddspf.dwFourCC = MAKE_FOURCC('B', 'C', '5', 'S');
  }
  else
  {
    // just write out DX10 header
    dx10Header = true;
  }

  if(dx10Header)
  {
    header.ddspf.dwFlags = DDPF_FOURCC;
    header.ddspf.dwFourCC = MAKE_FOURCC('D', 'X', '1', '0');
  }

  {
    FileIO::fwrite(&magic, sizeof(magic), 1, f);
    FileIO::fwrite(&header, sizeof(header), 1, f);
    if(dx10Header)
      FileIO::fwrite(&headerDXT10, sizeof(headerDXT10), 1, f);

    int i = 0;
    for(int slice = 0; slice < RDCMAX(1, data.slices); slice++)
    {
      for(int mip = 0; mip < RDCMAX(1, data.mips); mip++)
      {
        int numdepths = RDCMAX(1, data.depth >> mip);
        for(int d = 0; d < numdepths; d++)
        {
          byte *bytedata = data.subdata[i];

          int rowlen = RDCMAX(1, data.width >> mip);
          int numRows = RDCMAX(1, data.height >> mip);
          int pitch = RDCMAX(1U, rowlen * bytesPerPixel);

          // pitch/rows are in blocks, not pixels, for block formats.
          if(blockFormat)
          {
            numRows = RDCMAX(1, numRows / 4);

            int blockSize = (data.format.type == ResourceFormatType::BC1 ||
                             data.format.type == ResourceFormatType::BC4)
                                ? 8
                                : 16;

            pitch = RDCMAX(blockSize, (((rowlen + 3) / 4)) * blockSize);
          }

          for(int row = 0; row < numRows; row++)
          {
            FileIO::fwrite(bytedata, 1, pitch, f);

            bytedata += pitch;
          }

          i++;
        }
      }
    }
  }

  return true;
}
コード例 #8
0
ファイル: vk_device_funcs.cpp プロジェクト: qqdiguo/renderdoc
VkResult WrappedVulkan::vkCreateDevice(
		VkPhysicalDevice                            physicalDevice,
		const VkDeviceCreateInfo*                   pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkDevice*                                   pDevice)
{
	VkDeviceCreateInfo createInfo = *pCreateInfo;

	uint32_t qCount = 0;
	VkResult vkr = VK_SUCCESS;
	
	ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, NULL);

	VkQueueFamilyProperties *props = new VkQueueFamilyProperties[qCount];
	ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, props);

	// find a queue that supports all capabilities, and if one doesn't exist, add it.
	bool found = false;
	uint32_t qFamilyIdx = 0;
	VkQueueFlags search = (VK_QUEUE_GRAPHICS_BIT);

	// for queue priorities, if we need it
	float one = 1.0f;

	// if we need to change the requested queues, it will point to this
	VkDeviceQueueCreateInfo *modQueues = NULL;

	for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++)
	{
		uint32_t idx = createInfo.pQueueCreateInfos[i].queueFamilyIndex;
		RDCASSERT(idx < qCount);

		// this requested queue is one we can use too
		if((props[idx].queueFlags & search) == search && createInfo.pQueueCreateInfos[i].queueCount > 0)
		{
			qFamilyIdx = idx;
			found = true;
			break;
		}
	}

	// if we didn't find it, search for which queue family we should add a request for
	if(!found)
	{
		RDCDEBUG("App didn't request a queue family we can use - adding our own");

		for(uint32_t i=0; i < qCount; i++)
		{
			if((props[i].queueFlags & search) == search)
			{
				qFamilyIdx = i;
				found = true;
				break;
			}
		}

		if(!found)
		{
			SAFE_DELETE_ARRAY(props);
			RDCERR("Can't add a queue with required properties for RenderDoc! Unsupported configuration");
			return VK_ERROR_INITIALIZATION_FAILED;
		}

		// we found the queue family, add it
		modQueues = new VkDeviceQueueCreateInfo[createInfo.queueCreateInfoCount + 1];
		for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++)
			modQueues[i] = createInfo.pQueueCreateInfos[i];

		modQueues[createInfo.queueCreateInfoCount].queueFamilyIndex = qFamilyIdx;
		modQueues[createInfo.queueCreateInfoCount].queueCount = 1;
		modQueues[createInfo.queueCreateInfoCount].pQueuePriorities = &one;

		createInfo.pQueueCreateInfos = modQueues;
		createInfo.queueCreateInfoCount++;
	}

	SAFE_DELETE_ARRAY(props);

	m_QueueFamilies.resize(createInfo.queueCreateInfoCount);
	for(size_t i=0; i < createInfo.queueCreateInfoCount; i++)
	{
		uint32_t family = createInfo.pQueueCreateInfos[i].queueFamilyIndex;
		uint32_t count = createInfo.pQueueCreateInfos[i].queueCount;
		m_QueueFamilies.resize(RDCMAX(m_QueueFamilies.size(), size_t(family+1)));

		m_QueueFamilies[family] = new VkQueue[count];
		for(uint32_t q=0; q < count; q++)
			m_QueueFamilies[family][q] = VK_NULL_HANDLE;
	}

	VkLayerDeviceCreateInfo *layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext;

	// step through the chain of pNext until we get to the link info
	while(layerCreateInfo &&
				(layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || 
				 layerCreateInfo->function != VK_LAYER_LINK_INFO)
			)
	{
		layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext;
	}
	RDCASSERT(layerCreateInfo);

	PFN_vkGetDeviceProcAddr gdpa = layerCreateInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr;
	PFN_vkGetInstanceProcAddr gipa = layerCreateInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr;
	// move chain on for next layer
	layerCreateInfo->u.pLayerInfo = layerCreateInfo->u.pLayerInfo->pNext;

	PFN_vkCreateDevice createFunc = (PFN_vkCreateDevice)gipa(VK_NULL_HANDLE, "vkCreateDevice");

	// now search again through for the loader data callback (if it exists)
	layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext;

	// step through the chain of pNext
	while(layerCreateInfo &&
				(layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || 
				 layerCreateInfo->function != VK_LOADER_DATA_CALLBACK)
			)
	{
		layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext;
	}

	// if we found one (we might not - on old loaders), then store the func ptr for
	// use instead of SetDispatchTableOverMagicNumber
	if(layerCreateInfo)
	{
		RDCASSERT(m_SetDeviceLoaderData == layerCreateInfo->u.pfnSetDeviceLoaderData || m_SetDeviceLoaderData == NULL,
		          m_SetDeviceLoaderData, layerCreateInfo->u.pfnSetDeviceLoaderData);
		m_SetDeviceLoaderData = layerCreateInfo->u.pfnSetDeviceLoaderData;
	}

	VkResult ret = createFunc(Unwrap(physicalDevice), &createInfo, pAllocator, pDevice);
	
	// don't serialise out any of the pNext stuff for layer initialisation
	// (note that we asserted above that there was nothing else in the chain)
	createInfo.pNext = NULL;

	if(ret == VK_SUCCESS)
	{
		InitDeviceTable(*pDevice, gdpa);

		ResourceId id = GetResourceManager()->WrapResource(*pDevice, *pDevice);
		
		if(m_State >= WRITING)
		{
			Chunk *chunk = NULL;

			{
				CACHE_THREAD_SERIALISER();

				SCOPED_SERIALISE_CONTEXT(CREATE_DEVICE);
				Serialise_vkCreateDevice(localSerialiser, physicalDevice, &createInfo, NULL, pDevice);

				chunk = scope.Get();
			}

			VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pDevice);
			RDCASSERT(record);

			record->AddChunk(chunk);

			record->memIdxMap = GetRecord(physicalDevice)->memIdxMap;

			record->instDevInfo = new InstanceDeviceInfo();
		
#undef CheckExt
#define CheckExt(name) record->instDevInfo->name = GetRecord(m_Instance)->instDevInfo->name;

			// inherit extension enablement from instance, that way GetDeviceProcAddress can check
			// for enabled extensions for instance functions
			CheckInstanceExts();

#undef CheckExt
#define CheckExt(name) if(!strcmp(createInfo.ppEnabledExtensionNames[i], STRINGIZE(name))) { record->instDevInfo->name = true; }

			for(uint32_t i=0; i < createInfo.enabledExtensionCount; i++)
			{
				CheckDeviceExts();
			}
		
			InitDeviceExtensionTables(*pDevice);

			GetRecord(m_Instance)->AddParent(record);
		}
		else
		{
			GetResourceManager()->AddLiveResource(id, *pDevice);
		}

		VkDevice device = *pDevice;

		RDCASSERT(m_Device == VK_NULL_HANDLE); // MULTIDEVICE

		m_PhysicalDevice = physicalDevice;
		m_Device = device;

		m_QueueFamilyIdx = qFamilyIdx;

		if(m_InternalCmds.cmdpool == VK_NULL_HANDLE)
		{
			VkCommandPoolCreateInfo poolInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, NULL, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, qFamilyIdx };
			vkr = ObjDisp(device)->CreateCommandPool(Unwrap(device), &poolInfo, NULL, &m_InternalCmds.cmdpool);
			RDCASSERTEQUAL(vkr, VK_SUCCESS);

			GetResourceManager()->WrapResource(Unwrap(device), m_InternalCmds.cmdpool);
		}
		
		ObjDisp(physicalDevice)->GetPhysicalDeviceProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.props);
		
		ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.memProps);

		ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures(Unwrap(physicalDevice), &m_PhysicalDeviceData.features);

		m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
		m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
		m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

		m_PhysicalDeviceData.fakeMemProps = GetRecord(physicalDevice)->memProps;

		m_DebugManager = new VulkanDebugManager(this, device);
	}

	SAFE_DELETE_ARRAY(modQueues);

	return ret;
}
コード例 #9
0
bool WrappedID3D11DeviceContext::Serialise_UpdateSubresource1(ID3D11Resource *pDstResource, UINT DstSubresource, const D3D11_BOX *pDstBox,
																const void *pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch, UINT CopyFlags)
{
	SERIALISE_ELEMENT(ResourceId, idx, GetIDForResource(pDstResource));
	SERIALISE_ELEMENT(uint32_t, flags, CopyFlags);
	SERIALISE_ELEMENT(uint32_t, DestSubresource, DstSubresource);
	
	D3D11ResourceRecord *record = m_pDevice->GetResourceManager()->GetResourceRecord(idx);

	D3D11ResourceRecord *parent = record;

	if(record && record->NumSubResources > (int)DestSubresource)
		record = (D3D11ResourceRecord *)record->SubResources[DestSubresource];

	SERIALISE_ELEMENT(uint8_t, isUpdate, record->DataInSerialiser);

	ID3D11Resource *DestResource = pDstResource;
	if(m_State < WRITING)
	{
		if(m_pDevice->GetResourceManager()->HasLiveResource(idx))
			DestResource = (ID3D11Resource *)m_pDevice->GetResourceManager()->GetLiveResource(idx);
	}

	if(isUpdate)
	{
		SERIALISE_ELEMENT(uint8_t, HasDestBox, pDstBox != NULL);
		SERIALISE_ELEMENT_OPT(D3D11_BOX, box, *pDstBox, HasDestBox);
		SERIALISE_ELEMENT(uint32_t, SourceRowPitch, SrcRowPitch);
		SERIALISE_ELEMENT(uint32_t, SourceDepthPitch, SrcDepthPitch);

		size_t srcLength = 0;

		if(m_State >= WRITING)
		{
			RDCASSERT(record);
			
			if(WrappedID3D11Buffer::IsAlloc(DestResource))
			{
				srcLength = record->Length;

				if(HasDestBox)
					srcLength = RDCMIN((uint32_t)srcLength, pDstBox->right - pDstBox->left);
			}
			else
			{
				WrappedID3D11Texture1D *tex1 = WrappedID3D11Texture1D::IsAlloc(DestResource) ? (WrappedID3D11Texture1D *)DestResource : NULL;
				WrappedID3D11Texture2D *tex2 = WrappedID3D11Texture2D::IsAlloc(DestResource) ? (WrappedID3D11Texture2D *)DestResource : NULL;
				WrappedID3D11Texture3D *tex3 = WrappedID3D11Texture3D::IsAlloc(DestResource) ? (WrappedID3D11Texture3D *)DestResource : NULL;

				UINT mipLevel = GetMipForSubresource(DestResource, DestSubresource);

				if(tex1)
				{
					srcLength = record->Length;

					if(HasDestBox)
						srcLength = RDCMIN((uint32_t)srcLength, pDstBox->right - pDstBox->left);
				}
				else if(tex2)
				{
					D3D11_TEXTURE2D_DESC desc = {0};
					tex2->GetDesc(&desc);
					size_t rows = RDCMAX(1U,desc.Height>>mipLevel);
					DXGI_FORMAT fmt = desc.Format;
					
					if(HasDestBox)
						rows = (pDstBox->bottom - pDstBox->top);

					if(IsBlockFormat(fmt))
						rows = RDCMAX((size_t)1, rows/4);

					srcLength = SourceRowPitch*rows;
				}
				else if(tex3)
				{
					D3D11_TEXTURE3D_DESC desc = {0};
					tex3->GetDesc(&desc);
					size_t slices = RDCMAX(1U,desc.Depth>>mipLevel);

					srcLength = SourceDepthPitch*slices;

					if(HasDestBox)
						srcLength = SourceDepthPitch*(pDstBox->back - pDstBox->front);
				}
				else
				{
					RDCERR("UpdateSubResource on unexpected resource type");
				}
			}
コード例 #10
0
ファイル: d3d12_overlay.cpp プロジェクト: cgmb/renderdoc
ResourceId D3D12Replay::RenderOverlay(ResourceId texid, CompType typeHint, DebugOverlay overlay,
                                      uint32_t eventId, const vector<uint32_t> &passEvents)
{
  ID3D12Resource *resource = WrappedID3D12Resource::GetList()[texid];

  if(resource == NULL)
    return ResourceId();

  D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc();

  std::vector<D3D12_RESOURCE_BARRIER> barriers;
  int resType = 0;
  GetDebugManager()->PrepareTextureSampling(resource, typeHint, resType, barriers);

  D3D12_RESOURCE_DESC overlayTexDesc;
  overlayTexDesc.Alignment = 0;
  overlayTexDesc.DepthOrArraySize = 1;
  overlayTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  overlayTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  overlayTexDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM;
  overlayTexDesc.Height = resourceDesc.Height;
  overlayTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  overlayTexDesc.MipLevels = 1;
  overlayTexDesc.SampleDesc = resourceDesc.SampleDesc;
  overlayTexDesc.Width = resourceDesc.Width;

  D3D12_HEAP_PROPERTIES heapProps;
  heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
  heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
  heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
  heapProps.CreationNodeMask = 1;
  heapProps.VisibleNodeMask = 1;

  D3D12_RESOURCE_DESC currentOverlayDesc;
  RDCEraseEl(currentOverlayDesc);
  if(m_Overlay.Texture)
    currentOverlayDesc = m_Overlay.Texture->GetDesc();

  WrappedID3D12Resource *wrappedCustomRenderTex = (WrappedID3D12Resource *)m_Overlay.Texture;

  // need to recreate backing custom render tex
  if(overlayTexDesc.Width != currentOverlayDesc.Width ||
     overlayTexDesc.Height != currentOverlayDesc.Height ||
     overlayTexDesc.Format != currentOverlayDesc.Format ||
     overlayTexDesc.SampleDesc.Count != currentOverlayDesc.SampleDesc.Count ||
     overlayTexDesc.SampleDesc.Quality != currentOverlayDesc.SampleDesc.Quality)
  {
    SAFE_RELEASE(m_Overlay.Texture);
    m_Overlay.resourceId = ResourceId();

    ID3D12Resource *customRenderTex = NULL;
    HRESULT hr = m_pDevice->CreateCommittedResource(
        &heapProps, D3D12_HEAP_FLAG_NONE, &overlayTexDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, NULL,
        __uuidof(ID3D12Resource), (void **)&customRenderTex);
    if(FAILED(hr))
    {
      RDCERR("Failed to create custom render tex HRESULT: %s", ToStr(hr).c_str());
      return ResourceId();
    }
    wrappedCustomRenderTex = (WrappedID3D12Resource *)customRenderTex;

    customRenderTex->SetName(L"customRenderTex");

    m_Overlay.Texture = wrappedCustomRenderTex;
    m_Overlay.resourceId = wrappedCustomRenderTex->GetResourceID();
  }

  D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState;

  ID3D12Resource *renderDepth = NULL;

  D3D12Descriptor *dsView = GetWrapped(rs.dsv);

  D3D12_RESOURCE_DESC depthTexDesc = {};
  D3D12_DEPTH_STENCIL_VIEW_DESC dsViewDesc = {};
  if(dsView)
  {
    ID3D12Resource *realDepth = dsView->nonsamp.resource;

    dsViewDesc = dsView->nonsamp.dsv;

    depthTexDesc = realDepth->GetDesc();
    depthTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
    depthTexDesc.Alignment = 0;

    HRESULT hr = S_OK;

    hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc,
                                            D3D12_RESOURCE_STATE_COPY_DEST, NULL,
                                            __uuidof(ID3D12Resource), (void **)&renderDepth);
    if(FAILED(hr))
    {
      RDCERR("Failed to create renderDepth HRESULT: %s", ToStr(hr).c_str());
      return m_Overlay.resourceId;
    }

    renderDepth->SetName(L"Overlay renderDepth");

    ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

    const vector<D3D12_RESOURCE_STATES> &states =
        m_pDevice->GetSubresourceStates(GetResID(realDepth));

    vector<D3D12_RESOURCE_BARRIER> depthBarriers;
    depthBarriers.reserve(states.size());
    for(size_t i = 0; i < states.size(); i++)
    {
      D3D12_RESOURCE_BARRIER b;

      // skip unneeded barriers
      if(states[i] & D3D12_RESOURCE_STATE_COPY_SOURCE)
        continue;

      b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
      b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      b.Transition.pResource = realDepth;
      b.Transition.Subresource = (UINT)i;
      b.Transition.StateBefore = states[i];
      b.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

      depthBarriers.push_back(b);
    }

    if(!depthBarriers.empty())
      list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]);

    list->CopyResource(renderDepth, realDepth);

    for(size_t i = 0; i < depthBarriers.size(); i++)
      std::swap(depthBarriers[i].Transition.StateBefore, depthBarriers[i].Transition.StateAfter);

    if(!depthBarriers.empty())
      list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]);

    D3D12_RESOURCE_BARRIER b = {};

    b.Transition.pResource = renderDepth;
    b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
    b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;

    // prepare tex resource for copying
    list->ResourceBarrier(1, &b);

    list->Close();
  }

  D3D12_RENDER_TARGET_VIEW_DESC rtDesc = {};
  rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
  rtDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM;
  rtDesc.Texture2D.MipSlice = 0;
  rtDesc.Texture2D.PlaneSlice = 0;

  if(overlayTexDesc.SampleDesc.Count > 1 || overlayTexDesc.SampleDesc.Quality > 0)
    rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS;

  D3D12_CPU_DESCRIPTOR_HANDLE rtv = GetDebugManager()->GetCPUHandle(OVERLAY_RTV);

  m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv);

  ID3D12GraphicsCommandList *list = m_pDevice->GetNewList();

  FLOAT black[] = {0.0f, 0.0f, 0.0f, 0.0f};
  list->ClearRenderTargetView(rtv, black, 0, NULL);

  D3D12_CPU_DESCRIPTOR_HANDLE dsv = {};

  if(renderDepth)
  {
    dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV);
    m_pDevice->CreateDepthStencilView(
        renderDepth, dsViewDesc.Format == DXGI_FORMAT_UNKNOWN ? NULL : &dsViewDesc, dsv);
  }

  D3D12_DEPTH_STENCIL_DESC dsDesc;

  dsDesc.BackFace.StencilFailOp = dsDesc.BackFace.StencilPassOp =
      dsDesc.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
  dsDesc.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  dsDesc.FrontFace.StencilFailOp = dsDesc.FrontFace.StencilPassOp =
      dsDesc.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
  dsDesc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  dsDesc.DepthEnable = TRUE;
  dsDesc.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
  dsDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
  dsDesc.StencilEnable = FALSE;
  dsDesc.StencilReadMask = dsDesc.StencilWriteMask = 0xff;

  WrappedID3D12PipelineState *pipe = NULL;

  if(rs.pipe != ResourceId())
    pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(overlay == DebugOverlay::NaN || overlay == DebugOverlay::Clipping)
  {
    // just need the basic texture
  }
  else if(overlay == DebugOverlay::Drawcall)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      float overlayConsts[4] = {0.8f, 0.1f, 0.8f, 1.0f};
      ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts);

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      float clearColour[] = {0.0f, 0.0f, 0.0f, 0.5f};
      list->ClearRenderTargetView(rtv, clearColour, 0, NULL);

      list->Close();
      list = NULL;

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE();

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::BackfaceCull)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      D3D12_CULL_MODE origCull = psoDesc.RasterizerState.CullMode;

      float redCol[4] = {1.0f, 0.0f, 0.0f, 1.0f};
      ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(redCol);

      float greenCol[4] = {0.0f, 1.0f, 0.0f, 1.0f};
      ID3DBlob *green = m_pDevice->GetShaderCache()->MakeFixedColShader(greenCol);

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      psoDesc.PS.pShaderBytecode = red->GetBufferPointer();
      psoDesc.PS.BytecodeLength = red->GetBufferSize();

      list->Close();
      list = NULL;

      ID3D12PipelineState *redPSO = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&redPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      psoDesc.RasterizerState.CullMode = origCull;
      psoDesc.PS.pShaderBytecode = green->GetBufferPointer();
      psoDesc.PS.BytecodeLength = green->GetBufferSize();

      ID3D12PipelineState *greenPSO = NULL;
      hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                  (void **)&greenPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(redPSO);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(redPSO);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE();

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs.pipe = GetResID(greenPSO);

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(red);
      SAFE_RELEASE(green);
      SAFE_RELEASE(redPSO);
      SAFE_RELEASE(greenPSO);
    }
  }
  else if(overlay == DebugOverlay::Wireframe)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc();

      float overlayConsts[] = {200.0f / 255.0f, 255.0f / 255.0f, 0.0f / 255.0f, 1.0f};
      ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts);

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats);
      psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      psoDesc.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_WIREFRAME;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.MultisampleEnable = FALSE;
      psoDesc.RasterizerState.AntialiasedLineEnable = FALSE;

      overlayConsts[3] = 0.0f;
      list->ClearRenderTargetView(rtv, overlayConsts, 0, NULL);

      list->Close();
      list = NULL;

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                          (void **)&pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rtSingle = true;
      rs.rts.resize(1);
      rs.rts[0] = rtv;
      rs.dsv = dsv;

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::ClearBeforePass || overlay == DebugOverlay::ClearBeforeDraw)
  {
    vector<uint32_t> events = passEvents;

    if(overlay == DebugOverlay::ClearBeforeDraw)
      events.clear();

    events.push_back(eventId);

    if(!events.empty())
    {
      list->Close();
      list = NULL;

      bool rtSingle = rs.rtSingle;
      std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> rts = rs.rts;

      if(overlay == DebugOverlay::ClearBeforePass)
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      list = m_pDevice->GetNewList();

      for(size_t i = 0; i < rts.size(); i++)
      {
        D3D12Descriptor *desc = rtSingle ? GetWrapped(rts[0]) : GetWrapped(rts[i]);

        if(desc)
        {
          if(rtSingle)
            desc += i;

          Unwrap(list)->ClearRenderTargetView(UnwrapCPU(desc), black, 0, NULL);
        }
      }

      list->Close();
      list = NULL;

      for(size_t i = 0; i < events.size(); i++)
      {
        m_pDevice->ReplayLog(events[i], events[i], eReplay_OnlyDraw);

        if(overlay == DebugOverlay::ClearBeforePass && i + 1 < events.size())
          m_pDevice->ReplayLog(events[i] + 1, events[i + 1], eReplay_WithoutDraw);
      }
    }
  }
  else if(overlay == DebugOverlay::ViewportScissor)
  {
    if(pipe && pipe->IsGraphics() && !rs.views.empty())
    {
      list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

      D3D12_VIEWPORT viewport = rs.views[0];
      list->RSSetViewports(1, &viewport);

      D3D12_RECT scissor = {0, 0, 16384, 16384};
      list->RSSetScissorRects(1, &scissor);

      list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

      list->SetPipelineState(m_General.FixedColPipe);

      list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig);

      DebugPixelCBufferData pixelData = {0};

      // border colour (dark, 2px, opaque)
      pixelData.WireframeColour = Vec3f(0.1f, 0.1f, 0.1f);
      // inner colour (light, transparent)
      pixelData.Channels = Vec4f(0.2f, 0.2f, 0.9f, 0.7f);
      pixelData.OutputDisplayFormat = 0;
      pixelData.RangeMinimum = viewport.TopLeftX;
      pixelData.InverseRangeSize = viewport.TopLeftY;
      pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f);

      D3D12_GPU_VIRTUAL_ADDRESS viewCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      list->SetGraphicsRootConstantBufferView(0, viewCB);
      list->SetGraphicsRootConstantBufferView(1, viewCB);
      list->SetGraphicsRootConstantBufferView(2, viewCB);

      Vec4f dummy;
      list->SetGraphicsRoot32BitConstants(3, 4, &dummy.x, 0);

      float factor[4] = {1.0f, 1.0f, 1.0f, 1.0f};
      list->OMSetBlendFactor(factor);

      list->DrawInstanced(3, 1, 0, 0);

      viewport.TopLeftX = (float)rs.scissors[0].left;
      viewport.TopLeftY = (float)rs.scissors[0].top;
      viewport.Width = (float)(rs.scissors[0].right - rs.scissors[0].left);
      viewport.Height = (float)(rs.scissors[0].bottom - rs.scissors[0].top);
      list->RSSetViewports(1, &viewport);

      pixelData.OutputDisplayFormat = 1;
      pixelData.RangeMinimum = viewport.TopLeftX;
      pixelData.InverseRangeSize = viewport.TopLeftY;
      pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f);

      D3D12_GPU_VIRTUAL_ADDRESS scissorCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      list->SetGraphicsRootConstantBufferView(1, scissorCB);

      list->DrawInstanced(3, 1, 0, 0);
    }
  }
  else if(overlay == DebugOverlay::TriangleSizeDraw || overlay == DebugOverlay::TriangleSizePass)
  {
    if(pipe && pipe->IsGraphics())
    {
      SCOPED_TIMER("Triangle size");

      vector<uint32_t> events = passEvents;

      if(overlay == DebugOverlay::TriangleSizeDraw)
        events.clear();

      while(!events.empty())
      {
        const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[0]);

        // remove any non-drawcalls, like the pass boundary.
        if(!(draw->flags & DrawFlags::Drawcall))
          events.erase(events.begin());
        else
          break;
      }

      events.push_back(eventId);

      D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = pipe->GetGraphicsDesc();
      pipeDesc.pRootSignature = m_General.ConstOnlyRootSig;
      pipeDesc.SampleMask = 0xFFFFFFFF;
      pipeDesc.SampleDesc.Count = 1;
      pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;

      pipeDesc.NumRenderTargets = 1;
      RDCEraseEl(pipeDesc.RTVFormats);
      pipeDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM;
      pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      pipeDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

      D3D12_INPUT_ELEMENT_DESC ia[2] = {};
      ia[0].SemanticName = "pos";
      ia[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].SemanticName = "sec";
      ia[1].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].InputSlot = 1;
      ia[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;

      pipeDesc.InputLayout.NumElements = 2;
      pipeDesc.InputLayout.pInputElementDescs = ia;

      pipeDesc.VS.BytecodeLength = m_Overlay.MeshVS->GetBufferSize();
      pipeDesc.VS.pShaderBytecode = m_Overlay.MeshVS->GetBufferPointer();
      RDCEraseEl(pipeDesc.HS);
      RDCEraseEl(pipeDesc.DS);
      pipeDesc.GS.BytecodeLength = m_Overlay.TriangleSizeGS->GetBufferSize();
      pipeDesc.GS.pShaderBytecode = m_Overlay.TriangleSizeGS->GetBufferPointer();
      pipeDesc.PS.BytecodeLength = m_Overlay.TriangleSizePS->GetBufferSize();
      pipeDesc.PS.pShaderBytecode = m_Overlay.TriangleSizePS->GetBufferPointer();

      pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;

      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_GREATER)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL;
      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_LESS)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;

      // enough for all primitive topology types
      ID3D12PipelineState *pipes[D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH + 1] = {};

      DebugVertexCBuffer vertexData = {};
      vertexData.LineStrip = 0;
      vertexData.ModelViewProj = Matrix4f::Identity();
      vertexData.SpriteSize = Vec2f();

      Vec4f viewport(rs.views[0].Width, rs.views[0].Height);

      if(rs.dsv.ptr)
      {
        D3D12_CPU_DESCRIPTOR_HANDLE realDSV = Unwrap(rs.dsv);

        list->OMSetRenderTargets(1, &rtv, TRUE, &realDSV);
      }

      list->RSSetViewports(1, &rs.views[0]);

      D3D12_RECT scissor = {0, 0, 16384, 16384};
      list->RSSetScissorRects(1, &scissor);

      list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig);

      list->SetGraphicsRootConstantBufferView(
          0, GetDebugManager()->UploadConstants(&vertexData, sizeof(vertexData)));
      list->SetGraphicsRootConstantBufferView(
          1, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp)));
      list->SetGraphicsRootConstantBufferView(
          2, GetDebugManager()->UploadConstants(&viewport, sizeof(viewport)));
      list->SetGraphicsRoot32BitConstants(3, 4, &viewport.x, 0);

      for(size_t i = 0; i < events.size(); i++)
      {
        const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[i]);

        for(uint32_t inst = 0; draw && inst < RDCMAX(1U, draw->numInstances); inst++)
        {
          MeshFormat fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::GSOut);
          if(fmt.vertexResourceId == ResourceId())
            fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::VSOut);

          if(fmt.vertexResourceId != ResourceId())
          {
            D3D_PRIMITIVE_TOPOLOGY topo = MakeD3DPrimitiveTopology(fmt.topology);

            if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST ||
               topo >= D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST)
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
            else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ)
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
            else
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

            list->IASetPrimitiveTopology(topo);

            if(pipes[pipeDesc.PrimitiveTopologyType] == NULL)
            {
              HRESULT hr = m_pDevice->CreateGraphicsPipelineState(
                  &pipeDesc, __uuidof(ID3D12PipelineState),
                  (void **)&pipes[pipeDesc.PrimitiveTopologyType]);
              RDCASSERTEQUAL(hr, S_OK);
            }

            ID3D12Resource *vb =
                m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.vertexResourceId);

            D3D12_VERTEX_BUFFER_VIEW vbView = {};
            vbView.BufferLocation = vb->GetGPUVirtualAddress() + fmt.vertexByteOffset;
            vbView.StrideInBytes = fmt.vertexByteStride;
            vbView.SizeInBytes = UINT(vb->GetDesc().Width - fmt.vertexByteOffset);

            // second bind is just a dummy, so we don't have to make a shader
            // that doesn't accept the secondary stream
            list->IASetVertexBuffers(0, 1, &vbView);
            list->IASetVertexBuffers(1, 1, &vbView);

            list->SetPipelineState(pipes[pipeDesc.PrimitiveTopologyType]);

            if(fmt.indexByteStride && fmt.indexResourceId != ResourceId())
            {
              ID3D12Resource *ib =
                  m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.indexResourceId);

              D3D12_INDEX_BUFFER_VIEW view;
              view.BufferLocation = ib->GetGPUVirtualAddress() + fmt.indexByteOffset;
              view.SizeInBytes = UINT(ib->GetDesc().Width - fmt.indexByteOffset);
              view.Format = fmt.indexByteStride == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
              list->IASetIndexBuffer(&view);

              list->DrawIndexedInstanced(fmt.numIndices, 1, 0, fmt.baseVertex, 0);
            }
            else
            {
              list->DrawInstanced(fmt.numIndices, 1, 0, 0);
            }
          }
        }
      }

      list->Close();
      list = NULL;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(size_t i = 0; i < ARRAY_COUNT(pipes); i++)
        SAFE_RELEASE(pipes[i]);
    }

    // restore back to normal
    m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
  else if(overlay == DebugOverlay::QuadOverdrawPass || overlay == DebugOverlay::QuadOverdrawDraw)
  {
    SCOPED_TIMER("Quad Overdraw");

    vector<uint32_t> events = passEvents;

    if(overlay == DebugOverlay::QuadOverdrawDraw)
      events.clear();

    events.push_back(eventId);

    if(!events.empty())
    {
      if(overlay == DebugOverlay::QuadOverdrawPass)
      {
        list->Close();
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);
        list = m_pDevice->GetNewList();
      }

      uint32_t width = uint32_t(resourceDesc.Width >> 1);
      uint32_t height = resourceDesc.Height >> 1;

      width = RDCMAX(1U, width);
      height = RDCMAX(1U, height);

      D3D12_RESOURCE_DESC uavTexDesc = {};
      uavTexDesc.Alignment = 0;
      uavTexDesc.DepthOrArraySize = 4;
      uavTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
      uavTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
      uavTexDesc.Format = DXGI_FORMAT_R32_UINT;
      uavTexDesc.Height = height;
      uavTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
      uavTexDesc.MipLevels = 1;
      uavTexDesc.SampleDesc.Count = 1;
      uavTexDesc.SampleDesc.Quality = 0;
      uavTexDesc.Width = width;

      ID3D12Resource *overdrawTex = NULL;
      HRESULT hr = m_pDevice->CreateCommittedResource(
          &heapProps, D3D12_HEAP_FLAG_NONE, &uavTexDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
          NULL, __uuidof(ID3D12Resource), (void **)&overdrawTex);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overdrawTex HRESULT: %s", ToStr(hr).c_str());
        list->Close();
        list = NULL;
        return m_Overlay.resourceId;
      }

      m_pDevice->CreateShaderResourceView(overdrawTex, NULL,
                                          GetDebugManager()->GetCPUHandle(OVERDRAW_SRV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetCPUHandle(OVERDRAW_UAV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV));

      UINT zeroes[4] = {0, 0, 0, 0};
      list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(OVERDRAW_UAV),
                                         GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV),
                                         overdrawTex, zeroes, 0, NULL);
      list->Close();
      list = NULL;

#if ENABLED(SINGLE_FLUSH_VALIDATE)
      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();
#endif

      m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      D3D12_SHADER_BYTECODE quadWrite;
      quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize();
      quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer();

      // declare callback struct here
      D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, events,
                                   ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)));

      m_pDevice->ReplayLog(events.front(), events.back(), eReplay_Full);

      // resolve pass
      {
        list = m_pDevice->GetNewList();

        D3D12_RESOURCE_BARRIER overdrawBarriers[2] = {};

        // make sure UAV work is done then prepare for reading in PS
        overdrawBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
        overdrawBarriers[0].UAV.pResource = overdrawTex;
        overdrawBarriers[1].Transition.pResource = overdrawTex;
        overdrawBarriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        overdrawBarriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
        overdrawBarriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;

        // prepare tex resource for copying
        list->ResourceBarrier(2, overdrawBarriers);

        list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

        list->RSSetViewports(1, &rs.views[0]);

        D3D12_RECT scissor = {0, 0, 16384, 16384};
        list->RSSetScissorRects(1, &scissor);

        list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

        list->SetPipelineState(m_Overlay.QuadResolvePipe);

        list->SetGraphicsRootSignature(m_Overlay.QuadResolveRootSig);

        GetDebugManager()->SetDescriptorHeaps(list, true, false);

        list->SetGraphicsRootConstantBufferView(
            0, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp)));
        list->SetGraphicsRootDescriptorTable(1, GetDebugManager()->GetGPUHandle(OVERDRAW_SRV));

        list->DrawInstanced(3, 1, 0, 0);

        list->Close();
        list = NULL;
      }

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(auto it = cb.m_PipelineCache.begin(); it != cb.m_PipelineCache.end(); ++it)
      {
        SAFE_RELEASE(it->second.pipe);
        SAFE_RELEASE(it->second.sig);
      }

      SAFE_RELEASE(overdrawTex);
    }

    if(overlay == DebugOverlay::QuadOverdrawPass)
      m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
コード例 #11
0
GLXContext glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, GLXContext shareList, Bool direct, const int *attribList)
{
	const int *attribs = attribList;
	vector<int> attribVec;

	// modify attribs to our liking
	{
		bool flagsNext = false;
		bool flagsFound = false;
		const int *a = attribList;
		while(*a)
		{
			int val = *a;

			if(flagsNext)
			{
				flagsNext = false;

				if(RenderDoc::Inst().GetCaptureOptions().DebugDeviceMode)
					val |= GLX_CONTEXT_DEBUG_BIT_ARB;
				else
					val &= ~GLX_CONTEXT_DEBUG_BIT_ARB;

				// remove NO_ERROR bit
				val &= ~GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
			}

			if(val == GLX_CONTEXT_FLAGS_ARB)
			{
				flagsNext = true;
				flagsFound = true;
			}

			attribVec.push_back(val);

			a++;
		}

		if(!flagsFound && RenderDoc::Inst().GetCaptureOptions().DebugDeviceMode)
		{
			attribVec.push_back(GLX_CONTEXT_FLAGS_ARB);
			attribVec.push_back(GLX_CONTEXT_DEBUG_BIT_ARB);
		}

		attribVec.push_back(0);

		attribs = &attribVec[0];
	}

	RDCDEBUG("glXCreateContextAttribsARB:");

	bool core = false;

	int *a = (int *)attribs;
	while(*a)
	{
		RDCDEBUG("%x: %d", a[0], a[1]);
		a += 2;

		if(a[0] == GLX_CONTEXT_PROFILE_MASK_ARB)
			core = (a[1] & GLX_CONTEXT_CORE_PROFILE_BIT_ARB);
	}

	GLXContext ret = OpenGLHook::glhooks.glXCreateContextAttribsARB_real(dpy, config, shareList, direct, attribs);

	XVisualInfo *vis = OpenGLHook::glhooks.glXGetVisualFromFBConfig_real(dpy, config);	
	
	GLInitParams init;
	
	init.width = 0;
	init.height = 0;
	
	int value = 0;
	
	Keyboard::CloneDisplay(dpy);
	
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_BUFFER_SIZE, &value); init.colorBits = value;
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_DEPTH_SIZE, &value); init.depthBits = value;
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_STENCIL_SIZE, &value); init.stencilBits = value;
	value = 1; // default to srgb
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_FRAMEBUFFER_SRGB_CAPABLE_ARB, &value); init.isSRGB = value;
	value = 1;
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_SAMPLES_ARB, &value); init.isSRGB = RDCMAX(1, value);

	XFree(vis);
	
	GLWindowingData data;
	data.dpy = dpy;
	data.wnd = (GLXDrawable)NULL;
	data.ctx = ret;

	OpenGLHook::glhooks.GetDriver()->CreateContext(data, shareList, init, core, true);

	return ret;
}
コード例 #12
0
GLXContext glXCreateContext(Display *dpy, XVisualInfo *vis, GLXContext shareList, Bool direct)
{
	GLXContext ret = OpenGLHook::glhooks.glXCreateContext_real(dpy, vis, shareList, direct);
	
	GLInitParams init;
	
	init.width = 0;
	init.height = 0;
	
	int value = 0;
	
	Keyboard::CloneDisplay(dpy);

	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_BUFFER_SIZE, &value); init.colorBits = value;
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_DEPTH_SIZE, &value); init.depthBits = value;
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_STENCIL_SIZE, &value); init.stencilBits = value;
	value = 1; // default to srgb
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_FRAMEBUFFER_SRGB_CAPABLE_ARB, &value); init.isSRGB = value;
	value = 1;
	OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_SAMPLES_ARB, &value); init.isSRGB = RDCMAX(1, value);

	GLWindowingData data;
	data.dpy = dpy;
	data.wnd = (GLXDrawable)NULL;
	data.ctx = ret;

	OpenGLHook::glhooks.GetDriver()->CreateContext(data, shareList, init, false, false);

	return ret;
}
コード例 #13
0
ファイル: gl_interop_funcs.cpp プロジェクト: cgmb/renderdoc
HANDLE WrappedOpenGL::wglDXRegisterObjectNV(HANDLE hDevice, void *dxObject, GLuint name,
                                            GLenum type, GLenum access)
{
  RDCASSERT(IsCaptureMode(m_State));

  ID3D11Resource *real = UnwrapDXResource(dxObject);

  if(real == NULL)
  {
    SetLastError(ERROR_OPEN_FAILED);
    return 0;
  }

  WrappedHANDLE *wrapped = new WrappedHANDLE();

  if(type == eGL_RENDERBUFFER)
    wrapped->res = RenderbufferRes(GetCtx(), name);
  else if(type == eGL_NONE)
    wrapped->res = BufferRes(GetCtx(), name);
  else
    wrapped->res = TextureRes(GetCtx(), name);

  GLResourceRecord *record = GetResourceManager()->GetResourceRecord(wrapped->res);

  if(!record)
  {
    RDCERR("Unrecognised object with type %x and name %u", type, name);
    delete wrapped;
    return NULL;
  }

  SERIALISE_TIME_CALL(wrapped->real =
                          m_Real.wglDXRegisterObjectNV(hDevice, real, name, type, access));

  {
    RDCASSERT(record);

    USE_SCRATCH_SERIALISER();
    SCOPED_SERIALISE_CHUNK(gl_CurChunk);
    Serialise_wglDXRegisterObjectNV(ser, wrapped->res, type, dxObject);

    record->AddChunk(scope.Get());
  }

  if(type != eGL_NONE)
  {
    ResourceFormat fmt;
    uint32_t width = 0, height = 0, depth = 0, mips = 0, layers = 0, samples = 0;
    GetDXTextureProperties(dxObject, fmt, width, height, depth, mips, layers, samples);

    // defined as arrays mostly for Coverity code analysis to stay calm about passing
    // them to the *TexParameter* functions
    GLint maxlevel[4] = {GLint(mips - 1), 0, 0, 0};

    m_Real.glTextureParameteriEXT(wrapped->res.name, type, eGL_TEXTURE_MAX_LEVEL, GLint(mips - 1));

    ResourceId texId = record->GetResourceID();
    m_Textures[texId].resource = wrapped->res;
    m_Textures[texId].curType = type;
    m_Textures[texId].width = width;
    m_Textures[texId].height = height;
    m_Textures[texId].depth = RDCMAX(depth, samples);
    m_Textures[texId].samples = samples;
    m_Textures[texId].dimension = 2;
    if(type == eGL_TEXTURE_1D || type == eGL_TEXTURE_1D_ARRAY)
      m_Textures[texId].dimension = 1;
    else if(type == eGL_TEXTURE_3D)
      m_Textures[texId].dimension = 3;

    m_Textures[texId].internalFormat = MakeGLFormat(fmt);
  }

  return wrapped;
}
コード例 #14
0
ファイル: gl_interop_funcs.cpp プロジェクト: cgmb/renderdoc
bool WrappedOpenGL::Serialise_wglDXRegisterObjectNV(SerialiserType &ser, GLResource Resource,
                                                    GLenum type, void *dxObject)
{
  SERIALISE_ELEMENT(Resource);

  GLenum internalFormat = eGL_NONE;
  uint32_t width = 0, height = 0, depth = 0, mips = 0, layers = 0, samples = 0;
  if(ser.IsWriting())
  {
    ResourceFormat format;
#if ENABLED(RDOC_WIN32) && ENABLED(RENDERDOC_DX_GL_INTEROP)
    GetDXTextureProperties(dxObject, format, width, height, depth, mips, layers, samples);
    if(type != eGL_NONE)
      internalFormat = MakeGLFormat(format);
#else
    RDCERR("Should never happen - cannot serialise wglDXRegisterObjectNV, interop is disabled");
#endif
  }

  SERIALISE_ELEMENT(type);
  SERIALISE_ELEMENT(internalFormat);
  SERIALISE_ELEMENT(width);
  SERIALISE_ELEMENT(height);
  SERIALISE_ELEMENT(depth);
  SERIALISE_ELEMENT(mips);
  SERIALISE_ELEMENT(layers);
  SERIALISE_ELEMENT(samples);

  SERIALISE_CHECK_READ_ERRORS();

  if(IsReplayingAndReading())
  {
    GLuint name = Resource.name;

    switch(type)
    {
      case eGL_NONE:
      case eGL_TEXTURE_BUFFER:
      {
        m_Real.glNamedBufferDataEXT(name, width, NULL, eGL_STATIC_DRAW);
        break;
      }
      case eGL_TEXTURE_1D:
        m_Real.glTextureStorage1DEXT(name, type, mips, internalFormat, width);
        break;
      case eGL_TEXTURE_1D_ARRAY:
        m_Real.glTextureStorage2DEXT(name, type, mips, internalFormat, width, layers);
        break;
      // treat renderbuffers and texture rects as tex2D just to make things easier
      case eGL_RENDERBUFFER:
      case eGL_TEXTURE_RECTANGLE:
      case eGL_TEXTURE_2D:
      case eGL_TEXTURE_CUBE_MAP:
        m_Real.glTextureStorage2DEXT(name, type, mips, internalFormat, width, height);
        break;
      case eGL_TEXTURE_2D_ARRAY:
      case eGL_TEXTURE_CUBE_MAP_ARRAY:
        m_Real.glTextureStorage3DEXT(name, type, mips, internalFormat, width, height, layers);
        break;
      case eGL_TEXTURE_2D_MULTISAMPLE:
        m_Real.glTextureStorage2DMultisampleEXT(name, type, samples, internalFormat, width, height,
                                                GL_TRUE);
        break;
      case eGL_TEXTURE_2D_MULTISAMPLE_ARRAY:
        m_Real.glTextureStorage3DMultisampleEXT(name, type, samples, internalFormat, width, height,
                                                layers, GL_TRUE);
        break;
      case eGL_TEXTURE_3D:
        m_Real.glTextureStorage3DEXT(name, type, mips, internalFormat, width, height, depth);
        break;
      default: RDCERR("Unexpected type of interop texture: %s", ToStr(type).c_str()); break;
    }

    if(type != eGL_NONE)
    {
      ResourceId liveId = GetResourceManager()->GetID(Resource);
      m_Textures[liveId].curType = type;
      m_Textures[liveId].width = width;
      m_Textures[liveId].height = height;
      m_Textures[liveId].depth = RDCMAX(depth, samples);
      m_Textures[liveId].samples = samples;
      m_Textures[liveId].dimension = 2;
      if(type == eGL_TEXTURE_1D || type == eGL_TEXTURE_1D_ARRAY)
        m_Textures[liveId].dimension = 1;
      else if(type == eGL_TEXTURE_3D)
        m_Textures[liveId].dimension = 3;

      m_Textures[liveId].internalFormat = internalFormat;
    }

    AddResourceInitChunk(Resource);
  }

  return true;
}
コード例 #15
0
ファイル: dds_readwrite.cpp プロジェクト: cgmb/renderdoc
dds_data load_dds_from_file(FILE *f)
{
  dds_data ret = {};
  dds_data error = {};

  FileIO::fseek64(f, 0, SEEK_SET);

  uint32_t magic = 0;
  FileIO::fread(&magic, sizeof(magic), 1, f);

  DDS_HEADER header = {};
  FileIO::fread(&header, sizeof(header), 1, f);

  bool dx10Header = false;
  DDS_HEADER_DXT10 headerDXT10 = {};

  if(header.ddspf.dwFlags == DDPF_FOURCC && header.ddspf.dwFourCC == MAKE_FOURCC('D', 'X', '1', '0'))
  {
    FileIO::fread(&headerDXT10, sizeof(headerDXT10), 1, f);
    dx10Header = true;
  }

  ret.width = RDCMAX(1U, header.dwWidth);
  ret.height = RDCMAX(1U, header.dwHeight);
  ret.depth = RDCMAX(1U, header.dwDepth);
  ret.slices = dx10Header ? RDCMAX(1U, headerDXT10.arraySize) : 1;
  ret.mips = RDCMAX(1U, header.dwMipMapCount);

  uint32_t cubeFlags = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALLFACES;

  if((header.dwCaps2 & cubeFlags) == cubeFlags && header.dwCaps & DDSCAPS_COMPLEX)
    ret.cubemap = true;

  if(dx10Header && headerDXT10.miscFlag & DDS_RESOURCE_MISC_TEXTURECUBE)
    ret.cubemap = true;

  if(ret.cubemap)
    ret.slices *= 6;

  if(dx10Header)
  {
    ret.format = DXGIFormat2ResourceFormat(headerDXT10.dxgiFormat);
    if(ret.format.type == ResourceFormatType::Undefined)
    {
      RDCWARN("Unsupported DXGI_FORMAT: %u", (uint32_t)headerDXT10.dxgiFormat);
      return error;
    }
  }
  else if(header.ddspf.dwFlags & DDPF_FOURCC)
  {
    switch(header.ddspf.dwFourCC)
    {
      case MAKE_FOURCC('D', 'X', 'T', '1'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC1_UNORM);
        break;
      case MAKE_FOURCC('D', 'X', 'T', '3'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC2_UNORM);
        break;
      case MAKE_FOURCC('D', 'X', 'T', '5'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC3_UNORM);
        break;
      case MAKE_FOURCC('A', 'T', 'I', '1'):
      case MAKE_FOURCC('B', 'C', '4', 'U'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC4_UNORM);
        break;
      case MAKE_FOURCC('B', 'C', '4', 'S'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC4_SNORM);
        break;
      case MAKE_FOURCC('A', 'T', 'I', '2'):
      case MAKE_FOURCC('B', 'C', '5', 'U'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC5_UNORM);
        break;
      case MAKE_FOURCC('B', 'C', '5', 'S'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC5_SNORM);
        break;
      case MAKE_FOURCC('R', 'G', 'B', 'G'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R8G8_B8G8_UNORM);
        break;
      case MAKE_FOURCC('G', 'R', 'G', 'B'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_G8R8_G8B8_UNORM);
        break;
      case 36: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16G16B16A16_UNORM); break;
      case 110: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16G16B16A16_SNORM); break;
      case 111: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16_FLOAT); break;
      case 112: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16G16_FLOAT); break;
      case 113: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16G16B16A16_FLOAT); break;
      case 114: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R32_FLOAT); break;
      case 115: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R32G32_FLOAT); break;
      case 116: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R32G32B32A32_FLOAT); break;
      default: RDCWARN("Unsupported FourCC: %08x", header.ddspf.dwFourCC); return error;
    }
  }
  else
  {
    if(header.ddspf.dwRGBBitCount != 32 && header.ddspf.dwRGBBitCount != 24 &&
       header.ddspf.dwRGBBitCount != 16 && header.ddspf.dwRGBBitCount != 8)
    {
      RDCWARN("Unsupported RGB bit count: %u", header.ddspf.dwRGBBitCount);
      return error;
    }

    ret.format.compByteWidth = 1;
    ret.format.compCount = uint8_t(header.ddspf.dwRGBBitCount / 8);
    ret.format.compType = CompType::UNorm;
    ret.format.type = ResourceFormatType::Regular;

    if(header.ddspf.dwBBitMask < header.ddspf.dwRBitMask)
      ret.format.bgraOrder = true;
  }

  uint32_t bytesPerPixel = 1;
  switch(ret.format.type)
  {
    case ResourceFormatType::S8: bytesPerPixel = 1; break;
    case ResourceFormatType::R10G10B10A2:
    case ResourceFormatType::R9G9B9E5:
    case ResourceFormatType::R11G11B10:
    case ResourceFormatType::D24S8: bytesPerPixel = 4; break;
    case ResourceFormatType::R5G6B5:
    case ResourceFormatType::R5G5B5A1:
    case ResourceFormatType::R4G4B4A4: bytesPerPixel = 2; break;
    case ResourceFormatType::D32S8: bytesPerPixel = 8; break;
    case ResourceFormatType::D16S8:
    case ResourceFormatType::YUV:
    case ResourceFormatType::R4G4:
      RDCERR("Unsupported file format %u", ret.format.type);
      return error;
    default: bytesPerPixel = ret.format.compCount * ret.format.compByteWidth;
  }

  bool blockFormat = false;

  if(ret.format.Special())
  {
    switch(ret.format.type)
    {
      case ResourceFormatType::BC1:
      case ResourceFormatType::BC2:
      case ResourceFormatType::BC3:
      case ResourceFormatType::BC4:
      case ResourceFormatType::BC5:
      case ResourceFormatType::BC6:
      case ResourceFormatType::BC7: blockFormat = true; break;
      case ResourceFormatType::ETC2:
      case ResourceFormatType::EAC:
      case ResourceFormatType::ASTC:
      case ResourceFormatType::YUV:
        RDCERR("Unsupported file format, %u", ret.format.type);
        return error;
      default: break;
    }
  }

  ret.subsizes = new uint32_t[ret.slices * ret.mips];
  ret.subdata = new byte *[ret.slices * ret.mips];

  int i = 0;
  for(int slice = 0; slice < ret.slices; slice++)
  {
    for(int mip = 0; mip < ret.mips; mip++)
    {
      int rowlen = RDCMAX(1, ret.width >> mip);
      int numRows = RDCMAX(1, ret.height >> mip);
      int numdepths = RDCMAX(1, ret.depth >> mip);
      int pitch = RDCMAX(1U, rowlen * bytesPerPixel);

      // pitch/rows are in blocks, not pixels, for block formats.
      if(blockFormat)
      {
        numRows = RDCMAX(1, numRows / 4);

        int blockSize = (ret.format.type == ResourceFormatType::BC1 ||
                         ret.format.type == ResourceFormatType::BC4)
                            ? 8
                            : 16;

        pitch = RDCMAX(blockSize, (((rowlen + 3) / 4)) * blockSize);
      }

      ret.subsizes[i] = numdepths * numRows * pitch;

      byte *bytedata = ret.subdata[i] = new byte[ret.subsizes[i]];

      for(int d = 0; d < numdepths; d++)
      {
        for(int row = 0; row < numRows; row++)
        {
          FileIO::fread(bytedata, 1, pitch, f);

          bytedata += pitch;
        }
      }

      i++;
    }
  }

  return ret;
}
コード例 #16
0
ファイル: d3d12_postvs.cpp プロジェクト: cgmb/renderdoc
void D3D12Replay::InitPostVSBuffers(uint32_t eventId)
{
  // go through any aliasing
  if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end())
    eventId = m_PostVSAlias[eventId];

  if(m_PostVSData.find(eventId) != m_PostVSData.end())
    return;

  D3D12CommandData *cmd = m_pDevice->GetQueue()->GetCommandData();
  const D3D12RenderState &rs = cmd->m_RenderState;

  if(rs.pipe == ResourceId())
    return;

  WrappedID3D12PipelineState *origPSO =
      m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(!origPSO->IsGraphics())
    return;

  D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = origPSO->GetGraphicsDesc();

  if(psoDesc.VS.BytecodeLength == 0)
    return;

  WrappedID3D12Shader *vs = origPSO->VS();

  D3D_PRIMITIVE_TOPOLOGY topo = rs.topo;

  const DrawcallDescription *drawcall = m_pDevice->GetDrawcall(eventId);

  if(drawcall->numIndices == 0)
    return;

  DXBC::DXBCFile *dxbcVS = vs->GetDXBC();

  RDCASSERT(dxbcVS);

  DXBC::DXBCFile *dxbcGS = NULL;

  WrappedID3D12Shader *gs = origPSO->GS();

  if(gs)
  {
    dxbcGS = gs->GetDXBC();

    RDCASSERT(dxbcGS);
  }

  DXBC::DXBCFile *dxbcDS = NULL;

  WrappedID3D12Shader *ds = origPSO->DS();

  if(ds)
  {
    dxbcDS = ds->GetDXBC();

    RDCASSERT(dxbcDS);
  }

  ID3D12RootSignature *soSig = NULL;

  HRESULT hr = S_OK;

  {
    WrappedID3D12RootSignature *sig =
        m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>(rs.graphics.rootsig);

    D3D12RootSignature rootsig = sig->sig;

    // create a root signature that allows stream out, if necessary
    if((rootsig.Flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT) == 0)
    {
      rootsig.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;

      ID3DBlob *blob = m_pDevice->GetShaderCache()->MakeRootSig(rootsig);

      hr = m_pDevice->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(),
                                          __uuidof(ID3D12RootSignature), (void **)&soSig);
      if(FAILED(hr))
      {
        RDCERR("Couldn't enable stream-out in root signature: HRESULT: %s", ToStr(hr).c_str());
        return;
      }

      SAFE_RELEASE(blob);
    }
  }

  vector<D3D12_SO_DECLARATION_ENTRY> sodecls;

  UINT stride = 0;
  int posidx = -1;
  int numPosComponents = 0;

  if(!dxbcVS->m_OutputSig.empty())
  {
    for(const SigParameter &sign : dxbcVS->m_OutputSig)
    {
      D3D12_SO_DECLARATION_ENTRY decl;

      decl.Stream = 0;
      decl.OutputSlot = 0;

      decl.SemanticName = sign.semanticName.c_str();
      decl.SemanticIndex = sign.semanticIndex;
      decl.StartComponent = 0;
      decl.ComponentCount = sign.compCount & 0xff;

      if(sign.systemValue == ShaderBuiltin::Position)
      {
        posidx = (int)sodecls.size();
        numPosComponents = decl.ComponentCount = 4;
      }

      stride += decl.ComponentCount * sizeof(float);
      sodecls.push_back(decl);
    }

    if(stride == 0)
    {
      RDCERR("Didn't get valid stride! Setting to 4 bytes");
      stride = 4;
    }

    // shift position attribute up to first, keeping order otherwise
    // the same
    if(posidx > 0)
    {
      D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx];
      sodecls.erase(sodecls.begin() + posidx);
      sodecls.insert(sodecls.begin(), pos);
    }

    // set up stream output entries and buffers
    psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size();
    psoDesc.StreamOutput.pSODeclaration = &sodecls[0];
    psoDesc.StreamOutput.NumStrides = 1;
    psoDesc.StreamOutput.pBufferStrides = &stride;
    psoDesc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;

    // disable all other shader stages
    psoDesc.HS.BytecodeLength = 0;
    psoDesc.HS.pShaderBytecode = NULL;
    psoDesc.DS.BytecodeLength = 0;
    psoDesc.DS.pShaderBytecode = NULL;
    psoDesc.GS.BytecodeLength = 0;
    psoDesc.GS.pShaderBytecode = NULL;
    psoDesc.PS.BytecodeLength = 0;
    psoDesc.PS.pShaderBytecode = NULL;

    // disable any rasterization/use of output targets
    psoDesc.DepthStencilState.DepthEnable = FALSE;
    psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
    psoDesc.DepthStencilState.StencilEnable = FALSE;

    if(soSig)
      psoDesc.pRootSignature = soSig;

    // render as points
    psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;

    // disable outputs
    psoDesc.NumRenderTargets = 0;
    RDCEraseEl(psoDesc.RTVFormats);
    psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

    ID3D12PipelineState *pipe = NULL;
    hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                (void **)&pipe);
    if(FAILED(hr))
    {
      RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    ID3D12Resource *idxBuf = NULL;

    bool recreate = false;
    uint64_t outputSize = uint64_t(drawcall->numIndices) * drawcall->numInstances * stride;

    if(m_SOBufferSize < outputSize)
    {
      uint64_t oldSize = m_SOBufferSize;
      while(m_SOBufferSize < outputSize)
        m_SOBufferSize *= 2;
      RDCWARN("Resizing stream-out buffer from %llu to %llu for output data", oldSize,
              m_SOBufferSize);
      recreate = true;
    }

    ID3D12GraphicsCommandList *list = NULL;

    if(!(drawcall->flags & DrawFlags::UseIBuffer))
    {
      if(recreate)
      {
        m_pDevice->GPUSync();

        CreateSOBuffers();
      }

      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      D3D12_STREAM_OUTPUT_BUFFER_VIEW view;
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;
      list->SOSetTargets(0, 1, &view);

      list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
      list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                          drawcall->instanceOffset);
    }
    else    // drawcall is indexed
    {
      bytebuf idxdata;
      GetBufferData(rs.ibuffer.buf, rs.ibuffer.offs + drawcall->indexOffset * rs.ibuffer.bytewidth,
                    RDCMIN(drawcall->numIndices * rs.ibuffer.bytewidth, rs.ibuffer.size), idxdata);

      vector<uint32_t> indices;

      uint16_t *idx16 = (uint16_t *)&idxdata[0];
      uint32_t *idx32 = (uint32_t *)&idxdata[0];

      // only read as many indices as were available in the buffer
      uint32_t numIndices =
          RDCMIN(uint32_t(idxdata.size() / rs.ibuffer.bytewidth), drawcall->numIndices);

      uint32_t idxclamp = 0;
      if(drawcall->baseVertex < 0)
        idxclamp = uint32_t(-drawcall->baseVertex);

      // grab all unique vertex indices referenced
      for(uint32_t i = 0; i < numIndices; i++)
      {
        uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i];

        // apply baseVertex but clamp to 0 (don't allow index to become negative)
        if(i32 < idxclamp)
          i32 = 0;
        else if(drawcall->baseVertex < 0)
          i32 -= idxclamp;
        else if(drawcall->baseVertex > 0)
          i32 += drawcall->baseVertex;

        auto it = std::lower_bound(indices.begin(), indices.end(), i32);

        if(it != indices.end() && *it == i32)
          continue;

        indices.insert(it, i32);
      }

      // if we read out of bounds, we'll also have a 0 index being referenced
      // (as 0 is read). Don't insert 0 if we already have 0 though
      if(numIndices < drawcall->numIndices && (indices.empty() || indices[0] != 0))
        indices.insert(indices.begin(), 0);

      // An index buffer could be something like: 500, 501, 502, 501, 503, 502
      // in which case we can't use the existing index buffer without filling 499 slots of vertex
      // data with padding. Instead we rebase the indices based on the smallest vertex so it becomes
      // 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer.
      //
      // Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512
      // which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid.
      // We just stream-out a tightly packed list of unique indices, and then remap the index buffer
      // so that what did point to 500 points to 0 (accounting for rebasing), and what did point
      // to 510 now points to 3 (accounting for the unique sort).

      // we use a map here since the indices may be sparse. Especially considering if an index
      // is 'invalid' like 0xcccccccc then we don't want an array of 3.4 billion entries.
      map<uint32_t, size_t> indexRemap;
      for(size_t i = 0; i < indices.size(); i++)
      {
        // by definition, this index will only appear once in indices[]
        indexRemap[indices[i]] = i;
      }

      if(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t))
      {
        uint64_t oldSize = m_SOBufferSize;
        while(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t))
          m_SOBufferSize *= 2;
        RDCWARN("Resizing stream-out buffer from %llu to %llu for indices", oldSize, m_SOBufferSize);
        recreate = true;
      }

      if(recreate)
      {
        m_pDevice->GPUSync();

        CreateSOBuffers();
      }

      GetDebugManager()->FillBuffer(m_SOPatchedIndexBuffer, 0, &indices[0],
                                    indices.size() * sizeof(uint32_t));

      D3D12_INDEX_BUFFER_VIEW patchedIB;

      patchedIB.BufferLocation = m_SOPatchedIndexBuffer->GetGPUVirtualAddress();
      patchedIB.Format = DXGI_FORMAT_R32_UINT;
      patchedIB.SizeInBytes = UINT(indices.size() * sizeof(uint32_t));

      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      list->IASetIndexBuffer(&patchedIB);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      D3D12_STREAM_OUTPUT_BUFFER_VIEW view;
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;
      list->SOSetTargets(0, 1, &view);

      list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);

      list->DrawIndexedInstanced((UINT)indices.size(), drawcall->numInstances, 0, 0,
                                 drawcall->instanceOffset);

      uint32_t stripCutValue = 0;
      if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF)
        stripCutValue = 0xffff;
      else if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF)
        stripCutValue = 0xffffffff;

      // rebase existing index buffer to point to the right elements in our stream-out'd
      // vertex buffer
      for(uint32_t i = 0; i < numIndices; i++)
      {
        uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i];

        // preserve primitive restart indices
        if(stripCutValue && i32 == stripCutValue)
          continue;

        // apply baseVertex but clamp to 0 (don't allow index to become negative)
        if(i32 < idxclamp)
          i32 = 0;
        else if(drawcall->baseVertex < 0)
          i32 -= idxclamp;
        else if(drawcall->baseVertex > 0)
          i32 += drawcall->baseVertex;

        if(rs.ibuffer.bytewidth == 2)
          idx16[i] = uint16_t(indexRemap[i32]);
        else
          idx32[i] = uint32_t(indexRemap[i32]);
      }

      idxBuf = NULL;

      if(!idxdata.empty())
      {
        D3D12_RESOURCE_DESC idxBufDesc;
        idxBufDesc.Alignment = 0;
        idxBufDesc.DepthOrArraySize = 1;
        idxBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
        idxBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
        idxBufDesc.Format = DXGI_FORMAT_UNKNOWN;
        idxBufDesc.Height = 1;
        idxBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
        idxBufDesc.MipLevels = 1;
        idxBufDesc.SampleDesc.Count = 1;
        idxBufDesc.SampleDesc.Quality = 0;
        idxBufDesc.Width = idxdata.size();

        D3D12_HEAP_PROPERTIES heapProps;
        heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
        heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
        heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
        heapProps.CreationNodeMask = 1;
        heapProps.VisibleNodeMask = 1;

        hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &idxBufDesc,
                                                D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                                __uuidof(ID3D12Resource), (void **)&idxBuf);
        RDCASSERTEQUAL(hr, S_OK);

        SetObjName(idxBuf, StringFormat::Fmt("PostVS idxBuf for %u", eventId));

        GetDebugManager()->FillBuffer(idxBuf, 0, &idxdata[0], idxdata.size());
      }
    }

    D3D12_RESOURCE_BARRIER sobarr = {};
    sobarr.Transition.pResource = m_SOBuffer;
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

    list->ResourceBarrier(1, &sobarr);

    list->CopyResource(m_SOStagingBuffer, m_SOBuffer);

    // we're done with this after the copy, so we can discard it and reset
    // the counter for the next stream-out
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    list->DiscardResource(m_SOBuffer, NULL);
    list->ResourceBarrier(1, &sobarr);

    UINT zeroes[4] = {0, 0, 0, 0};
    list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                       GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                       m_SOBuffer, zeroes, 0, NULL);

    list->Close();

    ID3D12CommandList *l = list;
    m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
    m_pDevice->GPUSync();

    GetDebugManager()->ResetDebugAlloc();

    SAFE_RELEASE(pipe);

    byte *byteData = NULL;
    D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize};
    hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData);
    if(FAILED(hr))
    {
      RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(idxBuf);
      SAFE_RELEASE(soSig);
      return;
    }

    range.End = 0;

    uint64_t numBytesWritten = *(uint64_t *)byteData;

    if(numBytesWritten == 0)
    {
      m_PostVSData[eventId] = D3D12PostVSData();
      SAFE_RELEASE(idxBuf);
      SAFE_RELEASE(soSig);
      return;
    }

    // skip past the counter
    byteData += 64;

    uint64_t numPrims = numBytesWritten / stride;

    ID3D12Resource *vsoutBuffer = NULL;

    {
      D3D12_RESOURCE_DESC vertBufDesc;
      vertBufDesc.Alignment = 0;
      vertBufDesc.DepthOrArraySize = 1;
      vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      vertBufDesc.Format = DXGI_FORMAT_UNKNOWN;
      vertBufDesc.Height = 1;
      vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      vertBufDesc.MipLevels = 1;
      vertBufDesc.SampleDesc.Count = 1;
      vertBufDesc.SampleDesc.Quality = 0;
      vertBufDesc.Width = numBytesWritten;

      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc,
                                              D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                              __uuidof(ID3D12Resource), (void **)&vsoutBuffer);
      RDCASSERTEQUAL(hr, S_OK);

      if(vsoutBuffer)
      {
        SetObjName(vsoutBuffer, StringFormat::Fmt("PostVS vsoutBuffer for %u", eventId));
        GetDebugManager()->FillBuffer(vsoutBuffer, 0, byteData, (size_t)numBytesWritten);
      }
    }

    float nearp = 0.1f;
    float farp = 100.0f;

    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(uint64_t i = 1; numPosComponents == 4 && i < numPrims; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + i * stride);

      if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        if(m == 1.0f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }

    m_SOStagingBuffer->Unmap(0, &range);

    m_PostVSData[eventId].vsin.topo = topo;
    m_PostVSData[eventId].vsout.buf = vsoutBuffer;
    m_PostVSData[eventId].vsout.vertStride = stride;
    m_PostVSData[eventId].vsout.nearPlane = nearp;
    m_PostVSData[eventId].vsout.farPlane = farp;

    m_PostVSData[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::UseIBuffer);
    m_PostVSData[eventId].vsout.numVerts = drawcall->numIndices;

    m_PostVSData[eventId].vsout.instStride = 0;
    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].vsout.instStride =
          uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances));

    m_PostVSData[eventId].vsout.idxBuf = NULL;
    if(m_PostVSData[eventId].vsout.useIndices && idxBuf)
    {
      m_PostVSData[eventId].vsout.idxBuf = idxBuf;
      m_PostVSData[eventId].vsout.idxFmt =
          rs.ibuffer.bytewidth == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
    }

    m_PostVSData[eventId].vsout.hasPosOut = posidx >= 0;

    m_PostVSData[eventId].vsout.topo = topo;
  }
  else
  {
    // empty vertex output signature
    m_PostVSData[eventId].vsin.topo = topo;
    m_PostVSData[eventId].vsout.buf = NULL;
    m_PostVSData[eventId].vsout.instStride = 0;
    m_PostVSData[eventId].vsout.vertStride = 0;
    m_PostVSData[eventId].vsout.nearPlane = 0.0f;
    m_PostVSData[eventId].vsout.farPlane = 0.0f;
    m_PostVSData[eventId].vsout.useIndices = false;
    m_PostVSData[eventId].vsout.hasPosOut = false;
    m_PostVSData[eventId].vsout.idxBuf = NULL;

    m_PostVSData[eventId].vsout.topo = topo;
  }

  if(dxbcGS || dxbcDS)
  {
    stride = 0;
    posidx = -1;
    numPosComponents = 0;

    DXBC::DXBCFile *lastShader = dxbcGS;
    if(dxbcDS)
      lastShader = dxbcDS;

    sodecls.clear();
    for(const SigParameter &sign : lastShader->m_OutputSig)
    {
      D3D12_SO_DECLARATION_ENTRY decl;

      // for now, skip streams that aren't stream 0
      if(sign.stream != 0)
        continue;

      decl.Stream = 0;
      decl.OutputSlot = 0;

      decl.SemanticName = sign.semanticName.c_str();
      decl.SemanticIndex = sign.semanticIndex;
      decl.StartComponent = 0;
      decl.ComponentCount = sign.compCount & 0xff;

      if(sign.systemValue == ShaderBuiltin::Position)
      {
        posidx = (int)sodecls.size();
        numPosComponents = decl.ComponentCount = 4;
      }

      stride += decl.ComponentCount * sizeof(float);
      sodecls.push_back(decl);
    }

    // shift position attribute up to first, keeping order otherwise
    // the same
    if(posidx > 0)
    {
      D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx];
      sodecls.erase(sodecls.begin() + posidx);
      sodecls.insert(sodecls.begin(), pos);
    }

    // enable the other shader stages again
    if(origPSO->DS())
      psoDesc.DS = origPSO->DS()->GetDesc();
    if(origPSO->HS())
      psoDesc.HS = origPSO->HS()->GetDesc();
    if(origPSO->GS())
      psoDesc.GS = origPSO->GS()->GetDesc();

    // configure new SO declarations
    psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size();
    psoDesc.StreamOutput.pSODeclaration = &sodecls[0];
    psoDesc.StreamOutput.NumStrides = 1;
    psoDesc.StreamOutput.pBufferStrides = &stride;

    // we're using the same topology this time
    psoDesc.PrimitiveTopologyType = origPSO->graphics->PrimitiveTopologyType;

    ID3D12PipelineState *pipe = NULL;
    hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                (void **)&pipe);
    if(FAILED(hr))
    {
      RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    D3D12_STREAM_OUTPUT_BUFFER_VIEW view;

    ID3D12GraphicsCommandList *list = NULL;

    view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
    view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
    view.SizeInBytes = m_SOBufferSize;
    // draws with multiple instances must be replayed one at a time so we can record the number of
    // primitives from each drawcall, as due to expansion this can vary per-instance.
    if(drawcall->numInstances > 1)
    {
      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;

      // do a dummy draw to make sure we have enough space in the output buffer
      list->SOSetTargets(0, 1, &view);

      list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

      // because the result is expanded we don't have to remap index buffers or anything
      if(drawcall->flags & DrawFlags::UseIBuffer)
      {
        list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances,
                                   drawcall->indexOffset, drawcall->baseVertex,
                                   drawcall->instanceOffset);
      }
      else
      {
        list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                            drawcall->instanceOffset);
      }

      list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

      list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1,
                             m_SOStagingBuffer, 0);

      list->Close();

      ID3D12CommandList *l = list;
      m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
      m_pDevice->GPUSync();

      // check that things are OK, and resize up if needed
      D3D12_RANGE range;
      range.Begin = 0;
      range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

      D3D12_QUERY_DATA_SO_STATISTICS *data;
      hr = m_SOStagingBuffer->Map(0, &range, (void **)&data);

      D3D12_QUERY_DATA_SO_STATISTICS result = *data;

      range.End = 0;
      m_SOStagingBuffer->Unmap(0, &range);

      if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
      {
        uint64_t oldSize = m_SOBufferSize;
        while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
          m_SOBufferSize *= 2;
        RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize);
        CreateSOBuffers();
      }

      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;

      GetDebugManager()->ResetDebugAlloc();

      // now do the actual stream out
      list = GetDebugManager()->ResetDebugList();

      // first need to reset the counter byte values which may have either been written to above, or
      // are newly created
      {
        D3D12_RESOURCE_BARRIER sobarr = {};
        sobarr.Transition.pResource = m_SOBuffer;
        sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
        sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;

        list->ResourceBarrier(1, &sobarr);

        D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {};
        counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
        counterDesc.Format = DXGI_FORMAT_R32_UINT;
        counterDesc.Buffer.FirstElement = 0;
        counterDesc.Buffer.NumElements = 4;

        UINT zeroes[4] = {0, 0, 0, 0};
        list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                           GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                           m_SOBuffer, zeroes, 0, NULL);

        std::swap(sobarr.Transition.StateBefore, sobarr.Transition.StateAfter);
        list->ResourceBarrier(1, &sobarr);
      }

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      // reserve space for enough 'buffer filled size' locations
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() +
                            AlignUp(uint64_t(drawcall->numInstances * sizeof(UINT64)), 64ULL);

      // do incremental draws to get the output size. We have to do this O(N^2) style because
      // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... N instances
      // and count the total number of verts each time, then we can see from the difference how much
      // each instance wrote.
      for(uint32_t inst = 1; inst <= drawcall->numInstances; inst++)
      {
        if(drawcall->flags & DrawFlags::UseIBuffer)
        {
          view.BufferFilledSizeLocation =
              m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64);
          list->SOSetTargets(0, 1, &view);
          list->DrawIndexedInstanced(drawcall->numIndices, inst, drawcall->indexOffset,
                                     drawcall->baseVertex, drawcall->instanceOffset);
        }
        else
        {
          view.BufferFilledSizeLocation =
              m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64);
          list->SOSetTargets(0, 1, &view);
          list->DrawInstanced(drawcall->numIndices, inst, drawcall->vertexOffset,
                              drawcall->instanceOffset);
        }
      }

      list->Close();

      l = list;
      m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
      m_pDevice->GPUSync();

      GetDebugManager()->ResetDebugAlloc();

      // the last draw will have written the actual data we want into the buffer
    }
    else
    {
      // this only loops if we find from a query that we need to resize up
      while(true)
      {
        list = GetDebugManager()->ResetDebugList();

        rs.ApplyState(list);

        list->SetPipelineState(pipe);

        if(soSig)
        {
          list->SetGraphicsRootSignature(soSig);
          rs.ApplyGraphicsRootElements(list);
        }

        view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
        view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
        view.SizeInBytes = m_SOBufferSize;

        list->SOSetTargets(0, 1, &view);

        list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

        // because the result is expanded we don't have to remap index buffers or anything
        if(drawcall->flags & DrawFlags::UseIBuffer)
        {
          list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances,
                                     drawcall->indexOffset, drawcall->baseVertex,
                                     drawcall->instanceOffset);
        }
        else
        {
          list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                              drawcall->instanceOffset);
        }

        list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

        list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1,
                               m_SOStagingBuffer, 0);

        list->Close();

        ID3D12CommandList *l = list;
        m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
        m_pDevice->GPUSync();

        // check that things are OK, and resize up if needed
        D3D12_RANGE range;
        range.Begin = 0;
        range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

        D3D12_QUERY_DATA_SO_STATISTICS *data;
        hr = m_SOStagingBuffer->Map(0, &range, (void **)&data);

        if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
        {
          uint64_t oldSize = m_SOBufferSize;
          while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride)
            m_SOBufferSize *= 2;
          RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize);
          CreateSOBuffers();

          continue;
        }

        range.End = 0;
        m_SOStagingBuffer->Unmap(0, &range);

        GetDebugManager()->ResetDebugAlloc();

        break;
      }
    }

    list = GetDebugManager()->ResetDebugList();

    D3D12_RESOURCE_BARRIER sobarr = {};
    sobarr.Transition.pResource = m_SOBuffer;
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

    list->ResourceBarrier(1, &sobarr);

    list->CopyResource(m_SOStagingBuffer, m_SOBuffer);

    // we're done with this after the copy, so we can discard it and reset
    // the counter for the next stream-out
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    list->DiscardResource(m_SOBuffer, NULL);
    list->ResourceBarrier(1, &sobarr);

    D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {};
    counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
    counterDesc.Format = DXGI_FORMAT_R32_UINT;
    counterDesc.Buffer.FirstElement = 0;
    counterDesc.Buffer.NumElements = 4;

    UINT zeroes[4] = {0, 0, 0, 0};
    list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                       GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                       m_SOBuffer, zeroes, 0, NULL);

    list->Close();

    ID3D12CommandList *l = list;
    m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
    m_pDevice->GPUSync();

    GetDebugManager()->ResetDebugAlloc();

    SAFE_RELEASE(pipe);

    byte *byteData = NULL;
    D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize};
    hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData);
    if(FAILED(hr))
    {
      RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    range.End = 0;

    uint64_t *counters = (uint64_t *)byteData;

    uint64_t numBytesWritten = 0;
    std::vector<D3D12PostVSData::InstData> instData;
    if(drawcall->numInstances > 1)
    {
      uint64_t prevByteCount = 0;

      for(uint32_t inst = 0; inst < drawcall->numInstances; inst++)
      {
        uint64_t byteCount = counters[inst];

        D3D12PostVSData::InstData d;
        d.numVerts = uint32_t((byteCount - prevByteCount) / stride);
        d.bufOffset = prevByteCount;
        prevByteCount = byteCount;

        instData.push_back(d);
      }

      numBytesWritten = prevByteCount;
    }
    else
    {
      numBytesWritten = counters[0];
    }

    if(numBytesWritten == 0)
    {
      SAFE_RELEASE(soSig);
      return;
    }

    // skip past the counter(s)
    byteData += (view.BufferLocation - m_SOBuffer->GetGPUVirtualAddress());

    uint64_t numVerts = numBytesWritten / stride;

    ID3D12Resource *gsoutBuffer = NULL;

    {
      D3D12_RESOURCE_DESC vertBufDesc;
      vertBufDesc.Alignment = 0;
      vertBufDesc.DepthOrArraySize = 1;
      vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      vertBufDesc.Format = DXGI_FORMAT_UNKNOWN;
      vertBufDesc.Height = 1;
      vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      vertBufDesc.MipLevels = 1;
      vertBufDesc.SampleDesc.Count = 1;
      vertBufDesc.SampleDesc.Quality = 0;
      vertBufDesc.Width = numBytesWritten;

      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc,
                                              D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                              __uuidof(ID3D12Resource), (void **)&gsoutBuffer);
      RDCASSERTEQUAL(hr, S_OK);

      if(gsoutBuffer)
      {
        SetObjName(gsoutBuffer, StringFormat::Fmt("PostVS gsoutBuffer for %u", eventId));
        GetDebugManager()->FillBuffer(gsoutBuffer, 0, byteData, (size_t)numBytesWritten);
      }
    }

    float nearp = 0.1f;
    float farp = 100.0f;

    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(UINT64 i = 1; numPosComponents == 4 && i < numVerts; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + i * stride);

      if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        if(m == 1.0f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }

    m_SOStagingBuffer->Unmap(0, &range);

    m_PostVSData[eventId].gsout.buf = gsoutBuffer;
    m_PostVSData[eventId].gsout.instStride = 0;
    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].gsout.instStride =
          uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances));
    m_PostVSData[eventId].gsout.vertStride = stride;
    m_PostVSData[eventId].gsout.nearPlane = nearp;
    m_PostVSData[eventId].gsout.farPlane = farp;
    m_PostVSData[eventId].gsout.useIndices = false;
    m_PostVSData[eventId].gsout.hasPosOut = posidx >= 0;
    m_PostVSData[eventId].gsout.idxBuf = NULL;

    topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;

    if(lastShader == dxbcGS)
    {
      for(size_t i = 0; i < dxbcGS->GetNumDeclarations(); i++)
      {
        const DXBC::ASMDecl &decl = dxbcGS->GetDeclaration(i);

        if(decl.declaration == DXBC::OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY)
        {
          topo = decl.outTopology;
          break;
        }
      }
    }
    else if(lastShader == dxbcDS)
    {
      for(size_t i = 0; i < dxbcDS->GetNumDeclarations(); i++)
      {
        const DXBC::ASMDecl &decl = dxbcDS->GetDeclaration(i);

        if(decl.declaration == DXBC::OPCODE_DCL_TESS_DOMAIN)
        {
          if(decl.domain == DXBC::DOMAIN_ISOLINE)
            topo = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
          else
            topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
          break;
        }
      }
    }

    m_PostVSData[eventId].gsout.topo = topo;

    // streamout expands strips unfortunately
    if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;

    m_PostVSData[eventId].gsout.numVerts = (uint32_t)numVerts;

    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].gsout.numVerts /= RDCMAX(1U, drawcall->numInstances);

    m_PostVSData[eventId].gsout.instData = instData;
  }

  SAFE_RELEASE(soSig);
}
コード例 #17
0
void ReplayOutput::DisplayMesh()
{
	FetchDrawcall *draw = m_pRenderer->GetDrawcallByEID(m_EventID, m_LastDeferredEvent);

	if(!draw) return;
	if(m_MainOutput.outputID == 0) return;
	if(m_Width <= 0 || m_Height <= 0) return;
	if(m_RenderData.meshDisplay.type == eMeshDataStage_Unknown) return;
	if((draw->flags & eDraw_Drawcall) == 0) return;

	if(draw && m_OverlayDirty)
	{
		m_pDevice->ReplayLog(m_FrameID, 0, m_EventID, eReplay_WithoutDraw);
		RefreshOverlay();
		m_pDevice->ReplayLog(m_FrameID, 0, m_EventID, eReplay_OnlyDraw);
	}
	
	m_pDevice->BindOutputWindow(m_MainOutput.outputID, true);
	m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0);

	m_pDevice->RenderCheckerboard(Vec3f(0.666f, 0.666f, 0.666f), Vec3f(0.333f, 0.333f, 0.333f));
	
	m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0);

	MeshDisplay mesh = m_RenderData.meshDisplay;
	mesh.position.buf = m_pDevice->GetLiveID(mesh.position.buf);
	mesh.position.idxbuf = m_pDevice->GetLiveID(mesh.position.idxbuf);
	mesh.second.buf = m_pDevice->GetLiveID(mesh.second.buf);
	mesh.second.idxbuf = m_pDevice->GetLiveID(mesh.second.idxbuf);
	
	vector<MeshFormat> secondaryDraws;
	
	if(m_RenderData.meshDisplay.type != eMeshDataStage_VSIn &&
		 !m_RenderData.meshDisplay.thisDrawOnly)
	{
		mesh.position.unproject = true;
		mesh.second.unproject = true;

		for(size_t i=0; i < passEvents.size(); i++)
		{
			FetchDrawcall *d = m_pRenderer->GetDrawcallByEID(passEvents[i], m_LastDeferredEvent);

			if(d)
			{
				for(uint32_t inst=0; inst < RDCMAX(1U, draw->numInstances); inst++)
				{
					// get the 'most final' stage
					MeshFormat fmt = m_pDevice->GetPostVSBuffers(m_FrameID, passEvents[i], inst, eMeshDataStage_GSOut);
					if(fmt.buf == ResourceId()) fmt = m_pDevice->GetPostVSBuffers(m_FrameID, passEvents[i], inst, eMeshDataStage_VSOut);

					// if unproject is marked, this output had a 'real' system position output
					if(fmt.unproject)
						secondaryDraws.push_back(fmt);
				}
			}
		}

		// draw previous instances in the current drawcall
		if(draw->flags & eDraw_Instanced)
		{
			for(uint32_t inst=0; inst < RDCMAX(1U, draw->numInstances) && inst < m_RenderData.meshDisplay.curInstance; inst++)
			{
				// get the 'most final' stage
				MeshFormat fmt = m_pDevice->GetPostVSBuffers(m_FrameID, draw->eventID, inst, eMeshDataStage_GSOut);
				if(fmt.buf == ResourceId()) fmt = m_pDevice->GetPostVSBuffers(m_FrameID, draw->eventID, inst, eMeshDataStage_VSOut);

				// if unproject is marked, this output had a 'real' system position output
				if(fmt.unproject)
					secondaryDraws.push_back(fmt);
			}
		}
	}

	m_pDevice->RenderMesh(m_FrameID, m_EventID, secondaryDraws, mesh);
}
コード例 #18
0
ファイル: replay_output.cpp プロジェクト: EecheE/renderdoc
void ReplayOutput::DisplayMesh()
{
  FetchDrawcall *draw = m_pRenderer->GetDrawcallByEID(m_EventID, m_LastDeferredEvent);

  if(draw == NULL || m_MainOutput.outputID == 0 || m_Width <= 0 || m_Height <= 0 ||
     (m_RenderData.meshDisplay.type == eMeshDataStage_Unknown) || (draw->flags & eDraw_Drawcall) == 0)
  {
    float color[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    m_pDevice->BindOutputWindow(m_MainOutput.outputID, false);
    m_pDevice->ClearOutputWindowColour(m_MainOutput.outputID, color);
    m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0);
    m_pDevice->RenderCheckerboard(Vec3f(0.666f, 0.666f, 0.666f), Vec3f(0.333f, 0.333f, 0.333f));

    return;
  }

  if(draw && m_OverlayDirty)
  {
    m_pDevice->ReplayLog(m_EventID, eReplay_WithoutDraw);
    RefreshOverlay();
    m_pDevice->ReplayLog(m_EventID, eReplay_OnlyDraw);
  }

  m_pDevice->BindOutputWindow(m_MainOutput.outputID, true);
  m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0);

  m_pDevice->RenderCheckerboard(Vec3f(0.666f, 0.666f, 0.666f), Vec3f(0.333f, 0.333f, 0.333f));

  m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0);

  MeshDisplay mesh = m_RenderData.meshDisplay;
  mesh.position.buf = m_pDevice->GetLiveID(mesh.position.buf);
  mesh.position.idxbuf = m_pDevice->GetLiveID(mesh.position.idxbuf);
  mesh.second.buf = m_pDevice->GetLiveID(mesh.second.buf);
  mesh.second.idxbuf = m_pDevice->GetLiveID(mesh.second.idxbuf);

  vector<MeshFormat> secondaryDraws;

  // we choose a pallette here so that the colours stay consistent (i.e the
  // current draw is always the same colour), but also to indicate somewhat
  // the relationship - ie. instances are closer in colour than other draws
  // in the pass

  // very slightly dark red
  const FloatVector drawItself(0.06f, 0.0f, 0.0f, 1.0f);

  // more desaturated/lighter, but still reddish
  const FloatVector otherInstances(0.18f, 0.1f, 0.1f, 1.0f);

  // lighter grey with blue tinge to contrast from main/instance draws
  const FloatVector passDraws(0.2f, 0.2f, 0.25f, 1.0f);

  if(m_RenderData.meshDisplay.type != eMeshDataStage_VSIn)
  {
    for(size_t i = 0; m_RenderData.meshDisplay.showWholePass && i < passEvents.size(); i++)
    {
      FetchDrawcall *d = m_pRenderer->GetDrawcallByEID(passEvents[i], m_LastDeferredEvent);

      if(d)
      {
        for(uint32_t inst = 0; inst < RDCMAX(1U, draw->numInstances); inst++)
        {
          // get the 'most final' stage
          MeshFormat fmt = m_pDevice->GetPostVSBuffers(passEvents[i], inst, eMeshDataStage_GSOut);
          if(fmt.buf == ResourceId())
            fmt = m_pDevice->GetPostVSBuffers(passEvents[i], inst, eMeshDataStage_VSOut);

          fmt.meshColour = passDraws;

          // if unproject is marked, this output had a 'real' system position output
          if(fmt.unproject)
            secondaryDraws.push_back(fmt);
        }
      }
    }

    // draw previous instances in the current drawcall
    if(draw->flags & eDraw_Instanced)
    {
      uint32_t maxInst = 0;
      if(m_RenderData.meshDisplay.showPrevInstances)
        maxInst = RDCMAX(1U, m_RenderData.meshDisplay.curInstance);
      if(m_RenderData.meshDisplay.showAllInstances)
        maxInst = RDCMAX(1U, draw->numInstances);

      for(uint32_t inst = 0; inst < maxInst; inst++)
      {
        // get the 'most final' stage
        MeshFormat fmt = m_pDevice->GetPostVSBuffers(draw->eventID, inst, eMeshDataStage_GSOut);
        if(fmt.buf == ResourceId())
          fmt = m_pDevice->GetPostVSBuffers(draw->eventID, inst, eMeshDataStage_VSOut);

        fmt.meshColour = otherInstances;

        // if unproject is marked, this output had a 'real' system position output
        if(fmt.unproject)
          secondaryDraws.push_back(fmt);
      }
    }
  }

  mesh.position.meshColour = drawItself;

  m_pDevice->RenderMesh(m_EventID, secondaryDraws, mesh);
}
コード例 #19
0
ファイル: utf8printf.cpp プロジェクト: Anteru/renderdoc
void PrintInteger(bool typeUnsigned, uint64_t argu, int base, uint64_t numbits,
                  FormatterParams formatter, bool uppercaseDigits, char *&output,
                  size_t &actualsize, char *end)
{
  int64_t argi = 0;

  union
  {
    uint64_t *u64;
    signed int *i;
    signed char *c;
    signed short *s;
    int64_t *i64;
  } typepun;

  typepun.u64 = &argu;

  // cast the appropriate size to signed version
  switch(formatter.Length)
  {
    default:
    case None:
    case Long: argi = (int64_t)*typepun.i; break;
    case HalfHalf: argi = (int64_t)*typepun.c; break;
    case Half: argi = (int64_t)*typepun.s; break;
    case LongLong: argi = (int64_t)*typepun.i64; break;
  }

  bool negative = false;
  if(base == 10 && !typeUnsigned)
  {
    negative = argi < 0;
  }

  int digwidth = 0;
  int numPad0s = 0;
  int numPadWidth = 0;
  {
    int intwidth = 0;
    int digits = 0;

    // work out the number of decimal digits in the integer
    if(!negative)
    {
      uint64_t accum = argu;
      while(accum)
      {
        digits += 1;
        accum /= base;
      }
    }
    else
    {
      int64_t accum = argi;
      while(accum)
      {
        digits += 1;
        accum /= base;
      }
    }

    intwidth = digwidth = RDCMAX(1, digits);

    // printed int is 2 chars larger for 0x or 0b, and 1 char for 0 (octal)
    if(base == 16 || base == 2)
      intwidth += formatter.Flags & AlternateForm ? 2 : 0;
    if(base == 8)
      intwidth += formatter.Flags & AlternateForm ? 1 : 0;

    if(formatter.Precision != FormatterParams::NoPrecision && formatter.Precision > intwidth)
      numPad0s = formatter.Precision - intwidth;

    intwidth += numPad0s;

    // for decimal we can have a negative sign (or placeholder)
    if(base == 10)
    {
      if(negative)
        intwidth++;
      else if(formatter.Flags & (PrependPos | PrependSpace))
        intwidth++;
    }

    if(formatter.Width != FormatterParams::NoWidth && formatter.Width > intwidth)
      numPadWidth = formatter.Width - intwidth;
  }

  // pad with spaces if necessary
  if((formatter.Flags & (LeftJustify | PadZeroes)) == 0 && numPadWidth > 0)
    addchars(output, actualsize, end, (size_t)numPadWidth, ' ');

  if(base == 16)
  {
    if(formatter.Flags & AlternateForm)
    {
      if(uppercaseDigits)
        appendstring(output, actualsize, end, "0X");
      else
        appendstring(output, actualsize, end, "0x");
    }

    // pad with 0s as appropriate
    if((formatter.Flags & (LeftJustify | PadZeroes)) == PadZeroes && numPadWidth > 0)
      addchars(output, actualsize, end, (size_t)numPadWidth, '0');
    if(numPad0s > 0)
      addchars(output, actualsize, end, (size_t)numPad0s, '0');

    bool left0s = true;

    // mask off each hex digit and print
    for(uint64_t i = 0; i < numbits; i += 4)
    {
      uint64_t shift = numbits - 4 - i;
      uint64_t mask = 0xfULL << shift;
      char digit = char((argu & mask) >> shift);
      if(digit == 0 && left0s && i + 4 < numbits)
        continue;
      left0s = false;

      if(digit < 10)
        addchar(output, actualsize, end, '0' + digit);
      else if(uppercaseDigits)
        addchar(output, actualsize, end, 'A' + digit - 10);
      else
        addchar(output, actualsize, end, 'a' + digit - 10);
    }
  }
  else if(base == 8)