// Stores the given pixel coordinates in m_ContextX / m_ContextY (clamped so that they are never
// negative) and then calls DisplayContext().
//
// x, y: pixel coordinates to store.
// Returns: always true.
bool ReplayOutput::SetPixelContextLocation(uint32_t x, uint32_t y)
{
  const float requestedX = (float)x;
  const float requestedY = (float)y;

  // clamp each coordinate to be >= 0 before storing it
  m_ContextX = RDCMAX(requestedX, 0.0f);
  m_ContextY = RDCMAX(requestedY, 0.0f);

  DisplayContext();

  return true;
}
uint32_t ReplayOutput::PickVertex(uint32_t eventID, uint32_t x, uint32_t y, uint32_t *pickedInstance) { FetchDrawcall *draw = m_pRenderer->GetDrawcallByEID(eventID); if(!draw) return ~0U; if(m_RenderData.meshDisplay.type == eMeshDataStage_Unknown) return ~0U; if((draw->flags & eDraw_Drawcall) == 0) return ~0U; MeshDisplay cfg = m_RenderData.meshDisplay; if(cfg.position.buf == ResourceId()) return ~0U; cfg.position.buf = m_pDevice->GetLiveID(cfg.position.buf); cfg.position.idxbuf = m_pDevice->GetLiveID(cfg.position.idxbuf); cfg.second.buf = m_pDevice->GetLiveID(cfg.second.buf); cfg.second.idxbuf = m_pDevice->GetLiveID(cfg.second.idxbuf); *pickedInstance = 0; if(draw->flags & eDraw_Instanced) { uint32_t maxInst = 0; if(m_RenderData.meshDisplay.showPrevInstances) maxInst = RDCMAX(1U, m_RenderData.meshDisplay.curInstance); if(m_RenderData.meshDisplay.showAllInstances) maxInst = RDCMAX(1U, draw->numInstances); for(uint32_t inst = 0; inst < maxInst; inst++) { // get the 'most final' stage MeshFormat fmt = m_pDevice->GetPostVSBuffers(draw->eventID, inst, eMeshDataStage_GSOut); if(fmt.buf == ResourceId()) fmt = m_pDevice->GetPostVSBuffers(draw->eventID, inst, eMeshDataStage_VSOut); cfg.position = fmt; uint32_t ret = m_pDevice->PickVertex(m_EventID, cfg, x, y); if(ret != ~0U) { *pickedInstance = inst; return ret; } } return ~0U; } else { return m_pDevice->PickVertex(m_EventID, cfg, x, y); } }
// Recursively walks a drawcall tree, filling in each drawcall's parent ID and linking the
// drawcalls that are neither parents nor markers into a previous/next chain by drawcallID.
// Every linked drawcall is also stored in m_Drawcalls at index drawcallID.
//
// frame:    frame info; only immContextId is read here (in the ordering assert).
// draws:    the list of drawcalls at this level of the tree.
// parent:   the drawcall owning 'draws', or NULL at the root (parent ID is then 0).
// previous: the last drawcall linked so far, or NULL if none.
//
// Returns the last drawcall linked while processing 'draws' (including nested children), or NULL
// if nothing at this level or below was linked.
FetchDrawcall *ReplayRenderer::SetupDrawcallPointers(FetchFrameInfo frame,
                                                     rdctype::array<FetchDrawcall> &draws,
                                                     FetchDrawcall *parent,
                                                     FetchDrawcall *previous)
{
  FetchDrawcall *ret = NULL;

  for(int32_t i = 0; i < draws.count; i++)
  {
    FetchDrawcall *draw = &draws[i];

    draw->parent = parent ? parent->drawcallID : 0;

    if(draw->children.count > 0)
    {
      FetchDrawcall *lastChild = SetupDrawcallPointers(frame, draw->children, draw, previous);

      // A subtree that linked nothing (e.g. it only contains markers) returns NULL. Only adopt
      // the result when something was linked, otherwise 'previous' would be reset and the
      // previous/next chain broken across this subtree.
      if(lastChild != NULL)
        ret = previous = lastChild;
    }
    else if(draw->flags & (eDraw_PushMarker | eDraw_SetMarker | eDraw_Present))
    {
      // don't want to set up previous/next links for markers
    }
    else
    {
      if(previous != NULL)
        previous->next = draw->drawcallID;
      draw->previous = previous ? previous->drawcallID : 0;

      RDCASSERT(m_Drawcalls.empty() || draw->eventID > m_Drawcalls.back()->eventID ||
                draw->context != frame.immContextId);

      // grow the lookup table if needed so it can be indexed directly by drawcallID
      m_Drawcalls.resize(RDCMAX(m_Drawcalls.size(), size_t(draw->drawcallID + 1)));
      m_Drawcalls[draw->drawcallID] = draw;

      ret = previous = draw;
    }
  }

  return ret;
}
// Fills in this image's cached creation info from a VkImageCreateInfo.
//
// resourceMan, info: not used by this function.
// pCreateInfo:       the creation parameters to copy from; must not be NULL (it is dereferenced
//                    unconditionally).
void VulkanCreationInfo::Image::Init(VulkanResourceManager *resourceMan, VulkanCreationInfo &info,
                                     const VkImageCreateInfo *pCreateInfo)
{
  const VkImageCreateInfo &ci = *pCreateInfo;

  // no views exist yet
  view = VK_NULL_HANDLE;
  stencilView = VK_NULL_HANDLE;

  // straight copies of the creation parameters
  type = ci.imageType;
  format = ci.format;
  extent = ci.extent;
  arrayLayers = ci.arrayLayers;
  mipLevels = ci.mipLevels;

  // never store a sample count below VK_SAMPLE_COUNT_1_BIT
  samples = RDCMAX(VK_SAMPLE_COUNT_1_BIT, ci.samples);

  // translate the Vulkan usage bits into texture creation flags
  creationFlags = 0;

  if(ci.usage & VK_IMAGE_USAGE_SAMPLED_BIT)
  {
    creationFlags |= eTextureCreate_SRV;
  }

  if(ci.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT))
  {
    creationFlags |= eTextureCreate_RTV;
  }

  if(ci.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
  {
    creationFlags |= eTextureCreate_DSV;
  }

  if(ci.usage & VK_IMAGE_USAGE_STORAGE_BIT)
  {
    creationFlags |= eTextureCreate_UAV;
  }

  cube = (ci.flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) != 0;
}
// Serialise handler for glPopDebugGroup. Nothing is read from or written to 'ser' here; all the
// work happens only when IsReplayingAndReading() is true.
//
// Returns: always true.
bool WrappedOpenGL::Serialise_glPopDebugGroup(SerialiserType &ser)
{
  if(!IsReplayingAndReading())
    return true;

  GLMarkerRegion::End();

  // decrement the replay event count, but never let it go below zero
  m_ReplayEventCount = RDCMAX(0, m_ReplayEventCount - 1);

  if(!IsLoading(m_State))
    return true;

  if(m_CurEvents.empty())
    return true;

  // while loading with events still pending in m_CurEvents, add an "API Calls" marker drawcall
  DrawcallDescription apiCalls;
  apiCalls.name = "API Calls";
  apiCalls.flags |= DrawFlags::SetMarker | DrawFlags::APICalls;

  AddDrawcall(apiCalls, true);

  return true;
}
int GetNumMips(const GLHookSet &gl, GLenum target, GLuint tex, GLuint w, GLuint h, GLuint d) { int mips = 1; GLint immut = 0; gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_IMMUTABLE_FORMAT, &immut); if(immut) gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_IMMUTABLE_LEVELS, (GLint *)&mips); else mips = CalcNumMips(w, h, d); GLint maxLevel = 1000; gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_MAX_LEVEL, &maxLevel); mips = RDCMIN(mips, maxLevel+1); if(immut == 0) { // check to see if all mips are set, or clip the number of mips to those that are // set. if(target == eGL_TEXTURE_CUBE_MAP) target = eGL_TEXTURE_CUBE_MAP_POSITIVE_X; for(int i=0; i < mips; i++) { GLint width = 0; gl.glGetTextureLevelParameterivEXT(tex, target, i, eGL_TEXTURE_WIDTH, &width); if(width == 0) { mips = i; break; } } } return RDCMAX(1, mips); }
bool write_dds_to_file(FILE *f, const dds_data &data) { if(!f) return false; uint32_t magic = dds_fourcc; DDS_HEADER header; DDS_HEADER_DXT10 headerDXT10; RDCEraseEl(header); RDCEraseEl(headerDXT10); header.dwSize = sizeof(DDS_HEADER); header.ddspf.dwSize = sizeof(DDS_PIXELFORMAT); header.dwWidth = data.width; header.dwHeight = data.height; header.dwDepth = data.depth; header.dwMipMapCount = data.mips; header.dwFlags = DDSD_CAPS | DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT; if(data.mips > 1) header.dwFlags |= DDSD_MIPMAPCOUNT; if(data.depth > 1) header.dwFlags |= DDSD_DEPTH; bool blockFormat = false; if(data.format.Special()) { switch(data.format.type) { case ResourceFormatType::BC1: case ResourceFormatType::BC2: case ResourceFormatType::BC3: case ResourceFormatType::BC4: case ResourceFormatType::BC5: case ResourceFormatType::BC6: case ResourceFormatType::BC7: blockFormat = true; break; case ResourceFormatType::ETC2: case ResourceFormatType::EAC: case ResourceFormatType::ASTC: case ResourceFormatType::YUV: RDCERR("Unsupported file format, %u", data.format.type); return false; default: break; } } if(blockFormat) header.dwFlags |= DDSD_LINEARSIZE; else header.dwFlags |= DDSD_PITCH; header.dwCaps = DDSCAPS_TEXTURE; if(data.mips > 1) header.dwCaps |= DDSCAPS_MIPMAP; if(data.mips > 1 || data.slices > 1 || data.depth > 1) header.dwCaps |= DDSCAPS_COMPLEX; header.dwCaps2 = data.depth > 1 ? DDSCAPS2_VOLUME : 0; bool dx10Header = false; headerDXT10.dxgiFormat = ResourceFormat2DXGIFormat(data.format); headerDXT10.resourceDimension = data.depth > 1 ? 
D3D10_RESOURCE_DIMENSION_TEXTURE3D : D3D10_RESOURCE_DIMENSION_TEXTURE2D; headerDXT10.miscFlag = 0; headerDXT10.arraySize = data.slices; if(headerDXT10.dxgiFormat == DXGI_FORMAT_UNKNOWN) { RDCERR("Couldn't convert resource format to DXGI format"); return false; } if(data.cubemap) { header.dwCaps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALLFACES; headerDXT10.miscFlag |= DDS_RESOURCE_MISC_TEXTURECUBE; headerDXT10.arraySize /= 6; } if(headerDXT10.arraySize > 1) dx10Header = true; // need to specify dx10 header to give array size uint32_t bytesPerPixel = 1; if(blockFormat) { int blockSize = (data.format.type == ResourceFormatType::BC1 || data.format.type == ResourceFormatType::BC4) ? 8 : 16; header.dwPitchOrLinearSize = RDCMAX(1U, ((header.dwWidth + 3) / 4)) * blockSize; } else { switch(data.format.type) { case ResourceFormatType::S8: bytesPerPixel = 1; break; case ResourceFormatType::R10G10B10A2: case ResourceFormatType::R9G9B9E5: case ResourceFormatType::R11G11B10: case ResourceFormatType::D24S8: bytesPerPixel = 4; break; case ResourceFormatType::R5G6B5: case ResourceFormatType::R5G5B5A1: case ResourceFormatType::R4G4B4A4: bytesPerPixel = 2; break; case ResourceFormatType::D32S8: bytesPerPixel = 8; break; case ResourceFormatType::D16S8: case ResourceFormatType::YUV: case ResourceFormatType::R4G4: RDCERR("Unsupported file format %u", data.format.type); return false; default: bytesPerPixel = data.format.compCount * data.format.compByteWidth; } header.dwPitchOrLinearSize = header.dwWidth * bytesPerPixel; } // special case a couple of formats to write out non-DX10 style, for // backwards compatibility if(data.format.compByteWidth == 1 && data.format.compCount == 4 && data.format.compType == CompType::UNorm) { header.ddspf.dwFlags = DDPF_RGBA; header.ddspf.dwRGBBitCount = 32; header.ddspf.dwRBitMask = 0x000000ff; header.ddspf.dwGBitMask = 0x0000ff00; header.ddspf.dwBBitMask = 0x00ff0000; header.ddspf.dwABitMask = 0xff000000; if(data.format.bgraOrder) 
std::swap(header.ddspf.dwRBitMask, header.ddspf.dwBBitMask); } else if(data.format.type == ResourceFormatType::BC1) { header.ddspf.dwFlags = DDPF_FOURCC; header.ddspf.dwFourCC = MAKE_FOURCC('D', 'X', 'T', '1'); } else if(data.format.type == ResourceFormatType::BC2) { header.ddspf.dwFlags = DDPF_FOURCC; header.ddspf.dwFourCC = MAKE_FOURCC('D', 'X', 'T', '3'); } else if(data.format.type == ResourceFormatType::BC3) { header.ddspf.dwFlags = DDPF_FOURCC; header.ddspf.dwFourCC = MAKE_FOURCC('D', 'X', 'T', '5'); } else if(data.format.type == ResourceFormatType::BC4 && data.format.compType == CompType::UNorm) { header.ddspf.dwFlags = DDPF_FOURCC; header.ddspf.dwFourCC = MAKE_FOURCC('B', 'C', '4', 'U'); } else if(data.format.type == ResourceFormatType::BC4 && data.format.compType == CompType::SNorm) { header.ddspf.dwFlags = DDPF_FOURCC; header.ddspf.dwFourCC = MAKE_FOURCC('B', 'C', '4', 'S'); } else if(data.format.type == ResourceFormatType::BC5 && data.format.compType == CompType::UNorm) { header.ddspf.dwFlags = DDPF_FOURCC; header.ddspf.dwFourCC = MAKE_FOURCC('A', 'T', 'I', '2'); } else if(data.format.type == ResourceFormatType::BC5 && data.format.compType == CompType::SNorm) { header.ddspf.dwFlags = DDPF_FOURCC; header.ddspf.dwFourCC = MAKE_FOURCC('B', 'C', '5', 'S'); } else { // just write out DX10 header dx10Header = true; } if(dx10Header) { header.ddspf.dwFlags = DDPF_FOURCC; header.ddspf.dwFourCC = MAKE_FOURCC('D', 'X', '1', '0'); } { FileIO::fwrite(&magic, sizeof(magic), 1, f); FileIO::fwrite(&header, sizeof(header), 1, f); if(dx10Header) FileIO::fwrite(&headerDXT10, sizeof(headerDXT10), 1, f); int i = 0; for(int slice = 0; slice < RDCMAX(1, data.slices); slice++) { for(int mip = 0; mip < RDCMAX(1, data.mips); mip++) { int numdepths = RDCMAX(1, data.depth >> mip); for(int d = 0; d < numdepths; d++) { byte *bytedata = data.subdata[i]; int rowlen = RDCMAX(1, data.width >> mip); int numRows = RDCMAX(1, data.height >> mip); int pitch = RDCMAX(1U, rowlen * 
bytesPerPixel); // pitch/rows are in blocks, not pixels, for block formats. if(blockFormat) { numRows = RDCMAX(1, numRows / 4); int blockSize = (data.format.type == ResourceFormatType::BC1 || data.format.type == ResourceFormatType::BC4) ? 8 : 16; pitch = RDCMAX(blockSize, (((rowlen + 3) / 4)) * blockSize); } for(int row = 0; row < numRows; row++) { FileIO::fwrite(bytedata, 1, pitch, f); bytedata += pitch; } i++; } } } } return true; }
VkResult WrappedVulkan::vkCreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { VkDeviceCreateInfo createInfo = *pCreateInfo; uint32_t qCount = 0; VkResult vkr = VK_SUCCESS; ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, NULL); VkQueueFamilyProperties *props = new VkQueueFamilyProperties[qCount]; ObjDisp(physicalDevice)->GetPhysicalDeviceQueueFamilyProperties(Unwrap(physicalDevice), &qCount, props); // find a queue that supports all capabilities, and if one doesn't exist, add it. bool found = false; uint32_t qFamilyIdx = 0; VkQueueFlags search = (VK_QUEUE_GRAPHICS_BIT); // for queue priorities, if we need it float one = 1.0f; // if we need to change the requested queues, it will point to this VkDeviceQueueCreateInfo *modQueues = NULL; for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++) { uint32_t idx = createInfo.pQueueCreateInfos[i].queueFamilyIndex; RDCASSERT(idx < qCount); // this requested queue is one we can use too if((props[idx].queueFlags & search) == search && createInfo.pQueueCreateInfos[i].queueCount > 0) { qFamilyIdx = idx; found = true; break; } } // if we didn't find it, search for which queue family we should add a request for if(!found) { RDCDEBUG("App didn't request a queue family we can use - adding our own"); for(uint32_t i=0; i < qCount; i++) { if((props[i].queueFlags & search) == search) { qFamilyIdx = i; found = true; break; } } if(!found) { SAFE_DELETE_ARRAY(props); RDCERR("Can't add a queue with required properties for RenderDoc! 
Unsupported configuration"); return VK_ERROR_INITIALIZATION_FAILED; } // we found the queue family, add it modQueues = new VkDeviceQueueCreateInfo[createInfo.queueCreateInfoCount + 1]; for(uint32_t i=0; i < createInfo.queueCreateInfoCount; i++) modQueues[i] = createInfo.pQueueCreateInfos[i]; modQueues[createInfo.queueCreateInfoCount].queueFamilyIndex = qFamilyIdx; modQueues[createInfo.queueCreateInfoCount].queueCount = 1; modQueues[createInfo.queueCreateInfoCount].pQueuePriorities = &one; createInfo.pQueueCreateInfos = modQueues; createInfo.queueCreateInfoCount++; } SAFE_DELETE_ARRAY(props); m_QueueFamilies.resize(createInfo.queueCreateInfoCount); for(size_t i=0; i < createInfo.queueCreateInfoCount; i++) { uint32_t family = createInfo.pQueueCreateInfos[i].queueFamilyIndex; uint32_t count = createInfo.pQueueCreateInfos[i].queueCount; m_QueueFamilies.resize(RDCMAX(m_QueueFamilies.size(), size_t(family+1))); m_QueueFamilies[family] = new VkQueue[count]; for(uint32_t q=0; q < count; q++) m_QueueFamilies[family][q] = VK_NULL_HANDLE; } VkLayerDeviceCreateInfo *layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext; // step through the chain of pNext until we get to the link info while(layerCreateInfo && (layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || layerCreateInfo->function != VK_LAYER_LINK_INFO) ) { layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext; } RDCASSERT(layerCreateInfo); PFN_vkGetDeviceProcAddr gdpa = layerCreateInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr; PFN_vkGetInstanceProcAddr gipa = layerCreateInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr; // move chain on for next layer layerCreateInfo->u.pLayerInfo = layerCreateInfo->u.pLayerInfo->pNext; PFN_vkCreateDevice createFunc = (PFN_vkCreateDevice)gipa(VK_NULL_HANDLE, "vkCreateDevice"); // now search again through for the loader data callback (if it exists) layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext; // step through the chain of pNext 
while(layerCreateInfo && (layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || layerCreateInfo->function != VK_LOADER_DATA_CALLBACK) ) { layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext; } // if we found one (we might not - on old loaders), then store the func ptr for // use instead of SetDispatchTableOverMagicNumber if(layerCreateInfo) { RDCASSERT(m_SetDeviceLoaderData == layerCreateInfo->u.pfnSetDeviceLoaderData || m_SetDeviceLoaderData == NULL, m_SetDeviceLoaderData, layerCreateInfo->u.pfnSetDeviceLoaderData); m_SetDeviceLoaderData = layerCreateInfo->u.pfnSetDeviceLoaderData; } VkResult ret = createFunc(Unwrap(physicalDevice), &createInfo, pAllocator, pDevice); // don't serialise out any of the pNext stuff for layer initialisation // (note that we asserted above that there was nothing else in the chain) createInfo.pNext = NULL; if(ret == VK_SUCCESS) { InitDeviceTable(*pDevice, gdpa); ResourceId id = GetResourceManager()->WrapResource(*pDevice, *pDevice); if(m_State >= WRITING) { Chunk *chunk = NULL; { CACHE_THREAD_SERIALISER(); SCOPED_SERIALISE_CONTEXT(CREATE_DEVICE); Serialise_vkCreateDevice(localSerialiser, physicalDevice, &createInfo, NULL, pDevice); chunk = scope.Get(); } VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pDevice); RDCASSERT(record); record->AddChunk(chunk); record->memIdxMap = GetRecord(physicalDevice)->memIdxMap; record->instDevInfo = new InstanceDeviceInfo(); #undef CheckExt #define CheckExt(name) record->instDevInfo->name = GetRecord(m_Instance)->instDevInfo->name; // inherit extension enablement from instance, that way GetDeviceProcAddress can check // for enabled extensions for instance functions CheckInstanceExts(); #undef CheckExt #define CheckExt(name) if(!strcmp(createInfo.ppEnabledExtensionNames[i], STRINGIZE(name))) { record->instDevInfo->name = true; } for(uint32_t i=0; i < createInfo.enabledExtensionCount; i++) { CheckDeviceExts(); } InitDeviceExtensionTables(*pDevice); 
GetRecord(m_Instance)->AddParent(record); } else { GetResourceManager()->AddLiveResource(id, *pDevice); } VkDevice device = *pDevice; RDCASSERT(m_Device == VK_NULL_HANDLE); // MULTIDEVICE m_PhysicalDevice = physicalDevice; m_Device = device; m_QueueFamilyIdx = qFamilyIdx; if(m_InternalCmds.cmdpool == VK_NULL_HANDLE) { VkCommandPoolCreateInfo poolInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, NULL, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, qFamilyIdx }; vkr = ObjDisp(device)->CreateCommandPool(Unwrap(device), &poolInfo, NULL, &m_InternalCmds.cmdpool); RDCASSERTEQUAL(vkr, VK_SUCCESS); GetResourceManager()->WrapResource(Unwrap(device), m_InternalCmds.cmdpool); } ObjDisp(physicalDevice)->GetPhysicalDeviceProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.props); ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), &m_PhysicalDeviceData.memProps); ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures(Unwrap(physicalDevice), &m_PhysicalDeviceData.features); m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); m_PhysicalDeviceData.fakeMemProps = GetRecord(physicalDevice)->memProps; m_DebugManager = new VulkanDebugManager(this, device); } SAFE_DELETE_ARRAY(modQueues); return ret; }
bool WrappedID3D11DeviceContext::Serialise_UpdateSubresource1(ID3D11Resource *pDstResource, UINT DstSubresource, const D3D11_BOX *pDstBox, const void *pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch, UINT CopyFlags) { SERIALISE_ELEMENT(ResourceId, idx, GetIDForResource(pDstResource)); SERIALISE_ELEMENT(uint32_t, flags, CopyFlags); SERIALISE_ELEMENT(uint32_t, DestSubresource, DstSubresource); D3D11ResourceRecord *record = m_pDevice->GetResourceManager()->GetResourceRecord(idx); D3D11ResourceRecord *parent = record; if(record && record->NumSubResources > (int)DestSubresource) record = (D3D11ResourceRecord *)record->SubResources[DestSubresource]; SERIALISE_ELEMENT(uint8_t, isUpdate, record->DataInSerialiser); ID3D11Resource *DestResource = pDstResource; if(m_State < WRITING) { if(m_pDevice->GetResourceManager()->HasLiveResource(idx)) DestResource = (ID3D11Resource *)m_pDevice->GetResourceManager()->GetLiveResource(idx); } if(isUpdate) { SERIALISE_ELEMENT(uint8_t, HasDestBox, pDstBox != NULL); SERIALISE_ELEMENT_OPT(D3D11_BOX, box, *pDstBox, HasDestBox); SERIALISE_ELEMENT(uint32_t, SourceRowPitch, SrcRowPitch); SERIALISE_ELEMENT(uint32_t, SourceDepthPitch, SrcDepthPitch); size_t srcLength = 0; if(m_State >= WRITING) { RDCASSERT(record); if(WrappedID3D11Buffer::IsAlloc(DestResource)) { srcLength = record->Length; if(HasDestBox) srcLength = RDCMIN((uint32_t)srcLength, pDstBox->right - pDstBox->left); } else { WrappedID3D11Texture1D *tex1 = WrappedID3D11Texture1D::IsAlloc(DestResource) ? (WrappedID3D11Texture1D *)DestResource : NULL; WrappedID3D11Texture2D *tex2 = WrappedID3D11Texture2D::IsAlloc(DestResource) ? (WrappedID3D11Texture2D *)DestResource : NULL; WrappedID3D11Texture3D *tex3 = WrappedID3D11Texture3D::IsAlloc(DestResource) ? 
(WrappedID3D11Texture3D *)DestResource : NULL; UINT mipLevel = GetMipForSubresource(DestResource, DestSubresource); if(tex1) { srcLength = record->Length; if(HasDestBox) srcLength = RDCMIN((uint32_t)srcLength, pDstBox->right - pDstBox->left); } else if(tex2) { D3D11_TEXTURE2D_DESC desc = {0}; tex2->GetDesc(&desc); size_t rows = RDCMAX(1U,desc.Height>>mipLevel); DXGI_FORMAT fmt = desc.Format; if(HasDestBox) rows = (pDstBox->bottom - pDstBox->top); if(IsBlockFormat(fmt)) rows = RDCMAX((size_t)1, rows/4); srcLength = SourceRowPitch*rows; } else if(tex3) { D3D11_TEXTURE3D_DESC desc = {0}; tex3->GetDesc(&desc); size_t slices = RDCMAX(1U,desc.Depth>>mipLevel); srcLength = SourceDepthPitch*slices; if(HasDestBox) srcLength = SourceDepthPitch*(pDstBox->back - pDstBox->front); } else { RDCERR("UpdateSubResource on unexpected resource type"); } }
ResourceId D3D12Replay::RenderOverlay(ResourceId texid, CompType typeHint, DebugOverlay overlay, uint32_t eventId, const vector<uint32_t> &passEvents) { ID3D12Resource *resource = WrappedID3D12Resource::GetList()[texid]; if(resource == NULL) return ResourceId(); D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc(); std::vector<D3D12_RESOURCE_BARRIER> barriers; int resType = 0; GetDebugManager()->PrepareTextureSampling(resource, typeHint, resType, barriers); D3D12_RESOURCE_DESC overlayTexDesc; overlayTexDesc.Alignment = 0; overlayTexDesc.DepthOrArraySize = 1; overlayTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; overlayTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; overlayTexDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM; overlayTexDesc.Height = resourceDesc.Height; overlayTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; overlayTexDesc.MipLevels = 1; overlayTexDesc.SampleDesc = resourceDesc.SampleDesc; overlayTexDesc.Width = resourceDesc.Width; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; D3D12_RESOURCE_DESC currentOverlayDesc; RDCEraseEl(currentOverlayDesc); if(m_Overlay.Texture) currentOverlayDesc = m_Overlay.Texture->GetDesc(); WrappedID3D12Resource *wrappedCustomRenderTex = (WrappedID3D12Resource *)m_Overlay.Texture; // need to recreate backing custom render tex if(overlayTexDesc.Width != currentOverlayDesc.Width || overlayTexDesc.Height != currentOverlayDesc.Height || overlayTexDesc.Format != currentOverlayDesc.Format || overlayTexDesc.SampleDesc.Count != currentOverlayDesc.SampleDesc.Count || overlayTexDesc.SampleDesc.Quality != currentOverlayDesc.SampleDesc.Quality) { SAFE_RELEASE(m_Overlay.Texture); m_Overlay.resourceId = ResourceId(); ID3D12Resource *customRenderTex = NULL; HRESULT hr = m_pDevice->CreateCommittedResource( 
&heapProps, D3D12_HEAP_FLAG_NONE, &overlayTexDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, NULL, __uuidof(ID3D12Resource), (void **)&customRenderTex); if(FAILED(hr)) { RDCERR("Failed to create custom render tex HRESULT: %s", ToStr(hr).c_str()); return ResourceId(); } wrappedCustomRenderTex = (WrappedID3D12Resource *)customRenderTex; customRenderTex->SetName(L"customRenderTex"); m_Overlay.Texture = wrappedCustomRenderTex; m_Overlay.resourceId = wrappedCustomRenderTex->GetResourceID(); } D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState; ID3D12Resource *renderDepth = NULL; D3D12Descriptor *dsView = GetWrapped(rs.dsv); D3D12_RESOURCE_DESC depthTexDesc = {}; D3D12_DEPTH_STENCIL_VIEW_DESC dsViewDesc = {}; if(dsView) { ID3D12Resource *realDepth = dsView->nonsamp.resource; dsViewDesc = dsView->nonsamp.dsv; depthTexDesc = realDepth->GetDesc(); depthTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; depthTexDesc.Alignment = 0; HRESULT hr = S_OK; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc, D3D12_RESOURCE_STATE_COPY_DEST, NULL, __uuidof(ID3D12Resource), (void **)&renderDepth); if(FAILED(hr)) { RDCERR("Failed to create renderDepth HRESULT: %s", ToStr(hr).c_str()); return m_Overlay.resourceId; } renderDepth->SetName(L"Overlay renderDepth"); ID3D12GraphicsCommandList *list = m_pDevice->GetNewList(); const vector<D3D12_RESOURCE_STATES> &states = m_pDevice->GetSubresourceStates(GetResID(realDepth)); vector<D3D12_RESOURCE_BARRIER> depthBarriers; depthBarriers.reserve(states.size()); for(size_t i = 0; i < states.size(); i++) { D3D12_RESOURCE_BARRIER b; // skip unneeded barriers if(states[i] & D3D12_RESOURCE_STATE_COPY_SOURCE) continue; b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; b.Transition.pResource = realDepth; b.Transition.Subresource = (UINT)i; b.Transition.StateBefore = states[i]; b.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; 
depthBarriers.push_back(b); } if(!depthBarriers.empty()) list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]); list->CopyResource(renderDepth, realDepth); for(size_t i = 0; i < depthBarriers.size(); i++) std::swap(depthBarriers[i].Transition.StateBefore, depthBarriers[i].Transition.StateAfter); if(!depthBarriers.empty()) list->ResourceBarrier((UINT)depthBarriers.size(), &depthBarriers[0]); D3D12_RESOURCE_BARRIER b = {}; b.Transition.pResource = renderDepth; b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; // prepare tex resource for copying list->ResourceBarrier(1, &b); list->Close(); } D3D12_RENDER_TARGET_VIEW_DESC rtDesc = {}; rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rtDesc.Format = DXGI_FORMAT_R16G16B16A16_UNORM; rtDesc.Texture2D.MipSlice = 0; rtDesc.Texture2D.PlaneSlice = 0; if(overlayTexDesc.SampleDesc.Count > 1 || overlayTexDesc.SampleDesc.Quality > 0) rtDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; D3D12_CPU_DESCRIPTOR_HANDLE rtv = GetDebugManager()->GetCPUHandle(OVERLAY_RTV); m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv); ID3D12GraphicsCommandList *list = m_pDevice->GetNewList(); FLOAT black[] = {0.0f, 0.0f, 0.0f, 0.0f}; list->ClearRenderTargetView(rtv, black, 0, NULL); D3D12_CPU_DESCRIPTOR_HANDLE dsv = {}; if(renderDepth) { dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV); m_pDevice->CreateDepthStencilView( renderDepth, dsViewDesc.Format == DXGI_FORMAT_UNKNOWN ? 
NULL : &dsViewDesc, dsv); } D3D12_DEPTH_STENCIL_DESC dsDesc; dsDesc.BackFace.StencilFailOp = dsDesc.BackFace.StencilPassOp = dsDesc.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; dsDesc.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; dsDesc.FrontFace.StencilFailOp = dsDesc.FrontFace.StencilPassOp = dsDesc.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; dsDesc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; dsDesc.DepthEnable = TRUE; dsDesc.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; dsDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; dsDesc.StencilEnable = FALSE; dsDesc.StencilReadMask = dsDesc.StencilWriteMask = 0xff; WrappedID3D12PipelineState *pipe = NULL; if(rs.pipe != ResourceId()) pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe); if(overlay == DebugOverlay::NaN || overlay == DebugOverlay::Clipping) { // just need the basic texture } else if(overlay == DebugOverlay::Drawcall) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); float overlayConsts[4] = {0.8f, 0.1f, 0.8f, 1.0f}; ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts); psoDesc.PS.pShaderBytecode = ps->GetBufferPointer(); psoDesc.PS.BytecodeLength = ps->GetBufferSize(); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; 
psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; float clearColour[] = {0.0f, 0.0f, 0.0f, 0.5f}; list->ClearRenderTargetView(rtv, clearColour, 0, NULL); list->Close(); list = NULL; ID3D12PipelineState *pso = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pso); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(ps); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(pso); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = rtv; rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE(); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(pso); SAFE_RELEASE(ps); } } else if(overlay == DebugOverlay::BackfaceCull) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); D3D12_CULL_MODE origCull = psoDesc.RasterizerState.CullMode; float redCol[4] = {1.0f, 0.0f, 0.0f, 1.0f}; ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(redCol); float greenCol[4] = {0.0f, 1.0f, 0.0f, 1.0f}; ID3DBlob *green = m_pDevice->GetShaderCache()->MakeFixedColShader(greenCol); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; 
psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; psoDesc.PS.pShaderBytecode = red->GetBufferPointer(); psoDesc.PS.BytecodeLength = red->GetBufferSize(); list->Close(); list = NULL; ID3D12PipelineState *redPSO = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&redPSO); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(red); SAFE_RELEASE(green); return m_Overlay.resourceId; } psoDesc.RasterizerState.CullMode = origCull; psoDesc.PS.pShaderBytecode = green->GetBufferPointer(); psoDesc.PS.BytecodeLength = green->GetBufferSize(); ID3D12PipelineState *greenPSO = NULL; hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&greenPSO); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(red); SAFE_RELEASE(redPSO); SAFE_RELEASE(green); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(redPSO); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = 
rtv; rs.dsv = D3D12_CPU_DESCRIPTOR_HANDLE(); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs.pipe = GetResID(greenPSO); m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(red); SAFE_RELEASE(green); SAFE_RELEASE(redPSO); SAFE_RELEASE(greenPSO); } } else if(overlay == DebugOverlay::Wireframe) { if(pipe && pipe->IsGraphics()) { D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = pipe->GetGraphicsDesc(); float overlayConsts[] = {200.0f / 255.0f, 255.0f / 255.0f, 0.0f / 255.0f, 1.0f}; ID3DBlob *ps = m_pDevice->GetShaderCache()->MakeFixedColShader(overlayConsts); psoDesc.PS.pShaderBytecode = ps->GetBufferPointer(); psoDesc.PS.BytecodeLength = ps->GetBufferSize(); psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; psoDesc.BlendState.AlphaToCoverageEnable = FALSE; psoDesc.BlendState.IndependentBlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; RDCEraseEl(psoDesc.RTVFormats); psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; psoDesc.NumRenderTargets = 1; psoDesc.SampleMask = ~0U; psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_WIREFRAME; psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; psoDesc.RasterizerState.FrontCounterClockwise = FALSE; psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; psoDesc.RasterizerState.DepthClipEnable = FALSE; psoDesc.RasterizerState.MultisampleEnable = FALSE; psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; 
overlayConsts[3] = 0.0f; list->ClearRenderTargetView(rtv, overlayConsts, 0, NULL); list->Close(); list = NULL; ID3D12PipelineState *pso = NULL; HRESULT hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pso); if(FAILED(hr)) { RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(ps); return m_Overlay.resourceId; } D3D12RenderState prev = rs; rs.pipe = GetResID(pso); rs.rtSingle = true; rs.rts.resize(1); rs.rts[0] = rtv; rs.dsv = dsv; m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw); rs = prev; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); SAFE_RELEASE(pso); SAFE_RELEASE(ps); } } else if(overlay == DebugOverlay::ClearBeforePass || overlay == DebugOverlay::ClearBeforeDraw) { vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::ClearBeforeDraw) events.clear(); events.push_back(eventId); if(!events.empty()) { list->Close(); list = NULL; bool rtSingle = rs.rtSingle; std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> rts = rs.rts; if(overlay == DebugOverlay::ClearBeforePass) m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); list = m_pDevice->GetNewList(); for(size_t i = 0; i < rts.size(); i++) { D3D12Descriptor *desc = rtSingle ? 
GetWrapped(rts[0]) : GetWrapped(rts[i]); if(desc) { if(rtSingle) desc += i; Unwrap(list)->ClearRenderTargetView(UnwrapCPU(desc), black, 0, NULL); } } list->Close(); list = NULL; for(size_t i = 0; i < events.size(); i++) { m_pDevice->ReplayLog(events[i], events[i], eReplay_OnlyDraw); if(overlay == DebugOverlay::ClearBeforePass && i + 1 < events.size()) m_pDevice->ReplayLog(events[i] + 1, events[i + 1], eReplay_WithoutDraw); } } } else if(overlay == DebugOverlay::ViewportScissor) { if(pipe && pipe->IsGraphics() && !rs.views.empty()) { list->OMSetRenderTargets(1, &rtv, TRUE, NULL); D3D12_VIEWPORT viewport = rs.views[0]; list->RSSetViewports(1, &viewport); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); list->SetPipelineState(m_General.FixedColPipe); list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig); DebugPixelCBufferData pixelData = {0}; // border colour (dark, 2px, opaque) pixelData.WireframeColour = Vec3f(0.1f, 0.1f, 0.1f); // inner colour (light, transparent) pixelData.Channels = Vec4f(0.2f, 0.2f, 0.9f, 0.7f); pixelData.OutputDisplayFormat = 0; pixelData.RangeMinimum = viewport.TopLeftX; pixelData.InverseRangeSize = viewport.TopLeftY; pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f); D3D12_GPU_VIRTUAL_ADDRESS viewCB = GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData)); list->SetGraphicsRootConstantBufferView(0, viewCB); list->SetGraphicsRootConstantBufferView(1, viewCB); list->SetGraphicsRootConstantBufferView(2, viewCB); Vec4f dummy; list->SetGraphicsRoot32BitConstants(3, 4, &dummy.x, 0); float factor[4] = {1.0f, 1.0f, 1.0f, 1.0f}; list->OMSetBlendFactor(factor); list->DrawInstanced(3, 1, 0, 0); viewport.TopLeftX = (float)rs.scissors[0].left; viewport.TopLeftY = (float)rs.scissors[0].top; viewport.Width = (float)(rs.scissors[0].right - rs.scissors[0].left); viewport.Height = (float)(rs.scissors[0].bottom - 
rs.scissors[0].top); list->RSSetViewports(1, &viewport); pixelData.OutputDisplayFormat = 1; pixelData.RangeMinimum = viewport.TopLeftX; pixelData.InverseRangeSize = viewport.TopLeftY; pixelData.TextureResolutionPS = Vec3f(viewport.Width, viewport.Height, 0.0f); D3D12_GPU_VIRTUAL_ADDRESS scissorCB = GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData)); list->SetGraphicsRootConstantBufferView(1, scissorCB); list->DrawInstanced(3, 1, 0, 0); } } else if(overlay == DebugOverlay::TriangleSizeDraw || overlay == DebugOverlay::TriangleSizePass) { if(pipe && pipe->IsGraphics()) { SCOPED_TIMER("Triangle size"); vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::TriangleSizeDraw) events.clear(); while(!events.empty()) { const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[0]); // remove any non-drawcalls, like the pass boundary. if(!(draw->flags & DrawFlags::Drawcall)) events.erase(events.begin()); else break; } events.push_back(eventId); D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = pipe->GetGraphicsDesc(); pipeDesc.pRootSignature = m_General.ConstOnlyRootSig; pipeDesc.SampleMask = 0xFFFFFFFF; pipeDesc.SampleDesc.Count = 1; pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; pipeDesc.NumRenderTargets = 1; RDCEraseEl(pipeDesc.RTVFormats); pipeDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_UNORM; pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE; pipeDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; pipeDesc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; pipeDesc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; D3D12_INPUT_ELEMENT_DESC ia[2] = {}; 
ia[0].SemanticName = "pos"; ia[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT; ia[1].SemanticName = "sec"; ia[1].Format = DXGI_FORMAT_R32G32B32A32_FLOAT; ia[1].InputSlot = 1; ia[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; pipeDesc.InputLayout.NumElements = 2; pipeDesc.InputLayout.pInputElementDescs = ia; pipeDesc.VS.BytecodeLength = m_Overlay.MeshVS->GetBufferSize(); pipeDesc.VS.pShaderBytecode = m_Overlay.MeshVS->GetBufferPointer(); RDCEraseEl(pipeDesc.HS); RDCEraseEl(pipeDesc.DS); pipeDesc.GS.BytecodeLength = m_Overlay.TriangleSizeGS->GetBufferSize(); pipeDesc.GS.pShaderBytecode = m_Overlay.TriangleSizeGS->GetBufferPointer(); pipeDesc.PS.BytecodeLength = m_Overlay.TriangleSizePS->GetBufferSize(); pipeDesc.PS.pShaderBytecode = m_Overlay.TriangleSizePS->GetBufferPointer(); pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_GREATER) pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL; if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_LESS) pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; // enough for all primitive topology types ID3D12PipelineState *pipes[D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH + 1] = {}; DebugVertexCBuffer vertexData = {}; vertexData.LineStrip = 0; vertexData.ModelViewProj = Matrix4f::Identity(); vertexData.SpriteSize = Vec2f(); Vec4f viewport(rs.views[0].Width, rs.views[0].Height); if(rs.dsv.ptr) { D3D12_CPU_DESCRIPTOR_HANDLE realDSV = Unwrap(rs.dsv); list->OMSetRenderTargets(1, &rtv, TRUE, &realDSV); } list->RSSetViewports(1, &rs.views[0]); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->SetGraphicsRootSignature(m_General.ConstOnlyRootSig); list->SetGraphicsRootConstantBufferView( 0, GetDebugManager()->UploadConstants(&vertexData, sizeof(vertexData))); list->SetGraphicsRootConstantBufferView( 1, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, 
sizeof(overdrawRamp))); list->SetGraphicsRootConstantBufferView( 2, GetDebugManager()->UploadConstants(&viewport, sizeof(viewport))); list->SetGraphicsRoot32BitConstants(3, 4, &viewport.x, 0); for(size_t i = 0; i < events.size(); i++) { const DrawcallDescription *draw = m_pDevice->GetDrawcall(events[i]); for(uint32_t inst = 0; draw && inst < RDCMAX(1U, draw->numInstances); inst++) { MeshFormat fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::GSOut); if(fmt.vertexResourceId == ResourceId()) fmt = GetPostVSBuffers(events[i], inst, MeshDataStage::VSOut); if(fmt.vertexResourceId != ResourceId()) { D3D_PRIMITIVE_TOPOLOGY topo = MakeD3DPrimitiveTopology(fmt.topology); if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST || topo >= D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST) pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP || topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST || topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ || topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ) pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; else pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; list->IASetPrimitiveTopology(topo); if(pipes[pipeDesc.PrimitiveTopologyType] == NULL) { HRESULT hr = m_pDevice->CreateGraphicsPipelineState( &pipeDesc, __uuidof(ID3D12PipelineState), (void **)&pipes[pipeDesc.PrimitiveTopologyType]); RDCASSERTEQUAL(hr, S_OK); } ID3D12Resource *vb = m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.vertexResourceId); D3D12_VERTEX_BUFFER_VIEW vbView = {}; vbView.BufferLocation = vb->GetGPUVirtualAddress() + fmt.vertexByteOffset; vbView.StrideInBytes = fmt.vertexByteStride; vbView.SizeInBytes = UINT(vb->GetDesc().Width - fmt.vertexByteOffset); // second bind is just a dummy, so we don't have to make a shader // that doesn't accept the secondary stream list->IASetVertexBuffers(0, 1, &vbView); list->IASetVertexBuffers(1, 1, &vbView); 
list->SetPipelineState(pipes[pipeDesc.PrimitiveTopologyType]); if(fmt.indexByteStride && fmt.indexResourceId != ResourceId()) { ID3D12Resource *ib = m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.indexResourceId); D3D12_INDEX_BUFFER_VIEW view; view.BufferLocation = ib->GetGPUVirtualAddress() + fmt.indexByteOffset; view.SizeInBytes = UINT(ib->GetDesc().Width - fmt.indexByteOffset); view.Format = fmt.indexByteStride == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; list->IASetIndexBuffer(&view); list->DrawIndexedInstanced(fmt.numIndices, 1, 0, fmt.baseVertex, 0); } else { list->DrawInstanced(fmt.numIndices, 1, 0, 0); } } } } list->Close(); list = NULL; m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); for(size_t i = 0; i < ARRAY_COUNT(pipes); i++) SAFE_RELEASE(pipes[i]); } // restore back to normal m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw); } else if(overlay == DebugOverlay::QuadOverdrawPass || overlay == DebugOverlay::QuadOverdrawDraw) { SCOPED_TIMER("Quad Overdraw"); vector<uint32_t> events = passEvents; if(overlay == DebugOverlay::QuadOverdrawDraw) events.clear(); events.push_back(eventId); if(!events.empty()) { if(overlay == DebugOverlay::QuadOverdrawPass) { list->Close(); m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); list = m_pDevice->GetNewList(); } uint32_t width = uint32_t(resourceDesc.Width >> 1); uint32_t height = resourceDesc.Height >> 1; width = RDCMAX(1U, width); height = RDCMAX(1U, height); D3D12_RESOURCE_DESC uavTexDesc = {}; uavTexDesc.Alignment = 0; uavTexDesc.DepthOrArraySize = 4; uavTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; uavTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; uavTexDesc.Format = DXGI_FORMAT_R32_UINT; uavTexDesc.Height = height; uavTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; uavTexDesc.MipLevels = 1; uavTexDesc.SampleDesc.Count = 1; uavTexDesc.SampleDesc.Quality = 0; uavTexDesc.Width = width; ID3D12Resource *overdrawTex = NULL; HRESULT hr = 
m_pDevice->CreateCommittedResource( &heapProps, D3D12_HEAP_FLAG_NONE, &uavTexDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, __uuidof(ID3D12Resource), (void **)&overdrawTex); if(FAILED(hr)) { RDCERR("Failed to create overdrawTex HRESULT: %s", ToStr(hr).c_str()); list->Close(); list = NULL; return m_Overlay.resourceId; } m_pDevice->CreateShaderResourceView(overdrawTex, NULL, GetDebugManager()->GetCPUHandle(OVERDRAW_SRV)); m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL, GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)); m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL, GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV)); UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(OVERDRAW_UAV), GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV), overdrawTex, zeroes, 0, NULL); list->Close(); list = NULL; #if ENABLED(SINGLE_FLUSH_VALIDATE) m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); #endif m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); D3D12_SHADER_BYTECODE quadWrite; quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize(); quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer(); // declare callback struct here D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, events, ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV))); m_pDevice->ReplayLog(events.front(), events.back(), eReplay_Full); // resolve pass { list = m_pDevice->GetNewList(); D3D12_RESOURCE_BARRIER overdrawBarriers[2] = {}; // make sure UAV work is done then prepare for reading in PS overdrawBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; overdrawBarriers[0].UAV.pResource = overdrawTex; overdrawBarriers[1].Transition.pResource = overdrawTex; overdrawBarriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; overdrawBarriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; overdrawBarriers[1].Transition.StateAfter = 
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; // prepare tex resource for copying list->ResourceBarrier(2, overdrawBarriers); list->OMSetRenderTargets(1, &rtv, TRUE, NULL); list->RSSetViewports(1, &rs.views[0]); D3D12_RECT scissor = {0, 0, 16384, 16384}; list->RSSetScissorRects(1, &scissor); list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); list->SetPipelineState(m_Overlay.QuadResolvePipe); list->SetGraphicsRootSignature(m_Overlay.QuadResolveRootSig); GetDebugManager()->SetDescriptorHeaps(list, true, false); list->SetGraphicsRootConstantBufferView( 0, GetDebugManager()->UploadConstants(&overdrawRamp[0].x, sizeof(overdrawRamp))); list->SetGraphicsRootDescriptorTable(1, GetDebugManager()->GetGPUHandle(OVERDRAW_SRV)); list->DrawInstanced(3, 1, 0, 0); list->Close(); list = NULL; } m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); for(auto it = cb.m_PipelineCache.begin(); it != cb.m_PipelineCache.end(); ++it) { SAFE_RELEASE(it->second.pipe); SAFE_RELEASE(it->second.sig); } SAFE_RELEASE(overdrawTex); } if(overlay == DebugOverlay::QuadOverdrawPass) m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw); }
// Hook for glXCreateContextAttribsARB.
//
// Copies the application's attribute list, forcing the debug bit on or off according to the
// DebugDeviceMode capture option and stripping the KHR no-error bit, creates the real context with
// the patched list, then queries the framebuffer configuration and registers the new context with
// the GL driver.
//
// dpy, config, shareList and direct are forwarded unchanged to the real function.
// attribList is a 0-terminated sequence of (name, value) pairs; NULL is treated as an empty list.
//
// Returns whatever the real glXCreateContextAttribsARB returned.
GLXContext glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, GLXContext shareList,
                                      Bool direct, const int *attribList)
{
  vector<int> attribVec;

  // modify attribs to our liking.
  //
  // The list must be walked as (name, value) pairs: only a 0 in a *name* slot terminates it. A
  // value of 0 (e.g. GLX_CONTEXT_MINOR_VERSION_ARB, 0) is perfectly valid and must not end the
  // copy early, and a value that happens to equal GLX_CONTEXT_FLAGS_ARB must not be treated as
  // the flags attribute.
  {
    const bool debugDevice = RenderDoc::Inst().GetCaptureOptions().DebugDeviceMode;
    bool flagsFound = false;

    for(const int *a = attribList; a && *a; a += 2)
    {
      const int name = a[0];
      int val = a[1];

      if(name == GLX_CONTEXT_FLAGS_ARB)
      {
        flagsFound = true;

        if(debugDevice)
          val |= GLX_CONTEXT_DEBUG_BIT_ARB;
        else
          val &= ~GLX_CONTEXT_DEBUG_BIT_ARB;

        // remove NO_ERROR bit
        val &= ~GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
      }

      attribVec.push_back(name);
      attribVec.push_back(val);
    }

    // the application didn't specify any flags, so add our own if a debug context was requested
    if(!flagsFound && debugDevice)
    {
      attribVec.push_back(GLX_CONTEXT_FLAGS_ARB);
      attribVec.push_back(GLX_CONTEXT_DEBUG_BIT_ARB);
    }

    attribVec.push_back(0);
  }

  // attribVec always contains at least the terminator, so taking element 0 is safe
  const int *attribs = &attribVec[0];

  RDCDEBUG("glXCreateContextAttribsARB:");

  // log every pair and pick out whether a core profile was requested. The pair is inspected
  // *before* advancing so that a profile mask in the very first pair is not skipped.
  bool core = false;

  for(const int *a = attribs; *a; a += 2)
  {
    RDCDEBUG("%x: %d", a[0], a[1]);

    if(a[0] == GLX_CONTEXT_PROFILE_MASK_ARB)
      core = (a[1] & GLX_CONTEXT_CORE_PROFILE_BIT_ARB) != 0;
  }

  GLXContext ret =
      OpenGLHook::glhooks.glXCreateContextAttribsARB_real(dpy, config, shareList, direct, attribs);

  XVisualInfo *vis = OpenGLHook::glhooks.glXGetVisualFromFBConfig_real(dpy, config);

  GLInitParams init;

  init.width = 0;
  init.height = 0;

  int value = 0;

  Keyboard::CloneDisplay(dpy);

  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_BUFFER_SIZE, &value);
  init.colorBits = value;
  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_DEPTH_SIZE, &value);
  init.depthBits = value;
  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_STENCIL_SIZE, &value);
  init.stencilBits = value;

  // value is pre-set so that it acts as the default if the query doesn't write to it
  value = 1;    // default to srgb
  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_FRAMEBUFFER_SRGB_CAPABLE_ARB, &value);
  init.isSRGB = value;

  // the sample count belongs in multiSamples - storing it in isSRGB would both lose the sample
  // count and force isSRGB to be non-zero regardless of the query above.
  value = 1;
  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_SAMPLES_ARB, &value);
  init.multiSamples = RDCMAX(1, value);

  XFree(vis);

  GLWindowingData data;
  data.dpy = dpy;
  data.wnd = (GLXDrawable)NULL;
  data.ctx = ret;

  OpenGLHook::glhooks.GetDriver()->CreateContext(data, shareList, init, core, true);

  return ret;
}
// Hook for the legacy glXCreateContext.
//
// Creates the real context, queries the visual for its colour/depth/stencil sizes, sRGB capability
// and sample count, and registers the new context with the GL driver as a non-core context that
// was not created through the attribs path.
//
// All parameters are forwarded unchanged to the real function, and its return value is returned.
GLXContext glXCreateContext(Display *dpy, XVisualInfo *vis, GLXContext shareList, Bool direct)
{
  GLXContext ret = OpenGLHook::glhooks.glXCreateContext_real(dpy, vis, shareList, direct);

  GLInitParams init;

  init.width = 0;
  init.height = 0;

  int value = 0;

  Keyboard::CloneDisplay(dpy);

  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_BUFFER_SIZE, &value);
  init.colorBits = value;
  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_DEPTH_SIZE, &value);
  init.depthBits = value;
  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_STENCIL_SIZE, &value);
  init.stencilBits = value;

  // value is pre-set so that it acts as the default if the query doesn't write to it
  value = 1;    // default to srgb
  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_FRAMEBUFFER_SRGB_CAPABLE_ARB, &value);
  init.isSRGB = value;

  // the sample count belongs in multiSamples - storing it in isSRGB would both lose the sample
  // count and force isSRGB to be non-zero regardless of the query above.
  value = 1;
  OpenGLHook::glhooks.glXGetConfig_real(dpy, vis, GLX_SAMPLES_ARB, &value);
  init.multiSamples = RDCMAX(1, value);

  GLWindowingData data;
  data.dpy = dpy;
  data.wnd = (GLXDrawable)NULL;
  data.ctx = ret;

  OpenGLHook::glhooks.GetDriver()->CreateContext(data, shareList, init, false, false);

  return ret;
}
// Capture-time wrapper for wglDXRegisterObjectNV.
//
// Unwraps the D3D object, registers it with the real driver, records a chunk describing the
// registration on the GL object's resource record and, for anything other than a plain buffer
// (type == eGL_NONE), fills in the tracked texture properties from the D3D resource.
//
// hDevice/name/type/access are forwarded to the driver; dxObject is the (wrapped) D3D resource.
//
// Returns a WrappedHANDLE on success. Returns NULL if the D3D object can't be unwrapped (last
// error set to ERROR_OPEN_FAILED), if the GL object has no resource record, or if the driver
// itself fails the registration.
HANDLE WrappedOpenGL::wglDXRegisterObjectNV(HANDLE hDevice, void *dxObject, GLuint name,
                                            GLenum type, GLenum access)
{
  RDCASSERT(IsCaptureMode(m_State));

  ID3D11Resource *real = UnwrapDXResource(dxObject);

  if(real == NULL)
  {
    SetLastError(ERROR_OPEN_FAILED);
    return NULL;
  }

  WrappedHANDLE *wrapped = new WrappedHANDLE();

  if(type == eGL_RENDERBUFFER)
    wrapped->res = RenderbufferRes(GetCtx(), name);
  else if(type == eGL_NONE)
    wrapped->res = BufferRes(GetCtx(), name);
  else
    wrapped->res = TextureRes(GetCtx(), name);

  GLResourceRecord *record = GetResourceManager()->GetResourceRecord(wrapped->res);

  if(!record)
  {
    RDCERR("Unrecognised object with type %x and name %u", type, name);
    delete wrapped;
    return NULL;
  }

  SERIALISE_TIME_CALL(wrapped->real =
                          m_Real.wglDXRegisterObjectNV(hDevice, real, name, type, access));

  // if the driver refused the registration, report that to the application instead of handing
  // back a wrapper around a NULL handle and recording a registration that never happened.
  if(wrapped->real == NULL)
  {
    // preserve the driver's error code across our own logging/cleanup
    const DWORD err = GetLastError();

    RDCERR("wglDXRegisterObjectNV failed for object with type %x and name %u", type, name);
    delete wrapped;

    SetLastError(err);
    return NULL;
  }

  {
    USE_SCRATCH_SERIALISER();
    SCOPED_SERIALISE_CHUNK(gl_CurChunk);
    Serialise_wglDXRegisterObjectNV(ser, wrapped->res, type, dxObject);

    record->AddChunk(scope.Get());
  }

  if(type != eGL_NONE)
  {
    ResourceFormat fmt;
    uint32_t width = 0, height = 0, depth = 0, mips = 0, layers = 0, samples = 0;
    GetDXTextureProperties(dxObject, fmt, width, height, depth, mips, layers, samples);

    // restrict the GL object to the mip range that the D3D resource reports
    m_Real.glTextureParameteriEXT(wrapped->res.name, type, eGL_TEXTURE_MAX_LEVEL, GLint(mips - 1));

    // look the entry up once (creating it if necessary) rather than once per field
    auto &tex = m_Textures[record->GetResourceID()];

    tex.resource = wrapped->res;
    tex.curType = type;
    tex.width = width;
    tex.height = height;
    tex.depth = RDCMAX(depth, samples);
    tex.samples = samples;

    if(type == eGL_TEXTURE_1D || type == eGL_TEXTURE_1D_ARRAY)
      tex.dimension = 1;
    else if(type == eGL_TEXTURE_3D)
      tex.dimension = 3;
    else
      tex.dimension = 2;

    tex.internalFormat = MakeGLFormat(fmt);
  }

  return wrapped;
}
bool WrappedOpenGL::Serialise_wglDXRegisterObjectNV(SerialiserType &ser, GLResource Resource, GLenum type, void *dxObject) { SERIALISE_ELEMENT(Resource); GLenum internalFormat = eGL_NONE; uint32_t width = 0, height = 0, depth = 0, mips = 0, layers = 0, samples = 0; if(ser.IsWriting()) { ResourceFormat format; #if ENABLED(RDOC_WIN32) && ENABLED(RENDERDOC_DX_GL_INTEROP) GetDXTextureProperties(dxObject, format, width, height, depth, mips, layers, samples); if(type != eGL_NONE) internalFormat = MakeGLFormat(format); #else RDCERR("Should never happen - cannot serialise wglDXRegisterObjectNV, interop is disabled"); #endif } SERIALISE_ELEMENT(type); SERIALISE_ELEMENT(internalFormat); SERIALISE_ELEMENT(width); SERIALISE_ELEMENT(height); SERIALISE_ELEMENT(depth); SERIALISE_ELEMENT(mips); SERIALISE_ELEMENT(layers); SERIALISE_ELEMENT(samples); SERIALISE_CHECK_READ_ERRORS(); if(IsReplayingAndReading()) { GLuint name = Resource.name; switch(type) { case eGL_NONE: case eGL_TEXTURE_BUFFER: { m_Real.glNamedBufferDataEXT(name, width, NULL, eGL_STATIC_DRAW); break; } case eGL_TEXTURE_1D: m_Real.glTextureStorage1DEXT(name, type, mips, internalFormat, width); break; case eGL_TEXTURE_1D_ARRAY: m_Real.glTextureStorage2DEXT(name, type, mips, internalFormat, width, layers); break; // treat renderbuffers and texture rects as tex2D just to make things easier case eGL_RENDERBUFFER: case eGL_TEXTURE_RECTANGLE: case eGL_TEXTURE_2D: case eGL_TEXTURE_CUBE_MAP: m_Real.glTextureStorage2DEXT(name, type, mips, internalFormat, width, height); break; case eGL_TEXTURE_2D_ARRAY: case eGL_TEXTURE_CUBE_MAP_ARRAY: m_Real.glTextureStorage3DEXT(name, type, mips, internalFormat, width, height, layers); break; case eGL_TEXTURE_2D_MULTISAMPLE: m_Real.glTextureStorage2DMultisampleEXT(name, type, samples, internalFormat, width, height, GL_TRUE); break; case eGL_TEXTURE_2D_MULTISAMPLE_ARRAY: m_Real.glTextureStorage3DMultisampleEXT(name, type, samples, internalFormat, width, height, layers, GL_TRUE); break; case 
eGL_TEXTURE_3D: m_Real.glTextureStorage3DEXT(name, type, mips, internalFormat, width, height, depth); break; default: RDCERR("Unexpected type of interop texture: %s", ToStr(type).c_str()); break; } if(type != eGL_NONE) { ResourceId liveId = GetResourceManager()->GetID(Resource); m_Textures[liveId].curType = type; m_Textures[liveId].width = width; m_Textures[liveId].height = height; m_Textures[liveId].depth = RDCMAX(depth, samples); m_Textures[liveId].samples = samples; m_Textures[liveId].dimension = 2; if(type == eGL_TEXTURE_1D || type == eGL_TEXTURE_1D_ARRAY) m_Textures[liveId].dimension = 1; else if(type == eGL_TEXTURE_3D) m_Textures[liveId].dimension = 3; m_Textures[liveId].internalFormat = internalFormat; } AddResourceInitChunk(Resource); } return true; }
// Loads a DDS texture from an already-opened file.
//
// The file is rewound, the magic number and header(s) are read and validated, the pixel format is
// translated to a ResourceFormat, and then every subresource is read slice-major / mip-minor
// into freshly allocated buffers (ret.subdata[i], with byte sizes in ret.subsizes[i]).
//
// f is the file to read from; it is seeked to the start before reading.
//
// Returns the populated dds_data on success. On any failure - not a DDS file, truncated header,
// or an unsupported format - a zero-initialised dds_data is returned and nothing is allocated.
dds_data load_dds_from_file(FILE *f)
{
  dds_data ret = {};
  dds_data error = {};

  FileIO::fseek64(f, 0, SEEK_SET);

  // every DDS file starts with the four characters "DDS ". Reject anything else up-front rather
  // than interpreting arbitrary bytes as a header.
  uint32_t magic = 0;
  if(FileIO::fread(&magic, sizeof(magic), 1, f) != 1 ||
     magic != uint32_t(MAKE_FOURCC('D', 'D', 'S', ' ')))
  {
    RDCWARN("File is not a DDS file, magic number: %08x", magic);
    return error;
  }

  DDS_HEADER header = {};
  if(FileIO::fread(&header, sizeof(header), 1, f) != 1)
  {
    RDCWARN("Couldn't read DDS header, file is truncated");
    return error;
  }

  // a FourCC of 'DX10' means an extended header follows the main one
  bool dx10Header = false;
  DDS_HEADER_DXT10 headerDXT10 = {};

  if(header.ddspf.dwFlags == DDPF_FOURCC &&
     header.ddspf.dwFourCC == MAKE_FOURCC('D', 'X', '1', '0'))
  {
    if(FileIO::fread(&headerDXT10, sizeof(headerDXT10), 1, f) != 1)
    {
      RDCWARN("Couldn't read DDS DX10 header, file is truncated");
      return error;
    }

    dx10Header = true;
  }

  // any dimension left as 0 in the header is treated as 1
  ret.width = RDCMAX(1U, header.dwWidth);
  ret.height = RDCMAX(1U, header.dwHeight);
  ret.depth = RDCMAX(1U, header.dwDepth);
  ret.slices = dx10Header ? RDCMAX(1U, headerDXT10.arraySize) : 1;
  ret.mips = RDCMAX(1U, header.dwMipMapCount);

  // cubemaps can be flagged either through the legacy caps or through the DX10 misc flag
  uint32_t cubeFlags = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALLFACES;

  if((header.dwCaps2 & cubeFlags) == cubeFlags && (header.dwCaps & DDSCAPS_COMPLEX))
    ret.cubemap = true;

  if(dx10Header && (headerDXT10.miscFlag & DDS_RESOURCE_MISC_TEXTURECUBE))
    ret.cubemap = true;

  if(ret.cubemap)
    ret.slices *= 6;

  if(dx10Header)
  {
    ret.format = DXGIFormat2ResourceFormat(headerDXT10.dxgiFormat);
    if(ret.format.type == ResourceFormatType::Undefined)
    {
      RDCWARN("Unsupported DXGI_FORMAT: %u", (uint32_t)headerDXT10.dxgiFormat);
      return error;
    }
  }
  else if(header.ddspf.dwFlags & DDPF_FOURCC)
  {
    // legacy FourCC codes, either character codes or raw numeric format values
    switch(header.ddspf.dwFourCC)
    {
      case MAKE_FOURCC('D', 'X', 'T', '1'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC1_UNORM);
        break;
      case MAKE_FOURCC('D', 'X', 'T', '3'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC2_UNORM);
        break;
      case MAKE_FOURCC('D', 'X', 'T', '5'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC3_UNORM);
        break;
      case MAKE_FOURCC('A', 'T', 'I', '1'):
      case MAKE_FOURCC('B', 'C', '4', 'U'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC4_UNORM);
        break;
      case MAKE_FOURCC('B', 'C', '4', 'S'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC4_SNORM);
        break;
      case MAKE_FOURCC('A', 'T', 'I', '2'):
      case MAKE_FOURCC('B', 'C', '5', 'U'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC5_UNORM);
        break;
      case MAKE_FOURCC('B', 'C', '5', 'S'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_BC5_SNORM);
        break;
      case MAKE_FOURCC('R', 'G', 'B', 'G'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R8G8_B8G8_UNORM);
        break;
      case MAKE_FOURCC('G', 'R', 'G', 'B'):
        ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_G8R8_G8B8_UNORM);
        break;
      case 36: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16G16B16A16_UNORM); break;
      case 110: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16G16B16A16_SNORM); break;
      case 111: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16_FLOAT); break;
      case 112: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16G16_FLOAT); break;
      case 113: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R16G16B16A16_FLOAT); break;
      case 114: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R32_FLOAT); break;
      case 115: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R32G32_FLOAT); break;
      case 116: ret.format = DXGIFormat2ResourceFormat(DXGI_FORMAT_R32G32B32A32_FLOAT); break;
      default:
        RDCWARN("Unsupported FourCC: %08x", header.ddspf.dwFourCC);
        return error;
    }
  }
  else
  {
    // uncompressed data described only by bit count and channel masks
    if(header.ddspf.dwRGBBitCount != 32 && header.ddspf.dwRGBBitCount != 24 &&
       header.ddspf.dwRGBBitCount != 16 && header.ddspf.dwRGBBitCount != 8)
    {
      RDCWARN("Unsupported RGB bit count: %u", header.ddspf.dwRGBBitCount);
      return error;
    }

    ret.format.compByteWidth = 1;
    ret.format.compCount = uint8_t(header.ddspf.dwRGBBitCount / 8);
    ret.format.compType = CompType::UNorm;
    ret.format.type = ResourceFormatType::Regular;

    // blue in the lower bits than red means the data is stored in BGRA order
    if(header.ddspf.dwBBitMask < header.ddspf.dwRBitMask)
      ret.format.bgraOrder = true;
  }

  uint32_t bytesPerPixel = 1;
  switch(ret.format.type)
  {
    case ResourceFormatType::S8: bytesPerPixel = 1; break;
    case ResourceFormatType::R10G10B10A2:
    case ResourceFormatType::R9G9B9E5:
    case ResourceFormatType::R11G11B10:
    case ResourceFormatType::D24S8: bytesPerPixel = 4; break;
    case ResourceFormatType::R5G6B5:
    case ResourceFormatType::R5G5B5A1:
    case ResourceFormatType::R4G4B4A4: bytesPerPixel = 2; break;
    case ResourceFormatType::D32S8: bytesPerPixel = 8; break;
    case ResourceFormatType::D16S8:
    case ResourceFormatType::YUV:
    case ResourceFormatType::R4G4:
      // cast explicitly - a scoped enum must not be passed straight through printf-style varargs
      RDCERR("Unsupported file format %u", (uint32_t)ret.format.type);
      return error;
    default: bytesPerPixel = ret.format.compCount * ret.format.compByteWidth;
  }

  bool blockFormat = false;

  if(ret.format.Special())
  {
    switch(ret.format.type)
    {
      case ResourceFormatType::BC1:
      case ResourceFormatType::BC2:
      case ResourceFormatType::BC3:
      case ResourceFormatType::BC4:
      case ResourceFormatType::BC5:
      case ResourceFormatType::BC6:
      case ResourceFormatType::BC7: blockFormat = true; break;
      case ResourceFormatType::ETC2:
      case ResourceFormatType::EAC:
      case ResourceFormatType::ASTC:
      case ResourceFormatType::YUV:
        RDCERR("Unsupported file format, %u", (uint32_t)ret.format.type);
        return error;
      default: break;
    }
  }

  // all validation is done, from here on nothing fails so these allocations can't be leaked by
  // an early return
  ret.subsizes = new uint32_t[ret.slices * ret.mips];
  ret.subdata = new byte *[ret.slices * ret.mips];

  int i = 0;
  for(int slice = 0; slice < ret.slices; slice++)
  {
    for(int mip = 0; mip < ret.mips; mip++)
    {
      int rowlen = RDCMAX(1, ret.width >> mip);
      int numRows = RDCMAX(1, ret.height >> mip);
      int numdepths = RDCMAX(1, ret.depth >> mip);
      int pitch = RDCMAX(1U, rowlen * bytesPerPixel);

      // pitch/rows are in blocks, not pixels, for block formats.
      if(blockFormat)
      {
        // round *up* to whole 4x4 blocks in both directions. Rounding the row count down would
        // drop the final partial row of blocks for heights that aren't a multiple of 4, and
        // misalign every subresource read after it.
        numRows = RDCMAX(1, (numRows + 3) / 4);

        int blockSize = (ret.format.type == ResourceFormatType::BC1 ||
                         ret.format.type == ResourceFormatType::BC4)
                            ? 8
                            : 16;

        pitch = RDCMAX(blockSize, (((rowlen + 3) / 4)) * blockSize);
      }

      ret.subsizes[i] = numdepths * numRows * pitch;

      // value-initialise so that a truncated file yields zeroes rather than uninitialised heap
      byte *bytedata = ret.subdata[i] = new byte[ret.subsizes[i]]();

      for(int d = 0; d < numdepths; d++)
      {
        for(int row = 0; row < numRows; row++)
        {
          FileIO::fread(bytedata, 1, pitch, f);

          bytedata += pitch;
        }
      }

      i++;
    }
  }

  return ret;
}
void D3D12Replay::InitPostVSBuffers(uint32_t eventId) { // go through any aliasing if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end()) eventId = m_PostVSAlias[eventId]; if(m_PostVSData.find(eventId) != m_PostVSData.end()) return; D3D12CommandData *cmd = m_pDevice->GetQueue()->GetCommandData(); const D3D12RenderState &rs = cmd->m_RenderState; if(rs.pipe == ResourceId()) return; WrappedID3D12PipelineState *origPSO = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe); if(!origPSO->IsGraphics()) return; D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = origPSO->GetGraphicsDesc(); if(psoDesc.VS.BytecodeLength == 0) return; WrappedID3D12Shader *vs = origPSO->VS(); D3D_PRIMITIVE_TOPOLOGY topo = rs.topo; const DrawcallDescription *drawcall = m_pDevice->GetDrawcall(eventId); if(drawcall->numIndices == 0) return; DXBC::DXBCFile *dxbcVS = vs->GetDXBC(); RDCASSERT(dxbcVS); DXBC::DXBCFile *dxbcGS = NULL; WrappedID3D12Shader *gs = origPSO->GS(); if(gs) { dxbcGS = gs->GetDXBC(); RDCASSERT(dxbcGS); } DXBC::DXBCFile *dxbcDS = NULL; WrappedID3D12Shader *ds = origPSO->DS(); if(ds) { dxbcDS = ds->GetDXBC(); RDCASSERT(dxbcDS); } ID3D12RootSignature *soSig = NULL; HRESULT hr = S_OK; { WrappedID3D12RootSignature *sig = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>(rs.graphics.rootsig); D3D12RootSignature rootsig = sig->sig; // create a root signature that allows stream out, if necessary if((rootsig.Flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT) == 0) { rootsig.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT; ID3DBlob *blob = m_pDevice->GetShaderCache()->MakeRootSig(rootsig); hr = m_pDevice->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), __uuidof(ID3D12RootSignature), (void **)&soSig); if(FAILED(hr)) { RDCERR("Couldn't enable stream-out in root signature: HRESULT: %s", ToStr(hr).c_str()); return; } SAFE_RELEASE(blob); } } vector<D3D12_SO_DECLARATION_ENTRY> sodecls; UINT stride = 0; int posidx 
= -1; int numPosComponents = 0; if(!dxbcVS->m_OutputSig.empty()) { for(const SigParameter &sign : dxbcVS->m_OutputSig) { D3D12_SO_DECLARATION_ENTRY decl; decl.Stream = 0; decl.OutputSlot = 0; decl.SemanticName = sign.semanticName.c_str(); decl.SemanticIndex = sign.semanticIndex; decl.StartComponent = 0; decl.ComponentCount = sign.compCount & 0xff; if(sign.systemValue == ShaderBuiltin::Position) { posidx = (int)sodecls.size(); numPosComponents = decl.ComponentCount = 4; } stride += decl.ComponentCount * sizeof(float); sodecls.push_back(decl); } if(stride == 0) { RDCERR("Didn't get valid stride! Setting to 4 bytes"); stride = 4; } // shift position attribute up to first, keeping order otherwise // the same if(posidx > 0) { D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx]; sodecls.erase(sodecls.begin() + posidx); sodecls.insert(sodecls.begin(), pos); } // set up stream output entries and buffers psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size(); psoDesc.StreamOutput.pSODeclaration = &sodecls[0]; psoDesc.StreamOutput.NumStrides = 1; psoDesc.StreamOutput.pBufferStrides = &stride; psoDesc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM; // disable all other shader stages psoDesc.HS.BytecodeLength = 0; psoDesc.HS.pShaderBytecode = NULL; psoDesc.DS.BytecodeLength = 0; psoDesc.DS.pShaderBytecode = NULL; psoDesc.GS.BytecodeLength = 0; psoDesc.GS.pShaderBytecode = NULL; psoDesc.PS.BytecodeLength = 0; psoDesc.PS.pShaderBytecode = NULL; // disable any rasterization/use of output targets psoDesc.DepthStencilState.DepthEnable = FALSE; psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.StencilEnable = FALSE; if(soSig) psoDesc.pRootSignature = soSig; // render as points psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; // disable outputs psoDesc.NumRenderTargets = 0; RDCEraseEl(psoDesc.RTVFormats); psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; ID3D12PipelineState *pipe = NULL; hr = 
m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pipe); if(FAILED(hr)) { RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } ID3D12Resource *idxBuf = NULL; bool recreate = false; uint64_t outputSize = uint64_t(drawcall->numIndices) * drawcall->numInstances * stride; if(m_SOBufferSize < outputSize) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < outputSize) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output data", oldSize, m_SOBufferSize); recreate = true; } ID3D12GraphicsCommandList *list = NULL; if(!(drawcall->flags & DrawFlags::UseIBuffer)) { if(recreate) { m_pDevice->GPUSync(); CreateSOBuffers(); } list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } else // drawcall is indexed { bytebuf idxdata; GetBufferData(rs.ibuffer.buf, rs.ibuffer.offs + drawcall->indexOffset * rs.ibuffer.bytewidth, RDCMIN(drawcall->numIndices * rs.ibuffer.bytewidth, rs.ibuffer.size), idxdata); vector<uint32_t> indices; uint16_t *idx16 = (uint16_t *)&idxdata[0]; uint32_t *idx32 = (uint32_t *)&idxdata[0]; // only read as many indices as were available in the buffer uint32_t numIndices = RDCMIN(uint32_t(idxdata.size() / rs.ibuffer.bytewidth), drawcall->numIndices); uint32_t idxclamp = 0; if(drawcall->baseVertex < 0) idxclamp = uint32_t(-drawcall->baseVertex); // grab all unique vertex indices 
referenced for(uint32_t i = 0; i < numIndices; i++) { uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i]; // apply baseVertex but clamp to 0 (don't allow index to become negative) if(i32 < idxclamp) i32 = 0; else if(drawcall->baseVertex < 0) i32 -= idxclamp; else if(drawcall->baseVertex > 0) i32 += drawcall->baseVertex; auto it = std::lower_bound(indices.begin(), indices.end(), i32); if(it != indices.end() && *it == i32) continue; indices.insert(it, i32); } // if we read out of bounds, we'll also have a 0 index being referenced // (as 0 is read). Don't insert 0 if we already have 0 though if(numIndices < drawcall->numIndices && (indices.empty() || indices[0] != 0)) indices.insert(indices.begin(), 0); // An index buffer could be something like: 500, 501, 502, 501, 503, 502 // in which case we can't use the existing index buffer without filling 499 slots of vertex // data with padding. Instead we rebase the indices based on the smallest vertex so it becomes // 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer. // // Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512 // which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid. // We just stream-out a tightly packed list of unique indices, and then remap the index buffer // so that what did point to 500 points to 0 (accounting for rebasing), and what did point // to 510 now points to 3 (accounting for the unique sort). // we use a map here since the indices may be sparse. Especially considering if an index // is 'invalid' like 0xcccccccc then we don't want an array of 3.4 billion entries. 
map<uint32_t, size_t> indexRemap; for(size_t i = 0; i < indices.size(); i++) { // by definition, this index will only appear once in indices[] indexRemap[indices[i]] = i; } if(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t)) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t)) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for indices", oldSize, m_SOBufferSize); recreate = true; } if(recreate) { m_pDevice->GPUSync(); CreateSOBuffers(); } GetDebugManager()->FillBuffer(m_SOPatchedIndexBuffer, 0, &indices[0], indices.size() * sizeof(uint32_t)); D3D12_INDEX_BUFFER_VIEW patchedIB; patchedIB.BufferLocation = m_SOPatchedIndexBuffer->GetGPUVirtualAddress(); patchedIB.Format = DXGI_FORMAT_R32_UINT; patchedIB.SizeInBytes = UINT(indices.size() * sizeof(uint32_t)); list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); list->IASetIndexBuffer(&patchedIB); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); list->DrawIndexedInstanced((UINT)indices.size(), drawcall->numInstances, 0, 0, drawcall->instanceOffset); uint32_t stripCutValue = 0; if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF) stripCutValue = 0xffff; else if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF) stripCutValue = 0xffffffff; // rebase existing index buffer to point to the right elements in our stream-out'd // vertex buffer for(uint32_t i = 0; i < numIndices; i++) { uint32_t i32 = rs.ibuffer.bytewidth == 2 ? 
uint32_t(idx16[i]) : idx32[i]; // preserve primitive restart indices if(stripCutValue && i32 == stripCutValue) continue; // apply baseVertex but clamp to 0 (don't allow index to become negative) if(i32 < idxclamp) i32 = 0; else if(drawcall->baseVertex < 0) i32 -= idxclamp; else if(drawcall->baseVertex > 0) i32 += drawcall->baseVertex; if(rs.ibuffer.bytewidth == 2) idx16[i] = uint16_t(indexRemap[i32]); else idx32[i] = uint32_t(indexRemap[i32]); } idxBuf = NULL; if(!idxdata.empty()) { D3D12_RESOURCE_DESC idxBufDesc; idxBufDesc.Alignment = 0; idxBufDesc.DepthOrArraySize = 1; idxBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; idxBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; idxBufDesc.Format = DXGI_FORMAT_UNKNOWN; idxBufDesc.Height = 1; idxBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; idxBufDesc.MipLevels = 1; idxBufDesc.SampleDesc.Count = 1; idxBufDesc.SampleDesc.Quality = 0; idxBufDesc.Width = idxdata.size(); D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &idxBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&idxBuf); RDCASSERTEQUAL(hr, S_OK); SetObjName(idxBuf, StringFormat::Fmt("PostVS idxBuf for %u", eventId)); GetDebugManager()->FillBuffer(idxBuf, 0, &idxdata[0], idxdata.size()); } } D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; list->ResourceBarrier(1, &sobarr); list->CopyResource(m_SOStagingBuffer, m_SOBuffer); // we're done with this after the copy, so we can discard it and reset // the counter for the next stream-out sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; 
sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->DiscardResource(m_SOBuffer, NULL); list->ResourceBarrier(1, &sobarr); UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); SAFE_RELEASE(pipe); byte *byteData = NULL; D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize}; hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData); if(FAILED(hr)) { RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(idxBuf); SAFE_RELEASE(soSig); return; } range.End = 0; uint64_t numBytesWritten = *(uint64_t *)byteData; if(numBytesWritten == 0) { m_PostVSData[eventId] = D3D12PostVSData(); SAFE_RELEASE(idxBuf); SAFE_RELEASE(soSig); return; } // skip past the counter byteData += 64; uint64_t numPrims = numBytesWritten / stride; ID3D12Resource *vsoutBuffer = NULL; { D3D12_RESOURCE_DESC vertBufDesc; vertBufDesc.Alignment = 0; vertBufDesc.DepthOrArraySize = 1; vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; vertBufDesc.Format = DXGI_FORMAT_UNKNOWN; vertBufDesc.Height = 1; vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; vertBufDesc.MipLevels = 1; vertBufDesc.SampleDesc.Count = 1; vertBufDesc.SampleDesc.Quality = 0; vertBufDesc.Width = numBytesWritten; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&vsoutBuffer); 
RDCASSERTEQUAL(hr, S_OK); if(vsoutBuffer) { SetObjName(vsoutBuffer, StringFormat::Fmt("PostVS vsoutBuffer for %u", eventId)); GetDebugManager()->FillBuffer(vsoutBuffer, 0, byteData, (size_t)numBytesWritten); } } float nearp = 0.1f; float farp = 100.0f; Vec4f *pos0 = (Vec4f *)byteData; bool found = false; for(uint64_t i = 1; numPosComponents == 4 && i < numPrims; i++) { ////////////////////////////////////////////////////////////////////////////////// // derive near/far, assuming a standard perspective matrix // // the transformation from from pre-projection {Z,W} to post-projection {Z,W} // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1 // and we know Wpost = Zpre from the perspective matrix. // we can then see from the perspective matrix that // m = F/(F-N) // c = -(F*N)/(F-N) // // with re-arranging and substitution, we then get: // N = -c/m // F = c/(1-m) // // so if we can derive m and c then we can determine N and F. We can do this with // two points, and we pick them reasonably distinct on z to reduce floating-point // error Vec4f *pos = (Vec4f *)(byteData + i * stride); if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f) { Vec2f A(pos0->w, pos0->z); Vec2f B(pos->w, pos->z); float m = (B.y - A.y) / (B.x - A.x); float c = B.y - B.x * m; if(m == 1.0f) continue; nearp = -c / m; farp = c / (1 - m); found = true; break; } } // if we didn't find anything, all z's and w's were identical. 
// If the z is positive and w greater for the first element then // we detect this projection as reversed z with infinite far plane if(!found && pos0->z > 0.0f && pos0->w > pos0->z) { nearp = pos0->z; farp = FLT_MAX; } m_SOStagingBuffer->Unmap(0, &range); m_PostVSData[eventId].vsin.topo = topo; m_PostVSData[eventId].vsout.buf = vsoutBuffer; m_PostVSData[eventId].vsout.vertStride = stride; m_PostVSData[eventId].vsout.nearPlane = nearp; m_PostVSData[eventId].vsout.farPlane = farp; m_PostVSData[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::UseIBuffer); m_PostVSData[eventId].vsout.numVerts = drawcall->numIndices; m_PostVSData[eventId].vsout.instStride = 0; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].vsout.instStride = uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances)); m_PostVSData[eventId].vsout.idxBuf = NULL; if(m_PostVSData[eventId].vsout.useIndices && idxBuf) { m_PostVSData[eventId].vsout.idxBuf = idxBuf; m_PostVSData[eventId].vsout.idxFmt = rs.ibuffer.bytewidth == 2 ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; } m_PostVSData[eventId].vsout.hasPosOut = posidx >= 0; m_PostVSData[eventId].vsout.topo = topo; } else { // empty vertex output signature m_PostVSData[eventId].vsin.topo = topo; m_PostVSData[eventId].vsout.buf = NULL; m_PostVSData[eventId].vsout.instStride = 0; m_PostVSData[eventId].vsout.vertStride = 0; m_PostVSData[eventId].vsout.nearPlane = 0.0f; m_PostVSData[eventId].vsout.farPlane = 0.0f; m_PostVSData[eventId].vsout.useIndices = false; m_PostVSData[eventId].vsout.hasPosOut = false; m_PostVSData[eventId].vsout.idxBuf = NULL; m_PostVSData[eventId].vsout.topo = topo; } if(dxbcGS || dxbcDS) { stride = 0; posidx = -1; numPosComponents = 0; DXBC::DXBCFile *lastShader = dxbcGS; if(dxbcDS) lastShader = dxbcDS; sodecls.clear(); for(const SigParameter &sign : lastShader->m_OutputSig) { D3D12_SO_DECLARATION_ENTRY decl; // for now, skip streams that aren't stream 0 if(sign.stream != 0) continue; decl.Stream = 0; decl.OutputSlot = 0; decl.SemanticName = sign.semanticName.c_str(); decl.SemanticIndex = sign.semanticIndex; decl.StartComponent = 0; decl.ComponentCount = sign.compCount & 0xff; if(sign.systemValue == ShaderBuiltin::Position) { posidx = (int)sodecls.size(); numPosComponents = decl.ComponentCount = 4; } stride += decl.ComponentCount * sizeof(float); sodecls.push_back(decl); } // shift position attribute up to first, keeping order otherwise // the same if(posidx > 0) { D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx]; sodecls.erase(sodecls.begin() + posidx); sodecls.insert(sodecls.begin(), pos); } // enable the other shader stages again if(origPSO->DS()) psoDesc.DS = origPSO->DS()->GetDesc(); if(origPSO->HS()) psoDesc.HS = origPSO->HS()->GetDesc(); if(origPSO->GS()) psoDesc.GS = origPSO->GS()->GetDesc(); // configure new SO declarations psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size(); psoDesc.StreamOutput.pSODeclaration = &sodecls[0]; psoDesc.StreamOutput.NumStrides = 1; psoDesc.StreamOutput.pBufferStrides = 
&stride; // we're using the same topology this time psoDesc.PrimitiveTopologyType = origPSO->graphics->PrimitiveTopologyType; ID3D12PipelineState *pipe = NULL; hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState), (void **)&pipe); if(FAILED(hr)) { RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } D3D12_STREAM_OUTPUT_BUFFER_VIEW view; ID3D12GraphicsCommandList *list = NULL; view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; // draws with multiple instances must be replayed one at a time so we can record the number of // primitives from each drawcall, as due to expansion this can vary per-instance. if(drawcall->numInstances > 1) { list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; // do a dummy draw to make sure we have enough space in the output buffer list->SOSetTargets(0, 1, &view); list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); // because the result is expanded we don't have to remap index buffers or anything if(drawcall->flags & DrawFlags::UseIBuffer) { list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1, m_SOStagingBuffer, 0); list->Close(); ID3D12CommandList 
*l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); // check that things are OK, and resize up if needed D3D12_RANGE range; range.Begin = 0; range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS); D3D12_QUERY_DATA_SO_STATISTICS *data; hr = m_SOStagingBuffer->Map(0, &range, (void **)&data); D3D12_QUERY_DATA_SO_STATISTICS result = *data; range.End = 0; m_SOStagingBuffer->Unmap(0, &range); if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize); CreateSOBuffers(); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; GetDebugManager()->ResetDebugAlloc(); // now do the actual stream out list = GetDebugManager()->ResetDebugList(); // first need to reset the counter byte values which may have either been written to above, or // are newly created { D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->ResourceBarrier(1, &sobarr); D3D12_UNORDERED_ACCESS_VIEW_DESC counterDesc = {}; counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; counterDesc.Format = DXGI_FORMAT_R32_UINT; counterDesc.Buffer.FirstElement = 0; counterDesc.Buffer.NumElements = 4; UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); std::swap(sobarr.Transition.StateBefore, sobarr.Transition.StateAfter); list->ResourceBarrier(1, &sobarr); } rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { 
list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } // reserve space for enough 'buffer filled size' locations view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + AlignUp(uint64_t(drawcall->numInstances * sizeof(UINT64)), 64ULL); // do incremental draws to get the output size. We have to do this O(N^2) style because // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... N instances // and count the total number of verts each time, then we can see from the difference how much // each instance wrote. for(uint32_t inst = 1; inst <= drawcall->numInstances; inst++) { if(drawcall->flags & DrawFlags::UseIBuffer) { view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64); list->SOSetTargets(0, 1, &view); list->DrawIndexedInstanced(drawcall->numIndices, inst, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64); list->SOSetTargets(0, 1, &view); list->DrawInstanced(drawcall->numIndices, inst, drawcall->vertexOffset, drawcall->instanceOffset); } } list->Close(); l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); // the last draw will have written the actual data we want into the buffer } else { // this only loops if we find from a query that we need to resize up while(true) { list = GetDebugManager()->ResetDebugList(); rs.ApplyState(list); list->SetPipelineState(pipe); if(soSig) { list->SetGraphicsRootSignature(soSig); rs.ApplyGraphicsRootElements(list); } view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress(); view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64; view.SizeInBytes = m_SOBufferSize; list->SOSetTargets(0, 1, &view); list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); // because the result is expanded we don't have to remap 
index buffers or anything if(drawcall->flags & DrawFlags::UseIBuffer) { list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->indexOffset, drawcall->baseVertex, drawcall->instanceOffset); } else { list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset, drawcall->instanceOffset); } list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0); list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1, m_SOStagingBuffer, 0); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); // check that things are OK, and resize up if needed D3D12_RANGE range; range.Begin = 0; range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS); D3D12_QUERY_DATA_SO_STATISTICS *data; hr = m_SOStagingBuffer->Map(0, &range, (void **)&data); if(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) { uint64_t oldSize = m_SOBufferSize; while(m_SOBufferSize < data->PrimitivesStorageNeeded * 3 * stride) m_SOBufferSize *= 2; RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize); CreateSOBuffers(); continue; } range.End = 0; m_SOStagingBuffer->Unmap(0, &range); GetDebugManager()->ResetDebugAlloc(); break; } } list = GetDebugManager()->ResetDebugList(); D3D12_RESOURCE_BARRIER sobarr = {}; sobarr.Transition.pResource = m_SOBuffer; sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; list->ResourceBarrier(1, &sobarr); list->CopyResource(m_SOStagingBuffer, m_SOBuffer); // we're done with this after the copy, so we can discard it and reset // the counter for the next stream-out sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; list->DiscardResource(m_SOBuffer, NULL); list->ResourceBarrier(1, &sobarr); D3D12_UNORDERED_ACCESS_VIEW_DESC 
counterDesc = {}; counterDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; counterDesc.Format = DXGI_FORMAT_R32_UINT; counterDesc.Buffer.FirstElement = 0; counterDesc.Buffer.NumElements = 4; UINT zeroes[4] = {0, 0, 0, 0}; list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV), GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV), m_SOBuffer, zeroes, 0, NULL); list->Close(); ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); m_pDevice->GPUSync(); GetDebugManager()->ResetDebugAlloc(); SAFE_RELEASE(pipe); byte *byteData = NULL; D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize}; hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData); if(FAILED(hr)) { RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str()); SAFE_RELEASE(soSig); return; } range.End = 0; uint64_t *counters = (uint64_t *)byteData; uint64_t numBytesWritten = 0; std::vector<D3D12PostVSData::InstData> instData; if(drawcall->numInstances > 1) { uint64_t prevByteCount = 0; for(uint32_t inst = 0; inst < drawcall->numInstances; inst++) { uint64_t byteCount = counters[inst]; D3D12PostVSData::InstData d; d.numVerts = uint32_t((byteCount - prevByteCount) / stride); d.bufOffset = prevByteCount; prevByteCount = byteCount; instData.push_back(d); } numBytesWritten = prevByteCount; } else { numBytesWritten = counters[0]; } if(numBytesWritten == 0) { SAFE_RELEASE(soSig); return; } // skip past the counter(s) byteData += (view.BufferLocation - m_SOBuffer->GetGPUVirtualAddress()); uint64_t numVerts = numBytesWritten / stride; ID3D12Resource *gsoutBuffer = NULL; { D3D12_RESOURCE_DESC vertBufDesc; vertBufDesc.Alignment = 0; vertBufDesc.DepthOrArraySize = 1; vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; vertBufDesc.Format = DXGI_FORMAT_UNKNOWN; vertBufDesc.Height = 1; vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; vertBufDesc.MipLevels = 1; vertBufDesc.SampleDesc.Count = 1; 
vertBufDesc.SampleDesc.Quality = 0; vertBufDesc.Width = numBytesWritten; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, __uuidof(ID3D12Resource), (void **)&gsoutBuffer); RDCASSERTEQUAL(hr, S_OK); if(gsoutBuffer) { SetObjName(gsoutBuffer, StringFormat::Fmt("PostVS gsoutBuffer for %u", eventId)); GetDebugManager()->FillBuffer(gsoutBuffer, 0, byteData, (size_t)numBytesWritten); } } float nearp = 0.1f; float farp = 100.0f; Vec4f *pos0 = (Vec4f *)byteData; bool found = false; for(UINT64 i = 1; numPosComponents == 4 && i < numVerts; i++) { ////////////////////////////////////////////////////////////////////////////////// // derive near/far, assuming a standard perspective matrix // // the transformation from from pre-projection {Z,W} to post-projection {Z,W} // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1 // and we know Wpost = Zpre from the perspective matrix. // we can then see from the perspective matrix that // m = F/(F-N) // c = -(F*N)/(F-N) // // with re-arranging and substitution, we then get: // N = -c/m // F = c/(1-m) // // so if we can derive m and c then we can determine N and F. We can do this with // two points, and we pick them reasonably distinct on z to reduce floating-point // error Vec4f *pos = (Vec4f *)(byteData + i * stride); if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f) { Vec2f A(pos0->w, pos0->z); Vec2f B(pos->w, pos->z); float m = (B.y - A.y) / (B.x - A.x); float c = B.y - B.x * m; if(m == 1.0f) continue; nearp = -c / m; farp = c / (1 - m); found = true; break; } } // if we didn't find anything, all z's and w's were identical. 
// If the z is positive and w greater for the first element then // we detect this projection as reversed z with infinite far plane if(!found && pos0->z > 0.0f && pos0->w > pos0->z) { nearp = pos0->z; farp = FLT_MAX; } m_SOStagingBuffer->Unmap(0, &range); m_PostVSData[eventId].gsout.buf = gsoutBuffer; m_PostVSData[eventId].gsout.instStride = 0; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].gsout.instStride = uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances)); m_PostVSData[eventId].gsout.vertStride = stride; m_PostVSData[eventId].gsout.nearPlane = nearp; m_PostVSData[eventId].gsout.farPlane = farp; m_PostVSData[eventId].gsout.useIndices = false; m_PostVSData[eventId].gsout.hasPosOut = posidx >= 0; m_PostVSData[eventId].gsout.idxBuf = NULL; topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; if(lastShader == dxbcGS) { for(size_t i = 0; i < dxbcGS->GetNumDeclarations(); i++) { const DXBC::ASMDecl &decl = dxbcGS->GetDeclaration(i); if(decl.declaration == DXBC::OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY) { topo = decl.outTopology; break; } } } else if(lastShader == dxbcDS) { for(size_t i = 0; i < dxbcDS->GetNumDeclarations(); i++) { const DXBC::ASMDecl &decl = dxbcDS->GetDeclaration(i); if(decl.declaration == DXBC::OPCODE_DCL_TESS_DOMAIN) { if(decl.domain == DXBC::DOMAIN_ISOLINE) topo = D3D_PRIMITIVE_TOPOLOGY_LINELIST; else topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; } } } m_PostVSData[eventId].gsout.topo = topo; // streamout expands strips unfortunately if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ) m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ) m_PostVSData[eventId].gsout.topo = 
D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; m_PostVSData[eventId].gsout.numVerts = (uint32_t)numVerts; if(drawcall->flags & DrawFlags::Instanced) m_PostVSData[eventId].gsout.numVerts /= RDCMAX(1U, drawcall->numInstances); m_PostVSData[eventId].gsout.instData = instData; } SAFE_RELEASE(soSig); }
void ReplayOutput::DisplayMesh() { FetchDrawcall *draw = m_pRenderer->GetDrawcallByEID(m_EventID, m_LastDeferredEvent); if(!draw) return; if(m_MainOutput.outputID == 0) return; if(m_Width <= 0 || m_Height <= 0) return; if(m_RenderData.meshDisplay.type == eMeshDataStage_Unknown) return; if((draw->flags & eDraw_Drawcall) == 0) return; if(draw && m_OverlayDirty) { m_pDevice->ReplayLog(m_FrameID, 0, m_EventID, eReplay_WithoutDraw); RefreshOverlay(); m_pDevice->ReplayLog(m_FrameID, 0, m_EventID, eReplay_OnlyDraw); } m_pDevice->BindOutputWindow(m_MainOutput.outputID, true); m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0); m_pDevice->RenderCheckerboard(Vec3f(0.666f, 0.666f, 0.666f), Vec3f(0.333f, 0.333f, 0.333f)); m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0); MeshDisplay mesh = m_RenderData.meshDisplay; mesh.position.buf = m_pDevice->GetLiveID(mesh.position.buf); mesh.position.idxbuf = m_pDevice->GetLiveID(mesh.position.idxbuf); mesh.second.buf = m_pDevice->GetLiveID(mesh.second.buf); mesh.second.idxbuf = m_pDevice->GetLiveID(mesh.second.idxbuf); vector<MeshFormat> secondaryDraws; if(m_RenderData.meshDisplay.type != eMeshDataStage_VSIn && !m_RenderData.meshDisplay.thisDrawOnly) { mesh.position.unproject = true; mesh.second.unproject = true; for(size_t i=0; i < passEvents.size(); i++) { FetchDrawcall *d = m_pRenderer->GetDrawcallByEID(passEvents[i], m_LastDeferredEvent); if(d) { for(uint32_t inst=0; inst < RDCMAX(1U, draw->numInstances); inst++) { // get the 'most final' stage MeshFormat fmt = m_pDevice->GetPostVSBuffers(m_FrameID, passEvents[i], inst, eMeshDataStage_GSOut); if(fmt.buf == ResourceId()) fmt = m_pDevice->GetPostVSBuffers(m_FrameID, passEvents[i], inst, eMeshDataStage_VSOut); // if unproject is marked, this output had a 'real' system position output if(fmt.unproject) secondaryDraws.push_back(fmt); } } } // draw previous instances in the current drawcall if(draw->flags & eDraw_Instanced) { for(uint32_t inst=0; inst < 
RDCMAX(1U, draw->numInstances) && inst < m_RenderData.meshDisplay.curInstance; inst++) { // get the 'most final' stage MeshFormat fmt = m_pDevice->GetPostVSBuffers(m_FrameID, draw->eventID, inst, eMeshDataStage_GSOut); if(fmt.buf == ResourceId()) fmt = m_pDevice->GetPostVSBuffers(m_FrameID, draw->eventID, inst, eMeshDataStage_VSOut); // if unproject is marked, this output had a 'real' system position output if(fmt.unproject) secondaryDraws.push_back(fmt); } } } m_pDevice->RenderMesh(m_FrameID, m_EventID, secondaryDraws, mesh); }
void ReplayOutput::DisplayMesh() { FetchDrawcall *draw = m_pRenderer->GetDrawcallByEID(m_EventID, m_LastDeferredEvent); if(draw == NULL || m_MainOutput.outputID == 0 || m_Width <= 0 || m_Height <= 0 || (m_RenderData.meshDisplay.type == eMeshDataStage_Unknown) || (draw->flags & eDraw_Drawcall) == 0) { float color[4] = {0.0f, 0.0f, 0.0f, 0.0f}; m_pDevice->BindOutputWindow(m_MainOutput.outputID, false); m_pDevice->ClearOutputWindowColour(m_MainOutput.outputID, color); m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0); m_pDevice->RenderCheckerboard(Vec3f(0.666f, 0.666f, 0.666f), Vec3f(0.333f, 0.333f, 0.333f)); return; } if(draw && m_OverlayDirty) { m_pDevice->ReplayLog(m_EventID, eReplay_WithoutDraw); RefreshOverlay(); m_pDevice->ReplayLog(m_EventID, eReplay_OnlyDraw); } m_pDevice->BindOutputWindow(m_MainOutput.outputID, true); m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0); m_pDevice->RenderCheckerboard(Vec3f(0.666f, 0.666f, 0.666f), Vec3f(0.333f, 0.333f, 0.333f)); m_pDevice->ClearOutputWindowDepth(m_MainOutput.outputID, 1.0f, 0); MeshDisplay mesh = m_RenderData.meshDisplay; mesh.position.buf = m_pDevice->GetLiveID(mesh.position.buf); mesh.position.idxbuf = m_pDevice->GetLiveID(mesh.position.idxbuf); mesh.second.buf = m_pDevice->GetLiveID(mesh.second.buf); mesh.second.idxbuf = m_pDevice->GetLiveID(mesh.second.idxbuf); vector<MeshFormat> secondaryDraws; // we choose a pallette here so that the colours stay consistent (i.e the // current draw is always the same colour), but also to indicate somewhat // the relationship - ie. 
instances are closer in colour than other draws // in the pass // very slightly dark red const FloatVector drawItself(0.06f, 0.0f, 0.0f, 1.0f); // more desaturated/lighter, but still reddish const FloatVector otherInstances(0.18f, 0.1f, 0.1f, 1.0f); // lighter grey with blue tinge to contrast from main/instance draws const FloatVector passDraws(0.2f, 0.2f, 0.25f, 1.0f); if(m_RenderData.meshDisplay.type != eMeshDataStage_VSIn) { for(size_t i = 0; m_RenderData.meshDisplay.showWholePass && i < passEvents.size(); i++) { FetchDrawcall *d = m_pRenderer->GetDrawcallByEID(passEvents[i], m_LastDeferredEvent); if(d) { for(uint32_t inst = 0; inst < RDCMAX(1U, draw->numInstances); inst++) { // get the 'most final' stage MeshFormat fmt = m_pDevice->GetPostVSBuffers(passEvents[i], inst, eMeshDataStage_GSOut); if(fmt.buf == ResourceId()) fmt = m_pDevice->GetPostVSBuffers(passEvents[i], inst, eMeshDataStage_VSOut); fmt.meshColour = passDraws; // if unproject is marked, this output had a 'real' system position output if(fmt.unproject) secondaryDraws.push_back(fmt); } } } // draw previous instances in the current drawcall if(draw->flags & eDraw_Instanced) { uint32_t maxInst = 0; if(m_RenderData.meshDisplay.showPrevInstances) maxInst = RDCMAX(1U, m_RenderData.meshDisplay.curInstance); if(m_RenderData.meshDisplay.showAllInstances) maxInst = RDCMAX(1U, draw->numInstances); for(uint32_t inst = 0; inst < maxInst; inst++) { // get the 'most final' stage MeshFormat fmt = m_pDevice->GetPostVSBuffers(draw->eventID, inst, eMeshDataStage_GSOut); if(fmt.buf == ResourceId()) fmt = m_pDevice->GetPostVSBuffers(draw->eventID, inst, eMeshDataStage_VSOut); fmt.meshColour = otherInstances; // if unproject is marked, this output had a 'real' system position output if(fmt.unproject) secondaryDraws.push_back(fmt); } } } mesh.position.meshColour = drawItself; m_pDevice->RenderMesh(m_EventID, secondaryDraws, mesh); }
void PrintInteger(bool typeUnsigned, uint64_t argu, int base, uint64_t numbits, FormatterParams formatter, bool uppercaseDigits, char *&output, size_t &actualsize, char *end) { int64_t argi = 0; union { uint64_t *u64; signed int *i; signed char *c; signed short *s; int64_t *i64; } typepun; typepun.u64 = &argu; // cast the appropriate size to signed version switch(formatter.Length) { default: case None: case Long: argi = (int64_t)*typepun.i; break; case HalfHalf: argi = (int64_t)*typepun.c; break; case Half: argi = (int64_t)*typepun.s; break; case LongLong: argi = (int64_t)*typepun.i64; break; } bool negative = false; if(base == 10 && !typeUnsigned) { negative = argi < 0; } int digwidth = 0; int numPad0s = 0; int numPadWidth = 0; { int intwidth = 0; int digits = 0; // work out the number of decimal digits in the integer if(!negative) { uint64_t accum = argu; while(accum) { digits += 1; accum /= base; } } else { int64_t accum = argi; while(accum) { digits += 1; accum /= base; } } intwidth = digwidth = RDCMAX(1, digits); // printed int is 2 chars larger for 0x or 0b, and 1 char for 0 (octal) if(base == 16 || base == 2) intwidth += formatter.Flags & AlternateForm ? 2 : 0; if(base == 8) intwidth += formatter.Flags & AlternateForm ? 
1 : 0; if(formatter.Precision != FormatterParams::NoPrecision && formatter.Precision > intwidth) numPad0s = formatter.Precision - intwidth; intwidth += numPad0s; // for decimal we can have a negative sign (or placeholder) if(base == 10) { if(negative) intwidth++; else if(formatter.Flags & (PrependPos | PrependSpace)) intwidth++; } if(formatter.Width != FormatterParams::NoWidth && formatter.Width > intwidth) numPadWidth = formatter.Width - intwidth; } // pad with spaces if necessary if((formatter.Flags & (LeftJustify | PadZeroes)) == 0 && numPadWidth > 0) addchars(output, actualsize, end, (size_t)numPadWidth, ' '); if(base == 16) { if(formatter.Flags & AlternateForm) { if(uppercaseDigits) appendstring(output, actualsize, end, "0X"); else appendstring(output, actualsize, end, "0x"); } // pad with 0s as appropriate if((formatter.Flags & (LeftJustify | PadZeroes)) == PadZeroes && numPadWidth > 0) addchars(output, actualsize, end, (size_t)numPadWidth, '0'); if(numPad0s > 0) addchars(output, actualsize, end, (size_t)numPad0s, '0'); bool left0s = true; // mask off each hex digit and print for(uint64_t i = 0; i < numbits; i += 4) { uint64_t shift = numbits - 4 - i; uint64_t mask = 0xfULL << shift; char digit = char((argu & mask) >> shift); if(digit == 0 && left0s && i + 4 < numbits) continue; left0s = false; if(digit < 10) addchar(output, actualsize, end, '0' + digit); else if(uppercaseDigits) addchar(output, actualsize, end, 'A' + digit - 10); else addchar(output, actualsize, end, 'a' + digit - 10); } } else if(base == 8)