// Reads back a range of bytes from the GL buffer identified by 'buff'.
//
// buff   - ID of the buffer to read; looked up in m_pDriver->m_Buffers.
// offset - byte offset into the buffer at which reading starts.
// len    - number of bytes to read. 0 means "everything from offset to the end
//          of the buffer".
//
// Returns the bytes that were read. The result is empty if the buffer is not
// known, or if offset is at or beyond the end of the buffer. A request that
// extends past the end of the buffer is clamped to the available bytes.
//
// Side effects: makes the replay debug context current and leaves the buffer
// bound to eGL_COPY_READ_BUFFER.
vector<byte> GLReplay::GetBufferData(ResourceId buff, uint32_t offset, uint32_t len)
{
  vector<byte> ret;

  auto it = m_pDriver->m_Buffers.find(buff);
  if(it == m_pDriver->m_Buffers.end())
  {
    RDCWARN("Requesting data for non-existant buffer %llu", buff);
    return ret;
  }

  auto &buf = it->second;

  // do all range maths in 64-bit so that offset + len cannot wrap around
  const uint64_t bufSize = (uint64_t)buf.size;

  if((uint64_t)offset >= bufSize)
  {
    // nothing can be read from here. Only warn if the caller actually asked for bytes
    // that don't exist - "the rest of the buffer" starting exactly at the end is just empty.
    if(len > 0 || (uint64_t)offset > bufSize)
      RDCWARN("Attempting to read off the end of the array. Will be clamped");
    return ret;
  }

  // at least one byte is available from here on, so len ends up >= 1
  const uint64_t available = bufSize - (uint64_t)offset;

  if(len == 0)
  {
    // read everything after offset. The interface only carries 32-bit lengths.
    len = (uint32_t)RDCMIN(available, (uint64_t)0xffffffffU);
  }
  else if((uint64_t)len > available)
  {
    RDCWARN("Attempting to read off the end of the array. Will be clamped");
    len = (uint32_t)available;
  }

  ret.resize(len);

  WrappedOpenGL &gl = *m_pDriver;

  MakeCurrentReplayContext(m_DebugCtx);

  gl.glBindBuffer(eGL_COPY_READ_BUFFER, buf.resource.name);

  gl.glGetBufferSubData(eGL_COPY_READ_BUFFER, (GLintptr)offset, (GLsizeiptr)len, &ret[0]);

  return ret;
}
// https://en.wikibooks.org/wiki/OpenGL_Programming/Modern_OpenGL_Tutorial_Arcball void Camera::RotateArcball(float ax, float ay, float bx, float by) { Vec3f a, b; Vec2f from(ax, ay); Vec2f to(bx, by); float az = from.x * from.x + from.y * from.y; float bz = to.x * to.x + to.y * to.y; // keep the controls stable by rejecting very small movements. if(fabsf(az - bz) < 1e-5f) return; if(az < 1.0f) { a = Vec3f(from.x, from.y, sqrt(1.0f - az)); } else { a = Vec3f(from.x, from.y, 0.0f); a.Normalise(); } if(bz < 1.0f) { b = Vec3f(to.x, to.y, sqrt(1.0f - bz)); } else { b = Vec3f(to.x, to.y, 0.0f); b.Normalise(); } float angle = acosf(RDCMIN(1.0f, a.Dot(b))); Vec3f axis = a.Cross(b); axis.Normalise(); dirty = true; Quatf delta = Quatf::AxisAngle(axis, angle); arcrot = arcrot * delta; }
int GetNumMips(const GLHookSet &gl, GLenum target, GLuint tex, GLuint w, GLuint h, GLuint d) { int mips = 1; GLint immut = 0; gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_IMMUTABLE_FORMAT, &immut); if(immut) gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_IMMUTABLE_LEVELS, (GLint *)&mips); else mips = CalcNumMips(w, h, d); GLint maxLevel = 1000; gl.glGetTextureParameterivEXT(tex, target, eGL_TEXTURE_MAX_LEVEL, &maxLevel); mips = RDCMIN(mips, maxLevel+1); if(immut == 0) { // check to see if all mips are set, or clip the number of mips to those that are // set. if(target == eGL_TEXTURE_CUBE_MAP) target = eGL_TEXTURE_CUBE_MAP_POSITIVE_X; for(int i=0; i < mips; i++) { GLint width = 0; gl.glGetTextureLevelParameterivEXT(tex, target, i, eGL_TEXTURE_WIDTH, &width); if(width == 0) { mips = i; break; } } } return RDCMAX(1, mips); }
// Draws the texture described by 'cfg' to the currently bound output using the
// texture display program.
//
// cfg - display configuration: which texture (texid), mip, placement (offx/offy/scale),
//       channel mask, value range, HDR multiplier and raw output flag. A scale <= 0
//       means "fit the texture to the output and centre it".
//
// Returns true once the draw has been issued, or false if the uniform buffer could not
// be mapped (in which case nothing is drawn).
//
// Side effects: makes the debug context current and changes the bound program, texture
// unit 0 binding, uniform buffer binding 0, blend state and VAO. Sampler 0 is unbound on exit.
bool GLReplay::RenderTexture(TextureDisplay cfg)
{
  MakeCurrentReplayContext(m_DebugCtx);

  WrappedOpenGL &gl = *m_pDriver;

  gl.glUseProgram(DebugData.texDisplayProg);

  auto &texDetails = m_pDriver->m_Textures[cfg.texid];

  gl.glActiveTexture(eGL_TEXTURE0);
  gl.glBindTexture(eGL_TEXTURE_2D, texDetails.resource.name);

  // only filter when minifying the top mip, otherwise show exact texels
  if(cfg.mip == 0 && cfg.scale < 1.0f)
    gl.glBindSampler(0, DebugData.linearSampler);
  else
    gl.glBindSampler(0, DebugData.pointSampler);

  GLint tex_x = texDetails.width, tex_y = texDetails.height, tex_z = texDetails.depth;

  gl.glBindBufferBase(eGL_UNIFORM_BUFFER, 0, DebugData.UBOs[0]);

  // layout of the uniform block written below
  struct uboData
  {
    Vec2f Position;
    float Scale;
    float HDRMul;

    Vec4f Channels;

    float RangeMinimum;
    float InverseRangeSize;
    float MipLevel;
    float dummy2;

    Vec3f TextureResolutionPS;
    int OutputDisplayFormat;

    Vec2f OutputRes;
    int RawOutput;
    float Slice;
  };

  RDCCOMPILE_ASSERT(sizeof(uboData) <= DebugData.UBOSize, "UBO data is too big");

  // Work out placement in locals first. The buffer is mapped write-only below, so values
  // must never be read back through the mapped pointer.
  float posX = cfg.offx;
  float posY = cfg.offy;
  float scale = cfg.scale;

  if(cfg.scale <= 0.0f)
  {
    // fit to the output, preserving aspect ratio, and centre along the other axis
    float xscale = DebugData.outWidth/float(tex_x);
    float yscale = DebugData.outHeight/float(tex_y);

    scale = RDCMIN(xscale, yscale);

    if(yscale > xscale)
    {
      posX = 0;
      posY = (DebugData.outHeight-(tex_y*scale) )*0.5f;
    }
    else
    {
      posY = 0;
      posX = (DebugData.outWidth-(tex_x*scale) )*0.5f;
    }
  }

  // avoid an empty range, which would divide by zero below
  if(cfg.rangemax <= cfg.rangemin) cfg.rangemax += 0.00001f;

  uboData *ubo = (uboData *)gl.glMapBufferRange(eGL_UNIFORM_BUFFER, 0, sizeof(uboData), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);

  if(ubo == NULL)
  {
    // mapping can fail (e.g. out of memory); don't write through a NULL pointer
    RDCERR("Couldn't map uniform buffer for texture display");
    gl.glBindSampler(0, 0);
    return false;
  }

  ubo->Position.x = posX;
  ubo->Position.y = posY;
  ubo->Scale = scale;

  ubo->HDRMul = cfg.HDRMul;

  ubo->Channels.x = cfg.Red ? 1.0f : 0.0f;
  ubo->Channels.y = cfg.Green ? 1.0f : 0.0f;
  ubo->Channels.z = cfg.Blue ? 1.0f : 0.0f;
  ubo->Channels.w = cfg.Alpha ? 1.0f : 0.0f;

  ubo->RangeMinimum = cfg.rangemin;
  ubo->InverseRangeSize = 1.0f/(cfg.rangemax-cfg.rangemin);

  ubo->MipLevel = (float)cfg.mip;

  // the previous contents were invalidated by the map, so give every field a defined value
  ubo->dummy2 = 0.0f;

  ubo->OutputDisplayFormat = 0x2; // 2d. Unused for now

  ubo->RawOutput = cfg.rawoutput ? 1 : 0;

  ubo->TextureResolutionPS.x = float(tex_x);
  ubo->TextureResolutionPS.y = float(tex_y);
  ubo->TextureResolutionPS.z = float(tex_z);

  ubo->OutputRes.x = DebugData.outWidth;
  ubo->OutputRes.y = DebugData.outHeight;

  ubo->Slice = 0.0f;

  gl.glUnmapBuffer(eGL_UNIFORM_BUFFER);

  if(cfg.rawoutput)
  {
    gl.glDisable(eGL_BLEND);
  }
  else
  {
    gl.glEnable(eGL_BLEND);
    gl.glBlendFunc(eGL_SRC_ALPHA, eGL_ONE_MINUS_SRC_ALPHA);
  }

  // the quad is drawn as a 4-vertex strip with no vertex buffers bound
  gl.glBindVertexArray(DebugData.emptyVAO);
  gl.glDrawArrays(eGL_TRIANGLE_STRIP, 0, 4);

  gl.glBindSampler(0, 0);

  return true;
}
// Records this render state onto command list 'cmd': pipeline state, viewports,
// scissors, topology, stencil ref, blend factor, index/vertex buffers, descriptor
// heaps, render targets / depth-stencil, and finally the graphics and compute root
// signatures with their root elements. Resource IDs are resolved to their current
// live objects through the resource manager.
void D3D12RenderState::ApplyState(ID3D12GraphicsCommandList *cmd) const
{
  auto rm = GetResourceManager();

  if(pipe != ResourceId())
    cmd->SetPipelineState(rm->GetCurrentAs<ID3D12PipelineState>(pipe));

  if(!views.empty())
    cmd->RSSetViewports((UINT)views.size(), &views[0]);

  if(!scissors.empty())
    cmd->RSSetScissorRects((UINT)scissors.size(), &scissors[0]);

  if(topo != D3D_PRIMITIVE_TOPOLOGY_UNDEFINED)
    cmd->IASetPrimitiveTopology(topo);

  cmd->OMSetStencilRef(stencilRef);
  cmd->OMSetBlendFactor(blendFactor);

  // index buffer, only if one is bound
  if(ibuffer.buf != ResourceId())
  {
    ID3D12Resource *indexRes = rm->GetCurrentAs<ID3D12Resource>(ibuffer.buf);

    D3D12_INDEX_BUFFER_VIEW view;
    // a missing resource is applied as a NULL GPU address
    view.BufferLocation = indexRes ? indexRes->GetGPUVirtualAddress() + ibuffer.offs : 0;
    view.Format = (ibuffer.bytewidth == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT);
    view.SizeInBytes = ibuffer.size;

    cmd->IASetIndexBuffer(&view);
  }

  // vertex buffers are set one slot at a time
  const size_t vbCount = vbuffers.size();
  for(size_t slot = 0; slot < vbCount; slot++)
  {
    ID3D12Resource *vertexRes = rm->GetCurrentAs<ID3D12Resource>(vbuffers[slot].buf);

    D3D12_VERTEX_BUFFER_VIEW view;
    view.BufferLocation = vertexRes ? vertexRes->GetGPUVirtualAddress() + vbuffers[slot].offs : 0;
    view.StrideInBytes = vbuffers[slot].stride;
    view.SizeInBytes = vbuffers[slot].size;

    cmd->IASetVertexBuffers((UINT)slot, 1, &view);
  }

  // resolve all descriptor heaps and bind them in one call
  std::vector<ID3D12DescriptorHeap *> liveHeaps;
  liveHeaps.reserve(heaps.size());
  for(size_t h = 0; h < heaps.size(); h++)
    liveHeaps.push_back(rm->GetCurrentAs<ID3D12DescriptorHeap>(heaps[h]));

  if(!liveHeaps.empty())
    cmd->SetDescriptorHeaps((UINT)liveHeaps.size(), &liveHeaps[0]);

  const bool haveDSV = (dsv.heap != ResourceId());

  if(!rts.empty() || haveDSV)
  {
    D3D12_CPU_DESCRIPTOR_HANDLE rtHandles[8];
    D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle = CPUHandleFromPortableHandle(rm, dsv);

    const UINT rtCount = (UINT)rts.size();

    // with rtSingle only the first handle is resolved - the call below passes TRUE as its
    // third argument in that case
    UINT handlesToResolve = rtCount;
    if(rtSingle)
      handlesToResolve = RDCMIN(1U, rtCount);

    for(UINT rt = 0; rt < handlesToResolve; rt++)
      rtHandles[rt] = CPUHandleFromPortableHandle(rm, rts[rt]);

    // need to unwrap here, as FromPortableHandle unwraps too.
    Unwrap(cmd)->OMSetRenderTargets(rtCount, rtHandles, rtSingle ? TRUE : FALSE,
                                    haveDSV ? &dsvHandle : NULL);
  }

  if(graphics.rootsig != ResourceId())
  {
    cmd->SetGraphicsRootSignature(rm->GetCurrentAs<ID3D12RootSignature>(graphics.rootsig));

    ApplyGraphicsRootElements(cmd);
  }

  if(compute.rootsig != ResourceId())
  {
    cmd->SetComputeRootSignature(rm->GetCurrentAs<ID3D12RootSignature>(compute.rootsig));

    ApplyComputeRootElements(cmd);
  }
}
void GLRenderState::FetchState(void *ctx, WrappedOpenGL *gl) { GLint boolread = 0; // TODO check GL_MAX_* // TODO check the extensions/core version for these is around { GLenum pnames[] = { eGL_CLIP_DISTANCE0, eGL_CLIP_DISTANCE1, eGL_CLIP_DISTANCE2, eGL_CLIP_DISTANCE3, eGL_CLIP_DISTANCE4, eGL_CLIP_DISTANCE5, eGL_CLIP_DISTANCE6, eGL_CLIP_DISTANCE7, eGL_COLOR_LOGIC_OP, eGL_CULL_FACE, eGL_DEPTH_CLAMP, eGL_DEPTH_TEST, eGL_DEPTH_BOUNDS_TEST_EXT, eGL_DITHER, eGL_FRAMEBUFFER_SRGB, eGL_LINE_SMOOTH, eGL_MULTISAMPLE, eGL_POLYGON_SMOOTH, eGL_POLYGON_OFFSET_FILL, eGL_POLYGON_OFFSET_LINE, eGL_POLYGON_OFFSET_POINT, eGL_PROGRAM_POINT_SIZE, eGL_PRIMITIVE_RESTART, eGL_PRIMITIVE_RESTART_FIXED_INDEX, eGL_SAMPLE_ALPHA_TO_COVERAGE, eGL_SAMPLE_ALPHA_TO_ONE, eGL_SAMPLE_COVERAGE, eGL_SAMPLE_MASK, eGL_RASTER_MULTISAMPLE_EXT, eGL_RASTER_FIXED_SAMPLE_LOCATIONS_EXT, eGL_STENCIL_TEST, eGL_TEXTURE_CUBE_MAP_SEAMLESS, eGL_BLEND_ADVANCED_COHERENT_KHR, }; RDCCOMPILE_ASSERT(ARRAY_COUNT(pnames) == eEnabled_Count, "Wrong number of pnames"); for(GLuint i=0; i < eEnabled_Count; i++) { if(pnames[i] == eGL_BLEND_ADVANCED_COHERENT_KHR && !ExtensionSupported[ExtensionSupported_KHR_blend_equation_advanced_coherent]) { Enabled[i] = true; continue; } if((pnames[i] == eGL_RASTER_MULTISAMPLE_EXT || pnames[i] == eGL_RASTER_FIXED_SAMPLE_LOCATIONS_EXT) && !ExtensionSupported[ExtensionSupported_EXT_raster_multisample]) { Enabled[i] = false; continue; } Enabled[i] = (m_Real->glIsEnabled(pnames[i]) == GL_TRUE); } } m_Real->glGetIntegerv(eGL_ACTIVE_TEXTURE, (GLint *)&ActiveTexture); // TODO fetch bindings for other types than 2D for(GLuint i=0; i < (GLuint)ARRAY_COUNT(Tex2D); i++) { m_Real->glActiveTexture(GLenum(eGL_TEXTURE0 + i)); m_Real->glGetIntegerv(eGL_TEXTURE_BINDING_2D, (GLint*)&Tex2D[i]); m_Real->glGetIntegerv(eGL_SAMPLER_BINDING, (GLint*)&Samplers[i]); } m_Real->glActiveTexture(ActiveTexture); m_Real->glGetIntegerv(eGL_VERTEX_ARRAY_BINDING, (GLint *)&VAO); m_Real->glGetIntegerv(eGL_TRANSFORM_FEEDBACK_BINDING, 
(GLint *)&FeedbackObj); // the spec says that you can only query for the format that was previously set, or you get // undefined results. Ie. if someone set ints, this might return anything. However there's also // no way to query for the type so we just have to hope for the best and hope most people are // sane and don't use these except for a default "all 0s" attrib. GLuint maxNumAttribs = 0; m_Real->glGetIntegerv(eGL_MAX_VERTEX_ATTRIBS, (GLint *)&maxNumAttribs); for(GLuint i=0; i < RDCMIN(maxNumAttribs, (GLuint)ARRAY_COUNT(GenericVertexAttribs)); i++) m_Real->glGetVertexAttribfv(i, eGL_CURRENT_VERTEX_ATTRIB, &GenericVertexAttribs[i].x); m_Real->glGetFloatv(eGL_POINT_FADE_THRESHOLD_SIZE, &PointFadeThresholdSize); m_Real->glGetIntegerv(eGL_POINT_SPRITE_COORD_ORIGIN, (GLint*)&PointSpriteOrigin); m_Real->glGetFloatv(eGL_LINE_WIDTH, &LineWidth); m_Real->glGetFloatv(eGL_POINT_SIZE, &PointSize); m_Real->glGetIntegerv(eGL_PRIMITIVE_RESTART_INDEX, (GLint *)&PrimitiveRestartIndex); if(GLCoreVersion >= 45 || ExtensionSupported[ExtensionSupported_ARB_clip_control]) { m_Real->glGetIntegerv(eGL_CLIP_ORIGIN, (GLint *)&ClipOrigin); m_Real->glGetIntegerv(eGL_CLIP_DEPTH_MODE, (GLint *)&ClipDepth); } else { ClipOrigin = eGL_LOWER_LEFT; ClipDepth = eGL_NEGATIVE_ONE_TO_ONE; } m_Real->glGetIntegerv(eGL_PROVOKING_VERTEX, (GLint *)&ProvokingVertex); m_Real->glGetIntegerv(eGL_CURRENT_PROGRAM, (GLint *)&Program); m_Real->glGetIntegerv(eGL_PROGRAM_PIPELINE_BINDING, (GLint *)&Pipeline); const GLenum shs[] = { eGL_VERTEX_SHADER, eGL_TESS_CONTROL_SHADER, eGL_TESS_EVALUATION_SHADER, eGL_GEOMETRY_SHADER, eGL_FRAGMENT_SHADER, VendorCheck[VendorCheck_AMD_pipeline_compute_query] ? 
eGL_NONE : eGL_COMPUTE_SHADER, }; RDCCOMPILE_ASSERT(ARRAY_COUNT(shs) == ARRAY_COUNT(Subroutines), "Subroutine array not the right size"); for(size_t s=0; s < ARRAY_COUNT(shs); s++) { GLuint prog = Program; if(prog == 0 && Pipeline != 0 && shs[s] != eGL_NONE) m_Real->glGetProgramPipelineiv(Pipeline, shs[s], (GLint *)&prog); if(prog == 0) continue; m_Real->glGetProgramStageiv(prog, shs[s], eGL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS, &Subroutines[s].numSubroutines); for(GLint i=0; i < Subroutines[s].numSubroutines; i++) m_Real->glGetUniformSubroutineuiv(shs[s], i, &Subroutines[s].Values[s]); } m_Real->glGetIntegerv(eGL_ARRAY_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Array]); m_Real->glGetIntegerv(eGL_COPY_READ_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Copy_Read]); m_Real->glGetIntegerv(eGL_COPY_WRITE_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Copy_Write]); m_Real->glGetIntegerv(eGL_DRAW_INDIRECT_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Draw_Indirect]); m_Real->glGetIntegerv(eGL_DISPATCH_INDIRECT_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Dispatch_Indirect]); m_Real->glGetIntegerv(eGL_PIXEL_PACK_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Pixel_Pack]); m_Real->glGetIntegerv(eGL_PIXEL_UNPACK_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Pixel_Unpack]); m_Real->glGetIntegerv(eGL_QUERY_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Query]); m_Real->glGetIntegerv(eGL_TEXTURE_BUFFER_BINDING, (GLint*)&BufferBindings[eBufIdx_Texture]); struct { IdxRangeBuffer *bufs; int count; GLenum binding; GLenum start; GLenum size; GLenum maxcount; } idxBufs[] = { { AtomicCounter, ARRAY_COUNT(AtomicCounter), eGL_ATOMIC_COUNTER_BUFFER_BINDING, eGL_ATOMIC_COUNTER_BUFFER_START, eGL_ATOMIC_COUNTER_BUFFER_SIZE, eGL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS, }, { ShaderStorage, ARRAY_COUNT(ShaderStorage), eGL_SHADER_STORAGE_BUFFER_BINDING, eGL_SHADER_STORAGE_BUFFER_START, eGL_SHADER_STORAGE_BUFFER_SIZE, eGL_MAX_SHADER_STORAGE_BUFFER_BINDINGS, }, { TransformFeedback, 
ARRAY_COUNT(TransformFeedback), eGL_TRANSFORM_FEEDBACK_BUFFER_BINDING, eGL_TRANSFORM_FEEDBACK_BUFFER_START, eGL_TRANSFORM_FEEDBACK_BUFFER_SIZE, eGL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS, }, { UniformBinding, ARRAY_COUNT(UniformBinding), eGL_UNIFORM_BUFFER_BINDING, eGL_UNIFORM_BUFFER_START, eGL_UNIFORM_BUFFER_SIZE, eGL_MAX_UNIFORM_BUFFER_BINDINGS, }, }; for(GLuint b=0; b < (GLuint)ARRAY_COUNT(idxBufs); b++) { GLint maxCount = 0; m_Real->glGetIntegerv(idxBufs[b].maxcount, &maxCount); for(int i=0; i < idxBufs[b].count && i < maxCount; i++) { m_Real->glGetIntegeri_v(idxBufs[b].binding, i, (GLint*)&idxBufs[b].bufs[i].name); m_Real->glGetInteger64i_v(idxBufs[b].start, i, (GLint64*)&idxBufs[b].bufs[i].start); m_Real->glGetInteger64i_v(idxBufs[b].size, i, (GLint64*)&idxBufs[b].bufs[i].size); } } for(GLuint i=0; i < (GLuint)ARRAY_COUNT(Blends); i++) { m_Real->glGetIntegeri_v(eGL_BLEND_EQUATION_RGB, i, (GLint*)&Blends[i].EquationRGB); m_Real->glGetIntegeri_v(eGL_BLEND_EQUATION_ALPHA, i, (GLint*)&Blends[i].EquationAlpha); m_Real->glGetIntegeri_v(eGL_BLEND_SRC_RGB, i, (GLint*)&Blends[i].SourceRGB); m_Real->glGetIntegeri_v(eGL_BLEND_SRC_ALPHA, i, (GLint*)&Blends[i].SourceAlpha); m_Real->glGetIntegeri_v(eGL_BLEND_DST_RGB, i, (GLint*)&Blends[i].DestinationRGB); m_Real->glGetIntegeri_v(eGL_BLEND_DST_ALPHA, i, (GLint*)&Blends[i].DestinationAlpha); Blends[i].Enabled = (m_Real->glIsEnabledi(eGL_BLEND, i) == GL_TRUE); } m_Real->glGetFloatv(eGL_BLEND_COLOR, &BlendColor[0]); for(GLuint i=0; i < (GLuint)ARRAY_COUNT(Viewports); i++) m_Real->glGetFloati_v(eGL_VIEWPORT, i, &Viewports[i].x); for(GLuint i=0; i < (GLuint)ARRAY_COUNT(Scissors); i++) { m_Real->glGetIntegeri_v(eGL_SCISSOR_BOX, i, &Scissors[i].x); Scissors[i].enabled = (m_Real->glIsEnabledi(eGL_SCISSOR_TEST, i) == GL_TRUE); } m_Real->glGetIntegerv(eGL_DRAW_FRAMEBUFFER_BINDING, (GLint *)&DrawFBO); m_Real->glGetIntegerv(eGL_READ_FRAMEBUFFER_BINDING, (GLint *)&ReadFBO); m_Real->glBindFramebuffer(eGL_DRAW_FRAMEBUFFER, 0); 
m_Real->glBindFramebuffer(eGL_READ_FRAMEBUFFER, 0); for(size_t i=0; i < ARRAY_COUNT(DrawBuffers); i++) m_Real->glGetIntegerv(GLenum(eGL_DRAW_BUFFER0 + i), (GLint *)&DrawBuffers[i]); m_Real->glGetIntegerv(eGL_READ_BUFFER, (GLint *)&ReadBuffer); m_Real->glBindFramebuffer(eGL_DRAW_FRAMEBUFFER, DrawFBO); m_Real->glBindFramebuffer(eGL_READ_FRAMEBUFFER, ReadFBO); m_Real->glGetIntegerv(eGL_FRAGMENT_SHADER_DERIVATIVE_HINT, (GLint *)&Hints.Derivatives); m_Real->glGetIntegerv(eGL_LINE_SMOOTH_HINT, (GLint *)&Hints.LineSmooth); m_Real->glGetIntegerv(eGL_POLYGON_SMOOTH_HINT, (GLint *)&Hints.PolySmooth); m_Real->glGetIntegerv(eGL_TEXTURE_COMPRESSION_HINT, (GLint *)&Hints.TexCompression); m_Real->glGetBooleanv(eGL_DEPTH_WRITEMASK, &DepthWriteMask); m_Real->glGetFloatv(eGL_DEPTH_CLEAR_VALUE, &DepthClearValue); m_Real->glGetIntegerv(eGL_DEPTH_FUNC, (GLint *)&DepthFunc); for(GLuint i=0; i < (GLuint)ARRAY_COUNT(DepthRanges); i++) m_Real->glGetDoublei_v(eGL_DEPTH_RANGE, i, &DepthRanges[i].nearZ); m_Real->glGetDoublev(eGL_DEPTH_BOUNDS_TEST_EXT, &DepthBounds.nearZ); { m_Real->glGetIntegerv(eGL_STENCIL_FUNC, (GLint *)&StencilFront.func); m_Real->glGetIntegerv(eGL_STENCIL_BACK_FUNC, (GLint *)&StencilBack.func); m_Real->glGetIntegerv(eGL_STENCIL_REF, (GLint *)&StencilFront.ref); m_Real->glGetIntegerv(eGL_STENCIL_BACK_REF, (GLint *)&StencilBack.ref); GLint maskval; m_Real->glGetIntegerv(eGL_STENCIL_VALUE_MASK, &maskval); StencilFront.valuemask = uint8_t(maskval&0xff); m_Real->glGetIntegerv(eGL_STENCIL_BACK_VALUE_MASK, &maskval); StencilBack.valuemask = uint8_t(maskval&0xff); m_Real->glGetIntegerv(eGL_STENCIL_WRITEMASK, &maskval); StencilFront.writemask = uint8_t(maskval&0xff); m_Real->glGetIntegerv(eGL_STENCIL_BACK_WRITEMASK, &maskval); StencilBack.writemask = uint8_t(maskval&0xff); m_Real->glGetIntegerv(eGL_STENCIL_FAIL, (GLint *)&StencilFront.stencilFail); m_Real->glGetIntegerv(eGL_STENCIL_BACK_FAIL, (GLint *)&StencilBack.stencilFail); m_Real->glGetIntegerv(eGL_STENCIL_PASS_DEPTH_FAIL, 
(GLint *)&StencilFront.depthFail); m_Real->glGetIntegerv(eGL_STENCIL_BACK_PASS_DEPTH_FAIL, (GLint *)&StencilBack.depthFail); m_Real->glGetIntegerv(eGL_STENCIL_PASS_DEPTH_PASS, (GLint *)&StencilFront.pass); m_Real->glGetIntegerv(eGL_STENCIL_BACK_PASS_DEPTH_PASS, (GLint *)&StencilBack.pass); } m_Real->glGetIntegerv(eGL_STENCIL_CLEAR_VALUE, (GLint *)&StencilClearValue); for(size_t i=0; i < ARRAY_COUNT(ColorMasks); i++) m_Real->glGetBooleanv(eGL_COLOR_WRITEMASK, &ColorMasks[i].red); m_Real->glGetIntegeri_v(eGL_SAMPLE_MASK_VALUE, 0, (GLint *)&SampleMask[0]); m_Real->glGetIntegerv(eGL_SAMPLE_COVERAGE_VALUE, (GLint *)&SampleCoverage); m_Real->glGetIntegerv(eGL_SAMPLE_COVERAGE_INVERT, (GLint *)&boolread); SampleCoverageInvert = (boolread != 0); m_Real->glGetFloatv(eGL_MIN_SAMPLE_SHADING_VALUE, &MinSampleShading); if(ExtensionSupported[ExtensionSupported_EXT_raster_multisample]) m_Real->glGetIntegerv(eGL_RASTER_SAMPLES_EXT, (GLint *)&RasterSamples); else RasterSamples = 0; if(ExtensionSupported[ExtensionSupported_EXT_raster_multisample]) m_Real->glGetIntegerv(eGL_RASTER_FIXED_SAMPLE_LOCATIONS_EXT, (GLint *)&RasterFixed); else RasterFixed = false; m_Real->glGetIntegerv(eGL_LOGIC_OP_MODE, (GLint *)&LogicOp); m_Real->glGetFloatv(eGL_COLOR_CLEAR_VALUE, &ColorClearValue.red); m_Real->glGetIntegerv(eGL_PATCH_VERTICES, &PatchParams.numVerts); m_Real->glGetFloatv(eGL_PATCH_DEFAULT_INNER_LEVEL, &PatchParams.defaultInnerLevel[0]); m_Real->glGetFloatv(eGL_PATCH_DEFAULT_OUTER_LEVEL, &PatchParams.defaultOuterLevel[0]); if(!VendorCheck[VendorCheck_AMD_polygon_mode_query]) { // This was listed in docs as enumeration[2] even though polygon mode can't be set independently for front // and back faces for a while, so pass large enough array to be sure. 
// AMD driver claims this doesn't exist anymore in core, so don't return any value, set to // default GL_FILL to be safe GLenum dummy[2] = { eGL_FILL, eGL_FILL }; m_Real->glGetIntegerv(eGL_POLYGON_MODE, (GLint *)&dummy); PolygonMode = dummy[0]; } else { PolygonMode = eGL_FILL; } m_Real->glGetFloatv(eGL_POLYGON_OFFSET_FACTOR, &PolygonOffset[0]); m_Real->glGetFloatv(eGL_POLYGON_OFFSET_UNITS, &PolygonOffset[1]); if(ExtensionSupported[ExtensionSupported_EXT_polygon_offset_clamp]) m_Real->glGetFloatv(eGL_POLYGON_OFFSET_CLAMP_EXT, &PolygonOffset[2]); else PolygonOffset[2] = 0.0f; m_Real->glGetIntegerv(eGL_FRONT_FACE, (GLint *)&FrontFace); m_Real->glGetIntegerv(eGL_CULL_FACE_MODE, (GLint *)&CullFace); }
void GLRenderState::ApplyState(void *ctx, WrappedOpenGL *gl) { { GLenum pnames[] = { eGL_CLIP_DISTANCE0, eGL_CLIP_DISTANCE1, eGL_CLIP_DISTANCE2, eGL_CLIP_DISTANCE3, eGL_CLIP_DISTANCE4, eGL_CLIP_DISTANCE5, eGL_CLIP_DISTANCE6, eGL_CLIP_DISTANCE7, eGL_COLOR_LOGIC_OP, eGL_CULL_FACE, eGL_DEPTH_CLAMP, eGL_DEPTH_TEST, eGL_DEPTH_BOUNDS_TEST_EXT, eGL_DITHER, eGL_FRAMEBUFFER_SRGB, eGL_LINE_SMOOTH, eGL_MULTISAMPLE, eGL_POLYGON_SMOOTH, eGL_POLYGON_OFFSET_FILL, eGL_POLYGON_OFFSET_LINE, eGL_POLYGON_OFFSET_POINT, eGL_PROGRAM_POINT_SIZE, eGL_PRIMITIVE_RESTART, eGL_PRIMITIVE_RESTART_FIXED_INDEX, eGL_SAMPLE_ALPHA_TO_COVERAGE, eGL_SAMPLE_ALPHA_TO_ONE, eGL_SAMPLE_COVERAGE, eGL_SAMPLE_MASK, eGL_RASTER_MULTISAMPLE_EXT, eGL_RASTER_FIXED_SAMPLE_LOCATIONS_EXT, eGL_STENCIL_TEST, eGL_TEXTURE_CUBE_MAP_SEAMLESS, eGL_BLEND_ADVANCED_COHERENT_KHR, }; RDCCOMPILE_ASSERT(ARRAY_COUNT(pnames) == eEnabled_Count, "Wrong number of pnames"); for(GLuint i=0; i < eEnabled_Count; i++) { if(pnames[i] == eGL_BLEND_ADVANCED_COHERENT_KHR && !ExtensionSupported[ExtensionSupported_KHR_blend_equation_advanced_coherent]) continue; if((pnames[i] == eGL_RASTER_MULTISAMPLE_EXT || pnames[i] == eGL_RASTER_FIXED_SAMPLE_LOCATIONS_EXT) && !ExtensionSupported[ExtensionSupported_EXT_raster_multisample]) continue; if(Enabled[i]) m_Real->glEnable(pnames[i]); else m_Real->glDisable(pnames[i]); } } for(GLuint i=0; i < (GLuint)ARRAY_COUNT(Tex2D); i++) { m_Real->glActiveTexture(GLenum(eGL_TEXTURE0 + i)); m_Real->glBindTexture(eGL_TEXTURE_2D, Tex2D[i]); m_Real->glBindSampler(i, Samplers[i]); } m_Real->glActiveTexture(ActiveTexture); m_Real->glBindVertexArray(VAO); m_Real->glBindTransformFeedback(eGL_TRANSFORM_FEEDBACK, FeedbackObj); // See FetchState(). The spec says that you have to SET the right format for the shader too, // but we couldn't query for the format so we can't set it here. 
GLuint maxNumAttribs = 0; m_Real->glGetIntegerv(eGL_MAX_VERTEX_ATTRIBS, (GLint *)&maxNumAttribs); for(GLuint i=0; i < RDCMIN(maxNumAttribs, (GLuint)ARRAY_COUNT(GenericVertexAttribs)); i++) m_Real->glVertexAttrib4fv(i, &GenericVertexAttribs[i].x); m_Real->glPointParameterf(eGL_POINT_FADE_THRESHOLD_SIZE, PointFadeThresholdSize); m_Real->glPointParameteri(eGL_POINT_SPRITE_COORD_ORIGIN, (GLint)PointSpriteOrigin); m_Real->glLineWidth(LineWidth); m_Real->glPointSize(PointSize); m_Real->glPrimitiveRestartIndex(PrimitiveRestartIndex); if(m_Real->glClipControl) // only available in 4.5+ m_Real->glClipControl(ClipOrigin, ClipDepth); m_Real->glProvokingVertex(ProvokingVertex); m_Real->glUseProgram(Program); m_Real->glBindProgramPipeline(Pipeline); GLenum shs[] = { eGL_VERTEX_SHADER, eGL_TESS_CONTROL_SHADER, eGL_TESS_EVALUATION_SHADER, eGL_GEOMETRY_SHADER, eGL_FRAGMENT_SHADER, eGL_COMPUTE_SHADER }; RDCCOMPILE_ASSERT(ARRAY_COUNT(shs) == ARRAY_COUNT(Subroutines), "Subroutine array not the right size"); for(size_t s=0; s < ARRAY_COUNT(shs); s++) if(Subroutines[s].numSubroutines > 0) m_Real->glUniformSubroutinesuiv(shs[s], Subroutines[s].numSubroutines, Subroutines[s].Values); m_Real->glBindBuffer(eGL_ARRAY_BUFFER, BufferBindings[eBufIdx_Array]); m_Real->glBindBuffer(eGL_COPY_READ_BUFFER, BufferBindings[eBufIdx_Copy_Read]); m_Real->glBindBuffer(eGL_COPY_WRITE_BUFFER, BufferBindings[eBufIdx_Copy_Write]); m_Real->glBindBuffer(eGL_DRAW_INDIRECT_BUFFER, BufferBindings[eBufIdx_Draw_Indirect]); m_Real->glBindBuffer(eGL_DISPATCH_INDIRECT_BUFFER, BufferBindings[eBufIdx_Dispatch_Indirect]); m_Real->glBindBuffer(eGL_PIXEL_PACK_BUFFER, BufferBindings[eBufIdx_Pixel_Pack]); m_Real->glBindBuffer(eGL_PIXEL_UNPACK_BUFFER, BufferBindings[eBufIdx_Pixel_Unpack]); m_Real->glBindBuffer(eGL_QUERY_BUFFER, BufferBindings[eBufIdx_Query]); m_Real->glBindBuffer(eGL_TEXTURE_BUFFER, BufferBindings[eBufIdx_Texture]); struct { IdxRangeBuffer *bufs; int count; GLenum binding; GLenum maxcount; } idxBufs[] = { { 
AtomicCounter, ARRAY_COUNT(AtomicCounter), eGL_ATOMIC_COUNTER_BUFFER, eGL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS, }, { ShaderStorage, ARRAY_COUNT(ShaderStorage), eGL_SHADER_STORAGE_BUFFER, eGL_MAX_SHADER_STORAGE_BUFFER_BINDINGS, }, { TransformFeedback, ARRAY_COUNT(TransformFeedback), eGL_TRANSFORM_FEEDBACK_BUFFER, eGL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS, }, { UniformBinding, ARRAY_COUNT(UniformBinding), eGL_UNIFORM_BUFFER, eGL_MAX_UNIFORM_BUFFER_BINDINGS, }, }; for(size_t b=0; b < ARRAY_COUNT(idxBufs); b++) { // only restore buffer bindings here if we were using the default transform feedback object if(idxBufs[b].binding == eGL_TRANSFORM_FEEDBACK_BUFFER && FeedbackObj) continue; GLint maxCount = 0; m_Real->glGetIntegerv(idxBufs[b].maxcount, &maxCount); for(int i=0; i < idxBufs[b].count && i < maxCount; i++) { if(idxBufs[b].bufs[i].name == 0 || (idxBufs[b].bufs[i].start == 0 && idxBufs[b].bufs[i].size == 0) ) m_Real->glBindBufferBase(idxBufs[b].binding, i, idxBufs[b].bufs[i].name); else m_Real->glBindBufferRange(idxBufs[b].binding, i, idxBufs[b].bufs[i].name, (GLintptr)idxBufs[b].bufs[i].start, (GLsizeiptr)idxBufs[b].bufs[i].size); } } for(GLuint i=0; i < (GLuint)ARRAY_COUNT(Blends); i++) { m_Real->glBlendFuncSeparatei(i, Blends[i].SourceRGB, Blends[i].DestinationRGB, Blends[i].SourceAlpha, Blends[i].DestinationAlpha); m_Real->glBlendEquationSeparatei(i, Blends[i].EquationRGB, Blends[i].EquationAlpha); if(Blends[i].Enabled) m_Real->glEnablei(eGL_BLEND, i); else m_Real->glDisablei(eGL_BLEND, i); } m_Real->glBlendColor(BlendColor[0], BlendColor[1], BlendColor[2], BlendColor[3]); m_Real->glViewportArrayv(0, ARRAY_COUNT(Viewports), &Viewports[0].x); for (GLuint s = 0; s < (GLuint)ARRAY_COUNT(Scissors); ++s) { m_Real->glScissorIndexedv(s, &Scissors[s].x); if (Scissors[s].enabled) m_Real->glEnablei(eGL_SCISSOR_TEST, s); else m_Real->glDisablei(eGL_SCISSOR_TEST, s); } GLenum DBs[8] = { eGL_NONE }; uint32_t numDBs = 0; for(GLuint i=0; i < (GLuint)ARRAY_COUNT(DrawBuffers); 
i++) { if(DrawBuffers[i] != eGL_NONE) { numDBs++; DBs[i] = DrawBuffers[i]; if(m_State < WRITING) { // since we are faking the default framebuffer with our own // to see the results, replace back/front/left/right with color attachment 0 if(DBs[i] == eGL_BACK_LEFT || DBs[i] == eGL_BACK_RIGHT || DBs[i] == eGL_FRONT_LEFT || DBs[i] == eGL_FRONT_RIGHT) DBs[i] = eGL_COLOR_ATTACHMENT0; // These aren't valid for glDrawBuffers but can be returned when we call glGet, // assume they mean left implicitly if(DBs[i] == eGL_BACK || DBs[i] == eGL_FRONT) DBs[i] = eGL_COLOR_ATTACHMENT0; } } else { break; } } // apply drawbuffers/readbuffer to default framebuffer m_Real->glBindFramebuffer(eGL_READ_FRAMEBUFFER, gl->GetFakeBBFBO()); m_Real->glBindFramebuffer(eGL_DRAW_FRAMEBUFFER, gl->GetFakeBBFBO()); m_Real->glDrawBuffers(numDBs, DBs); // see above for reasoning for this m_Real->glReadBuffer(eGL_COLOR_ATTACHMENT0); m_Real->glBindFramebuffer(eGL_READ_FRAMEBUFFER, ReadFBO); m_Real->glBindFramebuffer(eGL_DRAW_FRAMEBUFFER, DrawFBO); m_Real->glHint(eGL_FRAGMENT_SHADER_DERIVATIVE_HINT, Hints.Derivatives); m_Real->glHint(eGL_LINE_SMOOTH_HINT, Hints.LineSmooth); m_Real->glHint(eGL_POLYGON_SMOOTH_HINT, Hints.PolySmooth); m_Real->glHint(eGL_TEXTURE_COMPRESSION_HINT, Hints.TexCompression); m_Real->glDepthMask(DepthWriteMask); m_Real->glClearDepth(DepthClearValue); m_Real->glDepthFunc(DepthFunc); for(GLuint i=0; i < (GLuint)ARRAY_COUNT(DepthRanges); i++) { double v[2] = { DepthRanges[i].nearZ, DepthRanges[i].farZ }; m_Real->glDepthRangeArrayv(i, 1, v); } if(m_Real->glDepthBoundsEXT) // extension, not always available m_Real->glDepthBoundsEXT(DepthBounds.nearZ, DepthBounds.farZ); { m_Real->glStencilFuncSeparate(eGL_FRONT, StencilFront.func, StencilFront.ref, StencilFront.valuemask); m_Real->glStencilFuncSeparate(eGL_BACK, StencilBack.func, StencilBack.ref, StencilBack.valuemask); m_Real->glStencilMaskSeparate(eGL_FRONT, StencilFront.writemask); m_Real->glStencilMaskSeparate(eGL_BACK, 
StencilBack.writemask); m_Real->glStencilOpSeparate(eGL_FRONT, StencilFront.stencilFail, StencilFront.depthFail, StencilFront.pass); m_Real->glStencilOpSeparate(eGL_BACK, StencilBack.stencilFail, StencilBack.depthFail, StencilBack.pass); } m_Real->glClearStencil((GLint)StencilClearValue); for(GLuint i=0; i < (GLuint)ARRAY_COUNT(ColorMasks); i++) m_Real->glColorMaski(i, ColorMasks[i].red, ColorMasks[i].green, ColorMasks[i].blue, ColorMasks[i].alpha); m_Real->glSampleMaski(0, (GLbitfield)SampleMask[0]); m_Real->glSampleCoverage(SampleCoverage, SampleCoverageInvert ? GL_TRUE : GL_FALSE); m_Real->glMinSampleShading(MinSampleShading); if(ExtensionSupported[ExtensionSupported_EXT_raster_multisample]) m_Real->glRasterSamplesEXT(RasterSamples, RasterFixed); m_Real->glLogicOp(LogicOp); m_Real->glClearColor(ColorClearValue.red, ColorClearValue.green, ColorClearValue.blue, ColorClearValue.alpha); m_Real->glPatchParameteri(eGL_PATCH_VERTICES, PatchParams.numVerts); m_Real->glPatchParameterfv(eGL_PATCH_DEFAULT_INNER_LEVEL, PatchParams.defaultInnerLevel); m_Real->glPatchParameterfv(eGL_PATCH_DEFAULT_OUTER_LEVEL, PatchParams.defaultOuterLevel); m_Real->glPolygonMode(eGL_FRONT_AND_BACK, PolygonMode); if(ExtensionSupported[ExtensionSupported_EXT_polygon_offset_clamp]) m_Real->glPolygonOffsetClampEXT(PolygonOffset[0], PolygonOffset[1], PolygonOffset[2]); else m_Real->glPolygonOffset(PolygonOffset[0], PolygonOffset[1]); m_Real->glFrontFace(FrontFace); m_Real->glCullFace(CullFace); }
// Captures the post-transform vertex data for the drawcall at eventId and caches it in
// m_PostVSData[eventId].
//
// The function works in up to two passes, both driven by stream-out:
//  1. VS out: the current graphics pipeline is patched so that only the vertex shader runs, the
//     topology is a point list and nothing is rasterized. For indexed draws only the unique
//     vertices are streamed out and a remapped copy of the index buffer is produced.
//  2. GS/DS out (only if the pipeline has a geometry or domain shader): the other stages are
//     re-enabled and the draw is replayed with the original topology type. Draws with more than
//     one instance are replayed incrementally so the per-instance output sizes can be recorded.
//
// In both passes the streamed-out data is copied to the staging buffer, read back, uploaded into a
// new buffer that is stored in the cache entry, and used to estimate the near/far planes.
//
// The function returns early without caching anything if the event is already cached, no graphics
// pipeline with a vertex shader is bound, the drawcall is missing or has no indices/vertices, or a
// D3D12 object could not be created/mapped.
void D3D12Replay::InitPostVSBuffers(uint32_t eventId)
{
  // go through any aliasing
  if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end())
    eventId = m_PostVSAlias[eventId];

  // already fetched for this event
  if(m_PostVSData.find(eventId) != m_PostVSData.end())
    return;

  D3D12CommandData *cmd = m_pDevice->GetQueue()->GetCommandData();
  const D3D12RenderState &rs = cmd->m_RenderState;

  if(rs.pipe == ResourceId())
    return;

  WrappedID3D12PipelineState *origPSO =
      m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(!origPSO->IsGraphics())
    return;

  D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = origPSO->GetGraphicsDesc();

  if(psoDesc.VS.BytecodeLength == 0)
    return;

  WrappedID3D12Shader *vs = origPSO->VS();

  D3D_PRIMITIVE_TOPOLOGY topo = rs.topo;

  const DrawcallDescription *drawcall = m_pDevice->GetDrawcall(eventId);

  // nothing to do if there is no drawcall for this event, or it draws nothing
  if(drawcall == NULL || drawcall->numIndices == 0)
    return;

  DXBC::DXBCFile *dxbcVS = vs->GetDXBC();

  RDCASSERT(dxbcVS);

  DXBC::DXBCFile *dxbcGS = NULL;

  WrappedID3D12Shader *gs = origPSO->GS();

  if(gs)
  {
    dxbcGS = gs->GetDXBC();

    RDCASSERT(dxbcGS);
  }

  DXBC::DXBCFile *dxbcDS = NULL;

  WrappedID3D12Shader *ds = origPSO->DS();

  if(ds)
  {
    dxbcDS = ds->GetDXBC();

    RDCASSERT(dxbcDS);
  }

  ID3D12RootSignature *soSig = NULL;

  HRESULT hr = S_OK;

  {
    WrappedID3D12RootSignature *sig =
        m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12RootSignature>(
            rs.graphics.rootsig);

    D3D12RootSignature rootsig = sig->sig;

    // create a root signature that allows stream out, if necessary
    if((rootsig.Flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT) == 0)
    {
      rootsig.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;

      ID3DBlob *blob = m_pDevice->GetShaderCache()->MakeRootSig(rootsig);

      hr = m_pDevice->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(),
                                          __uuidof(ID3D12RootSignature), (void **)&soSig);

      // the serialised blob is not needed any more, whether or not creation succeeded
      SAFE_RELEASE(blob);

      if(FAILED(hr))
      {
        RDCERR("Couldn't enable stream-out in root signature: HRESULT: %s", ToStr(hr).c_str());
        return;
      }
    }
  }

  vector<D3D12_SO_DECLARATION_ENTRY> sodecls;

  UINT stride = 0;
  int posidx = -1;
  int numPosComponents = 0;

  if(!dxbcVS->m_OutputSig.empty())
  {
    // build one stream-out declaration per vertex shader output
    for(const SigParameter &sign : dxbcVS->m_OutputSig)
    {
      D3D12_SO_DECLARATION_ENTRY decl;

      decl.Stream = 0;
      decl.OutputSlot = 0;

      decl.SemanticName = sign.semanticName.c_str();
      decl.SemanticIndex = sign.semanticIndex;
      decl.StartComponent = 0;
      decl.ComponentCount = sign.compCount & 0xff;

      // position is always captured as a full 4-component vector
      if(sign.systemValue == ShaderBuiltin::Position)
      {
        posidx = (int)sodecls.size();
        numPosComponents = decl.ComponentCount = 4;
      }

      stride += decl.ComponentCount * sizeof(float);
      sodecls.push_back(decl);
    }

    if(stride == 0)
    {
      RDCERR("Didn't get valid stride! Setting to 4 bytes");
      stride = 4;
    }

    // shift position attribute up to first, keeping order otherwise
    // the same
    if(posidx > 0)
    {
      D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx];
      sodecls.erase(sodecls.begin() + posidx);
      sodecls.insert(sodecls.begin(), pos);
    }

    // set up stream output entries and buffers
    psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size();
    psoDesc.StreamOutput.pSODeclaration = &sodecls[0];
    psoDesc.StreamOutput.NumStrides = 1;
    psoDesc.StreamOutput.pBufferStrides = &stride;
    psoDesc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;

    // disable all other shader stages
    psoDesc.HS.BytecodeLength = 0;
    psoDesc.HS.pShaderBytecode = NULL;
    psoDesc.DS.BytecodeLength = 0;
    psoDesc.DS.pShaderBytecode = NULL;
    psoDesc.GS.BytecodeLength = 0;
    psoDesc.GS.pShaderBytecode = NULL;
    psoDesc.PS.BytecodeLength = 0;
    psoDesc.PS.pShaderBytecode = NULL;

    // disable any rasterization/use of output targets
    psoDesc.DepthStencilState.DepthEnable = FALSE;
    psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
    psoDesc.DepthStencilState.StencilEnable = FALSE;

    if(soSig)
      psoDesc.pRootSignature = soSig;

    // render as points
    psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;

    // disable outputs
    psoDesc.NumRenderTargets = 0;
    RDCEraseEl(psoDesc.RTVFormats);
    psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

    ID3D12PipelineState *pipe = NULL;
    hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                (void **)&pipe);
    if(FAILED(hr))
    {
      RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    ID3D12Resource *idxBuf = NULL;

    // grow the stream-out buffer size (by doubling) until the worst-case output fits. The buffers
    // themselves are only recreated later, once we know whether the index data also needs room.
    bool recreate = false;
    uint64_t outputSize = uint64_t(drawcall->numIndices) * drawcall->numInstances * stride;

    if(m_SOBufferSize < outputSize)
    {
      uint64_t oldSize = m_SOBufferSize;
      while(m_SOBufferSize < outputSize)
        m_SOBufferSize *= 2;
      RDCWARN("Resizing stream-out buffer from %llu to %llu for output data", oldSize,
              m_SOBufferSize);
      recreate = true;
    }

    ID3D12GraphicsCommandList *list = NULL;

    if(!(drawcall->flags & DrawFlags::UseIBuffer))
    {
      if(recreate)
      {
        m_pDevice->GPUSync();

        CreateSOBuffers();
      }

      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      // the filled-size counter lives at the start of the buffer, the data starts 64 bytes in
      D3D12_STREAM_OUTPUT_BUFFER_VIEW view;
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;
      list->SOSetTargets(0, 1, &view);

      list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
      list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                          drawcall->instanceOffset);
    }
    else    // drawcall is indexed
    {
      bytebuf idxdata;
      GetBufferData(rs.ibuffer.buf, rs.ibuffer.offs + drawcall->indexOffset * rs.ibuffer.bytewidth,
                    RDCMIN(drawcall->numIndices * rs.ibuffer.bytewidth, rs.ibuffer.size), idxdata);

      vector<uint32_t> indices;

      // idxdata may be empty if nothing could be read; in that case numIndices below is 0 and
      // these pointers are never dereferenced, so avoid indexing into the empty buffer.
      uint16_t *idx16 = idxdata.empty() ? NULL : (uint16_t *)&idxdata[0];
      uint32_t *idx32 = idxdata.empty() ? NULL : (uint32_t *)&idxdata[0];

      // only read as many indices as were available in the buffer
      uint32_t numIndices =
          RDCMIN(uint32_t(idxdata.size() / rs.ibuffer.bytewidth), drawcall->numIndices);

      uint32_t idxclamp = 0;
      if(drawcall->baseVertex < 0)
        idxclamp = uint32_t(-drawcall->baseVertex);

      // grab all unique vertex indices referenced
      for(uint32_t i = 0; i < numIndices; i++)
      {
        uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i];

        // apply baseVertex but clamp to 0 (don't allow index to become negative)
        if(i32 < idxclamp)
          i32 = 0;
        else if(drawcall->baseVertex < 0)
          i32 -= idxclamp;
        else if(drawcall->baseVertex > 0)
          i32 += drawcall->baseVertex;

        // indices is kept sorted and unique
        auto it = std::lower_bound(indices.begin(), indices.end(), i32);

        if(it != indices.end() && *it == i32)
          continue;

        indices.insert(it, i32);
      }

      // if we read out of bounds, we'll also have a 0 index being referenced
      // (as 0 is read). Don't insert 0 if we already have 0 though
      if(numIndices < drawcall->numIndices && (indices.empty() || indices[0] != 0))
        indices.insert(indices.begin(), 0);

      // An index buffer could be something like: 500, 501, 502, 501, 503, 502
      // in which case we can't use the existing index buffer without filling 499 slots of vertex
      // data with padding. Instead we rebase the indices based on the smallest vertex so it becomes
      // 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer.
      //
      // Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512
      // which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid.
      // We just stream-out a tightly packed list of unique indices, and then remap the index buffer
      // so that what did point to 500 points to 0 (accounting for rebasing), and what did point
      // to 510 now points to 3 (accounting for the unique sort).

      // we use a map here since the indices may be sparse. Especially considering if an index
      // is 'invalid' like 0xcccccccc then we don't want an array of 3.4 billion entries.
      map<uint32_t, size_t> indexRemap;
      for(size_t i = 0; i < indices.size(); i++)
      {
        // by definition, this index will only appear once in indices[]
        indexRemap[indices[i]] = i;
      }

      if(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t))
      {
        uint64_t oldSize = m_SOBufferSize;
        while(m_SOBufferSize / sizeof(Vec4f) < indices.size() * sizeof(uint32_t))
          m_SOBufferSize *= 2;
        RDCWARN("Resizing stream-out buffer from %llu to %llu for indices", oldSize,
                m_SOBufferSize);
        recreate = true;
      }

      if(recreate)
      {
        m_pDevice->GPUSync();

        CreateSOBuffers();
      }

      // indices is never empty here: either at least one index was read, or the
      // out-of-bounds 0 index was inserted above
      GetDebugManager()->FillBuffer(m_SOPatchedIndexBuffer, 0, &indices[0],
                                    indices.size() * sizeof(uint32_t));

      D3D12_INDEX_BUFFER_VIEW patchedIB;

      patchedIB.BufferLocation = m_SOPatchedIndexBuffer->GetGPUVirtualAddress();
      patchedIB.Format = DXGI_FORMAT_R32_UINT;
      patchedIB.SizeInBytes = UINT(indices.size() * sizeof(uint32_t));

      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      list->IASetIndexBuffer(&patchedIB);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      D3D12_STREAM_OUTPUT_BUFFER_VIEW view;
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;
      list->SOSetTargets(0, 1, &view);

      list->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);

      list->DrawIndexedInstanced((UINT)indices.size(), drawcall->numInstances, 0, 0,
                                 drawcall->instanceOffset);

      uint32_t stripCutValue = 0;
      if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF)
        stripCutValue = 0xffff;
      else if(psoDesc.IBStripCutValue == D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF)
        stripCutValue = 0xffffffff;

      // rebase existing index buffer to point to the right elements in our stream-out'd
      // vertex buffer
      for(uint32_t i = 0; i < numIndices; i++)
      {
        uint32_t i32 = rs.ibuffer.bytewidth == 2 ? uint32_t(idx16[i]) : idx32[i];

        // preserve primitive restart indices
        if(stripCutValue && i32 == stripCutValue)
          continue;

        // apply baseVertex but clamp to 0 (don't allow index to become negative)
        if(i32 < idxclamp)
          i32 = 0;
        else if(drawcall->baseVertex < 0)
          i32 -= idxclamp;
        else if(drawcall->baseVertex > 0)
          i32 += drawcall->baseVertex;

        if(rs.ibuffer.bytewidth == 2)
          idx16[i] = uint16_t(indexRemap[i32]);
        else
          idx32[i] = uint32_t(indexRemap[i32]);
      }

      idxBuf = NULL;

      // upload the remapped indices into their own buffer, kept alongside the vertex output
      if(!idxdata.empty())
      {
        D3D12_RESOURCE_DESC idxBufDesc;
        idxBufDesc.Alignment = 0;
        idxBufDesc.DepthOrArraySize = 1;
        idxBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
        idxBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
        idxBufDesc.Format = DXGI_FORMAT_UNKNOWN;
        idxBufDesc.Height = 1;
        idxBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
        idxBufDesc.MipLevels = 1;
        idxBufDesc.SampleDesc.Count = 1;
        idxBufDesc.SampleDesc.Quality = 0;
        idxBufDesc.Width = idxdata.size();

        D3D12_HEAP_PROPERTIES heapProps;
        heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
        heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
        heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
        heapProps.CreationNodeMask = 1;
        heapProps.VisibleNodeMask = 1;

        hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &idxBufDesc,
                                                D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                                __uuidof(ID3D12Resource), (void **)&idxBuf);
        RDCASSERTEQUAL(hr, S_OK);

        SetObjName(idxBuf, StringFormat::Fmt("PostVS idxBuf for %u", eventId));

        GetDebugManager()->FillBuffer(idxBuf, 0, &idxdata[0], idxdata.size());
      }
    }

    // copy the streamed-out data (including the filled-size counter) to the staging buffer
    D3D12_RESOURCE_BARRIER sobarr = {};
    sobarr.Transition.pResource = m_SOBuffer;
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

    list->ResourceBarrier(1, &sobarr);

    list->CopyResource(m_SOStagingBuffer, m_SOBuffer);

    // we're done with this after the copy, so we can discard it and reset
    // the counter for the next stream-out
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    list->DiscardResource(m_SOBuffer, NULL);
    list->ResourceBarrier(1, &sobarr);

    UINT zeroes[4] = {0, 0, 0, 0};
    list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                       GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                       m_SOBuffer, zeroes, 0, NULL);

    list->Close();

    ID3D12CommandList *l = list;
    m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
    m_pDevice->GPUSync();

    GetDebugManager()->ResetDebugAlloc();

    SAFE_RELEASE(pipe);

    byte *byteData = NULL;
    D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize};
    hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData);
    if(FAILED(hr))
    {
      RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(idxBuf);
      SAFE_RELEASE(soSig);
      return;
    }

    // we only read from the mapping, so report an empty written range when unmapping
    range.End = 0;

    uint64_t numBytesWritten = *(uint64_t *)byteData;

    if(numBytesWritten == 0)
    {
      // don't leave the staging buffer mapped on the early-out
      m_SOStagingBuffer->Unmap(0, &range);

      m_PostVSData[eventId] = D3D12PostVSData();
      SAFE_RELEASE(idxBuf);
      SAFE_RELEASE(soSig);
      return;
    }

    // skip past the counter
    byteData += 64;

    uint64_t numPrims = numBytesWritten / stride;

    ID3D12Resource *vsoutBuffer = NULL;

    {
      D3D12_RESOURCE_DESC vertBufDesc;
      vertBufDesc.Alignment = 0;
      vertBufDesc.DepthOrArraySize = 1;
      vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      vertBufDesc.Format = DXGI_FORMAT_UNKNOWN;
      vertBufDesc.Height = 1;
      vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      vertBufDesc.MipLevels = 1;
      vertBufDesc.SampleDesc.Count = 1;
      vertBufDesc.SampleDesc.Quality = 0;
      vertBufDesc.Width = numBytesWritten;

      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc,
                                              D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                              __uuidof(ID3D12Resource), (void **)&vsoutBuffer);
      RDCASSERTEQUAL(hr, S_OK);

      if(vsoutBuffer)
      {
        SetObjName(vsoutBuffer, StringFormat::Fmt("PostVS vsoutBuffer for %u", eventId));
        GetDebugManager()->FillBuffer(vsoutBuffer, 0, byteData, (size_t)numBytesWritten);
      }
    }

    // defaults used if no near/far planes can be derived from the data
    float nearp = 0.1f;
    float farp = 100.0f;

    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(uint64_t i = 1; numPosComponents == 4 && i < numPrims; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + i * stride);

      if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        // m == 1 would make the far plane a division by zero, try another vertex
        if(m == 1.0f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }

    m_SOStagingBuffer->Unmap(0, &range);

    m_PostVSData[eventId].vsin.topo = topo;
    m_PostVSData[eventId].vsout.buf = vsoutBuffer;
    m_PostVSData[eventId].vsout.vertStride = stride;
    m_PostVSData[eventId].vsout.nearPlane = nearp;
    m_PostVSData[eventId].vsout.farPlane = farp;

    m_PostVSData[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::UseIBuffer);
    m_PostVSData[eventId].vsout.numVerts = drawcall->numIndices;

    m_PostVSData[eventId].vsout.instStride = 0;
    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].vsout.instStride =
          uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances));

    m_PostVSData[eventId].vsout.idxBuf = NULL;
    if(m_PostVSData[eventId].vsout.useIndices && idxBuf)
    {
      m_PostVSData[eventId].vsout.idxBuf = idxBuf;
      m_PostVSData[eventId].vsout.idxFmt =
          rs.ibuffer.bytewidth == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
    }

    m_PostVSData[eventId].vsout.hasPosOut = posidx >= 0;

    m_PostVSData[eventId].vsout.topo = topo;
  }
  else
  {
    // empty vertex output signature
    m_PostVSData[eventId].vsin.topo = topo;
    m_PostVSData[eventId].vsout.buf = NULL;
    m_PostVSData[eventId].vsout.instStride = 0;
    m_PostVSData[eventId].vsout.vertStride = 0;
    m_PostVSData[eventId].vsout.nearPlane = 0.0f;
    m_PostVSData[eventId].vsout.farPlane = 0.0f;
    m_PostVSData[eventId].vsout.useIndices = false;
    m_PostVSData[eventId].vsout.hasPosOut = false;
    m_PostVSData[eventId].vsout.idxBuf = NULL;

    m_PostVSData[eventId].vsout.topo = topo;
  }

  if(dxbcGS || dxbcDS)
  {
    stride = 0;
    posidx = -1;
    numPosComponents = 0;

    // prefer the domain shader's outputs if present, otherwise use the geometry shader's
    DXBC::DXBCFile *lastShader = dxbcGS;
    if(dxbcDS)
      lastShader = dxbcDS;

    sodecls.clear();
    for(const SigParameter &sign : lastShader->m_OutputSig)
    {
      D3D12_SO_DECLARATION_ENTRY decl;

      // for now, skip streams that aren't stream 0
      if(sign.stream != 0)
        continue;

      decl.Stream = 0;
      decl.OutputSlot = 0;

      decl.SemanticName = sign.semanticName.c_str();
      decl.SemanticIndex = sign.semanticIndex;
      decl.StartComponent = 0;
      decl.ComponentCount = sign.compCount & 0xff;

      if(sign.systemValue == ShaderBuiltin::Position)
      {
        posidx = (int)sodecls.size();
        numPosComponents = decl.ComponentCount = 4;
      }

      stride += decl.ComponentCount * sizeof(float);
      sodecls.push_back(decl);
    }

    // shift position attribute up to first, keeping order otherwise
    // the same
    if(posidx > 0)
    {
      D3D12_SO_DECLARATION_ENTRY pos = sodecls[posidx];
      sodecls.erase(sodecls.begin() + posidx);
      sodecls.insert(sodecls.begin(), pos);
    }

    // enable the other shader stages again
    if(origPSO->DS())
      psoDesc.DS = origPSO->DS()->GetDesc();
    if(origPSO->HS())
      psoDesc.HS = origPSO->HS()->GetDesc();
    if(origPSO->GS())
      psoDesc.GS = origPSO->GS()->GetDesc();

    // configure new SO declarations. sodecls can be empty here (e.g. every output is on a
    // non-zero stream), so use data() rather than indexing element 0.
    psoDesc.StreamOutput.NumEntries = (UINT)sodecls.size();
    psoDesc.StreamOutput.pSODeclaration = sodecls.data();
    psoDesc.StreamOutput.NumStrides = 1;
    psoDesc.StreamOutput.pBufferStrides = &stride;

    // we're using the same topology this time
    psoDesc.PrimitiveTopologyType = origPSO->graphics->PrimitiveTopologyType;

    ID3D12PipelineState *pipe = NULL;
    hr = m_pDevice->CreateGraphicsPipelineState(&psoDesc, __uuidof(ID3D12PipelineState),
                                                (void **)&pipe);
    if(FAILED(hr))
    {
      RDCERR("Couldn't create patched graphics pipeline: HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    D3D12_STREAM_OUTPUT_BUFFER_VIEW view;

    ID3D12GraphicsCommandList *list = NULL;

    view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
    view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
    view.SizeInBytes = m_SOBufferSize;
    // draws with multiple instances must be replayed one at a time so we can record the number of
    // primitives from each drawcall, as due to expansion this can vary per-instance.
    if(drawcall->numInstances > 1)
    {
      list = GetDebugManager()->ResetDebugList();

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;

      // do a dummy draw to make sure we have enough space in the output buffer
      list->SOSetTargets(0, 1, &view);

      list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

      // because the result is expanded we don't have to remap index buffers or anything
      if(drawcall->flags & DrawFlags::UseIBuffer)
      {
        list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances,
                                   drawcall->indexOffset, drawcall->baseVertex,
                                   drawcall->instanceOffset);
      }
      else
      {
        list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                            drawcall->instanceOffset);
      }

      list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

      list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1,
                             m_SOStagingBuffer, 0);

      list->Close();

      ID3D12CommandList *l = list;
      m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
      m_pDevice->GPUSync();

      // check that things are OK, and resize up if needed
      D3D12_RANGE range;
      range.Begin = 0;
      range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

      D3D12_QUERY_DATA_SO_STATISTICS *data = NULL;
      hr = m_SOStagingBuffer->Map(0, &range, (void **)&data);
      if(FAILED(hr))
      {
        RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
        GetDebugManager()->ResetDebugAlloc();
        SAFE_RELEASE(pipe);
        SAFE_RELEASE(soSig);
        return;
      }

      // take a copy of the statistics, as the mapped pointer must not be used after Unmap
      D3D12_QUERY_DATA_SO_STATISTICS result = *data;

      range.End = 0;
      m_SOStagingBuffer->Unmap(0, &range);

      if(m_SOBufferSize < result.PrimitivesStorageNeeded * 3 * stride)
      {
        uint64_t oldSize = m_SOBufferSize;
        while(m_SOBufferSize < result.PrimitivesStorageNeeded * 3 * stride)
          m_SOBufferSize *= 2;
        RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize, m_SOBufferSize);
        CreateSOBuffers();
      }

      // the buffers may have been recreated, so refresh the addresses
      view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
      view.SizeInBytes = m_SOBufferSize;

      GetDebugManager()->ResetDebugAlloc();

      // now do the actual stream out
      list = GetDebugManager()->ResetDebugList();

      // first need to reset the counter byte values which may have either been written to above, or
      // are newly created
      {
        D3D12_RESOURCE_BARRIER sobarr = {};
        sobarr.Transition.pResource = m_SOBuffer;
        sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
        sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;

        list->ResourceBarrier(1, &sobarr);

        UINT zeroes[4] = {0, 0, 0, 0};
        list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                           GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                           m_SOBuffer, zeroes, 0, NULL);

        // transition back to stream-out for the draws below
        std::swap(sobarr.Transition.StateBefore, sobarr.Transition.StateAfter);
        list->ResourceBarrier(1, &sobarr);
      }

      rs.ApplyState(list);

      list->SetPipelineState(pipe);

      if(soSig)
      {
        list->SetGraphicsRootSignature(soSig);
        rs.ApplyGraphicsRootElements(list);
      }

      // reserve space for enough 'buffer filled size' locations
      view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() +
                            AlignUp(uint64_t(drawcall->numInstances * sizeof(UINT64)), 64ULL);

      // do incremental draws to get the output size. We have to do this O(N^2) style because
      // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... N instances
      // and count the total number of verts each time, then we can see from the difference how much
      // each instance wrote.
      for(uint32_t inst = 1; inst <= drawcall->numInstances; inst++)
      {
        if(drawcall->flags & DrawFlags::UseIBuffer)
        {
          view.BufferFilledSizeLocation =
              m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64);
          list->SOSetTargets(0, 1, &view);
          list->DrawIndexedInstanced(drawcall->numIndices, inst, drawcall->indexOffset,
                                     drawcall->baseVertex, drawcall->instanceOffset);
        }
        else
        {
          view.BufferFilledSizeLocation =
              m_SOBuffer->GetGPUVirtualAddress() + (inst - 1) * sizeof(UINT64);
          list->SOSetTargets(0, 1, &view);
          list->DrawInstanced(drawcall->numIndices, inst, drawcall->vertexOffset,
                              drawcall->instanceOffset);
        }
      }

      list->Close();

      l = list;
      m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
      m_pDevice->GPUSync();

      GetDebugManager()->ResetDebugAlloc();

      // the last draw will have written the actual data we want into the buffer
    }
    else
    {
      // this only loops if we find from a query that we need to resize up
      while(true)
      {
        list = GetDebugManager()->ResetDebugList();

        rs.ApplyState(list);

        list->SetPipelineState(pipe);

        if(soSig)
        {
          list->SetGraphicsRootSignature(soSig);
          rs.ApplyGraphicsRootElements(list);
        }

        view.BufferFilledSizeLocation = m_SOBuffer->GetGPUVirtualAddress();
        view.BufferLocation = m_SOBuffer->GetGPUVirtualAddress() + 64;
        view.SizeInBytes = m_SOBufferSize;

        list->SOSetTargets(0, 1, &view);

        list->BeginQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

        // because the result is expanded we don't have to remap index buffers or anything
        if(drawcall->flags & DrawFlags::UseIBuffer)
        {
          list->DrawIndexedInstanced(drawcall->numIndices, drawcall->numInstances,
                                     drawcall->indexOffset, drawcall->baseVertex,
                                     drawcall->instanceOffset);
        }
        else
        {
          list->DrawInstanced(drawcall->numIndices, drawcall->numInstances, drawcall->vertexOffset,
                              drawcall->instanceOffset);
        }

        list->EndQuery(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0);

        list->ResolveQueryData(m_SOQueryHeap, D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0, 0, 1,
                               m_SOStagingBuffer, 0);

        list->Close();

        ID3D12CommandList *l = list;
        m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
        m_pDevice->GPUSync();

        // check that things are OK, and resize up if needed
        D3D12_RANGE range;
        range.Begin = 0;
        range.End = (SIZE_T)sizeof(D3D12_QUERY_DATA_SO_STATISTICS);

        D3D12_QUERY_DATA_SO_STATISTICS *data = NULL;
        hr = m_SOStagingBuffer->Map(0, &range, (void **)&data);
        if(FAILED(hr))
        {
          RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
          GetDebugManager()->ResetDebugAlloc();
          SAFE_RELEASE(pipe);
          SAFE_RELEASE(soSig);
          return;
        }

        // copy the statistics out and unmap straight away, so the staging buffer is never
        // still mapped when the buffers get recreated below
        D3D12_QUERY_DATA_SO_STATISTICS result = *data;

        range.End = 0;
        m_SOStagingBuffer->Unmap(0, &range);

        if(m_SOBufferSize < result.PrimitivesStorageNeeded * 3 * stride)
        {
          uint64_t oldSize = m_SOBufferSize;
          while(m_SOBufferSize < result.PrimitivesStorageNeeded * 3 * stride)
            m_SOBufferSize *= 2;
          RDCWARN("Resizing stream-out buffer from %llu to %llu for output", oldSize,
                  m_SOBufferSize);
          CreateSOBuffers();
          continue;
        }

        GetDebugManager()->ResetDebugAlloc();

        break;
      }
    }

    list = GetDebugManager()->ResetDebugList();

    // copy the streamed-out data (including the filled-size counters) to the staging buffer
    D3D12_RESOURCE_BARRIER sobarr = {};
    sobarr.Transition.pResource = m_SOBuffer;
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_STREAM_OUT;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;

    list->ResourceBarrier(1, &sobarr);

    list->CopyResource(m_SOStagingBuffer, m_SOBuffer);

    // we're done with this after the copy, so we can discard it and reset
    // the counter for the next stream-out
    sobarr.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
    sobarr.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    list->DiscardResource(m_SOBuffer, NULL);
    list->ResourceBarrier(1, &sobarr);

    UINT zeroes[4] = {0, 0, 0, 0};
    list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(STREAM_OUT_UAV),
                                       GetDebugManager()->GetUAVClearHandle(STREAM_OUT_UAV),
                                       m_SOBuffer, zeroes, 0, NULL);

    list->Close();

    ID3D12CommandList *l = list;
    m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
    m_pDevice->GPUSync();

    GetDebugManager()->ResetDebugAlloc();

    SAFE_RELEASE(pipe);

    byte *byteData = NULL;
    D3D12_RANGE range = {0, (SIZE_T)m_SOBufferSize};
    hr = m_SOStagingBuffer->Map(0, &range, (void **)&byteData);
    if(FAILED(hr))
    {
      RDCERR("Failed to map sobuffer HRESULT: %s", ToStr(hr).c_str());
      SAFE_RELEASE(soSig);
      return;
    }

    // we only read from the mapping, so report an empty written range when unmapping
    range.End = 0;

    uint64_t *counters = (uint64_t *)byteData;

    uint64_t numBytesWritten = 0;
    std::vector<D3D12PostVSData::InstData> instData;
    if(drawcall->numInstances > 1)
    {
      // counters[inst] holds the cumulative bytes written by the draw of inst+1 instances, so
      // the difference between neighbouring counters is what each instance wrote
      uint64_t prevByteCount = 0;

      for(uint32_t inst = 0; inst < drawcall->numInstances; inst++)
      {
        uint64_t byteCount = counters[inst];

        D3D12PostVSData::InstData d;
        d.numVerts = uint32_t((byteCount - prevByteCount) / stride);
        d.bufOffset = prevByteCount;
        prevByteCount = byteCount;

        instData.push_back(d);
      }

      numBytesWritten = prevByteCount;
    }
    else
    {
      numBytesWritten = counters[0];
    }

    if(numBytesWritten == 0)
    {
      // don't leave the staging buffer mapped on the early-out
      m_SOStagingBuffer->Unmap(0, &range);

      SAFE_RELEASE(soSig);
      return;
    }

    // skip past the counter(s)
    byteData += (view.BufferLocation - m_SOBuffer->GetGPUVirtualAddress());

    uint64_t numVerts = numBytesWritten / stride;

    ID3D12Resource *gsoutBuffer = NULL;

    {
      D3D12_RESOURCE_DESC vertBufDesc;
      vertBufDesc.Alignment = 0;
      vertBufDesc.DepthOrArraySize = 1;
      vertBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
      vertBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
      vertBufDesc.Format = DXGI_FORMAT_UNKNOWN;
      vertBufDesc.Height = 1;
      vertBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
      vertBufDesc.MipLevels = 1;
      vertBufDesc.SampleDesc.Count = 1;
      vertBufDesc.SampleDesc.Quality = 0;
      vertBufDesc.Width = numBytesWritten;

      D3D12_HEAP_PROPERTIES heapProps;
      heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;
      heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
      heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
      heapProps.CreationNodeMask = 1;
      heapProps.VisibleNodeMask = 1;

      hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &vertBufDesc,
                                              D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                              __uuidof(ID3D12Resource), (void **)&gsoutBuffer);
      RDCASSERTEQUAL(hr, S_OK);

      if(gsoutBuffer)
      {
        SetObjName(gsoutBuffer, StringFormat::Fmt("PostVS gsoutBuffer for %u", eventId));
        GetDebugManager()->FillBuffer(gsoutBuffer, 0, byteData, (size_t)numBytesWritten);
      }
    }

    // defaults used if no near/far planes can be derived from the data
    float nearp = 0.1f;
    float farp = 100.0f;

    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(UINT64 i = 1; numPosComponents == 4 && i < numVerts; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + i * stride);

      if(fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        // m == 1 would make the far plane a division by zero, try another vertex
        if(m == 1.0f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }

    m_SOStagingBuffer->Unmap(0, &range);

    m_PostVSData[eventId].gsout.buf = gsoutBuffer;
    m_PostVSData[eventId].gsout.instStride = 0;
    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].gsout.instStride =
          uint32_t(numBytesWritten / RDCMAX(1U, drawcall->numInstances));
    m_PostVSData[eventId].gsout.vertStride = stride;
    m_PostVSData[eventId].gsout.nearPlane = nearp;
    m_PostVSData[eventId].gsout.farPlane = farp;
    m_PostVSData[eventId].gsout.useIndices = false;
    m_PostVSData[eventId].gsout.hasPosOut = posidx >= 0;
    m_PostVSData[eventId].gsout.idxBuf = NULL;

    // work out the output topology from the last shader stage's declarations
    topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;

    if(lastShader == dxbcGS)
    {
      for(size_t i = 0; i < dxbcGS->GetNumDeclarations(); i++)
      {
        const DXBC::ASMDecl &decl = dxbcGS->GetDeclaration(i);

        if(decl.declaration == DXBC::OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY)
        {
          topo = decl.outTopology;
          break;
        }
      }
    }
    else if(lastShader == dxbcDS)
    {
      for(size_t i = 0; i < dxbcDS->GetNumDeclarations(); i++)
      {
        const DXBC::ASMDecl &decl = dxbcDS->GetDeclaration(i);

        if(decl.declaration == DXBC::OPCODE_DCL_TESS_DOMAIN)
        {
          if(decl.domain == DXBC::DOMAIN_ISOLINE)
            topo = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
          else
            topo = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
          break;
        }
      }
    }

    m_PostVSData[eventId].gsout.topo = topo;

    // streamout expands strips unfortunately
    if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
    else if(topo == D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ)
      m_PostVSData[eventId].gsout.topo = D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;

    m_PostVSData[eventId].gsout.numVerts = (uint32_t)numVerts;

    if(drawcall->flags & DrawFlags::Instanced)
      m_PostVSData[eventId].gsout.numVerts /= RDCMAX(1U, drawcall->numInstances);

    m_PostVSData[eventId].gsout.instData = instData;
  }

  SAFE_RELEASE(soSig);
}
// Reads the thumbnail stored in the first chunk of the capture file and returns it encoded in the
// requested format.
//
// type:    the image format to return the thumbnail in (JPG, PNG, TGA or BMP).
// maxsize: if non-zero, the largest allowed width/height in pixels. A larger thumbnail is scaled
//          down, preserving its aspect ratio. 0 means no limit.
//
// Returns the encoded image bytes, or an empty array if the file can't be read, contains no
// thumbnail, the stored JPEG can't be decoded, or the requested format is unsupported.
rdctype::array<byte> CaptureFile::GetThumbnail(FileType type, uint32_t maxsize)
{
  rdctype::array<byte> buf;

  Serialiser ser(Filename(), Serialiser::READING, false);

  if(ser.HasError())
    return buf;

  ser.Rewind();

  int chunkType = ser.PushContext(NULL, NULL, 1, false);

  if(chunkType != THUMBNAIL_DATA)
    return buf;

  bool HasThumbnail = false;
  ser.Serialise(NULL, HasThumbnail);

  if(!HasThumbnail)
    return buf;

  byte *jpgbuf = NULL;
  size_t thumblen = 0;
  uint32_t thumbwidth = 0, thumbheight = 0;
  {
    ser.Serialise("ThumbWidth", thumbwidth);
    ser.Serialise("ThumbHeight", thumbheight);
    ser.SerialiseBuffer("ThumbnailPixels", jpgbuf, thumblen);
  }

  if(jpgbuf == NULL)
    return buf;

  // if the desired output is jpg and either there's no max size or it's already satisfied
  // (a thumbnail exactly maxsize in a dimension is within the limit), return the data directly
  // instead of paying for a lossy decode/re-encode round trip.
  if(type == FileType::JPG &&
     (maxsize == 0 || (maxsize >= thumbwidth && maxsize >= thumbheight)))
  {
    create_array_init(buf, thumblen, jpgbuf);
  }
  else
  {
    // otherwise we need to decode, resample maybe, and re-encode
    int w = (int)thumbwidth;
    int h = (int)thumbheight;
    int comp = 3;
    byte *thumbpixels =
        jpgd::decompress_jpeg_image_from_memory(jpgbuf, (int)thumblen, &w, &h, &comp, 3);

    // the stored data may be corrupt - don't hand a NULL or empty image to the resizer/encoders
    if(thumbpixels == NULL || w <= 0 || h <= 0)
    {
      RDCERR("Failed to decode thumbnail JPEG data");
      free(thumbpixels);
      delete[] jpgbuf;
      return buf;
    }

    // the decoder reports the real size of the pixel buffer it allocated. Use that from here on
    // rather than the separately serialised dimensions, so a mismatch can't cause an over-read.
    thumbwidth = (uint32_t)w;
    thumbheight = (uint32_t)h;

    if(maxsize != 0)
    {
      uint32_t clampedWidth = RDCMIN(maxsize, thumbwidth);
      uint32_t clampedHeight = RDCMIN(maxsize, thumbheight);

      if(clampedWidth != thumbwidth || clampedHeight != thumbheight)
      {
        // preserve aspect ratio, take the smallest scale factor and multiply both
        float scaleX = float(clampedWidth) / float(thumbwidth);
        float scaleY = float(clampedHeight) / float(thumbheight);

        if(scaleX < scaleY)
          clampedHeight = uint32_t(scaleX * thumbheight);
        else if(scaleY < scaleX)
          clampedWidth = uint32_t(scaleY * thumbwidth);

        // extreme aspect ratios can truncate a dimension to 0, keep at least one pixel
        clampedWidth = RDCMAX(1U, clampedWidth);
        clampedHeight = RDCMAX(1U, clampedHeight);

        byte *resizedpixels = (byte *)malloc(3 * (size_t)clampedWidth * (size_t)clampedHeight);

        if(resizedpixels == NULL)
        {
          RDCERR("Failed to allocate memory to resize thumbnail");
          free(thumbpixels);
          delete[] jpgbuf;
          return buf;
        }

        stbir_resize_uint8_srgb(thumbpixels, (int)thumbwidth, (int)thumbheight, 0, resizedpixels,
                                (int)clampedWidth, (int)clampedHeight, 0, 3, -1, 0);

        free(thumbpixels);

        thumbpixels = resizedpixels;
        thumbwidth = clampedWidth;
        thumbheight = clampedHeight;
      }
    }

    std::vector<byte> encodedBytes;

    switch(type)
    {
      case FileType::JPG:
      {
        // the encoder needs an output buffer of at least ~1024 bytes regardless of image size, so
        // add headroom on top of the raw pixel size to cover tiny thumbnails.
        int len = int(thumbwidth * thumbheight * 3) + 1024;
        encodedBytes.resize(len);
        jpge::params p;
        p.m_quality = 90;
        // on success len is updated to the number of bytes actually written
        bool encoded = jpge::compress_image_to_jpeg_file_in_memory(
            &encodedBytes[0], len, (int)thumbwidth, (int)thumbheight, 3, thumbpixels, p);
        if(!encoded)
        {
          RDCERR("Failed to encode thumbnail as JPEG");
          len = 0;
        }
        encodedBytes.resize(len);
        break;
      }
      case FileType::PNG:
      {
        stbi_write_png_to_func(&writeToByteVector, &encodedBytes, (int)thumbwidth,
                               (int)thumbheight, 3, thumbpixels, 0);
        break;
      }
      case FileType::TGA:
      {
        stbi_write_tga_to_func(&writeToByteVector, &encodedBytes, (int)thumbwidth,
                               (int)thumbheight, 3, thumbpixels);
        break;
      }
      case FileType::BMP:
      {
        stbi_write_bmp_to_func(&writeToByteVector, &encodedBytes, (int)thumbwidth,
                               (int)thumbheight, 3, thumbpixels);
        break;
      }
      default:
      {
        RDCERR("Unsupported file type %d in thumbnail fetch", type);
        free(thumbpixels);
        delete[] jpgbuf;
        return buf;
      }
    }

    buf = encodedBytes;

    free(thumbpixels);
  }

  delete[] jpgbuf;

  return buf;
}
bool LZ4Compressor::Write(const void *data, uint64_t numBytes) { // if we encountered a stream error this will be NULL if(!m_CompressBuffer) return false; if(numBytes == 0) return true; // The basic plan is: // Write into page N incrementally until it is completely full. When full, flush it out to lz4 and // swap pages. // This keeps lz4 happy with 64kb of history each time it compresses. // If we are writing some data the crosses the boundary between pages, we write the part that will // fit on one page, flush & swap, write the rest into the next page. if(m_PageOffset + numBytes <= lz4BlockSize) { // simplest path, no page wrapping/spanning at all memcpy(m_Page[0] + m_PageOffset, data, (size_t)numBytes); m_PageOffset += numBytes; return true; } else { // do partial copies that span pages and flush as necessary const byte *src = (const byte *)data; // copy whatever will fit on this page { uint64_t firstBytes = lz4BlockSize - m_PageOffset; memcpy(m_Page[0] + m_PageOffset, src, (size_t)firstBytes); m_PageOffset += firstBytes; numBytes -= firstBytes; src += firstBytes; } bool success = true; while(success && numBytes > 0) { // flush and swap pages success &= FlushPage0(); if(!success) return success; // how many bytes can we copy in this page? uint64_t partialBytes = RDCMIN(lz4BlockSize, numBytes); memcpy(m_Page[0], src, (size_t)partialBytes); // advance the source pointer, dest offset, and remove the bytes we read m_PageOffset += partialBytes; numBytes -= partialBytes; src += partialBytes; } return success; } }
// Serialises the contents of an interop resource for wglDXLockObjectsNV.
//
// Buffers: while writing, the buffer's size and data are read back from GL and serialised; when
// replaying, the serialised data is uploaded into the live buffer, clamped to the live size.
//
// Textures: the format and dimensions are serialised, then each mip (and each face for cubemaps)
// is passed through a shared scratch buffer - read back from GL while writing, uploaded to GL
// when replaying.
//
// Returns true on success. SERIALISE_CHECK_READ_ERRORS() may exit early on a read error (see the
// macro definition for the exact behaviour).
bool WrappedOpenGL::Serialise_wglDXLockObjectsNV(SerialiserType &ser, GLResource Resource)
{
  SERIALISE_ELEMENT(Resource);
  SERIALISE_ELEMENT_LOCAL(textype, Resource.Namespace == eResBuffer
                                       ? eGL_NONE
                                       : m_Textures[GetResourceManager()->GetID(Resource)].curType)
      .Hidden();

  const GLHookSet &gl = m_Real;

  // eGL_NONE marks a buffer, whose contents are the simpler case - handle it and leave
  if(textype == eGL_NONE)
  {
    byte *Contents = NULL;
    uint32_t length = 1;

    // when capturing, query the buffer's size and read its data back through the copy-read
    // binding point, putting the previous binding back afterwards
    if(ser.IsWriting())
    {
      gl.glGetNamedBufferParameterivEXT(Resource.name, eGL_BUFFER_SIZE, (GLint *)&length);

      Contents = new byte[length];

      GLuint savedCopyRead = 0;
      gl.glGetIntegerv(eGL_COPY_READ_BUFFER_BINDING, (GLint *)&savedCopyRead);
      gl.glBindBuffer(eGL_COPY_READ_BUFFER, Resource.name);

      gl.glGetBufferSubData(eGL_COPY_READ_BUFFER, 0, (GLsizeiptr)length, Contents);

      gl.glBindBuffer(eGL_COPY_READ_BUFFER, savedCopyRead);
    }

    SERIALISE_ELEMENT_ARRAY(Contents, length);

    SERIALISE_CHECK_READ_ERRORS();

    // when replaying, push the captured data into the live buffer without writing past its end
    if(IsReplayingAndReading())
    {
      uint32_t liveLength = 1;
      gl.glGetNamedBufferParameterivEXT(Resource.name, eGL_BUFFER_SIZE, (GLint *)&liveLength);

      const uint32_t uploadLength = RDCMIN(length, liveLength);
      gl.glNamedBufferSubData(Resource.name, 0, (GLsizeiptr)uploadLength, Contents);
    }

    return true;
  }

  // ---- texture path ----

  GLuint savedPackBuf = 0, savedUnpackBuf = 0;
  PixelPackState savedPack;
  PixelUnpackState savedUnpack;

  // Stash the pixel pack/unpack buffer bindings and state, then reset them to known values.
  // Only one of pack/unpack is needed for any given direction, but both are always saved and
  // restored to keep the code simple.
  if(ser.IsWriting() || !IsStructuredExporting(m_State))
  {
    gl.glGetIntegerv(eGL_PIXEL_PACK_BUFFER_BINDING, (GLint *)&savedPackBuf);
    gl.glGetIntegerv(eGL_PIXEL_UNPACK_BUFFER_BINDING, (GLint *)&savedUnpackBuf);
    gl.glBindBuffer(eGL_PIXEL_PACK_BUFFER, 0);
    gl.glBindBuffer(eGL_PIXEL_UNPACK_BUFFER, 0);

    savedPack.Fetch(&gl, false);
    savedUnpack.Fetch(&gl, false);

    ResetPixelPackState(gl, false, 1);
    ResetPixelUnpackState(gl, false, 1);
  }

  TextureData &details = m_Textures[GetResourceManager()->GetID(Resource)];
  GLuint texName = Resource.name;

  // the texture's format and size are stored alongside the data for convenience, and checked
  // against what is tracked for the texture
  SERIALISE_ELEMENT_LOCAL(internalFormat, details.internalFormat).Hidden();
  SERIALISE_ELEMENT_LOCAL(width, details.width).Hidden();
  SERIALISE_ELEMENT_LOCAL(height, details.height).Hidden();
  SERIALISE_ELEMENT_LOCAL(depth, details.depth).Hidden();

  RDCASSERT(internalFormat == details.internalFormat, internalFormat, details.internalFormat);
  RDCASSERT(width == details.width, width, details.width);
  RDCASSERT(height == details.height, height, details.height);
  RDCASSERT(depth == details.depth, depth, details.depth);

  GLenum fmt = GetBaseFormat(internalFormat);
  GLenum type = GetDataType(internalFormat);

  GLint dim = details.dimension;

  // size of the top mip, which bounds every smaller mip
  uint32_t size = (uint32_t)GetByteSize(width, height, depth, fmt, type);

  // NOTE(review): the mip count is only queried when replaying; in any other mode it stays 0
  // and the per-mip loop below does not execute - confirm this is intended for the write path.
  const int mips =
      IsReplayingAndReading() ? GetNumMips(gl, textype, texName, width, height, depth) : 0;

  // one scratch allocation, sized for the largest mip, is shared by every subresource to avoid
  // repeatedly allocating and freeing
  byte *scratch = AllocAlignedBuffer(size);

  GLuint prevtex = 0;
  if(!IsStructuredExporting(m_State))
  {
    gl.glGetIntegerv(TextureBinding(details.curType), (GLint *)&prevtex);
    gl.glBindTexture(textype, texName);
  }

  // cubemaps are processed one face at a time, everything else as a single target
  const GLenum cubeFaces[] = {
      eGL_TEXTURE_CUBE_MAP_POSITIVE_X, eGL_TEXTURE_CUBE_MAP_NEGATIVE_X,
      eGL_TEXTURE_CUBE_MAP_POSITIVE_Y, eGL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
      eGL_TEXTURE_CUBE_MAP_POSITIVE_Z, eGL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
  };
  const bool isCubemap = (textype == eGL_TEXTURE_CUBE_MAP);
  int targetCount = ARRAY_COUNT(cubeFaces);
  if(!isCubemap)
    targetCount = 1;

  for(int mip = 0; mip < mips; mip++)
  {
    int w = RDCMAX(details.width >> mip, 1);
    int h = RDCMAX(details.height >> mip, 1);
    int d = RDCMAX(details.depth >> mip, 1);

    // for array textures the depth is the slice count, which does not shrink with the mip level
    if(textype == eGL_TEXTURE_CUBE_MAP_ARRAY || textype == eGL_TEXTURE_1D_ARRAY ||
       textype == eGL_TEXTURE_2D_ARRAY)
      d = details.depth;

    size = (uint32_t)GetByteSize(w, h, d, fmt, type);

    for(int t = 0; t < targetCount; t++)
    {
      GLenum target = textype;
      if(isCubemap)
        target = cubeFaces[t];

      if(ser.IsWriting())
      {
        // glGetTextureImageEXT is avoided as it seems buggy for cubemap faces
        gl.glGetTexImage(target, mip, fmt, type, scratch);
      }

      // the scratch buffer is already big enough, so serialise in place with no allocation
      ser.Serialise("SubresourceContents", scratch, size, SerialiserFlags::NoFlags);

      // only upload when replaying and the data was read without error
      if(!IsReplayingAndReading() || ser.IsErrored())
        continue;

      switch(dim)
      {
        case 1:
          gl.glTextureSubImage1DEXT(texName, target, mip, 0, w, fmt, type, scratch);
          break;
        case 2:
          gl.glTextureSubImage2DEXT(texName, target, mip, 0, 0, w, h, fmt, type, scratch);
          break;
        case 3:
          gl.glTextureSubImage3DEXT(texName, target, mip, 0, 0, 0, w, h, d, fmt, type, scratch);
          break;
        default: break;
      }
    }
  }

  FreeAlignedBuffer(scratch);

  // put back the pixel (un)pack bindings and state that were stashed above
  if(ser.IsWriting() || !IsStructuredExporting(m_State))
  {
    gl.glBindBuffer(eGL_PIXEL_PACK_BUFFER, savedPackBuf);
    gl.glBindBuffer(eGL_PIXEL_UNPACK_BUFFER, savedUnpackBuf);

    savedPack.Apply(&gl, false);
    savedUnpack.Apply(&gl, false);
  }

  if(!IsStructuredExporting(m_State))
    gl.glBindTexture(textype, prevtex);

  SERIALISE_CHECK_READ_ERRORS();

  return true;
}
bool WrappedID3D11DeviceContext::Serialise_UpdateSubresource1(ID3D11Resource *pDstResource, UINT DstSubresource, const D3D11_BOX *pDstBox, const void *pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch, UINT CopyFlags) { SERIALISE_ELEMENT(ResourceId, idx, GetIDForResource(pDstResource)); SERIALISE_ELEMENT(uint32_t, flags, CopyFlags); SERIALISE_ELEMENT(uint32_t, DestSubresource, DstSubresource); D3D11ResourceRecord *record = m_pDevice->GetResourceManager()->GetResourceRecord(idx); D3D11ResourceRecord *parent = record; if(record && record->NumSubResources > (int)DestSubresource) record = (D3D11ResourceRecord *)record->SubResources[DestSubresource]; SERIALISE_ELEMENT(uint8_t, isUpdate, record->DataInSerialiser); ID3D11Resource *DestResource = pDstResource; if(m_State < WRITING) { if(m_pDevice->GetResourceManager()->HasLiveResource(idx)) DestResource = (ID3D11Resource *)m_pDevice->GetResourceManager()->GetLiveResource(idx); } if(isUpdate) { SERIALISE_ELEMENT(uint8_t, HasDestBox, pDstBox != NULL); SERIALISE_ELEMENT_OPT(D3D11_BOX, box, *pDstBox, HasDestBox); SERIALISE_ELEMENT(uint32_t, SourceRowPitch, SrcRowPitch); SERIALISE_ELEMENT(uint32_t, SourceDepthPitch, SrcDepthPitch); size_t srcLength = 0; if(m_State >= WRITING) { RDCASSERT(record); if(WrappedID3D11Buffer::IsAlloc(DestResource)) { srcLength = record->Length; if(HasDestBox) srcLength = RDCMIN((uint32_t)srcLength, pDstBox->right - pDstBox->left); } else { WrappedID3D11Texture1D *tex1 = WrappedID3D11Texture1D::IsAlloc(DestResource) ? (WrappedID3D11Texture1D *)DestResource : NULL; WrappedID3D11Texture2D *tex2 = WrappedID3D11Texture2D::IsAlloc(DestResource) ? (WrappedID3D11Texture2D *)DestResource : NULL; WrappedID3D11Texture3D *tex3 = WrappedID3D11Texture3D::IsAlloc(DestResource) ? 
(WrappedID3D11Texture3D *)DestResource : NULL; UINT mipLevel = GetMipForSubresource(DestResource, DestSubresource); if(tex1) { srcLength = record->Length; if(HasDestBox) srcLength = RDCMIN((uint32_t)srcLength, pDstBox->right - pDstBox->left); } else if(tex2) { D3D11_TEXTURE2D_DESC desc = {0}; tex2->GetDesc(&desc); size_t rows = RDCMAX(1U,desc.Height>>mipLevel); DXGI_FORMAT fmt = desc.Format; if(HasDestBox) rows = (pDstBox->bottom - pDstBox->top); if(IsBlockFormat(fmt)) rows = RDCMAX((size_t)1, rows/4); srcLength = SourceRowPitch*rows; } else if(tex3) { D3D11_TEXTURE3D_DESC desc = {0}; tex3->GetDesc(&desc); size_t slices = RDCMAX(1U,desc.Depth>>mipLevel); srcLength = SourceDepthPitch*slices; if(HasDestBox) srcLength = SourceDepthPitch*(pDstBox->back - pDstBox->front); } else { RDCERR("UpdateSubResource on unexpected resource type"); } }