size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) { if (!m_ready) // Make sure we initialized OK return 0; // Clamp srcRect to 640x528. BPS: The Strike tries to encode an 800x600 // texture, which is invalid. EFBRectangle correctSrc = srcRect; correctSrc.ClampUL(0, 0, EFB_WIDTH, EFB_HEIGHT); // Validate source rect size if (correctSrc.GetWidth() <= 0 || correctSrc.GetHeight() <= 0) return 0; HRESULT hr; unsigned int blockW = BLOCK_WIDTHS[dstFormat]; unsigned int blockH = BLOCK_HEIGHTS[dstFormat]; // Round up source dims to multiple of block size unsigned int actualWidth = correctSrc.GetWidth() / (scaleByHalf ? 2 : 1); actualWidth = (actualWidth + blockW-1) & ~(blockW-1); unsigned int actualHeight = correctSrc.GetHeight() / (scaleByHalf ? 2 : 1); actualHeight = (actualHeight + blockH-1) & ~(blockH-1); unsigned int numBlocksX = actualWidth/blockW; unsigned int numBlocksY = actualHeight/blockH; unsigned int cacheLinesPerRow; if (dstFormat == 0x6) // RGBA takes two cache lines per block; all others take one cacheLinesPerRow = numBlocksX*2; else cacheLinesPerRow = numBlocksX; _assert_msg_(VIDEO, cacheLinesPerRow*32 <= MAX_BYTES_PER_BLOCK_ROW, "cache lines per row sanity check"); unsigned int totalCacheLines = cacheLinesPerRow * numBlocksY; _assert_msg_(VIDEO, totalCacheLines*32 <= MAX_BYTES_PER_ENCODE, "total encode size sanity check"); size_t encodeSize = 0; // Reset API g_renderer->ResetAPIState(); // Set up all the state for EFB encoding { D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow * 8), FLOAT(numBlocksY)); D3D::context->RSSetViewports(1, &vp); EFBRectangle fullSrcRect; fullSrcRect.left = 0; fullSrcRect.top = 0; fullSrcRect.right = EFB_WIDTH; fullSrcRect.bottom = EFB_HEIGHT; TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr); ID3D11ShaderResourceView* pEFB = (srcFormat == PEControl::Z24) ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : // FIXME: Instead of resolving EFB, it would be better to pick out a // single sample from each pixel. The game may break if it isn't // expecting the blurred edges around multisampled shapes. FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); EFBEncodeParams params; params.SrcLeft = correctSrc.left; params.SrcTop = correctSrc.top; params.DestWidth = actualWidth; params.ScaleFactor = scaleByHalf ? 2 : 1; D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, ¶ms, 0, 0); D3D::stateman->SetPixelConstants(m_encodeParams); // Use linear filtering if (bScaleByHalf), use point filtering otherwise if (scaleByHalf) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), Renderer::GetTargetWidth(), Renderer::GetTargetHeight(), SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf), VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); // Copy to staging buffer D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow * 8, numBlocksY, 1); D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); // Transfer staging buffer to GameCube/Wii RAM D3D11_MAPPED_SUBRESOURCE map = { 0 }; hr = D3D::context->Map(m_outStage, 0, D3D11_MAP_READ, 0, &map); CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr); u8* src = (u8*)map.pData; for (unsigned int y = 0; y < numBlocksY; ++y) { memcpy(dst, src, cacheLinesPerRow*32); dst += bpmem.copyMipMapStrideChannels*32; src += map.RowPitch; } D3D::context->Unmap(m_outStage, 0); encodeSize = bpmem.copyMipMapStrideChannels*32 * numBlocksY; } // Restore API g_renderer->RestoreAPIState(); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); return encodeSize; }
void D3DPostProcessor::CopyTexture(const TargetRectangle& dst_rect, uintptr_t dst_tex, const TargetRectangle& src_rect, uintptr_t src_tex, const TargetSize& src_size, int src_layer, bool is_depth_texture, bool force_shader_copy) { D3DTexture2D* dst_texture = reinterpret_cast<D3DTexture2D*>(dst_tex); D3DTexture2D* src_texture = reinterpret_cast<D3DTexture2D*>(src_tex); // If the dimensions are the same, we can copy instead of using a shader. bool scaling = (dst_rect.GetWidth() != src_rect.GetWidth() || dst_rect.GetHeight() != src_rect.GetHeight()); if (!scaling && !force_shader_copy) { D3D12_BOX srcbox = { static_cast<UINT>(src_rect.left), static_cast<UINT>(src_rect.top), 0, static_cast<UINT>(src_rect.right), static_cast<UINT>(src_rect.bottom), 1 }; D3D12_TEXTURE_COPY_LOCATION dst = CD3DX12_TEXTURE_COPY_LOCATION(dst_texture->GetTex(), 0); D3D12_TEXTURE_COPY_LOCATION src = CD3DX12_TEXTURE_COPY_LOCATION(src_texture->GetTex(), 0); dst_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_DEST); src_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); if (src_layer < 0) { // Copy all layers for (unsigned int layer = 0; layer < FramebufferManager::GetEFBLayers(); layer++) { src.SubresourceIndex = D3D12CalcSubresource(0, layer, 0, 1, FramebufferManager::GetEFBLayers()); dst.SubresourceIndex = src.SubresourceIndex; D3D::current_command_list->CopyTextureRegion(&dst, dst_rect.left, dst_rect.top, 0, &src, &srcbox); } } else { // Copy single layer to layer 0 D3D::current_command_list->CopyTextureRegion(&dst, dst_rect.left, dst_rect.top, 0, &src, &srcbox); } } else { D3D::SetViewportAndScissor(dst_rect.left, dst_rect.top, dst_rect.GetWidth(), dst_rect.GetHeight()); dst_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &dst_texture->GetRTV(), FALSE, nullptr); if (scaling) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); D3D12_SHADER_BYTECODE bytecode = {}; D3D::DrawShadedTexQuad(src_texture, src_rect.AsRECT(), src_size.width, src_size.height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), (src_layer < 0) ? StaticShaderCache::GetCopyGeometryShader() : bytecode, 0, dst_texture->GetFormat(), false, dst_texture->GetMultisampled()); } }
void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half) { if (!m_ready) // Make sure we initialized OK return; HRESULT hr; // Resolve MSAA targets before copying. // FIXME: Instead of resolving EFB, it would be better to pick out a // single sample from each pixel. The game may break if it isn't // expecting the blurred edges around multisampled shapes. ID3D11ShaderResourceView* pEFB = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); // Reset API g_renderer->ResetAPIState(); // Set up all the state for EFB encoding { const u32 words_per_row = bytes_per_row / sizeof(u32); D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(words_per_row), FLOAT(num_blocks_y)); D3D::context->RSSetViewports(1, &vp); constexpr EFBRectangle fullSrcRect(0, 0, EFB_WIDTH, EFB_HEIGHT); TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr); EFBEncodeParams params; params.SrcLeft = src_rect.left; params.SrcTop = src_rect.top; params.DestWidth = native_width; params.ScaleFactor = scale_by_half ? 2 : 1; D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, ¶ms, 0, 0); D3D::stateman->SetPixelConstants(m_encodeParams); // We also linear filtering for both box filtering and downsampling higher resolutions to 1x // TODO: This only produces perfect downsampling for 1.5x and 2x IR, other resolution will // need more complex down filtering to average all pixels and produce the correct result. // Also, box filtering won't be correct for anything other than 1x IR if (scale_by_half || g_ActiveConfig.iEFBScale != SCALE_1X) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), GetEncodingPixelShader(format), VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); // Copy to staging buffer D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, words_per_row, num_blocks_y, 1); D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); // Transfer staging buffer to GameCube/Wii RAM D3D11_MAPPED_SUBRESOURCE map = {0}; hr = D3D::context->Map(m_outStage, 0, D3D11_MAP_READ, 0, &map); CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr); u8* src = (u8*)map.pData; u32 readStride = std::min(bytes_per_row, map.RowPitch); for (unsigned int y = 0; y < num_blocks_y; ++y) { memcpy(dst, src, readStride); dst += memory_stride; src += map.RowPitch; } D3D::context->Unmap(m_outStage, 0); } // Restore API g_renderer->RestoreAPIState(); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); }
void D3DPostProcessingShader::Draw(PostProcessor* p, const TargetRectangle& dst_rect, const TargetSize& dst_size, uintptr_t dst_tex, const TargetRectangle& src_rect, const TargetSize& src_size, uintptr_t src_tex, uintptr_t src_depth_tex, int src_layer, float gamma) { D3DPostProcessor* parent = reinterpret_cast<D3DPostProcessor*>(p); D3DTexture2D* dst_texture = reinterpret_cast<D3DTexture2D*>(dst_tex); D3DTexture2D* src_texture = reinterpret_cast<D3DTexture2D*>(src_tex); D3DTexture2D* src_depth_texture = reinterpret_cast<D3DTexture2D*>(src_depth_tex); _dbg_assert_(VIDEO, m_ready && m_internal_size == src_size); // Determine whether we can skip the final copy by writing directly to the output texture, if the last pass is not scaled. bool skip_final_copy = !IsLastPassScaled() && (dst_texture != src_texture || !m_last_pass_uses_color_buffer) && !m_prev_frame_enabled; // Draw each pass. PostProcessor::InputTextureSizeArray input_sizes; TargetRectangle output_rect = {}; TargetSize output_size; D3D12_CPU_DESCRIPTOR_HANDLE base_sampler_cpu; D3D12_GPU_DESCRIPTOR_HANDLE base_sampler_gpu; int required_handles = (int)(POST_PROCESSING_MAX_TEXTURE_INPUTS * m_passes.size()); DX12::D3D::sampler_descriptor_heap_mgr->AllocateGroup(required_handles, &base_sampler_cpu, &base_sampler_gpu); D3D12_CPU_DESCRIPTOR_HANDLE base_texture_cpu; D3D12_GPU_DESCRIPTOR_HANDLE base_texture_gpu; // On the first texture in the group, we need to allocate the space in the descriptor heap. if (!DX12::D3D::gpu_descriptor_heap_mgr->AllocateTemporary(required_handles, &base_texture_cpu, &base_texture_gpu)) { // Kick command buffer before attempting to allocate again. This is slow. D3D::command_list_mgr->ExecuteQueuedWork(); if (!D3D::gpu_descriptor_heap_mgr->AllocateTemporary(required_handles, &base_texture_cpu, &base_texture_gpu)) { PanicAlert("Failed to allocate temporary descriptors."); return; } } MapAndUpdateConfigurationBuffer(); for (size_t pass_index = 0; pass_index < m_passes.size(); pass_index++) { const RenderPassData& pass = m_passes[pass_index]; bool is_last_pass = (pass_index == m_last_pass_index); if (!pass.enabled) continue; D3D12_CPU_DESCRIPTOR_HANDLE sampler_cpu = { base_sampler_cpu.ptr + pass_index * POST_PROCESSING_MAX_TEXTURE_INPUTS * D3D::sampler_descriptor_size}; D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu = { base_sampler_gpu.ptr + pass_index * POST_PROCESSING_MAX_TEXTURE_INPUTS * D3D::sampler_descriptor_size }; D3D12_CPU_DESCRIPTOR_HANDLE texture_cpu = { base_texture_cpu.ptr + pass_index * POST_PROCESSING_MAX_TEXTURE_INPUTS * D3D::resource_descriptor_size }; D3D12_GPU_DESCRIPTOR_HANDLE texture_gpu = { base_texture_gpu.ptr + pass_index * POST_PROCESSING_MAX_TEXTURE_INPUTS * D3D::resource_descriptor_size }; // Bind inputs to pipeline for (size_t i = 0; i < pass.inputs.size(); i++) { const InputBinding& input = pass.inputs[i]; D3D12_CPU_DESCRIPTOR_HANDLE textureDestDescriptor; D3DTexture2D* input_texture = nullptr; textureDestDescriptor.ptr = texture_cpu.ptr + i * D3D::resource_descriptor_size; switch (input.type) { case POST_PROCESSING_INPUT_TYPE_COLOR_BUFFER: input_texture = src_texture; input_sizes[i] = src_size; break; case POST_PROCESSING_INPUT_TYPE_DEPTH_BUFFER: input_texture = src_depth_texture; input_sizes[i] = src_size; break; case POST_PROCESSING_INPUT_TYPE_PASS_FRAME_OUTPUT: if (m_prev_frame_enabled) { input_texture = reinterpret_cast<D3DTexture2D*>(GetPrevColorFrame(input.frame_index)->GetInternalObject()); input_sizes[i] = m_prev_frame_size; } break; case POST_PROCESSING_INPUT_TYPE_PASS_DEPTH_FRAME_OUTPUT: if (m_prev_depth_enabled) { input_texture = reinterpret_cast<D3DTexture2D*>(GetPrevDepthFrame(input.frame_index)->GetInternalObject()); input_sizes[i] = m_prev_depth_frame_size; } break; default: TextureCacheBase::TCacheEntryBase* i_texture = input.texture != nullptr ? input.texture : input.prev_texture; if (i_texture != nullptr) { input_texture = reinterpret_cast<D3DTexture2D*>(i_texture->GetInternalObject()); input_sizes[i] = input.size; } else { input_texture = src_texture; input_sizes[i] = src_size; } break; } if (input_texture) { input_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } DX12::D3D::device->CopyDescriptorsSimple( 1, textureDestDescriptor, input_texture != nullptr ? input_texture->GetSRVCPUShadow() : DX12::D3D::null_srv_cpu_shadow, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ); D3D12_CPU_DESCRIPTOR_HANDLE destinationDescriptor; destinationDescriptor.ptr = sampler_cpu.ptr + i * D3D::sampler_descriptor_size; DX12::D3D::device->CopyDescriptorsSimple( 1, destinationDescriptor, parent->GetSamplerHandle(static_cast<UINT>(input.texture_sampler) - 1), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ); } D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, texture_gpu); D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SAMPLER, sampler_gpu); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true); // If this is the last pass and we can skip the final copy, write directly to output texture. if (is_last_pass && skip_final_copy) { // The target rect may differ from the source. output_rect = dst_rect; output_size = dst_size; dst_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &dst_texture->GetRTV(), FALSE, nullptr); } else { output_rect = PostProcessor::ScaleTargetRectangle(API_D3D11, src_rect, pass.output_scale); output_size = pass.output_size; reinterpret_cast<D3DTexture2D*>(pass.output_texture->GetInternalObject())->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &reinterpret_cast<D3DTexture2D*>(pass.output_texture->GetInternalObject())->GetRTV(), FALSE, nullptr); } // Set viewport based on target rect D3D::SetViewportAndScissor(output_rect.left, output_rect.top, output_rect.GetWidth(), output_rect.GetHeight()); parent->MapAndUpdateUniformBuffer(input_sizes, output_rect, output_size, src_rect, src_size, src_layer, gamma); // Select geometry shader based on layers D3D12_SHADER_BYTECODE geometry_shader = {}; if (src_layer < 0 && m_internal_layers > 1) geometry_shader = parent->GetGeometryShader(); // Draw pass D3D::DrawShadedTexQuad(nullptr, src_rect.AsRECT(), src_size.width, src_size.height, reinterpret_cast<RenderPassDx12Data*>(pass.shader)->m_shader_bytecode, parent->GetVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), geometry_shader, std::max(src_layer, 0), DXGI_FORMAT_R8G8B8A8_UNORM, false, dst_texture->GetMultisampled()); } // Copy the last pass output to the target if not done already IncrementFrame(); if (m_prev_depth_enabled && src_depth_tex) { TargetRectangle dst; dst.left = 0; dst.right = m_prev_depth_frame_size.width; dst.top = 0; dst.bottom = m_prev_depth_frame_size.height; parent->CopyTexture(dst, GetPrevDepthFrame(0)->GetInternalObject(), output_rect, src_depth_tex, src_size, src_layer, true, true); } if (!skip_final_copy) { RenderPassData& final_pass = m_passes[m_last_pass_index]; if (m_prev_frame_enabled) { TargetRectangle dst; dst.left = 0; dst.right = m_prev_frame_size.width; dst.top = 0; dst.bottom = m_prev_frame_size.height; parent->CopyTexture(dst, GetPrevColorFrame(0)->GetInternalObject(), output_rect, final_pass.output_texture->GetInternalObject(), final_pass.output_size, src_layer, false, true); } parent->CopyTexture(dst_rect, dst_tex, output_rect, final_pass.output_texture->GetInternalObject(), final_pass.output_size, src_layer); } }
void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half) { // Resolve MSAA targets before copying. // FIXME: Instead of resolving EFB, it would be better to pick out a // single sample from each pixel. The game may break if it isn't // expecting the blurred edges around multisampled shapes. ID3D11ShaderResourceView* pEFB = params.depth ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); // Reset API g_renderer->ResetAPIState(); // Set up all the state for EFB encoding { const u32 words_per_row = bytes_per_row / sizeof(u32); D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(words_per_row), FLOAT(num_blocks_y)); D3D::context->RSSetViewports(1, &vp); constexpr EFBRectangle fullSrcRect(0, 0, EFB_WIDTH, EFB_HEIGHT); TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); D3D::context->OMSetRenderTargets( 1, &static_cast<DXTexture*>(m_encoding_render_texture.get())->GetRawTexIdentifier()->GetRTV(), nullptr); EFBEncodeParams encode_params; encode_params.SrcLeft = src_rect.left; encode_params.SrcTop = src_rect.top; encode_params.DestWidth = native_width; encode_params.ScaleFactor = scale_by_half ? 2 : 1; encode_params.y_scale = params.y_scale; D3D::context->UpdateSubresource(m_encode_params, 0, nullptr, &encode_params, 0, 0); D3D::stateman->SetPixelConstants(m_encode_params); // We also linear filtering for both box filtering and downsampling higher resolutions to 1x // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. // Also, box filtering won't be correct for anything other than 1x IR if (scale_by_half || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), GetEncodingPixelShader(params), VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); // Copy to staging buffer MathUtil::Rectangle<int> copy_rect(0, 0, words_per_row, num_blocks_y); m_encoding_readback_texture->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); m_encoding_readback_texture->Flush(); if (m_encoding_readback_texture->Map()) { m_encoding_readback_texture->ReadTexels(copy_rect, dst, memory_stride); m_encoding_readback_texture->Unmap(); } } // Restore API FramebufferManager::BindEFBRenderTarget(); g_renderer->RestoreAPIState(); }