namespace DX11 { static std::unique_ptr<TextureEncoder> g_encoder; const size_t MAX_COPY_BUFFERS = 32; ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = { 0 }; TextureCache::TCacheEntry::~TCacheEntry() { texture->Release(); } void TextureCache::TCacheEntry::Bind(unsigned int stage) { D3D::stateman->SetTexture(stage, texture->GetSRV()); } bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level) { // TODO: Somehow implement this (D3DX11 doesn't support dumping individual LODs) static bool warn_once = true; if (level && warn_once) { WARN_LOG(VIDEO, "Dumping individual LOD not supported by D3D11 backend!"); warn_once = false; return false; } ID3D11Texture2D* pNewTexture = nullptr; ID3D11Texture2D* pSurface = texture->GetTex(); D3D11_TEXTURE2D_DESC desc; pSurface->GetDesc(&desc); desc.BindFlags = 0; desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; desc.Usage = D3D11_USAGE_STAGING; HRESULT hr = D3D::device->CreateTexture2D(&desc, nullptr, &pNewTexture); bool saved_png = false; if (SUCCEEDED(hr) && pNewTexture) { D3D::context->CopyResource(pNewTexture, pSurface); D3D11_MAPPED_SUBRESOURCE map; hr = D3D::context->Map(pNewTexture, 0, D3D11_MAP_READ_WRITE, 0, &map); if (SUCCEEDED(hr)) { saved_png = TextureToPng((u8*)map.pData, map.RowPitch, filename, desc.Width, desc.Height); D3D::context->Unmap(pNewTexture, 0); } SAFE_RELEASE(pNewTexture); } return saved_png; } void TextureCache::TCacheEntry::CopyRectangleFromTexture( const TCacheEntryBase* source, const MathUtil::Rectangle<int> &srcrect, const MathUtil::Rectangle<int> &dstrect) { TCacheEntry* srcentry = (TCacheEntry*)source; if (srcrect.GetWidth() == dstrect.GetWidth() && srcrect.GetHeight() == dstrect.GetHeight()) { const D3D11_BOX *psrcbox = nullptr; D3D11_BOX srcbox; if (srcrect.left != 0 || srcrect.top != 0) { srcbox.left = srcrect.left; srcbox.top = srcrect.top; srcbox.right = srcrect.right; srcbox.bottom = srcrect.bottom; psrcbox = &srcbox; } D3D::context->CopySubresourceRegion( texture->GetTex(), 0, dstrect.left, dstrect.top, 0, srcentry->texture->GetTex(), 0, psrcbox); return; } else if (!config.rendertarget) { return; } g_renderer->ResetAPIState(); // reset any game specific settings const D3D11_VIEWPORT vp = CD3D11_VIEWPORT( float(dstrect.left), float(dstrect.top), float(dstrect.GetWidth()), float(dstrect.GetHeight())); D3D::context->OMSetRenderTargets(1, &texture->GetRTV(), nullptr); D3D::context->RSSetViewports(1, &vp); D3D::SetLinearCopySampler(); D3D11_RECT srcRC; srcRC.left = srcrect.left; srcRC.right = srcrect.right; srcRC.top = srcrect.top; srcRC.bottom = srcrect.bottom; D3D::drawShadedTexQuad(srcentry->texture->GetSRV(), &srcRC, srcentry->config.width, srcentry->config.height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0, 0); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); g_renderer->RestoreAPIState(); } void TextureCache::TCacheEntry::Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int level) { unsigned int src_pitch = 4 * expanded_width; D3D::ReplaceRGBATexture2D(texture->GetTex(), TextureCache::temp, width, height, src_pitch, level, usage); } TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConfig& config) { if (config.rendertarget) { return new TCacheEntry(config, D3DTexture2D::Create(config.width, config.height, (D3D11_BIND_FLAG)((int)D3D11_BIND_RENDER_TARGET | (int)D3D11_BIND_SHADER_RESOURCE), D3D11_USAGE_DEFAULT, DXGI_FORMAT_R8G8B8A8_UNORM, 1, config.layers)); } else { D3D11_USAGE usage = D3D11_USAGE_DEFAULT; D3D11_CPU_ACCESS_FLAG cpu_access = (D3D11_CPU_ACCESS_FLAG)0; if (config.levels == 1) { usage = D3D11_USAGE_DYNAMIC; cpu_access = D3D11_CPU_ACCESS_WRITE; } const D3D11_TEXTURE2D_DESC texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, config.width, config.height, 1, config.levels, D3D11_BIND_SHADER_RESOURCE, usage, cpu_access); ID3D11Texture2D *pTexture; const HRESULT hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &pTexture); CHECK(SUCCEEDED(hr), "Create texture of the TextureCache"); TCacheEntry* const entry = new TCacheEntry(config, new D3DTexture2D(pTexture, D3D11_BIND_SHADER_RESOURCE)); entry->usage = usage; // TODO: better debug names D3D::SetDebugObjectName((ID3D11DeviceChild*)entry->texture->GetTex(), "a texture of the TextureCache"); D3D::SetDebugObjectName((ID3D11DeviceChild*)entry->texture->GetSRV(), "shader resource view of a texture of the TextureCache"); SAFE_RELEASE(pTexture); return entry; } } void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool scaleByHalf, unsigned int cbufid, const float *colmat) { g_renderer->ResetAPIState(); // stretch picture with increased internal resolution const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)config.width, (float)config.height); D3D::context->RSSetViewports(1, &vp); // set transformation if (nullptr == efbcopycbuf[cbufid]) { const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); D3D11_SUBRESOURCE_DATA data; data.pSysMem = colmat; HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]); CHECK(SUCCEEDED(hr), "Create efb copy constant buffer %d", cbufid); D3D::SetDebugObjectName((ID3D11DeviceChild*)efbcopycbuf[cbufid], "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); } D3D::stateman->SetPixelConstants(efbcopycbuf[cbufid]); const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect); // TODO: try targetSource.asRECT(); const D3D11_RECT sourcerect = CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom); // Use linear filtering if (bScaleByHalf), use point filtering otherwise if (scaleByHalf) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); // Make sure we don't draw with the texture set as both a source and target. // (This can happen because we don't unbind textures when we free them.) D3D::stateman->UnsetTexture(texture->GetSRV()); D3D::context->OMSetRenderTargets(1, &texture->GetRTV(), nullptr); // Create texture copy D3D::drawShadedTexQuad( (srcFormat == PEControl::Z24 ? FramebufferManager::GetEFBDepthTexture() : FramebufferManager::GetEFBColorTexture())->GetSRV(), &sourcerect, Renderer::GetTargetWidth(), Renderer::GetTargetHeight(), srcFormat == PEControl::Z24 ? PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true), VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); g_renderer->RestoreAPIState(); } void TextureCache::CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) { g_encoder->Encode(dst, format, native_width, bytes_per_row, num_blocks_y, memory_stride, srcFormat, srcRect, isIntensity, scaleByHalf); } const char palette_shader[] = R"HLSL( sampler samp0 : register(s0); Texture2DArray Tex0 : register(t0); Buffer<uint> Tex1 : register(t1); uniform float Multiply; uint Convert3To8(uint v) { // Swizzle bits: 00000123 -> 12312312 return (v << 5) | (v << 2) | (v >> 1); } uint Convert4To8(uint v) { // Swizzle bits: 00001234 -> 12341234 return (v << 4) | v; } uint Convert5To8(uint v) { // Swizzle bits: 00012345 -> 12345123 return (v << 3) | (v >> 2); } uint Convert6To8(uint v) { // Swizzle bits: 00123456 -> 12345612 return (v << 2) | (v >> 4); } float4 DecodePixel_RGB5A3(uint val) { int r,g,b,a; if ((val&0x8000)) { r=Convert5To8((val>>10) & 0x1f); g=Convert5To8((val>>5 ) & 0x1f); b=Convert5To8((val ) & 0x1f); a=0xFF; } else { a=Convert3To8((val>>12) & 0x7); r=Convert4To8((val>>8 ) & 0xf); g=Convert4To8((val>>4 ) & 0xf); b=Convert4To8((val ) & 0xf); } return float4(r, g, b, a) / 255; } float4 DecodePixel_RGB565(uint val) { int r, g, b, a; r = Convert5To8((val >> 11) & 0x1f); g = Convert6To8((val >> 5) & 0x3f); b = Convert5To8((val) & 0x1f); a = 0xFF; return float4(r, g, b, a) / 255; } float4 DecodePixel_IA8(uint val) { int i = val & 0xFF; int a = val >> 8; return float4(i, i, i, a) / 255; } void main( out float4 ocol0 : SV_Target, in float4 pos : SV_Position, in float3 uv0 : TEXCOORD0) { uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r; src = Tex1.Load(src); src = ((src << 8) & 0xFF00) | (src >> 8); ocol0 = DECODE(src); } )HLSL"; void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) { g_renderer->ResetAPIState(); // stretch picture with increased internal resolution const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)unconverted->config.width, (float)unconverted->config.height); D3D::context->RSSetViewports(1, &vp); D3D11_BOX box{ 0, 0, 0, 512, 1, 1 }; D3D::context->UpdateSubresource(palette_buf, 0, &box, palette, 0, 0); D3D::stateman->SetTexture(1, palette_buf_srv); // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.) float params[4] = { (unconverted->format & 0xf) == 0 ? 15.f : 255.f }; D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, ¶ms, 0, 0); D3D::stateman->SetPixelConstants(palette_uniform); const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, unconverted->config.width, unconverted->config.height); D3D::SetPointCopySampler(); // Make sure we don't draw with the texture set as both a source and target. // (This can happen because we don't unbind textures when we free them.) D3D::stateman->UnsetTexture(static_cast<TCacheEntry*>(entry)->texture->GetSRV()); D3D::context->OMSetRenderTargets(1, &static_cast<TCacheEntry*>(entry)->texture->GetRTV(), nullptr); // Create texture copy D3D::drawShadedTexQuad( static_cast<TCacheEntry*>(unconverted)->texture->GetSRV(), &sourcerect, unconverted->config.width, unconverted->config.height, palette_pixel_shader[format], VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); g_renderer->RestoreAPIState(); } ID3D11PixelShader *GetConvertShader(const char* Type) { std::string shader = "#define DECODE DecodePixel_"; shader.append(Type); shader.append("\n"); shader.append(palette_shader); return D3D::CompileAndCreatePixelShader(shader); } TextureCache::TextureCache() { // FIXME: Is it safe here? g_encoder = std::make_unique<PSTextureEncoder>(); g_encoder->Init(); palette_buf = nullptr; palette_buf_srv = nullptr; palette_uniform = nullptr; palette_pixel_shader[GX_TL_IA8] = GetConvertShader("IA8"); palette_pixel_shader[GX_TL_RGB565] = GetConvertShader("RGB565"); palette_pixel_shader[GX_TL_RGB5A3] = GetConvertShader("RGB5A3"); auto lutBd = CD3D11_BUFFER_DESC(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE); HRESULT hr = D3D::device->CreateBuffer(&lutBd, nullptr, &palette_buf); CHECK(SUCCEEDED(hr), "create palette decoder lut buffer"); D3D::SetDebugObjectName(palette_buf, "texture decoder lut buffer"); // TODO: C14X2 format. auto outlutUavDesc = CD3D11_SHADER_RESOURCE_VIEW_DESC(palette_buf, DXGI_FORMAT_R16_UINT, 0, 256, 0); hr = D3D::device->CreateShaderResourceView(palette_buf, &outlutUavDesc, &palette_buf_srv); CHECK(SUCCEEDED(hr), "create palette decoder lut srv"); D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv"); const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(16, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &palette_uniform); CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer"); D3D::SetDebugObjectName((ID3D11DeviceChild*)palette_uniform, "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); }
namespace DX12 { static std::unique_ptr<TextureEncoder> s_encoder; static std::unique_ptr<TextureScaler> s_scaler; static const unsigned int s_max_copy_buffers = 32; static ID3D12Resource* s_efb_copy_buffers[s_max_copy_buffers] = {}; static ID3D12Resource* s_texture_cache_entry_readback_buffer = nullptr; static void* s_texture_cache_entry_readback_buffer_data = nullptr; static UINT s_texture_cache_entry_readback_buffer_size = 0; TextureCache::TCacheEntry::~TCacheEntry() { m_texture->Release(); SAFE_RELEASE(m_nrm_texture); } void TextureCache::TCacheEntry::Bind(unsigned int stage, unsigned int lastTexture) { static bool s_first_texture_in_group = true; static D3D12_CPU_DESCRIPTOR_HANDLE s_group_base_texture_cpu_handle; static D3D12_GPU_DESCRIPTOR_HANDLE s_group_base_texture_gpu_handle; const bool use_materials = g_ActiveConfig.HiresMaterialMapsEnabled(); if (lastTexture == 0 && !use_materials) { DX12::D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, this->m_texture_srv_gpu_handle); return; } if (s_first_texture_in_group) { const unsigned int num_handles = use_materials ? 16 : 8; // On the first texture in the group, we need to allocate the space in the descriptor heap. DX12::D3D::gpu_descriptor_heap_mgr->AllocateGroup(&s_group_base_texture_cpu_handle, num_handles, &s_group_base_texture_gpu_handle, nullptr, true); // Pave over space with null textures. for (unsigned int i = 0; i < (8 + lastTexture); i++) { D3D12_CPU_DESCRIPTOR_HANDLE nullDestDescriptor; nullDestDescriptor.ptr = s_group_base_texture_cpu_handle.ptr + i * D3D::resource_descriptor_size; DX12::D3D::device12->CopyDescriptorsSimple( 1, nullDestDescriptor, DX12::D3D::null_srv_cpu_shadow, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ); } // Future binding calls will not be the first texture in the group.. until stage == count, below. s_first_texture_in_group = false; } D3D12_CPU_DESCRIPTOR_HANDLE textureDestDescriptor; textureDestDescriptor.ptr = s_group_base_texture_cpu_handle.ptr + stage * D3D::resource_descriptor_size; DX12::D3D::device12->CopyDescriptorsSimple( 1, textureDestDescriptor, this->m_texture_srv_gpu_handle_cpu_shadow, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ); if (m_nrm_texture && use_materials) { textureDestDescriptor.ptr = s_group_base_texture_cpu_handle.ptr + ((8 + stage) * D3D::resource_descriptor_size); DX12::D3D::device12->CopyDescriptorsSimple( 1, textureDestDescriptor, this->m_nrm_texture_srv_gpu_handle_cpu_shadow, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ); } // Stage is zero-based, count is one-based if (stage == lastTexture) { // On the last texture, we need to actually bind the table. DX12::D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, s_group_base_texture_gpu_handle); // Then mark that the next binding call will be the first texture in a group. s_first_texture_in_group = true; } } bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level) { // EXISTINGD3D11TODO: Somehow implement this (D3DX11 doesn't support dumping individual LODs) static bool warn_once = true; if (level && warn_once) { WARN_LOG(VIDEO, "Dumping individual LOD not supported by D3D11 backend!"); warn_once = false; return false; } D3D12_RESOURCE_DESC textureDesc = m_texture->GetTex12()->GetDesc(); UINT requiredReadbackBufferSize = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT + ((textureDesc.Width * 4 + D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1) & ~(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)) * textureDesc.Height; if (s_texture_cache_entry_readback_buffer_size < requiredReadbackBufferSize) { s_texture_cache_entry_readback_buffer_size = requiredReadbackBufferSize; // We know the readback buffer won't be in use right now, since we wait on this thread // for the GPU to finish execution right after copying to it. SAFE_RELEASE(s_texture_cache_entry_readback_buffer); } if (!s_texture_cache_entry_readback_buffer_size) { CheckHR( D3D::device12->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &CD3DX12_RESOURCE_DESC::Buffer(s_texture_cache_entry_readback_buffer_size), D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer) ) ); CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &s_texture_cache_entry_readback_buffer_data)); } bool saved_png = false; m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); D3D12_TEXTURE_COPY_LOCATION dst_location = {}; dst_location.pResource = s_texture_cache_entry_readback_buffer; dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; dst_location.PlacedFootprint.Offset = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; dst_location.PlacedFootprint.Footprint.Depth = 1; dst_location.PlacedFootprint.Footprint.Format = textureDesc.Format; dst_location.PlacedFootprint.Footprint.Width = static_cast<UINT>(textureDesc.Width); dst_location.PlacedFootprint.Footprint.Height = textureDesc.Height; dst_location.PlacedFootprint.Footprint.RowPitch = ((textureDesc.Width * 4 + D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1) & ~(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)); D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0); D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr); D3D::command_list_mgr->ExecuteQueuedWork(true); saved_png = TextureToPng( static_cast<u8*>(s_texture_cache_entry_readback_buffer_data) + D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, dst_location.PlacedFootprint.Footprint.RowPitch, filename, dst_location.PlacedFootprint.Footprint.Width, dst_location.PlacedFootprint.Footprint.Height ); return saved_png; } void TextureCache::TCacheEntry::CopyRectangleFromTexture( const TCacheEntryBase* source, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) { TCacheEntry* srcentry = (TCacheEntry*)source; if (src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()) { const D3D12_BOX *psrcbox = nullptr; D3D12_BOX srcbox; if (src_rect.left != 0 || src_rect.top != 0 || src_rect.GetWidth() != srcentry->config.width || src_rect.GetHeight() != srcentry->config.height) { srcbox.left = src_rect.left; srcbox.top = src_rect.top; srcbox.right = src_rect.right; srcbox.bottom = src_rect.bottom; srcbox.front = 0; srcbox.back = 1; psrcbox = &srcbox; } D3D12_TEXTURE_COPY_LOCATION dst = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0); D3D12_TEXTURE_COPY_LOCATION src = CD3DX12_TEXTURE_COPY_LOCATION(srcentry->m_texture->GetTex12(), 0); m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_DEST); srcentry->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); D3D::current_command_list->CopyTextureRegion(&dst, dst_rect.left, dst_rect.top, 0, &src, psrcbox); return; } else if (!config.rendertarget) { return; } const D3D12_VIEWPORT vp12 = { float(dst_rect.left), float(dst_rect.top), float(dst_rect.GetWidth()), float(dst_rect.GetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; D3D::current_command_list->RSSetViewports(1, &vp12); m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &m_texture->GetRTV12(), FALSE, nullptr); D3D::SetLinearCopySampler(); D3D12_RECT srcRC; srcRC.left = src_rect.left; srcRC.right = src_rect.right; srcRC.top = src_rect.top; srcRC.bottom = src_rect.bottom; D3D::DrawShadedTexQuad(srcentry->m_texture, &srcRC, srcentry->config.width, srcentry->config.height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), 1.0, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, m_texture->GetMultisampled()); FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); g_renderer->RestoreAPIState(); } void TextureCache::TCacheEntry::Load(const u8* src, u32 width, u32 height, u32 expanded_width, u32 level) { D3D::ReplaceTexture2D(m_texture->GetTex12(), src, DXGI_format, width, height, expanded_width, level, m_texture->GetResourceUsageState()); } void TextureCache::TCacheEntry::LoadMaterialMap(const u8* src, u32 width, u32 height, u32 level) { D3D::ReplaceTexture2D(m_nrm_texture->GetTex12(), src, DXGI_format, width, height, width, level, m_nrm_texture->GetResourceUsageState()); } void TextureCache::TCacheEntry::Load(const u8* src, u32 width, u32 height, u32 expandedWidth, u32 expandedHeight, const s32 texformat, const u32 tlutaddr, const TlutFormat tlutfmt, u32 level) { TexDecoder_Decode( TextureCache::temp, src, expandedWidth, expandedHeight, texformat, tlutaddr, tlutfmt, DXGI_format == DXGI_FORMAT_R8G8B8A8_UNORM, compressed); u8* data = TextureCache::temp; if (is_scaled) { data = reinterpret_cast<u8*>(s_scaler->Scale((u32*)data, expandedWidth, height)); width *= g_ActiveConfig.iTexScalingFactor; height *= g_ActiveConfig.iTexScalingFactor; expandedWidth *= g_ActiveConfig.iTexScalingFactor; } D3D::ReplaceTexture2D(m_texture->GetTex12(), data, DXGI_format, width, height, expandedWidth, level, m_texture->GetResourceUsageState()); } void TextureCache::TCacheEntry::LoadFromTmem(const u8* ar_src, const u8* gb_src, u32 width, u32 height, u32 expanded_width, u32 expanded_Height, u32 level) { TexDecoder_DecodeRGBA8FromTmem( (u32*)TextureCache::temp, ar_src, gb_src, expanded_width, expanded_Height); u8* data = TextureCache::temp; if (is_scaled) { data = reinterpret_cast<u8*>(s_scaler->Scale((u32*)data, expanded_width, height)); width *= g_ActiveConfig.iTexScalingFactor; height *= g_ActiveConfig.iTexScalingFactor; expanded_width *= g_ActiveConfig.iTexScalingFactor; } D3D::ReplaceTexture2D(m_texture->GetTex12(), data, DXGI_format, width, height, expanded_width, level, m_texture->GetResourceUsageState()); } PC_TexFormat TextureCache::GetNativeTextureFormat(const s32 texformat, const TlutFormat tlutfmt, u32 width, u32 height) { const bool compressed_supported = ((width & 3) == 0) && ((height & 3) == 0); PC_TexFormat pcfmt = GetPC_TexFormat(texformat, tlutfmt, compressed_supported); pcfmt = !g_ActiveConfig.backend_info.bSupportedFormats[pcfmt] ? PC_TEX_FMT_RGBA32 : pcfmt; return pcfmt; } TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConfig& config) { if (config.rendertarget) { D3DTexture2D* texture = D3DTexture2D::Create(config.width, config.height, (D3D11_BIND_FLAG)((int)D3D11_BIND_RENDER_TARGET | (int)D3D11_BIND_SHADER_RESOURCE), D3D11_USAGE_DEFAULT, DXGI_FORMAT_R8G8B8A8_UNORM, 1, config.layers); TCacheEntry* entry = new TCacheEntry(config, texture); entry->m_texture_srv_cpu_handle = texture->GetSRV12CPU(); entry->m_texture_srv_gpu_handle = texture->GetSRV12GPU(); entry->m_texture_srv_gpu_handle_cpu_shadow = texture->GetSRV12GPUCPUShadow(); return entry; } else { static const DXGI_FORMAT PC_TexFormat_To_DXGIFORMAT[11] { DXGI_FORMAT_UNKNOWN,//PC_TEX_FMT_NONE DXGI_FORMAT_R8G8B8A8_UNORM,//PC_TEX_FMT_BGRA32 DXGI_FORMAT_R8G8B8A8_UNORM,//PC_TEX_FMT_RGBA32 DXGI_FORMAT_R8G8B8A8_UNORM,//PC_TEX_FMT_I4_AS_I8 DXGI_FORMAT_R8G8B8A8_UNORM,//PC_TEX_FMT_IA4_AS_IA8 DXGI_FORMAT_R8G8B8A8_UNORM,//PC_TEX_FMT_I8 DXGI_FORMAT_R8G8B8A8_UNORM,//PC_TEX_FMT_IA8 DXGI_FORMAT_R8G8B8A8_UNORM,//PC_TEX_FMT_RGB565 DXGI_FORMAT_BC1_UNORM,//PC_TEX_FMT_DXT1 DXGI_FORMAT_BC2_UNORM,//PC_TEX_FMT_DXT3 DXGI_FORMAT_BC3_UNORM,//PC_TEX_FMT_DXT5 }; DXGI_FORMAT format = PC_TexFormat_To_DXGIFORMAT[config.pcformat]; ID3D12Resource* pTexture12 = nullptr; D3D12_RESOURCE_DESC texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(format, config.width, config.height, 1, config.levels); CheckHR( D3D::device12->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &CD3DX12_RESOURCE_DESC(texdesc12), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&pTexture12) ) ); D3DTexture2D* texture = new D3DTexture2D( pTexture12, D3D11_BIND_SHADER_RESOURCE, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE ); TCacheEntry* const entry = new TCacheEntry( config, texture ); entry->m_texture_srv_cpu_handle = texture->GetSRV12CPU(); entry->m_texture_srv_gpu_handle = texture->GetSRV12GPU(); entry->m_texture_srv_gpu_handle_cpu_shadow = texture->GetSRV12GPUCPUShadow(); entry->DXGI_format = format; if (format != DXGI_FORMAT_R8G8B8A8_UNORM) { entry->compressed = true; } // EXISTINGD3D11TODO: better debug names D3D::SetDebugObjectName12(entry->m_texture->GetTex12(), "a texture of the TextureCache"); SAFE_RELEASE(pTexture12); if (config.materialmap) { pTexture12 = nullptr; CheckHR( D3D::device12->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &CD3DX12_RESOURCE_DESC(texdesc12), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&pTexture12) ) ); entry->m_nrm_texture = new D3DTexture2D( pTexture12, D3D11_BIND_SHADER_RESOURCE, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE ); entry->m_nrm_texture_srv_cpu_handle = entry->m_nrm_texture->GetSRV12CPU(); entry->m_nrm_texture_srv_gpu_handle = entry->m_nrm_texture->GetSRV12GPU(); entry->m_nrm_texture_srv_gpu_handle_cpu_shadow = entry->m_nrm_texture->GetSRV12GPUCPUShadow(); SAFE_RELEASE(pTexture12); } return entry; } } void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat src_format, const EFBRectangle& srcRect, bool scale_by_half, unsigned int cbuf_id, const float* colmat) { // When copying at half size, in multisampled mode, resolve the color/depth buffer first. // This is because multisampled texture reads go through Load, not Sample, and the linear // filter is ignored. bool multisampled = (g_ActiveConfig.iMultisamples > 1); D3DTexture2D* efb_tex = (src_format == PEControl::Z24) ? FramebufferManager::GetEFBDepthTexture() : FramebufferManager::GetEFBColorTexture(); if (multisampled && scale_by_half) { multisampled = false; efb_tex = (src_format == PEControl::Z24) ? FramebufferManager::GetResolvedEFBDepthTexture() : FramebufferManager::GetResolvedEFBColorTexture(); } // stretch picture with increased internal resolution const D3D12_VIEWPORT vp = { 0.f, 0.f, (float)config.width, (float)config.height, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; D3D::current_command_list->RSSetViewports(1, &vp); // set transformation if (nullptr == s_efb_copy_buffers[cbuf_id]) { CheckHR( D3D::device12->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE, &CD3DX12_RESOURCE_DESC::Buffer(28 * sizeof(float)), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&s_efb_copy_buffers[cbuf_id]) ) ); void* pData = nullptr; CheckHR(s_efb_copy_buffers[cbuf_id]->Map(0, nullptr, &pData)); memcpy(pData, colmat, 28 * sizeof(float)); } D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_efb_copy_buffers[cbuf_id]->GetGPUVirtualAddress()); D3D::command_list_mgr->m_dirty_ps_cbv = true; const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect); // TODO: try targetSource.asRECT(); const D3D11_RECT sourcerect = CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom); // Use linear filtering if (bScaleByHalf), use point filtering otherwise if (scale_by_half) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); // Make sure we don't draw with the texture set as both a source and target. // (This can happen because we don't unbind textures when we free them.) m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &m_texture->GetRTV12(), FALSE, nullptr); // Create texture copy D3D::DrawShadedTexQuad( efb_tex, &sourcerect, Renderer::GetTargetWidth(), Renderer::GetTargetHeight(), (src_format == PEControl::Z24) ? StaticShaderCache::GetDepthMatrixPixelShader(multisampled) : StaticShaderCache::GetColorMatrixPixelShader(multisampled), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), StaticShaderCache::GetCopyGeometryShader(), 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, m_texture->GetMultisampled() ); m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); g_renderer->RestoreAPIState(); } void TextureCache::CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, PEControl::PixelFormat src_format, const EFBRectangle& src_rect, bool is_intensity, bool scale_by_half) { s_encoder->Encode(dst, format, native_width, bytes_per_row, num_blocks_y, memory_stride, src_format, src_rect, is_intensity, scale_by_half); } static const constexpr char s_palette_shader_hlsl[] = R"HLSL( sampler samp0 : register(s0); Texture2DArray Tex0 : register(t0); Buffer<uint> Tex1 : register(t1); uniform float Multiply; uint Convert3To8(uint v) { // Swizzle bits: 00000123 -> 12312312 return (v << 5) | (v << 2) | (v >> 1); } uint Convert4To8(uint v) { // Swizzle bits: 00001234 -> 12341234 return (v << 4) | v; } uint Convert5To8(uint v) { // Swizzle bits: 00012345 -> 12345123 return (v << 3) | (v >> 2); } uint Convert6To8(uint v) { // Swizzle bits: 00123456 -> 12345612 return (v << 2) | (v >> 4); } float4 DecodePixel_RGB5A3(uint val) { int r,g,b,a; if ((val&0x8000)) { r=Convert5To8((val>>10) & 0x1f); g=Convert5To8((val>>5 ) & 0x1f); b=Convert5To8((val ) & 0x1f); a=0xFF; } else { a=Convert3To8((val>>12) & 0x7); r=Convert4To8((val>>8 ) & 0xf); g=Convert4To8((val>>4 ) & 0xf); b=Convert4To8((val ) & 0xf); } return float4(r, g, b, a) / 255; } float4 DecodePixel_RGB565(uint val) { int r, g, b, a; r = Convert5To8((val >> 11) & 0x1f); g = Convert6To8((val >> 5) & 0x3f); b = Convert5To8((val) & 0x1f); a = 0xFF; return float4(r, g, b, a) / 255; } float4 DecodePixel_IA8(uint val) { int i = val & 0xFF; int a = val >> 8; return float4(i, i, i, a) / 255; } void main( out float4 ocol0 : SV_Target, in float4 pos : SV_Position, in float3 uv0 : TEXCOORD0) { uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r; src = Tex1.Load(src); src = ((src << 8) & 0xFF00) | (src >> 8); ocol0 = DECODE(src); } )HLSL"; void TextureCache::LoadLut(u32 lutFmt, void* palette, u32 size) { m_lut_format = (TlutFormat)lutFmt; m_lut_size = size; if (m_lut_size > 512) { return; } // D3D12: Copy the palette into a free place in the palette_buf12 upload heap. // Only 1024 palette buffers are supported in flight at once (arbitrary, this should be plenty). m_palette_buffer_index = (m_palette_buffer_index + 1) % 1024; memcpy(static_cast<u8*>(m_palette_buffer_data) + m_palette_buffer_index * 512, palette, std::min(size, 512u)); } bool TextureCache::Palettize(TCacheEntryBase* entry, const TCacheEntryBase* unconverted) { if (m_lut_size > 512) { return false; } const TCacheEntry* base_entry = static_cast<const TCacheEntry*>(unconverted); // stretch picture with increased internal resolution const D3D12_VIEWPORT vp = { 0.f, 0.f, (float)unconverted->config.width, (float)unconverted->config.height, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; D3D::current_command_list->RSSetViewports(1, &vp); // D3D12: Because the second SRV slot is occupied by this buffer, and an arbitrary texture occupies the first SRV slot, // we need to allocate temporary space out of our descriptor heap, place the palette SRV in the second slot, then copy the // existing texture's descriptor into the first slot. // First, allocate the (temporary) space in the descriptor heap. D3D12_CPU_DESCRIPTOR_HANDLE srv_group_cpu_handle[2] = {}; D3D12_GPU_DESCRIPTOR_HANDLE srv_group_gpu_handle[2] = {}; D3D::gpu_descriptor_heap_mgr->AllocateGroup(srv_group_cpu_handle, 2, srv_group_gpu_handle, nullptr, true); srv_group_cpu_handle[1].ptr = srv_group_cpu_handle[0].ptr + D3D::resource_descriptor_size; // Now, create the palette SRV at the appropriate offset. D3D12_SHADER_RESOURCE_VIEW_DESC palette_buffer_srv_desc = { DXGI_FORMAT_R16_UINT, // DXGI_FORMAT Format; D3D12_SRV_DIMENSION_BUFFER, // D3D12_SRV_DIMENSION ViewDimension; D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING // UINT Shader4ComponentMapping; }; palette_buffer_srv_desc.Buffer.FirstElement = m_palette_buffer_index * 256; palette_buffer_srv_desc.Buffer.NumElements = 256; D3D::device12->CreateShaderResourceView(m_palette_buffer, &palette_buffer_srv_desc, srv_group_cpu_handle[1]); // Now, copy the existing texture's descriptor into the new temporary location. base_entry->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); D3D::device12->CopyDescriptorsSimple( 1, srv_group_cpu_handle[0], base_entry->m_texture->GetSRV12GPUCPUShadow(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ); // Finally, bind our temporary location. D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, srv_group_gpu_handle[0]); // D3D11EXISTINGTODO: Add support for C14X2 format. (Different multiplier, more palette entries.) // D3D12: See TextureCache::TextureCache() - because there are only two possible buffer contents here, // just pre-populate the data in two parts of the same upload heap. if ((unconverted->format & 0xf) == GX_TF_I4) { D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, m_palette_uniform_buffer->GetGPUVirtualAddress()); } else { D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, m_palette_uniform_buffer->GetGPUVirtualAddress() + 256); } D3D::command_list_mgr->m_dirty_ps_cbv = true; const D3D11_RECT source_rect = CD3D11_RECT(0, 0, unconverted->config.width, unconverted->config.height); D3D::SetPointCopySampler(); // Make sure we don't draw with the texture set as both a source and target. // (This can happen because we don't unbind textures when we free them.) static_cast<TCacheEntry*>(entry)->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &static_cast<TCacheEntry*>(entry)->m_texture->GetRTV12(), FALSE, nullptr); // Create texture copy D3D::DrawShadedTexQuad( base_entry->m_texture, &source_rect, unconverted->config.width, unconverted->config.height, m_palette_pixel_shaders[m_lut_format], StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), StaticShaderCache::GetCopyGeometryShader(), 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, true, static_cast<TCacheEntry*>(entry)->m_texture->GetMultisampled() ); FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); g_renderer->RestoreAPIState(); return true; } D3D12_SHADER_BYTECODE GetConvertShader(const std::string& type) { std::string shader = "#define DECODE DecodePixel_"; shader.append(type); shader.append("\n"); shader.append(s_palette_shader_hlsl); D3DBlob* pBlob = nullptr; D3D::CompilePixelShader(shader, &pBlob); return { pBlob->Data(), pBlob->Size() }; } TextureCache::TextureCache() { // FIXME: Is it safe here? s_encoder = std::make_unique<PSTextureEncoder>(); s_encoder->Init(); s_scaler = std::make_unique<TextureScaler>(); s_texture_cache_entry_readback_buffer = nullptr; s_texture_cache_entry_readback_buffer_data = nullptr; s_texture_cache_entry_readback_buffer_size = 0; m_palette_pixel_shaders[GX_TL_IA8] = GetConvertShader("IA8"); m_palette_pixel_shaders[GX_TL_RGB565] = GetConvertShader("RGB565"); m_palette_pixel_shaders[GX_TL_RGB5A3] = GetConvertShader("RGB5A3"); CheckHR( D3D::device12->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE, &CD3DX12_RESOURCE_DESC::Buffer(sizeof(u16) * 256 * 1024), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&m_palette_buffer) ) ); D3D::SetDebugObjectName12(m_palette_buffer, "texture decoder lut buffer"); CheckHR(m_palette_buffer->Map(0, nullptr, &m_palette_buffer_data)); // Right now, there are only two variants of palette_uniform data. So, we'll just create an upload heap to permanently store both of these. CheckHR( D3D::device12->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE, &CD3DX12_RESOURCE_DESC::Buffer(((16 + 255) & ~255) * 2), // Constant Buffers have to be 256b aligned. "* 2" to create for two sets of data. D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&m_palette_uniform_buffer) ) ); D3D::SetDebugObjectName12(m_palette_uniform_buffer, "a constant buffer used in TextureCache::ConvertTexture"); CheckHR(m_palette_uniform_buffer->Map(0, nullptr, &m_palette_uniform_buffer_data)); float paramsFormatZero[4] = { 15.f }; float paramsFormatNonzero[4] = { 255.f }; memcpy(static_cast<u8*>(m_palette_uniform_buffer_data), paramsFormatZero, sizeof(paramsFormatZero)); memcpy(static_cast<u8*>(m_palette_uniform_buffer_data) + 256, paramsFormatNonzero, sizeof(paramsFormatNonzero)); }