void FramebufferManagerBase::CopyToVirtualXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma)
{
	if (!g_framebuffer_manager)
		return;

	VirtualXFBListType::iterator vxfb = FindVirtualXFB(xfbAddr, sourceRc.GetWidth(), fbHeight);

	if (m_virtualXFBList.end() == vxfb)
	{
		if (m_virtualXFBList.size() < MAX_VIRTUAL_XFB)
		{
			// create a new Virtual XFB and place it at the front of the list
			m_virtualXFBList.emplace_front();
			vxfb = m_virtualXFBList.begin();
		}
		else
		{
			// Replace the last virtual XFB
			--vxfb;
		}
	}
	//else // replace existing virtual XFB

	// move this Virtual XFB to the front of the list.
	if (m_virtualXFBList.begin() != vxfb)
		m_virtualXFBList.splice(m_virtualXFBList.begin(), m_virtualXFBList, vxfb);

	unsigned int target_width, target_height;
	g_framebuffer_manager->GetTargetSize(&target_width, &target_height);

	// recreate if needed
	if (vxfb->xfbSource && (vxfb->xfbSource->texWidth != target_width || vxfb->xfbSource->texHeight != target_height))
		vxfb->xfbSource.reset();

	if (!vxfb->xfbSource)
	{
		vxfb->xfbSource = g_framebuffer_manager->CreateXFBSource(target_width, target_height, m_EFBLayers);
		if (!vxfb->xfbSource)
			return;

		vxfb->xfbSource->texWidth = target_width;
		vxfb->xfbSource->texHeight = target_height;
	}

	vxfb->xfbSource->srcAddr = vxfb->xfbAddr = xfbAddr;
	vxfb->xfbSource->srcWidth = vxfb->xfbWidth = sourceRc.GetWidth();
	vxfb->xfbSource->srcHeight = vxfb->xfbHeight = fbHeight;

	vxfb->xfbSource->sourceRc = g_renderer->ConvertEFBRectangle(sourceRc);

	// keep stale XFB data from being used
	ReplaceVirtualXFB();

	// Copy EFB data to XFB and restore render target again
	vxfb->xfbSource->CopyEFB(Gamma);
}
Example #2
0
void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight,
                                       const EFBRectangle& sourceRc, float Gamma)
{
  u8* dst = Memory::GetPointer(xfbAddr);

  // The destination stride can differ from the copy region width, in which case the pixels
  // outside the copy region should not be written to.
  s_xfbEncoder.Encode(dst, static_cast<u32>(sourceRc.GetWidth()), fbHeight, sourceRc, Gamma);
}
void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma)
{
    u8* xfb_in_ram = Memory::GetPointer(xfbAddr);
    if (!xfb_in_ram)
    {
        WARN_LOG(VIDEO, "Tried to copy to invalid XFB address");
        return;
    }

    TargetRectangle targetRc = g_renderer->ConvertEFBRectangle(sourceRc);
    std::swap(targetRc.top, targetRc.bottom);
    TextureConverter::EncodeToRamYUYV(GetEFBColorTexture(), targetRc, xfb_in_ram, sourceRc.GetWidth(), fbStride, fbHeight, Gamma);
}
Example #4
0
void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, PEControl::PixelFormat srcFormat,
	const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf)
{
	// Emulation methods:
	//
	// - EFB to RAM:
	//      Encodes the requested EFB data at its native resolution to the emulated RAM using shaders.
	//      Load() decodes the data from there again (using TextureDecoder) if the EFB copy is being used as a texture again.
	//      Advantage: CPU can read data from the EFB copy and we don't lose any important updates to the texture
	//      Disadvantage: Encoding+decoding steps often are redundant because only some games read or modify EFB copies before using them as textures.
	//
	// - EFB to texture:
	//      Copies the requested EFB data to a texture object in VRAM, performing any color conversion using shaders.
	//      Advantage: Works for many games, since in most cases EFB copies aren't read or modified at all before being used as a texture again.
	//                 Since we don't do any further encoding or decoding here, this method is much faster.
	//                 It also allows enhancing the visual quality by doing scaled EFB copies.
	//
	// - Hybrid EFB copies:
	//      1a) Whenever this function gets called, encode the requested EFB data to RAM (like EFB to RAM)
	//      1b) Set type to TCET_EC_DYNAMIC for all texture cache entries in the destination address range.
	//          If EFB copy caching is enabled, further checks will (try to) prevent redundant EFB copies.
	//      2) Check if a texture cache entry for the specified dstAddr already exists (i.e. if an EFB copy was triggered to that address before):
	//      2a) Entry doesn't exist:
	//          - Also copy the requested EFB data to a texture object in VRAM (like EFB to texture)
	//          - Create a texture cache entry for the target (type = TCET_EC_VRAM)
	//          - Store a hash of the encoded RAM data in the texcache entry.
	//      2b) Entry exists AND type is TCET_EC_VRAM:
	//          - Like case 2a, but reuse the old texcache entry instead of creating a new one.
	//      2c) Entry exists AND type is TCET_EC_DYNAMIC:
	//          - Only encode the texture to RAM (like EFB to RAM) and store a hash of the encoded data in the existing texcache entry.
	//          - Do NOT copy the requested EFB data to a VRAM object. Reason: the texture is dynamic, i.e. the CPU is modifying it. Storing a VRAM copy is useless, because we'd always end up deleting it and reloading the data from RAM anyway.
	//      3) If the EFB copy gets used as a texture, compare the source RAM hash with the hash you stored when encoding the EFB data to RAM.
	//      3a) If the two hashes match AND type is TCET_EC_VRAM, reuse the VRAM copy you created
	//      3b) If the two hashes differ AND type is TCET_EC_VRAM, screw your existing VRAM copy. Set type to TCET_EC_DYNAMIC.
	//          Redecode the source RAM data to a VRAM object. The entry basically behaves like a normal texture now.
	//      3c) If type is TCET_EC_DYNAMIC, treat the EFB copy like a normal texture.
	//      Advantage: Non-dynamic EFB copies can be visually enhanced like with EFB to texture.
	//                 Compatibility is as good as EFB to RAM.
	//      Disadvantage: Slower than EFB to texture and often even slower than EFB to RAM.
	//                    EFB copy cache depends on accurate texture hashing being enabled. However, with accurate hashing you end up being as slow as without a copy cache anyway.
	//
	// Disadvantage of all methods: Calling this function requires the GPU to perform a pipeline flush which stalls any further CPU processing.
	//
	// For historical reasons, Dolphin doesn't actually implement "pure" EFB to RAM emulation, but only EFB to texture and hybrid EFB copies.

	float colmat[28] = {0};
	float *const fConstAdd = colmat + 16;
	float *const ColorMask = colmat + 20;
	ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
	ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;
	unsigned int cbufid = -1;
	bool efbHasAlpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;

	if (srcFormat == PEControl::Z24)
	{
		switch (dstFormat)
		{
		case 0: // Z4
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
			cbufid = 0;
			break;
		case 1: // Z8
		case 8: // Z8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
			cbufid = 1;
			break;

		case 3: // Z16
			colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
			cbufid = 2;
			break;

		case 11: // Z16 (reverse order)
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 3;
			break;

		case 6: // Z24X8
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			cbufid = 4;
			break;

		case 9: // Z8M
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 5;
			break;

		case 10: // Z8L
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 6;
			break;

		case 12: // Z16L - copy lower 16 depth bits
			// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits stored as alpha)
			// Used e.g. in Zelda: Skyward Sword
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 7;
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", dstFormat);
			colmat[2] = colmat[5] = colmat[8] = 1.0f;
			cbufid = 8;
			break;
		}
	}
	else if (isIntensity)
	{
		fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f/255.0f;
		switch (dstFormat)
		{
		case 0: // I4
		case 1: // I8
		case 2: // IA4
		case 3: // IA8
		case 8: // I8
			// TODO - verify these coefficients
			colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f;
			colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f;
			colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f;

			if (dstFormat < 2 || dstFormat == 8)
			{
				colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f;
				fConstAdd[3] = 16.0f/255.0f;
				if (dstFormat == 0)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f;
					cbufid = 9;
				}
				else
				{
					cbufid = 10;
				}
			}
			else// alpha
			{
				colmat[15] = 1;
				if (dstFormat == 2)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f;
					cbufid = 11;
				}
				else
				{
					cbufid = 12;
				}

			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 13;
			break;
		}
	}
	else
	{
		switch (dstFormat)
		{
		case 0: // R4
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			ColorMask[0] = 15.0f;
			ColorMask[4] = 1.0f / 15.0f;
			cbufid = 14;
			break;
		case 1: // R8
		case 8: // R8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			cbufid = 15;
			break;

		case 2: // RA4
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[3] = 15.0f;
			ColorMask[4] = ColorMask[7] = 1.0f / 15.0f;

			cbufid = 16;
			if (!efbHasAlpha) {
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 17;
			}
			break;
		case 3: // RA8
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;

			cbufid = 18;
			if (!efbHasAlpha) {
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 19;
			}
			break;

		case 7: // A8
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;

			cbufid = 20;
			if (!efbHasAlpha) {
				ColorMask[3] = 0.0f;
				fConstAdd[0] = 1.0f;
				fConstAdd[1] = 1.0f;
				fConstAdd[2] = 1.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 21;
			}
			break;

		case 9: // G8
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 22;
			break;
		case 10: // B8
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 23;
			break;

		case 11: // RG8
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 24;
			break;

		case 12: // GB8
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 25;
			break;

		case 4: // RGB565
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			ColorMask[0] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[1] = 63.0f;
			ColorMask[5] = 1.0f / 63.0f;
			fConstAdd[3] = 1.0f; // set alpha to 1
			cbufid = 26;
			break;

		case 5: // RGB5A3
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[1] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[3] = 7.0f;
			ColorMask[7] = 1.0f / 7.0f;

			cbufid = 27;
			if (!efbHasAlpha) {
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 28;
			}
			break;
		case 6: // RGBA8
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;

			cbufid = 29;
			if (!efbHasAlpha) {
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 30;
			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 31;
			break;
		}
	}

	const unsigned int tex_w = scaleByHalf ? srcRect.GetWidth()/2 : srcRect.GetWidth();
	const unsigned int tex_h = scaleByHalf ? srcRect.GetHeight()/2 : srcRect.GetHeight();

	unsigned int scaled_tex_w = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledX(tex_w) : tex_w;
	unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledY(tex_h) : tex_h;


	TCacheEntryBase *entry = textures[dstAddr];
	if (entry)
	{
		if (entry->type == TCET_EC_DYNAMIC && entry->native_width == tex_w && entry->native_height == tex_h)
		{
			scaled_tex_w = tex_w;
			scaled_tex_h = tex_h;
		}
		else if (!(entry->type == TCET_EC_VRAM && entry->virtual_width == scaled_tex_w && entry->virtual_height == scaled_tex_h))
		{
			// remove it and recreate it as a render target
			delete entry;
			entry = nullptr;
		}
	}

	if (nullptr == entry)
	{
		// create the texture
		textures[dstAddr] = entry = g_texture_cache->CreateRenderTargetTexture(scaled_tex_w, scaled_tex_h);

		// TODO: Using the wrong dstFormat, dumb...
		entry->SetGeneralParameters(dstAddr, 0, dstFormat, 1);
		entry->SetDimensions(tex_w, tex_h, scaled_tex_w, scaled_tex_h);
		entry->SetHashes(TEXHASH_INVALID);
		entry->type = TCET_EC_VRAM;
	}

	entry->frameCount = frameCount;

	entry->FromRenderTarget(dstAddr, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat);
}
Example #5
0
void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat,
	const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf)
{
	// Emulation methods:
	//
	// - EFB to RAM:
	//      Encodes the requested EFB data at its native resolution to the emulated RAM using shaders.
	//      Load() decodes the data from there again (using TextureDecoder) if the EFB copy is being used as a texture again.
	//      Advantage: CPU can read data from the EFB copy and we don't lose any important updates to the texture
	//      Disadvantage: Encoding+decoding steps often are redundant because only some games read or modify EFB copies before using them as textures.
	//
	// - EFB to texture:
	//      Copies the requested EFB data to a texture object in VRAM, performing any color conversion using shaders.
	//      Advantage: Works for many games, since in most cases EFB copies aren't read or modified at all before being used as a texture again.
	//                 Since we don't do any further encoding or decoding here, this method is much faster.
	//                 It also allows enhancing the visual quality by doing scaled EFB copies.
	//
	// - Hybrid EFB copies:
	//      1a) Whenever this function gets called, encode the requested EFB data to RAM (like EFB to RAM)
	//      1b) Set type to TCET_EC_DYNAMIC for all texture cache entries in the destination address range.
	//          If EFB copy caching is enabled, further checks will (try to) prevent redundant EFB copies.
	//      2) Check if a texture cache entry for the specified dstAddr already exists (i.e. if an EFB copy was triggered to that address before):
	//      2a) Entry doesn't exist:
	//          - Also copy the requested EFB data to a texture object in VRAM (like EFB to texture)
	//          - Create a texture cache entry for the target (type = TCET_EC_VRAM)
	//          - Store a hash of the encoded RAM data in the texcache entry.
	//      2b) Entry exists AND type is TCET_EC_VRAM:
	//          - Like case 2a, but reuse the old texcache entry instead of creating a new one.
	//      2c) Entry exists AND type is TCET_EC_DYNAMIC:
	//          - Only encode the texture to RAM (like EFB to RAM) and store a hash of the encoded data in the existing texcache entry.
	//          - Do NOT copy the requested EFB data to a VRAM object. Reason: the texture is dynamic, i.e. the CPU is modifying it. Storing a VRAM copy is useless, because we'd always end up deleting it and reloading the data from RAM anyway.
	//      3) If the EFB copy gets used as a texture, compare the source RAM hash with the hash you stored when encoding the EFB data to RAM.
	//      3a) If the two hashes match AND type is TCET_EC_VRAM, reuse the VRAM copy you created
	//      3b) If the two hashes differ AND type is TCET_EC_VRAM, screw your existing VRAM copy. Set type to TCET_EC_DYNAMIC.
	//          Redecode the source RAM data to a VRAM object. The entry basically behaves like a normal texture now.
	//      3c) If type is TCET_EC_DYNAMIC, treat the EFB copy like a normal texture.
	//      Advantage: Non-dynamic EFB copies can be visually enhanced like with EFB to texture.
	//                 Compatibility is as good as EFB to RAM.
	//      Disadvantage: Slower than EFB to texture and often even slower than EFB to RAM.
	//                    EFB copy cache depends on accurate texture hashing being enabled. However, with accurate hashing you end up being as slow as without a copy cache anyway.
	//
	// Disadvantage of all methods: Calling this function requires the GPU to perform a pipeline flush which stalls any further CPU processing.
	//
	// For historical reasons, Dolphin doesn't actually implement "pure" EFB to RAM emulation, but only EFB to texture and hybrid EFB copies.

	float colmat[28] = { 0 };
	float *const fConstAdd = colmat + 16;
	float *const ColorMask = colmat + 20;
	ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
	ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;
	unsigned int cbufid = -1;
	bool efbHasAlpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;

	if (srcFormat == PEControl::Z24)
	{
		switch (dstFormat)
		{
		case 0: // Z4
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
			cbufid = 0;
			dstFormat |= _GX_TF_CTF;
			break;
		case 8: // Z8H
			dstFormat |= _GX_TF_CTF;
		case 1: // Z8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
			cbufid = 1;
			break;

		case 3: // Z16
			colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
			cbufid = 2;
			break;

		case 11: // Z16 (reverse order)
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 3;
			dstFormat |= _GX_TF_CTF;
			break;

		case 6: // Z24X8
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			cbufid = 4;
			break;

		case 9: // Z8M
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 5;
			dstFormat |= _GX_TF_CTF;
			break;

		case 10: // Z8L
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 6;
			dstFormat |= _GX_TF_CTF;
			break;

		case 12: // Z16L - copy lower 16 depth bits
			// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits stored as alpha)
			// Used e.g. in Zelda: Skyward Sword
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 7;
			dstFormat |= _GX_TF_CTF;
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", dstFormat);
			colmat[2] = colmat[5] = colmat[8] = 1.0f;
			cbufid = 8;
			break;
		}

		dstFormat |= _GX_TF_ZTF;
	}
	else if (isIntensity)
	{
		fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f / 255.0f;
		switch (dstFormat)
		{
		case 0: // I4
		case 1: // I8
		case 2: // IA4
		case 3: // IA8
		case 8: // I8
			// TODO - verify these coefficients
			colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f;
			colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f;
			colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f;

			if (dstFormat < 2 || dstFormat == 8)
			{
				colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f;
				fConstAdd[3] = 16.0f / 255.0f;
				if (dstFormat == 0)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f;
					cbufid = 9;
				}
				else
				{
					cbufid = 10;
				}
			}
			else// alpha
			{
				colmat[15] = 1;
				if (dstFormat == 2)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f;
					cbufid = 11;
				}
				else
				{
					cbufid = 12;
				}

			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 13;
			break;
		}
	}
	else
	{
		switch (dstFormat)
		{
		case 0: // R4
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			ColorMask[0] = 15.0f;
			ColorMask[4] = 1.0f / 15.0f;
			cbufid = 14;
			dstFormat |= _GX_TF_CTF;
			break;
		case 1: // R8
		case 8: // R8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			cbufid = 15;
			dstFormat |= _GX_TF_CTF;
			break;

		case 2: // RA4
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[3] = 15.0f;
			ColorMask[4] = ColorMask[7] = 1.0f / 15.0f;

			cbufid = 16;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 17;
			}
			dstFormat |= _GX_TF_CTF;
			break;
		case 3: // RA8
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;

			cbufid = 18;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 19;
			}
			dstFormat |= _GX_TF_CTF;
			break;

		case 7: // A8
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;

			cbufid = 20;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[0] = 1.0f;
				fConstAdd[1] = 1.0f;
				fConstAdd[2] = 1.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 21;
			}
			dstFormat |= _GX_TF_CTF;
			break;

		case 9: // G8
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 22;
			dstFormat |= _GX_TF_CTF;
			break;
		case 10: // B8
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 23;
			dstFormat |= _GX_TF_CTF;
			break;

		case 11: // RG8
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 24;
			dstFormat |= _GX_TF_CTF;
			break;

		case 12: // GB8
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 25;
			dstFormat |= _GX_TF_CTF;
			break;

		case 4: // RGB565
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			ColorMask[0] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[1] = 63.0f;
			ColorMask[5] = 1.0f / 63.0f;
			fConstAdd[3] = 1.0f; // set alpha to 1
			cbufid = 26;
			break;

		case 5: // RGB5A3
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[1] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[3] = 7.0f;
			ColorMask[7] = 1.0f / 7.0f;

			cbufid = 27;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 28;
			}
			break;
		case 6: // RGBA8
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;

			cbufid = 29;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 30;
			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 31;
			break;
		}
	}

	u8* dst = Memory::GetPointer(dstAddr);
	if (dst == nullptr)
	{
		ERROR_LOG(VIDEO, "Trying to copy from EFB to invalid address 0x%8x", dstAddr);
		return;
	}

	const unsigned int tex_w = scaleByHalf ? srcRect.GetWidth() / 2 : srcRect.GetWidth();
	const unsigned int tex_h = scaleByHalf ? srcRect.GetHeight() / 2 : srcRect.GetHeight();

	unsigned int scaled_tex_w = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledX(tex_w) : tex_w;
	unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledY(tex_h) : tex_h;

	// remove all texture cache entries at dstAddr
	{
		std::pair<TexCache::iterator, TexCache::iterator> iter_range = textures_by_address.equal_range((u64)dstAddr);
		TexCache::iterator iter = iter_range.first;
		while (iter != iter_range.second)
		{
			iter = FreeTexture(iter);
		}
	}

	// create the texture
	TCacheEntryConfig config;
	config.rendertarget = true;
	config.width = scaled_tex_w;
	config.height = scaled_tex_h;
	config.layers = FramebufferManagerBase::GetEFBLayers();

	TCacheEntryBase* entry = AllocateTexture(config);

	entry->SetGeneralParameters(dstAddr, 0, dstFormat);
	entry->SetDimensions(tex_w, tex_h, 1);

	entry->frameCount = FRAMECOUNT_INVALID;
	entry->SetEfbCopy(dstStride);
	entry->is_custom_tex = false;

	entry->FromRenderTarget(dst, dstFormat, dstStride, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat);

	u64 hash = entry->CalculateHash();
	entry->SetHashes(hash, hash);

	// Invalidate all textures that overlap the range of our efb copy.
	// Unless our efb copy has a weird stride, then we want avoid invalidating textures which
	// we might be able to do a partial texture update on.
	if (entry->memory_stride == entry->CacheLinesPerRow() * 32)
	{
		TexCache::iterator iter = textures_by_address.begin();
		while (iter != textures_by_address.end())
		{
			if (iter->second->OverlapsMemoryRange(dstAddr, entry->size_in_bytes))
				iter = FreeTexture(iter);
			else
				++iter;
		}
	}

	if (g_ActiveConfig.bDumpEFBTarget)
	{
		static int count = 0;
		entry->Save(StringFromFormat("%sefb_frame_%i.png", File::GetUserPath(D_DUMPTEXTURES_IDX).c_str(),
			count++), 0);
	}

	if (g_bRecordFifoData)
	{
		// Mark the memory behind this efb copy as dynamicly generated for the Fifo log
		u32 address = dstAddr;
		for (u32 i = 0; i < entry->NumBlocksY(); i++)
		{
			FifoRecorder::GetInstance().UseMemory(address, entry->CacheLinesPerRow() * 32, MemoryUpdate::TEXTURE_MAP, true);
			address += entry->memory_stride;
		}
	}

	textures_by_address.emplace((u64)dstAddr, entry);
}
void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat,
                                                 const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf)
{
	// Emulation methods:
	//
	// - EFB to RAM:
	//      Encodes the requested EFB data at its native resolution to the emulated RAM using shaders.
	//      Load() decodes the data from there again (using TextureDecoder) if the EFB copy is being used as a texture again.
	//      Advantage: CPU can read data from the EFB copy and we don't lose any important updates to the texture
	//      Disadvantage: Encoding+decoding steps often are redundant because only some games read or modify EFB copies before using them as textures.
	//
	// - EFB to texture:
	//      Copies the requested EFB data to a texture object in VRAM, performing any color conversion using shaders.
	//      Advantage: Works for many games, since in most cases EFB copies aren't read or modified at all before being used as a texture again.
	//                 Since we don't do any further encoding or decoding here, this method is much faster.
	//                 It also allows enhancing the visual quality by doing scaled EFB copies.
	//
	// - Hybrid EFB copies:
	//      1a) Whenever this function gets called, encode the requested EFB data to RAM (like EFB to RAM)
	//      1b) Set type to TCET_EC_DYNAMIC for all texture cache entries in the destination address range.
	//          If EFB copy caching is enabled, further checks will (try to) prevent redundant EFB copies.
	//      2) Check if a texture cache entry for the specified dstAddr already exists (i.e. if an EFB copy was triggered to that address before):
	//      2a) Entry doesn't exist:
	//          - Also copy the requested EFB data to a texture object in VRAM (like EFB to texture)
	//          - Create a texture cache entry for the target (type = TCET_EC_VRAM)
	//          - Store a hash of the encoded RAM data in the texcache entry.
	//      2b) Entry exists AND type is TCET_EC_VRAM:
	//          - Like case 2a, but reuse the old texcache entry instead of creating a new one.
	//      2c) Entry exists AND type is TCET_EC_DYNAMIC:
	//          - Only encode the texture to RAM (like EFB to RAM) and store a hash of the encoded data in the existing texcache entry.
	//          - Do NOT copy the requested EFB data to a VRAM object. Reason: the texture is dynamic, i.e. the CPU is modifying it. Storing a VRAM copy is useless, because we'd always end up deleting it and reloading the data from RAM anyway.
	//      3) If the EFB copy gets used as a texture, compare the source RAM hash with the hash you stored when encoding the EFB data to RAM.
	//      3a) If the two hashes match AND type is TCET_EC_VRAM, reuse the VRAM copy you created
	//      3b) If the two hashes differ AND type is TCET_EC_VRAM, screw your existing VRAM copy. Set type to TCET_EC_DYNAMIC.
	//          Redecode the source RAM data to a VRAM object. The entry basically behaves like a normal texture now.
	//      3c) If type is TCET_EC_DYNAMIC, treat the EFB copy like a normal texture.
	//      Advantage: Non-dynamic EFB copies can be visually enhanced like with EFB to texture.
	//                 Compatibility is as good as EFB to RAM.
	//      Disadvantage: Slower than EFB to texture and often even slower than EFB to RAM.
	//                    EFB copy cache depends on accurate texture hashing being enabled. However, with accurate hashing you end up being as slow as without a copy cache anyway.
	//
	// Disadvantage of all methods: Calling this function requires the GPU to perform a pipeline flush which stalls any further CPU processing.
	//
	// For historical reasons, Dolphin doesn't actually implement "pure" EFB to RAM emulation, but only EFB to texture and hybrid EFB copies.

	float colmat[28] = { 0 };
	float *const fConstAdd = colmat + 16;
	float *const ColorMask = colmat + 20;
	ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
	ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;
	unsigned int cbufid = -1;
	bool efbHasAlpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;

	if (srcFormat == PEControl::Z24)
	{
		switch (dstFormat)
		{
		case 0: // Z4
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
			cbufid = 0;
			dstFormat |= _GX_TF_CTF;
			break;
		case 8: // Z8H
			dstFormat |= _GX_TF_CTF;
		case 1: // Z8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
			cbufid = 1;
			break;

		case 3: // Z16
			colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
			cbufid = 2;
			break;

		case 11: // Z16 (reverse order)
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 3;
			dstFormat |= _GX_TF_CTF;
			break;

		case 6: // Z24X8
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			cbufid = 4;
			break;

		case 9: // Z8M
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 5;
			dstFormat |= _GX_TF_CTF;
			break;

		case 10: // Z8L
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 6;
			dstFormat |= _GX_TF_CTF;
			break;

		case 12: // Z16L - copy lower 16 depth bits
			// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits stored as alpha)
			// Used e.g. in Zelda: Skyward Sword
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 7;
			dstFormat |= _GX_TF_CTF;
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", dstFormat);
			colmat[2] = colmat[5] = colmat[8] = 1.0f;
			cbufid = 8;
			break;
		}

		dstFormat |= _GX_TF_ZTF;
	}
	else if (isIntensity)
	{
		fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f / 255.0f;
		switch (dstFormat)
		{
		case 0: // I4
		case 1: // I8
		case 2: // IA4
		case 3: // IA8
		case 8: // I8
			// TODO - verify these coefficients
			colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f;
			colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f;
			colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f;

			if (dstFormat < 2 || dstFormat == 8)
			{
				colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f;
				fConstAdd[3] = 16.0f / 255.0f;
				if (dstFormat == 0)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f;
					cbufid = 9;
				}
				else
				{
					cbufid = 10;
				}
			}
			else// alpha
			{
				colmat[15] = 1;
				if (dstFormat == 2)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f;
					cbufid = 11;
				}
				else
				{
					cbufid = 12;
				}

			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 13;
			break;
		}
	}
	else
	{
		switch (dstFormat)
		{
		case 0: // R4
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			ColorMask[0] = 15.0f;
			ColorMask[4] = 1.0f / 15.0f;
			cbufid = 14;
			dstFormat |= _GX_TF_CTF;
			break;
		case 1: // R8
		case 8: // R8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			cbufid = 15;
			dstFormat = GX_CTF_R8;
			break;

		case 2: // RA4
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[3] = 15.0f;
			ColorMask[4] = ColorMask[7] = 1.0f / 15.0f;

			cbufid = 16;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 17;
			}
			dstFormat |= _GX_TF_CTF;
			break;
		case 3: // RA8
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;

			cbufid = 18;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 19;
			}
			dstFormat |= _GX_TF_CTF;
			break;

		case 7: // A8
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;

			cbufid = 20;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[0] = 1.0f;
				fConstAdd[1] = 1.0f;
				fConstAdd[2] = 1.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 21;
			}
			dstFormat |= _GX_TF_CTF;
			break;

		case 9: // G8
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 22;
			dstFormat |= _GX_TF_CTF;
			break;
		case 10: // B8
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 23;
			dstFormat |= _GX_TF_CTF;
			break;

		case 11: // RG8
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 24;
			dstFormat |= _GX_TF_CTF;
			break;

		case 12: // GB8
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 25;
			dstFormat |= _GX_TF_CTF;
			break;

		case 4: // RGB565
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			ColorMask[0] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[1] = 63.0f;
			ColorMask[5] = 1.0f / 63.0f;
			fConstAdd[3] = 1.0f; // set alpha to 1
			cbufid = 26;
			break;

		case 5: // RGB5A3
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[1] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[3] = 7.0f;
			ColorMask[7] = 1.0f / 7.0f;

			cbufid = 27;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 28;
			}
			break;
		case 6: // RGBA8
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;

			cbufid = 29;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 30;
			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 31;
			break;
		}
	}

	u8* dst = Memory::GetPointer(dstAddr);
	if (dst == nullptr)
	{
		ERROR_LOG(VIDEO, "Trying to copy from EFB to invalid address 0x%8x", dstAddr);
		return;
	}

	const unsigned int tex_w = scaleByHalf ? srcRect.GetWidth() / 2 : srcRect.GetWidth();
	const unsigned int tex_h = scaleByHalf ? srcRect.GetHeight() / 2 : srcRect.GetHeight();

	unsigned int scaled_tex_w = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledX(tex_w) : tex_w;
	unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledY(tex_h) : tex_h;

	// Remove all texture cache entries at dstAddr
	//   It's not possible to have two EFB copies at the same address, this makes sure any old efb copies
	//   (or normal textures) are removed from texture cache. They are also un-linked from any partially
	//   updated textures, which forces that partially updated texture to be updated.
	// TODO: This also wipes out non-efb copies, which is counterproductive.
	{
		std::pair<TexCache::iterator, TexCache::iterator> iter_range = textures_by_address.equal_range((u64)dstAddr);
		TexCache::iterator iter = iter_range.first;
		while (iter != iter_range.second)
		{
			iter = InvalidateTexture(iter);
		}
	}

	// Get the base (in memory) format of this efb copy.
	int baseFormat = TexDecoder_GetEfbCopyBaseFormat(dstFormat);

	u32 blockH = TexDecoder_GetBlockHeightInTexels(baseFormat);
	const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat);

	// Round up source height to multiple of block size
	u32 actualHeight = ROUND_UP(tex_h, blockH);
	const u32 actualWidth = ROUND_UP(tex_w, blockW);

	u32 num_blocks_y = actualHeight / blockH;
	const u32 num_blocks_x = actualWidth / blockW;

	// RGBA takes two cache lines per block; all others take one
	const u32 bytes_per_block = baseFormat == GX_TF_RGBA8 ? 64 : 32;

	u32 bytes_per_row = num_blocks_x * bytes_per_block;

	bool copy_to_ram = !g_ActiveConfig.bSkipEFBCopyToRam;
	bool copy_to_vram = true;

	if (copy_to_ram)
	{
		g_texture_cache->CopyEFB(
			dst,
			dstFormat,
			tex_w,
			bytes_per_row,
			num_blocks_y,
			dstStride,
			srcFormat,
			srcRect,
			isIntensity,
			scaleByHalf);
	}
	else
	{
		// Hack: Most games don't actually need the correct texture data in RAM
		//       and we can just keep a copy in VRAM. We zero the memory so we
		//       can check it hasn't changed before using our copy in VRAM.
		u8* ptr = dst;
		for (u32 i = 0; i < num_blocks_y; i++)
		{
			memset(ptr, 0, bytes_per_row);
			ptr += dstStride;
		}
	}

	if (g_bRecordFifoData)
	{
		// Mark the memory behind this efb copy as dynamicly generated for the Fifo log
		u32 address = dstAddr;
		for (u32 i = 0; i < num_blocks_y; i++)
		{
			FifoRecorder::GetInstance().UseMemory(address, bytes_per_row, MemoryUpdate::TEXTURE_MAP, true);
			address += dstStride;
		}
	}

	if (dstStride < bytes_per_row)
	{
		// This kind of efb copy results in a scrambled image.
		// I'm pretty sure no game actually wants to do this, it might be caused by a
		// programming bug in the game, or a CPU/Bounding box emulation issue with dolphin.
		// The copy_to_ram code path above handles this "correctly" and scrambles the image
		// but the copy_to_vram code path just saves and uses unscrambled texture instead.

		// To avoid a "incorrect" result, we simply skip doing the copy_to_vram code path
		// so if the game does try to use the scrambled texture, dolphin will grab the scrambled
		// texture (or black if copy_to_ram is also disabled) out of ram.
		ERROR_LOG(VIDEO, "Memory stride too small (%i < %i)", dstStride, bytes_per_row);
		copy_to_vram = false;
	}

	// Invalidate all textures that overlap the range of our efb copy.
	// Unless our efb copy has a weird stride, then we want avoid invalidating textures which
	// we might be able to do a partial texture update on.
	// TODO: This also invalidates partial overlaps, which we currently don't have a better way
	//       of dealing with.
	if (dstStride == bytes_per_row || !copy_to_vram)
	{
		TexCache::iterator iter = textures_by_address.begin();
		while (iter != textures_by_address.end())
		{
			if (iter->second->addr + iter->second->size_in_bytes <= dstAddr || iter->second->addr >= dstAddr + num_blocks_y * dstStride)
				++iter;
			else
				iter = InvalidateTexture(iter);
		}
	}

	if (copy_to_vram)
	{
		// create the texture
		TCacheEntryConfig config;
		config.rendertarget = true;
		config.width = scaled_tex_w;
		config.height = scaled_tex_h;
		config.layers = FramebufferManagerBase::GetEFBLayers();

		TCacheEntryBase* entry = AllocateTexture(config);

		if (entry)
		{
			entry->SetGeneralParameters(dstAddr, 0, baseFormat);
			entry->SetDimensions(tex_w, tex_h, 1);

			entry->frameCount = FRAMECOUNT_INVALID;
			entry->SetEfbCopy(dstStride);
			entry->is_custom_tex = false;

			entry->FromRenderTarget(dst, srcFormat, srcRect, scaleByHalf, cbufid, colmat);

			u64 hash = entry->CalculateHash();
			entry->SetHashes(hash, hash);

			if (g_ActiveConfig.bDumpEFBTarget)
			{
				static int count = 0;
				entry->Save(StringFromFormat("%sefb_frame_%i.png", File::GetUserPath(D_DUMPTEXTURES_IDX).c_str(),
					count++), 0);
			}

			textures_by_address.emplace((u64)dstAddr, entry);
		}
	}
}
size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat,
	PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
	bool isIntensity, bool scaleByHalf)
{
	if (!m_ready) // Make sure we initialized OK
		return 0;

	// Clamp srcRect to 640x528. BPS: The Strike tries to encode an 800x600
	// texture, which is invalid.
	EFBRectangle correctSrc = srcRect;
	correctSrc.ClampUL(0, 0, EFB_WIDTH, EFB_HEIGHT);

	// Validate source rect size
	if (correctSrc.GetWidth() <= 0 || correctSrc.GetHeight() <= 0)
		return 0;

	HRESULT hr;

	unsigned int blockW = BLOCK_WIDTHS[dstFormat];
	unsigned int blockH = BLOCK_HEIGHTS[dstFormat];

	// Round up source dims to multiple of block size
	unsigned int actualWidth = correctSrc.GetWidth() / (scaleByHalf ? 2 : 1);
	actualWidth = (actualWidth + blockW-1) & ~(blockW-1);
	unsigned int actualHeight = correctSrc.GetHeight() / (scaleByHalf ? 2 : 1);
	actualHeight = (actualHeight + blockH-1) & ~(blockH-1);

	unsigned int numBlocksX = actualWidth/blockW;
	unsigned int numBlocksY = actualHeight/blockH;

	unsigned int cacheLinesPerRow;
	if (dstFormat == 0x6) // RGBA takes two cache lines per block; all others take one
		cacheLinesPerRow = numBlocksX*2;
	else
		cacheLinesPerRow = numBlocksX;
	_assert_msg_(VIDEO, cacheLinesPerRow*32 <= MAX_BYTES_PER_BLOCK_ROW, "cache lines per row sanity check");

	unsigned int totalCacheLines = cacheLinesPerRow * numBlocksY;
	_assert_msg_(VIDEO, totalCacheLines*32 <= MAX_BYTES_PER_ENCODE, "total encode size sanity check");

	size_t encodeSize = 0;

	// Reset API
	g_renderer->ResetAPIState();

	// Set up all the state for EFB encoding

	{
		D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow * 8), FLOAT(numBlocksY));
		D3D::context->RSSetViewports(1, &vp);

		EFBRectangle fullSrcRect;
		fullSrcRect.left = 0;
		fullSrcRect.top = 0;
		fullSrcRect.right = EFB_WIDTH;
		fullSrcRect.bottom = EFB_HEIGHT;
		TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect);

		D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr);

		ID3D11ShaderResourceView* pEFB = (srcFormat == PEControl::Z24) ?
			FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() :
			// FIXME: Instead of resolving EFB, it would be better to pick out a
			// single sample from each pixel. The game may break if it isn't
			// expecting the blurred edges around multisampled shapes.
			FramebufferManager::GetResolvedEFBColorTexture()->GetSRV();

		EFBEncodeParams params;
		params.SrcLeft = correctSrc.left;
		params.SrcTop = correctSrc.top;
		params.DestWidth = actualWidth;
		params.ScaleFactor = scaleByHalf ? 2 : 1;
		D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &params, 0, 0);
		D3D::stateman->SetPixelConstants(m_encodeParams);

		// Use linear filtering if (bScaleByHalf), use point filtering otherwise
		if (scaleByHalf)
			D3D::SetLinearCopySampler();
		else
			D3D::SetPointCopySampler();

		D3D::drawShadedTexQuad(pEFB,
			targetRect.AsRECT(),
			Renderer::GetTargetWidth(),
			Renderer::GetTargetHeight(),
			SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf),
			VertexShaderCache::GetSimpleVertexShader(),
			VertexShaderCache::GetSimpleInputLayout());

		// Copy to staging buffer
		D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow * 8, numBlocksY, 1);
		D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox);

		// Transfer staging buffer to GameCube/Wii RAM
		D3D11_MAPPED_SUBRESOURCE map = { 0 };
		hr = D3D::context->Map(m_outStage, 0, D3D11_MAP_READ, 0, &map);
		CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr);

		u8* src = (u8*)map.pData;
		for (unsigned int y = 0; y < numBlocksY; ++y)
		{
			memcpy(dst, src, cacheLinesPerRow*32);
			dst += bpmem.copyMipMapStrideChannels*32;
			src += map.RowPitch;
		}

		D3D::context->Unmap(m_outStage, 0);

		encodeSize = bpmem.copyMipMapStrideChannels*32 * numBlocksY;
	}

	// Restore API
	g_renderer->RestoreAPIState();
	D3D::context->OMSetRenderTargets(1,
		&FramebufferManager::GetEFBColorTexture()->GetRTV(),
		FramebufferManager::GetEFBDepthTexture()->GetDSV());

	return encodeSize;
}