Ejemplo n.º 1
0
// Copies a rectangle of the EFB into a freshly allocated texture cache entry keyed by dstAddr.
//
// Steps performed here:
//   1. Build the colour-conversion constants (colmat / fConstAdd / ColorMask) and pick a cbufid
//      for the requested source/destination format combination.
//   2. Resolve dstAddr to a host pointer; bail out if the address is invalid.
//   3. Free every cache entry already registered at dstAddr.
//   4. Allocate a render-target texture and let it perform the copy via FromRenderTarget().
//   5. Free cached textures overlapping the written range (only for tightly packed copies).
//   6. Optionally dump the copy to disk / report the touched memory to the FIFO recorder.
//   7. Register the new entry in textures_by_address.
//
// Parameters:
//   dstAddr     - emulated destination address; also used as the cache key.
//   dstFormat   - destination copy format; _GX_TF_CTF / _GX_TF_ZTF flag bits are OR-ed in below.
//   dstStride   - destination stride, forwarded to SetEfbCopy() and FromRenderTarget().
//   srcFormat   - EFB pixel format of the source; PEControl::Z24 selects the depth-copy path.
//   srcRect     - source rectangle inside the EFB.
//   isIntensity - selects the intensity (luma) conversion matrices for colour sources.
//   scaleByHalf - when set, the native texture size is half of srcRect in each dimension.
void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat,
	const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf)
{
	// Emulation methods:
	//
	// - EFB to RAM:
	//      Encodes the requested EFB data at its native resolution to the emulated RAM using shaders.
	//      Load() decodes the data from there again (using TextureDecoder) if the EFB copy is being used as a texture again.
	//      Advantage: CPU can read data from the EFB copy and we don't lose any important updates to the texture
	//      Disadvantage: Encoding+decoding steps often are redundant because only some games read or modify EFB copies before using them as textures.
	//
	// - EFB to texture:
	//      Copies the requested EFB data to a texture object in VRAM, performing any color conversion using shaders.
	//      Advantage: Works for many games, since in most cases EFB copies aren't read or modified at all before being used as a texture again.
	//                 Since we don't do any further encoding or decoding here, this method is much faster.
	//                 It also allows enhancing the visual quality by doing scaled EFB copies.
	//
	// - Hybrid EFB copies:
	//      1a) Whenever this function gets called, encode the requested EFB data to RAM (like EFB to RAM)
	//      1b) Set type to TCET_EC_DYNAMIC for all texture cache entries in the destination address range.
	//          If EFB copy caching is enabled, further checks will (try to) prevent redundant EFB copies.
	//      2) Check if a texture cache entry for the specified dstAddr already exists (i.e. if an EFB copy was triggered to that address before):
	//      2a) Entry doesn't exist:
	//          - Also copy the requested EFB data to a texture object in VRAM (like EFB to texture)
	//          - Create a texture cache entry for the target (type = TCET_EC_VRAM)
	//          - Store a hash of the encoded RAM data in the texcache entry.
	//      2b) Entry exists AND type is TCET_EC_VRAM:
	//          - Like case 2a, but reuse the old texcache entry instead of creating a new one.
	//      2c) Entry exists AND type is TCET_EC_DYNAMIC:
	//          - Only encode the texture to RAM (like EFB to RAM) and store a hash of the encoded data in the existing texcache entry.
	//          - Do NOT copy the requested EFB data to a VRAM object. Reason: the texture is dynamic, i.e. the CPU is modifying it. Storing a VRAM copy is useless, because we'd always end up deleting it and reloading the data from RAM anyway.
	//      3) If the EFB copy gets used as a texture, compare the source RAM hash with the hash you stored when encoding the EFB data to RAM.
	//      3a) If the two hashes match AND type is TCET_EC_VRAM, reuse the VRAM copy you created
	//      3b) If the two hashes differ AND type is TCET_EC_VRAM, discard your existing VRAM copy. Set type to TCET_EC_DYNAMIC.
	//          Redecode the source RAM data to a VRAM object. The entry basically behaves like a normal texture now.
	//      3c) If type is TCET_EC_DYNAMIC, treat the EFB copy like a normal texture.
	//      Advantage: Non-dynamic EFB copies can be visually enhanced like with EFB to texture.
	//                 Compatibility is as good as EFB to RAM.
	//      Disadvantage: Slower than EFB to texture and often even slower than EFB to RAM.
	//                    EFB copy cache depends on accurate texture hashing being enabled. However, with accurate hashing you end up being as slow as without a copy cache anyway.
	//
	// Disadvantage of all methods: Calling this function requires the GPU to perform a pipeline flush which stalls any further CPU processing.
	//
	// For historical reasons, Dolphin doesn't actually implement "pure" EFB to RAM emulation, but only EFB to texture and hybrid EFB copies.

	// Layout of the 28-float constant block handed to FromRenderTarget():
	//   colmat[0..15]  - 4x4 colour matrix
	//   colmat[16..19] - fConstAdd: constant added per channel
	//   colmat[20..27] - ColorMask: four per-channel scale factors followed by their reciprocals
	//                    (255 and 1/255 by default; 15, 31, 63 or 7 for narrower channels)
	float colmat[28] = { 0 };
	float *const fConstAdd = colmat + 16;
	float *const ColorMask = colmat + 20;
	ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
	ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;

	// Placeholder value only: every switch branch below (including the defaults) assigns a
	// distinct id in the range 0..31 before cbufid is passed on.
	unsigned int cbufid = -1;

	// Only the RGBA6_Z24 EFB format is treated as carrying alpha. For every other format the
	// alpha-aware branches below mask alpha out (ColorMask[3] = 0) and force it to 1 via fConstAdd.
	bool efbHasAlpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;

	if (srcFormat == PEControl::Z24)
	{
		// Depth copies.
		switch (dstFormat)
		{
		case 0: // Z4
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
			cbufid = 0;
			dstFormat |= _GX_TF_CTF;
			break;
		case 8: // Z8H
			dstFormat |= _GX_TF_CTF;
			// Falls through: Z8H uses the same matrix and cbufid as Z8.
		case 1: // Z8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
			cbufid = 1;
			break;

		case 3: // Z16
			colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
			cbufid = 2;
			break;

		case 11: // Z16 (reverse order)
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 3;
			dstFormat |= _GX_TF_CTF;
			break;

		case 6: // Z24X8
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			cbufid = 4;
			break;

		case 9: // Z8M
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 5;
			dstFormat |= _GX_TF_CTF;
			break;

		case 10: // Z8L
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 6;
			dstFormat |= _GX_TF_CTF;
			break;

		case 12: // Z16L - copy lower 16 depth bits
			// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits stored as alpha)
			// Used e.g. in Zelda: Skyward Sword
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 7;
			dstFormat |= _GX_TF_CTF;
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", dstFormat);
			colmat[2] = colmat[5] = colmat[8] = 1.0f;
			cbufid = 8;
			break;
		}

		// Every depth copy is tagged as a Z texture format.
		dstFormat |= _GX_TF_ZTF;
	}
	else if (isIntensity)
	{
		// Intensity copies: RGB is collapsed to a single weighted sum plus a 16/255 offset.
		fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f / 255.0f;
		switch (dstFormat)
		{
		case 0: // I4
		case 1: // I8
		case 2: // IA4
		case 3: // IA8
		case 8: // I8
			// TODO - verify these coefficients
			colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f;
			colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f;
			colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f;

			if (dstFormat < 2 || dstFormat == 8)
			{
				// No alpha channel in the destination: the fourth row carries intensity too.
				colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f;
				fConstAdd[3] = 16.0f / 255.0f;
				if (dstFormat == 0)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f;
					cbufid = 9;
				}
				else
				{
					cbufid = 10;
				}
			}
			else // alpha
			{
				colmat[15] = 1;
				if (dstFormat == 2)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f;
					cbufid = 11;
				}
				else
				{
					cbufid = 12;
				}
			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 13;
			break;
		}
	}
	else
	{
		// Plain colour copies.
		switch (dstFormat)
		{
		case 0: // R4
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			ColorMask[0] = 15.0f;
			ColorMask[4] = 1.0f / 15.0f;
			cbufid = 14;
			dstFormat |= _GX_TF_CTF;
			break;
		case 1: // R8
		case 8: // R8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			cbufid = 15;
			dstFormat |= _GX_TF_CTF;
			break;

		case 2: // RA4
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[3] = 15.0f;
			ColorMask[4] = ColorMask[7] = 1.0f / 15.0f;

			cbufid = 16;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 17;
			}
			dstFormat |= _GX_TF_CTF;
			break;
		case 3: // RA8
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;

			cbufid = 18;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 19;
			}
			dstFormat |= _GX_TF_CTF;
			break;

		case 7: // A8
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;

			cbufid = 20;
			if (!efbHasAlpha)
			{
				// Alpha feeds every output channel here, so all four constants become 1.
				ColorMask[3] = 0.0f;
				fConstAdd[0] = 1.0f;
				fConstAdd[1] = 1.0f;
				fConstAdd[2] = 1.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 21;
			}
			dstFormat |= _GX_TF_CTF;
			break;

		case 9: // G8
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 22;
			dstFormat |= _GX_TF_CTF;
			break;
		case 10: // B8
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 23;
			dstFormat |= _GX_TF_CTF;
			break;

		case 11: // RG8
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 24;
			dstFormat |= _GX_TF_CTF;
			break;

		case 12: // GB8
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 25;
			dstFormat |= _GX_TF_CTF;
			break;

		case 4: // RGB565
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			ColorMask[0] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[1] = 63.0f;
			ColorMask[5] = 1.0f / 63.0f;
			fConstAdd[3] = 1.0f; // set alpha to 1
			cbufid = 26;
			break;

		case 5: // RGB5A3
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[1] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[3] = 7.0f;
			ColorMask[7] = 1.0f / 7.0f;

			cbufid = 27;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 28;
			}
			break;
		case 6: // RGBA8
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;

			cbufid = 29;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 30;
			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 31;
			break;
		}
	}

	u8* dst = Memory::GetPointer(dstAddr);
	if (dst == nullptr)
	{
		// %08x (zero padded) rather than %8x, which would print "0x    1234".
		ERROR_LOG(VIDEO, "Trying to copy from EFB to invalid address 0x%08x", dstAddr);
		return;
	}

	// Native size of the copy, and the (optionally upscaled) size of the backing texture.
	const unsigned int tex_w = scaleByHalf ? srcRect.GetWidth() / 2 : srcRect.GetWidth();
	const unsigned int tex_h = scaleByHalf ? srcRect.GetHeight() / 2 : srcRect.GetHeight();

	unsigned int scaled_tex_w = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledX(tex_w) : tex_w;
	unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledY(tex_h) : tex_h;

	// remove all texture cache entries at dstAddr
	{
		std::pair<TexCache::iterator, TexCache::iterator> iter_range = textures_by_address.equal_range(static_cast<u64>(dstAddr));
		TexCache::iterator iter = iter_range.first;
		while (iter != iter_range.second)
		{
			// FreeTexture() hands back the iterator to continue from.
			iter = FreeTexture(iter);
		}
	}

	// create the texture
	TCacheEntryConfig config;
	config.rendertarget = true;
	config.width = scaled_tex_w;
	config.height = scaled_tex_h;
	config.layers = FramebufferManagerBase::GetEFBLayers();

	TCacheEntryBase* entry = AllocateTexture(config);
	if (entry == nullptr)
	{
		// Nothing to copy into. Other callers of AllocateTexture() in this code base guard
		// against a null result as well, so do not dereference it here.
		return;
	}

	entry->SetGeneralParameters(dstAddr, 0, dstFormat);
	entry->SetDimensions(tex_w, tex_h, 1);

	entry->frameCount = FRAMECOUNT_INVALID;
	entry->SetEfbCopy(dstStride);
	entry->is_custom_tex = false;

	entry->FromRenderTarget(dst, dstFormat, dstStride, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat);

	// Both hash slots start out with the hash of the freshly produced copy.
	u64 hash = entry->CalculateHash();
	entry->SetHashes(hash, hash);

	// Invalidate all textures that overlap the range of our efb copy.
	// Unless our efb copy has a weird stride, then we want avoid invalidating textures which
	// we might be able to do a partial texture update on.
	if (entry->memory_stride == entry->CacheLinesPerRow() * 32)
	{
		TexCache::iterator iter = textures_by_address.begin();
		while (iter != textures_by_address.end())
		{
			if (iter->second->OverlapsMemoryRange(dstAddr, entry->size_in_bytes))
				iter = FreeTexture(iter);
			else
				++iter;
		}
	}

	if (g_ActiveConfig.bDumpEFBTarget)
	{
		// Running counter so that each dumped copy gets its own file name.
		static int count = 0;
		entry->Save(StringFromFormat("%sefb_frame_%i.png", File::GetUserPath(D_DUMPTEXTURES_IDX).c_str(),
			count++), 0);
	}

	if (g_bRecordFifoData)
	{
		// Mark the memory behind this efb copy as dynamically generated for the Fifo log
		u32 address = dstAddr;
		for (u32 i = 0; i < entry->NumBlocksY(); i++)
		{
			FifoRecorder::GetInstance().UseMemory(address, entry->CacheLinesPerRow() * 32, MemoryUpdate::TEXTURE_MAP, true);
			address += entry->memory_stride;
		}
	}

	// The entry is registered last, so the overlap sweep above cannot free it.
	textures_by_address.emplace(static_cast<u64>(dstAddr), entry);
}
Ejemplo n.º 2
0
// Performs an EFB copy to dstAddr: optionally encodes the data into emulated RAM and, unless the
// destination stride is too small, keeps a texture copy in the cache.
//
// Steps performed here:
//   1. Build the colour-conversion constants (colmat / fConstAdd / ColorMask) and pick a cbufid
//      for the requested source/destination format combination.
//   2. Resolve dstAddr to a host pointer; bail out if the address is invalid.
//   3. Invalidate every cache entry already registered at dstAddr.
//   4. Work out the block layout of the copy in memory (rows of blocks, bytes per row).
//   5. Either encode the copy into RAM through CopyEFB(), or zero the destination rows when
//      g_ActiveConfig.bSkipEFBCopyToRam is set.
//   6. Report the touched memory to the FIFO recorder when recording.
//   7. Invalidate overlapping cached textures where required.
//   8. Allocate a render-target texture, fill it via FromRenderTarget() and register it.
//
// Parameters:
//   dstAddr     - emulated destination address; also used as the cache key.
//   dstFormat   - destination copy format; flag bits are OR-ed in (or the value replaced) below.
//   dstStride   - distance in bytes between consecutive rows of blocks at the destination.
//   srcFormat   - EFB pixel format of the source; PEControl::Z24 selects the depth-copy path.
//   srcRect     - source rectangle inside the EFB.
//   isIntensity - selects the intensity (luma) conversion matrices for colour sources.
//   scaleByHalf - when set, the native texture size is half of srcRect in each dimension.
void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat,
                                                 const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf)
{
	// Emulation methods:
	//
	// - EFB to RAM:
	//      Encodes the requested EFB data at its native resolution to the emulated RAM using shaders.
	//      Load() decodes the data from there again (using TextureDecoder) if the EFB copy is being used as a texture again.
	//      Advantage: CPU can read data from the EFB copy and we don't lose any important updates to the texture
	//      Disadvantage: Encoding+decoding steps often are redundant because only some games read or modify EFB copies before using them as textures.
	//
	// - EFB to texture:
	//      Copies the requested EFB data to a texture object in VRAM, performing any color conversion using shaders.
	//      Advantage: Works for many games, since in most cases EFB copies aren't read or modified at all before being used as a texture again.
	//                 Since we don't do any further encoding or decoding here, this method is much faster.
	//                 It also allows enhancing the visual quality by doing scaled EFB copies.
	//
	// - Hybrid EFB copies:
	//      1a) Whenever this function gets called, encode the requested EFB data to RAM (like EFB to RAM)
	//      1b) Set type to TCET_EC_DYNAMIC for all texture cache entries in the destination address range.
	//          If EFB copy caching is enabled, further checks will (try to) prevent redundant EFB copies.
	//      2) Check if a texture cache entry for the specified dstAddr already exists (i.e. if an EFB copy was triggered to that address before):
	//      2a) Entry doesn't exist:
	//          - Also copy the requested EFB data to a texture object in VRAM (like EFB to texture)
	//          - Create a texture cache entry for the target (type = TCET_EC_VRAM)
	//          - Store a hash of the encoded RAM data in the texcache entry.
	//      2b) Entry exists AND type is TCET_EC_VRAM:
	//          - Like case 2a, but reuse the old texcache entry instead of creating a new one.
	//      2c) Entry exists AND type is TCET_EC_DYNAMIC:
	//          - Only encode the texture to RAM (like EFB to RAM) and store a hash of the encoded data in the existing texcache entry.
	//          - Do NOT copy the requested EFB data to a VRAM object. Reason: the texture is dynamic, i.e. the CPU is modifying it. Storing a VRAM copy is useless, because we'd always end up deleting it and reloading the data from RAM anyway.
	//      3) If the EFB copy gets used as a texture, compare the source RAM hash with the hash you stored when encoding the EFB data to RAM.
	//      3a) If the two hashes match AND type is TCET_EC_VRAM, reuse the VRAM copy you created
	//      3b) If the two hashes differ AND type is TCET_EC_VRAM, discard your existing VRAM copy. Set type to TCET_EC_DYNAMIC.
	//          Redecode the source RAM data to a VRAM object. The entry basically behaves like a normal texture now.
	//      3c) If type is TCET_EC_DYNAMIC, treat the EFB copy like a normal texture.
	//      Advantage: Non-dynamic EFB copies can be visually enhanced like with EFB to texture.
	//                 Compatibility is as good as EFB to RAM.
	//      Disadvantage: Slower than EFB to texture and often even slower than EFB to RAM.
	//                    EFB copy cache depends on accurate texture hashing being enabled. However, with accurate hashing you end up being as slow as without a copy cache anyway.
	//
	// Disadvantage of all methods: Calling this function requires the GPU to perform a pipeline flush which stalls any further CPU processing.
	//
	// For historical reasons, Dolphin doesn't actually implement "pure" EFB to RAM emulation, but only EFB to texture and hybrid EFB copies.

	// Layout of the 28-float constant block handed to FromRenderTarget():
	//   colmat[0..15]  - 4x4 colour matrix
	//   colmat[16..19] - fConstAdd: constant added per channel
	//   colmat[20..27] - ColorMask: four per-channel scale factors followed by their reciprocals
	//                    (255 and 1/255 by default; 15, 31, 63 or 7 for narrower channels)
	float colmat[28] = { 0 };
	float *const fConstAdd = colmat + 16;
	float *const ColorMask = colmat + 20;
	ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
	ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;

	// Placeholder value only: every switch branch below (including the defaults) assigns a
	// distinct id in the range 0..31 before cbufid is passed on.
	unsigned int cbufid = -1;

	// Only the RGBA6_Z24 EFB format is treated as carrying alpha. For every other format the
	// alpha-aware branches below mask alpha out (ColorMask[3] = 0) and force it to 1 via fConstAdd.
	const bool efbHasAlpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;

	if (srcFormat == PEControl::Z24)
	{
		// Depth copies.
		switch (dstFormat)
		{
		case 0: // Z4
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
			cbufid = 0;
			dstFormat |= _GX_TF_CTF;
			break;
		case 8: // Z8H
			dstFormat |= _GX_TF_CTF;
			// Falls through: Z8H uses the same matrix and cbufid as Z8.
		case 1: // Z8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
			cbufid = 1;
			break;

		case 3: // Z16
			colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
			cbufid = 2;
			break;

		case 11: // Z16 (reverse order)
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 3;
			dstFormat |= _GX_TF_CTF;
			break;

		case 6: // Z24X8
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			cbufid = 4;
			break;

		case 9: // Z8M
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 5;
			dstFormat |= _GX_TF_CTF;
			break;

		case 10: // Z8L
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 6;
			dstFormat |= _GX_TF_CTF;
			break;

		case 12: // Z16L - copy lower 16 depth bits
			// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits stored as alpha)
			// Used e.g. in Zelda: Skyward Sword
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 7;
			dstFormat |= _GX_TF_CTF;
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", dstFormat);
			colmat[2] = colmat[5] = colmat[8] = 1.0f;
			cbufid = 8;
			break;
		}

		// Every depth copy is tagged as a Z texture format.
		dstFormat |= _GX_TF_ZTF;
	}
	else if (isIntensity)
	{
		// Intensity copies: RGB is collapsed to a single weighted sum plus a 16/255 offset.
		fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f / 255.0f;
		switch (dstFormat)
		{
		case 0: // I4
		case 1: // I8
		case 2: // IA4
		case 3: // IA8
		case 8: // I8
			// TODO - verify these coefficients
			colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f;
			colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f;
			colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f;

			if (dstFormat < 2 || dstFormat == 8)
			{
				// No alpha channel in the destination: the fourth row carries intensity too.
				colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f;
				fConstAdd[3] = 16.0f / 255.0f;
				if (dstFormat == 0)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f;
					cbufid = 9;
				}
				else
				{
					cbufid = 10;
				}
			}
			else // alpha
			{
				colmat[15] = 1;
				if (dstFormat == 2)
				{
					ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 15.0f;
					ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f;
					cbufid = 11;
				}
				else
				{
					cbufid = 12;
				}
			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 13;
			break;
		}
	}
	else
	{
		// Plain colour copies.
		switch (dstFormat)
		{
		case 0: // R4
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			ColorMask[0] = 15.0f;
			ColorMask[4] = 1.0f / 15.0f;
			cbufid = 14;
			dstFormat |= _GX_TF_CTF;
			break;
		case 1: // R8
		case 8: // R8
			colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
			cbufid = 15;
			// Both encodings of R8 are normalised to the single GX_CTF_R8 value.
			dstFormat = GX_CTF_R8;
			break;

		case 2: // RA4
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[3] = 15.0f;
			ColorMask[4] = ColorMask[7] = 1.0f / 15.0f;

			cbufid = 16;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 17;
			}
			dstFormat |= _GX_TF_CTF;
			break;
		case 3: // RA8
			colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;

			cbufid = 18;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 19;
			}
			dstFormat |= _GX_TF_CTF;
			break;

		case 7: // A8
			colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;

			cbufid = 20;
			if (!efbHasAlpha)
			{
				// Alpha feeds every output channel here, so all four constants become 1.
				ColorMask[3] = 0.0f;
				fConstAdd[0] = 1.0f;
				fConstAdd[1] = 1.0f;
				fConstAdd[2] = 1.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 21;
			}
			dstFormat |= _GX_TF_CTF;
			break;

		case 9: // G8
			colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
			cbufid = 22;
			dstFormat |= _GX_TF_CTF;
			break;
		case 10: // B8
			colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
			cbufid = 23;
			dstFormat |= _GX_TF_CTF;
			break;

		case 11: // RG8
			colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
			cbufid = 24;
			dstFormat |= _GX_TF_CTF;
			break;

		case 12: // GB8
			colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
			cbufid = 25;
			dstFormat |= _GX_TF_CTF;
			break;

		case 4: // RGB565
			colmat[0] = colmat[5] = colmat[10] = 1.0f;
			ColorMask[0] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[1] = 63.0f;
			ColorMask[5] = 1.0f / 63.0f;
			fConstAdd[3] = 1.0f; // set alpha to 1
			cbufid = 26;
			break;

		case 5: // RGB5A3
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			ColorMask[0] = ColorMask[1] = ColorMask[2] = 31.0f;
			ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f;
			ColorMask[3] = 7.0f;
			ColorMask[7] = 1.0f / 7.0f;

			cbufid = 27;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 28;
			}
			break;
		case 6: // RGBA8
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;

			cbufid = 29;
			if (!efbHasAlpha)
			{
				ColorMask[3] = 0.0f;
				fConstAdd[3] = 1.0f;
				cbufid = 30;
			}
			break;

		default:
			ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", dstFormat);
			colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
			cbufid = 31;
			break;
		}
	}

	u8* dst = Memory::GetPointer(dstAddr);
	if (dst == nullptr)
	{
		// %08x (zero padded) rather than %8x, which would print "0x    1234".
		ERROR_LOG(VIDEO, "Trying to copy from EFB to invalid address 0x%08x", dstAddr);
		return;
	}

	// Native size of the copy, and the (optionally upscaled) size of the backing texture.
	const unsigned int tex_w = scaleByHalf ? srcRect.GetWidth() / 2 : srcRect.GetWidth();
	const unsigned int tex_h = scaleByHalf ? srcRect.GetHeight() / 2 : srcRect.GetHeight();

	const unsigned int scaled_tex_w = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledX(tex_w) : tex_w;
	const unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledY(tex_h) : tex_h;

	// Remove all texture cache entries at dstAddr
	//   It's not possible to have two EFB copies at the same address, this makes sure any old efb copies
	//   (or normal textures) are removed from texture cache. They are also un-linked from any partially
	//   updated textures, which forces that partially updated texture to be updated.
	// TODO: This also wipes out non-efb copies, which is counterproductive.
	{
		std::pair<TexCache::iterator, TexCache::iterator> iter_range = textures_by_address.equal_range(static_cast<u64>(dstAddr));
		TexCache::iterator iter = iter_range.first;
		while (iter != iter_range.second)
		{
			// InvalidateTexture() hands back the iterator to continue from.
			iter = InvalidateTexture(iter);
		}
	}

	// Get the base (in memory) format of this efb copy.
	const int baseFormat = TexDecoder_GetEfbCopyBaseFormat(dstFormat);

	const u32 blockH = TexDecoder_GetBlockHeightInTexels(baseFormat);
	const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat);

	// Round up source dimensions to a multiple of the block size
	const u32 actualHeight = ROUND_UP(tex_h, blockH);
	const u32 actualWidth = ROUND_UP(tex_w, blockW);

	const u32 num_blocks_y = actualHeight / blockH;
	const u32 num_blocks_x = actualWidth / blockW;

	// RGBA takes two cache lines per block; all others take one
	const u32 bytes_per_block = baseFormat == GX_TF_RGBA8 ? 64 : 32;

	const u32 bytes_per_row = num_blocks_x * bytes_per_block;

	const bool copy_to_ram = !g_ActiveConfig.bSkipEFBCopyToRam;
	bool copy_to_vram = true;

	if (copy_to_ram)
	{
		g_texture_cache->CopyEFB(
			dst,
			dstFormat,
			tex_w,
			bytes_per_row,
			num_blocks_y,
			dstStride,
			srcFormat,
			srcRect,
			isIntensity,
			scaleByHalf);
	}
	else
	{
		// Hack: Most games don't actually need the correct texture data in RAM
		//       and we can just keep a copy in VRAM. We zero the memory so we
		//       can check it hasn't changed before using our copy in VRAM.
		// Only the bytes of each row of blocks are cleared; the gap up to dstStride is left alone.
		u8* ptr = dst;
		for (u32 i = 0; i < num_blocks_y; i++)
		{
			memset(ptr, 0, bytes_per_row);
			ptr += dstStride;
		}
	}

	if (g_bRecordFifoData)
	{
		// Mark the memory behind this efb copy as dynamically generated for the Fifo log
		u32 address = dstAddr;
		for (u32 i = 0; i < num_blocks_y; i++)
		{
			FifoRecorder::GetInstance().UseMemory(address, bytes_per_row, MemoryUpdate::TEXTURE_MAP, true);
			address += dstStride;
		}
	}

	if (dstStride < bytes_per_row)
	{
		// This kind of efb copy results in a scrambled image.
		// I'm pretty sure no game actually wants to do this, it might be caused by a
		// programming bug in the game, or a CPU/Bounding box emulation issue with dolphin.
		// The copy_to_ram code path above handles this "correctly" and scrambles the image
		// but the copy_to_vram code path just saves and uses unscrambled texture instead.

		// To avoid a "incorrect" result, we simply skip doing the copy_to_vram code path
		// so if the game does try to use the scrambled texture, dolphin will grab the scrambled
		// texture (or black if copy_to_ram is also disabled) out of ram.
		// Both values are u32, hence %u rather than %i.
		ERROR_LOG(VIDEO, "Memory stride too small (%u < %u)", dstStride, bytes_per_row);
		copy_to_vram = false;
	}

	// Invalidate all textures that overlap the range of our efb copy.
	// Unless our efb copy has a weird stride, then we want avoid invalidating textures which
	// we might be able to do a partial texture update on.
	// TODO: This also invalidates partial overlaps, which we currently don't have a better way
	//       of dealing with.
	if (dstStride == bytes_per_row || !copy_to_vram)
	{
		TexCache::iterator iter = textures_by_address.begin();
		while (iter != textures_by_address.end())
		{
			// Keep entries that end at or before dstAddr, or start at or after the end of the copy.
			if (iter->second->addr + iter->second->size_in_bytes <= dstAddr || iter->second->addr >= dstAddr + num_blocks_y * dstStride)
				++iter;
			else
				iter = InvalidateTexture(iter);
		}
	}

	if (copy_to_vram)
	{
		// create the texture
		TCacheEntryConfig config;
		config.rendertarget = true;
		config.width = scaled_tex_w;
		config.height = scaled_tex_h;
		config.layers = FramebufferManagerBase::GetEFBLayers();

		TCacheEntryBase* entry = AllocateTexture(config);

		// A null entry means no texture could be obtained; the VRAM copy is skipped in that case.
		if (entry)
		{
			entry->SetGeneralParameters(dstAddr, 0, baseFormat);
			entry->SetDimensions(tex_w, tex_h, 1);

			entry->frameCount = FRAMECOUNT_INVALID;
			entry->SetEfbCopy(dstStride);
			entry->is_custom_tex = false;

			entry->FromRenderTarget(dst, srcFormat, srcRect, scaleByHalf, cbufid, colmat);

			// Both hash slots start out with the hash of the freshly produced copy.
			u64 hash = entry->CalculateHash();
			entry->SetHashes(hash, hash);

			if (g_ActiveConfig.bDumpEFBTarget)
			{
				// Running counter so that each dumped copy gets its own file name.
				static int count = 0;
				entry->Save(StringFromFormat("%sefb_frame_%i.png", File::GetUserPath(D_DUMPTEXTURES_IDX).c_str(),
					count++), 0);
			}

			textures_by_address.emplace(static_cast<u64>(dstAddr), entry);
		}
	}
}
Ejemplo n.º 3
0
// Applies EFB copies that lie completely inside a normal texture's memory range on top of that
// texture.
//
// iter_t is the position of the texture to refresh inside textures_by_address.
//
// Returns the entry that represents the texture afterwards. This is either the entry iter_t
// pointed at, or a newly allocated replacement at a different resolution. In the latter case the
// old entry has been freed and the replacement inserted under the same key.
TextureCacheBase::TCacheEntryBase* TextureCacheBase::DoPartialTextureUpdates(TexCache::iterator iter_t)
{
	TCacheEntryBase* target = iter_t->second;

	const bool paletted = target->format == GX_TF_C4
		|| target->format == GX_TF_C8
		|| target->format == GX_TF_C14X2
		|| target->format >= 0x10000;

	// Efb copies and paletted textures are excluded from these updates, until there's an example where a game would
	// benefit from this. Both would require more work to be done.
	// TODO: Implement upscaling support for normal textures, and then remove the efb to ram and the scaled efb restrictions
	if (target->IsEfbCopy() || paletted)
		return target;

	// Block geometry of the target's texture format (low four bits of the format value).
	const u32 blk_w = TexDecoder_GetBlockWidthInTexels(target->format & 0xf);
	const u32 blk_h = TexDecoder_GetBlockHeightInTexels(target->format & 0xf);
	const u32 blk_bytes = blk_w * blk_h * TexDecoder_GetTexelSizeInNibbles(target->format & 0xf) / 2;

	const u32 blocks_per_row = (target->native_width + blk_w - 1) / blk_w;
	// An EFB copy can only be pasted when its stride equals one full row of target blocks.
	const u32 row_bytes = blocks_per_row * blk_bytes;

	TexCache::iterator it = textures_by_address.lower_bound(target->addr);
	const TexCache::iterator range_end = textures_by_address.upper_bound(target->addr + target->size_in_bytes);

	// The target is resized (at most once) to match the scale of the first usable EFB copy.
	bool rescale_pending = true;

	while (it != range_end)
	{
		TCacheEntryBase* const efb = it->second;

		const bool applicable = efb != target
			&& efb->IsEfbCopy()
			&& target->addr <= efb->addr
			&& efb->addr + efb->size_in_bytes <= target->addr + target->size_in_bytes
			&& efb->frameCount == FRAMECOUNT_INVALID
			&& efb->memory_stride == row_bytes;
		if (!applicable)
		{
			++it;
			continue;
		}

		if (efb->hash != efb->CalculateHash())
		{
			// If the hash does not match, this EFB copy will not be used for anything, so remove it
			it = FreeTexture(it);
			continue;
		}

		// Position of the EFB copy inside the target, in native texels.
		const u32 block_index = (efb->addr - target->addr) / blk_bytes;
		const u32 texel_x = (block_index % blocks_per_row) * blk_w;
		const u32 texel_y = (block_index / blocks_per_row) * blk_h;

		if (rescale_pending)
		{
			rescale_pending = false;

			const u32 wanted_w = target->native_width * efb->config.width / efb->native_width;
			const u32 wanted_h = target->native_height * efb->config.height / efb->native_height;
			const u32 size_limit = g_renderer->GetMaxTextureSize();
			if (wanted_w > size_limit || wanted_h > size_limit)
			{
				// The resized target would exceed the renderer's limit; skip this copy.
				++it;
				continue;
			}

			if (target->config.width != wanted_w || target->config.height != wanted_h)
			{
				TextureCacheBase::TCacheEntryConfig resized_config;
				resized_config.rendertarget = true;
				resized_config.width = wanted_w;
				resized_config.height = wanted_h;

				TCacheEntryBase* const resized = AllocateTexture(resized_config);
				if (resized)
				{
					resized->SetGeneralParameters(target->addr, target->size_in_bytes, target->format);
					resized->SetDimensions(target->native_width, target->native_height, 1);
					resized->SetHashes(target->base_hash, target->hash);
					resized->frameCount = frameCount;
					resized->is_efb_copy = false;

					// Stretch the whole old texture over the whole new one.
					MathUtil::Rectangle<int> old_area, new_area;
					old_area.left = 0;
					old_area.top = 0;
					old_area.right = target->config.width;
					old_area.bottom = target->config.height;
					new_area.left = 0;
					new_area.top = 0;
					new_area.right = wanted_w;
					new_area.bottom = wanted_h;
					resized->CopyRectangleFromTexture(target, old_area, new_area);

					// Swap the cache entry: same key, new texture object.
					target = resized;
					const u64 key = iter_t->first;
					iter_t = FreeTexture(iter_t);
					textures_by_address.emplace(key, target);
				}
			}
		}

		// Paste the complete EFB copy into its (scaled) location inside the target.
		MathUtil::Rectangle<int> from, to;
		from.left = 0;
		from.top = 0;
		from.right = efb->config.width;
		from.bottom = efb->config.height;
		to.left = texel_x * target->config.width / target->native_width;
		to.top = texel_y * target->config.height / target->native_height;
		to.right = (texel_x + efb->native_width) * target->config.width / target->native_width;
		to.bottom = (texel_y + efb->native_height) * target->config.height / target->native_height;
		target->CopyRectangleFromTexture(efb, from, to);

		// Mark the texture update as used, so it isn't applied more than once
		efb->frameCount = frameCount;

		++it;
	}

	return target;
}
Ejemplo n.º 4
0
// Patches a texture cache entry with the contents of the EFB copies that overlap its memory range.
//
// iter_t:  iterator to the cache entry that should receive the partial updates
// palette: palette data; forwarded to ApplyPalette() when the target has a paletted format
// tlutfmt: palette format; forwarded to ApplyPalette() together with palette
//
// Returns the entry the caller should use from now on. This is normally iter_t->second, but the
// pointer is handed to ScaleTextureCacheEntryTo() by address, so it may come back replaced.
//
// An EFB copy is only applied when its stored hash still matches CalculateHash(); copies whose
// hash no longer matches are removed from the cache.
TextureCacheBase::TCacheEntryBase* TextureCacheBase::DoPartialTextureUpdates(TexCache::iterator iter_t, u8* palette, u32 tlutfmt)
{
	TCacheEntryBase* entry_to_update = iter_t->second;
	const bool isPaletteTexture = (entry_to_update->format == GX_TF_C4
		|| entry_to_update->format == GX_TF_C8
		|| entry_to_update->format == GX_TF_C14X2
		|| entry_to_update->format >= 0x10000);

	// EFB copies are excluded from these updates, until there's an example where a game would
	// benefit from updating. This would require more work to be done.
	if (entry_to_update->IsEfbCopy())
		return entry_to_update;

	// Block geometry of the target format; all offsets below are computed in whole blocks
	u32 block_width = TexDecoder_GetBlockWidthInTexels(entry_to_update->format & 0xf);
	u32 block_height = TexDecoder_GetBlockHeightInTexels(entry_to_update->format & 0xf);
	u32 block_size = block_width * block_height * TexDecoder_GetTexelSizeInNibbles(entry_to_update->format & 0xf) / 2;

	// Number of blocks per texture row, rounded up
	u32 numBlocksX = (entry_to_update->native_width + block_width - 1) / block_width;

	// Only copies whose stride equals one row of blocks of the target can be mapped onto it
	const u32 expected_stride = numBlocksX * block_size;

	// Start the scan MAX_TEXTURE_BINARY_SIZE below the target address (clamped at 0), so that copies
	// beginning before the texture are considered as well
	TexCache::iterator iter = textures_by_address.lower_bound(entry_to_update->addr > MAX_TEXTURE_BINARY_SIZE ? entry_to_update->addr - MAX_TEXTURE_BINARY_SIZE : 0);
	TexCache::iterator iterend = textures_by_address.upper_bound(entry_to_update->addr + entry_to_update->size_in_bytes);
	while (iter != iterend)
	{
		TCacheEntryBase* entry = iter->second;
		if (entry != entry_to_update
			&& entry->IsEfbCopy()
			&& entry->references.count(entry_to_update) == 0
			&& entry->OverlapsMemoryRange(entry_to_update->addr, entry_to_update->size_in_bytes)
			&& entry->memory_stride == expected_stride)
		{
			if (entry->hash == entry->CalculateHash())
			{
				if (isPaletteTexture)
				{
					TCacheEntryBase *decoded_entry = entry->ApplyPalette(palette, tlutfmt);
					if (decoded_entry)
					{
						// Link the efb copy with the partially updated texture, so we won't apply this partial update again
						entry->CreateReference(entry_to_update);
						// Mark the texture update as used, as if it was loaded directly
						entry->frameCount = FRAMECOUNT_INVALID;
						// From here on, work with the palette-converted temporary instead of the raw copy
						entry = decoded_entry;
					}
					else
					{
						// No converted texture available, skip this copy
						++iter;
						continue;
					}
				}

				u32 src_x, src_y, dst_x, dst_y;

				// Note for understanding the math:
				// Normal textures can't be strided, so the 2 missing cases with src_x > 0 don't exist
				if (entry->addr >= entry_to_update->addr)
				{
					// The copy starts inside the texture: its origin lands on block (block_x, block_y)
					u32 block_offset = (entry->addr - entry_to_update->addr) / block_size;
					u32 block_x = block_offset % numBlocksX;
					u32 block_y = block_offset / numBlocksX;
					src_x = 0;
					src_y = 0;
					dst_x = block_x * block_width;
					dst_y = block_y * block_height;
				}
				else
				{
					// The copy starts block_offset blocks before the texture.
					u32 block_offset = (entry_to_update->addr - entry->addr) / block_size;
					// Column at which the rows of the copy begin, i.e. (-block_offset) mod numBlocksX.
					// Take the remainder before subtracting: negating block_offset in u32 arithmetic wraps
					// modulo 2^32, which only yields the right column when numBlocksX is a power of two.
					u32 block_x = (numBlocksX - block_offset % numBlocksX) % numBlocksX;
					// First block row of the copy that falls inside the texture (block_offset rounded up to rows)
					u32 block_y = (block_offset + block_x) / numBlocksX;
					src_x = 0;
					src_y = block_y * block_height;
					dst_x = block_x * block_width;
					dst_y = 0;
				}

				// Clip the copied area to what both textures can provide
				u32 copy_width = std::min(entry->native_width - src_x, entry_to_update->native_width - dst_x);
				u32 copy_height = std::min(entry->native_height - src_y, entry_to_update->native_height - dst_y);

				// If one of the textures is scaled, scale both with the current efb scaling factor
				if (entry_to_update->native_width != entry_to_update->config.width
					|| entry_to_update->native_height != entry_to_update->config.height
					|| entry->native_width != entry->config.width || entry->native_height != entry->config.height)
				{
					ScaleTextureCacheEntryTo(&entry_to_update, Renderer::EFBToScaledX(entry_to_update->native_width), Renderer::EFBToScaledY(entry_to_update->native_height));
					ScaleTextureCacheEntryTo(&entry, Renderer::EFBToScaledX(entry->native_width), Renderer::EFBToScaledY(entry->native_height));

					src_x = Renderer::EFBToScaledX(src_x);
					src_y = Renderer::EFBToScaledY(src_y);
					dst_x = Renderer::EFBToScaledX(dst_x);
					dst_y = Renderer::EFBToScaledY(dst_y);
					copy_width = Renderer::EFBToScaledX(copy_width);
					copy_height = Renderer::EFBToScaledY(copy_height);
				}

				MathUtil::Rectangle<int> srcrect, dstrect;
				srcrect.left = src_x;
				srcrect.top = src_y;
				srcrect.right = (src_x + copy_width);
				srcrect.bottom = (src_y + copy_height);
				dstrect.left = dst_x;
				dstrect.top = dst_y;
				dstrect.right = (dst_x + copy_width);
				dstrect.bottom = (dst_y + copy_height);
				entry_to_update->CopyRectangleFromTexture(entry, srcrect, dstrect);


				if (isPaletteTexture)
				{
					// Remove the temporary converted texture, it won't be used anywhere else
					// TODO: It would be nice to convert and copy in one step, but this code path isn't common
					InvalidateTexture(GetTexCacheIter(entry));
				}
				else
				{
					// Link the two textures together, so we won't apply this partial update again
					entry->CreateReference(entry_to_update);
					// Mark the texture update as used, as if it was loaded directly
					entry->frameCount = FRAMECOUNT_INVALID;
				}
			}
			else
			{
				// If the hash does not match, this EFB copy will not be used for anything, so remove it
				iter = InvalidateTexture(iter);
				continue;
			}
		}
		++iter;
	}
	return entry_to_update;
}
Ejemplo n.º 5
0
// Patches a texture cache entry with the contents of the EFB copies that overlap its memory range.
//
// iter_t: iterator to the cache entry that should receive the partial updates
//
// Returns the entry the caller should use from now on. This is normally iter_t->second, but the
// pointer is handed to ScaleTextureCacheEntryTo() by address, so it may come back replaced.
//
// An EFB copy is only applied when its frameCount is FRAMECOUNT_INVALID and its stored hash still
// matches CalculateHash(); copies whose hash no longer matches are removed from the cache.
TextureCacheBase::TCacheEntryBase* TextureCacheBase::DoPartialTextureUpdates(TexCache::iterator iter_t)
{
	TCacheEntryBase* entry_to_update = iter_t->second;
	const bool isPaletteTexture = (entry_to_update->format == GX_TF_C4
		|| entry_to_update->format == GX_TF_C8
		|| entry_to_update->format == GX_TF_C14X2
		|| entry_to_update->format >= 0x10000);

	// Efb copies and paletted textures are excluded from these updates, until there's an example where a game would
	// benefit from this. Both would require more work to be done.
	if (entry_to_update->IsEfbCopy()
		|| isPaletteTexture)
		return entry_to_update;

	// Block geometry of the target format; all offsets below are computed in whole blocks
	u32 block_width = TexDecoder_GetBlockWidthInTexels(entry_to_update->format & 0xf);
	u32 block_height = TexDecoder_GetBlockHeightInTexels(entry_to_update->format & 0xf);
	u32 block_size = block_width * block_height * TexDecoder_GetTexelSizeInNibbles(entry_to_update->format & 0xf) / 2;

	// Number of blocks per texture row, rounded up
	u32 numBlocksX = (entry_to_update->native_width + block_width - 1) / block_width;

	// Only copies whose stride equals one row of blocks of the target can be mapped onto it
	const u32 expected_stride = numBlocksX * block_size;

	// NOTE(review): the scan starts at entry_to_update->addr. If textures_by_address is keyed by the
	// start address of each entry, copies that begin before the texture are never visited and the
	// "copy starts before the texture" branch below is unreachable - confirm.
	TexCache::iterator iter = textures_by_address.lower_bound(entry_to_update->addr);
	TexCache::iterator iterend = textures_by_address.upper_bound(entry_to_update->addr + entry_to_update->size_in_bytes);
	while (iter != iterend)
	{
		TCacheEntryBase* entry = iter->second;
		if (entry != entry_to_update
			&& entry->IsEfbCopy()
			&& entry->OverlapsMemoryRange(entry_to_update->addr, entry_to_update->size_in_bytes)
			&& entry->frameCount == FRAMECOUNT_INVALID
			&& entry->memory_stride == expected_stride)
		{
			if (entry->hash == entry->CalculateHash())
			{
				u32 src_x, src_y, dst_x, dst_y;

				// Note for understanding the math:
				// Normal textures can't be strided, so the 2 missing cases with src_x > 0 don't exist
				if (entry->addr >= entry_to_update->addr)
				{
					// The copy starts inside the texture: its origin lands on block (block_x, block_y)
					u32 block_offset = (entry->addr - entry_to_update->addr) / block_size;
					u32 block_x = block_offset % numBlocksX;
					u32 block_y = block_offset / numBlocksX;
					src_x = 0;
					src_y = 0;
					dst_x = block_x * block_width;
					dst_y = block_y * block_height;
				}
				else
				{
					// The copy starts block_offset blocks before the texture.
					u32 block_offset = (entry_to_update->addr - entry->addr) / block_size;
					// Column at which the rows of the copy begin, i.e. (-block_offset) mod numBlocksX.
					// Take the remainder before subtracting: negating block_offset in u32 arithmetic wraps
					// modulo 2^32, which only yields the right column when numBlocksX is a power of two.
					u32 block_x = (numBlocksX - block_offset % numBlocksX) % numBlocksX;
					// First block row of the copy that falls inside the texture (block_offset rounded up to rows)
					u32 block_y = (block_offset + block_x) / numBlocksX;
					src_x = 0;
					src_y = block_y * block_height;
					dst_x = block_x * block_width;
					dst_y = 0;
				}

				// Clip the copied area to what both textures can provide
				u32 copy_width = std::min(entry->native_width - src_x, entry_to_update->native_width - dst_x);
				u32 copy_height = std::min(entry->native_height - src_y, entry_to_update->native_height - dst_y);

				// If one of the textures is scaled, scale both with the current efb scaling factor
				if (entry_to_update->native_width != entry_to_update->config.width
					|| entry_to_update->native_height != entry_to_update->config.height
					|| entry->native_width != entry->config.width || entry->native_height != entry->config.height)
				{
					ScaleTextureCacheEntryTo(&entry_to_update, Renderer::EFBToScaledX(entry_to_update->native_width), Renderer::EFBToScaledY(entry_to_update->native_height));
					ScaleTextureCacheEntryTo(&entry, Renderer::EFBToScaledX(entry->native_width), Renderer::EFBToScaledY(entry->native_height));

					src_x = Renderer::EFBToScaledX(src_x);
					src_y = Renderer::EFBToScaledY(src_y);
					dst_x = Renderer::EFBToScaledX(dst_x);
					dst_y = Renderer::EFBToScaledY(dst_y);
					copy_width = Renderer::EFBToScaledX(copy_width);
					copy_height = Renderer::EFBToScaledY(copy_height);
				}

				MathUtil::Rectangle<int> srcrect, dstrect;
				srcrect.left = src_x;
				srcrect.top = src_y;
				srcrect.right = (src_x + copy_width);
				srcrect.bottom = (src_y + copy_height);
				dstrect.left = dst_x;
				dstrect.top = dst_y;
				dstrect.right = (dst_x + copy_width);
				dstrect.bottom = (dst_y + copy_height);
				entry_to_update->CopyRectangleFromTexture(entry, srcrect, dstrect);
				// Mark the texture update as used, so it isn't applied more than once
				entry->frameCount = frameCount;
			}
			else
			{
				// If the hash does not match, this EFB copy will not be used for anything, so remove it
				iter = FreeTexture(iter);
				continue;
			}
		}
		++iter;
	}
	return entry_to_update;
}