Exemplo n.º 1
0
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
{
	const GSDrawingContext* context = m_state->m_context;

	int n = 1;

	switch(primclass)
	{
	case GS_POINT_CLASS:
		n = 1;
		break;
	case GS_LINE_CLASS:
	case GS_SPRITE_CLASS:
		n = 2;
		break;
	case GS_TRIANGLE_CLASS:
		n = 3;
		break;
	}

	GSVector4 tmin = s_minmax.xxxx();
	GSVector4 tmax = s_minmax.yyyy();
	GSVector4i cmin = GSVector4i::xffffffff();
	GSVector4i cmax = GSVector4i::zero();

	#if _M_SSE >= 0x401

	GSVector4i pmin = GSVector4i::xffffffff();
	GSVector4i pmax = GSVector4i::zero();

	#else

	GSVector4 pmin = s_minmax.xxxx();
	GSVector4 pmax = s_minmax.yyyy();
	
	#endif

	const GSVertex* RESTRICT v = (GSVertex*)vertex;

	for(int i = 0; i < count; i += n)
	{
		if(primclass == GS_POINT_CLASS)
		{
			GSVector4i c(v[index[i]].m[0]);

			if(color)
			{
				cmin = cmin.min_u8(c);
				cmax = cmax.max_u8(c);
			}

			if(tme)
			{
				if(!fst)
				{
					GSVector4 stq = GSVector4::cast(c);

					GSVector4 q = stq.wwww();

					stq = (stq.xyww() * q.rcpnr()).xyww(q);

					tmin = tmin.min(stq);
					tmax = tmax.max(stq);
				}
				else
				{
					GSVector4i uv(v[index[i]].m[1]);

					GSVector4 st = GSVector4(uv.uph16()).xyxy();

					tmin = tmin.min(st);
					tmax = tmax.max(st);
				}
			}

			GSVector4i xyzf(v[index[i]].m[1]);

			GSVector4i xy = xyzf.upl16();
			GSVector4i z = xyzf.yyyy();

			#if _M_SSE >= 0x401

			GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf));

			pmin = pmin.min_u32(p);
			pmax = pmax.max_u32(p);

			#else

			GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww())));

			pmin = pmin.min(p);
			pmax = pmax.max(p);

			#endif
		}
		else if(primclass == GS_LINE_CLASS)
		{
			GSVector4i c0(v[index[i + 0]].m[0]);
			GSVector4i c1(v[index[i + 1]].m[0]);

			if(color)
			{
				if(iip)
				{
					cmin = cmin.min_u8(c0.min_u8(c1));
					cmax = cmax.max_u8(c0.max_u8(c1));
				}
				else
				{
					cmin = cmin.min_u8(c1);
					cmax = cmax.max_u8(c1);
				}
			}

			if(tme)
			{
				if(!fst)
				{
					GSVector4 stq0 = GSVector4::cast(c0);
					GSVector4 stq1 = GSVector4::cast(c1);

					GSVector4 q = stq0.wwww(stq1).rcpnr();

					stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
					stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);

					tmin = tmin.min(stq0.min(stq1));
					tmax = tmax.max(stq0.max(stq1));
				}
				else
				{
					GSVector4i uv0(v[index[i + 0]].m[1]);
					GSVector4i uv1(v[index[i + 1]].m[1]);

					GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
					GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();

					tmin = tmin.min(st0.min(st1));
					tmax = tmax.max(st0.max(st1));
				}
			}

			GSVector4i xyzf0(v[index[i + 0]].m[1]);
			GSVector4i xyzf1(v[index[i + 1]].m[1]);

			GSVector4i xy0 = xyzf0.upl16();
			GSVector4i z0 = xyzf0.yyyy();
			GSVector4i xy1 = xyzf1.upl16();
			GSVector4i z1 = xyzf1.yyyy();

			#if _M_SSE >= 0x401

			GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
			GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));

			pmin = pmin.min_u32(p0.min_u32(p1));
			pmax = pmax.max_u32(p0.max_u32(p1));

			#else

			GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
			GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));

			pmin = pmin.min(p0.min(p1));
			pmax = pmax.max(p0.max(p1));

			#endif
		}
		else if(primclass == GS_TRIANGLE_CLASS)
		{
			GSVector4i c0(v[index[i + 0]].m[0]);
			GSVector4i c1(v[index[i + 1]].m[0]);
			GSVector4i c2(v[index[i + 2]].m[0]);

			if(color)
			{
				if(iip)
				{
					cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1));
					cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1));
				}
				else
				{
					cmin = cmin.min_u8(c2);
					cmax = cmax.max_u8(c2);
				}
			}

			if(tme)
			{
				if(!fst)
				{
					GSVector4 stq0 = GSVector4::cast(c0);
					GSVector4 stq1 = GSVector4::cast(c1);
					GSVector4 stq2 = GSVector4::cast(c2);

					GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();

					stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
					stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
					stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);

					tmin = tmin.min(stq2).min(stq0.min(stq1));
					tmax = tmax.max(stq2).max(stq0.max(stq1));
				}
				else
				{
					GSVector4i uv0(v[index[i + 0]].m[1]);
					GSVector4i uv1(v[index[i + 1]].m[1]);
					GSVector4i uv2(v[index[i + 2]].m[1]);

					GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
					GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
					GSVector4 st2 = GSVector4(uv2.uph16()).xyxy();

					tmin = tmin.min(st2).min(st0.min(st1));
					tmax = tmax.max(st2).max(st0.max(st1));
				}
			}

			GSVector4i xyzf0(v[index[i + 0]].m[1]);
			GSVector4i xyzf1(v[index[i + 1]].m[1]);
			GSVector4i xyzf2(v[index[i + 2]].m[1]);

			GSVector4i xy0 = xyzf0.upl16();
			GSVector4i z0 = xyzf0.yyyy();
			GSVector4i xy1 = xyzf1.upl16();
			GSVector4i z1 = xyzf1.yyyy();
			GSVector4i xy2 = xyzf2.upl16();
			GSVector4i z2 = xyzf2.yyyy();

			#if _M_SSE >= 0x401

			GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
			GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
			GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2));

			pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1));
			pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1));

			#else

			GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
			GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
			GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww())));

			pmin = pmin.min(p2).min(p0.min(p1));
			pmax = pmax.max(p2).max(p0.max(p1));

			#endif
		}
		else if(primclass == GS_SPRITE_CLASS)
		{
			GSVector4i c0(v[index[i + 0]].m[0]);
			GSVector4i c1(v[index[i + 1]].m[0]);

			if(color)
			{
				if(iip)
				{
					cmin = cmin.min_u8(c0.min_u8(c1));
					cmax = cmax.max_u8(c0.max_u8(c1));
				}
				else
				{
					cmin = cmin.min_u8(c1);
					cmax = cmax.max_u8(c1);
				}
			}

			if(tme)
			{
				if(!fst)
				{
					GSVector4 stq0 = GSVector4::cast(c0);
					GSVector4 stq1 = GSVector4::cast(c1);

					GSVector4 q = stq1.wwww().rcpnr();

					stq0 = (stq0.xyww() * q).xyww(stq1);
					stq1 = (stq1.xyww() * q).xyww(stq1);

					tmin = tmin.min(stq0.min(stq1));
					tmax = tmax.max(stq0.max(stq1));
				}
				else
				{
					GSVector4i uv0(v[index[i + 0]].m[1]);
					GSVector4i uv1(v[index[i + 1]].m[1]);

					GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
					GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();

					tmin = tmin.min(st0.min(st1));
					tmax = tmax.max(st0.max(st1));
				}
			}

			GSVector4i xyzf0(v[index[i + 0]].m[1]);
			GSVector4i xyzf1(v[index[i + 1]].m[1]);

			GSVector4i xy0 = xyzf0.upl16();
			GSVector4i z0 = xyzf0.yyyy();
			GSVector4i xy1 = xyzf1.upl16();
			GSVector4i z1 = xyzf1.yyyy();

			#if _M_SSE >= 0x401

			GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1));
			GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));

			pmin = pmin.min_u32(p0.min_u32(p1));
			pmax = pmax.max_u32(p0.max_u32(p1));

			#else

			GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww())));
			GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));

			pmin = pmin.min(p0.min(p1));
			pmax = pmax.max(p0.max(p1));

			#endif
		}
	}

	#if _M_SSE >= 0x401

	pmin = pmin.blend16<0x30>(pmin.srl32(1));
	pmax = pmax.blend16<0x30>(pmax.srl32(1));

	#endif

	GSVector4 o(context->XYOFFSET);
	GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);

	m_min.p = (GSVector4(pmin) - o) * s;
	m_max.p = (GSVector4(pmax) - o) * s;

	if(tme)
	{
		if(fst)
		{
			s = GSVector4(1.0f / 16, 1.0f).xxyy();
		}
		else
		{
			s = GSVector4(1 << context->TEX0.TW, 1 << context->TEX0.TH, 1, 1);
		}

		m_min.t = tmin * s;
		m_max.t = tmax * s;
	}
	else
	{
		m_min.t = GSVector4::zero();
		m_max.t = GSVector4::zero();
	}

	if(color)
	{
		m_min.c = cmin.zzzz().u8to32();
		m_max.c = cmax.zzzz().u8to32();
	}
	else
	{
		m_min.c = GSVector4i::zero();
		m_max.c = GSVector4i::zero();
	}
}
Exemplo n.º 2
0
bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{
	if(m_complete)
	{
		return true;
	}

	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];

	GSVector2i bs = psm.bs;

	int shift = psm.pal == 0 ? 2 : 0;

	int tw = std::max<int>(1 << m_TEX0.TW, bs.x);
	int th = std::max<int>(1 << m_TEX0.TH, bs.y);

	GSVector4i r = rect;

	r = r.ralign<Align_Outside>(bs);

	if(r.eq(GSVector4i(0, 0, tw, th)))
	{
		m_complete = true; // lame, but better than nothing
	}

	if(m_buff == NULL)
	{
		uint32 pitch = (1 << m_tw) << shift;
		
		m_buff = _aligned_malloc(pitch * th * 4, 32);

		if(m_buff == NULL)
		{
			return false;
		}
	}

	GSLocalMemory& mem = m_state->m_mem;

	const GSOffset* RESTRICT off = m_offset;

	uint32 blocks = 0;

	GSLocalMemory::readTextureBlock rtxbP = psm.rtxbP;

	uint32 pitch = (1 << m_tw) << shift;

	uint8* dst = (uint8*)m_buff + pitch * r.top;

	int block_pitch = pitch * bs.y;

	r = r.srl32(3);

	bs.x >>= 3;
	bs.y >>= 3;

	shift += 3;

	if(m_repeating)
	{
		for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
		{
			uint32 base = off->block.row[y];

			for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
			{
				uint32 block = (base + off->block.col[x]) % MAX_BLOCKS;

				uint32 row = i >> 5;
				uint32 col = 1 << (i & 31);

				if((m_valid[row] & col) == 0)
				{
					m_valid[row] |= col;

					(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA);

					blocks++;
				}
			}
		}
	}
	else
	{
		for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)