Пример #1
0
void GPURendererSW::VertexKick()
{
	GSVertexSW& dst = m_vl.AddTail();

	// TODO: x/y + off.x/y should wrap around at +/-1024

	int x = (int)(m_v.XY.X + m_env.DROFF.X) << m_scale.x;
	int y = (int)(m_v.XY.Y + m_env.DROFF.Y) << m_scale.y;

	int u = m_v.UV.X;
	int v = m_v.UV.Y;

	GSVector4 pt(x, y, u, v);

	dst.p = pt.xyxy(GSVector4::zero());
	dst.t = (pt.zwzw(GSVector4::zero()) + GSVector4(0.125f)) * 256.0f;
	// dst.c = GSVector4(m_v.RGB.u32) * 128.0f;
	dst.c = GSVector4(GSVector4i::load((int)m_v.RGB.u32).u8to32() << 7);

	int count = 0;

	if(GSVertexSW* v = DrawingKick(count))
	{
		// TODO

		m_count += count;
	}
}
Пример #2
0
void GSDevice10::StretchRect(Texture& st, const GSVector4& sr, Texture& dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, ID3D10BlendState* bs, bool linear)
{
	BeginScene();

	// om

	OMSetDepthStencilState(m_convert.dss, 0);
	OMSetBlendState(bs, 0);
	OMSetRenderTargets(dt, NULL);

	// ia

	float left = dr.x * 2 / dt.GetWidth() - 1.0f;
	float top = 1.0f - dr.y * 2 / dt.GetHeight();
	float right = dr.z * 2 / dt.GetWidth() - 1.0f;
	float bottom = 1.0f - dr.w * 2 / dt.GetHeight();

	GSVertexPT1 vertices[] =
	{
		{GSVector4(left, top, 0.5f, 1.0f), GSVector2(sr.x, sr.y)},
		{GSVector4(right, top, 0.5f, 1.0f), GSVector2(sr.z, sr.y)},
		{GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sr.x, sr.w)},
		{GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sr.z, sr.w)},
	};

	D3D10_BOX box = {0, 0, 0, sizeof(vertices), 1, 1};

	m_dev->UpdateSubresource(m_convert.vb, 0, &box, vertices, 0, 0);

	IASetVertexBuffer(m_convert.vb, sizeof(vertices[0]));
	IASetInputLayout(m_convert.il);
	IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// vs

	VSSetShader(m_convert.vs, NULL);

	// gs

	GSSetShader(NULL);

	// ps

	PSSetShader(ps, ps_cb);
	PSSetSamplerState(linear ? m_convert.ln : m_convert.pt, NULL);
	PSSetShaderResources(st, NULL);

	// rs

	RSSet(dt.GetWidth(), dt.GetHeight());

	//

	DrawPrimitive(countof(vertices));

	//

	EndScene();
}
Пример #3
0
void GSDevice11::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear)
{
	BeginScene();

	GSVector2i ds = dt->GetSize();

	// om

	OMSetDepthStencilState(m_convert.dss, 0);
	OMSetBlendState(bs, 0);
	OMSetRenderTargets(dt, NULL);

	// ia

	float left = dr.x * 2 / ds.x - 1.0f;
	float top = 1.0f - dr.y * 2 / ds.y;
	float right = dr.z * 2 / ds.x - 1.0f;
	float bottom = 1.0f - dr.w * 2 / ds.y;

	GSVertexPT1 vertices[] =
	{
		{GSVector4(left, top, 0.5f, 1.0f), GSVector2(sr.x, sr.y)},
		{GSVector4(right, top, 0.5f, 1.0f), GSVector2(sr.z, sr.y)},
		{GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sr.x, sr.w)},
		{GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sr.z, sr.w)},
	};

	IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
	IASetInputLayout(m_convert.il);
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// vs

	VSSetShader(m_convert.vs, NULL);

	// gs

	GSSetShader(NULL);

	// ps

	PSSetShaderResources(st, NULL);
	PSSetSamplerState(linear ? m_convert.ln : m_convert.pt, NULL);
	PSSetShader(ps, ps_cb);

	//

	DrawPrimitive();

	//

	EndScene();

	PSSetShaderResources(NULL, NULL);
}
Пример #4
0
void GSDevice::Present(const GSVector4i& r, int shader)
{
	GSVector4i cr = m_wnd->GetClientRect();

	int w = std::max<int>(cr.width(), 1);
	int h = std::max<int>(cr.height(), 1);

	if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
	{
		if(!Reset(w, h))
		{
			return;
		}
	}

	GL_PUSH("Present");

	ClearRenderTarget(m_backbuffer, 0);

	if(m_current)
	{
		static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE,
			ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER,
			ShaderConvert_COMPLEX_FILTER}; // FIXME

		Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]);
	}

	Flip();
}
Пример #5
0
void GSDevice::Present(const GSVector4i& r, int shader)
{
	GSVector4i cr = m_wnd->GetClientRect();

	int w = std::max<int>(cr.width(), 1);
	int h = std::max<int>(cr.height(), 1);

	if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
	{
		if(!Reset(w, h))
		{
			return;
		}
	}

	GL_PUSH("Present");

	ClearRenderTarget(m_backbuffer, 0);

	if(m_current)
	{
		static int s_shader[5] = {0, 5, 6, 8, 9}; // FIXME

		Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]);
	}

	Flip();

	GL_POP();
}
Пример #6
0
void GSRendererSW::InitVectors()
{
	m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);

#if _M_SSE >= 0x501
	m_pos_scale2 = GSVector8(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
#endif
}
Пример #7
0
void GSDrawScanlineCodeGenerator::InitVectors()
{
#if _M_SSE >= 0x501
    GSVector8 log2_coef[4] =
    {
        GSVector8(0.204446009836232697516f),
        GSVector8(-1.04913055217340124191f),
        GSVector8(2.28330284476918490682f),
        GSVector8(1.0f),
    };

    for (size_t n = 0; n < countof(log2_coef); ++n)
        m_log2_coef[n] = log2_coef[n];

#else
    GSVector4i test[8] =
    {
        GSVector4i::zero(),
        GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
        GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
        GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
        GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
        GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
        GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
        GSVector4i::zero(),
    };

    GSVector4 log2_coef[4] =
    {
        GSVector4(0.204446009836232697516f),
        GSVector4(-1.04913055217340124191f),
        GSVector4(2.28330284476918490682f),
        GSVector4(1.0f),
    };

    for (size_t n = 0; n < countof(test); ++n)
        m_test[n] = test[n];

    for (size_t n = 0; n < countof(log2_coef); ++n)
        m_log2_coef[n] = log2_coef[n];

#endif
}
Пример #8
0
void GSDevice11::DoShadeBoost(GSTexture* sTex, GSTexture* dTex)
{
	GSVector2i s = dTex->GetSize();

	GSVector4 sRect(0, 0, 1, 1);
	GSVector4 dRect(0, 0, s.x, s.y);

	ShadeBoostConstantBuffer cb;

	cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f);
	cb.rcpFrameOpt = GSVector4::zero();

	m_ctx->UpdateSubresource(m_shadeboost.cb, 0, NULL, &cb, 0, 0);

	StretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps, m_shadeboost.cb, true);
}
Пример #9
0
void GSDevice11::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset)
{
	GSVector4 s = GSVector4(dTex->GetSize());

	GSVector4 sRect(0, 0, 1, 1);
	GSVector4 dRect(0.0f, yoffset, s.x, s.y + yoffset);

	InterlaceConstantBuffer cb;

	cb.ZrH = GSVector2(0, 1.0f / s.y);
	cb.hH = s.y / 2;

	m_ctx->UpdateSubresource(m_interlace.cb, 0, NULL, &cb, 0, 0);

	StretchRect(sTex, sRect, dTex, dRect, m_interlace.ps[shader], m_interlace.cb, linear);
}
Пример #10
0
void GSDevice11::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
{
	GSVector2i s = dTex->GetSize();

	GSVector4 sRect(0, 0, 1, 1);
	GSVector4 dRect(0, 0, s.x, s.y);

	ExternalFXConstantBuffer cb;

	InitExternalFX();

	cb.xyFrame = GSVector2((float)s.x, (float)s.y);
	cb.rcpFrame = GSVector4(1.0f / (float)s.x, 1.0f / (float)s.y, 0.0f, 0.0f);
	cb.rcpFrameOpt = GSVector4::zero();

	m_ctx->UpdateSubresource(m_shaderfx.cb, 0, NULL, &cb, 0, 0);

	StretchRect(sTex, sRect, dTex, dRect, m_shaderfx.ps, m_shaderfx.cb, true);
}
Пример #11
0
void GSRendererDX9::UpdateFBA(GSTexture* rt)
{
	if (!rt)
		return;

	GSDevice9* dev = (GSDevice9*)m_dev;

	dev->BeginScene();

	// om

	dev->OMSetDepthStencilState(&m_fba.dss);
	dev->OMSetBlendState(&m_fba.bs, 0);

	// ia

	GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
	GSVector4 off = GSVector4(-1.0f, 1.0f);

	GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + off.xxyy()) * s.xyxy()).sat(off.zzyy());
	GSVector4 dst = src * 2.0f + off.xxxx();

	GSVertexPT1 vertices[] =
	{
		{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(0)},
		{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(0)},
		{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(0)},
		{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(0)},
	};

	dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
	dev->IASetInputLayout(dev->m_convert.il);
	dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);

	// vs

	dev->VSSetShader(dev->m_convert.vs, NULL, 0);

	// ps

	dev->PSSetShader(dev->m_convert.ps[4], NULL, 0);

	//

	dev->DrawPrimitive();

	//

	dev->EndScene();
}
Пример #12
0
void GSDevice11::DoFXAA(GSTexture* sTex, GSTexture* dTex)
{
	GSVector2i s = dTex->GetSize();

	GSVector4 sRect(0, 0, 1, 1);
	GSVector4 dRect(0, 0, s.x, s.y);

	FXAAConstantBuffer cb;

	InitFXAA();

	cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f);
	cb.rcpFrameOpt = GSVector4::zero();

	m_ctx->UpdateSubresource(m_fxaa.cb, 0, NULL, &cb, 0, 0);

	StretchRect(sTex, sRect, dTex, dRect, m_fxaa.ps, m_fxaa.cb, true);

	//sTex->Save("c:\\temp1\\1.bmp");
	//dTex->Save("c:\\temp1\\2.bmp");
}
Пример #13
0
void GSDevice10::Present(const CRect& r)
{
	CRect cr;

	GetClientRect(m_hWnd, &cr);

	if(m_backbuffer.GetWidth() != cr.Width() || m_backbuffer.GetHeight() != cr.Height())
	{
		Reset(cr.Width(), cr.Height(), false);		
	}

	float color[4] = {0, 0, 0, 0};

	m_dev->ClearRenderTargetView(m_backbuffer, color);

	if(m_current)
	{
		StretchRect(m_current, m_backbuffer, GSVector4(r));
	}

	m_swapchain->Present(m_vsync ? 1 : 0, 0);
}
Пример #14
0
void GSRendererDX9::SetupIA()
{
	D3DPRIMITIVETYPE topology;

	switch(m_vt.m_primclass)
	{
	case GS_POINT_CLASS:

		topology = D3DPT_POINTLIST;

		break;

	case GS_LINE_CLASS:

		topology = D3DPT_LINELIST;

		if(PRIM->IIP == 0)
		{
			for(size_t i = 0, j = m_index.tail; i < j; i += 2) 
			{
				uint32 tmp = m_index.buff[i + 0]; 
				m_index.buff[i + 0] = m_index.buff[i + 1];
				m_index.buff[i + 1] = tmp;
			}
		}

		break;

	case GS_TRIANGLE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		if(PRIM->IIP == 0)
		{
			for(size_t i = 0, j = m_index.tail; i < j; i += 3) 
			{
				uint32 tmp = m_index.buff[i + 0]; 
				m_index.buff[i + 0] = m_index.buff[i + 2];
				m_index.buff[i + 2] = tmp;
			}
		}

		break;

	case GS_SPRITE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		// each sprite converted to quad needs twice the space

		while(m_vertex.tail * 2 > m_vertex.maxcount)
		{
			GrowVertexBuffer();
		}

		// assume vertices are tightly packed and sequentially indexed (it should be the case)

		if(m_vertex.next >= 2)
		{
			size_t count = m_vertex.next;

			int i = (int)count * 2 - 4;
			GSVertex* s = &m_vertex.buff[count - 2];
			GSVertex* q = &m_vertex.buff[count * 2 - 4];
			uint32* RESTRICT index = &m_index.buff[count * 3 - 6];
		
			for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6) 
			{
				GSVertex v0 = s[0];
				GSVertex v1 = s[1];

				v0.RGBAQ = v1.RGBAQ;
				v0.XYZ.Z = v1.XYZ.Z;
				v0.FOG = v1.FOG;

				q[0] = v0;
				q[3] = v1;

				// swap x, s, u

				uint16 x = v0.XYZ.X;
				v0.XYZ.X = v1.XYZ.X;
				v1.XYZ.X = x;

				float s = v0.ST.S;
				v0.ST.S = v1.ST.S;
				v1.ST.S = s;

				uint16 u = v0.U;
				v0.U = v1.U;
				v1.U = u;

				q[1] = v0;
				q[2] = v1;

				index[0] = i + 0;
				index[1] = i + 1;
				index[2] = i + 2;
				index[3] = i + 1;
				index[4] = i + 2;
				index[5] = i + 3;
			}

			m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
			m_index.tail = count * 3;
		}

		break;

	default:
		__assume(0);
	}

	GSDevice9* dev = (GSDevice9*)m_dev;

	(*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO

	void* ptr = NULL;

	if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next))
	{
		GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff;
		GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr;

		for(uint32 i = 0; i < m_vertex.next; i++, s++, d++)
		{
			GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());

			if(PRIM->TME && !PRIM->FST)
			{
				p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
			}
			else
			{
				p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
			}

			GSVector4 t = GSVector4::zero();

			if(PRIM->TME)
			{
				if(PRIM->FST)
				{
					if(UserHacks_WildHack && !isPackedUV_HackFlag)
					{
						t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16());
						//printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : "");
					}
					else
					{
						t = GSVector4(GSVector4i::load(s->UV).upl16());
					}
				}
				else
				{
					t = GSVector4::loadl(&s->ST);
				}
			}

			t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));

			d->p = p;
			d->t = t;
		}

		dev->IAUnmapVertexBuffer();
	}

	dev->IASetIndexBuffer(m_index.buff, m_index.tail);

	dev->IASetPrimitiveTopology(topology);
}
Пример #15
0
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
	GSDrawingEnvironment& env = m_env;
	GSDrawingContext* context = m_context;

	const GSVector2i& rtsize = rt->GetSize();
	const GSVector2& rtscale = rt->GetScale();

	bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;

	GSTexture* rtcopy = NULL;

	ASSERT(m_dev != NULL);

	GSDeviceDX* dev = (GSDeviceDX*)m_dev;

	if(DATE)
	{
		if(dev->HasStencil())
		{
			GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
			GSVector4 o = GSVector4(-1.0f, 1.0f);

			GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
			GSVector4 dst = src * 2.0f + o.xxxx();

			GSVertexPT1 vertices[] =
			{
				{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
				{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
				{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
				{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
			};

			dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
		}
		else
		{
			rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());

			// I'll use VertexTrace when I consider it more trustworthy

			dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
		}
	}

	//

	dev->BeginScene();

	// om

	GSDeviceDX::OMDepthStencilSelector om_dssel;

	if(context->TEST.ZTE)
	{
		om_dssel.ztst = context->TEST.ZTST;
		om_dssel.zwe = !context->ZBUF.ZMSK;
	}
	else
	{
		om_dssel.ztst = ZTST_ALWAYS;
	}

	if(m_fba)
	{
		om_dssel.fba = context->FBA.FBA;
	}

	GSDeviceDX::OMBlendSelector om_bsel;

	if(!IsOpaque())
	{
		om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;

		om_bsel.a = context->ALPHA.A;
		om_bsel.b = context->ALPHA.B;
		om_bsel.c = context->ALPHA.C;
		om_bsel.d = context->ALPHA.D;

		if(env.PABE.PABE)
		{
			if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
			{
				// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
				// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result

				om_bsel.abe = 0;
			}
			else
			{
				//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
				//ASSERT(0);
			}
		}
	}

	om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();

	// vs

	GSDeviceDX::VSSelector vs_sel;

	vs_sel.tme = PRIM->TME;
	vs_sel.fst = PRIM->FST;
	vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
	vs_sel.rtcopy = !!rtcopy;

	// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
	// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
	// We are probably receiving bad coordinates from VU1 in these cases.

	if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
	{
		if(context->ZBUF.PSM == PSM_PSMZ24)
		{
			if(m_vt.m_max.p.z > 0xffffff)
			{
				ASSERT(m_vt.m_min.p.z > 0xffffff);
				// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
				if (m_vt.m_min.p.z > 0xffffff)
				{
					vs_sel.bppz = 1;
					om_dssel.ztst = ZTST_ALWAYS;
				}
			}
		}
		else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
		{
			if(m_vt.m_max.p.z > 0xffff)
			{
				ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
				// Fixme : Same as above, I guess.
				if (m_vt.m_min.p.z > 0xffff)
				{
					vs_sel.bppz = 2;
					om_dssel.ztst = ZTST_ALWAYS;
				}
			}
		}
	}

	GSDeviceDX::VSConstantBuffer vs_cb;

	float sx = 2.0f * rtscale.x / (rtsize.x << 4);
	float sy = 2.0f * rtscale.y / (rtsize.y << 4);
	float ox = (float)(int)context->XYOFFSET.OFX;
	float oy = (float)(int)context->XYOFFSET.OFY;
	float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
	float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;

	//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
	//because DX10 and DX9 have a different pixel center.)
	//
	//The resulting shifted output aligns better with common blending / corona / blurring effects,
	//but introduces a few bad pixels on the edges.

	if(rt->LikelyOffset)
	{
		// DX9 has pixelcenter set to 0.0, so give it some value here

		if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
		
		ox2 *= rt->OffsetHack_modx;
		oy2 *= rt->OffsetHack_mody;
	}

	vs_cb.VertexScale  = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
	vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);

	// gs

	GSDeviceDX::GSSelector gs_sel;

	gs_sel.iip = PRIM->IIP;
	gs_sel.prim = m_vt.m_primclass;

	// ps

	GSDeviceDX::PSSelector ps_sel;
	GSDeviceDX::PSSamplerSelector ps_ssel;
	GSDeviceDX::PSConstantBuffer ps_cb;

	if(DATE)
	{
		if(dev->HasStencil())
		{
			om_dssel.date = 1;
		}
		else
		{
			ps_sel.date = 1 + context->TEST.DATM;
		}
	}

	if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
	{
		ps_sel.colclip = 1;
	}

	ps_sel.clr1 = om_bsel.IsCLR1();
	ps_sel.fba = context->FBA.FBA;
	ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
		
	if(UserHacks_AlphaHack) ps_sel.aout = 1;

	if(PRIM->FGE)
	{
		ps_sel.fog = 1;

		ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
	}

	if(context->TEST.ATE)
	{
		ps_sel.atst = context->TEST.ATST;

		switch(ps_sel.atst)
		{
		case ATST_LESS:
			ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
			break;
		case ATST_GREATER:
			ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
			break;
		default:
			ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
			break;
		}
	}
	else
	{
		ps_sel.atst = ATST_ALWAYS;
	}

	if(tex)
	{
		ps_sel.wms = context->CLAMP.WMS;
		ps_sel.wmt = context->CLAMP.WMT;
		ps_sel.fmt = tex->m_fmt;
		ps_sel.aem = env.TEXA.AEM;
		ps_sel.tfx = context->TEX0.TFX;
		ps_sel.tcc = context->TEX0.TCC;
		ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
		ps_sel.rt = tex->m_target;

		int w = tex->m_texture->GetWidth();
		int h = tex->m_texture->GetHeight();

		int tw = (int)(1 << context->TEX0.TW);
		int th = (int)(1 << context->TEX0.TH);

		GSVector4 WH(tw, th, w, h);

		if(PRIM->FST)
		{
			vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
			//Maybe better?
			//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
			ps_sel.fst = 1;
		}

		ps_cb.WH = WH;
		ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
		ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);

		GSVector4 clamp(ps_cb.MskFix);
		GSVector4 ta(env.TEXA & GSVector4i::x000000ff());

		ps_cb.MinMax = clamp / WH.xyxy();
		ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));

		ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
		ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
		ps_ssel.ltf = ps_sel.ltf;
	}
	else
	{
Пример #16
0
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{
	if (!t->m_dirty.empty() || r.width() == 0 || r.height() == 0)
		return;

	const GIFRegTEX0& TEX0 = t->m_TEX0;

	GLuint fmt;
	int ps_shader;
	switch (TEX0.PSM)
	{
		case PSM_PSMCT32:
		case PSM_PSMCT24:
			fmt = GL_RGBA8;
			ps_shader = ShaderConvert_COPY;
			break;

		case PSM_PSMCT16:
		case PSM_PSMCT16S:
			fmt = GL_R16UI;
			ps_shader = ShaderConvert_RGBA8_TO_16_BITS;
			break;

		case PSM_PSMZ32:
			fmt = GL_R32UI;
			ps_shader = ShaderConvert_FLOAT32_TO_32_BITS;
			break;

		case PSM_PSMZ24:
			fmt = GL_R32UI;
			ps_shader = ShaderConvert_FLOAT32_TO_32_BITS;
			break;

		case PSM_PSMZ16:
		case PSM_PSMZ16S:
			fmt = GL_R16UI;
			ps_shader = ShaderConvert_FLOAT32_TO_32_BITS;
			break;

		default:
			return;
	}


	// Yes lots of logging, but I'm not confident with this code
	GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM);

	GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d",
			t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height());

	GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();

	if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader))
	{
		GSTexture::GSMap m;
		GSVector4i r_offscreen(0, 0, r.width(), r.height());

		if(offscreen->Map(m, &r_offscreen))
		{
			// TODO: block level write

			GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

			switch(TEX0.PSM)
			{
				case PSM_PSMCT32:
					m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
					break;
				case PSM_PSMCT24:
					m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r);
					break;
				case PSM_PSMCT16:
				case PSM_PSMCT16S:
					m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r);
					break;

				case PSM_PSMZ32:
					m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
					break;
				case PSM_PSMZ24:
					m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r);
					break;
				case PSM_PSMZ16:
				case PSM_PSMZ16S:
					m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r);
					break;

				default:
					ASSERT(0);
			}

			offscreen->Unmap();
		}

		// FIXME invalidate data
		m_renderer->m_dev->Recycle(offscreen);
	}
}
Пример #17
0
void GSRendererDX9::VertexKick(bool skip)
{
	GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16());

	if(tme && !fst)
	{
		p = p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q));
	}
	else
	{
		p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z));
	}

	GSVertexHW9& dst = m_vl.AddTail();

	dst.p = p;

	int Uadjust = 0;
	int Vadjust = 0;

	if(tme)
	{
		if(fst)
		{
			dst.t = m_v.GetUV();

			#ifdef USE_UPSCALE_HACKS

			int Udiff = 0;
			int Vdiff = 0;

			int multiplier = GetUpscaleMultiplier();

			if(multiplier > 1)
			{
				Udiff = m_v.UV.U & 4095;
				Vdiff = m_v.UV.V & 4095;

				if(Udiff != 0)
				{
					if		(Udiff >= 4080)	{/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/  Uadjust = -1; }
					else if (Udiff <= 16)	{/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/  Uadjust = 1; }
				}
				
				if(Vdiff != 0)
				{
					if		(Vdiff >= 4080)	{/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/  Vadjust = -1; }
					else if	(Vdiff <= 16)	{/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/  Vadjust = 1; }
				}

				Udiff = m_v.UV.U & 255;
				Vdiff = m_v.UV.V & 255;

				if(Udiff != 0)
				{
					if		(Udiff >= 248)	{ Uadjust = -1;	}
					else if (Udiff <= 8)	{ Uadjust = 1; }
				}

				if(Vdiff != 0)
				{
					if		(Vdiff >= 248)	{ Vadjust = -1;	}
					else if	(Vdiff <= 8)	{ Vadjust = 1; }
				}

				Udiff = m_v.UV.U & 15;
				Vdiff = m_v.UV.V & 15;

				if(Udiff != 0)
				{
					if		(Udiff >= 15)	{ Uadjust = -1; }
					else if (Udiff <= 1)	{ Uadjust = 1; }
				}

				if(Vdiff != 0)
				{
					if		(Vdiff >= 15)	{ Vadjust = -1; }
					else if	(Vdiff <= 1)	{ Vadjust = 1; }
				}
			}

			dst.t.x -= (float) Uadjust;
			dst.t.y -= (float) Vadjust;

			#endif
		}
		else
		{
			dst.t = GSVector4::loadl(&m_v.ST);
		}
	}

	dst._c0() = m_v.RGBAQ.u32[0];
	dst._c1() = m_v.FOG.u32[1];

	//

	// BaseDrawingKick can never return NULL here because the DrawingKick function
	// tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only
	// condition where this function would return NULL).

	int count = 0;
	
	if(GSVertexHW9* v = DrawingKick<prim>(skip, count))
	{
		GSVector4 scissor = m_context->scissor.dx9;

		GSVector4 pmin, pmax;

		switch(prim)
		{
		case GS_POINTLIST:
			pmin = v[0].p;
			pmax = v[0].p;
			break;
		case GS_LINELIST:
		case GS_LINESTRIP:
		case GS_SPRITE:
			pmin = v[0].p.min(v[1].p);
			pmax = v[0].p.max(v[1].p);
			break;
		case GS_TRIANGLELIST:
		case GS_TRIANGLESTRIP:
		case GS_TRIANGLEFAN:
			pmin = v[0].p.min(v[1].p).min(v[2].p);
			pmax = v[0].p.max(v[1].p).max(v[2].p);
			break;
		}

		GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());

		switch(prim)
		{
		case GS_TRIANGLELIST:
		case GS_TRIANGLESTRIP:
		case GS_TRIANGLEFAN:
		case GS_SPRITE:
			test |= pmin == pmax;
			break;
		}

		if(test.mask() & 3)
		{
			return;
		}

		switch(prim)
		{
		case GS_POINTLIST:
			break;
		case GS_LINELIST:
		case GS_LINESTRIP:
			if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
			break;
		case GS_TRIANGLELIST:
		case GS_TRIANGLESTRIP:
		case GS_TRIANGLEFAN:
			if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0() = v[2]._c0();}
			break;
		case GS_SPRITE:
			if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
			v[0].p.z = v[1].p.z;
			v[0].p.w = v[1].p.w;
			v[0]._c1() = v[1]._c1();
			v[2] = v[1];
			v[3] = v[1];
			v[1].p.y = v[0].p.y;
			v[1].t.y = v[0].t.y;
			v[2].p.x = v[0].p.x;
			v[2].t.x = v[0].t.x;
			v[4] = v[1];
			v[5] = v[2];
			count += 4;
			break;
		}

		m_count += count;
	}
}
Пример #18
0
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
{
	const GSDrawingContext* context = m_state->m_context;

	int n = 1;

	switch(primclass)
	{
	case GS_POINT_CLASS:
		n = 1;
		break;
	case GS_LINE_CLASS:
	case GS_SPRITE_CLASS:
		n = 2;
		break;
	case GS_TRIANGLE_CLASS:
		n = 3;
		break;
	}

	GSVector4 tmin = s_minmax.xxxx();
	GSVector4 tmax = s_minmax.yyyy();
	GSVector4i cmin = GSVector4i::xffffffff();
	GSVector4i cmax = GSVector4i::zero();

	#if _M_SSE >= 0x401

	GSVector4i pmin = GSVector4i::xffffffff();
	GSVector4i pmax = GSVector4i::zero();

	#else

	GSVector4 pmin = s_minmax.xxxx();
	GSVector4 pmax = s_minmax.yyyy();
	
	#endif

	const GSVertex* RESTRICT v = (GSVertex*)vertex;

	for(int i = 0; i < count; i += n)
	{
		if(primclass == GS_POINT_CLASS)
		{
			GSVector4i c(v[index[i]].m[0]);

			if(color)
			{
				cmin = cmin.min_u8(c);
				cmax = cmax.max_u8(c);
			}

			if(tme)
			{
				if(!fst)
				{
					GSVector4 stq = GSVector4::cast(c);

					GSVector4 q = stq.wwww();

					stq = (stq.xyww() * q.rcpnr()).xyww(q);

					tmin = tmin.min(stq);
					tmax = tmax.max(stq);
				}
				else
				{
					GSVector4i uv(v[index[i]].m[1]);

					GSVector4 st = GSVector4(uv.uph16()).xyxy();

					tmin = tmin.min(st);
					tmax = tmax.max(st);
				}
			}

			GSVector4i xyzf(v[index[i]].m[1]);

			GSVector4i xy = xyzf.upl16();
			GSVector4i z = xyzf.yyyy();

			#if _M_SSE >= 0x401

			GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf));

			pmin = pmin.min_u32(p);
			pmax = pmax.max_u32(p);

			#else

			GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww())));

			pmin = pmin.min(p);
			pmax = pmax.max(p);

			#endif
		}
		else if(primclass == GS_LINE_CLASS)
		{
			GSVector4i c0(v[index[i + 0]].m[0]);
			GSVector4i c1(v[index[i + 1]].m[0]);

			if(color)
			{
				if(iip)
				{
					cmin = cmin.min_u8(c0.min_u8(c1));
					cmax = cmax.max_u8(c0.max_u8(c1));
				}
				else
				{
					cmin = cmin.min_u8(c1);
					cmax = cmax.max_u8(c1);
				}
			}

			if(tme)
			{
				if(!fst)
				{
					GSVector4 stq0 = GSVector4::cast(c0);
					GSVector4 stq1 = GSVector4::cast(c1);

					GSVector4 q = stq0.wwww(stq1).rcpnr();

					stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
					stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);

					tmin = tmin.min(stq0.min(stq1));
					tmax = tmax.max(stq0.max(stq1));
				}
				else
				{
					GSVector4i uv0(v[index[i + 0]].m[1]);
					GSVector4i uv1(v[index[i + 1]].m[1]);

					GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
					GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();

					tmin = tmin.min(st0.min(st1));
					tmax = tmax.max(st0.max(st1));
				}
			}

			GSVector4i xyzf0(v[index[i + 0]].m[1]);
			GSVector4i xyzf1(v[index[i + 1]].m[1]);

			GSVector4i xy0 = xyzf0.upl16();
			GSVector4i z0 = xyzf0.yyyy();
			GSVector4i xy1 = xyzf1.upl16();
			GSVector4i z1 = xyzf1.yyyy();

			#if _M_SSE >= 0x401

			GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
			GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));

			pmin = pmin.min_u32(p0.min_u32(p1));
			pmax = pmax.max_u32(p0.max_u32(p1));

			#else

			GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
			GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));

			pmin = pmin.min(p0.min(p1));
			pmax = pmax.max(p0.max(p1));

			#endif
		}
		else if(primclass == GS_TRIANGLE_CLASS)
		{
			GSVector4i c0(v[index[i + 0]].m[0]);
			GSVector4i c1(v[index[i + 1]].m[0]);
			GSVector4i c2(v[index[i + 2]].m[0]);

			if(color)
			{
				if(iip)
				{
					cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1));
					cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1));
				}
				else
				{
					cmin = cmin.min_u8(c2);
					cmax = cmax.max_u8(c2);
				}
			}

			if(tme)
			{
				if(!fst)
				{
					GSVector4 stq0 = GSVector4::cast(c0);
					GSVector4 stq1 = GSVector4::cast(c1);
					GSVector4 stq2 = GSVector4::cast(c2);

					GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();

					stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
					stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
					stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);

					tmin = tmin.min(stq2).min(stq0.min(stq1));
					tmax = tmax.max(stq2).max(stq0.max(stq1));
				}
				else
				{
					GSVector4i uv0(v[index[i + 0]].m[1]);
					GSVector4i uv1(v[index[i + 1]].m[1]);
					GSVector4i uv2(v[index[i + 2]].m[1]);

					GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
					GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
					GSVector4 st2 = GSVector4(uv2.uph16()).xyxy();

					tmin = tmin.min(st2).min(st0.min(st1));
					tmax = tmax.max(st2).max(st0.max(st1));
				}
			}

			GSVector4i xyzf0(v[index[i + 0]].m[1]);
			GSVector4i xyzf1(v[index[i + 1]].m[1]);
			GSVector4i xyzf2(v[index[i + 2]].m[1]);

			GSVector4i xy0 = xyzf0.upl16();
			GSVector4i z0 = xyzf0.yyyy();
			GSVector4i xy1 = xyzf1.upl16();
			GSVector4i z1 = xyzf1.yyyy();
			GSVector4i xy2 = xyzf2.upl16();
			GSVector4i z2 = xyzf2.yyyy();

			#if _M_SSE >= 0x401

			GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
			GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
			GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2));

			pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1));
			pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1));

			#else

			GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
			GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
			GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww())));

			pmin = pmin.min(p2).min(p0.min(p1));
			pmax = pmax.max(p2).max(p0.max(p1));

			#endif
		}
		else if(primclass == GS_SPRITE_CLASS)
		{
			GSVector4i c0(v[index[i + 0]].m[0]);
			GSVector4i c1(v[index[i + 1]].m[0]);

			if(color)
			{
				if(iip)
				{
					cmin = cmin.min_u8(c0.min_u8(c1));
					cmax = cmax.max_u8(c0.max_u8(c1));
				}
				else
				{
					cmin = cmin.min_u8(c1);
					cmax = cmax.max_u8(c1);
				}
			}

			if(tme)
			{
				if(!fst)
				{
					GSVector4 stq0 = GSVector4::cast(c0);
					GSVector4 stq1 = GSVector4::cast(c1);

					GSVector4 q = stq1.wwww().rcpnr();

					stq0 = (stq0.xyww() * q).xyww(stq1);
					stq1 = (stq1.xyww() * q).xyww(stq1);

					tmin = tmin.min(stq0.min(stq1));
					tmax = tmax.max(stq0.max(stq1));
				}
				else
				{
					GSVector4i uv0(v[index[i + 0]].m[1]);
					GSVector4i uv1(v[index[i + 1]].m[1]);

					GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
					GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();

					tmin = tmin.min(st0.min(st1));
					tmax = tmax.max(st0.max(st1));
				}
			}

			GSVector4i xyzf0(v[index[i + 0]].m[1]);
			GSVector4i xyzf1(v[index[i + 1]].m[1]);

			GSVector4i xy0 = xyzf0.upl16();
			GSVector4i z0 = xyzf0.yyyy();
			GSVector4i xy1 = xyzf1.upl16();
			GSVector4i z1 = xyzf1.yyyy();

			#if _M_SSE >= 0x401

			GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1));
			GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));

			pmin = pmin.min_u32(p0.min_u32(p1));
			pmax = pmax.max_u32(p0.max_u32(p1));

			#else

			GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww())));
			GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));

			pmin = pmin.min(p0.min(p1));
			pmax = pmax.max(p0.max(p1));

			#endif
		}
	}

	#if _M_SSE >= 0x401

	pmin = pmin.blend16<0x30>(pmin.srl32(1));
	pmax = pmax.blend16<0x30>(pmax.srl32(1));

	#endif

	GSVector4 o(context->XYOFFSET);
	GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);

	m_min.p = (GSVector4(pmin) - o) * s;
	m_max.p = (GSVector4(pmax) - o) * s;

	if(tme)
	{
		if(fst)
		{
			s = GSVector4(1.0f / 16, 1.0f).xxyy();
		}
		else
		{
			s = GSVector4(1 << context->TEX0.TW, 1 << context->TEX0.TH, 1, 1);
		}

		m_min.t = tmin * s;
		m_max.t = tmax * s;
	}
	else
	{
		m_min.t = GSVector4::zero();
		m_max.t = GSVector4::zero();
	}

	if(color)
	{
		m_min.c = cmin.zzzz().u8to32();
		m_max.c = cmax.zzzz().u8to32();
	}
	else
	{
		m_min.c = GSVector4i::zero();
		m_max.c = GSVector4i::zero();
	}
}
Пример #19
0
void GSRendererDX9::SetupIA(const float& sx, const float& sy)
{
	D3DPRIMITIVETYPE topology;

	switch(m_vt.m_primclass)
	{
	case GS_POINT_CLASS:

		topology = D3DPT_POINTLIST;

		break;

	case GS_LINE_CLASS:

		topology = D3DPT_LINELIST;

		if(PRIM->IIP == 0)
		{
			for(size_t i = 0, j = m_index.tail; i < j; i += 2) 
			{
				uint32 tmp = m_index.buff[i + 0]; 
				m_index.buff[i + 0] = m_index.buff[i + 1];
				m_index.buff[i + 1] = tmp;
			}
		}

		break;

	case GS_TRIANGLE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		if(PRIM->IIP == 0)
		{
			for(size_t i = 0, j = m_index.tail; i < j; i += 3) 
			{
				uint32 tmp = m_index.buff[i + 0]; 
				m_index.buff[i + 0] = m_index.buff[i + 2];
				m_index.buff[i + 2] = tmp;
			}
		}

		break;

	case GS_SPRITE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		// each sprite converted to quad needs twice the space

		Lines2Sprites();

		break;

	default:
		__assume(0);
	}

	GSDevice9* dev = (GSDevice9*)m_dev;

	(*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO

	void* ptr = NULL;

	if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next))
	{
		GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff;
		GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr;

		for(uint32 i = 0; i < m_vertex.next; i++, s++, d++)
		{
			GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());

			if(PRIM->TME && !PRIM->FST)
			{
				p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
			}
			else
			{
				p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
			}

			GSVector4 t = GSVector4::zero();

			if(PRIM->TME)
			{
				if(PRIM->FST)
				{
					if(UserHacks_WildHack && !isPackedUV_HackFlag)
					{
						t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16());
						//printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : "");
					}
					else
					{
						t = GSVector4(GSVector4i::load(s->UV).upl16());
					}
				}
				else
				{
					t = GSVector4::loadl(&s->ST);
				}
			}

			t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));

			d->p = p;
			d->t = t;
		}

		dev->IAUnmapVertexBuffer();
	}

	dev->IASetIndexBuffer(m_index.buff, m_index.tail);

	dev->IASetPrimitiveTopology(topology);
}
Пример #20
0
void GSRenderer::VSync(int field)
{
	GSPerfMonAutoTimer pmat(&m_perfmon);

	m_perfmon.Put(GSPerfMon::Frame);

	Flush();

	if(!m_dev->IsLost(true))
	{
		if(!Merge(field ? 1 : 0))
		{
			return;
		}
	}
	else
	{
		ResetDevice();
	}

	m_dev->AgePool();

	// osd

	if((m_perfmon.GetFrame() & 0x1f) == 0)
	{
		m_perfmon.Update();

		double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame);

		string s;

#ifdef GSTITLEINFO_API_FORCE_VERBOSE
		if(1)//force verbose reply
#else
		if(m_wnd->IsManaged())
#endif
		{
			//GSdx owns the window's title, be verbose.

			string s2 = m_regs->SMODE2.INT ? (string("Interlaced ") + (m_regs->SMODE2.FFMD ? "(frame)" : "(field)")) : "Progressive";

			s = format(
				"%lld | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d S/%d P/%d D | %d%% CPU | %.2f | %.2f",
				m_perfmon.GetFrame(), GetInternalResolution().x, GetInternalResolution().y, fps, (int)(100.0 * fps / GetTvRefreshRate()),
				s2.c_str(),
				theApp.m_gs_interlace[m_interlace].name.c_str(),
				theApp.m_gs_aspectratio[m_aspectratio].name.c_str(),
				(int)m_perfmon.Get(GSPerfMon::SyncPoint),
				(int)m_perfmon.Get(GSPerfMon::Prim),
				(int)m_perfmon.Get(GSPerfMon::Draw),
				m_perfmon.CPU(),
				m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
				m_perfmon.Get(GSPerfMon::Unswizzle) / 1024
			);

			double fillrate = m_perfmon.Get(GSPerfMon::Fillrate);

			if(fillrate > 0)
			{
				s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024));

				int sum = 0;

				for(int i = 0; i < 16; i++)
				{
					sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i);
				}

				s += format(" | %d%% CPU", sum);
			}
		}
		else
		{
			// Satisfy PCSX2's request for title info: minimal verbosity due to more external title text

			s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str());
		}

		if(m_capture.IsCapturing())
		{
			s += " | Recording...";
		}

		if(m_wnd->IsManaged())
		{
			m_wnd->SetWindowText(s.c_str());
		}
		else
		{
			// note: do not use TryEnterCriticalSection.  It is unnecessary code complication in
			// an area that absolutely does not matter (even if it were 100 times slower, it wouldn't
			// be noticeable).  Besides, these locks are extremely short -- overhead of conditional
			// is way more expensive than just waiting for the CriticalSection in 1 of 10,000,000 tries. --air

			std::lock_guard<std::mutex> lock(m_pGSsetTitle_Crit);

			strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1);

			m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0; // make sure null terminated even if text overflows
		}
	}
	else
	{
		// [TODO]
		// We don't have window title rights, or the window has no title,
		// so let's use actual OSD!
	}

	if(m_frameskip)
	{
		return;
	}

	// present

	m_dev->Present(m_wnd->GetClientRect().fit(m_aspectratio), m_shader);

	// snapshot

	if(!m_snapshot.empty())
	{
		bool shift = false;

		#ifdef _WIN32

		shift = !!(::GetAsyncKeyState(VK_SHIFT) & 0x8000);

		#else

		shift = m_shift_key;

		#endif

		if(!m_dump && shift)
		{
			GSFreezeData fd;
			fd.size = 0;
			fd.data = NULL;
			Freeze(&fd, true);
			fd.data = new uint8[fd.size];
			Freeze(&fd, false);

			m_dump.Open(m_snapshot, m_crc, fd, m_regs);

			delete [] fd.data;
		}

		if(GSTexture* t = m_dev->GetCurrent())
		{
			t->Save(m_snapshot + ".bmp");
		}

		m_snapshot.clear();
	}
	else
	{
		if(m_dump)
		{
            bool control = false;

            #ifdef _WIN32

            control = !!(::GetAsyncKeyState(VK_CONTROL) & 0x8000);

			#else

			control = m_control_key;

            #endif

	    	m_dump.VSync(field, !control, m_regs);
		}
	}

	// capture

	if(m_capture.IsCapturing())
	{
		if(GSTexture* current = m_dev->GetCurrent())
		{
			GSVector2i size = m_capture.GetSize();

			if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y))
			{
				GSTexture::GSMap m;

				if(offscreen->Map(m))
				{
					m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped());

					offscreen->Unmap();
				}

				m_dev->Recycle(offscreen);
			}
		}
	}
}
Пример #21
0
GSVector4 GSVector4::cast(const GSVector4i& v)
{
	return GSVector4(_mm_castsi128_ps(v.m));
}
Пример #22
0
void GSDevice10::StretchRect(Texture& st, Texture& dt, const GSVector4& dr, bool linear)
{
	StretchRect(st, GSVector4(0, 0, 1, 1), dt, dr, linear);
}
Пример #23
0
void GSVertexTrace::InitVectors()
{
	s_minmax = GSVector4(FLT_MAX, -FLT_MAX);
}
Пример #24
0
void GSRenderer::VSync(int field)
{
	GSPerfMonAutoTimer pmat(&m_perfmon);

	m_perfmon.Put(GSPerfMon::Frame);

	Flush();

	if(s_dump && s_n >= s_saven)
	{
		m_regs->Dump(root_sw + format("%05d_f%lld_gs_reg.txt", s_n, m_perfmon.GetFrame()));
	}

	if(!m_dev->IsLost(true))
	{
		if(!Merge(field ? 1 : 0))
		{
			return;
		}
	}
	else
	{
		ResetDevice();
	}

	m_dev->AgePool();

	// osd

	if((m_perfmon.GetFrame() & 0x1f) == 0)
	{
		m_perfmon.Update();

		double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame);

		string s;

#ifdef GSTITLEINFO_API_FORCE_VERBOSE
		if(1)//force verbose reply
#else
		if(m_wnd->IsManaged())
#endif
		{
			//GSdx owns the window's title, be verbose.

			string s2 = m_regs->SMODE2.INT ? (string("Interlaced ") + (m_regs->SMODE2.FFMD ? "(frame)" : "(field)")) : "Progressive";

			s = format(
				"%lld | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d S/%d P/%d D | %d%% CPU | %.2f | %.2f",
				m_perfmon.GetFrame(), GetInternalResolution().x, GetInternalResolution().y, fps, (int)(100.0 * fps / GetTvRefreshRate()),
				s2.c_str(),
				theApp.m_gs_interlace[m_interlace].name.c_str(),
				theApp.m_gs_aspectratio[m_aspectratio].name.c_str(),
				(int)m_perfmon.Get(GSPerfMon::SyncPoint),
				(int)m_perfmon.Get(GSPerfMon::Prim),
				(int)m_perfmon.Get(GSPerfMon::Draw),
				m_perfmon.CPU(),
				m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
				m_perfmon.Get(GSPerfMon::Unswizzle) / 1024
			);

			double fillrate = m_perfmon.Get(GSPerfMon::Fillrate);

			if(fillrate > 0)
			{
				s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024));

				int sum = 0;

				for(int i = 0; i < 16; i++)
				{
					sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i);
				}

				s += format(" | %d%% CPU", sum);
			}
		}
		else
		{
			// Satisfy PCSX2's request for title info: minimal verbosity due to more external title text

			s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str());
		}

		if(m_capture.IsCapturing())
		{
			s += " | Recording...";
		}

		if(m_wnd->IsManaged())
		{
			m_wnd->SetWindowText(s.c_str());
		}
		else
		{
			// note: do not use TryEnterCriticalSection.  It is unnecessary code complication in
			// an area that absolutely does not matter (even if it were 100 times slower, it wouldn't
			// be noticeable).  Besides, these locks are extremely short -- overhead of conditional
			// is way more expensive than just waiting for the CriticalSection in 1 of 10,000,000 tries. --air

			std::lock_guard<std::mutex> lock(m_pGSsetTitle_Crit);

			strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1);

			m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0; // make sure null terminated even if text overflows
		}
	}
	else
	{
		// [TODO]
		// We don't have window title rights, or the window has no title,
		// so let's use actual OSD!
	}

	if(m_frameskip)
	{
		return;
	}

	// present

#if 0
	// This will scale the OSD to the PS2's output resolution.
	// Will be affected by 2x, 4x, etc scaling.
	m_dev->m_osd.m_real_size = m_real_size
#elif 0
	// This will scale the OSD to the window's size.
	// Will maintiain the font size no matter what size the window is.
	GSVector4i window_size = m_wnd->GetClientRect();
	m_dev->m_osd.m_real_size.x = window_size.v[2];
	m_dev->m_osd.m_real_size.y = window_size.v[3];
#else
	// This will scale the OSD to the native resolution.
	// Will size font relative to the window's size.
  // TODO this should probably be done with native calls
	m_dev->m_osd.m_real_size.x = 1024;
	m_dev->m_osd.m_real_size.y = 768;
#endif
	m_dev->Present(m_wnd->GetClientRect().fit(m_aspectratio), m_shader);

	// snapshot

	if(!m_snapshot.empty())
	{
		if(!m_dump && m_shift_key)
		{
			GSFreezeData fd = {0, nullptr};
			Freeze(&fd, true);
			fd.data = new uint8[fd.size];
			Freeze(&fd, false);

#ifdef LZMA_SUPPORTED
			if (m_control_key)
				m_dump = std::unique_ptr<GSDumpBase>(new GSDump(m_snapshot, m_crc, fd, m_regs));
			else
				m_dump = std::unique_ptr<GSDumpBase>(new GSDumpXz(m_snapshot, m_crc, fd, m_regs));
#else
			m_dump = std::unique_ptr<GSDumpBase>(new GSDump(m_snapshot, m_crc, fd, m_regs));
#endif

			delete [] fd.data;
		}

		if(GSTexture* t = m_dev->GetCurrent())
		{
			t->Save(m_snapshot + ".bmp");
		}

		m_snapshot.clear();
	}
	else if(m_dump)
	{
		if(m_dump->VSync(field, !m_control_key, m_regs))
			m_dump.reset();
	}

	// capture

	if(m_capture.IsCapturing())
	{
		if(GSTexture* current = m_dev->GetCurrent())
		{
			GSVector2i size = m_capture.GetSize();

			if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y))
			{
				GSTexture::GSMap m;

				if(offscreen->Map(m))
				{
					m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped());

					offscreen->Unmap();
				}

				m_dev->Recycle(offscreen);
			}
		}
	}
}
Пример #25
0
void GSDevice::StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear)
{
	StretchRect(sTex, GSVector4(0, 0, 1, 1), dTex, dRect, shader, linear);
}
Пример #26
0
bool GSRenderer::Merge(int field)
{
	bool en[2];

	GSVector4i fr[2];
	GSVector4i dr[2];

	GSVector2i display_baseline = { INT_MAX, INT_MAX };
	GSVector2i frame_baseline = { INT_MAX, INT_MAX };

	for(int i = 0; i < 2; i++)
	{
		en[i] = IsEnabled(i);

		if(en[i])
		{
			fr[i] = GetFrameRect(i);
			dr[i] = GetDisplayRect(i);

			display_baseline.x = min(dr[i].left, display_baseline.x);
			display_baseline.y = min(dr[i].top, display_baseline.y);
			frame_baseline.x = min(fr[i].left, frame_baseline.x);
			frame_baseline.y = min(fr[i].top, frame_baseline.y);

			//printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w);
		}
	}

	if(!en[0] && !en[1])
	{
		return false;
	}

	GL_PUSH("Renderer Merge %d (0: enabled %d 0x%x, 1: enabled %d 0x%x)", s_n, en[0], m_regs->DISP[0].DISPFB.Block(), en[1], m_regs->DISP[1].DISPFB.Block());

	// try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero)
	//
	// NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending)

	bool samesrc =
		en[0] && en[1] &&
		m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP &&
		m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW &&
		m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM;

	if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/)
	{
		// persona 4:
		//
		// fr[0] = 0 0 640 448
		// fr[1] = 0 1 640 448
		// dr[0] = 159 50 779 498
		// dr[1] = 159 50 779 497
		//
		// second image shifted up by 1 pixel and blended over itself
		//
		// god of war:
		//
		// fr[0] = 0 1 512 448
		// fr[1] = 0 0 512 448
		// dr[0] = 127 50 639 497
		// dr[1] = 127 50 639 498
		//
		// same just the first image shifted
		//
		// These kinds of cases are now fixed by the more generic frame_diff code below, as the code here was too specific and has become obsolete.
		// NOTE: Persona 4 and God Of War are not rare exceptions, many games have the same(or very similar) offsets.

		int topDiff = fr[0].top - fr[1].top;
		if (dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, topDiff, 0, topDiff)) || fr[1].eq(fr[0] + GSVector4i(0, topDiff, 0, topDiff))))
		{
			// dq5:
			//
			// fr[0] = 0 1 512 445
			// fr[1] = 0 0 512 444
			// dr[0] = 127 50 639 494
			// dr[1] = 127 50 639 494

			int top = min(fr[0].top, fr[1].top);
			int bottom = min(fr[0].bottom, fr[1].bottom);

			fr[0].top = fr[1].top = top;
			fr[0].bottom = fr[1].bottom = bottom;
		}
	}

	GSVector2i fs(0, 0);
	GSVector2i ds(0, 0);

	GSTexture* tex[3] = {NULL, NULL, NULL};
	int y_offset[3]   = {0, 0, 0};

	s_n++;

	bool feedback_merge = m_regs->EXTWRITE.WRITE == 1;

	if(samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge)
	{
		tex[0]      = GetOutput(0, y_offset[0]);
		tex[1]      = tex[0]; // saves one texture fetch
		y_offset[1] = y_offset[0];
	}
	else
	{
		if(en[0]) tex[0] = GetOutput(0, y_offset[0]);
		if(en[1]) tex[1] = GetOutput(1, y_offset[1]);
		if(feedback_merge) tex[2] = GetFeedbackOutput();
	}

	GSVector4 src[2];
	GSVector4 src_hw[2];
	GSVector4 dst[2];

	for(int i = 0; i < 2; i++)
	{
		if(!en[i] || !tex[i]) continue;

		GSVector4i r = fr[i];
		GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();

		src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy();
		src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy();

		GSVector2 off(0);
		GSVector2i display_diff(dr[i].left - display_baseline.x, dr[i].top - display_baseline.y);
		GSVector2i frame_diff(fr[i].left - frame_baseline.x, fr[i].top - frame_baseline.y);

		// Time Crisis 2/3 uses two side by side images when in split screen mode.
		// Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII
		if(display_diff.x > 2)
		{
			off.x = tex[i]->GetScale().x * display_diff.x;
		}
		// If the DX offset is too small then consider the status of frame memory offsets, prevents blurring on Tenchu: Fatal Shadows, Worms 3D
		else if(display_diff.x != frame_diff.x)
		{
			off.x = tex[i]->GetScale().x * frame_diff.x;
		}

		if(display_diff.y >= 4) // Shouldn't this be >= 2?
		{
			off.y = tex[i]->GetScale().y * display_diff.y;

			if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
			{
				off.y /= 2;
			}
		}
		else if(display_diff.y != frame_diff.y)
		{
			off.y = tex[i]->GetScale().y * frame_diff.y;
		}

		dst[i] = GSVector4(off).xyxy() + scale * GSVector4(r.rsize());

		fs.x = max(fs.x, (int)(dst[i].z + 0.5f));
		fs.y = max(fs.y, (int)(dst[i].w + 0.5f));
	}

	ds = fs;

	if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
	{
		ds.y *= 2;
	}
	m_real_size = ds;

	bool slbg = m_regs->PMODE.SLBG;

	if(tex[0] || tex[1])
	{
		if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue())
		{
			// the two outputs are identical, skip drawing one of them (the one that is alpha blended)

			tex[0] = NULL;
		}

		GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255;

		m_dev->Merge(tex, src_hw, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c);

		if(m_regs->SMODE2.INT && m_interlace > 0)
		{
			if(m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting
			{
				int field2 = 0;
				int mode = 2;
				m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y);
			}
			else
			{
				int field2 = 1 - ((m_interlace - 1) & 1);
				int mode = (m_interlace - 1) >> 1;
				m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y);
			}
		}

		if(m_shadeboost)
		{
			m_dev->ShadeBoost();
		}

		if(m_shaderfx)
		{
			m_dev->ExternalFX();
		}

		if(m_fxaa)
		{
			m_dev->FXAA();
		}
	}

	return true;
}
Пример #27
0
			s_n++;
		}
	}

	return m_texture[i];
}

template<uint32 primclass, uint32 tme, uint32 fst>
void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
{
	#if 0//_M_SSE >= 0x501

	// TODO: something isn't right here, this makes other functions slower (split load/store? old sse code in 3rd party lib?)

	GSVector8i o2((GSVector4i)m_context->XYOFFSET);
	GSVector8 tsize2(GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0));

	for(int i = (int)m_vertex.next; i > 0; i -= 2, src += 2, dst += 2) // ok to overflow, allocator makes sure there is one more dummy vertex
	{
		GSVector8i v0 = GSVector8i::load<true>(src[0].m);
		GSVector8i v1 = GSVector8i::load<true>(src[1].m);

		GSVector8 stcq = GSVector8::cast(v0.ac(v1));
		GSVector8i xyzuvf = v0.bd(v1);

		//GSVector8 stcq = GSVector8::load(&src[0].m[0], &src[1].m[0]);
		//GSVector8i xyzuvf = GSVector8i::load(&src[0].m[1], &src[1].m[1]);

		GSVector8i xy = xyzuvf.upl16() - o2;
		GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00());
Пример #28
0
		movdqa(xmm1, xmm0);
		pshufd(xmm1, xmm1, _MM_SHUFFLE(1, 0, 3, 2));
		punpcklwd(xmm0, xmm1);

		// if(!tme) c = c.srl16(7);

		if(m_env.sel.tfx == TFX_NONE)
		{
			psrlw(xmm0, 7);
		}

		// m_env.c.rb = c.xxxx();
		// m_env.c.ga = c.zzzz();

		movdqa(xmm1, xmm0);
		pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
		pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
		movdqa(xmmword[&m_env.c.rb], xmm0);
		movdqa(xmmword[&m_env.c.ga], xmm1);
	}
}

const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] = 
{
	GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
	GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
	GSVector4(-1.0f, 0.0f, 1.0f, 2.0f),
	GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
	GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
};
Пример #29
0
void GSTextureCache9::Read(Target* t, const GSVector4i& r)
{
	if(t->m_type != RenderTarget)
	{
		// TODO

		return;
	}

	const GIFRegTEX0& TEX0 = t->m_TEX0;

	if(TEX0.PSM != PSM_PSMCT32
	&& TEX0.PSM != PSM_PSMCT24
	&& TEX0.PSM != PSM_PSMCT16
	&& TEX0.PSM != PSM_PSMCT16S)
	{
		//ASSERT(0);

		return;
	}

	if(!t->m_dirty.empty())
	{
		return;
	}

	// printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0);

	int w = r.width();
	int h = r.height();

	GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();

	if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h))
	{
		GSTexture::GSMap m;

		if(offscreen->Map(m))
		{
			// TODO: block level write

			GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

			switch(TEX0.PSM)
			{
			case PSM_PSMCT32:
				m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r);
				break;
			case PSM_PSMCT24:
				m_renderer->m_mem.WritePixel24(m.bits, m.pitch, o, r);
				break;
			case PSM_PSMCT16:
			case PSM_PSMCT16S:
				m_renderer->m_mem.WriteFrame16(m.bits, m.pitch, o, r);
				break;
			default:
				ASSERT(0);
			}

			offscreen->Unmap();
		}

		m_renderer->m_dev->Recycle(offscreen);
	}
}
Пример #30
0
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear)
{
	if(!sTex || !dTex)
	{
		ASSERT(0);
		return;
	}

	BeginScene();

	GSVector2i ds = dTex->GetSize();

	// om

	OMSetDepthStencilState(m_convert.dss, 0);
	OMSetBlendState(bs, 0);
	OMSetRenderTargets(dTex, NULL);

	// ia

	float left = dRect.x * 2 / ds.x - 1.0f;
	float top = 1.0f - dRect.y * 2 / ds.y;
	float right = dRect.z * 2 / ds.x - 1.0f;
	float bottom = 1.0f - dRect.w * 2 / ds.y;

	GSVertexPT1 vertices[] =
	{
		{GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)},
		{GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)},
		{GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)},
		{GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)},
	};



	IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
	IASetInputLayout(m_convert.il);
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// vs

	VSSetShader(m_convert.vs, NULL);


	// gs
	/* NVIDIA HACK!!!!
	Not sure why, but having the Geometry shader disabled causes the strange stretching in recent drivers*/

	GSSelector sel;
	//Don't use shading for stretching, we're just passing through - Note: With Win10 it seems to cause other bugs when shading is off if any of the coords is greater than 0
	//I really don't know whats going on there, but this seems to resolve it mostly (if not all, not tester a lot of games, only BIOS, FFXII and VP2)
	//sel.iip = (sRect.y > 0.0f || sRect.w > 0.0f) ? 1 : 0; 
	//sel.prim = 2; //Triangle Strip
	//SetupGS(sel);

	GSSetShader(NULL, NULL);

	/*END OF HACK*/
	
	//

	// ps

	PSSetShaderResources(sTex, NULL);
	PSSetSamplerState(linear ? m_convert.ln : m_convert.pt, NULL);
	PSSetShader(ps, ps_cb);

	//

	DrawPrimitive();

	//

	EndScene();

	PSSetShaderResources(NULL, NULL);
}