예제 #1
0
// Initializes the four bit-field masks used on 16-bit pixels. Listed here from
// the lowest bits upward; the member names suggest red / green / blue / alpha
// (presumably a 1-5-5-5 layout -- confirm against the pixel format in use).
void GPULocalMemory::InitVectors()
{
	// bits 0..4
	m_rxxx = GSVector4i(0x0000001f);

	// bits 5..9
	m_xgxx = GSVector4i(0x000003e0);

	// bits 10..14
	m_xxbx = GSVector4i(0x00007c00);

	// bit 15
	m_xxxa = GSVector4i(0x00008000);
}
예제 #2
0
void GSDevice11::OMSetRenderTargets(const GSVector2i& rtsize, int count, ID3D11UnorderedAccessView** uav, uint32* counters, const GSVector4i* scissor)
{
	m_ctx->OMSetRenderTargetsAndUnorderedAccessViews(0, NULL, NULL, 0, count, uav, counters);

	m_state.rtv = NULL;
	m_state.dsv = NULL;

	if(m_state.viewport != rtsize)
	{
		m_state.viewport = rtsize;

		D3D11_VIEWPORT vp;

		memset(&vp, 0, sizeof(vp));

		vp.TopLeftX = 0;
		vp.TopLeftY = 0;
		vp.Width = (float)rtsize.x;
		vp.Height = (float)rtsize.y;
		vp.MinDepth = 0.0f;
		vp.MaxDepth = 1.0f;

		m_ctx->RSSetViewports(1, &vp);
	}

	GSVector4i r = scissor ? *scissor : GSVector4i(rtsize).zwxy();

	if(!m_state.scissor.eq(r))
	{
		m_state.scissor = r;

		m_ctx->RSSetScissorRects(1, r);
	}
}
예제 #3
0
// Creates a software texture-cache entry for the given TEX0/TEXA registers.
//
// state - owning GS state; its local memory supplies offsets and page maps
// tw0   - requested width exponent; 0 means "derive it from TEX0"
// TEX0  - texture register describing base pointer, width, format and size
// TEXA  - texture alpha register, stored as-is
GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
	: m_state(state)
	, m_buff(NULL)
	, m_tw(tw0)
	, m_age(0)
	, m_complete(false)
	, m_p2t(NULL)
{
	m_TEXA = TEXA;
	m_TEX0 = TEX0;

	memset(m_valid, 0, sizeof(m_valid));

	if(m_tw == 0)
	{
		// makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
		const int min_tw = (GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0) ? 3 : 5;

		m_tw = std::max<int>(m_TEX0.TW, min_tw);
	}

	m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);

	m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

	// Page coverage of the whole texture rectangle: first as a count/list,
	// then as a bitmap copied into m_pages.bm.
	const GSVector4i whole(0, 0, 1 << TEX0.TW, 1 << TEX0.TH);

	m_pages.n = m_offset->GetPages(whole);

	memcpy(m_pages.bm, m_offset->GetPagesAsBits(TEX0), sizeof(m_pages.bm));

	// repeating mode always works, it is just slightly slower
	m_repeating = m_TEX0.IsRepeating();

	if(m_repeating)
	{
		// The page-to-tile map is only fetched for repeating textures.
		m_p2t = m_state->m_mem.GetPage2TileMap(m_TEX0);
	}
}
예제 #4
0
// Maps the texture for CPU access and fills `m` with a pointer and row pitch.
//
// m  - receives the mapped pointer (bits) and the row pitch in bytes
// _r - optional sub-rectangle; NULL selects the whole texture (0,0,m_size)
//
// Returns true for Offscreen textures (synchronous read-back into
// m_local_buffer) and for Texture/RenderTarget textures (a write buffer is
// obtained from PboPool and the rectangle is remembered for Unmap).
// Returns false for any other texture type.
bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r)
{
	GSVector4i r = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y);

	// LOTS OF CRAP CODE!!!! PLEASE FIX ME !!!
	if (m_type == GSTexture::Offscreen) {
		// The fastest way will be to use a PBO to read the data asynchronously. Unfortunately GSdx
		// architecture is waiting the data right now.

#if 0
		// Maybe it is as good as the code below. I don't know
		// With openGL 4.5 you can use glGetTextureSubImage

		glGetTextureImage(m_texture_id, GL_TEX_LEVEL_0, m_int_format, m_int_type, 1024*1024*16, m_local_buffer);

#else

		// Bind the texture to the read framebuffer to avoid any disturbance
		glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
		glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0);

		// Read the requested rectangle back into the CPU-side staging buffer.
		glPixelStorei(GL_PACK_ALIGNMENT, m_int_alignment);
		glReadPixels(r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, m_local_buffer);

		// Restore the default read framebuffer binding.
		glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);

#endif

		// NOTE(review): the pitch is computed from the full texture width while
		// glReadPixels is asked for r.width() pixels per row; these only agree
		// when r spans the full width -- confirm callers never pass a narrower rect.
		m.bits = m_local_buffer;
		m.pitch = m_size.x << m_int_shift;

		return true;
	} else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) {
		GL_PUSH("Upload Texture %d", m_texture_id); // POP is in Unmap

		m_dirty = true;
		m_clean = false;

		// Upload buffer is tightly packed: one row is exactly the mapped width.
		uint32 row_byte = r.width() << m_int_shift;
		uint32 map_size = r.height() * row_byte;

		m.bits = (uint8*)PboPool::Map(map_size);
		m.pitch = row_byte;

#ifdef ENABLE_OGL_DEBUG_MEM_BW
	g_real_texture_upload_byte += map_size;
#endif

		// Save the area for the unmap
		m_r_x = r.x;
		m_r_y = r.y;
		m_r_w = r.width();
		m_r_h = r.height();

		return true;
	}

	return false;
}
예제 #5
0
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
{
	memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));

	if(m_global.sel.tme && m_global.sel.twin)
	{
		uint32 u, v;

		u = ~(m_global.twin.x << 3) & 0xff; // TWW
		v = ~(m_global.twin.y << 3) & 0xff; // TWH

		m_local.twin[0].u = GSVector4i((u << 16) | u);
		m_local.twin[0].v = GSVector4i((v << 16) | v);

		u = m_global.twin.z << 3; // TWX
		v = m_global.twin.w << 3; // TWY

		m_local.twin[1].u = GSVector4i((u << 16) | u) & ~m_local.twin[0].u;
		m_local.twin[1].v = GSVector4i((v << 16) | v) & ~m_local.twin[0].v;
	}

	m_ds = m_ds_map[m_global.sel];

	m_de = NULL;

	m_dr = NULL; // TODO

	// doesn't need all bits => less functions generated

	GPUScanlineSelector sel;

	sel.key = 0;

	sel.iip = m_global.sel.iip;
	sel.tfx = m_global.sel.tfx;
	sel.twin = m_global.sel.twin;
	sel.sprite = m_global.sel.sprite;

	m_sp = m_sp_map[sel];
}
예제 #6
0
// Rasterizes a triangle section whose edges run from v[0] and v[1] down to
// v[2], clipped vertically to the scissor rectangle.
//
// v       - three vertices; v[0] and v[1] are modified in place while stepping
// scissor - integer clip rectangle (y/w lanes are used as top/bottom here)
void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
{
	GSVertexSW longest;

	// Difference between the two upper vertices.
	longest.p = v[1].p - v[0].p;

	// Packs the sign of the x difference and the "x difference == 0" compare
	// result into a small bit mask (presumably bit 0 = negative, bit 1 = zero).
	int i = longest.p.upl(longest.p == GSVector4::zero()).mask();

	// Bail out when bit 1 is set -- presumably the zero-width (degenerate) case.
	if(i & 2) return;

	i &= 1;

	// Pick which of v[0]/v[1] is treated as the left edge start.
	GSVertexSW& l = v[i];
	GSVector4& r = v[1 - i].p;

	GSVector4 fscissor(scissor);

	// Rounded-up vertical extent built from l and v[2], then clamped to the scissor.
	GSVector4 tb = l.p.upl(v[2].p).ceil();

	GSVector4 tbmax = tb.max(fscissor.yyyy());
	GSVector4 tbmin = tb.min(fscissor.wwww());

	GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));

	int top = tbi.extract32<0>();
	int bottom = tbi.extract32<2>();

	// Nothing visible inside the scissor.
	if(top >= bottom) return;

	longest.t = v[1].t - v[0].t;
	longest.c = v[1].c - v[0].c;

	// Per-pixel horizontal gradient: attribute delta divided by the x delta
	// (rcp() is presumably an approximate reciprocal -- confirm).
	GSVertexSW dscan = longest * longest.p.xxxx().rcp();

	// Edge vectors from the two upper vertices towards v[2] ...
	GSVertexSW vl = v[2] - l;
	GSVector4 vr = v[2].p - r;

	// ... converted to per-scanline steps by dividing by their y extent.
	GSVertexSW dl = vl / vl.p.yyyy();
	GSVector4 dr = vr / vr.yyyy();

	// Vertical pre-step from l's y to the first (clamped) scanline.
	GSVector4 dy = tbmax.zzzz() - l.p.yyyy();

	l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
	dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y

	l += dl * dy;

	m_dsf.ssp(v, dscan);

	DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
}
예제 #7
0
// Note: hack is safe, but it could impact the perf a little (normally games do only a couple of clear by frame)
void GSRendererHW::OI_GsMemClear()
{
	// Rectangle draw without texture
	if ((m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && !PRIM->TME && !PRIM->ABE) {
		// 0 clear
		if (m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(0))) {
			GL_INS("OI_GsMemClear");
			GSOffset* off = m_context->offset.fb;
			GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));

			// Based on WritePixel32
			for(int y = r.top; y < r.bottom; y++)
			{
				uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]];
				int* RESTRICT col = off->pixel.col[0];

				for(int x = r.left; x < r.right; x++)
				{
					d[col[x]] = 0; // Here the constant color
				}
			}
		}
	}
예제 #8
0
// Binds a render target and/or depth-stencil target and refreshes the cached
// viewport and scissor state.
//
// rt      - colour target, may be NULL when `ds` is given
// ds      - depth-stencil target, may be NULL when `rt` is given
// scissor - optional scissor rectangle; when NULL one is derived from the
//           target size
//
// Throws GSDXRecoverableError when both `rt` and `ds` are NULL.
void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)
{
	if (!rt && !ds)
		throw GSDXRecoverableError();

	ID3D11RenderTargetView* rtv = NULL;
	ID3D11DepthStencilView* dsv = NULL;

	if(rt)
	{
		rtv = *(GSTexture11*)rt;
	}

	if(ds)
	{
		dsv = *(GSTexture11*)ds;
	}

	// Skip the D3D call when both views match the cached ones.
	const bool views_changed = (m_state.rtv != rtv) || (m_state.dsv != dsv);

	if(views_changed)
	{
		m_state.rtv = rtv;
		m_state.dsv = dsv;

		m_ctx->OMSetRenderTargets(1, &rtv, dsv);
	}

	// The colour target's size is preferred; the depth target is the fallback.
	GSVector2i size = rt ? rt->GetSize() : ds->GetSize();

	if(m_state.viewport != size)
	{
		bool isNative = theApp.GetConfig("upscale_multiplier", 1) == 1;

		m_state.viewport = size;

		// Same origin for both axes: 0 with the sprite hack or at native
		// resolution, otherwise a small negative offset.
		const float origin = (spritehack > 0 || isNative) ? 0.0f : -0.01f;

		D3D11_VIEWPORT vp;

		memset(&vp, 0, sizeof(vp));

		vp.TopLeftX = origin;
		vp.TopLeftY = origin;
		vp.Width = (float)size.x;
		vp.Height = (float)size.y;
		vp.MinDepth = 0.0f;
		vp.MaxDepth = 1.0f;

		m_ctx->RSSetViewports(1, &vp);
	}

	GSVector4i rect = scissor ? *scissor : GSVector4i(size).zwxy();

	if(m_state.scissor.eq(rect))
	{
		return; // cached scissor is already up to date
	}

	m_state.scissor = rect;

	m_ctx->RSSetScissorRects(1, rect);
}
예제 #9
0
// Returns the client rectangle of the native X11 window as (0, 0, width, height).
//
// The display connection is opened lazily on first use. If no display can be
// opened, the default 640x480 size is returned instead of passing a NULL
// display to Xlib.
GSVector4i GSWndOGL::GetClientRect()
{
	// Fallback size, overwritten by XGetGeometry when the query is performed.
	unsigned int h = 480;
	unsigned int w = 640;

	// Outputs required by XGetGeometry that are not used here.
	unsigned int borderDummy;
	unsigned int depthDummy;
	Window winDummy;
	int xDummy;
	int yDummy;

	if (!m_NativeDisplay) m_NativeDisplay = XOpenDisplay(NULL);

	// XOpenDisplay returns NULL on failure; only query the geometry when a
	// display connection actually exists.
	if (m_NativeDisplay)
	{
		XGetGeometry(m_NativeDisplay, m_NativeWindow, &winDummy, &xDummy, &yDummy, &w, &h, &borderDummy, &depthDummy);
	}

	return GSVector4i(0, 0, (int)w, (int)h);
}
예제 #10
0
	// Resets every cached state value to its initial setting: numeric ids and
	// enums become 0, flags become false, and the four colour-mask flags
	// become true.
	void Clear() {
		// Framebuffer and rasterizer rectangles
		fbo = 0;
		scissor = GSVector4i(0, 0, 0, 0);
		viewport = GSVector2i(0, 0);

		// Blending
		blend = false;
		eq_A   = 0;
		eq_RGB = 0;
		f_dA = 0;
		f_sA = 0;
		f_dRGB = 0;
		f_sRGB = 0;
		bf = 0.0;

		// Colour write mask: all channels enabled
		r_msk = g_msk = b_msk = a_msk = true;

		// Depth
		depth = false;
		depth_mask = false;
		depth_func = 0;

		// Stencil
		stencil = false;
		stencil_pass = 0;
		stencil_func = 0;

		// Uniform buffer and sampler
		ubo = 0;
		ps_ss = 0;

		// Bound targets and textures
		rt = 0;
		ds = 0;
		tex = 0;
		for (int slot = 0; slot < 2; slot++) {
			tex_unit[slot] = 0;
			tex_handle[slot] = 0;
		}

		// Shader stages and program
		vs = 0;
		gs = 0;
		ps = 0;
		program = 0;

		// Nothing is pending after a reset
		dirty_prog = false;
		dirty_subroutine_vs = false;
		dirty_subroutine_ps = false;
		dirty_ressources = false;
	}
예제 #11
0
// Maps the texture's pixel storage for direct access.
//
// m - receives the address of the rectangle's top-left pixel and the row pitch
// r - optional rectangle; NULL selects the whole texture
//
// Returns false when there is no backing storage, when the rectangle does not
// lie inside the texture, or when the texture is already mapped.
bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
{
	GSVector4i r2 = r != NULL ? *r : GSVector4i(0, 0, m_size.x, m_size.y);

	if(m_data == NULL)
	{
		return false;
	}

	if(r2.left < 0 || r2.right > m_size.x || r2.top < 0 || r2.bottom > m_size.y)
	{
		return false;
	}

	// The flag doubles as a "currently mapped" marker: a previous value of
	// true means someone else already holds the mapping.
	if(m_mapped.test_and_set(std::memory_order_acquire))
	{
		return false;
	}

	// Pixels are addressed as 4 bytes each (left << 2) within a row.
	uint8* base = (uint8*)m_data;

	m.bits = base + m_pitch * r2.top + (r2.left << 2);
	m.pitch = m_pitch;

	return true;
}
예제 #12
0
// Binds a render target and/or depth-stencil target and refreshes the cached
// viewport and scissor state.
//
// rt      - colour target; may be NULL
// ds      - depth-stencil target; may be NULL
// scissor - optional scissor rectangle; when NULL one is derived from the
//           size of the target used for the viewport
//
// The viewport/scissor size comes from `rt` when present and from `ds`
// otherwise. When both are NULL the views are still (un)bound but the
// viewport and scissor are left untouched, since there is no size to use.
void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)
{
	ID3D11RenderTargetView* rtv = NULL;
	ID3D11DepthStencilView* dsv = NULL;

	if(rt) rtv = *(GSTexture11*)rt;
	if(ds) dsv = *(GSTexture11*)ds;

	// Skip the D3D call when both views match the cached ones.
	if(m_state.rtv != rtv || m_state.dsv != dsv)
	{
		m_state.rtv = rtv;
		m_state.dsv = dsv;

		m_ctx->OMSetRenderTargets(1, &rtv, dsv);
	}

	// rt is nullable (see the checks above), so it must not be dereferenced
	// unconditionally; fall back to the depth target for sizing.
	GSTexture* sized = rt ? rt : ds;

	if(!sized)
	{
		return;
	}

	GSVector2i size = sized->GetSize();

	if(m_state.viewport != size)
	{
		m_state.viewport = size;

		D3D11_VIEWPORT vp;

		memset(&vp, 0, sizeof(vp));

		vp.TopLeftX = 0;
		vp.TopLeftY = 0;
		vp.Width = (FLOAT)sized->GetWidth();
		vp.Height = (FLOAT)sized->GetHeight();
		vp.MinDepth = 0.0f;
		vp.MaxDepth = 1.0f;

		m_ctx->RSSetViewports(1, &vp);
	}

	GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy();

	if(!m_state.scissor.eq(r))
	{
		m_state.scissor = r;

		m_ctx->RSSetScissorRects(1, r);
	}
}
예제 #13
0
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& fscissor)
{
	ASSERT(top < bottom);

	while(1)
	{
		do
		{
			if(IsOneOfMyScanlines(top))
			{
				GSVector4 lr = l.p.xyxy(r).ceil();

				GSVector4 lrmax = lr.max(fscissor.xxxx());
				GSVector4 lrmin = lr.min(fscissor.zzzz());

				GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin));

				int left = lri.extract32<0>();
				int right = lri.extract32<2>();

				int pixels = right - left;

				if(pixels > 0)
				{
					m_stats.pixels += pixels;

					GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();

					m_dsf.ssl(right, left, top, scan);
				}
			}
		}
		while(0);

		if(++top >= bottom) break;

		l += dl;
		r += dr;
	}
}
	void Clear() {
		fbo = 0;
		viewport = GSVector2i(0, 0);
		scissor = GSVector4i(0, 0, 0, 0);

		blend = false;
		eq_RGB = 0;
		f_sRGB = 0;
		f_dRGB = 0;
		wrgba = 0xF;
		bf = 0.0;

		depth = false;
		depth_func = 0;
		depth_mask = false;

		stencil = false;
		stencil_func = 0;
		stencil_pass = 0;

		ubo = 0;

		ps_ss = 0;

		rt = 0;
		ds = 0;
		for (size_t i = 0; i < countof(tex_unit); i++)
			tex_unit[i] = 0;
		for (size_t i = 0; i < countof(tex_handle); i++)
			tex_handle[i] = 0;

		ps = 0;
		gs = 0;
		vs = 0;
		program = 0;
		dirty_prog = false;
		dirty_ressources = false;
	}
예제 #15
0
void GSDrawScanlineCodeGenerator::InitVectors()
{
#if _M_SSE >= 0x501
    GSVector8 log2_coef[4] =
    {
        GSVector8(0.204446009836232697516f),
        GSVector8(-1.04913055217340124191f),
        GSVector8(2.28330284476918490682f),
        GSVector8(1.0f),
    };

    for (size_t n = 0; n < countof(log2_coef); ++n)
        m_log2_coef[n] = log2_coef[n];

#else
    GSVector4i test[8] =
    {
        GSVector4i::zero(),
        GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
        GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
        GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
        GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
        GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
        GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
        GSVector4i::zero(),
    };

    GSVector4 log2_coef[4] =
    {
        GSVector4(0.204446009836232697516f),
        GSVector4(-1.04913055217340124191f),
        GSVector4(2.28330284476918490682f),
        GSVector4(1.0f),
    };

    for (size_t n = 0; n < countof(test); ++n)
        m_test[n] = test[n];

    for (size_t n = 0; n < countof(log2_coef); ++n)
        m_log2_coef[n] = log2_coef[n];

#endif
}
예제 #16
0
			{
				lines = 0;
			}
		}
		else if(m_vt.m_primclass == GS_LINE_CLASS)
		{
			if(m_vertex.next == lines * 2)
			{
				// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
				// but we use the stored video data to create a new texture, and replace the lines with two triangles

				m_dev->Recycle(t->m_texture);

				t->m_texture = m_dev->CreateTexture(512, 512);

				t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);

				m_vertex.buff[2] = m_vertex.buff[m_vertex.next - 2];
				m_vertex.buff[3] = m_vertex.buff[m_vertex.next - 1];

				m_index.buff[0] = 0;
				m_index.buff[1] = 1;
				m_index.buff[2] = 2;
				m_index.buff[3] = 1;
				m_index.buff[4] = 2;
				m_index.buff[5] = 3;

				m_vertex.head = m_vertex.tail = m_vertex.next = 4;
				m_index.tail = 6;

				m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
예제 #17
0
// Prepares the D3D9 input assembler for the current batch:
//  1. picks the primitive topology from m_vt.m_primclass,
//  2. reorders indices for flat-shaded lines/triangles,
//  3. expands sprites (two vertices each) into quads (four vertices, six indices),
//  4. converts every GSVertex into a GSVertexHW9 in the mapped vertex buffer,
//  5. hands the index buffer and topology to the device.
void GSRendererDX9::SetupIA()
{
	D3DPRIMITIVETYPE topology;

	switch(m_vt.m_primclass)
	{
	case GS_POINT_CLASS:

		topology = D3DPT_POINTLIST;

		break;

	case GS_LINE_CLASS:

		topology = D3DPT_LINELIST;

		// With flat shading (IIP == 0) the two indices of every line are swapped --
		// presumably so the vertex D3D takes the flat colour from is the one the GS uses.
		if(PRIM->IIP == 0)
		{
			for(size_t i = 0, j = m_index.tail; i < j; i += 2) 
			{
				uint32 tmp = m_index.buff[i + 0]; 
				m_index.buff[i + 0] = m_index.buff[i + 1];
				m_index.buff[i + 1] = tmp;
			}
		}

		break;

	case GS_TRIANGLE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		// Same idea as for lines: swap the first and last index of every triangle.
		if(PRIM->IIP == 0)
		{
			for(size_t i = 0, j = m_index.tail; i < j; i += 3) 
			{
				uint32 tmp = m_index.buff[i + 0]; 
				m_index.buff[i + 0] = m_index.buff[i + 2];
				m_index.buff[i + 2] = tmp;
			}
		}

		break;

	case GS_SPRITE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		// each sprite converted to quad needs twice the space

		while(m_vertex.tail * 2 > m_vertex.maxcount)
		{
			GrowVertexBuffer();
		}

		// assume vertices are tightly packed and sequentially indexed (it should be the case)

		if(m_vertex.next >= 2)
		{
			size_t count = m_vertex.next;

			// The expansion is done in place, walking from the last sprite to the
			// first: the destination quad index is always twice the source index,
			// so writes never land on source vertices that are still unread.
			int i = (int)count * 2 - 4;
			GSVertex* s = &m_vertex.buff[count - 2];
			GSVertex* q = &m_vertex.buff[count * 2 - 4];
			uint32* RESTRICT index = &m_index.buff[count * 3 - 6];
		
			for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6) 
			{
				// Copy both corners first; the writes to q[] below may overwrite them.
				GSVertex v0 = s[0];
				GSVertex v1 = s[1];

				// All four quad vertices share the second corner's colour, depth and fog.
				v0.RGBAQ = v1.RGBAQ;
				v0.XYZ.Z = v1.XYZ.Z;
				v0.FOG = v1.FOG;

				q[0] = v0;
				q[3] = v1;

				// swap x, s, u

				uint16 x = v0.XYZ.X;
				v0.XYZ.X = v1.XYZ.X;
				v1.XYZ.X = x;

				// NOTE(review): this float `s` shadows the GSVertex* `s` above for the
				// rest of the loop body; the pointer is not used again in the body.
				float s = v0.ST.S;
				v0.ST.S = v1.ST.S;
				v1.ST.S = s;

				uint16 u = v0.U;
				v0.U = v1.U;
				v1.U = u;

				// The two remaining corners are the originals with x/s/u exchanged.
				q[1] = v0;
				q[2] = v1;

				// Two triangles per quad: (0,1,2) and (1,2,3).
				index[0] = i + 0;
				index[1] = i + 1;
				index[2] = i + 2;
				index[3] = i + 1;
				index[4] = i + 2;
				index[5] = i + 3;
			}

			// Vertex count doubled, index count tripled relative to the sprite input.
			m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
			m_index.tail = count * 3;
		}

		break;

	default:
		__assume(0);
	}

	GSDevice9* dev = (GSDevice9*)m_dev;

	(*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO

	void* ptr = NULL;

	// Convert and upload the vertices; skipped entirely if the buffer cannot be mapped.
	if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next))
	{
		GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff;
		GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr;

		for(uint32 i = 0; i < m_vertex.next; i++, s++, d++)
		{
			// Position: the 16-bit X/Y packed in XYZ.u32[0], widened and converted to float.
			GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());

			// Upper half of p carries Z, plus Q when perspective-correct texturing
			// (TME set, FST clear) is in use.
			if(PRIM->TME && !PRIM->FST)
			{
				p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
			}
			else
			{
				p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
			}

			GSVector4 t = GSVector4::zero();

			// Texture coordinates: packed integer UV when FST is set, float ST otherwise.
			if(PRIM->TME)
			{
				if(PRIM->FST)
				{
					if(UserHacks_WildHack && !isPackedUV_HackFlag)
					{
						// Hack path: masks some UV bits before conversion.
						t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16());
						//printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : "");
					}
					else
					{
						t = GSVector4(GSVector4i::load(s->UV).upl16());
					}
				}
				else
				{
					t = GSVector4::loadl(&s->ST);
				}
			}

			// Upper half of t carries the raw colour and fog bits (bit-cast, not converted).
			t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));

			d->p = p;
			d->t = t;
		}

		dev->IAUnmapVertexBuffer();
	}

	dev->IASetIndexBuffer(m_index.buff, m_index.tail);

	dev->IASetPrimitiveTopology(topology);
}
예제 #18
0
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
	GSDrawingEnvironment& env = m_env;
	GSDrawingContext* context = m_context;

	const GSVector2i& rtsize = rt->GetSize();
	const GSVector2& rtscale = rt->GetScale();

	bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;

	GSTexture* rtcopy = NULL;

	ASSERT(m_dev != NULL);

	GSDeviceDX* dev = (GSDeviceDX*)m_dev;

	if(DATE)
	{
		if(dev->HasStencil())
		{
			GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
			GSVector4 o = GSVector4(-1.0f, 1.0f);

			GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
			GSVector4 dst = src * 2.0f + o.xxxx();

			GSVertexPT1 vertices[] =
			{
				{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
				{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
				{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
				{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
			};

			dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
		}
		else
		{
			rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());

			// I'll use VertexTrace when I consider it more trustworthy

			dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
		}
	}

	//

	dev->BeginScene();

	// om

	GSDeviceDX::OMDepthStencilSelector om_dssel;

	if(context->TEST.ZTE)
	{
		om_dssel.ztst = context->TEST.ZTST;
		om_dssel.zwe = !context->ZBUF.ZMSK;
	}
	else
	{
		om_dssel.ztst = ZTST_ALWAYS;
	}

	if(m_fba)
	{
		om_dssel.fba = context->FBA.FBA;
	}

	GSDeviceDX::OMBlendSelector om_bsel;

	if(!IsOpaque())
	{
		om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;

		om_bsel.a = context->ALPHA.A;
		om_bsel.b = context->ALPHA.B;
		om_bsel.c = context->ALPHA.C;
		om_bsel.d = context->ALPHA.D;

		if(env.PABE.PABE)
		{
			if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
			{
				// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
				// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result

				om_bsel.abe = 0;
			}
			else
			{
				//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
				//ASSERT(0);
			}
		}
	}

	om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();

	// vs

	GSDeviceDX::VSSelector vs_sel;

	vs_sel.tme = PRIM->TME;
	vs_sel.fst = PRIM->FST;
	vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
	vs_sel.rtcopy = !!rtcopy;

	// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
	// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
	// We are probably receiving bad coordinates from VU1 in these cases.

	if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
	{
		if(context->ZBUF.PSM == PSM_PSMZ24)
		{
			if(m_vt.m_max.p.z > 0xffffff)
			{
				ASSERT(m_vt.m_min.p.z > 0xffffff);
				// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
				if (m_vt.m_min.p.z > 0xffffff)
				{
					vs_sel.bppz = 1;
					om_dssel.ztst = ZTST_ALWAYS;
				}
			}
		}
		else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
		{
			if(m_vt.m_max.p.z > 0xffff)
			{
				ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
				// Fixme : Same as above, I guess.
				if (m_vt.m_min.p.z > 0xffff)
				{
					vs_sel.bppz = 2;
					om_dssel.ztst = ZTST_ALWAYS;
				}
			}
		}
	}

	GSDeviceDX::VSConstantBuffer vs_cb;

	float sx = 2.0f * rtscale.x / (rtsize.x << 4);
	float sy = 2.0f * rtscale.y / (rtsize.y << 4);
	float ox = (float)(int)context->XYOFFSET.OFX;
	float oy = (float)(int)context->XYOFFSET.OFY;
	float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
	float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;

	//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
	//because DX10 and DX9 have a different pixel center.)
	//
	//The resulting shifted output aligns better with common blending / corona / blurring effects,
	//but introduces a few bad pixels on the edges.

	if(rt->LikelyOffset)
	{
		// DX9 has pixelcenter set to 0.0, so give it some value here

		if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
		
		ox2 *= rt->OffsetHack_modx;
		oy2 *= rt->OffsetHack_mody;
	}

	vs_cb.VertexScale  = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
	vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);

	// gs

	GSDeviceDX::GSSelector gs_sel;

	gs_sel.iip = PRIM->IIP;
	gs_sel.prim = m_vt.m_primclass;

	// ps

	GSDeviceDX::PSSelector ps_sel;
	GSDeviceDX::PSSamplerSelector ps_ssel;
	GSDeviceDX::PSConstantBuffer ps_cb;

	if(DATE)
	{
		if(dev->HasStencil())
		{
			om_dssel.date = 1;
		}
		else
		{
			ps_sel.date = 1 + context->TEST.DATM;
		}
	}

	if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
	{
		ps_sel.colclip = 1;
	}

	ps_sel.clr1 = om_bsel.IsCLR1();
	ps_sel.fba = context->FBA.FBA;
	ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
		
	if(UserHacks_AlphaHack) ps_sel.aout = 1;

	if(PRIM->FGE)
	{
		ps_sel.fog = 1;

		ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
	}

	if(context->TEST.ATE)
	{
		ps_sel.atst = context->TEST.ATST;

		switch(ps_sel.atst)
		{
		case ATST_LESS:
			ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
			break;
		case ATST_GREATER:
			ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
			break;
		default:
			ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
			break;
		}
	}
	else
	{
		ps_sel.atst = ATST_ALWAYS;
	}

	if(tex)
	{
		ps_sel.wms = context->CLAMP.WMS;
		ps_sel.wmt = context->CLAMP.WMT;
		ps_sel.fmt = tex->m_fmt;
		ps_sel.aem = env.TEXA.AEM;
		ps_sel.tfx = context->TEX0.TFX;
		ps_sel.tcc = context->TEX0.TCC;
		ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
		ps_sel.rt = tex->m_target;

		int w = tex->m_texture->GetWidth();
		int h = tex->m_texture->GetHeight();

		int tw = (int)(1 << context->TEX0.TW);
		int th = (int)(1 << context->TEX0.TH);

		GSVector4 WH(tw, th, w, h);

		if(PRIM->FST)
		{
			vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
			//Maybe better?
			//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
			ps_sel.fst = 1;
		}

		ps_cb.WH = WH;
		ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
		ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);

		GSVector4 clamp(ps_cb.MskFix);
		GSVector4 ta(env.TEXA & GSVector4i::x000000ff());

		ps_cb.MinMax = clamp / WH.xyxy();
		ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));

		ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
		ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
		ps_ssel.ltf = ps_sel.ltf;
	}
	else
	{
예제 #19
0
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side)
{
	// orientation:
	// - true: |dv.p.y| > |dv.p.x|
	// - false |dv.p.x| > |dv.p.y|
	// side:
	// - true: top/left edge
	// - false: bottom/right edge

	// TODO: bit slow and too much duplicated code
	// TODO: inner pre-step is still missing (hardly noticable)

	GSVector4 fscissor(scissor);

	GSVector4 lrtb = v0.p.upl(v1.p).ceil();

	if(orientation)
	{
		GSVector4 tbmax = lrtb.max(fscissor.yyyy());
		GSVector4 tbmin = lrtb.min(fscissor.wwww());

		GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin));

		int top, bottom;

		GSVertexSW edge, dedge;

		if((dv.p >= GSVector4::zero()).mask() & 2)
		{
			top = tbi.extract32<0>();
			bottom = tbi.extract32<3>();

			if(top >= bottom) return;

			edge = v0;
			dedge = dv / dv.p.yyyy();

			edge += dedge * (tbmax.zzzz() - edge.p.yyyy());
		}
		else
		{
			top = tbi.extract32<1>();
			bottom = tbi.extract32<2>();

			if(top >= bottom) return;

			edge = v1;
			dedge = dv / dv.p.yyyy();

			edge += dedge * (tbmax.wwww() - edge.p.yyyy());
		}

		GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000);

		int x = p.extract32<0>();
		int dx = p.extract32<1>();

		if(side)
		{
			while(1)
			{
				do
				{
					int xi = x >> 16;
					int xf = x & 0xffff;

					if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi))
					{
						m_stats.pixels++;

						edge.t.u32[3] = (0x10000 - xf) & 0xffff;

						m_dsf.ssle(xi + 1, xi, top, edge);

						edge.t.u32[3] = 0;
					}
				}
				while(0);

				if(++top >= bottom) break;

				edge += dedge;
				x += dx;
			}
		}
		else
		{
			while(1)
			{
				do
				{
					int xi = (x >> 16) + 1;
					int xf = x & 0xffff;

					if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi))
					{
						m_stats.pixels++;

						edge.t.u32[3] = xf;

						m_dsf.ssle(xi + 1, xi, top, edge);

						edge.t.u32[3] = 0;
					}
				}
				while(0);

				if(++top >= bottom) break;

				edge += dedge;
				x += dx;
			}
		}
	}
예제 #20
0
파일: GSVector.cpp 프로젝트: Aced14/pcsx2
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 */

#include "stdafx.h"
#include "GSVector.h"

const GSVector4i GSVector4i::m_xff[17] = 
{
	GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), 
	GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000), 
	GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), 
	GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), 
	GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
	GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000), 
	GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), 
	GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), 
	GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
	GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), 
	GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), 
	GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), 
	GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
	GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), 
	GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), 
	GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), 
예제 #21
0
	{
		blend(a, b, xmm0);
	}
}

// Emits a bitwise select: after the sequence, register `a` holds
// (b & mask) | (a & ~mask).
//
// a    - destination; also supplies the bits where `mask` is 0
// b    - supplies the bits where `mask` is 1; overwritten with the result
// mask - selection mask; overwritten with (a & ~mask)
//
// Both `b` and `mask` are clobbered by the emitted instructions.
void GPUDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask)
{
	pand(b, mask);   // b = b & mask
	pandn(mask, a);  // mask = ~mask & a
	por(b, mask);    // b = (b & mask) | (a & ~mask)
	movdqa(a, b);    // a = result
}

// Eight 128-bit masks with 16-bit granularity. Reading the constructor
// arguments left to right, entry k has its first (k + 1) 16-bit halves cleared
// and all remaining bits set; the last entry is all zero.
// NOTE(review): presumably indexed by a pixel offset to disable leading
// pixels of a span -- confirm against the code that reads m_test.
const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] =
{
	GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff),
	GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
	GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff),
	GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
	GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff),
	GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
	GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000),
	GSVector4i::zero(),
};

alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
{
	{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
	{2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4},
	{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
	{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
예제 #22
0
// Builds one rasterizer work item from the current drawing environment and the
// accumulated vertices, queues it on the rasterizer, waits on it, and records
// performance counters.
//
// Steps: fill the scanline selector and global data, copy the CLUT when
// texturing, compute the scissor, copy the vertices, derive the bounding box,
// invalidate the touched area, then queue and sync.
void GPURendererSW::Draw()
{
	GPUDrawScanline::SharedData* sd = new GPUDrawScanline::SharedData();

	// Ownership of sd moves to the shared_ptr, so early returns below do not leak it.
	shared_ptr<GSRasterizerData> data(sd);

	GPUScanlineGlobalData& gd = sd->global;

	const GPUDrawingEnvironment& env = m_env;

	// Clear the whole selector first; individual fields are set afterwards.
	gd.sel.key = 0;
	gd.sel.iip = env.PRIM.IIP;
	gd.sel.me = env.STATUS.ME;

	// The blend rate is only recorded when blending is enabled.
	if(env.PRIM.ABE)
	{
		gd.sel.abe = env.PRIM.ABE;
		gd.sel.abr = env.STATUS.ABR;
	}

	gd.sel.tge = env.PRIM.TGE;

	if(env.PRIM.TME)
	{
		gd.sel.tme = env.PRIM.TME;
		gd.sel.tlu = env.STATUS.TP < 2;
		gd.sel.twin = (env.TWIN.u32 & 0xfffff) != 0;
		// Filtering is on when m_filter == 2, or when m_filter == 1 and the
		// primitive is a polygon (&& binds tighter than ||).
		gd.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;

		const void* t = m_mem.GetTexture(env.STATUS.TP, env.STATUS.TX, env.STATUS.TY);

		if(!t) {ASSERT(0); return;}

		gd.tex = t;

		// Private copy of the palette: room for 256 entries, 32-byte aligned.
		// NOTE(review): the matching free is not visible here -- presumably done
		// when the SharedData is destroyed; the allocation result is not checked.
		gd.clut = (uint16*)_aligned_malloc(sizeof(uint16) * 256, 32);

		// Only 16 entries are copied when TP == 0, otherwise all 256.
		memcpy(gd.clut, m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y), sizeof(uint16) * (env.STATUS.TP == 0 ? 16 : 256));

		gd.twin = GSVector4i(env.TWIN.TWW, env.TWIN.TWH, env.TWIN.TWX, env.TWIN.TWY);
	}

	// Dithering from the status register is honoured only when m_dither is set.
	gd.sel.dtd = m_dither ? env.STATUS.DTD : 0;
	gd.sel.md = env.STATUS.MD;
	gd.sel.sprite = env.PRIM.TYPE == GPU_SPRITE;
	gd.sel.scalex = m_mem.GetScale().x;

	gd.vm = m_mem.GetPixelAddress(0, 0);

	// Scissor from the drawing-area registers, shifted by the scale factors;
	// the bottom-right corner is made exclusive (+1) and clamped to the memory size.
	data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
	data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
	data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth());
	data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight());
	
	// The work item gets its own 16-byte aligned copy of the vertices.
	data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
	data->vertex = (GSVertexSW*)data->buff;
	data->vertex_count = m_count;

	memcpy(data->vertex, m_vertices, sizeof(GSVertexSW) * m_count);
	
	data->frame = m_perfmon.GetFrame();

	int prims = 0;

	// Primitive class and primitive count (3 vertices per polygon, 2 otherwise).
	switch(env.PRIM.TYPE)
	{
	case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; prims = data->vertex_count / 3; break;
	case GPU_LINE: data->primclass = GS_LINE_CLASS; prims = data->vertex_count / 2; break;
	case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; prims = data->vertex_count / 2; break;
	default: __assume(0);
	}

	// TODO: VertexTrace

	// Bounding box: running min/max over all vertex positions, seeded with
	// large sentinels.
	GSVector4 tl(+1e10f);
	GSVector4 br(-1e10f);

	GSVertexSW* v = data->vertex;

	for(int i = 0, j = data->vertex_count; i < j; i++)
	{
		GSVector4 p = v[i].p;

		tl = tl.min(p);
		br = br.max(p);
	}

	data->bbox = GSVector4i(tl.xyxy(br));

	// Touched area = bounding box clipped to the scissor, shifted back down by
	// the scale factors before invalidation.
	GSVector4i r = data->bbox.rintersect(data->scissor);

	r.left >>= m_scale.x;
	r.top >>= m_scale.y;
	r.right >>= m_scale.x;
	r.bottom >>= m_scale.y;

	Invalidate(r);

	m_rl->Queue(data);

	// NOTE(review): presumably blocks until the queued work has been rasterized.
	m_rl->Sync();

	m_perfmon.Put(GSPerfMon::Draw, 1);
	m_perfmon.Put(GSPerfMon::Prim, prims);
	m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
}
예제 #23
0
void GSRendererDX9::SetupIA(const float& sx, const float& sy)
{
	D3DPRIMITIVETYPE topology;

	switch(m_vt.m_primclass)
	{
	case GS_POINT_CLASS:

		topology = D3DPT_POINTLIST;

		break;

	case GS_LINE_CLASS:

		topology = D3DPT_LINELIST;

		if(PRIM->IIP == 0)
		{
			for(size_t i = 0, j = m_index.tail; i < j; i += 2) 
			{
				uint32 tmp = m_index.buff[i + 0]; 
				m_index.buff[i + 0] = m_index.buff[i + 1];
				m_index.buff[i + 1] = tmp;
			}
		}

		break;

	case GS_TRIANGLE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		if(PRIM->IIP == 0)
		{
			for(size_t i = 0, j = m_index.tail; i < j; i += 3) 
			{
				uint32 tmp = m_index.buff[i + 0]; 
				m_index.buff[i + 0] = m_index.buff[i + 2];
				m_index.buff[i + 2] = tmp;
			}
		}

		break;

	case GS_SPRITE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		// each sprite converted to quad needs twice the space

		Lines2Sprites();

		break;

	default:
		__assume(0);
	}

	GSDevice9* dev = (GSDevice9*)m_dev;

	(*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO

	void* ptr = NULL;

	if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next))
	{
		GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff;
		GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr;

		for(uint32 i = 0; i < m_vertex.next; i++, s++, d++)
		{
			GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());

			if(PRIM->TME && !PRIM->FST)
			{
				p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
			}
			else
			{
				p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
			}

			GSVector4 t = GSVector4::zero();

			if(PRIM->TME)
			{
				if(PRIM->FST)
				{
					if(UserHacks_WildHack && !isPackedUV_HackFlag)
					{
						t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16());
						//printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : "");
					}
					else
					{
						t = GSVector4(GSVector4i::load(s->UV).upl16());
					}
				}
				else
				{
					t = GSVector4::loadl(&s->ST);
				}
			}

			t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));

			d->p = p;
			d->t = t;
		}

		dev->IAUnmapVertexBuffer();
	}

	dev->IASetIndexBuffer(m_index.buff, m_index.tail);

	dev->IASetPrimitiveTopology(topology);
}
예제 #24
0
bool GSRenderer::Merge(int field)
{
	bool en[2];

	GSVector4i fr[2];
	GSVector4i dr[2];

	int baseline = INT_MAX;

	for(int i = 0; i < 2; i++)
	{
		en[i] = IsEnabled(i);

		if(en[i])
		{
			fr[i] = GetFrameRect(i);
			dr[i] = GetDisplayRect(i);

			baseline = min(dr[i].top, baseline);

			//printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w);
		}
	}

	if(!en[0] && !en[1])
	{
		return false;
	}

	// try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero)
	//
	// NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending)

	bool samesrc =
		en[0] && en[1] &&
		m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP &&
		m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW &&
		m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM;

	// bool blurdetected = false;

	if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/)
	{
		if(fr[0].eq(fr[1] + GSVector4i(0, -1, 0, 0)) && dr[0].eq(dr[1] + GSVector4i(0, 0, 0, 1))
		|| fr[1].eq(fr[0] + GSVector4i(0, -1, 0, 0)) && dr[1].eq(dr[0] + GSVector4i(0, 0, 0, 1)))
		{
			// persona 4:
			//
			// fr[0] = 0 0 640 448
			// fr[1] = 0 1 640 448
			// dr[0] = 159 50 779 498
			// dr[1] = 159 50 779 497
			//
			// second image shifted up by 1 pixel and blended over itself
			//
			// god of war:
			//
			// fr[0] = 0 1 512 448
			// fr[1] = 0 0 512 448
			// dr[0] = 127 50 639 497
			// dr[1] = 127 50 639 498
			//
			// same just the first image shifted

			int top = min(fr[0].top, fr[1].top);
			int bottom = max(dr[0].bottom, dr[1].bottom);

			fr[0].top = top;
			fr[1].top = top;
			dr[0].bottom = bottom;
			dr[1].bottom = bottom;

			// blurdetected = true;
		}
		else if(dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, 1, 0, 1)) || fr[1].eq(fr[0] + GSVector4i(0, 1, 0, 1))))
		{
			// dq5:
			//
			// fr[0] = 0 1 512 445
			// fr[1] = 0 0 512 444
			// dr[0] = 127 50 639 494
			// dr[1] = 127 50 639 494

			int top = min(fr[0].top, fr[1].top);
			int bottom = min(fr[0].bottom, fr[1].bottom);

			fr[0].top = fr[1].top = top;
			fr[0].bottom = fr[1].bottom = bottom;

			// blurdetected = true;
		}
		//printf("samesrc = %d blurdetected = %d\n",samesrc,blurdetected);
	}

	GSVector2i fs(0, 0);
	GSVector2i ds(0, 0);

	GSTexture* tex[2] = {NULL, NULL};

	if(samesrc && fr[0].bottom == fr[1].bottom)
	{
		tex[0] = GetOutput(0);
		tex[1] = tex[0]; // saves one texture fetch
	}
	else
	{
		if(en[0]) tex[0] = GetOutput(0);
		if(en[1]) tex[1] = GetOutput(1);
	}

	GSVector4 src[2];
	GSVector4 dst[2];

	for(int i = 0; i < 2; i++)
	{
		if(!en[i] || !tex[i]) continue;

		GSVector4i r = fr[i];

		// overscan hack

		if(dr[i].height() > 512) // hmm
		{
			int y = GetDeviceSize(i).y;
			if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) y /= 2;
			r.bottom = r.top + y;
		}

		GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();

		src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy();

		GSVector2 o(0, 0);

		if(dr[i].top - baseline >= 4) // 2?
		{
			o.y = tex[i]->GetScale().y * (dr[i].top - baseline);

			if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
			{
				o.y /= 2;
			}
		}

		dst[i] = GSVector4(o).xyxy() + scale * GSVector4(r.rsize());

		fs.x = max(fs.x, (int)(dst[i].z + 0.5f));
		fs.y = max(fs.y, (int)(dst[i].w + 0.5f));
	}

	ds = fs;

	if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
	{
		ds.y *= 2;
	}

	bool slbg = m_regs->PMODE.SLBG;
	bool mmod = m_regs->PMODE.MMOD;

	if(tex[0] || tex[1])
	{
		if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue())
		{
			// the two outputs are identical, skip drawing one of them (the one that is alpha blended)

			tex[0] = NULL;
		}

		GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255;

		m_dev->Merge(tex, src, dst, fs, slbg, mmod, c);

		if(m_regs->SMODE2.INT && m_interlace > 0)
		{
			if (m_interlace == 7 && m_regs->SMODE2.FFMD == 1) // Auto interlace enabled / Odd frame interlace setting
			{
				int field2 = 0;
				int mode = 2;
				m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y);
			}
			else
			{
				int field2 = 1 - ((m_interlace - 1) & 1);
				int mode = (m_interlace - 1) >> 1;
				m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y);
			}
		}

		if(m_shadeboost)
		{
			m_dev->ShadeBoost();
		}

		if (m_shaderfx)
		{
			m_dev->ExternalFX();
		}

		if(m_fxaa)
		{
			m_dev->FXAA();
		}
	}

	return true;
}
예제 #25
0
// Reinterprets the 128 bits of a float vector as an integer vector.
// Uses _mm_castps_si128, so no numeric conversion takes place.
GSVector4i GSVector4i::cast(const GSVector4& v)
{
	const __m128i raw_bits = _mm_castps_si128(v.m);

	return GSVector4i(raw_bits);
}
예제 #26
0
// Issues one hardware draw for the current drawing context.
//
// Steps, in order:
//  1. return early if the device is lost or the frame is flagged as bad (s_n is still advanced),
//  2. decide whether the color target and/or the depth target can be left out (no_rt / no_ds),
//  3. look up the render target, the depth target and (when PRIM->TME is set) the source texture in m_tc,
//  4. when s_dump is set, save the register state and the input textures to disk,
//  5. let the optional m_hacks.m_oi hook veto the draw,
//  6. try to drop the alpha test, apply the sprite upscaling hacks, then call DrawPrims(),
//  7. restore the registers modified in step 6, extend the targets' valid area and invalidate overlapping cache entries,
//  8. run the optional m_hacks.m_oo hook and, when s_dump is set, save the outputs.
//
// Every exit path after GL_PUSH calls GL_POP itself.
void GSRendererHW::Draw()
{
	if(m_dev->IsLost() || GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) {
		GL_INS("Warning skipping a draw call (%d)", s_n);
		s_n += 3; // Keep it sync with SW renderer
		return;
	}
	GL_PUSH("HW Draw %d", s_n);

	GSDrawingEnvironment& env = m_env;
	GSDrawingContext* context = m_context;

	// It is allowed to use the depth and rt at the same location. However at least 1 must
	// be disabled.
	// 1/ GoW uses a Cd blending on a 24 bits buffer (no alpha)
	// 2/ SuperMan really draws (0,0,0,0) color and a (0) 32-bits depth
	// 3/ 50cents really draws (0,0,0,128) color and a (0) 24 bits depth
	// Note: FF DoC has both buffer at same location but disable the depth test (write?) with ZTE = 0
	const bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1));
	const bool no_ds = !no_rt && (
			// Depth is always pass (no read) and write are discarded (tekken 5).  (Note: DATE is currently implemented with a stencil buffer)
			(context->ZBUF.ZMSK && m_context->TEST.ZTST == ZTST_ALWAYS && !m_context->TEST.DATE) ||
			// Depth will be written through the RT
			(context->FRAME.FBP == context->ZBUF.ZBP && !PRIM->TME && !context->ZBUF.ZMSK && !context->FRAME.FBMSK && context->TEST.ZTE)
			);

	// Scratch TEX0 register, used only to describe the targets to LookupTarget()
	GIFRegTEX0 TEX0;

	TEX0.TBP0 = context->FRAME.Block();
	TEX0.TBW = context->FRAME.FBW;
	TEX0.PSM = context->FRAME.PSM;

	GSTextureCache::Target* rt = no_rt ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
	GSTexture* rt_tex = rt ? rt->m_texture : NULL;

	// The depth target reuses the frame buffer width (FBW) with the Z buffer's base and format
	TEX0.TBP0 = context->ZBUF.Block();
	TEX0.TBW = context->FRAME.FBW;
	TEX0.PSM = context->ZBUF.PSM;

	GSTextureCache::Target* ds = no_ds ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
	GSTexture* ds_tex = ds ? ds->m_texture : NULL;

	// A target that was requested (not deliberately skipped) but could not be obtained aborts the draw
	if(!(rt || no_rt) || !(ds || no_ds))
	{
		GL_POP();
		ASSERT(0);
		return;
	}

	GSTextureCache::Source* tex = NULL;
	m_texture_shuffle = false;

	if(PRIM->TME)
	{
		/*
		
		// m_tc->LookupSource will mess with the palette, should not, but we do this after, until it is sorted out

		if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}

		*/

		GSVector4i r;

		GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());

		tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);

		// NOTE(review): unlike the other early exits, this one and the target-lookup failure above
		// leave s_n untouched - confirm that is intended
		if(!tex) {
			GL_POP();
			return;
		}

		// FIXME: Could be removed on openGL
		if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}

		// Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target.
		// Initially code also tested the RT but it gives too much false-positive
		//
		// Both input and output are 16 bits and texture was initially 32 bits!
		m_texture_shuffle = (context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt;

		// Texture shuffle is not yet supported with strange clamp mode
		ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3));
	}
	if (rt) {
		// Be sure texture shuffle detection is properly propagated
		// Otherwise set or clear the flag (Code in texture cache only set the flag)
		// Note: it is important to clear the flag when RT is used as a real 16 bits target.
		rt->m_32_bits_fmt = m_texture_shuffle || !(context->FRAME.PSM & 0x2);
	}

	// Pre-draw dump. s_n advances by 2 in this block (and by 2 in the #ifdef'd else branch),
	// so file numbering does not depend on which dump options are enabled.
	if(s_dump)
	{
		uint64 frame = m_perfmon.GetFrame();

		string s;

		if (s_n >= s_saven) {
			// Dump Register state
			s = format("%05d_context.txt", s_n);

			m_env.Dump(root_hw+s);
			m_context->Dump(root_hw+s);
		}

		if(s_savet && s_n >= s_saven && tex)
		{
			s = format("%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
				s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
				(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
				(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
				(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);

			tex->m_texture->Save(root_hw+s, true);

			if(tex->m_palette)
			{
				s = format("%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);

				tex->m_palette->Save(root_hw+s, true);
			}
		}

		s_n++;

		if(s_save && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);

			if (rt)
				rt->m_texture->Save(root_hw+s);
		}

		if(s_savez && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);

			if (ds_tex)
				ds_tex->Save(root_hw+s);
		}

		s_n++;

#ifdef ENABLE_OGL_DEBUG
	} else {
		s_n += 2;
#endif
	}

	// Optional hook run before the draw; a false return cancels the draw
	if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds_tex, tex))
	{
		s_n += 1; // keep counter sync
		GL_POP();
		return;
	}

	// skip alpha test if possible

	// Saved copies of the registers that are modified below; restored right after DrawPrims()
	GIFRegTEST TEST = context->TEST;
	GIFRegFRAME FRAME = context->FRAME;
	GIFRegZBUF ZBUF = context->ZBUF;

	uint32 fm = context->FRAME.FBMSK;
	// Parsed as (ZMSK || ZTE == 0) ? 0xffffffff : 0
	uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;

	if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
	{
		// fm / zm are written back to the context below, so TryAlphaTest presumably updates them - verify its signature
		if(GSRenderer::TryAlphaTest(fm, zm))
		{
			context->TEST.ATST = ATST_ALWAYS;
		}
	}

	context->FRAME.FBMSK = fm;
	context->ZBUF.ZMSK = zm != 0;

	// A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite
	if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS)) {
		size_t count = m_vertex.next;
		GSVertex* v = &m_vertex.buff[0];

		// Hack to avoid vertical black line in various games (ace combat/tekken)
		if (m_userhacks_align_sprite_X) {
			// Note for performance reason I do the check only once on the first
			// primitive
			int win_position = v[1].XYZ.X - context->XYOFFSET.OFX;
			const bool unaligned_position = ((win_position & 0xF) == 8);
			const bool unaligned_texture  = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful
			const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X);
			if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST)) {
				// Normaly vertex are aligned on full pixels and texture in half
				// pixels. Let's extend the coverage of an half-pixel to avoid
				// hole after upscaling
				// Only the second vertex of each pair (v[i+1]) is moved
				for(size_t i = 0; i < count; i += 2) {
					v[i+1].XYZ.X += 8;
					// I really don't know if it is a good idea. Neither what to do for !PRIM->FST
					if (unaligned_texture)
						v[i+1].U += 8;
				}
			}
		}

		if (PRIM->FST) {
			// Setting 1 applies the rounding only when sampling is not linear; settings above 1 apply it always
			if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear())) {
				if (m_vt.IsLinear())
					RoundSpriteOffset<true>();
				else
					RoundSpriteOffset<false>();
			}
		} else {
			; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior)
		}
	}

	//

	DrawPrims(rt_tex, ds_tex, tex);

	//

	// Undo the alpha-test related register changes made above
	context->TEST = TEST;
	context->FRAME = FRAME;
	context->ZBUF = ZBUF;

	//

	// Vertex bounding box (min/max from the vertex trace) clipped to the context scissor
	GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));

	// Color was written unless every frame buffer bit is masked
	if(fm != 0xffffffff && rt)
	{
		rt->m_valid = rt->m_valid.runion(r);

		m_tc->InvalidateVideoMem(context->offset.fb, r, false);

		m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block());
	}

	// Depth was written unless the Z mask is fully set
	if(zm != 0xffffffff && ds)
	{
		ds->m_valid = ds->m_valid.runion(r);

		m_tc->InvalidateVideoMem(context->offset.zb, r, false);

		m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block());
	}

	//

	// Optional hook run after the draw
	if(m_hacks.m_oo)
	{
		(this->*m_hacks.m_oo)();
	}

	// Post-draw dump; s_n advances by 1 here (and by 1 in the #ifdef'd else branch)
	if(s_dump)
	{
		uint64 frame = m_perfmon.GetFrame();

		string s;

		if(s_save && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);

			if (rt)
				rt->m_texture->Save(root_hw+s);
		}

		if(s_savez && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);

			if (ds_tex)
				ds_tex->Save(root_hw+s);
		}

		s_n++;

		// Stop dumping once more than s_savel entries past s_saven have been produced
		if(s_savel > 0 && (s_n - s_saven) > s_savel)
		{
			s_dump = 0;
		}
#ifdef ENABLE_OGL_DEBUG
	} else {
		s_n += 1;
#endif
	}

	#ifdef DISABLE_HW_TEXTURE_CACHE
	
	if (rt)
		m_tc->Read(rt, r);

	#endif

	GL_POP();
}
예제 #27
0
// Rasterizes one triangle as two sections: first the rows between v[0] and v[1],
// then the rows between v[1] and v[2].
//
// v       - the three vertices. v[0] is modified in place: it is bound to the reference "l"
//           and used as the running left-edge value.
//           NOTE(review): the name and the math suggest v[0..2] are already sorted by
//           increasing y - confirm with the caller.
// scissor - clip rectangle; its y and w lanes bound the rows that are drawn.
void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scissor)
{
	GSVertexSW dv[3];

	// Edge deltas from v[0]: dv[0] towards v[1], dv[1] towards v[2] (dv[2] is filled in later, for the second section)
	dv[0] = v[1] - v[0];
	dv[1] = v[2] - v[0];

	// dv[1] scaled by the ratio of the y deltas, minus dv[0]: presumably the span across the
	// triangle at v[1]'s row (from v[1] to the point on the v[0]-v[2] edge)
	GSVertexSW longest = dv[1] * (dv[0].p / dv[1].p).yyyy() - dv[0];

	// Presumably: bit 0 = sign of longest.p.x, bit 1 = (longest.p.x == 0) - depends on upl()/mask() lane order, verify
	int i = longest.p.upl(longest.p == GSVector4::zero()).mask();

	// Nothing to draw when bit 1 is set (presumably a zero-width, degenerate triangle)
	if(i & 2) return;

	// i (0 or 1) selects which of dv[0] / dv[1] is the left edge; the other one is the right edge
	i &= 1;

	// Per-pixel step along a scanline: the span divided by its x extent
	GSVertexSW dscan = longest * longest.p.xxxx().rcp();

	m_dsf.ssp(v, dscan);

	GSVector4 fscissor(scissor);

	// Rounded-up vertex positions; the lanes consumed below act as the row boundaries of the two sections
	GSVector4 tb = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil();

	// Clamp the boundaries to the scissor: not above scissor.y, not below scissor.w
	GSVector4 tbmax = tb.max(fscissor.yyyy());
	GSVector4 tbmin = tb.min(fscissor.wwww());

	GSVector4i tbi = GSVector4i(tbmax.xzyw(tbmin));

	// First section uses lanes 0 and 2 of tbi
	int top = tbi.extract32<0>();
	int bottom = tbi.extract32<2>();

	GSVertexSW& l = v[0];
	GSVector4 r = v[0].p;

	// Per-row steps of the left vertex and of the right position (each edge delta divided by its y extent)
	GSVertexSW dl = dv[i] / dv[i].p.yyyy();
	GSVector4 dr = dv[1 - i].p / dv[1 - i].p.yyyy();

	// Distance from v[0]'s y to the first row, used to pre-step both edges
	GSVector4 dy = tbmax.xxxx() - l.p.yyyy();

	l += dl * dy;
	r += dr * dy;

	if(top < bottom)
	{
		DrawTriangleSection(top, bottom, l, dl, r, dr, dscan, fscissor);
	}

	// Second section uses lanes 1 and 3 of tbi
	top = tbi.y;
	bottom = tbi.w;

	if(top < bottom)
	{
		// Restart whichever edge ended at v[1]; the other edge keeps its current value and step
		if(i == 0)
		{
			l = v[1];
			dv[2] = v[2] - v[1];
			dl = dv[2] / dv[2].p.yyyy();
		}
		else
		{
			// Only the position part of dv[2] is needed for the right edge
			r = v[1].p;
			dv[2].p = v[2].p - v[1].p;
			dr = dv[2].p / dv[2].p.yyyy();
		}

		// Pre-step both edges to the first row of the second section
		l += dl * (tbmax.zzzz() - l.p.yyyy());
		r += dr * (tbmax.zzzz() - r.yyyy());

		l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
		dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y

		// NOTE(review): this call passes 6 arguments while the first section passes 8 (with r and dr);
		// here the right edge travels packed in l.p.y / dl.p.y - confirm both overloads exist and agree
		DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
	}
}
예제 #28
0
// Combines the enabled display outputs (index 0 and 1), plus the feedback output when
// EXTWRITE.WRITE == 1, into the device's merged image, then runs the optional
// post-processing passes (interlace, shade boost, external FX, FXAA).
//
// field - field value that is xor-ed with the interlace setting before calling Interlace().
//
// Returns false when neither output is enabled; true otherwise, even when no output
// texture could be fetched. Side effects visible here: s_n is incremented and
// m_real_size is updated.
bool GSRenderer::Merge(int field)
{
	bool en[2];

	// Frame (source) and display (destination) rectangles; only filled in for enabled outputs
	GSVector4i fr[2];
	GSVector4i dr[2];

	// Smallest left/top over the enabled outputs, for display and frame rectangles respectively
	GSVector2i display_baseline = { INT_MAX, INT_MAX };
	GSVector2i frame_baseline = { INT_MAX, INT_MAX };

	for(int i = 0; i < 2; i++)
	{
		en[i] = IsEnabled(i);

		if(en[i])
		{
			fr[i] = GetFrameRect(i);
			dr[i] = GetDisplayRect(i);

			display_baseline.x = min(dr[i].left, display_baseline.x);
			display_baseline.y = min(dr[i].top, display_baseline.y);
			frame_baseline.x = min(fr[i].left, frame_baseline.x);
			frame_baseline.y = min(fr[i].top, frame_baseline.y);

			//printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w);
		}
	}

	if(!en[0] && !en[1])
	{
		return false;
	}

	// NOTE(review): no GL_POP appears in this function - presumably GL_PUSH is scope-based in this revision, confirm
	GL_PUSH("Renderer Merge %d (0: enabled %d 0x%x, 1: enabled %d 0x%x)", s_n, en[0], m_regs->DISP[0].DISPFB.Block(), en[1], m_regs->DISP[1].DISPFB.Block());

	// try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero)
	//
	// NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending)

	// Both outputs are enabled and read from the same frame buffer (same FBP, FBW and PSM)
	bool samesrc =
		en[0] && en[1] &&
		m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP &&
		m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW &&
		m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM;

	if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/)
	{
		// persona 4:
		//
		// fr[0] = 0 0 640 448
		// fr[1] = 0 1 640 448
		// dr[0] = 159 50 779 498
		// dr[1] = 159 50 779 497
		//
		// second image shifted up by 1 pixel and blended over itself
		//
		// god of war:
		//
		// fr[0] = 0 1 512 448
		// fr[1] = 0 0 512 448
		// dr[0] = 127 50 639 497
		// dr[1] = 127 50 639 498
		//
		// same just the first image shifted
		//
		// These kinds of cases are now fixed by the more generic frame_diff code below, as the code here was too specific and has become obsolete.
		// NOTE: Persona 4 and God Of War are not rare exceptions, many games have the same(or very similar) offsets.

		// Same display rectangle and frame rectangles that differ only by a vertical shift of topDiff.
		// NOTE(review): because topDiff == fr[0].top - fr[1].top, the second eq() can only hold when
		// topDiff is 0, where it repeats the first one - it looks redundant, confirm before simplifying
		int topDiff = fr[0].top - fr[1].top;
		if (dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, topDiff, 0, topDiff)) || fr[1].eq(fr[0] + GSVector4i(0, topDiff, 0, topDiff))))
		{
			// dq5:
			//
			// fr[0] = 0 1 512 445
			// fr[1] = 0 0 512 444
			// dr[0] = 127 50 639 494
			// dr[1] = 127 50 639 494

			// Give both outputs the same frame rows: the smaller top and the smaller bottom
			int top = min(fr[0].top, fr[1].top);
			int bottom = min(fr[0].bottom, fr[1].bottom);

			fr[0].top = fr[1].top = top;
			fr[0].bottom = fr[1].bottom = bottom;
		}
	}

	// fs: size of the merged image; ds: size passed to Interlace() and stored in m_real_size
	GSVector2i fs(0, 0);
	GSVector2i ds(0, 0);

	// tex[0] / tex[1] are the two outputs, tex[2] the feedback output; y_offset[i] is filled in by GetOutput()
	GSTexture* tex[3] = {NULL, NULL, NULL};
	int y_offset[3]   = {0, 0, 0};

	s_n++;

	bool feedback_merge = m_regs->EXTWRITE.WRITE == 1;

	if(samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge)
	{
		tex[0]      = GetOutput(0, y_offset[0]);
		tex[1]      = tex[0]; // saves one texture fetch
		y_offset[1] = y_offset[0];
	}
	else
	{
		if(en[0]) tex[0] = GetOutput(0, y_offset[0]);
		if(en[1]) tex[1] = GetOutput(1, y_offset[1]);
		if(feedback_merge) tex[2] = GetFeedbackOutput();
	}

	// src is only used for the "identical outputs" comparison below; src_hw, which additionally
	// includes y_offset, is what gets passed to m_dev->Merge()
	GSVector4 src[2];
	GSVector4 src_hw[2];
	GSVector4 dst[2];

	for(int i = 0; i < 2; i++)
	{
		if(!en[i] || !tex[i]) continue;

		GSVector4i r = fr[i];
		GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();

		// Source rectangles, divided by the texture size
		src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy();
		src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy();

		// Offset of this output inside the merged image, derived from its distance to the baselines
		GSVector2 off(0);
		GSVector2i display_diff(dr[i].left - display_baseline.x, dr[i].top - display_baseline.y);
		GSVector2i frame_diff(fr[i].left - frame_baseline.x, fr[i].top - frame_baseline.y);

		// Time Crisis 2/3 uses two side by side images when in split screen mode.
		// Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII
		if(display_diff.x > 2)
		{
			off.x = tex[i]->GetScale().x * display_diff.x;
		}
		// If the DX offset is too small then consider the status of frame memory offsets, prevents blurring on Tenchu: Fatal Shadows, Worms 3D
		else if(display_diff.x != frame_diff.x)
		{
			off.x = tex[i]->GetScale().x * frame_diff.x;
		}

		if(display_diff.y >= 4) // Shouldn't this be >= 2?
		{
			off.y = tex[i]->GetScale().y * display_diff.y;

			// The vertical offset is halved when INT and FFMD are both set
			if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
			{
				off.y /= 2;
			}
		}
		else if(display_diff.y != frame_diff.y)
		{
			off.y = tex[i]->GetScale().y * frame_diff.y;
		}

		dst[i] = GSVector4(off).xyxy() + scale * GSVector4(r.rsize());

		// Grow the merged size to contain this output (rounded to the nearest integer)
		fs.x = max(fs.x, (int)(dst[i].z + 0.5f));
		fs.y = max(fs.y, (int)(dst[i].w + 0.5f));
	}

	ds = fs;

	// ds is twice as tall as fs when INT and FFMD are both set
	if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
	{
		ds.y *= 2;
	}
	m_real_size = ds;

	bool slbg = m_regs->PMODE.SLBG;

	// Nothing is drawn or post-processed when neither output produced a texture
	// (tex[2] alone does not trigger a merge)
	if(tex[0] || tex[1])
	{
		if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue())
		{
			// the two outputs are identical, skip drawing one of them (the one that is alpha blended)

			tex[0] = NULL;
		}

		// Background color and PMODE.ALP, divided by 255
		GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255;

		m_dev->Merge(tex, src_hw, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c);

		if(m_regs->SMODE2.INT && m_interlace > 0)
		{
			if(m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting
			{
				int field2 = 0;
				int mode = 2;
				m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y);
			}
			else
			{
				// Decode the setting: bit 0 of (m_interlace - 1) picks the field, the remaining bits the mode
				int field2 = 1 - ((m_interlace - 1) & 1);
				int mode = (m_interlace - 1) >> 1;
				m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y);
			}
		}

		if(m_shadeboost)
		{
			m_dev->ShadeBoost();
		}

		if(m_shaderfx)
		{
			m_dev->ExternalFX();
		}

		if(m_fxaa)
		{
			m_dev->FXAA();
		}
	}

	return true;
}
예제 #29
0
 *  any later version.
 *
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 */

#include "stdafx.h"
#include "GSBlock.h"

// Definitions of GSBlock's static vector constants.

// 16-entry permutations of the indices 0..15 (going by the names, presumably byte shuffle
// masks for 16-, 8- and 4-bit data - verify at the use sites).
const GSVector4i GSBlock::m_r16mask = GSVector4i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
const GSVector4i GSBlock::m_r8mask = GSVector4i(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
const GSVector4i GSBlock::m_r4mask = GSVector4i(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);

// Bit-field masks: bit 15, bits 10-14, bits 5-9 and bits 0-4 (going by the names,
// presumably the a / b / g / r fields of a 16-bit pixel).
const GSVector4i GSBlock::m_xxxa = GSVector4i(0x00008000);
const GSVector4i GSBlock::m_xxbx = GSVector4i(0x00007c00);
const GSVector4i GSBlock::m_xgxx = GSVector4i(0x000003e0);
const GSVector4i GSBlock::m_rxxx = GSVector4i(0x0000001f);

// Each table repeats four source indices four times; tables 0-3 use the index pairs
// 0/1 + 8/9, 2/3 + 10/11, 4/5 + 12/13 and 6/7 + 14/15 respectively.
const GSVector4i GSBlock::m_uw8hmask0 = GSVector4i(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
const GSVector4i GSBlock::m_uw8hmask1 = GSVector4i(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
const GSVector4i GSBlock::m_uw8hmask2 = GSVector4i(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
const GSVector4i GSBlock::m_uw8hmask3 = GSVector4i(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
예제 #30
0
// Issues one hardware draw for the current drawing context.
//
// Steps, in order:
//  1. return early if the device is lost or the frame is flagged as bad,
//  2. look up the render target and the depth target in m_tc (both are required here),
//  3. when PRIM->TME is set, look up the source texture and give up if there is none,
//  4. when s_dump is set, save the input textures to disk,
//  5. let the optional m_hacks.m_oi hook veto the draw,
//  6. try to drop the alpha test, then call DrawPrims(),
//  7. restore the registers modified in step 6, extend the targets' valid area and invalidate overlapping cache entries,
//  8. run the optional m_hacks.m_oo hook and, when s_dump is set, save the outputs.
void GSRendererHW::Draw()
{
	if(m_dev->IsLost()) return;

	if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;

	GSDrawingEnvironment& env = m_env;
	GSDrawingContext* context = m_context;

	// Scratch TEX0 register, used only to describe the targets to LookupTarget()
	GIFRegTEX0 TEX0;

	TEX0.TBP0 = context->FRAME.Block();
	TEX0.TBW = context->FRAME.FBW;
	TEX0.PSM = context->FRAME.PSM;
	GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);

	// The depth target reuses the frame buffer width (FBW) with the Z buffer's base and format
	TEX0.TBP0 = context->ZBUF.Block();
	TEX0.TBW = context->FRAME.FBW;
	TEX0.PSM = context->ZBUF.PSM;

	GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());

	// Both targets are dereferenced unconditionally below, so a failed lookup aborts the draw
	if(!rt || !ds)
	{
		ASSERT(0);

		return;
	}

	GSTextureCache::Source* tex = NULL;

	if(PRIM->TME)
	{
		/*
		
		// m_tc->LookupSource will mess with the palette, should not, but we do this after, until it is sorted out

		if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}

		*/

		GSVector4i r;

		GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());

		tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);

		if(!tex) return;

		// Re-read the CLUT for paletted formats after the lookup (see the commented-out block above)
		if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}
	}

	// Pre-draw dump; s_n advances by 2 in this block.
	// NOTE(review): the output directory is hard-coded to c:\temp2\ - Windows-only, and presumably must already exist
	if(s_dump)
	{
		uint64 frame = m_perfmon.GetFrame();

		string s;

		if(s_save && s_n >= s_saven && tex)
		{
			s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
				s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
				(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
				(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
				(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);

			tex->m_texture->Save(s, true);

			if(tex->m_palette)
			{
				s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);

				tex->m_palette->Save(s, true);
			}
		}

		s_n++;

		if(s_save && s_n >= s_saven)
		{
			s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);

			rt->m_texture->Save(s);
		}

		if(s_savez && s_n >= s_saven)
		{
			s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);

			ds->m_texture->Save(s);
		}

		s_n++;
	}

	// Optional hook run before the draw; a false return cancels the draw
	if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
	{
		return;
	}

	// skip alpha test if possible

	// Saved copies of the registers that are modified below; restored right after DrawPrims()
	GIFRegTEST TEST = context->TEST;
	GIFRegFRAME FRAME = context->FRAME;
	GIFRegZBUF ZBUF = context->ZBUF;

	uint32 fm = context->FRAME.FBMSK;
	// Parsed as (ZMSK || ZTE == 0) ? 0xffffffff : 0
	uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;

	if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
	{
		// fm / zm are written back to the context below, so TryAlphaTest presumably updates them - verify its signature
		if(GSRenderer::TryAlphaTest(fm, zm))
		{
			context->TEST.ATST = ATST_ALWAYS;
		}
	}

	context->FRAME.FBMSK = fm;
	context->ZBUF.ZMSK = zm != 0;

	//

	DrawPrims(rt->m_texture, ds->m_texture, tex);

	//

	// Undo the alpha-test related register changes made above
	context->TEST = TEST;
	context->FRAME = FRAME;
	context->ZBUF = ZBUF;

	//

	// Vertex bounding box (min/max from the vertex trace) clipped to the context scissor
	GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));

	// Color was written unless every frame buffer bit is masked
	if(fm != 0xffffffff)
	{
		rt->m_valid = rt->m_valid.runion(r);

		m_tc->InvalidateVideoMem(context->offset.fb, r, false);
	}

	// Depth was written unless the Z mask is fully set
	if(zm != 0xffffffff)
	{
		ds->m_valid = ds->m_valid.runion(r);

		m_tc->InvalidateVideoMem(context->offset.zb, r, false);
	}

	//

	// Optional hook run after the draw
	if(m_hacks.m_oo)
	{
		(this->*m_hacks.m_oo)();
	}

	// Post-draw dump; s_n advances by 1 here
	if(s_dump)
	{
		uint64 frame = m_perfmon.GetFrame();

		string s;

		if(s_save && s_n >= s_saven)
		{
			s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);

			rt->m_texture->Save(s);
		}

		if(s_savez && s_n >= s_saven)
		{
			s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);

			ds->m_texture->Save(s);
		}

		s_n++;
	}

	#ifdef DISABLE_HW_TEXTURE_CACHE
	
	m_tc->Read(rt, r);

	#endif
}