Пример #1
0
void GSDevice::Present(const GSVector4i& r, int shader)
{
	GSVector4i cr = m_wnd->GetClientRect();

	int w = std::max<int>(cr.width(), 1);
	int h = std::max<int>(cr.height(), 1);

	if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
	{
		if(!Reset(w, h))
		{
			return;
		}
	}

	GL_PUSH("Present");

	ClearRenderTarget(m_backbuffer, 0);

	if(m_current)
	{
		static int s_shader[5] = {0, 5, 6, 8, 9}; // FIXME

		Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]);
	}

	Flip();

	GL_POP();
}
Пример #2
0
void GSDevice::Present(const GSVector4i& r, int shader)
{
	GSVector4i cr = m_wnd->GetClientRect();

	int w = std::max<int>(cr.width(), 1);
	int h = std::max<int>(cr.height(), 1);

	if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
	{
		if(!Reset(w, h))
		{
			return;
		}
	}

	GL_PUSH("Present");

	ClearRenderTarget(m_backbuffer, 0);

	if(m_current)
	{
		static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE,
			ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER,
			ShaderConvert_COMPLEX_FILTER}; // FIXME

		Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]);
	}

	Flip();

	GL_POP();
}
Пример #3
0
void GSDeviceOGL::CreateTextureFX()
{
	m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer));
	m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer));

	// warning 1 sampler by image unit. So you cannot reuse m_ps_ss...
	m_palette_ss = CreateSampler(false, false, false);
	glBindSampler(1, m_palette_ss);

	// Pre compile all Geometry & Vertex Shader
	// It might cost a seconds at startup but it would reduce benchmark pollution
	GL_PUSH("Compile GS");

	for (uint32 key = 0; key < countof(m_gs); key++) {
		GSSelector sel(key);
		if (sel.point == sel.sprite)
			m_gs[key] = 0;
		else
			m_gs[key] = CompileGS(GSSelector(key));
	}

	GL_POP();

	GL_PUSH("Compile VS");

	for (uint32 key = 0; key < countof(m_vs); key++) {
		// wildhack is only useful if both TME and FST are enabled.
		VSSelector sel(key);
		if (sel.wildhack && (!sel.tme || !sel.fst))
			m_vs[key] = 0;
		else
			m_vs[key] = CompileVS(sel, !GLLoader::found_GL_ARB_clip_control);
	}

	GL_POP();

	// Enable all bits for stencil operations. Technically 1 bit is
	// enough but buffer is polluted with noise. Clear will be limited
	// to the mask.
	glStencilMask(0xFF);
	for (uint32 key = 0; key < countof(m_om_dss); key++) {
		m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
	}

	// Help to debug FS in apitrace
	m_apitrace = CompilePS(PSSelector());
}
Пример #4
0
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
{
	ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen);
	GL_PUSH("Upload Texture %d", m_texture_id);

	m_dirty = true;
	m_clean = false;

	glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);

	char* src = (char*)data;
	uint32 row_byte = r.width() << m_int_shift;
	uint32 map_size = r.height() * row_byte;
	char* map = PboPool::Map(map_size);

#ifdef ENABLE_OGL_DEBUG_MEM_BW
	g_real_texture_upload_byte += map_size;
#endif

	// PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch
	// Note: row_byte != pitch
	for (int h = 0; h < r.height(); h++) {
		memcpy(map, src, row_byte);
		map += row_byte;
		src += pitch;
	}

	PboPool::Unmap();

	glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());

	// FIXME OGL4: investigate, only 1 unpack buffer always bound
	PboPool::UnbindPbo();

	PboPool::EndTransfer();

	GL_POP();
	return true;

	// For reference, standard upload without pbo (Used to crash on FGLRX)
#if 0
	// pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel
	glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
	glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);

	glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data);

	// FIXME useful?
	glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior

	return true;
#endif
}
Пример #5
0
void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb)
{
    GL_PUSH("UBO");
    if(m_vs_cb_cache.Update(vs_cb)) {
        m_vs_cb->upload(vs_cb);
    }

    if(m_ps_cb_cache.Update(ps_cb)) {
        m_ps_cb->upload(ps_cb);
    }
    GL_POP();
}
Пример #6
0
void GSTextureOGL::Unmap()
{
	if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) {

		PboPool::Unmap();

		glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type, (const void*)PboPool::Offset());

		// FIXME OGL4: investigate, only 1 unpack buffer always bound
		PboPool::UnbindPbo();

		PboPool::EndTransfer();

		GL_POP(); // PUSH is in Map
	}
}
Пример #7
0
void GSDeviceOGL::CreateTextureFX()
{
	GL_PUSH("CreateTextureFX");

	m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer));
	m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer));

	// warning 1 sampler by image unit. So you cannot reuse m_ps_ss...
	m_palette_ss = CreateSampler(false, false, false);
	gl_BindSampler(1, m_palette_ss);

	// Pre compile all Geometry & Vertex Shader
	// It might cost a seconds at startup but it would reduce benchmark pollution
	m_gs = CompileGS();

	int logz = theApp.GetConfig("logz", 1);
	// Don't do it in debug build, so it can still be tested
#ifndef _DEBUG
	if (GLLoader::found_GL_ARB_clip_control && logz) {
		fprintf(stderr, "Your driver supports advance depth. Logz will be disabled\n");
		logz = 0;
	} else if (!GLLoader::found_GL_ARB_clip_control && !logz) {
		fprintf(stderr, "Your driver DOESN'T support advance depth (GL_ARB_clip_control)\n It is higly recommmended to enable logz\n");
	}
#endif
	for (uint32 key = 0; key < VSSelector::size(); key++) {
		// wildhack is only useful if both TME and FST are enabled.
		VSSelector sel(key);
		if (sel.wildhack && (!sel.tme || !sel.fst))
			m_vs[key] = 0;
		else
			m_vs[key] = CompileVS(sel, logz);
	}

	// Enable all bits for stencil operations. Technically 1 bit is
	// enough but buffer is polluted with noise. Clear will be limited
	// to the mask.
	glStencilMask(0xFF);
	for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++)
		m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));

	// Help to debug FS in apitrace
	m_apitrace = CompilePS(PSSelector());

	GL_POP();
}
Пример #8
0
void GSShaderOGL::UseProgram()
{
	GL_PUSH("Use Program And Uniform");

	if (GLState::dirty_prog) {
		if (!GLLoader::found_GL_ARB_separate_shader_objects) {
			GLState::dirty_ressources = true;

			hash_map<uint64, GLuint >::iterator it;
			// Note: shader are integer lookup pointer. They start from 1 and incr
			// every time you create a new shader OR a new program.
			// Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128. GS has only 2 programs
			// We migth be able to pack the value in a 32bits int
			// I would need to check the behavior on Nvidia (pause/resume).
			uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps;
			it = m_single_prog.find(sel);
			if (it == m_single_prog.end()) {
				GLState::program = LinkNewProgram();
				m_single_prog[sel] = GLState::program;

				ValidateProgram(GLState::program);

				gl_UseProgram(GLState::program);
			} else {
				GLuint prog = it->second;
				if (prog != GLState::program) {
					GLState::program = prog;
					gl_UseProgram(GLState::program);
				}
			}

		} else {
			ValidatePipeline(m_pipeline);
		}
	}

	SetupRessources();

	GLState::dirty_prog = false;

	GL_POP();
}
Пример #9
0
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{
	if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0))
		return;

	const GIFRegTEX0& TEX0 = t->m_TEX0;

	GLuint fmt;
	int ps_shader;
	switch (TEX0.PSM)
	{
		case PSM_PSMCT32:
		case PSM_PSMCT24:
			fmt = GL_RGBA8;
			ps_shader = 0;
			break;

		case PSM_PSMCT16:
		case PSM_PSMCT16S:
			fmt = GL_R16UI;
			ps_shader = 1;
			break;

		case PSM_PSMZ32:
			fmt = GL_R32UI;
			ps_shader = 10;
			break;

		case PSM_PSMZ24:
			fmt = GL_R32UI;
			ps_shader = 10;
			break;

		case PSM_PSMZ16:
		case PSM_PSMZ16S:
			fmt = GL_R16UI;
			ps_shader = 10;
			break;

		default:
			return;
	}


	// Yes lots of logging, but I'm not confident with this code
	GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM);

	GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d",
			t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height());

	GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();

	if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader))
	{
		GSTexture::GSMap m;

		if(offscreen->Map(m))
		{
			// TODO: block level write

			GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

			switch(TEX0.PSM)
			{
				case PSM_PSMCT32:
					m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
					break;
				case PSM_PSMCT24:
					m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r);
					break;
				case PSM_PSMCT16:
				case PSM_PSMCT16S:
					m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r);
					break;

				case PSM_PSMZ32:
					m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
					break;
				case PSM_PSMZ24:
					m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r);
					break;
				case PSM_PSMZ16:
				case PSM_PSMZ16S:
					m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r);
					break;

				default:
					ASSERT(0);
			}

			offscreen->Unmap();
		}

		// FIXME invalidate data
		m_renderer->m_dev->Recycle(offscreen);
	}

	GL_POP();
}
Пример #10
0
void GSRendererHW::Draw()
{
	if(m_dev->IsLost() || GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) {
		GL_INS("Warning skipping a draw call (%d)", s_n);
		s_n += 3; // Keep it sync with SW renderer
		return;
	}
	GL_PUSH("HW Draw %d", s_n);

	GSDrawingEnvironment& env = m_env;
	GSDrawingContext* context = m_context;

	// It is allowed to use the depth and rt at the same location. However at least 1 must
	// be disabled.
	// 1/ GoW uses a Cd blending on a 24 bits buffer (no alpha)
	// 2/ SuperMan really draws (0,0,0,0) color and a (0) 32-bits depth
	// 3/ 50cents really draws (0,0,0,128) color and a (0) 24 bits depth
	// Note: FF DoC has both buffer at same location but disable the depth test (write?) with ZTE = 0
	const bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1));
	const bool no_ds = !no_rt && (
			// Depth is always pass (no read) and write are discarded (tekken 5).  (Note: DATE is currently implemented with a stencil buffer)
			(context->ZBUF.ZMSK && m_context->TEST.ZTST == ZTST_ALWAYS && !m_context->TEST.DATE) ||
			// Depth will be written through the RT
			(context->FRAME.FBP == context->ZBUF.ZBP && !PRIM->TME && !context->ZBUF.ZMSK && !context->FRAME.FBMSK && context->TEST.ZTE)
			);

	GIFRegTEX0 TEX0;

	TEX0.TBP0 = context->FRAME.Block();
	TEX0.TBW = context->FRAME.FBW;
	TEX0.PSM = context->FRAME.PSM;

	GSTextureCache::Target* rt = no_rt ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
	GSTexture* rt_tex = rt ? rt->m_texture : NULL;

	TEX0.TBP0 = context->ZBUF.Block();
	TEX0.TBW = context->FRAME.FBW;
	TEX0.PSM = context->ZBUF.PSM;

	GSTextureCache::Target* ds = no_ds ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
	GSTexture* ds_tex = ds ? ds->m_texture : NULL;

	if(!(rt || no_rt) || !(ds || no_ds))
	{
		GL_POP();
		ASSERT(0);
		return;
	}

	GSTextureCache::Source* tex = NULL;
	m_texture_shuffle = false;

	if(PRIM->TME)
	{
		/*
		
		// m_tc->LookupSource will mess with the palette, should not, but we do this after, until it is sorted out

		if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}

		*/

		GSVector4i r;

		GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());

		tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);

		if(!tex) {
			GL_POP();
			return;
		}

		// FIXME: Could be removed on openGL
		if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}

		// Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target.
		// Initially code also tested the RT but it gives too much false-positive
		//
		// Both input and output are 16 bits and texture was initially 32 bits!
		m_texture_shuffle = (context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt;

		// Texture shuffle is not yet supported with strange clamp mode
		ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3));
	}
	if (rt) {
		// Be sure texture shuffle detection is properly propagated
		// Otherwise set or clear the flag (Code in texture cache only set the flag)
		// Note: it is important to clear the flag when RT is used as a real 16 bits target.
		rt->m_32_bits_fmt = m_texture_shuffle || !(context->FRAME.PSM & 0x2);
	}

	if(s_dump)
	{
		uint64 frame = m_perfmon.GetFrame();

		string s;

		if (s_n >= s_saven) {
			// Dump Register state
			s = format("%05d_context.txt", s_n);

			m_env.Dump(root_hw+s);
			m_context->Dump(root_hw+s);
		}

		if(s_savet && s_n >= s_saven && tex)
		{
			s = format("%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
				s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
				(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
				(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
				(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);

			tex->m_texture->Save(root_hw+s, true);

			if(tex->m_palette)
			{
				s = format("%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);

				tex->m_palette->Save(root_hw+s, true);
			}
		}

		s_n++;

		if(s_save && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);

			if (rt)
				rt->m_texture->Save(root_hw+s);
		}

		if(s_savez && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);

			if (ds_tex)
				ds_tex->Save(root_hw+s);
		}

		s_n++;

#ifdef ENABLE_OGL_DEBUG
	} else {
		s_n += 2;
#endif
	}

	if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds_tex, tex))
	{
		s_n += 1; // keep counter sync
		GL_POP();
		return;
	}

	// skip alpha test if possible

	GIFRegTEST TEST = context->TEST;
	GIFRegFRAME FRAME = context->FRAME;
	GIFRegZBUF ZBUF = context->ZBUF;

	uint32 fm = context->FRAME.FBMSK;
	uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;

	if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
	{
		if(GSRenderer::TryAlphaTest(fm, zm))
		{
			context->TEST.ATST = ATST_ALWAYS;
		}
	}

	context->FRAME.FBMSK = fm;
	context->ZBUF.ZMSK = zm != 0;

	// A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite
	if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS)) {
		size_t count = m_vertex.next;
		GSVertex* v = &m_vertex.buff[0];

		// Hack to avoid vertical black line in various games (ace combat/tekken)
		if (m_userhacks_align_sprite_X) {
			// Note for performance reason I do the check only once on the first
			// primitive
			int win_position = v[1].XYZ.X - context->XYOFFSET.OFX;
			const bool unaligned_position = ((win_position & 0xF) == 8);
			const bool unaligned_texture  = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful
			const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X);
			if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST)) {
				// Normaly vertex are aligned on full pixels and texture in half
				// pixels. Let's extend the coverage of an half-pixel to avoid
				// hole after upscaling
				for(size_t i = 0; i < count; i += 2) {
					v[i+1].XYZ.X += 8;
					// I really don't know if it is a good idea. Neither what to do for !PRIM->FST
					if (unaligned_texture)
						v[i+1].U += 8;
				}
			}
		}

		if (PRIM->FST) {
			if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear())) {
				if (m_vt.IsLinear())
					RoundSpriteOffset<true>();
				else
					RoundSpriteOffset<false>();
			}
		} else {
			; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior)
		}
	}

	//

	DrawPrims(rt_tex, ds_tex, tex);

	//

	context->TEST = TEST;
	context->FRAME = FRAME;
	context->ZBUF = ZBUF;

	//

	GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));

	if(fm != 0xffffffff && rt)
	{
		rt->m_valid = rt->m_valid.runion(r);

		m_tc->InvalidateVideoMem(context->offset.fb, r, false);

		m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block());
	}

	if(zm != 0xffffffff && ds)
	{
		ds->m_valid = ds->m_valid.runion(r);

		m_tc->InvalidateVideoMem(context->offset.zb, r, false);

		m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block());
	}

	//

	if(m_hacks.m_oo)
	{
		(this->*m_hacks.m_oo)();
	}

	if(s_dump)
	{
		uint64 frame = m_perfmon.GetFrame();

		string s;

		if(s_save && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);

			if (rt)
				rt->m_texture->Save(root_hw+s);
		}

		if(s_savez && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);

			if (ds_tex)
				ds_tex->Save(root_hw+s);
		}

		s_n++;

		if(s_savel > 0 && (s_n - s_saven) > s_savel)
		{
			s_dump = 0;
		}
#ifdef ENABLE_OGL_DEBUG
	} else {
		s_n += 1;
#endif
	}

	#ifdef DISABLE_HW_TEXTURE_CACHE
	
	if (rt)
		m_tc->Read(rt, r);

	#endif

	GL_POP();
}
Пример #11
0
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{
	if(t->m_type != RenderTarget)
	{
		ASSERT(0);

		return;
	}

	const GIFRegTEX0& TEX0 = t->m_TEX0;

	if(TEX0.PSM != PSM_PSMCT32
	&& TEX0.PSM != PSM_PSMCT24
	&& TEX0.PSM != PSM_PSMCT16
	&& TEX0.PSM != PSM_PSMCT16S)
	{
		//ASSERT(0);

		return;
	}

	if(!t->m_dirty.empty())
	{
		return;
	}

	GL_PUSH("Texture Cache Read");

	// printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0);

	int w = r.width();
	int h = r.height();

	GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();

	GLuint format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? GL_R16UI : GL_RGBA8;
	//if (format == GL_R16UI) fprintf(stderr, "Format 16 bits integer\n");
#if 0
	DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM;
#endif

	if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h, format))
	{
		GSTexture::GSMap m;

		if(offscreen->Map(m))
		{
			// TODO: block level write

			GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

			switch(TEX0.PSM)
			{
			case PSM_PSMCT32:
				m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
				break;
			case PSM_PSMCT24:
				m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r);
				break;
			case PSM_PSMCT16:
			case PSM_PSMCT16S:
				m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r);
				break;
			default:
				ASSERT(0);
			}

			offscreen->Unmap();
		}

		// FIXME invalidate data
		m_renderer->m_dev->Recycle(offscreen);
	}

	GL_POP();
}
Пример #12
0
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
{
	ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen);

	// Default upload path for the texture is the Map/Unmap
	// This path is mostly used for palette. But also for texture that could
	// overflow the pbo buffer
	// Data upload is rather small typically 64B or 1024B. So don't bother with PBO
	// and directly send the data to the GL synchronously

	m_clean = false;

	uint32 row_byte = r.width() << m_int_shift;
	uint32 map_size = r.height() * row_byte;
#ifdef ENABLE_OGL_DEBUG_MEM_BW
	g_real_texture_upload_byte += map_size;
#endif

#if 0
	if (r.height() == 1) {
		// Palette data. Transfer is small either 64B or 1024B.
		// Sometimes it is faster, sometimes slower.
		glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data);
		return true;
	}
#endif

	GL_PUSH("Upload Texture %d", m_texture_id);

	// The easy solution without PBO
#if 0
	// Likely a bad texture
	glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);

	glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data);

	glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior
#endif

	// The complex solution with PBO
#if 1
	char* src = (char*)data;
	char* map = PboPool::Map(map_size);

	// PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch
	// Note: row_byte != pitch
	for (int h = 0; h < r.height(); h++) {
		memcpy(map, src, row_byte);
		map += row_byte;
		src += pitch;
	}

	PboPool::Unmap();

	glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());

	// FIXME OGL4: investigate, only 1 unpack buffer always bound
	PboPool::UnbindPbo();

	PboPool::EndTransfer();
#endif

	GL_POP();

	return true;
}