Example #1
0
// Binds the graphics pipeline for the current draw and uploads all uniform
// data (vertex state, transform constants, fragment constants + fragment
// state) into the dedicated ring buffers, then binds the used ranges to
// uniform buffer binding points 0/1/2.
//
// @param vertex_base  First-vertex offset written into the vertex state block
//                     for use by the vertex decoder on the GPU.
// @param vertex_count Number of vertices in the draw; forwarded to
//                     fill_vertex_layout_state.
//
// NOTE: The reserve -> alloc -> bind -> unmap ordering below is significant;
// alloc_from_heap must only run after reserve_storage_on_heap when manual
// flushing is enabled, and unmap must come after all writes.
void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
{
	auto &fragment_program = current_fragment_program;
	auto &vertex_program = current_vertex_program;

	for (auto &vtx : vertex_program.rsx_vertex_inputs)
	{
		auto &array_info = rsx::method_registers.vertex_arrays_info[vtx.location];
		if (array_info.type() == rsx::vertex_base_type::s1 ||
			array_info.type() == rsx::vertex_base_type::cmp)
		{
			// Some vendors do not support GL_x_SNORM buffer textures; force the
			// shader to rescale the raw integer attribute instead.
			verify(HERE), vtx.flags == 0;
			vtx.flags |= GL_VP_FORCE_ATTRIB_SCALING | GL_VP_ATTRIB_S16_INT;
		}
	}

	vertex_program.skip_vertex_input_check = true;	//not needed for us since decoding is done server side
	void* pipeline_properties = nullptr;

	m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, pipeline_properties);
	m_program->use();

	// Persist newly-compiled pipelines to the on-disk shader cache.
	if (m_prog_buffer.check_cache_missed())
		m_shaders_cache->store(pipeline_properties, vertex_program, fragment_program);

	u8 *buf;
	u32 vertex_state_offset = 0;
	u32 vertex_constants_offset = 0;	// only valid/used when m_transform_constants_dirty
	u32 fragment_constants_offset = 0;

	// 17 vec4s of fragment state are appended after the fragment constants.
	const u32 fragment_constants_size = static_cast<u32>(m_prog_buffer.get_fragment_constants_buffer_size(fragment_program));
	const u32 fragment_buffer_size = fragment_constants_size + (17 * 4 * sizeof(float));

	if (manually_flush_ring_buffers)
	{
		m_vertex_state_buffer->reserve_storage_on_heap(512);
		m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256));
		if (m_transform_constants_dirty) m_transform_constants_buffer->reserve_storage_on_heap(8192);
	}

	// Vertex state: scale/offset matrix (64B), user clip planes, transform
	// branch bits, vertex base, then the vertex layout description.
	auto mapping = m_vertex_state_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align);
	buf = static_cast<u8*>(mapping.first);
	vertex_state_offset = mapping.second;
	fill_scale_offset_data(buf, false);
	fill_user_clip_data(buf + 64);
	*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
	*(reinterpret_cast<u32*>(buf + 132)) = vertex_base;
	fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast<s32*>(buf + 144));

	if (m_transform_constants_dirty)
	{
		// Vertex constants — only re-uploaded when the registers changed.
		mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
		buf = static_cast<u8*>(mapping.first);
		vertex_constants_offset = mapping.second;
		fill_vertex_program_constants_data(buf);
	}

	// Fragment constants
	mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align);
	buf = static_cast<u8*>(mapping.first);
	fragment_constants_offset = mapping.second;
	if (fragment_constants_size)
		m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) }, fragment_program);

	// Fragment state lives directly after the constants in the same allocation.
	fill_fragment_state_buffer(buf + fragment_constants_size, fragment_program);

	m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512);
	m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);

	if (m_transform_constants_dirty) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192);

	if (manually_flush_ring_buffers)
	{
		m_vertex_state_buffer->unmap();
		m_fragment_constants_buffer->unmap();

		if (m_transform_constants_dirty) m_transform_constants_buffer->unmap();
	}

	m_transform_constants_dirty = false;
}
Example #2
0
// Builds the current vertex/fragment programs, binds the matching pipeline
// and uploads uniform data (scale-offset block, vertex constants, fragment
// constants + state) from a single shared uniform ring buffer.
//
// Fast path: if the pipeline did not change and the transform constants are
// clean, a small hash of the frequently-changing scale/viewport/fog/alpha
// state is compared against the previous frame's; when it matches, all
// uniform uploads are skipped entirely.
//
// @return Always true.
bool GLGSRender::load_program()
{
	// Resolves a texture address against the render target / depth-stencil
	// caches; returns (found, native pitch).
	auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
	{
		gl::render_target *surface = nullptr;
		if (!is_depth)
			surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr);
		else
			surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);

		if (!surface) return std::make_tuple(false, 0);
		return std::make_tuple(true, surface->get_native_pitch());
	};

	RSXVertexProgram vertex_program = get_current_vertex_program();
	RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func);

	for (auto &vtx : vertex_program.rsx_vertex_inputs)
	{
		auto &array_info = rsx::method_registers.vertex_arrays_info[vtx.location];
		if (array_info.type() == rsx::vertex_base_type::s1 ||
			array_info.type() == rsx::vertex_base_type::cmp)
		{
			// Some vendors do not support GL_x_SNORM buffer textures; force the
			// shader to rescale the raw integer attribute instead.
			verify(HERE), vtx.flags == 0;
			vtx.flags |= GL_VP_FORCE_ATTRIB_SCALING | GL_VP_ATTRIB_S16_INT;
		}
	}

	auto old_program = m_program;
	m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr);
	m_program->use();

	if (old_program == m_program && !m_transform_constants_dirty)
	{
		// This path is taken a lot, so the savings are tangible: hash the
		// volatile uniform inputs and skip the upload if nothing changed.
		struct scale_offset_layout
		{
			u16 clip_w, clip_h;
			float scale_x, offset_x, scale_y, offset_y, scale_z, offset_z;
			float fog0, fog1;
			u32   alpha_tested;
			float alpha_ref;
		}
		tmp = {};

		tmp.clip_w = rsx::method_registers.surface_clip_width();
		tmp.clip_h = rsx::method_registers.surface_clip_height();
		tmp.scale_x = rsx::method_registers.viewport_scale_x();
		tmp.offset_x = rsx::method_registers.viewport_offset_x();
		tmp.scale_y = rsx::method_registers.viewport_scale_y();
		tmp.offset_y = rsx::method_registers.viewport_offset_y();
		tmp.scale_z = rsx::method_registers.viewport_scale_z();
		tmp.offset_z = rsx::method_registers.viewport_offset_z();
		tmp.fog0 = rsx::method_registers.fog_params_0();
		tmp.fog1 = rsx::method_registers.fog_params_1();
		tmp.alpha_tested = rsx::method_registers.alpha_test_enabled();
		tmp.alpha_ref = rsx::method_registers.alpha_ref();

		size_t old_hash = m_transform_buffer_hash;
		m_transform_buffer_hash = 0;

		// NOTE(review): XOR-folding per-byte hashes is collision-prone (byte
		// order is ignored); kept as-is since a collision only skips an upload.
		u8 *data = reinterpret_cast<u8*>(&tmp);
		for (size_t i = 0; i < sizeof(tmp); ++i)
			m_transform_buffer_hash ^= std::hash<char>()(data[i]);

		if (old_hash == m_transform_buffer_hash)
			return true;
	}

	m_transform_constants_dirty = false;

	// 17 vec4s of fragment state are appended after the fragment constants.
	const u32 fragment_constants_size = static_cast<u32>(m_prog_buffer.get_fragment_constants_buffer_size(fragment_program));
	const u32 fragment_buffer_size = fragment_constants_size + (17 * 4 * sizeof(float));
	const u32 max_buffer_sz = 512 + 8192 + align(fragment_constants_size, m_uniform_buffer_offset_align);

	if (manually_flush_ring_buffers)
		m_uniform_ring_buffer->reserve_storage_on_heap(align(max_buffer_sz, 512));

	u8 *buf;
	u32 scale_offset_offset = 0;
	u32 vertex_constants_offset = 0;
	u32 fragment_constants_offset = 0;

	// Scale offset
	auto mapping = m_uniform_ring_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align);
	buf = static_cast<u8*>(mapping.first);
	scale_offset_offset = mapping.second;
	fill_scale_offset_data(buf, false);

	// Vertex constants
	mapping = m_uniform_ring_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
	buf = static_cast<u8*>(mapping.first);
	vertex_constants_offset = mapping.second;
	fill_vertex_program_constants_data(buf);

	// Fragment constants
	mapping = m_uniform_ring_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align);
	buf = static_cast<u8*>(mapping.first);
	fragment_constants_offset = mapping.second;
	if (fragment_constants_size)
		m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) }, fragment_program);

	// Fragment state lives directly after the constants in the same allocation.
	fill_fragment_state_buffer(buf + fragment_constants_size, fragment_program);

	m_uniform_ring_buffer->bind_range(0, scale_offset_offset, 512);
	m_uniform_ring_buffer->bind_range(1, vertex_constants_offset, 8192);
	m_uniform_ring_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);

	if (manually_flush_ring_buffers)
		m_uniform_ring_buffer->unmap();

	return true;
}