Exemple #1
0
// Services the NV4097 methods that this backend handles itself.
// Returns true when cmd is one of the methods tested below, false for every other value.
bool GLGSRender::do_method(u32 cmd, u32 arg)
{
	if (cmd == NV4097_CLEAR_SURFACE)
	{
		// Only the bits selected by 0xF3 count as a request for work;
		// when none of them is set the buffer setup below is skipped entirely
		const bool has_work = (arg & 0xF3) != 0;

		if (has_work)
		{
			init_buffers(true);
			synchronize_buffers();
			clear_surface(arg);
		}

		return true;
	}

	if (cmd == NV4097_CLEAR_ZCULL_SURFACE)
	{
		// Deliberately empty.
		// Clearing zcull memory does not modify depth/stencil buffers 'bound' to the zcull region
		return true;
	}

	const bool is_semaphore_release =
		cmd == NV4097_TEXTURE_READ_SEMAPHORE_RELEASE ||
		cmd == NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE;

	if (is_semaphore_release)
	{
		// Both semaphore releases only raise the flush flag
		flush_draw_buffers = true;
		return true;
	}

	return false;
}
Exemple #2
0
// Services the NV4097 methods that this backend handles itself.
// The return value is true for the methods listed in the switch and false for anything else.
bool GLGSRender::do_method(u32 cmd, u32 arg)
{
	bool handled = true;

	switch (cmd)
	{
	case NV4097_CLEAR_SURFACE:
		// A clear request whose 0xF3 bits are all zero carries no work, so nothing is set up for it
		if ((arg & 0xF3) != 0)
		{
			init_buffers(true);
			synchronize_buffers();
			clear_surface(arg);
		}
		break;

	case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
	case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
		// Either semaphore release just raises the flush flag
		flush_draw_buffers = true;
		break;

	default:
		handled = false;
		break;
	}

	return handled;
}
Exemple #3
0
//Rebuilds draw_fbo and the cached surface descriptors (surface_info[] / depth_surface_info)
//from the surface state currently held in rsx::method_registers.
//
//When draw_fbo evaluates true and m_rtts_dirty is not set, nothing is rebuilt and only
//set_viewport() is called.
//
//skip_reading - not referenced anywhere in this function body
void GLGSRender::init_buffers(bool skip_reading)
{
	//Fast path: existing framebuffer is still usable, just refresh the viewport
	if (draw_fbo && !m_rtts_dirty)
	{
		set_viewport();
		return;
	}

	//We are about to change buffers, flush any pending requests for the old buffers
	synchronize_buffers();

	m_rtts_dirty = false;

	//Snapshot of the register state used for the rest of this function
	const u16 clip_horizontal = rsx::method_registers.surface_clip_width();
	const u16 clip_vertical = rsx::method_registers.surface_clip_height();

	const auto pitchs = get_pitchs();
	const auto surface_format = rsx::method_registers.surface_color();
	const auto depth_format = rsx::method_registers.surface_depth_fmt();

	const auto surface_addresses = get_color_surface_addresses();
	const auto depth_address = get_zeta_surface_address();

	//Hand the snapshot to m_rtts. The code below reads m_rtts.m_bound_render_targets and
	//m_rtts.m_bound_depth_stencil, which are presumably populated by this call (TODO confirm)
	m_rtts.prepare_render_target(nullptr, surface_format, depth_format,  clip_horizontal, clip_vertical,
		rsx::method_registers.surface_color_target(),
		surface_addresses, depth_address);

	//draw_fbo is recreated before any attachment is assigned to it
	draw_fbo.recreate();

	//Color attachments: one slot per possible color buffer
	for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
	{
		//Element 0 of the tuple is used as the 'slot is bound' test (presumably the surface
		//address - confirm), element 1 is dereferenced as the render target itself
		if (std::get<0>(m_rtts.m_bound_render_targets[i]))
		{
			__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);

			//Record the pitch on the render target and cache a descriptor for this slot
			std::get<1>(m_rtts.m_bound_render_targets[i])->set_rsx_pitch(pitchs[i]);
			surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical };

			//Verify pitch given is correct if pitch <= 64 (especially 64)
			if (pitchs[i] <= 64)
			{	
				const u16 native_pitch = std::get<1>(m_rtts.m_bound_render_targets[i])->get_native_pitch();
				if (native_pitch > pitchs[i])
				{
					LOG_WARNING(RSX, "Bad color surface pitch given: surface_width=%d, format=%d, pitch=%d, native_pitch=%d",
						clip_horizontal, (u32)surface_format, pitchs[i], native_pitch);

					//Will not transfer this surface between cell and rsx due to misalignment
					//TODO: Verify correct behaviour
					//A zero pitch makes the region locking loop further down skip this slot
					surface_info[i].pitch = 0;
				}
			}
		}
		else
			//Unbound slot: reset the cached descriptor
			surface_info[i] = {};
	}

	//Depth/stencil attachment, same pattern as the color slots above
	if (std::get<0>(m_rtts.m_bound_depth_stencil))
	{
		//z24s8 is attached through depth_stencil, every other depth format through depth
		if (depth_format == rsx::surface_depth_format::z24s8)
			__glcheck draw_fbo.depth_stencil = *std::get<1>(m_rtts.m_bound_depth_stencil);
		else
			__glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil);

		const u32 depth_surface_pitch = rsx::method_registers.surface_z_pitch();
		std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch());
		depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical };

		//Verify pitch given is correct if pitch <= 64 (especially 64)
		if (depth_surface_pitch <= 64)
		{
			const u16 native_pitch = std::get<1>(m_rtts.m_bound_depth_stencil)->get_native_pitch();
			if (native_pitch > depth_surface_pitch)
			{
				LOG_WARNING(RSX, "Bad depth surface pitch given: surface_width=%d, format=%d, pitch=%d, native_pitch=%d",
					clip_horizontal, (u32)depth_format, depth_surface_pitch, native_pitch);

				//Will not transfer this surface between cell and rsx due to misalignment
				//TODO: Verify correct behaviour
				//A zero pitch makes the depth region locking further down skip this surface
				depth_surface_info.pitch = 0;
			}
		}
	}
	else
		//No depth surface bound: reset the cached descriptor
		depth_surface_info = {};

	//Stop here when check() reports failure; the framebuffer is not bound and no region is locked
	if (!draw_fbo.check())
		return;

	draw_fbo.bind();
	set_viewport();

	//Select draw/read buffers according to the surface target register.
	//For the multi-target cases the read buffer is always color[0].
	switch (rsx::method_registers.surface_color_target())
	{
	case rsx::surface_target::none: break;

	case rsx::surface_target::surface_a:
		__glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surface_b:
		__glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
		__glcheck draw_fbo.read_buffer(draw_fbo.color[1]);
		break;

	case rsx::surface_target::surfaces_a_b:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surfaces_a_b_c:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surfaces_a_b_c_d:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;
	}

	//Mark buffer regions as NO_ACCESS on Cell visible side
	if (g_cfg_rsx_write_color_buffers)
	{
		auto color_format = rsx::internals::surface_color_format_to_gl(surface_format);

		for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
		{
			//Skips unbound slots (descriptor reset above) and slots whose pitch was zeroed by the pitch check
			if (!surface_info[i].address || !surface_info[i].pitch) continue;

			//Region size is pitch * height of the cached descriptor
			const u32 range = surface_info[i].pitch * surface_info[i].height;
			m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch,
				color_format.format, color_format.type, color_format.swap_bytes, *std::get<1>(m_rtts.m_bound_render_targets[i]));
		}
	}

	if (g_cfg_rsx_write_depth_buffer)
	{
		if (depth_surface_info.address && depth_surface_info.pitch)
		{
			auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format);

			//Pitch is recomputed from the width: width * 2, doubled again for any format other than z16
			//(presumably 2 vs 4 bytes per pixel - confirm)
			u32 pitch = depth_surface_info.width * 2;
			if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;

			const u32 range = pitch * depth_surface_info.height;

			//TODO: Verify that depth surface pitch variance affects results
			if (pitch != depth_surface_info.pitch)
				LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch);

			//Note: the locked region uses the computed pitch, not the pitch stored in depth_surface_info
			m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch,
				depth_format_gl.format, depth_format_gl.type, true, *std::get<1>(m_rtts.m_bound_depth_stencil));
		}
	}
}
Exemple #4
0
void GLGSRender::end()
{
	if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state())
	{
		rsx::thread::end();
		return;
	}

	if (manually_flush_ring_buffers)
	{
		//Use approximations to reseve space. This path is mostly for debug purposes anyway
		u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
		u32 approx_working_buffer_size = approx_vertex_count * 256;

		//Allocate 256K heap if we have no approximation at this time (inlined array)
		m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
		m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
	}

	//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
	u32 vertex_draw_count;
	u32 actual_vertex_count;
	u32 vertex_base;
	std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
	std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer();

	std::chrono::time_point<steady_clock> program_start = steady_clock::now();
	//Load program here since it is dependent on vertex state

	load_program(vertex_base, actual_vertex_count);

	std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
	m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();

	if (manually_flush_ring_buffers)
	{
		m_attrib_ring_buffer->unmap();
		m_index_ring_buffer->unmap();
	}
	else
	{
		//DMA push; not needed with MAP_COHERENT
		//glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
	}

	//Check if depth buffer is bound and valid
	//If ds is not initialized clear it; it seems new depth textures should have depth cleared
	auto copy_rtt_contents = [](gl::render_target *surface)
	{
		//Copy data from old contents onto this one
		//1. Clip a rectangular region defning the data
		//2. Perform a GPU blit
		u16 parent_w = surface->old_contents->width();
		u16 parent_h = surface->old_contents->height();
		u16 copy_w, copy_h;

		std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
		glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
		surface->set_cleared();
		surface->old_contents = nullptr;
	};

	//Check if we have any 'recycled' surfaces in memory and if so, clear them
	std::vector<int> buffers_to_clear;
	bool clear_all_color = true;
	bool clear_depth = false;

	for (int index = 0; index < 4; index++)
	{
		if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
		{
			if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
				clear_all_color = false;
			else
				buffers_to_clear.push_back(index);
		}
	}

	gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
	if (ds && !ds->cleared())
	{
		clear_depth = true;
	}

	//Temporarily disable pixel tests
	glDisable(GL_SCISSOR_TEST);

	if (clear_depth || buffers_to_clear.size() > 0)
	{
		GLenum mask = 0;

		if (clear_depth)
		{
			gl_state.depth_mask(GL_TRUE);
			gl_state.clear_depth(1.0);
			gl_state.clear_stencil(255);
			mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
		}

		if (clear_all_color)
			mask |= GL_COLOR_BUFFER_BIT;

		glClear(mask);

		if (buffers_to_clear.size() > 0 && !clear_all_color)
		{
			GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
			//It is impossible for the render target to be typa A or B here (clear all would have been flagged)
			for (auto &i: buffers_to_clear)
				glClearBufferfv(draw_fbo.id(), i, colors);
		}

		if (clear_depth)
			gl_state.depth_mask(rsx::method_registers.depth_write_enabled());

		ds->set_cleared();
	}

	if (g_cfg.video.strict_rendering_mode)
	{
		if (ds->old_contents != nullptr)
			copy_rtt_contents(ds);

		for (auto &rtt : m_rtts.m_bound_render_targets)
		{
			if (std::get<0>(rtt) != 0)
			{
				auto surface = std::get<1>(rtt);
				if (surface->old_contents != nullptr)
					copy_rtt_contents(surface);
			}
		}
	}

	glEnable(GL_SCISSOR_TEST);

	std::chrono::time_point<steady_clock> textures_start = steady_clock::now();

	//Setup textures
	//Setting unused texture to 0 is not needed, but makes program validation happy if we choose to enforce it
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		int location;
		if (!rsx::method_registers.fragment_textures[i].enabled())
		{
			if (m_textures_dirty[i])
			{
				glActiveTexture(GL_TEXTURE0 + i);
				glBindTexture(GL_TEXTURE_2D, 0);

				m_textures_dirty[i] = false;
			}
			continue;
		}

		if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
		{
			m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]));
			__glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts);
			__glcheck m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
		}
	}

	//Vertex textures
	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		int texture_index = i + rsx::limits::fragment_textures_count;
		int location;

		if (!rsx::method_registers.vertex_textures[i].enabled())
		{
			//glActiveTexture(GL_TEXTURE0 + texture_index);
			//glBindTexture(GL_TEXTURE_2D, 0);
			continue;
		}

		if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
		{
			m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i]));
			__glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts);
		}
	}

	std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
	m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();

	std::chrono::time_point<steady_clock> draw_start = steady_clock::now();

	if (g_cfg.video.debug_output)
	{
		m_program->validate();
	}

	if (indexed_draw_info)
	{
		const GLenum index_type = std::get<0>(indexed_draw_info.value());
		const u32 index_offset = std::get<1>(indexed_draw_info.value());

		if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
		{
			__glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
		}

		__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
	}
	else
	{
		glDrawArrays(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), 0, vertex_draw_count);
	}

	m_attrib_ring_buffer->notify();
	m_index_ring_buffer->notify();
	m_vertex_state_buffer->notify();
	m_fragment_constants_buffer->notify();
	m_transform_constants_buffer->notify();

	std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
	m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
	m_draw_calls++;

	if (zcull_task_queue.active_query &&
		zcull_task_queue.active_query->active)
		zcull_task_queue.active_query->num_draws++;

	synchronize_buffers();
	rsx::thread::end();
}