예제 #1
0
void GLGSRender::end()
{
	// Abort the draw early when the frame is skipped, the FBO is invalid,
	// conditional rendering culled this draw, or the shader program state is bad.
	// rsx::thread::end() must still run to keep the command-stream bookkeeping consistent.
	if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state())
	{
		rsx::thread::end();
		return;
	}

	if (manually_flush_ring_buffers)
	{
		//Use approximations to reserve space. This path is mostly for debug purposes anyway
		u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
		u32 approx_working_buffer_size = approx_vertex_count * 256;

		//Allocate 256K heap if we have no approximation at this time (inlined array)
		m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
		m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
	}

	//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
	u32 vertex_draw_count;
	u32 actual_vertex_count;
	u32 vertex_base;
	std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
	std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer();

	std::chrono::time_point<steady_clock> program_start = steady_clock::now();
	//Load program here since it is dependent on vertex state

	load_program(vertex_base, actual_vertex_count);

	std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
	m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();

	if (manually_flush_ring_buffers)
	{
		m_attrib_ring_buffer->unmap();
		m_index_ring_buffer->unmap();
	}
	else
	{
		//DMA push; not needed with MAP_COHERENT
		//glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
	}

	//Check if depth buffer is bound and valid
	//If ds is not initialized clear it; it seems new depth textures should have depth cleared
	auto copy_rtt_contents = [](gl::render_target *surface)
	{
		//Copy data from old contents onto this one
		//1. Clip a rectangular region defining the data
		//2. Perform a GPU blit
		u16 parent_w = surface->old_contents->width();
		u16 parent_h = surface->old_contents->height();
		u16 copy_w, copy_h;

		std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
		glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
		surface->set_cleared();
		surface->old_contents = nullptr;
	};

	//Check if we have any 'recycled' surfaces in memory and if so, clear them
	std::vector<int> buffers_to_clear;
	bool clear_all_color = true;
	bool clear_depth = false;

	for (int index = 0; index < 4; index++)
	{
		if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
		{
			if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
				clear_all_color = false;
			else
				buffers_to_clear.push_back(index);
		}
	}

	gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
	if (ds && !ds->cleared())
	{
		clear_depth = true;
	}

	//Temporarily disable pixel tests
	glDisable(GL_SCISSOR_TEST);

	if (clear_depth || buffers_to_clear.size() > 0)
	{
		GLenum mask = 0;

		if (clear_depth)
		{
			gl_state.depth_mask(GL_TRUE);
			gl_state.clear_depth(1.0);
			gl_state.clear_stencil(255);
			mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
		}

		if (clear_all_color)
			mask |= GL_COLOR_BUFFER_BIT;

		glClear(mask);

		if (buffers_to_clear.size() > 0 && !clear_all_color)
		{
			GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
			//It is impossible for the render target to be type A or B here (clear all would have been flagged)
			//NOTE: glClearBufferfv takes the buffer enum GL_COLOR + drawbuffer index, not an FBO name
			for (auto &i: buffers_to_clear)
				glClearBufferfv(GL_COLOR, i, colors);
		}

		if (clear_depth)
		{
			//Restore the application-requested depth write mask after the forced clear
			gl_state.depth_mask(rsx::method_registers.depth_write_enabled());

			//Only mark the depth surface cleared when we actually cleared it;
			//ds may be null when this branch was entered solely for color buffers
			ds->set_cleared();
		}
	}

	if (g_cfg.video.strict_rendering_mode)
	{
		//ds can legitimately be null (no depth surface bound) - guard before dereferencing
		if (ds && ds->old_contents != nullptr)
			copy_rtt_contents(ds);

		for (auto &rtt : m_rtts.m_bound_render_targets)
		{
			if (std::get<0>(rtt) != 0)
			{
				auto surface = std::get<1>(rtt);
				if (surface->old_contents != nullptr)
					copy_rtt_contents(surface);
			}
		}
	}

	glEnable(GL_SCISSOR_TEST);

	std::chrono::time_point<steady_clock> textures_start = steady_clock::now();

	//Setup textures
	//Setting unused texture to 0 is not needed, but makes program validation happy if we choose to enforce it
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		int location;
		if (!rsx::method_registers.fragment_textures[i].enabled())
		{
			if (m_textures_dirty[i])
			{
				glActiveTexture(GL_TEXTURE0 + i);
				glBindTexture(GL_TEXTURE_2D, 0);

				m_textures_dirty[i] = false;
			}
			continue;
		}

		if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
		{
			m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]));
			__glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts);
			__glcheck m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
		}
	}

	//Vertex textures
	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		//Vertex texture units are mapped after the fragment texture units
		int texture_index = i + rsx::limits::fragment_textures_count;
		int location;

		if (!rsx::method_registers.vertex_textures[i].enabled())
		{
			//glActiveTexture(GL_TEXTURE0 + texture_index);
			//glBindTexture(GL_TEXTURE_2D, 0);
			continue;
		}

		if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
		{
			m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i]));
			__glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts);
		}
	}

	std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
	m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();

	std::chrono::time_point<steady_clock> draw_start = steady_clock::now();

	if (g_cfg.video.debug_output)
	{
		m_program->validate();
	}

	if (indexed_draw_info)
	{
		const GLenum index_type = std::get<0>(indexed_draw_info.value());
		const u32 index_offset = std::get<1>(indexed_draw_info.value());

		if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
		{
			//Restart index width must match the index buffer element type
			__glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
		}

		__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
	}
	else
	{
		glDrawArrays(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), 0, vertex_draw_count);
	}

	//Notify the ring buffers that this segment of data has been consumed
	m_attrib_ring_buffer->notify();
	m_index_ring_buffer->notify();
	m_vertex_state_buffer->notify();
	m_fragment_constants_buffer->notify();
	m_transform_constants_buffer->notify();

	std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
	m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
	m_draw_calls++;

	//Credit this draw to the active zcull occlusion query, if one is running
	if (zcull_task_queue.active_query &&
		zcull_task_queue.active_query->active)
		zcull_task_queue.active_query->num_draws++;

	synchronize_buffers();
	rsx::thread::end();
}
예제 #2
0
파일: GLGSRender.cpp 프로젝트: kd-11/rpcs3
void GLGSRender::end()
{
	// Finish the rsx draw immediately if there is no valid framebuffer object to render into.
	if (!draw_fbo)
	{
		rsx::thread::end();
		return;
	}

	if (manually_flush_ring_buffers)
	{
		//Use approximations to reserve space. This path is mostly for debug purposes anyway
		u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
		u32 approx_working_buffer_size = approx_vertex_count * 256;

		//Allocate 256K heap if we have no approximation at this time (inlined array)
		m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
		m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
	}

	// Make the render-target FBO current before any clear/draw commands below.
	draw_fbo.bind();

	//Check if depth buffer is bound and valid
	//If ds is not initialized clear it; it seems new depth textures should have depth cleared
	gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
	if (ds && !ds->cleared())
	{
		// Force the depth mask on so the clear actually writes, then restore
		// the mask requested by the guest application.
		glDepthMask(GL_TRUE);
		glClearDepth(1.f);

		glClear(GL_DEPTH_BUFFER_BIT);
		glDepthMask(rsx::method_registers.depth_write_enabled());

		ds->set_cleared();
	}

	std::chrono::time_point<std::chrono::system_clock> textures_start = std::chrono::system_clock::now();

	//Setup textures
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		int location;
		if (!rsx::method_registers.fragment_textures[i].enabled())
		{
			// Unbind disabled units so stale textures are not sampled.
			glActiveTexture(GL_TEXTURE0 + i);
			glBindTexture(GL_TEXTURE_2D, 0);
			continue;
		}

		// Only upload if the shader actually samples this unit ("tex<i>" uniform exists).
		if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
		{
			m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]));
			__glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts);
		}
	}

	//Vertex textures
	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		// Vertex texture units are mapped after the fragment texture units.
		int texture_index = i + rsx::limits::fragment_textures_count;
		int location;

		if (!rsx::method_registers.vertex_textures[i].enabled())
		{
			glActiveTexture(GL_TEXTURE0 + texture_index);
			glBindTexture(GL_TEXTURE_2D, 0);
			continue;
		}

		if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
		{
			m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i]));
			__glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts);
		}
	}

	std::chrono::time_point<std::chrono::system_clock> textures_end = std::chrono::system_clock::now();
	m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();

	// Upload vertex data; returns the vertex count and, for indexed draws,
	// the index type and byte offset into the index ring buffer.
	u32 vertex_draw_count;
	std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
	std::tie(vertex_draw_count, indexed_draw_info) = set_vertex_buffer();
	m_vao.bind();

	std::chrono::time_point<std::chrono::system_clock> draw_start = std::chrono::system_clock::now();

	if (g_cfg_rsx_debug_output)
	{
		m_program->validate();
	}

	// Vertex/index uploads are complete; release the mappings before drawing.
	if (manually_flush_ring_buffers)
	{
		m_attrib_ring_buffer->unmap();
		m_index_ring_buffer->unmap();
	}

	if (indexed_draw_info)
	{
		if (__glcheck enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
		{
			// Restart index width must match the index buffer element type.
			GLenum index_type = std::get<0>(indexed_draw_info.value());
			__glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
		}

		__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, std::get<0>(indexed_draw_info.value()), (GLvoid *)(std::ptrdiff_t)std::get<1>(indexed_draw_info.value()));
	}
	else
	{
		draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count);
	}

	std::chrono::time_point<std::chrono::system_clock> draw_end = std::chrono::system_clock::now();
	m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();

	write_buffers();

	rsx::thread::end();
}