void GLGSRender::end() { if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state()) { rsx::thread::end(); return; } if (manually_flush_ring_buffers) { //Use approximations to reseve space. This path is mostly for debug purposes anyway u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); u32 approx_working_buffer_size = approx_vertex_count * 256; //Allocate 256K heap if we have no approximation at this time (inlined array) m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U)); m_index_ring_buffer->reserve_storage_on_heap(16 * 1024); } //Do vertex upload before RTT prep / texture lookups to give the driver time to push data u32 vertex_draw_count; u32 actual_vertex_count; u32 vertex_base; std::optional<std::tuple<GLenum, u32> > indexed_draw_info; std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer(); std::chrono::time_point<steady_clock> program_start = steady_clock::now(); //Load program here since it is dependent on vertex state load_program(vertex_base, actual_vertex_count); std::chrono::time_point<steady_clock> program_stop = steady_clock::now(); m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count(); if (manually_flush_ring_buffers) { m_attrib_ring_buffer->unmap(); m_index_ring_buffer->unmap(); } else { //DMA push; not needed with MAP_COHERENT //glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); } //Check if depth buffer is bound and valid //If ds is not initialized clear it; it seems new depth textures should have depth cleared auto copy_rtt_contents = [](gl::render_target *surface) { //Copy data from old contents onto this one //1. Clip a rectangular region defning the data //2. 
Perform a GPU blit u16 parent_w = surface->old_contents->width(); u16 parent_h = surface->old_contents->height(); u16 copy_w, copy_h; std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1); surface->set_cleared(); surface->old_contents = nullptr; }; //Check if we have any 'recycled' surfaces in memory and if so, clear them std::vector<int> buffers_to_clear; bool clear_all_color = true; bool clear_depth = false; for (int index = 0; index < 4; index++) { if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0) { if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared()) clear_all_color = false; else buffers_to_clear.push_back(index); } } gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); if (ds && !ds->cleared()) { clear_depth = true; } //Temporarily disable pixel tests glDisable(GL_SCISSOR_TEST); if (clear_depth || buffers_to_clear.size() > 0) { GLenum mask = 0; if (clear_depth) { gl_state.depth_mask(GL_TRUE); gl_state.clear_depth(1.0); gl_state.clear_stencil(255); mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } if (clear_all_color) mask |= GL_COLOR_BUFFER_BIT; glClear(mask); if (buffers_to_clear.size() > 0 && !clear_all_color) { GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f }; //It is impossible for the render target to be typa A or B here (clear all would have been flagged) for (auto &i: buffers_to_clear) glClearBufferfv(draw_fbo.id(), i, colors); } if (clear_depth) gl_state.depth_mask(rsx::method_registers.depth_write_enabled()); ds->set_cleared(); } if (g_cfg.video.strict_rendering_mode) { if (ds->old_contents != nullptr) copy_rtt_contents(ds); for (auto &rtt : m_rtts.m_bound_render_targets) { if (std::get<0>(rtt) != 0) { auto surface = std::get<1>(rtt); if (surface->old_contents != nullptr) 
copy_rtt_contents(surface); } } } glEnable(GL_SCISSOR_TEST); std::chrono::time_point<steady_clock> textures_start = steady_clock::now(); //Setup textures //Setting unused texture to 0 is not needed, but makes program validation happy if we choose to enforce it for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { int location; if (!rsx::method_registers.fragment_textures[i].enabled()) { if (m_textures_dirty[i]) { glActiveTexture(GL_TEXTURE0 + i); glBindTexture(GL_TEXTURE_2D, 0); m_textures_dirty[i] = false; } continue; } if (m_program->uniforms.has_location("tex" + std::to_string(i), &location)) { m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i])); __glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts); __glcheck m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]); } } //Vertex textures for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) { int texture_index = i + rsx::limits::fragment_textures_count; int location; if (!rsx::method_registers.vertex_textures[i].enabled()) { //glActiveTexture(GL_TEXTURE0 + texture_index); //glBindTexture(GL_TEXTURE_2D, 0); continue; } if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location)) { m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i])); __glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts); } } std::chrono::time_point<steady_clock> textures_end = steady_clock::now(); m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count(); std::chrono::time_point<steady_clock> draw_start = steady_clock::now(); if (g_cfg.video.debug_output) { m_program->validate(); } if (indexed_draw_info) { const GLenum index_type = std::get<0>(indexed_draw_info.value()); const u32 index_offset = 
std::get<1>(indexed_draw_info.value()); if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) { __glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); } __glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); } else { glDrawArrays(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), 0, vertex_draw_count); } m_attrib_ring_buffer->notify(); m_index_ring_buffer->notify(); m_vertex_state_buffer->notify(); m_fragment_constants_buffer->notify(); m_transform_constants_buffer->notify(); std::chrono::time_point<steady_clock> draw_end = steady_clock::now(); m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count(); m_draw_calls++; if (zcull_task_queue.active_query && zcull_task_queue.active_query->active) zcull_task_queue.active_query->num_draws++; synchronize_buffers(); rsx::thread::end(); }
// Finalize and submit the current draw call (earlier revision of this method).
// Order of operations: early-out if no FBO, optional ring-buffer reservation,
// FBO bind, first-use depth clear, fragment/vertex texture setup, vertex upload,
// then the indexed or non-indexed draw submission and timing bookkeeping.
void GLGSRender::end()
{
	// No valid draw framebuffer bound — nothing to render into.
	if (!draw_fbo)
	{
		rsx::thread::end();
		return;
	}

	if (manually_flush_ring_buffers)
	{
		//Use approximations to reserve space. This path is mostly for debug purposes anyway
		u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
		u32 approx_working_buffer_size = approx_vertex_count * 256;

		//Allocate 256K heap if we have no approximation at this time (inlined array)
		m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
		m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
	}

	draw_fbo.bind();

	//Check if depth buffer is bound and valid
	//If ds is not initialized clear it; it seems new depth textures should have depth cleared
	gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
	if (ds && !ds->cleared())
	{
		// Force the depth mask on for the clear, then restore the RSX-programmed write state.
		glDepthMask(GL_TRUE);
		glClearDepth(1.f);
		glClear(GL_DEPTH_BUFFER_BIT);
		glDepthMask(rsx::method_registers.depth_write_enabled());

		ds->set_cleared();
	}

	std::chrono::time_point<std::chrono::system_clock> textures_start = std::chrono::system_clock::now();

	//Setup textures
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		int location;
		if (!rsx::method_registers.fragment_textures[i].enabled())
		{
			// Unconditionally unbind disabled fragment texture slots.
			glActiveTexture(GL_TEXTURE0 + i);
			glBindTexture(GL_TEXTURE_2D, 0);
			continue;
		}

		// Upload only if the shader actually samples this slot ("tex<i>" uniform exists).
		if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
		{
			m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]));
			__glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts);
		}
	}

	//Vertex textures
	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		// Vertex texture units are mapped after all fragment texture units.
		int texture_index = i + rsx::limits::fragment_textures_count;
		int location;

		if (!rsx::method_registers.vertex_textures[i].enabled())
		{
			glActiveTexture(GL_TEXTURE0 + texture_index);
			glBindTexture(GL_TEXTURE_2D, 0);
			continue;
		}

		// Upload only if the shader samples this slot ("vtex<i>" uniform exists).
		if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location))
		{
			m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i]));
			__glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts);
		}
	}

	std::chrono::time_point<std::chrono::system_clock> textures_end = std::chrono::system_clock::now();
	m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();

	// Upload vertex data; indexed_draw_info is engaged only for indexed draws
	// and carries (index type, byte offset into the index ring buffer).
	u32 vertex_draw_count;
	std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
	std::tie(vertex_draw_count, indexed_draw_info) = set_vertex_buffer();
	m_vao.bind();

	std::chrono::time_point<std::chrono::system_clock> draw_start = std::chrono::system_clock::now();

	if (g_cfg_rsx_debug_output)
	{
		m_program->validate();
	}

	if (manually_flush_ring_buffers)
	{
		// Explicit unmap before drawing (non-coherent mapping path).
		m_attrib_ring_buffer->unmap();
		m_index_ring_buffer->unmap();
	}

	if (indexed_draw_info)
	{
		if (__glcheck enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
		{
			// Restart index depends on index width: 0xffff for 16-bit, 0xffffffff for 32-bit.
			GLenum index_type = std::get<0>(indexed_draw_info.value());
			__glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
		}

		__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, std::get<0>(indexed_draw_info.value()), (GLvoid *)(std::ptrdiff_t)std::get<1>(indexed_draw_info.value()));
	}
	else
	{
		draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count);
	}

	std::chrono::time_point<std::chrono::system_clock> draw_end = std::chrono::system_clock::now();
	m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();

	write_buffers();

	rsx::thread::end();
}