// Handles RSX methods that need renderer-specific processing.
// Returns true when the command was consumed here; false lets the caller
// fall back to the generic implementation.
// NOTE(review): the file previously contained two definitions of this
// function (a stale duplicate lacking the CLEAR_ZCULL_SURFACE case), which
// is a redefinition error; only the complete version is kept.
bool GLGSRender::do_method(u32 cmd, u32 arg)
{
	switch (cmd)
	{
	case NV4097_CLEAR_SURFACE:
	{
		if (arg & 0xF3)
		{
			//Only do all this if we have actual work to do
			init_buffers(true);
			synchronize_buffers();
			clear_surface(arg);
		}
		return true;
	}
	case NV4097_CLEAR_ZCULL_SURFACE:
	{
		// NOP
		// Clearing zcull memory does not modify depth/stencil buffers 'bound' to the zcull region
		return true;
	}
	case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
	case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
		//Defer the actual flush to the draw-buffer flush path
		flush_draw_buffers = true;
		return true;
	}
	return false;
}
//(Re)creates and binds the draw framebuffer from the current RSX surface state.
//Early-outs when the existing FBO is still valid; otherwise rebuilds render-target
//attachments, records surface_info/depth_surface_info bookkeeping, selects the
//draw/read buffers, and (config permitting) locks surface memory for Cell-side
//access tracking.
//NOTE(review): the skip_reading parameter is not referenced anywhere in this
//body — confirm it is intentionally unused.
void GLGSRender::init_buffers(bool skip_reading)
{
	//Fast path: surface configuration unchanged since last rebuild, just refresh the viewport
	if (draw_fbo && !m_rtts_dirty)
	{
		set_viewport();
		return;
	}

	//We are about to change buffers, flush any pending requests for the old buffers
	synchronize_buffers();

	m_rtts_dirty = false;

	const u16 clip_horizontal = rsx::method_registers.surface_clip_width();
	const u16 clip_vertical = rsx::method_registers.surface_clip_height();

	const auto pitchs = get_pitchs();
	const auto surface_format = rsx::method_registers.surface_color();
	const auto depth_format = rsx::method_registers.surface_depth_fmt();
	const auto surface_addresses = get_color_surface_addresses();
	const auto depth_address = get_zeta_surface_address();

	m_rtts.prepare_render_target(nullptr, surface_format, depth_format, clip_horizontal, clip_vertical, rsx::method_registers.surface_color_target(), surface_addresses, depth_address);

	draw_fbo.recreate();

	//Attach every bound color render target and record its metadata
	for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
	{
		if (std::get<0>(m_rtts.m_bound_render_targets[i]))
		{
			__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);

			std::get<1>(m_rtts.m_bound_render_targets[i])->set_rsx_pitch(pitchs[i]);
			surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical };

			//Verify pitch given is correct if pitch <= 64 (especially 64)
			if (pitchs[i] <= 64)
			{
				const u16 native_pitch = std::get<1>(m_rtts.m_bound_render_targets[i])->get_native_pitch();
				if (native_pitch > pitchs[i])
				{
					LOG_WARNING(RSX, "Bad color surface pitch given: surface_width=%d, format=%d, pitch=%d, native_pitch=%d", clip_horizontal, (u32)surface_format, pitchs[i], native_pitch);

					//Will not transfer this surface between cell and rsx due to misalignment
					//TODO: Verify correct behaviour
					surface_info[i].pitch = 0;
				}
			}
		}
		else
			surface_info[i] = {};
	}

	//Attach the depth/stencil surface if one is bound
	if (std::get<0>(m_rtts.m_bound_depth_stencil))
	{
		//z24s8 is bound as combined depth-stencil; other formats as plain depth
		if (depth_format == rsx::surface_depth_format::z24s8)
			__glcheck draw_fbo.depth_stencil = *std::get<1>(m_rtts.m_bound_depth_stencil);
		else
			__glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil);

		const u32 depth_surface_pitch = rsx::method_registers.surface_z_pitch();
		std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch());
		depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical };

		//Verify pitch given is correct if pitch <= 64 (especially 64)
		if (depth_surface_pitch <= 64)
		{
			const u16 native_pitch = std::get<1>(m_rtts.m_bound_depth_stencil)->get_native_pitch();
			if (native_pitch > depth_surface_pitch)
			{
				LOG_WARNING(RSX, "Bad depth surface pitch given: surface_width=%d, format=%d, pitch=%d, native_pitch=%d", clip_horizontal, (u32)depth_format, depth_surface_pitch, native_pitch);

				//Will not transfer this surface between cell and rsx due to misalignment
				//TODO: Verify correct behaviour
				depth_surface_info.pitch = 0;
			}
		}
	}
	else
		depth_surface_info = {};

	//Bail out if the framebuffer is incomplete; presumably check() also flags
	//framebuffer_status_valid elsewhere — the FBO is simply not bound in that case
	if (!draw_fbo.check())
		return;

	draw_fbo.bind();
	set_viewport();

	//Select the MRT draw buffers and the read buffer according to the color target mask
	switch (rsx::method_registers.surface_color_target())
	{
	case rsx::surface_target::none: break;

	case rsx::surface_target::surface_a:
		__glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surface_b:
		__glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
		__glcheck draw_fbo.read_buffer(draw_fbo.color[1]);
		break;

	case rsx::surface_target::surfaces_a_b:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surfaces_a_b_c:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;

	case rsx::surface_target::surfaces_a_b_c_d:
		__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
		__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
		break;
	}

	//Mark buffer regions as NO_ACCESS on Cell visible side
	if (g_cfg_rsx_write_color_buffers)
	{
		auto color_format = rsx::internals::surface_color_format_to_gl(surface_format);
		for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
		{
			//Surfaces with pitch zeroed above (misaligned) are deliberately skipped
			if (!surface_info[i].address || !surface_info[i].pitch) continue;

			const u32 range = surface_info[i].pitch * surface_info[i].height;
			m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch, color_format.format, color_format.type, color_format.swap_bytes, *std::get<1>(m_rtts.m_bound_render_targets[i]));
		}
	}

	if (g_cfg_rsx_write_depth_buffer)
	{
		if (depth_surface_info.address && depth_surface_info.pitch)
		{
			auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format);

			//Computed pitch: 2 bytes/pixel for z16, doubled (4 bytes/pixel) otherwise
			u32 pitch = depth_surface_info.width * 2;
			if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;

			const u32 range = pitch * depth_surface_info.height;

			//TODO: Verify that depth surface pitch variance affects results
			if (pitch != depth_surface_info.pitch)
				LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch);

			m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch, depth_format_gl.format, depth_format_gl.type, true, *std::get<1>(m_rtts.m_bound_depth_stencil));
		}
	}
}
void GLGSRender::end() { if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state()) { rsx::thread::end(); return; } if (manually_flush_ring_buffers) { //Use approximations to reseve space. This path is mostly for debug purposes anyway u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); u32 approx_working_buffer_size = approx_vertex_count * 256; //Allocate 256K heap if we have no approximation at this time (inlined array) m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U)); m_index_ring_buffer->reserve_storage_on_heap(16 * 1024); } //Do vertex upload before RTT prep / texture lookups to give the driver time to push data u32 vertex_draw_count; u32 actual_vertex_count; u32 vertex_base; std::optional<std::tuple<GLenum, u32> > indexed_draw_info; std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer(); std::chrono::time_point<steady_clock> program_start = steady_clock::now(); //Load program here since it is dependent on vertex state load_program(vertex_base, actual_vertex_count); std::chrono::time_point<steady_clock> program_stop = steady_clock::now(); m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count(); if (manually_flush_ring_buffers) { m_attrib_ring_buffer->unmap(); m_index_ring_buffer->unmap(); } else { //DMA push; not needed with MAP_COHERENT //glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); } //Check if depth buffer is bound and valid //If ds is not initialized clear it; it seems new depth textures should have depth cleared auto copy_rtt_contents = [](gl::render_target *surface) { //Copy data from old contents onto this one //1. Clip a rectangular region defning the data //2. 
Perform a GPU blit u16 parent_w = surface->old_contents->width(); u16 parent_h = surface->old_contents->height(); u16 copy_w, copy_h; std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1); surface->set_cleared(); surface->old_contents = nullptr; }; //Check if we have any 'recycled' surfaces in memory and if so, clear them std::vector<int> buffers_to_clear; bool clear_all_color = true; bool clear_depth = false; for (int index = 0; index < 4; index++) { if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0) { if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared()) clear_all_color = false; else buffers_to_clear.push_back(index); } } gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); if (ds && !ds->cleared()) { clear_depth = true; } //Temporarily disable pixel tests glDisable(GL_SCISSOR_TEST); if (clear_depth || buffers_to_clear.size() > 0) { GLenum mask = 0; if (clear_depth) { gl_state.depth_mask(GL_TRUE); gl_state.clear_depth(1.0); gl_state.clear_stencil(255); mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } if (clear_all_color) mask |= GL_COLOR_BUFFER_BIT; glClear(mask); if (buffers_to_clear.size() > 0 && !clear_all_color) { GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f }; //It is impossible for the render target to be typa A or B here (clear all would have been flagged) for (auto &i: buffers_to_clear) glClearBufferfv(draw_fbo.id(), i, colors); } if (clear_depth) gl_state.depth_mask(rsx::method_registers.depth_write_enabled()); ds->set_cleared(); } if (g_cfg.video.strict_rendering_mode) { if (ds->old_contents != nullptr) copy_rtt_contents(ds); for (auto &rtt : m_rtts.m_bound_render_targets) { if (std::get<0>(rtt) != 0) { auto surface = std::get<1>(rtt); if (surface->old_contents != nullptr) 
copy_rtt_contents(surface); } } } glEnable(GL_SCISSOR_TEST); std::chrono::time_point<steady_clock> textures_start = steady_clock::now(); //Setup textures //Setting unused texture to 0 is not needed, but makes program validation happy if we choose to enforce it for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { int location; if (!rsx::method_registers.fragment_textures[i].enabled()) { if (m_textures_dirty[i]) { glActiveTexture(GL_TEXTURE0 + i); glBindTexture(GL_TEXTURE_2D, 0); m_textures_dirty[i] = false; } continue; } if (m_program->uniforms.has_location("tex" + std::to_string(i), &location)) { m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i])); __glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts); __glcheck m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]); } } //Vertex textures for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) { int texture_index = i + rsx::limits::fragment_textures_count; int location; if (!rsx::method_registers.vertex_textures[i].enabled()) { //glActiveTexture(GL_TEXTURE0 + texture_index); //glBindTexture(GL_TEXTURE_2D, 0); continue; } if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location)) { m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i])); __glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts); } } std::chrono::time_point<steady_clock> textures_end = steady_clock::now(); m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count(); std::chrono::time_point<steady_clock> draw_start = steady_clock::now(); if (g_cfg.video.debug_output) { m_program->validate(); } if (indexed_draw_info) { const GLenum index_type = std::get<0>(indexed_draw_info.value()); const u32 index_offset = 
std::get<1>(indexed_draw_info.value()); if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) { __glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); } __glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); } else { glDrawArrays(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), 0, vertex_draw_count); } m_attrib_ring_buffer->notify(); m_index_ring_buffer->notify(); m_vertex_state_buffer->notify(); m_fragment_constants_buffer->notify(); m_transform_constants_buffer->notify(); std::chrono::time_point<steady_clock> draw_end = steady_clock::now(); m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count(); m_draw_calls++; if (zcull_task_queue.active_query && zcull_task_queue.active_query->active) zcull_task_queue.active_query->num_draws++; synchronize_buffers(); rsx::thread::end(); }