// Uploads the 512 vec4 vertex-program constant block into the data ring
// buffer and publishes it as a constant buffer view at the requested
// descriptor slot of the current resource storage's descriptor heap.
void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_index)
{
	const size_t upload_size = 512 * 4 * sizeof(float);
	const size_t offset_in_heap = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(upload_size);

	// Map only the range we just allocated, fill it, then unmap the same range.
	const CD3DX12_RANGE mapped_range(offset_in_heap, offset_in_heap + upload_size);
	void *dst = m_buffer_data.map<void>(mapped_range);
	fill_vertex_program_constants_data(dst);
	m_buffer_data.unmap(mapped_range);

	D3D12_CONSTANT_BUFFER_VIEW_DESC view_desc = {
		m_buffer_data.get_heap()->GetGPUVirtualAddress() + offset_in_heap,
		(UINT)upload_size
	};
	m_device->CreateConstantBufferView(&view_desc,
		CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetCPUDescriptorHandleForHeapStart())
			.Offset((INT)descriptor_index, m_descriptor_stride_srv_cbv_uav));
}
// Selects the GLSL pipeline for the current draw and uploads all uniform
// data through the dedicated ring buffers:
//   slot 0 - vertex state (scale/offset, user clip, branch bits, base vertex, layout)
//   slot 1 - transform (vertex program) constants, only when dirty
//   slot 2 - fragment constants followed by fragment state
// vertex_base  - first-vertex offset written into the vertex state block
// vertex_count - vertex count used when emitting the vertex layout state
void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
{
	auto &fragment_program = current_fragment_program;
	auto &vertex_program = current_vertex_program;

	for (auto &vtx : vertex_program.rsx_vertex_inputs)
	{
		auto &array_info = rsx::method_registers.vertex_arrays_info[vtx.location];
		if (array_info.type() == rsx::vertex_base_type::s1 ||
			array_info.type() == rsx::vertex_base_type::cmp)
		{
			// Some vendors do not support GL_x_SNORM buffer textures
			verify(HERE), vtx.flags == 0;
			vtx.flags |= GL_VP_FORCE_ATTRIB_SCALING | GL_VP_ATTRIB_S16_INT;
		}
	}

	vertex_program.skip_vertex_input_check = true; // not needed for us since decoding is done server side

	void* pipeline_properties = nullptr;
	m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, pipeline_properties);
	m_program->use();

	// A cache miss means this pipeline was just compiled; persist it to disk.
	if (m_prog_buffer.check_cache_missed())
		m_shaders_cache->store(pipeline_properties, vertex_program, fragment_program);

	u8 *buf;
	u32 vertex_state_offset;
	u32 vertex_constants_offset;
	u32 fragment_constants_offset;

	const u32 fragment_constants_size = static_cast<u32>(m_prog_buffer.get_fragment_constants_buffer_size(fragment_program));
	// Fragment state (17 vec4s) is appended after the constants in the same allocation.
	const u32 fragment_buffer_size = fragment_constants_size + (17 * 4 * sizeof(float));

	if (manually_flush_ring_buffers)
	{
		m_vertex_state_buffer->reserve_storage_on_heap(512);
		m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256));

		if (m_transform_constants_dirty)
			m_transform_constants_buffer->reserve_storage_on_heap(8192);
	}

	// Vertex state
	auto mapping = m_vertex_state_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align);
	buf = static_cast<u8*>(mapping.first);
	vertex_state_offset = mapping.second;
	fill_scale_offset_data(buf, false);
	fill_user_clip_data(buf + 64);
	*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
	*(reinterpret_cast<u32*>(buf + 132)) = vertex_base;
	fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast<s32*>(buf + 144));

	if (m_transform_constants_dirty)
	{
		// Vertex constants
		mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
		buf = static_cast<u8*>(mapping.first);
		vertex_constants_offset = mapping.second;
		fill_vertex_program_constants_data(buf);
	}

	// Fragment constants
	mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align);
	buf = static_cast<u8*>(mapping.first);
	fragment_constants_offset = mapping.second;
	if (fragment_constants_size)
		m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) }, fragment_program);

	// Fragment state lives directly after the constants block
	fill_fragment_state_buffer(buf + fragment_constants_size, fragment_program);

	m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512);
	m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);

	// vertex_constants_offset is only assigned on the dirty path; bind is guarded the same way.
	if (m_transform_constants_dirty)
		m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192);

	if (manually_flush_ring_buffers)
	{
		m_vertex_state_buffer->unmap();
		m_fragment_constants_buffer->unmap();

		if (m_transform_constants_dirty)
			m_transform_constants_buffer->unmap();
	}

	m_transform_constants_dirty = false;
}
bool GLGSRender::load_program() { RSXVertexProgram vertex_program = get_current_vertex_program(); RSXFragmentProgram fragment_program = get_current_fragment_program(); for (auto &vtx : vertex_program.rsx_vertex_inputs) { auto &array_info = rsx::method_registers.vertex_arrays_info[vtx.location]; if (array_info.type() == rsx::vertex_base_type::s1 || array_info.type() == rsx::vertex_base_type::cmp) { //Some vendors do not support GL_x_SNORM buffer textures verify(HERE), vtx.flags == 0; vtx.flags |= GL_VP_FORCE_ATTRIB_SCALING | GL_VP_ATTRIB_S16_INT; } } for (int i = 0; i < 16; ++i) { auto &tex = rsx::method_registers.fragment_textures[i]; if (tex.enabled()) { const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); if (m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) { //Ignore this rtt since we have an aloasing color texture that will be used if (m_rtts.get_texture_from_render_target_if_applicable(texaddr)) continue; u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); if (format == CELL_GCM_TEXTURE_A8R8G8B8 || format == CELL_GCM_TEXTURE_D8R8G8B8) { fragment_program.redirected_textures |= (1 << i); } } } } auto old_program = m_program; m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); m_program->use(); //Apps can write into the fragment program binary. 
u32 fragment_constants_size = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); std::vector<u8> fragment_constants_buf; if (fragment_constants_size) { fragment_constants_buf.resize(fragment_constants_size); m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(fragment_constants_buf.data()), gsl::narrow<int>(fragment_constants_size) }, fragment_program); } if (old_program == m_program && !m_transform_constants_dirty) { //This path is taken alot so the savings are tangible struct scale_offset_layout { u16 clip_w, clip_h; float scale_x, offset_x, scale_y, offset_y, scale_z, offset_z; float fog0, fog1; u32 alpha_tested; float alpha_ref; } tmp = {}; tmp.clip_w = rsx::method_registers.surface_clip_width(); tmp.clip_h = rsx::method_registers.surface_clip_height(); tmp.scale_x = rsx::method_registers.viewport_scale_x(); tmp.offset_x = rsx::method_registers.viewport_offset_x(); tmp.scale_y = rsx::method_registers.viewport_scale_y(); tmp.offset_y = rsx::method_registers.viewport_offset_y(); tmp.scale_z = rsx::method_registers.viewport_scale_z(); tmp.offset_z = rsx::method_registers.viewport_offset_z(); tmp.fog0 = rsx::method_registers.fog_params_0(); tmp.fog1 = rsx::method_registers.fog_params_1(); tmp.alpha_tested = rsx::method_registers.alpha_test_enabled(); tmp.alpha_ref = rsx::method_registers.alpha_ref(); size_t old_hash = m_transform_buffer_hash; m_transform_buffer_hash = 0; u8 *data = reinterpret_cast<u8*>(&tmp); for (int i = 0; i < sizeof(tmp); ++i) m_transform_buffer_hash ^= std::hash<char>()(data[i]); if (old_hash == m_transform_buffer_hash) { //Its likely that nothing changed since previous draw. 
if (!fragment_constants_size) return true; old_hash = m_fragment_buffer_hash; m_fragment_buffer_hash = 0; for (int i = 0; i < fragment_constants_size; ++i) m_fragment_buffer_hash ^= std::hash<char>()(fragment_constants_buf[i]); if (m_fragment_buffer_hash == old_hash) return true; } } m_transform_constants_dirty = false; fragment_constants_size = std::max(32U, fragment_constants_size); u32 max_buffer_sz = 512 + 8192 + align(fragment_constants_size, m_uniform_buffer_offset_align); if (manually_flush_ring_buffers) m_uniform_ring_buffer->reserve_storage_on_heap(align(max_buffer_sz, 512)); u8 *buf; u32 scale_offset_offset; u32 vertex_constants_offset; u32 fragment_constants_offset; // Scale offset auto mapping = m_uniform_ring_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); buf = static_cast<u8*>(mapping.first); scale_offset_offset = mapping.second; fill_scale_offset_data(buf, false); // Fragment state u32 is_alpha_tested = rsx::method_registers.alpha_test_enabled(); float alpha_ref = rsx::method_registers.alpha_ref() / 255.f; f32 fog0 = rsx::method_registers.fog_params_0(); f32 fog1 = rsx::method_registers.fog_params_1(); memcpy(buf + 16 * sizeof(float), &fog0, sizeof(float)); memcpy(buf + 17 * sizeof(float), &fog1, sizeof(float)); memcpy(buf + 18 * sizeof(float), &is_alpha_tested, sizeof(u32)); memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float)); // Vertex constants mapping = m_uniform_ring_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); buf = static_cast<u8*>(mapping.first); vertex_constants_offset = mapping.second; fill_vertex_program_constants_data(buf); // Fragment constants if (fragment_constants_size) { mapping = m_uniform_ring_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align); buf = static_cast<u8*>(mapping.first); fragment_constants_offset = mapping.second; memcpy(buf, fragment_constants_buf.data(), fragment_constants_buf.size()); } m_uniform_ring_buffer->bind_range(0, scale_offset_offset, 512); 
m_uniform_ring_buffer->bind_range(1, vertex_constants_offset, 8192); if (fragment_constants_size) { m_uniform_ring_buffer->bind_range(2, fragment_constants_offset, fragment_constants_size); } if (manually_flush_ring_buffers) m_uniform_ring_buffer->unmap(); return true; }
bool GLGSRender::load_program() { auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16> { gl::render_target *surface = nullptr; if (!is_depth) surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); else surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); if (!surface) return std::make_tuple(false, 0); return std::make_tuple(true, surface->get_native_pitch()); }; RSXVertexProgram vertex_program = get_current_vertex_program(); RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func); std::array<float, 16> rtt_scaling; u32 unnormalized_rtts = 0; for (auto &vtx : vertex_program.rsx_vertex_inputs) { auto &array_info = rsx::method_registers.vertex_arrays_info[vtx.location]; if (array_info.type() == rsx::vertex_base_type::s1 || array_info.type() == rsx::vertex_base_type::cmp) { //Some vendors do not support GL_x_SNORM buffer textures verify(HERE), vtx.flags == 0; vtx.flags |= GL_VP_FORCE_ATTRIB_SCALING | GL_VP_ATTRIB_S16_INT; } } auto old_program = m_program; m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); m_program->use(); if (old_program == m_program && !m_transform_constants_dirty) { //This path is taken alot so the savings are tangible struct scale_offset_layout { u16 clip_w, clip_h; float scale_x, offset_x, scale_y, offset_y, scale_z, offset_z; float fog0, fog1; u32 alpha_tested; float alpha_ref; } tmp = {}; tmp.clip_w = rsx::method_registers.surface_clip_width(); tmp.clip_h = rsx::method_registers.surface_clip_height(); tmp.scale_x = rsx::method_registers.viewport_scale_x(); tmp.offset_x = rsx::method_registers.viewport_offset_x(); tmp.scale_y = rsx::method_registers.viewport_scale_y(); tmp.offset_y = rsx::method_registers.viewport_offset_y(); tmp.scale_z = rsx::method_registers.viewport_scale_z(); tmp.offset_z = rsx::method_registers.viewport_offset_z(); tmp.fog0 = rsx::method_registers.fog_params_0(); tmp.fog1 = 
rsx::method_registers.fog_params_1(); tmp.alpha_tested = rsx::method_registers.alpha_test_enabled(); tmp.alpha_ref = rsx::method_registers.alpha_ref(); size_t old_hash = m_transform_buffer_hash; m_transform_buffer_hash = 0; u8 *data = reinterpret_cast<u8*>(&tmp); for (int i = 0; i < sizeof(tmp); ++i) m_transform_buffer_hash ^= std::hash<char>()(data[i]); if (old_hash == m_transform_buffer_hash) return true; } m_transform_constants_dirty = false; u32 fragment_constants_size = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); u32 fragment_buffer_size = fragment_constants_size + (17 * 4 * sizeof(float)); u32 max_buffer_sz = 512 + 8192 + align(fragment_constants_size, m_uniform_buffer_offset_align); if (manually_flush_ring_buffers) m_uniform_ring_buffer->reserve_storage_on_heap(align(max_buffer_sz, 512)); u8 *buf; u32 scale_offset_offset; u32 vertex_constants_offset; u32 fragment_constants_offset; // Scale offset auto mapping = m_uniform_ring_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); buf = static_cast<u8*>(mapping.first); scale_offset_offset = mapping.second; fill_scale_offset_data(buf, false); // Vertex constants mapping = m_uniform_ring_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); buf = static_cast<u8*>(mapping.first); vertex_constants_offset = mapping.second; fill_vertex_program_constants_data(buf); // Fragment constants mapping = m_uniform_ring_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align); buf = static_cast<u8*>(mapping.first); fragment_constants_offset = mapping.second; if (fragment_constants_size) m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) }, fragment_program); // Fragment state fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program); m_uniform_ring_buffer->bind_range(0, scale_offset_offset, 512); m_uniform_ring_buffer->bind_range(1, vertex_constants_offset, 8192); 
m_uniform_ring_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); if (manually_flush_ring_buffers) m_uniform_ring_buffer->unmap(); return true; }