void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); int w = std::max<int>(cr.width(), 1); int h = std::max<int>(cr.height(), 1); if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h)) { return; } } GL_PUSH("Present"); ClearRenderTarget(m_backbuffer, 0); if(m_current) { static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE, ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER, ShaderConvert_COMPLEX_FILTER}; // FIXME Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); } Flip(); }
void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); int w = std::max<int>(cr.width(), 1); int h = std::max<int>(cr.height(), 1); if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h)) { return; } } GL_PUSH("Present"); ClearRenderTarget(m_backbuffer, 0); if(m_current) { static int s_shader[5] = {0, 5, 6, 8, 9}; // FIXME Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); } Flip(); GL_POP(); }
void GSDeviceOGL::CreateTextureFX() { m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer)); m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer)); // warning 1 sampler by image unit. So you cannot reuse m_ps_ss... m_palette_ss = CreateSampler(false, false, false); glBindSampler(1, m_palette_ss); // Pre compile all Geometry & Vertex Shader // It might cost a seconds at startup but it would reduce benchmark pollution GL_PUSH("Compile GS"); for (uint32 key = 0; key < countof(m_gs); key++) { GSSelector sel(key); if (sel.point == sel.sprite) m_gs[key] = 0; else m_gs[key] = CompileGS(GSSelector(key)); } GL_POP(); GL_PUSH("Compile VS"); for (uint32 key = 0; key < countof(m_vs); key++) { // wildhack is only useful if both TME and FST are enabled. VSSelector sel(key); if (sel.wildhack && (!sel.tme || !sel.fst)) m_vs[key] = 0; else m_vs[key] = CompileVS(sel, !GLLoader::found_GL_ARB_clip_control); } GL_POP(); // Enable all bits for stencil operations. Technically 1 bit is // enough but buffer is polluted with noise. Clear will be limited // to the mask. glStencilMask(0xFF); for (uint32 key = 0; key < countof(m_om_dss); key++) { m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); } // Help to debug FS in apitrace m_apitrace = CompilePS(PSSelector()); }
bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r) { GSVector4i r = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y); // LOTS OF CRAP CODE!!!! PLEASE FIX ME !!! if (m_type == GSTexture::Offscreen) { // The fastest way will be to use a PBO to read the data asynchronously. Unfortunately GSdx // architecture is waiting the data right now. #if 0 // Maybe it is as good as the code below. I don't know // With openGL 4.5 you can use glGetTextureSubImage glGetTextureImage(m_texture_id, GL_TEX_LEVEL_0, m_int_format, m_int_type, 1024*1024*16, m_local_buffer); #else // Bind the texture to the read framebuffer to avoid any disturbance glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0); glPixelStorei(GL_PACK_ALIGNMENT, m_int_alignment); glReadPixels(r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, m_local_buffer); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); #endif m.bits = m_local_buffer; m.pitch = m_size.x << m_int_shift; return true; } else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { GL_PUSH("Upload Texture %d", m_texture_id); // POP is in Unmap m_dirty = true; m_clean = false; uint32 row_byte = r.width() << m_int_shift; uint32 map_size = r.height() * row_byte; m.bits = (uint8*)PboPool::Map(map_size); m.pitch = row_byte; #ifdef ENABLE_OGL_DEBUG_MEM_BW g_real_texture_upload_byte += map_size; #endif // Save the area for the unmap m_r_x = r.x; m_r_y = r.y; m_r_w = r.width(); m_r_h = r.height(); return true; } return false; }
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen); GL_PUSH("Upload Texture %d", m_texture_id); m_dirty = true; m_clean = false; glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); char* src = (char*)data; uint32 row_byte = r.width() << m_int_shift; uint32 map_size = r.height() * row_byte; char* map = PboPool::Map(map_size); #ifdef ENABLE_OGL_DEBUG_MEM_BW g_real_texture_upload_byte += map_size; #endif // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch // Note: row_byte != pitch for (int h = 0; h < r.height(); h++) { memcpy(map, src, row_byte); map += row_byte; src += pitch; } PboPool::Unmap(); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset()); // FIXME OGL4: investigate, only 1 unpack buffer always bound PboPool::UnbindPbo(); PboPool::EndTransfer(); GL_POP(); return true; // For reference, standard upload without pbo (Used to crash on FGLRX) #if 0 // pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); // FIXME useful? glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior return true; #endif }
void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb) { GL_PUSH("UBO"); if(m_vs_cb_cache.Update(vs_cb)) { m_vs_cb->upload(vs_cb); } if(m_ps_cb_cache.Update(ps_cb)) { m_ps_cb->upload(ps_cb); } GL_POP(); }
void GSDeviceOGL::CreateTextureFX() { GL_PUSH("CreateTextureFX"); m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer)); m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer)); // warning 1 sampler by image unit. So you cannot reuse m_ps_ss... m_palette_ss = CreateSampler(false, false, false); gl_BindSampler(1, m_palette_ss); // Pre compile all Geometry & Vertex Shader // It might cost a seconds at startup but it would reduce benchmark pollution m_gs = CompileGS(); int logz = theApp.GetConfig("logz", 1); // Don't do it in debug build, so it can still be tested #ifndef _DEBUG if (GLLoader::found_GL_ARB_clip_control && logz) { fprintf(stderr, "Your driver supports advance depth. Logz will be disabled\n"); logz = 0; } else if (!GLLoader::found_GL_ARB_clip_control && !logz) { fprintf(stderr, "Your driver DOESN'T support advance depth (GL_ARB_clip_control)\n It is higly recommmended to enable logz\n"); } #endif for (uint32 key = 0; key < VSSelector::size(); key++) { // wildhack is only useful if both TME and FST are enabled. VSSelector sel(key); if (sel.wildhack && (!sel.tme || !sel.fst)) m_vs[key] = 0; else m_vs[key] = CompileVS(sel, logz); } // Enable all bits for stencil operations. Technically 1 bit is // enough but buffer is polluted with noise. Clear will be limited // to the mask. glStencilMask(0xFF); for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); // Help to debug FS in apitrace m_apitrace = CompilePS(PSSelector()); GL_POP(); }
void GSShaderOGL::UseProgram() { GL_PUSH("Use Program And Uniform"); if (GLState::dirty_prog) { if (!GLLoader::found_GL_ARB_separate_shader_objects) { GLState::dirty_ressources = true; hash_map<uint64, GLuint >::iterator it; // Note: shader are integer lookup pointer. They start from 1 and incr // every time you create a new shader OR a new program. // Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128. GS has only 2 programs // We migth be able to pack the value in a 32bits int // I would need to check the behavior on Nvidia (pause/resume). uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps; it = m_single_prog.find(sel); if (it == m_single_prog.end()) { GLState::program = LinkNewProgram(); m_single_prog[sel] = GLState::program; ValidateProgram(GLState::program); gl_UseProgram(GLState::program); } else { GLuint prog = it->second; if (prog != GLState::program) { GLState::program = prog; gl_UseProgram(GLState::program); } } } else { ValidatePipeline(m_pipeline); } } SetupRessources(); GLState::dirty_prog = false; GL_POP(); }
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r) { if (!t->m_dirty.empty() || r.width() == 0 || r.height() == 0) return; const GIFRegTEX0& TEX0 = t->m_TEX0; GLuint fmt; int ps_shader; switch (TEX0.PSM) { case PSM_PSMCT32: case PSM_PSMCT24: fmt = GL_RGBA8; ps_shader = ShaderConvert_COPY; break; case PSM_PSMCT16: case PSM_PSMCT16S: fmt = GL_R16UI; ps_shader = ShaderConvert_RGBA8_TO_16_BITS; break; case PSM_PSMZ32: fmt = GL_R32UI; ps_shader = ShaderConvert_FLOAT32_TO_32_BITS; break; case PSM_PSMZ24: fmt = GL_R32UI; ps_shader = ShaderConvert_FLOAT32_TO_32_BITS; break; case PSM_PSMZ16: case PSM_PSMZ16S: fmt = GL_R16UI; ps_shader = ShaderConvert_FLOAT32_TO_32_BITS; break; default: return; } // Yes lots of logging, but I'm not confident with this code GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM); GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d", t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height()); GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader)) { GSTexture::GSMap m; GSVector4i r_offscreen(0, 0, r.width(), r.height()); if(offscreen->Map(m, &r_offscreen)) { // TODO: block level write GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); break; case PSM_PSMCT24: m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); break; case PSM_PSMCT16: case PSM_PSMCT16S: m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); break; case PSM_PSMZ32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); break; case PSM_PSMZ24: m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); break; case PSM_PSMZ16: case PSM_PSMZ16S: m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); break; default: ASSERT(0); } offscreen->Unmap(); } // FIXME invalidate data m_renderer->m_dev->Recycle(offscreen); } }
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen); // Default upload path for the texture is the Map/Unmap // This path is mostly used for palette. But also for texture that could // overflow the pbo buffer // Data upload is rather small typically 64B or 1024B. So don't bother with PBO // and directly send the data to the GL synchronously m_clean = false; uint32 row_byte = r.width() << m_int_shift; uint32 map_size = r.height() * row_byte; #ifdef ENABLE_OGL_DEBUG_MEM_BW g_real_texture_upload_byte += map_size; #endif #if 0 if (r.height() == 1) { // Palette data. Transfer is small either 64B or 1024B. // Sometimes it is faster, sometimes slower. glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); return true; } #endif GL_PUSH("Upload Texture %d", m_texture_id); // The easy solution without PBO #if 0 // Likely a bad texture glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior #endif // The complex solution with PBO #if 1 char* src = (char*)data; char* map = PboPool::Map(map_size); // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch // Note: row_byte != pitch for (int h = 0; h < r.height(); h++) { memcpy(map, src, row_byte); map += row_byte; src += pitch; } PboPool::Unmap(); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset()); // FIXME OGL4: investigate, only 1 unpack buffer always bound PboPool::UnbindPbo(); PboPool::EndTransfer(); #endif return true; }
void GSRendererHW::Draw() { if(m_dev->IsLost() || GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) { GL_INS("Warning skipping a draw call (%d)", s_n); s_n += 3; // Keep it sync with SW renderer return; } GL_PUSH("HW Draw %d", s_n); GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; // It is allowed to use the depth and rt at the same location. However at least 1 must // be disabled. // 1/ GoW uses a Cd blending on a 24 bits buffer (no alpha) // 2/ SuperMan really draws (0,0,0,0) color and a (0) 32-bits depth // 3/ 50cents really draws (0,0,0,128) color and a (0) 24 bits depth // Note: FF DoC has both buffer at same location but disable the depth test (write?) with ZTE = 0 const bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1)); const bool no_ds = !no_rt && ( // Depth is always pass (no read) and write are discarded (tekken 5). (Note: DATE is currently implemented with a stencil buffer) (context->ZBUF.ZMSK && m_context->TEST.ZTST == ZTST_ALWAYS && !m_context->TEST.DATE) || // Depth will be written through the RT (context->FRAME.FBP == context->ZBUF.ZBP && !PRIM->TME && !context->ZBUF.ZMSK && !context->FRAME.FBMSK && context->TEST.ZTE) ); GIFRegTEX0 TEX0; TEX0.TBP0 = context->FRAME.Block(); TEX0.TBW = context->FRAME.FBW; TEX0.PSM = context->FRAME.PSM; GSTextureCache::Target* rt = no_rt ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); GSTexture* rt_tex = rt ? rt->m_texture : NULL; TEX0.TBP0 = context->ZBUF.Block(); TEX0.TBW = context->FRAME.FBW; TEX0.PSM = context->ZBUF.PSM; GSTextureCache::Target* ds = no_ds ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); GSTexture* ds_tex = ds ? ds->m_texture : NULL; if(!(rt || no_rt) || !(ds || no_ds)) { GL_POP(); ASSERT(0); return; } GSTextureCache::Source* tex = NULL; m_texture_shuffle = false; if(PRIM->TME) { /* // m_tc->LookupSource will mess with the palette, should not, but we do this after, until it is sorted out if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } */ GSVector4i r; GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); tex = m_tc->LookupSource(context->TEX0, env.TEXA, r); if(!tex) { GL_POP(); return; } // FIXME: Could be removed on openGL if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } // Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target. // Initially code also tested the RT but it gives too much false-positive // // Both input and output are 16 bits and texture was initially 32 bits! m_texture_shuffle = (context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt; // Texture shuffle is not yet supported with strange clamp mode ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3)); } if (rt) { // Be sure texture shuffle detection is properly propagated // Otherwise set or clear the flag (Code in texture cache only set the flag) // Note: it is important to clear the flag when RT is used as a real 16 bits target. rt->m_32_bits_fmt = m_texture_shuffle || !(context->FRAME.PSM & 0x2); } if(s_dump) { uint64 frame = m_perfmon.GetFrame(); string s; if (s_n >= s_saven) { // Dump Register state s = format("%05d_context.txt", s_n); m_env.Dump(root_hw+s); m_context->Dump(root_hw+s); } if(s_savet && s_n >= s_saven && tex) { s = format("%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds", s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM, (int)context->CLAMP.WMS, (int)context->CLAMP.WMT, (int)context->CLAMP.MINU, (int)context->CLAMP.MAXU, (int)context->CLAMP.MINV, (int)context->CLAMP.MAXV); tex->m_texture->Save(root_hw+s, true); if(tex->m_palette) { s = format("%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM); tex->m_palette->Save(root_hw+s, true); } } s_n++; if(s_save && s_n >= s_saven) { s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); if (rt) rt->m_texture->Save(root_hw+s); } if(s_savez && s_n >= s_saven) { s = format("%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); if (ds_tex) ds_tex->Save(root_hw+s); } s_n++; #ifdef ENABLE_OGL_DEBUG } else { s_n += 2; #endif } if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds_tex, tex)) { s_n += 1; // keep counter sync GL_POP(); return; } // skip alpha test if possible GIFRegTEST TEST = context->TEST; GIFRegFRAME FRAME = context->FRAME; GIFRegZBUF ZBUF = context->ZBUF; uint32 fm = context->FRAME.FBMSK; uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS) { if(GSRenderer::TryAlphaTest(fm, zm)) { context->TEST.ATST = ATST_ALWAYS; } } context->FRAME.FBMSK = fm; context->ZBUF.ZMSK = zm != 0; // A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { size_t count = m_vertex.next; GSVertex* v = &m_vertex.buff[0]; // Hack to avoid vertical black line in various games (ace combat/tekken) if (m_userhacks_align_sprite_X) { // Note for performance reason I do the check only once on the first // primitive int win_position = v[1].XYZ.X - context->XYOFFSET.OFX; const bool unaligned_position = ((win_position & 0xF) == 8); const bool unaligned_texture = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X); if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST)) { // Normaly vertex are aligned on full pixels and texture in half // pixels. Let's extend the coverage of an half-pixel to avoid // hole after upscaling for(size_t i = 0; i < count; i += 2) { v[i+1].XYZ.X += 8; // I really don't know if it is a good idea. Neither what to do for !PRIM->FST if (unaligned_texture) v[i+1].U += 8; } } } if (PRIM->FST) { if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear())) { if (m_vt.IsLinear()) RoundSpriteOffset<true>(); else RoundSpriteOffset<false>(); } } else { ; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior) } } // DrawPrims(rt_tex, ds_tex, tex); // context->TEST = TEST; context->FRAME = FRAME; context->ZBUF = ZBUF; // GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in)); if(fm != 0xffffffff && rt) { rt->m_valid = rt->m_valid.runion(r); m_tc->InvalidateVideoMem(context->offset.fb, r, false); m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block()); } if(zm != 0xffffffff && ds) { ds->m_valid = ds->m_valid.runion(r); m_tc->InvalidateVideoMem(context->offset.zb, r, false); m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block()); } // if(m_hacks.m_oo) { (this->*m_hacks.m_oo)(); } if(s_dump) { uint64 frame = m_perfmon.GetFrame(); string s; if(s_save && s_n >= s_saven) { s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); if (rt) rt->m_texture->Save(root_hw+s); } if(s_savez && s_n >= s_saven) { s = format("%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); if (ds_tex) ds_tex->Save(root_hw+s); } s_n++; if(s_savel > 0 && (s_n - s_saven) > s_savel) { s_dump = 0; } #ifdef ENABLE_OGL_DEBUG } else { s_n += 1; #endif } #ifdef DISABLE_HW_TEXTURE_CACHE if (rt) m_tc->Read(rt, r); #endif GL_POP(); }
bool GSRenderer::Merge(int field) { bool en[2]; GSVector4i fr[2]; GSVector4i dr[2]; GSVector2i display_baseline = { INT_MAX, INT_MAX }; GSVector2i frame_baseline = { INT_MAX, INT_MAX }; for(int i = 0; i < 2; i++) { en[i] = IsEnabled(i); if(en[i]) { fr[i] = GetFrameRect(i); dr[i] = GetDisplayRect(i); display_baseline.x = min(dr[i].left, display_baseline.x); display_baseline.y = min(dr[i].top, display_baseline.y); frame_baseline.x = min(fr[i].left, frame_baseline.x); frame_baseline.y = min(fr[i].top, frame_baseline.y); //printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w); } } if(!en[0] && !en[1]) { return false; } GL_PUSH("Renderer Merge %d (0: enabled %d 0x%x, 1: enabled %d 0x%x)", s_n, en[0], m_regs->DISP[0].DISPFB.Block(), en[1], m_regs->DISP[1].DISPFB.Block()); // try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero) // // NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending) bool samesrc = en[0] && en[1] && m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP && m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) { // persona 4: // // fr[0] = 0 0 640 448 // fr[1] = 0 1 640 448 // dr[0] = 159 50 779 498 // dr[1] = 159 50 779 497 // // second image shifted up by 1 pixel and blended over itself // // god of war: // // fr[0] = 0 1 512 448 // fr[1] = 0 0 512 448 // dr[0] = 127 50 639 497 // dr[1] = 127 50 639 498 // // same just the first image shifted // // These kinds of cases are now fixed by the more generic frame_diff code below, as the code here was too specific and has become obsolete. // NOTE: Persona 4 and God Of War are not rare exceptions, many games have the same(or very similar) offsets. int topDiff = fr[0].top - fr[1].top; if (dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, topDiff, 0, topDiff)) || fr[1].eq(fr[0] + GSVector4i(0, topDiff, 0, topDiff)))) { // dq5: // // fr[0] = 0 1 512 445 // fr[1] = 0 0 512 444 // dr[0] = 127 50 639 494 // dr[1] = 127 50 639 494 int top = min(fr[0].top, fr[1].top); int bottom = min(fr[0].bottom, fr[1].bottom); fr[0].top = fr[1].top = top; fr[0].bottom = fr[1].bottom = bottom; } } GSVector2i fs(0, 0); GSVector2i ds(0, 0); GSTexture* tex[3] = {NULL, NULL, NULL}; int y_offset[3] = {0, 0, 0}; s_n++; bool feedback_merge = m_regs->EXTWRITE.WRITE == 1; if(samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge) { tex[0] = GetOutput(0, y_offset[0]); tex[1] = tex[0]; // saves one texture fetch y_offset[1] = y_offset[0]; } else { if(en[0]) tex[0] = GetOutput(0, y_offset[0]); if(en[1]) tex[1] = GetOutput(1, y_offset[1]); if(feedback_merge) tex[2] = GetFeedbackOutput(); } GSVector4 src[2]; GSVector4 src_hw[2]; GSVector4 dst[2]; for(int i = 0; i < 2; i++) { if(!en[i] || !tex[i]) continue; GSVector4i r = fr[i]; GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy(); GSVector2 off(0); GSVector2i display_diff(dr[i].left - display_baseline.x, dr[i].top - display_baseline.y); GSVector2i frame_diff(fr[i].left - frame_baseline.x, fr[i].top - frame_baseline.y); // Time Crisis 2/3 uses two side by side images when in split screen mode. // Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII if(display_diff.x > 2) { off.x = tex[i]->GetScale().x * display_diff.x; } // If the DX offset is too small then consider the status of frame memory offsets, prevents blurring on Tenchu: Fatal Shadows, Worms 3D else if(display_diff.x != frame_diff.x) { off.x = tex[i]->GetScale().x * frame_diff.x; } if(display_diff.y >= 4) // Shouldn't this be >= 2? { off.y = tex[i]->GetScale().y * display_diff.y; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { off.y /= 2; } } else if(display_diff.y != frame_diff.y) { off.y = tex[i]->GetScale().y * frame_diff.y; } dst[i] = GSVector4(off).xyxy() + scale * GSVector4(r.rsize()); fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); } ds = fs; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { ds.y *= 2; } m_real_size = ds; bool slbg = m_regs->PMODE.SLBG; if(tex[0] || tex[1]) { if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) { // the two outputs are identical, skip drawing one of them (the one that is alpha blended) tex[0] = NULL; } GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; m_dev->Merge(tex, src_hw, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c); if(m_regs->SMODE2.INT && m_interlace > 0) { if(m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting { int field2 = 0; int mode = 2; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } else { int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } if(m_shadeboost) { m_dev->ShadeBoost(); } if(m_shaderfx) { m_dev->ExternalFX(); } if(m_fxaa) { m_dev->FXAA(); } } return true; }
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r) { if(t->m_type != RenderTarget) { ASSERT(0); return; } const GIFRegTEX0& TEX0 = t->m_TEX0; if(TEX0.PSM != PSM_PSMCT32 && TEX0.PSM != PSM_PSMCT24 && TEX0.PSM != PSM_PSMCT16 && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } if(!t->m_dirty.empty()) { return; } GL_PUSH("Texture Cache Read"); // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); GLuint format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? GL_R16UI : GL_RGBA8; //if (format == GL_R16UI) fprintf(stderr, "Format 16 bits integer\n"); #if 0 DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; #endif if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h, format)) { GSTexture::GSMap m; if(offscreen->Map(m)) { // TODO: block level write GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); break; case PSM_PSMCT24: m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); break; case PSM_PSMCT16: case PSM_PSMCT16S: m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); break; default: ASSERT(0); } offscreen->Unmap(); } // FIXME invalidate data m_renderer->m_dev->Recycle(offscreen); } GL_POP(); }
bool GSRenderer::Merge(int field) { bool en[2]; GSVector4i fr[2]; GSVector4i dr[2]; GSVector2i baseline = {INT_MAX, INT_MAX}; for(int i = 0; i < 2; i++) { en[i] = IsEnabled(i); if(en[i]) { fr[i] = GetFrameRect(i); dr[i] = GetDisplayRect(i); baseline.x = min(dr[i].left, baseline.x); baseline.y = min(dr[i].top, baseline.y); //printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w); } } if(!en[0] && !en[1]) { return false; } GL_PUSH("Renderer Merge %d", s_n); // try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero) // // NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending) bool samesrc = en[0] && en[1] && m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP && m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; // bool blurdetected = false; if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) { if(fr[0].eq(fr[1] + GSVector4i(0, -1, 0, 0)) && dr[0].eq(dr[1] + GSVector4i(0, 0, 0, 1)) || fr[1].eq(fr[0] + GSVector4i(0, -1, 0, 0)) && dr[1].eq(dr[0] + GSVector4i(0, 0, 0, 1))) { // persona 4: // // fr[0] = 0 0 640 448 // fr[1] = 0 1 640 448 // dr[0] = 159 50 779 498 // dr[1] = 159 50 779 497 // // second image shifted up by 1 pixel and blended over itself // // god of war: // // fr[0] = 0 1 512 448 // fr[1] = 0 0 512 448 // dr[0] = 127 50 639 497 // dr[1] = 127 50 639 498 // // same just the first image shifted int top = min(fr[0].top, fr[1].top); int bottom = max(dr[0].bottom, dr[1].bottom); fr[0].top = top; fr[1].top = top; dr[0].bottom = bottom; dr[1].bottom = bottom; // blurdetected = true; } else if(dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, 1, 0, 1)) || fr[1].eq(fr[0] + GSVector4i(0, 1, 0, 1)))) { // dq5: // // fr[0] = 0 1 512 445 // fr[1] = 0 0 512 444 // dr[0] = 127 50 639 494 // dr[1] = 127 50 639 494 int top = min(fr[0].top, fr[1].top); int bottom = min(fr[0].bottom, fr[1].bottom); fr[0].top = fr[1].top = top; fr[0].bottom = fr[1].bottom = bottom; // blurdetected = true; } //printf("samesrc = %d blurdetected = %d\n",samesrc,blurdetected); } GSVector2i fs(0, 0); GSVector2i ds(0, 0); GSTexture* tex[2] = {NULL, NULL}; int y_offset[2] = {0, 0}; if(samesrc && fr[0].bottom == fr[1].bottom) { tex[0] = GetOutput(0, y_offset[0]); tex[1] = tex[0]; // saves one texture fetch y_offset[1] = y_offset[0]; } else { if(en[0]) tex[0] = GetOutput(0, y_offset[0]); if(en[1]) tex[1] = GetOutput(1, y_offset[1]); } GSVector4 src[2]; GSVector4 src_hw[2]; GSVector4 dst[2]; for(int i = 0; i < 2; i++) { if(!en[i] || !tex[i]) continue; GSVector4i r = fr[i]; GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy(); GSVector2 off(0, 0); GSVector2i diff(dr[i].left - baseline.x, dr[i].top - baseline.y); // Time Crisis 2/3 uses two side by side images when in split screen mode. // Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII if(diff.x >= 2) { off.x = tex[i]->GetScale().x * diff.x; } if(diff.y >= 4) // Shouldn't this be 2? { off.y = tex[i]->GetScale().y * diff.y; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { off.y /= 2; } } dst[i] = GSVector4(off).xyxy() + scale * GSVector4(r.rsize()); fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); } ds = fs; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { ds.y *= 2; } m_real_size = ds; bool slbg = m_regs->PMODE.SLBG; bool mmod = m_regs->PMODE.MMOD; if(tex[0] || tex[1]) { if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) { // the two outputs are identical, skip drawing one of them (the one that is alpha blended) tex[0] = NULL; } GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; m_dev->Merge(tex, src_hw, dst, fs, slbg, mmod, c); if(m_regs->SMODE2.INT && m_interlace > 0) { if (m_interlace == 7 && m_regs->SMODE2.FFMD == 1) // Auto interlace enabled / Odd frame interlace setting { int field2 = 0; int mode = 2; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } else { int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } if(m_shadeboost) { m_dev->ShadeBoost(); } if (m_shaderfx) { m_dev->ExternalFX(); } if(m_fxaa) { m_dev->FXAA(); } } return true; }