void GSTextureCacheOGL::Read(Source* t, const GSVector4i& r) { const GIFRegTEX0& TEX0 = t->m_TEX0; // FIXME Create a get function to avoid the useless copy // Note: With openGL 4.5 you can use glGetTextureSubImage if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) { m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r); GSTexture::GSMap m; GSVector4i r_offscreen(0, 0, r.width(), r.height()); if (offscreen->Map(m, &r_offscreen)) { GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); offscreen->Unmap(); } // FIXME invalidate data m_renderer->m_dev->Recycle(offscreen); } }
void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); int w = std::max<int>(cr.width(), 1); int h = std::max<int>(cr.height(), 1); if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h)) { return; } } GL_PUSH("Present"); ClearRenderTarget(m_backbuffer, 0); if(m_current) { static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE, ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER, ShaderConvert_COMPLEX_FILTER}; // FIXME Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); } Flip(); }
void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); int w = std::max<int>(cr.width(), 1); int h = std::max<int>(cr.height(), 1); if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h)) { return; } } GL_PUSH("Present"); ClearRenderTarget(m_backbuffer, 0); if(m_current) { static int s_shader[5] = {0, 5, 6, 8, 9}; // FIXME Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); } Flip(); GL_POP(); }
bool GSRenderer::BeginCapture() { GSVector4i disp = m_wnd->GetClientRect().fit(m_aspectratio); float aspect = (float)disp.width() / max(1, disp.height()); return m_capture.BeginCapture(GetTvRefreshRate(), GetInternalResolution(), aspect); }
bool GSTexture9::Update(const GSVector4i& r, const void* data, int pitch, int layer) { if(m_surface) { D3DLOCKED_RECT lr; if(SUCCEEDED(m_surface->LockRect(&lr, r, 0))) { uint8* src = (uint8*)data; uint8* dst = (uint8*)lr.pBits; int bytes = r.width() * sizeof(uint32); switch(m_desc.Format) { case D3DFMT_A8: bytes >>= 2; break; case D3DFMT_A1R5G5B5: bytes >>= 1; break; default: ASSERT(m_desc.Format == D3DFMT_A8R8G8B8); break; } bytes = std::min(bytes, pitch); bytes = std::min(bytes, lr.Pitch); for(int i = 0, j = r.height(); i < j; i++, src += pitch, dst += lr.Pitch) { memcpy(dst, src, bytes); } m_surface->UnlockRect(); return true; } }
bool GSWndDX::Create(const string& title, int w, int h) { if(m_hWnd) return false; m_managed = true; WNDCLASS wc; memset(&wc, 0, sizeof(wc)); wc.style = CS_HREDRAW | CS_VREDRAW | CS_DBLCLKS; wc.lpfnWndProc = WndProc; wc.hInstance = theApp.GetModuleHandle(); // TODO: wc.hIcon = ; wc.hCursor = LoadCursor(NULL, IDC_ARROW); wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH); wc.lpszClassName = "GSWndDX"; if(!GetClassInfo(wc.hInstance, wc.lpszClassName, &wc)) { if(!RegisterClass(&wc)) { return false; } } DWORD style = WS_CLIPCHILDREN | WS_CLIPSIBLINGS | WS_OVERLAPPEDWINDOW | WS_BORDER; GSVector4i r; GetWindowRect(GetDesktopWindow(), r); bool remote = !!GetSystemMetrics(SM_REMOTESESSION); if(w <= 0 || h <= 0 || remote) { w = r.width() / 3; h = r.width() / 4; if(!remote) { w *= 2; h *= 2; } } r.left = (r.left + r.right - w) / 2; r.top = (r.top + r.bottom - h) / 2; r.right = r.left + w; r.bottom = r.top + h; AdjustWindowRect(r, style, FALSE); m_hWnd = CreateWindow(wc.lpszClassName, title.c_str(), style, r.left, r.top, r.width(), r.height(), NULL, NULL, wc.hInstance, (LPVOID)this); return m_hWnd != NULL; }
bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r) { GSVector4i r = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y); // LOTS OF CRAP CODE!!!! PLEASE FIX ME !!! if (m_type == GSTexture::Offscreen) { // The fastest way will be to use a PBO to read the data asynchronously. Unfortunately GSdx // architecture is waiting the data right now. #if 0 // Maybe it is as good as the code below. I don't know // With openGL 4.5 you can use glGetTextureSubImage glGetTextureSubImage(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, 0, r.width(), r.height(), 0, m_int_format, m_int_type, m_size.x * m_size.y * 4, m_local_buffer); #else // Bind the texture to the read framebuffer to avoid any disturbance glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0); glReadPixels(r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, m_local_buffer); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); #endif m.bits = m_local_buffer; m.pitch = m_size.x << m_int_shift; return true; } else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap m_clean = false; uint32 row_byte = r.width() << m_int_shift; uint32 map_size = r.height() * row_byte; m.bits = (uint8*)PboPool::Map(map_size); m.pitch = row_byte; #ifdef ENABLE_OGL_DEBUG_MEM_BW g_real_texture_upload_byte += map_size; #endif // Save the area for the unmap m_r_x = r.x; m_r_y = r.y; m_r_w = r.width(); m_r_h = r.height(); return true; } return false; }
GSTexture* GPURendererSW::GetOutput() { GSVector4i r = m_env.GetDisplayRect(); r.left <<= m_scale.x; r.top <<= m_scale.y; r.right <<= m_scale.x; r.bottom <<= m_scale.y; if(m_dev->ResizeTexture(&m_texture, r.width(), r.height())) { m_mem.ReadFrame32(r, m_output, !!m_env.STATUS.ISRGB24); m_texture->Update(r.rsize(), m_output, m_mem.GetWidth() * sizeof(uint32)); } return m_texture; }
void GPULocalMemory::FillRect(const GSVector4i& r, uint16 c) { Invalidate(r); uint16* RESTRICT dst = GetPixelAddressScaled(r.left, r.top); int w = r.width() << m_scale.x; int h = r.height() << m_scale.y; int pitch = GetWidth(); for(int j = 0; j < h; j++, dst += pitch) { for(int i = 0; i < w; i++) { dst[i] = c; } } }
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) { GPUScanlineSelector sel = m_global.sel; const GSVector4* shift = GPUSetupPrimCodeGenerator::m_shift; if(sel.tme && !sel.twin) { if(sel.sprite) { GSVector4i t = (GSVector4i(vertex[index[1]].t) >> 8) - GSVector4i::x00000001(); t = t.ps32(t); t = t.upl16(t); m_local.twin[2].u = t.xxxx(); m_local.twin[2].v = t.yyyy(); } else {
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen); GL_PUSH("Upload Texture %d", m_texture_id); m_dirty = true; m_clean = false; glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); char* src = (char*)data; uint32 row_byte = r.width() << m_int_shift; uint32 map_size = r.height() * row_byte; char* map = PboPool::Map(map_size); #ifdef ENABLE_OGL_DEBUG_MEM_BW g_real_texture_upload_byte += map_size; #endif // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch // Note: row_byte != pitch for (int h = 0; h < r.height(); h++) { memcpy(map, src, row_byte); map += row_byte; src += pitch; } PboPool::Unmap(); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset()); // FIXME OGL4: investigate, only 1 unpack buffer always bound PboPool::UnbindPbo(); PboPool::EndTransfer(); GL_POP(); return true; // For reference, standard upload without pbo (Used to crash on FGLRX) #if 0 // pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); // FIXME useful? glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior return true; #endif }
bool GSTextureSW::Update(const GSVector4i& r, const void* data, int pitch) { GSMap m; if(m_data != NULL && Map(m, &r)) { uint8* RESTRICT src = (uint8*)data; uint8* RESTRICT dst = m.bits; int rowbytes = r.width() << 2; for(int h = r.height(); h > 0; h--, src += pitch, dst += m.pitch) { memcpy(dst, src, rowbytes); } Unmap(); return true; } return false; }
void GSTextureCache11::Read(Source* t, const GSVector4i& r) { // FIXME: copy was copyied from openGL. It is unlikely to work. const GIFRegTEX0& TEX0 = t->m_TEX0; if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) { m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r); GSTexture::GSMap m; GSVector4i r_offscreen(0, 0, r.width(), r.height()); if (offscreen->Map(m, &r_offscreen)) { GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); offscreen->Unmap(); } // FIXME invalidate data m_renderer->m_dev->Recycle(offscreen); } }
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r) { if (!t->m_dirty.empty() || r.width() == 0 || r.height() == 0) return; const GIFRegTEX0& TEX0 = t->m_TEX0; GLuint fmt; int ps_shader; switch (TEX0.PSM) { case PSM_PSMCT32: case PSM_PSMCT24: fmt = GL_RGBA8; ps_shader = ShaderConvert_COPY; break; case PSM_PSMCT16: case PSM_PSMCT16S: fmt = GL_R16UI; ps_shader = ShaderConvert_RGBA8_TO_16_BITS; break; case PSM_PSMZ32: fmt = GL_R32UI; ps_shader = ShaderConvert_FLOAT32_TO_32_BITS; break; case PSM_PSMZ24: fmt = GL_R32UI; ps_shader = ShaderConvert_FLOAT32_TO_32_BITS; break; case PSM_PSMZ16: case PSM_PSMZ16S: fmt = GL_R16UI; ps_shader = ShaderConvert_FLOAT32_TO_32_BITS; break; default: return; } // Yes lots of logging, but I'm not confident with this code GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM); GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d", t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height()); GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader)) { GSTexture::GSMap m; GSVector4i r_offscreen(0, 0, r.width(), r.height()); if(offscreen->Map(m, &r_offscreen)) { // TODO: block level write GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); break; case PSM_PSMCT24: m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); break; case PSM_PSMCT16: case PSM_PSMCT16S: m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); break; case PSM_PSMZ32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); break; case PSM_PSMZ24: m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); break; case PSM_PSMZ16: case PSM_PSMZ16S: m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); break; default: ASSERT(0); } offscreen->Unmap(); } // FIXME invalidate data m_renderer->m_dev->Recycle(offscreen); } }
void GSRenderer::VSync(int field) { GSPerfMonAutoTimer pmat(&m_perfmon); m_perfmon.Put(GSPerfMon::Frame); Flush(); if(!m_dev->IsLost(true)) { if(!Merge(field ? 1 : 0)) { return; } } else { ResetDevice(); } m_dev->AgePool(); // osd if((m_perfmon.GetFrame() & 0x1f) == 0) { m_perfmon.Update(); double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); GSVector4i r = GetDisplayRect(); string s; #ifdef GSTITLEINFO_API_FORCE_VERBOSE if (1)//force verbose reply #else if (m_wnd->IsManaged()) #endif { //GSdx owns the window's title, be verbose. string s2 = m_regs->SMODE2.INT ? (string("Interlaced ") + (m_regs->SMODE2.FFMD ? "(frame)" : "(field)")) : "Progressive"; s = format( "%lld | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d/%d/%d | %d%% CPU | %.2f | %.2f", m_perfmon.GetFrame(), r.width(), r.height(), fps, (int)(100.0 * fps / GetFPS()), s2.c_str(), theApp.m_gs_interlace[m_interlace].name.c_str(), theApp.m_gs_aspectratio[m_aspectratio].name.c_str(), (int)m_perfmon.Get(GSPerfMon::SyncPoint), (int)m_perfmon.Get(GSPerfMon::Prim), (int)m_perfmon.Get(GSPerfMon::Draw), m_perfmon.CPU(), m_perfmon.Get(GSPerfMon::Swizzle) / 1024, m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 ); double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); if(fillrate > 0) { s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024)); int sum = 0; for(int i = 0; i < 16; i++) { sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); } s += format(" | %d%% CPU", sum); } } else { // Satisfy PCSX2's request for title info: minimal verbosity due to more external title text s = format("%dx%d | %s", r.width(), r.height(), theApp.m_gs_interlace[m_interlace].name.c_str()); } if(m_capture.IsCapturing()) { s += " | Recording..."; } if(m_wnd->IsManaged()) { m_wnd->SetWindowText(s.c_str()); } else { // note: do not use TryEnterCriticalSection. It is unnecessary code complication in // an area that absolutely does not matter (even if it were 100 times slower, it wouldn't // be noticeable). Besides, these locks are extremely short -- overhead of conditional // is way more expensive than just waiting for the CriticalSection in 1 of 10,000,000 tries. --air GSAutoLock lock(&m_pGSsetTitle_Crit); strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1); m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0; // make sure null terminated even if text overflows } } else { // [TODO] // We don't have window title rights, or the window has no title, // so let's use actual OSD! } if(m_frameskip) { return; } // present m_dev->Present(m_wnd->GetClientRect().fit(m_aspectratio), m_shader); // snapshot if(!m_snapshot.empty()) { bool shift = false; #ifdef _WINDOWS shift = !!(::GetAsyncKeyState(VK_SHIFT) & 0x8000); #else shift = m_shift_key; #endif if(!m_dump && shift) { GSFreezeData fd; fd.size = 0; fd.data = NULL; Freeze(&fd, true); fd.data = new uint8[fd.size]; Freeze(&fd, false); m_dump.Open(m_snapshot, m_crc, fd, m_regs); delete [] fd.data; } if(GSTexture* t = m_dev->GetCurrent()) { t->Save(m_snapshot + ".bmp"); } m_snapshot.clear(); } else { if(m_dump) { bool control = false; #ifdef _WINDOWS control = !!(::GetAsyncKeyState(VK_CONTROL) & 0x8000); #else control = m_control_key; #endif m_dump.VSync(field, !control, m_regs); } } // capture if(m_capture.IsCapturing()) { if(GSTexture* current = m_dev->GetCurrent()) { GSVector2i size = m_capture.GetSize(); if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y)) { GSTexture::GSMap m; if(offscreen->Map(m)) { m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped()); offscreen->Unmap(); } m_dev->Recycle(offscreen); } } } }
//TODO: GSopen 1 => Drop? // Used by GSReplay. At least for now. // More or less copy pasted from GSWndDX::Create and GSWndWGL::Attach with a few // modifications bool GSWndWGL::Create(const string& title, int w, int h) { if(m_NativeWindow) return false; m_managed = true; WNDCLASS wc; memset(&wc, 0, sizeof(wc)); wc.style = CS_HREDRAW | CS_VREDRAW | CS_DBLCLKS | CS_OWNDC; wc.lpfnWndProc = WndProc; wc.hInstance = theApp.GetModuleHandle(); wc.hCursor = LoadCursor(NULL, IDC_ARROW); wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH); wc.lpszClassName = "GSWndOGL"; if (!GetClassInfo(wc.hInstance, wc.lpszClassName, &wc)) { if (!RegisterClass(&wc)) { return false; } } DWORD style = WS_CLIPCHILDREN | WS_CLIPSIBLINGS | WS_OVERLAPPEDWINDOW | WS_BORDER; GSVector4i r; GetWindowRect(GetDesktopWindow(), r); // Old GSOpen ModeWidth and ModeHeight are not necessary with this. bool remote = !!GetSystemMetrics(SM_REMOTESESSION); if (w <= 0 || h <= 0 || remote) { w = r.width() / 3; h = r.width() / 4; if (!remote) { w *= 2; h *= 2; } } r.left = (r.left + r.right - w) / 2; r.top = (r.top + r.bottom - h) / 2; r.right = r.left + w; r.bottom = r.top + h; AdjustWindowRect(r, style, FALSE); m_NativeWindow = CreateWindow(wc.lpszClassName, title.c_str(), style, r.left, r.top, r.width(), r.height(), NULL, NULL, wc.hInstance, (LPVOID)this); if (m_NativeWindow == NULL) return false; OpenWGLDisplay(); CreateContext(3, 3); AttachContext(); m_swapinterval = (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT"); PopulateGlFunction(); return true; }
bool GSRenderer::Merge(int field) { bool en[2]; GSVector4i fr[2]; GSVector4i dr[2]; int baseline = INT_MAX; for(int i = 0; i < 2; i++) { en[i] = IsEnabled(i); if(en[i]) { fr[i] = GetFrameRect(i); dr[i] = GetDisplayRect(i); baseline = min(dr[i].top, baseline); //printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w); } } if(!en[0] && !en[1]) { return false; } // try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero) // // NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending) bool samesrc = en[0] && en[1] && m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP && m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; // bool blurdetected = false; if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) { if(fr[0].eq(fr[1] + GSVector4i(0, -1, 0, 0)) && dr[0].eq(dr[1] + GSVector4i(0, 0, 0, 1)) || fr[1].eq(fr[0] + GSVector4i(0, -1, 0, 0)) && dr[1].eq(dr[0] + GSVector4i(0, 0, 0, 1))) { // persona 4: // // fr[0] = 0 0 640 448 // fr[1] = 0 1 640 448 // dr[0] = 159 50 779 498 // dr[1] = 159 50 779 497 // // second image shifted up by 1 pixel and blended over itself // // god of war: // // fr[0] = 0 1 512 448 // fr[1] = 0 0 512 448 // dr[0] = 127 50 639 497 // dr[1] = 127 50 639 498 // // same just the first image shifted int top = min(fr[0].top, fr[1].top); int bottom = max(dr[0].bottom, dr[1].bottom); fr[0].top = top; fr[1].top = top; dr[0].bottom = bottom; dr[1].bottom = bottom; // blurdetected = true; } else if(dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, 1, 0, 1)) || fr[1].eq(fr[0] + GSVector4i(0, 1, 0, 1)))) { // dq5: // // fr[0] = 0 1 512 445 // fr[1] = 0 0 512 444 // dr[0] = 127 50 639 494 // dr[1] = 127 50 639 494 int top = min(fr[0].top, fr[1].top); int bottom = min(fr[0].bottom, fr[1].bottom); fr[0].top = fr[1].top = top; fr[0].bottom = fr[1].bottom = bottom; // blurdetected = true; } //printf("samesrc = %d blurdetected = %d\n",samesrc,blurdetected); } GSVector2i fs(0, 0); GSVector2i ds(0, 0); GSTexture* tex[2] = {NULL, NULL}; if(samesrc && fr[0].bottom == fr[1].bottom) { tex[0] = GetOutput(0); tex[1] = tex[0]; // saves one texture fetch } else { if(en[0]) tex[0] = GetOutput(0); if(en[1]) tex[1] = GetOutput(1); } GSVector4 src[2]; GSVector4 dst[2]; for(int i = 0; i < 2; i++) { if(!en[i] || !tex[i]) continue; GSVector4i r = fr[i]; // overscan hack if(dr[i].height() > 512) // hmm { int y = GetDeviceSize(i).y; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) y /= 2; r.bottom = r.top + y; } GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); GSVector2 o(0, 0); if(dr[i].top - baseline >= 4) // 2? { o.y = tex[i]->GetScale().y * (dr[i].top - baseline); if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { o.y /= 2; } } dst[i] = GSVector4(o).xyxy() + scale * GSVector4(r.rsize()); fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); } ds = fs; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { ds.y *= 2; } bool slbg = m_regs->PMODE.SLBG; bool mmod = m_regs->PMODE.MMOD; if(tex[0] || tex[1]) { if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) { // the two outputs are identical, skip drawing one of them (the one that is alpha blended) tex[0] = NULL; } GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; m_dev->Merge(tex, src, dst, fs, slbg, mmod, c); if(m_regs->SMODE2.INT && m_interlace > 0) { if (m_interlace == 7 && m_regs->SMODE2.FFMD == 1) // Auto interlace enabled / Odd frame interlace setting { int field2 = 0; int mode = 2; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } else { int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } if(m_shadeboost) { m_dev->ShadeBoost(); } if (m_shaderfx) { m_dev->ExternalFX(); } if(m_fxaa) { m_dev->FXAA(); } } return true; }
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r) { if(t->m_type != RenderTarget) { ASSERT(0); return; } const GIFRegTEX0& TEX0 = t->m_TEX0; if(TEX0.PSM != PSM_PSMCT32 && TEX0.PSM != PSM_PSMCT24 && TEX0.PSM != PSM_PSMCT16 && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } if(!t->m_dirty.empty()) { return; } GL_PUSH("Texture Cache Read"); // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); GLuint format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? GL_R16UI : GL_RGBA8; //if (format == GL_R16UI) fprintf(stderr, "Format 16 bits integer\n"); #if 0 DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; #endif if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h, format)) { GSTexture::GSMap m; if(offscreen->Map(m)) { // TODO: block level write GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); break; case PSM_PSMCT24: m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); break; case PSM_PSMCT16: case PSM_PSMCT16S: m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); break; default: ASSERT(0); } offscreen->Unmap(); } // FIXME invalidate data m_renderer->m_dev->Recycle(offscreen); } GL_POP(); }
void GSTextureOGL::Clear(const void* data, const GSVector4i& area) { glClearTexSubImage(m_texture_id, GL_TEX_LEVEL_0, area.x, area.y, 0, area.width(), area.height(), 1, m_int_format, m_int_type, data); }
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen); // Default upload path for the texture is the Map/Unmap // This path is mostly used for palette. But also for texture that could // overflow the pbo buffer // Data upload is rather small typically 64B or 1024B. So don't bother with PBO // and directly send the data to the GL synchronously m_clean = false; uint32 row_byte = r.width() << m_int_shift; uint32 map_size = r.height() * row_byte; #ifdef ENABLE_OGL_DEBUG_MEM_BW g_real_texture_upload_byte += map_size; #endif #if 0 if (r.height() == 1) { // Palette data. Transfer is small either 64B or 1024B. // Sometimes it is faster, sometimes slower. glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); return true; } #endif GL_PUSH("Upload Texture %d", m_texture_id); // The easy solution without PBO #if 0 // Likely a bad texture glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior #endif // The complex solution with PBO #if 1 char* src = (char*)data; char* map = PboPool::Map(map_size); // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch // Note: row_byte != pitch for (int h = 0; h < r.height(); h++) { memcpy(map, src, row_byte); map += row_byte; src += pitch; } PboPool::Unmap(); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset()); // FIXME OGL4: investigate, only 1 unpack buffer always bound PboPool::UnbindPbo(); PboPool::EndTransfer(); #endif return true; }
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count) { const GSDrawingContext* context = m_state->m_context; int n = 1; switch(primclass) { case GS_POINT_CLASS: n = 1; break; case GS_LINE_CLASS: case GS_SPRITE_CLASS: n = 2; break; case GS_TRIANGLE_CLASS: n = 3; break; } GSVector4 tmin = s_minmax.xxxx(); GSVector4 tmax = s_minmax.yyyy(); GSVector4i cmin = GSVector4i::xffffffff(); GSVector4i cmax = GSVector4i::zero(); #if _M_SSE >= 0x401 GSVector4i pmin = GSVector4i::xffffffff(); GSVector4i pmax = GSVector4i::zero(); #else GSVector4 pmin = s_minmax.xxxx(); GSVector4 pmax = s_minmax.yyyy(); #endif const GSVertex* RESTRICT v = (GSVertex*)vertex; for(int i = 0; i < count; i += n) { if(primclass == GS_POINT_CLASS) { GSVector4i c(v[index[i]].m[0]); if(color) { cmin = cmin.min_u8(c); cmax = cmax.max_u8(c); } if(tme) { if(!fst) { GSVector4 stq = GSVector4::cast(c); GSVector4 q = stq.wwww(); stq = (stq.xyww() * q.rcpnr()).xyww(q); tmin = tmin.min(stq); tmax = tmax.max(stq); } else { GSVector4i uv(v[index[i]].m[1]); GSVector4 st = GSVector4(uv.uph16()).xyxy(); tmin = tmin.min(st); tmax = tmax.max(st); } } GSVector4i xyzf(v[index[i]].m[1]); GSVector4i xy = xyzf.upl16(); GSVector4i z = xyzf.yyyy(); #if _M_SSE >= 0x401 GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf)); pmin = pmin.min_u32(p); pmax = pmax.max_u32(p); #else GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww()))); pmin = pmin.min(p); pmax = pmax.max(p); #endif } else if(primclass == GS_LINE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); if(color) { if(iip) { cmin = cmin.min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c0.max_u8(c1)); } else { cmin = cmin.min_u8(c1); cmax = cmax.max_u8(c1); } } if(tme) { if(!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); GSVector4 q = stq0.wwww(stq1).rcpnr(); stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0); stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1); tmin = tmin.min(stq0.min(stq1)); tmax = tmax.max(stq0.max(stq1)); } else { GSVector4i uv0(v[index[i + 0]].m[1]); GSVector4i uv1(v[index[i + 1]].m[1]); GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); tmin = tmin.min(st0.min(st1)); tmax = tmax.max(st0.max(st1)); } } GSVector4i xyzf0(v[index[i + 0]].m[1]); GSVector4i xyzf1(v[index[i + 1]].m[1]); GSVector4i xy0 = xyzf0.upl16(); GSVector4i z0 = xyzf0.yyyy(); GSVector4i xy1 = xyzf1.upl16(); GSVector4i z1 = xyzf1.yyyy(); #if _M_SSE >= 0x401 GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); pmin = pmin.min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p0.max_u32(p1)); #else GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww()))); GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); pmin = pmin.min(p0.min(p1)); pmax = pmax.max(p0.max(p1)); #endif } else if(primclass == GS_TRIANGLE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); GSVector4i c2(v[index[i + 2]].m[0]); if(color) { if(iip) { cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1)); } else { cmin = cmin.min_u8(c2); cmax = cmax.max_u8(c2); } } if(tme) { if(!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); GSVector4 stq2 = GSVector4::cast(c2); GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr(); stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0); stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1); stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2); tmin = tmin.min(stq2).min(stq0.min(stq1)); tmax = tmax.max(stq2).max(stq0.max(stq1)); } else { GSVector4i uv0(v[index[i + 0]].m[1]); GSVector4i uv1(v[index[i + 1]].m[1]); GSVector4i uv2(v[index[i + 2]].m[1]); GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); GSVector4 st2 = GSVector4(uv2.uph16()).xyxy(); tmin = tmin.min(st2).min(st0.min(st1)); tmax = tmax.max(st2).max(st0.max(st1)); } } GSVector4i xyzf0(v[index[i + 0]].m[1]); GSVector4i xyzf1(v[index[i + 1]].m[1]); GSVector4i xyzf2(v[index[i + 2]].m[1]); GSVector4i xy0 = xyzf0.upl16(); GSVector4i z0 = xyzf0.yyyy(); GSVector4i xy1 = xyzf1.upl16(); GSVector4i z1 = xyzf1.yyyy(); GSVector4i xy2 = xyzf2.upl16(); GSVector4i z2 = xyzf2.yyyy(); #if _M_SSE >= 0x401 GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2)); pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1)); #else GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww()))); GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww()))); pmin = pmin.min(p2).min(p0.min(p1)); pmax = pmax.max(p2).max(p0.max(p1)); #endif } else if(primclass == GS_SPRITE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); if(color) { if(iip) { cmin = cmin.min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c0.max_u8(c1)); } else { cmin = cmin.min_u8(c1); cmax = cmax.max_u8(c1); } } if(tme) { if(!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); GSVector4 q = stq1.wwww().rcpnr(); stq0 = (stq0.xyww() * q).xyww(stq1); stq1 = (stq1.xyww() * q).xyww(stq1); tmin = tmin.min(stq0.min(stq1)); tmax = tmax.max(stq0.max(stq1)); } else { GSVector4i uv0(v[index[i + 0]].m[1]); GSVector4i uv1(v[index[i + 1]].m[1]); GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); tmin = tmin.min(st0.min(st1)); tmax = tmax.max(st0.max(st1)); } } GSVector4i xyzf0(v[index[i + 0]].m[1]); GSVector4i xyzf1(v[index[i + 1]].m[1]); GSVector4i xy0 = xyzf0.upl16(); GSVector4i z0 = xyzf0.yyyy(); GSVector4i xy1 = xyzf1.upl16(); GSVector4i z1 = xyzf1.yyyy(); #if _M_SSE >= 0x401 GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); pmin = pmin.min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p0.max_u32(p1)); #else GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww()))); GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); pmin = pmin.min(p0.min(p1)); pmax = pmax.max(p0.max(p1)); #endif } } #if _M_SSE >= 0x401 pmin = pmin.blend16<0x30>(pmin.srl32(1)); pmax = pmax.blend16<0x30>(pmax.srl32(1)); #endif GSVector4 o(context->XYOFFSET); GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f); m_min.p = (GSVector4(pmin) - o) * s; m_max.p = (GSVector4(pmax) - o) * s; if(tme) { if(fst) { s = GSVector4(1.0f / 16, 1.0f).xxyy(); } else { s = GSVector4(1 << context->TEX0.TW, 1 << context->TEX0.TH, 1, 1); } m_min.t = tmin * s; m_max.t = tmax * s; } else { m_min.t = GSVector4::zero(); m_max.t = GSVector4::zero(); } if(color) { m_min.c = cmin.zzzz().u8to32(); m_max.c = cmax.zzzz().u8to32(); } else { m_min.c = GSVector4i::zero(); m_max.c = GSVector4i::zero(); } }
bool GSRenderer::Merge(int field) { bool en[2]; GSVector4i fr[2]; GSVector4i dr[2]; GSVector2i display_baseline = { INT_MAX, INT_MAX }; GSVector2i frame_baseline = { INT_MAX, INT_MAX }; for(int i = 0; i < 2; i++) { en[i] = IsEnabled(i); if(en[i]) { fr[i] = GetFrameRect(i); dr[i] = GetDisplayRect(i); display_baseline.x = min(dr[i].left, display_baseline.x); display_baseline.y = min(dr[i].top, display_baseline.y); frame_baseline.x = min(fr[i].left, frame_baseline.x); frame_baseline.y = min(fr[i].top, frame_baseline.y); //printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w); } } if(!en[0] && !en[1]) { return false; } GL_PUSH("Renderer Merge %d (0: enabled %d 0x%x, 1: enabled %d 0x%x)", s_n, en[0], m_regs->DISP[0].DISPFB.Block(), en[1], m_regs->DISP[1].DISPFB.Block()); // try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero) // // NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending) bool samesrc = en[0] && en[1] && m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP && m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) { // persona 4: // // fr[0] = 0 0 640 448 // fr[1] = 0 1 640 448 // dr[0] = 159 50 779 498 // dr[1] = 159 50 779 497 // // second image shifted up by 1 pixel and blended over itself // // god of war: // // fr[0] = 0 1 512 448 // fr[1] = 0 0 512 448 // dr[0] = 127 50 639 497 // dr[1] = 127 50 639 498 // // same just the first image shifted // // These kinds of cases are now fixed by the more generic frame_diff code below, as the code here was too specific and has become obsolete. // NOTE: Persona 4 and God Of War are not rare exceptions, many games have the same(or very similar) offsets. int topDiff = fr[0].top - fr[1].top; if (dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, topDiff, 0, topDiff)) || fr[1].eq(fr[0] + GSVector4i(0, topDiff, 0, topDiff)))) { // dq5: // // fr[0] = 0 1 512 445 // fr[1] = 0 0 512 444 // dr[0] = 127 50 639 494 // dr[1] = 127 50 639 494 int top = min(fr[0].top, fr[1].top); int bottom = min(fr[0].bottom, fr[1].bottom); fr[0].top = fr[1].top = top; fr[0].bottom = fr[1].bottom = bottom; } } GSVector2i fs(0, 0); GSVector2i ds(0, 0); GSTexture* tex[3] = {NULL, NULL, NULL}; int y_offset[3] = {0, 0, 0}; s_n++; bool feedback_merge = m_regs->EXTWRITE.WRITE == 1; if(samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge) { tex[0] = GetOutput(0, y_offset[0]); tex[1] = tex[0]; // saves one texture fetch y_offset[1] = y_offset[0]; } else { if(en[0]) tex[0] = GetOutput(0, y_offset[0]); if(en[1]) tex[1] = GetOutput(1, y_offset[1]); if(feedback_merge) tex[2] = GetFeedbackOutput(); } GSVector4 src[2]; GSVector4 src_hw[2]; GSVector4 dst[2]; for(int i = 0; i < 2; i++) { if(!en[i] || !tex[i]) continue; GSVector4i r = fr[i]; GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy(); GSVector2 off(0); GSVector2i display_diff(dr[i].left - display_baseline.x, dr[i].top - display_baseline.y); GSVector2i frame_diff(fr[i].left - frame_baseline.x, fr[i].top - frame_baseline.y); // Time Crisis 2/3 uses two side by side images when in split screen mode. // Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII if(display_diff.x > 2) { off.x = tex[i]->GetScale().x * display_diff.x; } // If the DX offset is too small then consider the status of frame memory offsets, prevents blurring on Tenchu: Fatal Shadows, Worms 3D else if(display_diff.x != frame_diff.x) { off.x = tex[i]->GetScale().x * frame_diff.x; } if(display_diff.y >= 4) // Shouldn't this be >= 2? { off.y = tex[i]->GetScale().y * display_diff.y; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { off.y /= 2; } } else if(display_diff.y != frame_diff.y) { off.y = tex[i]->GetScale().y * frame_diff.y; } dst[i] = GSVector4(off).xyxy() + scale * GSVector4(r.rsize()); fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); } ds = fs; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { ds.y *= 2; } m_real_size = ds; bool slbg = m_regs->PMODE.SLBG; if(tex[0] || tex[1]) { if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) { // the two outputs are identical, skip drawing one of them (the one that is alpha blended) tex[0] = NULL; } GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; m_dev->Merge(tex, src_hw, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c); if(m_regs->SMODE2.INT && m_interlace > 0) { if(m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting { int field2 = 0; int mode = 2; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } else { int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } if(m_shadeboost) { m_dev->ShadeBoost(); } if(m_shaderfx) { m_dev->ExternalFX(); } if(m_fxaa) { m_dev->FXAA(); } } return true; }
void GSTextureCache9::Read(Target* t, const GSVector4i& r) { if(t->m_type != RenderTarget) { // TODO return; } const GIFRegTEX0& TEX0 = t->m_TEX0; if(TEX0.PSM != PSM_PSMCT32 && TEX0.PSM != PSM_PSMCT24 && TEX0.PSM != PSM_PSMCT16 && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } if(!t->m_dirty.empty()) { return; } // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h)) { GSTexture::GSMap m; if(offscreen->Map(m)) { // TODO: block level write GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r); break; case PSM_PSMCT24: m_renderer->m_mem.WritePixel24(m.bits, m.pitch, o, r); break; case PSM_PSMCT16: case PSM_PSMCT16S: m_renderer->m_mem.WriteFrame16(m.bits, m.pitch, o, r); break; default: ASSERT(0); } offscreen->Unmap(); } m_renderer->m_dev->Recycle(offscreen); } }
bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) { if(m_complete) { return true; } const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; GSVector2i bs = psm.bs; int shift = psm.pal == 0 ? 2 : 0; int tw = std::max<int>(1 << m_TEX0.TW, bs.x); int th = std::max<int>(1 << m_TEX0.TH, bs.y); GSVector4i r = rect; r = r.ralign<Align_Outside>(bs); if(r.eq(GSVector4i(0, 0, tw, th))) { m_complete = true; // lame, but better than nothing } if(m_buff == NULL) { uint32 pitch = (1 << m_tw) << shift; m_buff = _aligned_malloc(pitch * th * 4, 32); if(m_buff == NULL) { return false; } } GSLocalMemory& mem = m_state->m_mem; const GSOffset* RESTRICT off = m_offset; uint32 blocks = 0; GSLocalMemory::readTextureBlock rtxbP = psm.rtxbP; uint32 pitch = (1 << m_tw) << shift; uint8* dst = (uint8*)m_buff + pitch * r.top; int block_pitch = pitch * bs.y; r = r.srl32(3); bs.x >>= 3; bs.y >>= 3; shift += 3; if(m_repeating) { for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) { uint32 base = off->block.row[y]; for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) { uint32 block = (base + off->block.col[x]) % MAX_BLOCKS; uint32 row = i >> 5; uint32 col = 1 << (i & 31); if((m_valid[row] & col) == 0) { m_valid[row] |= col; (mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA); blocks++; } } } } else { for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)