// Initialises the four bit-field mask vectors.
// The constants cover, from low to high: bits 0-4, bits 5-9, bits 10-14 and bit 15
// (presumably the R, G, B and A fields of a 16-bit pixel, judging by the member names - confirm).
void GPULocalMemory::InitVectors()
{
	m_rxxx = GSVector4i(0x0000001f); // bits 0-4
	m_xgxx = GSVector4i(0x000003e0); // bits 5-9
	m_xxbx = GSVector4i(0x00007c00); // bits 10-14
	m_xxxa = GSVector4i(0x00008000); // bit 15
}
void GSDevice11::OMSetRenderTargets(const GSVector2i& rtsize, int count, ID3D11UnorderedAccessView** uav, uint32* counters, const GSVector4i* scissor) { m_ctx->OMSetRenderTargetsAndUnorderedAccessViews(0, NULL, NULL, 0, count, uav, counters); m_state.rtv = NULL; m_state.dsv = NULL; if(m_state.viewport != rtsize) { m_state.viewport = rtsize; D3D11_VIEWPORT vp; memset(&vp, 0, sizeof(vp)); vp.TopLeftX = 0; vp.TopLeftY = 0; vp.Width = (float)rtsize.x; vp.Height = (float)rtsize.y; vp.MinDepth = 0.0f; vp.MaxDepth = 1.0f; m_ctx->RSSetViewports(1, &vp); } GSVector4i r = scissor ? *scissor : GSVector4i(rtsize).zwxy(); if(!m_state.scissor.eq(r)) { m_state.scissor = r; m_ctx->RSSetScissorRects(1, r); } }
// Creates a software texture cache entry for the given TEX0/TEXA registers.
//
// state - owning GS state, used to reach local memory
// tw0   - requested width exponent; 0 means "derive it from TEX0 and the pixel format"
GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
	: m_state(state)
	, m_buff(NULL)
	, m_tw(tw0)
	, m_age(0)
	, m_complete(false)
	, m_p2t(NULL)
{
	m_TEX0 = TEX0;
	m_TEXA = TEXA;

	if(m_tw == 0)
	{
		// makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
		const int min_tw = (GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0) ? 3 : 5;

		m_tw = std::max<int>(m_TEX0.TW, min_tw);
	}

	memset(m_valid, 0, sizeof(m_valid));

	m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);

	m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

	// page list and page bitmap for the whole (1 << TW) x (1 << TH) texture
	const GSVector4i whole(0, 0, 1 << TEX0.TW, 1 << TEX0.TH);

	m_pages.n = m_offset->GetPages(whole);

	memcpy(m_pages.bm, m_offset->GetPagesAsBits(TEX0), sizeof(m_pages.bm));

	// repeating mode always works, it is just slightly slower
	m_repeating = m_TEX0.IsRepeating();

	if(!m_repeating)
	{
		return;
	}

	m_p2t = m_state->m_mem.GetPage2TileMap(m_TEX0);
}
// Maps the texture for CPU access.
//
// - Offscreen textures are read back with glReadPixels into m_local_buffer.
// - Texture / RenderTarget types get a PBO-backed upload buffer; the mapped area is
//   remembered in m_r_x/m_r_y/m_r_w/m_r_h for Unmap.
// - Any other type cannot be mapped and yields false.
//
// _r optionally restricts the mapped area; NULL selects the whole texture.
bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r)
{
	GSVector4i r;

	if (_r)
		r = *_r;
	else
		r = GSVector4i(0, 0, m_size.x, m_size.y);

	const bool readback = (m_type == GSTexture::Offscreen);
	const bool upload = (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget);

	if (!readback && !upload)
		return false;

	// FIXME: this function is in need of a cleanup

	if (readback) {
		// The fastest way will be to use a PBO to read the data asynchronously. Unfortunately GSdx
		// architecture is waiting the data right now.

#if 0
		// Maybe it is as good as the code below. I don't know
		// With openGL 4.5 you can use glGetTextureSubImage
		glGetTextureImage(m_texture_id, GL_TEX_LEVEL_0, m_int_format, m_int_type, 1024*1024*16, m_local_buffer);
#else
		// Bind the texture to the read framebuffer to avoid any disturbance
		glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
		glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0);

		glPixelStorei(GL_PACK_ALIGNMENT, m_int_alignment);
		glReadPixels(r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, m_local_buffer);

		glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
#endif

		m.pitch = m_size.x << m_int_shift;
		m.bits = m_local_buffer;

		return true;
	}

	// Upload path (Texture or RenderTarget)
	GL_PUSH("Upload Texture %d", m_texture_id); // POP is in Unmap

	m_clean = false;
	m_dirty = true;

	uint32 row_byte = r.width() << m_int_shift;
	uint32 map_size = r.height() * row_byte;

	m.pitch = row_byte;
	m.bits = (uint8*)PboPool::Map(map_size);

#ifdef ENABLE_OGL_DEBUG_MEM_BW
	g_real_texture_upload_byte += map_size;
#endif

	// Save the area for the unmap
	m_r_x = r.x;
	m_r_y = r.y;
	m_r_w = r.width();
	m_r_h = r.height();

	return true;
}
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data) { memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global)); if(m_global.sel.tme && m_global.sel.twin) { uint32 u, v; u = ~(m_global.twin.x << 3) & 0xff; // TWW v = ~(m_global.twin.y << 3) & 0xff; // TWH m_local.twin[0].u = GSVector4i((u << 16) | u); m_local.twin[0].v = GSVector4i((v << 16) | v); u = m_global.twin.z << 3; // TWX v = m_global.twin.w << 3; // TWY m_local.twin[1].u = GSVector4i((u << 16) | u) & ~m_local.twin[0].u; m_local.twin[1].v = GSVector4i((v << 16) | v) & ~m_local.twin[0].v; } m_ds = m_ds_map[m_global.sel]; m_de = NULL; m_dr = NULL; // TODO // doesn't need all bits => less functions generated GPUScanlineSelector sel; sel.key = 0; sel.iip = m_global.sel.iip; sel.tfx = m_global.sel.tfx; sel.twin = m_global.sel.twin; sel.sprite = m_global.sel.sprite; m_sp = m_sp_map[sel]; }
// Rasterizes a triangle whose edge v[0]-v[1] is the scan direction and v[2] is the third vertex.
//
// v       - three vertices; one of v[0]/v[1] is modified in place (it becomes the left edge walker)
// scissor - clipping rectangle, converted to float for the min/max clamping below
//
// NOTE(review): the name suggests v[0] and v[1] share the same y and v[2] lies below/above them -
// confirm against the caller.
void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
{
	GSVertexSW longest;

	longest.p = v[1].p - v[0].p;

	// i combines the sign of (v[1] - v[0]) with a "difference is zero" test
	// (presumably bit 0 = x delta negative, bit 1 = x delta is zero - verify upl()/mask() semantics)
	int i = longest.p.upl(longest.p == GSVector4::zero()).mask();

	// bail out when bit 1 is set (presumably a degenerate, zero-width edge)
	if(i & 2) return;

	i &= 1;

	// l is the vertex used as the left edge start, r the position of the other one
	GSVertexSW& l = v[i];
	GSVector4& r = v[1 - i].p;

	GSVector4 fscissor(scissor);

	// vertical range: ceil of the two y coordinates, clamped against the scissor top/bottom
	GSVector4 tb = l.p.upl(v[2].p).ceil();
	GSVector4 tbmax = tb.max(fscissor.yyyy());
	GSVector4 tbmin = tb.min(fscissor.wwww());
	GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));

	int top = tbi.extract32<0>();
	int bottom = tbi.extract32<2>();

	// nothing to draw when the clamped range is empty
	if(top >= bottom) return;

	longest.t = v[1].t - v[0].t;
	longest.c = v[1].c - v[0].c;

	// per-pixel step along a scanline: attribute deltas divided by the x delta
	GSVertexSW dscan = longest * longest.p.xxxx().rcp();

	GSVertexSW vl = v[2] - l;
	GSVector4 vr = v[2].p - r;

	// per-scanline steps of the left edge (all attributes) and the right edge (position only)
	GSVertexSW dl = vl / vl.p.yyyy();
	GSVector4 dr = vr / vr.yyyy();

	// distance from the left vertex to the first (clamped) scanline
	GSVector4 dy = tbmax.zzzz() - l.p.yyyy();

	// pack the right edge x and its step into the y lane of the left edge data
	l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
	dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y

	// pre-step the edge to the first scanline
	l += dl * dy;

	m_dsf.ssp(v, dscan);

	DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
}
// Note: hack is safe, but it could impact the perf a little (normally games do only a couple of clear by frame) void GSRendererHW::OI_GsMemClear() { // Rectangle draw without texture if ((m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && !PRIM->TME && !PRIM->ABE) { // 0 clear if (m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(0))) { GL_INS("OI_GsMemClear"); GSOffset* off = m_context->offset.fb; GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in)); // Based on WritePixel32 for(int y = r.top; y < r.bottom; y++) { uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]]; int* RESTRICT col = off->pixel.col[0]; for(int x = r.left; x < r.right; x++) { d[col[x]] = 0; // Here the constant color } } } }
// Binds a render target and/or depth-stencil texture and brings the viewport and scissor
// rectangle in line with the target size. State that already matches the cache is not re-sent.
//
// rt / ds  - either may be NULL, but not both (throws GSDXRecoverableError in that case)
// scissor  - optional scissor rectangle; NULL selects the full target
void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)
{
	if(rt == NULL && ds == NULL)
	{
		throw GSDXRecoverableError();
	}

	ID3D11RenderTargetView* rtv = NULL;
	ID3D11DepthStencilView* dsv = NULL;

	if(rt) rtv = *(GSTexture11*)rt;
	if(ds) dsv = *(GSTexture11*)ds;

	const bool same_targets = (m_state.rtv == rtv) && (m_state.dsv == dsv);

	if(!same_targets)
	{
		m_state.rtv = rtv;
		m_state.dsv = dsv;

		m_ctx->OMSetRenderTargets(1, &rtv, dsv);
	}

	// the colour target decides the size when present, otherwise the depth buffer does
	GSVector2i size = rt ? rt->GetSize() : ds->GetSize();

	if(m_state.viewport != size)
	{
		bool isNative = theApp.GetConfig("upscale_multiplier", 1) == 1;

		m_state.viewport = size;

		// upscaled rendering without the sprite hack shifts the viewport origin slightly
		const float origin = (spritehack > 0 || isNative) ? 0.0f : -0.01f;

		// value-initialisation zeroes MinDepth
		D3D11_VIEWPORT vp = {};

		vp.TopLeftX = origin;
		vp.TopLeftY = origin;
		vp.Width = (float)size.x;
		vp.Height = (float)size.y;
		vp.MaxDepth = 1.0f;

		m_ctx->RSSetViewports(1, &vp);
	}

	GSVector4i r;

	if(scissor)
	{
		r = *scissor;
	}
	else
	{
		r = GSVector4i(size).zwxy();
	}

	if(m_state.scissor.eq(r))
	{
		return;
	}

	m_state.scissor = r;

	m_ctx->RSSetScissorRects(1, r);
}
// Queries the X11 window geometry and returns it as a rectangle anchored at (0, 0).
// The display connection is opened on first use.
GSVector4i GSWndOGL::GetClientRect()
{
	// initial size, overwritten by XGetGeometry
	unsigned int width = 640;
	unsigned int height = 480;

	// outputs of XGetGeometry that are not used here
	Window root_unused;
	int x_unused;
	int y_unused;
	unsigned int border_unused;
	unsigned int depth_unused;

	if (m_NativeDisplay == NULL)
		m_NativeDisplay = XOpenDisplay(NULL);

	XGetGeometry(m_NativeDisplay, m_NativeWindow, &root_unused, &x_unused, &y_unused, &width, &height, &border_unused, &depth_unused);

	return GSVector4i(0, 0, (int)width, (int)height);
}
void Clear() { fbo = 0; viewport = GSVector2i(0, 0); scissor = GSVector4i(0, 0, 0, 0); blend = false; eq_RGB = 0; eq_A = 0; f_sRGB = 0; f_dRGB = 0; f_sA = 0; f_dA = 0; r_msk = true; g_msk = true; b_msk = true; a_msk = true; bf = 0.0; depth = false; depth_func = 0; depth_mask = false; stencil = false; stencil_func = 0; stencil_pass = 0; ubo = 0; ps_ss = 0; rt = 0; ds = 0; tex_unit[0] = 0; tex_unit[1] = 0; tex = 0; tex_handle[0] = 0; tex_handle[1] = 0; ps = 0; gs = 0; vs = 0; program = 0; dirty_prog = false; dirty_subroutine_vs = false; dirty_subroutine_ps = false; dirty_ressources = false; }
// Maps the texture memory for direct access.
//
// m - receives the address of the first requested pixel and the row pitch
// r - optional area to map; NULL selects the whole texture
//
// Returns false when there is no backing memory, when the area does not fit inside the
// texture, or when the texture is already mapped (m_mapped flag already set).
bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
{
	GSVector4i area;

	if(r != NULL)
	{
		area = *r;
	}
	else
	{
		area = GSVector4i(0, 0, m_size.x, m_size.y);
	}

	if(m_data == NULL)
	{
		return false;
	}

	const bool inside = area.left >= 0 && area.right <= m_size.x && area.top >= 0 && area.bottom <= m_size.y;

	if(!inside)
	{
		return false;
	}

	// test_and_set returns the previous value: true means somebody else holds the mapping
	if(m_mapped.test_and_set(std::memory_order_acquire))
	{
		return false;
	}

	// 4 bytes per pixel, hence the shift by 2 on the x offset
	m.bits = (uint8*)m_data + m_pitch * area.top + (area.left << 2);
	m.pitch = m_pitch;

	return true;
}
// Binds a render target and/or depth-stencil texture and brings the viewport and scissor
// rectangle in line with the target size. State that already matches the cache is not re-sent.
//
// rt / ds  - either may be NULL. The size comes from rt when present, otherwise from ds.
//            When both are NULL the targets are unbound and viewport/scissor are left untouched.
// scissor  - optional scissor rectangle; NULL selects the full target
void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)
{
	ID3D11RenderTargetView* rtv = NULL;
	ID3D11DepthStencilView* dsv = NULL;

	if(rt) rtv = *(GSTexture11*)rt;
	if(ds) dsv = *(GSTexture11*)ds;

	if(m_state.rtv != rtv || m_state.dsv != dsv)
	{
		m_state.rtv = rtv;
		m_state.dsv = dsv;

		m_ctx->OMSetRenderTargets(1, &rtv, dsv);
	}

	// rt used to be dereferenced unconditionally below, which crashed on depth-only binds
	// even though rt is explicitly allowed to be NULL above

	if(!rt && !ds)
	{
		// nothing to size the viewport / scissor from
		return;
	}

	GSVector2i size = rt ? rt->GetSize() : ds->GetSize();

	if(m_state.viewport != size)
	{
		m_state.viewport = size;

		D3D11_VIEWPORT vp;

		memset(&vp, 0, sizeof(vp));

		vp.TopLeftX = 0;
		vp.TopLeftY = 0;
		vp.Width = (FLOAT)size.x;
		vp.Height = (FLOAT)size.y;
		vp.MinDepth = 0.0f;
		vp.MaxDepth = 1.0f;

		m_ctx->RSSetViewports(1, &vp);
	}

	GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy();

	if(!m_state.scissor.eq(r))
	{
		m_state.scissor = r;

		m_ctx->RSSetScissorRects(1, r);
	}
}
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& fscissor) { ASSERT(top < bottom); while(1) { do { if(IsOneOfMyScanlines(top)) { GSVector4 lr = l.p.xyxy(r).ceil(); GSVector4 lrmax = lr.max(fscissor.xxxx()); GSVector4 lrmin = lr.min(fscissor.zzzz()); GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin)); int left = lri.extract32<0>(); int right = lri.extract32<2>(); int pixels = right - left; if(pixels > 0) { m_stats.pixels += pixels; GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx(); m_dsf.ssl(right, left, top, scan); } } } while(0); if(++top >= bottom) break; l += dl; r += dr; } }
void Clear() { fbo = 0; viewport = GSVector2i(0, 0); scissor = GSVector4i(0, 0, 0, 0); blend = false; eq_RGB = 0; f_sRGB = 0; f_dRGB = 0; wrgba = 0xF; bf = 0.0; depth = false; depth_func = 0; depth_mask = false; stencil = false; stencil_func = 0; stencil_pass = 0; ubo = 0; ps_ss = 0; rt = 0; ds = 0; for (size_t i = 0; i < countof(tex_unit); i++) tex_unit[i] = 0; for (size_t i = 0; i < countof(tex_handle); i++) tex_handle[i] = 0; ps = 0; gs = 0; vs = 0; program = 0; dirty_prog = false; dirty_ressources = false; }
void GSDrawScanlineCodeGenerator::InitVectors() { #if _M_SSE >= 0x501 GSVector8 log2_coef[4] = { GSVector8(0.204446009836232697516f), GSVector8(-1.04913055217340124191f), GSVector8(2.28330284476918490682f), GSVector8(1.0f), }; for (size_t n = 0; n < countof(log2_coef); ++n) m_log2_coef[n] = log2_coef[n]; #else GSVector4i test[8] = { GSVector4i::zero(), GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), GSVector4i::zero(), }; GSVector4 log2_coef[4] = { GSVector4(0.204446009836232697516f), GSVector4(-1.04913055217340124191f), GSVector4(2.28330284476918490682f), GSVector4(1.0f), }; for (size_t n = 0; n < countof(test); ++n) m_test[n] = test[n]; for (size_t n = 0; n < countof(log2_coef); ++n) m_log2_coef[n] = log2_coef[n]; #endif }
{ lines = 0; } } else if(m_vt.m_primclass == GS_LINE_CLASS) { if(m_vertex.next == lines * 2) { // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines, // but we use the stored video data to create a new texture, and replace the lines with two triangles m_dev->Recycle(t->m_texture); t->m_texture = m_dev->CreateTexture(512, 512); t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4); m_vertex.buff[2] = m_vertex.buff[m_vertex.next - 2]; m_vertex.buff[3] = m_vertex.buff[m_vertex.next - 1]; m_index.buff[0] = 0; m_index.buff[1] = 1; m_index.buff[2] = 2; m_index.buff[3] = 1; m_index.buff[4] = 2; m_index.buff[5] = 3; m_vertex.head = m_vertex.tail = m_vertex.next = 4; m_index.tail = 6; m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
// Prepares the input assembler for the current batch:
// 1. picks the D3D9 primitive topology from the primitive class and fixes up the index /
//    vertex data (index reordering for flat shading, sprite -> quad expansion),
// 2. sets the shade mode from PRIM->IIP,
// 3. converts the GS vertices into GSVertexHW9 inside the mapped vertex buffer,
// 4. uploads the index buffer and sets the topology.
void GSRendererDX9::SetupIA()
{
	D3DPRIMITIVETYPE topology;

	switch(m_vt.m_primclass)
	{
	case GS_POINT_CLASS:

		topology = D3DPT_POINTLIST;

		break;

	case GS_LINE_CLASS:

		topology = D3DPT_LINELIST;

		if(PRIM->IIP == 0)
		{
			// flat shading: swap the two indices of every line
			// (presumably so the vertex D3D9 takes the flat colour from is the GS one - confirm)

			for(size_t i = 0, j = m_index.tail; i < j; i += 2)
			{
				uint32 tmp = m_index.buff[i + 0];
				m_index.buff[i + 0] = m_index.buff[i + 1];
				m_index.buff[i + 1] = tmp;
			}
		}

		break;

	case GS_TRIANGLE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		if(PRIM->IIP == 0)
		{
			// flat shading: swap the first and last index of every triangle (same purpose as for lines)

			for(size_t i = 0, j = m_index.tail; i < j; i += 3)
			{
				uint32 tmp = m_index.buff[i + 0];
				m_index.buff[i + 0] = m_index.buff[i + 2];
				m_index.buff[i + 2] = tmp;
			}
		}

		break;

	case GS_SPRITE_CLASS:

		topology = D3DPT_TRIANGLELIST;

		// each sprite converted to quad needs twice the space

		while(m_vertex.tail * 2 > m_vertex.maxcount)
		{
			GrowVertexBuffer();
		}

		// assume vertices are tightly packed and sequentially indexed (it should be the case)

		if(m_vertex.next >= 2)
		{
			size_t count = m_vertex.next;

			// The expansion runs in place from the last sprite to the first: the destination
			// quad (q) is never located before its source pair (s), and each pair is copied to
			// v0/v1 before its quad is written, so no unread source vertex is overwritten.

			int i = (int)count * 2 - 4;
			GSVertex* s = &m_vertex.buff[count - 2];
			GSVertex* q = &m_vertex.buff[count * 2 - 4];
			uint32* RESTRICT index = &m_index.buff[count * 3 - 6];

			for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
			{
				GSVertex v0 = s[0];
				GSVertex v1 = s[1];

				// the whole quad uses the colour, depth and fog of the second sprite vertex

				v0.RGBAQ = v1.RGBAQ;
				v0.XYZ.Z = v1.XYZ.Z;
				v0.FOG = v1.FOG;

				// two opposite corners are the original vertices

				q[0] = v0;
				q[3] = v1;

				// swap x, s, u

				uint16 x = v0.XYZ.X;
				v0.XYZ.X = v1.XYZ.X;
				v1.XYZ.X = x;

				// note: this float shadows the source pointer `s` for the rest of the loop body
				float s = v0.ST.S;
				v0.ST.S = v1.ST.S;
				v1.ST.S = s;

				uint16 u = v0.U;
				v0.U = v1.U;
				v1.U = u;

				// the other two corners are the x/s/u-swapped copies

				q[1] = v0;
				q[2] = v1;

				// two triangles per quad: (0, 1, 2) and (1, 2, 3), relative to the quad base i

				index[0] = i + 0;
				index[1] = i + 1;
				index[2] = i + 2;
				index[3] = i + 1;
				index[4] = i + 2;
				index[5] = i + 3;
			}

			m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
			m_index.tail = count * 3;
		}

		break;

	default:
		__assume(0);
	}

	GSDevice9* dev = (GSDevice9*)m_dev;

	(*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO

	void* ptr = NULL;

	if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next))
	{
		GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff;
		GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr;

		for(uint32 i = 0; i < m_vertex.next; i++, s++, d++)
		{
			// position: x, y from the packed 16-bit pair, then z and (for ST texturing) q

			GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());

			if(PRIM->TME && !PRIM->FST)
			{
				p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
			}
			else
			{
				p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
			}

			// texture coordinates: UV (optionally masked by the "wild" hack) or ST; zero when untextured

			GSVector4 t = GSVector4::zero();

			if(PRIM->TME)
			{
				if(PRIM->FST)
				{
					if(UserHacks_WildHack && !isPackedUV_HackFlag)
					{
						t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16());
						//printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : "");
					}
					else
					{
						t = GSVector4(GSVector4i::load(s->UV).upl16());
					}
				}
				else
				{
					t = GSVector4::loadl(&s->ST);
				}
			}

			// the upper half of t carries the raw colour and fog bits

			t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));

			d->p = p;
			d->t = t;
		}

		dev->IAUnmapVertexBuffer();
	}

	dev->IASetIndexBuffer(m_index.buff, m_index.tail);
	dev->IASetPrimitiveTopology(topology);
}
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) { GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; const GSVector2i& rtsize = rt->GetSize(); const GSVector2& rtscale = rt->GetScale(); bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; GSTexture* rtcopy = NULL; ASSERT(m_dev != NULL); GSDeviceDX* dev = (GSDeviceDX*)m_dev; if(DATE) { if(dev->HasStencil()) { GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); GSVector4 dst = src * 2.0f + o.xxxx(); GSVertexPT1 vertices[] = { {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, }; dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); } else { rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); // I'll use VertexTrace when I consider it more trustworthy dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); } } // dev->BeginScene(); // om GSDeviceDX::OMDepthStencilSelector om_dssel; if(context->TEST.ZTE) { om_dssel.ztst = context->TEST.ZTST; om_dssel.zwe = !context->ZBUF.ZMSK; } else { om_dssel.ztst = ZTST_ALWAYS; } if(m_fba) { om_dssel.fba = context->FBA.FBA; } GSDeviceDX::OMBlendSelector om_bsel; if(!IsOpaque()) { om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; om_bsel.a = context->ALPHA.A; om_bsel.b = context->ALPHA.B; om_bsel.c = context->ALPHA.C; om_bsel.d = context->ALPHA.D; if(env.PABE.PABE) { if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) { // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader // cannot output anything over 0x80 (== 1.0) blending with 0x80 or 
turning it off gives the same result om_bsel.abe = 0; } else { //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. //ASSERT(0); } } } om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); // vs GSDeviceDX::VSSelector vs_sel; vs_sel.tme = PRIM->TME; vs_sel.fst = PRIM->FST; vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; vs_sel.rtcopy = !!rtcopy; // The real GS appears to do no masking based on the Z buffer format and writing larger Z values // than the buffer supports seems to be an error condition on the real GS, causing it to crash. // We are probably receiving bad coordinates from VU1 in these cases. if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) { if(context->ZBUF.PSM == PSM_PSMZ24) { if(m_vt.m_max.p.z > 0xffffff) { ASSERT(m_vt.m_min.p.z > 0xffffff); // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. if (m_vt.m_min.p.z > 0xffffff) { vs_sel.bppz = 1; om_dssel.ztst = ZTST_ALWAYS; } } } else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { if(m_vt.m_max.p.z > 0xffff) { ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo // Fixme : Same as above, I guess. if (m_vt.m_min.p.z > 0xffff) { vs_sel.bppz = 2; om_dssel.ztst = ZTST_ALWAYS; } } } } GSDeviceDX::VSConstantBuffer vs_cb; float sx = 2.0f * rtscale.x / (rtsize.x << 4); float sy = 2.0f * rtscale.y / (rtsize.y << 4); float ox = (float)(int)context->XYOFFSET.OFX; float oy = (float)(int)context->XYOFFSET.OFY; float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, //because DX10 and DX9 have a different pixel center.) // //The resulting shifted output aligns better with common blending / corona / blurring effects, //but introduces a few bad pixels on the edges. 
if(rt->LikelyOffset) { // DX9 has pixelcenter set to 0.0, so give it some value here if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } ox2 *= rt->OffsetHack_modx; oy2 *= rt->OffsetHack_mody; } vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); // gs GSDeviceDX::GSSelector gs_sel; gs_sel.iip = PRIM->IIP; gs_sel.prim = m_vt.m_primclass; // ps GSDeviceDX::PSSelector ps_sel; GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSConstantBuffer ps_cb; if(DATE) { if(dev->HasStencil()) { om_dssel.date = 1; } else { ps_sel.date = 1 + context->TEST.DATM; } } if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) { ps_sel.colclip = 1; } ps_sel.clr1 = om_bsel.IsCLR1(); ps_sel.fba = context->FBA.FBA; ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; if(UserHacks_AlphaHack) ps_sel.aout = 1; if(PRIM->FGE) { ps_sel.fog = 1; ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; } if(context->TEST.ATE) { ps_sel.atst = context->TEST.ATST; switch(ps_sel.atst) { case ATST_LESS: ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); break; case ATST_GREATER: ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); break; default: ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; break; } } else { ps_sel.atst = ATST_ALWAYS; } if(tex) { ps_sel.wms = context->CLAMP.WMS; ps_sel.wmt = context->CLAMP.WMT; ps_sel.fmt = tex->m_fmt; ps_sel.aem = env.TEXA.AEM; ps_sel.tfx = context->TEX0.TFX; ps_sel.tcc = context->TEX0.TCC; ps_sel.ltf = m_filter == 2 ? 
m_vt.IsLinear() : m_filter; ps_sel.rt = tex->m_target; int w = tex->m_texture->GetWidth(); int h = tex->m_texture->GetHeight(); int tw = (int)(1 << context->TEX0.TW); int th = (int)(1 << context->TEX0.TH); GSVector4 WH(tw, th, w, h); if(PRIM->FST) { vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); //Maybe better? //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); ps_sel.fst = 1; } ps_cb.WH = WH; ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); GSVector4 clamp(ps_cb.MskFix); GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); ps_cb.MinMax = clamp / WH.xyxy(); ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; ps_ssel.ltf = ps_sel.ltf; } else {
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side) { // orientation: // - true: |dv.p.y| > |dv.p.x| // - false |dv.p.x| > |dv.p.y| // side: // - true: top/left edge // - false: bottom/right edge // TODO: bit slow and too much duplicated code // TODO: inner pre-step is still missing (hardly noticable) GSVector4 fscissor(scissor); GSVector4 lrtb = v0.p.upl(v1.p).ceil(); if(orientation) { GSVector4 tbmax = lrtb.max(fscissor.yyyy()); GSVector4 tbmin = lrtb.min(fscissor.wwww()); GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin)); int top, bottom; GSVertexSW edge, dedge; if((dv.p >= GSVector4::zero()).mask() & 2) { top = tbi.extract32<0>(); bottom = tbi.extract32<3>(); if(top >= bottom) return; edge = v0; dedge = dv / dv.p.yyyy(); edge += dedge * (tbmax.zzzz() - edge.p.yyyy()); } else { top = tbi.extract32<1>(); bottom = tbi.extract32<2>(); if(top >= bottom) return; edge = v1; dedge = dv / dv.p.yyyy(); edge += dedge * (tbmax.wwww() - edge.p.yyyy()); } GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000); int x = p.extract32<0>(); int dx = p.extract32<1>(); if(side) { while(1) { do { int xi = x >> 16; int xf = x & 0xffff; if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi)) { m_stats.pixels++; edge.t.u32[3] = (0x10000 - xf) & 0xffff; m_dsf.ssle(xi + 1, xi, top, edge); edge.t.u32[3] = 0; } } while(0); if(++top >= bottom) break; edge += dedge; x += dx; } } else { while(1) { do { int xi = (x >> 16) + 1; int xf = x & 0xffff; if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi)) { m_stats.pixels++; edge.t.u32[3] = xf; m_dsf.ssle(xi + 1, xi, top, edge); edge.t.u32[3] = 0; } } while(0); if(++top >= bottom) break; edge += dedge; x += dx; } } }
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. * http://www.gnu.org/copyleft/gpl.html * */ #include "stdafx.h" #include "GSVector.h" const GSVector4i GSVector4i::m_xff[17] = { GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
{ blend(a, b, xmm0); } } void GPUDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask) { pand(b, mask); pandn(mask, a); por(b, mask); movdqa(a, b); } const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] = { GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff), GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff), GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff), GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000), GSVector4i::zero(), }; alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] = { {7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1}, {2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4}, {1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7}, {4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
void GPURendererSW::Draw() { GPUDrawScanline::SharedData* sd = new GPUDrawScanline::SharedData(); shared_ptr<GSRasterizerData> data(sd); GPUScanlineGlobalData& gd = sd->global; const GPUDrawingEnvironment& env = m_env; gd.sel.key = 0; gd.sel.iip = env.PRIM.IIP; gd.sel.me = env.STATUS.ME; if(env.PRIM.ABE) { gd.sel.abe = env.PRIM.ABE; gd.sel.abr = env.STATUS.ABR; } gd.sel.tge = env.PRIM.TGE; if(env.PRIM.TME) { gd.sel.tme = env.PRIM.TME; gd.sel.tlu = env.STATUS.TP < 2; gd.sel.twin = (env.TWIN.u32 & 0xfffff) != 0; gd.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0; const void* t = m_mem.GetTexture(env.STATUS.TP, env.STATUS.TX, env.STATUS.TY); if(!t) {ASSERT(0); return;} gd.tex = t; gd.clut = (uint16*)_aligned_malloc(sizeof(uint16) * 256, 32); memcpy(gd.clut, m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y), sizeof(uint16) * (env.STATUS.TP == 0 ? 16 : 256)); gd.twin = GSVector4i(env.TWIN.TWW, env.TWIN.TWH, env.TWIN.TWX, env.TWIN.TWY); } gd.sel.dtd = m_dither ? 
env.STATUS.DTD : 0; gd.sel.md = env.STATUS.MD; gd.sel.sprite = env.PRIM.TYPE == GPU_SPRITE; gd.sel.scalex = m_mem.GetScale().x; gd.vm = m_mem.GetPixelAddress(0, 0); data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x; data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y; data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth()); data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight()); data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); data->vertex = (GSVertexSW*)data->buff; data->vertex_count = m_count; memcpy(data->vertex, m_vertices, sizeof(GSVertexSW) * m_count); data->frame = m_perfmon.GetFrame(); int prims = 0; switch(env.PRIM.TYPE) { case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; prims = data->vertex_count / 3; break; case GPU_LINE: data->primclass = GS_LINE_CLASS; prims = data->vertex_count / 2; break; case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; prims = data->vertex_count / 2; break; default: __assume(0); } // TODO: VertexTrace GSVector4 tl(+1e10f); GSVector4 br(-1e10f); GSVertexSW* v = data->vertex; for(int i = 0, j = data->vertex_count; i < j; i++) { GSVector4 p = v[i].p; tl = tl.min(p); br = br.max(p); } data->bbox = GSVector4i(tl.xyxy(br)); GSVector4i r = data->bbox.rintersect(data->scissor); r.left >>= m_scale.x; r.top >>= m_scale.y; r.right >>= m_scale.x; r.bottom >>= m_scale.y; Invalidate(r); m_rl->Queue(data); m_rl->Sync(); m_perfmon.Put(GSPerfMon::Draw, 1); m_perfmon.Put(GSPerfMon::Prim, prims); m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels()); }
void GSRendererDX9::SetupIA(const float& sx, const float& sy) { D3DPRIMITIVETYPE topology; switch(m_vt.m_primclass) { case GS_POINT_CLASS: topology = D3DPT_POINTLIST; break; case GS_LINE_CLASS: topology = D3DPT_LINELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 2) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 1]; m_index.buff[i + 1] = tmp; } } break; case GS_TRIANGLE_CLASS: topology = D3DPT_TRIANGLELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 3) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 2]; m_index.buff[i + 2] = tmp; } } break; case GS_SPRITE_CLASS: topology = D3DPT_TRIANGLELIST; // each sprite converted to quad needs twice the space Lines2Sprites(); break; default: __assume(0); } GSDevice9* dev = (GSDevice9*)m_dev; (*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO void* ptr = NULL; if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next)) { GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff; GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr; for(uint32 i = 0; i < m_vertex.next; i++, s++, d++) { GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16()); if(PRIM->TME && !PRIM->FST) { p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q)); } else { p = p.xyxy(GSVector4::load((float)s->XYZ.Z)); } GSVector4 t = GSVector4::zero(); if(PRIM->TME) { if(PRIM->FST) { if(UserHacks_WildHack && !isPackedUV_HackFlag) { t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16()); //printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : ""); } else { t = GSVector4(GSVector4i::load(s->UV).upl16()); } } else { t = GSVector4::loadl(&s->ST); } } t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG))); d->p = p; d->t = t; } dev->IAUnmapVertexBuffer(); } dev->IASetIndexBuffer(m_index.buff, m_index.tail); dev->IASetPrimitiveTopology(topology); }
bool GSRenderer::Merge(int field) { bool en[2]; GSVector4i fr[2]; GSVector4i dr[2]; int baseline = INT_MAX; for(int i = 0; i < 2; i++) { en[i] = IsEnabled(i); if(en[i]) { fr[i] = GetFrameRect(i); dr[i] = GetDisplayRect(i); baseline = min(dr[i].top, baseline); //printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w); } } if(!en[0] && !en[1]) { return false; } // try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero) // // NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending) bool samesrc = en[0] && en[1] && m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP && m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; // bool blurdetected = false; if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) { if(fr[0].eq(fr[1] + GSVector4i(0, -1, 0, 0)) && dr[0].eq(dr[1] + GSVector4i(0, 0, 0, 1)) || fr[1].eq(fr[0] + GSVector4i(0, -1, 0, 0)) && dr[1].eq(dr[0] + GSVector4i(0, 0, 0, 1))) { // persona 4: // // fr[0] = 0 0 640 448 // fr[1] = 0 1 640 448 // dr[0] = 159 50 779 498 // dr[1] = 159 50 779 497 // // second image shifted up by 1 pixel and blended over itself // // god of war: // // fr[0] = 0 1 512 448 // fr[1] = 0 0 512 448 // dr[0] = 127 50 639 497 // dr[1] = 127 50 639 498 // // same just the first image shifted int top = min(fr[0].top, fr[1].top); int bottom = max(dr[0].bottom, dr[1].bottom); fr[0].top = top; fr[1].top = top; dr[0].bottom = bottom; dr[1].bottom = bottom; // blurdetected = true; } else if(dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, 1, 0, 1)) || fr[1].eq(fr[0] + GSVector4i(0, 1, 0, 1)))) { // dq5: // // fr[0] = 0 1 512 445 // fr[1] = 0 0 512 444 // dr[0] = 127 50 639 494 // dr[1] = 127 50 639 494 int 
top = min(fr[0].top, fr[1].top); int bottom = min(fr[0].bottom, fr[1].bottom); fr[0].top = fr[1].top = top; fr[0].bottom = fr[1].bottom = bottom; // blurdetected = true; } //printf("samesrc = %d blurdetected = %d\n",samesrc,blurdetected); } GSVector2i fs(0, 0); GSVector2i ds(0, 0); GSTexture* tex[2] = {NULL, NULL}; if(samesrc && fr[0].bottom == fr[1].bottom) { tex[0] = GetOutput(0); tex[1] = tex[0]; // saves one texture fetch } else { if(en[0]) tex[0] = GetOutput(0); if(en[1]) tex[1] = GetOutput(1); } GSVector4 src[2]; GSVector4 dst[2]; for(int i = 0; i < 2; i++) { if(!en[i] || !tex[i]) continue; GSVector4i r = fr[i]; // overscan hack if(dr[i].height() > 512) // hmm { int y = GetDeviceSize(i).y; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) y /= 2; r.bottom = r.top + y; } GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); GSVector2 o(0, 0); if(dr[i].top - baseline >= 4) // 2? { o.y = tex[i]->GetScale().y * (dr[i].top - baseline); if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { o.y /= 2; } } dst[i] = GSVector4(o).xyxy() + scale * GSVector4(r.rsize()); fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); } ds = fs; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { ds.y *= 2; } bool slbg = m_regs->PMODE.SLBG; bool mmod = m_regs->PMODE.MMOD; if(tex[0] || tex[1]) { if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) { // the two outputs are identical, skip drawing one of them (the one that is alpha blended) tex[0] = NULL; } GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; m_dev->Merge(tex, src, dst, fs, slbg, mmod, c); if(m_regs->SMODE2.INT && m_interlace > 0) { if (m_interlace == 7 && m_regs->SMODE2.FFMD == 1) // Auto interlace enabled / Odd frame interlace setting { int field2 = 0; int mode = 2; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? 
tex[1]->GetScale().y : tex[0]->GetScale().y); } else { int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } if(m_shadeboost) { m_dev->ShadeBoost(); } if (m_shaderfx) { m_dev->ExternalFX(); } if(m_fxaa) { m_dev->FXAA(); } } return true; }
// Reinterprets the 128-bit register of a GSVector4 as an integer vector.
// _mm_castps_si128 only changes the type of the register; no value conversion is done.
GSVector4i GSVector4i::cast(const GSVector4& v)
{
	const __m128i bits = _mm_castps_si128(v.m);

	return GSVector4i(bits);
}
void GSRendererHW::Draw() { if(m_dev->IsLost() || GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) { GL_INS("Warning skipping a draw call (%d)", s_n); s_n += 3; // Keep it sync with SW renderer return; } GL_PUSH("HW Draw %d", s_n); GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; // It is allowed to use the depth and rt at the same location. However at least 1 must // be disabled. // 1/ GoW uses a Cd blending on a 24 bits buffer (no alpha) // 2/ SuperMan really draws (0,0,0,0) color and a (0) 32-bits depth // 3/ 50cents really draws (0,0,0,128) color and a (0) 24 bits depth // Note: FF DoC has both buffer at same location but disable the depth test (write?) with ZTE = 0 const bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1)); const bool no_ds = !no_rt && ( // Depth is always pass (no read) and write are discarded (tekken 5). (Note: DATE is currently implemented with a stencil buffer) (context->ZBUF.ZMSK && m_context->TEST.ZTST == ZTST_ALWAYS && !m_context->TEST.DATE) || // Depth will be written through the RT (context->FRAME.FBP == context->ZBUF.ZBP && !PRIM->TME && !context->ZBUF.ZMSK && !context->FRAME.FBMSK && context->TEST.ZTE) ); GIFRegTEX0 TEX0; TEX0.TBP0 = context->FRAME.Block(); TEX0.TBW = context->FRAME.FBW; TEX0.PSM = context->FRAME.PSM; GSTextureCache::Target* rt = no_rt ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); GSTexture* rt_tex = rt ? rt->m_texture : NULL; TEX0.TBP0 = context->ZBUF.Block(); TEX0.TBW = context->FRAME.FBW; TEX0.PSM = context->ZBUF.PSM; GSTextureCache::Target* ds = no_ds ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); GSTexture* ds_tex = ds ? 
ds->m_texture : NULL; if(!(rt || no_rt) || !(ds || no_ds)) { GL_POP(); ASSERT(0); return; } GSTextureCache::Source* tex = NULL; m_texture_shuffle = false; if(PRIM->TME) { /* // m_tc->LookupSource will mess with the palette, should not, but we do this after, until it is sorted out if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } */ GSVector4i r; GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); tex = m_tc->LookupSource(context->TEX0, env.TEXA, r); if(!tex) { GL_POP(); return; } // FIXME: Could be removed on openGL if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } // Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target. // Initially code also tested the RT but it gives too much false-positive // // Both input and output are 16 bits and texture was initially 32 bits! m_texture_shuffle = (context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt; // Texture shuffle is not yet supported with strange clamp mode ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3)); } if (rt) { // Be sure texture shuffle detection is properly propagated // Otherwise set or clear the flag (Code in texture cache only set the flag) // Note: it is important to clear the flag when RT is used as a real 16 bits target. 
rt->m_32_bits_fmt = m_texture_shuffle || !(context->FRAME.PSM & 0x2); } if(s_dump) { uint64 frame = m_perfmon.GetFrame(); string s; if (s_n >= s_saven) { // Dump Register state s = format("%05d_context.txt", s_n); m_env.Dump(root_hw+s); m_context->Dump(root_hw+s); } if(s_savet && s_n >= s_saven && tex) { s = format("%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds", s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM, (int)context->CLAMP.WMS, (int)context->CLAMP.WMT, (int)context->CLAMP.MINU, (int)context->CLAMP.MAXU, (int)context->CLAMP.MINV, (int)context->CLAMP.MAXV); tex->m_texture->Save(root_hw+s, true); if(tex->m_palette) { s = format("%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM); tex->m_palette->Save(root_hw+s, true); } } s_n++; if(s_save && s_n >= s_saven) { s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); if (rt) rt->m_texture->Save(root_hw+s); } if(s_savez && s_n >= s_saven) { s = format("%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); if (ds_tex) ds_tex->Save(root_hw+s); } s_n++; #ifdef ENABLE_OGL_DEBUG } else { s_n += 2; #endif } if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds_tex, tex)) { s_n += 1; // keep counter sync GL_POP(); return; } // skip alpha test if possible GIFRegTEST TEST = context->TEST; GIFRegFRAME FRAME = context->FRAME; GIFRegZBUF ZBUF = context->ZBUF; uint32 fm = context->FRAME.FBMSK; uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS) { if(GSRenderer::TryAlphaTest(fm, zm)) { context->TEST.ATST = ATST_ALWAYS; } } context->FRAME.FBMSK = fm; context->ZBUF.ZMSK = zm != 0; // A couple of hack to avoid upscaling issue. 
So far it seems to impacts mostly sprite if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { size_t count = m_vertex.next; GSVertex* v = &m_vertex.buff[0]; // Hack to avoid vertical black line in various games (ace combat/tekken) if (m_userhacks_align_sprite_X) { // Note for performance reason I do the check only once on the first // primitive int win_position = v[1].XYZ.X - context->XYOFFSET.OFX; const bool unaligned_position = ((win_position & 0xF) == 8); const bool unaligned_texture = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X); if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST)) { // Normaly vertex are aligned on full pixels and texture in half // pixels. Let's extend the coverage of an half-pixel to avoid // hole after upscaling for(size_t i = 0; i < count; i += 2) { v[i+1].XYZ.X += 8; // I really don't know if it is a good idea. Neither what to do for !PRIM->FST if (unaligned_texture) v[i+1].U += 8; } } } if (PRIM->FST) { if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear())) { if (m_vt.IsLinear()) RoundSpriteOffset<true>(); else RoundSpriteOffset<false>(); } } else { ; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior) } } // DrawPrims(rt_tex, ds_tex, tex); // context->TEST = TEST; context->FRAME = FRAME; context->ZBUF = ZBUF; // GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in)); if(fm != 0xffffffff && rt) { rt->m_valid = rt->m_valid.runion(r); m_tc->InvalidateVideoMem(context->offset.fb, r, false); m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block()); } if(zm != 0xffffffff && ds) { ds->m_valid = ds->m_valid.runion(r); m_tc->InvalidateVideoMem(context->offset.zb, r, false); m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block()); } 
// if(m_hacks.m_oo) { (this->*m_hacks.m_oo)(); } if(s_dump) { uint64 frame = m_perfmon.GetFrame(); string s; if(s_save && s_n >= s_saven) { s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); if (rt) rt->m_texture->Save(root_hw+s); } if(s_savez && s_n >= s_saven) { s = format("%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); if (ds_tex) ds_tex->Save(root_hw+s); } s_n++; if(s_savel > 0 && (s_n - s_saven) > s_savel) { s_dump = 0; } #ifdef ENABLE_OGL_DEBUG } else { s_n += 1; #endif } #ifdef DISABLE_HW_TEXTURE_CACHE if (rt) m_tc->Read(rt, r); #endif GL_POP(); }
// Rasterizes the triangle v[0..2] in up to two sections, clipped vertically by
// `scissor`: the first section starts at v[0], the second one at v[1].
// NOTE(review): the vertices appear to be expected sorted by y (v[0] topmost) - confirm
// against the caller.
// NOTE(review): v[0] is modified in place, because `left` below is a reference to it.
void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scissor)
{
	GSVertexSW edge[3];

	edge[0] = v[1] - v[0];
	edge[1] = v[2] - v[0];

	// Difference between the v0->v2 edge, scaled by the y ratio of the two edges, and the v0->v1 edge.
	GSVertexSW longest = edge[1] * (edge[0].p / edge[1].p).yyyy() - edge[0];

	// Presumably: bit 0 = sign of longest.p.x, bit 1 = (longest.p.x == 0) - TODO confirm upl()/mask() semantics.
	const int flags = longest.p.upl(longest.p == GSVector4::zero()).mask();

	if(flags & 2)
	{
		return;
	}

	// Selects which of edge[0]/edge[1] drives the left side.
	const int side = flags & 1;

	GSVertexSW dscan = longest * longest.p.xxxx().rcp();

	m_dsf.ssp(v, dscan);

	GSVector4 clip(scissor);

	// y coordinates of the section boundaries, rounded up and clamped against the scissor.
	GSVector4 rows = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil();
	GSVector4 rows_lo = rows.max(clip.yyyy());
	GSVector4 rows_hi = rows.min(clip.wwww());
	GSVector4i bounds = GSVector4i(rows_lo.xzyw(rows_hi));

	GSVertexSW& left = v[0];
	GSVector4 right = v[0].p;

	GSVertexSW dleft = edge[side] / edge[side].p.yyyy();
	GSVector4 dright = edge[1 - side].p / edge[1 - side].p.yyyy();

	// Advance both sides from v[0].y to the first covered row.
	GSVector4 prestep = rows_lo.xxxx() - left.p.yyyy();

	left += dleft * prestep;
	right += dright * prestep;

	// First section.
	{
		int top = bounds.extract32<0>();
		int bottom = bounds.extract32<2>();

		if(top < bottom)
		{
			DrawTriangleSection(top, bottom, left, dleft, right, dright, dscan, clip);
		}
	}

	// Second section.
	int top = bounds.y;
	int bottom = bounds.w;

	if(top >= bottom)
	{
		return;
	}

	// Restart the side that ended at v[1] along the v1->v2 edge.
	if(side == 0)
	{
		left = v[1];
		edge[2] = v[2] - v[1];
		dleft = edge[2] / edge[2].p.yyyy();
	}
	else
	{
		right = v[1].p;
		edge[2].p = v[2].p - v[1].p;
		dright = edge[2].p / edge[2].p.yyyy();
	}

	left += dleft * (rows_lo.zzzz() - left.p.yyyy());
	right += dright * (rows_lo.zzzz() - right.yyyy());

	left.p = left.p.upl(right).xyzw(left.p); // right.x => left.y
	dleft.p = dleft.p.upl(dright).xyzw(dleft.p); // dright.x => dleft.y

	DrawTriangleSection(top, bottom, left, dleft, dscan, clip);
}
bool GSRenderer::Merge(int field) { bool en[2]; GSVector4i fr[2]; GSVector4i dr[2]; GSVector2i display_baseline = { INT_MAX, INT_MAX }; GSVector2i frame_baseline = { INT_MAX, INT_MAX }; for(int i = 0; i < 2; i++) { en[i] = IsEnabled(i); if(en[i]) { fr[i] = GetFrameRect(i); dr[i] = GetDisplayRect(i); display_baseline.x = min(dr[i].left, display_baseline.x); display_baseline.y = min(dr[i].top, display_baseline.y); frame_baseline.x = min(fr[i].left, frame_baseline.x); frame_baseline.y = min(fr[i].top, frame_baseline.y); //printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w); } } if(!en[0] && !en[1]) { return false; } GL_PUSH("Renderer Merge %d (0: enabled %d 0x%x, 1: enabled %d 0x%x)", s_n, en[0], m_regs->DISP[0].DISPFB.Block(), en[1], m_regs->DISP[1].DISPFB.Block()); // try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero) // // NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending) bool samesrc = en[0] && en[1] && m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP && m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) { // persona 4: // // fr[0] = 0 0 640 448 // fr[1] = 0 1 640 448 // dr[0] = 159 50 779 498 // dr[1] = 159 50 779 497 // // second image shifted up by 1 pixel and blended over itself // // god of war: // // fr[0] = 0 1 512 448 // fr[1] = 0 0 512 448 // dr[0] = 127 50 639 497 // dr[1] = 127 50 639 498 // // same just the first image shifted // // These kinds of cases are now fixed by the more generic frame_diff code below, as the code here was too specific and has become obsolete. 
// NOTE: Persona 4 and God Of War are not rare exceptions, many games have the same(or very similar) offsets. int topDiff = fr[0].top - fr[1].top; if (dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, topDiff, 0, topDiff)) || fr[1].eq(fr[0] + GSVector4i(0, topDiff, 0, topDiff)))) { // dq5: // // fr[0] = 0 1 512 445 // fr[1] = 0 0 512 444 // dr[0] = 127 50 639 494 // dr[1] = 127 50 639 494 int top = min(fr[0].top, fr[1].top); int bottom = min(fr[0].bottom, fr[1].bottom); fr[0].top = fr[1].top = top; fr[0].bottom = fr[1].bottom = bottom; } } GSVector2i fs(0, 0); GSVector2i ds(0, 0); GSTexture* tex[3] = {NULL, NULL, NULL}; int y_offset[3] = {0, 0, 0}; s_n++; bool feedback_merge = m_regs->EXTWRITE.WRITE == 1; if(samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge) { tex[0] = GetOutput(0, y_offset[0]); tex[1] = tex[0]; // saves one texture fetch y_offset[1] = y_offset[0]; } else { if(en[0]) tex[0] = GetOutput(0, y_offset[0]); if(en[1]) tex[1] = GetOutput(1, y_offset[1]); if(feedback_merge) tex[2] = GetFeedbackOutput(); } GSVector4 src[2]; GSVector4 src_hw[2]; GSVector4 dst[2]; for(int i = 0; i < 2; i++) { if(!en[i] || !tex[i]) continue; GSVector4i r = fr[i]; GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy(); GSVector2 off(0); GSVector2i display_diff(dr[i].left - display_baseline.x, dr[i].top - display_baseline.y); GSVector2i frame_diff(fr[i].left - frame_baseline.x, fr[i].top - frame_baseline.y); // Time Crisis 2/3 uses two side by side images when in split screen mode. 
// Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII if(display_diff.x > 2) { off.x = tex[i]->GetScale().x * display_diff.x; } // If the DX offset is too small then consider the status of frame memory offsets, prevents blurring on Tenchu: Fatal Shadows, Worms 3D else if(display_diff.x != frame_diff.x) { off.x = tex[i]->GetScale().x * frame_diff.x; } if(display_diff.y >= 4) // Shouldn't this be >= 2? { off.y = tex[i]->GetScale().y * display_diff.y; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { off.y /= 2; } } else if(display_diff.y != frame_diff.y) { off.y = tex[i]->GetScale().y * frame_diff.y; } dst[i] = GSVector4(off).xyxy() + scale * GSVector4(r.rsize()); fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); } ds = fs; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { ds.y *= 2; } m_real_size = ds; bool slbg = m_regs->PMODE.SLBG; if(tex[0] || tex[1]) { if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) { // the two outputs are identical, skip drawing one of them (the one that is alpha blended) tex[0] = NULL; } GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; m_dev->Merge(tex, src_hw, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c); if(m_regs->SMODE2.INT && m_interlace > 0) { if(m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting { int field2 = 0; int mode = 2; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } else { int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } if(m_shadeboost) { m_dev->ShadeBoost(); } if(m_shaderfx) { m_dev->ExternalFX(); } if(m_fxaa) { m_dev->FXAA(); } } return true; }
* any later version.
 *
 * This Program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Make; see the file COPYING. If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 */

#include "stdafx.h"
#include "GSBlock.h"

// Definitions of the static constant vectors declared by GSBlock.

// Each of these lists the sixteen values 0..15 exactly once (a permutation).
// Presumably byte-shuffle control vectors for 16/8/4-bit pixel data - TODO confirm at the use sites.
const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);

// Bit masks: 0x8000 is bit 15, 0x7c00 bits 10-14, 0x03e0 bits 5-9, 0x001f bits 0-4.
// NOTE(review): the names suggest the A/B/G/R fields of a 16-bit 1:5:5:5 pixel - confirm.
const GSVector4i GSBlock::m_xxxa(0x00008000);
const GSVector4i GSBlock::m_xxbx(0x00007c00);
const GSVector4i GSBlock::m_xgxx(0x000003e0);
const GSVector4i GSBlock::m_rxxx(0x0000001f);

// Each mask repeats four indices four times: (0,1,8,9), (2,3,10,11), (4,5,12,13), (6,7,14,15).
// Presumably byte-shuffle control vectors that replicate single source bytes - TODO confirm at the use sites.
const GSVector4i GSBlock::m_uw8hmask0 = GSVector4i(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
const GSVector4i GSBlock::m_uw8hmask1 = GSVector4i(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
const GSVector4i GSBlock::m_uw8hmask2 = GSVector4i(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
const GSVector4i GSBlock::m_uw8hmask3 = GSVector4i(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
void GSRendererHW::Draw() { if(m_dev->IsLost()) return; if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return; GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; GIFRegTEX0 TEX0; TEX0.TBP0 = context->FRAME.Block(); TEX0.TBW = context->FRAME.FBW; TEX0.PSM = context->FRAME.PSM; GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); TEX0.TBP0 = context->ZBUF.Block(); TEX0.TBW = context->FRAME.FBW; TEX0.PSM = context->ZBUF.PSM; GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); if(!rt || !ds) { ASSERT(0); return; } GSTextureCache::Source* tex = NULL; if(PRIM->TME) { /* // m_tc->LookupSource will mess with the palette, should not, but we do this after, until it is sorted out if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } */ GSVector4i r; GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); tex = m_tc->LookupSource(context->TEX0, env.TEXA, r); if(!tex) return; if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } } if(s_dump) { uint64 frame = m_perfmon.GetFrame(); string s; if(s_save && s_n >= s_saven && tex) { s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds", s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM, (int)context->CLAMP.WMS, (int)context->CLAMP.WMT, (int)context->CLAMP.MINU, (int)context->CLAMP.MAXU, (int)context->CLAMP.MINV, (int)context->CLAMP.MAXV); tex->m_texture->Save(s, true); if(tex->m_palette) { s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM); tex->m_palette->Save(s, true); } } s_n++; if(s_save && s_n >= s_saven) { s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); rt->m_texture->Save(s); } if(s_savez && s_n >= s_saven) { s = 
format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); ds->m_texture->Save(s); } s_n++; } if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex)) { return; } // skip alpha test if possible GIFRegTEST TEST = context->TEST; GIFRegFRAME FRAME = context->FRAME; GIFRegZBUF ZBUF = context->ZBUF; uint32 fm = context->FRAME.FBMSK; uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS) { if(GSRenderer::TryAlphaTest(fm, zm)) { context->TEST.ATST = ATST_ALWAYS; } } context->FRAME.FBMSK = fm; context->ZBUF.ZMSK = zm != 0; // DrawPrims(rt->m_texture, ds->m_texture, tex); // context->TEST = TEST; context->FRAME = FRAME; context->ZBUF = ZBUF; // GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in)); if(fm != 0xffffffff) { rt->m_valid = rt->m_valid.runion(r); m_tc->InvalidateVideoMem(context->offset.fb, r, false); } if(zm != 0xffffffff) { ds->m_valid = ds->m_valid.runion(r); m_tc->InvalidateVideoMem(context->offset.zb, r, false); } // if(m_hacks.m_oo) { (this->*m_hacks.m_oo)(); } if(s_dump) { uint64 frame = m_perfmon.GetFrame(); string s; if(s_save && s_n >= s_saven) { s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); rt->m_texture->Save(s); } if(s_savez && s_n >= s_saven) { s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); ds->m_texture->Save(s); } s_n++; } #ifdef DISABLE_HW_TEXTURE_CACHE m_tc->Read(rt, r); #endif }