void GPURendererSW::VertexKick() { GSVertexSW& dst = m_vl.AddTail(); // TODO: x/y + off.x/y should wrap around at +/-1024 int x = (int)(m_v.XY.X + m_env.DROFF.X) << m_scale.x; int y = (int)(m_v.XY.Y + m_env.DROFF.Y) << m_scale.y; int u = m_v.UV.X; int v = m_v.UV.Y; GSVector4 pt(x, y, u, v); dst.p = pt.xyxy(GSVector4::zero()); dst.t = (pt.zwzw(GSVector4::zero()) + GSVector4(0.125f)) * 256.0f; // dst.c = GSVector4(m_v.RGB.u32) * 128.0f; dst.c = GSVector4(GSVector4i::load((int)m_v.RGB.u32).u8to32() << 7); int count = 0; if(GSVertexSW* v = DrawingKick(count)) { // TODO m_count += count; } }
void GSDevice10::StretchRect(Texture& st, const GSVector4& sr, Texture& dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, ID3D10BlendState* bs, bool linear) { BeginScene(); // om OMSetDepthStencilState(m_convert.dss, 0); OMSetBlendState(bs, 0); OMSetRenderTargets(dt, NULL); // ia float left = dr.x * 2 / dt.GetWidth() - 1.0f; float top = 1.0f - dr.y * 2 / dt.GetHeight(); float right = dr.z * 2 / dt.GetWidth() - 1.0f; float bottom = 1.0f - dr.w * 2 / dt.GetHeight(); GSVertexPT1 vertices[] = { {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sr.x, sr.y)}, {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sr.z, sr.y)}, {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sr.x, sr.w)}, {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sr.z, sr.w)}, }; D3D10_BOX box = {0, 0, 0, sizeof(vertices), 1, 1}; m_dev->UpdateSubresource(m_convert.vb, 0, &box, vertices, 0, 0); IASetVertexBuffer(m_convert.vb, sizeof(vertices[0])); IASetInputLayout(m_convert.il); IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); // vs VSSetShader(m_convert.vs, NULL); // gs GSSetShader(NULL); // ps PSSetShader(ps, ps_cb); PSSetSamplerState(linear ? m_convert.ln : m_convert.pt, NULL); PSSetShaderResources(st, NULL); // rs RSSet(dt.GetWidth(), dt.GetHeight()); // DrawPrimitive(countof(vertices)); // EndScene(); }
void GSDevice11::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear) { BeginScene(); GSVector2i ds = dt->GetSize(); // om OMSetDepthStencilState(m_convert.dss, 0); OMSetBlendState(bs, 0); OMSetRenderTargets(dt, NULL); // ia float left = dr.x * 2 / ds.x - 1.0f; float top = 1.0f - dr.y * 2 / ds.y; float right = dr.z * 2 / ds.x - 1.0f; float bottom = 1.0f - dr.w * 2 / ds.y; GSVertexPT1 vertices[] = { {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sr.x, sr.y)}, {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sr.z, sr.y)}, {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sr.x, sr.w)}, {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sr.z, sr.w)}, }; IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); IASetInputLayout(m_convert.il); IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); // vs VSSetShader(m_convert.vs, NULL); // gs GSSetShader(NULL); // ps PSSetShaderResources(st, NULL); PSSetSamplerState(linear ? m_convert.ln : m_convert.pt, NULL); PSSetShader(ps, ps_cb); // DrawPrimitive(); // EndScene(); PSSetShaderResources(NULL, NULL); }
void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); int w = std::max<int>(cr.width(), 1); int h = std::max<int>(cr.height(), 1); if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h)) { return; } } GL_PUSH("Present"); ClearRenderTarget(m_backbuffer, 0); if(m_current) { static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE, ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER, ShaderConvert_COMPLEX_FILTER}; // FIXME Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); } Flip(); }
void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); int w = std::max<int>(cr.width(), 1); int h = std::max<int>(cr.height(), 1); if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h)) { return; } } GL_PUSH("Present"); ClearRenderTarget(m_backbuffer, 0); if(m_current) { static int s_shader[5] = {0, 5, 6, 8, 9}; // FIXME Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); } Flip(); GL_POP(); }
// Initialises the position scale constants: (1/16, 1/16, 1, 128), and the same four
// values repeated twice in the 8-wide variant when _M_SSE >= 0x501.
void GSRendererSW::InitVectors()
{
	const float sixteenth = 1.0f / 16;
	const float one = 1.0f;
	const float k128 = 128.0f;

	m_pos_scale = GSVector4(sixteenth, sixteenth, one, k128);

#if _M_SSE >= 0x501
	m_pos_scale2 = GSVector8(sixteenth, sixteenth, one, k128, sixteenth, sixteenth, one, k128);
#endif
}
void GSDrawScanlineCodeGenerator::InitVectors() { #if _M_SSE >= 0x501 GSVector8 log2_coef[4] = { GSVector8(0.204446009836232697516f), GSVector8(-1.04913055217340124191f), GSVector8(2.28330284476918490682f), GSVector8(1.0f), }; for (size_t n = 0; n < countof(log2_coef); ++n) m_log2_coef[n] = log2_coef[n]; #else GSVector4i test[8] = { GSVector4i::zero(), GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), GSVector4i::zero(), }; GSVector4 log2_coef[4] = { GSVector4(0.204446009836232697516f), GSVector4(-1.04913055217340124191f), GSVector4(2.28330284476918490682f), GSVector4(1.0f), }; for (size_t n = 0; n < countof(test); ++n) m_test[n] = test[n]; for (size_t n = 0; n < countof(log2_coef); ++n) m_log2_coef[n] = log2_coef[n]; #endif }
// Runs the shade boost pixel shader: uploads the reciprocal of dTex's size to the
// shade boost constant buffer, then stretches all of sTex (0,0)-(1,1) over all of dTex
// with linear filtering.
void GSDevice11::DoShadeBoost(GSTexture* sTex, GSTexture* dTex)
{
	const GSVector2i size = dTex->GetSize();

	ShadeBoostConstantBuffer cb;

	cb.rcpFrame = GSVector4(1.0f / size.x, 1.0f / size.y, 0.0f, 0.0f);
	cb.rcpFrameOpt = GSVector4::zero();

	m_ctx->UpdateSubresource(m_shadeboost.cb, 0, NULL, &cb, 0, 0);

	const GSVector4 srcRect(0, 0, 1, 1);
	const GSVector4 dstRect(0, 0, size.x, size.y);

	StretchRect(sTex, srcRect, dTex, dstRect, m_shadeboost.ps, m_shadeboost.cb, true);
}
// Runs interlace pixel shader number `shader`: uploads (0, 1/height) and height/2 of dTex
// to the interlace constant buffer, then stretches all of sTex over a dTex-sized rectangle
// shifted vertically by `yoffset`, using linear or point sampling as requested.
void GSDevice11::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset)
{
	const GSVector4 size = GSVector4(dTex->GetSize());

	InterlaceConstantBuffer cb;

	cb.ZrH = GSVector2(0, 1.0f / size.y);
	cb.hH = size.y / 2;

	m_ctx->UpdateSubresource(m_interlace.cb, 0, NULL, &cb, 0, 0);

	const GSVector4 srcRect(0, 0, 1, 1);
	const GSVector4 dstRect(0.0f, yoffset, size.x, size.y + yoffset);

	StretchRect(sTex, srcRect, dTex, dstRect, m_interlace.ps[shader], m_interlace.cb, linear);
}
// Runs the external FX pixel shader: calls InitExternalFX(), uploads dTex's size and its
// reciprocal to the shader constant buffer, then stretches all of sTex (0,0)-(1,1) over
// all of dTex with linear filtering.
void GSDevice11::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
{
	const GSVector2i size = dTex->GetSize();

	InitExternalFX();

	const float width = (float)size.x;
	const float height = (float)size.y;

	ExternalFXConstantBuffer cb;

	cb.xyFrame = GSVector2(width, height);
	cb.rcpFrame = GSVector4(1.0f / width, 1.0f / height, 0.0f, 0.0f);
	cb.rcpFrameOpt = GSVector4::zero();

	m_ctx->UpdateSubresource(m_shaderfx.cb, 0, NULL, &cb, 0, 0);

	const GSVector4 srcRect(0, 0, 1, 1);
	const GSVector4 dstRect(0, 0, size.x, size.y);

	StretchRect(sTex, srcRect, dTex, dstRect, m_shaderfx.ps, m_shaderfx.cb, true);
}
// Draws a quad covering the bounding box of the traced vertices (m_vt.m_min / m_vt.m_max)
// with the m_fba depth-stencil and blend states and convert pixel shader #4.
// Does nothing when `rt` is NULL; `rt` is otherwise only used for its scale and size.
void GSRendererDX9::UpdateFBA(GSTexture* rt)
{
	if(rt == NULL)
	{
		return;
	}

	GSDevice9* device = (GSDevice9*)m_dev;

	device->BeginScene();

	// output merger

	device->OMSetDepthStencilState(&m_fba.dss);
	device->OMSetBlendState(&m_fba.bs, 0);

	// input assembler

	GSVector4 scale = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
	GSVector4 bias = GSVector4(-1.0f, 1.0f);

	// bounding box, widened by one unit on each side, scaled and saturated
	GSVector4 bounds = m_vt.m_min.p.xyxy(m_vt.m_max.p) + bias.xxyy();
	GSVector4 area = (bounds * scale.xyxy()).sat(bias.zzyy());

	// times two minus one; y is negated per vertex below
	GSVector4 quad = area * 2.0f + bias.xxxx();

	GSVertexPT1 vertices[] =
	{
		{GSVector4(quad.x, -quad.y, 0.5f, 1.0f), GSVector2(0)},
		{GSVector4(quad.z, -quad.y, 0.5f, 1.0f), GSVector2(0)},
		{GSVector4(quad.x, -quad.w, 0.5f, 1.0f), GSVector2(0)},
		{GSVector4(quad.z, -quad.w, 0.5f, 1.0f), GSVector2(0)},
	};

	device->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
	device->IASetInputLayout(device->m_convert.il);
	device->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);

	// vertex shader

	device->VSSetShader(device->m_convert.vs, NULL, 0);

	// pixel shader

	device->PSSetShader(device->m_convert.ps[4], NULL, 0);

	// draw

	device->DrawPrimitive();

	device->EndScene();
}
// Runs the FXAA pixel shader: calls InitFXAA(), uploads the reciprocal of dTex's size to
// the FXAA constant buffer, then stretches all of sTex (0,0)-(1,1) over all of dTex with
// linear filtering.
void GSDevice11::DoFXAA(GSTexture* sTex, GSTexture* dTex)
{
	const GSVector2i size = dTex->GetSize();

	InitFXAA();

	FXAAConstantBuffer cb;

	cb.rcpFrame = GSVector4(1.0f / size.x, 1.0f / size.y, 0.0f, 0.0f);
	cb.rcpFrameOpt = GSVector4::zero();

	m_ctx->UpdateSubresource(m_fxaa.cb, 0, NULL, &cb, 0, 0);

	const GSVector4 srcRect(0, 0, 1, 1);
	const GSVector4 dstRect(0, 0, size.x, size.y);

	StretchRect(sTex, srcRect, dTex, dstRect, m_fxaa.ps, m_fxaa.cb, true);

	// debugging aid, disabled:
	//sTex->Save("c:\\temp1\\1.bmp");
	//dTex->Save("c:\\temp1\\2.bmp");
}
void GSDevice10::Present(const CRect& r) { CRect cr; GetClientRect(m_hWnd, &cr); if(m_backbuffer.GetWidth() != cr.Width() || m_backbuffer.GetHeight() != cr.Height()) { Reset(cr.Width(), cr.Height(), false); } float color[4] = {0, 0, 0, 0}; m_dev->ClearRenderTargetView(m_backbuffer, color); if(m_current) { StretchRect(m_current, m_backbuffer, GSVector4(r)); } m_swapchain->Present(m_vsync ? 1 : 0, 0); }
// Prepares the D3D9 input assembler for the queued batch.
//
// 1. Chooses the primitive topology from m_vt.m_primclass (points, lines, or triangle
//    lists; sprites are also drawn as triangle lists).
// 2. When PRIM->IIP == 0, swaps indices inside every primitive: 0<->1 for lines and
//    0<->2 for triangles.
//    NOTE(review): presumably so the vertex D3D9 takes the flat-shading colour from
//    matches the one the GS uses -- confirm.
// 3. Sprites: grows the vertex buffer until tail * 2 fits, then, if at least two vertices
//    are queued, expands each vertex pair in place into four vertices and six indices
//    (triangles i,i+1,i+2 and i+1,i+2,i+3). The loop walks backwards from the last pair
//    so that not-yet-expanded source vertices are never overwritten. The first vertex of
//    each pair takes RGBAQ, Z and FOG from the second; the two middle vertices are built
//    by swapping X, S and U between the pair. head/tail/next become count * 2 and the
//    index tail count * 3. Note that the inner `float s` shadows the outer `GSVertex* s`.
// 4. Sets D3DRS_SHADEMODE from PRIM->IIP, converts every GSVertex in m_vertex.buff to a
//    GSVertexHW9 inside the mapped vertex buffer (p = X, Y, Z and either Q or a second
//    value produced by GSVector4::load; t = texture coordinates, or zero when PRIM->TME
//    is off, followed by the packed RGBAQ word and FOG), then sets the index buffer and
//    the topology.
void GSRendererDX9::SetupIA() { D3DPRIMITIVETYPE topology; switch(m_vt.m_primclass) { case GS_POINT_CLASS: topology = D3DPT_POINTLIST; break; case GS_LINE_CLASS: topology = D3DPT_LINELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 2) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 1]; m_index.buff[i + 1] = tmp; } } break; case GS_TRIANGLE_CLASS: topology = D3DPT_TRIANGLELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 3) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 2]; m_index.buff[i + 2] = tmp; } } break; case GS_SPRITE_CLASS: topology = D3DPT_TRIANGLELIST; // each sprite converted to quad needs twice the space while(m_vertex.tail * 2 > m_vertex.maxcount) { GrowVertexBuffer(); } // assume vertices are tightly packed and sequentially indexed (it should be the case) if(m_vertex.next >= 2) { size_t count = m_vertex.next; int i = (int)count * 2 - 4; GSVertex* s = &m_vertex.buff[count - 2]; GSVertex* q = &m_vertex.buff[count * 2 - 4]; uint32* RESTRICT index = &m_index.buff[count * 3 - 6]; for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6) { GSVertex v0 = s[0]; GSVertex v1 = s[1]; v0.RGBAQ = v1.RGBAQ; v0.XYZ.Z = v1.XYZ.Z; v0.FOG = v1.FOG; q[0] = v0; q[3] = v1; // swap x, s, u uint16 x = v0.XYZ.X; v0.XYZ.X = v1.XYZ.X; v1.XYZ.X = x; float s = v0.ST.S; v0.ST.S = v1.ST.S; v1.ST.S = s; uint16 u = v0.U; v0.U = v1.U; v1.U = u; q[1] = v0; q[2] = v1; index[0] = i + 0; index[1] = i + 1; index[2] = i + 2; index[3] = i + 1; index[4] = i + 2; index[5] = i + 3; } m_vertex.head = m_vertex.tail = m_vertex.next = count * 2; m_index.tail = count * 3; } break; default: __assume(0); } GSDevice9* dev = (GSDevice9*)m_dev; (*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? 
D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO void* ptr = NULL; if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next)) { GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff; GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr; for(uint32 i = 0; i < m_vertex.next; i++, s++, d++) { GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16()); if(PRIM->TME && !PRIM->FST) { p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q)); } else { p = p.xyxy(GSVector4::load((float)s->XYZ.Z)); } GSVector4 t = GSVector4::zero(); if(PRIM->TME) { if(PRIM->FST) { if(UserHacks_WildHack && !isPackedUV_HackFlag) { t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16()); //printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : ""); } else { t = GSVector4(GSVector4i::load(s->UV).upl16()); } } else { t = GSVector4::loadl(&s->ST); } } t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG))); d->p = p; d->t = t; } dev->IAUnmapVertexBuffer(); } dev->IASetIndexBuffer(m_index.buff, m_index.tail); dev->IASetPrimitiveTopology(topology); }
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) { GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; const GSVector2i& rtsize = rt->GetSize(); const GSVector2& rtscale = rt->GetScale(); bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; GSTexture* rtcopy = NULL; ASSERT(m_dev != NULL); GSDeviceDX* dev = (GSDeviceDX*)m_dev; if(DATE) { if(dev->HasStencil()) { GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); GSVector4 dst = src * 2.0f + o.xxxx(); GSVertexPT1 vertices[] = { {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, }; dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); } else { rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); // I'll use VertexTrace when I consider it more trustworthy dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); } } // dev->BeginScene(); // om GSDeviceDX::OMDepthStencilSelector om_dssel; if(context->TEST.ZTE) { om_dssel.ztst = context->TEST.ZTST; om_dssel.zwe = !context->ZBUF.ZMSK; } else { om_dssel.ztst = ZTST_ALWAYS; } if(m_fba) { om_dssel.fba = context->FBA.FBA; } GSDeviceDX::OMBlendSelector om_bsel; if(!IsOpaque()) { om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; om_bsel.a = context->ALPHA.A; om_bsel.b = context->ALPHA.B; om_bsel.c = context->ALPHA.C; om_bsel.d = context->ALPHA.D; if(env.PABE.PABE) { if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) { // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader // cannot output anything over 0x80 (== 1.0) blending with 0x80 or 
turning it off gives the same result om_bsel.abe = 0; } else { //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. //ASSERT(0); } } } om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); // vs GSDeviceDX::VSSelector vs_sel; vs_sel.tme = PRIM->TME; vs_sel.fst = PRIM->FST; vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; vs_sel.rtcopy = !!rtcopy; // The real GS appears to do no masking based on the Z buffer format and writing larger Z values // than the buffer supports seems to be an error condition on the real GS, causing it to crash. // We are probably receiving bad coordinates from VU1 in these cases. if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) { if(context->ZBUF.PSM == PSM_PSMZ24) { if(m_vt.m_max.p.z > 0xffffff) { ASSERT(m_vt.m_min.p.z > 0xffffff); // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. if (m_vt.m_min.p.z > 0xffffff) { vs_sel.bppz = 1; om_dssel.ztst = ZTST_ALWAYS; } } } else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { if(m_vt.m_max.p.z > 0xffff) { ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo // Fixme : Same as above, I guess. if (m_vt.m_min.p.z > 0xffff) { vs_sel.bppz = 2; om_dssel.ztst = ZTST_ALWAYS; } } } } GSDeviceDX::VSConstantBuffer vs_cb; float sx = 2.0f * rtscale.x / (rtsize.x << 4); float sy = 2.0f * rtscale.y / (rtsize.y << 4); float ox = (float)(int)context->XYOFFSET.OFX; float oy = (float)(int)context->XYOFFSET.OFY; float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, //because DX10 and DX9 have a different pixel center.) // //The resulting shifted output aligns better with common blending / corona / blurring effects, //but introduces a few bad pixels on the edges. 
if(rt->LikelyOffset) { // DX9 has pixelcenter set to 0.0, so give it some value here if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } ox2 *= rt->OffsetHack_modx; oy2 *= rt->OffsetHack_mody; } vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); // gs GSDeviceDX::GSSelector gs_sel; gs_sel.iip = PRIM->IIP; gs_sel.prim = m_vt.m_primclass; // ps GSDeviceDX::PSSelector ps_sel; GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSConstantBuffer ps_cb; if(DATE) { if(dev->HasStencil()) { om_dssel.date = 1; } else { ps_sel.date = 1 + context->TEST.DATM; } } if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) { ps_sel.colclip = 1; } ps_sel.clr1 = om_bsel.IsCLR1(); ps_sel.fba = context->FBA.FBA; ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; if(UserHacks_AlphaHack) ps_sel.aout = 1; if(PRIM->FGE) { ps_sel.fog = 1; ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; } if(context->TEST.ATE) { ps_sel.atst = context->TEST.ATST; switch(ps_sel.atst) { case ATST_LESS: ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); break; case ATST_GREATER: ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); break; default: ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; break; } } else { ps_sel.atst = ATST_ALWAYS; } if(tex) { ps_sel.wms = context->CLAMP.WMS; ps_sel.wmt = context->CLAMP.WMT; ps_sel.fmt = tex->m_fmt; ps_sel.aem = env.TEXA.AEM; ps_sel.tfx = context->TEX0.TFX; ps_sel.tcc = context->TEX0.TCC; ps_sel.ltf = m_filter == 2 ? 
m_vt.IsLinear() : m_filter; ps_sel.rt = tex->m_target; int w = tex->m_texture->GetWidth(); int h = tex->m_texture->GetHeight(); int tw = (int)(1 << context->TEX0.TW); int th = (int)(1 << context->TEX0.TH); GSVector4 WH(tw, th, w, h); if(PRIM->FST) { vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); //Maybe better? //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); ps_sel.fst = 1; } ps_cb.WH = WH; ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); GSVector4 clamp(ps_cb.MskFix); GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); ps_cb.MinMax = clamp / WH.xyxy(); ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; ps_ssel.ltf = ps_sel.ltf; } else {
// Reads rectangle `r` of target `t` back from the GPU and writes it into local GS memory.
//
// Returns without doing anything when the target has dirty rectangles, when `r` has zero
// width or height, or when the target's pixel storage mode is not one of the handled
// colour / depth formats. Otherwise the rectangle is copied to an offscreen texture in a
// format chosen per storage mode, mapped, and written with the WritePixel32/24/16 routine
// matching that mode.
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{
	if(!t->m_dirty.empty())
	{
		return;
	}

	if(r.width() == 0 || r.height() == 0)
	{
		return;
	}

	const GIFRegTEX0& TEX0 = t->m_TEX0;

	GLuint readback_fmt;
	int convert_shader;

	switch(TEX0.PSM)
	{
	case PSM_PSMCT32:
	case PSM_PSMCT24:
		readback_fmt = GL_RGBA8;
		convert_shader = ShaderConvert_COPY;
		break;

	case PSM_PSMCT16:
	case PSM_PSMCT16S:
		readback_fmt = GL_R16UI;
		convert_shader = ShaderConvert_RGBA8_TO_16_BITS;
		break;

	case PSM_PSMZ32:
	case PSM_PSMZ24:
		readback_fmt = GL_R32UI;
		convert_shader = ShaderConvert_FLOAT32_TO_32_BITS;
		break;

	case PSM_PSMZ16:
	case PSM_PSMZ16S:
		readback_fmt = GL_R16UI;
		convert_shader = ShaderConvert_FLOAT32_TO_32_BITS;
		break;

	default:
		return;
	}

	// Yes lots of logging, but I'm not confident with this code
	GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM);

	GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d",
		t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height());

	// rectangle multiplied by the texture scale and divided by the texture size
	GSVector4 scaled = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy();
	GSVector4 src = scaled / GSVector4(t->m_texture->GetSize()).xyxy();

	GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), readback_fmt, convert_shader);

	if(!offscreen)
	{
		return;
	}

	GSTexture::GSMap mapped;
	GSVector4i offscreen_rect(0, 0, r.width(), r.height());

	if(offscreen->Map(mapped, &offscreen_rect))
	{
		// TODO: block level write

		GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

		switch(TEX0.PSM)
		{
		case PSM_PSMCT32:
		case PSM_PSMZ32:
			m_renderer->m_mem.WritePixel32(mapped.bits, mapped.pitch, off, r);
			break;

		case PSM_PSMCT24:
		case PSM_PSMZ24:
			m_renderer->m_mem.WritePixel24(mapped.bits, mapped.pitch, off, r);
			break;

		case PSM_PSMCT16:
		case PSM_PSMCT16S:
		case PSM_PSMZ16:
		case PSM_PSMZ16S:
			m_renderer->m_mem.WritePixel16(mapped.bits, mapped.pitch, off, r);
			break;

		default:
			ASSERT(0);
		}

		offscreen->Unmap();
	}

	// FIXME invalidate data
	m_renderer->m_dev->Recycle(offscreen);
}
// Converts the current input vertex (m_v) into a GSVertexHW9, appends it to the vertex
// list and, when DrawingKick<prim>() hands back a completed primitive, scissor-tests and
// finalises it.
//
// NOTE(review): `tme`, `fst` and `prim` are not declared in this block -- presumably
// template parameters whose template header lies outside this view; confirm.
//
// Vertex conversion:
//  - p.xy comes from the unpacked 16-bit X/Y; the upper half holds Z and Q when
//    tme && !fst, otherwise whatever GSVector4::load((float)Z) yields.
//  - t is GetUV() for fst, the ST pair otherwise. With USE_UPSCALE_HACKS and an upscale
//    multiplier > 1, U and V are nudged by +/-1 when their low bits sit close to a
//    boundary; the checks run with masks 4095, 255 and 15 in that order, so a later check
//    overrides an earlier one.
//  - the colour words are taken from RGBAQ.u32[0] and FOG.u32[1].
//
// Primitive handling (only when DrawingKick returns non-NULL):
//  - computes the primitive's min/max position and returns without counting it when the
//    box lies outside m_context->scissor.dx9 in x or y, or (triangles and sprites only)
//    when min == max in x or y.
//  - when PRIM->IIP == 0, copies the last vertex's colour to the earlier vertices.
//  - sprites are expanded to six vertices (two triangles) and count grows by 4.
//  - m_count is increased by count.
void GSRendererDX9::VertexKick(bool skip) { GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16()); if(tme && !fst) { p = p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q)); } else { p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z)); } GSVertexHW9& dst = m_vl.AddTail(); dst.p = p; int Uadjust = 0; int Vadjust = 0; if(tme) { if(fst) { dst.t = m_v.GetUV(); #ifdef USE_UPSCALE_HACKS int Udiff = 0; int Vdiff = 0; int multiplier = GetUpscaleMultiplier(); if(multiplier > 1) { Udiff = m_v.UV.U & 4095; Vdiff = m_v.UV.V & 4095; if(Udiff != 0) { if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; } else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; } } if(Vdiff != 0) { if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; } else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; } } Udiff = m_v.UV.U & 255; Vdiff = m_v.UV.V & 255; if(Udiff != 0) { if (Udiff >= 248) { Uadjust = -1; } else if (Udiff <= 8) { Uadjust = 1; } } if(Vdiff != 0) { if (Vdiff >= 248) { Vadjust = -1; } else if (Vdiff <= 8) { Vadjust = 1; } } Udiff = m_v.UV.U & 15; Vdiff = m_v.UV.V & 15; if(Udiff != 0) { if (Udiff >= 15) { Uadjust = -1; } else if (Udiff <= 1) { Uadjust = 1; } } if(Vdiff != 0) { if (Vdiff >= 15) { Vadjust = -1; } else if (Vdiff <= 1) { Vadjust = 1; } } } dst.t.x -= (float) Uadjust; dst.t.y -= (float) Vadjust; #endif } else { dst.t = GSVector4::loadl(&m_v.ST); } } dst._c0() = m_v.RGBAQ.u32[0]; dst._c1() = m_v.FOG.u32[1]; // // BaseDrawingKick can never return NULL here because the DrawingKick function // tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only // condition where this function would return NULL). 
int count = 0; if(GSVertexHW9* v = DrawingKick<prim>(skip, count)) { GSVector4 scissor = m_context->scissor.dx9; GSVector4 pmin, pmax; switch(prim) { case GS_POINTLIST: pmin = v[0].p; pmax = v[0].p; break; case GS_LINELIST: case GS_LINESTRIP: case GS_SPRITE: pmin = v[0].p.min(v[1].p); pmax = v[0].p.max(v[1].p); break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: pmin = v[0].p.min(v[1].p).min(v[2].p); pmax = v[0].p.max(v[1].p).max(v[2].p); break; } GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); switch(prim) { case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: case GS_SPRITE: test |= pmin == pmax; break; } if(test.mask() & 3) { return; } switch(prim) { case GS_POINTLIST: break; case GS_LINELIST: case GS_LINESTRIP: if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();} break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0() = v[2]._c0();} break; case GS_SPRITE: if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();} v[0].p.z = v[1].p.z; v[0].p.w = v[1].p.w; v[0]._c1() = v[1]._c1(); v[2] = v[1]; v[3] = v[1]; v[1].p.y = v[0].p.y; v[1].t.y = v[0].t.y; v[2].p.x = v[0].p.x; v[2].t.x = v[0].t.x; v[4] = v[1]; v[5] = v[2]; count += 4; break; } m_count += count; } }
// Computes the minimum and maximum position, texture coordinate and colour over the
// `count` entries of `index` (indices into the GSVertex array `vertex`) and stores them
// in m_min / m_max.
//
// NOTE(review): `primclass`, `iip`, `tme`, `fst` and `color` are not declared in this
// block -- presumably template parameters whose template header lies outside this view;
// confirm.
//
// - Indices are consumed n at a time: 1 for points, 2 for lines and sprites, 3 for
//   triangles.
// - Colour: when `iip` is set every vertex of a primitive contributes; otherwise only the
//   last vertex of each primitive does. Points always use their single vertex.
// - Texture coordinates (only when `tme`): with `fst` the integer UV pair is used;
//   otherwise S and T are multiplied by the reciprocal of Q. Sprites use the second
//   vertex's Q for both vertices.
// - Position: X and Y are the unpacked 16-bit values, combined with Z and the last lane of
//   the second vertex word. Sprites take that last lane from the second vertex (xyzf1)
//   for both vertices.
// - Z is shifted right by one bit before the integer-to-float conversion (after the
//   min/max on the _M_SSE >= 0x401 path, per vertex on the other path) and multiplied by
//   2.0 afterwards. NOTE(review): presumably to keep 32-bit Z within signed range for the
//   conversion -- confirm.
// - Results: positions have context->XYOFFSET subtracted and x/y scaled by 1/16; texture
//   coordinates are scaled by 1/16 (fst) or by 1 << TW / 1 << TH (otherwise), or zeroed
//   when `tme` is off; colours are widened from 8 to 32 bits per channel, or zeroed when
//   `color` is off.
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count) { const GSDrawingContext* context = m_state->m_context; int n = 1; switch(primclass) { case GS_POINT_CLASS: n = 1; break; case GS_LINE_CLASS: case GS_SPRITE_CLASS: n = 2; break; case GS_TRIANGLE_CLASS: n = 3; break; } GSVector4 tmin = s_minmax.xxxx(); GSVector4 tmax = s_minmax.yyyy(); GSVector4i cmin = GSVector4i::xffffffff(); GSVector4i cmax = GSVector4i::zero(); #if _M_SSE >= 0x401 GSVector4i pmin = GSVector4i::xffffffff(); GSVector4i pmax = GSVector4i::zero(); #else GSVector4 pmin = s_minmax.xxxx(); GSVector4 pmax = s_minmax.yyyy(); #endif const GSVertex* RESTRICT v = (GSVertex*)vertex; for(int i = 0; i < count; i += n) { if(primclass == GS_POINT_CLASS) { GSVector4i c(v[index[i]].m[0]); if(color) { cmin = cmin.min_u8(c); cmax = cmax.max_u8(c); } if(tme) { if(!fst) { GSVector4 stq = GSVector4::cast(c); GSVector4 q = stq.wwww(); stq = (stq.xyww() * q.rcpnr()).xyww(q); tmin = tmin.min(stq); tmax = tmax.max(stq); } else { GSVector4i uv(v[index[i]].m[1]); GSVector4 st = GSVector4(uv.uph16()).xyxy(); tmin = tmin.min(st); tmax = tmax.max(st); } } GSVector4i xyzf(v[index[i]].m[1]); GSVector4i xy = xyzf.upl16(); GSVector4i z = xyzf.yyyy(); #if _M_SSE >= 0x401 GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf)); pmin = pmin.min_u32(p); pmax = pmax.max_u32(p); #else GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww()))); pmin = pmin.min(p); pmax = pmax.max(p); #endif } else if(primclass == GS_LINE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); if(color) { if(iip) { cmin = cmin.min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c0.max_u8(c1)); } else { cmin = cmin.min_u8(c1); cmax = cmax.max_u8(c1); } } if(tme) { if(!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); GSVector4 q = stq0.wwww(stq1).rcpnr(); stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0); stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1); tmin = tmin.min(stq0.min(stq1)); tmax 
= tmax.max(stq0.max(stq1)); } else { GSVector4i uv0(v[index[i + 0]].m[1]); GSVector4i uv1(v[index[i + 1]].m[1]); GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); tmin = tmin.min(st0.min(st1)); tmax = tmax.max(st0.max(st1)); } } GSVector4i xyzf0(v[index[i + 0]].m[1]); GSVector4i xyzf1(v[index[i + 1]].m[1]); GSVector4i xy0 = xyzf0.upl16(); GSVector4i z0 = xyzf0.yyyy(); GSVector4i xy1 = xyzf1.upl16(); GSVector4i z1 = xyzf1.yyyy(); #if _M_SSE >= 0x401 GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); pmin = pmin.min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p0.max_u32(p1)); #else GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww()))); GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); pmin = pmin.min(p0.min(p1)); pmax = pmax.max(p0.max(p1)); #endif } else if(primclass == GS_TRIANGLE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); GSVector4i c2(v[index[i + 2]].m[0]); if(color) { if(iip) { cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1)); } else { cmin = cmin.min_u8(c2); cmax = cmax.max_u8(c2); } } if(tme) { if(!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); GSVector4 stq2 = GSVector4::cast(c2); GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr(); stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0); stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1); stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2); tmin = tmin.min(stq2).min(stq0.min(stq1)); tmax = tmax.max(stq2).max(stq0.max(stq1)); } else { GSVector4i uv0(v[index[i + 0]].m[1]); GSVector4i uv1(v[index[i + 1]].m[1]); GSVector4i uv2(v[index[i + 2]].m[1]); GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); GSVector4 st2 = GSVector4(uv2.uph16()).xyxy(); tmin = tmin.min(st2).min(st0.min(st1)); tmax = tmax.max(st2).max(st0.max(st1)); } } GSVector4i xyzf0(v[index[i + 
0]].m[1]); GSVector4i xyzf1(v[index[i + 1]].m[1]); GSVector4i xyzf2(v[index[i + 2]].m[1]); GSVector4i xy0 = xyzf0.upl16(); GSVector4i z0 = xyzf0.yyyy(); GSVector4i xy1 = xyzf1.upl16(); GSVector4i z1 = xyzf1.yyyy(); GSVector4i xy2 = xyzf2.upl16(); GSVector4i z2 = xyzf2.yyyy(); #if _M_SSE >= 0x401 GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2)); pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1)); #else GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww()))); GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww()))); pmin = pmin.min(p2).min(p0.min(p1)); pmax = pmax.max(p2).max(p0.max(p1)); #endif } else if(primclass == GS_SPRITE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); if(color) { if(iip) { cmin = cmin.min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c0.max_u8(c1)); } else { cmin = cmin.min_u8(c1); cmax = cmax.max_u8(c1); } } if(tme) { if(!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); GSVector4 q = stq1.wwww().rcpnr(); stq0 = (stq0.xyww() * q).xyww(stq1); stq1 = (stq1.xyww() * q).xyww(stq1); tmin = tmin.min(stq0.min(stq1)); tmax = tmax.max(stq0.max(stq1)); } else { GSVector4i uv0(v[index[i + 0]].m[1]); GSVector4i uv1(v[index[i + 1]].m[1]); GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); tmin = tmin.min(st0.min(st1)); tmax = tmax.max(st0.max(st1)); } } GSVector4i xyzf0(v[index[i + 0]].m[1]); GSVector4i xyzf1(v[index[i + 1]].m[1]); GSVector4i xy0 = xyzf0.upl16(); GSVector4i z0 = xyzf0.yyyy(); GSVector4i xy1 = xyzf1.upl16(); GSVector4i z1 = xyzf1.yyyy(); #if _M_SSE >= 0x401 GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1)); GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); pmin = pmin.min_u32(p0.min_u32(p1)); 
pmax = pmax.max_u32(p0.max_u32(p1)); #else GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww()))); GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); pmin = pmin.min(p0.min(p1)); pmax = pmax.max(p0.max(p1)); #endif } } #if _M_SSE >= 0x401 pmin = pmin.blend16<0x30>(pmin.srl32(1)); pmax = pmax.blend16<0x30>(pmax.srl32(1)); #endif GSVector4 o(context->XYOFFSET); GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f); m_min.p = (GSVector4(pmin) - o) * s; m_max.p = (GSVector4(pmax) - o) * s; if(tme) { if(fst) { s = GSVector4(1.0f / 16, 1.0f).xxyy(); } else { s = GSVector4(1 << context->TEX0.TW, 1 << context->TEX0.TH, 1, 1); } m_min.t = tmin * s; m_max.t = tmax * s; } else { m_min.t = GSVector4::zero(); m_max.t = GSVector4::zero(); } if(color) { m_min.c = cmin.zzzz().u8to32(); m_max.c = cmax.zzzz().u8to32(); } else { m_min.c = GSVector4i::zero(); m_max.c = GSVector4i::zero(); } }
void GSRendererDX9::SetupIA(const float& sx, const float& sy) { D3DPRIMITIVETYPE topology; switch(m_vt.m_primclass) { case GS_POINT_CLASS: topology = D3DPT_POINTLIST; break; case GS_LINE_CLASS: topology = D3DPT_LINELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 2) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 1]; m_index.buff[i + 1] = tmp; } } break; case GS_TRIANGLE_CLASS: topology = D3DPT_TRIANGLELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 3) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 2]; m_index.buff[i + 2] = tmp; } } break; case GS_SPRITE_CLASS: topology = D3DPT_TRIANGLELIST; // each sprite converted to quad needs twice the space Lines2Sprites(); break; default: __assume(0); } GSDevice9* dev = (GSDevice9*)m_dev; (*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO void* ptr = NULL; if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next)) { GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff; GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr; for(uint32 i = 0; i < m_vertex.next; i++, s++, d++) { GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16()); if(PRIM->TME && !PRIM->FST) { p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q)); } else { p = p.xyxy(GSVector4::load((float)s->XYZ.Z)); } GSVector4 t = GSVector4::zero(); if(PRIM->TME) { if(PRIM->FST) { if(UserHacks_WildHack && !isPackedUV_HackFlag) { t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16()); //printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : ""); } else { t = GSVector4(GSVector4i::load(s->UV).upl16()); } } else { t = GSVector4::loadl(&s->ST); } } t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG))); d->p = p; d->t = t; } dev->IAUnmapVertexBuffer(); } dev->IASetIndexBuffer(m_index.buff, m_index.tail); dev->IASetPrimitiveTopology(topology); }
void GSRenderer::VSync(int field) { GSPerfMonAutoTimer pmat(&m_perfmon); m_perfmon.Put(GSPerfMon::Frame); Flush(); if(!m_dev->IsLost(true)) { if(!Merge(field ? 1 : 0)) { return; } } else { ResetDevice(); } m_dev->AgePool(); // osd if((m_perfmon.GetFrame() & 0x1f) == 0) { m_perfmon.Update(); double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); string s; #ifdef GSTITLEINFO_API_FORCE_VERBOSE if(1)//force verbose reply #else if(m_wnd->IsManaged()) #endif { //GSdx owns the window's title, be verbose. string s2 = m_regs->SMODE2.INT ? (string("Interlaced ") + (m_regs->SMODE2.FFMD ? "(frame)" : "(field)")) : "Progressive"; s = format( "%lld | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d S/%d P/%d D | %d%% CPU | %.2f | %.2f", m_perfmon.GetFrame(), GetInternalResolution().x, GetInternalResolution().y, fps, (int)(100.0 * fps / GetTvRefreshRate()), s2.c_str(), theApp.m_gs_interlace[m_interlace].name.c_str(), theApp.m_gs_aspectratio[m_aspectratio].name.c_str(), (int)m_perfmon.Get(GSPerfMon::SyncPoint), (int)m_perfmon.Get(GSPerfMon::Prim), (int)m_perfmon.Get(GSPerfMon::Draw), m_perfmon.CPU(), m_perfmon.Get(GSPerfMon::Swizzle) / 1024, m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 ); double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); if(fillrate > 0) { s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024)); int sum = 0; for(int i = 0; i < 16; i++) { sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); } s += format(" | %d%% CPU", sum); } } else { // Satisfy PCSX2's request for title info: minimal verbosity due to more external title text s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str()); } if(m_capture.IsCapturing()) { s += " | Recording..."; } if(m_wnd->IsManaged()) { m_wnd->SetWindowText(s.c_str()); } else { // note: do not use TryEnterCriticalSection. 
It is unnecessary code complication in // an area that absolutely does not matter (even if it were 100 times slower, it wouldn't // be noticeable). Besides, these locks are extremely short -- overhead of conditional // is way more expensive than just waiting for the CriticalSection in 1 of 10,000,000 tries. --air std::lock_guard<std::mutex> lock(m_pGSsetTitle_Crit); strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1); m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0; // make sure null terminated even if text overflows } } else { // [TODO] // We don't have window title rights, or the window has no title, // so let's use actual OSD! } if(m_frameskip) { return; } // present m_dev->Present(m_wnd->GetClientRect().fit(m_aspectratio), m_shader); // snapshot if(!m_snapshot.empty()) { bool shift = false; #ifdef _WIN32 shift = !!(::GetAsyncKeyState(VK_SHIFT) & 0x8000); #else shift = m_shift_key; #endif if(!m_dump && shift) { GSFreezeData fd; fd.size = 0; fd.data = NULL; Freeze(&fd, true); fd.data = new uint8[fd.size]; Freeze(&fd, false); m_dump.Open(m_snapshot, m_crc, fd, m_regs); delete [] fd.data; } if(GSTexture* t = m_dev->GetCurrent()) { t->Save(m_snapshot + ".bmp"); } m_snapshot.clear(); } else { if(m_dump) { bool control = false; #ifdef _WIN32 control = !!(::GetAsyncKeyState(VK_CONTROL) & 0x8000); #else control = m_control_key; #endif m_dump.VSync(field, !control, m_regs); } } // capture if(m_capture.IsCapturing()) { if(GSTexture* current = m_dev->GetCurrent()) { GSVector2i size = m_capture.GetSize(); if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y)) { GSTexture::GSMap m; if(offscreen->Map(m)) { m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped()); offscreen->Unmap(); } m_dev->Recycle(offscreen); } } } }
// Reinterprets the 128 bits of an integer vector as a float vector via
// _mm_castsi128_ps; the bit pattern is kept, no numeric conversion happens.
GSVector4 GSVector4::cast(const GSVector4i& v)
{
	GSVector4 reinterpreted(_mm_castsi128_ps(v.m));

	return reinterpreted;
}
// Convenience overload: forwards to the StretchRect overload that takes an
// explicit source rectangle, passing (0, 0, 1, 1) for it.
void GSDevice10::StretchRect(Texture& st, Texture& dt, const GSVector4& dr, bool linear)
{
	const GSVector4 whole_source(0, 0, 1, 1);

	StretchRect(st, whole_source, dt, dr, linear);
}
void GSVertexTrace::InitVectors() { s_minmax = GSVector4(FLT_MAX, -FLT_MAX); }
void GSRenderer::VSync(int field) { GSPerfMonAutoTimer pmat(&m_perfmon); m_perfmon.Put(GSPerfMon::Frame); Flush(); if(s_dump && s_n >= s_saven) { m_regs->Dump(root_sw + format("%05d_f%lld_gs_reg.txt", s_n, m_perfmon.GetFrame())); } if(!m_dev->IsLost(true)) { if(!Merge(field ? 1 : 0)) { return; } } else { ResetDevice(); } m_dev->AgePool(); // osd if((m_perfmon.GetFrame() & 0x1f) == 0) { m_perfmon.Update(); double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); string s; #ifdef GSTITLEINFO_API_FORCE_VERBOSE if(1)//force verbose reply #else if(m_wnd->IsManaged()) #endif { //GSdx owns the window's title, be verbose. string s2 = m_regs->SMODE2.INT ? (string("Interlaced ") + (m_regs->SMODE2.FFMD ? "(frame)" : "(field)")) : "Progressive"; s = format( "%lld | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d S/%d P/%d D | %d%% CPU | %.2f | %.2f", m_perfmon.GetFrame(), GetInternalResolution().x, GetInternalResolution().y, fps, (int)(100.0 * fps / GetTvRefreshRate()), s2.c_str(), theApp.m_gs_interlace[m_interlace].name.c_str(), theApp.m_gs_aspectratio[m_aspectratio].name.c_str(), (int)m_perfmon.Get(GSPerfMon::SyncPoint), (int)m_perfmon.Get(GSPerfMon::Prim), (int)m_perfmon.Get(GSPerfMon::Draw), m_perfmon.CPU(), m_perfmon.Get(GSPerfMon::Swizzle) / 1024, m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 ); double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); if(fillrate > 0) { s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024)); int sum = 0; for(int i = 0; i < 16; i++) { sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); } s += format(" | %d%% CPU", sum); } } else { // Satisfy PCSX2's request for title info: minimal verbosity due to more external title text s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str()); } if(m_capture.IsCapturing()) { s += " | Recording..."; } if(m_wnd->IsManaged()) { m_wnd->SetWindowText(s.c_str()); } else { // note: do not use TryEnterCriticalSection. 
It is unnecessary code complication in // an area that absolutely does not matter (even if it were 100 times slower, it wouldn't // be noticeable). Besides, these locks are extremely short -- overhead of conditional // is way more expensive than just waiting for the CriticalSection in 1 of 10,000,000 tries. --air std::lock_guard<std::mutex> lock(m_pGSsetTitle_Crit); strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1); m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0; // make sure null terminated even if text overflows } } else { // [TODO] // We don't have window title rights, or the window has no title, // so let's use actual OSD! } if(m_frameskip) { return; } // present #if 0 // This will scale the OSD to the PS2's output resolution. // Will be affected by 2x, 4x, etc scaling. m_dev->m_osd.m_real_size = m_real_size #elif 0 // This will scale the OSD to the window's size. // Will maintiain the font size no matter what size the window is. GSVector4i window_size = m_wnd->GetClientRect(); m_dev->m_osd.m_real_size.x = window_size.v[2]; m_dev->m_osd.m_real_size.y = window_size.v[3]; #else // This will scale the OSD to the native resolution. // Will size font relative to the window's size. 
// TODO this should probably be done with native calls m_dev->m_osd.m_real_size.x = 1024; m_dev->m_osd.m_real_size.y = 768; #endif m_dev->Present(m_wnd->GetClientRect().fit(m_aspectratio), m_shader); // snapshot if(!m_snapshot.empty()) { if(!m_dump && m_shift_key) { GSFreezeData fd = {0, nullptr}; Freeze(&fd, true); fd.data = new uint8[fd.size]; Freeze(&fd, false); #ifdef LZMA_SUPPORTED if (m_control_key) m_dump = std::unique_ptr<GSDumpBase>(new GSDump(m_snapshot, m_crc, fd, m_regs)); else m_dump = std::unique_ptr<GSDumpBase>(new GSDumpXz(m_snapshot, m_crc, fd, m_regs)); #else m_dump = std::unique_ptr<GSDumpBase>(new GSDump(m_snapshot, m_crc, fd, m_regs)); #endif delete [] fd.data; } if(GSTexture* t = m_dev->GetCurrent()) { t->Save(m_snapshot + ".bmp"); } m_snapshot.clear(); } else if(m_dump) { if(m_dump->VSync(field, !m_control_key, m_regs)) m_dump.reset(); } // capture if(m_capture.IsCapturing()) { if(GSTexture* current = m_dev->GetCurrent()) { GSVector2i size = m_capture.GetSize(); if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y)) { GSTexture::GSMap m; if(offscreen->Map(m)) { m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped()); offscreen->Unmap(); } m_dev->Recycle(offscreen); } } } }
// Convenience overload: forwards to the StretchRect overload that takes an
// explicit source rectangle, passing (0, 0, 1, 1) for it.
void GSDevice::StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear)
{
	const GSVector4 whole_source(0, 0, 1, 1);

	StretchRect(sTex, whole_source, dTex, dRect, shader, linear);
}
bool GSRenderer::Merge(int field) { bool en[2]; GSVector4i fr[2]; GSVector4i dr[2]; GSVector2i display_baseline = { INT_MAX, INT_MAX }; GSVector2i frame_baseline = { INT_MAX, INT_MAX }; for(int i = 0; i < 2; i++) { en[i] = IsEnabled(i); if(en[i]) { fr[i] = GetFrameRect(i); dr[i] = GetDisplayRect(i); display_baseline.x = min(dr[i].left, display_baseline.x); display_baseline.y = min(dr[i].top, display_baseline.y); frame_baseline.x = min(fr[i].left, frame_baseline.x); frame_baseline.y = min(fr[i].top, frame_baseline.y); //printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w); } } if(!en[0] && !en[1]) { return false; } GL_PUSH("Renderer Merge %d (0: enabled %d 0x%x, 1: enabled %d 0x%x)", s_n, en[0], m_regs->DISP[0].DISPFB.Block(), en[1], m_regs->DISP[1].DISPFB.Block()); // try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero) // // NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending) bool samesrc = en[0] && en[1] && m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP && m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) { // persona 4: // // fr[0] = 0 0 640 448 // fr[1] = 0 1 640 448 // dr[0] = 159 50 779 498 // dr[1] = 159 50 779 497 // // second image shifted up by 1 pixel and blended over itself // // god of war: // // fr[0] = 0 1 512 448 // fr[1] = 0 0 512 448 // dr[0] = 127 50 639 497 // dr[1] = 127 50 639 498 // // same just the first image shifted // // These kinds of cases are now fixed by the more generic frame_diff code below, as the code here was too specific and has become obsolete. 
// NOTE: Persona 4 and God Of War are not rare exceptions, many games have the same(or very similar) offsets. int topDiff = fr[0].top - fr[1].top; if (dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, topDiff, 0, topDiff)) || fr[1].eq(fr[0] + GSVector4i(0, topDiff, 0, topDiff)))) { // dq5: // // fr[0] = 0 1 512 445 // fr[1] = 0 0 512 444 // dr[0] = 127 50 639 494 // dr[1] = 127 50 639 494 int top = min(fr[0].top, fr[1].top); int bottom = min(fr[0].bottom, fr[1].bottom); fr[0].top = fr[1].top = top; fr[0].bottom = fr[1].bottom = bottom; } } GSVector2i fs(0, 0); GSVector2i ds(0, 0); GSTexture* tex[3] = {NULL, NULL, NULL}; int y_offset[3] = {0, 0, 0}; s_n++; bool feedback_merge = m_regs->EXTWRITE.WRITE == 1; if(samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge) { tex[0] = GetOutput(0, y_offset[0]); tex[1] = tex[0]; // saves one texture fetch y_offset[1] = y_offset[0]; } else { if(en[0]) tex[0] = GetOutput(0, y_offset[0]); if(en[1]) tex[1] = GetOutput(1, y_offset[1]); if(feedback_merge) tex[2] = GetFeedbackOutput(); } GSVector4 src[2]; GSVector4 src_hw[2]; GSVector4 dst[2]; for(int i = 0; i < 2; i++) { if(!en[i] || !tex[i]) continue; GSVector4i r = fr[i]; GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy(); GSVector2 off(0); GSVector2i display_diff(dr[i].left - display_baseline.x, dr[i].top - display_baseline.y); GSVector2i frame_diff(fr[i].left - frame_baseline.x, fr[i].top - frame_baseline.y); // Time Crisis 2/3 uses two side by side images when in split screen mode. 
// Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII if(display_diff.x > 2) { off.x = tex[i]->GetScale().x * display_diff.x; } // If the DX offset is too small then consider the status of frame memory offsets, prevents blurring on Tenchu: Fatal Shadows, Worms 3D else if(display_diff.x != frame_diff.x) { off.x = tex[i]->GetScale().x * frame_diff.x; } if(display_diff.y >= 4) // Shouldn't this be >= 2? { off.y = tex[i]->GetScale().y * display_diff.y; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { off.y /= 2; } } else if(display_diff.y != frame_diff.y) { off.y = tex[i]->GetScale().y * frame_diff.y; } dst[i] = GSVector4(off).xyxy() + scale * GSVector4(r.rsize()); fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); } ds = fs; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { ds.y *= 2; } m_real_size = ds; bool slbg = m_regs->PMODE.SLBG; if(tex[0] || tex[1]) { if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) { // the two outputs are identical, skip drawing one of them (the one that is alpha blended) tex[0] = NULL; } GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; m_dev->Merge(tex, src_hw, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c); if(m_regs->SMODE2.INT && m_interlace > 0) { if(m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting { int field2 = 0; int mode = 2; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } else { int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } if(m_shadeboost) { m_dev->ShadeBoost(); } if(m_shaderfx) { m_dev->ExternalFX(); } if(m_fxaa) { m_dev->FXAA(); } } return true; }
s_n++; } } return m_texture[i]; } template<uint32 primclass, uint32 tme, uint32 fst> void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) { #if 0//_M_SSE >= 0x501 // TODO: something isn't right here, this makes other functions slower (split load/store? old sse code in 3rd party lib?) GSVector8i o2((GSVector4i)m_context->XYOFFSET); GSVector8 tsize2(GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0)); for(int i = (int)m_vertex.next; i > 0; i -= 2, src += 2, dst += 2) // ok to overflow, allocator makes sure there is one more dummy vertex { GSVector8i v0 = GSVector8i::load<true>(src[0].m); GSVector8i v1 = GSVector8i::load<true>(src[1].m); GSVector8 stcq = GSVector8::cast(v0.ac(v1)); GSVector8i xyzuvf = v0.bd(v1); //GSVector8 stcq = GSVector8::load(&src[0].m[0], &src[1].m[0]); //GSVector8i xyzuvf = GSVector8i::load(&src[0].m[1], &src[1].m[1]); GSVector8i xy = xyzuvf.upl16() - o2; GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00());
movdqa(xmm1, xmm0); pshufd(xmm1, xmm1, _MM_SHUFFLE(1, 0, 3, 2)); punpcklwd(xmm0, xmm1); // if(!tme) c = c.srl16(7); if(m_env.sel.tfx == TFX_NONE) { psrlw(xmm0, 7); } // m_env.c.rb = c.xxxx(); // m_env.c.ga = c.zzzz(); movdqa(xmm1, xmm0); pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); movdqa(xmmword[&m_env.c.rb], xmm0); movdqa(xmmword[&m_env.c.ga], xmm1); } } const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] = { GSVector4(4.0f, 4.0f, 4.0f, 4.0f), GSVector4(0.0f, 1.0f, 2.0f, 3.0f), GSVector4(-1.0f, 0.0f, 1.0f, 2.0f), GSVector4(-2.0f, -1.0f, 0.0f, 1.0f), GSVector4(-3.0f, -2.0f, -1.0f, 0.0f), };
// Copies rectangle r of a render target's texture back into GS local memory.
// Nothing is done for non-render targets, for formats other than
// PSMCT32/24/16/16S, or while the target still has dirty regions.
//
// t: target to read from.
// r: rectangle to transfer; also used as the destination rectangle of the write.
void GSTextureCache9::Read(Target* t, const GSVector4i& r)
{
	if(t->m_type != RenderTarget)
	{
		// TODO

		return;
	}

	const GIFRegTEX0& TEX0 = t->m_TEX0;

	switch(TEX0.PSM)
	{
	case PSM_PSMCT32:
	case PSM_PSMCT24:
	case PSM_PSMCT16:
	case PSM_PSMCT16S:
		break;

	default:
		//ASSERT(0);
		return;
	}

	if(!t->m_dirty.empty())
	{
		return;
	}

	// printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0);

	const int width = r.width();
	const int height = r.height();

	// r scaled by the texture's scale and divided by the texture size.
	GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();

	GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, width, height);

	if(!offscreen)
	{
		return;
	}

	GSTexture::GSMap m;

	if(offscreen->Map(m))
	{
		// TODO: block level write

		GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

		switch(TEX0.PSM)
		{
		case PSM_PSMCT32:
			m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r);
			break;

		case PSM_PSMCT24:
			m_renderer->m_mem.WritePixel24(m.bits, m.pitch, o, r);
			break;

		case PSM_PSMCT16:
		case PSM_PSMCT16S:
			m_renderer->m_mem.WriteFrame16(m.bits, m.pitch, o, r);
			break;

		default:
			ASSERT(0);
		}

		offscreen->Unmap();
	}

	// The offscreen copy goes back to the device whether or not mapping succeeded.
	m_renderer->m_dev->Recycle(offscreen);
}
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear) { if(!sTex || !dTex) { ASSERT(0); return; } BeginScene(); GSVector2i ds = dTex->GetSize(); // om OMSetDepthStencilState(m_convert.dss, 0); OMSetBlendState(bs, 0); OMSetRenderTargets(dTex, NULL); // ia float left = dRect.x * 2 / ds.x - 1.0f; float top = 1.0f - dRect.y * 2 / ds.y; float right = dRect.z * 2 / ds.x - 1.0f; float bottom = 1.0f - dRect.w * 2 / ds.y; GSVertexPT1 vertices[] = { {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)}, {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)}, {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)}, {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)}, }; IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); IASetInputLayout(m_convert.il); IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); // vs VSSetShader(m_convert.vs, NULL); // gs /* NVIDIA HACK!!!! Not sure why, but having the Geometry shader disabled causes the strange stretching in recent drivers*/ GSSelector sel; //Don't use shading for stretching, we're just passing through - Note: With Win10 it seems to cause other bugs when shading is off if any of the coords is greater than 0 //I really don't know whats going on there, but this seems to resolve it mostly (if not all, not tester a lot of games, only BIOS, FFXII and VP2) //sel.iip = (sRect.y > 0.0f || sRect.w > 0.0f) ? 1 : 0; //sel.prim = 2; //Triangle Strip //SetupGS(sel); GSSetShader(NULL, NULL); /*END OF HACK*/ // // ps PSSetShaderResources(sTex, NULL); PSSetSamplerState(linear ? m_convert.ln : m_convert.pt, NULL); PSSetShader(ps, ps_cb); // DrawPrimitive(); // EndScene(); PSSetShaderResources(NULL, NULL); }