void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor) { GSVertexSW dv[3]; dv[0] = v[1] - v[0]; dv[1] = v[2] - v[0]; dv[2] = v[2] - v[1]; GSVector4 dx = dv[0].p.upl(dv[1].p).xyxy(dv[2].p); GSVector4 dy = dv[0].p.upl(dv[1].p).zwyx(dv[2].p); GSVector4 a = dx.abs() < dy.abs(); // |x| <= |y| GSVector4 b = dx < GSVector4::zero(); // x < 0 GSVector4 c = dv[1].p * (dv[0].p / dv[1].p).yyyy() < dv[0].p; // longest.p.x < 0 int i = a.mask(); int j = ((a | b) ^ c.xxxx()).mask() ^ 2; // evil GSVertexSW dscan; dscan.p = GSVector4::zero(); dscan.t = GSVector4::zero(); dscan.c = GSVector4::zero(); m_dsf.ssp(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small) DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1); DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2); DrawEdge(v[1], v[2], dv[2], scissor, i & 4, j & 4); }
// Rasterizes a triangle whose first two vertices form the edge named
// "longest" below and whose third vertex (v[2]) is the point both side edges
// run towards.
//
// v       - the three vertices; v[0] / v[1] are modified in place because the
//           left edge is stepped through a reference into this array.
// scissor - integer clip rectangle; only .y (top) and .w (bottom) are used
//           here, the whole rectangle is forwarded to DrawTriangleSection().
void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
{
    GSVertexSW longest;

    longest.p = v[1].p - v[0].p;

    // bit 0: longest.p.x > 0, bit 1: longest.p.x == 0
    int flags = (longest.p > GSVector4::zero()).upl(longest.p == GSVector4::zero()).mask();

    if((flags & 2) != 0)
    {
        // zero-width span, nothing to draw
        return;
    }

    int rightIndex = flags & 1;

    GSVertexSW& l = v[1 - rightIndex];
    GSVector4& r = v[rightIndex].p;

    GSVector4i rows(l.p.xyxy(v[2].p).ceil());

    int top = rows.extract32<1>();
    int bottom = rows.extract32<3>();

    // clip the scanline range against the scissor
    top = top < scissor.y ? scissor.y : top;
    bottom = bottom > scissor.w ? scissor.w : bottom;

    if(top >= bottom)
    {
        return;
    }

    longest.t = v[1].t - v[0].t;
    longest.c = v[1].c - v[0].c;

    // per-pixel step along a scanline
    GSVertexSW dscan = longest * longest.p.xxxx().rcp();

    GSVertexSW leftSpan = v[2] - l;
    GSVector4 rightSpan = v[2].p - r;

    // per-scanline steps of both side edges
    GSVertexSW dl = leftSpan / leftSpan.p.yyyy();
    GSVector4 dr = rightSpan / rightSpan.yyyy();

    // distance from the left vertex to the first scanline actually drawn
    float prestep = (float)top - l.p.y;

    // r.x => l.y
    l.p = l.p.upl(r).xyzw(l.p);

    // dr.x => dl.y
    dl.p = dl.p.upl(dr).xyzw(dl.p);

    if(prestep > 0)
    {
        l += dl * prestep;
    }

    // TODO: (m_dsf.ssp)(v, dscan);
    (m_ds->*m_dsf.sp)(v, dscan);

    DrawTriangleSection(top, bottom, l, dl, dscan, scissor);
}
// Rasterizes a triangle whose first two vertices form the edge named
// "longest" below and whose third vertex (v[2]) is the point both side edges
// run towards. Scissor clipping of the scanline range is done in floating
// point.
//
// v       - the three vertices; v[0] / v[1] are modified in place because the
//           left edge is stepped through a reference into this array.
// scissor - integer clip rectangle; converted to float once and forwarded to
//           DrawTriangleSection() in that form.
void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
{
    GSVertexSW longest;

    longest.p = v[1].p - v[0].p;

    // bit 0: sign bit of longest.p.x, bit 1: longest.p.x == 0
    int flags = longest.p.upl(longest.p == GSVector4::zero()).mask();

    if((flags & 2) != 0)
    {
        // zero-width span, nothing to draw
        return;
    }

    int leftIndex = flags & 1;

    GSVertexSW& l = v[leftIndex];
    GSVector4& r = v[1 - leftIndex].p;

    GSVector4 fscissor(scissor);

    // Rounded-up y of the left vertex and of v[2], clamped from above by the
    // scissor top (rowsFromTop) and from below by the scissor bottom
    // (rowsToBottom).
    GSVector4 rows = l.p.upl(v[2].p).ceil();
    GSVector4 rowsFromTop = rows.max(fscissor.yyyy());
    GSVector4 rowsToBottom = rows.min(fscissor.wwww());

    GSVector4i range = GSVector4i(rowsFromTop.zzww(rowsToBottom));

    int top = range.extract32<0>();
    int bottom = range.extract32<2>();

    if(top >= bottom)
    {
        return;
    }

    longest.t = v[1].t - v[0].t;
    longest.c = v[1].c - v[0].c;

    // per-pixel step along a scanline
    GSVertexSW dscan = longest * longest.p.xxxx().rcp();

    GSVertexSW leftSpan = v[2] - l;
    GSVector4 rightSpan = v[2].p - r;

    // per-scanline steps of both side edges
    GSVertexSW dl = leftSpan / leftSpan.p.yyyy();
    GSVector4 dr = rightSpan / rightSpan.yyyy();

    // distance from the left vertex to the first scanline actually drawn
    GSVector4 prestep = rowsFromTop.zzzz() - l.p.yyyy();

    // r.x => l.y
    l.p = l.p.upl(r).xyzw(l.p);

    // dr.x => dl.y
    dl.p = dl.p.upl(dr).xyzw(dl.p);

    l += dl * prestep;

    m_dsf.ssp(v, dscan);

    DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
}
// Draws one screen-aligned quad covering the bounding box recorded in m_vt,
// using the m_fba depth-stencil / blend states and the device's convert
// shaders (pixel shader slot 4).
//
// rt - current render target; only its scale and size are read. Nothing is
//      done when it is NULL.
void GSRendererDX9::UpdateFBA(GSTexture* rt)
{
    if(rt == NULL)
    {
        return;
    }

    GSDevice9* dev = (GSDevice9*)m_dev;

    dev->BeginScene();

    // om

    dev->OMSetDepthStencilState(&m_fba.dss);
    dev->OMSetBlendState(&m_fba.bs, 0);

    // ia

    GSVector4 scale = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
    GSVector4 bias = GSVector4(-1.0f, 1.0f);

    // Bounding box (min.xy, max.xy) widened by one unit on each side, scaled,
    // then saturated.
    GSVector4 box = m_vt.m_min.p.xyxy(m_vt.m_max.p) + bias.xxyy();
    GSVector4 src = (box * scale.xyxy()).sat(bias.zzyy());

    // Remap to [-1, 1]; y is negated when the vertices are emitted.
    GSVector4 dst = src * 2.0f + bias.xxxx();

    float left = dst.x;
    float right = dst.z;
    float top = -dst.y;
    float bottom = -dst.w;

    GSVertexPT1 vertices[] =
    {
        {GSVector4(left, top, 0.5f, 1.0f), GSVector2(0)},
        {GSVector4(right, top, 0.5f, 1.0f), GSVector2(0)},
        {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(0)},
        {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(0)},
    };

    dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
    dev->IASetInputLayout(dev->m_convert.il);
    dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);

    // vs

    dev->VSSetShader(dev->m_convert.vs, NULL, 0);

    // ps

    dev->PSSetShader(dev->m_convert.ps[4], NULL, 0);

    //

    dev->DrawPrimitive();

    //

    dev->EndScene();
}
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& fscissor) { ASSERT(top < bottom); while(1) { do { if(IsOneOfMyScanlines(top)) { GSVector4 lr = l.p.xyxy(r).ceil(); GSVector4 lrmax = lr.max(fscissor.xxxx()); GSVector4 lrmin = lr.min(fscissor.zzzz()); GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin)); int left = lri.extract32<0>(); int right = lri.extract32<2>(); int pixels = right - left; if(pixels > 0) { m_stats.pixels += pixels; GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx(); m_dsf.ssl(right, left, top, scan); } } } while(0); if(++top >= bottom) break; l += dl; r += dr; } }
void GSRendererDX9::VertexKick(bool skip) { GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16()); if(tme && !fst) { p = p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q)); } else { p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z)); } GSVertexHW9& dst = m_vl.AddTail(); dst.p = p; int Uadjust = 0; int Vadjust = 0; if(tme) { if(fst) { dst.t = m_v.GetUV(); #ifdef USE_UPSCALE_HACKS int Udiff = 0; int Vdiff = 0; int multiplier = GetUpscaleMultiplier(); if(multiplier > 1) { Udiff = m_v.UV.U & 4095; Vdiff = m_v.UV.V & 4095; if(Udiff != 0) { if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; } else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; } } if(Vdiff != 0) { if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; } else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; } } Udiff = m_v.UV.U & 255; Vdiff = m_v.UV.V & 255; if(Udiff != 0) { if (Udiff >= 248) { Uadjust = -1; } else if (Udiff <= 8) { Uadjust = 1; } } if(Vdiff != 0) { if (Vdiff >= 248) { Vadjust = -1; } else if (Vdiff <= 8) { Vadjust = 1; } } Udiff = m_v.UV.U & 15; Vdiff = m_v.UV.V & 15; if(Udiff != 0) { if (Udiff >= 15) { Uadjust = -1; } else if (Udiff <= 1) { Uadjust = 1; } } if(Vdiff != 0) { if (Vdiff >= 15) { Vadjust = -1; } else if (Vdiff <= 1) { Vadjust = 1; } } } dst.t.x -= (float) Uadjust; dst.t.y -= (float) Vadjust; #endif } else { dst.t = GSVector4::loadl(&m_v.ST); } } dst._c0() = m_v.RGBAQ.u32[0]; dst._c1() = m_v.FOG.u32[1]; // // BaseDrawingKick can never return NULL here because the DrawingKick function // tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only // condition where this function would return NULL). 
int count = 0; if(GSVertexHW9* v = DrawingKick<prim>(skip, count)) { GSVector4 scissor = m_context->scissor.dx9; GSVector4 pmin, pmax; switch(prim) { case GS_POINTLIST: pmin = v[0].p; pmax = v[0].p; break; case GS_LINELIST: case GS_LINESTRIP: case GS_SPRITE: pmin = v[0].p.min(v[1].p); pmax = v[0].p.max(v[1].p); break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: pmin = v[0].p.min(v[1].p).min(v[2].p); pmax = v[0].p.max(v[1].p).max(v[2].p); break; } GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); switch(prim) { case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: case GS_SPRITE: test |= pmin == pmax; break; } if(test.mask() & 3) { return; } switch(prim) { case GS_POINTLIST: break; case GS_LINELIST: case GS_LINESTRIP: if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();} break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0() = v[2]._c0();} break; case GS_SPRITE: if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();} v[0].p.z = v[1].p.z; v[0].p.w = v[1].p.w; v[0]._c1() = v[1]._c1(); v[2] = v[1]; v[3] = v[1]; v[1].p.y = v[0].p.y; v[1].t.y = v[0].t.y; v[2].p.x = v[0].p.x; v[2].t.x = v[0].t.x; v[4] = v[1]; v[5] = v[2]; count += 4; break; } m_count += count; } }
// Scans an indexed vertex list and stores the component-wise minimum and
// maximum of position, texture coordinate and colour in m_min / m_max.
//
// vertex - vertex array; reinterpreted as GSVertex.
// index  - index array; consumed n entries per primitive.
// count  - number of indices to process.
//
// NOTE(review): primclass, iip, tme, fst and color are not declared inside
// this block; presumably they are template parameters of this member -
// confirm.
// NOTE(review): m[0] / m[1] look like the two 128-bit halves of a GSVertex
// (texture coordinate + colour + q, then position + uv + fog) - confirm
// against the GSVertex declaration.
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
{
    const GSDrawingContext* context = m_state->m_context;

    // number of indices per primitive
    int n = 1;

    switch(primclass)
    {
    case GS_POINT_CLASS:
        n = 1;
        break;
    case GS_LINE_CLASS:
    case GS_SPRITE_CLASS:
        n = 2;
        break;
    case GS_TRIANGLE_CLASS:
        n = 3;
        break;
    }

    // Running minima / maxima, seeded from s_minmax (floats) or from all-ones /
    // zero (integers).
    GSVector4 tmin = s_minmax.xxxx();
    GSVector4 tmax = s_minmax.yyyy();
    GSVector4i cmin = GSVector4i::xffffffff();
    GSVector4i cmax = GSVector4i::zero();

    // With _M_SSE >= 0x401 the position is tracked with unsigned 32-bit integer
    // min/max, otherwise it is converted to float first.
    #if _M_SSE >= 0x401

    GSVector4i pmin = GSVector4i::xffffffff();
    GSVector4i pmax = GSVector4i::zero();

    #else

    GSVector4 pmin = s_minmax.xxxx();
    GSVector4 pmax = s_minmax.yyyy();

    #endif

    const GSVertex* RESTRICT v = (GSVertex*)vertex;

    for(int i = 0; i < count; i += n)
    {
        if(primclass == GS_POINT_CLASS)
        {
            GSVector4i c(v[index[i]].m[0]);

            if(color)
            {
                cmin = cmin.min_u8(c);
                cmax = cmax.max_u8(c);
            }

            if(tme)
            {
                if(!fst)
                {
                    // scale s and t by an approximate reciprocal of q
                    GSVector4 stq = GSVector4::cast(c);

                    GSVector4 q = stq.wwww();

                    stq = (stq.xyww() * q.rcpnr()).xyww(q);

                    tmin = tmin.min(stq);
                    tmax = tmax.max(stq);
                }
                else
                {
                    GSVector4i uv(v[index[i]].m[1]);

                    GSVector4 st = GSVector4(uv.uph16()).xyxy();

                    tmin = tmin.min(st);
                    tmax = tmax.max(st);
                }
            }

            GSVector4i xyzf(v[index[i]].m[1]);

            GSVector4i xy = xyzf.upl16();
            GSVector4i z = xyzf.yyyy();

            #if _M_SSE >= 0x401

            GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf));

            pmin = pmin.min_u32(p);
            pmax = pmax.max_u32(p);

            #else

            // z is shifted right by one bit before the float conversion; the
            // final scale applied to m_min.p / m_max.p multiplies z by 2
            GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww())));

            pmin = pmin.min(p);
            pmax = pmax.max(p);

            #endif
        }
        else if(primclass == GS_LINE_CLASS)
        {
            GSVector4i c0(v[index[i + 0]].m[0]);
            GSVector4i c1(v[index[i + 1]].m[0]);

            if(color)
            {
                if(iip)
                {
                    cmin = cmin.min_u8(c0.min_u8(c1));
                    cmax = cmax.max_u8(c0.max_u8(c1));
                }
                else
                {
                    // without iip only the last vertex contributes its colour
                    cmin = cmin.min_u8(c1);
                    cmax = cmax.max_u8(c1);
                }
            }

            if(tme)
            {
                if(!fst)
                {
                    GSVector4 stq0 = GSVector4::cast(c0);
                    GSVector4 stq1 = GSVector4::cast(c1);

                    // one reciprocal for both q values
                    GSVector4 q = stq0.wwww(stq1).rcpnr();

                    stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
                    stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);

                    tmin = tmin.min(stq0.min(stq1));
                    tmax = tmax.max(stq0.max(stq1));
                }
                else
                {
                    GSVector4i uv0(v[index[i + 0]].m[1]);
                    GSVector4i uv1(v[index[i + 1]].m[1]);

                    GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
                    GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();

                    tmin = tmin.min(st0.min(st1));
                    tmax = tmax.max(st0.max(st1));
                }
            }

            GSVector4i xyzf0(v[index[i + 0]].m[1]);
            GSVector4i xyzf1(v[index[i + 1]].m[1]);

            GSVector4i xy0 = xyzf0.upl16();
            GSVector4i z0 = xyzf0.yyyy();
            GSVector4i xy1 = xyzf1.upl16();
            GSVector4i z1 = xyzf1.yyyy();

            #if _M_SSE >= 0x401

            GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
            GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));

            pmin = pmin.min_u32(p0.min_u32(p1));
            pmax = pmax.max_u32(p0.max_u32(p1));

            #else

            GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
            GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));

            pmin = pmin.min(p0.min(p1));
            pmax = pmax.max(p0.max(p1));

            #endif
        }
        else if(primclass == GS_TRIANGLE_CLASS)
        {
            GSVector4i c0(v[index[i + 0]].m[0]);
            GSVector4i c1(v[index[i + 1]].m[0]);
            GSVector4i c2(v[index[i + 2]].m[0]);

            if(color)
            {
                if(iip)
                {
                    cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1));
                    cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1));
                }
                else
                {
                    // without iip only the last vertex contributes its colour
                    cmin = cmin.min_u8(c2);
                    cmax = cmax.max_u8(c2);
                }
            }

            if(tme)
            {
                if(!fst)
                {
                    GSVector4 stq0 = GSVector4::cast(c0);
                    GSVector4 stq1 = GSVector4::cast(c1);
                    GSVector4 stq2 = GSVector4::cast(c2);

                    // one reciprocal for all three q values
                    GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();

                    stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
                    stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
                    stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);

                    tmin = tmin.min(stq2).min(stq0.min(stq1));
                    tmax = tmax.max(stq2).max(stq0.max(stq1));
                }
                else
                {
                    GSVector4i uv0(v[index[i + 0]].m[1]);
                    GSVector4i uv1(v[index[i + 1]].m[1]);
                    GSVector4i uv2(v[index[i + 2]].m[1]);

                    GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
                    GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
                    GSVector4 st2 = GSVector4(uv2.uph16()).xyxy();

                    tmin = tmin.min(st2).min(st0.min(st1));
                    tmax = tmax.max(st2).max(st0.max(st1));
                }
            }

            GSVector4i xyzf0(v[index[i + 0]].m[1]);
            GSVector4i xyzf1(v[index[i + 1]].m[1]);
            GSVector4i xyzf2(v[index[i + 2]].m[1]);

            GSVector4i xy0 = xyzf0.upl16();
            GSVector4i z0 = xyzf0.yyyy();
            GSVector4i xy1 = xyzf1.upl16();
            GSVector4i z1 = xyzf1.yyyy();
            GSVector4i xy2 = xyzf2.upl16();
            GSVector4i z2 = xyzf2.yyyy();

            #if _M_SSE >= 0x401

            GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
            GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
            GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2));

            pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1));
            pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1));

            #else

            GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
            GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
            GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww())));

            pmin = pmin.min(p2).min(p0.min(p1));
            pmax = pmax.max(p2).max(p0.max(p1));

            #endif
        }
        else if(primclass == GS_SPRITE_CLASS)
        {
            GSVector4i c0(v[index[i + 0]].m[0]);
            GSVector4i c1(v[index[i + 1]].m[0]);

            if(color)
            {
                if(iip)
                {
                    cmin = cmin.min_u8(c0.min_u8(c1));
                    cmax = cmax.max_u8(c0.max_u8(c1));
                }
                else
                {
                    // without iip only the last vertex contributes its colour
                    cmin = cmin.min_u8(c1);
                    cmax = cmax.max_u8(c1);
                }
            }

            if(tme)
            {
                if(!fst)
                {
                    GSVector4 stq0 = GSVector4::cast(c0);
                    GSVector4 stq1 = GSVector4::cast(c1);

                    // both corners are scaled by the second vertex's q
                    GSVector4 q = stq1.wwww().rcpnr();

                    stq0 = (stq0.xyww() * q).xyww(stq1);
                    stq1 = (stq1.xyww() * q).xyww(stq1);

                    tmin = tmin.min(stq0.min(stq1));
                    tmax = tmax.max(stq0.max(stq1));
                }
                else
                {
                    GSVector4i uv0(v[index[i + 0]].m[1]);
                    GSVector4i uv1(v[index[i + 1]].m[1]);

                    GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
                    GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();

                    tmin = tmin.min(st0.min(st1));
                    tmax = tmax.max(st0.max(st1));
                }
            }

            GSVector4i xyzf0(v[index[i + 0]].m[1]);
            GSVector4i xyzf1(v[index[i + 1]].m[1]);

            GSVector4i xy0 = xyzf0.upl16();
            GSVector4i z0 = xyzf0.yyyy();
            GSVector4i xy1 = xyzf1.upl16();
            GSVector4i z1 = xyzf1.yyyy();

            // Unlike the line case, the last lane of both corners is taken from
            // the second vertex (xyzf1).
            #if _M_SSE >= 0x401

            GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1));
            GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));

            pmin = pmin.min_u32(p0.min_u32(p1));
            pmax = pmax.max_u32(p0.max_u32(p1));

            #else

            GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww())));
            GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));

            pmin = pmin.min(p0.min(p1));
            pmax = pmax.max(p0.max(p1));

            #endif
        }
    }

    #if _M_SSE >= 0x401

    // Shift z right by one bit so that it matches the float path above; the
    // scale below multiplies z by 2 again.
    pmin = pmin.blend16<0x30>(pmin.srl32(1));
    pmax = pmax.blend16<0x30>(pmax.srl32(1));

    #endif

    // Subtract the context's XYOFFSET, then scale x and y by 1/16 and z by 2.
    GSVector4 o(context->XYOFFSET);
    GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);

    m_min.p = (GSVector4(pmin) - o) * s;
    m_max.p = (GSVector4(pmax) - o) * s;

    if(tme)
    {
        if(fst)
        {
            s = GSVector4(1.0f / 16, 1.0f).xxyy();
        }
        else
        {
            // scale the divided s / t by the texture size (2^TW x 2^TH)
            s = GSVector4(1 << context->TEX0.TW, 1 << context->TEX0.TH, 1, 1);
        }

        m_min.t = tmin * s;
        m_max.t = tmax * s;
    }
    else
    {
        m_min.t = GSVector4::zero();
        m_max.t = GSVector4::zero();
    }

    if(color)
    {
        // widen the packed 8-bit channels of one 32-bit lane to 32 bits each
        m_min.c = cmin.zzzz().u8to32();
        m_max.c = cmax.zzzz().u8to32();
    }
    else
    {
        m_min.c = GSVector4i::zero();
        m_max.c = GSVector4i::zero();
    }
}
void GSRendererDX9::SetupIA() { D3DPRIMITIVETYPE topology; switch(m_vt.m_primclass) { case GS_POINT_CLASS: topology = D3DPT_POINTLIST; break; case GS_LINE_CLASS: topology = D3DPT_LINELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 2) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 1]; m_index.buff[i + 1] = tmp; } } break; case GS_TRIANGLE_CLASS: topology = D3DPT_TRIANGLELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 3) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 2]; m_index.buff[i + 2] = tmp; } } break; case GS_SPRITE_CLASS: topology = D3DPT_TRIANGLELIST; // each sprite converted to quad needs twice the space while(m_vertex.tail * 2 > m_vertex.maxcount) { GrowVertexBuffer(); } // assume vertices are tightly packed and sequentially indexed (it should be the case) if(m_vertex.next >= 2) { size_t count = m_vertex.next; int i = (int)count * 2 - 4; GSVertex* s = &m_vertex.buff[count - 2]; GSVertex* q = &m_vertex.buff[count * 2 - 4]; uint32* RESTRICT index = &m_index.buff[count * 3 - 6]; for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6) { GSVertex v0 = s[0]; GSVertex v1 = s[1]; v0.RGBAQ = v1.RGBAQ; v0.XYZ.Z = v1.XYZ.Z; v0.FOG = v1.FOG; q[0] = v0; q[3] = v1; // swap x, s, u uint16 x = v0.XYZ.X; v0.XYZ.X = v1.XYZ.X; v1.XYZ.X = x; float s = v0.ST.S; v0.ST.S = v1.ST.S; v1.ST.S = s; uint16 u = v0.U; v0.U = v1.U; v1.U = u; q[1] = v0; q[2] = v1; index[0] = i + 0; index[1] = i + 1; index[2] = i + 2; index[3] = i + 1; index[4] = i + 2; index[5] = i + 3; } m_vertex.head = m_vertex.tail = m_vertex.next = count * 2; m_index.tail = count * 3; } break; default: __assume(0); } GSDevice9* dev = (GSDevice9*)m_dev; (*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? 
D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO void* ptr = NULL; if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next)) { GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff; GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr; for(uint32 i = 0; i < m_vertex.next; i++, s++, d++) { GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16()); if(PRIM->TME && !PRIM->FST) { p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q)); } else { p = p.xyxy(GSVector4::load((float)s->XYZ.Z)); } GSVector4 t = GSVector4::zero(); if(PRIM->TME) { if(PRIM->FST) { if(UserHacks_WildHack && !isPackedUV_HackFlag) { t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16()); //printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : ""); } else { t = GSVector4(GSVector4i::load(s->UV).upl16()); } } else { t = GSVector4::loadl(&s->ST); } } t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG))); d->p = p; d->t = t; } dev->IAUnmapVertexBuffer(); } dev->IASetIndexBuffer(m_index.buff, m_index.tail); dev->IASetPrimitiveTopology(topology); }
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) { GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; const GSVector2i& rtsize = rt->GetSize(); const GSVector2& rtscale = rt->GetScale(); bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; GSTexture* rtcopy = NULL; ASSERT(m_dev != NULL); GSDeviceDX* dev = (GSDeviceDX*)m_dev; if(DATE) { if(dev->HasStencil()) { GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); GSVector4 dst = src * 2.0f + o.xxxx(); GSVertexPT1 vertices[] = { {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, }; dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); } else { rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); // I'll use VertexTrace when I consider it more trustworthy dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); } } // dev->BeginScene(); // om GSDeviceDX::OMDepthStencilSelector om_dssel; if(context->TEST.ZTE) { om_dssel.ztst = context->TEST.ZTST; om_dssel.zwe = !context->ZBUF.ZMSK; } else { om_dssel.ztst = ZTST_ALWAYS; } if(m_fba) { om_dssel.fba = context->FBA.FBA; } GSDeviceDX::OMBlendSelector om_bsel; if(!IsOpaque()) { om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; om_bsel.a = context->ALPHA.A; om_bsel.b = context->ALPHA.B; om_bsel.c = context->ALPHA.C; om_bsel.d = context->ALPHA.D; if(env.PABE.PABE) { if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) { // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader // cannot output anything over 0x80 (== 1.0) blending with 0x80 or 
turning it off gives the same result om_bsel.abe = 0; } else { //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. //ASSERT(0); } } } om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); // vs GSDeviceDX::VSSelector vs_sel; vs_sel.tme = PRIM->TME; vs_sel.fst = PRIM->FST; vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; vs_sel.rtcopy = !!rtcopy; // The real GS appears to do no masking based on the Z buffer format and writing larger Z values // than the buffer supports seems to be an error condition on the real GS, causing it to crash. // We are probably receiving bad coordinates from VU1 in these cases. if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) { if(context->ZBUF.PSM == PSM_PSMZ24) { if(m_vt.m_max.p.z > 0xffffff) { ASSERT(m_vt.m_min.p.z > 0xffffff); // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. if (m_vt.m_min.p.z > 0xffffff) { vs_sel.bppz = 1; om_dssel.ztst = ZTST_ALWAYS; } } } else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { if(m_vt.m_max.p.z > 0xffff) { ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo // Fixme : Same as above, I guess. if (m_vt.m_min.p.z > 0xffff) { vs_sel.bppz = 2; om_dssel.ztst = ZTST_ALWAYS; } } } } GSDeviceDX::VSConstantBuffer vs_cb; float sx = 2.0f * rtscale.x / (rtsize.x << 4); float sy = 2.0f * rtscale.y / (rtsize.y << 4); float ox = (float)(int)context->XYOFFSET.OFX; float oy = (float)(int)context->XYOFFSET.OFY; float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, //because DX10 and DX9 have a different pixel center.) // //The resulting shifted output aligns better with common blending / corona / blurring effects, //but introduces a few bad pixels on the edges. 
if(rt->LikelyOffset) { // DX9 has pixelcenter set to 0.0, so give it some value here if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } ox2 *= rt->OffsetHack_modx; oy2 *= rt->OffsetHack_mody; } vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); // gs GSDeviceDX::GSSelector gs_sel; gs_sel.iip = PRIM->IIP; gs_sel.prim = m_vt.m_primclass; // ps GSDeviceDX::PSSelector ps_sel; GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSConstantBuffer ps_cb; if(DATE) { if(dev->HasStencil()) { om_dssel.date = 1; } else { ps_sel.date = 1 + context->TEST.DATM; } } if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) { ps_sel.colclip = 1; } ps_sel.clr1 = om_bsel.IsCLR1(); ps_sel.fba = context->FBA.FBA; ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; if(UserHacks_AlphaHack) ps_sel.aout = 1; if(PRIM->FGE) { ps_sel.fog = 1; ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; } if(context->TEST.ATE) { ps_sel.atst = context->TEST.ATST; switch(ps_sel.atst) { case ATST_LESS: ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); break; case ATST_GREATER: ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); break; default: ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; break; } } else { ps_sel.atst = ATST_ALWAYS; } if(tex) { ps_sel.wms = context->CLAMP.WMS; ps_sel.wmt = context->CLAMP.WMT; ps_sel.fmt = tex->m_fmt; ps_sel.aem = env.TEXA.AEM; ps_sel.tfx = context->TEX0.TFX; ps_sel.tcc = context->TEX0.TCC; ps_sel.ltf = m_filter == 2 ? 
m_vt.IsLinear() : m_filter; ps_sel.rt = tex->m_target; int w = tex->m_texture->GetWidth(); int h = tex->m_texture->GetHeight(); int tw = (int)(1 << context->TEX0.TW); int th = (int)(1 << context->TEX0.TH); GSVector4 WH(tw, th, w, h); if(PRIM->FST) { vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); //Maybe better? //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); ps_sel.fst = 1; } ps_cb.WH = WH; ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); GSVector4 clamp(ps_cb.MskFix); GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); ps_cb.MinMax = clamp / WH.xyxy(); ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; ps_ssel.ltf = ps_sel.ltf; } else {
void GSRendererDX9::SetupIA(const float& sx, const float& sy) { D3DPRIMITIVETYPE topology; switch(m_vt.m_primclass) { case GS_POINT_CLASS: topology = D3DPT_POINTLIST; break; case GS_LINE_CLASS: topology = D3DPT_LINELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 2) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 1]; m_index.buff[i + 1] = tmp; } } break; case GS_TRIANGLE_CLASS: topology = D3DPT_TRIANGLELIST; if(PRIM->IIP == 0) { for(size_t i = 0, j = m_index.tail; i < j; i += 3) { uint32 tmp = m_index.buff[i + 0]; m_index.buff[i + 0] = m_index.buff[i + 2]; m_index.buff[i + 2] = tmp; } } break; case GS_SPRITE_CLASS: topology = D3DPT_TRIANGLELIST; // each sprite converted to quad needs twice the space Lines2Sprites(); break; default: __assume(0); } GSDevice9* dev = (GSDevice9*)m_dev; (*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO void* ptr = NULL; if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next)) { GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff; GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr; for(uint32 i = 0; i < m_vertex.next; i++, s++, d++) { GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16()); if(PRIM->TME && !PRIM->FST) { p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q)); } else { p = p.xyxy(GSVector4::load((float)s->XYZ.Z)); } GSVector4 t = GSVector4::zero(); if(PRIM->TME) { if(PRIM->FST) { if(UserHacks_WildHack && !isPackedUV_HackFlag) { t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16()); //printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : ""); } else { t = GSVector4(GSVector4i::load(s->UV).upl16()); } } else { t = GSVector4::loadl(&s->ST); } } t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG))); d->p = p; d->t = t; } dev->IAUnmapVertexBuffer(); } dev->IASetIndexBuffer(m_index.buff, m_index.tail); dev->IASetPrimitiveTopology(topology); }
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side) { // orientation: // - true: |dv.p.y| > |dv.p.x| // - false |dv.p.x| > |dv.p.y| // side: // - true: top/left edge // - false: bottom/right edge // TODO: bit slow and too much duplicated code // TODO: inner pre-step is still missing (hardly noticable) GSVector4 fscissor(scissor); GSVector4 lrtb = v0.p.upl(v1.p).ceil(); if(orientation) { GSVector4 tbmax = lrtb.max(fscissor.yyyy()); GSVector4 tbmin = lrtb.min(fscissor.wwww()); GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin)); int top, bottom; GSVertexSW edge, dedge; if((dv.p >= GSVector4::zero()).mask() & 2) { top = tbi.extract32<0>(); bottom = tbi.extract32<3>(); if(top >= bottom) return; edge = v0; dedge = dv / dv.p.yyyy(); edge += dedge * (tbmax.zzzz() - edge.p.yyyy()); } else { top = tbi.extract32<1>(); bottom = tbi.extract32<2>(); if(top >= bottom) return; edge = v1; dedge = dv / dv.p.yyyy(); edge += dedge * (tbmax.wwww() - edge.p.yyyy()); } GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000); int x = p.extract32<0>(); int dx = p.extract32<1>(); if(side) { while(1) { do { int xi = x >> 16; int xf = x & 0xffff; if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi)) { m_stats.pixels++; edge.t.u32[3] = (0x10000 - xf) & 0xffff; m_dsf.ssle(xi + 1, xi, top, edge); edge.t.u32[3] = 0; } } while(0); if(++top >= bottom) break; edge += dedge; x += dx; } } else { while(1) { do { int xi = (x >> 16) + 1; int xf = x & 0xffff; if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi)) { m_stats.pixels++; edge.t.u32[3] = xf; m_dsf.ssle(xi + 1, xi, top, edge); edge.t.u32[3] = 0; } } while(0); if(++top >= bottom) break; edge += dedge; x += dx; } } }
// Rasterizes a general triangle as two sections: scanlines between v[0] and
// v[1], then scanlines between v[1] and v[2].
//
// v       - the three vertices; v[0] is modified in place because the left
//           edge is stepped through a reference to it.
// scissor - integer clip rectangle; converted to float once and forwarded to
//           DrawTriangleSection() in that form.
void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scissor)
{
    // Edge deltas: [0] = v0 -> v1, [1] = v0 -> v2, [2] = v1 -> v2 (filled in
    // only when the second section is drawn).
    GSVertexSW delta[3];

    delta[0] = v[1] - v[0];
    delta[1] = v[2] - v[0];

    // Point on the v0 -> v2 edge at the height of v[1], relative to v[1].
    GSVertexSW longest = delta[1] * (delta[0].p / delta[1].p).yyyy() - delta[0];

    // bit 0: sign bit of longest.p.x, bit 1: longest.p.x == 0
    int flags = longest.p.upl(longest.p == GSVector4::zero()).mask();

    if((flags & 2) != 0)
    {
        // zero-width triangle, nothing to draw
        return;
    }

    int leftEdge = flags & 1;

    // per-pixel step along a scanline
    GSVertexSW dscan = longest * longest.p.xxxx().rcp();

    m_dsf.ssp(v, dscan);

    GSVector4 fscissor(scissor);

    // Rounded-up y of (v0, v1, v1, v2), clamped from above by the scissor top
    // (rowsFromTop) and from below by the scissor bottom (rowsToBottom).
    GSVector4 rows = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil();
    GSVector4 rowsFromTop = rows.max(fscissor.yyyy());
    GSVector4 rowsToBottom = rows.min(fscissor.wwww());

    GSVector4i range = GSVector4i(rowsFromTop.xzyw(rowsToBottom));

    // first section

    int top = range.extract32<0>();
    int bottom = range.extract32<2>();

    GSVertexSW& l = v[0];
    GSVector4 r = v[0].p;

    GSVertexSW dl = delta[leftEdge] / delta[leftEdge].p.yyyy();
    GSVector4 dr = delta[1 - leftEdge].p / delta[1 - leftEdge].p.yyyy();

    // distance from v[0] to the first scanline actually drawn
    GSVector4 prestep = rowsFromTop.xxxx() - l.p.yyyy();

    l += dl * prestep;
    r += dr * prestep;

    if(top < bottom)
    {
        DrawTriangleSection(top, bottom, l, dl, r, dr, dscan, fscissor);
    }

    // second section

    top = range.y;
    bottom = range.w;

    if(top >= bottom)
    {
        return;
    }

    // Restart from v[1] whichever edge ended there; the other edge keeps
    // running towards v[2].
    if(leftEdge == 0)
    {
        l = v[1];

        delta[2] = v[2] - v[1];

        dl = delta[2] / delta[2].p.yyyy();
    }
    else
    {
        r = v[1].p;

        delta[2].p = v[2].p - v[1].p;

        dr = delta[2].p / delta[2].p.yyyy();
    }

    GSVector4 secondStart = rowsFromTop.zzzz();

    l += dl * (secondStart - l.p.yyyy());
    r += dr * (secondStart - r.yyyy());

    // r.x => l.y
    l.p = l.p.upl(r).xyzw(l.p);

    // dr.x => dl.y
    dl.p = dl.p.upl(dr).xyzw(dl.p);

    DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
}
void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor) { GSVertexSW dv = v[1] - v[0]; GSVector4 dp = dv.p.abs(); if(m_dsf.ssle) { int i = (dp < dp.yxwz()).mask() & 1; // |x| <= |y| GSVertexSW dscan; dscan.p = GSVector4::zero(); dscan.t = GSVector4::zero(); dscan.c = GSVector4::zero(); m_dsf.ssp(v, dscan); DrawEdge(v[0], v[1], dv, scissor, i, 0); DrawEdge(v[0], v[1], dv, scissor, i, 1); return; } GSVector4i dpi(dp); if(dpi.y == 0) { if(dpi.x > 0) { // shortcut for horizontal lines GSVector4 mask = (v[0].p > v[1].p).xxxx(); GSVertexSW l, dl; l.p = v[0].p.blend8(v[1].p, mask); l.t = v[0].t.blend8(v[1].t, mask); l.c = v[0].c.blend8(v[1].c, mask); GSVector4 r; r = v[1].p.blend8(v[0].p, mask); GSVector4i p(l.p); if(scissor.top <= p.y && p.y < scissor.bottom) { GSVertexSW dscan = dv / dv.p.xxxx(); m_dsf.ssp(v, dscan); l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y GSVector4 fscissor(scissor); DrawTriangleSection(p.y, p.y + 1, l, dl, dscan, fscissor); } } return; } int i = dpi.x > dpi.y ? 0 : 1; GSVertexSW edge = v[0]; GSVertexSW dedge = dv / dp.v[i]; // TODO: prestep + clip with the scissor int steps = dpi.v[i]; while(steps-- > 0) { DrawPoint(&edge, scissor); edge += dedge; } }