void GSDrawScanlineCodeGenerator::WriteZBuf() { if(!m_sel.zwrite) { return; } movdqa(xmm1, xmmword[!m_sel.sprite ? &m_env.temp.zs : &m_env.p.z]); bool fast = false; if(m_sel.ztest && m_sel.zpsm < 2) { // zs = zs.blend8(zd, zm); movdqa(xmm0, xmm4); movdqa(xmm7, xmmword[&m_env.temp.zd]); blend8(xmm1, xmm7); fast = true; } WritePixel(xmm1, xmm0, ebp, dh, fast, m_sel.zpsm); }
void GPUDrawScanlineCodeGenerator::WriteFrame() { // GSVector4i fs = r | g | b | (m_sel.md ? GSVector4i(0x80008000) : m_sel.tme ? a : 0); pcmpeqd(xmm0, xmm0); if(m_sel.md || m_sel.tme) { movdqa(xmm2, xmm0); psllw(xmm2, 15); } psrlw(xmm0, 11); psllw(xmm0, 3); // xmm0 = 0x00f8 // xmm2 = 0x8000 (md) // GSVector4i r = (c[0] & 0x00f800f8) >> 3; pand(xmm4, xmm0); psrlw(xmm4, 3); // GSVector4i g = (c[1] & 0x00f800f8) << 2; pand(xmm5, xmm0); psllw(xmm5, 2); por(xmm4, xmm5); // GSVector4i b = (c[2] & 0x00f800f8) << 7; pand(xmm6, xmm0); psllw(xmm6, 7); por(xmm4, xmm6); if(m_sel.md) { // GSVector4i a = GSVector4i(0x80008000); por(xmm4, xmm2); } else if(m_sel.tme) { // GSVector4i a = (c[3] << 8) & 0x80008000; psllw(xmm3, 8); pand(xmm3, xmm2); por(xmm4, xmm3); } // fs = fs.blend8(fd, test); movdqa(xmm0, xmm7); blend8(xmm4, xmm1); // GSVector4i::store<false>(fb, fs); // movdqu(ptr[edi], xmm4); movq(qword[edi], xmm4); movhps(qword[edi + 8], xmm4); }
void GPUDrawScanlineCodeGenerator::AlphaBlend() { if(!m_sel.abe) { return; } // xmm1 = fd // xmm3 = a // xmm4 = r // xmm5 = g // xmm6 = b // xmm7 = test // xmm0, xmm2 = free // GSVector4i r = (fd & 0x001f001f) << 3; pcmpeqd(xmm0, xmm0); psrlw(xmm0, 11); // 0x001f movdqa(xmm2, xmm1); pand(xmm2, xmm0); psllw(xmm2, 3); switch(m_sel.abr) { case 0: // r = r.avg8(c[0]); pavgb(xmm2, xmm4); break; case 1: // r = r.addus8(c[0]); paddusb(xmm2, xmm4); break; case 2: // r = r.subus8(c[0]); psubusb(xmm2, xmm4); break; case 3: // r = r.addus8(c[0].srl16(2)); movdqa(xmm0, xmm4); psrlw(xmm0, 2); paddusb(xmm2, xmm0); break; } if(m_sel.tme) { movdqa(xmm0, xmm3); blend8(xmm4, xmm2); } else { movdqa(xmm4, xmm2); } // GSVector4i g = (d & 0x03e003e0) >> 2; pcmpeqd(xmm0, xmm0); psrlw(xmm0, 11); psllw(xmm0, 5); // 0x03e0 movdqa(xmm2, xmm1); pand(xmm2, xmm0); psrlw(xmm2, 2); switch(m_sel.abr) { case 0: // g = g.avg8(c[2]); pavgb(xmm2, xmm5); break; case 1: // g = g.addus8(c[2]); paddusb(xmm2, xmm5); break; case 2: // g = g.subus8(c[2]); psubusb(xmm2, xmm5); break; case 3: // g = g.addus8(c[2].srl16(2)); movdqa(xmm0, xmm5); psrlw(xmm0, 2); paddusb(xmm2, xmm0); break; } if(m_sel.tme) { movdqa(xmm0, xmm3); blend8(xmm5, xmm2); } else { movdqa(xmm5, xmm2); } // GSVector4i b = (d & 0x7c007c00) >> 7; pcmpeqd(xmm0, xmm0); psrlw(xmm0, 11); psllw(xmm0, 10); // 0x7c00 movdqa(xmm2, xmm1); pand(xmm2, xmm0); psrlw(xmm2, 7); switch(m_sel.abr) { case 0: // b = b.avg8(c[2]); pavgb(xmm2, xmm6); break; case 1: // b = b.addus8(c[2]); paddusb(xmm2, xmm6); break; case 2: // b = b.subus8(c[2]); psubusb(xmm2, xmm6); break; case 3: // b = b.addus8(c[2].srl16(2)); movdqa(xmm0, xmm6); psrlw(xmm0, 2); paddusb(xmm2, xmm0); break; } if(m_sel.tme) { movdqa(xmm0, xmm3); blend8(xmm6, xmm2); } else { movdqa(xmm6, xmm2); } }
void GSDrawScanlineCodeGenerator::AlphaTFX() { if(!m_sel.fb) { return; } switch(m_sel.tfx) { case TFX_MODULATE: // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); // gat = gat.modulate16<1>(ga).clamp8(); modulate16<1>(xmm6, xmm4); clamp16(xmm6, xmm3); // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { psrlw(xmm4, 7); mix16(xmm6, xmm4, xmm3); } break; case TFX_DECAL: // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); psrlw(xmm4, 7); mix16(xmm6, xmm4, xmm3); } break; case TFX_HIGHLIGHT: // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); movdqa(xmm2, xmm4); // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); psrlw(xmm4, 7); if(m_sel.tcc) { paddusb(xmm4, xmm6); } mix16(xmm6, xmm4, xmm3); break; case TFX_HIGHLIGHT2: // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); movdqa(xmm2, xmm4); psrlw(xmm4, 7); mix16(xmm6, xmm4, xmm3); } break; case TFX_NONE: // gat = iip ? ga.srl16(7) : ga; if(m_sel.iip) { psrlw(xmm6, 7); } break; } if(m_sel.aa1) { // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha // FIXME: bios config screen cubes if(!m_sel.abe) { // a = cov if(m_sel.edge) { movdqa(xmm0, xmmword[&m_env.temp.cov]); } else { pcmpeqd(xmm0, xmm0); psllw(xmm0, 15); psrlw(xmm0, 8); } mix16(xmm6, xmm0, xmm1); } else { // a = a == 0x80 ? cov : a pcmpeqd(xmm0, xmm0); psllw(xmm0, 15); psrlw(xmm0, 8); if(m_sel.edge) { movdqa(xmm1, xmmword[&m_env.temp.cov]); } else { movdqa(xmm1, xmm0); } pcmpeqw(xmm0, xmm6); psrld(xmm0, 16); pslld(xmm0, 16); blend8(xmm6, xmm1); } } }
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv) { // xmm0, xmm1, xmm4, xmm5, xmm6 = free int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; if(wms_clamp == wmt_clamp) { if(wms_clamp) { if(region) { pmaxsw(uv, xmmword[&m_env.t.min]); } else { pxor(xmm0, xmm0); pmaxsw(uv, xmm0); } pminsw(uv, xmmword[&m_env.t.max]); } else { pand(uv, xmmword[&m_env.t.min]); if(region) { por(uv, xmmword[&m_env.t.max]); } } } else { movdqa(xmm1, uv); movdqa(xmm4, xmmword[&m_env.t.min]); movdqa(xmm5, xmmword[&m_env.t.max]); // GSVector4i clamp = t.sat_i16(m_env.t.min, m_env.t.max); pmaxsw(uv, xmm4); pminsw(uv, xmm5); // GSVector4i repeat = (t & m_env.t.min) | m_env.t.max; pand(xmm1, xmm4); if(region) { por(xmm1, xmm5); } // clamp.blend8(repeat, m_env.t.mask); movdqa(xmm0, xmmword[&m_env.t.mask]); blend8(uv, xmm1); } }