void GSDrawScanlineCodeGenerator::Fog() { if(!m_sel.fwrite || !m_sel.fge) { return; } // rb = m_local.gd->frb.lerp16<0>(rb, f); // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); vmovdqa(xmm0, ptr[r12 + offsetof(GSScanlineGlobalData, frb)]); vmovdqa(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, fga)]); vmovdqa(xmm6, xmm3); lerp16(xmm2, xmm0, xmm9, 0); lerp16(xmm3, xmm1, xmm9, 0); mix16(xmm3, xmm6, xmm9); }
void GSDrawScanlineCodeGenerator::Fog() { if(!m_sel.fwrite || !m_sel.fge) { return; } // rb = m_env.frb.lerp16<0>(rb, f); // ga = m_env.fga.lerp16<0>(ga, f).mix16(ga); movdqa(xmm0, xmmword[!m_sel.sprite ? &m_env.temp.f : &m_env.p.f]); movdqa(xmm1, xmm6); movdqa(xmm2, xmmword[&m_env.frb]); lerp16<0>(xmm5, xmm2, xmm0); movdqa(xmm2, xmmword[&m_env.fga]); lerp16<0>(xmm6, xmm2, xmm0); mix16(xmm6, xmm1, xmm0); }
void GSDrawScanlineCodeGenerator::AlphaBlend() { if(!m_sel.fwrite) { return; } if(m_sel.abe == 0 && m_sel.aa1 == 0) { return; } if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) { switch(m_sel.fpsm) { case 0: case 1: // c[2] = fd & mask; // c[3] = (fd >> 8) & mask; vpsllw(xmm0, xmm6, 8); vpsrlw(xmm0, 8); vpsrlw(xmm1, xmm6, 8); break; case 2: // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); vpcmpeqd(xmm15, xmm15); vpsrld(xmm15, 27); // 0x0000001f vpand(xmm0, xmm6, xmm15); vpslld(xmm0, 3); vpslld(xmm15, 10); // 0x00007c00 vpand(xmm5, xmm6, xmm15); vpslld(xmm5, 9); vpor(xmm0, xmm1); vpsrld(xmm15, 5); // 0x000003e0 vpand(xmm1, xmm6, xmm15); vpsrld(xmm1, 2); vpsllw(xmm15, 10); // 0x00008000 vpand(xmm5, xmm6, xmm15); vpslld(xmm5, 8); vpor(xmm1, xmm5); break; } } // xmm2, xmm3 = src rb, ga // xmm0, xmm1 = dst rb, ga // xmm5, xmm15 = free if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) { vmovdqa(xmm5, xmm2); } if(m_sel.aba != m_sel.abb) { // rb = c[aba * 2 + 0]; switch(m_sel.aba) { case 0: break; case 1: vmovdqa(xmm2, xmm0); break; case 2: vpxor(xmm2, xmm2); break; } // rb = rb.sub16(c[abb * 2 + 0]); switch(m_sel.abb) { case 0: vpsubw(xmm2, xmm5); break; case 1: vpsubw(xmm2, xmm0); break; case 2: break; } if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; switch(m_sel.abc) { case 0: case 1: vpshuflw(xmm15, m_sel.abc ? xmm1 : xmm3, _MM_SHUFFLE(3, 3, 1, 1)); vpshufhw(xmm15, xmm15, _MM_SHUFFLE(3, 3, 1, 1)); vpsllw(xmm15, 7); break; case 2: vmovdqa(xmm15, ptr[r12 + offsetof(GSScanlineGlobalData, afix)]); break; } // rb = rb.modulate16<1>(a); modulate16(xmm2, xmm15, 1); } // rb = rb.add16(c[abd * 2 + 0]); switch(m_sel.abd) { case 0: vpaddw(xmm2, xmm5); break; case 1: vpaddw(xmm2, xmm0); break; case 2: break; } } else { // rb = c[abd * 2 + 0]; switch(m_sel.abd) { case 0: break; case 1: vmovdqa(xmm2, xmm0); break; case 2: vpxor(xmm2, xmm2); break; } } if(m_sel.pabe) { // mask = (c[1] << 8).sra32(31); vpslld(xmm0, xmm3, 8); vpsrad(xmm0, 31); // rb = c[0].blend8(rb, mask); vpblendvb(xmm2, xmm5, xmm2, xmm0); } // xmm0 = pabe mask // xmm3 = src ga // xmm1 = dst ga // xmm2 = rb // xmm15 = a // xmm5 = free vmovdqa(xmm5, xmm3); if(m_sel.aba != m_sel.abb) { // ga = c[aba * 2 + 1]; switch(m_sel.aba) { case 0: break; case 1: vmovdqa(xmm3, xmm1); break; case 2: vpxor(xmm3, xmm3); break; } // ga = ga.sub16(c[abeb * 2 + 1]); switch(m_sel.abb) { case 0: vpsubw(xmm3, xmm5); break; case 1: vpsubw(xmm3, xmm1); break; case 2: break; } if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // ga = ga.modulate16<1>(a); modulate16(xmm3, xmm15, 1); } // ga = ga.add16(c[abd * 2 + 1]); switch(m_sel.abd) { case 0: vpaddw(xmm3, xmm5); break; case 1: vpaddw(xmm3, xmm1); break; case 2: break; } } else { // ga = c[abd * 2 + 1]; switch(m_sel.abd) { case 0: break; case 1: vmovdqa(xmm3, xmm1); break; case 2: vpxor(xmm3, xmm3); break; } } // xmm0 = pabe mask // xmm5 = src ga // xmm2 = rb // xmm3 = ga // xmm1, xmm15 = free if(m_sel.pabe) { vpsrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) // ga = c[1].blend8(ga, mask).mix16(c[1]); vpblendvb(xmm3, xmm5, xmm3, xmm0); } else { if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx { mix16(xmm3, xmm5, xmm15); } } }
void GSDrawScanlineCodeGenerator::ColorTFX() { if(!m_sel.fwrite) { return; } switch(m_sel.tfx) { case TFX_MODULATE: // rbt = rbt.modulate16<1>(rb).clamp8(); modulate16(xmm2, xmm13, 1); clamp16(xmm2, xmm0); break; case TFX_DECAL: break; case TFX_HIGHLIGHT: case TFX_HIGHLIGHT2: // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); vmovdqa(xmm1, xmm3); modulate16(xmm3, xmm14, 1); vpshuflw(xmm6, xmm14, _MM_SHUFFLE(3, 3, 1, 1)); vpshufhw(xmm6, xmm6, _MM_SHUFFLE(3, 3, 1, 1)); vpsrlw(xmm6, 7); vpaddw(xmm3, xmm6); clamp16(xmm3, xmm0); mix16(xmm3, xmm1, xmm0); // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); modulate16(xmm2, xmm13, 1); vpaddw(xmm2, xmm6); clamp16(xmm2, xmm0); break; case TFX_NONE: // rbt = iip ? rb.srl16(7) : rb; if(m_sel.iip) { vpsrlw(xmm2, xmm13, 7); } break; } }
void GSDrawScanlineCodeGenerator::AlphaTFX() { if(!m_sel.fb) { return; } switch(m_sel.tfx) { case TFX_MODULATE: // gat = gat.modulate16<1>(ga).clamp8(); modulate16(xmm3, xmm14, 1); clamp16(xmm3, xmm0); // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { vpsrlw(xmm1, xmm14, 7); mix16(xmm3, xmm1, xmm0); } break; case TFX_DECAL: // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { vpsrlw(xmm1, xmm14, 7); mix16(xmm3, xmm1, xmm0); } break; case TFX_HIGHLIGHT: // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); vpsrlw(xmm1, xmm14, 7); if(m_sel.tcc) { vpaddusb(xmm1, xmm3); } mix16(xmm3, xmm1, xmm0); break; case TFX_HIGHLIGHT2: // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { vpsrlw(xmm1, xmm14, 7); mix16(xmm3, xmm1, xmm0); } break; case TFX_NONE: // gat = iip ? ga.srl16(7) : ga; if(m_sel.iip) { vpsrlw(xmm3, xmm14, 7); } break; } // TODO: aa1 }
void GSDrawScanlineCodeGenerator::AlphaBlend() { if(!m_sel.fwrite) { return; } if(m_sel.abe == 0 && m_sel.aa1 == 0) { return; } if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) { switch(m_sel.fpsm) { case 0: case 1: // c[2] = fd & mask; // c[3] = (fd >> 8) & mask; movdqa(xmm0, xmm2); movdqa(xmm1, xmm2); psllw(xmm0, 8); psrlw(xmm0, 8); psrlw(xmm1, 8); break; case 2: // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); movdqa(xmm0, xmm2); movdqa(xmm1, xmm2); movdqa(xmm4, xmm2); pcmpeqd(xmm7, xmm7); psrld(xmm7, 27); // 0x0000001f pand(xmm0, xmm7); pslld(xmm0, 3); pslld(xmm7, 10); // 0x00007c00 pand(xmm4, xmm7); pslld(xmm4, 9); por(xmm0, xmm4); movdqa(xmm4, xmm1); psrld(xmm7, 5); // 0x000003e0 pand(xmm1, xmm7); psrld(xmm1, 2); psllw(xmm7, 10); // 0x00008000 pand(xmm4, xmm7); pslld(xmm4, 8); por(xmm1, xmm4); break; } } // xmm5, xmm6 = src rb, ga // xmm0, xmm1 = dst rb, ga // xmm2, xmm3 = used // xmm4, xmm7 = free if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) { movdqa(xmm4, xmm5); } if(m_sel.aba != m_sel.abb) { // rb = c[aba * 2 + 0]; switch(m_sel.aba) { case 0: break; case 1: movdqa(xmm5, xmm0); break; case 2: pxor(xmm5, xmm5); break; } // rb = rb.sub16(c[abb * 2 + 0]); switch(m_sel.abb) { case 0: psubw(xmm5, xmm4); break; case 1: psubw(xmm5, xmm0); break; case 2: break; } if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_env.afix; switch(m_sel.abc) { case 0: case 1: movdqa(xmm7, m_sel.abc ? xmm1 : xmm6); pshuflw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); psllw(xmm7, 7); break; case 2: movdqa(xmm7, xmmword[&m_env.afix]); break; } // rb = rb.modulate16<1>(a); modulate16<1>(xmm5, xmm7); } // rb = rb.add16(c[abd * 2 + 0]); switch(m_sel.abd) { case 0: paddw(xmm5, xmm4); break; case 1: paddw(xmm5, xmm0); break; case 2: break; } } else { // rb = c[abd * 2 + 0]; switch(m_sel.abd) { case 0: break; case 1: movdqa(xmm5, xmm0); break; case 2: pxor(xmm5, xmm5); break; } } if(m_sel.pabe) { // mask = (c[1] << 8).sra32(31); movdqa(xmm0, xmm6); pslld(xmm0, 8); psrad(xmm0, 31); // rb = c[0].blend8(rb, mask); blend8r(xmm5, xmm4); } // xmm6 = src ga // xmm1 = dst ga // xmm5 = rb // xmm7 = a // xmm2, xmm3 = used // xmm0, xmm4 = free movdqa(xmm4, xmm6); if(m_sel.aba != m_sel.abb) { // ga = c[aba * 2 + 1]; switch(m_sel.aba) { case 0: break; case 1: movdqa(xmm6, xmm1); break; case 2: pxor(xmm6, xmm6); break; } // ga = ga.sub16(c[abeb * 2 + 1]); switch(m_sel.abb) { case 0: psubw(xmm6, xmm4); break; case 1: psubw(xmm6, xmm1); break; case 2: break; } if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // ga = ga.modulate16<1>(a); modulate16<1>(xmm6, xmm7); } // ga = ga.add16(c[abd * 2 + 1]); switch(m_sel.abd) { case 0: paddw(xmm6, xmm4); break; case 1: paddw(xmm6, xmm1); break; case 2: break; } } else { // ga = c[abd * 2 + 1]; switch(m_sel.abd) { case 0: break; case 1: movdqa(xmm6, xmm1); break; case 2: pxor(xmm6, xmm6); break; } } // xmm4 = src ga // xmm5 = rb // xmm6 = ga // xmm2, xmm3 = used // xmm0, xmm1, xmm7 = free if(m_sel.pabe) { if(!m_cpu.has(util::Cpu::tSSE41)) { // doh, previous blend8r overwrote xmm0 (sse41 uses pblendvb) movdqa(xmm0, xmm4); pslld(xmm0, 8); psrad(xmm0, 31); } psrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) // ga = c[1].blend8(ga, mask).mix16(c[1]); blend8r(xmm6, xmm4); } else { if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx { mix16(xmm6, xmm4, xmm7); } } }
void GSDrawScanlineCodeGenerator::ColorTFX() { if(!m_sel.fwrite) { return; } switch(m_sel.tfx) { case TFX_MODULATE: // GSVector4i rb = iip ? rbf : m_env.c.rb; // rbt = rbt.modulate16<1>(rb).clamp8(); modulate16<1>(xmm5, xmmword[m_sel.iip ? &m_env.temp.rb : &m_env.c.rb]); clamp16(xmm5, xmm1); break; case TFX_DECAL: break; case TFX_HIGHLIGHT: case TFX_HIGHLIGHT2: if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) { // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm2, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); } // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); movdqa(xmm1, xmm6); modulate16<1>(xmm6, xmm2); pshuflw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); pshufhw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); psrlw(xmm2, 7); paddw(xmm6, xmm2); clamp16(xmm6, xmm0); mix16(xmm6, xmm1, xmm0); // GSVector4i rb = iip ? rbf : m_env.c.rb; // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); modulate16<1>(xmm5, xmmword[m_sel.iip ? &m_env.temp.rb : &m_env.c.rb]); paddw(xmm5, xmm2); clamp16(xmm5, xmm0); break; case TFX_NONE: // rbt = iip ? rb.srl16(7) : rb; if(m_sel.iip) { psrlw(xmm5, 7); } break; } }
void GSDrawScanlineCodeGenerator::AlphaTFX() { if(!m_sel.fb) { return; } switch(m_sel.tfx) { case TFX_MODULATE: // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); // gat = gat.modulate16<1>(ga).clamp8(); modulate16<1>(xmm6, xmm4); clamp16(xmm6, xmm3); // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { psrlw(xmm4, 7); mix16(xmm6, xmm4, xmm3); } break; case TFX_DECAL: // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); psrlw(xmm4, 7); mix16(xmm6, xmm4, xmm3); } break; case TFX_HIGHLIGHT: // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); movdqa(xmm2, xmm4); // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); psrlw(xmm4, 7); if(m_sel.tcc) { paddusb(xmm4, xmm6); } mix16(xmm6, xmm4, xmm3); break; case TFX_HIGHLIGHT2: // if(!tcc) gat = gat.mix16(ga.srl16(7)); if(!m_sel.tcc) { // GSVector4i ga = iip ? gaf : m_env.c.ga; movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); movdqa(xmm2, xmm4); psrlw(xmm4, 7); mix16(xmm6, xmm4, xmm3); } break; case TFX_NONE: // gat = iip ? ga.srl16(7) : ga; if(m_sel.iip) { psrlw(xmm6, 7); } break; } if(m_sel.aa1) { // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha // FIXME: bios config screen cubes if(!m_sel.abe) { // a = cov if(m_sel.edge) { movdqa(xmm0, xmmword[&m_env.temp.cov]); } else { pcmpeqd(xmm0, xmm0); psllw(xmm0, 15); psrlw(xmm0, 8); } mix16(xmm6, xmm0, xmm1); } else { // a = a == 0x80 ? cov : a pcmpeqd(xmm0, xmm0); psllw(xmm0, 15); psrlw(xmm0, 8); if(m_sel.edge) { movdqa(xmm1, xmmword[&m_env.temp.cov]); } else { movdqa(xmm1, xmm0); } pcmpeqw(xmm0, xmm6); psrld(xmm0, 16); pslld(xmm0, 16); blend8(xmm6, xmm1); } } }