void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) { if(!m_sel.zb) { return; } // int za = fza_base.y + fza_offset->y; movsxd(rbp, dword[rsi + 4]); movsxd(rax, dword[rdi + 4]); add(rbp, rax); // GSVector4i zs = zi; if(!m_sel.sprite) { if(m_sel.zoverflow) { // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); mov(rax, (size_t)&GSVector4::m_half); vbroadcastss(xmm0, ptr[rax]); vmulps(xmm0, xmm8); vcvttps2dq(xmm0, xmm0); vpslld(xmm0, 1); vcvttps2dq(xmm1, xmm8); vpcmpeqd(xmm2, xmm2); vpsrld(xmm2, 31); vpand(xmm1, xmm2); vpor(xmm0, xmm1); } else { // zs = GSVector4i(z); vcvttps2dq(xmm0, xmm8); } if(m_sel.zwrite) { vmovdqa(ptr[r11 + offsetof(GSScanlineLocalData, temp.zs)], xmm0); } } if(m_sel.ztest) { ReadPixel(xmm1, rbp); if(m_sel.zwrite && m_sel.zpsm < 2) { vmovdqa(ptr[r11 + offsetof(GSScanlineLocalData, temp.zd)], xmm1); } // zd &= 0xffffffff >> m_sel.zpsm * 8; if(m_sel.zpsm) { vpslld(xmm1, m_sel.zpsm * 8); vpsrld(xmm1, m_sel.zpsm * 8); } if(m_sel.zoverflow || m_sel.zpsm == 0) { // GSVector4i off = GSVector4i::x80000000(); vpcmpeqd(xmm2, xmm2); vpslld(xmm2, 31); // GSVector4i zso = zs - off; // GSVector4i zdo = zd - off; vpsubd(xmm0, xmm2); vpsubd(xmm1, xmm2); } switch(m_sel.ztst) { case ZTST_GEQUAL: // test |= zso < zdo; // ~(zso >= zdo) vpcmpgtd(xmm1, xmm0); vpor(xmm15, xmm1); break; case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL // test |= zso <= zdo; // ~(zso > zdo) vpcmpgtd(xmm0, xmm1); vpcmpeqd(xmm2, xmm2); vpxor(xmm0, xmm2); vpor(xmm15, xmm0); break; } alltrue(); } }
// Wraps the given block in its corresponding view type and returns alltrue() of that view.
result_type operator()(B const &block) const
{
  // value_type is declared but not used below.
  typedef typename B::value_type value_type;
  typedef typename vsip::impl::view_of<B>::type view_type;

  // The view is constructed from a non-const block reference, so constness is cast away here.
  B &mutable_block = const_cast<B&>(block);
  view_type block_view(mutable_block);

  return alltrue(block_view);
}
void GPUDrawScanlineCodeGenerator::TestMask() { if(!m_sel.me) { return; } // test |= fd.sra16(15); movdqa(xmm0, xmm1); psraw(xmm0, 15); por(xmm7, xmm0); alltrue(); }
void GSDrawScanlineCodeGenerator::TestDestAlpha() { if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) { return; } // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); if(m_sel.datm) { if(m_sel.fpsm == 2) { vpxor(xmm0, xmm0); //vpsrld(xmm1, xmm6, 15); vpslld(xmm1, xmm6, 16); vpsrad(xmm1, 31); vpcmpeqd(xmm1, xmm0); } else { vpcmpeqd(xmm0, xmm0); vpxor(xmm1, xmm6, xmm0); vpsrad(xmm1, 31); } } else { if(m_sel.fpsm == 2) { vpslld(xmm1, xmm6, 16); vpsrad(xmm1, 31); } else { vpsrad(xmm1, xmm6, 31); } } vpor(xmm15, xmm1); alltrue(); }
void GSDrawScanlineCodeGenerator::TestDestAlpha() { if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) { return; } // test |= ((fd [<< 16]) ^ m_env.datm).sra32(31); movdqa(xmm1, xmm2); if(m_sel.datm) { if(m_sel.fpsm == 2) { pxor(xmm0, xmm0); psrld(xmm1, 15); pcmpeqd(xmm1, xmm0); } else { pcmpeqd(xmm0, xmm0); pxor(xmm1, xmm0); psrad(xmm1, 31); } } else { if(m_sel.fpsm == 2) { pslld(xmm1, 16); } psrad(xmm1, 31); } por(xmm7, xmm1); alltrue(); }
void GSDrawScanlineCodeGenerator::TestAlpha() { switch(m_sel.afail) { case AFAIL_FB_ONLY: if(!m_sel.zwrite) return; break; case AFAIL_ZB_ONLY: if(!m_sel.fwrite) return; break; case AFAIL_RGB_ONLY: if(!m_sel.zwrite && m_sel.fpsm == 1) return; break; } switch(m_sel.atst) { case ATST_NEVER: // t = GSVector4i::xffffffff(); vpcmpeqd(xmm1, xmm1); break; case ATST_ALWAYS: return; case ATST_LESS: case ATST_LEQUAL: // t = (ga >> 16) > m_local.gd->aref; vpsrld(xmm1, xmm3, 16); vpcmpgtd(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]); break; case ATST_EQUAL: // t = (ga >> 16) != m_local.gd->aref; vpsrld(xmm1, xmm3, 16); vpcmpeqd(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]); vpcmpeqd(xmm0, xmm0); vpxor(xmm1, xmm0); break; case ATST_GEQUAL: case ATST_GREATER: // t = (ga >> 16) < m_local.gd->aref; vpsrld(xmm0, xmm3, 16); vmovdqa(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]); vpcmpgtd(xmm1, xmm0); break; case ATST_NOTEQUAL: // t = (ga >> 16) == m_local.gd->aref; vpsrld(xmm1, xmm3, 16); vpcmpeqd(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]); break; } switch(m_sel.afail) { case AFAIL_KEEP: // test |= t; vpor(xmm15, xmm1); alltrue(); break; case AFAIL_FB_ONLY: // zm |= t; vpor(xmm5, xmm1); break; case AFAIL_ZB_ONLY: // fm |= t; vpor(xmm4, xmm1); break; case AFAIL_RGB_ONLY: // zm |= t; vpor(xmm5, xmm1); // fm |= t & GSVector4i::xff000000(); vpsrld(xmm1, 24); vpslld(xmm1, 24); vpor(xmm4, xmm1); break; } }
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) { if(!m_sel.zb) { return; } // int za = fza_base.y + fza_offset->y; mov(ebp, dword[esi + 4]); add(ebp, dword[edi + 4]); // GSVector4i zs = zi; if(!m_sel.sprite) { if(m_sel.zoverflow) { // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); static float half = 0.5f; movss(temp1, dword[&half]); shufps(temp1, temp1, _MM_SHUFFLE(0, 0, 0, 0)); mulps(temp1, xmm0); cvttps2dq(temp1, temp1); pslld(temp1, 1); cvttps2dq(xmm0, xmm0); pcmpeqd(temp2, temp2); psrld(temp2, 31); pand(xmm0, temp2); por(xmm0, temp1); } else { // zs = GSVector4i(z); cvttps2dq(xmm0, xmm0); } if(m_sel.zwrite) { movdqa(xmmword[&m_env.temp.zs], xmm0); } } if(m_sel.ztest) { ReadPixel(xmm1, ebp); if(m_sel.zwrite && m_sel.zpsm < 2) { movdqa(xmmword[&m_env.temp.zd], xmm1); } // zd &= 0xffffffff >> m_sel.zpsm * 8; if(m_sel.zpsm) { pslld(xmm1, m_sel.zpsm * 8); psrld(xmm1, m_sel.zpsm * 8); } if(m_sel.zoverflow || m_sel.zpsm == 0) { // GSVector4i o = GSVector4i::x80000000(); pcmpeqd(xmm4, xmm4); pslld(xmm4, 31); // GSVector4i zso = zs - o; psubd(xmm0, xmm4); // GSVector4i zdo = zd - o; psubd(xmm1, xmm4); } switch(m_sel.ztst) { case ZTST_GEQUAL: // test |= zso < zdo; // ~(zso >= zdo) pcmpgtd(xmm1, xmm0); por(xmm7, xmm1); break; case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL // test |= zso <= zdo; // ~(zso > zdo) pcmpgtd(xmm0, xmm1); pcmpeqd(xmm4, xmm4); pxor(xmm0, xmm4); por(xmm7, xmm0); break; } alltrue(); } }
void GSDrawScanlineCodeGenerator::TestAlpha() { switch(m_sel.afail) { case AFAIL_FB_ONLY: if(!m_sel.zwrite) return; break; case AFAIL_ZB_ONLY: if(!m_sel.fwrite) return; break; case AFAIL_RGB_ONLY: if(!m_sel.zwrite && m_sel.fpsm == 1) return; break; } switch(m_sel.atst) { case ATST_NEVER: // t = GSVector4i::xffffffff(); pcmpeqd(xmm1, xmm1); break; case ATST_ALWAYS: return; case ATST_LESS: case ATST_LEQUAL: // t = (ga >> 16) > m_env.aref; movdqa(xmm1, xmm6); psrld(xmm1, 16); pcmpgtd(xmm1, xmmword[&m_env.aref]); break; case ATST_EQUAL: // t = (ga >> 16) != m_env.aref; movdqa(xmm1, xmm6); psrld(xmm1, 16); pcmpeqd(xmm1, xmmword[&m_env.aref]); pcmpeqd(xmm0, xmm0); pxor(xmm1, xmm0); break; case ATST_GEQUAL: case ATST_GREATER: // t = (ga >> 16) < m_env.aref; movdqa(xmm0, xmm6); psrld(xmm0, 16); movdqa(xmm1, xmmword[&m_env.aref]); pcmpgtd(xmm1, xmm0); break; case ATST_NOTEQUAL: // t = (ga >> 16) == m_env.aref; movdqa(xmm1, xmm6); psrld(xmm1, 16); pcmpeqd(xmm1, xmmword[&m_env.aref]); break; } switch(m_sel.afail) { case AFAIL_KEEP: // test |= t; por(xmm7, xmm1); alltrue(); break; case AFAIL_FB_ONLY: // zm |= t; por(xmm4, xmm1); break; case AFAIL_ZB_ONLY: // fm |= t; por(xmm3, xmm1); break; case AFAIL_RGB_ONLY: // zm |= t; por(xmm4, xmm1); // fm |= t & GSVector4i::xff000000(); psrld(xmm1, 24); pslld(xmm1, 24); por(xmm3, xmm1); break; } }