// Emits a byte-granular select that merges b into a, leaving the result in a.
//
// The selector is taken from xmm0 on both paths: PBLENDVB reads it implicitly,
// and the fallback hands xmm0 to blend() explicitly. The caller is therefore
// expected to have loaded the mask into xmm0 before calling this.
// NOTE(review): blend() is defined elsewhere; presumably it emulates the same
// select with plain SSE2 logic ops - confirm against its definition.
void GPUDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
{
	// The path is chosen when the code is generated, based on the detected CPU.
	if(!m_cpu.has(util::Cpu::tSSE41))
	{
		blend(a, b, xmm0);
		return;
	}

	pblendvb(a, b);
}
// "Reversed" byte blend: emits code that leaves the blended value in b
// (the first parameter) rather than in a.
//
// xmm0 supplies the selector on both paths (implicitly for PBLENDVB,
// explicitly for the fallback helper).
//
// On the SSE4.1 path the blend is performed in place in a and then copied to
// b, so a is overwritten as well; callers should not rely on a afterwards.
// NOTE(review): whether blendr() also modifies a is not visible here - confirm.
void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
{
	if(!m_cpu.has(util::Cpu::tSSE41))
	{
		blendr(b, a, xmm0);
		return;
	}

	pblendvb(a, b); // a = select(xmm0, b, a)
	movdqa(b, a);   // hand the result over to b
}
// Emits a byte-granular select that merges b into a, leaving the result in a.
// xmm0 is the selector register in every variant.
//
// The instruction form is fixed at build time from _M_SSE:
//   below 0x401      -> blend() helper (defined elsewhere)
//   0x401 .. 0x4ff   -> PBLENDVB, selector implicit in xmm0
//   0x500 and above  -> VPBLENDVB, selector passed as an explicit operand
void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
{
#if _M_SSE < 0x401

	blend(a, b, xmm0);

#elif _M_SSE < 0x500

	pblendvb(a, b);

#else

	vpblendvb(a, a, b, xmm0);

#endif
}
// "Reversed" byte blend: emits code that leaves the blended value in b
// (the first parameter). xmm0 is the selector register in every variant.
//
// The instruction form is fixed at build time from _M_SSE:
//   below 0x401      -> blendr() helper (defined elsewhere)
//   0x401 .. 0x4ff   -> PBLENDVB into a, then copy a to b (a is overwritten)
//   0x500 and above  -> VPBLENDVB writing straight into b (a is left intact)
//
// Because the variants differ in whether a survives, callers should treat a
// as clobbered after this call.
void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
{
#if _M_SSE < 0x401

	blendr(b, a, xmm0);

#elif _M_SSE < 0x500

	pblendvb(a, b);
	movdqa(b, a);

#else

	vpblendvb(b, a, b, xmm0);

#endif
}
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1) { // xmm0, xmm1, xmm4, xmm5, xmm6 = free int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; if(wms_clamp == wmt_clamp) { if(wms_clamp) { if(region) { movdqa(xmm4, xmmword[&m_env.t.min]); pmaxsw(uv0, xmm4); pmaxsw(uv1, xmm4); } else { pxor(xmm0, xmm0); pmaxsw(uv0, xmm0); pmaxsw(uv1, xmm0); } movdqa(xmm5, xmmword[&m_env.t.max]); pminsw(uv0, xmm5); pminsw(uv1, xmm5); } else { movdqa(xmm4, xmmword[&m_env.t.min]); pand(uv0, xmm4); pand(uv1, xmm4); if(region) { movdqa(xmm5, xmmword[&m_env.t.max]); por(uv0, xmm5); por(uv1, xmm5); } } } else { movdqa(xmm1, uv0); movdqa(xmm6, uv1); movdqa(xmm4, xmmword[&m_env.t.min]); movdqa(xmm5, xmmword[&m_env.t.max]); // GSVector4i clamp = t.sat_i16(m_env.t.min, m_env.t.max); pmaxsw(uv0, xmm4); pmaxsw(uv1, xmm4); pminsw(uv0, xmm5); pminsw(uv1, xmm5); // GSVector4i repeat = (t & m_env.t.min) | m_env.t.max; pand(xmm1, xmm4); pand(xmm6, xmm4); if(region) { por(xmm1, xmm5); por(xmm6, xmm5); } // clamp.blend8(repeat, m_env.t.mask); if(m_cpu.has(util::Cpu::tSSE41)) { movdqa(xmm0, xmmword[&m_env.t.mask]); pblendvb(uv0, xmm1); pblendvb(uv1, xmm6); } else { movdqa(xmm0, xmmword[&m_env.t.invmask]); movdqa(xmm4, xmm0); pand(uv0, xmm0); pandn(xmm0, xmm1); por(uv0, xmm0); pand(uv1, xmm4); pandn(xmm4, xmm6); por(uv1, xmm4); } } }