int main() { int rval; mmx_t ma; mmx_t mb; movq_r2r(mm0, mm1); rval = mmx_ok(); /* Announce return value of mmx_ok() */ // printf("Value returned from init was %x.", rval); // printf(" (Indicates MMX %s available)\n\n",(rval)? "is" : "not"); // fflush(stdout); fflush(stderr); // if(rval) { /* PADD *****************************************************/ ma.q = 0x1111111180000000LL; mb.q = 0x7fffffff00000001LL; paddd(ma, mb); fprintf(stdout, "paddd: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddd: mb.q is 9111111080000001\n"); fflush(stdout); fflush(stderr); ma.q = 0x0001000100010001LL; mb.q = 0x80007fffffff0001LL; paddw(ma, mb); fprintf(stdout, "paddw: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddw: mb.q is 8001800000000002\n"); fflush(stdout); fflush(stderr); ma.q = 0x80007fffffff0001LL; mb.q = 0x0001000100010000LL; paddw(ma, mb); fprintf(stdout, "paddw: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddw: mb.q is 8001800000000001\n"); fflush(stdout); fflush(stderr); ma.q = 0x01010101807fff01LL; mb.q = 0x807fff0101010101LL; paddb(ma, mb); fprintf(stdout, "paddb: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddb: mb.q is 8180000281800002\n"); fflush(stdout); fflush(stderr); /* PADDS ****************************************************/ ma.q = 0x0001000100010001LL; mb.q = 0x80007fffffff0001LL; paddsw(ma, mb); fprintf(stdout, "paddsw: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddsw: mb.q is 80017fff00000002\n"); ma.q = 0x80007fffffff0001LL; mb.q = 0x0001000100010000LL; paddsw(ma, mb); fprintf(stdout, "paddsw: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddsw: mb.q is 80017fff00000001\n"); ma.q = 0x01010101807fff01LL; mb.q = 0x807fff0101010101LL; paddsb(ma, mb); fprintf(stdout, "paddsb: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddsb: mb.q is 817f0002817f0002\n"); fflush(stdout); fflush(stderr); /* PADDUS ***************************************************/ ma.q = 0x0001000100010001LL; mb.q = 0x80007fffffff0001LL; paddusw(ma, mb); fprintf(stdout, "paddusw: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddusw: mb.q is 80018000ffff0002\n"); fflush(stdout); fflush(stderr); ma.q = 0x80007fffffff0001LL; mb.q = 0x0001000100010000LL; paddusw(ma, mb); fprintf(stdout, "paddusw: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddusw: mb.q is 80018000ffff0001\n"); fflush(stdout); fflush(stderr); ma.q = 0x01010101807fff01LL; mb.q = 0x807fff0101010101LL; paddusb(ma, mb); fprintf(stdout, "paddusb: mb.q is %016llx\n", mb.q); fprintf(stderr, "paddusb: mb.q is 8180ff028180ff02\n"); fflush(stdout); fflush(stderr); /* PSUB *****************************************************/ ma.q = 0x7fffffff00000001LL; mb.q = 0x1111111180000000LL; psubd(ma, mb); fprintf(stdout, "psubd: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubd: mb.q is 911111127fffffff\n"); fflush(stdout); fflush(stderr); ma.q = 0x80007fffffff0001LL; mb.q = 0x0001000100010001LL; psubw(ma, mb); fprintf(stdout, "psubw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubw: mb.q is 8001800200020000\n"); fflush(stdout); fflush(stderr); ma.q = 0x0001000100010000LL; mb.q = 0x80007fffffff0001LL; psubw(ma, mb); fprintf(stdout, "psubw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubw: mb.q is 7fff7ffefffe0001\n"); fflush(stdout); fflush(stderr); ma.q = 0x807fff0101010101LL; mb.q = 0x01010101807fff01LL; psubb(ma, mb); fprintf(stdout, "psubb: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubb: mb.q is 818202007f7efe00\n"); fflush(stdout); fflush(stderr); /* PSUBS ****************************************************/ ma.q = 0x80007fffffff0001LL; mb.q = 0x0001000100010001LL; psubsw(ma, mb); fprintf(stdout, "psubsw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubsw: mb.q is 7fff800200020000\n"); fflush(stdout); fflush(stderr); ma.q = 0x0001000100010000LL; mb.q = 0x80007fffffff0001LL; psubsw(ma, mb); fprintf(stdout, "psubsw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubsw: mb.q is 80007ffefffe0001\n"); fflush(stdout); fflush(stderr); ma.q = 0x807fff0101010101LL; mb.q = 0x01010101807fff01LL; psubsb(ma, mb); fprintf(stdout, "psubsb: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubsb: mb.q is 7f820200807efe00\n"); fflush(stdout); fflush(stderr); /* PSUBUS ***************************************************/ ma.q = 0x80007fffffff0001LL; mb.q = 0x0001000100010001LL; psubusw(ma, mb); fprintf(stdout, "psubusw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubusw: mb.q is 0000000000000000\n"); fflush(stdout); fflush(stderr); ma.q = 0x0001000100010000LL; mb.q = 0x80007fffffff0001LL; psubusw(ma, mb); fprintf(stdout, "psubusw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubusw: mb.q is 7fff7ffefffe0001\n"); fflush(stdout); fflush(stderr); ma.q = 0x807fff0101010101LL; mb.q = 0x01010101807fff01LL; psubusb(ma, mb); fprintf(stdout, "psubusb: mb.q is %016llx\n", mb.q); fprintf(stderr, "psubusb: mb.q is 000000007f7efe00\n"); fflush(stdout); fflush(stderr); /* PMUL *****************************************************/ ma.q = 0x8000ffff00ff0000LL; mb.q = 0x0200ffff00ffffffLL; pmulhw(ma, mb); fprintf(stdout, "pmulhw: mb.q is %016llx\n", mb.q); fprintf(stderr, "pmulhw: mb.q is ff00000000000000\n"); fflush(stdout); fflush(stderr); mb.q = 0x0200ffff00ffffffLL; pmullw(ma, mb); fprintf(stdout, "pmullw: mb.q is %016llx\n", mb.q); fprintf(stderr, "pmullw: mb.q is 00000001fe010000\n"); fflush(stdout); fflush(stderr); /* PMADD ****************************************************/ ma.q = 0x8000345680007f34LL; mb.q = 0x93234a27ffff1707LL; pmaddwd(ma, mb); fprintf(stdout, "pmaddwd: mb.q is %016llx\n", mb.q); fprintf(stderr, "pmaddwd: mb.q is 4597551a0b71a66c\n"); fflush(stdout); fflush(stderr); /* PCMPEQ ***************************************************/ ma.q = 0x800034568f237f34LL; mb.q = 0x93009a568f237f34LL; pcmpeqd(ma, mb); fprintf(stdout, "pcmpeqd: mb.q is %016llx\n", mb.q); fprintf(stderr, "pcmpeqd: mb.q is 00000000ffffffff\n"); fflush(stdout); fflush(stderr); mb.q = 0x93009a568f237f34LL; pcmpeqw(ma, mb); fprintf(stdout, "pcmpeqw: mb.q is %016llx\n", mb.q); fprintf(stderr, "pcmpeqw: mb.q is 00000000ffffffff\n"); fflush(stdout); fflush(stderr); mb.q = 0x93009a568f237f34LL; pcmpeqb(ma, mb); fprintf(stdout, "pcmpeqb: mb.q is %016llx\n", mb.q); fprintf(stderr, "pcmpeqb: mb.q is 00ff00ffffffffff\n"); fflush(stdout); fflush(stderr); /* PCMPGT ***************************************************/ ma.q = 0x666688884477aaffLL; mb.q = 0x1234567890abcdefLL; pcmpgtd(ma, mb); fprintf(stdout, "pcmpgtd: mb.q is %016llx\n", mb.q); fprintf(stderr, "pcmpgtd: mb.q is 0000000000000000\n"); fflush(stdout); fflush(stderr); mb.q = 0x1234567890abcdefLL; pcmpgtw(ma, mb); fprintf(stdout, "pcmpgtw: mb.q is %016llx\n", mb.q); fprintf(stderr, "pcmpgtw: mb.q is 0000ffff0000ffff\n"); fflush(stdout); fflush(stderr); mb.q = 0x1234567890abcdefLL; pcmpgtb(ma, mb); fprintf(stdout, "pcmpgtb: mb.q is %016llx\n", mb.q); fprintf(stderr, "pcmpgtb: mb.q is 0000ffff0000ff00\n"); fflush(stdout); fflush(stderr); /* PACKSS ***************************************************/ ma.q = 0x00012222000abbbbLL; mb.q = 0x0000888800003333LL; packssdw(ma, mb); fprintf(stdout, "packssdw: mb.q is %016llx\n", mb.q); fprintf(stderr, "packssdw: mb.q is 7fff7fff7fff3333\n"); fflush(stdout); fflush(stderr); ma.q = 0x00aa00dd01009999LL; mb.q = 0x0011002200330044LL; packsswb(ma, mb); fprintf(stdout, "packsswb: mb.q is %016llx\n", mb.q); fprintf(stderr, "packsswb: mb.q is 7f7f7f8011223344\n"); fflush(stdout); fflush(stderr); /* PACKUS ***************************************************/ ma.q = 0x00aa00dd01009999LL; mb.q = 0x0011002200330044LL; packuswb(ma, mb); fprintf(stdout, "packuswb: mb.q is %016llx\n", mb.q); fprintf(stderr, "packuswb: mb.q is aaddff0011223344\n"); fflush(stdout); fflush(stderr); /* PUNPCKH **************************************************/ ma.q = 0x090a0b0c0d0e0f00LL; mb.q = 0x0102030405060708LL; punpckhdq(ma, mb); fprintf(stdout, "punpckhdq: mb.q is %016llx\n", mb.q); fprintf(stderr, "punpckhdq: mb.q is 090a0b0c01020304\n"); fflush(stdout); fflush(stderr); mb.q = 0x0102030405060708LL; punpckhwd(ma, mb); fprintf(stdout, "punpckhwd: mb.q is %016llx\n", mb.q); fprintf(stderr, "punpckhwd: mb.q is 090a01020b0c0304\n"); fflush(stdout); fflush(stderr); mb.q = 0x0102030405060708LL; punpckhbw(ma, mb); fprintf(stdout, "punpckhbw: mb.q is %016llx\n", mb.q); fprintf(stderr, "punpckhbw: mb.q is 09010a020b030c04\n"); fflush(stdout); fflush(stderr); /* PUNPCKL **************************************************/ ma.q = 0x090a0b0c0d0e0f00LL; mb.q = 0x0102030405060708LL; punpckldq(ma, mb); fprintf(stdout, "punpckldq: mb.q is %016llx\n", mb.q); fprintf(stderr, "punpckldq: mb.q is 0d0e0f0005060708\n"); fflush(stdout); fflush(stderr); mb.q = 0x0102030405060708LL; punpcklwd(ma, mb); fprintf(stdout, "punpcklwd: mb.q is %016llx\n", mb.q); fprintf(stderr, "punpcklwd: mb.q is 0d0e05060f000708\n"); fflush(stdout); fflush(stderr); mb.q = 0x0102030405060708LL; punpcklbw(ma, mb); fprintf(stdout, "punpcklbw: mb.q is %016llx\n", mb.q); fprintf(stderr, "punpcklbw: mb.q is 0d050e060f070008\n"); fflush(stdout); fflush(stderr); /* PAND, PANDN, POR, PXOR ***********************************/ ma.q = 0x5555555555555555LL; mb.q = 0x3333333333333333LL; pand(ma, mb); fprintf(stdout, "pand: mb.q is %016llx\n", mb.q); fprintf(stderr, "pand: mb.q is 1111111111111111\n"); fflush(stdout); fflush(stderr); mb.q = 0x3333333333333333LL; pandn(ma, mb); fprintf(stdout, "pandn: mb.q is %016llx\n", mb.q); fprintf(stderr, "pandn: mb.q is 4444444444444444\n"); fflush(stdout); fflush(stderr); mb.q = 0x3333333333333333LL; por(ma, mb); fprintf(stdout, "por: mb.q is %016llx\n", mb.q); fprintf(stderr, "por: mb.q is 7777777777777777\n"); fflush(stdout); fflush(stderr); mb.q = 0x3333333333333333LL; pxor(ma, mb); fprintf(stdout, "pxor: mb.q is %016llx\n", mb.q); fprintf(stderr, "pxor: mb.q is 6666666666666666\n"); fflush(stdout); fflush(stderr); /* PSLL *****************************************************/ ma.q = 0x0000000000000018LL; mb.q = 0x0123456789abcdefLL; psllq(ma, mb); fprintf(stdout, "psllq: mb.q is %016llx\n", mb.q); fprintf(stderr, "psllq: mb.q is 6789abcdef000000\n"); fflush(stdout); fflush(stderr); mb.q = 0x0123456789abcdefLL; pslld(ma, mb); fprintf(stdout, "pslld: mb.q is %016llx\n", mb.q); fprintf(stderr, "pslld: mb.q is 67000000ef000000\n"); fflush(stdout); fflush(stderr); mb.q = 0x0123456789abcdefLL; psllw(ma, mb); fprintf(stdout, "psllw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psllw: mb.q is 0000000000000000\n"); fflush(stdout); fflush(stderr); /* PSRL *****************************************************/ ma.q = 0x0000000000000018LL; mb.q = 0x0123456789abcdefLL; psrlq(ma, mb); fprintf(stdout, "psrlq: mb.q is %016llx\n", mb.q); fprintf(stderr, "psrlq: mb.q is 0000000123456789\n"); fflush(stdout); fflush(stderr); mb.q = 0x0123456789abcdefLL; psrld(ma, mb); fprintf(stdout, "psrld: mb.q is %016llx\n", mb.q); fprintf(stderr, "psrld: mb.q is 0000000100000089\n"); fflush(stdout); fflush(stderr); mb.q = 0x0123456789abcdefLL; psrlw(ma, mb); fprintf(stdout, "psrlw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psrlw: mb.q is 0000000000000000\n"); fflush(stdout); fflush(stderr); /* PSRA *****************************************************/ ma.q = 0x0000000000000018LL; mb.q = 0x0123456789abcdefLL; psrad(ma, mb); fprintf(stdout, "psrad: mb.q is %016llx\n", mb.q); fprintf(stderr, "psrad: mb.q is 00000001ffffff89\n"); fflush(stdout); fflush(stderr); mb.q = 0x0123456789abcdefLL; psraw(ma, mb); fprintf(stdout, "psraw: mb.q is %016llx\n", mb.q); fprintf(stderr, "psraw: mb.q is 00000000ffffffff\n"); fflush(stdout); fflush(stderr); /* Exit MXX *************************************************/ emms(); } /* Clean-up and exit nicely */ exit(0); }
void GPUDrawScanlineCodeGenerator::SampleTexture() { if(!m_sel.tme) { return; } if(m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } // xmm2 = s // xmm3 = t // xmm7 = test // xmm0, xmm4, xmm5, xmm6 = free // xmm1 = used if(m_sel.ltf) { // GSVector4i u = s.sub16(GSVector4i(0x00200020)); // - 0.125f // GSVector4i v = t.sub16(GSVector4i(0x00200020)); // - 0.125f mov(eax, 0x00200020); movd(xmm0, eax); pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); psubw(xmm2, xmm0); psubw(xmm3, xmm0); // GSVector4i uf = (u & GSVector4i::x00ff()) << 7; // GSVector4i vf = (v & GSVector4i::x00ff()) << 7; movdqa(xmm0, xmm2); psllw(xmm0, 8); psrlw(xmm0, 1); movdqa(ptr[&m_local.temp.uf], xmm0); if(!m_sel.sprite) { movdqa(xmm0, xmm3); psllw(xmm0, 8); psrlw(xmm0, 1); movdqa(ptr[&m_local.temp.vf], xmm0); } } // GSVector4i u0 = s.srl16(8); // GSVector4i v0 = t.srl16(8); psrlw(xmm2, 8); psrlw(xmm3, 8); // xmm2 = u // xmm3 = v // xmm7 = test // xmm0, xmm4, xmm5, xmm6 = free // xmm1 = used if(m_sel.ltf) { // GSVector4i u1 = u0.add16(GSVector4i::x0001()); // GSVector4i v1 = v0.add16(GSVector4i::x0001()); movdqa(xmm4, xmm2); movdqa(xmm5, xmm3); pcmpeqd(xmm0, xmm0); psrlw(xmm0, 15); paddw(xmm4, xmm0); paddw(xmm5, xmm0); if(m_sel.twin) { // u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u); // v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v); // u1 = (u1 & m_local.twin[0].u).add16(m_local.twin[1].u); // v1 = (v1 & m_local.twin[0].v).add16(m_local.twin[1].v); movdqa(xmm0, ptr[&m_local.twin[0].u]); movdqa(xmm6, ptr[&m_local.twin[1].u]); pand(xmm2, xmm0); paddw(xmm2, xmm6); pand(xmm4, xmm0); paddw(xmm4, xmm6); movdqa(xmm0, ptr[&m_local.twin[0].v]); movdqa(xmm6, ptr[&m_local.twin[1].v]); pand(xmm3, xmm0); paddw(xmm3, xmm6); pand(xmm5, xmm0); paddw(xmm5, xmm6); } else { // u0 = u0.min_i16(m_local.twin[2].u); // v0 = v0.min_i16(m_local.twin[2].v); // u1 = u1.min_i16(m_local.twin[2].u); // v1 = v1.min_i16(m_local.twin[2].v); // TODO: if(!sprite) clamp16 else: movdqa(xmm0, ptr[&m_local.twin[2].u]); movdqa(xmm6, ptr[&m_local.twin[2].v]); pminsw(xmm2, xmm0); pminsw(xmm3, xmm6); pminsw(xmm4, xmm0); pminsw(xmm5, xmm6); } // xmm2 = u0 // xmm3 = v0 // xmm4 = u1 // xmm5 = v1 // xmm7 = test // xmm0, xmm6 = free // xmm1 = used // GSVector4i addr00 = v0.sll16(8) | u0; // GSVector4i addr01 = v0.sll16(8) | u1; // GSVector4i addr10 = v1.sll16(8) | u0; // GSVector4i addr11 = v1.sll16(8) | u1; psllw(xmm3, 8); movdqa(xmm0, xmm3); por(xmm3, xmm2); por(xmm0, xmm4); psllw(xmm5, 8); movdqa(xmm6, xmm5); por(xmm5, xmm2); por(xmm6, xmm4); // xmm3 = addr00 // xmm0 = addr01 // xmm5 = addr10 // xmm6 = addr11 // xmm7 = test // xmm2, xmm4 = free // xmm1 = used ReadTexel(xmm2, xmm3); ReadTexel(xmm4, xmm0); ReadTexel(xmm3, xmm5); ReadTexel(xmm5, xmm6); // xmm2 = c00 // xmm4 = c01 // xmm3 = c10 // xmm5 = c11 // xmm7 = test // xmm0, xmm6 = free // xmm1 = used // spill (TODO) movdqa(ptr[&m_local.temp.fd], xmm1); movdqa(ptr[&m_local.temp.test], xmm7); // xmm2 = c00 // xmm4 = c01 // xmm3 = c10 // xmm5 = c11 // xmm0, xmm1, xmm6, xmm7 = free movdqa(xmm1, xmm2); psllw(xmm1, 11); psrlw(xmm1, 8); movdqa(xmm0, xmm4); psllw(xmm0, 11); psrlw(xmm0, 8); lerp16<0>(xmm0, xmm1, ptr[&m_local.temp.uf]); movdqa(xmm6, xmm2); psllw(xmm6, 6); psrlw(xmm6, 11); psllw(xmm6, 3); movdqa(xmm1, xmm4); psllw(xmm1, 6); psrlw(xmm1, 11); psllw(xmm1, 3); lerp16<0>(xmm1, xmm6, ptr[&m_local.temp.uf]); movdqa(xmm7, xmm2); psllw(xmm7, 1); psrlw(xmm7, 11); psllw(xmm7, 3); movdqa(xmm6, xmm4); psllw(xmm6, 1); psrlw(xmm6, 11); psllw(xmm6, 3); lerp16<0>(xmm6, xmm7, ptr[&m_local.temp.uf]); psraw(xmm2, 15); psrlw(xmm2, 8); psraw(xmm4, 15); psrlw(xmm4, 8); lerp16<0>(xmm4, xmm2, ptr[&m_local.temp.uf]); // xmm0 = r00 // xmm1 = g00 // xmm6 = b00 // xmm4 = a00 // xmm3 = c10 // xmm5 = c11 // xmm2, xmm7 = free movdqa(xmm7, xmm3); psllw(xmm7, 11); psrlw(xmm7, 8); movdqa(xmm2, xmm5); psllw(xmm2, 11); psrlw(xmm2, 8); lerp16<0>(xmm2, xmm7, ptr[&m_local.temp.uf]); lerp16<0>(xmm2, xmm0, ptr[&m_local.temp.vf]); // xmm2 = r // xmm1 = g00 // xmm6 = b00 // xmm4 = a00 // xmm3 = c10 // xmm5 = c11 // xmm0, xmm7 = free movdqa(xmm7, xmm3); psllw(xmm7, 6); psrlw(xmm7, 11); psllw(xmm7, 3); movdqa(xmm0, xmm5); psllw(xmm0, 6); psrlw(xmm0, 11); psllw(xmm0, 3); lerp16<0>(xmm0, xmm7, ptr[&m_local.temp.uf]); lerp16<0>(xmm0, xmm1, ptr[&m_local.temp.vf]); // xmm2 = r // xmm0 = g // xmm6 = b00 // xmm4 = a00 // xmm3 = c10 // xmm5 = c11 // xmm1, xmm7 = free movdqa(xmm7, xmm3); psllw(xmm7, 1); psrlw(xmm7, 11); psllw(xmm7, 3); movdqa(xmm1, xmm5); psllw(xmm1, 1); psrlw(xmm1, 11); psllw(xmm1, 3); lerp16<0>(xmm1, xmm7, ptr[&m_local.temp.uf]); lerp16<0>(xmm1, xmm6, ptr[&m_local.temp.vf]); // xmm2 = r // xmm0 = g // xmm1 = b // xmm4 = a00 // xmm3 = c10 // xmm5 = c11 // xmm6, xmm7 = free psraw(xmm3, 15); psrlw(xmm3, 8); psraw(xmm5, 15); psrlw(xmm5, 8); lerp16<0>(xmm5, xmm3, ptr[&m_local.temp.uf]); lerp16<0>(xmm5, xmm4, ptr[&m_local.temp.vf]); // xmm2 = r // xmm0 = g // xmm1 = b // xmm5 = a // xmm3, xmm4, xmm6, xmm7 = free // TODO movdqa(xmm3, xmm5); // a movdqa(xmm4, xmm2); // r movdqa(xmm6, xmm1); // b movdqa(xmm5, xmm0); // g // reload test movdqa(xmm7, ptr[&m_local.temp.test]); // xmm4 = r // xmm5 = g // xmm6 = b // xmm3 = a // xmm7 = test // xmm0, xmm1, xmm2 = free // test |= (c[0] | c[1] | c[2] | c[3]).eq16(GSVector4i::zero()); // mask out blank pixels (not perfect) movdqa(xmm1, xmm3); por(xmm1, xmm4); movdqa(xmm2, xmm5); por(xmm2, xmm6); por(xmm1, xmm2); pxor(xmm0, xmm0); pcmpeqw(xmm1, xmm0); por(xmm7, xmm1); // a = a.gt16(GSVector4i::zero()); pcmpgtw(xmm3, xmm0); // reload fd movdqa(xmm1, ptr[&m_local.temp.fd]); } else { if(m_sel.twin) { // u = (u & m_local.twin[0].u).add16(m_local.twin[1].u); // v = (v & m_local.twin[0].v).add16(m_local.twin[1].v); pand(xmm2, ptr[&m_local.twin[0].u]); paddw(xmm2, ptr[&m_local.twin[1].u]); pand(xmm3, ptr[&m_local.twin[0].v]); paddw(xmm3, ptr[&m_local.twin[1].v]); } else { // u = u.min_i16(m_local.twin[2].u); // v = v.min_i16(m_local.twin[2].v); // TODO: if(!sprite) clamp16 else: pminsw(xmm2, ptr[&m_local.twin[2].u]); pminsw(xmm3, ptr[&m_local.twin[2].v]); } // xmm2 = u // xmm3 = v // xmm7 = test // xmm0, xmm4, xmm5, xmm6 = free // xmm1 = used // GSVector4i addr = v.sll16(8) | u; psllw(xmm3, 8); por(xmm3, xmm2); // xmm3 = addr // xmm7 = test // xmm0, xmm2, xmm4, xmm5, xmm6 = free // xmm1 = used ReadTexel(xmm6, xmm3); // xmm3 = c00 // xmm7 = test // xmm0, xmm2, xmm4, xmm5, xmm6 = free // xmm1 = used // test |= c00.eq16(GSVector4i::zero()); // mask out blank pixels pxor(xmm0, xmm0); pcmpeqw(xmm0, xmm6); por(xmm7, xmm0); // c[0] = (c00 << 3) & 0x00f800f8; // c[1] = (c00 >> 2) & 0x00f800f8; // c[2] = (c00 >> 7) & 0x00f800f8; // c[3] = c00.sra16(15); movdqa(xmm3, xmm6); psraw(xmm3, 15); // a pcmpeqd(xmm0, xmm0); psrlw(xmm0, 11); psllw(xmm0, 3); // 0x00f8 movdqa(xmm4, xmm6); psllw(xmm4, 3); pand(xmm4, xmm0); // r movdqa(xmm5, xmm6); psrlw(xmm5, 2); pand(xmm5, xmm0); // g psrlw(xmm6, 7); pand(xmm6, xmm0); // b } }