void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, const Reg8& mask, bool fast, int psm) { if(fast) { // if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs); // if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs); test(mask, 0x0f); je("@f"); movq(qword[addr * 2 + (size_t)m_env.vm], src); L("@@"); test(mask, 0xf0); je("@f"); movhps(qword[addr * 2 + (size_t)m_env.vm + 8 * 2], src); L("@@"); } else { // if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>()); // if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>()); // if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>()); // if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>()); test(mask, 0x03); je("@f"); WritePixel(src, temp, addr, 0, psm); L("@@"); test(mask, 0x0c); je("@f"); WritePixel(src, temp, addr, 1, psm); L("@@"); test(mask, 0x30); je("@f"); WritePixel(src, temp, addr, 2, psm); L("@@"); test(mask, 0xc0); je("@f"); WritePixel(src, temp, addr, 3, psm); L("@@"); } }
void GPUDrawScanlineCodeGenerator::WriteFrame() { // GSVector4i fs = r | g | b | (m_sel.md ? GSVector4i(0x80008000) : m_sel.tme ? a : 0); pcmpeqd(xmm0, xmm0); if(m_sel.md || m_sel.tme) { movdqa(xmm2, xmm0); psllw(xmm2, 15); } psrlw(xmm0, 11); psllw(xmm0, 3); // xmm0 = 0x00f8 // xmm2 = 0x8000 (md) // GSVector4i r = (c[0] & 0x00f800f8) >> 3; pand(xmm4, xmm0); psrlw(xmm4, 3); // GSVector4i g = (c[1] & 0x00f800f8) << 2; pand(xmm5, xmm0); psllw(xmm5, 2); por(xmm4, xmm5); // GSVector4i b = (c[2] & 0x00f800f8) << 7; pand(xmm6, xmm0); psllw(xmm6, 7); por(xmm4, xmm6); if(m_sel.md) { // GSVector4i a = GSVector4i(0x80008000); por(xmm4, xmm2); } else if(m_sel.tme) { // GSVector4i a = (c[3] << 8) & 0x80008000; psllw(xmm3, 8); pand(xmm3, xmm2); por(xmm4, xmm3); } // fs = fs.blend8(fd, test); movdqa(xmm0, xmm7); blend8(xmm4, xmm1); // GSVector4i::store<false>(fb, fs); // movdqu(ptr[edi], xmm4); movq(qword[edi], xmm4); movhps(qword[edi + 8], xmm4); }
void GPUDrawScanlineCodeGenerator::Generate() { push(esi); push(edi); Init(); align(16); L("loop"); // GSVector4i test = m_test[7 + (steps & (steps >> 31))]; mov(edx, ecx); sar(edx, 31); and(edx, ecx); shl(edx, 4); movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]); // movdqu(xmm1, ptr[edi]); movq(xmm1, qword[edi]); movhps(xmm1, qword[edi + 8]); // ecx = steps // esi = tex (tme) // edi = fb // xmm1 = fd // xmm2 = s // xmm3 = t // xmm4 = r // xmm5 = g // xmm6 = b // xmm7 = test TestMask(); SampleTexture(); // xmm1 = fd // xmm3 = a // xmm4 = r // xmm5 = g // xmm6 = b // xmm7 = test // xmm0, xmm2 = free ColorTFX(); AlphaBlend(); Dither(); WriteFrame(); L("step"); // if(steps <= 0) break; test(ecx, ecx); jle("exit", T_NEAR); Step(); jmp("loop", T_NEAR); L("exit"); pop(edi); pop(esi); ret(8); }
void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr) { movq(dst, qword[addr * 2 + (size_t)m_env.vm]); movhps(dst, qword[addr * 2 + (size_t)m_env.vm + 8 * 2]); }