// Grouped together since these 3 instructions do exactly the same thing. TEST_F(x64EmitterTest, MOVNT_DQ_PS_PD) { for (const auto& r : xmmnames) { emitter->MOVNTDQ(MatR(RAX), r.reg); emitter->MOVNTPS(MatR(RAX), r.reg); emitter->MOVNTPD(MatR(RAX), r.reg); ExpectDisassembly("movntdq dqword ptr ds:[rax], " + r.name + " " "movntps dqword ptr ds:[rax], " + r.name + " " "movntpd dqword ptr ds:[rax], " + r.name); } }
// MOVNTI with 32/64-bit operands, against both register-indirect and
// RIP-relative destinations.
TEST_F(x64EmitterTest, MOVNTI)
{
  const int kBitSizes[] = {32, 64};
  for (int bits : kBitSizes)
  {
    emitter->MOVNTI(bits, MatR(RAX), R12);
    emitter->MOVNTI(bits, M(code_buffer), R12);
  }
  ExpectDisassembly("movnti dword ptr ds:[rax], r12d "
                    "movnti dword ptr ds:[rip-12], r12d "
                    "movnti qword ptr ds:[rax], r12 "
                    "movnti qword ptr ds:[rip-24], r12");
}
// Exercises every prefetch hint level, in enum order.
TEST_F(x64EmitterTest, PREFETCH)
{
  const decltype(XEmitter::PF_NTA) kHints[] = {XEmitter::PF_NTA, XEmitter::PF_T0,
                                               XEmitter::PF_T1, XEmitter::PF_T2};
  for (auto hint : kHints)
    emitter->PREFETCH(hint, MatR(R12));
  ExpectDisassembly("prefetchnta byte ptr ds:[r12] "
                    "prefetcht0 byte ptr ds:[r12] "
                    "prefetcht1 byte ptr ds:[r12] "
                    "prefetcht2 byte ptr ds:[r12]");
}
// LDDQU (unaligned 128-bit load) into each XMM register.
TEST_F(x64EmitterTest, LDDQU)
{
  for (const auto& xmm : xmmnames)
  {
    emitter->LDDQU(xmm.reg, MatR(R12));
    ExpectDisassembly("lddqu " + xmm.name + ", dqword ptr ds:[r12]");
  }
}
// Emits x86 code that transforms the raw CLUT index in resultReg according to
// the shift/mask/offset fields of the runtime clutformat register (read from
// gstate.clutformat). Which transforms are emitted is decided statically from
// the id flags. Always returns true (success).
// NOTE(review): resultReg/tempReg1/tempReg2 appear to be member-selected
// registers — confirm their allocation elsewhere in the cache.
bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerIndex) {
	GEPaletteFormat fmt = (GEPaletteFormat)id.clutfmt;
	if (!id.hasClutShift && !id.hasClutMask && !id.hasClutOffset) {
		// This is simple - just mask if necessary.
		// Indices wider than 8 bits are clamped to the low byte.
		if (bitsPerIndex > 8) {
			AND(32, R(resultReg), Imm32(0x000000FF));
		}
		return true;
	}

	// Load the current clutformat register value into tempReg1.
	MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate.clutformat));
	MOV(32, R(tempReg1), MatR(tempReg1));

	// Shift = (clutformat >> 2) & 0x1F
	// Variable shift count must live in CL, hence the move through RCX.
	if (id.hasClutShift) {
		MOV(32, R(RCX), R(tempReg1));
		SHR(32, R(RCX), Imm8(2));
		AND(32, R(RCX), Imm8(0x1F));
		SHR(32, R(resultReg), R(RCX));
	}

	// Mask = (clutformat >> 8) & 0xFF
	// The AND against resultReg implicitly discards bits above the mask byte.
	if (id.hasClutMask) {
		MOV(32, R(tempReg2), R(tempReg1));
		SHR(32, R(tempReg2), Imm8(8));
		AND(32, R(resultReg), R(tempReg2));
	}

	// We need to wrap any entries beyond the first 1024 bytes.
	// 32-bit entries: 256 entries (0xFF); 16-bit entries: 512 (0x1FF).
	u32 offsetMask = fmt == GE_CMODE_32BIT_ABGR8888 ? 0x00FF : 0x01FF;

	// We must mask to 0xFF before ORing 0x100 in 16 bit CMODEs.
	// But skip if we'll mask 0xFF after offset anyway.
	if (bitsPerIndex > 8 && (!id.hasClutOffset || offsetMask != 0x00FF)) {
		AND(32, R(resultReg), Imm32(0x000000FF));
	}

	// Offset = (clutformat >> 12) & 0x01F0
	// Implemented as (clutformat >> 16) << 4; the final AND with offsetMask
	// both wraps the table and drops the high offset bits.
	if (id.hasClutOffset) {
		SHR(32, R(tempReg1), Imm8(16));
		SHL(32, R(tempReg1), Imm8(4));
		OR(32, R(resultReg), R(tempReg1));
		AND(32, R(resultReg), Imm32(offsetMask));
	}
	return true;
}
// MOVBE (byte-swapping load/store) for every operand size, both directions.
TEST_F(x64EmitterTest, MOVBE)
{
  const int kBitSizes[] = {16, 32, 64};
  for (int bits : kBitSizes)
  {
    emitter->MOVBE(bits, RAX, MatR(R12));
    emitter->MOVBE(bits, MatR(RAX), R12);
  }
  ExpectDisassembly("movbe ax, word ptr ds:[r12] "
                    "movbe word ptr ds:[rax], r12w "
                    "movbe eax, dword ptr ds:[r12] "
                    "movbe dword ptr ds:[rax], r12d "
                    "movbe rax, qword ptr ds:[r12] "
                    "movbe qword ptr ds:[rax], r12");
}
// x87 loads and stores for every supported width.
TEST_F(x64EmitterTest, FLD_FST_FSTP)
{
  const int kAllSizes[] = {32, 64, 80};
  const int kFstSizes[] = {32, 64};  // No 80 bit version of FST.
  for (int bits : kAllSizes)
    emitter->FLD(bits, MatR(RBP));
  for (int bits : kFstSizes)
    emitter->FST(bits, MatR(RBP));
  for (int bits : kAllSizes)
    emitter->FSTP(bits, MatR(RBP));
  ExpectDisassembly("fld dword ptr ss:[rbp] "
                    "fld qword ptr ss:[rbp] "
                    "fld tbyte ptr ss:[rbp] "
                    "fst dword ptr ss:[rbp] "
                    "fst qword ptr ss:[rbp] "
                    "fstp dword ptr ss:[rbp] "
                    "fstp qword ptr ss:[rbp] "
                    "fstp tbyte ptr ss:[rbp]");
}
// Compiles the two-operand single-precision FPU ops (abs/mov/neg/sqrt and the
// float->int / int->float conversions) selected by the low 6 bits of op.
void Jit::Comp_FPU2op(MIPSOpcode op) {
	CONDITIONAL_DISABLE(FPU);

	int fs = _FS;
	int fd = _FD;

	// Shared emitter for the float->int conversions. conv is the SSE convert
	// to emit (CVTSS2SI or CVTTSS2SI); setMXCSR is the rounding-control value
	// to force into MXCSR bits 13-14 for the duration, or -1 to leave the
	// current rounding mode alone.
	auto execRounding = [&](void (XEmitter::*conv)(X64Reg, OpArg), int setMXCSR) {
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fs == fd, true);

		// Small optimization: 0 is our default mode anyway.
		if (setMXCSR == 0 && !js.hasSetRounding) {
			setMXCSR = -1;
		}
		if (setMXCSR != -1) {
			// Save the current MXCSR, then swap in the requested rounding mode.
			STMXCSR(MIPSSTATE_VAR(mxcsrTemp));
			MOV(32, R(TEMPREG), MIPSSTATE_VAR(mxcsrTemp));
			AND(32, R(TEMPREG), Imm32(~(3 << 13)));
			OR(32, R(TEMPREG), Imm32(setMXCSR << 13));
			MOV(32, MIPSSTATE_VAR(temp), R(TEMPREG));
			LDMXCSR(MIPSSTATE_VAR(temp));
		}

		(this->*conv)(TEMPREG, fpr.R(fs));

		// Did we get an indefinite integer value?
		CMP(32, R(TEMPREG), Imm32(0x80000000));
		FixupBranch skip = J_CC(CC_NE);
		if (fd != fs) {
			CopyFPReg(fpr.RX(fd), fpr.R(fs));
		}
		// CMPSS with 0.0f produces an all-ones mask when fs < 0, else all zeros.
		XORPS(XMM1, R(XMM1));
		CMPSS(fpr.RX(fd), R(XMM1), CMP_LT);

		// At this point, -inf = 0xffffffff, inf/nan = 0x00000000.
		// We want -inf to be 0x80000000 inf/nan to be 0x7fffffff, so we flip those bits.
		MOVD_xmm(R(TEMPREG), fpr.RX(fd));
		XOR(32, R(TEMPREG), Imm32(0x7fffffff));

		SetJumpTarget(skip);
		MOVD_xmm(fpr.RX(fd), R(TEMPREG));

		if (setMXCSR != -1) {
			// Restore the caller's MXCSR.
			LDMXCSR(MIPSSTATE_VAR(mxcsrTemp));
		}
	};

	switch (op & 0x3f) {
	case 5:	//F(fd) = fabsf(F(fs)); break; //abs
		// Clear the sign bit via AND with a no-sign mask constant.
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssNoSignMask[0]));
		if (fd != fs && fpr.IsMapped(fs)) {
			MOVAPS(fpr.RX(fd), MatR(TEMPREG));
			ANDPS(fpr.RX(fd), fpr.R(fs));
		} else {
			if (fd != fs) {
				MOVSS(fpr.RX(fd), fpr.R(fs));
			}
			ANDPS(fpr.RX(fd), MatR(TEMPREG));
		}
		break;

	case 6:	//F(fd) = F(fs); break; //mov
		if (fd != fs) {
			fpr.SpillLock(fd, fs);
			fpr.MapReg(fd, fd == fs, true);
			CopyFPReg(fpr.RX(fd), fpr.R(fs));
		}
		break;

	case 7:	//F(fd) = -F(fs); break; //neg
		// Flip the sign bit via XOR with a sign-bit constant.
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssSignBits2[0]));
		if (fd != fs && fpr.IsMapped(fs)) {
			MOVAPS(fpr.RX(fd), MatR(TEMPREG));
			XORPS(fpr.RX(fd), fpr.R(fs));
		} else {
			if (fd != fs) {
				MOVSS(fpr.RX(fd), fpr.R(fs));
			}
			XORPS(fpr.RX(fd), MatR(TEMPREG));
		}
		break;

	case 4:	//F(fd) = sqrtf(F(fs)); break; //sqrt
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		SQRTSS(fpr.RX(fd), fpr.R(fs));
		break;

	case 13: //FsI(fd) = F(fs)>=0 ? (int)floorf(F(fs)) : (int)ceilf(F(fs)); break; //trunc.w.s
		// Truncating convert; rounding mode irrelevant, so no MXCSR swap.
		execRounding(&XEmitter::CVTTSS2SI, -1);
		break;

	case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fs == fd, true);
		if (fpr.IsMapped(fs)) {
			CVTDQ2PS(fpr.RX(fd), fpr.R(fs));
		} else {
			// If fs was fd, we'd be in the case above since we mapped fd.
			MOVSS(fpr.RX(fd), fpr.R(fs));
			CVTDQ2PS(fpr.RX(fd), fpr.R(fd));
		}
		break;

	case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
		// Uses the current rounding mode.
		execRounding(&XEmitter::CVTSS2SI, -1);
		break;

	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
		execRounding(&XEmitter::CVTSS2SI, 0);
		break;
	case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
		execRounding(&XEmitter::CVTSS2SI, 2);
		break;
	case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
		execRounding(&XEmitter::CVTSS2SI, 1);
		break;

	default:
		DISABLE;
		return;
	}
	fpr.ReleaseSpillLocks();
}
// LDMXCSR — loads MXCSR from a 32-bit memory operand.
TEST_F(x64EmitterTest, LDMXCSR)
{
  emitter->LDMXCSR(MatR(R12));
  ExpectDisassembly("ldmxcsr dword ptr ds:[r12]");
}