static bool fnmsubs(PPCEmuAssembler& a, Instruction instr)
{
   // Emits host code for frD = -((frA * frC) - frB), with the result passed
   // through a double -> single -> double conversion before it is stored.
   // Returns true once the code has been emitted; when instr.rc is set the
   // instruction is handed to jit_fallback instead.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // Bit 63 of a double is its sign bit.
   constexpr auto kSignBit = UINT64_C(0x8000000000000000);

   // xmm0 = frA * frC (a separate multiply; the subtract below is not fused)
   a.movq(a.xmm0, a.ppcfpr[instr.frA]);
   a.movq(a.xmm1, a.ppcfpr[instr.frC]);
   a.mulsd(a.xmm0, a.xmm1);

   // xmm0 -= frB
   a.movq(a.xmm1, a.ppcfpr[instr.frB]);
   a.subsd(a.xmm0, a.xmm1);

   // Negate by flipping the sign bit.
   // NOTE(review): this also flips the sign of a NaN result - confirm that
   // matches the guest's expected behaviour.
   a.mov(a.zax, kSignBit);
   a.movq(a.xmm1, a.zax);
   a.pxor(a.xmm0, a.xmm1);

   // Squeeze the value through single precision and widen it again.
   a.cvtsd2ss(a.xmm1, a.xmm0);
   a.cvtss2sd(a.xmm0, a.xmm1);

   a.movq(a.ppcfpr[instr.frD], a.xmm0);
   return true;
}
static bool frsp(PPCEmuAssembler& a, Instruction instr)
{
   // Emits host code that loads frB, converts it double -> single -> double,
   // and stores the result into frD. Returns true once emitted; when
   // instr.rc is set the instruction is handed to jit_fallback instead.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   a.movq(a.xmm0, a.ppcfpr[instr.frB]);

   // Narrow into xmm1, then widen back into xmm0.
   a.cvtsd2ss(a.xmm1, a.xmm0);
   a.cvtss2sd(a.xmm0, a.xmm1);

   a.movq(a.ppcfpr[instr.frD], a.xmm0);
   return true;
}
void truncateToSingleSd(PPCEmuAssembler& a,
                        const PPCEmuAssembler::XmmRegister& dst,
                        const PPCEmuAssembler::XmmRegister& src)
{
   // Emits code that writes `src` with its low 29 bits cleared into `dst`.
   // (A double has 52 mantissa bits and a single has 23, so the cleared bits
   // are the ones a single cannot hold.) No rounding is applied.
   //
   // `dst` and `src` may be the same object; that case uses an extra XMM
   // temporary so the mask load does not overwrite the source.
   // NOTE(review): aliasing is detected by comparing addresses, so two
   // distinct objects naming the same host register would take the
   // non-aliased path - confirm callers never do that.
   constexpr auto kKeepSingleBits = UINT64_C(0xFFFFFFFFE0000000);

   auto maskGp = a.allocGpTmp();
   a.mov(maskGp, kKeepSingleBits);

   const bool inPlace = (&dst == &src);
   if (!inPlace) {
      // dst can hold the mask itself: dst = mask & src.
      a.movq(dst, maskGp);
      a.pand(dst, src);
      return;
   }

   // In place: stage the mask in a scratch register, then dst &= mask.
   auto maskXmm = a.allocXmmTmp();
   a.movq(maskXmm, maskGp);
   a.pand(dst, maskXmm);
}
static bool fabs(PPCEmuAssembler& a, Instruction instr)
{
   // Emits host code for frD = frB with bit 63 (the sign bit) cleared.
   // Returns true once emitted; when instr.rc is set the instruction is
   // handed to jit_fallback instead.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // Every bit except the sign bit.
   constexpr auto kClearSign = UINT64_C(0x7FFFFFFFFFFFFFFF);

   a.movq(a.xmm0, a.ppcfpr[instr.frB]);

   // Route the mask through zax into xmm1, then AND it in.
   a.mov(a.zax, kClearSign);
   a.movq(a.xmm1, a.zax);
   a.pand(a.xmm0, a.xmm1);

   a.movq(a.ppcfpr[instr.frD], a.xmm0);
   return true;
}
static void absXmmSd(PPCEmuAssembler& a, const PPCEmuAssembler::XmmRegister& reg)
{
   // Emits code that ANDs `reg` in place with a mask that has every bit set
   // except bit 63, clearing the sign bit of the double in the low lane.
   // The mask is built in a GP temporary and moved into an XMM temporary.
   constexpr auto kClearSign = UINT64_C(0x7FFFFFFFFFFFFFFF);

   auto gpScratch = a.allocGpTmp();
   auto xmmScratch = a.allocXmmTmp();

   a.mov(gpScratch, kClearSign);
   a.movq(xmmScratch, gpScratch);
   a.pand(reg, xmmScratch);
}
static void negateXmmSd(PPCEmuAssembler& a, const PPCEmuAssembler::XmmRegister& reg)
{
   // Emits code that XORs `reg` in place with a mask holding only bit 63,
   // flipping the sign bit of the double in the low lane. The mask is built
   // in a GP temporary and moved into an XMM temporary.
   constexpr auto kSignBit = UINT64_C(0x8000000000000000);

   auto gpScratch = a.allocGpTmp();
   auto xmmScratch = a.allocXmmTmp();

   a.mov(gpScratch, kSignBit);
   a.movq(xmmScratch, gpScratch);
   a.pxor(reg, xmmScratch);
}
static void roundTo24BitSd(PPCEmuAssembler& a, const PPCEmuAssembler::XmmRegister& reg)
{
   // Emits code that rounds the 64-bit pattern in `reg`, in place, so that
   // bits 0..27 end up zero:
   //   1. copy reg and isolate bit 27 (the rounding bit),
   //   2. clear bits 0..26 of reg,
   //   3. add the isolated bit back with an integer add - if bit 27 was set
   //      this carries into bit 28, otherwise it adds zero.
   // Bits 28..51 remain, i.e. 24 of the 52 mantissa bits of a double.
   constexpr auto kRoundBit = UINT64_C(0x8000000);
   constexpr auto kKeepFromBit27 = UINT64_C(0xFFFFFFFFF8000000);

   auto gpScratch = a.allocGpTmp();
   auto xmmMask = a.allocXmmTmp();
   auto roundBit = a.allocXmmTmp();

   // roundBit = reg & (1 << 27)
   a.movq(roundBit, reg);
   a.mov(gpScratch, kRoundBit);
   a.movq(xmmMask, gpScratch);
   a.pand(roundBit, xmmMask);

   // reg &= ~((1 << 27) - 1); the scratch registers are reused for this mask
   a.mov(gpScratch, kKeepFromBit27);
   a.movq(xmmMask, gpScratch);
   a.pand(reg, xmmMask);

   // Integer add, so a carry out of the mantissa bumps the exponent field.
   a.paddq(reg, roundBit);
}
static bool fmaddGeneric(PPCEmuAssembler& a, Instruction instr)
{
   // Shared emitter for the multiply-add family. It computes
   //    frA * frC + frB     (or  - frB  when ShouldSubtract),
   // optionally negates the result (ShouldNegate) and optionally rounds it
   // to single precision (ShouldRound) before writing frD.
   //
   // ShouldSubtract / ShouldNegate / ShouldRound are declared outside this
   // block (presumably template parameters - confirm at the declaration).
   //
   // Returns true once the code has been emitted; when instr.rc is set the
   // instruction is handed to jit_fallback instead.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto acc = a.allocXmmTmp();

   // The operand registers live only inside this scope.
   {
      auto mulC = a.loadRegisterRead(a.fprps[instr.frC]);

      // Do the rounding first so we don't run out of host registers
      if (ShouldRound) {
         // Round a copy of frC rather than the value loaded for reading.
         auto roundedC = a.allocXmmTmp(mulC);
         roundTo24BitSd(a, roundedC);
         mulC = roundedC;
      }

      auto mulA = a.loadRegisterRead(a.fprps[instr.frA]);
      auto addB = a.loadRegisterRead(a.fprps[instr.frB]);

      a.movq(acc, mulA);

      if (!hostHasFMA3()) {
         // no FMA3: separate multiply and add/subtract
         a.mulsd(acc, mulC);

         if (ShouldSubtract) {
            a.subsd(acc, addB);
         } else {
            a.addsd(acc, addB);
         }
      } else if (ShouldSubtract) {
         // 132 form: acc = acc * mulC - addB
         a.vfmsub132sd(acc, addB, mulC);
      } else {
         // 132 form: acc = acc * mulC + addB
         a.vfmadd132sd(acc, addB, mulC);
      }
   }

   if (ShouldNegate) {
      negateXmmSd(a, acc);
   }

   if (!ShouldRound) {
      // Double result: movsd only replaces the low lane, so frD is loaded
      // read-write to keep its upper lane.
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, acc);
   } else {
      // Single result: round, then copy the low lane into both lanes of frD.
      roundToSingleSd(a, acc, acc);

      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, acc);
   }

   return true;
}
static bool frsp(PPCEmuAssembler& a, Instruction instr)
{
   // Emits host code that copies frB into frD, rounds it with
   // roundToSingleSd, and then copies the low lane into both lanes of frD.
   // Returns true once emitted; when instr.rc is set the instruction is
   // handed to jit_fallback instead.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // The destination is acquired before the source, as in the original
   // sequence.
   auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
   auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);

   a.movq(dst, srcB);
   roundToSingleSd(a, dst, dst);
   a.movddup(dst, dst);

   return true;
}