static bool fmaddGeneric(PPCEmuAssembler& a, Instruction instr) { if (instr.rc) { return jit_fallback(a, instr); } // FPSCR, FPRF supposed to be updated here... auto result = a.allocXmmTmp(); { auto srcC = a.loadRegisterRead(a.fprps[instr.frC]); // Do the rounding first so we don't run out of host registers if (ShouldRound) { auto tmpSrcC = a.allocXmmTmp(srcC); roundTo24BitSd(a, tmpSrcC); srcC = tmpSrcC; } auto srcA = a.loadRegisterRead(a.fprps[instr.frA]); auto srcB = a.loadRegisterRead(a.fprps[instr.frB]); a.movq(result, srcA); if (hostHasFMA3()) { if (ShouldSubtract) { a.vfmsub132sd(result, srcB, srcC); } else { a.vfmadd132sd(result, srcB, srcC); } } else { // no FMA3 a.mulsd(result, srcC); if (ShouldSubtract) { a.subsd(result, srcB); } else { a.addsd(result, srcB); } } } if (ShouldNegate) { negateXmmSd(a, result); } if (ShouldRound) { roundToSingleSd(a, result, result); auto dst = a.loadRegisterWrite(a.fprps[instr.frD]); a.movddup(dst, result); } else { auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]); a.movsd(dst, result); } return true; }
static bool fpArithGeneric(PPCEmuAssembler& a, Instruction instr) { if (instr.rc) { return jit_fallback(a, instr); } // FPSCR, FPRF supposed to be updated here... auto tmpSrcA = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frA])); switch (op) { case FPAdd: { auto srcB = a.loadRegisterRead(a.fprps[instr.frB]); a.addsd(tmpSrcA, srcB); break; } case FPSub: { auto srcB = a.loadRegisterRead(a.fprps[instr.frB]); a.subsd(tmpSrcA, srcB); break; } case FPMul: { auto tmpSrcC = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frC])); if (ShouldRound) { // PPC has this weird behaviour with fmuls where it truncates the // RHS operator to 24-bits of mantissa before multiplying... roundTo24BitSd(a, tmpSrcC); } a.mulsd(tmpSrcA, tmpSrcC); break; } case FPDiv: { auto srcB = a.loadRegisterRead(a.fprps[instr.frB]); a.divsd(tmpSrcA, srcB); break; } } if (ShouldRound) { roundToSingleSd(a, tmpSrcA, tmpSrcA); auto dst = a.loadRegisterWrite(a.fprps[instr.frD]); a.movddup(dst, tmpSrcA); } else { auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]); a.movsd(dst, tmpSrcA); } return true; }
// Move from Special Purpose Register static bool mfspr(PPCEmuAssembler& a, Instruction instr) { auto spr = decodeSPR(instr); auto dst = a.loadRegisterWrite(a.gpr[instr.rD]); switch (spr) { case SPR::XER: a.mov(dst, a.loadRegisterRead(a.xer)); break; case SPR::LR: a.mov(dst, a.lrMem); break; case SPR::CTR: a.mov(dst, a.ctrMem); break; case SPR::UGQR0: a.mov(dst, a.loadRegisterRead(a.gqr[0])); break; case SPR::UGQR1: a.mov(dst, a.loadRegisterRead(a.gqr[1])); break; case SPR::UGQR2: a.mov(dst, a.loadRegisterRead(a.gqr[2])); break; case SPR::UGQR3: a.mov(dst, a.loadRegisterRead(a.gqr[3])); break; case SPR::UGQR4: a.mov(dst, a.loadRegisterRead(a.gqr[4])); break; case SPR::UGQR5: a.mov(dst, a.loadRegisterRead(a.gqr[5])); break; case SPR::UGQR6: a.mov(dst, a.loadRegisterRead(a.gqr[6])); break; case SPR::UGQR7: a.mov(dst, a.loadRegisterRead(a.gqr[7])); break; case SPR::UPIR: a.mov(dst, a.coreIdMem); break; default: decaf_abort(fmt::format("Invalid mfspr SPR {}", static_cast<uint32_t>(spr))); } return true; }
// Data Cache Block Zero static bool dcbz(PPCEmuAssembler& a, Instruction instr) { auto src = a.allocGpTmp().r32(); if (instr.rA == 0) { a.mov(src, 0); } else { a.mov(src, a.loadRegisterRead(a.gpr[instr.rA])); } a.add(src, a.loadRegisterRead(a.gpr[instr.rB])); // Align down a.and_(src, ~static_cast<uint32_t>(31)); // Write 32 bytes of zero's there a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 0, 8), 0); a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 8, 8), 0); a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 16, 8), 0); a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 24, 8), 0); return true; }
static bool fsel(PPCEmuAssembler& a, Instruction instr) { auto dst = a.loadRegisterWrite(a.fprps[instr.frD]); auto srcB = a.loadRegisterRead(a.fprps[instr.frB]); auto srcC = a.loadRegisterRead(a.fprps[instr.frC]); auto tmp = a.allocXmmTmp(); a.pxor(tmp, tmp); constexpr auto NLE_US = 6; a.cmpsd(tmp, a.loadRegisterRead(a.fprps[instr.frA]), NLE_US); auto tmp2 = a.allocXmmTmp(); a.movapd(tmp2, tmp); a.pand(tmp, srcB); a.pandn(tmp2, srcC); a.por(tmp2, tmp); a.movsd(dst, tmp2); return true; }
static bool frsp(PPCEmuAssembler& a, Instruction instr) { if (instr.rc) { return jit_fallback(a, instr); } // FPSCR, FPRF supposed to be updated here... auto dst = a.loadRegisterWrite(a.fprps[instr.frD]); auto srcA = a.loadRegisterRead(a.fprps[instr.frB]); a.movq(dst, srcA); roundToSingleSd(a, dst, dst); a.movddup(dst, dst); return true; }
static bool fmrGeneric(PPCEmuAssembler& a, Instruction instr) { if (instr.rc) { return jit_fallback(a, instr); } auto tmpSrc = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frB])); if (ShouldAbs) { absXmmSd(a, tmpSrc); } if (ShouldNegate) { negateXmmSd(a, tmpSrc); } auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]); a.movsd(dst, tmpSrc); return true; }