Example #1
0
static bool
fmaddGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto result = a.allocXmmTmp();
   {
      auto srcC = a.loadRegisterRead(a.fprps[instr.frC]);
      // Do the rounding first so we don't run out of host registers
      if (ShouldRound) {
         auto tmpSrcC = a.allocXmmTmp(srcC);
         roundTo24BitSd(a, tmpSrcC);
         srcC = tmpSrcC;
      }
      auto srcA = a.loadRegisterRead(a.fprps[instr.frA]);
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);

      a.movq(result, srcA);
      if (hostHasFMA3()) {
         if (ShouldSubtract) {
            a.vfmsub132sd(result, srcB, srcC);
         } else {
            a.vfmadd132sd(result, srcB, srcC);
         }
      } else {  // no FMA3
         a.mulsd(result, srcC);
         if (ShouldSubtract) {
            a.subsd(result, srcB);
         } else {
            a.addsd(result, srcB);
         }
      }
   }

   if (ShouldNegate) {
      negateXmmSd(a, result);
   }

   if (ShouldRound) {
      roundToSingleSd(a, result, result);
      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, result);
   } else {
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, result);
   }

   return true;
}
Example #2
0
static bool
fpArithGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto tmpSrcA = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frA]));

   switch (op) {
   case FPAdd: {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.addsd(tmpSrcA, srcB);
      break;
   }
   case FPSub: {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.subsd(tmpSrcA, srcB);
      break;
   }
   case FPMul: {
      auto tmpSrcC = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frC]));
      if (ShouldRound) {
         // PPC has this weird behaviour with fmuls where it truncates the
         //  RHS operator to 24-bits of mantissa before multiplying...
         roundTo24BitSd(a, tmpSrcC);
      }
      a.mulsd(tmpSrcA, tmpSrcC);
      break;
   }
   case FPDiv: {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.divsd(tmpSrcA, srcB);
      break;
   }
   }

   if (ShouldRound) {
      roundToSingleSd(a, tmpSrcA, tmpSrcA);
      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, tmpSrcA);
   } else {
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, tmpSrcA);
   }

   return true;
}
Example #3
0
// Move from Special Purpose Register
static bool
mfspr(PPCEmuAssembler& a, Instruction instr)
{
   auto spr = decodeSPR(instr);

   auto dst = a.loadRegisterWrite(a.gpr[instr.rD]);

   switch (spr) {
   case SPR::XER:
      a.mov(dst, a.loadRegisterRead(a.xer));
      break;
   case SPR::LR:
      a.mov(dst, a.lrMem);
      break;
   case SPR::CTR:
      a.mov(dst, a.ctrMem);
      break;
   case SPR::UGQR0:
      a.mov(dst, a.loadRegisterRead(a.gqr[0]));
      break;
   case SPR::UGQR1:
      a.mov(dst, a.loadRegisterRead(a.gqr[1]));
      break;
   case SPR::UGQR2:
      a.mov(dst, a.loadRegisterRead(a.gqr[2]));
      break;
   case SPR::UGQR3:
      a.mov(dst, a.loadRegisterRead(a.gqr[3]));
      break;
   case SPR::UGQR4:
      a.mov(dst, a.loadRegisterRead(a.gqr[4]));
      break;
   case SPR::UGQR5:
      a.mov(dst, a.loadRegisterRead(a.gqr[5]));
      break;
   case SPR::UGQR6:
      a.mov(dst, a.loadRegisterRead(a.gqr[6]));
      break;
   case SPR::UGQR7:
      a.mov(dst, a.loadRegisterRead(a.gqr[7]));
      break;
   case SPR::UPIR:
      a.mov(dst, a.coreIdMem);
      break;
   default:
      decaf_abort(fmt::format("Invalid mfspr SPR {}", static_cast<uint32_t>(spr)));
   }

   return true;
}
Example #4
0
// Data Cache Block Zero
static bool
dcbz(PPCEmuAssembler& a, Instruction instr)
{
   auto src = a.allocGpTmp().r32();

   if (instr.rA == 0) {
      a.mov(src, 0);
   } else {
      a.mov(src, a.loadRegisterRead(a.gpr[instr.rA]));
   }

   a.add(src, a.loadRegisterRead(a.gpr[instr.rB]));

   // Align down
   a.and_(src, ~static_cast<uint32_t>(31));

   // Write 32 bytes of zero's there
   a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 0, 8), 0);
   a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 8, 8), 0);
   a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 16, 8), 0);
   a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 24, 8), 0);

   return true;
}
Example #5
0
static bool
fsel(PPCEmuAssembler& a, Instruction instr)
{
   auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);

   auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
   auto srcC = a.loadRegisterRead(a.fprps[instr.frC]);

   auto tmp = a.allocXmmTmp();
   a.pxor(tmp, tmp);

   constexpr auto NLE_US = 6;
   a.cmpsd(tmp, a.loadRegisterRead(a.fprps[instr.frA]), NLE_US);

   auto tmp2 = a.allocXmmTmp();
   a.movapd(tmp2, tmp);
   a.pand(tmp, srcB);
   a.pandn(tmp2, srcC);
   a.por(tmp2, tmp);

   a.movsd(dst, tmp2);

   return true;
}
Example #6
0
static bool
frsp(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
   auto srcA = a.loadRegisterRead(a.fprps[instr.frB]);
   a.movq(dst, srcA);

   roundToSingleSd(a, dst, dst);

   a.movddup(dst, dst);
   return true;
}
Example #7
0
static bool
fmrGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   auto tmpSrc = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frB]));

   if (ShouldAbs) {
      absXmmSd(a, tmpSrc);
   }

   if (ShouldNegate) {
      negateXmmSd(a, tmpSrc);
   }

   auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
   a.movsd(dst, tmpSrc);

   return true;
}