Example #1
0
static bool
fnmsubs(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   a.movq(a.xmm0, a.ppcfpr[instr.frA]);
   a.movq(a.xmm1, a.ppcfpr[instr.frC]);
   a.mulsd(a.xmm0, a.xmm1);

   a.movq(a.xmm1, a.ppcfpr[instr.frB]);
   a.subsd(a.xmm0, a.xmm1);

   a.mov(a.zax, UINT64_C(0x8000000000000000));
   a.movq(a.xmm1, a.zax);
   a.pxor(a.xmm0, a.xmm1);

   a.cvtsd2ss(a.xmm1, a.xmm0);
   a.cvtss2sd(a.xmm0, a.xmm1);

   a.movq(a.ppcfpr[instr.frD], a.xmm0);

   return true;
}
Example #2
0
static bool
fmaddGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto result = a.allocXmmTmp();
   {
      auto srcC = a.loadRegisterRead(a.fprps[instr.frC]);
      // Do the rounding first so we don't run out of host registers
      if (ShouldRound) {
         auto tmpSrcC = a.allocXmmTmp(srcC);
         roundTo24BitSd(a, tmpSrcC);
         srcC = tmpSrcC;
      }
      auto srcA = a.loadRegisterRead(a.fprps[instr.frA]);
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);

      a.movq(result, srcA);
      if (hostHasFMA3()) {
         if (ShouldSubtract) {
            a.vfmsub132sd(result, srcB, srcC);
         } else {
            a.vfmadd132sd(result, srcB, srcC);
         }
      } else {  // no FMA3
         a.mulsd(result, srcC);
         if (ShouldSubtract) {
            a.subsd(result, srcB);
         } else {
            a.addsd(result, srcB);
         }
      }
   }

   if (ShouldNegate) {
      negateXmmSd(a, result);
   }

   if (ShouldRound) {
      roundToSingleSd(a, result, result);
      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, result);
   } else {
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, result);
   }

   return true;
}
Example #3
0
static bool
fpArithGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto tmpSrcA = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frA]));

   switch (op) {
   case FPAdd: {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.addsd(tmpSrcA, srcB);
      break;
   }
   case FPSub: {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.subsd(tmpSrcA, srcB);
      break;
   }
   case FPMul: {
      auto tmpSrcC = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frC]));
      if (ShouldRound) {
         // PPC has this weird behaviour with fmuls where it truncates the
         //  RHS operator to 24-bits of mantissa before multiplying...
         roundTo24BitSd(a, tmpSrcC);
      }
      a.mulsd(tmpSrcA, tmpSrcC);
      break;
   }
   case FPDiv: {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.divsd(tmpSrcA, srcB);
      break;
   }
   }

   if (ShouldRound) {
      roundToSingleSd(a, tmpSrcA, tmpSrcA);
      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, tmpSrcA);
   } else {
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, tmpSrcA);
   }

   return true;
}
Example #4
0
static bool
fsub(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   a.movq(a.xmm0, a.ppcfpr[instr.frA]);
   a.movq(a.xmm1, a.ppcfpr[instr.frB]);

   a.subsd(a.xmm0, a.xmm1);

   a.movq(a.ppcfpr[instr.frD], a.xmm0);

   return true;
}