Example #1
0
// Emits host code for the fused multiply-add family:
//    frD = frA * frC (+ or -) frB, optionally negated, optionally rounded.
//
// ShouldSubtract / ShouldNegate / ShouldRound are not declared in this
// excerpt -- presumably compile-time template parameters; confirm against
// the enclosing template header.
//
// Returns the result of jit_fallback() for record forms (instr.rc set),
// otherwise true once the code has been emitted.
static bool
fmaddGeneric(PPCEmuAssembler& a, Instruction instr)
{
   // Record forms are not emitted inline.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto acc = a.allocXmmTmp();
   {
      // The operand handles only live inside this scope; they are gone
      // before the negate/round helpers below allocate temporaries.
      auto factorC = a.loadRegisterRead(a.fprps[instr.frC]);

      // Do the rounding first so we don't run out of host registers
      if (ShouldRound) {
         // Round a temporary copy rather than the register obtained for
         // reading (assumes allocXmmTmp(x) yields a temp initialised from x).
         auto roundedC = a.allocXmmTmp(factorC);
         roundTo24BitSd(a, roundedC);
         factorC = roundedC;
      }

      auto factorA = a.loadRegisterRead(a.fprps[instr.frA]);
      auto addendB = a.loadRegisterRead(a.fprps[instr.frB]);

      // acc starts out as frA and is combined in place.
      a.movq(acc, factorA);

      if (!hostHasFMA3()) {
         // Two-step path: multiply by frC, then add/subtract frB.
         a.mulsd(acc, factorC);
         if (ShouldSubtract) {
            a.subsd(acc, addendB);
         } else {
            a.addsd(acc, addendB);
         }
      } else if (ShouldSubtract) {
         // Single host instruction, same operands as the two-step path.
         a.vfmsub132sd(acc, addendB, factorC);
      } else {
         a.vfmadd132sd(acc, addendB, factorC);
      }
   }

   if (ShouldNegate) {
      negateXmmSd(a, acc);
   }

   if (!ShouldRound) {
      // movsd is used together with a read-write destination handle.
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, acc);
      return true;
   }

   // Rounded variant: round in place, then movddup into a write-only
   // destination handle.
   roundToSingleSd(a, acc, acc);
   auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
   a.movddup(dst, acc);
   return true;
}
Example #2
0
// Emits host code for a two-operand scalar FP operation selected by `op`:
//    FPAdd: frD = frA + frB      FPSub: frD = frA - frB
//    FPMul: frD = frA * frC      FPDiv: frD = frA / frB
//
// `op` and ShouldRound are not declared in this excerpt -- presumably
// compile-time template parameters; confirm against the template header.
//
// Returns the result of jit_fallback() for record forms (instr.rc set),
// otherwise true once the code has been emitted.
static bool
fpArithGeneric(PPCEmuAssembler& a, Instruction instr)
{
   // Record forms are not emitted inline.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // Work on a temporary seeded from frA; it accumulates the result.
   auto lhs = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frA]));

   if (op == FPMul) {
      // Multiplication takes its right-hand side from frC, not frB.
      auto rhsC = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frC]));
      if (ShouldRound) {
         // PPC has this weird behaviour with fmuls where it truncates the
         //  RHS operator to 24-bits of mantissa before multiplying...
         roundTo24BitSd(a, rhsC);
      }
      a.mulsd(lhs, rhsC);
   } else if (op == FPAdd || op == FPSub || op == FPDiv) {
      // These three share the same operand fetch and differ only in the
      // instruction that is emitted.
      auto rhsB = a.loadRegisterRead(a.fprps[instr.frB]);
      if (op == FPAdd) {
         a.addsd(lhs, rhsB);
      } else if (op == FPSub) {
         a.subsd(lhs, rhsB);
      } else {
         a.divsd(lhs, rhsB);
      }
   }

   if (!ShouldRound) {
      // movsd is used together with a read-write destination handle.
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, lhs);
      return true;
   }

   // Rounded variant: round in place, then movddup into a write-only
   // destination handle.
   roundToSingleSd(a, lhs, lhs);
   auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
   a.movddup(dst, lhs);
   return true;
}
Example #3
0
// Emits code that ANDs `reg` with the constant 0x7FFFFFFFFFFFFFFF, i.e.
// clears bit 63 of the low 64-bit lane.
//
// The constant is staged through a general-purpose temporary and then an
// XMM temporary before the AND.
// NOTE(review): the mask only populates the low lane via movq, so the pand
// presumably also clears whatever `reg` holds above bit 63 -- confirm that
// callers only rely on the low lane.
static void
absXmmSd(PPCEmuAssembler& a,
         const PPCEmuAssembler::XmmRegister& reg)
{
   // Every bit set except bit 63.
   constexpr auto kAllButSignBit = UINT64_C(0x7FFFFFFFFFFFFFFF);

   auto immGp = a.allocGpTmp();
   auto immXmm = a.allocXmmTmp();

   a.mov(immGp, kAllButSignBit);
   a.movq(immXmm, immGp);
   a.pand(reg, immXmm);
}
Example #4
0
// Emits code that XORs `reg` with the constant 0x8000000000000000, i.e.
// flips bit 63 of the low 64-bit lane.
//
// The constant is staged through a general-purpose temporary and then an
// XMM temporary before the XOR.
static void
negateXmmSd(PPCEmuAssembler& a,
            const PPCEmuAssembler::XmmRegister& reg)
{
   // Only bit 63 set.
   constexpr auto kSignBit = UINT64_C(0x8000000000000000);

   auto immGp = a.allocGpTmp();
   auto immXmm = a.allocXmmTmp();

   a.mov(immGp, kSignBit);
   a.movq(immXmm, immGp);
   a.pxor(reg, immXmm);
}
Example #5
0
// Emits code that reduces the precision of the value in `reg`, in place,
// using integer operations on its bit pattern:
//
//    reg = (reg & 0xFFFFFFFFF8000000) + (reg & 0x8000000)
//
// Bits 0..26 are discarded, and bit 27 is added back onto the masked value,
// so a set bit 27 carries into bit 28 (leaving bit 27 clear).
static void
roundTo24BitSd(PPCEmuAssembler& a,
               const PPCEmuAssembler::XmmRegister& reg)
{
   // Bit 27 alone: the highest bit that gets dropped.
   constexpr auto kRoundBit = UINT64_C(0x8000000);
   // Everything from bit 27 upwards.
   constexpr auto kKeepMask = UINT64_C(0xFFFFFFFFF8000000);

   auto immGp = a.allocGpTmp();
   auto immXmm = a.allocXmmTmp();
   auto roundBit = a.allocXmmTmp();

   // Snapshot the input so bit 27 can be isolated from it.
   a.movq(roundBit, reg);

   // roundBit = input & bit 27
   a.mov(immGp, kRoundBit);
   a.movq(immXmm, immGp);
   a.pand(roundBit, immXmm);

   // reg = input with bits 0..26 cleared
   a.mov(immGp, kKeepMask);
   a.movq(immXmm, immGp);
   a.pand(reg, immXmm);

   // Integer add of the isolated bit onto the masked value.
   a.paddq(reg, roundBit);
}
Example #6
0
// Emits host code for fsel: frD receives frC or frB depending on frA.
//
// A mask is built by comparing 0.0 against frA with predicate 6 (NLE_US).
// The mask is expected to be all-ones when !(0.0 <= frA), which includes
// the unordered (NaN) case, and all-zeros otherwise. The result is then
// (mask & frB) | (~mask & frC), so frB is chosen for negative/NaN frA and
// frC in every other case.
//
// Returns the result of jit_fallback() for record forms (instr.rc set),
// otherwise true once the code has been emitted.
static bool
fsel(PPCEmuAssembler& a, Instruction instr)
{
   // The other FP emitters in this file hand record forms to jit_fallback;
   // without this check the record form is compiled exactly like Rc=0.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
   auto srcC = a.loadRegisterRead(a.fprps[instr.frC]);

   // tmp = 0.0, then overwritten with the comparison mask.
   auto tmp = a.allocXmmTmp();
   a.pxor(tmp, tmp);

   constexpr auto NLE_US = 6;
   a.cmpsd(tmp, a.loadRegisterRead(a.fprps[instr.frA]), NLE_US);

   // Blend: tmp2 = (~mask & frC) | (mask & frB)
   auto tmp2 = a.allocXmmTmp();
   a.movapd(tmp2, tmp);
   a.pand(tmp, srcB);
   a.pandn(tmp2, srcC);
   a.por(tmp2, tmp);

   // Acquire the destination only after the sources have been consumed,
   // and as read-write: the result is stored with movsd, which the other
   // emitters in this file always pair with loadRegisterReadWrite (the
   // write-only handle is only ever paired with movddup).
   auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
   a.movsd(dst, tmp2);

   return true;
}
Example #7
0
// Emits code that computes dst = src & 0xFFFFFFFFE0000000, i.e. clears the
// low 29 bits of the value's bit pattern.
//
// When `dst` and `src` are the same object the mask goes through an extra
// XMM temporary; otherwise `dst` itself is used to hold the mask.
// NOTE(review): aliasing is detected by comparing object addresses only.
// Two distinct handles naming the same host register would take the
// non-aliased path and overwrite the source with the mask -- confirm
// callers never do that.
void
truncateToSingleSd(PPCEmuAssembler& a,
                   const PPCEmuAssembler::XmmRegister& dst,
                   const PPCEmuAssembler::XmmRegister& src)
{
   // Everything from bit 29 upwards.
   constexpr auto kKeepMask = UINT64_C(0xFFFFFFFFE0000000);

   auto immGp = a.allocGpTmp();
   a.mov(immGp, kKeepMask);

   if (&dst != &src) {
      // Distinct operands: load the mask straight into dst, then AND the
      // source into it.
      a.movq(dst, immGp);
      a.pand(dst, src);
      return;
   }

   // In-place: dst still holds the input, so the mask needs its own register.
   auto immXmm = a.allocXmmTmp();
   a.movq(immXmm, immGp);
   a.pand(dst, immXmm);
}
Example #8
0
// Emits host code for a register move from frB to frD, optionally passing
// the value through absXmmSd and/or negateXmmSd on the way (abs first, then
// negate, when both are enabled).
//
// ShouldAbs / ShouldNegate are not declared in this excerpt -- presumably
// compile-time template parameters; confirm against the template header.
//
// Returns the result of jit_fallback() for record forms (instr.rc set),
// otherwise true once the code has been emitted.
static bool
fmrGeneric(PPCEmuAssembler& a, Instruction instr)
{
   // Record forms are not emitted inline.
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // Operate on a temporary seeded from frB so the source register itself
   // is never modified by the helpers below.
   auto value = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frB]));

   if (ShouldAbs) {
      absXmmSd(a, value);
   }
   if (ShouldNegate) {
      negateXmmSd(a, value);
   }

   // movsd is used together with a read-write destination handle.
   auto target = a.loadRegisterReadWrite(a.fprps[instr.frD]);
   a.movsd(target, value);

   return true;
}