示例#1
0
// Emits host code for fnmsubs: frD = -((frA * frC) - frB), passed through
// single precision. Returns true when code was emitted inline; record forms
// (instr.rc set) are delegated to jit_fallback.
static bool
fnmsubs(PPCEmuAssembler& a, Instruction instr)
{
   if (!instr.rc) {
      // FPSCR, FPRF supposed to be updated here...

      auto &acc = a.xmm0;
      auto &scratch = a.xmm1;

      // acc = frA * frC
      a.movq(acc, a.ppcfpr[instr.frA]);
      a.movq(scratch, a.ppcfpr[instr.frC]);
      a.mulsd(acc, scratch);

      // acc -= frB
      a.movq(scratch, a.ppcfpr[instr.frB]);
      a.subsd(acc, scratch);

      // Negate by flipping the sign bit (bit 63) of the double
      a.mov(a.zax, UINT64_C(0x8000000000000000));
      a.movq(scratch, a.zax);
      a.pxor(acc, scratch);

      // Convert to float and back so the value is representable as a single
      a.cvtsd2ss(scratch, acc);
      a.cvtss2sd(acc, scratch);

      a.movq(a.ppcfpr[instr.frD], acc);
      return true;
   }

   return jit_fallback(a, instr);
}
示例#2
0
// Emits a multiply-add family operation on frA, frC and frB into frD.
// ShouldSubtract / ShouldNegate / ShouldRound are not declared in this block;
// presumably compile-time template parameters declared above it -- confirm.
// Returns true when code was emitted inline; record forms go to jit_fallback.
static bool
fmaddGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto accum = a.allocXmmTmp();
   {
      // The operand handles below only live for this scope.
      auto mulRhs = a.loadRegisterRead(a.fprps[instr.frC]);
      // Do the rounding first so we don't run out of host registers
      if (ShouldRound) {
         auto roundedRhs = a.allocXmmTmp(mulRhs);
         roundTo24BitSd(a, roundedRhs);
         mulRhs = roundedRhs;
      }
      auto mulLhs = a.loadRegisterRead(a.fprps[instr.frA]);
      auto addend = a.loadRegisterRead(a.fprps[instr.frB]);

      a.movq(accum, mulLhs);
      if (!hostHasFMA3()) {
         // No FMA3: separate multiply followed by add/subtract
         a.mulsd(accum, mulRhs);
         if (ShouldSubtract) {
            a.subsd(accum, addend);
         } else {
            a.addsd(accum, addend);
         }
      } else if (ShouldSubtract) {
         // accum = accum * mulRhs - addend
         a.vfmsub132sd(accum, addend, mulRhs);
      } else {
         // accum = accum * mulRhs + addend
         a.vfmadd132sd(accum, addend, mulRhs);
      }
   }

   if (ShouldNegate) {
      negateXmmSd(a, accum);
   }

   if (!ShouldRound) {
      // Double-precision form: only the low double of frD is replaced
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, accum);
   } else {
      // Single-precision form: round, then write the result to both halves
      roundToSingleSd(a, accum, accum);
      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, accum);
   }

   return true;
}
示例#3
0
// Emits host code for an arithmetic right shift. Only an immediate shift by
// zero without the record bit is handled inline; everything else is delegated
// to jit_fallback. `flags` is not declared in this block; presumably a
// template parameter declared above it -- confirm.
// Returns true when code was emitted inline.
static bool
shiftArithmetic(PPCEmuAssembler& a, Instruction instr)
{
   // Record forms (rc set) also need the condition register updated, so they
   // are left to the fallback rather than silently skipping that update.
   if ((flags & ShiftImmediate) && instr.sh == 0 && !instr.rc) {
      // A shift by zero leaves the value unchanged, but the destination
      // register must still receive it: rA = rS
      a.mov(a.eax, a.ppcgpr[instr.rS]);
      a.mov(a.ppcgpr[instr.rA], a.eax);

      // Clear Carry Flag
      a.mov(a.ecx, a.ppcxer);
      a.and_(a.ecx, ~XERegisterBits::Carry);
      a.mov(a.ppcxer, a.ecx);
      return true;
   }

   return jit_fallback(a, instr);
}
示例#4
0
// Emits host code that copies frB into frD.
// Returns true when code was emitted inline; record forms go to jit_fallback.
static bool
fmr(PPCEmuAssembler& a, Instruction instr)
{
   if (!instr.rc) {
      // FPSCR, FPRF supposed to be updated here...

      // Move the 64-bit value through xmm0
      a.movq(a.xmm0, a.ppcfpr[instr.frB]);
      a.movq(a.ppcfpr[instr.frD], a.xmm0);
      return true;
   }

   return jit_fallback(a, instr);
}
示例#5
0
// Emits a two-operand floating point operation (add, subtract, multiply or
// divide, selected by `op`) and writes the result to frD. `op` and
// `ShouldRound` are not declared in this block; presumably template
// parameters declared above it -- confirm.
// Returns true when code was emitted inline; record forms go to jit_fallback.
static bool
fpArithGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // Work on a temporary copy of frA so the source register is not modified
   auto lhs = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frA]));

   if (op == FPMul) {
      auto rhs = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frC]));
      if (ShouldRound) {
         // PPC has this weird behaviour with fmuls where it truncates the
         //  RHS operator to 24-bits of mantissa before multiplying...
         roundTo24BitSd(a, rhs);
      }
      a.mulsd(lhs, rhs);
   } else if (op == FPAdd) {
      auto rhs = a.loadRegisterRead(a.fprps[instr.frB]);
      a.addsd(lhs, rhs);
   } else if (op == FPSub) {
      auto rhs = a.loadRegisterRead(a.fprps[instr.frB]);
      a.subsd(lhs, rhs);
   } else if (op == FPDiv) {
      auto rhs = a.loadRegisterRead(a.fprps[instr.frB]);
      a.divsd(lhs, rhs);
   }

   if (!ShouldRound) {
      // Double-precision form: only the low double of frD is replaced
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, lhs);
   } else {
      // Single-precision form: round, then write the result to both halves
      roundToSingleSd(a, lhs, lhs);
      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, lhs);
   }

   return true;
}
示例#6
0
// Emits host code that rounds frB to single precision and stores the result
// in both halves of frD.
// Returns true when code was emitted inline; record forms go to jit_fallback.
static bool
frsp(PPCEmuAssembler& a, Instruction instr)
{
   if (!instr.rc) {
      // FPSCR, FPRF supposed to be updated here...

      // Destination is acquired before the source, then seeded with frB
      auto target = a.loadRegisterWrite(a.fprps[instr.frD]);
      auto source = a.loadRegisterRead(a.fprps[instr.frB]);
      a.movq(target, source);

      // Round in place
      roundToSingleSd(a, target, target);

      // Duplicate the low double into the high half
      a.movddup(target, target);
      return true;
   }

   return jit_fallback(a, instr);
}
示例#7
0
// Emits host code that stores frB with its sign bit cleared into frD.
// Returns true when code was emitted inline; record forms go to jit_fallback.
static bool
fabs(PPCEmuAssembler& a, Instruction instr)
{
   if (!instr.rc) {
      // FPSCR, FPRF supposed to be updated here...

      auto &value = a.xmm0;
      auto &mask = a.xmm1;

      a.movq(value, a.ppcfpr[instr.frB]);

      // Keep every bit except the sign bit (bit 63)
      a.mov(a.zax, UINT64_C(0x7FFFFFFFFFFFFFFF));
      a.movq(mask, a.zax);
      a.pand(value, mask);

      a.movq(a.ppcfpr[instr.frD], value);
      return true;
   }

   return jit_fallback(a, instr);
}
示例#8
0
// Emits a floating point register move from frB to frD, optionally applying
// an absolute value and/or a negation first. `ShouldAbs` and `ShouldNegate`
// are not declared in this block; presumably template parameters declared
// above it -- confirm.
// Returns true when code was emitted inline; record forms go to jit_fallback.
static bool
fmrGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (!instr.rc) {
      // Work on a temporary copy so frB itself is left untouched
      auto value = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frB]));

      // Absolute value is applied before negation
      if (ShouldAbs) {
         absXmmSd(a, value);
      }

      if (ShouldNegate) {
         negateXmmSd(a, value);
      }

      // Only the low double of frD is replaced
      auto target = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(target, value);

      return true;
   }

   return jit_fallback(a, instr);
}
示例#9
0
// Emits host code for fmadds: frD = (frA * frC) + frB, passed through single
// precision. Returns true when code was emitted inline; record forms
// (instr.rc set) are delegated to jit_fallback.
static bool
fmadds(PPCEmuAssembler& a, Instruction instr)
{
   if (!instr.rc) {
      // FPSCR, FPRF supposed to be updated here...

      auto &acc = a.xmm0;
      auto &scratch = a.xmm1;

      // acc = frA * frC
      a.movq(acc, a.ppcfpr[instr.frA]);
      a.movq(scratch, a.ppcfpr[instr.frC]);
      a.mulsd(acc, scratch);

      // acc += frB
      a.movq(scratch, a.ppcfpr[instr.frB]);
      a.addsd(acc, scratch);

      // Convert to float and back so the value is representable as a single
      a.cvtsd2ss(scratch, acc);
      a.cvtss2sd(acc, scratch);

      a.movq(a.ppcfpr[instr.frD], acc);
      return true;
   }

   return jit_fallback(a, instr);
}
示例#10
0
// Emits host code for the integer add family. The variant is selected by
// `flags`, which is not declared in this block; presumably a template
// parameter declared above it -- confirm.
//
// Register usage in the emitted code: eax holds the first operand and the
// result, ecx the second operand and later the new XER bits, edx is scratch.
// Returns true when code was emitted inline.
static bool
addGeneric(PPCEmuAssembler& a, Instruction instr)
{
   // Subtract forms always take the fallback, so the AddSubtract handling
   // further down is never reached.
   if (flags & AddSubtract) {
      return jit_fallback(a, instr);
   }

   // AddAlwaysRecord takes priority over AddCheckRecord
   const bool alwaysRecord = (flags & AddAlwaysRecord) != 0;
   const bool checkRecord = !alwaysRecord && (flags & AddCheckRecord) != 0;

   const bool recordCarry = (flags & AddCarry) != 0;
   const bool recordOverflow = alwaysRecord || (checkRecord && instr.oe);
   const bool recordCond = alwaysRecord || (checkRecord && instr.rc);

   // First operand -> eax (rA, or literal zero when AddZeroRA applies)
   if (!(flags & AddZeroRA) || instr.rA != 0) {
      a.mov(a.eax, a.ppcgpr[instr.rA]);
   } else {
      a.mov(a.eax, 0);
   }

   if (flags & AddSubtract) {
      a.not_(a.eax);
   }

   // Second operand -> ecx
   if (flags & AddImmediate) {
      a.mov(a.ecx, sign_extend<16>(instr.simm));
   } else if (flags & AddToZero) {
      a.mov(a.ecx, 0);
   } else if (flags & AddToMinusOne) {
      a.mov(a.ecx, -1);
   } else {
      a.mov(a.ecx, a.ppcgpr[instr.rB]);
   }

   if (flags & AddShifted) {
      a.shl(a.ecx, 16);
   }

   // Mark x64 CF based on PPC CF
   if (flags & AddExtended) {
      // Adding 0xffffffff to the isolated carry bit sets the host carry flag
      // exactly when that bit was non-zero.
      a.mov(a.edx, a.ppcxer);
      a.and_(a.edx, XERegisterBits::Carry);
      a.add(a.edx, 0xffffffff);

      a.adc(a.eax, a.ecx);
   } else if (flags & AddSubtract) {
      a.stc();

      a.adc(a.eax, a.ecx);
   } else {
      a.add(a.eax, a.ecx);
   }

   // Capture the host carry/overflow flags into ecx, already shifted to
   // their XER positions. setc/seto are emitted before any shl.
   if (recordCarry) {
      a.mov(a.ecx, 0);
      a.setc(a.ecx.r8());

      if (recordOverflow) {
         a.mov(a.edx, 0);
         a.seto(a.edx.r8());
      }

      a.shl(a.ecx, XERegisterBits::CarryShift);

      if (recordOverflow) {
         a.shl(a.edx, XERegisterBits::OverflowShift);
         a.or_(a.ecx, a.edx);
      }
   } else if (recordOverflow) {
      a.mov(a.ecx, 0);
      a.seto(a.ecx.r8());
      a.shl(a.ecx, XERegisterBits::OverflowShift);
   }

   // Merge the captured bits into XER, clearing only the recorded ones.
   // NOTE(review): only the Carry and Overflow bits are rewritten here; no
   // other XER bit is touched -- confirm that is intended.
   if (recordCarry || recordOverflow) {
      uint32_t preserved = 0xFFFFFFFF;

      if (recordCarry) {
         preserved &= ~XERegisterBits::Carry;
      }

      if (recordOverflow) {
         preserved &= ~XERegisterBits::Overflow;
      }

      a.mov(a.edx, a.ppcxer);
      a.and_(a.edx, preserved);
      a.or_(a.edx, a.ecx);
      a.mov(a.ppcxer, a.edx);
   }

   // Write the result back to rD
   a.mov(a.ppcgpr[instr.rD], a.eax);

   if (recordCond) {
      updateConditionRegister(a, a.eax, a.ecx, a.edx);
   }

   return true;
}
示例#11
0
// Division is never emitted inline; every call is delegated to jit_fallback
// and its result is returned unchanged.
static bool
divGeneric(PPCEmuAssembler& a, Instruction instr)
{
   // Need to fallback due to overflow at the moment.
   const bool handled = jit_fallback(a, instr);
   return handled;
}
示例#12
0
// This store is never emitted inline; every call is delegated to
// jit_fallback and its result is returned unchanged.
static bool
psqStore(PPCEmuAssembler& a, Instruction instr)
{
   const bool handled = jit_fallback(a, instr);
   return handled;
}
示例#13
0
// This store is never emitted inline; every call is delegated to
// jit_fallback and its result is returned unchanged.
static bool
stswGeneric(PPCEmuAssembler& a, Instruction instr)
{
   const bool handled = jit_fallback(a, instr);
   return handled;
}
示例#14
0
// Emits host code for a store of a value of type `Type` to guest memory.
// `flags` and `Type` are not declared in this block; presumably template
// parameters declared above it -- confirm.
//
// Register usage in the emitted code: ecx holds the effective address, zdx
// the host address (effective address + membase), eax/zax the value.
// Returns true when code was emitted inline; conditional stores are
// delegated to jit_fallback.
static bool
storeGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (flags & StoreConditional) {
      // Early out for if statement below.
      return jit_fallback(a, instr);
   }

   // Effective address -> ecx
   const bool baseIsZero = (flags & StoreZeroRA) && instr.rA == 0;

   if (!baseIsZero) {
      a.mov(a.ecx, a.ppcgpr[instr.rA]);

      if (flags & StoreIndexed) {
         a.add(a.ecx, a.ppcgpr[instr.rB]);
      } else {
         a.add(a.ecx, sign_extend<16, int32_t>(instr.d));
      }
   } else if (flags & StoreIndexed) {
      a.mov(a.ecx, a.ppcgpr[instr.rB]);
   } else {
      a.mov(a.ecx, sign_extend<16, int32_t>(instr.d));
   }

   if (flags & StoreConditional) {
      // Not emitted yet; unreachable because of the early out above.
      /*
      state->cr.cr0 = state->xer.so ? ConditionRegisterFlag::SummaryOverflow : 0;

      if (state->reserve) {
      // Store is successful, clear reserve bit and set CR0[EQ]
      state->cr.cr0 |= ConditionRegisterFlag::Equal;
      state->reserve = false;
      } else {
      // Reserve bit is not set, do not write.
      return;
      }
      */
   }

   // Host address = effective address + membase
   a.mov(a.zdx, a.zcx);
   a.add(a.zdx, a.membase);

   // Load the value to store into eax / zax
   if (flags & StoreFloatAsInteger) {
      assert(sizeof(Type) == 4);
      a.mov(a.eax, a.ppcfprps[instr.rS][0]);
   } else if (std::is_floating_point<Type>::value) {
      if (flags & StoreSingle) {
         assert(sizeof(Type) == 4);
         a.mov(a.eax, a.ppcfprps[instr.rS][0]);
      } else {
         assert(sizeof(Type) == 8);
         a.mov(a.zax, a.ppcfpr[instr.rS]);
      }
   } else {
      switch (sizeof(Type)) {
      case 1:
         a.mov(a.eax.r8(), a.ppcgpr[instr.rS]);
         break;
      case 2:
         a.mov(a.eax.r16(), a.ppcgpr[instr.rS]);
         break;
      case 4:
         a.mov(a.eax, a.ppcgpr[instr.rS]);
         break;
      default:
         assert(0);
         break;
      }
   }

   // The byte order is swapped unless a byte-reversed store was requested
   if (!(flags & StoreByteReverse)) {
      switch (sizeof(Type)) {
      case 1:
         // Inverted reverse logic means we have
         //    to check for this but do nothing.
         break;
      case 2:
         a.xchg(a.eax.r8Hi(), a.eax.r8Lo());
         break;
      case 4:
         a.bswap(a.eax);
         break;
      case 8:
         a.bswap(a.zax);
         break;
      default:
         assert(0);
         break;
      }
   }

   // Write the value at the host address using the matching operand width
   switch (sizeof(Type)) {
   case 1:
      a.mov(asmjit::X86Mem(a.zdx, 0), a.eax.r8());
      break;
   case 2:
      a.mov(asmjit::X86Mem(a.zdx, 0), a.eax.r16());
      break;
   case 4:
      a.mov(asmjit::X86Mem(a.zdx, 0), a.eax);
      break;
   case 8:
      a.mov(asmjit::X86Mem(a.zdx, 0), a.zax);
      break;
   default:
      assert(0);
      break;
   }

   // Update forms write the effective address back to rA
   if (flags & StoreUpdate) {
      a.mov(a.ppcgpr[instr.rA], a.ecx);
   }

   return true;
}