// fnmsubs: Floating Negative Multiply-Subtract Single.
// Emits x64 that computes -((frA * frC) - frB), then rounds the result to
// single precision (via a round-trip through a 32-bit float) before storing
// it back into frD as a double.
static bool fnmsubs(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      // Record form (CR1 update) is not JITted; defer to the interpreter.
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // xmm0 = frA * frC
   a.movq(a.xmm0, a.ppcfpr[instr.frA]);
   a.movq(a.xmm1, a.ppcfpr[instr.frC]);
   a.mulsd(a.xmm0, a.xmm1);

   // xmm0 = (frA * frC) - frB
   a.movq(a.xmm1, a.ppcfpr[instr.frB]);
   a.subsd(a.xmm0, a.xmm1);

   // Negate by XOR-ing the IEEE-754 sign bit (bit 63).
   // NOTE(review): this also flips the sign of a NaN result — confirm this
   // matches hardware fnmsub behaviour for NaN inputs.
   a.mov(a.zax, UINT64_C(0x8000000000000000));
   a.movq(a.xmm1, a.zax);
   a.pxor(a.xmm0, a.xmm1);

   // Round to single precision, then widen back to double for storage.
   a.cvtsd2ss(a.xmm1, a.xmm0);
   a.cvtss2sd(a.xmm0, a.xmm1);
   a.movq(a.ppcfpr[instr.frD], a.xmm0);
   return true;
}
// Generic fused multiply-add family (fmadd/fmsub/fnmadd/fnmsub and their
// single-precision forms). ShouldRound / ShouldSubtract / ShouldNegate are
// selected outside this view (presumably template parameters): computes
// frA*frC ± frB, optionally negates, optionally rounds to single precision,
// and writes frD.
static bool fmaddGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      // Record form (CR1 update) is not JITted; defer to the interpreter.
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto result = a.allocXmmTmp();

   {
      auto srcC = a.loadRegisterRead(a.fprps[instr.frC]);

      // Do the rounding first so we don't run out of host registers
      if (ShouldRound) {
         // Single-precision forms truncate frC's mantissa to 24 bits before
         // the multiply (same quirk handled in fpArithGeneric's FPMul case).
         auto tmpSrcC = a.allocXmmTmp(srcC);
         roundTo24BitSd(a, tmpSrcC);
         srcC = tmpSrcC;
      }

      auto srcA = a.loadRegisterRead(a.fprps[instr.frA]);
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);

      a.movq(result, srcA);

      if (hostHasFMA3()) {
         // FMA132 form: result = result * srcC ± srcB — fused, with no
         // intermediate rounding of the product.
         if (ShouldSubtract) {
            a.vfmsub132sd(result, srcB, srcC);
         } else {
            a.vfmadd132sd(result, srcB, srcC);
         }
      } else {
         // no FMA3
         // Non-fused fallback: the product is rounded before the add, which
         // can differ from fused behaviour in the last bit.
         a.mulsd(result, srcC);
         if (ShouldSubtract) {
            a.subsd(result, srcB);
         } else {
            a.addsd(result, srcB);
         }
      }
   }

   if (ShouldNegate) {
      negateXmmSd(a, result);
   }

   if (ShouldRound) {
      // Single-precision result is replicated into both paired-single slots.
      roundToSingleSd(a, result, result);
      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, result);
   } else {
      // Double-precision result writes slot 0 only, preserving slot 1.
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, result);
   }

   return true;
}
// sraw/srawi (shift right algebraic word), variant selected by `flags`
// (declared outside this view). Only the srawi sh==0 case is JITted: a
// zero-distance arithmetic shift copies rS to rA and always clears XER[CA]
// (no one-bits can be shifted out). Everything else falls back to the
// interpreter.
static bool shiftArithmetic(PPCEmuAssembler& a, Instruction instr)
{
   if (flags & ShiftImmediate && instr.sh == 0) {
      if (instr.rc) {
         // CR0 update for the record form is not implemented here.
         return jit_fallback(a, instr);
      }

      // BUG FIX: even with sh==0 the destination must be written (rA = rS);
      // the previous code cleared carry but left rA untouched.
      a.mov(a.eax, a.ppcgpr[instr.rS]);
      a.mov(a.ppcgpr[instr.rA], a.eax);

      // Clear Carry Flag
      a.mov(a.ecx, a.ppcxer);
      a.and_(a.ecx, ~XERegisterBits::Carry);
      a.mov(a.ppcxer, a.ecx);
      return true;
   }
   return jit_fallback(a, instr);
}
// fmr: Floating Move Register — copies frB into frD unchanged.
static bool fmr(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      // Record form (CR1 update) is not JITted; defer to the interpreter.
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   a.movq(a.xmm0, a.ppcfpr[instr.frB]);
   a.movq(a.ppcfpr[instr.frD], a.xmm0);
   return true;
}
// Generic scalar FP arithmetic (fadd/fsub/fmul/fdiv and their single forms).
// `op` and `ShouldRound` are selected outside this view (presumably template
// parameters): computes frA <op> (frB or frC) into frD, optionally rounding
// the result to single precision.
static bool fpArithGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      // Record form (CR1 update) is not JITted; defer to the interpreter.
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // Work in a temporary copy so frA itself is never clobbered.
   auto tmpSrcA = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frA]));

   switch (op) {
   case FPAdd:
   {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.addsd(tmpSrcA, srcB);
      break;
   }
   case FPSub:
   {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.subsd(tmpSrcA, srcB);
      break;
   }
   case FPMul:
   {
      // Multiply reads frC (A-form encoding), not frB.
      auto tmpSrcC = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frC]));

      if (ShouldRound) {
         // PPC has this weird behaviour with fmuls where it truncates the
         // RHS operator to 24-bits of mantissa before multiplying...
         roundTo24BitSd(a, tmpSrcC);
      }

      a.mulsd(tmpSrcA, tmpSrcC);
      break;
   }
   case FPDiv:
   {
      auto srcB = a.loadRegisterRead(a.fprps[instr.frB]);
      a.divsd(tmpSrcA, srcB);
      break;
   }
   }

   if (ShouldRound) {
      // Single-precision result is replicated into both paired-single slots.
      roundToSingleSd(a, tmpSrcA, tmpSrcA);
      auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
      a.movddup(dst, tmpSrcA);
   } else {
      // Double result writes slot 0 only, preserving slot 1.
      auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
      a.movsd(dst, tmpSrcA);
   }

   return true;
}
// frsp: Floating Round to Single-Precision.
// Rounds frB to single precision and replicates the result across both
// paired-single slots of frD.
static bool frsp(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      // Record form (CR1 update) is not JITted; defer to the interpreter.
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   auto dst = a.loadRegisterWrite(a.fprps[instr.frD]);
   auto src = a.loadRegisterRead(a.fprps[instr.frB]);

   // Copy frB into the destination, round it in place, then duplicate the
   // rounded value into both slots.
   a.movq(dst, src);
   roundToSingleSd(a, dst, dst);
   a.movddup(dst, dst);
   return true;
}
// fabs: Floating Absolute Value — clears the sign bit of frB into frD.
static bool fabs(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      // Record form (CR1 update) is not JITted; defer to the interpreter.
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   a.movq(a.xmm0, a.ppcfpr[instr.frB]);

   // Mask off bit 63 (the IEEE-754 sign bit).
   a.mov(a.zax, UINT64_C(0x7FFFFFFFFFFFFFFF));
   a.movq(a.xmm1, a.zax);
   a.pand(a.xmm0, a.xmm1);

   a.movq(a.ppcfpr[instr.frD], a.xmm0);
   return true;
}
// Generic FPR move family (fmr / fabs / fneg / fnabs), variant chosen by
// ShouldAbs / ShouldNegate declared outside this view. Abs is applied
// before negate, so enabling both yields the fnabs behaviour.
static bool fmrGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      // Record form is handled by the interpreter.
      return jit_fallback(a, instr);
   }

   // Work on a temporary copy of frB so the source register is preserved.
   auto value = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frB]));

   if (ShouldAbs) {
      absXmmSd(a, value);
   }

   if (ShouldNegate) {
      negateXmmSd(a, value);
   }

   // Write slot 0 of frD only, leaving slot 1 untouched.
   auto dst = a.loadRegisterReadWrite(a.fprps[instr.frD]);
   a.movsd(dst, value);
   return true;
}
// fmadds: Floating Multiply-Add Single.
// Computes frA*frC + frB, then rounds to single precision via a round-trip
// through a 32-bit float before storing frD as a double.
// NOTE(review): unlike fmaddGeneric, this path neither truncates frC to
// 24 bits of mantissa nor uses a fused multiply-add — confirm whether this
// older implementation is still reachable.
static bool fmadds(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      // Record form (CR1 update) is not JITted; defer to the interpreter.
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // xmm0 = frA * frC
   a.movq(a.xmm0, a.ppcfpr[instr.frA]);
   a.movq(a.xmm1, a.ppcfpr[instr.frC]);
   a.mulsd(a.xmm0, a.xmm1);

   // xmm0 += frB
   a.movq(a.xmm1, a.ppcfpr[instr.frB]);
   a.addsd(a.xmm0, a.xmm1);

   // Round to single precision, then widen back to double for storage.
   a.cvtsd2ss(a.xmm1, a.xmm0);
   a.cvtss2sd(a.xmm0, a.xmm1);
   a.movq(a.ppcfpr[instr.frD], a.xmm0);
   return true;
}
// Generic add family (add/addi/addis/addic/adde/addme/addze/... selected by
// the `flags` bitmask declared outside this view). Computes rD = opA + opB
// with optional carry-in, optionally recording XER[CA], XER[OV] and CR0.
static bool addGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (flags & AddSubtract) {
      // Subtract variants are not JITted yet; note this early out makes the
      // AddSubtract branches further down currently unreachable.
      return jit_fallback(a, instr);
   }

   bool recordCarry = false;
   bool recordOverflow = false;
   bool recordCond = false;

   if (flags & AddCarry) {
      recordCarry = true;
   }

   if (flags & AddAlwaysRecord) {
      recordOverflow = true;
      recordCond = true;
   } else if (flags & AddCheckRecord) {
      if (instr.oe) {
         recordOverflow = true;
      }

      if (instr.rc) {
         recordCond = true;
      }
   }

   // Operand A in eax: rA, or literal 0 when the encoding treats rA=0 as 0.
   if ((flags & AddZeroRA) && instr.rA == 0) {
      a.mov(a.eax, 0);
   } else {
      a.mov(a.eax, a.ppcgpr[instr.rA]);
   }

   // Subtraction would be ~rA + rB + 1 (unreachable — see early out above).
   if (flags & AddSubtract) {
      a.not_(a.eax);
   }

   // Operand B in ecx: immediate, 0, -1, or rB depending on the variant.
   if (flags & AddImmediate) {
      a.mov(a.ecx, sign_extend<16>(instr.simm));
   } else if (flags & AddToZero) {
      a.mov(a.ecx, 0);
   } else if (flags & AddToMinusOne) {
      a.mov(a.ecx, -1);
   } else {
      a.mov(a.ecx, a.ppcgpr[instr.rB]);
   }

   if (flags & AddShifted) {
      // addis: immediate occupies the high halfword.
      a.shl(a.ecx, 16);
   }

   // Mark x64 CF based on PPC CF
   if (flags & AddExtended) {
      // (XER & Carry) + 0xffffffff sets host CF iff XER[CA] was set, so the
      // following adc adds the PPC carry-in.
      a.mov(a.edx, a.ppcxer);
      a.and_(a.edx, XERegisterBits::Carry);
      a.add(a.edx, 0xffffffff);
      a.adc(a.eax, a.ecx);
   } else if (flags & AddSubtract) {
      a.stc();
      a.adc(a.eax, a.ecx);
   } else {
      a.add(a.eax, a.ecx);
   }

   // Capture host CF/OF immediately: `mov reg, 0` does not clobber flags, so
   // setc/seto must run before any shl below (which does clobber them).
   if (recordCarry && recordOverflow) {
      a.mov(a.ecx, 0);
      a.setc(a.ecx.r8());
      a.mov(a.edx, 0);
      a.seto(a.edx.r8());

      a.shl(a.ecx, XERegisterBits::CarryShift);
      a.shl(a.edx, XERegisterBits::OverflowShift);
      a.or_(a.ecx, a.edx);
   } else if (recordCarry) {
      a.mov(a.ecx, 0);
      a.setc(a.ecx.r8());
      a.shl(a.ecx, XERegisterBits::CarryShift);
   } else if (recordOverflow) {
      a.mov(a.ecx, 0);
      a.seto(a.ecx.r8());
      a.shl(a.ecx, XERegisterBits::OverflowShift);
   }

   // Merge the captured bits into XER, clearing only the bits recomputed.
   // NOTE(review): overflow recording sets OV but does not appear to
   // accumulate SO — confirm against the interpreter's behaviour.
   if (recordCarry || recordOverflow) {
      uint32_t mask = 0xFFFFFFFF;
      if (recordCarry) {
         mask &= ~XERegisterBits::Carry;
      }
      if (recordOverflow) {
         mask &= ~XERegisterBits::Overflow;
      }

      a.mov(a.edx, a.ppcxer);
      a.and_(a.edx, mask);
      a.or_(a.edx, a.ecx);
      a.mov(a.ppcxer, a.edx);
   }

   a.mov(a.ppcgpr[instr.rD], a.eax);

   if (recordCond) {
      updateConditionRegister(a, a.eax, a.ecx, a.edx);
   }

   return true;
}
// Generic divide: always interpreted, not JITted.
// Presumably because host division traps on cases (divide-by-zero,
// INT_MIN / -1) that PPC handles without trapping — confirm.
static bool divGeneric(PPCEmuAssembler& a, Instruction instr)
{
   // Need to fallback due to overflow at the moment.
   return jit_fallback(a, instr);
}
// Paired-single quantized store (name suggests psq_st / psq_stx):
// not JITted, always handled by the interpreter fallback.
static bool psqStore(PPCEmuAssembler& a, Instruction instr)
{
   return jit_fallback(a, instr);
}
// Store-string-word family (name suggests stswi / stswx):
// not JITted, always handled by the interpreter fallback.
static bool stswGeneric(PPCEmuAssembler& a, Instruction instr)
{
   return jit_fallback(a, instr);
}
// Generic store emitter (byte/half/word/double, float/double, indexed,
// update, byte-reversed and conditional variants — selected by `Type` and
// `flags` declared outside this view). Computes the effective address into
// ecx, loads the source value, byte-swaps it unless this is a
// byte-reversed store, and writes it through membase.
static bool storeGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (flags & StoreConditional) {
      // Early out for if statement below.
      return jit_fallback(a, instr);
   }

   // Effective address: (rA | 0) + (rB or sign-extended displacement).
   if ((flags & StoreZeroRA) && instr.rA == 0) {
      if (flags & StoreIndexed) {
         a.mov(a.ecx, a.ppcgpr[instr.rB]);
      } else {
         a.mov(a.ecx, sign_extend<16, int32_t>(instr.d));
      }
   } else {
      a.mov(a.ecx, a.ppcgpr[instr.rA]);
      if (flags & StoreIndexed) {
         a.add(a.ecx, a.ppcgpr[instr.rB]);
      } else {
         a.add(a.ecx, sign_extend<16, int32_t>(instr.d));
      }
   }

   if (flags & StoreConditional) {
      // Unreachable (see early out above); kept as a sketch of the
      // reservation logic the JIT would need:
      /*
      state->cr.cr0 = state->xer.so ? ConditionRegisterFlag::SummaryOverflow : 0;

      if (state->reserve) {
         // Store is succesful, clear reserve bit and set CR0[EQ]
         state->cr.cr0 |= ConditionRegisterFlag::Equal;
         state->reserve = false;
      } else {
         // Reserve bit is not set, do not write.
         return;
      }
      */
   }

   // zdx = host pointer for the guest effective address.
   a.mov(a.zdx, a.zcx);
   a.add(a.zdx, a.membase);

   // Load the value to store into eax/zax.
   if (flags & StoreFloatAsInteger) {
      // Raw 32-bit bit pattern of paired-single slot 0.
      assert(sizeof(Type) == 4);
      a.mov(a.eax, a.ppcfprps[instr.rS][0]);
   } else if (std::is_floating_point<Type>::value) {
      if (flags & StoreSingle) {
         // Presumably slot 0 of ppcfprps already holds the value in
         // single-precision form — confirm the register layout.
         assert(sizeof(Type) == 4);
         a.mov(a.eax, a.ppcfprps[instr.rS][0]);
      } else {
         assert(sizeof(Type) == 8);
         a.mov(a.zax, a.ppcfpr[instr.rS]);
      }
   } else {
      if (sizeof(Type) == 1) {
         a.mov(a.eax.r8(), a.ppcgpr[instr.rS]);
      } else if (sizeof(Type) == 2) {
         a.mov(a.eax.r16(), a.ppcgpr[instr.rS]);
      } else if (sizeof(Type) == 4) {
         a.mov(a.eax, a.ppcgpr[instr.rS]);
      } else {
         assert(0);
      }
   }

   // Normal stores are byte-swapped; StoreByteReverse stores host-order
   // bytes, hence the inverted condition.
   if (!(flags & StoreByteReverse)) {
      if (sizeof(Type) == 1) {
         // Inverted reverse logic means we have
         // to check for this but do nothing.
      } else if (sizeof(Type) == 2) {
         a.xchg(a.eax.r8Hi(), a.eax.r8Lo());
      } else if (sizeof(Type) == 4) {
         a.bswap(a.eax);
      } else if (sizeof(Type) == 8) {
         a.bswap(a.zax);
      } else {
         assert(0);
      }
   }

   // Perform the store of the appropriate width at [zdx].
   if (sizeof(Type) == 1) {
      a.mov(asmjit::X86Mem(a.zdx, 0), a.eax.r8());
   } else if (sizeof(Type) == 2) {
      a.mov(asmjit::X86Mem(a.zdx, 0), a.eax.r16());
   } else if (sizeof(Type) == 4) {
      a.mov(asmjit::X86Mem(a.zdx, 0), a.eax);
   } else if (sizeof(Type) == 8) {
      a.mov(asmjit::X86Mem(a.zdx, 0), a.zax);
   } else {
      assert(0);
   }

   // Update form writes the effective address back into rA.
   if (flags & StoreUpdate) {
      a.mov(a.ppcgpr[instr.rA], a.ecx);
   }

   return true;
}