// fnmsubs: emits code computing frD = -((frA * frC) - frB), with the result
// passed through a double -> single -> double conversion before it is stored.
// Record-form (rc) encodings are handed to jit_fallback instead.
static bool fnmsubs(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...
   const auto& acc = a.xmm0;
   const auto& tmp = a.xmm1;

   // acc = frA * frC
   a.movq(acc, a.ppcfpr[instr.frA]);
   a.movq(tmp, a.ppcfpr[instr.frC]);
   a.mulsd(acc, tmp);

   // acc = acc - frB
   a.movq(tmp, a.ppcfpr[instr.frB]);
   a.subsd(acc, tmp);

   // Negate by flipping the sign bit of the double held in acc.
   a.mov(a.zax, UINT64_C(0x8000000000000000));
   a.movq(tmp, a.zax);
   a.pxor(acc, tmp);

   // Narrow to single precision, then widen back to double.
   a.cvtsd2ss(tmp, acc);
   a.cvtss2sd(acc, tmp);

   a.movq(a.ppcfpr[instr.frD], acc);
   return true;
}
// Count Leading Zeroes Word static bool cntlzw(PPCEmuAssembler& a, Instruction instr) { asmjit::Label lblZero(a); a.mov(a.ecx, a.ppcgpr[instr.rS]); a.mov(a.eax, 32); a.cmp(a.ecx, 0); a.je(lblZero); a.mov(a.edx, 0); a.bsr(a.edx, a.ecx); a.dec(a.eax); a.sub(a.eax, a.edx); a.bind(lblZero); a.mov(a.ppcgpr[instr.rA], a.eax); if (instr.rc) { updateConditionRegister(a, a.eax, a.ecx, a.edx); } return true; }
// Move from Condition Register static bool mfcr(PPCEmuAssembler& a, Instruction instr) { a.mov(a.eax, a.ppccr); a.mov(a.ppcgpr[instr.rD], a.eax); return true; }
// Condition Register OR with Complement static bool crorc(PPCEmuAssembler& a, Instruction instr) { getTwoCRB(a, instr.crbA, a.eax, instr.crbB, a.ecx); a.not_(a.ecx); a.or_(a.eax, a.ecx); setCRB(a, instr.crbD, a.eax, a.ecx, a.edx); return true; }
// Emits code that clears bit 63 (the sign bit of the low double) in `reg`.
// A temporary GP register and a temporary XMM register are allocated to
// build the 0x7FFFFFFFFFFFFFFF mask.
static void absXmmSd(PPCEmuAssembler& a, const PPCEmuAssembler::XmmRegister& reg)
{
   auto gpScratch = a.allocGpTmp();
   auto xmmScratch = a.allocXmmTmp();

   // Materialise the "everything but the sign bit" mask in an XMM register.
   a.mov(gpScratch, UINT64_C(0x7FFFFFFFFFFFFFFF));
   a.movq(xmmScratch, gpScratch);

   a.pand(reg, xmmScratch);
}
// Emits code that toggles bit 63 (the sign bit of the low double) in `reg`.
// A temporary GP register and a temporary XMM register are allocated to
// build the 0x8000000000000000 mask.
static void negateXmmSd(PPCEmuAssembler& a, const PPCEmuAssembler::XmmRegister& reg)
{
   auto gpScratch = a.allocGpTmp();
   auto xmmScratch = a.allocXmmTmp();

   // Materialise the sign-bit mask in an XMM register.
   a.mov(gpScratch, UINT64_C(0x8000000000000000));
   a.movq(xmmScratch, gpScratch);

   a.pxor(reg, xmmScratch);
}
// Arithmetic shift emitter.
//
// Only one case is generated natively: an immediate shift amount of zero
// without the record bit. A shift by zero leaves the value unchanged, so the
// emitted code copies rS to rA and clears the XER carry bit. Every other
// case (non-zero shift, register shift amount, or rc set) goes to
// jit_fallback.
//
// NOTE(review): `flags` is not declared in this block; presumably it is a
// compile-time parameter declared immediately above — confirm.
static bool shiftArithmetic(PPCEmuAssembler& a, Instruction instr)
{
   if ((flags & ShiftImmediate) && instr.sh == 0 && !instr.rc) {
      // rA = rS (previously missing: the destination was never written)
      a.mov(a.eax, a.ppcgpr[instr.rS]);
      a.mov(a.ppcgpr[instr.rA], a.eax);

      // Clear Carry Flag
      a.mov(a.ecx, a.ppcxer);
      a.and_(a.ecx, ~XERegisterBits::Carry);
      a.mov(a.ppcxer, a.ecx);

      return true;
   }

   return jit_fallback(a, instr);
}
// Extend Sign Half Word static bool extsh(PPCEmuAssembler& a, Instruction instr) { a.mov(a.eax, a.ppcgpr[instr.rS]); a.movsx(a.eax, a.eax.r16()); a.mov(a.ppcgpr[instr.rA], a.eax); if (instr.rc) { updateConditionRegister(a, a.eax, a.ecx, a.edx); } return true; }
// Emits code that loads two individual condition-register bits.
//
// a    - assembler to emit into
// bita - index of the first bit; it is shifted right by (31 - bita)
// da   - register receiving the first bit in its bit 0
// bitb - index of the second bit; it is shifted right by (31 - bitb)
// db   - register receiving the second bit in its bit 0
//
// CR is read once into `da` and then copied to `db`.
void getTwoCRB(PPCEmuAssembler& a, uint32_t bita, const asmjit::X86GpReg& da, uint32_t bitb, const asmjit::X86GpReg& db)
{
   const auto downShiftA = 31 - bita;
   const auto downShiftB = 31 - bitb;

   a.mov(da, a.ppccr);
   a.mov(db, da);

   // A shift of zero needs no instruction.
   if (downShiftA != 0) {
      a.shr(da, downShiftA);
   }

   if (downShiftB != 0) {
      a.shr(db, downShiftB);
   }

   // Keep only the requested bit in each register.
   a.and_(da, 1);
   a.and_(db, 1);
}
// frsp: emits code that loads frB, converts it double -> single -> double,
// and stores the result in frD. Record-form (rc) encodings are handed to
// jit_fallback.
static bool frsp(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...
   const auto& value = a.xmm0;
   const auto& narrowed = a.xmm1;

   a.movq(value, a.ppcfpr[instr.frB]);

   // Narrow to single precision, then widen back to double.
   a.cvtsd2ss(narrowed, value);
   a.cvtss2sd(value, narrowed);

   a.movq(a.ppcfpr[instr.frD], value);
   return true;
}
// Emits a call to the handler stored in sInstructionMap for this instruction.
// The emitted call receives a.state in zcx and the raw instruction word in
// edx. Always returns true.
bool jit_fallback(PPCEmuAssembler& a, Instruction instr)
{
   auto decoded = gInstructionTable.decode(instr);
   auto handler = sInstructionMap[static_cast<size_t>(decoded->id)];

   // A missing handler only trips the assert; emission continues regardless.
   if (!handler) {
      assert(0);
   }

   //printf("JIT Fallback for `%s`\n", data->name);

   // Set up the two arguments, then call the handler.
   a.mov(a.zcx, a.state);
   a.mov(a.edx, static_cast<uint32_t>(instr));
   a.call(asmjit::Ptr(handler));

   return true;
}
// NOR static bool nor(PPCEmuAssembler& a, Instruction instr) { a.mov(a.eax, a.ppcgpr[instr.rS]); a.mov(a.ecx, a.ppcgpr[instr.rB]); a.or_(a.eax, a.ecx); a.not_(a.eax); a.mov(a.ppcgpr[instr.rA], a.eax); if (instr.rc) { updateConditionRegister(a, a.eax, a.ecx, a.edx); } return true; }
// Shared emitter for the OR family: rA = rS | operand.
//
// `flags` selects the variant:
//   OrImmediate    - operand is instr.uimm instead of rB
//   OrShifted      - operand is shifted left by 16
//   OrComplement   - operand is bitwise inverted
//   OrAlwaysRecord - updateConditionRegister is always emitted
//   OrCheckRecord  - updateConditionRegister is emitted only when instr.rc
//
// NOTE(review): `flags` is not declared in this block; presumably it is a
// compile-time parameter declared immediately above — confirm.
static bool orGeneric(PPCEmuAssembler& a, Instruction instr)
{
   const auto& result = a.eax;
   const auto& operand = a.ecx;

   a.mov(result, a.ppcgpr[instr.rS]);

   // Fetch the second operand.
   if (flags & OrImmediate) {
      a.mov(operand, instr.uimm);
   } else {
      a.mov(operand, a.ppcgpr[instr.rB]);
   }

   // Optional operand adjustments, applied in this order.
   if (flags & OrShifted) {
      a.shl(operand, 16);
   }

   if (flags & OrComplement) {
      a.not_(operand);
   }

   a.or_(result, operand);
   a.mov(a.ppcgpr[instr.rA], result);

   // OrAlwaysRecord takes precedence; OrCheckRecord defers to the rc bit.
   const bool record = (flags & OrAlwaysRecord) || ((flags & OrCheckRecord) && instr.rc);

   if (record) {
      updateConditionRegister(a, a.eax, a.ecx, a.edx);
   }

   return true;
}
// Shared emitter for logical shifts: rA = rS << n or rA = rS >> n.
//
// `flags` selects the variant:
//   ShiftImmediate - n is instr.sh; otherwise n comes from rB
//   ShiftLeft / ShiftRight - direction (exactly one is expected)
//
// For the register form PowerPC uses the low 6 bits of rB and produces 0
// for any count of 32..63. A 32-bit x86 shift masks its count to 5 bits,
// so it would return the value unshifted for a count of 32. To get the
// PowerPC result the register form shifts the full 64-bit register instead:
// the 32-bit load of rS zero-extends into the upper half, so after a 64-bit
// shift by 0..63 the low 32 bits hold exactly the PowerPC result.
//
// NOTE(review): this relies on a.zax being the 64-bit register aliasing
// a.eax, as the 64-bit immediates loaded into a.zax elsewhere suggest — confirm.
// NOTE(review): `flags` is not declared in this block; presumably it is a
// compile-time parameter declared immediately above — confirm.
static bool shiftLogical(PPCEmuAssembler& a, Instruction instr)
{
   // 32-bit load; on x86-64 this clears the upper 32 bits of the register.
   a.mov(a.eax, a.ppcgpr[instr.rS]);

   if (flags & ShiftImmediate) {
      if (flags & ShiftLeft) {
         a.shl(a.eax, instr.sh);
      } else if (flags & ShiftRight) {
         a.shr(a.eax, instr.sh);
      } else {
         assert(0);
      }
   } else {
      a.mov(a.ecx, a.ppcgpr[instr.rB]);

      // Only the low 6 bits of rB take part in the shift.
      a.and_(a.ecx, 0x3f);

      if (flags & ShiftLeft) {
         a.shl(a.zax, a.ecx.r8());
      } else if (flags & ShiftRight) {
         a.shr(a.zax, a.ecx.r8());
      } else {
         assert(0);
      }
   }

   // Only the low 32 bits are the architectural result.
   a.mov(a.ppcgpr[instr.rA], a.eax);

   if (instr.rc) {
      updateConditionRegister(a, a.eax, a.ecx, a.edx);
   }

   return true;
}
// frsp (register-allocator variant): copies frB into frD, passes it through
// roundToSingleSd in place, then duplicates the low double into both halves
// of the destination with movddup. Record-form (rc) encodings are handed to
// jit_fallback.
static bool frsp(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...

   // The destination is requested before the source, as in the original
   // sequence of allocator calls.
   auto target = a.loadRegisterWrite(a.fprps[instr.frD]);
   auto source = a.loadRegisterRead(a.fprps[instr.frB]);

   a.movq(target, source);
   roundToSingleSd(a, target, target);
   a.movddup(target, target);

   return true;
}
// fabs: emits frD = frB with bit 63 (the sign bit) cleared.
// Record-form (rc) encodings are handed to jit_fallback.
static bool fabs(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // FPSCR, FPRF supposed to be updated here...
   const auto& value = a.xmm0;
   const auto& signMask = a.xmm1;

   a.movq(value, a.ppcfpr[instr.frB]);

   // Build the "all bits except the sign" mask and apply it.
   a.mov(a.zax, UINT64_C(0x7FFFFFFFFFFFFFFF));
   a.movq(signMask, a.zax);
   a.pand(value, signMask);

   a.movq(a.ppcfpr[instr.frD], value);
   return true;
}
// Emits code that writes a single condition-register bit.
//
// a     - assembler to emit into
// bit   - index of the bit to write; it lands at position (31 - bit) of CR
// value - register whose bit 0 supplies the new bit (left unmodified)
// tmp   - scratch register; receives the updated CR value
// tmp2  - scratch register; receives the positioned bit
void setCRB(PPCEmuAssembler& a, uint32_t bit, const asmjit::X86GpReg& value, const asmjit::X86GpReg& tmp, const asmjit::X86GpReg& tmp2)
{
   const auto position = 31 - bit;

   // tmp = CR with the target bit cleared
   a.mov(tmp, a.ppccr);
   a.and_(tmp, ~(1 << position));

   // tmp2 = (value & 1) moved into the target position
   a.mov(tmp2, value);
   a.and_(tmp2, 1);
   a.shl(tmp2, position);

   // Merge and write back.
   a.or_(tmp, tmp2);
   a.mov(a.ppccr, tmp);
}
// Shared emitter for multiplies built on the x86 one-operand `mul`
// (eax * ecx -> edx:eax).
//
// `flags` selects the variant:
//   MulImmediate   - second operand is sign_extend<16>(instr.simm), else rB
//   MulLow         - rD receives eax (low half of the product)
//   MulHigh        - rD receives edx (high half of the product)
//   MulCheckRecord - updateConditionRegister is emitted when instr.rc is set
//
// NOTE(review): `flags` is not declared in this block; presumably it is a
// compile-time parameter declared immediately above — confirm.
static bool mulUnsignedGeneric(PPCEmuAssembler& a, Instruction instr)
{
   a.mov(a.eax, a.ppcgpr[instr.rA]);

   if (flags & MulImmediate) {
      a.mov(a.ecx, sign_extend<16>(instr.simm));
   } else {
      a.mov(a.ecx, a.ppcgpr[instr.rB]);
   }

   a.mul(a.ecx);

   const bool record = (flags & MulCheckRecord) && instr.rc;

   if (flags & MulLow) {
      a.mov(a.ppcgpr[instr.rD], a.eax);

      if (record) {
         updateConditionRegister(a, a.eax, a.ecx, a.edx);
      }
   } else if (flags & MulHigh) {
      a.mov(a.ppcgpr[instr.rD], a.edx);

      // The result lives in edx here, so eax is passed in the last slot.
      if (record) {
         updateConditionRegister(a, a.edx, a.ecx, a.eax);
      }
   } else {
      assert(0);
   }

   return true;
}
// Shared emitter for floating-point register moves.
//
// frB is copied into a temporary XMM register, optionally passed through
// absXmmSd (ShouldAbs) and then negateXmmSd (ShouldNegate), and the low
// double is finally written into frD with movsd. Record-form (rc) encodings
// are handed to jit_fallback.
//
// NOTE(review): ShouldAbs / ShouldNegate are not declared in this block;
// presumably they are compile-time parameters declared immediately above —
// confirm.
static bool fmrGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (instr.rc) {
      return jit_fallback(a, instr);
   }

   // Work on a copy so the source register is left untouched.
   auto scratch = a.allocXmmTmp(a.loadRegisterRead(a.fprps[instr.frB]));

   if (ShouldAbs) {
      absXmmSd(a, scratch);
   }

   if (ShouldNegate) {
      negateXmmSd(a, scratch);
   }

   // The destination is requested read-write and written with movsd.
   auto target = a.loadRegisterReadWrite(a.fprps[instr.frD]);
   a.movsd(target, scratch);

   return true;
}
// Shared emitter for paired-single merges.
//
// Slot 0 of frD is taken from frA and slot 1 of frD from frB:
//   MergeValue0 set -> read slot 1 of frA, otherwise slot 0
//   MergeValue1 set -> read slot 1 of frB, otherwise slot 0
// Both values are loaded before either store is emitted.
//
// NOTE(review): `flags` is not declared in this block; presumably it is a
// compile-time parameter declared immediately above — confirm.
static bool mergeGeneric(PPCEmuAssembler& a, Instruction instr)
{
   const int slotFromA = (flags & MergeValue0) ? 1 : 0;
   const int slotFromB = (flags & MergeValue1) ? 1 : 0;

   // Load both halves first so that frD may alias frA or frB.
   a.mov(a.zax, a.ppcfprps[instr.frA][slotFromA]);
   a.mov(a.zcx, a.ppcfprps[instr.frB][slotFromB]);

   a.mov(a.ppcfprps[instr.frD][0], a.zax);
   a.mov(a.ppcfprps[instr.frD][1], a.zcx);

   if (instr.rc) {
      updateFloatConditionRegister(a, a.zax, a.zcx);
   }

   return true;
}
// Move Condition Register Field static bool mcrf(PPCEmuAssembler& a, Instruction instr) { uint32_t crshifts = (7 - instr.crfS) * 4; uint32_t crshiftd = (7 - instr.crfD) * 4; a.mov(a.eax, a.ppccr); a.mov(a.ecx, a.eax); a.and_(a.ecx, ~(0xF << crshifts)); a.shr(a.ecx, crshifts); a.shl(a.ecx, crshiftd); a.and_(a.eax, ~(0xF << crshiftd)); a.or_(a.eax, a.ecx); a.mov(a.ppccr, a.eax); return true; }
// Emits dst = src & 0xFFFFFFFFE0000000, i.e. the low 29 bits of the 64-bit
// value are cleared.
//
// When `dst` and `src` refer to the same object (compared by address) the
// mask is staged in a temporary XMM register; otherwise the mask is loaded
// straight into `dst` and combined with `src`.
void truncateToSingleSd(PPCEmuAssembler& a, const PPCEmuAssembler::XmmRegister& dst, const PPCEmuAssembler::XmmRegister& src)
{
   auto maskBits = a.allocGpTmp();
   a.mov(maskBits, UINT64_C(0xFFFFFFFFE0000000));

   const bool distinctOperands = (&dst != &src);

   if (distinctOperands) {
      // dst can hold the mask itself because it does not alias src.
      a.movq(dst, maskBits);
      a.pand(dst, src);
      return;
   }

   // In-place: the mask needs its own XMM register.
   auto maskXmm = a.allocXmmTmp();
   a.movq(maskXmm, maskBits);
   a.pand(dst, maskXmm);
}
// Data Cache Block Zero static bool dcbz(PPCEmuAssembler& a, Instruction instr) { auto src = a.allocGpTmp().r32(); if (instr.rA == 0) { a.mov(src, 0); } else { a.mov(src, a.loadRegisterRead(a.gpr[instr.rA])); } a.add(src, a.loadRegisterRead(a.gpr[instr.rB])); // Align down a.and_(src, ~static_cast<uint32_t>(31)); // Write 32 bytes of zero's there a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 0, 8), 0); a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 8, 8), 0); a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 16, 8), 0); a.mov(asmjit::X86Mem(a.membaseReg, src, 0, 24, 8), 0); return true; }
// Store Multiple Words // Writes consecutive words to memory from rS to r31 static bool stmw(PPCEmuAssembler& a, Instruction instr) { auto o = sign_extend<16, int32_t>(instr.d); if (instr.rA) { a.mov(a.ecx, a.ppcgpr[instr.rA]); a.add(a.ecx, o); } else { a.mov(a.ecx, o); } a.add(a.zcx, a.membase); for (int r = instr.rS, d = 0; r <= 31; ++r, d += 4) { a.mov(a.eax, a.ppcgpr[r]); a.bswap(a.eax); a.mov(asmjit::X86Mem(a.zcx, d), a.eax); } return true; }
// Move to Condition Register Fields static bool mtcrf(PPCEmuAssembler& a, Instruction instr) { uint32_t crm = instr.crm; uint32_t mask = 0; for (auto i = 0u; i < 8; ++i) { if (crm & (1 << i)) { mask |= 0xf << (i * 4); } } a.mov(a.eax, a.ppcgpr[instr.rS]); a.and_(a.eax, mask); a.mov(a.ecx, a.ppccr); a.and_(a.ecx, ~mask); a.or_(a.eax, a.ecx); a.mov(a.ppccr, a.eax); return true; }
// Emits a call to the interpreter handler for this instruction.
//
// When TRACK_FALLBACK_CALLS is enabled, a locked increment of the
// per-instruction entry in sFallbackCalls is emitted ahead of the call.
// The emitted call receives a.state in zcx and the raw instruction word in
// edx. Always returns true.
bool jit_fallback(PPCEmuAssembler& a, Instruction instr)
{
   auto decoded = gInstructionTable.decode(instr);
   auto handler = cpu::interpreter::getInstructionHandler(decoded->id);

   if (!handler) {
      // A bare `throw;` with no exception in flight calls std::terminate.
      throw;
   }

   if (TRACK_FALLBACK_CALLS) {
      const auto counterAddr = reinterpret_cast<intptr_t>(&sFallbackCalls[static_cast<uint32_t>(decoded->id)]);

      a.mov(a.zax, counterAddr);
      a.lock();
      a.inc(asmjit::X86Mem(a.zax, 0));
   }

   // Set up the two arguments, then call the handler.
   a.mov(a.zcx, a.state);
   a.mov(a.edx, static_cast<uint32_t>(instr));
   a.call(asmjit::Ptr(handler));

   return true;
}
// Emits a call to the interpreter handler for this instruction.
//
// Decoding failures and missing handlers are reported through decaf_assert.
// a.evictAll() is invoked before anything is emitted. When
// TRACK_FALLBACK_CALLS is enabled, a locked increment of the per-instruction
// entry in sFallbackCalls is emitted ahead of the call. The emitted call
// receives a.stateReg in sysArgReg[0] and the raw instruction word in
// sysArgReg[1]. Always returns true.
bool jit_fallback(PPCEmuAssembler& a, espresso::Instruction instr)
{
   auto decoded = espresso::decodeInstruction(instr);
   decaf_assert(decoded, fmt::format("Failed to decode instruction {:08X}", instr.value));

   auto handler = cpu::interpreter::getInstructionHandler(decoded->id);
   decaf_assert(handler, fmt::format("Unimplemented instruction {}", static_cast<int>(decoded->id)));

   a.evictAll();

   if (TRACK_FALLBACK_CALLS) {
      const auto counterAddr = reinterpret_cast<intptr_t>(&sFallbackCalls[static_cast<uint32_t>(decoded->id)]);

      a.mov(asmjit::x86::rax, asmjit::Ptr(counterAddr));
      a.lock().inc(asmjit::X86Mem(asmjit::x86::rax, 0));
   }

   // Set up the two arguments, then call the handler.
   a.mov(a.sysArgReg[0], a.stateReg);
   a.mov(a.sysArgReg[1], static_cast<uint32_t>(instr));
   a.call(asmjit::Ptr(handler));

   return true;
}
// Shared emitter for rotate-left-word instructions.
//
// rS is rotated left by instr.sh (RlwImmediate) or by the low 5 bits of rB,
// then combined with the mask make_ppc_bitmask(instr.mb, instr.me):
//   RlwAnd    - rA = rotated & mask
//   RlwInsert - rA = (rotated & mask) | (rA & ~mask)
// With neither flag the rotated value is stored unmasked.
//
// NOTE(review): `flags` is not declared in this block; presumably it is a
// compile-time parameter declared immediately above — confirm.
static bool rlwGeneric(PPCEmuAssembler& a, Instruction instr)
{
   const auto& rotated = a.eax;
   const auto& scratch = a.ecx;

   a.mov(rotated, a.ppcgpr[instr.rS]);

   if (flags & RlwImmediate) {
      a.rol(rotated, instr.sh);
   } else {
      // Rotate amount comes from rB, limited to 0..31.
      a.mov(scratch, a.ppcgpr[instr.rB]);
      a.and_(scratch, 0x1f);
      a.rol(rotated, scratch.r8());
   }

   auto mask = make_ppc_bitmask(instr.mb, instr.me);

   if (flags & RlwAnd) {
      a.and_(rotated, mask);
   } else if (flags & RlwInsert) {
      // Keep the masked bits of the rotated value and the unmasked bits
      // of the existing rA.
      a.and_(rotated, mask);
      a.mov(scratch, a.ppcgpr[instr.rA]);
      a.and_(scratch, ~mask);
      a.or_(rotated, scratch);
   }

   a.mov(a.ppcgpr[instr.rA], rotated);

   if (instr.rc) {
      updateConditionRegister(a, a.eax, a.ecx, a.edx);
   }

   return true;
}
// Shared emitter for the add family: rD = lhs + rhs (+ carry-in).
//
// `flags` selects the variant:
//   AddSubtract     - currently always routed to jit_fallback
//   AddZeroRA       - rA == 0 means a literal zero instead of r0
//   AddImmediate    - rhs is sign_extend<16>(instr.simm)
//   AddToZero       - rhs is 0
//   AddToMinusOne   - rhs is -1
//   AddShifted      - rhs is shifted left by 16
//   AddExtended     - XER carry is used as carry-in
//   AddCarry        - the host carry flag is recorded into XER carry
//   AddAlwaysRecord - updateConditionRegister is always emitted
//   AddCheckRecord  - instr.oe enables overflow recording, instr.rc enables
//                     updateConditionRegister
//
// Changes from the previous version:
//   * AddAlwaysRecord no longer enables overflow recording; it only forces
//     the condition-register update.
//   * When overflow is recorded, the sticky overflow bit is set together
//     with the overflow bit (it is never cleared here), matching what neg
//     does in this file.
//
// NOTE(review): XERegisterBits::StickyOV is assumed to be a bit mask like
// XERegisterBits::Overflow — confirm.
// NOTE(review): `flags` is not declared in this block; presumably it is a
// compile-time parameter declared immediately above — confirm.
static bool addGeneric(PPCEmuAssembler& a, Instruction instr)
{
   if (flags & AddSubtract) {
      return jit_fallback(a, instr);
   }

   bool recordCarry = false;
   bool recordOverflow = false;
   bool recordCond = false;

   if (flags & AddCarry) {
      recordCarry = true;
   }

   if (flags & AddAlwaysRecord) {
      // Always-record only forces the CR update, not overflow.
      recordCond = true;
   } else if (flags & AddCheckRecord) {
      if (instr.oe) {
         recordOverflow = true;
      }

      if (instr.rc) {
         recordCond = true;
      }
   }

   // Left-hand operand.
   if ((flags & AddZeroRA) && instr.rA == 0) {
      a.mov(a.eax, 0);
   } else {
      a.mov(a.eax, a.ppcgpr[instr.rA]);
   }

   // Unreachable while the early fallback above is in place.
   if (flags & AddSubtract) {
      a.not_(a.eax);
   }

   // Right-hand operand.
   if (flags & AddImmediate) {
      a.mov(a.ecx, sign_extend<16>(instr.simm));
   } else if (flags & AddToZero) {
      a.mov(a.ecx, 0);
   } else if (flags & AddToMinusOne) {
      a.mov(a.ecx, -1);
   } else {
      a.mov(a.ecx, a.ppcgpr[instr.rB]);
   }

   if (flags & AddShifted) {
      a.shl(a.ecx, 16);
   }

   // Mark x64 CF based on PPC CF
   if (flags & AddExtended) {
      // Adding 0xffffffff to a non-zero value carries out, so the host CF
      // ends up set exactly when the XER carry bit was set.
      a.mov(a.edx, a.ppcxer);
      a.and_(a.edx, XERegisterBits::Carry);
      a.add(a.edx, 0xffffffff);
      a.adc(a.eax, a.ecx);
   } else if (flags & AddSubtract) {
      // Unreachable while the early fallback above is in place.
      a.stc();
      a.adc(a.eax, a.ecx);
   } else {
      a.add(a.eax, a.ecx);
   }

   // Capture the host flags before any flag-modifying instruction runs
   // (mov and setcc leave EFLAGS untouched; shl/neg/and do not).
   // After this section ecx holds the new XER bits.
   if (recordCarry && recordOverflow) {
      a.mov(a.ecx, 0);
      a.setc(a.ecx.r8());
      a.mov(a.edx, 0);
      a.seto(a.edx.r8());

      a.shl(a.ecx, XERegisterBits::CarryShift);

      // 0 -> 0x00000000, 1 -> 0xFFFFFFFF, then keep only OV and sticky OV.
      a.neg(a.edx);
      a.and_(a.edx, XERegisterBits::Overflow | XERegisterBits::StickyOV);

      a.or_(a.ecx, a.edx);
   } else if (recordCarry) {
      a.mov(a.ecx, 0);
      a.setc(a.ecx.r8());
      a.shl(a.ecx, XERegisterBits::CarryShift);
   } else if (recordOverflow) {
      a.mov(a.ecx, 0);
      a.seto(a.ecx.r8());

      // 0 -> 0x00000000, 1 -> 0xFFFFFFFF, then keep only OV and sticky OV.
      a.neg(a.ecx);
      a.and_(a.ecx, XERegisterBits::Overflow | XERegisterBits::StickyOV);
   }

   if (recordCarry || recordOverflow) {
      // Clear only the bits being rewritten; sticky OV is never cleared.
      uint32_t mask = 0xFFFFFFFF;

      if (recordCarry) {
         mask &= ~XERegisterBits::Carry;
      }

      if (recordOverflow) {
         mask &= ~XERegisterBits::Overflow;
      }

      a.mov(a.edx, a.ppcxer);
      a.and_(a.edx, mask);
      a.or_(a.edx, a.ecx);
      a.mov(a.ppcxer, a.edx);
   }

   a.mov(a.ppcgpr[instr.rD], a.eax);

   if (recordCond) {
      updateConditionRegister(a, a.eax, a.ecx, a.edx);
   }

   return true;
}
// Negate static bool neg(PPCEmuAssembler& a, Instruction instr) { a.mov(a.eax, a.ppcgpr[instr.rA]); a.neg(a.eax); a.mov(a.ppcgpr[instr.rD], a.eax); if (instr.oe) { a.mov(a.ecx, 0); a.seto(a.ecx.r8()); // Reset overflow a.mov(a.edx, a.ppcxer); a.and_(a.edx, ~XERegisterBits::Overflow); a.shiftTo(a.ecx, 0, XERegisterBits::Overflow); a.or_(a.edx, a.ecx); a.shiftTo(a.ecx, XERegisterBits::Overflow, XERegisterBits::StickyOV); a.or_(a.edx, a.ecx); a.mov(a.ppcxer, a.edx); } if (instr.rc) { updateConditionRegister(a, a.eax, a.ecx, a.edx); } return true; }