void ArmFPRCache::Flush() { for(u8 a = 0; a < NUMPPCREG; ++a) if (ArmCRegs[a].PPCReg != 33) { u16 offset = PPCSTATE_OFF(ps) + (ArmCRegs[a].PPCReg * 16) + (ArmCRegs[a].PS1 ? 8 : 0); emit->VSTR(ArmCRegs[a].Reg, R9, offset); ArmCRegs[a].PPCReg = 33; ArmCRegs[a].LastLoad = 0; } }
void JitArm::ComputeRC(int cr) { ARMReg rB = gpr.GetReg(); MOV(rB, 0x2); // Result == 0 SetCC(CC_LT); MOV(rB, 0x8); // Result < 0 SetCC(CC_GT); MOV(rB, 0x4); // Result > 0 SetCC(); STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); gpr.Unlock(rB); }
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad) { u32 lastRegIndex = GetLeastUsedRegister(true); if (_regs[preg][PS1].GetType() != REG_NOTLOADED) { u8 a = _regs[preg][PS1].GetRegIndex(); ArmCRegs[a].LastLoad = 0; return ArmCRegs[a].Reg; } u32 regindex; if (FindFreeRegister(regindex)) { s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0); ArmCRegs[regindex].PPCReg = preg; ArmCRegs[regindex].LastLoad = 0; ArmCRegs[regindex].PS1 = PS1; _regs[preg][PS1].LoadToReg(regindex); emit->VLDR(ArmCRegs[regindex].Reg, R9, offset); return ArmCRegs[regindex].Reg; } // Alright, we couldn't get a free space, dump that least used register s16 offsetOld = PPCSTATE_OFF(ps) + (ArmCRegs[lastRegIndex].PPCReg * 16) + (ArmCRegs[lastRegIndex].PS1 ? 8 : 0); s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0); emit->VSTR(ArmCRegs[lastRegIndex].Reg, R9, offsetOld); _regs[ArmCRegs[lastRegIndex].PPCReg][ArmCRegs[lastRegIndex].PS1].Flush(); ArmCRegs[lastRegIndex].PPCReg = preg; ArmCRegs[lastRegIndex].LastLoad = 0; ArmCRegs[lastRegIndex].PS1 = PS1; _regs[preg][PS1].LoadToReg(lastRegIndex); emit->VLDR(ArmCRegs[lastRegIndex].Reg, R9, offsetNew); return ArmCRegs[lastRegIndex].Reg; }
ARMReg ArmRegCache::R(u32 preg) { if (regs[preg].GetType() == REG_IMM) return BindToRegister(preg); u32 lastRegIndex = GetLeastUsedRegister(true); // Check if already Loaded if (regs[preg].GetType() == REG_REG) { u8 a = regs[preg].GetRegIndex(); ArmCRegs[a].LastLoad = 0; return ArmCRegs[a].Reg; } // Check if we have a free register u32 regindex; if (FindFreeRegister(regindex)) { emit->LDR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4); ArmCRegs[regindex].PPCReg = preg; ArmCRegs[regindex].LastLoad = 0; regs[preg].LoadToReg(regindex); return ArmCRegs[regindex].Reg; } // Alright, we couldn't get a free space, dump that least used register emit->STR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[lastRegIndex].PPCReg * 4); emit->LDR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4); regs[ArmCRegs[lastRegIndex].PPCReg].Flush(); ArmCRegs[lastRegIndex].PPCReg = preg; ArmCRegs[lastRegIndex].LastLoad = 0; regs[preg].LoadToReg(lastRegIndex); return ArmCRegs[lastRegIndex].Reg; }
void JitArm64::mtmsr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); gpr.BindToRegister(inst.RS, true); STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(msr)); gpr.Flush(FlushMode::FLUSH_ALL); fpr.Flush(FlushMode::FLUSH_ALL); WriteExit(js.compilerPC + 4); }
void JitArm::mtmsr(UGeckoInstruction inst) { INSTRUCTION_START // Don't interpret this, if we do we get thrown out //JITDISABLE(SystemRegisters) STR(gpr.R(inst.RS), R9, PPCSTATE_OFF(msr)); gpr.Flush(); fpr.Flush(); WriteExit(js.compilerPC + 4, 0); }
void JitArm::ComputeRC(s32 value, int cr) { ARMReg rB = gpr.GetReg(); if (value < 0) MOV(rB, 0x8); else if (value > 0) MOV(rB, 0x4); else MOV(rB, 0x2); STRB(rB, R9, PPCSTATE_OFF(cr_fast) + cr); gpr.Unlock(rB); }
void JitArm64::crXXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); // Special case: crclr if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) { // Clear CR field bit int field = inst.CRBD >> 2; int bit = 3 - (inst.CRBD & 3); ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); switch (bit) { case CR_SO_BIT: AND(XA, XA, 64 - 62, 62, true); // XA & ~(1<<61) break; case CR_EQ_BIT: ORR(XA, XA, 0, 0, true); // XA | 1<<0 break; case CR_GT_BIT: ORR(XA, XA, 64 - 63, 0, true); // XA | 1<<63 break; case CR_LT_BIT: AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62) break; } STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); gpr.Unlock(WA); return; }
void ArmRegCache::Flush() { for (u8 a = 0; a < 32; ++a) { if (regs[a].GetType() == REG_IMM) BindToRegister(a); if (regs[a].GetType() == REG_REG) { u32 regindex = regs[a].GetRegIndex(); emit->STR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + a * 4); ArmCRegs[regindex].PPCReg = 33; ArmCRegs[regindex].LastLoad = 0; } regs[a].Flush(); } }
void JitArm::mtspr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(SystemRegisters) u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); switch (iIndex) { case SPR_LR: case SPR_CTR: case SPR_XER: // These are safe to do the easy way, see the bottom of this function. break; case SPR_GQR0: case SPR_GQR0 + 1: case SPR_GQR0 + 2: case SPR_GQR0 + 3: case SPR_GQR0 + 4: case SPR_GQR0 + 5: case SPR_GQR0 + 6: case SPR_GQR0 + 7: // Prevent recompiler from compiling in old quantizer values. // If the value changed, destroy all blocks using this quantizer // This will create a little bit of block churn, but hopefully not too bad. { /* MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex])); // Load old value CMP(32, R(EAX), gpr.R(inst.RD)); FixupBranch skip_destroy = J_CC(CC_E, false); int gqr = iIndex - SPR_GQR0; ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr); SetJumpTarget(skip_destroy);*/ } // TODO - break block if quantizers are written to. default: Default(inst); return; } // OK, this is easy. ARMReg RD = gpr.R(inst.RD); STR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4); }
void JitArm64::mtsrin(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); u32 b = inst.RB, d = inst.RD; gpr.BindToRegister(d, d == b); ARM64Reg index = gpr.GetReg(); ARM64Reg index64 = EncodeRegTo64(index); ARM64Reg RB = gpr.R(b); UBFM(index, RB, 28, 31); ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2)); STR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0])); gpr.Unlock(index); }
void JitArm::mfspr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(SystemRegisters) u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); ARMReg RD = gpr.R(inst.RD); switch (iIndex) { case SPR_WPAR: case SPR_DEC: case SPR_TL: case SPR_TU: Default(inst); return; default: LDR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4); break; } }
void JitArm::stX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); u32 a = inst.RA, b = inst.RB, s = inst.RS; s32 offset = inst.SIMM_16; u32 accessSize = 0; s32 regOffset = -1; bool update = false; bool fastmem = false; switch (inst.OPCD) { case 45: // sthu update = true; case 44: // sth accessSize = 16; break; case 31: switch (inst.SUBOP10) { case 183: // stwux update = true; case 151: // stwx fastmem = true; accessSize = 32; regOffset = b; break; case 247: // stbux update = true; case 215: // stbx accessSize = 8; regOffset = b; break; case 439: // sthux update = true; case 407: // sthx accessSize = 16; regOffset = b; break; } break; case 37: // stwu update = true; case 36: // stw fastmem = true; accessSize = 32; break; case 39: // stbu update = true; case 38: // stb accessSize = 8; break; } SafeStoreFromReg(fastmem, update ? a : (a ? a : -1), s, regOffset, accessSize, offset); if (update) { ARMReg rA = gpr.GetReg(); ARMReg RB; ARMReg RA = gpr.R(a); if (regOffset != -1) RB = gpr.R(regOffset); // Check for DSI exception prior to writing back address LDR(rA, R9, PPCSTATE_OFF(Exceptions)); TST(rA, EXCEPTION_DSI); FixupBranch DoNotWrite = B_CC(CC_NEQ); if (a) { if (regOffset == -1) { MOVI2R(rA, offset); ADD(RA, RA, rA); } else { ADD(RA, RA, RB); } } else { if (regOffset == -1) MOVI2R(RA, (u32)offset); else MOV(RA, RB); } SetJumpTarget(DoNotWrite); gpr.Unlock(rA); } }
void JitArm64::mfspr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); int d = inst.RD; switch (iIndex) { case SPR_TL: case SPR_TU: { ARM64Reg WA = gpr.GetReg(); ARM64Reg WB = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XB = EncodeRegTo64(WB); // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // cost of calling out to C for this is actually significant. MOVI2R(XA, (u64)&CoreTiming::globalTimer); LDR(INDEX_UNSIGNED, XA, XA, 0); MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks); LDR(INDEX_UNSIGNED, XB, XB, 0); SUB(XA, XA, XB); // It might seem convenient to correct the timer for the block position here for even more accurate // timing, but as of currently, this can break games. If we end up reading a time *after* the time // at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only // 50 downcount remaining, some games don't function correctly, such as Karaoke Party Revolution, // which won't get past the loading screen. // a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67 ORR(XB, SP, 1, 60); ADD(XB, XB, 1); UMULH(XA, XA, XB); MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue); LDR(INDEX_UNSIGNED, XB, XB, 0); ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3)); STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(spr[SPR_TL])); if (MergeAllowedNextInstructions(1)) { const UGeckoInstruction& next = js.op[1].inst; // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them // if we can. u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F); // Be careful; the actual opcode is for mftb (371), not mfspr (339) int n = next.RD; if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d) { js.downcountAmount++; js.skipInstructions = 1; gpr.BindToRegister(d, false); gpr.BindToRegister(n, false); if (iIndex == SPR_TL) MOV(gpr.R(d), WA); else ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32)); if (nextIndex == SPR_TL) MOV(gpr.R(n), WA); else ORR(EncodeRegTo64(gpr.R(n)), SP, XA, ArithOption(XA, ST_LSR, 32)); gpr.Unlock(WA, WB); break; } } gpr.BindToRegister(d, false); if (iIndex == SPR_TU) ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32)); else MOV(gpr.R(d), WA); gpr.Unlock(WA, WB); } break; case SPR_XER: { gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); ARM64Reg WA = gpr.GetReg(); LDRH(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(xer_stringctrl)); LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_CA_SHIFT)); LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov)); ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_OV_SHIFT)); gpr.Unlock(WA); } break; case SPR_WPAR: case SPR_DEC: FALLBACK_IF(true); default: gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4); break; } }
void JitArm64::twx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); s32 a = inst.RA; ARM64Reg WA = gpr.GetReg(); if (inst.OPCD == 3) // twi { if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 4096) { // Can fit in immediate in to the instruction encoding CMP(gpr.R(a), inst.SIMM_16); } else { MOVI2R(WA, (s32)(s16)inst.SIMM_16); CMP(gpr.R(a), WA); } } else // tw { CMP(gpr.R(a), gpr.R(inst.RB)); } std::vector<FixupBranch> fixups; CCFlags conditions[] = { CC_LT, CC_GT, CC_EQ, CC_VC, CC_VS }; for (int i = 0; i < 5; i++) { if (inst.TO & (1 << i)) { FixupBranch f = B(conditions[i]); fixups.push_back(f); } } FixupBranch dont_trap = B(); for (const FixupBranch& fixup : fixups) { SetJumpTarget(fixup); } gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); ORR(WA, WA, 24, 0); // Same as WA | EXCEPTION_PROGRAM STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); MOVI2R(WA, js.compilerPC); // WA is unlocked in this function WriteExceptionExit(WA); SetJumpTarget(dont_trap); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) { gpr.Flush(FlushMode::FLUSH_ALL); fpr.Flush(FlushMode::FLUSH_ALL); WriteExit(js.compilerPC + 4); } }
void JitArm::lfXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); ARMReg RA; u32 a = inst.RA, b = inst.RB; s32 offset = inst.SIMM_16; bool single = false; bool update = false; bool zeroA = false; s32 offsetReg = -1; switch (inst.OPCD) { case 31: switch (inst.SUBOP10) { case 567: // lfsux single = true; update = true; offsetReg = b; break; case 535: // lfsx single = true; zeroA = true; offsetReg = b; break; case 631: // lfdux update = true; offsetReg = b; break; case 599: // lfdx zeroA = true; offsetReg = b; break; } break; case 49: // lfsu update = true; single = true; break; case 48: // lfs single = true; zeroA = true; break; case 51: // lfdu update = true; break; case 50: // lfd zeroA = true; break; } ARMReg v0 = fpr.R0(inst.FD), v1; if (single) v1 = fpr.R1(inst.FD); if (update) { RA = gpr.R(a); // Update path /always/ uses RA if (offsetReg == -1) // uses SIMM_16 { MOVI2R(rB, offset); ADD(rB, rB, RA); } else { ADD(rB, gpr.R(offsetReg), RA); } } else { if (zeroA) { if (offsetReg == -1) { if (a) { RA = gpr.R(a); MOVI2R(rB, offset); ADD(rB, rB, RA); } else { MOVI2R(rB, (u32)offset); } } else { ARMReg RB = gpr.R(offsetReg); if (a) { RA = gpr.R(a); ADD(rB, RB, RA); } else { MOV(rB, RB); } } } } LDR(rA, R9, PPCSTATE_OFF(Exceptions)); CMP(rA, EXCEPTION_DSI); FixupBranch DoNotLoad = B_CC(CC_EQ); if (update) MOV(RA, rB); if (Core::g_CoreStartupParameter.bFastmem) { Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) BIC(rB, rB, mask); // 1 MOVI2R(rA, (u32)Memory::base, false); // 2-3 ADD(rB, rB, rA); // 4 NEONXEmitter nemit(this); if (single) { VLDR(S0, rB, 0); nemit.VREV32(I_8, D0, D0); // Byte swap to result VCVT(v0, S0, 0); VCVT(v1, S0, 0); } else { VLDR(v0, rB, 0); nemit.VREV64(I_8, v0, v0); // Byte swap to result } } else { PUSH(4, R0, R1, R2, R3); MOV(R0, rB); if (single) { MOVI2R(rA, (u32)&Memory::Read_U32); BL(rA); VMOV(S0, R0); VCVT(v0, S0, 0); VCVT(v1, S0, 0); } else { MOVI2R(rA, (u32)&Memory::Read_F64); BL(rA); #if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 VMOV(v0, R0); #else VMOV(v0, D0); #endif } POP(4, R0, R1, R2, R3); } gpr.Unlock(rA, rB); SetJumpTarget(DoNotLoad); }
void JitArm::fcmpo(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff) u32 a = inst.FA, b = inst.FB; int cr = inst.CRFD; ARMReg vA = fpr.R0(a); ARMReg vB = fpr.R0(b); ARMReg fpscrReg = gpr.GetReg(); ARMReg crReg = gpr.GetReg(); Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 Operand2 LessThan(0x8, 0xA); // 0x8000 Operand2 GreaterThan(0x4, 0xA); // 0x4000 Operand2 EqualTo(0x2, 0xA); // 0x2000 Operand2 NANRes(0x1, 0xA); // 0x1000 FixupBranch Done1, Done2, Done3; LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); BIC(fpscrReg, fpscrReg, FPRFMask); VCMPE(vA, vB); VMRS(_PC); SetCC(CC_LT); ORR(fpscrReg, fpscrReg, LessThan); MOV(crReg, 8); Done1 = B(); SetCC(CC_GT); ORR(fpscrReg, fpscrReg, GreaterThan); MOV(crReg, 4); Done2 = B(); SetCC(CC_EQ); ORR(fpscrReg, fpscrReg, EqualTo); MOV(crReg, 2); Done3 = B(); SetCC(); ORR(fpscrReg, fpscrReg, NANRes); MOV(crReg, 1); VCMPE(vA, vA); VMRS(_PC); FixupBranch NanA = B_CC(CC_NEQ); VCMPE(vB, vB); VMRS(_PC); FixupBranch NanB = B_CC(CC_NEQ); SetFPException(fpscrReg, FPSCR_VXVC); FixupBranch Done4 = B(); SetJumpTarget(NanA); SetJumpTarget(NanB); SetFPException(fpscrReg, FPSCR_VXSNAN); TST(fpscrReg, VEMask); FixupBranch noVXVC = B_CC(CC_NEQ); SetFPException(fpscrReg, FPSCR_VXVC); SetJumpTarget(noVXVC); SetJumpTarget(Done1); SetJumpTarget(Done2); SetJumpTarget(Done3); SetJumpTarget(Done4); STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(fpscrReg, crReg); }
void JitArm::fctiwzx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff) u32 b = inst.FB; u32 d = inst.FD; ARMReg vB = fpr.R0(b); ARMReg vD = fpr.R0(d); ARMReg V0 = fpr.GetReg(); ARMReg V1 = fpr.GetReg(); ARMReg V2 = fpr.GetReg(); ARMReg rA = gpr.GetReg(); ARMReg fpscrReg = gpr.GetReg(); FixupBranch DoneMax, DoneMin; LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); MOVI2R(rA, (u32)minmaxFloat); // Check if greater than max float { VLDR(V0, rA, 8); // Load Max VCMPE(vB, V0); VMRS(_PC); // Loads in to APSR FixupBranch noException = B_CC(CC_LE); VMOV(vD, V0); // Set to max SetFPException(fpscrReg, FPSCR_VXCVI); DoneMax = B(); SetJumpTarget(noException); } // Check if less than min float { VLDR(V0, rA, 0); VCMPE(vB, V0); VMRS(_PC); FixupBranch noException = B_CC(CC_GE); VMOV(vD, V0); SetFPException(fpscrReg, FPSCR_VXCVI); DoneMin = B(); SetJumpTarget(noException); } // Within ranges, convert to integer VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO); VCMPE(vD, vB); VMRS(_PC); SetCC(CC_EQ); BIC(fpscrReg, fpscrReg, FRFIMask); FixupBranch DoneEqual = B(); SetCC(); SetFPException(fpscrReg, FPSCR_XX); ORR(fpscrReg, fpscrReg, FIMask); VABS(V1, vB); VABS(V2, vD); VCMPE(V2, V1); VMRS(_PC); SetCC(CC_GT); ORR(fpscrReg, fpscrReg, FRMask); SetCC(); SetJumpTarget(DoneEqual); SetJumpTarget(DoneMax); SetJumpTarget(DoneMin); MOVI2R(rA, (u32)&doublenum); VLDR(V0, rA, 0); NEONXEmitter nemit(this); nemit.VORR(vD, vD, V0); if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(rA); gpr.Unlock(fpscrReg); fpr.Unlock(V0); fpr.Unlock(V1); fpr.Unlock(V2); }
void JitArmILAsmRoutineManager::Generate() { enterCode = GetCodePtr(); PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR); // Take care to 8-byte align stack for function calls. // We are misaligned here because of an odd number of args for PUSH. // It's not like x86 where you need to account for an extra 4 bytes // consumed by CALL. SUB(_SP, _SP, 4); MOVI2R(R0, (u32)&CoreTiming::downcount); MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]); FixupBranch skipToRealDispatcher = B(); dispatcher = GetCodePtr(); printf("ILDispatcher is %p\n", dispatcher); // Downcount Check // The result of slice decrementation should be in flags if somebody jumped here // IMPORTANT - We jump on negative, not carry!!! FixupBranch bail = B_CC(CC_MI); SetJumpTarget(skipToRealDispatcher); dispatcherNoCheck = GetCodePtr(); // This block of code gets the address of the compiled block of code // It runs though to the compiling portion if it isn't found LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12 Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here. MOVI2R(R14, (u32)jit->GetBlockCache()->iCache); LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here // R12 Confirmed this is the correct iCache Location loaded. TST(R12, 0x80); // Test to see if it is a JIT block. SetCC(CC_EQ); // Success, it is our Jitblock. MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers()); // LDR R14 right here to get CodePointers()[0] pointer. LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size LDR(R14, R14, R12); // Load the block address in to R14 B(R14); // No need to jump anywhere after here, the block will go back to dispatcher start SetCC(); // If we get to this point, that means that we don't have the block cached to execute // So call ArmJit to compile the block and then execute it. MOVI2R(R14, (u32)&Jit); BL(R14); B(dispatcherNoCheck); // fpException() // Floating Point Exception Check, Jumped to if false fpException = GetCodePtr(); LDR(R0, R9, PPCSTATE_OFF(Exceptions)); ORR(R0, R0, EXCEPTION_FPU_UNAVAILABLE); STR(R0, R9, PPCSTATE_OFF(Exceptions)); QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); LDR(R0, R9, PPCSTATE_OFF(npc)); STR(R0, R9, PPCSTATE_OFF(pc)); B(dispatcher); SetJumpTarget(bail); doTiming = GetCodePtr(); // XXX: In JIT64, Advance() gets called /after/ the exception checking // once it jumps back to the start of outerLoop QuickCallFunction(R14, (void*)&CoreTiming::Advance); // Does exception checking testExceptions = GetCodePtr(); LDR(R0, R9, PPCSTATE_OFF(pc)); STR(R0, R9, PPCSTATE_OFF(npc)); QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); LDR(R0, R9, PPCSTATE_OFF(npc)); STR(R0, R9, PPCSTATE_OFF(pc)); // Check the state pointer to see if we are exiting // Gets checked on every exception check MOVI2R(R0, (u32)PowerPC::GetStatePtr()); MVN(R1, 0); LDR(R0, R0); TST(R0, R1); FixupBranch Exit = B_CC(CC_NEQ); B(dispatcher); SetJumpTarget(Exit); ADD(_SP, _SP, 4); POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns GenerateCommon(); FlushIcache(); }
void JitArm::Helper_UpdateCR1(ARMReg fpscr, ARMReg temp) { UBFX(temp, fpscr, 28, 4); STRB(temp, R9, PPCSTATE_OFF(cr_fast[1])); }
void JitArm::lXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); u32 a = inst.RA, b = inst.RB, d = inst.RD; s32 offset = inst.SIMM_16; u32 accessSize = 0; s32 offsetReg = -1; bool update = false; bool signExtend = false; bool reverse = false; bool fastmem = false; switch (inst.OPCD) { case 31: switch (inst.SUBOP10) { case 55: // lwzux update = true; case 23: // lwzx fastmem = true; accessSize = 32; offsetReg = b; break; case 119: //lbzux update = true; case 87: // lbzx fastmem = true; accessSize = 8; offsetReg = b; break; case 311: // lhzux update = true; case 279: // lhzx fastmem = true; accessSize = 16; offsetReg = b; break; case 375: // lhaux update = true; case 343: // lhax accessSize = 16; signExtend = true; offsetReg = b; break; case 534: // lwbrx accessSize = 32; reverse = true; break; case 790: // lhbrx accessSize = 16; reverse = true; break; } break; case 33: // lwzu update = true; case 32: // lwz fastmem = true; accessSize = 32; break; case 35: // lbzu update = true; case 34: // lbz fastmem = true; accessSize = 8; break; case 41: // lhzu update = true; case 40: // lhz fastmem = true; accessSize = 16; break; case 43: // lhau update = true; case 42: // lha signExtend = true; accessSize = 16; break; } // Check for exception before loading ARMReg rA = gpr.GetReg(false); LDR(rA, R9, PPCSTATE_OFF(Exceptions)); TST(rA, EXCEPTION_DSI); FixupBranch DoNotLoad = B_CC(CC_NEQ); SafeLoadToReg(fastmem, d, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse); if (update) { ARMReg RA = gpr.R(a); if (offsetReg == -1) { rA = gpr.GetReg(false); MOVI2R(rA, offset); ADD(RA, RA, rA); } else { ADD(RA, RA, gpr.R(offsetReg)); } } SetJumpTarget(DoNotLoad); // LWZ idle skipping if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && inst.OPCD == 32 && (inst.hex & 0xFFFF0000) == 0x800D0000 && (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) { ARMReg RD = gpr.R(d); // if it's still 0, we can wait until the next event TST(RD, RD); FixupBranch noIdle = B_CC(CC_NEQ); gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); rA = gpr.GetReg(); MOVI2R(rA, (u32)&PowerPC::OnIdle); MOVI2R(R0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); BL(rA); gpr.Unlock(rA); WriteExceptionExit(); SetJumpTarget(noIdle); //js.compilerPC += 8; return; } }
void JitArm::fctiwx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff) u32 b = inst.FB; u32 d = inst.FD; ARMReg vB = fpr.R0(b); ARMReg vD = fpr.R0(d); ARMReg V0 = fpr.GetReg(); ARMReg V1 = fpr.GetReg(); ARMReg V2 = fpr.GetReg(); ARMReg rA = gpr.GetReg(); ARMReg fpscrReg = gpr.GetReg(); FixupBranch DoneMax, DoneMin; LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); MOVI2R(rA, (u32)minmaxFloat); // Check if greater than max float { VLDR(V0, rA, 8); // Load Max VCMPE(vB, V0); VMRS(_PC); // Loads in to APSR FixupBranch noException = B_CC(CC_LE); VMOV(vD, V0); // Set to max SetFPException(fpscrReg, FPSCR_VXCVI); DoneMax = B(); SetJumpTarget(noException); } // Check if less than min float { VLDR(V0, rA, 0); VCMPE(vB, V0); VMRS(_PC); FixupBranch noException = B_CC(CC_GE); VMOV(vD, V0); SetFPException(fpscrReg, FPSCR_VXCVI); DoneMin = B(); SetJumpTarget(noException); } // Within ranges, convert to integer // Set rounding mode first // PPC <-> ARM rounding modes // 0, 1, 2, 3 <-> 0, 3, 1, 2 ARMReg rB = gpr.GetReg(); VMRS(rA); // Bits 22-23 BIC(rA, rA, Operand2(3, 5)); LDR(rB, R9, PPCSTATE_OFF(fpscr)); AND(rB, rB, 0x3); // Get the FPSCR rounding bits CMP(rB, 1); SetCC(CC_EQ); // zero ORR(rA, rA, Operand2(3, 5)); SetCC(CC_NEQ); CMP(rB, 2); // +inf SetCC(CC_EQ); ORR(rA, rA, Operand2(1, 5)); SetCC(CC_NEQ); CMP(rB, 3); // -inf SetCC(CC_EQ); ORR(rA, rA, Operand2(2, 5)); SetCC(); VMSR(rA); ORR(rA, rA, Operand2(3, 5)); VCVT(vD, vB, TO_INT | IS_SIGNED); VMSR(rA); gpr.Unlock(rB); VCMPE(vD, vB); VMRS(_PC); SetCC(CC_EQ); BIC(fpscrReg, fpscrReg, FRFIMask); FixupBranch DoneEqual = B(); SetCC(); SetFPException(fpscrReg, FPSCR_XX); ORR(fpscrReg, fpscrReg, FIMask); VABS(V1, vB); VABS(V2, vD); VCMPE(V2, V1); VMRS(_PC); SetCC(CC_GT); ORR(fpscrReg, fpscrReg, FRMask); SetCC(); SetJumpTarget(DoneEqual); SetJumpTarget(DoneMax); SetJumpTarget(DoneMin); MOVI2R(rA, (u32)&doublenum); VLDR(V0, rA, 0); NEONXEmitter nemit(this); nemit.VORR(vD, vD, V0); if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(rA); gpr.Unlock(fpscrReg); fpr.Unlock(V0); fpr.Unlock(V1); fpr.Unlock(V2); }
void JitArm::stfXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); ARMReg RA; u32 a = inst.RA, b = inst.RB; s32 offset = inst.SIMM_16; bool single = false; bool update = false; bool zeroA = false; s32 offsetReg = -1; switch (inst.OPCD) { case 31: switch (inst.SUBOP10) { case 663: // stfsx single = true; zeroA = true; offsetReg = b; break; case 695: // stfsux single = true; offsetReg = b; break; case 727: // stfdx zeroA = true; offsetReg = b; break; case 759: // stfdux update = true; offsetReg = b; break; } break; case 53: // stfsu update = true; single = true; break; case 52: // stfs single = true; zeroA = true; break; case 55: // stfdu update = true; break; case 54: // stfd zeroA = true; break; } ARMReg v0 = fpr.R0(inst.FS); if (update) { RA = gpr.R(a); // Update path /always/ uses RA if (offsetReg == -1) // uses SIMM_16 { MOVI2R(rB, offset); ADD(rB, rB, RA); } else { ADD(rB, gpr.R(offsetReg), RA); } } else { if (zeroA) { if (offsetReg == -1) { if (a) { RA = gpr.R(a); MOVI2R(rB, offset); ADD(rB, rB, RA); } else { MOVI2R(rB, (u32)offset); } } else { ARMReg RB = gpr.R(offsetReg); if (a) { RA = gpr.R(a); ADD(rB, RB, RA); } else { MOV(rB, RB); } } } } if (update) { LDR(rA, R9, PPCSTATE_OFF(Exceptions)); CMP(rA, EXCEPTION_DSI); SetCC(CC_NEQ); MOV(RA, rB); SetCC(); } if (Core::g_CoreStartupParameter.bFastmem) { Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) BIC(rB, rB, mask); // 1 MOVI2R(rA, (u32)Memory::base, false); // 2-3 ADD(rB, rB, rA); // 4 NEONXEmitter nemit(this); if (single) { VCVT(S0, v0, 0); nemit.VREV32(I_8, D0, D0); VSTR(S0, rB, 0); } else { nemit.VREV64(I_8, D0, v0); VSTR(D0, rB, 0); } } else { PUSH(4, R0, R1, R2, R3); if (single) { MOVI2R(rA, (u32)&Memory::Write_U32); VCVT(S0, v0, 0); VMOV(R0, S0); MOV(R1, rB); BL(rA); } else { MOVI2R(rA, (u32)&Memory::Write_F64); #if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 VMOV(R0, v0); MOV(R2, rB); #else VMOV(D0, v0); MOV(R0, rB); #endif BL(rA); } POP(4, R0, R1, R2, R3); } gpr.Unlock(rA, rB); }