// ps_rsqrte: paired-single reciprocal square-root estimate.
// Per lane: stores the PPC NaN constant and raises VXSQRT on one side of a
// compare-with-zero, remembers zero inputs in a flag register (raising ZX if
// either lane was zero), then narrows both lanes to single precision, runs
// NEON VRSQRTE on the S0/S1 pair (aliasing D0), and widens back to double.
void JitArm::ps_rsqrte(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 b = inst.FB, d = inst.FD;

	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	ARMReg fpscrReg = gpr.GetReg();
	ARMReg V0 = D1;
	ARMReg rA = gpr.GetReg();

	// BUGFIX: rA accumulates per-lane "input was zero" flags via the ORRs
	// below and was never initialized, so the final CMP(rA, 0) tested
	// whatever garbage the register held and could raise a spurious ZX
	// exception (or miss a real one). Start from a known zero.
	MOV(rA, 0);

	MOVI2R(fpscrReg, (u32)&PPC_NAN);
	VLDR(V0, fpscrReg, 0); // V0 = PPC NaN constant, used as the error result
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));

	// Lane 0.
	// NOTE(review): B_CC(CC_LT) skips the NaN/VXSQRT path when vB0 < 0 and
	// takes it when vB0 >= 0, which looks inverted for a sqrt domain check —
	// confirm against the interpreter before relying on this.
	VCMP(vB0);
	VMRS(_PC);
	FixupBranch Less0 = B_CC(CC_LT);
	VMOV(vD0, V0);
	SetFPException(fpscrReg, FPSCR_VXSQRT);
	FixupBranch SkipOrr0 = B();
	SetJumpTarget(Less0);
	SetCC(CC_EQ);
	ORR(rA, rA, 1); // remember: lane 0 input was zero
	SetCC();
	SetJumpTarget(SkipOrr0);

	// Lane 1 — same pattern, flag bit 2.
	VCMP(vB1);
	VMRS(_PC);
	FixupBranch Less1 = B_CC(CC_LT);
	VMOV(vD1, V0);
	SetFPException(fpscrReg, FPSCR_VXSQRT);
	FixupBranch SkipOrr1 = B();
	SetJumpTarget(Less1);
	SetCC(CC_EQ);
	ORR(rA, rA, 2); // remember: lane 1 input was zero
	SetCC();
	SetJumpTarget(SkipOrr1);

	// Raise the zero-divide exception if either lane was zero.
	CMP(rA, 0);
	FixupBranch noException = B_CC(CC_EQ);
	SetFPException(fpscrReg, FPSCR_ZX);
	SetJumpTarget(noException);

	// Narrow to single (S0/S1 alias D0), estimate, widen back to double.
	VCVT(S0, vB0, 0);
	VCVT(S1, vB1, 0);

	NEONXEmitter nemit(this);
	nemit.VRSQRTE(F_32, D0, D0);
	VCVT(vD0, S0, 0);
	VCVT(vD1, S1, 0);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(fpscrReg, rA);
}
// fcmpu: floating-point compare unordered.
// Emits code that compares FPR[a] with FPR[b], writes the 4-bit PPC result
// (LT=8, GT=4, EQ=2, unordered=1) into cr_fast[crfd], and replaces FPSCR's
// FPRF field with the matching bits 12-16. If an operand is NaN, VXSNAN is
// raised (the code does not distinguish signaling from quiet NaNs here).
void JitArm::fcmpu(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 a = inst.FA, b = inst.FB;
	int cr = inst.CRFD;
	ARMReg vA = fpr.R0(a);
	ARMReg vB = fpr.R0(b);
	ARMReg fpscrReg = gpr.GetReg();
	ARMReg crReg = gpr.GetReg();
	// Operand2(x, 0xA) rotates x into bits 12+ of the immediate.
	Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
	Operand2 LessThan(0x8, 0xA); // 0x8000
	Operand2 GreaterThan(0x4, 0xA); // 0x4000
	Operand2 EqualTo(0x2, 0xA); // 0x2000
	Operand2 NANRes(0x1, 0xA); // 0x1000
	FixupBranch Done1, Done2, Done3;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	BIC(fpscrReg, fpscrReg, FPRFMask); // clear previous FPRF result
	VCMPE(vA, vB);
	VMRS(_PC); // transfer VFP comparison flags into the APSR
	// Exactly one of LT/GT/EQ is true for an ordered result. Each arm is
	// emitted under SetCC, so the ORR/MOV/B are all conditional; the taken
	// branch jumps past the NaN handling below.
	SetCC(CC_LT);
	ORR(fpscrReg, fpscrReg, LessThan);
	MOV(crReg, 8);
	Done1 = B();
	SetCC(CC_GT);
	ORR(fpscrReg, fpscrReg, GreaterThan);
	MOV(crReg, 4);
	Done2 = B();
	SetCC(CC_EQ);
	ORR(fpscrReg, fpscrReg, EqualTo);
	MOV(crReg, 2);
	Done3 = B();
	SetCC();
	// Fallthrough: unordered (at least one NaN operand).
	ORR(fpscrReg, fpscrReg, NANRes);
	MOV(crReg, 1);
	// Self-compare is unordered (NE after VMRS) iff the operand is NaN.
	VCMPE(vA, vA);
	VMRS(_PC);
	FixupBranch NanA = B_CC(CC_NEQ);
	VCMPE(vB, vB);
	VMRS(_PC);
	FixupBranch NanB = B_CC(CC_NEQ);
	FixupBranch Done4 = B(); // neither NaN (shouldn't happen on this path)
	SetJumpTarget(NanA);
	SetJumpTarget(NanB);
	SetFPException(fpscrReg, FPSCR_VXSNAN);
	SetJumpTarget(Done1);
	SetJumpTarget(Done2);
	SetJumpTarget(Done3);
	SetJumpTarget(Done4);
	// Write the CR nibble and the updated FPSCR back to ppcState.
	STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(fpscrReg, crReg);
}
// lwz: load word and zero.
// Emits an ABI call to Memory::Read_U32 at RA+SIMM_16 (or SIMM_16 if RA=0),
// skipped when a DSI exception is already pending, then emits the standard
// idle-loop skip for the "lwz rX, off(r13); cmpwi; beq -8" pattern.
void JitArm::lwz(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RD = gpr.R(inst.RD);
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	// BUGFIX: Exceptions is a bitmask. The previous CMP(rA, EXCEPTION_DSI)
	// + CC_EQ only skipped the load when DSI was the *only* pending
	// exception. Test the bit instead, matching stX/lXX.
	TST(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_NEQ);
	{
		// Effective address = (RA ? RA : 0) + SIMM_16.
		if (inst.RA)
		{
			MOVI2R(rB, inst.SIMM_16);
			ARMReg RA = gpr.R(inst.RA);
			ADD(rB, rB, RA);
		}
		else
			MOVI2R(rB, (u32)inst.SIMM_16);

		MOVI2R(rA, (u32)&Memory::Read_U32);
		PUSH(4, R0, R1, R2, R3); // preserve caller-saved regs across the call
		MOV(R0, rB);
		BL(rA);
		MOV(rA, R0); // grab the result before restoring R0-R3
		POP(4, R0, R1, R2, R3);
		MOV(RD, rA);
		gpr.Unlock(rA, rB);
	}
	SetJumpTarget(DoNotLoad);

	// Idle skipping: recognize the busy-wait loop and call OnIdle instead
	// of spinning. Pattern: lwz from r13 small data, cmpwi 0, beq -8.
	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
	    (inst.hex & 0xFFFF0000) == 0x800D0000 &&
	    (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
	     (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii &&
	      Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
	    Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
	{
		gpr.Flush();
		fpr.Flush();

		// if it's still 0, we can wait until the next event
		TST(RD, RD);
		FixupBranch noIdle = B_CC(CC_NEQ);

		rA = gpr.GetReg();

		MOVI2R(rA, (u32)&PowerPC::OnIdle);
		// NOTE(review): reads ppcState.gpr[RA] at compile time — assumes
		// the base register value is stable for this idle pattern.
		MOVI2R(R0, PowerPC::ppcState.gpr[inst.RA] + (s32)(s16)inst.SIMM_16);
		BL(rA);

		gpr.Unlock(rA);
		WriteExceptionExit();

		SetJumpTarget(noIdle);
		//js.compilerPC += 8;
		return;
	}
}
// stbu: store byte with update.
// Stores the low byte of RS to RA+SIMM_16 via Memory::Write_U8 and writes
// the effective address back to RA. The writeback is skipped when a DSI
// exception is already pending, per PPC update-form semantics.
void JitArm::stbu(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)
	ARMReg RA = gpr.R(inst.RA);
	ARMReg RS = gpr.R(inst.RS);
	ARMReg ValueReg = gpr.GetReg();
	ARMReg Addr = gpr.GetReg();
	ARMReg Function = gpr.GetReg();

	MOVI2R(Addr, inst.SIMM_16);
	ADD(Addr, Addr, RA); // effective address = RA + SIMM_16

	// Check for DSI exception prior to writing back address
	// BUGFIX: Exceptions is a bitmask; the old CMP/CC_EQ pair only caught
	// DSI when no other exception bit was set. Test the bit instead,
	// consistent with stX.
	LDR(Function, R9, PPCSTATE_OFF(Exceptions));
	TST(Function, EXCEPTION_DSI);
	FixupBranch DoNotWrite = B_CC(CC_NEQ);
	MOV(RA, Addr); // update RA with the effective address
	SetJumpTarget(DoNotWrite);

	MOV(ValueReg, RS);

	MOVI2R(Function, (u32)&Memory::Write_U8);
	PUSH(4, R0, R1, R2, R3); // preserve caller-saved regs across the call
	MOV(R0, ValueReg);
	MOV(R1, Addr);
	BL(Function);
	POP(4, R0, R1, R2, R3);

	gpr.Unlock(ValueReg, Addr, Function);
}
// lwzx: load word and zero, indexed.
// Loads a 32-bit word from (RA ? RA+RB : RB) via an ABI call to
// Memory::Read_U32, unless a DSI exception is pending.
void JitArm::lwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();

	ARMReg RB = gpr.R(inst.RB);
	ARMReg RD = gpr.R(inst.RD);
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	// NOTE(review): Exceptions is a bitmask elsewhere (stX/lXX use TST);
	// CMP only matches when DSI is the sole pending bit — confirm intent.
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);
	{
		// Effective address = (RA ? RA : 0) + RB.
		if (inst.RA)
		{
			ARMReg RA = gpr.R(inst.RA);
			ADD(rB, RA, RB);
		}
		else
			MOV(rB, RB);

		MOVI2R(rA, (u32)&Memory::Read_U32);
		PUSH(4, R0, R1, R2, R3); // preserve caller-saved regs across the call
		MOV(R0, rB);
		BL(rA);
		MOV(rA, R0); // grab the result before restoring R0-R3
		POP(4, R0, R1, R2, R3);
		MOV(RD, rA);
		gpr.Unlock(rA, rB);
	}
	SetJumpTarget(DoNotLoad);
	////	u32 temp = Memory::Read_U32(_inst.RA ? (m_GPR[_inst.RA] + m_GPR[_inst.RB]) : m_GPR[_inst.RB]);
}
// lha: load halfword algebraic.
// Loads a 16-bit value from RA+SIMM_16 (or SIMM_16 if RA=0) via
// Memory::Read_U16 and sign-extends it into RD, unless a DSI exception
// is pending.
void JitArm::lha(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RD = gpr.R(inst.RD);
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	// NOTE(review): Exceptions is a bitmask elsewhere (stX/lXX use TST);
	// CMP only matches when DSI is the sole pending bit — confirm intent.
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	// Effective address = (RA ? RA : 0) + SIMM_16.
	if (inst.RA)
	{
		MOVI2R(rB, inst.SIMM_16);
		ARMReg RA = gpr.R(inst.RA);
		ADD(rB, rB, RA);
	}
	else
		MOVI2R(rB, (u32)inst.SIMM_16);

	MOVI2R(rA, (u32)&Memory::Read_U16);
	PUSH(4, R0, R1, R2, R3); // preserve caller-saved regs across the call
	MOV(R0, rB);
	BL(rA);
	MOV(rA, R0);
	SXTH(rA, rA); // sign-extend the halfword ("algebraic" load)
	POP(4, R0, R1, R2, R3);
	MOV(RD, rA);
	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
// ps_sel: paired-single select.
// For each lane independently: D = (A >= 0) ? C : B. Emits a VFP compare
// against zero per lane and a small conditional diamond choosing the source.
void JitArm::ps_sel(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

	// Gather per-lane register mappings up front (lane 0, lane 1).
	ARMReg selA[2] = { fpr.R0(a), fpr.R1(a) };
	ARMReg selB[2] = { fpr.R0(b), fpr.R1(b) };
	ARMReg selC[2] = { fpr.R0(c), fpr.R1(c) };
	ARMReg dest[2] = { fpr.R0(d, false), fpr.R1(d, false) };

	// Emit the identical compare/select diamond for both lanes.
	for (int lane = 0; lane < 2; ++lane)
	{
		VCMP(selA[lane]);         // compare lane of A against zero
		VMRS(_PC);                // move VFP flags into the APSR
		FixupBranch takeC = B_CC(CC_GE); // A >= 0 -> result comes from C
		VMOV(dest[lane], selB[lane]);    // otherwise result comes from B
		FixupBranch done = B();
		SetJumpTarget(takeC);
		VMOV(dest[lane], selC[lane]);
		SetJumpTarget(done);
	}
}
// lfs: load floating-point single.
// Currently DISABLED: the unconditional Default(inst)/return at the top
// falls back to the interpreter, so everything after it is dead code kept
// for reference (it loads a 32-bit word and duplicates it into both halves
// of FD as a paired single).
void JitArm::lfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStoreFloating)

	Default(inst); return; // fall back to interpreter; code below is dead

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions));
	// NOTE(review): CMP vs bitmask — see lwz; would need TST if re-enabled.
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	// Effective address = (RA ? RA : 0) + SIMM_16.
	if (inst.RA)
	{
		MOVI2R(rB, inst.SIMM_16);
		ARMReg RA = gpr.R(inst.RA);
		ADD(rB, rB, RA);
	}
	else
		MOVI2R(rB, (u32)inst.SIMM_16);

	MOVI2R(rA, (u32)&Memory::Read_U32);
	PUSH(4, R0, R1, R2, R3);
	MOV(R0, rB);
	BL(rA);
	MOV(rA, R0);
	POP(4, R0, R1, R2, R3);

	ARMReg v0 = fpr.R0(inst.FD, false);
	ARMReg v1 = fpr.R1(inst.FD, false);

	// Duplicate the loaded bits into both lanes of the paired single.
	VMOV(v0, rA, false);
	VMOV(v1, rA, false);
	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
// stX: shared implementation for the PPC integer store family
// (stb/sth/stw and their update/indexed variants). Decodes the opcode into
// access size / offset register / update flag, performs the store through
// SafeStoreFromReg, then (for update forms) writes the effective address
// back to RA unless a DSI exception is pending.
void JitArm::stX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	u32 a = inst.RA, b = inst.RB, s = inst.RS;
	s32 offset = inst.SIMM_16;
	u32 accessSize = 0;
	s32 regOffset = -1; // -1 => immediate offset; otherwise index register
	bool update = false;
	bool fastmem = false;

	// Each "u" case deliberately falls through into its non-update twin.
	switch (inst.OPCD)
	{
		case 45: // sthu
			update = true;
		case 44: // sth
			accessSize = 16;
		break;
		case 31:
			switch (inst.SUBOP10)
			{
				case 183: // stwux
					update = true;
				case 151: // stwx
					fastmem = true;
					accessSize = 32;
					regOffset = b;
				break;
				case 247: // stbux
					update = true;
				case 215: // stbx
					accessSize = 8;
					regOffset = b;
				break;
				case 439: // sthux
					update = true;
				case 407: // sthx
					accessSize = 16;
					regOffset = b;
				break;
			}
		break;
		case 37: // stwu
			update = true;
		case 36: // stw
			fastmem = true;
			accessSize = 32;
		break;
		case 39: // stbu
			update = true;
		case 38: // stb
			accessSize = 8;
		break;
	}
	// Update forms always pass RA; plain forms pass RA or -1 when RA=0.
	SafeStoreFromReg(fastmem, update ? a : (a ? a : -1), s, regOffset, accessSize, offset);
	if (update)
	{
		ARMReg rA = gpr.GetReg();
		ARMReg RB;
		ARMReg RA = gpr.R(a);
		if (regOffset != -1)
			RB = gpr.R(regOffset);
		// Check for DSI exception prior to writing back address
		LDR(rA, R9, PPCSTATE_OFF(Exceptions));
		TST(rA, EXCEPTION_DSI); // bit test: other pending bits don't matter
		FixupBranch DoNotWrite = B_CC(CC_NEQ);
		// RA <- effective address (immediate or register-indexed form).
		if (a)
		{
			if (regOffset == -1)
			{
				MOVI2R(rA, offset);
				ADD(RA, RA, rA);
			}
			else
			{
				ADD(RA, RA, RB);
			}
		}
		else
		{
			if (regOffset == -1)
				MOVI2R(RA, (u32)offset);
			else
				MOV(RA, RB);
		}
		SetJumpTarget(DoNotWrite);
		gpr.Unlock(rA);
	}
}
// fctiwx: convert FPR to 32-bit signed integer using the FPSCR rounding
// mode. Clamps out-of-range inputs to INT_MAX/INT_MIN (raising VXCVI),
// translates the PPC rounding mode into the ARM FPSCR RMode field for the
// conversion, sets FPSCR FI/FR inexact bits, and ORs the "doublenum" magic
// pattern into the upper bits of the result per PPC fctiw semantics.
void JitArm::fctiwx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();
	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(rA, (u32)minmaxFloat); // table: [min, max] clamp constants

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0);
		VCMPE(vB, V0);
		VMRS(_PC);
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0);
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	// Set rounding mode first
	// PPC <-> ARM rounding modes
	// 0, 1, 2, 3 <-> 0, 3, 1, 2
	ARMReg rB = gpr.GetReg();
	VMRS(rA); // read ARM FPSCR
	// Bits 22-23
	BIC(rA, rA, Operand2(3, 5)); // clear RMode (Operand2(3,5) = 3 << 22)
	LDR(rB, R9, PPCSTATE_OFF(fpscr));
	AND(rB, rB, 0x3); // Get the FPSCR rounding bits
	// Map PPC RN -> ARM RMode with a chain of conditional ORRs.
	CMP(rB, 1);
	SetCC(CC_EQ); // zero
		ORR(rA, rA, Operand2(3, 5));
	SetCC(CC_NEQ);
		CMP(rB, 2); // +inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(1, 5));
	SetCC(CC_NEQ);
		CMP(rB, 3); // -inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(2, 5));
	SetCC();
	VMSR(rA); // install the translated rounding mode
	// NOTE(review): rA is then ORed to round-toward-zero and written back
	// *after* the VCVT — so the FPSCR is left in RZ mode rather than
	// restored. Verify this is the intended post-state.
	ORR(rA, rA, Operand2(3, 5));
	VCVT(vD, vB, TO_INT | IS_SIGNED);
	VMSR(rA);
	gpr.Unlock(rB);
	// Inexact handling: compare converted value against the source.
	VCMPE(vD, vB);
	VMRS(_PC);
	SetCC(CC_EQ); // exact conversion: clear FR/FI
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	SetFPException(fpscrReg, FPSCR_XX); // inexact
	ORR(fpscrReg, fpscrReg, FIMask);
	// FR = result was rounded up in magnitude (|result| > |input|).
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);
	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);
	// Merge the fctiw high-word pattern into the destination double.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);
	if (inst.Rc)
		Helper_UpdateCR1(fpscrReg, rA);
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
// fctiwzx: convert FPR to 32-bit signed integer with round-toward-zero
// (the "z" form — no FPSCR rounding-mode translation needed, the VCVT
// itself truncates). Clamps out-of-range inputs (raising VXCVI), sets
// FPSCR FI/FR inexact bits, and ORs the "doublenum" magic pattern into
// the upper bits of the result per PPC fctiwz semantics.
void JitArm::fctiwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();
	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(rA, (u32)minmaxFloat); // table: [min, max] clamp constants

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0);
		VCMPE(vB, V0);
		VMRS(_PC);
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0);
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer (truncating)
	VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
	// Inexact handling: compare converted value against the source.
	VCMPE(vD, vB);
	VMRS(_PC);
	SetCC(CC_EQ); // exact conversion: clear FR/FI
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	SetFPException(fpscrReg, FPSCR_XX); // inexact
	ORR(fpscrReg, fpscrReg, FIMask);
	// FR = result was rounded up in magnitude (|result| > |input|).
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);
	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);
	// Merge the fctiwz high-word pattern into the destination double.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);
	if (inst.Rc)
		Helper_UpdateCR1(fpscrReg, rA);
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
// Comp_SVQ: compile MIPS VFPU quad load/store (lv.q / sv.q).
// Computes the address either as a compile-time constant (immediate rs) or
// at runtime; non-fastmem builds validate the address via
// SetCCAndR0ForSafeAddress, which leaves the condition flags set so the
// memory ops can be predicated (or skipped on ARMv7s, where the #ifdef
// path uses an explicit branch instead).
void Jit::Comp_SVQ(MIPSOpcode op)
{
	CONDITIONAL_DISABLE;

	int imm = (signed short)(op&0xFFFC);
	int vt = (((op >> 16) & 0x1f)) | ((op&1) << 5);
	MIPSGPReg rs = _RS;

	bool doCheck = false;
	switch (op >> 26)
	{
	case 54: //lv.q
		{
			// CC might be set by slow path below, so load regs first.
			u8 vregs[4];
			GetVectorRegs(vregs, V_Quad, vt);
			fpr.MapRegsAndSpillLockV(vregs, V_Quad, MAP_DIRTY | MAP_NOINIT);

			if (gpr.IsImm(rs)) {
				// Address known at compile time: fold into one constant.
				u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
				MOVI2R(R0, addr + (u32)Memory::base);
			} else {
				gpr.MapReg(rs);
				if (g_Config.bFastMemory) {
					SetR0ToEffectiveAddress(rs, imm);
				} else {
					// Sets flags: EQ when the address is invalid.
					SetCCAndR0ForSafeAddress(rs, imm, R1);
					doCheck = true;
				}
				ADD(R0, R0, R11); // R11 = Memory::base
			}

#ifdef __ARM_ARCH_7S__
			FixupBranch skip;
			if (doCheck) {
				skip = B_CC(CC_EQ); // invalid address: skip the loads
			}
			for (int i = 0; i < 4; i++)
				VLDR(fpr.V(vregs[i]), R0, i * 4);
			if (doCheck) {
				SetJumpTarget(skip);
				SetCC(CC_AL);
			}
#else
			for (int i = 0; i < 4; i++)
				VLDR(fpr.V(vregs[i]), R0, i * 4);
			if (doCheck) {
				// Invalid address: zero the destination quad instead.
				SetCC(CC_EQ);
				MOVI2R(R0, 0);
				for (int i = 0; i < 4; i++)
					VMOV(fpr.V(vregs[i]), R0);
				SetCC(CC_AL);
			}
#endif
		}
		break;

	case 62: //sv.q
		{
			// CC might be set by slow path below, so load regs first.
			u8 vregs[4];
			GetVectorRegs(vregs, V_Quad, vt);
			fpr.MapRegsAndSpillLockV(vregs, V_Quad, 0);

			if (gpr.IsImm(rs)) {
				u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
				MOVI2R(R0, addr + (u32)Memory::base);
			} else {
				gpr.MapReg(rs);
				if (g_Config.bFastMemory) {
					SetR0ToEffectiveAddress(rs, imm);
				} else {
					SetCCAndR0ForSafeAddress(rs, imm, R1);
					doCheck = true;
				}
				ADD(R0, R0, R11);
			}

#ifdef __ARM_ARCH_7S__
			FixupBranch skip;
			if (doCheck) {
				skip = B_CC(CC_EQ); // invalid address: skip the stores
			}
			for (int i = 0; i < 4; i++)
				VSTR(fpr.V(vregs[i]), R0, i * 4);
			if (doCheck) {
				SetJumpTarget(skip);
				SetCC(CC_AL);
			}
#else
			for (int i = 0; i < 4; i++)
				VSTR(fpr.V(vregs[i]), R0, i * 4);
			if (doCheck) {
				// Stores were predicated; just restore unconditional CC.
				SetCC(CC_AL);
			}
#endif
		}
		break;

	default:
		DISABLE;
		break;
	}
	fpr.ReleaseSpillLocksAndDiscardTemps();
}
// Comp_SV: compile MIPS VFPU single-float load/store (lv.s / sv.s).
// Same address scheme as Comp_SVQ: constant-fold when rs is a known
// immediate, otherwise compute at runtime; non-fastmem builds leave the
// condition flags describing address validity for predication/skip.
void Jit::Comp_SV(MIPSOpcode op)
{
	CONDITIONAL_DISABLE;

	s32 imm = (signed short)(op&0xFFFC);
	int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
	MIPSGPReg rs = _RS;

	bool doCheck = false;
	switch (op >> 26)
	{
	case 50: //lv.s  // VI(vt) = Memory::Read_U32(addr);
		{
			// CC might be set by slow path below, so load regs first.
			fpr.MapRegV(vt, MAP_DIRTY | MAP_NOINIT);
			if (gpr.IsImm(rs)) {
				u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
				MOVI2R(R0, addr + (u32)Memory::base);
			} else {
				gpr.MapReg(rs);
				if (g_Config.bFastMemory) {
					SetR0ToEffectiveAddress(rs, imm);
				} else {
					// Sets flags: EQ when the address is invalid.
					SetCCAndR0ForSafeAddress(rs, imm, R1);
					doCheck = true;
				}
				ADD(R0, R0, R11); // R11 = Memory::base
			}
#ifdef __ARM_ARCH_7S__
			FixupBranch skip;
			if (doCheck) {
				skip = B_CC(CC_EQ);
			}
			VLDR(fpr.V(vt), R0, 0);
			if (doCheck) {
				SetJumpTarget(skip);
				SetCC(CC_AL);
			}
#else
			VLDR(fpr.V(vt), R0, 0);
			if (doCheck) {
				// Invalid address: load 0.0f instead.
				SetCC(CC_EQ);
				MOVI2F(fpr.V(vt), 0.0f, R0);
				SetCC(CC_AL);
			}
#endif
		}
		break;

	case 58: //sv.s  // Memory::Write_U32(VI(vt), addr);
		{
			// CC might be set by slow path below, so load regs first.
			fpr.MapRegV(vt);
			if (gpr.IsImm(rs)) {
				u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
				MOVI2R(R0, addr + (u32)Memory::base);
			} else {
				gpr.MapReg(rs);
				if (g_Config.bFastMemory) {
					SetR0ToEffectiveAddress(rs, imm);
				} else {
					SetCCAndR0ForSafeAddress(rs, imm, R1);
					doCheck = true;
				}
				ADD(R0, R0, R11);
			}
#ifdef __ARM_ARCH_7S__
			FixupBranch skip;
			if (doCheck) {
				skip = B_CC(CC_EQ);
			}
			VSTR(fpr.V(vt), R0, 0);
			if (doCheck) {
				SetJumpTarget(skip);
				SetCC(CC_AL);
			}
#else
			VSTR(fpr.V(vt), R0, 0);
			if (doCheck) {
				// Store was predicated; restore unconditional CC.
				SetCC(CC_AL);
			}
#endif
		}
		break;

	default:
		DISABLE;
	}
}
// lfXX: shared implementation for the PPC floating-point load family
// (lfs/lfd and their update/indexed variants). Decodes the opcode into
// single/double, update, and offset-register flags; computes the effective
// address; and either loads through fastmem (with NEON byte-swap) or via an
// ABI call to Memory::Read_U32/Read_F64. Single loads are duplicated into
// both paired-single lanes.
void JitArm::lfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;
	bool update = false;
	bool zeroA = false; // "RA may be zero" addressing form
	s32 offsetReg = -1; // -1 => immediate offset; otherwise index register

	// Every opcode in the switch sets either update or zeroA, so one of
	// the address-computation branches below always initializes rB.
	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 567: // lfsux
					single = true;
					update = true;
					offsetReg = b;
				break;
				case 535: // lfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 631: // lfdux
					update = true;
					offsetReg = b;
				break;
				case 599: // lfdx
					zeroA = true;
					offsetReg = b;
				break;
			}
		break;
		case 49: // lfsu
			update = true;
			single = true;
		break;
		case 48: // lfs
			single = true;
			zeroA = true;
		break;
		case 51: // lfdu
			update = true;
		break;
		case 50: // lfd
			zeroA = true;
		break;
	}
	ARMReg v0 = fpr.R0(inst.FD), v1;
	if (single)
		v1 = fpr.R1(inst.FD);

	if (update)
	{
		RA = gpr.R(a); // Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
		{
			ADD(rB, gpr.R(offsetReg), RA);
		}
	}
	else
	{
		if (zeroA)
		{
			// Effective address = (RA ? RA : 0) + offset/index.
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
				{
					MOVI2R(rB, (u32)offset);
				}
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
				{
					MOV(rB, RB);
				}
			}
		}
	}
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	// NOTE(review): Exceptions is a bitmask elsewhere (stX/lXX use TST);
	// CMP only matches when DSI is the sole pending bit — confirm intent.
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	if (update)
		MOV(RA, rB); // write back the effective address

	if (Core::g_CoreStartupParameter.bFastmem)
	{
		// Direct read from the mirrored memory map, then byte swap.
		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			VLDR(S0, rB, 0);
			nemit.VREV32(I_8, D0, D0); // Byte swap to result
			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0); // duplicate into second paired lane
		}
		else
		{
			VLDR(v0, rB, 0);
			nemit.VREV64(I_8, v0, v0); // Byte swap to result
		}
	}
	else
	{
		// Slow path: ABI call into the Memory read helpers.
		PUSH(4, R0, R1, R2, R3);
		MOV(R0, rB);
		if (single)
		{
			MOVI2R(rA, (u32)&Memory::Read_U32);
			BL(rA);

			VMOV(S0, R0);

			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Read_F64);
			BL(rA);

#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			// NOTE(review): presumably VMOV(DReg, R0) consumes the R0/R1
			// pair for the 64-bit value — confirm against the emitter.
			VMOV(v0, R0);
#else
			VMOV(v0, D0);
#endif
		}
		POP(4, R0, R1, R2, R3);
	}
	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
// Comp_FPULS: compile MIPS FPU load/store (lwc1 / swc1).
// Same address scheme as Comp_SV: constant-fold when rs is a known
// immediate, otherwise compute at runtime; non-fastmem builds leave the
// condition flags describing address validity for predication/skip.
void Jit::Comp_FPULS(u32 op)
{
	CONDITIONAL_DISABLE;

	s32 offset = (s16)(op & 0xFFFF);
	int ft = _FT;
	int rs = _RS;
	// u32 addr = R(rs) + offset;
	// logBlocks = 1;
	bool doCheck = false;
	switch(op >> 26)
	{
	case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1
		fpr.SpillLock(ft);
		fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);
		if (gpr.IsImm(rs)) {
			// Address known at compile time: fold into one constant.
			u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
			MOVI2R(R0, addr + (u32)Memory::base);
		} else {
			gpr.MapReg(rs);
			if (g_Config.bFastMemory) {
				SetR0ToEffectiveAddress(rs, offset);
			} else {
				// Sets flags: EQ when the address is invalid.
				SetCCAndR0ForSafeAddress(rs, offset, R1);
				doCheck = true;
			}
			ADD(R0, R0, R11); // R11 = Memory::base
		}
#ifdef __ARM_ARCH_7S__
		FixupBranch skip;
		if (doCheck) {
			skip = B_CC(CC_EQ);
		}
		VLDR(fpr.R(ft), R0, 0);
		if (doCheck) {
			SetJumpTarget(skip);
			SetCC(CC_AL);
		}
#else
		VLDR(fpr.R(ft), R0, 0);
		if (doCheck) {
			// Invalid address: load zero bits instead.
			SetCC(CC_EQ);
			MOVI2R(R0, 0);
			VMOV(fpr.R(ft), R0);
			SetCC(CC_AL);
		}
#endif
		fpr.ReleaseSpillLock(ft);
		break;

	case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1
		fpr.MapReg(ft);
		if (gpr.IsImm(rs)) {
			u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
			MOVI2R(R0, addr + (u32)Memory::base);
		} else {
			gpr.MapReg(rs);
			if (g_Config.bFastMemory) {
				SetR0ToEffectiveAddress(rs, offset);
			} else {
				SetCCAndR0ForSafeAddress(rs, offset, R1);
				doCheck = true;
			}
			ADD(R0, R0, R11);
		}
#ifdef __ARM_ARCH_7S__
		FixupBranch skip2;
		if (doCheck) {
			skip2 = B_CC(CC_EQ);
		}
		VSTR(fpr.R(ft), R0, 0);
		if (doCheck) {
			SetJumpTarget(skip2);
			SetCC(CC_AL);
		}
#else
		VSTR(fpr.R(ft), R0, 0);
		if (doCheck) {
			// Store was predicated; restore unconditional CC.
			SetCC(CC_AL);
		}
#endif
		break;

	default:
		Comp_Generic(op);
		return;
	}
}
// lXX: shared implementation for the PPC integer load family
// (lbz/lhz/lha/lwz, their update/indexed variants, and byte-reversed
// lwbrx/lhbrx). Decodes the opcode into access size / sign-extension /
// byte-reverse / update flags, skips the load when a DSI exception is
// pending, performs the load via SafeLoadToReg, writes the effective
// address back to RA for update forms, and finally applies the lwz
// idle-loop skip optimization.
void JitArm::lXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	u32 a = inst.RA, b = inst.RB, d = inst.RD;
	s32 offset = inst.SIMM_16;
	u32 accessSize = 0;
	s32 offsetReg = -1; // -1 => immediate offset; otherwise index register
	bool update = false;
	bool signExtend = false;
	bool reverse = false;
	bool fastmem = false;

	// Each "u" case deliberately falls through into its non-update twin.
	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 55: // lwzux
					update = true;
				case 23: // lwzx
					fastmem = true;
					accessSize = 32;
					offsetReg = b;
				break;
				case 119: //lbzux
					update = true;
				case 87: // lbzx
					fastmem = true;
					accessSize = 8;
					offsetReg = b;
				break;
				case 311: // lhzux
					update = true;
				case 279: // lhzx
					fastmem = true;
					accessSize = 16;
					offsetReg = b;
				break;
				case 375: // lhaux
					update = true;
				case 343: // lhax
					accessSize = 16;
					signExtend = true;
					offsetReg = b;
				break;
				case 534: // lwbrx
					accessSize = 32;
					reverse = true;
				break;
				case 790: // lhbrx
					accessSize = 16;
					reverse = true;
				break;
			}
		break;
		case 33: // lwzu
			update = true;
		case 32: // lwz
			fastmem = true;
			accessSize = 32;
		break;
		case 35: // lbzu
			update = true;
		case 34: // lbz
			fastmem = true;
			accessSize = 8;
		break;
		case 41: // lhzu
			update = true;
		case 40: // lhz
			fastmem = true;
			accessSize = 16;
		break;
		case 43: // lhau
			update = true;
		case 42: // lha
			signExtend = true;
			accessSize = 16;
		break;
	}

	// Check for exception before loading (bit test: other pending
	// exception bits don't interfere with the DSI check).
	ARMReg rA = gpr.GetReg(false);

	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	TST(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_NEQ);

	// Update forms always pass RA; plain forms pass RA or -1 when RA=0.
	SafeLoadToReg(fastmem, d, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse);

	if (update)
	{
		// RA <- effective address (immediate or register-indexed form).
		ARMReg RA = gpr.R(a);
		if (offsetReg == -1)
		{
			rA = gpr.GetReg(false);
			MOVI2R(rA, offset);
			ADD(RA, RA, rA);
		}
		else
		{
			ADD(RA, RA, gpr.R(offsetReg));
		}
	}

	SetJumpTarget(DoNotLoad);

	// LWZ idle skipping: recognize "lwz rX, off(r13); cmpwi 0; beq -8"
	// and call OnIdle instead of spinning until the next event.
	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
	    inst.OPCD == 32 &&
	    (inst.hex & 0xFFFF0000) == 0x800D0000 &&
	    (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
	     (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
	    Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
	{
		ARMReg RD = gpr.R(d);

		// if it's still 0, we can wait until the next event
		TST(RD, RD);
		FixupBranch noIdle = B_CC(CC_NEQ);

		gpr.Flush(FLUSH_MAINTAIN_STATE);
		fpr.Flush(FLUSH_MAINTAIN_STATE);

		rA = gpr.GetReg();

		MOVI2R(rA, (u32)&PowerPC::OnIdle);
		// NOTE(review): reads ppcState.gpr[a] at compile time — assumes
		// the base register value is stable for this idle pattern.
		MOVI2R(R0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
		BL(rA);

		gpr.Unlock(rA);
		WriteExceptionExit();

		SetJumpTarget(noIdle);
		//js.compilerPC += 8;
		return;
	}
}
// Generate: emits the JIT-IL dispatcher and its shared assembly routines
// (entry code, block dispatch, FPU-unavailable exception stub, timing
// slice advance, exception test, and exit). R9 is pinned to ppcState
// throughout generated code.
void JitArmILAsmRoutineManager::Generate()
{
	enterCode = GetCodePtr();
	PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR);

	// Take care to 8-byte align stack for function calls.
	// We are misaligned here because of an odd number of args for PUSH.
	// It's not like x86 where you need to account for an extra 4 bytes
	// consumed by CALL.
	SUB(_SP, _SP, 4);

	MOVI2R(R0, (u32)&CoreTiming::downcount);
	MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]); // pinned ppcState base

	FixupBranch skipToRealDispatcher = B();
	dispatcher = GetCodePtr();
	// NOTE(review): leftover debug output at code-generation time.
	printf("ILDispatcher is %p\n", dispatcher);

	// Downcount Check
	// The result of slice decrementation should be in flags if somebody jumped here
	// IMPORTANT - We jump on negative, not carry!!!
	FixupBranch bail = B_CC(CC_MI);

	SetJumpTarget(skipToRealDispatcher);
	dispatcherNoCheck = GetCodePtr();

	// This block of code gets the address of the compiled block of code
	// It runs though to the compiling portion if it isn't found
	LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12

	Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK
	BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here.

	MOVI2R(R14, (u32)jit->GetBlockCache()->iCache);

	LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here
	// R12 Confirmed this is the correct iCache Location loaded.
	TST(R12, 0x80); // Test  to see if it is a JIT block.

	SetCC(CC_EQ); // All instructions below are conditional on "is a block".
	// Success, it is our Jitblock.
	MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers());
	// LDR R14 right here to get CodePointers()[0] pointer.
	LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size
	LDR(R14, R14, R12); // Load the block address in to R14
	B(R14);
	// No need to jump anywhere after here, the block will go back to dispatcher start
	SetCC();

	// If we get to this point, that means that we don't have the block cached to execute
	// So call ArmJit to compile the block and then execute it.
	MOVI2R(R14, (u32)&Jit);
	BL(R14);
	B(dispatcherNoCheck); // retry dispatch with the freshly compiled block

	// fpException()
	// Floating Point Exception Check, Jumped to if false
	fpException = GetCodePtr();
	LDR(R0, R9, PPCSTATE_OFF(Exceptions));
	ORR(R0, R0, EXCEPTION_FPU_UNAVAILABLE);
	STR(R0, R9, PPCSTATE_OFF(Exceptions));
	QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions);
	LDR(R0, R9, PPCSTATE_OFF(npc));
	STR(R0, R9, PPCSTATE_OFF(pc));
	B(dispatcher);

	SetJumpTarget(bail);
	doTiming = GetCodePtr();
	// XXX: In JIT64, Advance() gets called /after/ the exception checking
	// once it jumps back to the start of outerLoop
	QuickCallFunction(R14, (void*)&CoreTiming::Advance);

	// Does exception checking
	testExceptions = GetCodePtr();
	LDR(R0, R9, PPCSTATE_OFF(pc));
	STR(R0, R9, PPCSTATE_OFF(npc));
	QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions);
	LDR(R0, R9, PPCSTATE_OFF(npc));
	STR(R0, R9, PPCSTATE_OFF(pc));

	// Check the state pointer to see if we are exiting
	// Gets checked on every exception check
	MOVI2R(R0, (u32)PowerPC::GetStatePtr());
	MVN(R1, 0); // R1 = 0xFFFFFFFF
	LDR(R0, R0);
	TST(R0, R1); // any nonzero state value means "stop running"
	FixupBranch Exit = B_CC(CC_NEQ);

	B(dispatcher);

	SetJumpTarget(Exit);

	ADD(_SP, _SP, 4);

	POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns

	GenerateCommon();
	FlushIcache();
}