// lfs: load a 32-bit single from memory and widen it into the destination FPR.
void Jit64::lfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	const int dest_fpr = inst.RD;
	const int base_gpr = inst.RA;

	// The rA == 0 encoding is not handled here; take the fallback path.
	FALLBACK_IF(base_gpr == 0);

	const s32 displacement = (s32)(s16)inst.SIMM_16;

	// Fetch the raw 32-bit word into EAX.
	SafeLoadToReg(EAX, gpr.R(base_gpr), 32, displacement, RegistersInUse(), false);

	MEMCHECK_START

	// Convert the word in EAX to a double in the host register bound to the FPR.
	fpr.Lock(dest_fpr);
	fpr.BindToRegister(dest_fpr, false);
	ConvertSingleToDouble(fpr.RX(dest_fpr), EAX, true);

	MEMCHECK_END

	fpr.UnlockAll();
}
// ps_sum0 IR lowering. Currently disabled: the first statement always falls back,
// so the IR construction below is never reached.
void JitILBase::ps_sum(UGeckoInstruction inst)
{
	// TODO: This operation strikes me as a bit strange...
	// perhaps we can optimize it depending on the users?
	// TODO: ps_sum breaks Sonic Colours (black screen)
	FALLBACK_IF(true);

	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc || inst.SUBOP5 != 10);

	// Duplicate lane 0 of FA across the packed value.
	IREmitter::InstLoc acc = ibuild.EmitFPDup0(ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FA)));

	// Add FB to it.
	IREmitter::InstLoc packed_b = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
	acc = ibuild.EmitFPAdd(acc, packed_b);

	// Merge with FC, then expand and store to FD.
	IREmitter::InstLoc packed_c = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
	acc = ibuild.EmitFPMerge11(acc, packed_c);

	ibuild.EmitStoreFReg(ibuild.EmitExpandPackedToMReg(acc), inst.FD);
}
// subfic: rD = ~rA + SIMM + 1, with the carry-out stored via EmitStoreCarry.
void JitILBase::subfic(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	// ~rA
	IREmitter::InstLoc inverted_a = ibuild.EmitXor(ibuild.EmitLoadGReg(inst.RA), ibuild.EmitIntConst(-1));
	IREmitter::InstLoc result;
	IREmitter::InstLoc carry_out;

	if (inst.SIMM_16 != -1)
	{
		// General case: add (SIMM + 1); carry is set when the sum wrapped
		// below the constant that was added.
		IREmitter::InstLoc imm_plus_one = ibuild.EmitIntConst(inst.SIMM_16 + 1);
		result = ibuild.EmitAdd(inverted_a, imm_plus_one);
		carry_out = ibuild.EmitICmpUgt(imm_plus_one, result);
	}
	else
	{
		// SIMM == -1: the added constant is zero, the result is just ~rA
		// and the carry is a constant 1.
		result = inverted_a;
		carry_out = ibuild.EmitIntConst(1);
	}

	ibuild.EmitStoreGReg(result, inst.RD);
	ibuild.EmitStoreCarry(carry_out);
}
// lfs for the ARM JIT.
// NOTE: the instruction is unconditionally handed to Default() and the function
// returns right after, so the hand-written emitter sequence below is unreachable.
void JitArm::lfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStoreFloating)

	// Always taken: everything after this line is dead code.
	Default(inst); return;

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();

	// Skip the load when the Exceptions word equals EXCEPTION_DSI.
	LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions));
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	// rB = effective address: SIMM_16 (+ rA when the base register is non-zero).
	if (inst.RA)
	{
		MOVI2R(rB, inst.SIMM_16);
		ARMReg RA = gpr.R(inst.RA);
		ADD(rB, rB, RA);
	}
	else
		MOVI2R(rB, (u32)inst.SIMM_16);

	// Call Memory::Read_U32 with the address in R0; result comes back in R0.
	MOVI2R(rA, (u32)&Memory::Read_U32);
	PUSH(4, R0, R1, R2, R3);
	MOV(R0, rB);
	BL(rA);
	MOV(rA, R0);
	POP(4, R0, R1, R2, R3);

	// Both halves of the destination FPR receive the loaded value.
	ARMReg v0 = fpr.R0(inst.FD, false);
	ARMReg v1 = fpr.R1(inst.FD, false);

	VMOV(v0, rA, false);
	VMOV(v1, rA, false);

	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
void JitArm64::crXXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); // Special case: crclr if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) { // Clear CR field bit int field = inst.CRBD >> 2; int bit = 3 - (inst.CRBD & 3); ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); switch (bit) { case CR_SO_BIT: AND(XA, XA, 64 - 62, 62, true); // XA & ~(1<<61) break; case CR_EQ_BIT: ORR(XA, XA, 0, 0, true); // XA | 1<<0 break; case CR_GT_BIT: ORR(XA, XA, 64 - 63, 0, true); // XA | 1<<63 break; case CR_LT_BIT: AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62) break; } STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); gpr.Unlock(WA); return; }
// psq_l / psq_lu: paired-single quantized load (W == 0 form only).
void JitILBase::psq_l(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStorePairedOff);

	// Fall back for memcheck builds and W == 1. Also fall back when data
	// address translation is off: for performance, the AsmCommon routines
	// assume address translation is on.
	FALLBACK_IF(jo.memcheck || inst.W || !UReg_MSR(MSR).DR);

	// Effective address = SIMM_12 (+ rA when rA != 0).
	IREmitter::InstLoc effective_addr = ibuild.EmitIntConst(inst.SIMM_12);
	if (inst.RA)
	{
		effective_addr = ibuild.EmitAdd(effective_addr, ibuild.EmitLoadGReg(inst.RA));
	}

	// Opcode 57 is the update form: write the address back to rA.
	if (inst.OPCD == 57)
	{
		ibuild.EmitStoreGReg(effective_addr, inst.RA);
	}

	// The lower 3 bits is for GQR index. The next 1 bit is for inst.W
	IREmitter::InstLoc loaded = ibuild.EmitLoadPaired(effective_addr, inst.I | (inst.W << 3));
	loaded = ibuild.EmitExpandPackedToMReg(loaded);
	ibuild.EmitStoreFReg(loaded, inst.RD);
}
// subfex: rD = ~rA + rB + CA, updating the carry (and CR0 when Rc is set).
void JitILBase::subfex(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	if (inst.OE)
	{
		PanicAlert("OE: subfex");
	}

	IREmitter::InstLoc reg_a = ibuild.EmitLoadGReg(inst.RA);
	IREmitter::InstLoc carry_in = ibuild.EmitLoadCarry();

	// addend = ~rA + CA
	IREmitter::InstLoc addend = ibuild.EmitXor(reg_a, ibuild.EmitIntConst(-1));
	addend = ibuild.EmitAdd(addend, carry_in);

	// First carry source: (~rA + CA) wrapped to zero while CA was set.
	IREmitter::InstLoc carry_out = ibuild.EmitICmpEq(addend, ibuild.EmitIntConst(0));
	carry_out = ibuild.EmitAnd(carry_out, carry_in);

	IREmitter::InstLoc reg_b = ibuild.EmitLoadGReg(inst.RB);
	IREmitter::InstLoc difference = ibuild.EmitAdd(reg_b, addend);
	ibuild.EmitStoreGReg(difference, inst.RD);

	// Second carry source: the final add wrapped (rB > result, unsigned).
	carry_out = ibuild.EmitOr(carry_out, ibuild.EmitICmpUgt(reg_b, difference));
	ibuild.EmitStoreCarry(carry_out);

	if (inst.Rc)
	{
		ComputeRC(ibuild, difference);
	}
}
// addex: rD = rA + rB + CA, updating the carry (and CR0 when Rc is set).
void JitILBase::addex(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	IREmitter::InstLoc lhs = ibuild.EmitLoadGReg(inst.RA);
	IREmitter::InstLoc rhs = ibuild.EmitLoadGReg(inst.RB);

	// Stage 1: rA + rB; carry if the sum is (unsigned) below rA.
	IREmitter::InstLoc partial_sum = ibuild.EmitAdd(lhs, rhs);
	IREmitter::InstLoc carry_out = ibuild.EmitICmpUlt(partial_sum, lhs);

	// Stage 2: add the incoming carry; carry if that add wrapped as well.
	IREmitter::InstLoc carry_in = ibuild.EmitLoadCarry();
	IREmitter::InstLoc total = ibuild.EmitAdd(partial_sum, carry_in);
	carry_out = ibuild.EmitOr(carry_out, ibuild.EmitICmpUlt(total, partial_sum));

	ibuild.EmitStoreGReg(total, inst.RD);
	ibuild.EmitStoreCarry(carry_out);

	if (inst.OE)
	{
		PanicAlert("OE: addex");
	}

	if (inst.Rc)
	{
		ComputeRC(ibuild, total);
	}
}
// ps_sel: per lane, D = (A >= 0) ? C : B.
void JitArm::ps_sel(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

	// Register acquisition order is kept: A, B, C (both lanes each), then D.
	ARMReg vA0 = fpr.R0(a);
	ARMReg vA1 = fpr.R1(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vC0 = fpr.R0(c);
	ARMReg vC1 = fpr.R1(c);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	const ARMReg selector[2] = { vA0, vA1 };
	const ARMReg when_below[2] = { vB0, vB1 };
	const ARMReg when_at_or_above[2] = { vC0, vC1 };
	const ARMReg destination[2] = { vD0, vD1 };

	for (int lane = 0; lane < 2; lane++)
	{
		// Compare the selector lane (single-operand VCMP) and pull the
		// VFP flags into the APSR.
		VCMP(selector[lane]);
		VMRS(_PC);

		FixupBranch take_c = B_CC(CC_GE);
		VMOV(destination[lane], when_below[lane]);
		FixupBranch lane_done = B();

		SetJumpTarget(take_c);
		VMOV(destination[lane], when_at_or_above[lane]);

		SetJumpTarget(lane_done);
	}
}
// fadd/fsub/fmul and their single-precision variants (opcode 59).
void JitILBase::fp_arith_s(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);

	const bool is_sub = (inst.SUBOP5 == 20);
	const bool is_add = (inst.SUBOP5 == 21);
	const bool is_mul = (inst.SUBOP5 == 25);

	// Only these three sub-opcodes are handled, and never the Rc form.
	FALLBACK_IF(inst.Rc || !(is_mul || is_sub || is_add));

	// Only the interpreter has "proper" support for (some) FP flags
	FALLBACK_IF(is_mul && SConfig::GetInstance().bFPRF);

	IREmitter::InstLoc result = ibuild.EmitLoadFReg(inst.FA);

	if (is_sub)
	{
		result = ibuild.EmitFDSub(result, ibuild.EmitLoadFReg(inst.FB));
	}
	else if (is_add)
	{
		result = ibuild.EmitFDAdd(result, ibuild.EmitLoadFReg(inst.FB));
	}
	else if (is_mul)
	{
		// Multiply takes its second operand from FC, not FB.
		result = ibuild.EmitFDMul(result, ibuild.EmitLoadFReg(inst.FC));
	}
	else
	{
		_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
	}

	if (inst.OPCD != 59)
	{
		// Double-precision form: insert the result into the existing FD value.
		result = ibuild.EmitInsertDoubleInMReg(result, ibuild.EmitLoadFReg(inst.FD));
	}
	else
	{
		// Single-precision form: round to single and duplicate it.
		result = ibuild.EmitDoubleToSingle(result);
		result = ibuild.EmitDupSingleToMReg(result);
	}

	ibuild.EmitStoreFReg(result, inst.FD);
}
// Shared ARM emitter for the floating-point load family:
// lfs/lfsu/lfd/lfdu and the indexed forms lfsx/lfsux/lfdx/lfdux.
// Decodes the variant into flags, computes the effective address into rB,
// then loads through either the fastmem path or a call to the Memory:: readers.
void JitArm::lfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;   // single-precision load
	bool update = false;   // write the effective address back to rA
	bool zeroA = false;    // non-update form: rA == 0 means "no base register"
	s32 offsetReg = -1;    // index register, or -1 for the SIMM_16 forms

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 567: // lfsux
					single = true;
					update = true;
					offsetReg = b;
				break;
				case 535: // lfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 631: // lfdux
					update = true;
					offsetReg = b;
				break;
				case 599: // lfdx
					zeroA = true;
					offsetReg = b;
				break;
			}
		break;
		case 49: // lfsu
			update = true;
			single = true;
		break;
		case 48: // lfs
			single = true;
			zeroA = true;
		break;
		case 51: // lfdu
			update = true;
		break;
		case 50: // lfd
			zeroA = true;
		break;
	}

	// v1 is only acquired (and only used) for single-precision loads.
	ARMReg v0 = fpr.R0(inst.FD), v1;
	if (single)
		v1 = fpr.R1(inst.FD);

	// Compute the effective address into rB.
	if (update)
	{
		RA = gpr.R(a);
		// Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
		{
			ADD(rB, gpr.R(offsetReg), RA);
		}
	}
	else
	{
		if (zeroA)
		{
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
				{
					MOVI2R(rB, (u32)offset);
				}
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
				{
					MOV(rB, RB);
				}
			}
		}
	}

	// Skip the load (and the write-back) when the Exceptions word equals EXCEPTION_DSI.
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	if (update)
		MOV(RA, rB);

	if (Core::g_CoreStartupParameter.bFastmem)
	{
		// Mask the address and add Memory::base to get a host pointer in rB.
		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			VLDR(S0, rB, 0);
			nemit.VREV32(I_8, D0, D0); // Byte swap to result
			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			VLDR(v0, rB, 0);
			nemit.VREV64(I_8, v0, v0); // Byte swap to result
		}
	}
	else
	{
		// Slow path: call the Memory:: reader with the address in R0.
		PUSH(4, R0, R1, R2, R3);
		MOV(R0, rB);
		if (single)
		{
			MOVI2R(rA, (u32)&Memory::Read_U32);
			BL(rA);

			VMOV(S0, R0);

			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Read_F64);
			BL(rA);

#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			VMOV(v0, R0);
#else
			VMOV(v0, D0);
#endif
		}
		POP(4, R0, R1, R2, R3);
	}

	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
// Shared ARM emitter for the floating-point store family:
// stfs/stfsu/stfd/stfdu and the indexed forms stfsx/stfsux/stfdx/stfdux.
// Decodes the variant into flags, computes the effective address into rB,
// then stores through either the fastmem path or a call to the Memory:: writers.
void JitArm::stfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;   // single-precision store
	bool update = false;   // write the effective address back to rA
	bool zeroA = false;    // non-update form: rA == 0 means "no base register"
	s32 offsetReg = -1;    // index register, or -1 for the SIMM_16 forms

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 663: // stfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 695: // stfsux
					// This is the update form. Without update (or zeroA) set,
					// neither address-computation branch below runs and rB
					// would be used uninitialised.
					update = true;
					single = true;
					offsetReg = b;
				break;
				case 727: // stfdx
					zeroA = true;
					offsetReg = b;
				break;
				case 759: // stfdux
					update = true;
					offsetReg = b;
				break;
			}
		break;
		case 53: // stfsu
			update = true;
			single = true;
		break;
		case 52: // stfs
			single = true;
			zeroA = true;
		break;
		case 55: // stfdu
			update = true;
		break;
		case 54: // stfd
			zeroA = true;
		break;
	}

	ARMReg v0 = fpr.R0(inst.FS);

	// Compute the effective address into rB.
	if (update)
	{
		RA = gpr.R(a);
		// Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
		{
			ADD(rB, gpr.R(offsetReg), RA);
		}
	}
	else
	{
		if (zeroA)
		{
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
				{
					MOVI2R(rB, (u32)offset);
				}
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
				{
					MOV(rB, RB);
				}
			}
		}
	}

	if (update)
	{
		// Write the address back to rA unless the Exceptions word equals
		// EXCEPTION_DSI (the MOV is predicated on "not equal").
		LDR(rA, R9, PPCSTATE_OFF(Exceptions));
		CMP(rA, EXCEPTION_DSI);

		SetCC(CC_NEQ);
		MOV(RA, rB);
		SetCC();
	}

	if (Core::g_CoreStartupParameter.bFastmem)
	{
		// Mask the address and add Memory::base to get a host pointer in rB.
		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			// Convert to single, byte swap, store.
			VCVT(S0, v0, 0);
			nemit.VREV32(I_8, D0, D0);
			VSTR(S0, rB, 0);
		}
		else
		{
			// Byte swap into D0, store.
			nemit.VREV64(I_8, D0, v0);
			VSTR(D0, rB, 0);
		}
	}
	else
	{
		// Slow path: call the Memory:: writer (value first, then address).
		PUSH(4, R0, R1, R2, R3);
		if (single)
		{
			MOVI2R(rA, (u32)&Memory::Write_U32);
			VCVT(S0, v0, 0);
			VMOV(R0, S0);
			MOV(R1, rB);

			BL(rA);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Write_F64);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			VMOV(R0, v0);
			MOV(R2, rB);
#else
			VMOV(D0, v0);
			MOV(R0, rB);
#endif

			BL(rA);
		}
		POP(4, R0, R1, R2, R3);
	}

	gpr.Unlock(rA, rB);
}
// Integer register/immediate instructions:
// addi, addis, ori, oris, andi., andis., xori, xoris, addic, addic.
void JitILBase::reg_imm(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	const int rd = inst.RD;
	const int ra = inst.RA;
	const int rs = inst.RS;
	IREmitter::InstLoc result;

	switch (inst.OPCD)
	{
	case 14: // addi
	case 15: // addis
		if (inst.OPCD == 14)
			result = ibuild.EmitIntConst(inst.SIMM_16);
		else
			result = ibuild.EmitIntConst(inst.SIMM_16 << 16);

		// rA == 0 means "no base": the result is just the immediate.
		if (ra)
			result = ibuild.EmitAdd(ibuild.EmitLoadGReg(ra), result);

		ibuild.EmitStoreGReg(result, rd);
		break;

	case 24: // ori
	case 25: // oris
		if (inst.OPCD == 24)
			result = ibuild.EmitIntConst(inst.UIMM);
		else
			result = ibuild.EmitIntConst(inst.UIMM << 16);

		result = ibuild.EmitOr(ibuild.EmitLoadGReg(rs), result);
		ibuild.EmitStoreGReg(result, ra);
		break;

	case 28: // andi
	case 29: // andis
		if (inst.OPCD == 28)
			result = ibuild.EmitIntConst(inst.UIMM);
		else
			result = ibuild.EmitIntConst(inst.UIMM << 16);

		result = ibuild.EmitAnd(ibuild.EmitLoadGReg(rs), result);
		ibuild.EmitStoreGReg(result, ra);
		// The AND-immediate forms always update the condition register.
		ComputeRC(ibuild, result);
		break;

	case 26: // xori
	case 27: // xoris
		if (inst.OPCD == 26)
			result = ibuild.EmitIntConst(inst.UIMM);
		else
			result = ibuild.EmitIntConst(inst.UIMM << 16);

		result = ibuild.EmitXor(ibuild.EmitLoadGReg(rs), result);
		ibuild.EmitStoreGReg(result, ra);
		break;

	case 12: // addic
	case 13: // addic_rc
	{
		IREmitter::InstLoc imm = ibuild.EmitIntConst(inst.SIMM_16);
		result = ibuild.EmitAdd(ibuild.EmitLoadGReg(ra), imm);
		ibuild.EmitStoreGReg(result, rd);

		// Carry out: the immediate is (unsigned) greater than the sum.
		IREmitter::InstLoc carry_out = ibuild.EmitICmpUgt(imm, result);
		ibuild.EmitStoreCarry(carry_out);

		if (inst.OPCD == 13)
			ComputeRC(ibuild, result);
		break;
	}

	default:
		FALLBACK_IF(true);
	}
}
// mftb is handled by the same emitter as mfspr.
void JitArm64::mftb(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	mfspr(inst);
}
// mftb is handled by the same emitter as mfspr.
void JitArm::mftb(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)

	mfspr(inst);
}
// fcmpo: ordered floating-point compare of FA and FB.
// Writes the 4-bit result into cr_fast[CRFD], mirrors it into the FPSCR bits
// covered by FPRFMask, and raises FP exception bits on the unordered path.
void JitArm::fcmpo(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 a = inst.FA, b = inst.FB;
	int cr = inst.CRFD;

	ARMReg vA = fpr.R0(a);
	ARMReg vB = fpr.R0(b);
	ARMReg fpscrReg = gpr.GetReg();
	ARMReg crReg = gpr.GetReg();
	Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
	Operand2 LessThan(0x8, 0xA); // 0x8000
	Operand2 GreaterThan(0x4, 0xA); // 0x4000
	Operand2 EqualTo(0x2, 0xA); // 0x2000
	Operand2 NANRes(0x1, 0xA); // 0x1000
	FixupBranch Done1, Done2, Done3;

	// Load FPSCR and clear the previous result bits.
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	BIC(fpscrReg, fpscrReg, FPRFMask);

	VCMPE(vA, vB);
	VMRS(_PC);

	// Each predicated group below sets the FPSCR bits and the CR value for
	// one outcome, then branches to the common tail.
	SetCC(CC_LT);
		ORR(fpscrReg, fpscrReg, LessThan);
		MOV(crReg, 8);
		Done1 = B();
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, GreaterThan);
		MOV(crReg, 4);
		Done2 = B();
	SetCC(CC_EQ);
		ORR(fpscrReg, fpscrReg, EqualTo);
		MOV(crReg, 2);
		Done3 = B();
	SetCC();

	// None of the above matched: unordered result.
	ORR(fpscrReg, fpscrReg, NANRes);
	MOV(crReg, 1);

	// Self-compare each operand; a "not equal" result selects the NanA/NanB path.
	VCMPE(vA, vA);
	VMRS(_PC);
	FixupBranch NanA = B_CC(CC_NEQ);
	VCMPE(vB, vB);
	VMRS(_PC);
	FixupBranch NanB = B_CC(CC_NEQ);

	// Neither self-compare branched: raise VXVC only.
	SetFPException(fpscrReg, FPSCR_VXVC);
	FixupBranch Done4 = B();

	SetJumpTarget(NanA);
	SetJumpTarget(NanB);

	// One of the self-compares branched: raise VXSNAN, and additionally
	// VXVC when the VEMask test leaves the Z flag set.
	SetFPException(fpscrReg, FPSCR_VXSNAN);

	TST(fpscrReg, VEMask);

	FixupBranch noVXVC = B_CC(CC_NEQ);
	SetFPException(fpscrReg, FPSCR_VXVC);

	SetJumpTarget(noVXVC);
	SetJumpTarget(Done1);
	SetJumpTarget(Done2);
	SetJumpTarget(Done3);
	SetJumpTarget(Done4);

	// Common tail: commit the CR field and the updated FPSCR.
	STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(fpscrReg, crReg);
}
// x86-64 emitter for the integer register/immediate instructions
// (addi, addis, ori, oris, andi., andis., xori, xoris, addic, addic.).
// Most cases forward to regimmop(); addi and lis get extra handling for
// immediate propagation and for merging "lis + addi/ori" pairs.
void Jit64::reg_imm(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(Integer)
	u32 d = inst.RD, a = inst.RA, s = inst.RS;
	switch (inst.OPCD)
	{
	case 14: // addi
		// occasionally used as MOV - emulate, with immediate propagation
		if (gpr.R(a).IsImm() && d != a && a != 0)
		{
			// Source is a known immediate: fold the add at compile time.
			gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)(s32)(s16)inst.SIMM_16);
		}
		else if (inst.SIMM_16 == 0 && d != a && a != 0)
		{
			// addi rD, rA, 0 with distinct registers is a plain register move.
			gpr.Lock(a, d);
			gpr.BindToRegister(d, false, true);
			MOV(32, gpr.R(d), gpr.R(a));
			gpr.UnlockAll();
		}
		else
		{
			regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD); //addi
		}
		break;
	case 15:
		if (a == 0) { // lis
			// Merge with next instruction if loading a 32-bits immediate value (lis + addi, lis + ori)
			if (!js.isLastInstruction && !Core::g_CoreStartupParameter.bEnableDebugging)
			{
				if ((js.next_inst.OPCD == 14) && (js.next_inst.RD == d) && (js.next_inst.RA == d))
				{
					// addi
					gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) + (u32)(s32)js.next_inst.SIMM_16);
					// Account for the skipped instruction and mark it consumed.
					js.downcountAmount++;
					js.skipnext = true;
					break;
				}
				else if ((js.next_inst.OPCD == 24) && (js.next_inst.RA == d) && (js.next_inst.RS == d))
				{
					// ori
					gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) | (u32)js.next_inst.UIMM);
					js.downcountAmount++;
					js.skipnext = true;
					break;
				}
			}

			// Not merged
			regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD);
		}
		else { // addis
			regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD);
		}
		break;
	case 24:
		if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop
		{NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one.
		regimmop(a, s, true, inst.UIMM, Or, &XEmitter::OR);
		break; //ori
	case 25: regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false); break;//oris
	case 28: regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true); break; //andi_rc
	case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break; //andis_rc
	case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori
	case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris
	case 12: regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true); break; //addic
	case 13: regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true); break; //addic_rc
	default:
		// Any other opcode routed here is passed to Default().
		Default(inst);
		break;
	}
}
// stfd: store a 64-bit double from FPR s to (rA + SIMM_16).
// Emits an address-mask test that selects between a fast direct-write path
// and a "safe" path built from two SafeWriteRegToReg() calls.
void Jit64::stfd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	// Not handled here: memcheck mode and the rA == 0 encoding.
	FALLBACK_IF(js.memcheck || !inst.RA);

	int s = inst.RS;
	int a = inst.RA;

	// Address bits that force the safe path when any of them is set.
	u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
	if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
	{
		mem_mask |= Memory::ADDR_MASK_MEM1;
	}
#ifdef ENABLE_MEM_CHECK
	if (Core::g_CoreStartupParameter.bEnableDebugging)
	{
		mem_mask |= Memory::EXRAM_MASK;
	}
#endif

	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	fpr.Lock(s);
	gpr.BindToRegister(a, true, false);

	// ABI_PARAM1 = rA + offset
	s32 offset = (s32)(s16)inst.SIMM_16;
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	TEST(32, R(ABI_PARAM1), Imm32(mem_mask));
	FixupBranch safe = J_CC(CC_NZ);

	// Fast routine
	if (cpu_info.bSSSE3)
	{
		// Shuffle the bytes with the bswapShuffle1x8 table and store 64 bits at once.
		MOVAPD(XMM0, fpr.R(s));
		PSHUFB(XMM0, M((void*)bswapShuffle1x8));
#if _M_X86_64
		MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, 0), XMM0);
#else
		// NOTE(review): this masks ECX while the store addresses ABI_PARAM1 —
		// presumably the same register on this target; confirm.
		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base), XMM0);
#endif
	}
	else
	{
		// Without SSSE3: write the low 32 bits at +4, then the high 32 bits at +0.
		MOVAPD(XMM0, fpr.R(s));
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);

		PSRLQ(XMM0, 32);
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
	}
	FixupBranch exit = J(true);
	SetJumpTarget(safe);

	// Safe but slow routine
	// High 32 bits first (offset 0); XMM0 is added to the in-use register mask for this call.
	MOVAPD(XMM0, fpr.R(s));
	PSRLQ(XMM0, 32);
	MOVD_xmm(R(EAX), XMM0);
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));

	// Low 32 bits (offset 4); the address is recomputed into ABI_PARAM1 before the second write.
	MOVAPD(XMM0, fpr.R(s));
	MOVD_xmm(R(EAX), XMM0);
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());

	SetJumpTarget(exit);

	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
// Paired-single multiply-add family:
// ps_madds0, ps_madds1, ps_msub, ps_madd, ps_nmsub, ps_nmadd.
// All variants compute FA * FC (optionally with one FC lane duplicated),
// then add or subtract FB, then optionally negate.
void JitILBase::ps_maddXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	// Decode the variant into a handful of flags.
	bool recognised = true;
	bool dup_lane0 = false;
	bool dup_lane1 = false;
	bool subtract = false;
	bool negate = false;

	switch (inst.SUBOP5)
	{
	case 14: // madds0
		dup_lane0 = true;
		break;
	case 15: // madds1
		dup_lane1 = true;
		break;
	case 28: // msub
		subtract = true;
		break;
	case 29: // madd
		break;
	case 30: // nmsub
		subtract = true;
		negate = true;
		break;
	case 31: // nmadd
		negate = true;
		break;
	default:
		// Unknown sub-opcode: FA is passed through unchanged.
		recognised = false;
		break;
	}

	IREmitter::InstLoc acc = ibuild.EmitLoadFReg(inst.FA);
	acc = ibuild.EmitCompactMRegToPacked(acc);

	if (recognised)
	{
		IREmitter::InstLoc multiplier = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
		if (dup_lane0)
			multiplier = ibuild.EmitFPDup0(multiplier);
		else if (dup_lane1)
			multiplier = ibuild.EmitFPDup1(multiplier);

		acc = ibuild.EmitFPMul(acc, multiplier);

		IREmitter::InstLoc addend = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
		if (subtract)
			acc = ibuild.EmitFPSub(acc, addend);
		else
			acc = ibuild.EmitFPAdd(acc, addend);

		if (negate)
			acc = ibuild.EmitFPNeg(acc);
	}

	acc = ibuild.EmitExpandPackedToMReg(acc);
	ibuild.EmitStoreFReg(acc, inst.FD);
}
// tw / twi: compare rA with rB (tw) or SIMM_16 (twi) and raise a program
// exception when any condition selected by the TO field holds.
void JitArm64::twx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	const s32 ra = inst.RA;

	ARM64Reg scratch = gpr.GetReg();

	if (inst.OPCD != 3) // tw
	{
		CMP(gpr.R(ra), gpr.R(inst.RB));
	}
	else if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 4096) // twi, small immediate
	{
		// Can fit in immediate in to the instruction encoding
		CMP(gpr.R(ra), inst.SIMM_16);
	}
	else // twi, immediate needs a register
	{
		MOVI2R(scratch, (s32)(s16)inst.SIMM_16);
		CMP(gpr.R(ra), scratch);
	}

	// One conditional branch per TO bit that is set, indexed by bit number.
	// NOTE(review): confirm this table against the PowerPC TO-field bit
	// assignment (signed vs. unsigned comparisons).
	const CCFlags trap_conditions[] = { CC_LT, CC_GT, CC_EQ, CC_VC, CC_VS };

	std::vector<FixupBranch> trap_branches;
	for (int bit = 0; bit < 5; ++bit)
	{
		if ((inst.TO & (1 << bit)) == 0)
			continue;

		trap_branches.push_back(B(trap_conditions[bit]));
	}

	// No condition matched: jump over the trap sequence.
	FixupBranch no_trap = B();

	for (size_t i = 0; i < trap_branches.size(); ++i)
	{
		SetJumpTarget(trap_branches[i]);
	}

	// Trap path: flush state, set the program-exception bit and exit.
	gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
	fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);

	LDR(INDEX_UNSIGNED, scratch, X29, PPCSTATE_OFF(Exceptions));
	ORR(scratch, scratch, 24, 0); // Same as WA | EXCEPTION_PROGRAM
	STR(INDEX_UNSIGNED, scratch, X29, PPCSTATE_OFF(Exceptions));

	MOVI2R(scratch, js.compilerPC);

	// The scratch register is unlocked in this function
	WriteExceptionExit(scratch);

	SetJumpTarget(no_trap);

	if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
	{
		gpr.Flush(FlushMode::FLUSH_ALL);
		fpr.Flush(FlushMode::FLUSH_ALL);
		WriteExit(js.compilerPC + 4);
	}
}
// Shared ARM emitter for the integer load family:
// lwz/lbz/lhz/lha, their update forms, the indexed forms, and the
// byte-reversed indexed loads lwbrx/lhbrx.
// Decodes the variant into flags, emits the load through SafeLoadToReg(),
// performs the rA write-back for update forms, and finally applies the
// "lwz idle loop" skipping optimisation.
void JitArm::lXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	u32 a = inst.RA, b = inst.RB, d = inst.RD;
	s32 offset = inst.SIMM_16;
	u32 accessSize = 0;
	s32 offsetReg = -1;      // index register, or -1 for the SIMM_16 forms
	bool update = false;     // write the effective address back to rA
	bool signExtend = false;
	bool reverse = false;    // byte-reversed load
	bool fastmem = false;

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 55: // lwzux
					update = true;
					// fall through
				case 23: // lwzx
					fastmem = true;
					accessSize = 32;
					offsetReg = b;
				break;
				case 119: //lbzux
					update = true;
					// fall through
				case 87: // lbzx
					fastmem = true;
					accessSize = 8;
					offsetReg = b;
				break;
				case 311: // lhzux
					update = true;
					// fall through
				case 279: // lhzx
					fastmem = true;
					accessSize = 16;
					offsetReg = b;
				break;
				case 375: // lhaux
					update = true;
					// fall through
				case 343: // lhax
					accessSize = 16;
					signExtend = true;
					offsetReg = b;
				break;
				case 534: // lwbrx
					// Indexed (X-form) load: the address comes from rB, not
					// from the SIMM_16 bits of the encoding.
					accessSize = 32;
					reverse = true;
					offsetReg = b;
				break;
				case 790: // lhbrx
					// Indexed (X-form) load, same as lwbrx.
					accessSize = 16;
					reverse = true;
					offsetReg = b;
				break;
			}
		break;
		case 33: // lwzu
			update = true;
			// fall through
		case 32: // lwz
			fastmem = true;
			accessSize = 32;
		break;
		case 35: // lbzu
			update = true;
			// fall through
		case 34: // lbz
			fastmem = true;
			accessSize = 8;
		break;
		case 41: // lhzu
			update = true;
			// fall through
		case 40: // lhz
			fastmem = true;
			accessSize = 16;
		break;
		case 43: // lhau
			update = true;
			// fall through
		case 42: // lha
			signExtend = true;
			accessSize = 16;
		break;
	}

	// Check for exception before loading
	ARMReg rA = gpr.GetReg(false);

	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	TST(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_NEQ);

	// Base register: update forms always use rA; otherwise rA == 0 means "no base" (-1).
	SafeLoadToReg(fastmem, d, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse);

	if (update)
	{
		// Write the effective address back to rA.
		ARMReg RA = gpr.R(a);
		if (offsetReg == -1)
		{
			rA = gpr.GetReg(false);
			MOVI2R(rA, offset);
			ADD(RA, RA, rA);
		}
		else
		{
			ADD(RA, RA, gpr.R(offsetReg));
		}
	}

	SetJumpTarget(DoNotLoad);

	// LWZ idle skipping
	// Pattern-matches this lwz (upper half 0x800D) followed by a compare
	// (0x28000000, or 0x2C000000 on Wii) and the branch 0x4182fff8.
	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
	    inst.OPCD == 32 &&
	    (inst.hex & 0xFFFF0000) == 0x800D0000 &&
	    (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
	    (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
	    Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
	{
		ARMReg RD = gpr.R(d);

		// if it's still 0, we can wait until the next event
		TST(RD, RD);
		FixupBranch noIdle = B_CC(CC_NEQ);

		gpr.Flush(FLUSH_MAINTAIN_STATE);
		fpr.Flush(FLUSH_MAINTAIN_STATE);

		// Call PowerPC::OnIdle with the polled address in R0, then leave the block.
		rA = gpr.GetReg();

		MOVI2R(rA, (u32)&PowerPC::OnIdle);
		MOVI2R(R0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
		BL(rA);

		gpr.Unlock(rA);
		WriteExceptionExit();

		SetJumpTarget(noIdle);

		//js.compilerPC += 8;
		return;
	}
}
// mfspr / mftb for ARM64.
// Special-cases the time base (TL/TU, computed inline and optionally merged
// with an adjacent mftb) and XER (reassembled from its split fields); WPAR and
// DEC fall back; every other SPR is a plain 32-bit load from ppcState.spr[].
void JitArm64::mfspr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	// The SPR number is encoded as two swapped 5-bit halves.
	u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	int d = inst.RD;
	switch (iIndex)
	{
	case SPR_TL:
	case SPR_TU:
	{
		ARM64Reg WA = gpr.GetReg();
		ARM64Reg WB = gpr.GetReg();
		ARM64Reg XA = EncodeRegTo64(WA);
		ARM64Reg XB = EncodeRegTo64(WB);

		// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
		// cost of calling out to C for this is actually significant.
		MOVI2R(XA, (u64)&CoreTiming::globalTimer);
		LDR(INDEX_UNSIGNED, XA, XA, 0);
		MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks);
		LDR(INDEX_UNSIGNED, XB, XB, 0);
		SUB(XA, XA, XB);

		// It might seem convenient to correct the timer for the block position here for even more accurate
		// timing, but as of currently, this can break games. If we end up reading a time *after* the time
		// at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only
		// 50 downcount remaining, some games don't function correctly, such as Karaoke Party Revolution,
		// which won't get past the loading screen.

		// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
		// The multiplier is built in XB with ORR + ADD; UMULH then supplies the
		// upper 64 bits of the product, and the remaining shift by 3 is folded
		// into the ADD below.
		ORR(XB, SP, 1, 60);
		ADD(XB, XB, 1);
		UMULH(XA, XA, XB);

		MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue);
		LDR(INDEX_UNSIGNED, XB, XB, 0);
		ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3));
		// Store the full 64-bit time base starting at spr[SPR_TL].
		STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(spr[SPR_TL]));

		if (MergeAllowedNextInstructions(1))
		{
			const UGeckoInstruction& next = js.op[1].inst;
			// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
			// if we can.
			u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F);
			// Be careful; the actual opcode is for mftb (371), not mfspr (339)
			int n = next.RD;
			if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d)
			{
				// Account for the merged instruction and skip it.
				js.downcountAmount++;
				js.skipInstructions = 1;
				gpr.BindToRegister(d, false);
				gpr.BindToRegister(n, false);
				// TL = low 32 bits (WA); TU = XA shifted right by 32.
				if (iIndex == SPR_TL)
					MOV(gpr.R(d), WA);
				else
					ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32));

				if (nextIndex == SPR_TL)
					MOV(gpr.R(n), WA);
				else
					ORR(EncodeRegTo64(gpr.R(n)), SP, XA, ArithOption(XA, ST_LSR, 32));

				gpr.Unlock(WA, WB);
				break;
			}
		}
		// Unmerged case: deliver only the half that was asked for.
		gpr.BindToRegister(d, false);
		if (iIndex == SPR_TU)
			ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32));
		else
			MOV(gpr.R(d), WA);
		gpr.Unlock(WA, WB);
	}
	break;
	case SPR_XER:
	{
		// XER is kept split in ppcState; OR the three pieces back together.
		gpr.BindToRegister(d, false);
		ARM64Reg RD = gpr.R(d);
		ARM64Reg WA = gpr.GetReg();
		LDRH(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(xer_stringctrl));
		LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
		ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_CA_SHIFT));
		LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov));
		ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_OV_SHIFT));
		gpr.Unlock(WA);
	}
	break;
	case SPR_WPAR:
	case SPR_DEC:
		// Always falls back; FALLBACK_IF(true) is expected to leave the
		// function, so the label below is not reached from here.
		FALLBACK_IF(true);
	default:
		// Generic SPR: 32-bit load from spr[iIndex].
		gpr.BindToRegister(d, false);
		ARM64Reg RD = gpr.R(d);
		LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4);
		break;
	}
}
// fcmpu / fcmpo: compare FA with FB and write the 4-bit result
// (0x8 less, 0x4 greater, 0x2 equal, 0x1 unordered) into cr_fast[CRFD].
void Jit64::fcmpx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(jo.fpAccurateFcmp);

	const int fa = inst.FA;
	const int fb = inst.FB;
	const int cr_field = inst.CRFD;

	// When both operands are the same register only "equal" and "NaN" are
	// possible, so the less/greater branches are not emitted at all.
	const bool operands_differ = (fa != fb);

	fpr.Lock(fa, fb);
	fpr.BindToRegister(fb, true);

	// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
	UCOMISD(fpr.R(fb).GetSimpleReg(), fpr.R(fa));

	FixupBranch to_nan, to_less, to_greater;
	FixupBranch done_equal, done_nan, done_greater;

	if (operands_differ)
	{
		// if B > A, goto Lesser's jump target
		to_less = J_CC(CC_A);
	}

	// if (B != B) or (A != A), goto NaN's jump target
	to_nan = J_CC(CC_P);

	if (operands_differ)
	{
		// if B < A, goto Greater's jump target
		// JB can't precede the NaN check because it doesn't test ZF
		to_greater = J_CC(CC_B);
	}

	// Equal
	MOV(8, M(&PowerPC::ppcState.cr_fast[cr_field]), Imm8(0x2));
	done_equal = J();

	// NAN
	SetJumpTarget(to_nan);
	MOV(8, M(&PowerPC::ppcState.cr_fast[cr_field]), Imm8(0x1));

	if (operands_differ)
	{
		done_nan = J();

		// Greater Than
		SetJumpTarget(to_greater);
		MOV(8, M(&PowerPC::ppcState.cr_fast[cr_field]), Imm8(0x4));
		done_greater = J();

		// Less Than
		SetJumpTarget(to_less);
		MOV(8, M(&PowerPC::ppcState.cr_fast[cr_field]), Imm8(0x8));
	}

	SetJumpTarget(done_equal);
	if (operands_differ)
	{
		SetJumpTarget(done_nan);
		SetJumpTarget(done_greater);
	}

	fpr.UnlockAll();
}
// fctiwz: convert FB to a signed 32-bit integer, rounding toward zero.
// Inputs above/below the two bounds stored in minmaxFloat take the bound
// itself and raise VXCVI; in-range inputs are converted and the inexact
// bits of FPSCR are updated.
void JitArm::fctiwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	// rA points at the bounds table: minimum at offset 0, maximum at offset 8.
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0);
		VCMPE(vB, V0);
		VMRS(_PC);
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0);
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
	VCMPE(vD, vB);
	VMRS(_PC);

	// Compare reports "equal": clear the FRFIMask bits and finish.
	SetCC(CC_EQ);
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	// Otherwise: raise XX and set FI; set FR as well when |result| > |input|.
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);

	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);

	// OR the constant stored at doublenum into the result.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	if (inst.Rc)
		Helper_UpdateCR1(fpscrReg, rA);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
// lfd: load a 64-bit double from (rA + SIMM_16) into FPR d.
// Uses a direct (unchecked) memory access with a byte swap; the SSSE3 path
// swaps with PSHUFB, the fallback goes through the temp64 scratch variable.
void Jit64::lfd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	// Not handled here: memcheck mode and the rA == 0 encoding.
	FALLBACK_IF(js.memcheck || !inst.RA);

	int d = inst.RD;
	int a = inst.RA;

	s32 offset = (s32)(s16)inst.SIMM_16;
	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	// ABI_PARAM1 = rA (the displacement is folded into the memory operands below).
	MOV(32, R(ABI_PARAM1), gpr.R(a));
	// TODO - optimize. This has to load the previous value - upper double should stay unmodified.
	fpr.Lock(d);
	fpr.BindToRegister(d, true);
	X64Reg xd = fpr.RX(d);

	if (cpu_info.bSSSE3)
	{
#if _M_X86_64
		MOVQ_xmm(XMM0, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
#else
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
#endif
		// Byte swap via the shuffle table, then replace only the low double of xd.
		PSHUFB(XMM0, M((void *)bswapShuffle1x8Dupe));
		MOVSD(xd, R(XMM0));
	}
	else
	{
#if _M_X86_64
		// 64-bit load + swap into the EAX-numbered register, bounced through temp64.
		LoadAndSwap(64, EAX, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
		MOV(64, M(&temp64), R(EAX));

		MEMCHECK_START

		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));

		MEMCHECK_END
#else
		// 32-bit host: load and swap each half; the first word lands in the
		// upper half of temp64, the second in the lower half.
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
		BSWAP(32, EAX);
		MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));

		MEMCHECK_START

		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
		BSWAP(32, EAX);
		MOV(32, M(&temp64), R(EAX));
		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));

		MEMCHECK_END
#endif
	}

	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
// fctiw: convert FB to a signed 32-bit integer using the rounding mode taken
// from the low two bits of the guest FPSCR.
// Inputs above/below the two bounds stored in minmaxFloat take the bound
// itself and raise VXCVI; in-range inputs are converted with the host
// rounding mode temporarily changed, and the inexact bits of FPSCR updated.
void JitArm::fctiwx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	// rA points at the bounds table: minimum at offset 0, maximum at offset 8.
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0);
		VCMPE(vB, V0);
		VMRS(_PC);
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0);
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	// Set rounding mode first
	// PPC <-> ARM rounding modes
	// 0, 1, 2, 3 <-> 0, 3, 1, 2
	ARMReg rB = gpr.GetReg();
	// Read the host FP status/control register and clear its rounding field.
	VMRS(rA);
	// Bits 22-23
	BIC(rA, rA, Operand2(3, 5));

	LDR(rB, R9, PPCSTATE_OFF(fpscr));
	AND(rB, rB, 0x3); // Get the FPSCR rounding bits
	// Predicated chain: exactly one ORR runs for guest modes 1..3; mode 0
	// leaves the (cleared) field untouched.
	CMP(rB, 1);
	SetCC(CC_EQ); // zero
		ORR(rA, rA, Operand2(3, 5));
	SetCC(CC_NEQ);
	CMP(rB, 2); // +inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(1, 5));
	SetCC(CC_NEQ);
	CMP(rB, 3); // -inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(2, 5));
	SetCC();
	// Install the selected rounding mode, convert, then write the control
	// register again with both rounding bits set.
	VMSR(rA);
	ORR(rA, rA, Operand2(3, 5));
	VCVT(vD, vB, TO_INT | IS_SIGNED);
	VMSR(rA);
	gpr.Unlock(rB);
	VCMPE(vD, vB);
	VMRS(_PC);

	// Compare reports "equal": clear the FRFIMask bits and finish.
	SetCC(CC_EQ);
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	// Otherwise: raise XX and set FI; set FR as well when |result| > |input|.
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);

	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);

	// OR the constant stored at doublenum into the result.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	if (inst.Rc)
		Helper_UpdateCR1(fpscrReg, rA);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
// Shared ARM emitter for the integer store family:
// stw/stb/sth, their update forms, and the indexed forms.
// Emits the store through SafeStoreFromReg() and, for update forms,
// writes the effective address back to rA unless a DSI is pending.
void JitArm::stX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	const u32 base = inst.RA;
	const u32 index = inst.RB;
	const u32 source = inst.RS;
	const s32 displacement = inst.SIMM_16;

	u32 width = 0;
	s32 indexReg = -1;       // index register, or -1 for the SIMM_16 forms
	bool writeBack = false;  // update form
	bool useFastmem = false;

	switch (inst.OPCD)
	{
		case 37: // stwu
			writeBack = true;
			// fall through
		case 36: // stw
			useFastmem = true;
			width = 32;
		break;

		case 39: // stbu
			writeBack = true;
			// fall through
		case 38: // stb
			width = 8;
		break;

		case 45: // sthu
			writeBack = true;
			// fall through
		case 44: // sth
			width = 16;
		break;

		case 31:
			switch (inst.SUBOP10)
			{
				case 183: // stwux
					writeBack = true;
					// fall through
				case 151: // stwx
					useFastmem = true;
					width = 32;
					indexReg = index;
				break;

				case 247: // stbux
					writeBack = true;
					// fall through
				case 215: // stbx
					width = 8;
					indexReg = index;
				break;

				case 439: // sthux
					writeBack = true;
					// fall through
				case 407: // sthx
					width = 16;
					indexReg = index;
				break;
			}
		break;
	}

	// Base register: update forms always use rA; otherwise rA == 0 means "no base" (-1).
	SafeStoreFromReg(useFastmem, writeBack ? base : (base ? base : -1), source, indexReg, width, displacement);

	if (writeBack)
	{
		ARMReg scratch = gpr.GetReg();
		ARMReg hostIndex;
		ARMReg hostBase = gpr.R(base);
		if (indexReg != -1)
			hostIndex = gpr.R(indexReg);

		// Check for DSI exception prior to writing back address
		LDR(scratch, R9, PPCSTATE_OFF(Exceptions));
		TST(scratch, EXCEPTION_DSI);
		FixupBranch skipWriteBack = B_CC(CC_NEQ);

		const bool immediateForm = (indexReg == -1);
		if (base && immediateForm)
		{
			// rA += SIMM_16
			MOVI2R(scratch, displacement);
			ADD(hostBase, hostBase, scratch);
		}
		else if (base)
		{
			// rA += rB
			ADD(hostBase, hostBase, hostIndex);
		}
		else if (immediateForm)
		{
			// rA == 0: the address is just the immediate
			MOVI2R(hostBase, (u32)displacement);
		}
		else
		{
			// rA == 0: the address is just rB
			MOV(hostBase, hostIndex);
		}

		SetJumpTarget(skipWriteBack);
		gpr.Unlock(scratch);
	}
}