void JitArm::ps_rsqrte(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; ARMReg vB0 = fpr.R0(b); ARMReg vB1 = fpr.R1(b); ARMReg vD0 = fpr.R0(d, false); ARMReg vD1 = fpr.R1(d, false); ARMReg fpscrReg = gpr.GetReg(); ARMReg V0 = D1; ARMReg rA = gpr.GetReg(); MOVI2R(fpscrReg, (u32)&PPC_NAN); VLDR(V0, fpscrReg, 0); LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); VCMP(vB0); VMRS(_PC); FixupBranch Less0 = B_CC(CC_LT); VMOV(vD0, V0); SetFPException(fpscrReg, FPSCR_VXSQRT); FixupBranch SkipOrr0 = B(); SetJumpTarget(Less0); SetCC(CC_EQ); ORR(rA, rA, 1); SetCC(); SetJumpTarget(SkipOrr0); VCMP(vB1); VMRS(_PC); FixupBranch Less1 = B_CC(CC_LT); VMOV(vD1, V0); SetFPException(fpscrReg, FPSCR_VXSQRT); FixupBranch SkipOrr1 = B(); SetJumpTarget(Less1); SetCC(CC_EQ); ORR(rA, rA, 2); SetCC(); SetJumpTarget(SkipOrr1); CMP(rA, 0); FixupBranch noException = B_CC(CC_EQ); SetFPException(fpscrReg, FPSCR_ZX); SetJumpTarget(noException); VCVT(S0, vB0, 0); VCVT(S1, vB1, 0); NEONXEmitter nemit(this); nemit.VRSQRTE(F_32, D0, D0); VCVT(vD0, S0, 0); VCVT(vD1, S1, 0); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(fpscrReg, rA); }
void JitArm::ps_muls1(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); u32 a = inst.FA, c = inst.FC, d = inst.FD; ARMReg vA0 = fpr.R0(a); ARMReg vA1 = fpr.R1(a); ARMReg vC1 = fpr.R1(c); ARMReg vD0 = fpr.R0(d, false); ARMReg vD1 = fpr.R1(d, false); ARMReg V0 = fpr.GetReg(); ARMReg V1 = fpr.GetReg(); VMUL(V0, vA0, vC1); VMUL(V1, vA1, vC1); VMOV(vD0, V0); VMOV(vD1, V1); fpr.Unlock(V0); fpr.Unlock(V1); }
void JitArm::ps_mr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; ARMReg vB0 = fpr.R0(b); ARMReg vB1 = fpr.R1(b); ARMReg vD0 = fpr.R0(d, false); ARMReg vD1 = fpr.R1(d, false); VMOV(vD0, vB0); VMOV(vD1, vB1); }
void JitArm::stfs(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); ARMReg v0 = fpr.R0(inst.FS); VCVT(S0, v0, 0); if (inst.RA) { MOVI2R(rB, inst.SIMM_16); ARMReg RA = gpr.R(inst.RA); ADD(rB, rB, RA); } else { MOVI2R(rB, (u32)inst.SIMM_16); } MOVI2R(rA, (u32)&Memory::Write_U32); PUSH(4, R0, R1, R2, R3); VMOV(R0, S0); MOV(R1, rB); BL(rA); POP(4, R0, R1, R2, R3); gpr.Unlock(rA, rB); }
void JitArm::ps_merge10(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; ARMReg vA1 = fpr.R1(a); ARMReg vB0 = fpr.R0(b); ARMReg vD0 = fpr.R0(d, false); ARMReg vD1 = fpr.R1(d, false); ARMReg V0 = fpr.GetReg(); VMOV(V0, vB0); VMOV(vD0, vA1); VMOV(vD1, V0); fpr.Unlock(V0); }
void JitArm::fmrx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(FloatingPoint) ARMReg vD = fpr.R0(inst.FD); ARMReg vB = fpr.R0(inst.FB); VMOV(vD, vB); if (inst.Rc) Helper_UpdateCR1(vD); }
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { if (prefix == 0xE4) return; int n = GetNumVectorElements(sz); u8 origV[4]; static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f}; for (int i = 0; i < n; i++) origV[i] = vregs[i]; for (int i = 0; i < n; i++) { int regnum = (prefix >> (i*2)) & 3; int abs = (prefix >> (8+i)) & 1; int negate = (prefix >> (16+i)) & 1; int constants = (prefix >> (12+i)) & 1; // Unchanged, hurray. if (!constants && regnum == i && !abs && !negate) continue; // This puts the value into a temp reg, so we won't write the modified value back. vregs[i] = fpr.GetTempV(); fpr.MapRegV(vregs[i], MAP_NOINIT | MAP_DIRTY); if (!constants) { // Prefix may say "z, z, z, z" but if this is a pair, we force to x. // TODO: But some ops seem to use const 0 instead? if (regnum >= n) { ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", prefix, sz); regnum = 0; } if (abs) { VABS(fpr.V(vregs[i]), fpr.V(origV[regnum])); } else { VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum])); } } else { // TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though. MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0); } // TODO: This can be integrated into the VABS / VMOV above, and also the constants. if (negate) VNEG(fpr.V(vregs[i]), fpr.V(vregs[i])); // TODO: This probably means it will swap out soon, inefficiently... fpr.ReleaseSpillLockV(vregs[i]); } }
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { if (prefix == 0xE4) return; int n = GetNumVectorElements(sz); u8 origV[4]; static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f}; for (int i = 0; i < n; i++) origV[i] = vregs[i]; for (int i = 0; i < n; i++) { int regnum = (prefix >> (i*2)) & 3; int abs = (prefix >> (8+i)) & 1; int negate = (prefix >> (16+i)) & 1; int constants = (prefix >> (12+i)) & 1; // Unchanged, hurray. if (!constants && regnum == i && !abs && !negate) continue; // This puts the value into a temp reg, so we won't write the modified value back. vregs[i] = fpr.GetTempV(); if (!constants) { fpr.MapDirtyInV(vregs[i], origV[regnum]); fpr.SpillLockV(vregs[i]); // Prefix may say "z, z, z, z" but if this is a pair, we force to x. // TODO: But some ops seem to use const 0 instead? if (regnum >= n) { WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC)); regnum = 0; } if (abs) { VABS(fpr.V(vregs[i]), fpr.V(origV[regnum])); if (negate) VNEG(fpr.V(vregs[i]), fpr.V(vregs[i])); } else { if (negate) VNEG(fpr.V(vregs[i]), fpr.V(origV[regnum])); else VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum])); } } else { fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT); fpr.SpillLockV(vregs[i]); MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0, negate); } } }
void JitArm::fsubsx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(FloatingPoint) ARMReg vA = fpr.R0(inst.FA); ARMReg vB = fpr.R0(inst.FB); ARMReg vD0 = fpr.R0(inst.FD); ARMReg vD1 = fpr.R1(inst.FD); VSUB(vD0, vA, vB); VMOV(vD1, vD0); if (inst.Rc) Helper_UpdateCR1(vD0); }
void JitArm::lfs(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(LoadStoreFloating) Default(inst); return; ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); CMP(rA, EXCEPTION_DSI); FixupBranch DoNotLoad = B_CC(CC_EQ); if (inst.RA) { MOVI2R(rB, inst.SIMM_16); ARMReg RA = gpr.R(inst.RA); ADD(rB, rB, RA); } else MOVI2R(rB, (u32)inst.SIMM_16); MOVI2R(rA, (u32)&Memory::Read_U32); PUSH(4, R0, R1, R2, R3); MOV(R0, rB); BL(rA); MOV(rA, R0); POP(4, R0, R1, R2, R3); ARMReg v0 = fpr.R0(inst.FD, false); ARMReg v1 = fpr.R1(inst.FD, false); VMOV(v0, rA, false); VMOV(v1, rA, false); gpr.Unlock(rA, rB); SetJumpTarget(DoNotLoad); }
void JitArm::ps_sel(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; ARMReg vA0 = fpr.R0(a); ARMReg vA1 = fpr.R1(a); ARMReg vB0 = fpr.R0(b); ARMReg vB1 = fpr.R1(b); ARMReg vC0 = fpr.R0(c); ARMReg vC1 = fpr.R1(c); ARMReg vD0 = fpr.R0(d, false); ARMReg vD1 = fpr.R1(d, false); VCMP(vA0); VMRS(_PC); FixupBranch GT0 = B_CC(CC_GE); VMOV(vD0, vB0); FixupBranch EQ0 = B(); SetJumpTarget(GT0); VMOV(vD0, vC0); SetJumpTarget(EQ0); VCMP(vA1); VMRS(_PC); FixupBranch GT1 = B_CC(CC_GE); VMOV(vD1, vB1); FixupBranch EQ1 = B(); SetJumpTarget(GT1); VMOV(vD1, vC1); SetJumpTarget(EQ1); }
// Breaks Animal Crossing void JitArm::fmulsx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(FloatingPoint) Default(inst); return; ARMReg vA = fpr.R0(inst.FA); ARMReg vC = fpr.R0(inst.FC); ARMReg vD0 = fpr.R0(inst.FD); ARMReg vD1 = fpr.R1(inst.FD); VMUL(vD0, vA, vC); VMOV(vD1, vD0); if (inst.Rc) Helper_UpdateCR1(vD0); }
void JitArm::ps_sum1(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; ARMReg vA0 = fpr.R0(a); ARMReg vB1 = fpr.R1(b); ARMReg vC0 = fpr.R0(c); ARMReg vD0 = fpr.R0(d, false); ARMReg vD1 = fpr.R1(d, false); VMOV(vD0, vC0); VADD(vD1, vA0, vB1); }
void Jit::Comp_VDot(u32 op) { // DISABLE; CONDITIONAL_DISABLE; // WARNING: No prefix support! if (js.MayHavePrefix()) { Comp_Generic(op); js.EatPrefix(); return; } int vd = _VD; int vs = _VS; int vt = _VT; VectorSize sz = GetVecSize(op); // TODO: Force read one of them into regs? probably not. u8 sregs[4], tregs[4]; GetVectorRegs(sregs, sz, vs); GetVectorRegs(tregs, sz, vt); // TODO: applyprefixST here somehow (shuffle, etc...) fpr.MapRegsV(sregs, sz, 0); fpr.MapRegsV(tregs, sz, 0); VMUL(S0, fpr.V(sregs[0]), fpr.V(tregs[0])); int n = GetNumVectorElements(sz); for (int i = 1; i < n; i++) { // sum += s[i]*t[i]; VMLA(S0, fpr.V(sregs[i]), fpr.V(tregs[i])); } fpr.ReleaseSpillLocks(); fpr.MapRegV(vd, MAP_NOINIT | MAP_DIRTY); // TODO: applyprefixD here somehow (write mask etc..) VMOV(fpr.V(vd), S0); fpr.ReleaseSpillLocks(); js.EatPrefix(); }
void kamikaze_add(vec3f pos, float s) { actor* a = ACTOR_get(actor_pool); VMOV(a->pos, pos); a->pos[2] -= 0.1f; a->vel[0] = 0.0f; a->vel[1] = -rand01(); a->vel[2] = -5.0f -10.0f*rand01(); a->ang = 0.0; a->collide_size[0] = s; a->collide_size[1] = s; a->aux[0] = linear(20.0f, 30.0f,rand01()); a->update = kamikaze_update; a->render = kamikaze_render; }
void Jit::Comp_VVectorInit(u32 op) { CONDITIONAL_DISABLE; // WARNING: No prefix support! if (js.MayHavePrefix()) { Comp_Generic(op); js.EatPrefix(); return; } switch ((op >> 16) & 0xF) { case 6: // v=zeros; break; //vzero MOVI2F(S0, 0.0f, R0); break; case 7: // v=ones; break; //vone MOVI2F(S0, 1.0f, R0); break; default: DISABLE; break; } VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); u8 dregs[4]; GetVectorRegsPrefixD(dregs, sz, _VD); fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY); for (int i = 0; i < n; ++i) VMOV(fpr.V(dregs[i]), S0); ApplyPrefixD(dregs, sz); fpr.ReleaseSpillLocks(); }
void Jit::Comp_VVectorInit(MIPSOpcode op) { CONDITIONAL_DISABLE; // WARNING: No prefix support! if (js.HasUnknownPrefix() || disablePrefixes) { DISABLE; } switch ((op >> 16) & 0xF) { case 6: // v=zeros; break; //vzero MOVI2F(S0, 0.0f, R0); break; case 7: // v=ones; break; //vone MOVI2F(S0, 1.0f, R0); break; default: DISABLE; break; } VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); u8 dregs[4]; GetVectorRegsPrefixD(dregs, sz, _VD); fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT | MAP_DIRTY); for (int i = 0; i < n; ++i) VMOV(fpr.V(dregs[i]), S0); ApplyPrefixD(dregs, sz); fpr.ReleaseSpillLocksAndDiscardTemps(); }
void Jit::Comp_FPULS(u32 op) { CONDITIONAL_DISABLE; s32 offset = (s16)(op & 0xFFFF); int ft = _FT; int rs = _RS; // u32 addr = R(rs) + offset; // logBlocks = 1; bool doCheck = false; switch(op >> 26) { case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1 fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY); if (gpr.IsImm(rs)) { u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF; MOVI2R(R0, addr + (u32)Memory::base); } else { gpr.MapReg(rs); if (g_Config.bFastMemory) { SetR0ToEffectiveAddress(rs, offset); } else { SetCCAndR0ForSafeAddress(rs, offset, R1); doCheck = true; } ADD(R0, R0, R11); } VLDR(fpr.R(ft), R0, 0); if (doCheck) { SetCC(CC_EQ); MOVI2R(R0, 0); VMOV(fpr.R(ft), R0); SetCC(CC_AL); } break; case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1 fpr.MapReg(ft); if (gpr.IsImm(rs)) { u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF; MOVI2R(R0, addr + (u32)Memory::base); } else { gpr.MapReg(rs); if (g_Config.bFastMemory) { SetR0ToEffectiveAddress(rs, offset); } else { SetCCAndR0ForSafeAddress(rs, offset, R1); doCheck = true; } ADD(R0, R0, R11); } VSTR(fpr.R(ft), R0, 0); if (doCheck) { SetCC(CC_AL); } break; default: Comp_Generic(op); return; } }
void Jit::Comp_FPU2op(u32 op) { CONDITIONAL_DISABLE; int fs = _FS; int fd = _FD; // logBlocks = 1; switch (op & 0x3f) { case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt fpr.MapDirtyIn(fd, fs); VSQRT(fpr.R(fd), fpr.R(fs)); break; case 5: //F(fd) = fabsf(F(fs)); break; //abs fpr.MapDirtyIn(fd, fs); VABS(fpr.R(fd), fpr.R(fs)); break; case 6: //F(fd) = F(fs); break; //mov fpr.MapDirtyIn(fd, fs); VMOV(fpr.R(fd), fpr.R(fs)); break; case 7: //F(fd) = -F(fs); break; //neg fpr.MapDirtyIn(fd, fs); VNEG(fpr.R(fd), fpr.R(fs)); break; case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); break; case 13: //FsI(fd) = Rto0(F(fs))); break; //trunc.w.s fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); break; case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s fpr.MapDirtyIn(fd, fs); MOVI2F(S0, 0.5f, R0); VADD(S0,fpr.R(fs),S0); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); break; case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s fpr.MapDirtyIn(fd, fs); MOVI2F(S0, 0.5f, R0); VSUB(S0,fpr.R(fs),S0); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); break; case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED); break; case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s fpr.MapDirtyIn(fd, fs); LDR(R0, CTXREG, offsetof(MIPSState, fcr31)); AND(R0, R0, Operand2(3)); // MIPS Rounding Mode: // 0: Round nearest // 1: Round to zero // 2: Round up (ceil) // 3: Round down (floor) CMP(R0, Operand2(2)); SetCC(CC_GE); MOVI2F(S0, 0.5f, R1); SetCC(CC_GT); VSUB(S0,fpr.R(fs),S0); SetCC(CC_EQ); VADD(S0,fpr.R(fs),S0); SetCC(CC_GE); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); /* 2,3 */ SetCC(CC_AL); CMP(R0, Operand2(1)); SetCC(CC_EQ); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */ SetCC(CC_LT); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); /* 0 */ SetCC(CC_AL); break; default: DISABLE; } }
void JitArm::fctiwx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff) u32 b = inst.FB; u32 d = inst.FD; ARMReg vB = fpr.R0(b); ARMReg vD = fpr.R0(d); ARMReg V0 = fpr.GetReg(); ARMReg V1 = fpr.GetReg(); ARMReg V2 = fpr.GetReg(); ARMReg rA = gpr.GetReg(); ARMReg fpscrReg = gpr.GetReg(); FixupBranch DoneMax, DoneMin; LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); MOVI2R(rA, (u32)minmaxFloat); // Check if greater than max float { VLDR(V0, rA, 8); // Load Max VCMPE(vB, V0); VMRS(_PC); // Loads in to APSR FixupBranch noException = B_CC(CC_LE); VMOV(vD, V0); // Set to max SetFPException(fpscrReg, FPSCR_VXCVI); DoneMax = B(); SetJumpTarget(noException); } // Check if less than min float { VLDR(V0, rA, 0); VCMPE(vB, V0); VMRS(_PC); FixupBranch noException = B_CC(CC_GE); VMOV(vD, V0); SetFPException(fpscrReg, FPSCR_VXCVI); DoneMin = B(); SetJumpTarget(noException); } // Within ranges, convert to integer // Set rounding mode first // PPC <-> ARM rounding modes // 0, 1, 2, 3 <-> 0, 3, 1, 2 ARMReg rB = gpr.GetReg(); VMRS(rA); // Bits 22-23 BIC(rA, rA, Operand2(3, 5)); LDR(rB, R9, PPCSTATE_OFF(fpscr)); AND(rB, rB, 0x3); // Get the FPSCR rounding bits CMP(rB, 1); SetCC(CC_EQ); // zero ORR(rA, rA, Operand2(3, 5)); SetCC(CC_NEQ); CMP(rB, 2); // +inf SetCC(CC_EQ); ORR(rA, rA, Operand2(1, 5)); SetCC(CC_NEQ); CMP(rB, 3); // -inf SetCC(CC_EQ); ORR(rA, rA, Operand2(2, 5)); SetCC(); VMSR(rA); ORR(rA, rA, Operand2(3, 5)); VCVT(vD, vB, TO_INT | IS_SIGNED); VMSR(rA); gpr.Unlock(rB); VCMPE(vD, vB); VMRS(_PC); SetCC(CC_EQ); BIC(fpscrReg, fpscrReg, FRFIMask); FixupBranch DoneEqual = B(); SetCC(); SetFPException(fpscrReg, FPSCR_XX); ORR(fpscrReg, fpscrReg, FIMask); VABS(V1, vB); VABS(V2, vD); VCMPE(V2, V1); VMRS(_PC); SetCC(CC_GT); ORR(fpscrReg, fpscrReg, FRMask); SetCC(); SetJumpTarget(DoneEqual); SetJumpTarget(DoneMax); SetJumpTarget(DoneMin); MOVI2R(rA, (u32)&doublenum); VLDR(V0, rA, 0); NEONXEmitter nemit(this); nemit.VORR(vD, vD, V0); if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(rA); gpr.Unlock(fpscrReg); fpr.Unlock(V0); fpr.Unlock(V1); fpr.Unlock(V2); }
void JitArm::fctiwzx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff) u32 b = inst.FB; u32 d = inst.FD; ARMReg vB = fpr.R0(b); ARMReg vD = fpr.R0(d); ARMReg V0 = fpr.GetReg(); ARMReg V1 = fpr.GetReg(); ARMReg V2 = fpr.GetReg(); ARMReg rA = gpr.GetReg(); ARMReg fpscrReg = gpr.GetReg(); FixupBranch DoneMax, DoneMin; LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); MOVI2R(rA, (u32)minmaxFloat); // Check if greater than max float { VLDR(V0, rA, 8); // Load Max VCMPE(vB, V0); VMRS(_PC); // Loads in to APSR FixupBranch noException = B_CC(CC_LE); VMOV(vD, V0); // Set to max SetFPException(fpscrReg, FPSCR_VXCVI); DoneMax = B(); SetJumpTarget(noException); } // Check if less than min float { VLDR(V0, rA, 0); VCMPE(vB, V0); VMRS(_PC); FixupBranch noException = B_CC(CC_GE); VMOV(vD, V0); SetFPException(fpscrReg, FPSCR_VXCVI); DoneMin = B(); SetJumpTarget(noException); } // Within ranges, convert to integer VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO); VCMPE(vD, vB); VMRS(_PC); SetCC(CC_EQ); BIC(fpscrReg, fpscrReg, FRFIMask); FixupBranch DoneEqual = B(); SetCC(); SetFPException(fpscrReg, FPSCR_XX); ORR(fpscrReg, fpscrReg, FIMask); VABS(V1, vB); VABS(V2, vD); VCMPE(V2, V1); VMRS(_PC); SetCC(CC_GT); ORR(fpscrReg, fpscrReg, FRMask); SetCC(); SetJumpTarget(DoneEqual); SetJumpTarget(DoneMax); SetJumpTarget(DoneMin); MOVI2R(rA, (u32)&doublenum); VLDR(V0, rA, 0); NEONXEmitter nemit(this); nemit.VORR(vD, vD, V0); if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(rA); gpr.Unlock(fpscrReg); fpr.Unlock(V0); fpr.Unlock(V1); fpr.Unlock(V2); }
void Jit::Comp_SVQ(MIPSOpcode op) { CONDITIONAL_DISABLE; int imm = (signed short)(op&0xFFFC); int vt = (((op >> 16) & 0x1f)) | ((op&1) << 5); MIPSGPReg rs = _RS; bool doCheck = false; switch (op >> 26) { case 54: //lv.q { // CC might be set by slow path below, so load regs first. u8 vregs[4]; GetVectorRegs(vregs, V_Quad, vt); fpr.MapRegsAndSpillLockV(vregs, V_Quad, MAP_DIRTY | MAP_NOINIT); if (gpr.IsImm(rs)) { u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF; MOVI2R(R0, addr + (u32)Memory::base); } else { gpr.MapReg(rs); if (g_Config.bFastMemory) { SetR0ToEffectiveAddress(rs, imm); } else { SetCCAndR0ForSafeAddress(rs, imm, R1); doCheck = true; } ADD(R0, R0, R11); } #ifdef __ARM_ARCH_7S__ FixupBranch skip; if (doCheck) { skip = B_CC(CC_EQ); } for (int i = 0; i < 4; i++) VLDR(fpr.V(vregs[i]), R0, i * 4); if (doCheck) { SetJumpTarget(skip); SetCC(CC_AL); } #else for (int i = 0; i < 4; i++) VLDR(fpr.V(vregs[i]), R0, i * 4); if (doCheck) { SetCC(CC_EQ); MOVI2R(R0, 0); for (int i = 0; i < 4; i++) VMOV(fpr.V(vregs[i]), R0); SetCC(CC_AL); } #endif } break; case 62: //sv.q { // CC might be set by slow path below, so load regs first. u8 vregs[4]; GetVectorRegs(vregs, V_Quad, vt); fpr.MapRegsAndSpillLockV(vregs, V_Quad, 0); if (gpr.IsImm(rs)) { u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF; MOVI2R(R0, addr + (u32)Memory::base); } else { gpr.MapReg(rs); if (g_Config.bFastMemory) { SetR0ToEffectiveAddress(rs, imm); } else { SetCCAndR0ForSafeAddress(rs, imm, R1); doCheck = true; } ADD(R0, R0, R11); } #ifdef __ARM_ARCH_7S__ FixupBranch skip; if (doCheck) { skip = B_CC(CC_EQ); } for (int i = 0; i < 4; i++) VSTR(fpr.V(vregs[i]), R0, i * 4); if (doCheck) { SetJumpTarget(skip); SetCC(CC_AL); } #else for (int i = 0; i < 4; i++) VSTR(fpr.V(vregs[i]), R0, i * 4); if (doCheck) { SetCC(CC_AL); } #endif } break; default: DISABLE; break; } fpr.ReleaseSpillLocksAndDiscardTemps(); }
void Jit::Comp_SV(u32 op) { CONDITIONAL_DISABLE; s32 imm = (signed short)(op&0xFFFC); int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5); int rs = _RS; bool doCheck = false; switch (op >> 26) { case 50: //lv.s // VI(vt) = Memory::Read_U32(addr); { // CC might be set by slow path below, so load regs first. fpr.MapRegV(vt, MAP_DIRTY | MAP_NOINIT); fpr.ReleaseSpillLocks(); if (gpr.IsImm(rs)) { u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF; MOVI2R(R0, addr + (u32)Memory::base); } else { gpr.MapReg(rs); if (g_Config.bFastMemory) { SetR0ToEffectiveAddress(rs, imm); } else { SetCCAndR0ForSafeAddress(rs, imm, R1); doCheck = true; } ADD(R0, R0, R11); } VLDR(fpr.V(vt), R0, 0); if (doCheck) { SetCC(CC_EQ); MOVI2R(R0, 0); VMOV(fpr.V(vt), R0); SetCC(CC_AL); } } break; case 58: //sv.s // Memory::Write_U32(VI(vt), addr); { // CC might be set by slow path below, so load regs first. fpr.MapRegV(vt); fpr.ReleaseSpillLocks(); if (gpr.IsImm(rs)) { u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF; MOVI2R(R0, addr + (u32)Memory::base); } else { gpr.MapReg(rs); if (g_Config.bFastMemory) { SetR0ToEffectiveAddress(rs, imm); } else { SetCCAndR0ForSafeAddress(rs, imm, R1); doCheck = true; } ADD(R0, R0, R11); } VSTR(fpr.V(vt), R0, 0); if (doCheck) { SetCC(CC_AL); } } break; default: DISABLE; } }
void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN); if (!js.prefixD) return; int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { if (js.VfpuWriteMask(i)) continue; // TODO: These clampers are wrong - put this into google // and look at the plot: abs(x) - abs(x-0.5) + 0.5 // It's too steep. // Also, they mishandle NaN and Inf. int sat = (js.prefixD >> (i * 2)) & 3; if (sat == 1) { // clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1] fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 0.0f, R0); MOVI2F(S1, 1.0f, R0); VCMP(fpr.V(vregs[i]), S0); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_LE); VMOV(fpr.V(vregs[i]), S0); SetCC(CC_AL); VCMP(fpr.V(vregs[i]), S1); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_GT); VMOV(fpr.V(vregs[i]), S1); SetCC(CC_AL); /* VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD} VABS(fpr.V(vregs[i]), fpr.V(vregs[i])); VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 + 0.5f VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);*/ } else if (sat == 3) { fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, -1.0f, R0); MOVI2F(S1, 1.0f, R0); VCMP(fpr.V(vregs[i]), S0); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_LT); VMOV(fpr.V(vregs[i]), S0); SetCC(CC_AL); VCMP(fpr.V(vregs[i]), S1); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_GT); VMOV(fpr.V(vregs[i]), S1); SetCC(CC_AL); // clamped = fabs(x) - fabs(x-1.0f); // [-1, 1] /* fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 1.0f, R0); VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD} VABS(fpr.V(vregs[i]), fpr.V(vregs[i])); VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 */ } } }
void Jit::Comp_VV2Op(u32 op) { CONDITIONAL_DISABLE; DISABLE; if (js.HasUnknownPrefix()) DISABLE; VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); u8 sregs[4], dregs[4]; GetVectorRegsPrefixS(sregs, sz, _VS); GetVectorRegsPrefixD(dregs, sz, _VD); ARMReg tempxregs[4]; for (int i = 0; i < n; ++i) { if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) { int reg = fpr.GetTempV(); fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY); fpr.SpillLockV(reg); tempxregs[i] = fpr.V(reg); } else { fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY); fpr.SpillLockV(dregs[i]); tempxregs[i] = fpr.V(dregs[i]); } } // Warning: sregs[i] and tempxregs[i] may be the same reg. // Helps for vmov, hurts for vrcp, etc. for (int i = 0; i < n; ++i) { switch ((op >> 16) & 0x1f) { case 0: // d[i] = s[i]; break; //vmov // Probably for swizzle. VMOV(tempxregs[i], fpr.V(sregs[i])); break; case 1: // d[i] = fabsf(s[i]); break; //vabs //if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i])) VABS(tempxregs[i], fpr.V(sregs[i])); break; case 2: // d[i] = -s[i]; break; //vneg VNEG(tempxregs[i], fpr.V(sregs[i])); break; case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0 DISABLE; break; case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1 DISABLE; break; case 16: // d[i] = 1.0f / s[i]; break; //vrcp MOVI2F(S0, 1.0f, R0); VDIV(tempxregs[i], S0, fpr.V(sregs[i])); break; case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq MOVI2F(S0, 1.0f, R0); VSQRT(S1, fpr.V(sregs[i])); VDIV(tempxregs[i], S0, S1); break; case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin DISABLE; break; case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos DISABLE; break; case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2 DISABLE; break; case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2 DISABLE; break; case 22: // d[i] = sqrtf(s[i]); break; //vsqrt VSQRT(tempxregs[i], fpr.V(sregs[i])); VABS(tempxregs[i], tempxregs[i]); break; case 23: // d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin DISABLE; break; case 24: // d[i] = -1.0f / s[i]; break; // vnrcp MOVI2F(S0, -1.0f, R0); VDIV(tempxregs[i], S0, fpr.V(sregs[i])); break; case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin DISABLE; break; case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2 DISABLE; break; } } fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY); for (int i = 0; i < n; ++i) { VMOV(fpr.V(dregs[i]), tempxregs[i]); } ApplyPrefixD(dregs, sz); fpr.ReleaseSpillLocks(); }
void JitArm::lfXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); ARMReg RA; u32 a = inst.RA, b = inst.RB; s32 offset = inst.SIMM_16; bool single = false; bool update = false; bool zeroA = false; s32 offsetReg = -1; switch (inst.OPCD) { case 31: switch (inst.SUBOP10) { case 567: // lfsux single = true; update = true; offsetReg = b; break; case 535: // lfsx single = true; zeroA = true; offsetReg = b; break; case 631: // lfdux update = true; offsetReg = b; break; case 599: // lfdx zeroA = true; offsetReg = b; break; } break; case 49: // lfsu update = true; single = true; break; case 48: // lfs single = true; zeroA = true; break; case 51: // lfdu update = true; break; case 50: // lfd zeroA = true; break; } ARMReg v0 = fpr.R0(inst.FD), v1; if (single) v1 = fpr.R1(inst.FD); if (update) { RA = gpr.R(a); // Update path /always/ uses RA if (offsetReg == -1) // uses SIMM_16 { MOVI2R(rB, offset); ADD(rB, rB, RA); } else { ADD(rB, gpr.R(offsetReg), RA); } } else { if (zeroA) { if (offsetReg == -1) { if (a) { RA = gpr.R(a); MOVI2R(rB, offset); ADD(rB, rB, RA); } else { MOVI2R(rB, (u32)offset); } } else { ARMReg RB = gpr.R(offsetReg); if (a) { RA = gpr.R(a); ADD(rB, RB, RA); } else { MOV(rB, RB); } } } } LDR(rA, R9, PPCSTATE_OFF(Exceptions)); CMP(rA, EXCEPTION_DSI); FixupBranch DoNotLoad = B_CC(CC_EQ); if (update) MOV(RA, rB); if (Core::g_CoreStartupParameter.bFastmem) { Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) BIC(rB, rB, mask); // 1 MOVI2R(rA, (u32)Memory::base, false); // 2-3 ADD(rB, rB, rA); // 4 NEONXEmitter nemit(this); if (single) { VLDR(S0, rB, 0); nemit.VREV32(I_8, D0, D0); // Byte swap to result VCVT(v0, S0, 0); VCVT(v1, S0, 0); } else { VLDR(v0, rB, 0); nemit.VREV64(I_8, v0, v0); // Byte swap to result } } else { PUSH(4, R0, R1, R2, R3); MOV(R0, rB); if (single) { MOVI2R(rA, (u32)&Memory::Read_U32); BL(rA); VMOV(S0, R0); VCVT(v0, S0, 0); VCVT(v1, S0, 0); } else { MOVI2R(rA, (u32)&Memory::Read_F64); BL(rA); #if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 VMOV(v0, R0); #else VMOV(v0, D0); #endif } POP(4, R0, R1, R2, R3); } gpr.Unlock(rA, rB); SetJumpTarget(DoNotLoad); }
void JitArm::stfXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); ARMReg RA; u32 a = inst.RA, b = inst.RB; s32 offset = inst.SIMM_16; bool single = false; bool update = false; bool zeroA = false; s32 offsetReg = -1; switch (inst.OPCD) { case 31: switch (inst.SUBOP10) { case 663: // stfsx single = true; zeroA = true; offsetReg = b; break; case 695: // stfsux single = true; offsetReg = b; break; case 727: // stfdx zeroA = true; offsetReg = b; break; case 759: // stfdux update = true; offsetReg = b; break; } break; case 53: // stfsu update = true; single = true; break; case 52: // stfs single = true; zeroA = true; break; case 55: // stfdu update = true; break; case 54: // stfd zeroA = true; break; } ARMReg v0 = fpr.R0(inst.FS); if (update) { RA = gpr.R(a); // Update path /always/ uses RA if (offsetReg == -1) // uses SIMM_16 { MOVI2R(rB, offset); ADD(rB, rB, RA); } else { ADD(rB, gpr.R(offsetReg), RA); } } else { if (zeroA) { if (offsetReg == -1) { if (a) { RA = gpr.R(a); MOVI2R(rB, offset); ADD(rB, rB, RA); } else { MOVI2R(rB, (u32)offset); } } else { ARMReg RB = gpr.R(offsetReg); if (a) { RA = gpr.R(a); ADD(rB, RB, RA); } else { MOV(rB, RB); } } } } if (update) { LDR(rA, R9, PPCSTATE_OFF(Exceptions)); CMP(rA, EXCEPTION_DSI); SetCC(CC_NEQ); MOV(RA, rB); SetCC(); } if (Core::g_CoreStartupParameter.bFastmem) { Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) BIC(rB, rB, mask); // 1 MOVI2R(rA, (u32)Memory::base, false); // 2-3 ADD(rB, rB, rA); // 4 NEONXEmitter nemit(this); if (single) { VCVT(S0, v0, 0); nemit.VREV32(I_8, D0, D0); VSTR(S0, rB, 0); } else { nemit.VREV64(I_8, D0, v0); VSTR(D0, rB, 0); } } else { PUSH(4, R0, R1, R2, R3); if (single) { MOVI2R(rA, (u32)&Memory::Write_U32); VCVT(S0, v0, 0); VMOV(R0, S0); MOV(R1, rB); BL(rA); } else { MOVI2R(rA, (u32)&Memory::Write_F64); #if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 VMOV(R0, v0); MOV(R2, rB); #else VMOV(D0, v0); MOV(R0, rB); #endif BL(rA); } POP(4, R0, R1, R2, R3); } gpr.Unlock(rA, rB); }
void ArmJit::NEONApplyPrefixD(DestARMReg dest) { // Apply clamps to dest.rd int n = GetNumVectorElements(dest.sz); int sat1_mask = 0; int sat3_mask = 0; int full_mask = (1 << n) - 1; for (int i = 0; i < n; i++) { int sat = (js.prefixD >> (i * 2)) & 3; if (sat == 1) sat1_mask |= 1 << i; if (sat == 3) sat3_mask |= 1 << i; } if (sat1_mask && sat3_mask) { // Why would anyone do this? ELOG("PREFIXD: Can't have both sat[0-1] and sat[-1-1] at the same time yet"); } if (sat1_mask) { if (sat1_mask != full_mask) { ELOG("PREFIXD: Can't have partial sat1 mask yet (%i vs %i)", sat1_mask, full_mask); } if (IsD(dest.rd)) { VMOV_immf(D0, 0.0); VMOV_immf(D1, 1.0); VMAX(F_32, dest.rd, dest.rd, D0); VMIN(F_32, dest.rd, dest.rd, D1); } else { VMOV_immf(Q0, 1.0); VMIN(F_32, dest.rd, dest.rd, Q0); VMOV_immf(Q0, 0.0); VMAX(F_32, dest.rd, dest.rd, Q0); } } if (sat3_mask && sat1_mask != full_mask) { if (sat3_mask != full_mask) { ELOG("PREFIXD: Can't have partial sat3 mask yet (%i vs %i)", sat3_mask, full_mask); } if (IsD(dest.rd)) { VMOV_immf(D0, 0.0); VMOV_immf(D1, 1.0); VMAX(F_32, dest.rd, dest.rd, D0); VMIN(F_32, dest.rd, dest.rd, D1); } else { VMOV_immf(Q0, 1.0); VMIN(F_32, dest.rd, dest.rd, Q0); VMOV_immf(Q0, -1.0); VMAX(F_32, dest.rd, dest.rd, Q0); } } // Check for actual mask operation (unrelated to the "masks" above). if (dest.backingRd != dest.rd) { // This means that we need to apply the write mask, from rd to backingRd. // What a pain. We can at least shortcut easy cases like half the register. // And we can generate the masks easily with some of the crazy vector imm modes. (bits2bytes for example). // So no need to load them from RAM. int writeMask = (~(js.prefixD >> 8)) & 0xF; if (writeMask == 3) { ILOG("Doing writemask = 3"); VMOV(D_0(dest.rd), D_0(dest.backingRd)); } else { // TODO ELOG("PREFIXD: Arbitrary write masks not supported (%i / %i)", writeMask, full_mask); VMOV(dest.backingRd, dest.rd); } }
void Jit::Comp_VecDo3(u32 op) { CONDITIONAL_DISABLE; DISABLE; // WARNING: No prefix support! if (js.MayHavePrefix()) { Comp_Generic(op); js.EatPrefix(); return; } int vd = _VD; int vs = _VS; int vt = _VT; void (ARMXEmitter::*triop)(ARMReg, ARMReg, ARMReg) = NULL; switch (op >> 26) { case 24: //VFPU0 switch ((op >> 23)&7) { case 0: // d[i] = s[i] + t[i]; break; //vadd triop = &ARMXEmitter::VADD; break; case 1: // d[i] = s[i] - t[i]; break; //vsub triop = &ARMXEmitter::VSUB; break; case 7: // d[i] = s[i] / t[i]; break; //vdiv triop = &ARMXEmitter::VDIV; break; } break; case 25: //VFPU1 switch ((op >> 23)&7) { case 0: // d[i] = s[i] * t[i]; break; //vmul triop = &ARMXEmitter::VMUL; break; } break; } if (!triop) { DISABLE; } VectorSize sz = GetVecSize(op); int n = GetNumVectorElements(sz); u8 sregs[4], tregs[4], dregs[4]; GetVectorRegsPrefixS(sregs, sz, _VS); GetVectorRegsPrefixT(tregs, sz, _VT); GetVectorRegsPrefixD(dregs, sz, _VD); MIPSReg tempregs[4]; for (int i = 0; i < n; i++) { if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs, n, tregs)) { tempregs[i] = fpr.GetTempV(); } else { fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] || dregs[i] == tregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY); tempregs[i] = dregs[i]; } } for (int i = 0; i < n; i++) { fpr.SpillLockV(sregs[i]); fpr.SpillLockV(tregs[i]); fpr.MapRegV(sregs[i]); fpr.MapRegV(tregs[i]); fpr.MapRegV(tempregs[i]); (this->*triop)(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); fpr.ReleaseSpillLockV(sregs[i]); fpr.ReleaseSpillLockV(tregs[i]); } fpr.MapRegsV(dregs, sz, MAP_DIRTY); for (int i = 0; i < n; i++) { if (dregs[i] != tempregs[i]) VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i])); } ApplyPrefixD(dregs, sz); fpr.ReleaseSpillLocks(); js.EatPrefix(); }