void JitArm::ps_rsqrte(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; ARMReg vB0 = fpr.R0(b); ARMReg vB1 = fpr.R1(b); ARMReg vD0 = fpr.R0(d, false); ARMReg vD1 = fpr.R1(d, false); ARMReg fpscrReg = gpr.GetReg(); ARMReg V0 = D1; ARMReg rA = gpr.GetReg(); MOVI2R(fpscrReg, (u32)&PPC_NAN); VLDR(V0, fpscrReg, 0); LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); VCMP(vB0); VMRS(_PC); FixupBranch Less0 = B_CC(CC_LT); VMOV(vD0, V0); SetFPException(fpscrReg, FPSCR_VXSQRT); FixupBranch SkipOrr0 = B(); SetJumpTarget(Less0); SetCC(CC_EQ); ORR(rA, rA, 1); SetCC(); SetJumpTarget(SkipOrr0); VCMP(vB1); VMRS(_PC); FixupBranch Less1 = B_CC(CC_LT); VMOV(vD1, V0); SetFPException(fpscrReg, FPSCR_VXSQRT); FixupBranch SkipOrr1 = B(); SetJumpTarget(Less1); SetCC(CC_EQ); ORR(rA, rA, 2); SetCC(); SetJumpTarget(SkipOrr1); CMP(rA, 0); FixupBranch noException = B_CC(CC_EQ); SetFPException(fpscrReg, FPSCR_ZX); SetJumpTarget(noException); VCVT(S0, vB0, 0); VCVT(S1, vB1, 0); NEONXEmitter nemit(this); nemit.VRSQRTE(F_32, D0, D0); VCVT(vD0, S0, 0); VCVT(vD1, S1, 0); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(fpscrReg, rA); }
void JitArm::ps_sel(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; ARMReg vA0 = fpr.R0(a); ARMReg vA1 = fpr.R1(a); ARMReg vB0 = fpr.R0(b); ARMReg vB1 = fpr.R1(b); ARMReg vC0 = fpr.R0(c); ARMReg vC1 = fpr.R1(c); ARMReg vD0 = fpr.R0(d, false); ARMReg vD1 = fpr.R1(d, false); VCMP(vA0); VMRS(_PC); FixupBranch GT0 = B_CC(CC_GE); VMOV(vD0, vB0); FixupBranch EQ0 = B(); SetJumpTarget(GT0); VMOV(vD0, vC0); SetJumpTarget(EQ0); VCMP(vA1); VMRS(_PC); FixupBranch GT1 = B_CC(CC_GE); VMOV(vD1, vB1); FixupBranch EQ1 = B(); SetJumpTarget(GT1); VMOV(vD1, vC1); SetJumpTarget(EQ1); }
void Jit::Comp_FPUComp(u32 op) { CONDITIONAL_DISABLE; int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias if (opc == 0)//f, sf (signalling false) { MOVI2R(R0, 0); STR(R0, CTXREG, offsetof(MIPSState, fpcond)); return; } int fs = _FS; int ft = _FT; fpr.MapInIn(fs, ft); VCMP(fpr.R(fs), fpr.R(ft)); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). switch(opc) { case 1: // un, ngle (unordered) SetCC(CC_VS); MOVI2R(R0, 1); SetCC(CC_VC); break; case 2: // eq, seq (equal, ordered) SetCC(CC_EQ); MOVI2R(R0, 1); SetCC(CC_NEQ); break; case 3: // ueq, ngl (equal, unordered) SetCC(CC_EQ); MOVI2R(R0, 1); SetCC(CC_NEQ); MOVI2R(R0, 0); SetCC(CC_VS); MOVI2R(R0, 1); SetCC(CC_AL); STR(R0, CTXREG, offsetof(MIPSState, fpcond)); return; case 4: // olt, lt (less than, ordered) SetCC(CC_LO); MOVI2R(R0, 1); SetCC(CC_HS); break; case 5: // ult, nge (less than, unordered) SetCC(CC_LT); MOVI2R(R0, 1); SetCC(CC_GE); break; case 6: // ole, le (less equal, ordered) SetCC(CC_LS); MOVI2R(R0, 1); SetCC(CC_HI); break; case 7: // ule, ngt (less equal, unordered) SetCC(CC_LE); MOVI2R(R0, 1); SetCC(CC_GT); break; default: Comp_Generic(op); return; } MOVI2R(R0, 0); SetCC(CC_AL); STR(R0, CTXREG, offsetof(MIPSState, fpcond)); }
void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN); if (!js.prefixD) return; int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { if (js.VfpuWriteMask(i)) continue; // TODO: These clampers are wrong - put this into google // and look at the plot: abs(x) - abs(x-0.5) + 0.5 // It's too steep. // Also, they mishandle NaN and Inf. int sat = (js.prefixD >> (i * 2)) & 3; if (sat == 1) { // clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1] fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 0.0f, R0); MOVI2F(S1, 1.0f, R0); VCMP(fpr.V(vregs[i]), S0); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_LE); VMOV(fpr.V(vregs[i]), S0); SetCC(CC_AL); VCMP(fpr.V(vregs[i]), S1); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_GT); VMOV(fpr.V(vregs[i]), S1); SetCC(CC_AL); /* VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD} VABS(fpr.V(vregs[i]), fpr.V(vregs[i])); VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 + 0.5f VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);*/ } else if (sat == 3) { fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, -1.0f, R0); MOVI2F(S1, 1.0f, R0); VCMP(fpr.V(vregs[i]), S0); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_LT); VMOV(fpr.V(vregs[i]), S0); SetCC(CC_AL); VCMP(fpr.V(vregs[i]), S1); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_GT); VMOV(fpr.V(vregs[i]), S1); SetCC(CC_AL); // clamped = fabs(x) - fabs(x-1.0f); // [-1, 1] /* fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 1.0f, R0); VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD} VABS(fpr.V(vregs[i]), fpr.V(vregs[i])); VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 */ } } }