Example #1
0
void JitArm::ps_rsqrte(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 b = inst.FB, d = inst.FD;

	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	ARMReg fpscrReg = gpr.GetReg();
	ARMReg V0 = D1;
	ARMReg rA = gpr.GetReg();

	MOVI2R(fpscrReg, (u32)&PPC_NAN);
	VLDR(V0, fpscrReg, 0);
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));

	VCMP(vB0);
	VMRS(_PC);
	FixupBranch Less0 = B_CC(CC_LT);
		VMOV(vD0, V0);
		SetFPException(fpscrReg, FPSCR_VXSQRT);
		FixupBranch SkipOrr0 = B();
	SetJumpTarget(Less0);
	SetCC(CC_EQ);
		ORR(rA, rA, 1);
	SetCC();
	SetJumpTarget(SkipOrr0);

	VCMP(vB1);
	VMRS(_PC);
	FixupBranch Less1 = B_CC(CC_LT);
		VMOV(vD1, V0);
		SetFPException(fpscrReg, FPSCR_VXSQRT);
		FixupBranch SkipOrr1 = B();
	SetJumpTarget(Less1);
	SetCC(CC_EQ);
		ORR(rA, rA, 2);
	SetCC();
	SetJumpTarget(SkipOrr1);

	CMP(rA, 0);
	FixupBranch noException = B_CC(CC_EQ);
	SetFPException(fpscrReg, FPSCR_ZX);
	SetJumpTarget(noException);

	VCVT(S0, vB0, 0);
	VCVT(S1, vB1, 0);

	NEONXEmitter nemit(this);
	nemit.VRSQRTE(F_32, D0, D0);
	VCVT(vD0, S0, 0);
	VCVT(vD1, S1, 0);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(fpscrReg, rA);
}
Example #2
0
void JitArm::ps_sel(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

	ARMReg vA0 = fpr.R0(a);
	ARMReg vA1 = fpr.R1(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vC0 = fpr.R0(c);
	ARMReg vC1 = fpr.R1(c);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	VCMP(vA0);
	VMRS(_PC);

	FixupBranch GT0 = B_CC(CC_GE);
	VMOV(vD0, vB0);
	FixupBranch EQ0 = B();
	SetJumpTarget(GT0);
	VMOV(vD0, vC0);
	SetJumpTarget(EQ0);

	VCMP(vA1);
	VMRS(_PC);
	FixupBranch GT1 = B_CC(CC_GE);
	VMOV(vD1, vB1);
	FixupBranch EQ1 = B();
	SetJumpTarget(GT1);
	VMOV(vD1, vC1);
	SetJumpTarget(EQ1);
}
Example #3
0
void Jit::Comp_FPUComp(u32 op) {
	CONDITIONAL_DISABLE;
	int opc = op & 0xF;
	if (opc >= 8) opc -= 8; // alias
	if (opc == 0)//f, sf (signalling false)
	{
		MOVI2R(R0, 0);
		STR(R0, CTXREG, offsetof(MIPSState, fpcond));
		return;
	}

	int fs = _FS;
	int ft = _FT;
	fpr.MapInIn(fs, ft);
	VCMP(fpr.R(fs), fpr.R(ft));
	VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
	switch(opc)
	{
	case 1:      // un,  ngle (unordered)
		SetCC(CC_VS);
		MOVI2R(R0, 1);
		SetCC(CC_VC);
		break;
	case 2:      // eq,  seq (equal, ordered)
		SetCC(CC_EQ);
		MOVI2R(R0, 1);
		SetCC(CC_NEQ);
		break;
	case 3:      // ueq, ngl (equal, unordered)
		SetCC(CC_EQ);
		MOVI2R(R0, 1);
		SetCC(CC_NEQ);
		MOVI2R(R0, 0);
		SetCC(CC_VS);
		MOVI2R(R0, 1);
		SetCC(CC_AL);
		STR(R0, CTXREG, offsetof(MIPSState, fpcond));
		return;
	case 4:      // olt, lt (less than, ordered)
		SetCC(CC_LO);
		MOVI2R(R0, 1);
		SetCC(CC_HS);
		break;
	case 5:      // ult, nge (less than, unordered)
		SetCC(CC_LT);
		MOVI2R(R0, 1);
		SetCC(CC_GE);
		break;
	case 6:      // ole, le (less equal, ordered)
		SetCC(CC_LS);
		MOVI2R(R0, 1);
		SetCC(CC_HI);
		break;
	case 7:      // ule, ngt (less equal, unordered)
		SetCC(CC_LE);
		MOVI2R(R0, 1);
		SetCC(CC_GT);
		break;
	default:
		Comp_Generic(op);
		return;
	}
	MOVI2R(R0, 0);
	SetCC(CC_AL);
	STR(R0, CTXREG, offsetof(MIPSState, fpcond));
}
Example #4
0
void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
    _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN);
    if (!js.prefixD) return;

    int n = GetNumVectorElements(sz);
    for (int i = 0; i < n; i++) 	{
        if (js.VfpuWriteMask(i))
            continue;

        // TODO: These clampers are wrong - put this into google
        // and look at the plot:   abs(x) - abs(x-0.5) + 0.5
        // It's too steep.

        // Also, they mishandle NaN and Inf.
        int sat = (js.prefixD >> (i * 2)) & 3;
        if (sat == 1) {
            // clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1]
            fpr.MapRegV(vregs[i], MAP_DIRTY);

            MOVI2F(S0, 0.0f, R0);
            MOVI2F(S1, 1.0f, R0);
            VCMP(fpr.V(vregs[i]), S0);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_LE);
            VMOV(fpr.V(vregs[i]), S0);
            SetCC(CC_AL);
            VCMP(fpr.V(vregs[i]), S1);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_GT);
            VMOV(fpr.V(vregs[i]), S1);
            SetCC(CC_AL);

            /*
            VABS(S1, fpr.V(vregs[i]));                  // S1 = fabs(x)
            VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD}
            VABS(fpr.V(vregs[i]), fpr.V(vregs[i]));
            VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 + 0.5f
            VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);*/
        } else if (sat == 3) {
            fpr.MapRegV(vregs[i], MAP_DIRTY);

            MOVI2F(S0, -1.0f, R0);
            MOVI2F(S1, 1.0f, R0);
            VCMP(fpr.V(vregs[i]), S0);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_LT);
            VMOV(fpr.V(vregs[i]), S0);
            SetCC(CC_AL);
            VCMP(fpr.V(vregs[i]), S1);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_GT);
            VMOV(fpr.V(vregs[i]), S1);
            SetCC(CC_AL);

            // clamped = fabs(x) - fabs(x-1.0f);        // [-1, 1]
            /*
            fpr.MapRegV(vregs[i], MAP_DIRTY);
            MOVI2F(S0, 1.0f, R0);
            VABS(S1, fpr.V(vregs[i]));                  // S1 = fabs(x)
            VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD}
            VABS(fpr.V(vregs[i]), fpr.V(vregs[i]));
            VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2
            */
        }
    }
}