예제 #1
0
	void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
		_assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN);
		if (!js.prefixD) return;

		int n = GetNumVectorElements(sz);
		for (int i = 0; i < n; i++) 	{
			if (js.VfpuWriteMask(i))
				continue;

			int sat = (js.prefixD >> (i * 2)) & 3;
			if (sat == 1) {
				// clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1]
				fpr.MapRegV(vregs[i], MAP_DIRTY);
				MOVI2F(S0, 0.5, R0);
				VABS(S1, fpr.V(vregs[i]));     // S1 = fabs(x)
				VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD}
				VABS(S2, S2);
				VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2 + 0.5f
				VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);
			} else if (sat == 3) {
				// clamped = fabs(x) - fabs(x-1.0f);        // [-1, 1]
				fpr.MapRegV(vregs[i], MAP_DIRTY);
				MOVI2F(S0, 1.0, R0);
				VABS(S1, fpr.V(vregs[i]));     // S1 = fabs(x)
				VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD}
				VABS(S2, S2);
				VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2
			}
		}
	}
예제 #2
0
// Applies a VFPU source (S/T) prefix to the operand registers in vregs.
//
// For every lane whose prefix asks for anything other than "this lane,
// unchanged", a temp register is allocated, the swizzled / abs / negated source
// (or a table constant) is emitted into it, and vregs[i] is redirected to that
// temp, so the modified value is never written back to the original register.
//
// vregs:  in/out, one register index per lane; entries may be replaced by temps.
// prefix: per-lane fields - source lane at bits 2*i..2*i+1, abs at bit 8+i,
//         constant-select at bit 12+i, negate at bit 16+i.
// sz:     vector size; determines how many lanes are processed.
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
    // 0xE4 decodes to lanes 0,1,2,3 in order with no modifier bits: nothing to do.
    if (prefix == 0xE4) return;

    int n = GetNumVectorElements(sz);
    u8 origV[4];
    static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

    // Remember the caller's registers; vregs[] gets overwritten with temps below
    // while later lanes may still swizzle from the originals.
    for (int i = 0; i < n; i++)
        origV[i] = vregs[i];

    for (int i = 0; i < n; i++)
    {
        int regnum = (prefix >> (i*2)) & 3;
        int abs    = (prefix >> (8+i)) & 1;
        int negate = (prefix >> (16+i)) & 1;
        int constants = (prefix >> (12+i)) & 1;

        // Unchanged, hurray.
        if (!constants && regnum == i && !abs && !negate)
            continue;

        // This puts the value into a temp reg, so we won't write the modified value back.
        vregs[i] = fpr.GetTempV();
        if (!constants) {
            // Prefix may say "z, z, z, z" but if this is a pair, we force to x.
            // TODO: But some ops seem to use const 0 instead?
            // This must be validated BEFORE origV[regnum] is used for mapping:
            // only the first n entries of origV are initialized.
            if (regnum >= n) {
                WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
                regnum = 0;
            }

            fpr.MapDirtyInV(vregs[i], origV[regnum]);
            fpr.SpillLockV(vregs[i]);

            if (abs) {
                VABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
                if (negate)
                    VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
            } else {
                if (negate)
                    VNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
                else
                    VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
            }
        } else {
            // Constant mode: the lane field plus the abs bit (as bit 2) index the table;
            // the negate flag is handed to MOVI2F rather than emitted separately.
            fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
            fpr.SpillLockV(vregs[i]);
            MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0, negate);
        }
    }
}
예제 #3
0
	// Applies a VFPU source (S/T) prefix to the operand registers in vregs.
	//
	// Each lane that needs a swizzle, abs, negate or constant gets a fresh temp
	// register holding the transformed value, and vregs[lane] is pointed at it.
	// Lanes that the prefix leaves alone keep their original register.
	//
	// prefix layout per lane: source lane at bits 2*lane..2*lane+1, abs at
	// bit 8+lane, constant-select at bit 12+lane, negate at bit 16+lane.
	void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
		// Identity prefix: every lane selects itself with no modifiers.
		if (prefix == 0xE4)
			return;

		static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

		const int n = GetNumVectorElements(sz);

		// Snapshot of the incoming registers, since vregs[] is rewritten below.
		u8 origV[4];
		for (int lane = 0; lane < n; ++lane)
			origV[lane] = vregs[lane];

		for (int lane = 0; lane < n; ++lane) {
			int srcLane = (prefix >> (lane * 2)) & 3;
			const bool wantAbs = ((prefix >> (8 + lane)) & 1) != 0;
			const bool wantConst = ((prefix >> (12 + lane)) & 1) != 0;
			const bool wantNegate = ((prefix >> (16 + lane)) & 1) != 0;

			// Nothing requested for this lane: keep the original register.
			const bool untouched = !wantConst && !wantAbs && !wantNegate && srcLane == lane;
			if (untouched)
				continue;

			// Work in a temp so the modified value is never written back.
			vregs[lane] = fpr.GetTempV();
			fpr.MapRegV(vregs[lane], MAP_NOINIT | MAP_DIRTY);

			if (wantConst) {
				// The abs bit doubles as bit 2 of the constant table index.
				// TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though.
				const int tableIndex = srcLane + (wantAbs ? 4 : 0);
				MOVI2F(fpr.V(vregs[lane]), constantArray[tableIndex], R0);
			} else {
				// A swizzle past the end of the vector (e.g. "z" on a pair) is forced to x.
				// TODO: But some ops seem to use const 0 instead?
				if (srcLane >= n) {
					ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", prefix, sz);
					srcLane = 0;
				}

				if (wantAbs)
					VABS(fpr.V(vregs[lane]), fpr.V(origV[srcLane]));
				else
					VMOV(fpr.V(vregs[lane]), fpr.V(origV[srcLane]));
			}

			// TODO: This can be integrated into the VABS / VMOV above, and also the constants.
			if (wantNegate)
				VNEG(fpr.V(vregs[lane]), fpr.V(vregs[lane]));

			// TODO: This probably means it will swap out soon, inefficiently...
			fpr.ReleaseSpillLockV(vregs[lane]);
		}
	}
예제 #4
0
	// Compiles vzero / vone: fills every lane of the destination vector with
	// 0.0f or 1.0f respectively. Any other sub-op goes through DISABLE.
	void Jit::Comp_VVectorInit(u32 op)
	{
		CONDITIONAL_DISABLE;

		// Prefixes are not handled on this path: if one may be pending, use the
		// generic implementation and consume the prefix.
		if (js.MayHavePrefix()) {
			Comp_Generic(op);
			js.EatPrefix();
			return;
		}

		// Load the fill value into scratch S0 (R0 is the integer scratch).
		const u32 subop = (op >> 16) & 0xF;
		if (subop == 6) {
			// vzero
			MOVI2F(S0, 0.0f, R0);
		} else if (subop == 7) {
			// vone
			MOVI2F(S0, 1.0f, R0);
		} else {
			DISABLE;
		}

		VectorSize sz = GetVecSize(op);
		int laneCount = GetNumVectorElements(sz);

		// The destination is write-only, so it is mapped without loading old contents.
		u8 dregs[4];
		GetVectorRegsPrefixD(dregs, sz, _VD);
		fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

		for (int lane = 0; lane < laneCount; lane++) {
			VMOV(fpr.V(dregs[lane]), S0);
		}

		ApplyPrefixD(dregs, sz);
		fpr.ReleaseSpillLocks();
	}
예제 #5
0
// Compiles vzero / vone: broadcasts 0.0f or 1.0f into every lane of the
// destination vector, then applies the destination prefix.
void Jit::Comp_VVectorInit(MIPSOpcode op)
{
    CONDITIONAL_DISABLE;

    // Give up on this op when the prefix state is not known at compile time
    // or prefix handling has been switched off.
    if (disablePrefixes || js.HasUnknownPrefix()) {
        DISABLE;
    }

    // Stage the fill value in scratch register S0.
    switch ((op >> 16) & 0xF)
    {
    case 6:
        // vzero: all lanes become 0.0f
        MOVI2F(S0, 0.0f, R0);
        break;

    case 7:
        // vone: all lanes become 1.0f
        MOVI2F(S0, 1.0f, R0);
        break;

    default:
        DISABLE;
        break;
    }

    VectorSize sz = GetVecSize(op);
    int laneCount = GetNumVectorElements(sz);

    // Destination lanes are fully overwritten, hence MAP_NOINIT.
    u8 dregs[4];
    GetVectorRegsPrefixD(dregs, sz, _VD);
    fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

    for (int lane = 0; lane < laneCount; lane++) {
        VMOV(fpr.V(dregs[lane]), S0);
    }

    ApplyPrefixD(dregs, sz);

    fpr.ReleaseSpillLocksAndDiscardTemps();
}
예제 #6
0
// Compiles the single-operand FPU instructions (one source register fs, one
// destination register fd). The operation is selected by the low 6 bits of op;
// anything not listed in the switch goes through DISABLE.
//
// Every handled case first maps fd as dirty with fs as input, then emits the
// VFP instruction(s). S0 is used as a float scratch, R0/R1 as integer scratch.
void Jit::Comp_FPU2op(u32 op)
{
	CONDITIONAL_DISABLE;

	int fs = _FS;
	int fd = _FD;
	// logBlocks = 1;

	switch (op & 0x3f) 
	{
	case 4:	//F(fd)	   = sqrtf(F(fs));            break; //sqrt
		fpr.MapDirtyIn(fd, fs);
		VSQRT(fpr.R(fd), fpr.R(fs));
		break;
	case 5:	//F(fd)    = fabsf(F(fs));            break; //abs
		fpr.MapDirtyIn(fd, fs);
		VABS(fpr.R(fd), fpr.R(fs));
		break;
	case 6:	//F(fd)	   = F(fs);                   break; //mov
		fpr.MapDirtyIn(fd, fs);
		VMOV(fpr.R(fd), fpr.R(fs));
		break;
	case 7:	//F(fd)	   = -F(fs);                  break; //neg
		fpr.MapDirtyIn(fd, fs);
		VNEG(fpr.R(fd), fpr.R(fs));
		break;
	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
		// Converted without ROUND_TO_ZERO.
		// NOTE(review): presumably this uses whatever rounding mode the FPU is
		// currently in - confirm that matches round.w.s.
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
		break;
	case 13: //FsI(fd) = Rto0(F(fs)));            break; //trunc.w.s
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		break;
	case 14: //FsI(fd) = (int)ceilf (F(fs));      break; //ceil.w.s
		// Approximates ceil as convert(fs + 0.5) without ROUND_TO_ZERO.
		// NOTE(review): for an input that is already integral, fs + 0.5 sits
		// exactly on a tie, so the result looks dependent on tie-breaking -
		// confirm against ceilf.
		fpr.MapDirtyIn(fd, fs);
		MOVI2F(S0, 0.5f, R0);
		VADD(S0,fpr.R(fs),S0);
		VCVT(fpr.R(fd), S0,        TO_INT | IS_SIGNED);
		break;
	case 15: //FsI(fd) = (int)floorf(F(fs));      break; //floor.w.s
		// Approximates floor as convert(fs - 0.5) without ROUND_TO_ZERO.
		// NOTE(review): same tie concern as ceil.w.s above - confirm.
		fpr.MapDirtyIn(fd, fs);
		MOVI2F(S0, 0.5f, R0);
		VSUB(S0,fpr.R(fs),S0);
		VCVT(fpr.R(fd), S0,        TO_INT | IS_SIGNED);
		break;
	case 32: //F(fd)   = (float)FsI(fs);          break; //cvt.s.w
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED);
		break;
	case 36: //FsI(fd) = (int)  F(fs);            break; //cvt.w.s
		// Picks the conversion at run time from the low two bits of fcr31.
		fpr.MapDirtyIn(fd, fs);
		LDR(R0, CTXREG, offsetof(MIPSState, fcr31));
		AND(R0, R0, Operand2(3));
		// MIPS Rounding Mode:
		//	 0: Round nearest
		//	 1: Round to zero
		//	 2: Round up (ceil)
		//	 3: Round down (floor)
		// First compare against 2 (SetCC presumably predicates the emitted
		// instructions that follow it - TODO confirm):
		//   GE (modes 2,3): S0 = 0.5
		//   GT (mode 3):    S0 = fs - 0.5
		//   EQ (mode 2):    S0 = fs + 0.5
		//   GE (modes 2,3): fd = convert(S0)
		// i.e. the same +/-0.5 approximation as ceil.w.s / floor.w.s above.
		CMP(R0, Operand2(2));
		SetCC(CC_GE); MOVI2F(S0, 0.5f, R1);
		SetCC(CC_GT); VSUB(S0,fpr.R(fs),S0);
		SetCC(CC_EQ); VADD(S0,fpr.R(fs),S0);
		SetCC(CC_GE); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); /* 2,3 */
		SetCC(CC_AL);
		// Second compare against 1: modes 2 and 3 match neither EQ nor LT here,
		// so only modes 1 and 0 emit a result from this half.
		CMP(R0, Operand2(1));
		SetCC(CC_EQ); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */
		SetCC(CC_LT); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); /* 0 */
		SetCC(CC_AL);
		break;
	default:
		DISABLE;
	}
}
예제 #7
0
// Compiles the single-element VFPU memory ops:
//   opcode 50 (lv.s): load one 32-bit value from memory into VFPU register vt
//   opcode 58 (sv.s): store VFPU register vt to memory
// Any other opcode goes through DISABLE.
//
// The host address is built in R0. When the base register holds a known
// immediate the address is computed at compile time; otherwise it is computed
// at run time, either directly (fast memory) or via the safe-address helper,
// in which case the access is made conditional on the helper's result.
void Jit::Comp_SV(MIPSOpcode op) {
    CONDITIONAL_DISABLE;

    // The low two bits of the 16-bit field are part of the register number,
    // so they are masked out of the signed offset.
    s32 offset = static_cast<signed short>(op & 0xFFFC);
    int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
    MIPSGPReg rs = _RS;

    bool checkedAccess = false;
    switch (op >> 26)
    {
    case 50: // lv.s: VI(vt) = Memory::Read_U32(addr);
    {
        // Map the destination first: the safe-address path below may leave a
        // condition code active.
        fpr.MapRegV(vt, MAP_DIRTY | MAP_NOINIT);
        if (!gpr.IsImm(rs)) {
            gpr.MapReg(rs);
            if (!g_Config.bFastMemory) {
                SetCCAndR0ForSafeAddress(rs, offset, R1);
                checkedAccess = true;
            } else {
                SetR0ToEffectiveAddress(rs, offset);
            }
            // R11 presumably holds the emulated memory base - TODO confirm.
            ADD(R0, R0, R11);
        } else {
            u32 guestAddr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
            MOVI2R(R0, guestAddr + (u32)Memory::base);
        }
#ifdef __ARM_ARCH_7S__
        // On this target the load is skipped with a branch instead of predication.
        FixupBranch skipLoad;
        if (checkedAccess) {
            skipLoad = B_CC(CC_EQ);
        }
        VLDR(fpr.V(vt), R0, 0);
        if (checkedAccess) {
            SetJumpTarget(skipLoad);
            SetCC(CC_AL);
        }
#else
        VLDR(fpr.V(vt), R0, 0);
        if (checkedAccess) {
            // When the EQ condition holds, the register is set to 0.0f instead.
            SetCC(CC_EQ);
            MOVI2F(fpr.V(vt), 0.0f, R0);
            SetCC(CC_AL);
        }
#endif
    }
    break;

    case 58: // sv.s: Memory::Write_U32(VI(vt), addr);
    {
        // Map the source first: the safe-address path below may leave a
        // condition code active.
        fpr.MapRegV(vt);
        if (!gpr.IsImm(rs)) {
            gpr.MapReg(rs);
            if (!g_Config.bFastMemory) {
                SetCCAndR0ForSafeAddress(rs, offset, R1);
                checkedAccess = true;
            } else {
                SetR0ToEffectiveAddress(rs, offset);
            }
            // R11 presumably holds the emulated memory base - TODO confirm.
            ADD(R0, R0, R11);
        } else {
            u32 guestAddr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
            MOVI2R(R0, guestAddr + (u32)Memory::base);
        }
#ifdef __ARM_ARCH_7S__
        // On this target the store is skipped with a branch instead of predication.
        FixupBranch skipStore;
        if (checkedAccess) {
            skipStore = B_CC(CC_EQ);
        }
        VSTR(fpr.V(vt), R0, 0);
        if (checkedAccess) {
            SetJumpTarget(skipStore);
            SetCC(CC_AL);
        }
#else
        VSTR(fpr.V(vt), R0, 0);
        if (checkedAccess) {
            // Return to unconditional emission after the checked store.
            SetCC(CC_AL);
        }
#endif
    }
    break;

    default:
        DISABLE;
    }
}
예제 #8
0
void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
    _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN);
    if (!js.prefixD) return;

    int n = GetNumVectorElements(sz);
    for (int i = 0; i < n; i++) 	{
        if (js.VfpuWriteMask(i))
            continue;

        // TODO: These clampers are wrong - put this into google
        // and look at the plot:   abs(x) - abs(x-0.5) + 0.5
        // It's too steep.

        // Also, they mishandle NaN and Inf.
        int sat = (js.prefixD >> (i * 2)) & 3;
        if (sat == 1) {
            // clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1]
            fpr.MapRegV(vregs[i], MAP_DIRTY);

            MOVI2F(S0, 0.0f, R0);
            MOVI2F(S1, 1.0f, R0);
            VCMP(fpr.V(vregs[i]), S0);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_LE);
            VMOV(fpr.V(vregs[i]), S0);
            SetCC(CC_AL);
            VCMP(fpr.V(vregs[i]), S1);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_GT);
            VMOV(fpr.V(vregs[i]), S1);
            SetCC(CC_AL);

            /*
            VABS(S1, fpr.V(vregs[i]));                  // S1 = fabs(x)
            VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD}
            VABS(fpr.V(vregs[i]), fpr.V(vregs[i]));
            VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 + 0.5f
            VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);*/
        } else if (sat == 3) {
            fpr.MapRegV(vregs[i], MAP_DIRTY);

            MOVI2F(S0, -1.0f, R0);
            MOVI2F(S1, 1.0f, R0);
            VCMP(fpr.V(vregs[i]), S0);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_LT);
            VMOV(fpr.V(vregs[i]), S0);
            SetCC(CC_AL);
            VCMP(fpr.V(vregs[i]), S1);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_GT);
            VMOV(fpr.V(vregs[i]), S1);
            SetCC(CC_AL);

            // clamped = fabs(x) - fabs(x-1.0f);        // [-1, 1]
            /*
            fpr.MapRegV(vregs[i], MAP_DIRTY);
            MOVI2F(S0, 1.0f, R0);
            VABS(S1, fpr.V(vregs[i]));                  // S1 = fabs(x)
            VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD}
            VABS(fpr.V(vregs[i]), fpr.V(vregs[i]));
            VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2
            */
        }
    }
}
예제 #9
0
	// Compiles the VFPU "one source vector -> one destination vector" ops
	// (vmov, vabs, vneg, vrcp, vrsq, vsqrt, vnrcp, ...), selected by bits 16..20
	// of op.
	//
	// NOTE(review): the unconditional DISABLE right after CONDITIONAL_DISABLE
	// presumably falls back to the generic path and returns - TODO confirm. If
	// so, everything below it is currently unreachable work in progress.
	//
	// Intended flow of the disabled body:
	//   1. Pick a target register per lane: the destination itself when writing
	//      it cannot clobber a source that is still needed, otherwise a temp.
	//   2. Emit the per-lane operation into that target.
	//   3. Map the real destination and copy each target into it.
	//   4. Apply the destination prefix and release spill locks.
	void Jit::Comp_VV2Op(u32 op) {
		CONDITIONAL_DISABLE;

		DISABLE;

		if (js.HasUnknownPrefix())
			DISABLE;

		VectorSize sz = GetVecSize(op);
		int n = GetNumVectorElements(sz);

		u8 sregs[4], dregs[4];
		GetVectorRegsPrefixS(sregs, sz, _VS);
		GetVectorRegsPrefixD(dregs, sz, _VD);

		// Step 1: choose where each lane's result is written.
		ARMReg tempxregs[4];
		for (int i = 0; i < n; ++i)
		{
			if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
			{
				// Writing dregs[i] directly is not overlap-safe: go through a temp.
				int reg = fpr.GetTempV();
				fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
				fpr.SpillLockV(reg);
				tempxregs[i] = fpr.V(reg);
			}
			else
			{
				// Only load the old value when the destination is also this lane's source.
				fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
				fpr.SpillLockV(dregs[i]);
				tempxregs[i] = fpr.V(dregs[i]);
			}
		}

		// Step 2: emit the operation for each lane.
		// Warning: sregs[i] and tempxregs[i] may be the same reg.
		// Helps for vmov, hurts for vrcp, etc.
		// NOTE(review): the DISABLE cases below sit inside the lane loop, after
		// registers have been mapped and spill-locked above. If this function is
		// re-enabled, the unsupported-op check probably belongs before step 1 -
		// confirm. The switch also has no default, so an unlisted sub-op emits
		// nothing here but still reaches the copy in step 3.
		for (int i = 0; i < n; ++i)
		{
			switch ((op >> 16) & 0x1f)
			{
			case 0: // d[i] = s[i]; break; //vmov
				// Probably for swizzle.
				VMOV(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 1: // d[i] = fabsf(s[i]); break; //vabs
				//if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				VABS(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 2: // d[i] = -s[i]; break; //vneg
				VNEG(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
				DISABLE;
				break;
			case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
				DISABLE;
				break;
			case 16: // d[i] = 1.0f / s[i]; break; //vrcp
				MOVI2F(S0, 1.0f, R0);
				VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
				break;
			case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
				MOVI2F(S0, 1.0f, R0);
				VSQRT(S1, fpr.V(sregs[i]));
				VDIV(tempxregs[i], S0, S1);
				break;
			case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
				DISABLE;
				break;
			case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
				DISABLE;
				break;
			case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
				DISABLE;
				break;
			case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
				DISABLE;
				break;
			case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
				// The square root is followed by an absolute value.
				// NOTE(review): presumably to drop the sign of a -0.0 result - confirm.
				VSQRT(tempxregs[i], fpr.V(sregs[i]));
				VABS(tempxregs[i], tempxregs[i]);
				break;
			case 23: // d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
				DISABLE;
				break;
			case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
				MOVI2F(S0, -1.0f, R0);
				VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
				break;
			case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
				DISABLE;
				break;
			case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
				DISABLE;
				break;
			}
		}

		// Step 3: copy every lane's result into the real destination register.
		// For lanes that were written in place in step 1, source and target of
		// this VMOV name the same vreg.
		fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
		for (int i = 0; i < n; ++i)
		{
			VMOV(fpr.V(dregs[i]), tempxregs[i]);
		}

		// Step 4: destination prefix, then drop the spill locks taken above.
		ApplyPrefixD(dregs, sz);

		fpr.ReleaseSpillLocks();
	}