Beispiel #1
0
bool FPURegCache::IsMappedVS(const u8 *v, VectorSize vsz) {
	const int n = GetNumVectorElements(vsz);

	// Make sure the first reg is at least mapped in the right place.
	if (!IsMappedVS(v[0]))
		return false;
	if (vregs[v[0]].lane != 1)
		return false;

	// And make sure the rest are mapped to the same reg in the right positions.
	X64Reg xr = VSX(v);
	for (int i = 1; i < n; ++i) {
		u8 vi = v[i];
		if (!IsMappedVS(vi) || VSX(&vi) != xr)
			return false;
		if (vregs[vi].lane != i + 1)
			return false;
	}
	// TODO: Optimize this case?  It happens.
	for (int i = n; i < 4; ++i) {
		if (xregs[xr].mipsRegs[i] != -1) {
			return false;
		}
	}
	return true;
}
Beispiel #2
0
void FPURegCache::SimpleRegsV(const u8 *v, VectorSize vsz, int flags) {
	const int n = GetNumVectorElements(vsz);
	// TODO: Could be more optimal (in case of Discard or etc.)
	for (int i = 0; i < n; ++i) {
		SimpleRegV(v[i], flags);
	}
}
Beispiel #3
0
// Applies the destination prefix (read from the VFPU_CTRL_DPREFIX control register)
// to the first GetNumVectorElements(size) floats in v.
//   v             - values to saturate in place.
//   size          - vector size; determines how many elements are processed.
//   onlyWriteMask - when true, the saturation step is skipped and only the
//                   per-element write mask bits are decoded.
// NOTE(review): the remainder of this function (including whatever consumes
// writeMask) is not visible in this excerpt.
void ApplyPrefixD(float *v, VectorSize size, bool onlyWriteMask = false)
{
	u32 data = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	// A zero prefix means no saturation and no masking: nothing to do.
	if (!data)
		return;
	int n = GetNumVectorElements(size);
	bool writeMask[4];
	for (int i = 0; i < n; i++)
	{
		// Bit (8 + i) is the write mask flag for element i.
		int mask = (data >> (8 + i)) & 1;
		writeMask[i] = mask ? true : false;
		if (!onlyWriteMask) {
			// Bits (2i, 2i + 1) select the saturation mode for element i.
			int sat = (data >> (i * 2)) & 3;
			if (sat == 1)
			{
				// Mode 1: clamp to [0, 1].
				if (v[i] > 1.0f) v[i] = 1.0f;
				if (v[i] < 0.0f) v[i] = 0.0f;
			}
			else if (sat == 3)
			{
				// Mode 3: clamp to [-1, 1].
				if (v[i] > 1.0f)  v[i] = 1.0f;
				if (v[i] < -1.0f) v[i] = -1.0f;
			}
		}
	}
Beispiel #4
0
	void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
		_assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN);
		if (!js.prefixD) return;

		int n = GetNumVectorElements(sz);
		for (int i = 0; i < n; i++) 	{
			if (js.VfpuWriteMask(i))
				continue;

			int sat = (js.prefixD >> (i * 2)) & 3;
			if (sat == 1) {
				// clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1]
				fpr.MapRegV(vregs[i], MAP_DIRTY);
				MOVI2F(S0, 0.5, R0);
				VABS(S1, fpr.V(vregs[i]));     // S1 = fabs(x)
				VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD}
				VABS(S2, S2);
				VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2 + 0.5f
				VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);
			} else if (sat == 3) {
				// clamped = fabs(x) - fabs(x-1.0f);        // [-1, 1]
				fpr.MapRegV(vregs[i], MAP_DIRTY);
				MOVI2F(S0, 1.0, R0);
				VABS(S1, fpr.V(vregs[i]));     // S1 = fabs(x)
				VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD}
				VABS(S2, S2);
				VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2
			}
		}
	}
Beispiel #5
0
bool FPURegCache::TryMapRegsVS(const u8 *v, VectorSize vsz, int flags) {
	const int n = GetNumVectorElements(vsz);

	if (!CanMapVS(v, vsz)) {
		return false;
	}

	if (IsMappedVS(v, vsz)) {
		// Already mapped then, perfect.  Just mark dirty.
		if ((flags & MAP_DIRTY) != 0)
			xregs[VSX(v)].dirty = true;
		return true;
	}

	// At this point, some or all are in single regs or memory, and they're not locked there.

	if (n == 1) {
		// Single is easy, just map normally but track as a SIMD reg.
		// This way V/VS can warn about improper usage properly.
		MapRegV(v[0], flags);
		vregs[v[0]].lane = 1;
		if ((flags & MAP_DIRTY) != 0)
			xregs[VSX(v)].dirty = true;
		Invariant();
		return true;
	}

	X64Reg xr;
	if ((flags & MAP_NOINIT) != MAP_NOINIT) {
		xr = LoadRegsVS(v, n);
	} else {
		xr = GetFreeXReg();
	}

	// Victory, now let's clean up everything.
	OpArg newloc = Gen::R(xr);
	bool dirty = (flags & MAP_DIRTY) != 0;
	for (int i = 0; i < n; ++i) {
		MIPSCachedFPReg &vr = vregs[v[i]];
		if (vr.away) {
			// Clear the xreg it was in before.
			X64Reg oldXReg = vr.location.GetSimpleReg();
			xregs[oldXReg].mipsReg = -1;
			if (xregs[oldXReg].dirty) {
				// Inherit the "dirtiness" (ultimately set below for all regs.)
				dirty = true;
				xregs[oldXReg].dirty = false;
			}
		}
		xregs[xr].mipsRegs[i] = v[i] + 32;
		vr.location = newloc;
		vr.lane = i + 1;
		vr.away = true;
	}
	xregs[xr].dirty = dirty;

	Invariant();
	return true;
}
Beispiel #6
0
void FPURegCache::MapRegsV(int vec, VectorSize sz, int flags) {
	u8 r[4];
	GetVectorRegs(r, sz, vec);
	SpillLockV(r, sz);
	for (int i = 0; i < GetNumVectorElements(sz); i++) {
		MapReg(r[i] + 32, (flags & MAP_NOINIT) != MAP_NOINIT, (flags & MAP_DIRTY) != 0);
	}
}
Beispiel #7
0
// Expands vector `vec` into its element registers, spill-locks them, and binds each
// one to a host register.
//   vec   - vector register number to expand via GetVectorRegs().
//   sz    - vector size; determines how many elements are bound.
//   flags - MAP_* flags.  The current value is loaded unless the complete MAP_NOINIT
//           mask is present; MAP_DIRTY marks the binding dirty.
void FPURegCache::MapRegsV(int vec, VectorSize sz, int flags) {
	u8 v[4];
	GetVectorRegs(v, sz, vec);
	SpillLockV(v, sz);

	// Compare against the whole MAP_NOINIT mask rather than testing for "any bit set":
	// if MAP_NOINIT shares bits with another flag (e.g. MAP_DIRTY), a plain "== 0" test
	// would wrongly skip the load for a dirty-but-initialized mapping.
	const bool doLoad = (flags & MAP_NOINIT) != MAP_NOINIT;
	const bool makeDirty = (flags & MAP_DIRTY) != 0;
	const int n = GetNumVectorElements(sz);
	for (int i = 0; i < n; i++) {
		BindToRegister(v[i] + 32, doLoad, makeDirty);
	}
}
Beispiel #8
0
	// Emits code that applies an S/T prefix (swizzle, abs, negate, constants) to the
	// element registers in vregs.  Elements the prefix changes are redirected to temp
	// registers, so the modified value is never written back to the original.
	void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
		// 0xE4 is the identity prefix.
		if (prefix == 0xE4) return;

		static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};
		const int count = GetNumVectorElements(sz);

		// Remember the unmodified registers, since vregs is rewritten as we go.
		u8 sourceRegs[4];
		for (int i = 0; i < count; i++)
			sourceRegs[i] = vregs[i];

		for (int i = 0; i < count; i++) {
			int select = (prefix >> (i*2)) & 3;
			const bool useAbs = ((prefix >> (8+i)) & 1) != 0;
			const bool useConstant = ((prefix >> (12+i)) & 1) != 0;
			const bool negate = ((prefix >> (16+i)) & 1) != 0;

			// Unchanged, hurray.
			if (!useConstant && !useAbs && !negate && select == i)
				continue;

			// Work in a temp reg so the original register keeps its value.
			vregs[i] = fpr.GetTempV();
			fpr.MapRegV(vregs[i], MAP_NOINIT | MAP_DIRTY);

			if (useConstant) {
				// TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though.
				// With the constant bit set, the abs bit selects the upper half of the table.
				MOVI2F(fpr.V(vregs[i]), constantArray[select + (useAbs ? 4 : 0)], R0);
			} else {
				// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
				// TODO: But some ops seem to use const 0 instead?
				if (select >= count) {
					ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", prefix, sz);
					select = 0;
				}

				if (useAbs)
					VABS(fpr.V(vregs[i]), fpr.V(sourceRegs[select]));
				else
					VMOV(fpr.V(vregs[i]), fpr.V(sourceRegs[select]));
			}

			// TODO: This can be integrated into the VABS / VMOV above, and also the constants.
			if (negate)
				VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));

			// TODO: This probably means it will swap out soon, inefficiently...
			fpr.ReleaseSpillLockV(vregs[i]);
		}
	}
Beispiel #9
0
// Emits code that applies an S/T prefix (swizzle, abs, negate, constants) to the
// element registers in vregs.
//   vregs  - in/out: element registers; entries the prefix modifies are replaced
//            with temp registers holding the transformed value.
//   prefix - raw prefix bits (0xE4 is the identity and is a no-op).
//   sz     - vector size; determines how many elements are processed.
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
    if (prefix == 0xE4) return;

    int n = GetNumVectorElements(sz);
    u8 origV[4];
    static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

    // Remember the unmodified registers, since vregs is rewritten as we go.
    for (int i = 0; i < n; i++)
        origV[i] = vregs[i];

    for (int i = 0; i < n; i++)
    {
        int regnum = (prefix >> (i*2)) & 3;
        int abs    = (prefix >> (8+i)) & 1;
        int negate = (prefix >> (16+i)) & 1;
        int constants = (prefix >> (12+i)) & 1;

        // Unchanged, hurray.
        if (!constants && regnum == i && !abs && !negate)
            continue;

        // This puts the value into a temp reg, so we won't write the modified value back.
        vregs[i] = fpr.GetTempV();
        if (!constants) {
            // Validate the swizzle BEFORE the first use of origV[regnum]: only the first
            // n entries of origV are initialized, so an out-of-range index must not reach
            // the register mapper.
            // Prefix may say "z, z, z, z" but if this is a pair, we force to x.
            // TODO: But some ops seem to use const 0 instead?
            if (regnum >= n) {
                WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
                regnum = 0;
            }

            fpr.MapDirtyInV(vregs[i], origV[regnum]);
            fpr.SpillLockV(vregs[i]);

            if (abs) {
                VABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
                if (negate)
                    VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
            } else {
                if (negate)
                    VNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
                else
                    VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
            }
        } else {
            fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
            fpr.SpillLockV(vregs[i]);
            // With the constant bit set, the abs bit selects the upper half of the table.
            MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0, negate);
        }
    }
}
Beispiel #10
0
// Fills regs with the element registers of vectorReg, then swaps every element that
// the (compile-time known) D prefix write-masks for a temp register.
void Jit::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) {
    _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN);

    GetVectorRegs(regs, sz, vectorReg);

    // Only a non-zero prefix can mask anything.
    if (js.prefixD != 0) {
        const int count = GetNumVectorElements(sz);
        for (int lane = 0; lane < count; ++lane) {
            if (!js.VfpuWriteMask(lane))
                continue;
            // Hopefully this is rare, we'll just write it into a reg we drop.
            regs[lane] = fpr.GetTempV();
        }
    }
}
Beispiel #11
0
// Maps the vector `r` as a SIMD register.  If the first TryMapRegsVS() attempt fails,
// every element is stored from its register and the mapping is tried once more.
void FPURegCache::MapRegsVS(const u8 *r, VectorSize vsz, int flags) {
	_dbg_assert_msg_(JIT, jo_->enableVFPUSIMD, "Should not map simd regs when option is off.");

	if (TryMapRegsVS(r, vsz, flags))
		return;

	// TODO: Could be more optimal.
	const int count = GetNumVectorElements(vsz);
	for (int idx = 0; idx < count; ++idx)
		StoreFromRegisterV(r[idx]);

	if (!TryMapRegsVS(r, vsz, flags)) {
		_dbg_assert_msg_(JIT, false, "MapRegsVS() failed on second try.");
	}
}
Beispiel #12
0
// Applies an S/T prefix to the floats in v, in place: per element it either picks a
// (possibly swizzled) source element, optionally taking its absolute value, or one of
// eight constants, and then optionally negates the result.
void ApplyPrefixST(float *v, u32 data, VectorSize size)
{
	// Possible optimization shortcut:
	if (data == 0xe4)
		return;

	static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};
	const int count = GetNumVectorElements(size);

	// Snapshot the inputs, since v is overwritten while swizzling.
	float saved[4];
	for (int i = 0; i < count; i++)
		saved[i] = v[i];

	for (int i = 0; i < count; i++)
	{
		int select = (data >> (i*2)) & 3;
		const bool useAbs = ((data >> (8+i)) & 1) != 0;
		const bool useConstant = ((data >> (12+i)) & 1) != 0;
		const bool negate = ((data >> (16+i)) & 1) != 0;

		float result;
		if (useConstant)
		{
			// With the constant bit set, the abs bit selects the upper half of the table.
			result = constantArray[select + (useAbs ? 4 : 0)];
		}
		else
		{
			// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
			// TODO: But some ops seem to use const 0 instead?
			if (select >= count) {
				ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x: %i / %d at PC = %08x (%s)", data, select, count, currentMIPS->pc, currentMIPS->DisasmAt(currentMIPS->pc));
				select = 0;
			}

			result = saved[select];
			if (useAbs)
				result = fabs(result);
		}

		v[i] = negate ? -result : result;
	}
}
Beispiel #13
0
	// Compiles a VFPU dot product: multiplies the S and T vectors element-wise,
	// accumulates in S0, and moves the sum into the destination register.
	// Falls back to Comp_Generic() whenever a prefix may be active.
	void Jit::Comp_VDot(u32 op)
	{
		// DISABLE;
		CONDITIONAL_DISABLE;
		// WARNING: No prefix support!
		if (js.MayHavePrefix()) {
			Comp_Generic(op);
			js.EatPrefix();
			return;
		}

		const int dest = _VD;
		const VectorSize sz = GetVecSize(op);
		const int count = GetNumVectorElements(sz);

		// TODO: Force read one of them into regs? probably not.
		u8 sregs[4], tregs[4];
		GetVectorRegs(sregs, sz, _VS);
		GetVectorRegs(tregs, sz, _VT);

		// TODO: applyprefixST here somehow (shuffle, etc...)
		fpr.MapRegsV(sregs, sz, 0);
		fpr.MapRegsV(tregs, sz, 0);

		// sum = s[0]*t[0], then sum += s[i]*t[i] for the remaining elements.
		VMUL(S0, fpr.V(sregs[0]), fpr.V(tregs[0]));
		for (int idx = 1; idx < count; idx++) {
			VMLA(S0, fpr.V(sregs[idx]), fpr.V(tregs[idx]));
		}
		fpr.ReleaseSpillLocks();

		fpr.MapRegV(dest, MAP_NOINIT | MAP_DIRTY);

		// TODO: applyprefixD here somehow (write mask etc..)
		VMOV(fpr.V(dest), S0);

		fpr.ReleaseSpillLocks();

		js.EatPrefix();
	}
Beispiel #14
0
void ApplyPrefixD(float *v, VectorSize size, bool onlyWriteMask = false)
{
	u32 data = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	if (!data || onlyWriteMask)
		return;
	int n = GetNumVectorElements(size);
	for (int i = 0; i < n; i++)
	{
		int sat = (data >> (i * 2)) & 3;
		if (sat == 1)
		{
			if (v[i] > 1.0f) v[i] = 1.0f;
			// This includes -0.0f -> +0.0f.
			if (v[i] <= 0.0f) v[i] = 0.0f;
		}
		else if (sat == 3)
		{
			if (v[i] > 1.0f)  v[i] = 1.0f;
			if (v[i] < -1.0f) v[i] = -1.0f;
		}
	}
}
Beispiel #15
0
bool FPURegCache::CanMapVS(const u8 *v, VectorSize vsz) {
	const int n = GetNumVectorElements(vsz);

	if (!jo_->enableVFPUSIMD) {
		return false;
	}

	if (IsMappedVS(v, vsz)) {
		return true;
	} else if (vregs[v[0]].lane != 0) {
		const MIPSCachedFPReg &v0 = vregs[v[0]];
		_dbg_assert_msg_(JIT, v0.away, "Must be away when lane != 0");
		_dbg_assert_msg_(JIT, v0.location.IsSimpleReg(), "Must be is register when lane != 0");

		// Already in a different simd set.
		return false;
	}

	if (vregs[v[0]].locked) {
		// If it's locked, we can't mess with it.
		return false;
	}

	// Next, fail if any of the other regs are in simd currently.
	// TODO: Only if locked?  Not sure if it will be worth breaking them anyway.
	for (int i = 1; i < n; ++i) {
		if (vregs[v[i]].lane != 0) {
			return false;
		}
		// If it's locked, in simd or not, we can't use it.
		if (vregs[v[i]].locked) {
			return false;
		}
		_assert_msg_(JIT, !vregs[v[i]].location.IsImm(), "Cannot handle imms in fp cache.");
	}

	return true;
}
Beispiel #16
0
// Applies an S/T prefix to the floats in v, in place.
//   v    - in/out: the vector's elements.
//   data - raw prefix bits.  Per element i: bits (2i, 2i+1) select the source element
//          or constant, bit (8+i) is abs, bit (12+i) selects constants, bit (16+i) negates.
//          0xe4 is the identity and is a no-op.
//   size - vector size; determines how many elements are processed.
void ApplyPrefixST(float *v, u32 data, VectorSize size)
{
	// Possible optimization shortcut:
	if (data == 0xe4)
		return;

	int n = GetNumVectorElements(size);
	float origV[4];
	static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

	// Snapshot the inputs, since v is overwritten while swizzling.
	for (int i = 0; i < n; i++)
	{
		origV[i] = v[i];
	}

	for (int i = 0; i < n; i++)
	{
		int regnum = (data >> (i*2)) & 3;
		int abs    = (data >> (8+i)) & 1;
		int negate = (data >> (16+i)) & 1;
		int constants = (data >> (12+i)) & 1;

		if (!constants)
		{
			// Only the first n entries of origV are initialized.  A swizzle that names an
			// element past the end of the vector (e.g. "z" on a pair) is forced to x
			// instead of reading an uninitialized slot.
			if (regnum >= n)
				regnum = 0;

			v[i] = origV[regnum];
			if (abs)
				v[i] = fabs(v[i]);
		}
		else
		{
			// With the constant bit set, the abs bit selects the upper half of the table.
			v[i] = constantArray[regnum + (abs<<2)];
		}

		if (negate)
			v[i] = -v[i];
	}
}
Beispiel #17
0
	// Compiles vzero / vone: loads 0.0f or 1.0f into S0 and copies it into every
	// element of the destination vector, then applies the D prefix.
	// Falls back to Comp_Generic() whenever a prefix may be active.
	void Jit::Comp_VVectorInit(u32 op)
	{
		CONDITIONAL_DISABLE;

		// WARNING: No prefix support!
		if (js.MayHavePrefix()) {
			Comp_Generic(op);
			js.EatPrefix();
			return;
		}

		// Bits 16..19 of the opcode pick the fill value.
		const int initKind = (op >> 16) & 0xF;
		if (initKind == 6) {
			// vzero
			MOVI2F(S0, 0.0f, R0);
		} else if (initKind == 7) {
			// vone
			MOVI2F(S0, 1.0f, R0);
		} else {
			DISABLE;
		}

		const VectorSize sz = GetVecSize(op);
		const int count = GetNumVectorElements(sz);

		u8 dregs[4];
		GetVectorRegsPrefixD(dregs, sz, _VD);
		fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

		for (int idx = 0; idx < count; ++idx) {
			VMOV(fpr.V(dregs[idx]), S0);
		}

		ApplyPrefixD(dregs, sz);
		fpr.ReleaseSpillLocks();
	}
Beispiel #18
0
// Compiles vzero / vone: loads 0.0f or 1.0f into S0 and copies it into every element
// of the destination vector, then applies the D prefix and drops locks and temps.
void Jit::Comp_VVectorInit(MIPSOpcode op)
{
    CONDITIONAL_DISABLE;

    // WARNING: No prefix support!
    if (js.HasUnknownPrefix() || disablePrefixes) {
        DISABLE;
    }

    // Bits 16..19 of the opcode pick the fill value.
    const int initKind = (op >> 16) & 0xF;
    if (initKind == 6) {
        // vzero
        MOVI2F(S0, 0.0f, R0);
    } else if (initKind == 7) {
        // vone
        MOVI2F(S0, 1.0f, R0);
    } else {
        DISABLE;
    }

    const VectorSize sz = GetVecSize(op);
    const int count = GetNumVectorElements(sz);

    u8 dregs[4];
    GetVectorRegsPrefixD(dregs, sz, _VD);
    fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

    for (int idx = 0; idx < count; ++idx) {
        VMOV(fpr.V(dregs[idx]), S0);
    }

    ApplyPrefixD(dregs, sz);

    fpr.ReleaseSpillLocksAndDiscardTemps();
}
Beispiel #19
0
// Emits code that applies the D prefix's saturation modes to the registers in vregs,
// using compares and conditional moves.  Requires the D prefix to be known at compile
// time; write-masked elements are skipped.
//
// Note carried over from the previous arithmetic implementation: clamps of the form
// abs(x) - abs(x-0.5) + 0.5 are wrong (the slope is too steep), which is why compares
// are used here.  TODO: NaN and Inf are still mishandled.
void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
    _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN);
    if (!js.prefixD) return;

    const int count = GetNumVectorElements(sz);
    for (int idx = 0; idx < count; idx++) {
        if (js.VfpuWriteMask(idx))
            continue;

        // Two bits per element: 1 = clamp to [0, 1], 3 = clamp to [-1, 1].
        const int sat = (js.prefixD >> (idx * 2)) & 3;
        if (sat != 1 && sat != 3)
            continue;
        const bool zeroToOne = (sat == 1);

        fpr.MapRegV(vregs[idx], MAP_DIRTY);

        // S0 = lower bound, S1 = upper bound.
        MOVI2F(S0, zeroToOne ? 0.0f : -1.0f, R0);
        MOVI2F(S1, 1.0f, R0);

        // Lower clamp: x <= 0 -> 0 for [0, 1], x < -1 -> -1 for [-1, 1].
        VCMP(fpr.V(vregs[idx]), S0);
        VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
        SetCC(zeroToOne ? CC_LE : CC_LT);
        VMOV(fpr.V(vregs[idx]), S0);
        SetCC(CC_AL);

        // Upper clamp: x > 1 -> 1.
        VCMP(fpr.V(vregs[idx]), S1);
        VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
        SetCC(CC_GT);
        VMOV(fpr.V(vregs[idx]), S1);
        SetCC(CC_AL);
    }
}
Beispiel #20
0
	// Compiles the element-wise three-operand VFPU ops vadd, vsub, vdiv and vmul:
	// d[i] = s[i] <op> t[i] for each element of the vector.
	// NOTE(review): the unconditional DISABLE right after CONDITIONAL_DISABLE presumably
	// bails out before anything below runs - confirm against the macro definition.
	void Jit::Comp_VecDo3(u32 op)
	{
		CONDITIONAL_DISABLE;
		DISABLE;
		// WARNING: No prefix support!
		if (js.MayHavePrefix())
		{
			Comp_Generic(op);
			js.EatPrefix();
			return;
		}

		int vd = _VD;
		int vs = _VS;
		int vt = _VT;

		// Select the emitter method from the opcode: op >> 26 picks the group,
		// bits 23..25 pick the operation within it.
		void (ARMXEmitter::*triop)(ARMReg, ARMReg, ARMReg) = NULL;
		switch (op >> 26)
		{
		case 24: //VFPU0
			switch ((op >> 23)&7)
			{
			case 0: // d[i] = s[i] + t[i]; break; //vadd
				triop = &ARMXEmitter::VADD;
				break;
			case 1: // d[i] = s[i] - t[i]; break; //vsub
				triop = &ARMXEmitter::VSUB;
				break;
			case 7: // d[i] = s[i] / t[i]; break; //vdiv
				triop = &ARMXEmitter::VDIV;
				break;
			}
			break;
		case 25: //VFPU1
			switch ((op >> 23)&7)
			{
			case 0: // d[i] = s[i] * t[i]; break; //vmul
				triop = &ARMXEmitter::VMUL;
				break;
			}
			break;
		}

		// Any opcode not matched above is not handled here.
		if (!triop) {
			DISABLE;
		}

		VectorSize sz = GetVecSize(op);
		int n = GetNumVectorElements(sz);

		u8 sregs[4], tregs[4], dregs[4];
		GetVectorRegsPrefixS(sregs, sz, _VS);
		GetVectorRegsPrefixT(tregs, sz, _VT);
		GetVectorRegsPrefixD(dregs, sz, _VD);

		// Pick where each result is computed: directly in the destination when
		// IsOverlapSafeAllowS() permits it, otherwise in a temp register.
		MIPSReg tempregs[4];
		for (int i = 0; i < n; i++) {
			if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs, n, tregs)) {
				tempregs[i] = fpr.GetTempV();
			} else {
				// Skip the initial load unless the destination doubles as a source of this element.
				fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] || dregs[i] == tregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
				tempregs[i] = dregs[i];
			}
		}

		// Emit one operation per element, holding spill locks on the sources
		// only for the duration of that element.
		for (int i = 0; i < n; i++) {
			fpr.SpillLockV(sregs[i]);
			fpr.SpillLockV(tregs[i]);
			fpr.MapRegV(sregs[i]);
			fpr.MapRegV(tregs[i]);
			fpr.MapRegV(tempregs[i]);
			(this->*triop)(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i]));
			fpr.ReleaseSpillLockV(sregs[i]);
			fpr.ReleaseSpillLockV(tregs[i]);
		}

		// Copy results that were computed in temps into their real destinations.
		fpr.MapRegsV(dregs, sz, MAP_DIRTY);
		for (int i = 0; i < n; i++) {
			if (dregs[i] != tempregs[i])
				VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
		}
		ApplyPrefixD(dregs, sz);
		
		fpr.ReleaseSpillLocks();
		
		js.EatPrefix();
	}
Beispiel #21
0
// Spill-locks the element registers in `v` and binds each one to a host register.
//   v     - element registers of the vector.
//   sz    - vector size; determines how many elements are bound.
//   flags - MAP_* flags.  The current value is loaded unless the complete MAP_NOINIT
//           mask is present; MAP_DIRTY marks the binding dirty.
void FPURegCache::MapRegsV(const u8 *v, VectorSize sz, int flags) {
	SpillLockV(v, sz);

	// Compare against the whole MAP_NOINIT mask rather than testing for "any bit set":
	// if MAP_NOINIT shares bits with another flag (e.g. MAP_DIRTY), a plain "== 0" test
	// would wrongly skip the load for a dirty-but-initialized mapping.
	const bool doLoad = (flags & MAP_NOINIT) != MAP_NOINIT;
	const bool makeDirty = (flags & MAP_DIRTY) != 0;
	const int n = GetNumVectorElements(sz);
	for (int i = 0; i < n; i++) {
		BindToRegister(v[i] + 32, doLoad, makeDirty);
	}
}
Beispiel #22
0
	// Compiles the element-wise two-operand VFPU ops (vmov, vabs, vneg, vrcp, vrsq,
	// vsqrt, vnrcp); the remaining sub-ops hit DISABLE.
	// NOTE(review): the unconditional DISABLE right after CONDITIONAL_DISABLE presumably
	// bails out before anything below runs - confirm against the macro definition.
	void Jit::Comp_VV2Op(u32 op) {
		CONDITIONAL_DISABLE;

		DISABLE;

		if (js.HasUnknownPrefix())
			DISABLE;

		VectorSize sz = GetVecSize(op);
		int n = GetNumVectorElements(sz);

		u8 sregs[4], dregs[4];
		GetVectorRegsPrefixS(sregs, sz, _VS);
		GetVectorRegsPrefixD(dregs, sz, _VD);

		// Pick the host register each result is computed in: the destination itself
		// when IsOverlapSafeAllowS() permits it, otherwise a freshly mapped temp.
		ARMReg tempxregs[4];
		for (int i = 0; i < n; ++i)
		{
			if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
			{
				int reg = fpr.GetTempV();
				fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
				fpr.SpillLockV(reg);
				tempxregs[i] = fpr.V(reg);
			}
			else
			{
				// Skip the initial load unless the destination is also this element's source.
				fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
				fpr.SpillLockV(dregs[i]);
				tempxregs[i] = fpr.V(dregs[i]);
			}
		}

		// Warning: sregs[i] and tempxregs[i] may be the same reg.
		// Helps for vmov, hurts for vrcp, etc.
		for (int i = 0; i < n; ++i)
		{
			// Bits 16..20 of the opcode select the operation.
			switch ((op >> 16) & 0x1f)
			{
			case 0: // d[i] = s[i]; break; //vmov
				// Probably for swizzle.
				VMOV(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 1: // d[i] = fabsf(s[i]); break; //vabs
				//if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				VABS(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 2: // d[i] = -s[i]; break; //vneg
				VNEG(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
				DISABLE;
				break;
			case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
				DISABLE;
				break;
			case 16: // d[i] = 1.0f / s[i]; break; //vrcp
				MOVI2F(S0, 1.0f, R0);
				VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
				break;
			case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
				MOVI2F(S0, 1.0f, R0);
				VSQRT(S1, fpr.V(sregs[i]));
				VDIV(tempxregs[i], S0, S1);
				break;
			case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
				DISABLE;
				break;
			case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
				DISABLE;
				break;
			case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
				DISABLE;
				break;
			case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
				DISABLE;
				break;
			case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
				VSQRT(tempxregs[i], fpr.V(sregs[i]));
				VABS(tempxregs[i], tempxregs[i]);
				break;
			case 23: // d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
				DISABLE;
				break;
			case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
				MOVI2F(S0, -1.0f, R0);
				VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
				break;
			case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
				DISABLE;
				break;
			case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
				DISABLE;
				break;
			}
		}

		// Move every computed value into its real destination register.
		fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
		for (int i = 0; i < n; ++i)
		{
			VMOV(fpr.V(dregs[i]), tempxregs[i]);
		}

		ApplyPrefixD(dregs, sz);

		fpr.ReleaseSpillLocks();
	}
Beispiel #23
0
// Sets the locked flag on every element register of the vector `v`.
void FPURegCache::SpillLockV(const u8 *v, VectorSize sz) {
	const int count = GetNumVectorElements(sz);
	for (int idx = 0; idx < count; ++idx)
		vregs[v[idx]].locked = true;
}
Beispiel #24
0
// Clears the locked flag on every element register of the vector `vec`.
void FPURegCache::ReleaseSpillLockV(const u8 *vec, VectorSize sz) {
	const int count = GetNumVectorElements(sz);
	for (int idx = 0; idx < count; ++idx)
		vregs[vec[idx]].locked = false;
}
// Emits NEON code that applies the D prefix to dest: first the saturation modes on
// dest.rd, then - when dest.backingRd differs from dest.rd - the write mask.
//   dest - destination descriptor; dest.sz gives the vector size, dest.rd is the
//          register holding the freshly computed value, dest.backingRd the register
//          the result ultimately belongs in.
// Only uniform saturation (all elements sat1, or all sat3) is handled; mixed or
// partial masks are logged via ELOG and the whole register is still clamped.
void ArmJit::NEONApplyPrefixD(DestARMReg dest) {
	// Apply clamps to dest.rd
	int n = GetNumVectorElements(dest.sz);

	// Collect, per mode, a bitmask of the elements that request it.
	int sat1_mask = 0;
	int sat3_mask = 0;
	int full_mask = (1 << n) - 1;
	for (int i = 0; i < n; i++) {
		int sat = (js.prefixD >> (i * 2)) & 3;
		if (sat == 1)
			sat1_mask |= 1 << i;
		if (sat == 3)
			sat3_mask |= 1 << i;
	}

	if (sat1_mask && sat3_mask) {
		// Why would anyone do this?
		ELOG("PREFIXD: Can't have both sat[0-1] and sat[-1-1] at the same time yet");
	}

	// sat1: clamp to [0, 1].
	if (sat1_mask) {
		if (sat1_mask != full_mask) {
			ELOG("PREFIXD: Can't have partial sat1 mask yet (%i vs %i)", sat1_mask, full_mask);
		}
		if (IsD(dest.rd)) {
			VMOV_immf(D0, 0.0);
			VMOV_immf(D1, 1.0);
			VMAX(F_32, dest.rd, dest.rd, D0);
			VMIN(F_32, dest.rd, dest.rd, D1);
		} else {
			VMOV_immf(Q0, 1.0);
			VMIN(F_32, dest.rd, dest.rd, Q0);
			VMOV_immf(Q0, 0.0);
			VMAX(F_32, dest.rd, dest.rd, Q0);
		}
	}

	// sat3: clamp to [-1, 1].
	if (sat3_mask && sat1_mask != full_mask) {
		if (sat3_mask != full_mask) {
			ELOG("PREFIXD: Can't have partial sat3 mask yet (%i vs %i)", sat3_mask, full_mask);
		}
		if (IsD(dest.rd)) {
			// The lower bound is -1.0 here, matching the Q-register path below; using
			// 0.0 would clamp to [0, 1] like sat1.
			VMOV_immf(D0, -1.0);
			VMOV_immf(D1, 1.0);
			VMAX(F_32, dest.rd, dest.rd, D0);
			VMIN(F_32, dest.rd, dest.rd, D1);
		} else {
			VMOV_immf(Q0, 1.0);
			VMIN(F_32, dest.rd, dest.rd, Q0);
			VMOV_immf(Q0, -1.0);
			VMAX(F_32, dest.rd, dest.rd, Q0);
		}
	}

	// Check for actual mask operation (unrelated to the "masks" above).
	if (dest.backingRd != dest.rd) {
		// This means that we need to apply the write mask, from rd to backingRd.
		// What a pain. We can at least shortcut easy cases like half the register.
		// And we can generate the masks easily with some of the crazy vector imm modes. (bits2bytes for example).
		// So no need to load them from RAM.
		// Invert the prefix's mask bits so that a set bit means "element is written".
		int writeMask = (~(js.prefixD >> 8)) & 0xF;

		if (writeMask == 3) {
			ILOG("Doing writemask = 3");
			VMOV(D_0(dest.rd), D_0(dest.backingRd));
		} else {
			// TODO
			ELOG("PREFIXD: Arbitrary write masks not supported (%i / %i)", writeMask, full_mask);
			VMOV(dest.backingRd, dest.rd);
		}
	}
Beispiel #26
0
// Spill-locks the element registers in `r` and maps each one.  Values are loaded
// unless every MAP_NOINIT bit is set; MAP_DIRTY marks them dirty.
void FPURegCache::MapRegsV(const u8 *r, VectorSize sz, int flags) {
	SpillLockV(r, sz);

	const bool doLoad = (flags & MAP_NOINIT) != MAP_NOINIT;
	const bool makeDirty = (flags & MAP_DIRTY) != 0;
	const int count = GetNumVectorElements(sz);
	for (int idx = 0; idx < count; ++idx)
		MapReg(r[idx] + 32, doLoad, makeDirty);
}