예제 #1
0
void JitArm::ps_rsqrte(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 b = inst.FB, d = inst.FD;

	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	ARMReg fpscrReg = gpr.GetReg();
	ARMReg V0 = D1;
	ARMReg rA = gpr.GetReg();

	MOVI2R(fpscrReg, (u32)&PPC_NAN);
	VLDR(V0, fpscrReg, 0);
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));

	VCMP(vB0);
	VMRS(_PC);
	FixupBranch Less0 = B_CC(CC_LT);
		VMOV(vD0, V0);
		SetFPException(fpscrReg, FPSCR_VXSQRT);
		FixupBranch SkipOrr0 = B();
	SetJumpTarget(Less0);
	SetCC(CC_EQ);
		ORR(rA, rA, 1);
	SetCC();
	SetJumpTarget(SkipOrr0);

	VCMP(vB1);
	VMRS(_PC);
	FixupBranch Less1 = B_CC(CC_LT);
		VMOV(vD1, V0);
		SetFPException(fpscrReg, FPSCR_VXSQRT);
		FixupBranch SkipOrr1 = B();
	SetJumpTarget(Less1);
	SetCC(CC_EQ);
		ORR(rA, rA, 2);
	SetCC();
	SetJumpTarget(SkipOrr1);

	CMP(rA, 0);
	FixupBranch noException = B_CC(CC_EQ);
	SetFPException(fpscrReg, FPSCR_ZX);
	SetJumpTarget(noException);

	VCVT(S0, vB0, 0);
	VCVT(S1, vB1, 0);

	NEONXEmitter nemit(this);
	nemit.VRSQRTE(F_32, D0, D0);
	VCVT(vD0, S0, 0);
	VCVT(vD1, S1, 0);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(fpscrReg, rA);
}
예제 #2
0
void JitArm::ps_muls1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 a = inst.FA, c = inst.FC, d = inst.FD;

	ARMReg vA0 = fpr.R0(a);
	ARMReg vA1 = fpr.R1(a);
	ARMReg vC1 = fpr.R1(c);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();


	VMUL(V0, vA0, vC1);
	VMUL(V1, vA1, vC1);
	VMOV(vD0, V0);
	VMOV(vD1, V1);

	fpr.Unlock(V0);
	fpr.Unlock(V1);
}
예제 #3
0
void JitArm::ps_mr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 b = inst.FB, d = inst.FD;

	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	VMOV(vD0, vB0);
	VMOV(vD1, vB1);
}
void JitArm::stfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg v0 = fpr.R0(inst.FS);
	VCVT(S0, v0, 0);

	if (inst.RA)
	{
		MOVI2R(rB, inst.SIMM_16);
		ARMReg RA = gpr.R(inst.RA);
		ADD(rB, rB, RA);
	}
	else
	{
		MOVI2R(rB, (u32)inst.SIMM_16);
	}

	MOVI2R(rA, (u32)&Memory::Write_U32);
	PUSH(4, R0, R1, R2, R3);
	VMOV(R0, S0);
	MOV(R1, rB);

	BL(rA);

	POP(4, R0, R1, R2, R3);

	gpr.Unlock(rA, rB);
}
예제 #5
0
void JitArm::ps_merge10(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 a = inst.FA, b = inst.FB, d = inst.FD;

	ARMReg vA1 = fpr.R1(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	ARMReg V0 = fpr.GetReg();

	VMOV(V0, vB0);
	VMOV(vD0, vA1);
	VMOV(vD1, V0);

	fpr.Unlock(V0);
}
예제 #6
0
void JitArm::fmrx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(FloatingPoint)

	ARMReg vD = fpr.R0(inst.FD);
	ARMReg vB = fpr.R0(inst.FB);

	VMOV(vD, vB);
	
	if (inst.Rc) Helper_UpdateCR1(vD);
}
예제 #7
0
	void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
		if (prefix == 0xE4) return;

		int n = GetNumVectorElements(sz);
		u8 origV[4];
		static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

		for (int i = 0; i < n; i++)
			origV[i] = vregs[i];

		for (int i = 0; i < n; i++)
		{
			int regnum = (prefix >> (i*2)) & 3;
			int abs    = (prefix >> (8+i)) & 1;
			int negate = (prefix >> (16+i)) & 1;
			int constants = (prefix >> (12+i)) & 1;

			// Unchanged, hurray.
			if (!constants && regnum == i && !abs && !negate)
				continue;

			// This puts the value into a temp reg, so we won't write the modified value back.
			vregs[i] = fpr.GetTempV();
			fpr.MapRegV(vregs[i], MAP_NOINIT | MAP_DIRTY);

			if (!constants) {
				// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
				// TODO: But some ops seem to use const 0 instead?
				if (regnum >= n) {
					ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", prefix, sz);
					regnum = 0;
				}
				
				if (abs) {
					VABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
				} else {
					VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
				}
			} else {
				// TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though.
				MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0);
			}

			// TODO: This can be integrated into the VABS / VMOV above, and also the constants.
			if (negate)
				VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));

			// TODO: This probably means it will swap out soon, inefficiently...
			fpr.ReleaseSpillLockV(vregs[i]);
		}
	}
예제 #8
0
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
    if (prefix == 0xE4) return;

    int n = GetNumVectorElements(sz);
    u8 origV[4];
    static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

    for (int i = 0; i < n; i++)
        origV[i] = vregs[i];

    for (int i = 0; i < n; i++)
    {
        int regnum = (prefix >> (i*2)) & 3;
        int abs    = (prefix >> (8+i)) & 1;
        int negate = (prefix >> (16+i)) & 1;
        int constants = (prefix >> (12+i)) & 1;

        // Unchanged, hurray.
        if (!constants && regnum == i && !abs && !negate)
            continue;

        // This puts the value into a temp reg, so we won't write the modified value back.
        vregs[i] = fpr.GetTempV();
        if (!constants) {
            fpr.MapDirtyInV(vregs[i], origV[regnum]);
            fpr.SpillLockV(vregs[i]);

            // Prefix may say "z, z, z, z" but if this is a pair, we force to x.
            // TODO: But some ops seem to use const 0 instead?
            if (regnum >= n) {
                WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
                regnum = 0;
            }

            if (abs) {
                VABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
                if (negate)
                    VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
            } else {
                if (negate)
                    VNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
                else
                    VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
            }
        } else {
            fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
            fpr.SpillLockV(vregs[i]);
            MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0, negate);
        }
    }
}
예제 #9
0
void JitArm::fsubsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(FloatingPoint)
	
	ARMReg vA = fpr.R0(inst.FA);
	ARMReg vB = fpr.R0(inst.FB);
	ARMReg vD0 = fpr.R0(inst.FD);
	ARMReg vD1 = fpr.R1(inst.FD);

	VSUB(vD0, vA, vB);
	VMOV(vD1, vD0);
	if (inst.Rc) Helper_UpdateCR1(vD0);
}
예제 #10
0
void JitArm::lfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStoreFloating)
	Default(inst); return;

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions));
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	if (inst.RA)
	{
		MOVI2R(rB, inst.SIMM_16);
		ARMReg RA = gpr.R(inst.RA);
		ADD(rB, rB, RA);
	}
	else
		MOVI2R(rB, (u32)inst.SIMM_16);

	MOVI2R(rA, (u32)&Memory::Read_U32);	
	PUSH(4, R0, R1, R2, R3);
	MOV(R0, rB);
	BL(rA);
	MOV(rA, R0);
	POP(4, R0, R1, R2, R3);
	
	ARMReg v0 = fpr.R0(inst.FD, false);
	ARMReg v1 = fpr.R1(inst.FD, false);
	
	VMOV(v0, rA, false);
	VMOV(v1, rA, false);

	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
예제 #11
0
void JitArm::ps_sel(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

	ARMReg vA0 = fpr.R0(a);
	ARMReg vA1 = fpr.R1(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vC0 = fpr.R0(c);
	ARMReg vC1 = fpr.R1(c);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	VCMP(vA0);
	VMRS(_PC);

	FixupBranch GT0 = B_CC(CC_GE);
	VMOV(vD0, vB0);
	FixupBranch EQ0 = B();
	SetJumpTarget(GT0);
	VMOV(vD0, vC0);
	SetJumpTarget(EQ0);

	VCMP(vA1);
	VMRS(_PC);
	FixupBranch GT1 = B_CC(CC_GE);
	VMOV(vD1, vB1);
	FixupBranch EQ1 = B();
	SetJumpTarget(GT1);
	VMOV(vD1, vC1);
	SetJumpTarget(EQ1);
}
예제 #12
0
// Breaks Animal Crossing
void JitArm::fmulsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(FloatingPoint)
	
	Default(inst); return;
	
	ARMReg vA = fpr.R0(inst.FA);
	ARMReg vC = fpr.R0(inst.FC);
	ARMReg vD0 = fpr.R0(inst.FD);
	ARMReg vD1 = fpr.R1(inst.FD);

	VMUL(vD0, vA, vC);
	VMOV(vD1, vD0);
	if (inst.Rc) Helper_UpdateCR1(vD0);
}
예제 #13
0
void JitArm::ps_sum1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

	ARMReg vA0 = fpr.R0(a);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vC0 = fpr.R0(c);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	VMOV(vD0, vC0);
	VADD(vD1, vA0, vB1);
}
예제 #14
0
	void Jit::Comp_VDot(u32 op)
	{
		// DISABLE;
		CONDITIONAL_DISABLE;
		// WARNING: No prefix support!
		if (js.MayHavePrefix()) {
			Comp_Generic(op);
			js.EatPrefix();
			return;
		}

		int vd = _VD;
		int vs = _VS;
		int vt = _VT;
		VectorSize sz = GetVecSize(op);

		// TODO: Force read one of them into regs? probably not.
		u8 sregs[4], tregs[4];
		GetVectorRegs(sregs, sz, vs);
		GetVectorRegs(tregs, sz, vt);

		// TODO: applyprefixST here somehow (shuffle, etc...)
		fpr.MapRegsV(sregs, sz, 0);
		fpr.MapRegsV(tregs, sz, 0);
		VMUL(S0, fpr.V(sregs[0]), fpr.V(tregs[0]));

		int n = GetNumVectorElements(sz);
		for (int i = 1; i < n; i++) {
			// sum += s[i]*t[i];
			VMLA(S0, fpr.V(sregs[i]), fpr.V(tregs[i]));
		}
		fpr.ReleaseSpillLocks();

		fpr.MapRegV(vd, MAP_NOINIT | MAP_DIRTY);

		// TODO: applyprefixD here somehow (write mask etc..)
		VMOV(fpr.V(vd), S0);

		fpr.ReleaseSpillLocks();

		js.EatPrefix();
	}
예제 #15
0
void kamikaze_add(vec3f pos, float s)
{
	actor* a = ACTOR_get(actor_pool);
			
	VMOV(a->pos, pos);
	a->pos[2] -= 0.1f;
	

	a->vel[0] = 0.0f;
	a->vel[1] = -rand01();
	a->vel[2] = -5.0f  -10.0f*rand01();

	a->ang = 0.0;	
	a->collide_size[0] = s;
	a->collide_size[1] = s;
	a->aux[0] = linear(20.0f, 30.0f,rand01());
	

	
	a->update = kamikaze_update;
	a->render = kamikaze_render;
	
}
예제 #16
0
	void Jit::Comp_VVectorInit(u32 op)
	{
		CONDITIONAL_DISABLE;

		// WARNING: No prefix support!
		if (js.MayHavePrefix()) {
			Comp_Generic(op);
			js.EatPrefix();
			return;
		}

		switch ((op >> 16) & 0xF)
		{
		case 6: // v=zeros; break;  //vzero
			MOVI2F(S0, 0.0f, R0);
			break;
		case 7: // v=ones; break;   //vone
			MOVI2F(S0, 1.0f, R0);
			break;
		default:
			DISABLE;
			break;
		}

		VectorSize sz = GetVecSize(op);
		int n = GetNumVectorElements(sz);

		u8 dregs[4];
		GetVectorRegsPrefixD(dregs, sz, _VD);
		fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

		for (int i = 0; i < n; ++i)
			VMOV(fpr.V(dregs[i]), S0);

		ApplyPrefixD(dregs, sz);
		fpr.ReleaseSpillLocks();
	}
예제 #17
0
void Jit::Comp_VVectorInit(MIPSOpcode op)
{
    CONDITIONAL_DISABLE;

    // WARNING: No prefix support!
    if (js.HasUnknownPrefix() || disablePrefixes) {
        DISABLE;
    }

    switch ((op >> 16) & 0xF)
    {
    case 6: // v=zeros; break;  //vzero
        MOVI2F(S0, 0.0f, R0);
        break;
    case 7: // v=ones; break;   //vone
        MOVI2F(S0, 1.0f, R0);
        break;
    default:
        DISABLE;
        break;
    }

    VectorSize sz = GetVecSize(op);
    int n = GetNumVectorElements(sz);

    u8 dregs[4];
    GetVectorRegsPrefixD(dregs, sz, _VD);
    fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

    for (int i = 0; i < n; ++i)
        VMOV(fpr.V(dregs[i]), S0);

    ApplyPrefixD(dregs, sz);

    fpr.ReleaseSpillLocksAndDiscardTemps();
}
예제 #18
0
void Jit::Comp_FPULS(u32 op)
{
	CONDITIONAL_DISABLE;

	s32 offset = (s16)(op & 0xFFFF);
	int ft = _FT;
	int rs = _RS;
	// u32 addr = R(rs) + offset;
	// logBlocks = 1;
	bool doCheck = false;
	switch(op >> 26)
	{
	case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1
		fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);
		if (gpr.IsImm(rs)) {
			u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
			MOVI2R(R0, addr + (u32)Memory::base);
		} else {
			gpr.MapReg(rs);
			if (g_Config.bFastMemory) {
				SetR0ToEffectiveAddress(rs, offset);
			} else {
				SetCCAndR0ForSafeAddress(rs, offset, R1);
				doCheck = true;
			}
			ADD(R0, R0, R11);
		}
		VLDR(fpr.R(ft), R0, 0);
		if (doCheck) {
			SetCC(CC_EQ);
			MOVI2R(R0, 0);
			VMOV(fpr.R(ft), R0);
			SetCC(CC_AL);
		}
		break;

	case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1
		fpr.MapReg(ft);
		if (gpr.IsImm(rs)) {
			u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
			MOVI2R(R0, addr + (u32)Memory::base);
		} else {
			gpr.MapReg(rs);
			if (g_Config.bFastMemory) {
				SetR0ToEffectiveAddress(rs, offset);
			} else {
				SetCCAndR0ForSafeAddress(rs, offset, R1);
				doCheck = true;
			}
			ADD(R0, R0, R11);
		}
		VSTR(fpr.R(ft), R0, 0);
		if (doCheck) {
			SetCC(CC_AL);
		}
		break;

	default:
		Comp_Generic(op);
		return;
	}
}
예제 #19
0
void Jit::Comp_FPU2op(u32 op)
{
	CONDITIONAL_DISABLE;

	int fs = _FS;
	int fd = _FD;
	// logBlocks = 1;

	switch (op & 0x3f) 
	{
	case 4:	//F(fd)	   = sqrtf(F(fs));            break; //sqrt
		fpr.MapDirtyIn(fd, fs);
		VSQRT(fpr.R(fd), fpr.R(fs));
		break;
	case 5:	//F(fd)    = fabsf(F(fs));            break; //abs
		fpr.MapDirtyIn(fd, fs);
		VABS(fpr.R(fd), fpr.R(fs));
		break;
	case 6:	//F(fd)	   = F(fs);                   break; //mov
		fpr.MapDirtyIn(fd, fs);
		VMOV(fpr.R(fd), fpr.R(fs));
		break;
	case 7:	//F(fd)	   = -F(fs);                  break; //neg
		fpr.MapDirtyIn(fd, fs);
		VNEG(fpr.R(fd), fpr.R(fs));
		break;
	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
		break;
	case 13: //FsI(fd) = Rto0(F(fs)));            break; //trunc.w.s
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		break;
	case 14: //FsI(fd) = (int)ceilf (F(fs));      break; //ceil.w.s
		fpr.MapDirtyIn(fd, fs);
		MOVI2F(S0, 0.5f, R0);
		VADD(S0,fpr.R(fs),S0);
		VCVT(fpr.R(fd), S0,        TO_INT | IS_SIGNED);
		break;
	case 15: //FsI(fd) = (int)floorf(F(fs));      break; //floor.w.s
		fpr.MapDirtyIn(fd, fs);
		MOVI2F(S0, 0.5f, R0);
		VSUB(S0,fpr.R(fs),S0);
		VCVT(fpr.R(fd), S0,        TO_INT | IS_SIGNED);
		break;
	case 32: //F(fd)   = (float)FsI(fs);          break; //cvt.s.w
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED);
		break;
	case 36: //FsI(fd) = (int)  F(fs);            break; //cvt.w.s
		fpr.MapDirtyIn(fd, fs);
		LDR(R0, CTXREG, offsetof(MIPSState, fcr31));
		AND(R0, R0, Operand2(3));
		// MIPS Rounding Mode:
		//	 0: Round nearest
		//	 1: Round to zero
		//	 2: Round up (ceil)
		//	 3: Round down (floor)
		CMP(R0, Operand2(2));
		SetCC(CC_GE); MOVI2F(S0, 0.5f, R1);
		SetCC(CC_GT); VSUB(S0,fpr.R(fs),S0);
		SetCC(CC_EQ); VADD(S0,fpr.R(fs),S0);
		SetCC(CC_GE); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); /* 2,3 */
		SetCC(CC_AL);
		CMP(R0, Operand2(1));
		SetCC(CC_EQ); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */
		SetCC(CC_LT); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); /* 0 */
		SetCC(CC_AL);
		break;
	default:
		DISABLE;
	}
}
예제 #20
0
void JitArm::fctiwx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0);
		VCMPE(vB, V0);
		VMRS(_PC);
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0);
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	// Set rounding mode first
	// PPC <-> ARM rounding modes
	// 0, 1, 2, 3 <-> 0, 3, 1, 2
	ARMReg rB = gpr.GetReg();
	VMRS(rA);
	// Bits 22-23
	BIC(rA, rA, Operand2(3, 5));

	LDR(rB, R9, PPCSTATE_OFF(fpscr));
	AND(rB, rB, 0x3); // Get the FPSCR rounding bits
	CMP(rB, 1);
	SetCC(CC_EQ); // zero
		ORR(rA, rA, Operand2(3, 5));
	SetCC(CC_NEQ);
		CMP(rB, 2); // +inf
		SetCC(CC_EQ);
			ORR(rA, rA, Operand2(1, 5));
		SetCC(CC_NEQ);
			CMP(rB, 3); // -inf
			SetCC(CC_EQ);
				ORR(rA, rA, Operand2(2, 5));
	SetCC();
	VMSR(rA);
	ORR(rA, rA, Operand2(3, 5));
	VCVT(vD, vB, TO_INT | IS_SIGNED);
	VMSR(rA);
	gpr.Unlock(rB);
	VCMPE(vD, vB);
	VMRS(_PC);

	SetCC(CC_EQ);
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);

	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);

	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
예제 #21
0
void JitArm::fctiwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0);
		VCMPE(vB, V0);
		VMRS(_PC);
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0);
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
	VCMPE(vD, vB);
	VMRS(_PC);

	SetCC(CC_EQ);
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);

	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);

	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
예제 #22
0
void Jit::Comp_SVQ(MIPSOpcode op)
{
    CONDITIONAL_DISABLE;

    int imm = (signed short)(op&0xFFFC);
    int vt = (((op >> 16) & 0x1f)) | ((op&1) << 5);
    MIPSGPReg rs = _RS;

    bool doCheck = false;
    switch (op >> 26)
    {
    case 54: //lv.q
    {
        // CC might be set by slow path below, so load regs first.
        u8 vregs[4];
        GetVectorRegs(vregs, V_Quad, vt);
        fpr.MapRegsAndSpillLockV(vregs, V_Quad, MAP_DIRTY | MAP_NOINIT);

        if (gpr.IsImm(rs)) {
            u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
            MOVI2R(R0, addr + (u32)Memory::base);
        } else {
            gpr.MapReg(rs);
            if (g_Config.bFastMemory) {
                SetR0ToEffectiveAddress(rs, imm);
            } else {
                SetCCAndR0ForSafeAddress(rs, imm, R1);
                doCheck = true;
            }
            ADD(R0, R0, R11);
        }

#ifdef __ARM_ARCH_7S__
        FixupBranch skip;
        if (doCheck) {
            skip = B_CC(CC_EQ);
        }

        for (int i = 0; i < 4; i++)
            VLDR(fpr.V(vregs[i]), R0, i * 4);

        if (doCheck) {
            SetJumpTarget(skip);
            SetCC(CC_AL);
        }
#else
        for (int i = 0; i < 4; i++)
            VLDR(fpr.V(vregs[i]), R0, i * 4);

        if (doCheck) {
            SetCC(CC_EQ);
            MOVI2R(R0, 0);
            for (int i = 0; i < 4; i++)
                VMOV(fpr.V(vregs[i]), R0);
            SetCC(CC_AL);
        }
#endif
    }
    break;

    case 62: //sv.q
    {
        // CC might be set by slow path below, so load regs first.
        u8 vregs[4];
        GetVectorRegs(vregs, V_Quad, vt);
        fpr.MapRegsAndSpillLockV(vregs, V_Quad, 0);

        if (gpr.IsImm(rs)) {
            u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
            MOVI2R(R0, addr + (u32)Memory::base);
        } else {
            gpr.MapReg(rs);
            if (g_Config.bFastMemory) {
                SetR0ToEffectiveAddress(rs, imm);
            } else {
                SetCCAndR0ForSafeAddress(rs, imm, R1);
                doCheck = true;
            }
            ADD(R0, R0, R11);
        }

#ifdef __ARM_ARCH_7S__
        FixupBranch skip;
        if (doCheck) {
            skip = B_CC(CC_EQ);
        }

        for (int i = 0; i < 4; i++)
            VSTR(fpr.V(vregs[i]), R0, i * 4);

        if (doCheck) {
            SetJumpTarget(skip);
            SetCC(CC_AL);
        }
#else
        for (int i = 0; i < 4; i++)
            VSTR(fpr.V(vregs[i]), R0, i * 4);

        if (doCheck) {
            SetCC(CC_AL);
        }
#endif
    }
    break;

    default:
        DISABLE;
        break;
    }
    fpr.ReleaseSpillLocksAndDiscardTemps();
}
예제 #23
0
	void Jit::Comp_SV(u32 op) {
		CONDITIONAL_DISABLE;

		s32 imm = (signed short)(op&0xFFFC);
		int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
		int rs = _RS;

		bool doCheck = false;
		switch (op >> 26)
		{
		case 50: //lv.s  // VI(vt) = Memory::Read_U32(addr);
			{
				// CC might be set by slow path below, so load regs first.
				fpr.MapRegV(vt, MAP_DIRTY | MAP_NOINIT);
				fpr.ReleaseSpillLocks();
				if (gpr.IsImm(rs)) {
					u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
					MOVI2R(R0, addr + (u32)Memory::base);
				} else {
					gpr.MapReg(rs);
					if (g_Config.bFastMemory) {
						SetR0ToEffectiveAddress(rs, imm);
					} else {
						SetCCAndR0ForSafeAddress(rs, imm, R1);
						doCheck = true;
					}
					ADD(R0, R0, R11);
				}
				VLDR(fpr.V(vt), R0, 0);
				if (doCheck) {
					SetCC(CC_EQ);
					MOVI2R(R0, 0);
					VMOV(fpr.V(vt), R0);
					SetCC(CC_AL);
				}
			}
			break;

		case 58: //sv.s   // Memory::Write_U32(VI(vt), addr);
			{
				// CC might be set by slow path below, so load regs first.
				fpr.MapRegV(vt);
				fpr.ReleaseSpillLocks();
				if (gpr.IsImm(rs)) {
					u32 addr = (imm + gpr.GetImm(rs)) & 0x3FFFFFFF;
					MOVI2R(R0, addr + (u32)Memory::base);
				} else {
					gpr.MapReg(rs);
					if (g_Config.bFastMemory) {
						SetR0ToEffectiveAddress(rs, imm);
					} else {
						SetCCAndR0ForSafeAddress(rs, imm, R1);
						doCheck = true;
					}
					ADD(R0, R0, R11);
				}
				VSTR(fpr.V(vt), R0, 0);
				if (doCheck) {
					SetCC(CC_AL);
				}
			}
			break;


		default:
			DISABLE;
		}
	}
예제 #24
0
void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
    _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN);
    if (!js.prefixD) return;

    int n = GetNumVectorElements(sz);
    for (int i = 0; i < n; i++) 	{
        if (js.VfpuWriteMask(i))
            continue;

        // TODO: These clampers are wrong - put this into google
        // and look at the plot:   abs(x) - abs(x-0.5) + 0.5
        // It's too steep.

        // Also, they mishandle NaN and Inf.
        int sat = (js.prefixD >> (i * 2)) & 3;
        if (sat == 1) {
            // clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1]
            fpr.MapRegV(vregs[i], MAP_DIRTY);

            MOVI2F(S0, 0.0f, R0);
            MOVI2F(S1, 1.0f, R0);
            VCMP(fpr.V(vregs[i]), S0);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_LE);
            VMOV(fpr.V(vregs[i]), S0);
            SetCC(CC_AL);
            VCMP(fpr.V(vregs[i]), S1);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_GT);
            VMOV(fpr.V(vregs[i]), S1);
            SetCC(CC_AL);

            /*
            VABS(S1, fpr.V(vregs[i]));                  // S1 = fabs(x)
            VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD}
            VABS(fpr.V(vregs[i]), fpr.V(vregs[i]));
            VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 + 0.5f
            VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);*/
        } else if (sat == 3) {
            fpr.MapRegV(vregs[i], MAP_DIRTY);

            MOVI2F(S0, -1.0f, R0);
            MOVI2F(S1, 1.0f, R0);
            VCMP(fpr.V(vregs[i]), S0);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_LT);
            VMOV(fpr.V(vregs[i]), S0);
            SetCC(CC_AL);
            VCMP(fpr.V(vregs[i]), S1);
            VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
            SetCC(CC_GT);
            VMOV(fpr.V(vregs[i]), S1);
            SetCC(CC_AL);

            // clamped = fabs(x) - fabs(x-1.0f);        // [-1, 1]
            /*
            fpr.MapRegV(vregs[i], MAP_DIRTY);
            MOVI2F(S0, 1.0f, R0);
            VABS(S1, fpr.V(vregs[i]));                  // S1 = fabs(x)
            VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD}
            VABS(fpr.V(vregs[i]), fpr.V(vregs[i]));
            VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2
            */
        }
    }
}
예제 #25
0
	void Jit::Comp_VV2Op(u32 op) {
		CONDITIONAL_DISABLE;

		DISABLE;

		if (js.HasUnknownPrefix())
			DISABLE;

		VectorSize sz = GetVecSize(op);
		int n = GetNumVectorElements(sz);

		u8 sregs[4], dregs[4];
		GetVectorRegsPrefixS(sregs, sz, _VS);
		GetVectorRegsPrefixD(dregs, sz, _VD);

		ARMReg tempxregs[4];
		for (int i = 0; i < n; ++i)
		{
			if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
			{
				int reg = fpr.GetTempV();
				fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
				fpr.SpillLockV(reg);
				tempxregs[i] = fpr.V(reg);
			}
			else
			{
				fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
				fpr.SpillLockV(dregs[i]);
				tempxregs[i] = fpr.V(dregs[i]);
			}
		}

		// Warning: sregs[i] and tempxregs[i] may be the same reg.
		// Helps for vmov, hurts for vrcp, etc.
		for (int i = 0; i < n; ++i)
		{
			switch ((op >> 16) & 0x1f)
			{
			case 0: // d[i] = s[i]; break; //vmov
				// Probably for swizzle.
				VMOV(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 1: // d[i] = fabsf(s[i]); break; //vabs
				//if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				VABS(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 2: // d[i] = -s[i]; break; //vneg
				VNEG(tempxregs[i], fpr.V(sregs[i]));
				break;
			case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
				DISABLE;
				break;
			case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
				DISABLE;
				break;
			case 16: // d[i] = 1.0f / s[i]; break; //vrcp
				MOVI2F(S0, 1.0f, R0);
				VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
				break;
			case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
				MOVI2F(S0, 1.0f, R0);
				VSQRT(S1, fpr.V(sregs[i]));
				VDIV(tempxregs[i], S0, S1);
				break;
			case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
				DISABLE;
				break;
			case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
				DISABLE;
				break;
			case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
				DISABLE;
				break;
			case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
				DISABLE;
				break;
			case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
				VSQRT(tempxregs[i], fpr.V(sregs[i]));
				VABS(tempxregs[i], tempxregs[i]);
				break;
			case 23: // d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
				DISABLE;
				break;
			case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
				MOVI2F(S0, -1.0f, R0);
				VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
				break;
			case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
				DISABLE;
				break;
			case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
				DISABLE;
				break;
			}
		}

		fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
		for (int i = 0; i < n; ++i)
		{
			VMOV(fpr.V(dregs[i]), tempxregs[i]);
		}

		ApplyPrefixD(dregs, sz);

		fpr.ReleaseSpillLocks();
	}
void JitArm::lfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;
	bool update = false;
	bool zeroA = false;
	s32 offsetReg = -1;

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 567: // lfsux
					single = true;
					update = true;
					offsetReg = b;
				break;
				case 535: // lfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 631: // lfdux
					update = true;
					offsetReg = b;
				break;
				case 599: // lfdx
					zeroA = true;
					offsetReg = b;
				break;
			}
		break;
		case 49: // lfsu
			update = true;
			single = true;
		break;
		case 48: // lfs
			single = true;
			zeroA = true;
		break;
		case 51: // lfdu
			update = true;
		break;
		case 50: // lfd
			zeroA = true;
		break;
	}

	ARMReg v0 = fpr.R0(inst.FD), v1;
	if (single)
		v1 = fpr.R1(inst.FD);

	if (update)
	{
		RA = gpr.R(a);
		// Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
		{
			ADD(rB, gpr.R(offsetReg), RA);
		}
	}
	else
	{
		if (zeroA)
		{
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
				{
					MOVI2R(rB, (u32)offset);
				}
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
				{
					MOV(rB, RB);
				}
			}
		}
	}
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	if (update)
		MOV(RA, rB);

	if (Core::g_CoreStartupParameter.bFastmem)
	{
		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			VLDR(S0, rB, 0);
			nemit.VREV32(I_8, D0, D0); // Byte swap to result
			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			VLDR(v0, rB, 0);
			nemit.VREV64(I_8, v0, v0); // Byte swap to result
		}
	}
	else
	{
		PUSH(4, R0, R1, R2, R3);
		MOV(R0, rB);
		if (single)
		{
			MOVI2R(rA, (u32)&Memory::Read_U32);
			BL(rA);

			VMOV(S0, R0);

			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Read_F64);
			BL(rA);

#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			VMOV(v0, R0);
#else
			VMOV(v0, D0);
#endif
		}
		POP(4, R0, R1, R2, R3);
	}
	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
void JitArm::stfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;
	bool update = false;
	bool zeroA = false;
	s32 offsetReg = -1;

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 663: // stfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 695: // stfsux
					single = true;
					offsetReg = b;
				break;
				case 727: // stfdx
					zeroA = true;
					offsetReg = b;
				break;
				case 759: // stfdux
					update = true;
					offsetReg = b;
				break;
			}
		break;
		case 53: // stfsu
			update = true;
			single = true;
		break;
		case 52: // stfs
			single = true;
			zeroA = true;
		break;
		case 55: // stfdu
			update = true;
		break;
		case 54: // stfd
			zeroA = true;
		break;
	}

	ARMReg v0 = fpr.R0(inst.FS);

	if (update)
	{
		RA = gpr.R(a);
		// Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
		{
			ADD(rB, gpr.R(offsetReg), RA);
		}
	}
	else
	{
		if (zeroA)
		{
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
				{
					MOVI2R(rB, (u32)offset);
				}
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
				{
					MOV(rB, RB);
				}
			}
		}
	}

	if (update)
	{
		LDR(rA, R9, PPCSTATE_OFF(Exceptions));
		CMP(rA, EXCEPTION_DSI);

		SetCC(CC_NEQ);
		MOV(RA, rB);
		SetCC();
	}
	if (Core::g_CoreStartupParameter.bFastmem)
	{
		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			VCVT(S0, v0, 0);
			nemit.VREV32(I_8, D0, D0);
			VSTR(S0, rB, 0);
		}
		else
		{
			nemit.VREV64(I_8, D0, v0);
			VSTR(D0, rB, 0);
		}
	}
	else
	{
		PUSH(4, R0, R1, R2, R3);
		if (single)
		{
			MOVI2R(rA, (u32)&Memory::Write_U32);
			VCVT(S0, v0, 0);
			VMOV(R0, S0);
			MOV(R1, rB);

			BL(rA);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Write_F64);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			VMOV(R0, v0);
			MOV(R2, rB);
#else
			VMOV(D0, v0);
			MOV(R0, rB);
#endif
			BL(rA);
		}
		POP(4, R0, R1, R2, R3);
	}
	gpr.Unlock(rA, rB);
}
예제 #28
0
void ArmJit::NEONApplyPrefixD(DestARMReg dest) {
	// Apply clamps to dest.rd
	int n = GetNumVectorElements(dest.sz);

	int sat1_mask = 0;
	int sat3_mask = 0;
	int full_mask = (1 << n) - 1;
	for (int i = 0; i < n; i++) {
		int sat = (js.prefixD >> (i * 2)) & 3;
		if (sat == 1)
			sat1_mask |= 1 << i;
		if (sat == 3)
			sat3_mask |= 1 << i;
	}

	if (sat1_mask && sat3_mask) {
		// Why would anyone do this?
		ELOG("PREFIXD: Can't have both sat[0-1] and sat[-1-1] at the same time yet");
	}

	if (sat1_mask) {
		if (sat1_mask != full_mask) {
			ELOG("PREFIXD: Can't have partial sat1 mask yet (%i vs %i)", sat1_mask, full_mask);
		}
		if (IsD(dest.rd)) {
			VMOV_immf(D0, 0.0);
			VMOV_immf(D1, 1.0);
			VMAX(F_32, dest.rd, dest.rd, D0);
			VMIN(F_32, dest.rd, dest.rd, D1);
		} else {
			VMOV_immf(Q0, 1.0);
			VMIN(F_32, dest.rd, dest.rd, Q0);
			VMOV_immf(Q0, 0.0);
			VMAX(F_32, dest.rd, dest.rd, Q0);
		}
	}

	if (sat3_mask && sat1_mask != full_mask) {
		if (sat3_mask != full_mask) {
			ELOG("PREFIXD: Can't have partial sat3 mask yet (%i vs %i)", sat3_mask, full_mask);
		}
		if (IsD(dest.rd)) {
			VMOV_immf(D0, 0.0);
			VMOV_immf(D1, 1.0);
			VMAX(F_32, dest.rd, dest.rd, D0);
			VMIN(F_32, dest.rd, dest.rd, D1);
		} else {
			VMOV_immf(Q0, 1.0);
			VMIN(F_32, dest.rd, dest.rd, Q0);
			VMOV_immf(Q0, -1.0);
			VMAX(F_32, dest.rd, dest.rd, Q0);
		}
	}

	// Check for actual mask operation (unrelated to the "masks" above).
	if (dest.backingRd != dest.rd) {
		// This means that we need to apply the write mask, from rd to backingRd.
		// What a pain. We can at least shortcut easy cases like half the register.
		// And we can generate the masks easily with some of the crazy vector imm modes. (bits2bytes for example).
		// So no need to load them from RAM.
		int writeMask = (~(js.prefixD >> 8)) & 0xF;

		if (writeMask == 3) {
			ILOG("Doing writemask = 3");
			VMOV(D_0(dest.rd), D_0(dest.backingRd));
		} else {
			// TODO
			ELOG("PREFIXD: Arbitrary write masks not supported (%i / %i)", writeMask, full_mask);
			VMOV(dest.backingRd, dest.rd);
		}
	}
예제 #29
0
	void Jit::Comp_VecDo3(u32 op)
	{
		CONDITIONAL_DISABLE;
		DISABLE;
		// WARNING: No prefix support!
		if (js.MayHavePrefix())
		{
			Comp_Generic(op);
			js.EatPrefix();
			return;
		}

		int vd = _VD;
		int vs = _VS;
		int vt = _VT;

		void (ARMXEmitter::*triop)(ARMReg, ARMReg, ARMReg) = NULL;
		switch (op >> 26)
		{
		case 24: //VFPU0
			switch ((op >> 23)&7)
			{
			case 0: // d[i] = s[i] + t[i]; break; //vadd
				triop = &ARMXEmitter::VADD;
				break;
			case 1: // d[i] = s[i] - t[i]; break; //vsub
				triop = &ARMXEmitter::VSUB;
				break;
			case 7: // d[i] = s[i] / t[i]; break; //vdiv
				triop = &ARMXEmitter::VDIV;
				break;
			}
			break;
		case 25: //VFPU1
			switch ((op >> 23)&7)
			{
			case 0: // d[i] = s[i] * t[i]; break; //vmul
				triop = &ARMXEmitter::VMUL;
				break;
			}
			break;
		}

		if (!triop) {
			DISABLE;
		}

		VectorSize sz = GetVecSize(op);
		int n = GetNumVectorElements(sz);

		u8 sregs[4], tregs[4], dregs[4];
		GetVectorRegsPrefixS(sregs, sz, _VS);
		GetVectorRegsPrefixT(tregs, sz, _VT);
		GetVectorRegsPrefixD(dregs, sz, _VD);

		MIPSReg tempregs[4];
		for (int i = 0; i < n; i++) {
			if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs, n, tregs)) {
				tempregs[i] = fpr.GetTempV();
			} else {
				fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] || dregs[i] == tregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
				tempregs[i] = dregs[i];
			}
		}

		for (int i = 0; i < n; i++) {
			fpr.SpillLockV(sregs[i]);
			fpr.SpillLockV(tregs[i]);
			fpr.MapRegV(sregs[i]);
			fpr.MapRegV(tregs[i]);
			fpr.MapRegV(tempregs[i]);
			(this->*triop)(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i]));
			fpr.ReleaseSpillLockV(sregs[i]);
			fpr.ReleaseSpillLockV(tregs[i]);
		}

		fpr.MapRegsV(dregs, sz, MAP_DIRTY);
		for (int i = 0; i < n; i++) {
			if (dregs[i] != tempregs[i])
				VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
		}
		ApplyPrefixD(dregs, sz);
		
		fpr.ReleaseSpillLocks();
		
		js.EatPrefix();
	}