Example #1
0
// Emits ARM code for the paired-single reciprocal square root estimate.
//
// Both halves of FB are narrowed into S0/S1, estimated together with a NEON
// VRSQRTE on D0, and widened back into the two halves of FD. Before the
// estimate each input lane is compared with zero so that the guest FPSCR
// (loaded from / stored back to ppcState via R9) receives:
//   - FPSCR_VXSQRT when a lane does not compare greater-or-equal to zero,
//   - FPSCR_ZX     when at least one lane compares equal to zero.
// Instructions with Rc set are handed to the fallback path.
void JitArm::ps_rsqrte(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	u32 b = inst.FB, d = inst.FD;

	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	ARMReg fpscrReg = gpr.GetReg();
	ARMReg V0 = D1;
	ARMReg rA = gpr.GetReg();

	// rA collects one bit per lane whose input is zero. It is a freshly
	// allocated scratch register, so it must be cleared before the ORRs
	// below accumulate into it; otherwise the CMP(rA, 0) test reads garbage.
	MOVI2R(rA, 0);

	// fpscrReg doubles as the address register for the PPC_NAN constant
	// before it is loaded with the guest FPSCR.
	MOVI2R(fpscrReg, (u32)&PPC_NAN);
	VLDR(V0, fpscrReg, 0);
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));

	// Lane 0: compare with zero and copy the FP flags into the APSR.
	VCMP(vB0);
	VMRS(_PC);
	// Inputs >= 0 skip the invalid-operation path. (A lane that compares
	// unordered fails GE and is therefore treated like a negative input.)
	FixupBranch NotLess0 = B_CC(CC_GE);
		// NOTE(review): vD0 is rewritten by the VCVT after the estimate
		// below, so this PPC_NAN value does not survive to the result.
		VMOV(vD0, V0);
		SetFPException(fpscrReg, FPSCR_VXSQRT);
		FixupBranch SkipOrr0 = B();
	SetJumpTarget(NotLess0);
	// Flags are still those of the compare: record a zero input.
	SetCC(CC_EQ);
		ORR(rA, rA, 1);
	SetCC();
	SetJumpTarget(SkipOrr0);

	// Lane 1: same checks as lane 0.
	VCMP(vB1);
	VMRS(_PC);
	FixupBranch NotLess1 = B_CC(CC_GE);
		VMOV(vD1, V0);
		SetFPException(fpscrReg, FPSCR_VXSQRT);
		FixupBranch SkipOrr1 = B();
	SetJumpTarget(NotLess1);
	SetCC(CC_EQ);
		ORR(rA, rA, 2);
	SetCC();
	SetJumpTarget(SkipOrr1);

	// Raise the zero-divide exception if either lane was zero.
	CMP(rA, 0);
	FixupBranch noException = B_CC(CC_EQ);
	SetFPException(fpscrReg, FPSCR_ZX);
	SetJumpTarget(noException);

	// Narrow both lanes into S0/S1 (the two halves of D0), estimate them
	// with a single NEON instruction, and widen the results into FD.
	VCVT(S0, vB0, 0);
	VCVT(S1, vB1, 0);

	NEONXEmitter nemit(this);
	nemit.VRSQRTE(F_32, D0, D0);
	VCVT(vD0, S0, 0);
	VCVT(vD1, S1, 0);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(fpscrReg, rA);
}
// Emits ARM code for stfs: the first half of FS is converted into S0 and
// its 32-bit pattern is handed to Memory::Write_U32 together with the
// effective address (SIMM_16, plus the guest RA register when the RA field
// is non-zero).
void JitArm::stfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg callTarget = gpr.GetReg();
	ARMReg address = gpr.GetReg();
	ARMReg source = fpr.R0(inst.FS);
	VCVT(S0, source, 0);

	// The displacement is always materialised first; the base register is
	// only added when the instruction names one.
	MOVI2R(address, (u32)inst.SIMM_16);
	if (inst.RA)
	{
		ARMReg base = gpr.R(inst.RA);
		ADD(address, address, base);
	}

	MOVI2R(callTarget, (u32)&Memory::Write_U32);

	// R0-R3 are saved around the call; R0 carries the value bits and R1
	// the address.
	PUSH(4, R0, R1, R2, R3);
	VMOV(R0, S0);
	MOV(R1, address);

	BL(callTarget);

	POP(4, R0, R1, R2, R3);

	gpr.Unlock(callTarget, address);
}
Example #3
0
// Compiles the two-operand FPU instructions selected by the low six bits of
// the opcode (sqrt/abs/mov/neg, the float->int conversions, and int->float).
// Any function code not listed here goes through DISABLE.
void Jit::Comp_FPU2op(u32 op)
{
	CONDITIONAL_DISABLE;

	const int fs = _FS;
	const int fd = _FD;
	const u32 funct = op & 0x3f;

	switch (funct)
	{
	case 4: // sqrt:  F(fd) = sqrtf(F(fs))
		fpr.MapDirtyIn(fd, fs);
		VSQRT(fpr.R(fd), fpr.R(fs));
		break;

	case 5: // abs:   F(fd) = fabsf(F(fs))
		fpr.MapDirtyIn(fd, fs);
		VABS(fpr.R(fd), fpr.R(fs));
		break;

	case 6: // mov:   F(fd) = F(fs)
		fpr.MapDirtyIn(fd, fs);
		VMOV(fpr.R(fd), fpr.R(fs));
		break;

	case 7: // neg:   F(fd) = -F(fs)
		fpr.MapDirtyIn(fd, fs);
		VNEG(fpr.R(fd), fpr.R(fs));
		break;

	case 12: // round.w.s: FsI(fd) = (int)floorf(F(fs)+0.5f)
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
		break;

	case 13: // trunc.w.s: FsI(fd) = Rto0(F(fs))
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		break;

	case 14: // ceil.w.s:  FsI(fd) = (int)ceilf(F(fs))
	case 15: // floor.w.s: FsI(fd) = (int)floorf(F(fs))
		// Both bias the input by 0.5 (added for ceil, subtracted for
		// floor) in S0 and then convert the biased value.
		fpr.MapDirtyIn(fd, fs);
		MOVI2F(S0, 0.5f, R0);
		if (funct == 14)
			VADD(S0, fpr.R(fs), S0);
		else
			VSUB(S0, fpr.R(fs), S0);
		VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED);
		break;

	case 32: // cvt.s.w: F(fd) = (float)FsI(fs)
		fpr.MapDirtyIn(fd, fs);
		VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED);
		break;

	case 36: // cvt.w.s: FsI(fd) = (int)F(fs), honouring fcr31's rounding mode
		fpr.MapDirtyIn(fd, fs);
		// R0 = low two bits of fcr31:
		//   0 = nearest, 1 = toward zero, 2 = up (ceil), 3 = down (floor)
		LDR(R0, CTXREG, offsetof(MIPSState, fcr31));
		AND(R0, R0, Operand2(3));

		// Modes 2 and 3: bias by +/-0.5 in S0, then convert the biased value.
		CMP(R0, Operand2(2));
		SetCC(CC_GE);
		MOVI2F(S0, 0.5f, R1);
		SetCC(CC_GT);
		VSUB(S0, fpr.R(fs), S0);
		SetCC(CC_EQ);
		VADD(S0, fpr.R(fs), S0);
		SetCC(CC_GE);
		VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED);
		SetCC(CC_AL);

		// Modes 1 and 0: convert the unbiased input.
		CMP(R0, Operand2(1));
		SetCC(CC_EQ);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		SetCC(CC_LT);
		VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
		SetCC(CC_AL);
		break;

	default:
		DISABLE;
	}
}
// Emits ARM code for fctiwx: convert the first half of FB to a signed
// 32-bit integer using the rounding mode held in the guest FPSCR, and place
// the result in the first half of FD.
//
// Emitted flow:
//   1. Inputs above/below the limits stored in minmaxFloat are clamped to
//      that limit and raise FPSCR_VXCVI.
//   2. Otherwise the low two bits of the guest FPSCR are translated into the
//      host rounding-mode field (bits 22-23) and the value is converted.
//   3. The converted value is compared with the input to maintain the
//      FR/FI bits and FPSCR_XX.
//   4. The result is OR-ed with the doublenum constant, CR1 is updated when
//      Rc is set, and the guest FPSCR is written back.
void JitArm::fctiwx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0); // Load Min
		VCMPE(vB, V0);
		VMRS(_PC);
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0); // Set to min
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	// Set rounding mode first
	// PPC <-> ARM rounding modes
	// 0, 1, 2, 3 <-> 0, 3, 1, 2
	ARMReg rB = gpr.GetReg();
	VMRS(rA); // rA = host FPSCR (the minmaxFloat pointer is no longer needed)
	// Bits 22-23
	BIC(rA, rA, Operand2(3, 5));

	LDR(rB, R9, PPCSTATE_OFF(fpscr));
	AND(rB, rB, 0x3); // Get the FPSCR rounding bits

	// Every compare is emitted unconditionally. If a later CMP were itself
	// predicated on NE, a successful earlier match would skip it, leave EQ
	// set, and let the following ORRs run as well (e.g. mode 2 would pick up
	// the mode-3 bits and end up as round-toward-zero).
	CMP(rB, 1); // zero
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(3, 5));
	SetCC();
	CMP(rB, 2); // +inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(1, 5));
	SetCC();
	CMP(rB, 3); // -inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(2, 5));
	SetCC();
	VMSR(rA);
	// NOTE(review): the value written back after the conversion has both
	// rounding-mode bits set rather than the bits originally read by VMRS;
	// confirm this is the host rounding mode the rest of the JIT expects.
	ORR(rA, rA, Operand2(3, 5));
	VCVT(vD, vB, TO_INT | IS_SIGNED);
	VMSR(rA);
	gpr.Unlock(rB);
	VCMPE(vD, vB);
	VMRS(_PC);

	// Result equals the input: clear FR/FI and skip the inexact handling.
	SetCC(CC_EQ);
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	// Inexact: flag XX and FI, and set FR when |result| > |input|.
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);

	// The clamped paths rejoin here.
	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);

	// Merge the doublenum constant into the result register.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA);

	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
// Emits ARM code for fctiwzx: convert the first half of FB to a signed
// 32-bit integer with round-toward-zero and place it in the first half of
// FD. Out-of-range inputs are clamped to the limits in minmaxFloat and raise
// FPSCR_VXCVI; in-range inputs update FR/FI/XX according to whether the
// conversion was exact. The guest FPSCR is written back at the end.
void JitArm::fctiwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 srcIndex = inst.FB;
	u32 dstIndex = inst.FD;

	ARMReg source = fpr.R0(srcIndex);
	ARMReg result = fpr.R0(dstIndex);
	ARMReg limit = fpr.GetReg();
	ARMReg absSource = fpr.GetReg();
	ARMReg absResult = fpr.GetReg();

	ARMReg scratch = gpr.GetReg();
	ARMReg guestFpscr = gpr.GetReg();

	FixupBranch clampedToMax, clampedToMin;
	LDR(guestFpscr, R9, PPCSTATE_OFF(fpscr));
	MOVI2R(scratch, (u32)minmaxFloat);

	// Upper bound: the maximum lives at offset 8 of minmaxFloat.
	{
		VLDR(limit, scratch, 8);
		VCMPE(source, limit);
		VMRS(_PC); // FP flags -> APSR
		FixupBranch withinMax = B_CC(CC_LE);
		VMOV(result, limit);
		SetFPException(guestFpscr, FPSCR_VXCVI);
		clampedToMax = B();
		SetJumpTarget(withinMax);
	}
	// Lower bound: the minimum lives at offset 0 of minmaxFloat.
	{
		VLDR(limit, scratch, 0);
		VCMPE(source, limit);
		VMRS(_PC);
		FixupBranch withinMin = B_CC(CC_GE);
		VMOV(result, limit);
		SetFPException(guestFpscr, FPSCR_VXCVI);
		clampedToMin = B();
		SetJumpTarget(withinMin);
	}

	// In range: truncate, then compare the result against the input.
	VCVT(result, source, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
	VCMPE(result, source);
	VMRS(_PC);

	// Equal: clear FR/FI and jump past the inexact bookkeeping.
	SetCC(CC_EQ);
		BIC(guestFpscr, guestFpscr, FRFIMask);
		FixupBranch wasExact = B();
	SetCC();

	// Not equal: raise XX, set FI, and set FR when |result| > |input|.
	SetFPException(guestFpscr, FPSCR_XX);
	ORR(guestFpscr, guestFpscr, FIMask);
	VABS(absSource, source);
	VABS(absResult, result);
	VCMPE(absResult, absSource);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(guestFpscr, guestFpscr, FRMask);
	SetCC();
	SetJumpTarget(wasExact);

	// Both clamped paths rejoin the main flow here.
	SetJumpTarget(clampedToMax);
	SetJumpTarget(clampedToMin);

	// OR the doublenum constant into the result register.
	MOVI2R(scratch, (u32)&doublenum);
	VLDR(limit, scratch, 0);
	NEONXEmitter neon(this);
	neon.VORR(result, result, limit);

	if (inst.Rc)
		Helper_UpdateCR1(guestFpscr, scratch);

	STR(guestFpscr, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(scratch);
	gpr.Unlock(guestFpscr);
	fpr.Unlock(limit);
	fpr.Unlock(absSource);
	fpr.Unlock(absResult);
}
// Emits ARM code for the floating-point load family
// (lfs, lfsu, lfsx, lfsux, lfd, lfdu, lfdx, lfdux).
//
// The opcode is decoded into three flags plus an optional index register:
//   single    - the loaded value is 32 bits and is converted into both
//               halves of FD
//   update    - the effective address is written back to guest RA
//   zeroA     - non-update form; RA == 0 means "no base register"
//   indexReg  - guest RB for the indexed forms, -1 when SIMM_16 is used
// The load is skipped when ppcState.Exceptions compares equal to
// EXCEPTION_DSI. With fastmem the value is read directly from
// Memory::base and byte-swapped; otherwise Memory::Read_U32 / Read_F64 is
// called.
void JitArm::lfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg scratch = gpr.GetReg();
	ARMReg address = gpr.GetReg();
	ARMReg baseReg;

	u32 ra = inst.RA, rb = inst.RB;

	s32 displacement = inst.SIMM_16;
	bool single = false;
	bool update = false;
	bool zeroA = false;
	s32 indexReg = -1;

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 567: // lfsux
					single = true;
					update = true;
					indexReg = rb;
				break;
				case 535: // lfsx
					single = true;
					zeroA = true;
					indexReg = rb;
				break;
				case 631: // lfdux
					update = true;
					indexReg = rb;
				break;
				case 599: // lfdx
					zeroA = true;
					indexReg = rb;
				break;
			}
		break;
		case 49: // lfsu
			update = true;
			single = true;
		break;
		case 48: // lfs
			single = true;
			zeroA = true;
		break;
		case 51: // lfdu
			update = true;
		break;
		case 50: // lfd
			zeroA = true;
		break;
	}

	ARMReg destLo = fpr.R0(inst.FD);
	ARMReg destHi; // only assigned and used for single-precision loads
	if (single)
		destHi = fpr.R1(inst.FD);

	// Effective address -> address
	if (update)
	{
		// Update forms always use RA as the base.
		baseReg = gpr.R(ra);
		if (indexReg == -1)
		{
			// base + SIMM_16
			MOVI2R(address, displacement);
			ADD(address, address, baseReg);
		}
		else
		{
			// base + index
			ADD(address, gpr.R(indexReg), baseReg);
		}
	}
	else if (zeroA)
	{
		if (indexReg != -1)
		{
			ARMReg index = gpr.R(indexReg);
			if (ra)
			{
				baseReg = gpr.R(ra);
				ADD(address, index, baseReg);
			}
			else
			{
				MOV(address, index);
			}
		}
		else if (ra)
		{
			baseReg = gpr.R(ra);
			MOVI2R(address, displacement);
			ADD(address, address, baseReg);
		}
		else
		{
			MOVI2R(address, (u32)displacement);
		}
	}

	// Skip everything below when Exceptions compares equal to EXCEPTION_DSI.
	LDR(scratch, R9, PPCSTATE_OFF(Exceptions));
	CMP(scratch, EXCEPTION_DSI);
	FixupBranch skipLoad = B_CC(CC_EQ);

	if (update)
		MOV(baseReg, address);

	if (Core::g_CoreStartupParameter.bFastmem)
	{
		// Mask the guest address and rebase it onto Memory::base.
		Operand2 viewMask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(address, address, viewMask);
		MOVI2R(scratch, (u32)Memory::base, false);
		ADD(address, address, scratch);

		NEONXEmitter neon(this);
		if (!single)
		{
			VLDR(destLo, address, 0);
			neon.VREV64(I_8, destLo, destLo); // byte swap in place
		}
		else
		{
			VLDR(S0, address, 0);
			neon.VREV32(I_8, D0, D0); // byte swap in place
			VCVT(destLo, S0, 0);
			VCVT(destHi, S0, 0);
		}
	}
	else
	{
		// Slow path: call the memory read helper with the address in R0.
		PUSH(4, R0, R1, R2, R3);
		MOV(R0, address);
		if (!single)
		{
			MOVI2R(scratch, (u32)&Memory::Read_F64);
			BL(scratch);

#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			VMOV(destLo, R0);
#else
			VMOV(destLo, D0);
#endif
		}
		else
		{
			MOVI2R(scratch, (u32)&Memory::Read_U32);
			BL(scratch);

			VMOV(S0, R0);

			VCVT(destLo, S0, 0);
			VCVT(destHi, S0, 0);
		}
		POP(4, R0, R1, R2, R3);
	}
	gpr.Unlock(scratch, address);
	SetJumpTarget(skipLoad);
}
// Emits ARM code for the floating-point store family
// (stfs, stfsu, stfsx, stfsux, stfd, stfdu, stfdx, stfdux).
//
// The opcode is decoded into three flags plus an optional index register:
//   single    - the first half of FS is converted to 32 bits before storing
//   update    - the effective address is written back to guest RA (unless
//               ppcState.Exceptions compares equal to EXCEPTION_DSI)
//   zeroA     - non-update form; RA == 0 means "no base register"
//   offsetReg - guest RB for the indexed forms, -1 when SIMM_16 is used
// With fastmem the value is byte-swapped and stored directly relative to
// Memory::base; otherwise Memory::Write_U32 / Write_F64 is called.
void JitArm::stfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;
	bool update = false;
	bool zeroA = false;
	s32 offsetReg = -1;

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 663: // stfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 695: // stfsux
					// This is an update form like stfdux: without the
					// update flag neither address path below runs, so rB
					// would never receive an address and RA would never be
					// written back.
					single = true;
					update = true;
					offsetReg = b;
				break;
				case 727: // stfdx
					zeroA = true;
					offsetReg = b;
				break;
				case 759: // stfdux
					update = true;
					offsetReg = b;
				break;
			}
		break;
		case 53: // stfsu
			update = true;
			single = true;
		break;
		case 52: // stfs
			single = true;
			zeroA = true;
		break;
		case 55: // stfdu
			update = true;
		break;
		case 54: // stfd
			zeroA = true;
		break;
	}

	ARMReg v0 = fpr.R0(inst.FS);

	// Compute the effective address into rB.
	if (update)
	{
		RA = gpr.R(a);
		// Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
		{
			ADD(rB, gpr.R(offsetReg), RA);
		}
	}
	else
	{
		if (zeroA)
		{
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
				{
					MOVI2R(rB, (u32)offset);
				}
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
				{
					MOV(rB, RB);
				}
			}
		}
	}

	if (update)
	{
		// Write the new address back to RA only when Exceptions does not
		// compare equal to EXCEPTION_DSI.
		LDR(rA, R9, PPCSTATE_OFF(Exceptions));
		CMP(rA, EXCEPTION_DSI);

		SetCC(CC_NEQ);
		MOV(RA, rB);
		SetCC();
	}
	if (Core::g_CoreStartupParameter.bFastmem)
	{
		// Mask the guest address and rebase it onto Memory::base.
		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			// Convert into S0, byte swap, store 32 bits.
			VCVT(S0, v0, 0);
			nemit.VREV32(I_8, D0, D0);
			VSTR(S0, rB, 0);
		}
		else
		{
			// Byte swap into D0 (leaving v0 untouched), store 64 bits.
			nemit.VREV64(I_8, D0, v0);
			VSTR(D0, rB, 0);
		}
	}
	else
	{
		// Slow path: call the memory write helper.
		PUSH(4, R0, R1, R2, R3);
		if (single)
		{
			// Value bits in R0, address in R1.
			MOVI2R(rA, (u32)&Memory::Write_U32);
			VCVT(S0, v0, 0);
			VMOV(R0, S0);
			MOV(R1, rB);

			BL(rA);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Write_F64);
#if !defined(__ARM_PCS_VFP) // SoftFP: value goes in core registers starting at R0, address in R2
			VMOV(R0, v0);
			MOV(R2, rB);
#else // hard-float: value goes in D0, address in R0
			VMOV(D0, v0);
			MOV(R0, rB);
#endif
			BL(rA);
		}
		POP(4, R0, R1, R2, R3);
	}
	gpr.Unlock(rA, rB);
}