// lfs: loads a 32-bit value from gpr[RA] + SIMM_16 into EAX and converts it
// from single to double into the host register bound to FPR RD.
// The rA == 0 form is handed to FALLBACK_IF.
void Jit64::lfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	const int baseGpr = inst.RA;
	const int destFpr = inst.RD;
	FALLBACK_IF(baseGpr == 0);

	// Sign-extended 16-bit displacement.
	const s32 displacement = (s32)(s16)inst.SIMM_16;

	// The raw 32-bit pattern lands in EAX.
	SafeLoadToReg(EAX, gpr.R(baseGpr), 32, displacement, RegistersInUse(), false);

	MEMCHECK_START

	// Bind the destination without loading its old value (second argument false).
	fpr.Lock(destFpr);
	fpr.BindToRegister(destFpr, false);
	ConvertSingleToDouble(fpr.RX(destFpr), EAX, true);

	MEMCHECK_END

	fpr.UnlockAll();
}
// ps_sum IL emitter. Currently disabled: the unconditional FALLBACK_IF(true)
// below precedes everything else, so the IR-building code after it is not
// reached.
void JitILBase::ps_sum(UGeckoInstruction inst)
{
	// TODO: This operation strikes me as a bit strange...
	// perhaps we can optimize it depending on the users?
	// TODO: ps_sum breaks Sonic Colours (black screen)
	FALLBACK_IF(true);

	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc || inst.SUBOP5 != 10);

	// FA, compacted to packed form and passed through FPDup0.
	IREmitter::InstLoc packedA = ibuild.EmitLoadFReg(inst.FA);
	packedA = ibuild.EmitCompactMRegToPacked(packedA);
	IREmitter::InstLoc dupA = ibuild.EmitFPDup0(packedA);

	// Add packed FB.
	IREmitter::InstLoc loadedB = ibuild.EmitLoadFReg(inst.FB);
	IREmitter::InstLoc packedB = ibuild.EmitCompactMRegToPacked(loadedB);
	IREmitter::InstLoc sum = ibuild.EmitFPAdd(dupA, packedB);

	// Merge with packed FC via FPMerge11, expand and store to FD.
	IREmitter::InstLoc loadedC = ibuild.EmitLoadFReg(inst.FC);
	IREmitter::InstLoc packedC = ibuild.EmitCompactMRegToPacked(loadedC);
	IREmitter::InstLoc merged = ibuild.EmitFPMerge11(sum, packedC);
	IREmitter::InstLoc expanded = ibuild.EmitExpandPackedToMReg(merged);
	ibuild.EmitStoreFReg(expanded, inst.FD);
}
Ejemplo n.º 3
0
// subfic: builds IR for rD = ~rA + (SIMM_16 + 1) and stores the carry.
// When SIMM_16 == -1 the addend is zero, so the result is ~rA and the carry
// is stored as the constant 1.
void JitILBase::subfic(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITIntegerOff);

    IREmitter::InstLoc invertedA = ibuild.EmitXor(ibuild.EmitLoadGReg(inst.RA),
                                                  ibuild.EmitIntConst(-1));
    IREmitter::InstLoc result;
    IREmitter::InstLoc carryOut;

    if (inst.SIMM_16 != -1)
    {
        IREmitter::InstLoc addend = ibuild.EmitIntConst(inst.SIMM_16 + 1);
        result = ibuild.EmitAdd(invertedA, addend);
        // Carry is set when the sum wrapped below the addend (unsigned compare).
        carryOut = ibuild.EmitICmpUgt(addend, result);
    }
    else
    {
        result = invertedA;
        carryOut = ibuild.EmitIntConst(1);
    }

    ibuild.EmitStoreGReg(result, inst.RD);
    ibuild.EmitStoreCarry(carryOut);
}
Ejemplo n.º 4
0
// lfs for the ARM JIT. The instruction is always handed to Default(inst) and
// the function returns immediately; every statement after that early return
// is unreachable.
void JitArm::lfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStoreFloating)
	Default(inst); return;

	// ---- Unreachable below this point (see the early return above) ----
	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	// Branch past the load when the Exceptions word compares equal to EXCEPTION_DSI.
	LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions));
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	// rB = effective address: SIMM_16 plus gpr[RA] when RA != 0.
	if (inst.RA)
	{
		MOVI2R(rB, inst.SIMM_16);
		ARMReg RA = gpr.R(inst.RA);
		ADD(rB, rB, RA);
	}
	else
		MOVI2R(rB, (u32)inst.SIMM_16);

	// Call Memory::Read_U32(rB), preserving R0-R3 around the call; result ends up in rA.
	MOVI2R(rA, (u32)&Memory::Read_U32);	
	PUSH(4, R0, R1, R2, R3);
	MOV(R0, rB);
	BL(rA);
	MOV(rA, R0);
	POP(4, R0, R1, R2, R3);
	
	ARMReg v0 = fpr.R0(inst.FD, false);
	ARMReg v1 = fpr.R1(inst.FD, false);
	
	// The same loaded word is moved into both halves of FD.
	VMOV(v0, rA, false);
	VMOV(v1, rA, false);

	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
Ejemplo n.º 5
0
// Condition-register bit operation emitter for ARM64.
// NOTE(review): only the opening of this function is present here (the
// special case below); the remainder of the body is not visible in this
// excerpt.
void JitArm64::crXXX(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITSystemRegistersOff);

  // Special case: crclr
  // (all three bit operands identical and SUBOP10 == 193)
  if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193)
  {
    // Clear CR field bit
    // field selects the 8-byte cr_val slot; bit is the position inside that field.
    int field = inst.CRBD >> 2;
    int bit = 3 - (inst.CRBD & 3);

    ARM64Reg WA = gpr.GetReg();
    ARM64Reg XA = EncodeRegTo64(WA);
    // Load the 64-bit cr_val entry for this field, patch it, and store it back.
    LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
    switch (bit)
    {
    case CR_SO_BIT:
      AND(XA, XA, 64 - 62, 62, true);  // XA & ~(1<<61)
      break;

    case CR_EQ_BIT:
      ORR(XA, XA, 0, 0, true);  // XA | 1<<0
      break;

    case CR_GT_BIT:
      ORR(XA, XA, 64 - 63, 0, true);  // XA | 1<<63
      break;

    case CR_LT_BIT:
      AND(XA, XA, 64 - 63, 62, true);  // XA & ~(1<<62)
      break;
    }
    STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
    gpr.Unlock(WA);
    return;
  }
// psq_l / psq_lu: builds IR for a paired-single quantized load from
// SIMM_12 (+ gpr[RA] when RA != 0) into FPR RD. Opcode 57 additionally
// writes the computed address back to RA.
void JitILBase::psq_l(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITLoadStorePairedOff);
  FALLBACK_IF(jo.memcheck || inst.W);

  // For performance, the AsmCommon routines assume address translation is on.
  FALLBACK_IF(!UReg_MSR(MSR).DR);

  IREmitter::InstLoc effectiveAddr = ibuild.EmitIntConst(inst.SIMM_12);
  if (inst.RA)
  {
    IREmitter::InstLoc base = ibuild.EmitLoadGReg(inst.RA);
    effectiveAddr = ibuild.EmitAdd(effectiveAddr, base);
  }

  // Update form: the address is written back before the load is emitted.
  if (inst.OPCD == 57)
  {
    ibuild.EmitStoreGReg(effectiveAddr, inst.RA);
  }

  // The lower 3 bits is for GQR index. The next 1 bit is for inst.W
  IREmitter::InstLoc packed = ibuild.EmitLoadPaired(effectiveAddr, inst.I | (inst.W << 3));
  IREmitter::InstLoc expanded = ibuild.EmitExpandPackedToMReg(packed);
  ibuild.EmitStoreFReg(expanded, inst.RD);
}
Ejemplo n.º 7
0
// subfex: builds IR for rD = ~rA + rB + CA and stores the resulting carry.
// The carry is the OR of two wrap checks: (~rA + CA) wrapping to zero while
// CA was set, and the final add wrapping below rB.
void JitILBase::subfex(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	if (inst.OE)
	{
		PanicAlert("OE: subfex");
	}

	IREmitter::InstLoc regA = ibuild.EmitLoadGReg(inst.RA);
	IREmitter::InstLoc carryIn = ibuild.EmitLoadCarry();

	// ~rA + CA
	IREmitter::InstLoc notA = ibuild.EmitXor(regA, ibuild.EmitIntConst(-1));
	IREmitter::InstLoc notAPlusCarry = ibuild.EmitAdd(notA, carryIn);

	// First wrap check: the partial sum is zero and a carry was added.
	IREmitter::InstLoc partialIsZero = ibuild.EmitICmpEq(notAPlusCarry, ibuild.EmitIntConst(0));
	IREmitter::InstLoc carryOut = ibuild.EmitAnd(partialIsZero, carryIn);

	IREmitter::InstLoc regB = ibuild.EmitLoadGReg(inst.RB);
	IREmitter::InstLoc result = ibuild.EmitAdd(regB, notAPlusCarry);
	ibuild.EmitStoreGReg(result, inst.RD);

	// Second wrap check: the final sum dropped below rB (unsigned).
	IREmitter::InstLoc finalWrapped = ibuild.EmitICmpUgt(regB, result);
	carryOut = ibuild.EmitOr(carryOut, finalWrapped);
	ibuild.EmitStoreCarry(carryOut);

	if (inst.Rc)
	{
		ComputeRC(ibuild, result);
	}
}
Ejemplo n.º 8
0
// addex: builds IR for rD = rA + rB + CA and stores the carry out, which is
// the OR of the unsigned wrap checks of the two partial additions.
void JitILBase::addex(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	IREmitter::InstLoc lhs = ibuild.EmitLoadGReg(inst.RA);
	IREmitter::InstLoc rhs = ibuild.EmitLoadGReg(inst.RB);

	// First addition and its wrap check.
	IREmitter::InstLoc partialSum = ibuild.EmitAdd(lhs, rhs);
	IREmitter::InstLoc carryOut = ibuild.EmitICmpUlt(partialSum, lhs);

	// Add the incoming carry and fold in the second wrap check.
	IREmitter::InstLoc carryIn = ibuild.EmitLoadCarry();
	IREmitter::InstLoc total = ibuild.EmitAdd(partialSum, carryIn);
	IREmitter::InstLoc secondWrap = ibuild.EmitICmpUlt(total, partialSum);
	carryOut = ibuild.EmitOr(carryOut, secondWrap);

	ibuild.EmitStoreGReg(total, inst.RD);
	ibuild.EmitStoreCarry(carryOut);

	if (inst.OE)
	{
		PanicAlert("OE: addex");
	}

	if (inst.Rc)
	{
		ComputeRC(ibuild, total);
	}
}
Ejemplo n.º 9
0
// ps_sel: per-element select. For each of the two elements, FA's element is
// compared with the single-operand VCMP form and the flags are moved with
// VMRS; on CC_GE the destination receives FC's element, otherwise FB's.
void JitArm::ps_sel(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	const u32 selector = inst.FA;
	const u32 whenBelow = inst.FB;
	const u32 whenAtLeast = inst.FC;
	const u32 dest = inst.FD;

	ARMReg sel0 = fpr.R0(selector);
	ARMReg sel1 = fpr.R1(selector);
	ARMReg below0 = fpr.R0(whenBelow);
	ARMReg below1 = fpr.R1(whenBelow);
	ARMReg atLeast0 = fpr.R0(whenAtLeast);
	ARMReg atLeast1 = fpr.R1(whenAtLeast);
	// Destination halves are requested with the second argument false.
	ARMReg dest0 = fpr.R0(dest, false);
	ARMReg dest1 = fpr.R1(dest, false);

	// Element 0
	VCMP(sel0);
	VMRS(_PC);
	FixupBranch takeC0 = B_CC(CC_GE);
	VMOV(dest0, below0);
	FixupBranch done0 = B();
	SetJumpTarget(takeC0);
	VMOV(dest0, atLeast0);
	SetJumpTarget(done0);

	// Element 1
	VCMP(sel1);
	VMRS(_PC);
	FixupBranch takeC1 = B_CC(CC_GE);
	VMOV(dest1, below1);
	FixupBranch done1 = B();
	SetJumpTarget(takeC1);
	VMOV(dest1, atLeast1);
	SetJumpTarget(done1);
}
Ejemplo n.º 10
0
// fp_arith_s: builds IR for the scalar FP sub (SUBOP5 20), add (21) and
// mul (25) forms. Opcode 59 results are narrowed to single and duplicated;
// other opcodes insert the double result into the existing FD value.
void JitILBase::fp_arith_s(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21));

	// Only the interpreter has "proper" support for (some) FP flags
	FALLBACK_IF(inst.SUBOP5 == 25 && SConfig::GetInstance().bFPRF);

	IREmitter::InstLoc result = ibuild.EmitLoadFReg(inst.FA);

	if (inst.SUBOP5 == 20)  // sub
	{
		IREmitter::InstLoc rhs = ibuild.EmitLoadFReg(inst.FB);
		result = ibuild.EmitFDSub(result, rhs);
	}
	else if (inst.SUBOP5 == 21)  // add
	{
		IREmitter::InstLoc rhs = ibuild.EmitLoadFReg(inst.FB);
		result = ibuild.EmitFDAdd(result, rhs);
	}
	else if (inst.SUBOP5 == 25)  // mul (second operand is FC)
	{
		IREmitter::InstLoc rhs = ibuild.EmitLoadFReg(inst.FC);
		result = ibuild.EmitFDMul(result, rhs);
	}
	else
	{
		_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
	}

	if (inst.OPCD != 59)
	{
		IREmitter::InstLoc previous = ibuild.EmitLoadFReg(inst.FD);
		result = ibuild.EmitInsertDoubleInMReg(result, previous);
	}
	else
	{
		result = ibuild.EmitDoubleToSingle(result);
		result = ibuild.EmitDupSingleToMReg(result);
	}
	ibuild.EmitStoreFReg(result, inst.FD);
}
Ejemplo n.º 11
0
// Shared ARM emitter for the floating-point load family decoded by the
// opcode switch below (lfs/lfsu/lfd/lfdu and their indexed x/ux forms).
//
// Decoded flags:
//   single    - 32-bit load, converted with VCVT into both halves of FD
//   update    - the effective address is written back to RA
//   zeroA     - non-update form; the base register is only added when a != 0
//   offsetReg - index GPR for the X-forms, or -1 to use SIMM_16
void JitArm::lfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	// rA/rB are scratch registers; rB ends up holding the effective address.
	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;
	bool update = false;
	bool zeroA = false;
	s32 offsetReg = -1;

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 567: // lfsux
					single = true;
					update = true;
					offsetReg = b;
				break;
				case 535: // lfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 631: // lfdux
					update = true;
					offsetReg = b;
				break;
				case 599: // lfdx
					zeroA = true;
					offsetReg = b;
				break;
			}
		break;
		case 49: // lfsu
			update = true;
			single = true;
		break;
		case 48: // lfs
			single = true;
			zeroA = true;
		break;
		case 51: // lfdu
			update = true;
		break;
		case 50: // lfd
			zeroA = true;
		break;
	}

	// v1 is only assigned (and only used later) when 'single' is set.
	ARMReg v0 = fpr.R0(inst.FD), v1;
	if (single)
		v1 = fpr.R1(inst.FD);

	// Compute the effective address into rB.
	if (update)
	{
		RA = gpr.R(a);
		// Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
		{
			ADD(rB, gpr.R(offsetReg), RA);
		}
	}
	else
	{
		if (zeroA)
		{
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
				{
					// No base register: the address is the immediate alone.
					MOVI2R(rB, (u32)offset);
				}
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
				{
					// No base register: the address is the index register alone.
					MOV(rB, RB);
				}
			}
		}
	}
	// Branch over the load (and the RA write-back) when the Exceptions word
	// compares equal to EXCEPTION_DSI.
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	// Write-back happens before rB is turned into a host pointer below.
	if (update)
		MOV(RA, rB);

	if (Core::g_CoreStartupParameter.bFastmem)
	{
		// Fastmem: mask the guest address, add Memory::base, and load directly.
		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			VLDR(S0, rB, 0);
			nemit.VREV32(I_8, D0, D0); // Byte swap to result
			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			VLDR(v0, rB, 0);
			nemit.VREV64(I_8, v0, v0); // Byte swap to result
		}
	}
	else
	{
		// Slow path: call Memory::Read_U32 / Memory::Read_F64 with the address
		// in R0, preserving R0-R3 around the call.
		PUSH(4, R0, R1, R2, R3);
		MOV(R0, rB);
		if (single)
		{
			MOVI2R(rA, (u32)&Memory::Read_U32);
			BL(rA);

			VMOV(S0, R0);

			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Read_F64);
			BL(rA);

#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			VMOV(v0, R0);
#else
			VMOV(v0, D0);
#endif
		}
		POP(4, R0, R1, R2, R3);
	}
	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
Ejemplo n.º 12
0
// Shared ARM emitter for the floating-point store family decoded by the
// opcode switch below (stfs/stfsu/stfd/stfdu and their indexed x/ux forms).
//
// Decoded flags:
//   single    - the value is converted with VCVT and stored as 32 bits
//   update    - the effective address is written back to RA
//   zeroA     - non-update form; the base register is only added when a != 0
//   offsetReg - index GPR for the X-forms, or -1 to use SIMM_16
//
// Every decoded form must set either 'update' or 'zeroA'; otherwise no
// effective address is computed into rB.
void JitArm::stfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	// rA/rB are scratch registers; rB ends up holding the effective address.
	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;
	bool update = false;
	bool zeroA = false;
	s32 offsetReg = -1;

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 663: // stfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 695: // stfsux
					single = true;
					// Update form, like stfdux/stfsu: without this flag neither
					// address path below runs and rA is never written back.
					update = true;
					offsetReg = b;
				break;
				case 727: // stfdx
					zeroA = true;
					offsetReg = b;
				break;
				case 759: // stfdux
					update = true;
					offsetReg = b;
				break;
			}
		break;
		case 53: // stfsu
			update = true;
			single = true;
		break;
		case 52: // stfs
			single = true;
			zeroA = true;
		break;
		case 55: // stfdu
			update = true;
		break;
		case 54: // stfd
			zeroA = true;
		break;
	}

	ARMReg v0 = fpr.R0(inst.FS);

	// Compute the effective address into rB.
	if (update)
	{
		RA = gpr.R(a);
		// Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
		{
			ADD(rB, gpr.R(offsetReg), RA);
		}
	}
	else
	{
		if (zeroA)
		{
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
				{
					// No base register: the address is the immediate alone.
					MOVI2R(rB, (u32)offset);
				}
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
				{
					// No base register: the address is the index register alone.
					MOV(rB, RB);
				}
			}
		}
	}

	if (update)
	{
		// Write the address back to RA only when the Exceptions word does not
		// compare equal to EXCEPTION_DSI (conditionally executed MOV).
		LDR(rA, R9, PPCSTATE_OFF(Exceptions));
		CMP(rA, EXCEPTION_DSI);

		SetCC(CC_NEQ);
		MOV(RA, rB);
		SetCC();
	}
	if (Core::g_CoreStartupParameter.bFastmem)
	{
		// Fastmem: mask the guest address, add Memory::base, and store directly.
		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			VCVT(S0, v0, 0);
			nemit.VREV32(I_8, D0, D0);
			VSTR(S0, rB, 0);
		}
		else
		{
			nemit.VREV64(I_8, D0, v0);
			VSTR(D0, rB, 0);
		}
	}
	else
	{
		// Slow path: call Memory::Write_U32 / Memory::Write_F64, preserving
		// R0-R3 around the call.
		PUSH(4, R0, R1, R2, R3);
		if (single)
		{
			MOVI2R(rA, (u32)&Memory::Write_U32);
			VCVT(S0, v0, 0);
			VMOV(R0, S0);
			MOV(R1, rB);

			BL(rA);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Write_F64);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			VMOV(R0, v0);
			MOV(R2, rB);
#else
			VMOV(D0, v0);
			MOV(R0, rB);
#endif
			BL(rA);
		}
		POP(4, R0, R1, R2, R3);
	}
	gpr.Unlock(rA, rB);
}
Ejemplo n.º 13
0
// Builds IR for the register/immediate integer instructions selected by
// inst.OPCD: addi, addis, ori(s), andi(s)., xori(s), addic and addic.
// The add forms write RD; the logical forms read RS and write RA.
// Any other opcode is handed to FALLBACK_IF.
void JitILBase::reg_imm(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);
	int d = inst.RD, a = inst.RA, s = inst.RS;
	IREmitter::InstLoc val, test, c;
	switch (inst.OPCD)
	{
	case 14:  // addi
		val = ibuild.EmitIntConst(inst.SIMM_16);
		// rA == 0 means "no base register": the result is the immediate alone.
		if (a)
			val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), val);
		ibuild.EmitStoreGReg(val, d);
		break;
	case 15:  // addis
		// Shift in unsigned arithmetic: SIMM_16 is signed, and left-shifting a
		// negative value is undefined behaviour before C++20.
		val = ibuild.EmitIntConst((u32)inst.SIMM_16 << 16);
		if (a)
			val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), val);
		ibuild.EmitStoreGReg(val, d);
		break;
	case 24:  // ori
		val = ibuild.EmitIntConst(inst.UIMM);
		val = ibuild.EmitOr(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		break;
	case 25:  // oris
		val = ibuild.EmitIntConst(inst.UIMM << 16);
		val = ibuild.EmitOr(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		break;
	case 28:  // andi
		val = ibuild.EmitIntConst(inst.UIMM);
		val = ibuild.EmitAnd(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		// The andi/andis forms always update the condition register.
		ComputeRC(ibuild, val);
		break;
	case 29:  // andis
		val = ibuild.EmitIntConst(inst.UIMM << 16);
		val = ibuild.EmitAnd(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		ComputeRC(ibuild, val);
		break;
	case 26:  // xori
		val = ibuild.EmitIntConst(inst.UIMM);
		val = ibuild.EmitXor(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		break;
	case 27:  // xoris
		val = ibuild.EmitIntConst(inst.UIMM << 16);
		val = ibuild.EmitXor(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		break;
	case 12:  // addic
	case 13:  // addic_rc
		c = ibuild.EmitIntConst(inst.SIMM_16);
		val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), c);
		ibuild.EmitStoreGReg(val, d);
		// Carry out: the sum wrapped below the immediate (unsigned compare).
		test = ibuild.EmitICmpUgt(c, val);
		ibuild.EmitStoreCarry(test);
		if (inst.OPCD == 13)
			ComputeRC(ibuild, val);
		break;
	default:
		FALLBACK_IF(true);
	}
}
Ejemplo n.º 14
0
// mftb: after the usual instruction prologue and the system-register JIT
// disable check, the instruction is forwarded unchanged to mfspr().
void JitArm64::mftb(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);
	mfspr(inst);
}
Ejemplo n.º 15
0
// mftb: after the usual instruction prologue and the system-register JIT
// disable check, the instruction is forwarded unchanged to mfspr().
void JitArm::mftb(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	mfspr(inst);
}
Ejemplo n.º 16
0
// fcmpo: compares FA with FB (element 0) and records the outcome twice:
//   - as a byte in cr_fast[CRFD]: 8 = less, 4 = greater, 2 = equal, 1 = unordered
//   - in the fpscr word, after clearing the FPRFMask bits (0x1F000)
// The unordered path additionally raises FP exception bits via SetFPException.
void JitArm::fcmpo(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 a = inst.FA, b = inst.FB;
	int cr = inst.CRFD;

	ARMReg vA = fpr.R0(a);
	ARMReg vB = fpr.R0(b);
	ARMReg fpscrReg = gpr.GetReg();
	ARMReg crReg = gpr.GetReg();
	Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
	Operand2 LessThan(0x8, 0xA); // 0x8000
	Operand2 GreaterThan(0x4, 0xA); // 0x4000
	Operand2 EqualTo(0x2, 0xA); // 0x2000
	Operand2 NANRes(0x1, 0xA); // 0x1000
	FixupBranch Done1, Done2, Done3;
	// Load fpscr and clear the result bits that are about to be rewritten.
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	BIC(fpscrReg, fpscrReg, FPRFMask);

	VCMPE(vA, vB);
	VMRS(_PC);
	// Each SetCC(...) group below is emitted under that condition, including
	// its branch to the common tail.
	SetCC(CC_LT);
		ORR(fpscrReg, fpscrReg, LessThan);
		MOV(crReg,  8);
		Done1 = B();
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, GreaterThan);
		MOV(crReg,  4);
		Done2 = B();
	SetCC(CC_EQ);
		ORR(fpscrReg, fpscrReg, EqualTo);
		MOV(crReg,  2);
		Done3 = B();
	SetCC();

	// None of LT/GT/EQ matched: record the unordered result.
	ORR(fpscrReg, fpscrReg, NANRes);
	MOV(crReg,  1);

	// Self-compare each operand; CC_NEQ on a self-compare routes to the
	// NanA/NanB target below.
	VCMPE(vA, vA);
	VMRS(_PC);
	FixupBranch NanA = B_CC(CC_NEQ);
	VCMPE(vB, vB);
	VMRS(_PC);
	FixupBranch NanB = B_CC(CC_NEQ);

	// Neither self-compare branched: raise VXVC only.
	SetFPException(fpscrReg, FPSCR_VXVC);
	FixupBranch Done4 = B();

	SetJumpTarget(NanA);
	SetJumpTarget(NanB);

	SetFPException(fpscrReg, FPSCR_VXSNAN);

	// VXVC is raised as well unless a VEMask bit is set in fpscr.
	TST(fpscrReg, VEMask);

	FixupBranch noVXVC = B_CC(CC_NEQ);
	SetFPException(fpscrReg, FPSCR_VXVC);

	// Common tail: store the CR byte and the updated fpscr.
	SetJumpTarget(noVXVC);
	SetJumpTarget(Done1);
	SetJumpTarget(Done2);
	SetJumpTarget(Done3);
	SetJumpTarget(Done4);
	STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(fpscrReg, crReg);
}
Ejemplo n.º 17
0
// x86-64 emitter for the register/immediate integer instructions selected by
// inst.OPCD. Most forms go through regimmop(); addi and lis get extra
// immediate-propagation shortcuts, and any other opcode goes to Default().
void Jit64::reg_imm(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(Integer)
	u32 d = inst.RD, a = inst.RA, s = inst.RS;
	switch (inst.OPCD)
	{
	case 14:  // addi
	{
		// occasionally used as MOV - emulate, with immediate propagation
		if (gpr.R(a).IsImm() && d != a && a != 0)
		{
			// Source is a known constant: fold the add at compile time.
			gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)(s32)(s16)inst.SIMM_16);
		}
		else if (inst.SIMM_16 == 0 && d != a && a != 0)
		{
			// Adding zero to a different register: a plain register move.
			gpr.Lock(a, d);
			gpr.BindToRegister(d, false, true);
			MOV(32, gpr.R(d), gpr.R(a));
			gpr.UnlockAll();
		}
		else
		{
			regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD);
		}
		break;
	}
	case 15:  // addis (lis when a == 0)
	{
		// Merge with next instruction if loading a 32-bits immediate value (lis + addi, lis + ori)
		if (a == 0 && !js.isLastInstruction && !Core::g_CoreStartupParameter.bEnableDebugging)
		{
			if ((js.next_inst.OPCD == 14) && (js.next_inst.RD == d) && (js.next_inst.RA == d))
			{
				// lis + addi on the same register
				gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) + (u32)(s32)js.next_inst.SIMM_16);
				js.downcountAmount++;
				js.skipnext = true;
				break;
			}
			else if ((js.next_inst.OPCD == 24) && (js.next_inst.RA == d) && (js.next_inst.RS == d))
			{
				// lis + ori on the same register
				gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) | (u32)js.next_inst.UIMM);
				js.downcountAmount++;
				js.skipnext = true;
				break;
			}
		}

		// Not merged (or a genuine addis): ordinary add of the shifted immediate.
		regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD);
		break;
	}
	case 24:  // ori
	{
		// check for nop
		if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc)
		{
			// make the nop visible in the generated code. not much use but interesting if we see one.
			NOP();
			return;
		}
		regimmop(a, s, true, inst.UIMM, Or, &XEmitter::OR);
		break;
	}
	case 25:  // oris
		regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false);
		break;
	case 28:
		regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true);
		break;
	case 29:
		regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true);
		break;
	case 26:  // xori
		regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false);
		break;
	case 27:  // xoris
		regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false);
		break;
	case 12:  // addic
		regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true);
		break;
	case 13:  // addic_rc
		regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true);
		break;
	default:
		Default(inst);
		break;
	}
}
// stfd: stores the 64-bit value of FPR RS to gpr[RA] + SIMM_16.
// The computed address is tested against mem_mask; when no masked bit is set
// the inline "fast" store is used, otherwise the SafeWriteRegToReg path.
// Handed to FALLBACK_IF when js.memcheck is set or RA == 0.
void Jit64::stfd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	FALLBACK_IF(js.memcheck || !inst.RA);

	int s = inst.RS;
	int a = inst.RA;

	// Address bits that force the safe path; widened by the options below.
	u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
	if (Core::g_CoreStartupParameter.bMMU ||
		Core::g_CoreStartupParameter.bTLBHack) {
			mem_mask |= Memory::ADDR_MASK_MEM1;
	}
#ifdef ENABLE_MEM_CHECK
	if (Core::g_CoreStartupParameter.bEnableDebugging)
	{
		mem_mask |= Memory::EXRAM_MASK;
	}
#endif

	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	fpr.Lock(s);
	gpr.BindToRegister(a, true, false);

	// ABI_PARAM1 = effective address.
	s32 offset = (s32)(s16)inst.SIMM_16;
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	TEST(32, R(ABI_PARAM1), Imm32(mem_mask));
	FixupBranch safe = J_CC(CC_NZ);

	// Fast routine
	if (cpu_info.bSSSE3) {
		// Byte-swap all 8 bytes with one shuffle, then a single 64-bit store.
		MOVAPD(XMM0, fpr.R(s));
		PSHUFB(XMM0, M((void*)bswapShuffle1x8));
#if _M_X86_64
		MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, 0), XMM0);
#else
		// NOTE(review): this masks ECX while the store below addresses
		// ABI_PARAM1 - presumably the same register on 32-bit builds; confirm.
		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base), XMM0);
#endif
	} else {
		// Without SSSE3: two 32-bit writes, low word at +4 and high word at +0.
		MOVAPD(XMM0, fpr.R(s));
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);

		PSRLQ(XMM0, 32);
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
	}
	FixupBranch exit = J(true);
	SetJumpTarget(safe);

	// Safe but slow routine
	// High word first (offset 0); XMM0 is added to the in-use register mask
	// for this call.
	MOVAPD(XMM0, fpr.R(s));
	PSRLQ(XMM0, 32);
	MOVD_xmm(R(EAX), XMM0);
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));

	// Low word (offset 4); the value and the address are recomputed first.
	MOVAPD(XMM0, fpr.R(s));
	MOVD_xmm(R(EAX), XMM0);
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());

	SetJumpTarget(exit);

	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
Ejemplo n.º 19
0
// Builds IR for the paired-single multiply-add family selected by SUBOP5:
//   14 madds0, 15 madds1, 28 msub, 29 madd, 30 nmsub, 31 nmadd.
// All forms compute FA * FC (optionally with FC passed through FPDup0/FPDup1),
// then add or subtract FB, and optionally negate. For any other SUBOP5 the
// packed FA value is expanded and stored to FD unchanged.
void JitILBase::ps_maddXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff);
	FALLBACK_IF(inst.Rc);

	IREmitter::InstLoc acc = ibuild.EmitLoadFReg(inst.FA);
	acc = ibuild.EmitCompactMRegToPacked(acc);

	// Decode the variant into flags; the emission sequence is shared.
	bool recognised = true;
	bool dupC0 = false;
	bool dupC1 = false;
	bool subtractB = false;
	bool negate = false;

	switch (inst.SUBOP5)
	{
	case 14: // madds0
		dupC0 = true;
		break;
	case 15: // madds1
		dupC1 = true;
		break;
	case 28: // msub
		subtractB = true;
		break;
	case 29: // madd
		break;
	case 30: // nmsub
		subtractB = true;
		negate = true;
		break;
	case 31: // nmadd
		negate = true;
		break;
	default:
		recognised = false;
		break;
	}

	if (recognised)
	{
		IREmitter::InstLoc factor = ibuild.EmitLoadFReg(inst.FC);
		factor = ibuild.EmitCompactMRegToPacked(factor);
		if (dupC0)
			factor = ibuild.EmitFPDup0(factor);
		else if (dupC1)
			factor = ibuild.EmitFPDup1(factor);
		acc = ibuild.EmitFPMul(acc, factor);

		IREmitter::InstLoc addend = ibuild.EmitLoadFReg(inst.FB);
		addend = ibuild.EmitCompactMRegToPacked(addend);
		if (subtractB)
			acc = ibuild.EmitFPSub(acc, addend);
		else
			acc = ibuild.EmitFPAdd(acc, addend);

		if (negate)
			acc = ibuild.EmitFPNeg(acc);
	}

	acc = ibuild.EmitExpandPackedToMReg(acc);
	ibuild.EmitStoreFReg(acc, inst.FD);
}
Ejemplo n.º 20
0
// tw / twi: compares gpr[RA] with either SIMM_16 (twi, opcode 3) or gpr[RB]
// (tw) and, for every condition selected in the TO field that holds, raises
// EXCEPTION_PROGRAM and leaves the block through WriteExceptionExit.
// When no selected condition holds, execution continues after the trap
// sequence (or exits to PC + 4 if conditional-continue is not enabled).
void JitArm64::twx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	s32 a = inst.RA;

	ARM64Reg WA = gpr.GetReg();

	if (inst.OPCD == 3) // twi
	{
		if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 4096)
		{
			// Can fit in immediate in to the instruction encoding
			CMP(gpr.R(a), inst.SIMM_16);
		}
		else
		{
			MOVI2R(WA, (s32)(s16)inst.SIMM_16);
			CMP(gpr.R(a), WA);
		}
	}
	else // tw
	{
		CMP(gpr.R(a), gpr.R(inst.RB));
	}

	std::vector<FixupBranch> fixups;
	// TO field, indexed from the least significant bit:
	//   bit 0: a >  b (unsigned)  -> HI
	//   bit 1: a <  b (unsigned)  -> LO
	//   bit 2: a == b             -> EQ
	//   bit 3: a >  b (signed)    -> GT
	//   bit 4: a <  b (signed)    -> LT
	CCFlags conditions[] = { CC_HI, CC_LO, CC_EQ, CC_GT, CC_LT };

	for (int i = 0; i < 5; i++)
	{
		if (inst.TO & (1 << i))
		{
			FixupBranch f = B(conditions[i]);
			fixups.push_back(f);
		}
	}
	// No selected condition matched: skip the trap sequence.
	FixupBranch dont_trap = B();

	for (const FixupBranch& fixup : fixups)
	{
		SetJumpTarget(fixup);
	}

	// Trap path: flush register state without discarding it, since code
	// generation continues at dont_trap below.
	gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
	fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);

	LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
	ORR(WA, WA, 24, 0); // Same as WA | EXCEPTION_PROGRAM
	STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));

	MOVI2R(WA, js.compilerPC);

	// WA is unlocked in this function
	WriteExceptionExit(WA);

	SetJumpTarget(dont_trap);

	if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
	{
		gpr.Flush(FlushMode::FLUSH_ALL);
		fpr.Flush(FlushMode::FLUSH_ALL);
		WriteExit(js.compilerPC + 4);
	}
}
Ejemplo n.º 21
0
// Shared ARM emitter for the integer load family decoded by the opcode
// switch below (lwz/lbz/lhz/lha, their update and indexed forms, and the
// byte-reversed lwbrx/lhbrx).
//
// Decoded flags:
//   accessSize - 8, 16 or 32 bits
//   offsetReg  - index GPR for the X-forms, or -1 to use SIMM_16
//   update     - rA is advanced by the offset after the load
//   signExtend - lha family
//   reverse    - byte-reversed loads
//   fastmem    - passed through to SafeLoadToReg
//
// Also contains the "lwz idle loop" detection at the end.
void JitArm::lXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	u32 a = inst.RA, b = inst.RB, d = inst.RD;
	s32 offset = inst.SIMM_16;
	u32 accessSize = 0;
	s32 offsetReg = -1;
	bool update = false;
	bool signExtend = false;
	bool reverse = false;
	bool fastmem = false;

	// Update variants set 'update' and then deliberately fall through into
	// the matching non-update case.
	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 55: // lwzux
					update = true;
					// fall through
				case 23: // lwzx
					fastmem = true;
					accessSize = 32;
					offsetReg = b;
				break;
				case 119: //lbzux
					update = true;
					// fall through
				case 87: // lbzx
					fastmem = true;
					accessSize = 8;
					offsetReg = b;
				break;
				case 311: // lhzux
					update = true;
					// fall through
				case 279: // lhzx
					fastmem = true;
					accessSize = 16;
					offsetReg = b;
				break;
				case 375: // lhaux
					update = true;
					// fall through
				case 343: // lhax
					accessSize = 16;
					signExtend = true;
					offsetReg = b;
				break;
				case 534: // lwbrx
					accessSize = 32;
					reverse = true;
					// X-form: the index comes from rB, not from the SIMM_16 bits.
					offsetReg = b;
				break;
				case 790: // lhbrx
					accessSize = 16;
					reverse = true;
					// X-form: the index comes from rB, not from the SIMM_16 bits.
					offsetReg = b;
				break;
			}
		break;
		case 33: // lwzu
			update = true;
			// fall through
		case 32: // lwz
			fastmem = true;
			accessSize = 32;
		break;
		case 35: // lbzu
			update = true;
			// fall through
		case 34: // lbz
			fastmem = true;
			accessSize = 8;
		break;
		case 41: // lhzu
			update = true;
			// fall through
		case 40: // lhz
			fastmem = true;
			accessSize = 16;
		break;
		case 43: // lhau
			update = true;
			// fall through
		case 42: // lha
			signExtend = true;
			accessSize = 16;
		break;
	}

	// Check for exception before loading
	ARMReg rA = gpr.GetReg(false);

	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	TST(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_NEQ);

	// Base register argument: always 'a' for update forms, otherwise 'a' or
	// -1 when a == 0.
	SafeLoadToReg(fastmem, d, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse);

	if (update)
	{
		// Advance rA by the immediate or by the index register.
		ARMReg RA = gpr.R(a);
		if (offsetReg == -1)
		{
			rA = gpr.GetReg(false);
			MOVI2R(rA, offset);
			ADD(RA, RA, rA);
		}
		else
		{
			ADD(RA, RA, gpr.R(offsetReg));
		}
	}

	SetJumpTarget(DoNotLoad);

	// LWZ idle skipping
	// Matches an lwz whose upper half-word is 0x800D, followed by the words
	// 0x28000000 (or 0x2C000000 on Wii) and 0x4182fff8.
	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
	    inst.OPCD == 32 &&
	    (inst.hex & 0xFFFF0000) == 0x800D0000 &&
	    (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
	    (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
	    Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
	{
		ARMReg RD = gpr.R(d);

		// if it's still 0, we can wait until the next event
		TST(RD, RD);
		FixupBranch noIdle = B_CC(CC_NEQ);

		gpr.Flush(FLUSH_MAINTAIN_STATE);
		fpr.Flush(FLUSH_MAINTAIN_STATE);

		rA = gpr.GetReg();

		// The argument to OnIdle is computed from ppcState.gpr[a] as it is
		// when this emitter runs, and passed as an immediate.
		MOVI2R(rA, (u32)&PowerPC::OnIdle);
		MOVI2R(R0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
		BL(rA);

		gpr.Unlock(rA);
		WriteExceptionExit();

		SetJumpTarget(noIdle);

		//js.compilerPC += 8;
		return;
	}
}
Ejemplo n.º 22
0
// mfspr: loads a special-purpose register into GPR RD.
//   - SPR_TL / SPR_TU: the time base is computed inline from CoreTiming
//     globals, stored to spr[SPR_TL], and the requested half is moved to RD;
//     an immediately following mftb of TL/TU may be merged into this one.
//   - SPR_XER: reassembled from xer_stringctrl, xer_ca and xer_so_ov.
//   - SPR_WPAR / SPR_DEC: handed to FALLBACK_IF.
//   - anything else: a plain 32-bit load from the spr array.
void JitArm64::mfspr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	// The SPR number is split across two 5-bit fields in the encoding.
	u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	int d = inst.RD;
	switch (iIndex)
	{
	case SPR_TL:
	case SPR_TU:
	{
		ARM64Reg WA = gpr.GetReg();
		ARM64Reg WB = gpr.GetReg();
		ARM64Reg XA = EncodeRegTo64(WA);
		ARM64Reg XB = EncodeRegTo64(WB);

		// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
		// cost of calling out to C for this is actually significant.
		// XA = globalTimer - fakeTBStartTicks
		MOVI2R(XA, (u64)&CoreTiming::globalTimer);
		LDR(INDEX_UNSIGNED, XA, XA, 0);
		MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks);
		LDR(INDEX_UNSIGNED, XB, XB, 0);
		SUB(XA, XA, XB);

		// It might seem convenient to correct the timer for the block position here for even more accurate
		// timing, but as of currently, this can break games. If we end up reading a time *after* the time
		// at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only
		// 50 downcount remaining, some games don't function correctly, such as Karaoke Party Revolution,
		// which won't get past the loading screen.
		// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
		// The multiplier is built in XB with a logical-immediate ORR plus 1;
		// UMULH yields the upper 64 bits of the product, and the remaining
		// shift by 3 is folded into the ADD below.
		ORR(XB, SP, 1, 60);
		ADD(XB, XB, 1);
		UMULH(XA, XA, XB);

		// XA = fakeTBStartValue + (XA >> 3); the full 64-bit value is stored
		// at spr[SPR_TL].
		MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue);
		LDR(INDEX_UNSIGNED, XB, XB, 0);
		ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3));
		STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(spr[SPR_TL]));

		if (MergeAllowedNextInstructions(1))
		{
			const UGeckoInstruction& next = js.op[1].inst;
			// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
			// if we can.
			u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F);
			// Be careful; the actual opcode is for mftb (371), not mfspr (339)
			int n = next.RD;
			if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d)
			{
				js.downcountAmount++;
				js.skipInstructions = 1;
				gpr.BindToRegister(d, false);
				gpr.BindToRegister(n, false);
				// TL takes the low 32 bits (WA); TU takes XA shifted right by 32.
				if (iIndex == SPR_TL)
					MOV(gpr.R(d), WA);
				else
					ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32));

				if (nextIndex == SPR_TL)
					MOV(gpr.R(n), WA);
				else
					ORR(EncodeRegTo64(gpr.R(n)), SP, XA, ArithOption(XA, ST_LSR, 32));

				gpr.Unlock(WA, WB);
				break;
			}
		}
		// Unmerged case: write only the requested half to RD.
		gpr.BindToRegister(d, false);
		if (iIndex == SPR_TU)
			ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32));
		else
			MOV(gpr.R(d), WA);
		gpr.Unlock(WA, WB);
	}
	break;
	case SPR_XER:
	{
		// RD = xer_stringctrl | (xer_ca << XER_CA_SHIFT) | (xer_so_ov << XER_OV_SHIFT)
		gpr.BindToRegister(d, false);
		ARM64Reg RD = gpr.R(d);
		ARM64Reg WA = gpr.GetReg();
		LDRH(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(xer_stringctrl));
		LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
		ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_CA_SHIFT));
		LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov));
		ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_OV_SHIFT));
		gpr.Unlock(WA);
	}
	break;
	case SPR_WPAR:
	case SPR_DEC:
		FALLBACK_IF(true);
	default:
		// Generic SPR: 32-bit load from the spr array at index iIndex.
		gpr.BindToRegister(d, false);
		ARM64Reg RD = gpr.R(d);
		LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4);
		break;
	}
}
Ejemplo n.º 23
0
// fcmpx: compares FA with FB using UCOMISD and writes one byte to
// cr_fast[CRFD]: 0x8 = less, 0x4 = greater, 0x2 = equal, 0x1 = unordered.
// When FA and FB are the same register only the equal and unordered outcomes
// are emitted. Handed to FALLBACK_IF when jo.fpAccurateFcmp is set.
void Jit64::fcmpx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(jo.fpAccurateFcmp);

	//bool ordered = inst.SUBOP10 == 32;
	const int fa = inst.FA;
	const int fb = inst.FB;
	const int crField = inst.CRFD;
	const bool distinctOperands = (fa != fb);

	fpr.Lock(fa, fb);
	fpr.BindToRegister(fb, true);

	// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
	// Note the operand order: the flags describe FB relative to FA.
	UCOMISD(fpr.R(fb).GetSimpleReg(), fpr.R(fa));

	FixupBranch toUnordered, toLess, toGreater;
	FixupBranch doneEqual, doneUnordered, doneGreater;

	if (distinctOperands)
	{
		// FB above FA -> "less than" result
		toLess = J_CC(CC_A);
	}

	// Parity flag set -> unordered (either operand is a NaN)
	toUnordered = J_CC(CC_P);

	if (distinctOperands)
	{
		// FB below FA -> "greater than" result.
		// JB can't precede the NaN check because it doesn't test ZF
		toGreater = J_CC(CC_B);
	}

	// Equal
	MOV(8, M(&PowerPC::ppcState.cr_fast[crField]), Imm8(0x2));
	doneEqual = J();

	// Unordered
	SetJumpTarget(toUnordered);
	MOV(8, M(&PowerPC::ppcState.cr_fast[crField]), Imm8(0x1));

	if (distinctOperands)
	{
		doneUnordered = J();

		// Greater than
		SetJumpTarget(toGreater);
		MOV(8, M(&PowerPC::ppcState.cr_fast[crField]), Imm8(0x4));
		doneGreater = J();

		// Less than (falls straight through to the common exit)
		SetJumpTarget(toLess);
		MOV(8, M(&PowerPC::ppcState.cr_fast[crField]), Imm8(0x8));
	}

	// Common exit
	SetJumpTarget(doneEqual);
	if (distinctOperands)
	{
		SetJumpTarget(doneUnordered);
		SetJumpTarget(doneGreater);
	}

	fpr.UnlockAll();
}
Ejemplo n.º 24
0
// Emits ARM code for fctiwz[.]: converts the value in FPR b to a signed integer
// using round-toward-zero (ROUND_TO_ZERO is passed to VCVT), writes the result to
// FPR d, and updates the guest FPSCR (VXCVI on out-of-range input, XX/FI/FR on an
// inexact conversion). When inst.Rc is set, CR1 is updated through Helper_UpdateCR1.
void JitArm::fctiwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	// vB/vD are obtained from fpr.R0 for the source and destination FPR indices;
	// V0..V2 are temporaries that are released at the end of the function.
	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	// fpscrReg holds the guest FPSCR for the whole sequence and is stored back at the end.
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	// rA points at the saturation bounds: the minimum is read from offset 0, the maximum from offset 8.
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		// vB <= max: no saturation needed, continue with the minimum check.
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0); // Load Min
		VCMPE(vB, V0);
		VMRS(_PC); // Copy the compare result into APSR
		// vB >= min: in range, continue with the actual conversion.
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0); // Set to min
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
	// Compare the converted value with the source; equality is treated as an exact conversion.
	VCMPE(vD, vB);
	VMRS(_PC);

	// Instructions emitted between SetCC(cond) and SetCC() are predicated on cond.
	// Exact result: clear the FRFIMask bits and skip the inexact handling.
	SetCC(CC_EQ);
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	// Inexact result: raise XX and set the FIMask bits.
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	// Set the FRMask bits only when |vD| > |vB|.
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);

	// Both saturation paths rejoin here, after the conversion and FR/FI bookkeeping.
	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);

	// OR the constant stored at doublenum into the result.
	// NOTE(review): presumably this supplies the fixed upper-word pattern of the
	// integer-in-double result; confirm against the definition of doublenum.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA);

	// Write the updated guest FPSCR back and release every temporary.
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
// Emits x86 code for lfd: loads 8 bytes from guest memory at gpr[a] + SIMM_16,
// byte-swaps them, and writes the result into the low double of FPR d with a
// register-to-register MOVSD so the upper double of the destination is kept.
// Falls back to the interpreter when js.memcheck is set or when RA is 0.
void Jit64::lfd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	FALLBACK_IF(js.memcheck || !inst.RA);

	int d = inst.RD;
	int a = inst.RA;

	// Sign-extend the 16-bit displacement.
	s32 offset = (s32)(s16)inst.SIMM_16;
	// ABI_PARAM1 is used as the scratch register holding the guest base address.
	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	MOV(32, R(ABI_PARAM1), gpr.R(a));
	// TODO - optimize. This has to load the previous value - upper double should stay unmodified.
	fpr.Lock(d);
	fpr.BindToRegister(d, true);
	X64Reg xd = fpr.RX(d);

	if (cpu_info.bSSSE3)
	{
		// SSSE3 path: load the 8 bytes into XMM0 and reorder them with one PSHUFB.
#if _M_X86_64
		// NOTE(review): RBX presumably holds the guest memory base here - confirm.
		MOVQ_xmm(XMM0, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
#else
		// 32-bit host: mask the guest address and add the host base as a displacement.
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
#endif
		PSHUFB(XMM0, M((void *)bswapShuffle1x8Dupe));
		MOVSD(xd, R(XMM0));
	} else {
		// Non-SSSE3 path: byte-swap through a GPR and bounce the value through temp64.
#if _M_X86_64
		LoadAndSwap(64, EAX, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
		MOV(64, M(&temp64), R(EAX));

		// NOTE(review): js.memcheck already forces a fallback above, so the MEMCHECK
		// macros are presumably inert in this function - confirm against their definition.
		MEMCHECK_START

		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));

		MEMCHECK_END
#else
		// 32-bit host: swap each 32-bit half separately. The word at the lower guest
		// address is stored to temp64 + 4 and the following word to temp64 + 0.
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
		BSWAP(32, EAX);
		MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));

		MEMCHECK_START

		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
		BSWAP(32, EAX);
		MOV(32, M(&temp64), R(EAX));
		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));

		MEMCHECK_END
#endif
	}

	// Release every register lock taken above.
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
Ejemplo n.º 26
0
// Emits ARM code for fctiw[.]: converts the value in FPR b to a signed integer
// using the rounding mode selected by the low two bits of the guest FPSCR, writes
// the result to FPR d, and updates the guest FPSCR (VXCVI on out-of-range input,
// XX/FI/FR on an inexact conversion). When inst.Rc is set, CR1 is updated through
// Helper_UpdateCR1.
void JitArm::fctiwx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)
	u32 b = inst.FB;
	u32 d = inst.FD;

	// vB/vD are obtained from fpr.R0 for the source and destination FPR indices;
	// V0..V2 are temporaries that are released at the end of the function.
	ARMReg vB = fpr.R0(b);
	ARMReg vD = fpr.R0(d);
	ARMReg V0 = fpr.GetReg();
	ARMReg V1 = fpr.GetReg();
	ARMReg V2 = fpr.GetReg();

	ARMReg rA = gpr.GetReg();
	ARMReg fpscrReg = gpr.GetReg();

	FixupBranch DoneMax, DoneMin;
	// fpscrReg holds the guest FPSCR for the whole sequence and is stored back at the end.
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	// rA points at the saturation bounds: the minimum is read from offset 0, the maximum from offset 8.
	MOVI2R(rA, (u32)minmaxFloat);

	// Check if greater than max float
	{
		VLDR(V0, rA, 8); // Load Max
		VCMPE(vB, V0);
		VMRS(_PC); // Loads in to APSR
		// vB <= max: no saturation needed, continue with the minimum check.
		FixupBranch noException = B_CC(CC_LE);
		VMOV(vD, V0); // Set to max
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMax = B();
		SetJumpTarget(noException);
	}
	// Check if less than min float
	{
		VLDR(V0, rA, 0); // Load Min
		VCMPE(vB, V0);
		VMRS(_PC); // Copy the compare result into APSR
		// vB >= min: in range, continue with the actual conversion.
		FixupBranch noException = B_CC(CC_GE);
		VMOV(vD, V0); // Set to min
		SetFPException(fpscrReg, FPSCR_VXCVI);
		DoneMin = B();
		SetJumpTarget(noException);
	}
	// Within ranges, convert to integer
	// Set rounding mode first
	// PPC <-> ARM rounding modes
	// 0, 1, 2, 3 <-> 0, 3, 1, 2
	ARMReg rB = gpr.GetReg();
	VMRS(rA);
	// Bits 22-23
	BIC(rA, rA, Operand2(3, 5));

	LDR(rB, R9, PPCSTATE_OFF(fpscr));
	AND(rB, rB, 0x3); // Get the FPSCR rounding bits
	// Every CMP below is emitted unconditionally. A predicated CMP that is skipped
	// leaves the flags of the previous compare in place, so chaining the compares
	// under NEQ would let a later EQ-predicated ORR run on stale EQ flags and merge
	// extra bits into the mode field (guest mode 2 would end up as host mode 3).
	// With independent compares exactly one ORR executes; guest mode 0 matches none
	// and keeps the cleared field.
	CMP(rB, 1); // zero
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(3, 5));
	SetCC();
	CMP(rB, 2); // +inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(1, 5));
	SetCC();
	CMP(rB, 3); // -inf
	SetCC(CC_EQ);
		ORR(rA, rA, Operand2(2, 5));
	SetCC();
	VMSR(rA);
	// NOTE(review): the value written back after the conversion has both mode bits
	// set rather than the value read by VMRS above; confirm this is the intended
	// steady-state host rounding mode.
	ORR(rA, rA, Operand2(3, 5));
	VCVT(vD, vB, TO_INT | IS_SIGNED);
	VMSR(rA);
	gpr.Unlock(rB);
	// Compare the converted value with the source; equality is treated as an exact conversion.
	VCMPE(vD, vB);
	VMRS(_PC);

	// Instructions emitted between SetCC(cond) and SetCC() are predicated on cond.
	// Exact result: clear the FRFIMask bits and skip the inexact handling.
	SetCC(CC_EQ);
		BIC(fpscrReg, fpscrReg, FRFIMask);
		FixupBranch DoneEqual = B();
	SetCC();
	// Inexact result: raise XX and set the FIMask bits.
	SetFPException(fpscrReg, FPSCR_XX);
	ORR(fpscrReg, fpscrReg, FIMask);
	// Set the FRMask bits only when |vD| > |vB|.
	VABS(V1, vB);
	VABS(V2, vD);
	VCMPE(V2, V1);
	VMRS(_PC);
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, FRMask);
	SetCC();
	SetJumpTarget(DoneEqual);

	// Both saturation paths rejoin here, after the conversion and FR/FI bookkeeping.
	SetJumpTarget(DoneMax);
	SetJumpTarget(DoneMin);

	// OR the constant stored at doublenum into the result.
	// NOTE(review): presumably this supplies the fixed upper-word pattern of the
	// integer-in-double result; confirm against the definition of doublenum.
	MOVI2R(rA, (u32)&doublenum);
	VLDR(V0, rA, 0);
	NEONXEmitter nemit(this);
	nemit.VORR(vD, vD, V0);

	if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA);

	// Write the updated guest FPSCR back and release every temporary.
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(rA);
	gpr.Unlock(fpscrReg);
	fpr.Unlock(V0);
	fpr.Unlock(V1);
	fpr.Unlock(V2);
}
Ejemplo n.º 27
0
// Emits ARM code for the integer store family (stb/sth/stw, their update forms,
// and the register-indexed forms under opcode 31). The instruction is decoded
// into an access size, an optional index register, an update flag and a fastmem
// flag, the store itself is delegated to SafeStoreFromReg, and for update forms
// the effective address is written back to rA unless a DSI exception is pending.
void JitArm::stX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	u32 a = inst.RA;
	u32 b = inst.RB;
	u32 s = inst.RS;
	s32 offset = inst.SIMM_16;

	// Decode results; an opcode that matches no case leaves these defaults untouched.
	u32 accessSize = 0;
	s32 regOffset = -1; // -1 means "immediate displacement, no index register"
	bool update = false;
	bool fastmem = false;

	switch (inst.OPCD)
	{
		case 36: // stw
			fastmem = true;
			accessSize = 32;
		break;
		case 37: // stwu
			update = true;
			fastmem = true;
			accessSize = 32;
		break;
		case 38: // stb
			accessSize = 8;
		break;
		case 39: // stbu
			update = true;
			accessSize = 8;
		break;
		case 44: // sth
			accessSize = 16;
		break;
		case 45: // sthu
			update = true;
			accessSize = 16;
		break;
		case 31:
			switch (inst.SUBOP10)
			{
				case 151: // stwx
					fastmem = true;
					accessSize = 32;
					regOffset = b;
				break;
				case 183: // stwux
					update = true;
					fastmem = true;
					accessSize = 32;
					regOffset = b;
				break;
				case 215: // stbx
					accessSize = 8;
					regOffset = b;
				break;
				case 247: // stbux
					update = true;
					accessSize = 8;
					regOffset = b;
				break;
				case 407: // sthx
					accessSize = 16;
					regOffset = b;
				break;
				case 439: // sthux
					update = true;
					accessSize = 16;
					regOffset = b;
				break;
			}
		break;
	}

	// The base argument is -1 only for a non-update store with rA == 0.
	SafeStoreFromReg(fastmem, (update || a) ? a : -1, s, regOffset, accessSize, offset);

	if (!update)
		return;

	ARMReg scratch = gpr.GetReg();
	ARMReg indexReg; // only assigned and read for the register-indexed forms
	ARMReg baseReg = gpr.R(a);
	const bool indexed = (regOffset != -1);
	if (indexed)
		indexReg = gpr.R(regOffset);

	// Skip the address write-back when the DSI bit is set in Exceptions.
	LDR(scratch, R9, PPCSTATE_OFF(Exceptions));
	TST(scratch, EXCEPTION_DSI);
	FixupBranch skipWriteback = B_CC(CC_NEQ);

	if (a && indexed)
	{
		// rA += rB
		ADD(baseReg, baseReg, indexReg);
	}
	else if (a)
	{
		// rA += displacement
		MOVI2R(scratch, offset);
		ADD(baseReg, baseReg, scratch);
	}
	else if (indexed)
	{
		// rA field is 0: the effective address is just rB
		MOV(baseReg, indexReg);
	}
	else
	{
		// rA field is 0: the effective address is just the displacement
		MOVI2R(baseReg, (u32)offset);
	}

	SetJumpTarget(skipWriteback);
	gpr.Unlock(scratch);
}