// Emits code for the PowerPC `mcrxr` instruction: the SO/OV/CA bits of XER are
// moved into condition register field inst.CRFD, and those XER bits are then
// cleared.
//
// The XER bits live in two separate bytes of the PPC state (xer_ca and
// xer_so_ov); they are combined into a 4-bit CR field value which is translated
// through m_crTable into the 64-bit form stored in cr_val[].
void JitArm64::mcrxr(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITSystemRegistersOff);

  ARM64Reg WA = gpr.GetReg();
  ARM64Reg XA = EncodeRegTo64(WA);
  ARM64Reg WB = gpr.GetReg();
  ARM64Reg XB = EncodeRegTo64(WB);

  // Copy XER[0-3] into CR[inst.CRFD]
  LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
  LDRB(INDEX_UNSIGNED, WB, PPC_REG, PPCSTATE_OFF(xer_so_ov));

  // [0 SO OV CA]
  // xer_so_ov is expected to hold [SO OV] in its two low bits, so it has to be
  // shifted left by exactly one to sit directly above CA. (A shift of two
  // would produce [SO OV 0 CA] and index the wrong / out-of-range table slot.)
  ADD(WA, WA, WB, ArithOption(WB, ST_LSL, 1));
  // [SO OV CA 0] << 3
  // i.e. the 4-bit CR field value scaled by 8, used below as a byte offset for
  // the 64-bit load from m_crTable.
  LSL(WA, WA, 4);

  MOVP2R(XB, m_crTable);
  LDR(XB, XB, XA);
  STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD]));

  // Clear XER[0-3]
  STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_ca));
  STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_so_ov));

  gpr.Unlock(WA, WB);
}
// Emits code for the PowerPC `mtsrin` instruction: the guest GPR named by the
// RD field is stored into the segment register selected by bits 28-31 of guest
// GPR RB, i.e. sr[rB >> 28] = rD.
void JitArm64::mtsrin(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	u32 b = inst.RB, d = inst.RD;
	// d is the *source* of the store below, so its current guest value must
	// always be loaded into the host register. Loading only when d == b would
	// store an unloaded host register whenever the two registers differ.
	gpr.BindToRegister(d, true);

	ARM64Reg index = gpr.GetReg();
	ARM64Reg index64 = EncodeRegTo64(index);
	ARM64Reg RB = gpr.R(b);

	// index = rB[31:28] (segment register number, 0-15)
	UBFM(index, RB, 28, 31);
	// index64 = PPC state base + index * 4 (each sr[] entry is addressed as 4 bytes)
	ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2));
	// The sr[0] offset is folded into the store's immediate.
	STR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0]));

	gpr.Unlock(index);
}
// Emits code that reads a special purpose register into guest GPR inst.RD.
//
// - SPR_TL / SPR_TU: the time base is computed inline from CoreTiming state. If
//   the following instruction is an mftb of TL/TU into a different GPR, both
//   reads are emitted at once and the next instruction is skipped.
// - SPR_XER: the value is assembled from the separately stored XER pieces.
// - SPR_WPAR / SPR_DEC: handed to FALLBACK_IF.
// - anything else: a plain 32-bit load from the spr[] array.
void JitArm64::mfspr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	const u32 spr_index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	const int dest = inst.RD;

	switch (spr_index)
	{
	case SPR_TL:
	case SPR_TU:
	{
		ARM64Reg tb32 = gpr.GetReg();
		ARM64Reg tmp32 = gpr.GetReg();
		ARM64Reg tb64 = EncodeRegTo64(tb32);
		ARM64Reg tmp64 = EncodeRegTo64(tmp32);

		// Copies the requested half of the 64-bit time base in tb64 into a guest
		// GPR: the low word for SPR_TL, otherwise the high word (tb64 >> 32).
		const auto emit_tb_half = [&](u32 which, int guest_reg) {
			if (which == SPR_TL)
				MOV(gpr.R(guest_reg), tb32);
			else
				ORR(EncodeRegTo64(gpr.R(guest_reg)), SP, tb64, ArithOption(tb64, ST_LSR, 32));
		};

		// CoreTiming::GetFakeTimeBase is implemented inline here because calling
		// out to C is a measurable cost in games that poll the timer heavily.
		// tb64 = globalTimer - fakeTBStartTicks
		MOVI2R(tb64, (u64)&CoreTiming::globalTimer);
		LDR(INDEX_UNSIGNED, tb64, tb64, 0);
		MOVI2R(tmp64, (u64)&CoreTiming::fakeTBStartTicks);
		LDR(INDEX_UNSIGNED, tmp64, tmp64, 0);
		SUB(tb64, tb64, tmp64);

		// Adjusting the timer for the position inside the block would look more
		// accurate, but currently breaks games: reading a time *after* the point
		// where an interrupt should have fired (e.g. 100 cycles into a block with
		// only 50 downcount left) upsets some titles, such as Karaoke Party
		// Revolution, which then never leaves its loading screen.
		//
		// Division by 12 via multiplication:
		//   a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
		// The constant is formed with a logical-immediate ORR followed by +1;
		// UMULH supplies the upper 64 bits of the product (>> 64) and the
		// remaining >> 3 is folded into the ADD below.
		ORR(tmp64, SP, 1, 60);
		ADD(tmp64, tmp64, 1);
		UMULH(tb64, tb64, tmp64);

		// tb64 = fakeTBStartValue + (tb64 >> 3), written back as one 64-bit
		// store starting at spr[SPR_TL].
		MOVI2R(tmp64, (u64)&CoreTiming::fakeTBStartValue);
		LDR(INDEX_UNSIGNED, tmp64, tmp64, 0);
		ADD(tb64, tmp64, tb64, ArithOption(tb64, ST_LSR, 3));
		STR(INDEX_UNSIGNED, tb64, X29, PPCSTATE_OFF(spr[SPR_TL]));

		bool merged_with_next = false;
		if (MergeAllowedNextInstructions(1))
		{
			// Back-to-back TU/TL reads are extremely common, so fold the second
			// one into this instruction whenever that is possible.
			const UGeckoInstruction& following = js.op[1].inst;
			const u32 following_index = (following.SPRU << 5) | (following.SPRL & 0x1F);
			const int following_dest = following.RD;

			// Note: the opcode to look for is mftb (371), not mfspr (339).
			const bool is_mftb = following.OPCD == 31 && following.SUBOP10 == 371;
			const bool reads_time_base = following_index == SPR_TU || following_index == SPR_TL;
			if (is_mftb && reads_time_base && following_dest != dest)
			{
				js.downcountAmount++;
				js.skipInstructions = 1;
				gpr.BindToRegister(dest, false);
				gpr.BindToRegister(following_dest, false);
				emit_tb_half(spr_index, dest);
				emit_tb_half(following_index, following_dest);
				merged_with_next = true;
			}
		}

		if (!merged_with_next)
		{
			gpr.BindToRegister(dest, false);
			emit_tb_half(spr_index, dest);
		}

		gpr.Unlock(tb32, tmp32);
		break;
	}
	case SPR_XER:
	{
		// Start from the string-control halfword, then OR in the carry byte and
		// the SO/OV byte at their respective bit positions.
		gpr.BindToRegister(dest, false);
		ARM64Reg xer = gpr.R(dest);
		ARM64Reg piece = gpr.GetReg();
		LDRH(INDEX_UNSIGNED, xer, X29, PPCSTATE_OFF(xer_stringctrl));
		LDRB(INDEX_UNSIGNED, piece, X29, PPCSTATE_OFF(xer_ca));
		ORR(xer, xer, piece, ArithOption(piece, ST_LSL, XER_CA_SHIFT));
		LDRB(INDEX_UNSIGNED, piece, X29, PPCSTATE_OFF(xer_so_ov));
		ORR(xer, xer, piece, ArithOption(piece, ST_LSL, XER_OV_SHIFT));
		gpr.Unlock(piece);
		break;
	}
	case SPR_WPAR:
	case SPR_DEC:
		// NOTE(review): FALLBACK_IF(true) presumably leaves this function
		// unconditionally, so the default case below should never run for
		// these registers - confirm against the macro definition.
		FALLBACK_IF(true);
	default:
	{
		// Every other SPR is read straight out of the spr[] array.
		gpr.BindToRegister(dest, false);
		ARM64Reg value = gpr.R(dest);
		LDR(INDEX_UNSIGNED, value, X29, PPCSTATE_OFF(spr) + spr_index * 4);
		break;
	}
	}
}