Example #1
void MIPSEmitter::J(const void *func, std::function<void ()> delaySlot) {
	SetJumpTarget(J(delaySlot), func);
}
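
Every example on this page follows the same two-step idiom: emit a branch whose destination is not yet known, keep the returned FixupBranch, and patch the displacement later with SetJumpTarget. Below is a minimal sketch of such a mechanism, assuming an x86-style rel32 jump; the struct and method names are illustrative, not taken from the PPSSPP or Dolphin emitters.

#include <cstdint>
#include <cstring>

struct FixupBranch {
	uint8_t *ptr;  // address of the branch instruction's displacement field
};

struct MiniEmitter {
	uint8_t *code_;  // current write pointer

	FixupBranch JumpUnpatched() {
		*code_++ = 0xE9;             // x86 JMP rel32, target unknown for now
		FixupBranch b{code_};
		code_ += 4;                  // leave room for the displacement
		return b;
	}

	void SetJumpTarget(const FixupBranch &b, const void *target) {
		// rel32 is measured from the end of the 5-byte JMP instruction.
		int32_t rel = (int32_t)((const uint8_t *)target - (b.ptr + 4));
		std::memcpy(b.ptr, &rel, sizeof(rel));
	}
};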
Example #2
void Jit::Comp_FPULS(u32 op)
{
	CONDITIONAL_DISABLE;

	s32 offset = (s16)(op & 0xFFFF);
	int ft = _FT;
	int rs = _RS;
	// u32 addr = R(rs) + offset;
	// logBlocks = 1;
	bool doCheck = false;
	switch(op >> 26)
	{
	case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1
		fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);
		if (gpr.IsImm(rs)) {
			u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
			MOVI2R(R0, addr + (u32)Memory::base);
		} else {
			gpr.MapReg(rs);
			if (g_Config.bFastMemory) {
				SetR0ToEffectiveAddress(rs, offset);
			} else {
				SetCCAndR0ForSafeAddress(rs, offset, R1);
				doCheck = true;
			}
			ADD(R0, R0, R11);
		}
#ifdef __ARM_ARCH_7S__
		FixupBranch skip;
		if (doCheck) {
			skip = B_CC(CC_EQ);
		}
		VLDR(fpr.R(ft), R0, 0);
		if (doCheck) {
			SetJumpTarget(skip);
			SetCC(CC_AL);
		}
#else
		VLDR(fpr.R(ft), R0, 0);
		if (doCheck) {
			SetCC(CC_EQ);
			MOVI2R(R0, 0);
			VMOV(fpr.R(ft), R0);
			SetCC(CC_AL);
		}
#endif
		break;

	case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1
		fpr.MapReg(ft);
		if (gpr.IsImm(rs)) {
			u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
			MOVI2R(R0, addr + (u32)Memory::base);
		} else {
			gpr.MapReg(rs);
			if (g_Config.bFastMemory) {
				SetR0ToEffectiveAddress(rs, offset);
			} else {
				SetCCAndR0ForSafeAddress(rs, offset, R1);
				doCheck = true;
			}
			ADD(R0, R0, R11);
		}
#ifdef __ARM_ARCH_7S__
		FixupBranch skip2;
		if (doCheck) {
			skip2 = B_CC(CC_EQ);
		}
		VSTR(fpr.R(ft), R0, 0);
		if (doCheck) {
			SetJumpTarget(skip2);
			SetCC(CC_AL);
		}
#else
		VSTR(fpr.R(ft), R0, 0);
		if (doCheck) {
			SetCC(CC_AL);
		}
#endif
		break;

	default:
		Comp_Generic(op);
		return;
	}
}
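
The offset decode at the top relies on the (s16) cast to sign-extend the low halfword of the opcode, and the 0x3FFFFFFF mask folds the PSP's mirrored address space before Memory::base is added. A standalone check of that arithmetic, with hypothetical opcode and register values:

#include <cassert>
#include <cstdint>

int main() {
	uint32_t op = 0xC62AFFFC;                   // hypothetical lwc1 f10, -4(r17)
	int32_t offset = (int16_t)(op & 0xFFFF);    // sign-extends to -4
	assert(offset == -4);

	uint32_t rs = 0x08804000;                   // hypothetical base register value
	uint32_t addr = (offset + rs) & 0x3FFFFFFF; // fold mirrors into 30-bit space
	assert(addr == 0x08803FFC);
	return 0;
}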
Example #3
void MIPSEmitter::BGTZ(MIPSReg rs, const void *func, std::function<void ()> delaySlot) {
	SetJumpTarget(BGTZ(rs, delaySlot), func);
}
Example #4
void MIPSEmitter::SetJumpTarget(const FixupBranch &branch) {
	SetJumpTarget(branch, code_);
}
Example #5
void JitArm::lXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	u32 a = inst.RA, b = inst.RB, d = inst.RD;
	s32 offset = inst.SIMM_16;
	u32 accessSize = 0;
	s32 offsetReg = -1;
	bool update = false;
	bool signExtend = false;
	bool reverse = false;
	bool fastmem = false;

	switch (inst.OPCD)
	{
		case 31:
			switch (inst.SUBOP10)
			{
				case 55: // lwzux
					update = true;
				case 23: // lwzx
					fastmem = true;
					accessSize = 32;
					offsetReg = b;
				break;
				case 119: //lbzux
					update = true;
				case 87: // lbzx
					fastmem = true;
					accessSize = 8;
					offsetReg = b;
				break;
				case 311: // lhzux
					update = true;
				case 279: // lhzx
					fastmem = true;
					accessSize = 16;
					offsetReg = b;
				break;
				case 375: // lhaux
					update = true;
				case 343: // lhax
					accessSize = 16;
					signExtend = true;
					offsetReg = b;
				break;
				case 534: // lwbrx
					accessSize = 32;
					reverse = true;
				break;
				case 790: // lhbrx
					accessSize = 16;
					reverse = true;
				break;
			}
		break;
		case 33: // lwzu
			update = true;
		case 32: // lwz
			fastmem = true;
			accessSize = 32;
		break;
		case 35: // lbzu
			update = true;
		case 34: // lbz
			fastmem = true;
			accessSize = 8;
		break;
		case 41: // lhzu
			update = true;
		case 40: // lhz
			fastmem = true;
			accessSize = 16;
		break;
		case 43: // lhau
			update = true;
		case 42: // lha
			signExtend = true;
			accessSize = 16;
		break;
	}

	// Check for exception before loading
	ARMReg rA = gpr.GetReg(false);

	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	TST(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_NEQ);

	SafeLoadToReg(fastmem, d, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse);

	if (update)
	{
		ARMReg RA = gpr.R(a);
		if (offsetReg == -1)
		{
			rA = gpr.GetReg(false);
			MOVI2R(rA, offset);
			ADD(RA, RA, rA);
		}
		else
		{
			ADD(RA, RA, gpr.R(offsetReg));
		}
	}

	SetJumpTarget(DoNotLoad);

	// LWZ idle skipping
	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
	    inst.OPCD == 32 &&
	    (inst.hex & 0xFFFF0000) == 0x800D0000 &&
	    (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
	    (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
	    Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
	{
		ARMReg RD = gpr.R(d);

		// if it's still 0, we can wait until the next event
		TST(RD, RD);
		FixupBranch noIdle = B_CC(CC_NEQ);

		gpr.Flush(FLUSH_MAINTAIN_STATE);
		fpr.Flush(FLUSH_MAINTAIN_STATE);

		rA = gpr.GetReg();

		MOVI2R(rA, (u32)&PowerPC::OnIdle);
		MOVI2R(R0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
		BL(rA);

		gpr.Unlock(rA);
		WriteExceptionExit();

		SetJumpTarget(noIdle);

		//js.compilerPC += 8;
		return;
	}
}
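
The idle-skip condition above matches a three-instruction guest loop. Here is a standalone restatement of the pattern match; the helper name is hypothetical, but the constants are exactly the ones tested in the code.

#include <cstdint>

// The guest loop being detected is:
//   lwz    r0, d(r13)   ; 0x800Dxxxx
//   cmplwi r0, 0        ; 0x28000000 (or cmpwi, 0x2C000000, accepted on Wii)
//   beq    -8           ; 0x4182FFF8, back to the lwz
// If r0 is still zero the guest is just spinning, so the JIT hands control
// to PowerPC::OnIdle instead of re-executing the loop.
static bool LooksLikeIdleLoop(uint32_t lwz, uint32_t cmp, uint32_t branch, bool isWii)
{
	const bool lwzR0SmallData = (lwz & 0xFFFF0000u) == 0x800D0000u;
	const bool cmpR0Zero = cmp == 0x28000000u || (isWii && cmp == 0x2C000000u);
	const bool beqBack = branch == 0x4182FFF8u;
	return lwzR0SmallData && cmpR0Zero && beqBack;
}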
Example #6
void JitArm::ps_cmpo1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	u32 a = inst.FA, b = inst.FB;
	int cr = inst.CRFD;

	ARMReg vA = fpr.R1(a);
	ARMReg vB = fpr.R1(b);
	ARMReg fpscrReg = gpr.GetReg();
	ARMReg crReg = gpr.GetReg();
	Operand2 FPRFMask(0x1F, 0xA); // 0x1F000
	Operand2 LessThan(0x8, 0xA); // 0x8000
	Operand2 GreaterThan(0x4, 0xA); // 0x4000
	Operand2 EqualTo(0x2, 0xA); // 0x2000
	Operand2 NANRes(0x1, 0xA); // 0x1000
	FixupBranch Done1, Done2, Done3;
	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	BIC(fpscrReg, fpscrReg, FPRFMask);

	VCMPE(vA, vB);
	VMRS(_PC);
	SetCC(CC_LT);
		ORR(fpscrReg, fpscrReg, LessThan);
		MOV(crReg,  8);
		Done1 = B();
	SetCC(CC_GT);
		ORR(fpscrReg, fpscrReg, GreaterThan);
		MOV(crReg,  4);
		Done2 = B();
	SetCC(CC_EQ);
		ORR(fpscrReg, fpscrReg, EqualTo);
		MOV(crReg,  2);
		Done3 = B();
	SetCC();

	ORR(fpscrReg, fpscrReg, NANRes);
	MOV(crReg,  1);

	VCMPE(vA, vA);
	VMRS(_PC);
	FixupBranch NanA = B_CC(CC_NEQ);
	VCMPE(vB, vB);
	VMRS(_PC);
	FixupBranch NanB = B_CC(CC_NEQ);

	SetFPException(fpscrReg, FPSCR_VXVC);
	FixupBranch Done4 = B();

	SetJumpTarget(NanA);
	SetJumpTarget(NanB);

	SetFPException(fpscrReg, FPSCR_VXSNAN);

	TST(fpscrReg, VEMask);

	FixupBranch noVXVC = B_CC(CC_NEQ);
	SetFPException(fpscrReg, FPSCR_VXVC);

	SetJumpTarget(noVXVC);
	SetJumpTarget(Done1);
	SetJumpTarget(Done2);
	SetJumpTarget(Done3);
	SetJumpTarget(Done4);
	STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr);
	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
	gpr.Unlock(fpscrReg, crReg);
}
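
The Operand2(value, rotation) pairs above are ARM modified immediates: an 8-bit value rotated right by twice the 4-bit rotation field. A compile-time check that they produce the FPSCR masks the comments claim (ARMModifiedImm is an illustrative helper, not part of the emitter):

#include <cstdint>

constexpr uint32_t ARMModifiedImm(uint32_t value8, uint32_t rot4)
{
	const uint32_t r = (rot4 * 2) & 31u;
	return r == 0 ? value8 : ((value8 >> r) | (value8 << (32u - r)));
}
static_assert(ARMModifiedImm(0x1F, 0xA) == 0x1F000, "FPRFMask");
static_assert(ARMModifiedImm(0x8, 0xA) == 0x8000, "LessThan");
static_assert(ARMModifiedImm(0x4, 0xA) == 0x4000, "GreaterThan");
static_assert(ARMModifiedImm(0x2, 0xA) == 0x2000, "EqualTo");
static_assert(ARMModifiedImm(0x1, 0xA) == 0x1000, "NANRes");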
Example #7
// Produce optimized form of timesRepeat: [...].
// Returns true if optimization performed.
// Note that we perform the conditional jump as a backwards jump at the end for optimal performance
bool Compiler::ParseTimesRepeatLoop(const TEXTRANGE& messageRange)
{
	POTE oteSelector = AddSymbolToFrame("timesRepeat:", messageRange);

	if (!ThisTokenIsBinary('['))
	{
		Warning(messageRange, CWarnExpectNiladicBlockArg, (Oop)oteSelector);
		return false;
	}
	
	// Can apply extra optimizations if receiver is known SmallInteger
	int loopTimes=0;
	bool isIntReceiver = LastIsPushSmallInteger(loopTimes);

	int startMark = GenDup();

	int jumpOver = GenJumpInstruction(LongJump);
	int loopHead = m_codePointer;
	
	PushOptimizedScope();
	
	// Parse the loop block and ignore its result
	ParseOptimizeBlock(0);
	GenPopStack();
	PopOptimizedScope(ThisTokenRange().m_stop);
	NextToken();

	if (isIntReceiver && loopTimes <= 0)
	{
		// Blank out all our bytecodes if we have 0 or fewer loops
		const int loopEnd = m_codePointer;
		for (int p = startMark; p < loopEnd; p++)
			UngenInstruction(p);

		return true;
	}

	// Decrement counter
	GenInstruction(DecrementStackTop);

	// Dup counter for compare
	int testMark = GenDup();

	// Fill in forward unconditional jump before the head of the loop
	// which jumps to the conditional test
	SetJumpTarget(jumpOver, testMark);

	if (isIntReceiver)
	{
		// Using IsZero speeds up empty loop by about 5%.
		_ASSERTE(loopTimes > 0);
		GenInstruction(SpecialSendIsZero);
	}
	else
	{
		// Test for another run around the wheel - favour use of #<, therefore need to test against 1 rather than 0
		GenInteger(1, TEXTRANGE());

		// No breakpoint wanted here
		//GenMessage("<", 1);
		GenInstruction(SendArithmeticLT);
	}

	// Conditional jump back to loop head
	GenJump(LongJumpIfFalse, loopHead);

	// Pop off the loop counter, leaving the integer receiver on the stack
	GenPopStack();
	
	return true;
}
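
The bytecode laid out above bottom-tests the loop, so each iteration costs a single conditional branch. A rough C++ rendering of that shape (names are illustrative):

void TimesRepeat(int n, void (*body)())
{
	int counter = n;        // GenDup of the SmallInteger receiver
	goto test;              // jumpOver: forward LongJump to the test
loopHead:
	body();                 // ParseOptimizeBlock(0); result popped
	--counter;              // DecrementStackTop
test:                       // testMark: GenDup for the compare
	if (!(counter < 1))     // GenInteger(1) + SendArithmeticLT
		goto loopHead;      // LongJumpIfFalse back to loopHead
}                           // GenPopStack leaves the receiver as the result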
Example #8
void JitArm64::twx(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITSystemRegistersOff);

  s32 a = inst.RA;

  ARM64Reg WA = gpr.GetReg();

  if (inst.OPCD == 3)  // twi
  {
    CMPI2R(gpr.R(a), (s32)(s16)inst.SIMM_16, WA);
  }
  else  // tw
  {
    CMP(gpr.R(a), gpr.R(inst.RB));
  }

  std::vector<FixupBranch> fixups;
  CCFlags conditions[] = {CC_LT, CC_GT, CC_EQ, CC_VC, CC_VS};

  for (int i = 0; i < 5; i++)
  {
    if (inst.TO & (1 << i))
    {
      FixupBranch f = B(conditions[i]);
      fixups.push_back(f);
    }
  }
  FixupBranch dont_trap = B();

  for (const FixupBranch& fixup : fixups)
  {
    SetJumpTarget(fixup);
  }

  FixupBranch far = B();
  SwitchToFarCode();
  SetJumpTarget(far);

  gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
  fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);

  LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
  ORR(WA, WA, 24, 0);  // Same as WA | EXCEPTION_PROGRAM
  STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
  gpr.Unlock(WA);

  WriteExceptionExit(js.compilerPC);

  SwitchToNearCode();

  SetJumpTarget(dont_trap);

  if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
  {
    gpr.Flush(FlushMode::FLUSH_ALL);
    fpr.Flush(FlushMode::FLUSH_ALL);
    WriteExit(js.compilerPC + 4);
  }
}
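
The five conditional branches correspond to the five TO bits of tw/twi. For reference, here is the architectural test they encode, as the PowerPC specification defines it (the least significant bit of the field arms the unsigned greater-than comparison):

#include <cstdint>

// Reference semantics of the PowerPC tw/twi trap condition: the trap is
// taken if any comparison armed by the 5-bit TO field holds.
static bool TrapTaken(uint32_t TO, int32_t a, int32_t b)
{
	return ((TO & 0x10) && a < b) ||                      // signed less
	       ((TO & 0x08) && a > b) ||                      // signed greater
	       ((TO & 0x04) && a == b) ||                     // equal
	       ((TO & 0x02) && (uint32_t)a < (uint32_t)b) ||  // unsigned less
	       ((TO & 0x01) && (uint32_t)a > (uint32_t)b);    // unsigned greater
}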
Example #9
bool Compiler::ParseIfNotNil(const TEXTRANGE& messageRange, int exprStartPos)
{
	if (!ThisTokenIsBinary('['))
	{
		Warning(CWarnExpectMonadicOrNiladicBlockArg, (Oop)InternSymbol("ifNotNil:"));
		return false;
	}

	int dupMark = GenDup();
	BreakPoint();
	const int sendIsNil = GenInstruction(SpecialSendIsNil);
	AddTextMap(sendIsNil, exprStartPos, LastTokenRange().m_stop);

	// We're going to add a pop and jump on condition instruction here
	// It's a forward jump, so we need to patch up the target later
	const int popAndJumpMark = GenJumpInstruction(LongJumpIfTrue);
	int argc = ParseIfNotNilBlock();

	// If the ifNotNil: block did not have any arguments, then we do not need the Dup, and we also
	// need to patch out the corresponding pop.
	if (!argc)
	{
		UngenInstruction(dupMark);
		UngenInstruction(popAndJumpMark + lengthOfByteCode(LongJumpIfTrue));
	}

	int ifNilMark;

	// Has an #ifNil: branch?
	if (strcmp(ThisTokenText(), "ifNil:") == 0)
	{
		POTE oteSelector = AddSymbolToFrame("ifNotNil:ifNil:", messageRange);

		// Generate the jump out instruction (forward jump, so target not yet known)
		int jumpOutMark = GenJumpInstruction(LongJump);

		// Mark first instruction of the "else" branch
		ifNilMark = m_codePointer;

		// ifNotNil:ifNil: form
		NextToken();

		ParseIfNilBlock(!argc);

		SetJumpTarget(jumpOutMark, GenNop());
	}
	else
	{
		// No "ifNil:" branch 

		POTE oteSelector = AddSymbolToFrame("ifNotNil:", messageRange);

		if (!argc)
		{
			// Since we've removed the Dup if the ifNotNil: block had no args, we need to ensure there is nil atop the stack
			// This should normally get optimized away later if the expression value is not used.

			// Generate the jump out instruction
			int jumpOutMark = GenJumpInstruction(LongJump);

			ifNilMark = GenInstruction(ShortPushNil);

			SetJumpTarget(jumpOutMark, GenNop());
		}
		else
		{
			ifNilMark = GenNop();
		}
	}
	
	SetJumpTarget(popAndJumpMark, ifNilMark);
	
	return true;
}
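
A rough C++ rendering of the control flow compiled above for `expr ifNotNil: [:x | ...] ifNil: [...]`; Oop, nil, and the two block functions are stand-ins for illustration:

using Oop = void*;
constexpr Oop nil = nullptr;

Oop IfNotNilIfNil(Oop expr, Oop (*notNilBlock)(Oop), Oop (*nilBlock)())
{
	Oop dup = expr;            // GenDup keeps expr available as the argument
	if (dup == nil)            // SpecialSendIsNil + LongJumpIfTrue
		return nilBlock();     // ifNilMark: start of the "else" branch
	return notNilBlock(dup);   // ifNotNil: block; jumpOutMark skips the else
}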
Example #10
void Compiler::ParseToByNumberDo(int toPointer, Oop oopNumber, bool bNegativeStep)
{
	PushOptimizedScope();
	
	// Add an "each". This should be at the top of the temporary stack as it is taken over by the do: block
	TempVarRef* pEachTempRef = AddOptimizedTemp(eachTempName);
	
	// Start to generate bytecodes

	// First we must store that from value into our 'each' counter variable/argument. This involves
	// stepping back a bit ...
	_ASSERTE(toPointer < m_codePointer);
	int currentPos = m_codePointer;
	m_codePointer = toPointer;
	// Note we store so leaving the value on the stack as the result of the whole expression
	GenStoreTemp(pEachTempRef);
	m_codePointer = currentPos + m_bytecodes[toPointer].instructionLength();
	
	// Dup the to: value
	GenDup();
	// And push the from value
	GenPushTemp(pEachTempRef);

	// We must jump over the block to the test first time through
	int jumpOver = GenJumpInstruction(LongJump);

	int loopHead = m_codePointer;

	// Parse the one argument block.
	// Leave nothing on the stack, expect 1 argument
	ParseOptimizeBlock(1);

	// Pop off the result of the optimized block
	GenPopStack();
	
	GenDup();
	GenPushTemp(pEachTempRef);
	// If the step is 1/-1, this will be optimised down to Increment/Decrement
	GenNumber(oopNumber, LastTokenRange());
	int add = GenInstruction(SendArithmeticAdd);
	int store = GenStoreTemp(pEachTempRef);

	int comparePointer = m_codePointer;
	
	if (bNegativeStep)
		GenInstruction(SendArithmeticGT);
	else
		// Note that < is the best message to send, since it is normally directly implemented
		GenInstruction(SendArithmeticLT);

	GenJump(LongJumpIfFalse, loopHead);

	// Pop the to: value
	GenPopStack();

	SetJumpTarget(jumpOver, comparePointer);

	TODO("Is this in the right place? What is the last real IP of the loop");
	PopOptimizedScope(ThisTokenRange().m_stop);
	NextToken();
}
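
A rough C++ rendering of the to:by:do: loop shape generated above, for a positive literal step (a negative step swaps SendArithmeticLT for SendArithmeticGT):

void ToByDo(int from, int to, int step, void (*body)(int))
{
	int each = from;        // the back-patched GenStoreTemp at toPointer
	goto test;              // jumpOver, patched to comparePointer
loopHead:
	body(each);             // ParseOptimizeBlock(1); result popped
	each += step;           // GenNumber + SendArithmeticAdd + store
test:                       // comparePointer
	if (!(to < each))       // SendArithmeticLT against the dup'd to: value
		goto loopHead;      // LongJumpIfFalse back to loopHead
}                           // the final GenPopStack drops the to: value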
Example #11
bool Compiler::ParseIfNil(const TEXTRANGE& messageRange, int exprStartPos)
{
	if (!ThisTokenIsBinary('['))
	{
		Warning(CWarnExpectNiladicBlockArg, (Oop)InternSymbol("ifNil:"));
		return false;
	}

	int dupMark = GenDup();

	BreakPoint();
	const int sendIsNil = GenInstruction(SpecialSendIsNil);
	const int mapEntry = AddTextMap(sendIsNil, exprStartPos, LastTokenRange().m_stop);

	// We're going to add a pop and jump on condition instruction here
	// It's a forward jump, so we need to patch up the target later
	const int popAndJumpMark = GenJumpInstruction(LongJumpIfFalse);

	ParseIfNilBlock(false);
	
	int ifNotNilMark;

	if (strcmp(ThisTokenText(), "ifNotNil:") == 0)
	{
		POTE oteSelector = AddSymbolToFrame("ifNil:ifNotNil:", messageRange);

		// Generate the jump out instruction (forward jump, so target not yet known)
		int jumpOutMark = GenJumpInstruction(LongJump);

		// Mark first instruction of the "else" branch
		ifNotNilMark = m_codePointer;

		// #ifNil:ifNotNil: form
		NextToken();

		int argc = ParseIfNotNilBlock();

		// Be careful, may not actually be a literal block there
		if (m_ok)
		{
			// If the ifNotNil: block does not need an argument, we can patch out the Dup
			// and corresponding pop
			if (!argc)
			{
				UngenInstruction(dupMark);
				_ASSERTE(m_bytecodes[popAndJumpMark + lengthOfByteCode(LongJumpIfFalse)].byte == PopStackTop);
				UngenInstruction(popAndJumpMark + lengthOfByteCode(LongJumpIfFalse));
				_ASSERTE(m_bytecodes[ifNotNilMark].byte == PopStackTop);
				UngenInstruction(ifNotNilMark);
			}

			// Set unconditional jump at the end of the ifNil to jump over the "else" branch to a Nop
			SetJumpTarget(jumpOutMark, GenNop());
		}
	}
	else
	{
		POTE oteSelector = AddSymbolToFrame("ifNil:", messageRange);

		// No "else" branch, but we still need an instruction to jump to
		ifNotNilMark = GenNop();
	}

	if (m_ok)
	{
		// Conditional jump to the "else" branch (or the Nop if no else branch)
		SetJumpTarget(popAndJumpMark, ifNotNilMark);

		if (mapEntry >= 0)
			m_textMaps[mapEntry].stop = LastTokenRange().m_stop;
	}

	return true;
}
Example #12
LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
	_assert_msg_(G3D, id.linear, "Linear should be set on sampler id");
	BeginWrite();

	// We'll first write the nearest sampler, which we will CALL.
	// This may differ slightly based on the "linear" flag.
	const u8 *nearest = AlignCode16();

	if (!Jit_ReadTextureFormat(id)) {
		EndWrite();
		SetCodePtr(const_cast<u8 *>(nearest));
		return nullptr;
	}

	RET();

	// Now the actual linear func, which is exposed externally.
	const u8 *start = AlignCode16();

	// NOTE: This doesn't use the general register mapping.
	// POSIX: arg1=uptr, arg2=vptr, arg3=frac_u, arg4=frac_v, arg5=src, arg6=bufw, stack+8=level
	// Win64: arg1=uptr, arg2=vptr, arg3=frac_u, arg4=frac_v, stack+40=src, stack+48=bufw, stack+56=level
	//
	// We map these to nearest CALLs, with order: u, v, src, bufw, level

	// Let's start by saving a bunch of registers.
	PUSH(R15);
	PUSH(R14);
	PUSH(R13);
	PUSH(R12);
	// Won't need frac_u/frac_v for a while.
	PUSH(arg4Reg);
	PUSH(arg3Reg);
	// Extra space to restore alignment and save resultReg for lerp.
	// TODO: Maybe use XMMs instead?
	SUB(64, R(RSP), Imm8(24));

	MOV(64, R(R12), R(arg1Reg));
	MOV(64, R(R13), R(arg2Reg));
#ifdef _WIN32
	// First arg now starts at 24 (extra space) + 48 (pushed stack) + 8 (ret address) + 32 (shadow space)
	const int argOffset = 24 + 48 + 8 + 32;
	MOV(64, R(R14), MDisp(RSP, argOffset));
	MOV(32, R(R15), MDisp(RSP, argOffset + 8));
	// level is at argOffset + 16.
#else
	MOV(64, R(R14), R(arg5Reg));
	MOV(32, R(R15), R(arg6Reg));
	// level is at 24 + 48 + 8.
#endif

	// Early exit on !srcPtr.
	FixupBranch zeroSrc;
	if (id.hasInvalidPtr) {
		CMP(PTRBITS, R(R14), Imm8(0));
		FixupBranch nonZeroSrc = J_CC(CC_NZ);
		XOR(32, R(RAX), R(RAX));
		zeroSrc = J(true);
		SetJumpTarget(nonZeroSrc);
	}

	// At this point:
	// R12=uptr, R13=vptr, stack+24=frac_u, stack+32=frac_v, R14=src, R15=bufw, stack+X=level

	auto doNearestCall = [&](int off) {
		MOV(32, R(uReg), MDisp(R12, off));
		MOV(32, R(vReg), MDisp(R13, off));
		MOV(64, R(srcReg), R(R14));
		MOV(32, R(bufwReg), R(R15));
		// Leave level alone; we always load from RAM. Separate CLUTs are uncommon.

		CALL(nearest);
		MOV(32, MDisp(RSP, off), R(resultReg));
	};

	doNearestCall(0);
	doNearestCall(4);
	doNearestCall(8);
	doNearestCall(12);

	// Convert TL, TR, BL, BR to floats for easier blending.
	if (!cpu_info.bSSE4_1) {
		PXOR(XMM0, R(XMM0));
	}

	MOVD_xmm(fpScratchReg1, MDisp(RSP, 0));
	MOVD_xmm(fpScratchReg2, MDisp(RSP, 4));
	MOVD_xmm(fpScratchReg3, MDisp(RSP, 8));
	MOVD_xmm(fpScratchReg4, MDisp(RSP, 12));

	if (cpu_info.bSSE4_1) {
		PMOVZXBD(fpScratchReg1, R(fpScratchReg1));
		PMOVZXBD(fpScratchReg2, R(fpScratchReg2));
		PMOVZXBD(fpScratchReg3, R(fpScratchReg3));
		PMOVZXBD(fpScratchReg4, R(fpScratchReg4));
	} else {
		PUNPCKLBW(fpScratchReg1, R(XMM0));
		PUNPCKLBW(fpScratchReg2, R(XMM0));
		PUNPCKLBW(fpScratchReg3, R(XMM0));
		PUNPCKLBW(fpScratchReg4, R(XMM0));
		PUNPCKLWD(fpScratchReg1, R(XMM0));
		PUNPCKLWD(fpScratchReg2, R(XMM0));
		PUNPCKLWD(fpScratchReg3, R(XMM0));
		PUNPCKLWD(fpScratchReg4, R(XMM0));
	}
	CVTDQ2PS(fpScratchReg1, R(fpScratchReg1));
	CVTDQ2PS(fpScratchReg2, R(fpScratchReg2));
	CVTDQ2PS(fpScratchReg3, R(fpScratchReg3));
	CVTDQ2PS(fpScratchReg4, R(fpScratchReg4));

	// Okay, now multiply the R sides by frac_u, and L by (256 - frac_u)...
	MOVD_xmm(fpScratchReg5, MDisp(RSP, 24));
	CVTDQ2PS(fpScratchReg5, R(fpScratchReg5));
	SHUFPS(fpScratchReg5, R(fpScratchReg5), _MM_SHUFFLE(0, 0, 0, 0));
	if (RipAccessible(by256)) {
		MULPS(fpScratchReg5, M(by256));  // rip accessible
	} else {
		Crash();  // TODO
	}
	MOVAPS(XMM0, M(ones));
	SUBPS(XMM0, R(fpScratchReg5));

	MULPS(fpScratchReg1, R(XMM0));
	MULPS(fpScratchReg2, R(fpScratchReg5));
	MULPS(fpScratchReg3, R(XMM0));
	MULPS(fpScratchReg4, R(fpScratchReg5));

	// Now set top=fpScratchReg1, bottom=fpScratchReg3.
	ADDPS(fpScratchReg1, R(fpScratchReg2));
	ADDPS(fpScratchReg3, R(fpScratchReg4));

	// Next, time for frac_v.
	MOVD_xmm(fpScratchReg5, MDisp(RSP, 32));
	CVTDQ2PS(fpScratchReg5, R(fpScratchReg5));
	SHUFPS(fpScratchReg5, R(fpScratchReg5), _MM_SHUFFLE(0, 0, 0, 0));
	MULPS(fpScratchReg5, M(by256));
	MOVAPS(XMM0, M(ones));
	SUBPS(XMM0, R(fpScratchReg5));

	MULPS(fpScratchReg1, R(XMM0));
	MULPS(fpScratchReg3, R(fpScratchReg5));

	// Still at the 255 scale, now we're interpolated.
	ADDPS(fpScratchReg1, R(fpScratchReg3));

	// Time to convert back to a single 32 bit value.
	CVTPS2DQ(fpScratchReg1, R(fpScratchReg1));
	PACKSSDW(fpScratchReg1, R(fpScratchReg1));
	PACKUSWB(fpScratchReg1, R(fpScratchReg1));
	MOVD_xmm(R(resultReg), fpScratchReg1);

	if (id.hasInvalidPtr) {
		SetJumpTarget(zeroSrc);
	}

	ADD(64, R(RSP), Imm8(24));
	POP(arg3Reg);
	POP(arg4Reg);
	POP(R12);
	POP(R13);
	POP(R14);
	POP(R15);

	RET();

	EndWrite();
	return (LinearFunc)start;
}
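
The SSE sequence in CompileLinear is a bilinear blend at a 0-256 fixed-point scale. A scalar restatement of the math, assuming (as the code implies) that by256 holds 1/256.0f and ones holds 1.0f in every lane:

// Scalar restatement of the blend performed in XMM registers above.
// frac_u/frac_v are 0..255 fixed-point weights; each channel was widened
// from u8 to float before blending.
static float Lerp256(float a, float b, int frac)
{
	const float f = frac * (1.0f / 256.0f);  // MULPS by256
	return a * (1.0f - f) + b * f;           // SUBPS from ones, MULPS, ADDPS
}

static float Bilinear(float tl, float tr, float bl, float br,
                      int frac_u, int frac_v)
{
	const float top    = Lerp256(tl, tr, frac_u);
	const float bottom = Lerp256(bl, br, frac_u);
	return Lerp256(top, bottom, frac_v);     // still at the 255 scale here
}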
Example #13
void Jit64::stfd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	FALLBACK_IF(js.memcheck || !inst.RA);

	int s = inst.RS;
	int a = inst.RA;

	u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
	if (Core::g_CoreStartupParameter.bMMU ||
		Core::g_CoreStartupParameter.bTLBHack) {
			mem_mask |= Memory::ADDR_MASK_MEM1;
	}
#ifdef ENABLE_MEM_CHECK
	if (Core::g_CoreStartupParameter.bEnableDebugging)
	{
		mem_mask |= Memory::EXRAM_MASK;
	}
#endif

	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	fpr.Lock(s);
	gpr.BindToRegister(a, true, false);

	s32 offset = (s32)(s16)inst.SIMM_16;
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	TEST(32, R(ABI_PARAM1), Imm32(mem_mask));
	FixupBranch safe = J_CC(CC_NZ);

	// Fast routine
	if (cpu_info.bSSSE3) {
		MOVAPD(XMM0, fpr.R(s));
		PSHUFB(XMM0, M((void*)bswapShuffle1x8));
#if _M_X86_64
		MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, 0), XMM0);
#else
		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base), XMM0);
#endif
	} else {
		MOVAPD(XMM0, fpr.R(s));
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);

		PSRLQ(XMM0, 32);
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
	}
	FixupBranch exit = J(true);
	SetJumpTarget(safe);

	// Safe but slow routine
	MOVAPD(XMM0, fpr.R(s));
	PSRLQ(XMM0, 32);
	MOVD_xmm(R(EAX), XMM0);
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));

	MOVAPD(XMM0, fpr.R(s));
	MOVD_xmm(R(EAX), XMM0);
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());

	SetJumpTarget(exit);

	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
Example #14
void JitArm::lfXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	ARMReg rA = gpr.GetReg();
	ARMReg rB = gpr.GetReg();
	ARMReg RA;

	u32 a = inst.RA, b = inst.RB;

	s32 offset = inst.SIMM_16;
	bool single = false;
	bool update = false;
	bool zeroA = false;
	s32 offsetReg = -1;

	switch (inst.OPCD)
	{
		case 31:
			switch(inst.SUBOP10)
			{
				case 567: // lfsux
					single = true;
					update = true;
					offsetReg = b;
				break;
				case 535: // lfsx
					single = true;
					zeroA = true;
					offsetReg = b;
				break;
				case 631: // lfdux
					update = true;
					offsetReg = b;
				break;
				case 599: // lfdx
					zeroA = true;
					offsetReg = b;
				break;
			}
		break;
		case 49: // lfsu
			update = true;
			single = true;
		break;
		case 48: // lfs
			single = true;
			zeroA = true;
		break;
		case 51: // lfdu
			update = true;
		break;
		case 50: // lfd
			zeroA = true;
		break;
	}

	ARMReg v0 = fpr.R0(inst.FD), v1;
	if (single)
		v1 = fpr.R1(inst.FD);

	if (update)
	{
		RA = gpr.R(a);
		// Update path /always/ uses RA
		if (offsetReg == -1) // uses SIMM_16
		{
			MOVI2R(rB, offset);
			ADD(rB, rB, RA);
		}
		else
			ADD(rB, gpr.R(offsetReg), RA);
	}
	else
	{

		if (zeroA)
		{
			if (offsetReg == -1)
			{
				if (a)
				{
					RA = gpr.R(a);
					MOVI2R(rB, offset);
					ADD(rB, rB, RA);
				}
				else
					MOVI2R(rB, (u32)offset);
			}
			else
			{
				ARMReg RB = gpr.R(offsetReg);
				if (a)
				{
					RA = gpr.R(a);
					ADD(rB, RB, RA);
				}
				else
					MOV(rB, RB);
			}
		}
	}
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	if (update)
		MOV(RA, rB);

	if (Core::g_CoreStartupParameter.bFastmem)
	{
		Operand2 mask(3, 1); // ~(Memory::MEMVIEW32_MASK)
		BIC(rB, rB, mask); // 1
		MOVI2R(rA, (u32)Memory::base, false); // 2-3
		ADD(rB, rB, rA); // 4

		NEONXEmitter nemit(this);
		if (single)
		{
			VLDR(S0, rB, 0);
			nemit.VREV32(I_8, D0, D0); // Byte swap to result
			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			VLDR(v0, rB, 0);
			nemit.VREV64(I_8, v0, v0); // Byte swap to result
		}
	}
	else
	{
		PUSH(4, R0, R1, R2, R3);
		MOV(R0, rB);
		if (single)
		{
			MOVI2R(rA, (u32)&Memory::Read_U32);
			BL(rA);

			VMOV(S0, R0);

			VCVT(v0, S0, 0);
			VCVT(v1, S0, 0);
		}
		else
		{
			MOVI2R(rA, (u32)&Memory::Read_F64);
			BL(rA);

#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
			VMOV(v0, R0);
#else
			VMOV(v0, D0);
#endif
		}
		POP(4, R0, R1, R2, R3);
	}
	gpr.Unlock(rA, rB);
	SetJumpTarget(DoNotLoad);
}
Example #15
void JitArmILAsmRoutineManager::Generate()
{
	enterCode = GetCodePtr();
	PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR);
	// Take care to 8-byte align stack for function calls.
	// We are misaligned here because of an odd number of args for PUSH.
	// It's not like x86 where you need to account for an extra 4 bytes
	// consumed by CALL.
	SUB(_SP, _SP, 4);

	MOVI2R(R0, (u32)&CoreTiming::downcount);
	MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);

	FixupBranch skipToRealDispatcher = B();
	dispatcher = GetCodePtr();
		printf("ILDispatcher is %p\n", dispatcher);

		// Downcount Check
		// The result of the slice decrement should still be in the flags if somebody jumped here
		// IMPORTANT - We jump on negative, not carry!!!
		FixupBranch bail = B_CC(CC_MI);

		SetJumpTarget(skipToRealDispatcher);
		dispatcherNoCheck = GetCodePtr();

		// This block of code gets the address of the compiled block of code
		// It runs through to the compiling portion if it isn't found
			LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12

			Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK
			BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here.

			MOVI2R(R14, (u32)jit->GetBlockCache()->iCache);

			LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here
			// R12 Confirmed this is the correct iCache Location loaded.
			TST(R12, 0x80); // Test to see if it is a JIT block.

			SetCC(CC_EQ);
				// Success, it is our Jitblock.
				MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers());
				// LDR R14 right here to get CodePointers()[0] pointer.
				LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size
				LDR(R14, R14, R12); // Load the block address in to R14

				B(R14);
				// No need to jump anywhere after here, the block will go back to dispatcher start
			SetCC();

		// If we get to this point, that means that we don't have the block cached to execute
		// So call ArmJit to compile the block and then execute it.
		MOVI2R(R14, (u32)&Jit);
		BL(R14);

		B(dispatcherNoCheck);

		// fpException()
		// Floating Point Exception Check, Jumped to if false
		fpException = GetCodePtr();
			LDR(R0, R9, PPCSTATE_OFF(Exceptions));
			ORR(R0, R0, EXCEPTION_FPU_UNAVAILABLE);
			STR(R0, R9, PPCSTATE_OFF(Exceptions));
				QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions);
			LDR(R0, R9, PPCSTATE_OFF(npc));
			STR(R0, R9, PPCSTATE_OFF(pc));
		B(dispatcher);

		SetJumpTarget(bail);
		doTiming = GetCodePtr();
		// XXX: In JIT64, Advance() gets called /after/ the exception checking
		// once it jumps back to the start of outerLoop
		QuickCallFunction(R14, (void*)&CoreTiming::Advance);

		// Does exception checking
		testExceptions = GetCodePtr();
			LDR(R0, R9, PPCSTATE_OFF(pc));
			STR(R0, R9, PPCSTATE_OFF(npc));
				QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions);
			LDR(R0, R9, PPCSTATE_OFF(npc));
			STR(R0, R9, PPCSTATE_OFF(pc));
		// Check the state pointer to see if we are exiting
		// Gets checked on every exception check
			MOVI2R(R0, (u32)PowerPC::GetStatePtr());
			MVN(R1, 0);
			LDR(R0, R0);
			TST(R0, R1);
			FixupBranch Exit = B_CC(CC_NEQ);

	B(dispatcher);

	SetJumpTarget(Exit);

	ADD(_SP, _SP, 4);

	POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC);  // Returns

	GenerateCommon();

	FlushIcache();
}
Example #16
void Jit::Comp_FPU2op(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	
	int fs = _FS;
	int fd = _FD;

	switch (op & 0x3f) 
	{
	case 5:	//F(fd)	= fabsf(F(fs)); break; //abs
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		MOVSS(fpr.RX(fd), fpr.R(fs));
		PAND(fpr.RX(fd), M(ssNoSignMask));
		break;

	case 6:	//F(fd)	= F(fs);				break; //mov
		if (fd != fs) {
			fpr.SpillLock(fd, fs);
			fpr.MapReg(fd, fd == fs, true);
			MOVSS(fpr.RX(fd), fpr.R(fs));
		}
		break;

	case 7:	//F(fd)	= -F(fs);			 break; //neg
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		MOVSS(fpr.RX(fd), fpr.R(fs));
		PXOR(fpr.RX(fd), M(ssSignBits2));
		break;


	case 4:	//F(fd)	= sqrtf(F(fs)); break; //sqrt
		fpr.SpillLock(fd, fs); // this probably works, just badly tested
		fpr.MapReg(fd, fd == fs, true);
		SQRTSS(fpr.RX(fd), fpr.R(fs));
		break;

	case 13: //FsI(fd) = F(fs)>=0 ? (int)floorf(F(fs)) : (int)ceilf(F(fs)); break;//trunc.w.s
		{
			fpr.SpillLock(fs, fd);
			fpr.StoreFromRegister(fd);
			CVTTSS2SI(EAX, fpr.R(fs));

			// Did we get an indefinite integer value?
			CMP(32, R(EAX), Imm32(0x80000000));
			FixupBranch skip = J_CC(CC_NE);
			MOVSS(XMM0, fpr.R(fs));
			XORPS(XMM1, R(XMM1));
			CMPSS(XMM0, R(XMM1), CMP_LT);

			// At this point, -inf = 0xffffffff, inf/nan = 0x00000000.
			// We want -inf to be 0x80000000, inf/nan to be 0x7fffffff, so we flip those bits.
			MOVD_xmm(R(EAX), XMM0);
			XOR(32, R(EAX), Imm32(0x7fffffff));

			SetJumpTarget(skip);
			MOV(32, fpr.R(fd), R(EAX));
		}
		break;

	case 32: //F(fd)	= (float)FsI(fs);			break; //cvt.s.w
		// Store to memory so we can read it as an integer value.
		fpr.StoreFromRegister(fs);
		CVTSI2SS(XMM0, fpr.R(fs));
		MOVSS(fpr.R(fd), XMM0);
		break;

	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
	case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
	case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
	case 36: //FsI(fd) = (int)	F(fs);			 break; //cvt.w.s
	default:
		DISABLE;
		return;
	}
	fpr.ReleaseSpillLocks();
}
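
The trunc.w.s case hides a subtle fixup: CVTTSS2SI returns the "integer indefinite" value 0x80000000 for NaN or out-of-range inputs, and the CMPSS/XOR pair rewrites that into the MIPS result. A scalar restatement, where the plain cast stands in for CVTTSS2SI (the out-of-range cast is undefined in ISO C++, but compiles to CVTTSS2SI and yields 0x80000000 on x86):

#include <cstdint>

static int32_t TruncWS(float f)
{
	int32_t v = (int32_t)f;                   // CVTTSS2SI
	if ((uint32_t)v != 0x80000000u)           // CMP + J_CC(CC_NE)
		return v;
	uint32_t mask = (f < 0.0f) ? 0xFFFFFFFFu  // CMPSS(..., CMP_LT)
	                           : 0x00000000u; // NaN compares false
	return (int32_t)(mask ^ 0x7FFFFFFFu);     // flip to 0x80000000/0x7FFFFFFF
}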
Example #17
void Jit::Comp_FPU2op(MIPSOpcode op) {
	CONDITIONAL_DISABLE(FPU);
	
	int fs = _FS;
	int fd = _FD;

	auto execRounding = [&](void (XEmitter::*conv)(X64Reg, OpArg), int setMXCSR) {
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fs == fd, true);

		// Small optimization: 0 is our default mode anyway.
		if (setMXCSR == 0 && !js.hasSetRounding) {
			setMXCSR = -1;
		}
		if (setMXCSR != -1) {
			STMXCSR(MIPSSTATE_VAR(mxcsrTemp));
			MOV(32, R(TEMPREG), MIPSSTATE_VAR(mxcsrTemp));
			AND(32, R(TEMPREG), Imm32(~(3 << 13)));
			OR(32, R(TEMPREG), Imm32(setMXCSR << 13));
			MOV(32, MIPSSTATE_VAR(temp), R(TEMPREG));
			LDMXCSR(MIPSSTATE_VAR(temp));
		}

		(this->*conv)(TEMPREG, fpr.R(fs));

		// Did we get an indefinite integer value?
		CMP(32, R(TEMPREG), Imm32(0x80000000));
		FixupBranch skip = J_CC(CC_NE);
		if (fd != fs) {
			CopyFPReg(fpr.RX(fd), fpr.R(fs));
		}
		XORPS(XMM1, R(XMM1));
		CMPSS(fpr.RX(fd), R(XMM1), CMP_LT);

		// At this point, -inf = 0xffffffff, inf/nan = 0x00000000.
		// We want -inf to be 0x80000000, inf/nan to be 0x7fffffff, so we flip those bits.
		MOVD_xmm(R(TEMPREG), fpr.RX(fd));
		XOR(32, R(TEMPREG), Imm32(0x7fffffff));

		SetJumpTarget(skip);
		MOVD_xmm(fpr.RX(fd), R(TEMPREG));

		if (setMXCSR != -1) {
			LDMXCSR(MIPSSTATE_VAR(mxcsrTemp));
		}
	};

	switch (op & 0x3f) {
	case 5:	//F(fd)	= fabsf(F(fs)); break; //abs
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssNoSignMask[0]));
		if (fd != fs && fpr.IsMapped(fs)) {
			MOVAPS(fpr.RX(fd), MatR(TEMPREG));
			ANDPS(fpr.RX(fd), fpr.R(fs));
		} else {
			if (fd != fs) {
				MOVSS(fpr.RX(fd), fpr.R(fs));
			}
			ANDPS(fpr.RX(fd), MatR(TEMPREG));
		}
		break;

	case 6:	//F(fd)	= F(fs);				break; //mov
		if (fd != fs) {
			fpr.SpillLock(fd, fs);
			fpr.MapReg(fd, fd == fs, true);
			CopyFPReg(fpr.RX(fd), fpr.R(fs));
		}
		break;

	case 7:	//F(fd)	= -F(fs);			 break; //neg
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssSignBits2[0]));
		if (fd != fs && fpr.IsMapped(fs)) {
			MOVAPS(fpr.RX(fd), MatR(TEMPREG));
			XORPS(fpr.RX(fd), fpr.R(fs));
		} else {
			if (fd != fs) {
				MOVSS(fpr.RX(fd), fpr.R(fs));
			}
			XORPS(fpr.RX(fd), MatR(TEMPREG));
		}
		break;

	case 4:	//F(fd)	= sqrtf(F(fs)); break; //sqrt
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		SQRTSS(fpr.RX(fd), fpr.R(fs));
		break;

	case 13: //FsI(fd) = F(fs)>=0 ? (int)floorf(F(fs)) : (int)ceilf(F(fs)); break; //trunc.w.s
		execRounding(&XEmitter::CVTTSS2SI, -1);
		break;

	case 32: //F(fd)	= (float)FsI(fs);			break; //cvt.s.w
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fs == fd, true);
		if (fpr.IsMapped(fs)) {
			CVTDQ2PS(fpr.RX(fd), fpr.R(fs));
		} else {
			// If fs was fd, we'd be in the case above since we mapped fd.
			MOVSS(fpr.RX(fd), fpr.R(fs));
			CVTDQ2PS(fpr.RX(fd), fpr.R(fd));
		}
		break;

	case 36: //FsI(fd) = (int)	F(fs);			 break; //cvt.w.s
		// Uses the current rounding mode.
		execRounding(&XEmitter::CVTSS2SI, -1);
		break;

	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
		execRounding(&XEmitter::CVTSS2SI, 0);
		break;
	case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
		execRounding(&XEmitter::CVTSS2SI, 2);
		break;
	case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
		execRounding(&XEmitter::CVTSS2SI, 1);
		break;
	default:
		DISABLE;
		return;
	}
	fpr.ReleaseSpillLocks();
}
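
The setMXCSR values passed to execRounding select the SSE rounding mode: MXCSR bits 13-14 encode 0 = round to nearest, 1 = toward -inf, 2 = toward +inf, 3 = toward zero, which matches the round/floor/ceil cases above. The bit manipulation in isolation (WithRoundingMode is an illustrative helper):

#include <cstdint>

// Mirrors the AND/OR pair inside execRounding: clear bits 13-14 of MXCSR,
// then install the requested rounding control.
constexpr uint32_t WithRoundingMode(uint32_t mxcsr, uint32_t rc)
{
	return (mxcsr & ~(3u << 13)) | (rc << 13);
}
static_assert(WithRoundingMode(0x1F80, 3) == 0x7F80, "round toward zero");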
Example #18
void JitArm::stX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	u32 a = inst.RA, b = inst.RB, s = inst.RS;
	s32 offset = inst.SIMM_16;
	u32 accessSize = 0;
	s32 regOffset = -1;
	bool update = false;
	bool fastmem = false;
	switch (inst.OPCD)
	{
		case 45: // sthu
			update = true;
		case 44: // sth
			accessSize = 16;
		break;
		case 31:
			switch (inst.SUBOP10)
			{
				case 183: // stwux
					update = true;
				case 151: // stwx
					fastmem = true;
					accessSize = 32;
					regOffset = b;
				break;
				case 247: // stbux
					update = true;
				case 215: // stbx
					accessSize = 8;
					regOffset = b;
				break;
				case 439: // sthux
					update = true;
				case 407: // sthx
					accessSize = 16;
					regOffset = b;
				break;
			}
		break;
		case 37: // stwu
			update = true;
		case 36: // stw
			fastmem = true;
			accessSize = 32;
		break;
		case 39: // stbu
			update = true;
		case 38: // stb
			accessSize = 8;
		break;
	}
	SafeStoreFromReg(fastmem, update ? a : (a ? a : -1), s, regOffset, accessSize, offset);
	if (update)
	{
		ARMReg rA = gpr.GetReg();
		ARMReg RB;
		ARMReg RA = gpr.R(a);
		if (regOffset != -1)
			RB = gpr.R(regOffset);
		// Check for DSI exception prior to writing back address
		LDR(rA, R9, PPCSTATE_OFF(Exceptions));
		TST(rA, EXCEPTION_DSI);
		FixupBranch DoNotWrite = B_CC(CC_NEQ);
		if (a)
		{
			if (regOffset == -1)
			{
				MOVI2R(rA, offset);
				ADD(RA, RA, rA);
			}
			else
			{
				ADD(RA, RA, RB);
			}
		}
		else
		{
			if (regOffset == -1)
				MOVI2R(RA, (u32)offset);
			else
				MOV(RA, RB);
		}
		SetJumpTarget(DoNotWrite);
		gpr.Unlock(rA);
	}
}
Example #19
void MIPSEmitter::BNE(MIPSReg rs, MIPSReg rt, const void *func, std::function<void ()> delaySlot) {
	SetJumpTarget(BNE(rs, rt, delaySlot), func);
}
Example #20
void Jit64::fcmpx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(jo.fpAccurateFcmp);

	//bool ordered = inst.SUBOP10 == 32;
	int a   = inst.FA;
	int b   = inst.FB;
	int crf = inst.CRFD;

	fpr.Lock(a,b);
	fpr.BindToRegister(b, true);

	// Are we masking sNaN invalid floating point exceptions? If not, this could crash unless we handle the exception.
	UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));

	FixupBranch pNaN, pLesser, pGreater;
	FixupBranch continue1, continue2, continue3;

	if (a != b)
	{
		// if B > A, goto Lesser's jump target
		pLesser  = J_CC(CC_A);
	}

	// if (B != B) or (A != A), goto NaN's jump target
	pNaN = J_CC(CC_P);

	if (a != b)
	{
		// if B < A, goto Greater's jump target
		// JB can't precede the NaN check because it doesn't test ZF
		pGreater = J_CC(CC_B);
	}

	// Equal
	MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
	continue1 = J();

	// NAN
	SetJumpTarget(pNaN);
	MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x1));

	if (a != b)
	{
		continue2 = J();

		// Greater Than
		SetJumpTarget(pGreater);
		MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
		continue3 = J();

		// Less Than
		SetJumpTarget(pLesser);
		MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
	}

	SetJumpTarget(continue1);
	if (a != b)
	{
		SetJumpTarget(continue2);
		SetJumpTarget(continue3);
	}

	fpr.UnlockAll();
}
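
A scalar equivalent of the cr_fast value the branches above compute. After UCOMISD(b, a), PF signals an unordered comparison, which is why the CC_P test must sit between the CC_A and CC_B branches:

#include <cmath>
#include <cstdint>

static uint8_t FcmpCrFast(double a, double b)
{
	if (std::isnan(a) || std::isnan(b))
		return 0x1;  // unordered (the CC_P path)
	if (a < b)
		return 0x8;  // less than (reached via J_CC(CC_A): b above a)
	if (a > b)
		return 0x4;  // greater than (J_CC(CC_B): b below a)
	return 0x2;      // equal
}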
Example #21
const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
	js.cancel = false;
	js.blockStart = js.compilerPC = mips_->pc;
	js.nextExit = 0;
	js.downcountAmount = 0;
	js.curBlock = b;
	js.compiling = true;
	js.inDelaySlot = false;
	js.afterOp = JitState::AFTER_NONE;
	js.PrefixStart();

	// We add a check before the block, used when entering from a linked block.
	b->checkedEntry = GetCodePtr();
	// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
	FixupBranch skip = J_CC(CC_NBE);
	MOV(32, M(&mips_->pc), Imm32(js.blockStart));
	JMP(asm_.outerLoop, true);  // downcount hit zero - go advance.
	SetJumpTarget(skip);

	b->normalEntry = GetCodePtr();

	MIPSAnalyst::AnalysisResults analysis = MIPSAnalyst::Analyze(em_address);

	gpr.Start(mips_, analysis);
	fpr.Start(mips_, analysis);

	js.numInstructions = 0;
	while (js.compiling) {
		// Jit breakpoints are quite fast, so let's do them in release too.
		CheckJitBreakpoint(js.compilerPC, 0);

		MIPSOpcode inst = Memory::Read_Instruction(js.compilerPC);
		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

		MIPSCompileOp(inst);

		if (js.afterOp & JitState::AFTER_CORE_STATE) {
			// TODO: Save/restore?
			FlushAll();

			// If we're rewinding, CORE_NEXTFRAME should not cause a rewind.
			// It doesn't really matter either way if we're not rewinding.
			// CORE_RUNNING is <= CORE_NEXTFRAME.
			CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
			FixupBranch skipCheck = J_CC(CC_LE);
			if (js.afterOp & JitState::AFTER_REWIND_PC_BAD_STATE)
				MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
			else
				MOV(32, M(&mips_->pc), Imm32(js.compilerPC + 4));
			WriteSyscallExit();
			SetJumpTarget(skipCheck);

			js.afterOp = JitState::AFTER_NONE;
		}

		js.compilerPC += 4;
		js.numInstructions++;

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800)
		{
			FlushAll();
			WriteExit(js.compilerPC, js.nextExit++);
			js.compiling = false;
		}
	}

	b->codeSize = (u32)(GetCodePtr() - b->normalEntry);
	NOP();
	AlignCode4();
	b->originalSize = js.numInstructions;
	return b->normalEntry;
}