Esempio n. 1
0
// Rearranges the cipher state into its output layout, in place.
// The layout comments below use the original notation; a_i/b_i presumably
// denote nibble i of two interleaved blocks a and b -- confirm against caller.
//input: a15b15 ... a0b0
void format_output(dqword *state){
	dqword in = *state;

	//a15b15 a13b13 ... a3b3 a1b1 a14b14 a12b12 ... a0b0
	dqword oddFirst = PSHUFB(in, LOAD(PRESENTOutShuffleU));
	//a14b14  a12b12 ... a2b2 a0b0 a15b15  a13b13 ... a1b1
	dqword evenFirst = PSHUFB(in, LOAD(PRESENTOutShuffleL));

	// Upper part of the result: MASK4U of the odd-first view combined with
	// MASK4L of the even-first view shifted right by one nibble.
	dqword upperPart = MASK128U(XORDQW(MASK4U(oddFirst), MASK4L(SRLQW(evenFirst, 4))));
	// Lower part of the result: the complementary selection, with the
	// even-first view shifted left by one nibble instead.
	dqword lowerPart = MASK128L(XORDQW(MASK4L(oddFirst), MASK4U(SLLQW(evenFirst, 4))));

	// The two parts were masked to disjoint halves, so OR merges them.
	*state = ORDQW(upperPart, lowerPart);
}
Esempio n. 2
0
/*
Advances the 80-bit PRESENT key register by one round, in place.

Register layout (as given by the original author):
key1: k79k78...k16 k79k78...k16
key2: k15k14...k0  |  k15k14...k0

roundid selects the 16-byte entry of PRESENTRCounter80 that is XORed into
the key for this round.
*/
void key_schedule(dqword *key1, dqword *key2, int roundid){
	//[k38, k37, k36, k35, k34] = [k38, k37, k36, k35, k34] ^ round_counter
	*key1 = XORDQW(*key1, LOAD(PRESENTRCounter80 + roundid*16*sizeof(unsigned char)));

	//[k79, k78, ..., k1, k0] = [k18, k17, ..., k20, k19]
	// rotated holds the bits that wrap around into the top of key1; it must be
	// computed before key2 is overwritten because it reads the old key2.
	dqword rotated = XORDQW(SLLQW(*key1, 61), SLLQW(*key2, 45));
	*key2 = SRLQW(SLLQW(*key1, 45), 48);
	*key1 = XORDQW(SRLQW(*key1, 19), rotated);

	//[k79, k78, k77, k76] = Sbox[k79, k78, k77, k76]
	// The key mask is used twice (to build the lookup index and to keep the
	// remaining key bits), so it is loaded once.
	dqword keyMask = LOAD(PRESENTKeyMask);
	dqword substituted = SLLQW(PSHUFB(LOAD(PRESENTSBoxL), SRLQW(ORDQW(*key1, keyMask), 4)), 4);
	*key1 = XORDQW(substituted, ANDDQW(*key1, keyMask));
}
// Emits code for the stfs instruction: the source FPR is converted from
// double to single precision and stored at RA + SIMM_16.
//
// Historical note (Parlane): in Release builds on 32-bit, this seemed to
// cause a problem with PokePark2. At the start, after talking to the first
// Pokemon, you run and smash a box; he then goes on about following him,
// and after that you can't do anything. The interpreter was enabled for
// this function in the meantime.
// NOTE(review): the body below still emits JIT code unless JITDISABLE or
// FALLBACK_IF bail out -- confirm whether the note above is still current.
// NOTE(review): the end of this function (the store on the generic path and
// the closing brace) is not present in this excerpt.
void Jit64::stfs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);

	// The low bit of the primary opcode is treated as the "update" flag.
	bool update = inst.OPCD & 1;
	int s = inst.RS;
	int a = inst.RA;
	// Sign-extend the 16-bit immediate displacement.
	s32 offset = (s32)(s16)inst.SIMM_16;

	// RA == 0 and the update form are not handled by the code below.
	FALLBACK_IF(!a || update);

	// XMM0 receives the single-precision version of the source register.
	fpr.BindToRegister(s, true, false);
	ConvertDoubleToSingle(XMM0, fpr.RX(s));

	// If the base register holds a known immediate, the target address is a
	// compile-time constant and two special cases can be emitted directly.
	if (gpr.R(a).IsImm())
	{
		u32 addr = (u32)(gpr.R(a).offset + offset);
		if (Memory::IsRAMAddress(addr))
		{
			// Only taken with SSSE3 (PSHUFB); without it, control falls
			// through to the generic path below.
			if (cpu_info.bSSSE3) {
				PSHUFB(XMM0, M((void *)bswapShuffle1x4));
				WriteFloatToConstRamAddress(XMM0, addr);
				return;
			}
		}
		else if (addr == 0xCC008000)
		{
			// Float directly to write gather pipe! Fun!
			CALL((void*)asm_routines.fifoDirectWriteFloat);
			// TODO
			js.fifoBytesThisBlock += 4;
			return;
		}
	}

	// Generic path: compute the effective address (RA + offset) in ABI_PARAM2.
	gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
	gpr.Lock(a);
	MOV(32, R(ABI_PARAM2), gpr.R(a));
	ADD(32, R(ABI_PARAM2), Imm32(offset));
	// NOTE(review): if FALLBACK_IF above returns whenever `update` is set,
	// this branch can never be taken -- confirm.
	if (update && offset)
	{
		// We must flush immediate values from the following register because
		// it may take another value at runtime if no MMU exception has been raised
		gpr.KillImmediate(a, true, true);

		MEMCHECK_START

		// Write the updated address back to RA.
		MOV(32, gpr.R(a), R(ABI_PARAM2));

		MEMCHECK_END
	}
// Emits code for the lfd instruction: loads 8 bytes from RA + SIMM_16,
// reverses their byte order and places the result in the low double of the
// destination FPR.
void Jit64::lfd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	// Not handled here when memcheck is active or when RA == 0.
	FALLBACK_IF(js.memcheck || !inst.RA);

	int d = inst.RD;
	int a = inst.RA;

	// Sign-extend the 16-bit immediate displacement.
	s32 offset = (s32)(s16)inst.SIMM_16;
	// ABI_PARAM1 holds the base register value; the displacement is folded
	// into the memory operands below.
	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	MOV(32, R(ABI_PARAM1), gpr.R(a));
	// TODO - optimize. This has to load the previous value - upper double should stay unmodified.
	fpr.Lock(d);
	fpr.BindToRegister(d, true);
	X64Reg xd = fpr.RX(d);

	if (cpu_info.bSSSE3)
	{
		// SSSE3 path: one 8-byte load into XMM0, byte order fixed with PSHUFB.
#if _M_X86_64
		// NOTE(review): RBX is presumably the emulated-memory base -- confirm.
		MOVQ_xmm(XMM0, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
#else
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
#endif
		PSHUFB(XMM0, M((void *)bswapShuffle1x8Dupe));
		MOVSD(xd, R(XMM0));
	} else {
		// Non-SSSE3 path: swap via a general-purpose register and bounce the
		// value through temp64 to get it into an XMM register.
#if _M_X86_64
		LoadAndSwap(64, EAX, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
		MOV(64, M(&temp64), R(EAX));

		// NOTE(review): js.memcheck is false on this path (see FALLBACK_IF
		// above), so the MEMCHECK macros are presumably inert -- confirm.
		MEMCHECK_START

		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));

		MEMCHECK_END
#else
		// 32-bit: two 32-bit loads, each byte-swapped. The word at the lower
		// address goes to the upper half of temp64 and vice versa.
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
		BSWAP(32, EAX);
		MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));

		MEMCHECK_START

		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
		BSWAP(32, EAX);
		MOV(32, M(&temp64), R(EAX));
		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));

		MEMCHECK_END
#endif
	}

	// Release every register lock taken above.
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
// Emits code for the stfd instruction: stores the 64-bit contents of the
// source FPR at RA + SIMM_16. A run-time test on the address selects between
// a fast direct store and a slower path through SafeWriteRegToReg.
void Jit64::stfd(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	// Not handled here when memcheck is active or when RA == 0.
	FALLBACK_IF(js.memcheck || !inst.RA);

	int s = inst.RS;
	int a = inst.RA;

	// Address bits that force the safe path when any of them is set.
	u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
	if (Core::g_CoreStartupParameter.bMMU ||
		Core::g_CoreStartupParameter.bTLBHack) {
			mem_mask |= Memory::ADDR_MASK_MEM1;
	}
#ifdef ENABLE_MEM_CHECK
	if (Core::g_CoreStartupParameter.bEnableDebugging)
	{
		mem_mask |= Memory::EXRAM_MASK;
	}
#endif

	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	fpr.Lock(s);
	// RA must live in a host register because it is used as the LEA base.
	gpr.BindToRegister(a, true, false);

	// ABI_PARAM1 = RA + sign-extended displacement.
	s32 offset = (s32)(s16)inst.SIMM_16;
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	TEST(32, R(ABI_PARAM1), Imm32(mem_mask));
	FixupBranch safe = J_CC(CC_NZ);

	// Fast routine
	if (cpu_info.bSSSE3) {
		// Reorder the bytes with PSHUFB and store all 8 bytes at once.
		MOVAPD(XMM0, fpr.R(s));
		PSHUFB(XMM0, M((void*)bswapShuffle1x8));
#if _M_X86_64
		MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, 0), XMM0);
#else
		// NOTE(review): this masks ECX while the store below addresses
		// ABI_PARAM1; correct only if ABI_PARAM1 is ECX on 32-bit -- confirm.
		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base), XMM0);
#endif
	} else {
		// Without SSSE3: write the value as two 32-bit halves. The low 32 bits
		// go to address + 4 and the high 32 bits to address + 0.
		MOVAPD(XMM0, fpr.R(s));
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);

		PSRLQ(XMM0, 32);
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
	}
	// Skip over the safe routine.
	FixupBranch exit = J(true);
	SetJumpTarget(safe);

	// Safe but slow routine
	// High 32 bits first, written at address + 0. XMM0 is added to the
	// in-use register set passed to SafeWriteRegToReg.
	MOVAPD(XMM0, fpr.R(s));
	PSRLQ(XMM0, 32);
	MOVD_xmm(R(EAX), XMM0);
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));

	// Low 32 bits, written at address + 4. The address is recomputed first.
	// NOTE(review): presumably because the previous SafeWriteRegToReg may
	// clobber ABI_PARAM1 -- confirm.
	MOVAPD(XMM0, fpr.R(s));
	MOVD_xmm(R(EAX), XMM0);
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());

	SetJumpTarget(exit);

	// Release every register lock taken above.
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
Esempio n. 6
0
// Applies the S-box substitution to the state, in place.
// Each byte is split into its low and high nibble; both nibble sets index a
// PSHUFB lookup table (PRESENTSBoxL / PRESENTSBoxH) and the two lookup
// results are XORed together.
// NOTE(review): PRESENTSBoxH presumably holds the S-box outputs already
// positioned in the high nibble -- confirm against the table definition.
void sBoxLayer(dqword *state){
	dqword in = *state;

	// Lookup indices: low nibbles as they are, high nibbles shifted down.
	dqword lowNibbles = MASK4L(in);
	dqword highNibbles = MASK4L(SRLDW(in, 4));

	dqword lowResult = PSHUFB(LOAD(PRESENTSBoxL), lowNibbles);
	dqword highResult = PSHUFB(LOAD(PRESENTSBoxH), highNibbles);

	*state = XORDQW(lowResult, highResult);
}
Esempio n. 7
0
// Applies the permutation layer to the state, in place: two bit_permutation
// passes (mask 1 with shift 14, then mask 2 with shift 7) followed by a
// final byte shuffle.
void pLayer(dqword *state){
	dqword firstMask = LOAD(PRESENTPlayerMask1);
	dqword secondMask = LOAD(PRESENTPlayerMask2);
	dqword byteOrder = LOAD(PRESENTPlayerShuffle);

	// The two passes must run in this order; each one rewrites *state.
	bit_permutation(state, firstMask, 14);
	bit_permutation(state, secondMask, 7);

	dqword permuted = PSHUFB(*state, byteOrder);
	*state = permuted;
}