//input: a15b15 ... a0b0 void format_output(dqword *state){ //a15b15 a13b13 ... a3b3 a1b1 a14b14 a12b12 ... a0b0 dqword tmp = PSHUFB(*state, LOAD(PRESENTOutShuffleU)); //a14b14 a12b12 ... a2b2 a0b0 a15b15 a13b13 ... a1b1 *state = PSHUFB(*state, LOAD(PRESENTOutShuffleL)); dqword tmpstate = MASK128U(XORDQW(MASK4U(tmp), MASK4L(SRLQW(*state, 4)))); tmp = MASK128L(XORDQW(MASK4L(tmp), MASK4U(SLLQW(*state, 4)))); *state = ORDQW(tmpstate, tmp); }
/*
 Advances the 80-bit key register by one round, in place.

 Register layout (per the original author's note):
   key1: k79k78...k16 | k79k78...k16
   key2: k15k14...k0  | k15k14...k0

 key1    - in/out, upper 64 key bits in each 64-bit lane
 key2    - in/out, lower 16 key bits in each 64-bit lane
 roundid - selects the 16-byte entry of PRESENTRCounter80 that is XORed in
*/
void key_schedule(dqword *key1, dqword *key2, int roundid){
    //[k38, k37, k36, k35, k34] = [k38, k37, k36, k35, k34] ^ round_counter
    *key1 = XORDQW(*key1, LOAD(PRESENTRCounter80 + roundid*16*sizeof(unsigned char)));

    //[k79, k78, ..., k1, k0] = [k18, k17, ..., k20, k19]
    // Bits that wrap around to the top of key1 come from the low bits of the
    // old key1 and from key2; they must be captured before key2 is overwritten.
    const dqword wrapped = XORDQW(SLLQW(*key1, 61), SLLQW(*key2, 45));
    *key2 = SRLQW(SLLQW(*key1, 45), 48);
    *key1 = XORDQW(SRLQW(*key1, 19), wrapped);

    //[k79, k78, k77, k76] = Sbox[k79, k78, k77, k76]
    // NOTE(review): presumably PRESENTKeyMask selects every bit except the top
    // nibble, so OR-ing it in makes PSHUFB zero all other bytes - confirm
    // against the table definition.
    const dqword key_mask = LOAD(PRESENTKeyMask);
    const dqword substituted =
        SLLQW(PSHUFB(LOAD(PRESENTSBoxL), SRLQW(ORDQW(*key1, key_mask), 4)), 4);
    *key1 = XORDQW(substituted, ANDDQW(*key1, key_mask));
}
// In Release on 32bit build, // this seemed to cause a problem with PokePark2 // at start after talking to first pokemon, // you run and smash a box, then he goes on about // following him and then you cant do anything. // I have enabled interpreter for this function // in the mean time. // Parlane void Jit64::stfs(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); bool update = inst.OPCD & 1; int s = inst.RS; int a = inst.RA; s32 offset = (s32)(s16)inst.SIMM_16; FALLBACK_IF(!a || update); fpr.BindToRegister(s, true, false); ConvertDoubleToSingle(XMM0, fpr.RX(s)); if (gpr.R(a).IsImm()) { u32 addr = (u32)(gpr.R(a).offset + offset); if (Memory::IsRAMAddress(addr)) { if (cpu_info.bSSSE3) { PSHUFB(XMM0, M((void *)bswapShuffle1x4)); WriteFloatToConstRamAddress(XMM0, addr); return; } } else if (addr == 0xCC008000) { // Float directly to write gather pipe! Fun! CALL((void*)asm_routines.fifoDirectWriteFloat); // TODO js.fifoBytesThisBlock += 4; return; } } gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); gpr.Lock(a); MOV(32, R(ABI_PARAM2), gpr.R(a)); ADD(32, R(ABI_PARAM2), Imm32(offset)); if (update && offset) { // We must flush immediate values from the following register because // it may take another value at runtime if no MMU exception has been raised gpr.KillImmediate(a, true, true); MEMCHECK_START MOV(32, gpr.R(a), R(ABI_PARAM2)); MEMCHECK_END }
// Emits host code for the lfd instruction: reads 8 bytes from
// RA + sign-extended SIMM_16, byte-swaps them, and moves the result into the
// low double of FPR `d` with MOVSD.
// Not handled here when js.memcheck is set or RA is 0 (see FALLBACK_IF).
void Jit64::lfd(UGeckoInstruction inst) {
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	FALLBACK_IF(js.memcheck || !inst.RA);

	int d = inst.RD;
	int a = inst.RA;

	// 16-bit displacement, sign-extended to 32 bits.
	s32 offset = (s32)(s16)inst.SIMM_16;
	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	// ABI_PARAM1 = value of RA; the displacement is applied in the memory operands below.
	MOV(32, R(ABI_PARAM1), gpr.R(a));
	// TODO - optimize. This has to load the previous value - upper double should stay unmodified.
	fpr.Lock(d);
	fpr.BindToRegister(d, true);
	X64Reg xd = fpr.RX(d);

	if (cpu_info.bSSSE3) {
		// SSSE3 path: one 64-bit load into XMM0, then a PSHUFB with
		// bswapShuffle1x8Dupe (presumably an 8-byte swap - confirm the table).
#if _M_X86_64
		// NOTE(review): RBX presumably holds the guest memory base on 64-bit - confirm.
		MOVQ_xmm(XMM0, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
#else
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
#endif
		PSHUFB(XMM0, M((void *)bswapShuffle1x8Dupe));
		MOVSD(xd, R(XMM0));
	} else {
		// Non-SSSE3 path: swap through EAX and stage the value in temp64.
#if _M_X86_64
		LoadAndSwap(64, EAX, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
		MOV(64, M(&temp64), R(EAX));

		MEMCHECK_START

		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));

		MEMCHECK_END
#else
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		// The word at `offset` is swapped and stored at temp64+4 (upper half).
		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
		BSWAP(32, EAX);
		MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));

		MEMCHECK_START

		// The word at `offset + 4` is swapped and stored at temp64 (lower half).
		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
		BSWAP(32, EAX);
		MOV(32, M(&temp64), R(EAX));
		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));

		MEMCHECK_END
#endif
	}

	// Release every register lock taken above.
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
// Emits host code for the stfd instruction: stores the 64-bit contents of
// FPR `s` at RA + sign-extended SIMM_16.
// The emitted code tests the address against mem_mask at run time and takes
// either an inline fast store or the SafeWriteRegToReg slow path.
// Not handled here when js.memcheck is set or RA is 0 (see FALLBACK_IF).
void Jit64::stfd(UGeckoInstruction inst) {
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreFloatingOff);
	FALLBACK_IF(js.memcheck || !inst.RA);

	int s = inst.RS;
	int a = inst.RA;

	// Address bits that force the slow path when any of them is set.
	u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
	if (Core::g_CoreStartupParameter.bMMU ||
	    Core::g_CoreStartupParameter.bTLBHack) {
		mem_mask |= Memory::ADDR_MASK_MEM1;
	}
#ifdef ENABLE_MEM_CHECK
	if (Core::g_CoreStartupParameter.bEnableDebugging) {
		mem_mask |= Memory::EXRAM_MASK;
	}
#endif

	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	fpr.Lock(s);
	gpr.BindToRegister(a, true, false);

	// 16-bit displacement, sign-extended to 32 bits.
	s32 offset = (s32)(s16)inst.SIMM_16;
	// ABI_PARAM1 = RA + offset (effective address).
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	TEST(32, R(ABI_PARAM1), Imm32(mem_mask));
	// Any masked bit set -> jump to the safe routine emitted further down.
	FixupBranch safe = J_CC(CC_NZ);

	// Fast routine
	if (cpu_info.bSSSE3) {
		// Shuffle with bswapShuffle1x8 (presumably an 8-byte swap) and store all
		// 64 bits at once.
		MOVAPD(XMM0, fpr.R(s));
		PSHUFB(XMM0, M((void*)bswapShuffle1x8));
#if _M_X86_64
		MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, 0), XMM0);
#else
		// NOTE(review): the mask is applied to ECX while the store goes through
		// ABI_PARAM1 - presumably the same register on 32-bit builds; confirm.
		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base), XMM0);
#endif
	} else {
		// Two 32-bit stores: the low word goes to address+4, then the high word
		// (after shifting XMM0 right by 32) goes to address+0.
		MOVAPD(XMM0, fpr.R(s));
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);

		PSRLQ(XMM0, 32);
		MOVD_xmm(R(EAX), XMM0);
		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
	}
	// Skip over the slow routine once the fast store is done.
	FixupBranch exit = J(true);
	SetJumpTarget(safe);

	// Safe but slow routine
	// High word first, written at address+0. Bit (16 + XMM0) is added to the
	// in-use register set for this call (presumably marks XMM0 as live - confirm).
	MOVAPD(XMM0, fpr.R(s));
	PSRLQ(XMM0, 32);
	MOVD_xmm(R(EAX), XMM0);
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));

	// Low word, written at address+4. The address is rebuilt first, presumably
	// because the previous SafeWriteRegToReg may clobber ABI_PARAM1 - confirm.
	MOVAPD(XMM0, fpr.R(s));
	MOVD_xmm(R(EAX), XMM0);
	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());

	SetJumpTarget(exit);

	// Release every register lock taken above.
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
// Substitutes every nibble of the state, in place, using two PSHUFB table
// lookups: PRESENTSBoxL indexed by the low nibble of each byte and
// PRESENTSBoxH indexed by the high nibble. The two results are XORed together.
void sBoxLayer(dqword *state){
    // Low nibble of each byte, used directly as a shuffle index.
    const dqword low_index = MASK4L(*state);
    // High nibble of each byte, moved down into index position.
    const dqword high_index = MASK4L(SRLDW(*state, 4));

    const dqword low_result = PSHUFB(LOAD(PRESENTSBoxL), low_index);
    const dqword high_result = PSHUFB(LOAD(PRESENTSBoxH), high_index);

    *state = XORDQW(low_result, high_result);
}
// Applies the permutation layer to the state, in place: two bit_permutation
// passes (mask 1 with distance 14, mask 2 with distance 7) followed by a byte
// shuffle with PRESENTPlayerShuffle.
void pLayer(dqword *state){
    const dqword first_mask = LOAD(PRESENTPlayerMask1);
    bit_permutation(state, first_mask, 14);

    const dqword second_mask = LOAD(PRESENTPlayerMask2);
    bit_permutation(state, second_mask, 7);

    const dqword byte_order = LOAD(PRESENTPlayerShuffle);
    *state = PSHUFB(*state, byte_order);
}