void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, const word32 *subkeys, unsigned int rounds) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28}; const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24}; #else const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24}; const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28}; #endif // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ... uint32x4_p x1 = VecPermute(block0, block1, m1); uint32x4_p y1 = VecPermute(block0, block1, m2); for (int i = static_cast<int>(rounds-1); i >= 0; --i) { #if CRYPTOPP_POWER7_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[i]); #else // subkeys has extra elements so memory backs the last subkey const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; uint32x4_p rk = VecLoad(subkeys+i); rk = VecPermute(rk, rk, m); #endif y1 = VecXor(y1, x1); y1 = RotateRight32<3>(y1); x1 = VecXor(x1, rk); x1 = VecSub(x1, y1); x1 = RotateLeft32<8>(x1); } #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4}; const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12}; #else const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20}; const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28}; #endif // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4] block0 = (uint32x4_p)VecPermute(x1, y1, m3); block1 = (uint32x4_p)VecPermute(x1, y1, m4); }
void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4, uint32x4_p &block5, const word64 *subkeys, unsigned int rounds) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... uint64x2_p x1 = (uint64x2_p)VecPermute(block0, block1, m1); uint64x2_p y1 = (uint64x2_p)VecPermute(block0, block1, m2); uint64x2_p x2 = (uint64x2_p)VecPermute(block2, block3, m1); uint64x2_p y2 = (uint64x2_p)VecPermute(block2, block3, m2); uint64x2_p x3 = (uint64x2_p)VecPermute(block4, block5, m1); uint64x2_p y3 = (uint64x2_p)VecPermute(block4, block5, m2); for (int i = static_cast<int>(rounds-1); i >= 0; --i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); y1 = VecXor(y1, x1); y2 = VecXor(y2, x2); y3 = VecXor(y3, x3); y1 = RotateRight64<3>(y1); y2 = RotateRight64<3>(y2); y3 = RotateRight64<3>(y3); x1 = VecXor(x1, rk); x2 = VecXor(x2, rk); x3 = VecXor(x3, rk); x1 = VecSub(x1, y1); x2 = VecSub(x2, y2); x3 = VecSub(x3, y3); x1 = RotateLeft64<8>(x1); x2 = RotateLeft64<8>(x2); x3 = RotateLeft64<8>(x3); } #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... 
block0 = (uint32x4_p)VecPermute(x1, y1, m3); block1 = (uint32x4_p)VecPermute(x1, y1, m4); block2 = (uint32x4_p)VecPermute(x2, y2, m3); block3 = (uint32x4_p)VecPermute(x2, y2, m4); block4 = (uint32x4_p)VecPermute(x3, y3, m3); block5 = (uint32x4_p)VecPermute(x3, y3, m4); }
void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... uint64x2_p x1 = (uint64x2_p)VecPermute(block, block, m1); uint64x2_p y1 = (uint64x2_p)VecPermute(block, block, m2); for (int i=0; i < static_cast<int>(rounds); ++i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); x1 = RotateRight64<8>(x1); x1 = VecAdd(x1, y1); x1 = VecXor(x1, rk); y1 = RotateLeft64<3>(y1); y1 = VecXor(y1, x1); } #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; //const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; //const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... block = (uint32x4_p)VecPermute(x1, y1, m3); }
inline void SIMON128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... uint64x2_p x1 = (uint64x2_p)VecPermute(block, block, m1); uint64x2_p y1 = (uint64x2_p)VecPermute(block, block, m2); if (rounds & 1) { std::swap(x1, y1); const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]); y1 = VecXor(VecXor(y1, rk), SIMON128_f(x1)); rounds--; } for (int i = static_cast<int>(rounds-2); i >= 0; i -= 2) { const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i+1]); x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk1); const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i]); y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk2); } #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; //const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; //const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... block = (uint32x4_p)VecPermute(x1, y1, m3); }
inline void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4, uint32x4_p &block5, const word64 *subkeys, unsigned int rounds) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... uint64x2_p x1 = (uint64x2_p)VecPermute(block0, block1, m1); uint64x2_p y1 = (uint64x2_p)VecPermute(block0, block1, m2); uint64x2_p x2 = (uint64x2_p)VecPermute(block2, block3, m1); uint64x2_p y2 = (uint64x2_p)VecPermute(block2, block3, m2); uint64x2_p x3 = (uint64x2_p)VecPermute(block4, block5, m1); uint64x2_p y3 = (uint64x2_p)VecPermute(block4, block5, m2); for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2) { const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]); y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1); y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk1); y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk1); const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]); x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2); x2 = VecXor(VecXor(x2, SIMON128_f(y2)), rk2); x3 = VecXor(VecXor(x3, SIMON128_f(y3)), rk2); } if (rounds & 1) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]); y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk); y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk); y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk); std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3); } #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // 
[A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... block0 = (uint32x4_p)VecPermute(x1, y1, m3); block1 = (uint32x4_p)VecPermute(x1, y1, m4); block2 = (uint32x4_p)VecPermute(x2, y2, m3); block3 = (uint32x4_p)VecPermute(x2, y2, m4); block4 = (uint32x4_p)VecPermute(x3, y3, m3); block5 = (uint32x4_p)VecPermute(x3, y3, m4); }
// SIMON-128 round function applied to each 64-bit lane of val:
//   f(v) = rotl(v, 2) ^ (rotl(v, 1) & rotl(v, 8))
// where rotl(v, N) is whatever RotateLeft64<N> computes.
inline uint64x2_p SIMON128_f(const uint64x2_p val)
{
    const uint64x2_p rot1 = RotateLeft64<1>(val);
    const uint64x2_p rot8 = RotateLeft64<8>(val);
    const uint64x2_p rot2 = RotateLeft64<2>(val);

    return VecXor(rot2, VecAnd(rot1, rot8));
}