void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4, uint32x4_p &block5, const word64 *subkeys, unsigned int rounds) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m1 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; const uint8x16_p m2 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m1 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m2 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... uint64x2_p x1 = (uint64x2_p)VecPermute(block0, block1, m1); uint64x2_p y1 = (uint64x2_p)VecPermute(block0, block1, m2); uint64x2_p x2 = (uint64x2_p)VecPermute(block2, block3, m1); uint64x2_p y2 = (uint64x2_p)VecPermute(block2, block3, m2); uint64x2_p x3 = (uint64x2_p)VecPermute(block4, block5, m1); uint64x2_p y3 = (uint64x2_p)VecPermute(block4, block5, m2); for (int i = static_cast<int>(rounds-1); i >= 0; --i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); y1 = VecXor(y1, x1); y2 = VecXor(y2, x2); y3 = VecXor(y3, x3); y1 = RotateRight64<3>(y1); y2 = RotateRight64<3>(y2); y3 = RotateRight64<3>(y3); x1 = VecXor(x1, rk); x2 = VecXor(x2, rk); x3 = VecXor(x3, rk); x1 = VecSub(x1, y1); x2 = VecSub(x2, y2); x3 = VecSub(x3, y3); x1 = RotateLeft64<8>(x1); x2 = RotateLeft64<8>(x2); x3 = RotateLeft64<8>(x3); } #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m3 = {31,30,29,28,27,26,25,24, 15,14,13,12,11,10,9,8}; const uint8x16_p m4 = {23,22,21,20,19,18,17,16, 7,6,5,4,3,2,1,0}; #else const uint8x16_p m3 = {7,6,5,4,3,2,1,0, 23,22,21,20,19,18,17,16}; const uint8x16_p m4 = {15,14,13,12,11,10,9,8, 31,30,29,28,27,26,25,24}; #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... block0 = (uint32x4_p)VecPermute(x1, y1, m3); block1 = (uint32x4_p)VecPermute(x1, y1, m4); block2 = (uint32x4_p)VecPermute(x2, y2, m3); block3 = (uint32x4_p)VecPermute(x2, y2, m4); block4 = (uint32x4_p)VecPermute(x3, y3, m3); block5 = (uint32x4_p)VecPermute(x3, y3, m4); }
void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, const word32 *subkeys, unsigned int rounds) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28}; const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24}; #else const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24}; const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28}; #endif // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ... uint32x4_p x1 = VecPermute(block0, block1, m1); uint32x4_p y1 = VecPermute(block0, block1, m2); for (int i = static_cast<int>(rounds-1); i >= 0; --i) { #if CRYPTOPP_POWER7_AVAILABLE const uint32x4_p rk = vec_splats(subkeys[i]); #else // subkeys has extra elements so memory backs the last subkey const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; uint32x4_p rk = VecLoad(subkeys+i); rk = VecPermute(rk, rk, m); #endif y1 = VecXor(y1, x1); y1 = RotateRight32<3>(y1); x1 = VecXor(x1, rk); x1 = VecSub(x1, y1); x1 = RotateLeft32<8>(x1); } #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4}; const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12}; #else const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20}; const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28}; #endif // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4] block0 = (uint32x4_p)VecPermute(x1, y1, m3); block1 = (uint32x4_p)VecPermute(x1, y1, m4); }