/**
 * Applies a reduced (single-round) version of the configured G permutation
 * to the sponge state: Blake2b's G when SPONGE == 0, BlaMka's G when
 * SPONGE == 1 (any other SPONGE value leaves the state untouched).
 *
 * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's or
 *          BlaMka's G function
 */
inline static void reducedSpongeLyra(uint64_t *v)
{
#if (SPONGE == 0)
    ROUND_LYRA(0);
#elif (SPONGE == 1)
    uint64_t t0, t1, t2;  /* scratch words consumed by the BlaMka round macro */
    ROUND_LYRA_BLAMKA(0);
#endif
}
/**
 * Applies a reduced (single-round) Blake2b G permutation to the sponge
 * state, regardless of the SPONGE setting.
 *
 * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's
 *          G function
 */
inline static void reducedBlake2bLyra(uint64_t *v)
{
    ROUND_LYRA(0);
}
/**
 * Execute Blake2b's G function, with all 12 rounds.
 *
 * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
 *
 * NOTE(review): a second definition of blake2bLyra (an AVX2/AVX-accelerated
 * variant) appears later in this file. Defining both in the same translation
 * unit is a redefinition error, so presumably only one of the two is meant to
 * be compiled -- confirm which variant the build actually selects.
 */
inline static void blake2bLyra(uint64_t *v)
{
    /* Full permutation: 12 rounds, fully unrolled. */
    ROUND_LYRA(0);
    ROUND_LYRA(1);
    ROUND_LYRA(2);
    ROUND_LYRA(3);
    ROUND_LYRA(4);
    ROUND_LYRA(5);
    ROUND_LYRA(6);
    ROUND_LYRA(7);
    ROUND_LYRA(8);
    ROUND_LYRA(9);
    ROUND_LYRA(10);
    ROUND_LYRA(11);
}
/**
 * One full round of the Lyra2 permutation over the 16-word (1024-bit) state:
 * a column pass of G, a diagonalization of the state rows, a diagonal pass
 * of G, and the inverse shuffle. Dispatches to AVX2, AVX, or plain-C code
 * depending on compile-time CPU support.
 *
 * @param v A 1024-bit (16 uint64_t) array holding the sponge state
 */
inline static void lyra_round( uint64_t *v )
{
#ifdef __AVX2__
    /* Each 256-bit register holds one 4-word row of the state. */
    __m256i row0 = _mm256_load_si256( (__m256i*)(&v[ 0]) );
    __m256i row1 = _mm256_load_si256( (__m256i*)(&v[ 4]) );
    __m256i row2 = _mm256_load_si256( (__m256i*)(&v[ 8]) );
    __m256i row3 = _mm256_load_si256( (__m256i*)(&v[12]) );

    G_4X64( row0, row1, row2, row3 );

    /* Diagonalize: rotate rows 1-3 so the next G pass mixes diagonals. */
    row1 = mm256_rotl256_1x64( row1 );
    row2 = mm256_swap128( row2 );
    row3 = mm256_rotr256_1x64( row3 );

    G_4X64( row0, row1, row2, row3 );

    /* Undo the diagonalization. */
    row1 = mm256_rotr256_1x64( row1 );
    row2 = mm256_swap128( row2 );
    row3 = mm256_rotl256_1x64( row3 );

    _mm256_store_si256( (__m256i*)(&v[ 0]), row0 );
    _mm256_store_si256( (__m256i*)(&v[ 4]), row1 );
    _mm256_store_si256( (__m256i*)(&v[ 8]), row2 );
    _mm256_store_si256( (__m256i*)(&v[12]), row3 );

#elif defined __AVX__
    /* Each state row is split across a low/high pair of 128-bit registers. */
    __m128i a_lo = _mm_load_si128( (__m128i*)(&v[ 0]) );
    __m128i a_hi = _mm_load_si128( (__m128i*)(&v[ 2]) );
    __m128i b_lo = _mm_load_si128( (__m128i*)(&v[ 4]) );
    __m128i b_hi = _mm_load_si128( (__m128i*)(&v[ 6]) );
    __m128i c_lo = _mm_load_si128( (__m128i*)(&v[ 8]) );
    __m128i c_hi = _mm_load_si128( (__m128i*)(&v[10]) );
    __m128i d_lo = _mm_load_si128( (__m128i*)(&v[12]) );
    __m128i d_hi = _mm_load_si128( (__m128i*)(&v[14]) );

    G_2X64( a_lo, b_lo, c_lo, d_lo );
    G_2X64( a_hi, b_hi, c_hi, d_hi );

    /* Diagonalize rows 1-3 (macros update both halves in place). */
    mm128_rotl256_1x64( b_lo, b_hi );
    mm128_swap128( c_lo, c_hi );
    mm128_rotr256_1x64( d_lo, d_hi );

    G_2X64( a_lo, b_lo, c_lo, d_lo );
    G_2X64( a_hi, b_hi, c_hi, d_hi );

    /* Undo the diagonalization. */
    mm128_rotr256_1x64( b_lo, b_hi );
    mm128_swap128( c_lo, c_hi );
    mm128_rotl256_1x64( d_lo, d_hi );

    _mm_store_si128( (__m128i*)(&v[ 0]), a_lo );
    _mm_store_si128( (__m128i*)(&v[ 2]), a_hi );
    _mm_store_si128( (__m128i*)(&v[ 4]), b_lo );
    _mm_store_si128( (__m128i*)(&v[ 6]), b_hi );
    _mm_store_si128( (__m128i*)(&v[ 8]), c_lo );
    _mm_store_si128( (__m128i*)(&v[10]), c_hi );
    _mm_store_si128( (__m128i*)(&v[12]), d_lo );
    _mm_store_si128( (__m128i*)(&v[14]), d_hi );

#else
    /* Scalar fallback; the macro operates on v directly. */
    ROUND_LYRA(0);
#endif
}
/**
 * Applies the full G permutation to the sponge state: all 12 rounds of
 * Blake2b's G function when SPONGE == 0, or all 24 rounds of BlaMka's G
 * function when SPONGE == 1.
 *
 * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's or
 *          BlaMka's G function
 */
inline static void spongeLyra(uint64_t *v)
{
#if (SPONGE == 0)
    /* 12 unrolled Blake2b rounds. */
    ROUND_LYRA(0);   ROUND_LYRA(1);   ROUND_LYRA(2);   ROUND_LYRA(3);
    ROUND_LYRA(4);   ROUND_LYRA(5);   ROUND_LYRA(6);   ROUND_LYRA(7);
    ROUND_LYRA(8);   ROUND_LYRA(9);   ROUND_LYRA(10);  ROUND_LYRA(11);
#elif (SPONGE == 1)
    uint64_t t0, t1, t2;  /* scratch words consumed by the BlaMka round macro */

    /* 24 unrolled BlaMka rounds. */
    ROUND_LYRA_BLAMKA(0);   ROUND_LYRA_BLAMKA(1);   ROUND_LYRA_BLAMKA(2);
    ROUND_LYRA_BLAMKA(3);   ROUND_LYRA_BLAMKA(4);   ROUND_LYRA_BLAMKA(5);
    ROUND_LYRA_BLAMKA(6);   ROUND_LYRA_BLAMKA(7);   ROUND_LYRA_BLAMKA(8);
    ROUND_LYRA_BLAMKA(9);   ROUND_LYRA_BLAMKA(10);  ROUND_LYRA_BLAMKA(11);
    ROUND_LYRA_BLAMKA(12);  ROUND_LYRA_BLAMKA(13);  ROUND_LYRA_BLAMKA(14);
    ROUND_LYRA_BLAMKA(15);  ROUND_LYRA_BLAMKA(16);  ROUND_LYRA_BLAMKA(17);
    ROUND_LYRA_BLAMKA(18);  ROUND_LYRA_BLAMKA(19);  ROUND_LYRA_BLAMKA(20);
    ROUND_LYRA_BLAMKA(21);  ROUND_LYRA_BLAMKA(22);  ROUND_LYRA_BLAMKA(23);
#endif
}
/**
 * Execute Blake2b's G function, with all 12 rounds.
 *
 * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
 *
 * NOTE(review): an earlier, scalar-only definition of blake2bLyra exists in
 * this file. Defining both in the same translation unit is a redefinition
 * error, so presumably only one of the two is meant to be compiled -- confirm
 * which variant the build actually selects.
 */
inline static void blake2bLyra(uint64_t *v)
{
#if defined __AVX2__
    // may be still used by squeeze
    /* The INIT/CLOSE macros bracket the rounds: INIT loads v into locals,
       CLOSE stores them back. */
    LYRA_INIT_AVX2;   // defines local a[4]
    LYRA_12_ROUNDS_AVX2( a[0], a[1], a[2], a[3] );
    LYRA_CLOSE_AVX2;
#elif defined __AVX__
    LYRA_INIT_AVX;    // defines locals a0[4], a1[4]
    /* 12 unrolled rounds operating on the macro-defined locals. */
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_ROUND_AVX;
    LYRA_CLOSE_AVX;
#else
    /* Scalar fallback: 12 unrolled rounds directly on v. */
    ROUND_LYRA(0);
    ROUND_LYRA(1);
    ROUND_LYRA(2);
    ROUND_LYRA(3);
    ROUND_LYRA(4);
    ROUND_LYRA(5);
    ROUND_LYRA(6);
    ROUND_LYRA(7);
    ROUND_LYRA(8);
    ROUND_LYRA(9);
    ROUND_LYRA(10);
    ROUND_LYRA(11);
#endif
}