/**
 * Absorbs a single reduced-size block (BLOCK_LEN_BLAKE2_SAFE_INT64 words of
 * type uint64_t) into the sponge.
 *
 * The first eight 64-bit words of "in" are XORed into the first eight words
 * of the state, and blake2bLyra() is then applied to the state.
 *
 * Alignment: the SIMD paths access "state" with aligned loads and stores, so
 * it must be 32-byte aligned when built with AVX2 and 16-byte aligned when
 * built with AVX. "in" is always read with unaligned loads.
 *
 * @param state   The current state of the sponge (updated in place)
 * @param in      The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
 */
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in)
{
#if defined __AVX2__
    enum { VEC_COUNT = 2, WORDS_PER_VEC = 4 };
    __m256i sponge_v[VEC_COUNT], block_v[VEC_COUNT];
    int k;

    // Every load is issued before the first store.
    for (k = 0; k < VEC_COUNT; k++) {
        sponge_v[k] = _mm256_load_si256((__m256i *)(state + k * WORDS_PER_VEC));
        block_v[k] = _mm256_loadu_si256((const __m256i *)(in + k * WORDS_PER_VEC));
    }
    for (k = 0; k < VEC_COUNT; k++) {
        _mm256_store_si256((__m256i *)(state + k * WORDS_PER_VEC),
                           _mm256_xor_si256(sponge_v[k], block_v[k]));
    }
#elif defined __AVX__
    enum { VEC_COUNT = 4, WORDS_PER_VEC = 2 };
    __m128i sponge_v[VEC_COUNT], block_v[VEC_COUNT];
    int k;

    // Every load is issued before the first store.
    for (k = 0; k < VEC_COUNT; k++) {
        sponge_v[k] = _mm_load_si128((__m128i *)(state + k * WORDS_PER_VEC));
    }
    for (k = 0; k < VEC_COUNT; k++) {
        block_v[k] = _mm_loadu_si128((const __m128i *)(in + k * WORDS_PER_VEC));
    }
    for (k = 0; k < VEC_COUNT; k++) {
        _mm_store_si128((__m128i *)(state + k * WORDS_PER_VEC),
                        _mm_xor_si128(sponge_v[k], block_v[k]));
    }
#else
    enum { SAFE_BLOCK_WORDS = 8 };
    int k;

    // Word-by-word XOR of the block into the state, in ascending order.
    for (k = 0; k < SAFE_BLOCK_WORDS; k++) {
        state[k] ^= in[k];
    }
#endif

    // Applies the transformation f to the sponge's state
    blake2bLyra(state);
}
/**
 * Squeezes "len" bytes out of the sponge into "out".
 *
 * Output is produced in chunks of BLOCK_LEN_BYTES: each full chunk is copied
 * from the start of the state and followed by one blake2bLyra() call on the
 * state. The trailing (len % BLOCK_LEN_BYTES) bytes are copied last, with no
 * further permutation afterwards.
 *
 * @param state   The current state of the sponge (permuted once per full block)
 * @param out     Array that will receive the data squeezed
 * @param len     The number of bytes to be squeezed into the "out" array
 */
inline void squeeze(uint64_t *state, byte *out, unsigned int len)
{
    byte *dst = out;
    unsigned int remaining = len;

    // Emit full blocks while at least one whole block is still requested
    while (remaining >= BLOCK_LEN_BYTES) {
        memcpy(dst, state, BLOCK_LEN_BYTES);
        blake2bLyra(state);
        dst += BLOCK_LEN_BYTES;
        remaining -= BLOCK_LEN_BYTES;
    }

    // Emit the partial tail (possibly zero bytes)
    memcpy(dst, state, remaining);
}
/**
 * Absorbs a single reduced-size block (BLOCK_LEN_BLAKE2_SAFE_INT64 words of
 * type uint64_t) into the sponge.
 *
 * The first eight 64-bit words of "in" are XORed, in ascending order, into
 * the first eight words of the state; blake2bLyra() is then applied to the
 * state.
 *
 * @param state   The current state of the sponge (updated in place)
 * @param in      The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
 */
inline void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in)
{
    enum { SAFE_BLOCK_WORDS = 8 };
    uint64_t *dst = state;
    const uint64_t *src = in;
    const uint64_t *const src_end = in + SAFE_BLOCK_WORDS;

    // Fold the block into the state one word at a time
    while (src != src_end) {
        *dst ^= *src;
        ++dst;
        ++src;
    }

    // Applies the transformation f to the sponge's state
    blake2bLyra(state);
}
/**
 * Performs an absorb operation for a single block (BLOCK_LEN_INT64 words
 * of type uint64_t), using Blake2b's G function as the internal permutation.
 *
 * The first 12 words of "in" are XORed into the first 12 words of the state,
 * then the permutation is applied to the state in place.
 *
 * Alignment: the SIMD paths access "state" with aligned loads and stores, so
 * it must be 32-byte aligned when built with AVX2 and 16-byte aligned when
 * built with AVX. "in" is read with unaligned loads on every SIMD path and
 * therefore needs no alignment beyond that of uint64_t.
 *
 * @param state   The current state of the sponge (updated in place; the AVX2
 *                path reads and writes words 0..15)
 * @param in      The block to be absorbed (BLOCK_LEN_INT64 words)
 */
inline void absorbBlock(uint64_t *state, const uint64_t *in)
{
#if defined __AVX2__
    __m256i state_v[4], in_v[3];

    // Only state is guaranteed to be 256-bit aligned; "in" uses unaligned loads.
    state_v[0] = _mm256_load_si256((__m256i *)(&state[0]));
    in_v[0] = _mm256_loadu_si256((const __m256i *)(&in[0]));
    state_v[1] = _mm256_load_si256((__m256i *)(&state[4]));
    in_v[1] = _mm256_loadu_si256((const __m256i *)(&in[4]));
    state_v[2] = _mm256_load_si256((__m256i *)(&state[8]));
    in_v[2] = _mm256_loadu_si256((const __m256i *)(&in[8]));
    // Words 12..15 receive no input; they only take part in the permutation.
    state_v[3] = _mm256_load_si256((__m256i *)(&state[12]));

    state_v[0] = _mm256_xor_si256(state_v[0], in_v[0]);
    state_v[1] = _mm256_xor_si256(state_v[1], in_v[1]);
    state_v[2] = _mm256_xor_si256(state_v[2], in_v[2]);

    // The permutation runs on the registers here; the other paths call
    // blake2bLyra(state) on memory instead.
    LYRA_12_ROUNDS_AVX2(state_v[0], state_v[1], state_v[2], state_v[3]);

    _mm256_store_si256((__m256i *)&state[0], state_v[0]);
    _mm256_store_si256((__m256i *)&state[4], state_v[1]);
    _mm256_store_si256((__m256i *)&state[8], state_v[2]);
    _mm256_store_si256((__m256i *)&state[12], state_v[3]);
#elif defined __AVX__
    __m128i state_v[6], in_v[6];

    state_v[0] = _mm_load_si128((__m128i *)(&state[0]));
    state_v[1] = _mm_load_si128((__m128i *)(&state[2]));
    state_v[2] = _mm_load_si128((__m128i *)(&state[4]));
    state_v[3] = _mm_load_si128((__m128i *)(&state[6]));
    state_v[4] = _mm_load_si128((__m128i *)(&state[8]));
    state_v[5] = _mm_load_si128((__m128i *)(&state[10]));

    // "in" is only known to be a uint64_t pointer, so it must not be read
    // with aligned loads: a 16-byte-misaligned block would fault.
    in_v[0] = _mm_loadu_si128((const __m128i *)(&in[0]));
    in_v[1] = _mm_loadu_si128((const __m128i *)(&in[2]));
    in_v[2] = _mm_loadu_si128((const __m128i *)(&in[4]));
    in_v[3] = _mm_loadu_si128((const __m128i *)(&in[6]));
    in_v[4] = _mm_loadu_si128((const __m128i *)(&in[8]));
    in_v[5] = _mm_loadu_si128((const __m128i *)(&in[10]));

    _mm_store_si128((__m128i *)(&state[0]), _mm_xor_si128(state_v[0], in_v[0]));
    _mm_store_si128((__m128i *)(&state[2]), _mm_xor_si128(state_v[1], in_v[1]));
    _mm_store_si128((__m128i *)(&state[4]), _mm_xor_si128(state_v[2], in_v[2]));
    _mm_store_si128((__m128i *)(&state[6]), _mm_xor_si128(state_v[3], in_v[3]));
    _mm_store_si128((__m128i *)(&state[8]), _mm_xor_si128(state_v[4], in_v[4]));
    _mm_store_si128((__m128i *)(&state[10]), _mm_xor_si128(state_v[5], in_v[5]));

    // Applies the transformation f to the sponge's state
    blake2bLyra(state);
#else
    // XORs the first BLOCK_LEN_INT64 words of "in" with the current state
    state[0] ^= in[0];
    state[1] ^= in[1];
    state[2] ^= in[2];
    state[3] ^= in[3];
    state[4] ^= in[4];
    state[5] ^= in[5];
    state[6] ^= in[6];
    state[7] ^= in[7];
    state[8] ^= in[8];
    state[9] ^= in[9];
    state[10] ^= in[10];
    state[11] ^= in[11];

    // Applies the transformation f to the sponge's state
    blake2bLyra(state);
#endif
}