int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) { if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; if( blake2sp_init_root( S->R, outlen, keylen ) < 0 ) return -1; for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; { uint8_t block[BLAKE2S_BLOCKBYTES]; memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ } return 0; }
int blake2sp_init( blake2sp_state *S, const uint8_t outlen ) { if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; if( blake2sp_init_root( S->R, outlen, 0 ) < 0 ) return -1; for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; return 0; }
ERL_NIF_TERM blake2sp_hash(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; blake2s_state S[PARALLELISM_DEGREE][1]; blake2s_state FS[1]; ErlNifBinary input, key, salt, personal; uint8_t out[BLAKE2S_OUTBYTES] = {0}; unsigned int outlen; int i; ERL_NIF_TERM tmphash[BLAKE2S_OUTBYTES]; if (argc != 5 || !enif_inspect_binary(env, argv[0], &input) || !enif_inspect_binary(env, argv[1], &key) || !enif_get_uint(env, argv[2], &outlen) || !enif_inspect_binary(env, argv[3], &salt) || !enif_inspect_binary(env, argv[4], &personal)) return enif_make_badarg(env); if (!outlen || outlen > BLAKE2S_OUTBYTES) return -1; if( key.size > BLAKE2S_KEYBYTES ) return -1; for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S[i], outlen, key.size, i, salt.data, personal.data, salt.size, personal.size) < 0 ) return -1; S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node if( key.size > 0 ) { uint8_t block[BLAKE2S_BLOCKBYTES]; memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key.data, key.size ); for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ } #if defined(_OPENMP) #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) #endif { #if defined(_OPENMP) size_t id__ = omp_get_thread_num(); #endif uint64_t inlen__ = input.size; const uint8_t *in__ = ( const uint8_t * )input.data; in__ += id__ * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } if( inlen__ > id__ * BLAKE2S_BLOCKBYTES ) { const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES; const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES; blake2s_update( S[id__], in__, len ); } blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES ); } if( blake2sp_init_root( FS, outlen, key.size, salt.data, personal.data, salt.size, personal.size) < 0 ) return -1; FS->last_node = 1; // Mark as last node for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES ); blake2s_final( FS, out, outlen );; for (i = 0; i < outlen; i++) { tmphash[i] = enif_make_uint(env, out[i]); } return enif_make_list_from_array(env, tmphash, outlen); }
int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; blake2s_state S[PARALLELISM_DEGREE][1]; blake2s_state FS[1]; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; if ( NULL == out ) return -1; if ( NULL == key && keylen > 0) return -1; if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; if( keylen > BLAKE2S_KEYBYTES ) return -1; for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node if( keylen > 0 ) { uint8_t block[BLAKE2S_BLOCKBYTES]; memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ } #if defined(_OPENMP) #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) #endif { #if defined(_OPENMP) size_t id__ = omp_get_thread_num(); #endif uint64_t inlen__ = inlen; const uint8_t *in__ = ( const uint8_t * )in; in__ += id__ * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } if( inlen__ > id__ * BLAKE2S_BLOCKBYTES ) { const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES; const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES; blake2s_update( S[id__], in__, len ); } blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES ); } if( blake2sp_init_root( FS, outlen, keylen ) < 0 ) return -1; FS->last_node = 1; for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES ); return blake2s_final( FS, out, outlen ); }