/** * Executes Lyra2 based on the G function from Blake2b or BlaMka. This version supports salts and passwords * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, * where "b" is the underlying sponge's bitrate). In this implementation, the "params" is composed by all * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value * of nCols, (i.e., params = kLen || pwdlen || saltlen || timeCost || nRows || nCols). * * @param K The derived key to be output by the algorithm * @param kLen Desired key length * @param pwd User password * @param pwdlen Password length * @param salt Salt * @param saltlen Salt length * @param timeCost Parameter to determine the processing time (T) * @param nRows Number or rows of the memory matrix (R) * @param nCols Number of columns of the memory matrix (C) * * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) */ int LYRA2_multiThread(void *K, unsigned int kLen, const void *pwd, unsigned int pwdlen, const void *salt, unsigned int saltlen, unsigned int timeCost, unsigned int nRows, unsigned int nCols){ //============================= Basic variables ============================// uint64_t i,j; //auxiliary iteration counter //==========================================================================/ //========== Initializing the Memory Matrix and pointers to it =============// //Allocates pointers to each row of the matrix __m128i **memMatrix = malloc(nRows * sizeof (uint64_t*)); if (memMatrix == NULL) { return -1; } //Allocates pointers to each key unsigned char **pKeys = malloc(nPARALLEL * sizeof (unsigned char*)); if (pKeys == NULL) { return -1; } if (nRows < 3) return -1; if (timeCost < 1) return -1; #if _OPENMP <= 201107 //OpenMP 3.X or less #pragma omp parallel num_threads(nPARALLEL) default(none) /*private(pwd)*/ shared(memMatrix, pKeys, pwd, pwdlen, salt, saltlen, nRows, nCols, kLen, timeCost, nPARALLEL) #endif // _OPENMP #if _OPENMP > 201107 //OpenMP 4.0 #pragma omp parallel proc_bind(spread) num_threads(nPARALLEL) default(none) /*private(pwd)*/ shared(memMatrix, pKeys, pwd, pwdlen, salt, saltlen, nRows, nCols, kLen, timeCost, nPARALLEL) #endif // _OPENMP { //============================= Basic threads variables ============================// int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1 uint64_t step = 1; //Visitation step (used during Setup and Wandering phases) uint64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup) uint64_t sync = 4; //Synchronize counter uint64_t sqrt = 2; //Square of window (i.e., square(window)), when a window is a square number; //otherwise, sqrt = 2*square(window/2) uint64_t row0 = 3; //row0: sequentially written during Setup; randomly picked during Wandering uint64_t prev0 = 2; //prev0: stores the previous value of row0 uint64_t rowP = 1; //rowP: revisited during Setup, and then read [and written]; randomly picked during Wandering uint64_t prevP = 0; //prevP: stores the previous value of rowP uint64_t threadNumber = 0; uint64_t iP; uint64_t jP; //Starts with threadNumber. uint64_t kP; uint64_t wCont; uint64_t sizeSlicedRows; uint64_t off0; uint64_t offP; //==========================================================================/ //========================== BootStrapping Phase ==========================// // Size of each chunk that each thread will work with sizeSlicedRows = nRows/nPARALLEL; // Thread index: threadNumber = omp_get_thread_num(); uint64_t sliceStart = threadNumber*sizeSlicedRows; uint64_t halfSlice = sizeSlicedRows/2; iP = (uint64_t) ((uint64_t) sizeSlicedRows * (uint64_t) ROW_LEN_BYTES); __m128i *threadSliceMatrix = malloc(iP); if (threadSliceMatrix == NULL) { printf("Error: unable to allocate memory (nRows too large?)\n"); exit(EXIT_FAILURE); } //Places the pointers in the correct positions __m128i *ptrWord = threadSliceMatrix; for (kP = 0; kP < sizeSlicedRows; kP++) { memMatrix[threadNumber*sizeSlicedRows + kP] = ptrWord; ptrWord += ROW_LEN_INT128; } unsigned char *threadKey = malloc(kLen); if (threadKey == NULL) { exit(EXIT_FAILURE); } //Places the pointers in the correct positions pKeys[threadNumber] = threadKey; //==========================================================================/ //============= Padding (password + salt + params) with 10*1 ===============// //OBS.:The memory matrix will temporarily hold the password: not for saving memory, //but this ensures that the password copied locally will be overwritten as soon as possible //First, we clean enough blocks for the password, salt, params and padding //Change the ''8'' if different amounts of parameters were passed uint64_t nBlocksInput = ((pwdlen + saltlen + 8 * sizeof (int)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; byte *ptrByte = (byte*) threadSliceMatrix; memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); //Prepends the password memcpy(ptrByte, pwd, pwdlen); ptrByte += pwdlen; //Concatenates the salt memcpy(ptrByte, salt, saltlen); ptrByte += saltlen; //Concatenates the params: every integer passed as parameter, in the order they are provided by the interface memcpy(ptrByte, &kLen, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &pwdlen, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &saltlen, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &timeCost, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &nRows, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &nCols, sizeof (int)); ptrByte += sizeof (int); int p = nPARALLEL; memcpy(ptrByte, &p, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &threadNumber, sizeof (int)); ptrByte += sizeof (int);// */ //Now comes the padding *ptrByte = 0x80; //first byte of padding: right after the password ptrByte = (byte*) threadSliceMatrix; //resets the pointer to the start of the memory matrix ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block //==========================================================================/ //============== Initializing the Sponge State =============/ //Sponge state: 8 __m128i, BLOCK_LEN_INT128 words of them for the bitrate (b) and the remainder for the capacity (c) //Thread State __m128i *threadState = malloc(8 * sizeof (__m128i)); if (threadState == NULL) { exit(EXIT_FAILURE); } initState(threadState); //==========================================================================/ //============= Absorbing the input data with the sponge ===============// //Absorbing salt, password and params: this is the only place in which the block length is hard-coded to 512 bits, for compatibility with Blake2b and BlaMka ptrWord = threadSliceMatrix; for (kP = 0; kP < nBlocksInput; kP++) { absorbBlockBlake2Safe(threadState, ptrWord); //absorbs each block of pad(pwd || salt || params) ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT128; //goes to next block of pad(pwd || salt || params) } //================================================================================/ //================================ Setup Phase ==================================// //==Initializes a (nRows x nCols) memory matrix, it's cells having b bits each)==// //Initializes M[0] reducedSqueezeRow0(threadState, memMatrix[sliceStart]); //The locally copied password is most likely overwritten here //Initializes M[1] reducedDuplexRow1and2(threadState, memMatrix[sliceStart], memMatrix[sliceStart+1]); //Initializes M[2] reducedDuplexRow1and2(threadState, memMatrix[sliceStart + 1], memMatrix[sliceStart + 2]); jP = threadNumber; //Filling Loop for (row0 = 3; row0 < sizeSlicedRows; row0++) { //Performs a reduced-round duplexing operation over "Mj[rowP][col] [+] Mi[prev0][col] [+] Mj[prevP][col]", filling Mi[row0] and updating Mj[rowP] //Mi[row0][N_COLS-1-col] = Mi[prev0][col] XOR rand; //Mj[rowP][col] = Mj[rowP][col] XOR rot(rand) rot(): right rotation by 'omega' bits (e.g., 1 or more words) reducedDuplexRowFilling(threadState, memMatrix[jP*sizeSlicedRows + rowP], memMatrix[sliceStart + prev0], memMatrix[jP*sizeSlicedRows + prevP], memMatrix[sliceStart + row0]); //Updates the "prev" indices: the rows more recently updated prev0 = row0; prevP = rowP; //updates the value of rowP: deterministically picked, with a variable step rowP = (rowP + step) & (window - 1); //Checks if all rows in the window where visited. if (rowP == 0) { window *= 2; //doubles the size of the re-visitation window step = sqrt + gap; //changes the step: approximately doubles its value gap = -gap; //inverts the modifier to the step if (gap == -1){ sqrt *= 2; //Doubles sqrt every other iteration } } //Synchronize threads and change the slices if (row0 == sync) { sync += sqrt/2; jP = (jP + 1) % nPARALLEL; #pragma omp barrier } } // Needs all matrix done before starting Wandering Phase. #pragma omp barrier //============================ Wandering Phase =============================// //=====Iteratively overwrites pseudorandom cells of the memory matrix=======// window = halfSlice; sync = sqrt; off0 = 0; offP = window; uint64_t offTemp; //Visitation Loop for (wCont = 0; wCont < timeCost*sizeSlicedRows; wCont++){ //Selects a pseudorandom indices row0 and rowP //------------------------------------------------------------------------------------------ /*(USE THIS IF window IS A POWER OF 2)*/ //row0 = off0 + (((uint64_t)(((__uint128_t *)threadState)[0])) & (window-1)); //rowP = offP + (((uint64_t)(((__uint128_t *)threadState)[1])) & (window-1)); /*(USE THIS FOR THE "GENERIC" CASE)*/ row0 = off0 + (((uint64_t)(((__uint128_t *)threadState)[0])) % window); //row0 = off0 + (lsw(rand) mod window) rowP = offP + (((uint64_t)(((__uint128_t *)threadState)[1])) % window); //row1 = offP + (lsw(rot(rand)) mod window) //Selects a pseudorandom indices j0 (LSW(rot^2 (rand)) mod p) jP = ((uint64_t)(((__uint128_t *)threadState)[2])) % nPARALLEL; //jP = lsw(rot^2(rand)) mod nPARALLEL //Performs a reduced-round duplexing operation over "Mi[row0][col] [+] Mj[rowP][col] [+] Mi[prev0][col0]", updating Mi[row0] //Mi[row0][col] = Mi[row0][col] XOR rand; reducedDuplexRowWanderingParallel(threadState, memMatrix[sliceStart + row0], memMatrix[jP*sizeSlicedRows + rowP], memMatrix[sliceStart + prev0]); //update prev: they now point to the last rows ever updated prev0 = row0; //Synchronize threads and change the slices if (wCont == sync) { sync += sqrt; offTemp = off0; off0 = offP; offP = offTemp; #pragma omp barrier } } #pragma omp barrier //==========================================================================/ //============================ Wrap-up Phase ===============================// //========================= Output computation =============================// //Absorbs one last block of the memory matrix with the full-round sponge absorbColumn(threadState, memMatrix[sliceStart + row0]); //Squeezes the key squeeze(threadState, threadKey, kLen); //========================= Freeing the thread memory =============================// free(threadSliceMatrix); //Wiping out the sponge's internal state before freeing it memset(threadState, 0, 8 * sizeof (__m128i)); free(threadState); } // Parallelism End // XORs all Keys for (i = 1; i < nPARALLEL; i++) { for (j = 0; j < kLen; j++) { pKeys[0][j] ^= pKeys[i][j]; } } // Returns in the correct variable memcpy(K, pKeys[0], kLen); //========================= Freeing the memory =============================// free(memMatrix); //Free each thread Key for (i = 0; i < nPARALLEL; i++) { free(pKeys[i]); } //Free the pointers to allKeys free(pKeys); //==========================================================================/ return 0; }
/** * Executes Lyra2 based on the G function from Blake2b or BlaMka. This version supports salts and passwords * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, * where "b" is the underlying sponge's bitrate). In this implementation, the "params" is composed by all * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value * of nCols, (i.e., params = kLen || pwdlen || saltlen || timeCost || nRows || nCols). * * @param K The derived key to be output by the algorithm * @param kLen Desired key length * @param pwd User password * @param pwdlen Password length * @param salt Salt * @param saltlen Salt length * @param timeCost Parameter to determine the processing time (T) * @param nRows Number or rows of the memory matrix (R) * @param nCols Number of columns of the memory matrix (C) * * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) */ int LYRA2_singleThread(void *K, unsigned int kLen, const void *pwd, unsigned int pwdlen, const void *salt, unsigned int saltlen, unsigned int timeCost, unsigned int nRows, unsigned int nCols){ //============================= Basic variables ============================// int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1 uint64_t step = 1; //Visitation step (used during Setup to dictate the sequence in which rows are read) uint64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup) uint64_t sqrt = 2; //Square of window (i.e., square(window)), when a window is a square number; //otherwise, sqrt = 2*square(window/2) uint64_t row0 = 3; //row0: sequentially written during Setup; randomly picked during Wandering uint64_t prev0 = 2; //prev0: stores the previous value of row0 uint64_t row1 = 1; //row1: revisited during Setup, and then read [and written]; randomly picked during Wandering uint64_t prev1 = 0; //prev1: stores the previous value of row1 uint64_t i; //auxiliary iteration counter //==========================================================================/ if (nRows < 3) return -1; if (timeCost < 1) return -1; //========== Initializing the Memory Matrix and pointers to it =============// //Tries to allocate enough space for the whole memory matrix i = (uint64_t) ((uint64_t)nRows * (uint64_t)ROW_LEN_BYTES); __m128i *wholeMatrix = malloc(i); if (wholeMatrix == NULL) { return -1; } //Allocates pointers to each row of the matrix __m128i **memMatrix = malloc(nRows * sizeof (uint64_t*)); if (memMatrix == NULL) { return -1; } //Places the pointers in the correct positions __m128i *ptrWord = wholeMatrix; for (i = 0; i < nRows; i++) { memMatrix[i] = ptrWord; ptrWord += ROW_LEN_INT128; } //==========================================================================/ //============= Padding (password + salt + params) with 10*1 ===============// //OBS.:The memory matrix will temporarily hold the password: not for saving memory, //but this ensures that the password copied locally will be overwritten as soon as possible //First, we clean enough blocks for the password, salt, params and padding //Change the ''6'' if different amounts of parameters were passed uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (int)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; byte *ptrByte = (byte*) wholeMatrix; memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); //Prepends the password memcpy(ptrByte, pwd, pwdlen); ptrByte += pwdlen; //Concatenates the salt memcpy(ptrByte, salt, saltlen); ptrByte += saltlen; //Concatenates the params: every integer passed as parameter, in the order they are provided by the interface memcpy(ptrByte, &kLen, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &pwdlen, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &saltlen, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &timeCost, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &nRows, sizeof (int)); ptrByte += sizeof (int); memcpy(ptrByte, &nCols, sizeof (int)); ptrByte += sizeof (int); //Now comes the padding *ptrByte = 0x80; //first byte of padding: right after the password ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block //==========================================================================/ //============== Initializing the Sponge State =============/ //Sponge state: 8 __m128i, BLOCK_LEN_INT128 words of them for the bitrate (b) and the remainder for the capacity (c) __m128i *state = malloc(8 * sizeof (__m128i)); if (state == NULL) { return -1; } initState(state); //==========================================================================/ //============= Absorbing the input data with the sponge ===============// //Absorbing salt, password and params: this is the only place in which the block length is hard-coded to 512 bits, for compatibility with Blake2b and BlaMka ptrWord = wholeMatrix; for (i = 0; i < nBlocksInput; i++) { absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || params) ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT128; //goes to next block of pad(pwd || salt || params) } //================================================================================/ //================================ Setup Phase ==================================// //==Initializes a (nRows x nCols) memory matrix, it's cells having b bits each)==// //Initializes M[0] reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here //Initializes M[1] reducedDuplexRow1and2(state, memMatrix[0], memMatrix[1]); //Initializes M[2] reducedDuplexRow1and2(state, memMatrix[1], memMatrix[2]); //Filling Loop for(row0 = 3 ; row0 < nRows; row0++){ //Performs a reduced-round duplexing operation over "M[row1][col] [+] M[prev0][col] [+] M[prev1][col]", filling M[row0] and updating M[row1] //M[row0][N_COLS-1-col] = M[prev0][col] XOR rand; //M[row1][col] = M[row1][col] XOR rot(rand) rot(): right rotation by 'omega' bits (e.g., 1 or more words) reducedDuplexRowFilling(state, memMatrix[row1], memMatrix[prev0], memMatrix[prev1], memMatrix[row0]); //Updates the "prev" indices: the rows more recently updated prev0 = row0; prev1 = row1; //updates the value of row1: deterministically picked, with a variable step row1 = (row1 + step) & (window - 1); //Checks if all rows in the window where visited. if (row1 == 0) { window *= 2; //doubles the size of the re-visitation window step = sqrt + gap; //changes the step: approximately doubles its value gap = -gap; //inverts the modifier to the step if (gap == -1){ sqrt *= 2; //Doubles sqrt every other iteration } } } //============================ Wandering Phase =============================// //=====Iteratively overwrites pseudorandom cells of the memory matrix=======// //Visitation Loop for (i = 0 ; i < timeCost*nRows ; i++) { //Selects a pseudorandom indices row0 and row1 //------------------------------------------------------------------------------------------ /*(USE THIS IF nRows IS A POWER OF 2)*/ //row0 = ((uint64_t)(((__uint128_t *)state)[0])) & (nRows-1); //row1 = ((uint64_t)(((__uint128_t *)state)[1])) & (nRows-1); /*(USE THIS FOR THE "GENERIC" CASE)*/ row0 = ((uint64_t)(((__uint128_t *)state)[0])) % nRows; //row0 = lsw(rand) mod nRows row1 = ((uint64_t)(((__uint128_t *)state)[1])) % nRows; //row1 = lsw(rot(rand)) mod nRows //Performs a reduced-round duplexing operation over "M[row0][col] [+] M[row1][col] [+] M[prev0][col0] [+] M[prev1][col1]", updating both M[row0] and M[row1] //M[row0][col] = M[row0][col] XOR rand; //M[row1][col] = M[row1][col] XOR rot(rand) rot(): right rotation by 'omega' bits (e.g., 1 or more words) reducedDuplexRowWandering(state, memMatrix[row0], memMatrix[row1], memMatrix[prev0], memMatrix[prev1]); //update prev's: they now point to the last rows ever updated prev0 = row0; prev1 = row1; } //==========================================================================/ //============================ Wrap-up Phase ===============================// //========================= Output computation =============================// //Absorbs one last block of the memory matrix with the full-round sponge absorbColumn(state, memMatrix[row0]); //Squeezes the key with the full-round sponge squeeze(state, K, kLen); //==========================================================================/ //========================= Freeing the memory =============================// free(memMatrix); free(wholeMatrix); //Wiping out the sponge's internal state before freeing it memset(state, 0, 8 * sizeof (__m128i)); free(state); //==========================================================================/ return 0; }
/** * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols). * * @param K The derived key to be output by the algorithm * @param kLen Desired key length * @param pwd User password * @param pwdlen Password length * @param salt Salt * @param saltlen Salt length * @param timeCost Parameter to determine the processing time (T) * @param nRows Number or rows of the memory matrix (R) * @param nCols Number of columns of the memory matrix (C) * * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) */ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) { //============================= Basic variables ============================// int64_t row = 2; //index of row to be processed int64_t prev = 1; //index of prev (last row ever computed/modified) int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering) int64_t tau; //Time Loop iterator int64_t step = 1; //Visitation step (used during Setup and Wandering phases) int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup) int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1 int64_t i; //auxiliary iteration counter //==========================================================================/ //========== Initializing the Memory Matrix and pointers to it =============// //Tries to allocate enough space for the whole memory matrix i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); uint64_t *wholeMatrix = (uint64_t*)malloc(i); if (wholeMatrix == NULL) { return -1; } memset(wholeMatrix, 0, i); //Allocates pointers to each row of the matrix uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*)); if (memMatrix == NULL) { return -1; } //Places the pointers in the correct positions uint64_t *ptrWord = wholeMatrix; for (i = 0; i < nRows; i++) { memMatrix[i] = ptrWord; ptrWord += ROW_LEN_INT64; } //==========================================================================/ //============= Getting the password + salt + basil padded with 10*1 ===============// //OBS.:The memory matrix will temporarily hold the password: not for saving memory, //but this ensures that the password copied locally will be overwritten as soon as possible //First, we clean enough blocks for the password, salt, basil and padding uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; byte *ptrByte = (byte*) wholeMatrix; memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); //Prepends the password memcpy(ptrByte, pwd, pwdlen); ptrByte += pwdlen; //Concatenates the salt memcpy(ptrByte, salt, saltlen); ptrByte += saltlen; //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface memcpy(ptrByte, &kLen, sizeof (uint64_t)); ptrByte += sizeof (uint64_t); memcpy(ptrByte, &pwdlen, sizeof (uint64_t)); ptrByte += sizeof (uint64_t); memcpy(ptrByte, &saltlen, sizeof (uint64_t)); ptrByte += sizeof (uint64_t); memcpy(ptrByte, &timeCost, sizeof (uint64_t)); ptrByte += sizeof (uint64_t); memcpy(ptrByte, &nRows, sizeof (uint64_t)); ptrByte += sizeof (uint64_t); memcpy(ptrByte, &nCols, sizeof (uint64_t)); ptrByte += sizeof (uint64_t); //Now comes the padding *ptrByte = 0x80; //first byte of padding: right after the password ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block //==========================================================================/ //======================= Initializing the Sponge State ====================// //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t)); if (state == NULL) { return -1; } initState(state); //==========================================================================/ //================================ Setup Phase =============================// //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits ptrWord = wholeMatrix; for (i = 0; i < nBlocksInput; i++) { absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil) } //Initializes M[0] and M[1] reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here reducedDuplexRow1(state, memMatrix[0], memMatrix[1]); do { //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand) reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); //updates the value of row* (deterministically picked during Setup)) rowa = (rowa + step) & (window - 1); //update prev: it now points to the last row ever computed prev = row; //updates row: goes to the next row to be computed row++; //Checks if all rows in the window where visited. if (rowa == 0) { step = window + gap; //changes the step: approximately doubles its value window *= 2; //doubles the size of the re-visitation window gap = -gap; //inverts the modifier to the step } } while (row < nRows); //==========================================================================/ //============================ Wandering Phase =============================// row = 0; //Resets the visitation to the first row of the memory matrix for (tau = 1; tau <= timeCost; tau++) { //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1 step = (tau % 2 == 0) ? -1 : nRows / 2 - 1; do { //Selects a pseudorandom index row* //------------------------------------------------------------------------------------------ //rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE) //------------------------------------------------------------------------------------------ //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row] reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); //update prev: it now points to the last row ever computed prev = row; //updates row: goes to the next row to be computed //------------------------------------------------------------------------------------------ //row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE) //------------------------------------------------------------------------------------------ } while (row != 0); } //==========================================================================/ //============================ Wrap-up Phase ===============================// //Absorbs the last block of the memory matrix absorbBlock(state, memMatrix[rowa]); //Squeezes the key squeeze(state, (unsigned char*)K, kLen); //==========================================================================/ //========================= Freeing the memory =============================// free(memMatrix); free(wholeMatrix); //Wiping out the sponge's internal state before freeing it memset(state, 0, 16 * sizeof (uint64_t)); free(state); //==========================================================================/ return 0; }