/*
 * The compression function (long variants).
 *
 * Updates the chaining value in place: h' = h + Q(m) + P(h+m), where '+'
 * is XOR.
 *
 *   h  chaining value, COLS1024 words; read and overwritten
 *   m  message block, COLS1024 words; read only
 *
 * The scratch buffers are automatic rather than static: a static object
 * inside a non-static inline function violates a C99/C11 constraint and
 * would make the function non-reentrant.  Each buffer is written before it
 * is read (assuming every round macro fully writes its second argument --
 * TODO confirm against the RND1024P/RND1024Q definitions).
 */
inline void F1024(u64 *h, const u64 *m) {
  u64 y[COLS1024] __attribute__((aligned(16)));
  u64 z[COLS1024] __attribute__((aligned(16)));
  u64 outQ[COLS1024] __attribute__((aligned(16)));
  u64 inP[COLS1024] __attribute__((aligned(16)));
  int i;

  /* z = m is the input of Q, inP = h+m is the input of P */
  for (i = 0; i < COLS1024; i++) {
    z[i] = m[i];
    inP[i] = h[i] ^ m[i];
  }

  /* compute Q(m); the rounds ping-pong between y and z, result in outQ */
  RND1024Q(z,y,0);
  for (i = 1; i < ROUNDS1024-1; i += 2) {
    RND1024Q(y,z,U64BIG((u64)i));
    RND1024Q(z,y,U64BIG((u64)(i+1)));
  }
  RND1024Q(y,outQ,U64BIG((u64)(ROUNDS1024-1)));

  /* compute P(h+m); same ping-pong, result ends up in y */
  RND1024P(inP,z,0);
  for (i = 1; i < ROUNDS1024-1; i += 2) {
    RND1024P(z,y,U64BIG(((u64)i)<<56));
    RND1024P(y,z,U64BIG(((u64)(i+1))<<56));
  }
  RND1024P(z,y,U64BIG(((u64)(ROUNDS1024-1))<<56));

  /* h' == h + Q(m) + P(h+m) */
#pragma vector aligned
  for (i = 0; i < COLS1024; i++) {
    h[i] ^= outQ[i] ^ y[i];
  }
}
/*
 * Initialise a hashing context.
 *
 * Clears every state word except the last one, which receives
 * U64BIG(8*DIGESTSIZE) (presumably the digest length in bits -- confirm
 * against the DIGESTSIZE definition), and resets the buffer position and
 * the block counter.
 *
 * Always returns 0.
 */
int Init(context* ctx) {
  const int last = COLS-1;
  int col = 0;

  /* initial value: zeros everywhere but the final word */
  while (col < last) {
    ctx->state[col] = 0;
    ++col;
  }
  ctx->state[last] = U64BIG((u64)(8*DIGESTSIZE));

  /* nothing buffered and nothing processed yet */
  ctx->block_counter = 0;
  ctx->buf_ptr = 0;

  return 0;
}
/*
 * Initialise a hashing context.
 *
 *   ctx         context to set up; ctx->chaining and ctx->buffer are
 *               allocated here
 *   hashbitlen  requested output size in bits
 *
 * Returns SUCCESS when the context is ready, BAD_HASHLEN when hashbitlen is
 * not a positive multiple of 8 that is at most 256, and FAIL when an
 * allocation fails (in which case nothing stays allocated).
 *
 * Only the short variant is set up by this function; the long variant
 * (sizes above 256 bits) is disabled, so such sizes are rejected up front
 * instead of continuing with an uninitialised context.
 */
HashReturn Init(hashState* ctx, int hashbitlen) {
  /* output size (in bits) must be a positive integer less than or equal to
     256 (long variant disabled), and divisible by 8 */
  if (hashbitlen <= 0 || (hashbitlen%8) || hashbitlen > 256)
    return BAD_HASHLEN;

  /* set number of state columns and state size for the short variant */
  ctx->size = SHORT;
  ctx->statesize = SIZE512;
  ctx->blocksize = SIZE1024;
  ctx->chaining = calloc(COLS512,sizeof(u64));
  ctx->buffer = malloc(SIZE1024);

  /* never write through a failed allocation; release whichever succeeded */
  if (ctx->chaining == NULL || ctx->buffer == NULL) {
    free(ctx->chaining);
    free(ctx->buffer);
    ctx->chaining = NULL;
    ctx->buffer = NULL;
    return FAIL;
  }

  /* set initial value: calloc zeroed the state, the last word carries the
     output length */
  ctx->chaining[COLS512-1] = U64BIG((u64)hashbitlen);

  /* set other variables */
  ctx->hashbitlen = hashbitlen;
  ctx->buf_ptr = 0;
  ctx->block_counter = 0;
  ctx->bits_in_last_byte = 0;

  ctx->cnt_buf_ptr = 0;
  ctx->cnt_block = 1; /* there will always be at least 1 block */

  /* initialize counter buffer to zero */
  /* NOTE(review): cnt_buf_ptr is left equal to statesize after this loop,
     exactly as before -- confirm that callers expect that. */
  while (ctx->cnt_buf_ptr < ctx->statesize) {
    ctx->counter[ctx->cnt_buf_ptr++] = 0;
  }

  return SUCCESS;
}
/*
 * Bring an existing context back to its starting state.
 *
 * Zeroes the first SIZE512 elements of ctx->chaining and ctx->buffer,
 * stores U64BIG(LENGTH) into 64-bit word COLS-1 of the chaining value,
 * runs INIT on it and resets both position counters.
 *
 * Returns FAIL_GR if either ctx->chaining or ctx->buffer is NULL,
 * SUCCESS_GR otherwise.
 */
HashReturn_gr reinit_groestl( hashState_groestl* ctx )
{
  int i = 0;

  /* both state areas must be present before anything is written */
  if ( !( ctx->chaining != NULL && ctx->buffer != NULL ) )
    return FAIL_GR;

  /* wipe chaining value and data buffer in lock-step */
  while ( i < SIZE512 )
  {
    ctx->chaining[i] = _mm_setzero_si128();
    ctx->buffer[i]   = _mm_setzero_si128();
    i++;
  }

  /* initial value: U64BIG(LENGTH) in the last 64-bit column */
  ((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
  INIT(ctx->chaining);

  ctx->rem_ptr = 0;
  ctx->buf_ptr = 0;

  return SUCCESS_GR;
}
/*
 * Initialise a hashing context for the compile-time output size LENGTH.
 *
 *   ctx  context to set up
 *
 * Returns SUCCESS when the context is ready, BAD_HASHLEN when LENGTH is not
 * a positive multiple of 8 that is at most 512, and FAIL when ctx->chaining
 * or ctx->buffer is NULL.  The NULL check is performed before either area
 * is written.
 */
HashReturn Init(hashState* ctx) {
  /* kept as an initialised u8 named i: SET_CONSTANTS() takes no arguments
     and may refer to this variable by name -- TODO confirm */
  u8 i = 0;

  /* output size (in bits) must be a positive integer less than or equal to
     512, and divisible by 8 */
  if (LENGTH <= 0 || (LENGTH%8) || LENGTH > 512)
    return BAD_HASHLEN;

  /* set number of state columns and state size depending on variant */
  ctx->columns = COLS;
  ctx->statesize = SIZE;
#if (LENGTH <= 256)
  ctx->v = SHORT;
#else
  ctx->v = LONG;
#endif

  SET_CONSTANTS();

  /* validate the state areas before touching them */
  if (ctx->chaining == NULL || ctx->buffer == NULL)
    return FAIL;

  for (i=0; i<SIZE/8; i++)
    ctx->chaining[i] = 0;
  for (i=0; i<SIZE; i++)
    ctx->buffer[i] = 0;

  /* set initial value */
  ctx->chaining[ctx->columns-1] = U64BIG((u64)LENGTH);

  INIT(ctx->chaining);

  /* set other variables */
  ctx->buf_ptr = 0;
  ctx->block_counter = 0;
  ctx->bits_in_last_byte = 0;

  return SUCCESS;
}
/*
 * Output transformation: given state h, do h <- P(h) + h ('+' is XOR).
 *
 * Works in place on ctx->chaining.  ctx->size selects the variant: SHORT
 * uses COLS512 words and ten unrolled RND512P rounds, LONG uses COLS1024
 * words and ROUNDS1024 RND1024P rounds.  Any other ctx->size leaves the
 * state untouched.
 */
void OutputTransformation(hashState *ctx) {
  int j;
  u64 temp[COLS1024];
  u64 y[COLS1024];
  u64 z[COLS1024];

  if (ctx->size == SHORT) {
    /* work on a copy so the original h is still there for the final XOR */
    for (j = 0; j < COLS512; j++) {
      temp[j] = ctx->chaining[j];
    }

    /* ten rounds of P, alternating between z and y, ending in temp */
    RND512P(temp,z,U64BIG(0x0000000000000000ull));
    RND512P(z,y,U64BIG(0x0100000000000000ull));
    RND512P(y,z,U64BIG(0x0200000000000000ull));
    RND512P(z,y,U64BIG(0x0300000000000000ull));
    RND512P(y,z,U64BIG(0x0400000000000000ull));
    RND512P(z,y,U64BIG(0x0500000000000000ull));
    RND512P(y,z,U64BIG(0x0600000000000000ull));
    RND512P(z,y,U64BIG(0x0700000000000000ull));
    RND512P(y,z,U64BIG(0x0800000000000000ull));
    RND512P(z,temp,U64BIG(0x0900000000000000ull));

    /* h <- P(h) + h */
    for (j = 0; j < COLS512; j++) {
      ctx->chaining[j] ^= temp[j];
    }
    return;
  }

  if (ctx->size != LONG) {
    return;
  }

  /* long variant: same scheme over COLS1024 words */
  for (j = 0; j < COLS1024; j++) {
    temp[j] = ctx->chaining[j];
  }

  /* first round, then pairs of rounds, then the last round into temp */
  RND1024P(temp,y,0);
  for (j = 1; j < ROUNDS1024-1; j += 2) {
    RND1024P(y,z,U64BIG(((u64)j)<<56));
    RND1024P(z,y,U64BIG(((u64)j+1)<<56));
  }
  RND1024P(y,temp,U64BIG(((u64)(ROUNDS1024-1))<<56));

  /* h <- P(h) + h */
  for (j = 0; j < COLS1024; j++) {
    ctx->chaining[j] ^= temp[j];
  }
}
/*
 * Print a long-variant state to stdout.
 *
 * Writes the COLS1024 words of y, each passed through U64BIG, one per line
 * as 16 zero-padded hex digits, followed by an empty line.
 *
 * The value is cast to unsigned long long so that the argument matches the
 * %llx conversion whatever integer type u64 is defined as.
 */
void PrintState2(u64 y[COLS1024]) {
  int i;

  for (i = 0; i < COLS1024; i++)
    printf("%016llx\n", (unsigned long long)(U64BIG(y[i])));
  printf("\n");
}
/*
 * Print a short-variant state to stdout.
 *
 * Writes the COLS512 words of y, each passed through U64BIG, one per line
 * as 16 zero-padded hex digits, followed by an empty line.
 *
 * The value is cast to unsigned long long so that the argument matches the
 * %llx conversion whatever integer type u64 is defined as.
 */
void PrintState1(u64 y[COLS512]) {
  int i;

  for (i = 0; i < COLS512; i++)
    printf("%016llx\n", (unsigned long long)(U64BIG(y[i])));
  printf("\n");
}
/*
 * The compression function (short variants, modified).
 *
 * Consumes a message block of COLS1024 words, split into a left half ml
 * (first COLS512 words) and a right half mr (the remaining words), and
 * updates h in place ('+' is XOR):
 *
 *     q  = Q(h + mr)
 *     h' = h + P(P(c + ml) + q) + q
 *
 *   h  chaining value, COLS512 words; read and overwritten
 *   m  message block, COLS1024 words; read only
 *   c  second COLS512-word input, XORed with ml; only read here
 */
inline void F512(u64 *h, const u64 *m, u64 *c) {
  u64 y[COLS512] __attribute__((aligned(16)));
  u64 z[COLS512] __attribute__((aligned(16)));
  u64 outQ[COLS512] __attribute__((aligned(16)));
  u64 inP[COLS512] __attribute__((aligned(16)));
  u64 ml[COLS512] __attribute__((aligned(16)));  /* left half of m */
  u64 mr[COLS512] __attribute__((aligned(16)));  /* right half of m */
  int i, j;

  /* split the message into its two halves in a single pass */
  for (i = 0, j = 0; i < COLS1024; i++) {
    if (i >= COLS512) {
      mr[j++] = m[i];
    } else {
      ml[i] = m[i];
    }
  }

  /* inputs of the two permutations: c+ml for P, h+mr for Q */
  for (i = 0; i < COLS512; i++) {
    inP[i] = c[i] ^ ml[i];
    z[i] = h[i] ^ mr[i];
  }

  /* outQ = Q(h+mr): ten rounds alternating between y and z */
  RND512Q(z,y,U64BIG(0x0000000000000000ull));
  RND512Q(y,z,U64BIG(0x0000000000000001ull));
  RND512Q(z,y,U64BIG(0x0000000000000002ull));
  RND512Q(y,z,U64BIG(0x0000000000000003ull));
  RND512Q(z,y,U64BIG(0x0000000000000004ull));
  RND512Q(y,z,U64BIG(0x0000000000000005ull));
  RND512Q(z,y,U64BIG(0x0000000000000006ull));
  RND512Q(y,z,U64BIG(0x0000000000000007ull));
  RND512Q(z,y,U64BIG(0x0000000000000008ull));
  RND512Q(y,outQ,U64BIG(0x0000000000000009ull));

  /* inP = P(c+ml): ten rounds, result written back into inP */
  RND512P(inP,y,U64BIG(0x0000000000000000ull));
  RND512P(y,z,U64BIG(0x0100000000000000ull));
  RND512P(z,y,U64BIG(0x0200000000000000ull));
  RND512P(y,z,U64BIG(0x0300000000000000ull));
  RND512P(z,y,U64BIG(0x0400000000000000ull));
  RND512P(y,z,U64BIG(0x0500000000000000ull));
  RND512P(z,y,U64BIG(0x0600000000000000ull));
  RND512P(y,z,U64BIG(0x0700000000000000ull));
  RND512P(z,y,U64BIG(0x0800000000000000ull));
  RND512P(y,inP,U64BIG(0x0900000000000000ull));

  /* inP = P(c+ml) + Q(h+mr) */
  for (i = 0; i < COLS512; i++) {
    inP[i] = inP[i] ^ outQ[i];
  }

  /* inP = P(P(c+ml) + Q(h+mr)): second pass of P, again back into inP */
  RND512P(inP,z,U64BIG(0x0000000000000000ull));
  RND512P(z,y,U64BIG(0x0100000000000000ull));
  RND512P(y,z,U64BIG(0x0200000000000000ull));
  RND512P(z,y,U64BIG(0x0300000000000000ull));
  RND512P(y,z,U64BIG(0x0400000000000000ull));
  RND512P(z,y,U64BIG(0x0500000000000000ull));
  RND512P(y,z,U64BIG(0x0600000000000000ull));
  RND512P(z,y,U64BIG(0x0700000000000000ull));
  RND512P(y,z,U64BIG(0x0800000000000000ull));
  RND512P(z,inP,U64BIG(0x0900000000000000ull));

  /* h' = h + P(P(c+ml) + Q(h+mr)) + Q(h+mr) */
#pragma vector aligned
  for (i = 0; i < COLS512; i++) {
    h[i] ^= inP[i] ^ outQ[i];
  }
}
int crypto_aead_decrypt( unsigned char *m, unsigned long long *mlen, unsigned char *nsec, const unsigned char *c, unsigned long long clen, const unsigned char *ad, unsigned long long adlen, const unsigned char *npub, const unsigned char *k) { *mlen = 0; if (clen < CRYPTO_KEYBYTES) return -1; u64 K0 = U64BIG(((u64*)k)[0]); u64 K1 = U64BIG(((u64*)k)[1]); u64 N0 = U64BIG(((u64*)npub)[0]); u64 N1 = U64BIG(((u64*)npub)[1]); u64 x0, x1, x2, x3, x4; u64 t0, t1, t2, t3, t4; u64 rlen; int i; // initialization x0 = (u64)((CRYPTO_KEYBYTES * 8) << 16 | PA_ROUNDS << 8 | PB_ROUNDS << 0) << 40; x1 = K0; x2 = K1; x3 = N0; x4 = N1; P12; x3 ^= K0; x4 ^= K1; // process associated data if (adlen) { rlen = adlen; while (rlen >= RATE) { x0 ^= U64BIG(*(u64*)ad); P6; rlen -= RATE; ad += RATE; } for (i = 0; i < rlen; ++i, ++ad) x0 ^= INS_BYTE(*ad, i); x0 ^= INS_BYTE(0x80, rlen); P6; } x4 ^= 1; // process plaintext rlen = clen - CRYPTO_KEYBYTES; while (rlen >= RATE) { *(u64*)m = U64BIG(x0) ^ *(u64*)c; x0 = U64BIG(*((u64*)c)); P6; rlen -= RATE; m += RATE; c += RATE; } for (i = 0; i < rlen; ++i, ++m, ++c) { *m = EXT_BYTE(x0, i) ^ *c; x0 &= ~INS_BYTE(0xff, i); x0 |= INS_BYTE(*c, i); } x0 ^= INS_BYTE(0x80, rlen); // finalization x1 ^= K0; x2 ^= K1; P12; x3 ^= K0; x4 ^= K1; // return -1 if verification fails if (((u64*)c)[0] != U64BIG(x3) || ((u64*)c)[1] != U64BIG(x4)) return -1; // return plaintext *mlen = clen - CRYPTO_KEYBYTES; return 0; }
int crypto_aead_encrypt( unsigned char *c, unsigned long long *clen, const unsigned char *m, unsigned long long mlen, const unsigned char *ad, unsigned long long adlen, const unsigned char *nsec, const unsigned char *npub, const unsigned char *k) { u64 K0 = U64BIG(((u64*)k)[0]); u64 K1 = U64BIG(((u64*)k)[1]); u64 N0 = U64BIG(((u64*)npub)[0]); u64 N1 = U64BIG(((u64*)npub)[1]); u64 x0, x1, x2, x3, x4; u64 t0, t1, t2, t3, t4; u64 rlen; int i; // initialization x0 = (u64)((CRYPTO_KEYBYTES * 8) << 16 | PA_ROUNDS << 8 | PB_ROUNDS << 0) << 40; x1 = K0; x2 = K1; x3 = N0; x4 = N1; P12; x3 ^= K0; x4 ^= K1; // process associated data if (adlen) { rlen = adlen; while (rlen >= RATE) { x0 ^= U64BIG(*(u64*)ad); P6; rlen -= RATE; ad += RATE; } for (i = 0; i < rlen; ++i, ++ad) x0 ^= INS_BYTE(*ad, i); x0 ^= INS_BYTE(0x80, rlen); P6; } x4 ^= 1; // process plaintext rlen = mlen; while (rlen >= RATE) { x0 ^= U64BIG(*(u64*)m); *(u64*)c = U64BIG(x0); P6; rlen -= RATE; m += RATE; c += RATE; } for (i = 0; i < rlen; ++i, ++m, ++c) { x0 ^= INS_BYTE(*m, i); *c = EXT_BYTE(x0, i); } x0 ^= INS_BYTE(0x80, rlen); // finalization x1 ^= K0; x2 ^= K1; P12; x3 ^= K0; x4 ^= K1; // return tag ((u64*)c)[0] = U64BIG(x3); ((u64*)c)[1] = U64BIG(x4); *clen = mlen + CRYPTO_KEYBYTES; return 0; }