// Applies the ChaCha20 block function to the 16-word state `s`, in place.
//
// The permutation is 20 rounds, executed as 10 "double rounds". Each double
// round is a column round (four quarter-rounds down the columns of the 4x4
// word matrix) followed by a diagonal round (four quarter-rounds along its
// diagonals). Afterwards the original input words are added back into the
// permuted words. Serializing the resulting words into bytes is not done
// here; that is left to the caller.
void block(state& s)
{
    // Word indices handed to each quarter-round, in execution order.
    static const int kQuarterRounds[8][4] = {
        // column round
        { 0, 4,  8, 12 },
        { 1, 5,  9, 13 },
        { 2, 6, 10, 14 },
        { 3, 7, 11, 15 },
        // diagonal round
        { 0, 5, 10, 15 },
        { 1, 6, 11, 12 },
        { 2, 7,  8, 13 },
        { 3, 4,  9, 14 },
    };

    // Snapshot of the input, needed for the feed-forward addition below.
    const state initial = s;

    for (int double_round = 0; double_round < 10; ++double_round) {
        for (int q = 0; q < 8; ++q) {
            const int* idx = kQuarterRounds[q];
            quarter_round(s[idx[0]], s[idx[1]], s[idx[2]], s[idx[3]]);
        }
    }

    // Feed-forward: add each original input word to the permuted word.
    for (int word = 0; word < 16; ++word) {
        s[word] += initial[word];
    }
}
extern_c void crandom_chacha_expand(u_int64_t iv, u_int64_t ctr, int nr, int output_size, const unsigned char *key_, unsigned char *output_) { # if MIGHT_HAVE_SSE2 if (HAVE(SSE2)) { ssereg *key = (ssereg *)key_; ssereg *output = (ssereg *)output_; ssereg a1 = key[0], a2 = a1, aa = a1, b1 = key[1], b2 = b1, bb = b1, c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1, d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull}, d2 = d1, dd = d1, p = {0, 1}; int i,r; # if (NEED_XOP) if (HAVE(XOP)) { for (i=0; i<output_size; i+=128) { for (r=nr; r>0; r-=2) DOUBLE_ROUND(quarter_round_xop); OUTPUT_FUNCTION; } return; } # endif # if (NEED_SSSE3) if (HAVE(SSSE3)) { for (i=0; i<output_size; i+=128) { for (r=nr; r>0; r-=2) DOUBLE_ROUND(quarter_round_ssse3); OUTPUT_FUNCTION; } return; } # endif # if (NEED_SSE2) if (HAVE(SSE2)) { for (i=0; i<output_size; i+=128) { for (r=nr; r>0; r-=2) DOUBLE_ROUND(quarter_round_sse2); OUTPUT_FUNCTION; } return; } # endif } # endif # if NEED_CONV { const u_int32_t *key = (const u_int32_t *)key_; u_int32_t x[16], input[16] = { key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7], iv, iv>>32, ctr, ctr>>32, 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 }, *output = (u_int32_t *)output_; int i, r; for (i=0; i<output_size; i+= 64) { for (r=0; r<16; r++) { x[r] = input[r]; } for (r=nr; r>0; r-=2) { quarter_round(&x[0], &x[4], &x[8], &x[12]); quarter_round(&x[1], &x[5], &x[9], &x[13]); quarter_round(&x[2], &x[6], &x[10], &x[14]); quarter_round(&x[3], &x[7], &x[11], &x[15]); quarter_round(&x[0], &x[5], &x[10], &x[15]); quarter_round(&x[1], &x[6], &x[11], &x[12]); quarter_round(&x[2], &x[7], &x[8], &x[13]); quarter_round(&x[3], &x[4], &x[9], &x[14]); } for (r=0; r<16; r++) { output[r] = x[r] + input[r]; } output += 16; input[11] ++; if (!input[11]) input[12]++; } }