/*
 * One quarter-round over four words of the local working state x[].
 * The update order (b, c, d, then a) matters: each step reads the word
 * written by the step before it.
 */
#define SALSA20_QUARTERROUND(a, b, c, d)                           \
    do {                                                           \
        x[(b)] = XOR(x[(b)], ROTL32(x[(a)] + x[(d)],  7));         \
        x[(c)] = XOR(x[(c)], ROTL32(x[(b)] + x[(a)],  9));         \
        x[(d)] = XOR(x[(d)], ROTL32(x[(c)] + x[(b)], 13));         \
        x[(a)] = XOR(x[(a)], ROTL32(x[(d)] + x[(c)], 18));         \
    } while (0)

/**
 * Generate one 64-byte output block from the 16-word state and advance
 * the block counter held in the state.
 *
 * @param rounds  Number of rounds; consumed two at a time (one column
 *                round followed by one row round).  An odd value is
 *                rounded down to the nearest even number.
 * @param input   16-word state.  Read in full; words 8 and 9 are then
 *                updated in place as a 64-bit block counter (word 8 is
 *                the low half).
 * @param output  Destination for 16 words written at byte offsets
 *                0, 4, ..., 60 through STORE_U32_LITTLE (defined
 *                elsewhere; presumably a little-endian store).
 */
static void _salsa20_block(unsigned rounds, uint32_t *input, uint8_t *output)
{
    /* Exact-width words: the additions below must wrap modulo 2^32. */
    uint32_t x[16];
    unsigned i;

    for (i = 0; i < 16; i++) {
        x[i] = input[i];
    }

    /*
     * The guard is "i >= 2" rather than "i > 0": with an unsigned counter
     * stepping by two, an odd starting value would skip past zero, wrap
     * around and never terminate.
     */
    for (i = rounds; i >= 2; i -= 2) {
        /* Column round */
        SALSA20_QUARTERROUND( 0,  4,  8, 12);
        SALSA20_QUARTERROUND( 5,  9, 13,  1);
        SALSA20_QUARTERROUND(10, 14,  2,  6);
        SALSA20_QUARTERROUND(15,  3,  7, 11);

        /* Row round */
        SALSA20_QUARTERROUND( 0,  1,  2,  3);
        SALSA20_QUARTERROUND( 5,  6,  7,  4);
        SALSA20_QUARTERROUND(10, 11,  8,  9);
        SALSA20_QUARTERROUND(15, 12, 13, 14);
    }

    /* Feed-forward: add the original state, then emit each word. */
    for (i = 0; i < 16; i++) {
        x[i] = x[i] + input[i];
        STORE_U32_LITTLE(output + 4 * i, x[i]);
    }

    /* Increment block counter */
    input[8] = input[8] + 1;
    if (!input[8]) {
        /* Low word wrapped to zero: carry into the high word. */
        input[9] = input[9] + 1;
        /* stopping at 2^70 bytes per nonce is user's responsibility */
    }
}

#undef SALSA20_QUARTERROUND
/**
 * Process the block currently held in sp->data and add the result into
 * the five-word chaining value sp->h[0..4].
 *
 * sp->data is used as an 80-word work area: the first 16 words are the
 * input block (byte-swapped in place when WORDS_BIGENDIAN is not set),
 * and words 16..79 are overwritten with the expanded schedule.
 *
 * The SUBROUND1..SUBROUND4 macros and the constant table k[] are defined
 * elsewhere in the file.
 */
void sha1Process(sha1Param* sp)
{
    uint32_t *w = sp->data;
    uint32_t a, b, c, d, e;
    uint32_t mix;
    int i;

#if !WORDS_BIGENDIAN
    /* Convert the 16 input words in place. */
    for (i = 0; i < 16; i++) {
        mix = swapu32(w[i]);
        w[i] = mix;
    }
#endif

    /* Expand the 16 input words into words 16..79. */
    for (i = 16; i < 80; i++) {
        mix = w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16];
        w[i] = ROTL32(mix, 1);
    }

    a = sp->h[0];
    b = sp->h[1];
    c = sp->h[2];
    d = sp->h[3];
    e = sp->h[4];

    /*
     * Each stage runs 20 steps.  The five working variables rotate one
     * position per step, so the argument pattern repeats every five steps
     * and each loop body covers exactly one such cycle.
     */
    for (i = 0; i < 20; i += 5) {
        SUBROUND1(a,b,c,d,e,w[i    ],k[0]);
        SUBROUND1(e,a,b,c,d,w[i + 1],k[0]);
        SUBROUND1(d,e,a,b,c,w[i + 2],k[0]);
        SUBROUND1(c,d,e,a,b,w[i + 3],k[0]);
        SUBROUND1(b,c,d,e,a,w[i + 4],k[0]);
    }

    for (i = 20; i < 40; i += 5) {
        SUBROUND2(a,b,c,d,e,w[i    ],k[1]);
        SUBROUND2(e,a,b,c,d,w[i + 1],k[1]);
        SUBROUND2(d,e,a,b,c,w[i + 2],k[1]);
        SUBROUND2(c,d,e,a,b,w[i + 3],k[1]);
        SUBROUND2(b,c,d,e,a,w[i + 4],k[1]);
    }

    for (i = 40; i < 60; i += 5) {
        SUBROUND3(a,b,c,d,e,w[i    ],k[2]);
        SUBROUND3(e,a,b,c,d,w[i + 1],k[2]);
        SUBROUND3(d,e,a,b,c,w[i + 2],k[2]);
        SUBROUND3(c,d,e,a,b,w[i + 3],k[2]);
        SUBROUND3(b,c,d,e,a,w[i + 4],k[2]);
    }

    for (i = 60; i < 80; i += 5) {
        SUBROUND4(a,b,c,d,e,w[i    ],k[3]);
        SUBROUND4(e,a,b,c,d,w[i + 1],k[3]);
        SUBROUND4(d,e,a,b,c,w[i + 2],k[3]);
        SUBROUND4(c,d,e,a,b,w[i + 3],k[3]);
        SUBROUND4(b,c,d,e,a,w[i + 4],k[3]);
    }

    /* Fold the working variables back into the chaining value. */
    sp->h[0] += a;
    sp->h[1] += b;
    sp->h[2] += c;
    sp->h[3] += d;
    sp->h[4] += e;
}
/**
 * Run 20 rounds over the 16 words in mp->data, add the input words back
 * in (feed-forward), and accumulate the result into mp->h[0..15].
 *
 * mp->data is only read; mp->h is updated in place.
 *
 * The input words are decoded once into a local copy, and that same copy
 * is used for both the rounds and the feed-forward.  With WORDS_BIGENDIAN
 * defined the decode byte-swaps each word; adding the raw mp->data[] words
 * in the feed-forward would then mix two byte orders.
 * NOTE(review): the big-endian path remains untested.
 */
void salsa20Process(salsa20Param* mp)
{
    uint32_t in[16];    /* decoded input words */
    uint32_t X[16];     /* working state */
    int i;

    for (i = 0; i < 16; ++i) {
#ifdef WORDS_BIGENDIAN
        /* XXX untested */
        in[i] = swapu32(mp->data[i]);
#else
        in[i] = mp->data[i];
#endif
        X[i] = in[i];
    }

    /* Each iteration is one column round followed by one row round. */
    for (i = 20; i > 0; i -= 2) {
        /* Column round */
        X[ 4] ^= ROTL32(X[ 0]+X[12], 7);  X[ 8] ^= ROTL32(X[ 4]+X[ 0], 9);
        X[12] ^= ROTL32(X[ 8]+X[ 4],13);  X[ 0] ^= ROTL32(X[12]+X[ 8],18);
        X[ 9] ^= ROTL32(X[ 5]+X[ 1], 7);  X[13] ^= ROTL32(X[ 9]+X[ 5], 9);
        X[ 1] ^= ROTL32(X[13]+X[ 9],13);  X[ 5] ^= ROTL32(X[ 1]+X[13],18);
        X[14] ^= ROTL32(X[10]+X[ 6], 7);  X[ 2] ^= ROTL32(X[14]+X[10], 9);
        X[ 6] ^= ROTL32(X[ 2]+X[14],13);  X[10] ^= ROTL32(X[ 6]+X[ 2],18);
        X[ 3] ^= ROTL32(X[15]+X[11], 7);  X[ 7] ^= ROTL32(X[ 3]+X[15], 9);
        X[11] ^= ROTL32(X[ 7]+X[ 3],13);  X[15] ^= ROTL32(X[11]+X[ 7],18);

        /* Row round */
        X[ 1] ^= ROTL32(X[ 0]+X[ 3], 7);  X[ 2] ^= ROTL32(X[ 1]+X[ 0], 9);
        X[ 3] ^= ROTL32(X[ 2]+X[ 1],13);  X[ 0] ^= ROTL32(X[ 3]+X[ 2],18);
        X[ 6] ^= ROTL32(X[ 5]+X[ 4], 7);  X[ 7] ^= ROTL32(X[ 6]+X[ 5], 9);
        X[ 4] ^= ROTL32(X[ 7]+X[ 6],13);  X[ 5] ^= ROTL32(X[ 4]+X[ 7],18);
        X[11] ^= ROTL32(X[10]+X[ 9], 7);  X[ 8] ^= ROTL32(X[11]+X[10], 9);
        X[ 9] ^= ROTL32(X[ 8]+X[11],13);  X[10] ^= ROTL32(X[ 9]+X[ 8],18);
        X[12] ^= ROTL32(X[15]+X[14], 7);  X[13] ^= ROTL32(X[12]+X[15], 9);
        X[14] ^= ROTL32(X[13]+X[12],13);  X[15] ^= ROTL32(X[14]+X[13],18);
    }

    /* Feed-forward with the same words that entered the rounds. */
    for (i = 0; i < 16; ++i) {
        X[i] += in[i];
    }

    /* CBC chaining on stream cipher blocks. */
    for (i = 0; i < 16; i++) {
        mp->h[i] += X[i];
    }
}