/*
 * SHA-256 block function: folds `num` consecutive message blocks read from
 * `in` into the eight chaining words ctx->h[0..7].
 *
 * Every block supplies 16 words, fetched one at a time with HOST_c2l (defined
 * outside this block; it is expected to advance the read pointer as it goes),
 * and is then pushed through 64 rounds.  Only a 16-entry window of the message
 * schedule is kept; it is indexed modulo 16 and updated in place.
 */
static void sha256_block_data_order(SHA256_CTX *ctx, const void *in,
                                    size_t num)
{
    unsigned MD32_REG_T a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG sched[16], word;
    const unsigned char *p = in;
    int t;

    for (; num != 0; num--) {
        /* Working variables start from the current chaining value. */
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (t = 0; t < 64; t++) {
            if (t < 16) {
                /* Rounds 0..15 take their word straight from the input. */
                HOST_c2l(p, word);
                sched[t] = word;
                T1 = sched[t];
            } else {
                /* Rounds 16..63 extend the schedule inside the window. */
                s0 = sched[(t + 1) & 0x0f];
                s0 = sigma0(s0);
                s1 = sched[(t + 14) & 0x0f];
                s1 = sigma1(s1);

                sched[t & 0xf] += s0 + s1 + sched[(t + 9) & 0xf];
                T1 = sched[t & 0xf];
            }

            T1 += h + Sigma1(e) + Ch(e, f, g) + K256[t];
            T2 = Sigma0(a) + Maj(a, b, c);

            /* Rotate the eight working variables by one position. */
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Feed the round output back into the chaining value. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
/**
 * Apply the SHA-512 compression function to one message block.
 *
 * @param md   Hash state; md->sha512.state[0..7] is read and then updated
 *             in place with the result of the 80 rounds.
 * @param buf  Input block.  Sixteen words are loaded with LOAD64H from
 *             buf + 8*i, i = 0..15 (LOAD64H is defined elsewhere; presumably
 *             an 8-byte big-endian load, i.e. 128 bytes are consumed).
 * @return CRYPT_OK (this function has no failure path).
 *
 * NOTE: the local copies S[] and W[] are not wiped before returning.
 */
static int sha512_compress(hash_state * md, unsigned char *buf)
{
    ulong64 S[8], W[80], t0, t1;
    int i;

    /* take a working copy of the chaining state */
    for (i = 0; i < 8; i++) {
        S[i] = md->sha512.state[i];
    }

    /* load the message block into W[0..15] */
    for (i = 0; i < 16; i++) {
        LOAD64H(W[i], buf + (8*i));
    }

    /* expand the message schedule: fill W[16..79] */
    for (i = 16; i < 80; i++) {
        W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
    }

    /* Compress */
#ifdef LTC_SMALL_CODE
    /* compact variant: one round per iteration, state rotated explicitly */
    for (i = 0; i < 80; i++) {
        t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
        t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
        S[7] = S[6];
        S[6] = S[5];
        S[5] = S[4];
        S[4] = S[3] + t0;
        S[3] = S[2];
        S[2] = S[1];
        S[1] = S[0];
        S[0] = t0 + t1;
    }
#else
/*
 * One round.  Instead of moving the eight state words, each invocation is
 * given the words in rotated order, so only d and h are written.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define RND(a,b,c,d,e,f,g,h,i)                          \
    do {                                                \
        t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
        t1 = Sigma0(a) + Maj(a, b, c);                  \
        d += t0;                                        \
        h  = t0 + t1;                                   \
    } while (0)

    for (i = 0; i < 80; i += 8) {
        RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
        RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
        RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
        RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
        RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
        RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
        RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
        RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
    }

/* keep the helper macro local to this function */
#undef RND
#endif

    /* feedback: add the round output into the chaining state */
    for (i = 0; i < 8; i++) {
        md->sha512.state[i] = md->sha512.state[i] + S[i];
    }

    return CRYPT_OK;
}
/*
 * SHA-256 compression on __m128i values.
 *
 * state : eight input chaining words (read only)
 * block : sixteen message words
 * dst   : eight output words, dst[i] = state[i] + (working variable i after
 *         64 rounds)
 *
 * Round constants are broadcast with _mm_set1_epi32, so the same constant is
 * applied to every 32-bit lane.  The add*, sigma*, Sigma*, Ch and Maj helpers
 * are defined outside this block; presumably they operate lane-wise, which
 * would make this four independent SHA-256 compressions in parallel --
 * confirm against their definitions.
 *
 * state[i] is re-read immediately before dst[i] is written, in index order.
 */
static inline void sha256_transform(__m128i *state, __m128i *block, __m128i *dst)
{
    __m128i W[64], t1, t2;
    int t;

    /* Message schedule: first 16 entries are the block itself ... */
    for (t = 0; t < 16; t++) {
        W[t] = block[t];
    }
    /* ... the remaining 48 are derived from earlier entries. */
    for (t = 16; t < 64; t++) {
        W[t] = add4(sigma1(W[t - 2]), W[t - 7], sigma0(W[t - 15]), W[t - 16]);
    }

    /* read existing state */
    __m128i a = state[0];
    __m128i b = state[1];
    __m128i c = state[2];
    __m128i d = state[3];
    __m128i e = state[4];
    __m128i f = state[5];
    __m128i g = state[6];
    __m128i h = state[7];

/*
 * One round.  The eight working variables are never shuffled; each call
 * names them in rotated order instead, so only `rd` and `rh` are written.
 * After every 8 calls the names line up with their original roles again.
 */
#define SHA256_SSE2_STEP(ra, rb, rc, rd, re, rf, rg, rh, k, w)              \
    do {                                                                    \
        t1 = add5(rh, Sigma1(re), Ch(re, rf, rg), _mm_set1_epi32(k), w);    \
        t2 = add2(Sigma0(ra), Maj(ra, rb, rc));                             \
        rd = add2(rd, t1);                                                  \
        rh = add2(t1, t2);                                                  \
    } while (0)

    /* rounds 0..7 */
    SHA256_SSE2_STEP(a, b, c, d, e, f, g, h, 0x428a2f98, W[0]);
    SHA256_SSE2_STEP(h, a, b, c, d, e, f, g, 0x71374491, W[1]);
    SHA256_SSE2_STEP(g, h, a, b, c, d, e, f, 0xb5c0fbcf, W[2]);
    SHA256_SSE2_STEP(f, g, h, a, b, c, d, e, 0xe9b5dba5, W[3]);
    SHA256_SSE2_STEP(e, f, g, h, a, b, c, d, 0x3956c25b, W[4]);
    SHA256_SSE2_STEP(d, e, f, g, h, a, b, c, 0x59f111f1, W[5]);
    SHA256_SSE2_STEP(c, d, e, f, g, h, a, b, 0x923f82a4, W[6]);
    SHA256_SSE2_STEP(b, c, d, e, f, g, h, a, 0xab1c5ed5, W[7]);

    /* rounds 8..15 */
    SHA256_SSE2_STEP(a, b, c, d, e, f, g, h, 0xd807aa98, W[8]);
    SHA256_SSE2_STEP(h, a, b, c, d, e, f, g, 0x12835b01, W[9]);
    SHA256_SSE2_STEP(g, h, a, b, c, d, e, f, 0x243185be, W[10]);
    SHA256_SSE2_STEP(f, g, h, a, b, c, d, e, 0x550c7dc3, W[11]);
    SHA256_SSE2_STEP(e, f, g, h, a, b, c, d, 0x72be5d74, W[12]);
    SHA256_SSE2_STEP(d, e, f, g, h, a, b, c, 0x80deb1fe, W[13]);
    SHA256_SSE2_STEP(c, d, e, f, g, h, a, b, 0x9bdc06a7, W[14]);
    SHA256_SSE2_STEP(b, c, d, e, f, g, h, a, 0xc19bf174, W[15]);

    /* rounds 16..23 */
    SHA256_SSE2_STEP(a, b, c, d, e, f, g, h, 0xe49b69c1, W[16]);
    SHA256_SSE2_STEP(h, a, b, c, d, e, f, g, 0xefbe4786, W[17]);
    SHA256_SSE2_STEP(g, h, a, b, c, d, e, f, 0x0fc19dc6, W[18]);
    SHA256_SSE2_STEP(f, g, h, a, b, c, d, e, 0x240ca1cc, W[19]);
    SHA256_SSE2_STEP(e, f, g, h, a, b, c, d, 0x2de92c6f, W[20]);
    SHA256_SSE2_STEP(d, e, f, g, h, a, b, c, 0x4a7484aa, W[21]);
    SHA256_SSE2_STEP(c, d, e, f, g, h, a, b, 0x5cb0a9dc, W[22]);
    SHA256_SSE2_STEP(b, c, d, e, f, g, h, a, 0x76f988da, W[23]);

    /* rounds 24..31 */
    SHA256_SSE2_STEP(a, b, c, d, e, f, g, h, 0x983e5152, W[24]);
    SHA256_SSE2_STEP(h, a, b, c, d, e, f, g, 0xa831c66d, W[25]);
    SHA256_SSE2_STEP(g, h, a, b, c, d, e, f, 0xb00327c8, W[26]);
    SHA256_SSE2_STEP(f, g, h, a, b, c, d, e, 0xbf597fc7, W[27]);
    SHA256_SSE2_STEP(e, f, g, h, a, b, c, d, 0xc6e00bf3, W[28]);
    SHA256_SSE2_STEP(d, e, f, g, h, a, b, c, 0xd5a79147, W[29]);
    SHA256_SSE2_STEP(c, d, e, f, g, h, a, b, 0x06ca6351, W[30]);
    SHA256_SSE2_STEP(b, c, d, e, f, g, h, a, 0x14292967, W[31]);

    /* rounds 32..39 */
    SHA256_SSE2_STEP(a, b, c, d, e, f, g, h, 0x27b70a85, W[32]);
    SHA256_SSE2_STEP(h, a, b, c, d, e, f, g, 0x2e1b2138, W[33]);
    SHA256_SSE2_STEP(g, h, a, b, c, d, e, f, 0x4d2c6dfc, W[34]);
    SHA256_SSE2_STEP(f, g, h, a, b, c, d, e, 0x53380d13, W[35]);
    SHA256_SSE2_STEP(e, f, g, h, a, b, c, d, 0x650a7354, W[36]);
    SHA256_SSE2_STEP(d, e, f, g, h, a, b, c, 0x766a0abb, W[37]);
    SHA256_SSE2_STEP(c, d, e, f, g, h, a, b, 0x81c2c92e, W[38]);
    SHA256_SSE2_STEP(b, c, d, e, f, g, h, a, 0x92722c85, W[39]);

    /* rounds 40..47 */
    SHA256_SSE2_STEP(a, b, c, d, e, f, g, h, 0xa2bfe8a1, W[40]);
    SHA256_SSE2_STEP(h, a, b, c, d, e, f, g, 0xa81a664b, W[41]);
    SHA256_SSE2_STEP(g, h, a, b, c, d, e, f, 0xc24b8b70, W[42]);
    SHA256_SSE2_STEP(f, g, h, a, b, c, d, e, 0xc76c51a3, W[43]);
    SHA256_SSE2_STEP(e, f, g, h, a, b, c, d, 0xd192e819, W[44]);
    SHA256_SSE2_STEP(d, e, f, g, h, a, b, c, 0xd6990624, W[45]);
    SHA256_SSE2_STEP(c, d, e, f, g, h, a, b, 0xf40e3585, W[46]);
    SHA256_SSE2_STEP(b, c, d, e, f, g, h, a, 0x106aa070, W[47]);

    /* rounds 48..55 */
    SHA256_SSE2_STEP(a, b, c, d, e, f, g, h, 0x19a4c116, W[48]);
    SHA256_SSE2_STEP(h, a, b, c, d, e, f, g, 0x1e376c08, W[49]);
    SHA256_SSE2_STEP(g, h, a, b, c, d, e, f, 0x2748774c, W[50]);
    SHA256_SSE2_STEP(f, g, h, a, b, c, d, e, 0x34b0bcb5, W[51]);
    SHA256_SSE2_STEP(e, f, g, h, a, b, c, d, 0x391c0cb3, W[52]);
    SHA256_SSE2_STEP(d, e, f, g, h, a, b, c, 0x4ed8aa4a, W[53]);
    SHA256_SSE2_STEP(c, d, e, f, g, h, a, b, 0x5b9cca4f, W[54]);
    SHA256_SSE2_STEP(b, c, d, e, f, g, h, a, 0x682e6ff3, W[55]);

    /* rounds 56..63 */
    SHA256_SSE2_STEP(a, b, c, d, e, f, g, h, 0x748f82ee, W[56]);
    SHA256_SSE2_STEP(h, a, b, c, d, e, f, g, 0x78a5636f, W[57]);
    SHA256_SSE2_STEP(g, h, a, b, c, d, e, f, 0x84c87814, W[58]);
    SHA256_SSE2_STEP(f, g, h, a, b, c, d, e, 0x8cc70208, W[59]);
    SHA256_SSE2_STEP(e, f, g, h, a, b, c, d, 0x90befffa, W[60]);
    SHA256_SSE2_STEP(d, e, f, g, h, a, b, c, 0xa4506ceb, W[61]);
    SHA256_SSE2_STEP(c, d, e, f, g, h, a, b, 0xbef9a3f7, W[62]);
    SHA256_SSE2_STEP(b, c, d, e, f, g, h, a, 0xc67178f2, W[63]);

#undef SHA256_SSE2_STEP

    /* Output = input state + working variables. */
    dst[0] = add2(state[0], a);
    dst[1] = add2(state[1], b);
    dst[2] = add2(state[2], c);
    dst[3] = add2(state[3], d);
    dst[4] = add2(state[4], e);
    dst[5] = add2(state[5], f);
    dst[6] = add2(state[6], g);
    dst[7] = add2(state[7], h);
}
/*
 * SHA-512 compression over a run of blocks.
 *
 * Consumes `nblocks` consecutive blocks from `in`, stepping the read pointer
 * by CCSHA512_BLOCK_SIZE after each one, and accumulates the result into the
 * eight 64-bit words returned by ccdigest_u64(state).
 */
void ccsha512_ltc_compress(ccdigest_state_t state, unsigned long nblocks, const void *in)
{
    uint64_t v[8], W[80], t0, t1;
    uint64_t *chain = ccdigest_u64(state);
    const unsigned char *p = in;
    int r;

    for (; nblocks != 0; nblocks--, p += CCSHA512_BLOCK_SIZE) {
        /* working copy of the chaining words */
        for (r = 0; r < 8; r++) {
            v[r] = chain[r];
        }

        /* W[0..15]: the sixteen words of the current block */
        for (r = 0; r < 16; r++) {
            CC_LOAD64_BE(W[r], p + (8*r));
        }

        /* W[16..79]: schedule expansion */
        for (r = 16; r < 80; r++) {
            W[r] = Gamma1(W[r - 2]) + W[r - 7] + Gamma0(W[r - 15]) + W[r - 16];
        }

        /* 80 rounds */
#ifdef CC_SMALL_CODE
        /* compact form: one round per pass, words shifted explicitly */
        for (r = 0; r < 80; r++) {
            t0 = v[7] + Sigma1(v[4]) + Ch(v[4], v[5], v[6]) + K[r] + W[r];
            t1 = Sigma0(v[0]) + Maj(v[0], v[1], v[2]);
            v[7] = v[6];
            v[6] = v[5];
            v[5] = v[4];
            v[4] = v[3] + t0;
            v[3] = v[2];
            v[2] = v[1];
            v[1] = v[0];
            v[0] = t0 + t1;
        }
#else
/*
 * Unrolled form: the words stay put and each RND call receives them in
 * rotated order, so a round only writes its `d` and `h` arguments.
 * Relies on the locals t0, t1 and W by name.
 */
#define RND(a,b,c,d,e,f,g,h,i)                      \
    t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
    t1 = Sigma0(a) + Maj(a, b, c);                  \
    d += t0;                                        \
    h = t0 + t1;

        for (r = 0; r < 80; r += 8) {
            RND(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7],r+0);
            RND(v[7],v[0],v[1],v[2],v[3],v[4],v[5],v[6],r+1);
            RND(v[6],v[7],v[0],v[1],v[2],v[3],v[4],v[5],r+2);
            RND(v[5],v[6],v[7],v[0],v[1],v[2],v[3],v[4],r+3);
            RND(v[4],v[5],v[6],v[7],v[0],v[1],v[2],v[3],r+4);
            RND(v[3],v[4],v[5],v[6],v[7],v[0],v[1],v[2],r+5);
            RND(v[2],v[3],v[4],v[5],v[6],v[7],v[0],v[1],r+6);
            RND(v[1],v[2],v[3],v[4],v[5],v[6],v[7],v[0],r+7);
        }
#endif

        /* fold the round output back into the chaining words */
        for (r = 0; r < 8; r++) {
            chain[r] += v[r];
        }
    }
}