/* SHA-256 compression (FIPS 180-4, sec. 6.2.2): fold one 64-byte block at
 * `data` into the 8-word chaining value `iv`.  Uses a rolling 16-word
 * message schedule instead of the full 64-word array. */
static void sha256_compress(unsigned int* iv, const uint8_t* data) {
    unsigned int a, b, c, d, e, f, g, h;
    unsigned int s0, s1;
    unsigned int t1, t2;
    unsigned int work_space[16];   /* rolling message schedule W[i mod 16] */
    unsigned int n;
    unsigned int i;
    a = iv[0]; b = iv[1]; c = iv[2]; d = iv[3];
    e = iv[4]; f = iv[5]; g = iv[6]; h = iv[7];
    /* Rounds 0-15: load message words while running the round function.
     * NOTE(review): BigEndian(&data) presumably reads 4 bytes big-endian and
     * advances the pointer -- confirm against its definition. */
    for (i = 0; i < 16; ++i) {
        n = BigEndian(&data);
        t1 = work_space[i] = n;
        t1 += h + Sigma1(e) + Ch(e, f, g) + k256[i];
        t2 = Sigma0(a) + Maj(a, b, c);
        h = g; g = f; f = e; e = d + t1;
        d = c; c = b; b = a; a = t1 + t2;
    }
    /* Rounds 16-63: extend the schedule in place (indices mod 16). */
    for (; i < 64; ++i) {
        s0 = work_space[(i + 1) & 0x0f];
        s0 = sigma0(s0);
        s1 = work_space[(i + 14) & 0x0f];
        s1 = sigma1(s1);
        t1 = work_space[i & 0xf] += s0 + s1 + work_space[(i + 9) & 0xf];
        t1 += h + Sigma1(e) + Ch(e, f, g) + k256[i];
        t2 = Sigma0(a) + Maj(a, b, c);
        h = g; g = f; f = e; e = d + t1;
        d = c; c = b; b = a; a = t1 + t2;
    }
    /* Davies-Meyer feed-forward into the chaining value. */
    iv[0] += a; iv[1] += b; iv[2] += c; iv[3] += d;
    iv[4] += e; iv[5] += f; iv[6] += g; iv[7] += h;
}
/* Compress `num` consecutive 64-byte blocks at `in` into ctx->h (SHA-256).
 * `host` selects the fast path: input words are already in host order (and,
 * per the cast, suitably aligned) and are read directly; otherwise bytes
 * are assembled via HOST_c2l.  Uses a rolling 16-word schedule X[]. */
static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
{
    unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
    SHA_LONG X[16];
    int i;
    const unsigned char *data=in;

    while (num--) {
        a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
        e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];

        if (host) {
            /* NOTE(review): the cast assumes `data` is SHA_LONG-aligned and
             * already byte-ordered for direct use -- caller's contract. */
            const SHA_LONG *W=(const SHA_LONG *)data;
            for (i=0;i<16;i++) {
                T1 = X[i] = W[i];
                T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
                T2 = Sigma0(a) + Maj(a,b,c);
                h = g; g = f; f = e; e = d + T1;
                d = c; c = b; b = a; a = T1 + T2;
            }
            data += SHA256_CBLOCK;
        } else {
            SHA_LONG l;
            for (i=0;i<16;i++) {
                /* HOST_c2l assembles a word and advances `data`
                 * (OpenSSL convention). */
                HOST_c2l(data,l);
                T1 = X[i] = l;
                T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
                T2 = Sigma0(a) + Maj(a,b,c);
                h = g; g = f; f = e; e = d + T1;
                d = c; c = b; b = a; a = T1 + T2;
            }
        }

        /* Rounds 16-63: extend the schedule in place (indices mod 16). */
        for (;i<64;i++) {
            s0 = X[(i+1)&0x0f];  s0 = sigma0(s0);
            s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
            T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
            T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
            T2 = Sigma0(a) + Maj(a,b,c);
            h = g; g = f; f = e; e = d + T1;
            d = c; c = b; b = a; a = T1 + T2;
        }

        /* Davies-Meyer feed-forward. */
        ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
        ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
    }
}
/**
 * sha512 compression function - 32-bit machines
 * @param res  The resulting hash value
 * @param hash The chaining input value
 * @param in   The message input
 *
 * Instead of rotating eight named variables, a window pointer F slides down
 * the array X[9+80]: each round stores the new a/e into F[0]/F[4] and the
 * schedule word into F[8], so F[1..3] and F[5..7] always alias b..d / f..h,
 * and F[8+16-k] is the schedule word W[i-16+k].
 */
void sha512_comp (hashblock res, const hashblock hash, const messageblock in)
{
    const uint64_t *W=in;
    uint64_t A,E,T;
    uint64_t X[9+80],*F;
    uint64_t H[8];
    int i;

    /* Unpack the chaining value (PULL64 assembles one 64-bit word). */
    for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) { H[i]=PULL64(hash[i*8]); }

    F = X+80;
    A = H[0]; F[1] = H[1]; F[2] = H[2]; F[3] = H[3];
    E = H[4]; F[5] = H[5]; F[6] = H[6]; F[7] = H[7];

    /* Rounds 0-15: message words straight from the input block. */
    for (i=0;i<16;i++,F--) {
#ifdef B_ENDIAN
        T = W[i];
#else
        T = PULL64(W[i]);
#endif
        F[0] = A; F[4] = E; F[8] = T;
        T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
        E = F[3] + T;
        A = T + Sigma0(A) + Maj(A,F[1],F[2]);
    }

    /* Rounds 16-79: W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
     * expressed through the sliding window. */
    for (;i<80;i++,F--) {
        T = sigma0(F[8+16-1]);
        T += sigma1(F[8+16-14]);
        T += F[8+16] + F[8+16-9];
        F[0] = A; F[4] = E; F[8] = T;
        T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
        E = F[3] + T;
        A = T + Sigma0(A) + Maj(A,F[1],F[2]);
    }

    /* Feed-forward, then serialize the result via PUSH64. */
    H[0] += A; H[1] += F[1]; H[2] += F[2]; H[3] += F[3];
    H[4] += E; H[5] += F[5]; H[6] += F[6]; H[7] += F[7];
    for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) { PUSH64(H[i],res[i*8]); }
}
/*
 * This code should give better results on 32-bit CPU with less than
 * ~24 registers, both size and performance wise...
 *
 * Compress `num` 16-word (128-byte) blocks from W into the 8-word SHA-512
 * state.  A window pointer F slides down X[9+80]: each round stores the new
 * a/e into F[0]/F[4] and the schedule word into F[8], so F[1..3]/F[5..7]
 * always alias b..d/f..h, and F[8+16-k] is the schedule word W[i-16+k].
 */
void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num)
{
    uint64_t A, E, T;
    uint64_t X[9 + 80], *F;
    int i;

    while (num--) {
        F = X + 80;
        A = state[0]; F[1] = state[1]; F[2] = state[2]; F[3] = state[3];
        E = state[4]; F[5] = state[5]; F[6] = state[6]; F[7] = state[7];

        /* Rounds 0-15: convert input words from big-endian. */
        for (i = 0; i < 16; i++, F--) {
            T = from_be_u64(W[i]);
            F[0] = A; F[4] = E; F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }
        /* Rounds 16-79: schedule extension through the sliding window
         * (sigma0 on W[i-15], sigma1 on W[i-2], plus W[i-16] and W[i-7]). */
        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];
            F[0] = A; F[4] = E; F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /* Davies-Meyer feed-forward, then advance to the next block. */
        state[0] += A; state[1] += F[1]; state[2] += F[2]; state[3] += F[3];
        state[4] += E; state[5] += F[5]; state[6] += F[6]; state[7] += F[7];
        W += 16;
    }
}
/**
 * sha512 compression function - 64-bit machines
 * @param res  The resulting hash value
 * @param hash The chaining input value
 * @param in   The message input
 */
void sha512_comp (hashblock res, const hashblock hash, const messageblock in)
{
    // CHANGE type casting added due to c++
    const uint64_t *W=reinterpret_cast<const uint64_t*>(in);
    uint64_t a,b,c,d,e,f,g,h,s0,s1,T1,T2;
    uint64_t X[16];   // rolling 16-word message schedule
    uint64_t H[8];
    int i;

    // Unpack the chaining value (PULL64 assembles one 64-bit word).
    for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) { H[i]=PULL64(hash[i*8]); }

    a = H[0]; b = H[1]; c = H[2]; d = H[3];
    e = H[4]; f = H[5]; g = H[6]; h = H[7];

    // Rounds 0-15: message words straight from the input block
    // (no byte-swap needed on big-endian hosts).
    for (i=0;i<16;i++) {
#ifdef B_ENDIAN
        T1 = X[i] = W[i];
#else
        T1 = X[i] = PULL64(W[i]);
#endif
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
        T2 = Sigma0(a) + Maj(a,b,c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }
    // Rounds 16-79: extend the schedule in place (indices mod 16).
    for (;i<80;i++) {
        s0 = X[(i+1)&0x0f];  s0 = sigma0(s0);
        s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
        T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
        T2 = Sigma0(a) + Maj(a,b,c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }

    // Feed-forward, then serialize the result via PUSH64.
    H[0] += a; H[1] += b; H[2] += c; H[3] += d;
    H[4] += e; H[5] += f; H[6] += g; H[7] += h;
    for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) { PUSH64(H[i],res[i*8]); }
}
/** One round of SHA-256 over eight parallel 32-bit lanes.  Only the two
 *  working variables that change each round (d and h) are passed by
 *  reference; `k` carries the round constant (callers typically pre-add the
 *  message word -- confirm at the call site). */
void inline __attribute__((always_inline)) Round(__m256i a, __m256i b, __m256i c, __m256i& d, __m256i e, __m256i f, __m256i g, __m256i& h, __m256i k)
{
    // T1 = h + Sigma1(e) + Ch(e,f,g) + k ; T2 = Sigma0(a) + Maj(a,b,c)
    __m256i sum1 = Add(h, Sigma1(e), Ch(e, f, g), k);
    __m256i sum2 = Add(Sigma0(a), Maj(a, b, c));
    h = Add(sum1, sum2);   // becomes next round's `a`
    d = Add(d, sum1);      // becomes next round's `e`
}
/*
 * SHA-256 compression: fold the 64-byte block at `cp` into the eight-word
 * chaining value H[] (FIPS 180-4).  Same arithmetic as the classic
 * eight-scalar form, but the working variables live in a small array.
 */
static void SHA256Transform(uint32_t *H, const uint8_t *cp)
{
    uint32_t W[64], v[8], T1, T2;
    unsigned t;

    /* Message schedule: 16 big-endian words, then 48 expanded words. */
    for (t = 0; t < 16; t++, cp += 4)
        W[t] = (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | cp[3];
    for (t = 16; t < 64; t++)
        W[t] = sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) + W[t - 16];

    /* v[0..7] are the FIPS working variables a..h. */
    for (t = 0; t < 8; t++)
        v[t] = H[t];

    for (t = 0; t < 64; t++) {
        T1 = v[7] + SIGMA1(v[4]) + Ch(v[4], v[5], v[6]) + SHA256_K[t] + W[t];
        T2 = SIGMA0(v[0]) + Maj(v[0], v[1], v[2]);
        v[7] = v[6]; v[6] = v[5]; v[5] = v[4]; v[4] = v[3] + T1;
        v[3] = v[2]; v[2] = v[1]; v[1] = v[0]; v[0] = T1 + T2;
    }

    /* Davies-Meyer feed-forward. */
    for (t = 0; t < 8; t++)
        H[t] += v[t];
}
/* Compress one 64-byte block at `hash_block` into the SHA-256 state held
 * in ctx->h0..h7, then scrub the local round state. */
void HashSHA256Block(void* hash_block, SHA256_Context* ctx)
{
    unsigned int a,b,c,d,e,f,g,h,T1,T2,i;
    unsigned int w[0x40];   /* full 64-word message schedule */
    unsigned char* block = (unsigned char*)hash_block;
    a = ctx->h0; b = ctx->h1; c = ctx->h2; d = ctx->h3;
    e = ctx->h4; f = ctx->h5; g = ctx->h6; h = ctx->h7;
    /* NOTE(review): the direct unsigned-int load assumes the platform
     * tolerates unaligned access, and BSWAP implies a little-endian
     * host -- confirm both for any new target. */
    for (i = 0; i < 16; i++)
        w[i] = BSWAP(*(unsigned int*)(block + i * 4));
    for (i = 16; i < 64; i++)
        w[i] = SigmaS1(w[i-2]) + w[i-7] + SigmaS0(w[i-15]) + w[i-16];
    for (i = 0; i < 64; i++) {
        T1 = h + SigmaB1(e) + Ch(e,f,g) + sha256_constant[i] + w[i];
        T2 = SigmaB0(a) + Maj(a,b,c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }
    ctx->h0 += a; ctx->h1 += b; ctx->h2 += c; ctx->h3 += d;
    ctx->h4 += e; ctx->h5 += f; ctx->h6 += g; ctx->h7 += h;
    /* Best-effort scrub of sensitive round state.  NOTE(review): these are
     * dead stores on a dying stack frame and a compiler may elide them;
     * consider a secure-zero helper if scrubbing is a hard requirement. */
    a = b = c = d = e = f = g = h = T1 = T2 = 0;
    memset(w,0,0x100);   /* 0x100 == sizeof(w), assuming 4-byte unsigned int */
}
/*****************************************
 * sha256 compression function           *
 *                                       *
 * res  receives the updated hash value  *
 * hash is the chaining input            *
 * in   points to the 64-byte block      *
 *****************************************/
void sha256_comp (hashblock res, const hashblock hash, const void *in)
{
    uint32_t a,b,c,d,e,f,g,h,s0,s1,T1,T2;
    uint32_t H[8];
    uint32_t X[16],l;   // rolling 16-word message schedule
    int i;
    // CHANGE type casting added due to c++
    const unsigned char *data=static_cast<const unsigned char*>(in);

    // Unpack the chaining value; HOST_c2l advances its pointer argument
    // (OpenSSL convention).
    for (i = 0; i < SHA256_DIGEST_LENGTH/4; i++) { HOST_c2l(hash, H[i]); }

    a = H[0]; b = H[1]; c = H[2]; d = H[3];
    e = H[4]; f = H[5]; g = H[6]; h = H[7];

    // Rounds 0-15: assemble message words while running the round function.
    for (i=0;i<16;i++) {
        HOST_c2l(data,l);
        T1 = X[i] = l;
        T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
        T2 = Sigma0(a) + Maj(a,b,c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }
    // Rounds 16-63: extend the schedule in place (indices mod 16).
    for (;i<64;i++) {
        s0 = X[(i+1)&0x0f];  s0 = sigma0(s0);
        s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
        T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
        T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
        T2 = Sigma0(a) + Maj(a,b,c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }

    // Feed-forward, then serialize (HOST_l2c advances `res`).
    H[0] += a; H[1] += b; H[2] += c; H[3] += d;
    H[4] += e; H[5] += f; H[6] += g; H[7] += h;
    for (i = 0; i < SHA256_DIGEST_LENGTH/4; i++) { HOST_l2c(H[i], res); }
}
/* SHA-512 compression of one 128-byte block from `buf` into
 * md->sha512.state.  The stray #endif after the signature closes a
 * prototype variant selected earlier in the file. */
void sha512_compress(psDigestContext_t * md, unsigned char *buf)
#endif
{
    uint64 S[8], W[80], t0, t1;
    int i;

    /* copy state into S */
    for (i = 0; i < 8; i++) {
        S[i] = md->sha512.state[i];
    }

    /* load the 1024-bit block into W[0..15] (LOAD64H reads big-endian) */
    for (i = 0; i < 16; i++) {
        LOAD64H(W[i], buf + (8*i));
    }

    /* fill W[16..79] */
    for (i = 16; i < 80; i++) {
        W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
    }

    /* Compress */
#ifndef PS_SHA512_IMPROVE_PERF_INCREASE_CODESIZE
    /* Compact rotate-by-copy round loop. */
    for (i = 0; i < 80; i++) {
        t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
        t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
        S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; S[4] = S[3] + t0;
        S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t0 + t1;
    }
#else
    /* Unrolled x8 with rotated register naming: avoids the copy chain. */
#define RND(a,b,c,d,e,f,g,h,i) \
    t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
    t1 = Sigma0(a) + Maj(a, b, c); \
    d += t0; \
    h = t0 + t1;
    for (i = 0; i < 80; i += 8) {
        RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
        RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
        RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
        RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
        RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
        RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
        RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
        RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
    }
#endif /* PS_SHA512_IMPROVE_PERF_INCREASE_CODESIZE */

    /* feedback */
    for (i = 0; i < 8; i++) {
        md->sha512.state[i] = md->sha512.state[i] + S[i];
    }
}
/* SHA-256 block function: compress one 64-byte block at `in` into ctx->h.
 * Uses a rolling 16-word schedule X[] and an explicit Ki temporary for the
 * round constant. */
void sha256_block_data_order (SHA256_CTX *ctx, const void *in)
{
    unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2,t;
    SHA_LONG X[16],l,Ki;
    int i;
    const unsigned char *data=in;

    a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
    e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];

    /* Rounds 0-15: HOST_c2l assembles a word and advances `data`
     * (OpenSSL convention). */
    for (i=0;i<16;i++) {
        HOST_c2l(data,l);
        X[i] = l;
        Ki=K256[i];
        T1 = l + h + Sigma1(e) + Ch(e,f,g) + Ki;
        T2 = Sigma0(a) + Maj(a,b,c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }

    /* Rounds 16-63: extend the schedule in place (indices mod 16). */
    for (;i<64;i++) {
        s0 = X[(i+1)&0x0f];  s0 = sigma0(s0);
        s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
        T1 = X[i&0xf];
        t = X[(i+9)&0xf];
        T1 += s0 + s1 + t;
        X[i&0xf] = T1;
        Ki=K256[i];
        T1 += h + Sigma1(e) + Ch(e,f,g) + Ki;
        T2 = Sigma0(a) + Maj(a,b,c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }

    /* Feed-forward written through an explicit temporary. */
    t=ctx->h[0]; ctx->h[0]=t+a;
    t=ctx->h[1]; ctx->h[1]=t+b;
    t=ctx->h[2]; ctx->h[2]=t+c;
    t=ctx->h[3]; ctx->h[3]=t+d;
    t=ctx->h[4]; ctx->h[4]=t+e;
    t=ctx->h[5]; ctx->h[5]=t+f;
    t=ctx->h[6]; ctx->h[6]=t+g;
    t=ctx->h[7]; ctx->h[7]=t+h;
    return;
}
/*
 * SHA-512 block function: fold `len` bytes (a multiple of 128) at `p`
 * into the 8-word state `s`.
 *
 * Fix: the low four bytes of each message word were assembled with plain
 * int arithmetic (`p[4] << 24 | ...`).  When p[4] >= 0x80 the left shift
 * overflows signed int (undefined behaviour, C11 6.5.7), and on common
 * compilers the negative int intermediate sign-extends when OR'd with the
 * 64-bit terms, forcing bits 32..63 to 1 and corrupting the word.  All
 * bytes are now widened to vlong before shifting.
 */
void _sha2block128(uchar *p, ulong len, uint64 *s)
{
    uint64 a, b, c, d, e, f, g, h, t1, t2;
    uint64 *kp, *wp;
    uint64 w[80];
    uchar *end;

    /* at this point, we have a multiple of 128 bytes */
    for(end = p+len; p < end;){
        a = s[0]; b = s[1]; c = s[2]; d = s[3];
        e = s[4]; f = s[5]; g = s[6]; h = s[7];

        /* load 16 big-endian 64-bit words */
        for(wp = w; wp < &w[16]; wp++, p += 8)
            wp[0] = ((vlong)p[0])<<56 | ((vlong)p[1])<<48 |
                    ((vlong)p[2])<<40 | ((vlong)p[3])<<32 |
                    ((vlong)p[4])<<24 | ((vlong)p[5])<<16 |
                    ((vlong)p[6])<<8  | (vlong)p[7];

        /* extend the schedule to 80 words */
        for(; wp < &w[80]; wp++) {
            uint64 s0, s1;

            s0 = sigma0(wp[-15]);
            s1 = sigma1(wp[-2]);
            wp[0] = s1 + wp[-7] + s0 + wp[-16];
        }

        /* 80 rounds */
        for(kp = K512, wp = w; wp < &w[80]; ) {
            t1 = h + SIGMA1(e) + Ch(e,f,g) + *kp++ + *wp++;
            t2 = SIGMA0(a) + Maj(a,b,c);
            h = g; g = f; f = e; e = d + t1;
            d = c; c = b; b = a; a = t1 + t2;
        }

        /* save state */
        s[0] += a; s[1] += b; s[2] += c; s[3] += d;
        s[4] += e; s[5] += f; s[6] += g; s[7] += h;
    }
}
/* Compress one 128-byte block at `input` into the SHA-512 state.
 * Uses a 16-entry circular schedule W[] and eight hand-unrolled rounds per
 * loop pass with rotated variable naming (no copy chain needed). */
static void sha512_transform(u64 *state, const u8 *input)
{
    u64 a, b, c, d, e, f, g, h, t1, t2;
    int i;
    u64 W[16];   /* circular 16-word message schedule */

    /* load the state into our registers */
    a=state[0];   b=state[1];
    c=state[2];   d=state[3];
    e=state[4];   f=state[5];
    g=state[6];   h=state[7];

    /* now iterate */
    for (i=0; i<80; i+=8) {
        /* Refill all 16 schedule slots at the start of every 16-round
         * group (i == 0, 16, 32, 48, 64): LOAD_OP for the first group,
         * BLEND_OP (sigma mixing) afterwards.  `!(i & 8)` is true on
         * exactly those multiples of 16. */
        if (!(i & 8)) {
            int j;

            if (i < 16) {
                /* load the input */
                for (j = 0; j < 16; j++)
                    LOAD_OP(i + j, W, input);
            } else {
                for (j = 0; j < 16; j++) {
                    BLEND_OP(i + j, W);
                }
            }
        }

        /* Eight rounds with rotated variable roles. */
        t1 = h + e1(e) + Ch(e,f,g) + sha512_K[i  ] + W[(i & 15)];
        t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
        t1 = g + e1(d) + Ch(d,e,f) + sha512_K[i+1] + W[(i & 15) + 1];
        t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
        t1 = f + e1(c) + Ch(c,d,e) + sha512_K[i+2] + W[(i & 15) + 2];
        t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
        t1 = e + e1(b) + Ch(b,c,d) + sha512_K[i+3] + W[(i & 15) + 3];
        t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
        t1 = d + e1(a) + Ch(a,b,c) + sha512_K[i+4] + W[(i & 15) + 4];
        t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
        t1 = c + e1(h) + Ch(h,a,b) + sha512_K[i+5] + W[(i & 15) + 5];
        t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
        t1 = b + e1(g) + Ch(g,h,a) + sha512_K[i+6] + W[(i & 15) + 6];
        t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
        t1 = a + e1(f) + Ch(f,g,h) + sha512_K[i+7] + W[(i & 15) + 7];
        t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
    }

    state[0] += a; state[1] += b; state[2] += c; state[3] += d;
    state[4] += e; state[5] += f; state[6] += g; state[7] += h;

    /* erase our data: best effort -- these dead stores may be elided by
     * the compiler. */
    a = b = c = d = e = f = g = h = t1 = t2 = 0;
}
// Compress the 64-byte block in ctx->Data into ctx->H (SHA-256).
// Calling with ctx == NULL only wipes the local buffers and returns --
// a scrub hook so sensitive data does not linger on the stack.
static void sha256_transform(sha256_context *ctx)
{
  uint32 W[64]; // Words of message schedule.
  uint32 v[8];  // FIPS a, b, c, d, e, f, g, h working variables.

  if (ctx == NULL) // Clean variables and return.
  {
    cleandata(v,sizeof(v));
    cleandata(W,sizeof(W));
    return;
  }

  // Prepare message schedule. Loop unrolling provides some small gain here.
  W[0]  = b2i(ctx->Data +  0 * 4 );
  W[1]  = b2i(ctx->Data +  1 * 4 );
  W[2]  = b2i(ctx->Data +  2 * 4 );
  W[3]  = b2i(ctx->Data +  3 * 4 );
  W[4]  = b2i(ctx->Data +  4 * 4 );
  W[5]  = b2i(ctx->Data +  5 * 4 );
  W[6]  = b2i(ctx->Data +  6 * 4 );
  W[7]  = b2i(ctx->Data +  7 * 4 );
  W[8]  = b2i(ctx->Data +  8 * 4 );
  W[9]  = b2i(ctx->Data +  9 * 4 );
  W[10] = b2i(ctx->Data + 10 * 4 );
  W[11] = b2i(ctx->Data + 11 * 4 );
  W[12] = b2i(ctx->Data + 12 * 4 );
  W[13] = b2i(ctx->Data + 13 * 4 );
  W[14] = b2i(ctx->Data + 14 * 4 );
  W[15] = b2i(ctx->Data + 15 * 4 );

  for (uint I = 16; I < 64; I++)
    W[I] = sg1(W[I-2]) + W[I-7] + sg0(W[I-15]) + W[I-16];

  uint32 *H=ctx->H;
  for (uint I = 0; I < 8; I++)
    v[I]=H[I];

  // MSVC -O2 partially unrolls this loop automatically.
  for (uint I = 0; I < 64; I++)
  {
    uint T1 = v[7] + Sg1(v[4]) + Ch(v[4], v[5], v[6]) + K[I] + W[I];

    // It is possible to eliminate variable copying if we unroll loop
    // and rename variables every time. But my test did not show any speed
    // gain on i7 for such full or partial unrolling.
    v[7] = v[6];
    v[6] = v[5];
    v[5] = v[4];
    v[4] = v[3] + T1;

    // It works a little faster when moved here from beginning of loop.
    uint T2 = Sg0(v[0]) + Maj(v[0], v[1], v[2]);

    v[3] = v[2];
    v[2] = v[1];
    v[1] = v[0];
    v[0] = T1 + T2;
  }

  // Davies-Meyer feed-forward into the chaining value.
  for (uint I = 0; I < 8; I++)
    H[I]+=v[I];
}
/*
========================================================================
Routine Description:
    SHA256 computation for one block (512 bits)

Arguments:
    pSHA_CTX    Pointer to SHA256_CTX_STRUC

Return Value:
    None

Note:
    The block buffer is zeroed and BlockLen reset after compression so
    the caller can refill it for the next block.
========================================================================
*/
VOID RT_SHA256_Hash (
    IN  SHA256_CTX_STRUC *pSHA_CTX)
{
    uint32_t W_i,t;
    uint32_t W[64];
    uint32_t a,b,c,d,e,f,g,h,T1,T2;

    /* Prepare the message schedule, {W_i}, 0 < t < 15 */
    memmove(W, pSHA_CTX->Block, SHA256_BLOCK_SIZE);
    for (W_i = 0; W_i < 16; W_i++)
        W[W_i] = cpu2be32(W[W_i]); /* Endian Swap */
    /* End of for */

    /* SHA256 hash computation */
    /* Initialize the working variables */
    a = pSHA_CTX->HashValue[0];
    b = pSHA_CTX->HashValue[1];
    c = pSHA_CTX->HashValue[2];
    d = pSHA_CTX->HashValue[3];
    e = pSHA_CTX->HashValue[4];
    f = pSHA_CTX->HashValue[5];
    g = pSHA_CTX->HashValue[6];
    h = pSHA_CTX->HashValue[7];

    /* 64 rounds; schedule words 16..63 are computed lazily just before
     * they are consumed. */
    for (t = 0;t < 64;t++) {
        if (t > 15) /* Prepare the message schedule, {W_i}, 16 < t < 63 */
            W[t] = Sigma_256_1(W[t-2]) + W[t-7] + Sigma_256_0(W[t-15]) + W[t-16];
        /* End of if */
        T1 = h + Zsigma_256_1(e) + Ch(e,f,g) + SHA256_K[t] + W[t];
        T2 = Zsigma_256_0(a) + Maj(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + T1;
        d = c;
        c = b;
        b = a;
        a = T1 + T2;
    } /* End of for */

    /* Compute the i^th intermediate hash value H^(i) */
    pSHA_CTX->HashValue[0] += a;
    pSHA_CTX->HashValue[1] += b;
    pSHA_CTX->HashValue[2] += c;
    pSHA_CTX->HashValue[3] += d;
    pSHA_CTX->HashValue[4] += e;
    pSHA_CTX->HashValue[5] += f;
    pSHA_CTX->HashValue[6] += g;
    pSHA_CTX->HashValue[7] += h;

    /* Reset the block buffer for the next update. */
    memset(pSHA_CTX->Block, 0, SHA256_BLOCK_SIZE);
    pSHA_CTX->BlockLen = 0;
} /* End of RT_SHA256_Hash */
/*
 * SHA-256 block function: fold `len` bytes (a multiple of 64) at `p`
 * into the 8-word state `s`.
 *
 * Fix: the big-endian loader shifted a byte promoted to (signed) int by
 * 24; for p[0] >= 0x80 that left shift overflows signed int, which is
 * undefined behaviour (C11 6.5.7).  The bytes are widened to uint32
 * first, which produces the same value portably.
 */
void _sha2block64(uchar *p, ulong len, uint32 *s)
{
    uint32 a, b, c, d, e, f, g, h, t1, t2;
    uint32 *kp, *wp;
    uint32 w[64];
    uchar *end;

    /* at this point, we have a multiple of 64 bytes */
    for(end = p+len; p < end;){
        a = s[0]; b = s[1]; c = s[2]; d = s[3];
        e = s[4]; f = s[5]; g = s[6]; h = s[7];

        /* load 16 big-endian 32-bit words */
        for(wp = w; wp < &w[16]; wp++, p += 4)
            wp[0] = (uint32)p[0] << 24 | (uint32)p[1] << 16 |
                    (uint32)p[2] << 8  | (uint32)p[3];

        /* extend the schedule to 64 words */
        for(; wp < &w[64]; wp++)
            wp[0] = sigma1(wp[-2]) + wp[-7] + sigma0(wp[-15]) + wp[-16];

        /* 64 rounds */
        for(kp = K256, wp = w; wp < &w[64]; ) {
            t1 = h + SIGMA1(e) + Ch(e,f,g) + *kp++ + *wp++;
            t2 = SIGMA0(a) + Maj(a,b,c);
            h = g; g = f; f = e; e = d + t1;
            d = c; c = b; b = a; a = t1 + t2;
        }

        /* save state */
        s[0] += a; s[1] += b; s[2] += c; s[3] += d;
        s[4] += e; s[5] += f; s[6] += g; s[7] += h;
    }
}
static void processblock(struct sha512 *s, const uint8_t *buf) { uint64_t W[80], t1, t2, a, b, c, d, e, f, g, h; int i; for (i = 0; i < 16; i++) { W[i] = (uint64_t)buf[8*i]<<56; W[i] |= (uint64_t)buf[8*i+1]<<48; W[i] |= (uint64_t)buf[8*i+2]<<40; W[i] |= (uint64_t)buf[8*i+3]<<32; W[i] |= (uint64_t)buf[8*i+4]<<24; W[i] |= (uint64_t)buf[8*i+5]<<16; W[i] |= (uint64_t)buf[8*i+6]<<8; W[i] |= buf[8*i+7]; } for (; i < 80; i++) W[i] = R1(W[i-2]) + W[i-7] + R0(W[i-15]) + W[i-16]; a = s->h[0]; b = s->h[1]; c = s->h[2]; d = s->h[3]; e = s->h[4]; f = s->h[5]; g = s->h[6]; h = s->h[7]; for (i = 0; i < 80; i++) { t1 = h + S1(e) + Ch(e,f,g) + K[i] + W[i]; t2 = S0(a) + Maj(a,b,c); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2; } s->h[0] += a; s->h[1] += b; s->h[2] += c; s->h[3] += d; s->h[4] += e; s->h[5] += f; s->h[6] += g; s->h[7] += h; }
/* Compress the 64-byte message block ctx->M into the chaining value
 * ctx->H (SHA-256).  ctx->M is copied verbatim into the schedule, so it
 * must already hold the words in the required byte order.  Schedule
 * expansion is folded into the round loop; each W[t] (t >= 16) is final
 * before its round consumes it. */
static void SHA256_transform( SHA256_ctx* ctx )
{
  unsigned int W[ 64 ];
  unsigned int v[ 8 ];   /* working variables a..h */
  unsigned int T1, T2;
  int t;

  memcpy( W, ctx->M, 64 );

  for ( t = 0; t < 8; t++ )
  {
    v[ t ] = ctx->H[ t ];
  }

  for ( t = 0; t < 64; t++ )
  {
    if ( t >= 16 )
    {
      W[ t ] = sig1(W[t-2]) + W[t-7] + sig0(W[t-15]) + W[t-16];
    }
    T1 = v[ 7 ] + SIG1( v[ 4 ] ) + Ch( v[ 4 ], v[ 5 ], v[ 6 ] ) + K[ t ] + W[ t ];
    T2 = SIG0( v[ 0 ] ) + Maj( v[ 0 ], v[ 1 ], v[ 2 ] );
    v[ 7 ] = v[ 6 ]; v[ 6 ] = v[ 5 ]; v[ 5 ] = v[ 4 ]; v[ 4 ] = v[ 3 ] + T1;
    v[ 3 ] = v[ 2 ]; v[ 2 ] = v[ 1 ]; v[ 1 ] = v[ 0 ]; v[ 0 ] = T1 + T2;
  }

  /* Feed-forward into the chaining value. */
  for ( t = 0; t < 8; t++ )
  {
    ctx->H[ t ] += v[ t ];
  }
}
/* SHA-256 compression of the buffered 64-byte block (md->sha256.buf) into
 * md->sha256.state.  The stray #endif after the signature closes a
 * prototype variant selected earlier in the file. */
static void sha256_compress(hash_state * md)
#endif
{
    unsigned long S[8], W[64], t0, t1;
    int i;

    _ARGCHK(md != NULL);

    /* copy state into S */
    for (i = 0; i < 8; i++)
        S[i] = md->sha256.state[i];

    /* load the 512-bit block into W[0..15] (LOAD32H reads big-endian) */
    for (i = 0; i < 16; i++) {
        LOAD32H(W[i], md->sha256.buf + (4*i));
    }

    /* fill W[16..63] */
    for (i = 16; i < 64; i++) {
        W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
    }

    /* Compress: rotate-by-copy round function */
    for (i = 0; i < 64; i++) {
        t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
        t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
        S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; S[4] = S[3] + t0;
        S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t0 + t1;
    }

    /* feedback */
    for (i = 0; i < 8; i++) {
        md->sha256.state[i] = md->sha256.state[i] + S[i];
    }
}
// Compress the current block (m_aBlock) into the running hash m_dwH.
// Instead of rotating eight scalars, the working variables live in a
// sliding window at the top of Hash[]: pHash steps down one slot per
// round, so after the decrement pHash[1..8] hold the previous round's
// a..h and the two new values land in pHash[0] (a) and pHash[4] (e).
void GflSHA256::Generate(void)
{
    int i;
    DWORD W[SHA256_WORK];
    DWORD Hash[SHA256_WORK + SHA256_HASH];

    // Message schedule: byte-swap the block words, then expand.
    for(i = 0; i < SHA256_BLOCK; i++)
        W[i] = ReverseEndian(m_aBlock[i]);
    for(i = SHA256_BLOCK; i < SHA256_WORK; i++)
        W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];

    // Seed the window with the current hash value.
    for(i = 0; i < SHA256_HASH; i++)
        Hash[SHA256_WORK + i] = m_dwH[i];
    DWORD *pHash = &Hash[SHA256_WORK];
    DWORD dwT1, dwT2;
    for(i = 0; i < SHA256_WORK; i++){
        pHash--;
        // pHash[5..7] = e,f,g; pHash[8] = h; pHash[1..3] = a,b,c.
        dwT1 = pHash[8] + S1(pHash[5]) + Ch(pHash[5], pHash[6], pHash[7]) + c_dwK[i] + W[i];
        dwT2 = S0(pHash[1]) + Maj(pHash[1], pHash[2], pHash[3]);
        pHash[0] = dwT1 + dwT2;   // new a
        pHash[4] += dwT1;         // old d becomes new e
    }
    // pHash now points at the final a..h; feed-forward into m_dwH.
    for(i = 0; i < SHA256_HASH; i++)
        m_dwH[i] += pHash[i];
}
static void shs_transform(sha256 *sh)
{ /* core compression: 64 rounds over the block buffered in sh->w */
    mr_unsign32 s[8], tmp1, tmp2;
    int j;

    /* s[0..7] are the working variables a..h. */
    for (j = 0; j < 8; j++)
        s[j] = sh->h[j];

    for (j = 0; j < 64; j++)
    { /* schedule words 16..63 are expanded just before use */
        if (j >= 16)
            sh->w[j] = theta1(sh->w[j-2]) + sh->w[j-7] + theta0(sh->w[j-15]) + sh->w[j-16];
        tmp1 = s[7] + Sig1(s[4]) + Ch(s[4], s[5], s[6]) + K[j] + sh->w[j];
        tmp2 = Sig0(s[0]) + Maj(s[0], s[1], s[2]);
        s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + tmp1;
        s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = tmp1 + tmp2;
    }

    /* feed-forward into the chaining value */
    for (j = 0; j < 8; j++)
        sh->h[j] += s[j];
}
/* One SHA-512 round for an 8-bit target: the eight working variables are
 * multi-byte big-int words (myu64) at state[0..7], combined via
 * bigint_add64, with the round constant fetched byte-by-byte from program
 * memory (AVR pgm_read_byte).
 * NOTE(review): Sigma's trailing arguments (14,18,23 and 28,34,5) are this
 * port's own encoding of the SHA-512 rotation sets -- confirm against
 * Sigma's definition before relying on the literal numbers. */
static void myF(myu64 *state, const myu64 *w, int k_index)
{
    myu64 t, t1, t2;
    unsigned char i;

    /* t1 = h + Sigma1(e) + Ch(e,f,g) + K[k_index] + w */
    Ch(&t, state+4, state+5, state+6);
    Sigma(&t1, state+4, 14, 18, 23);
    bigint_add64(t1.v, t1.v, (state+7)->v);
    bigint_add64(t1.v, t1.v, t.v);
    for(i=0;i<8;i++)
        t.v[i] = pgm_read_byte((unsigned char *)roundconstants_pgm+k_index*8+i);
    bigint_add64(t1.v, t1.v, t.v);
    bigint_add64(t1.v, t1.v, w->v);

    /* t2 = Sigma0(a) + Maj(a,b,c) */
    Sigma(&t2, state+0, 28, 34, 5);
    Maj(&t,state+0,state+1,state+2);
    bigint_add64(t2.v, t2.v, t.v);

    /* Rotate the eight working variables. */
    *(state+7) = *(state+6);
    *(state+6) = *(state+5);
    *(state+5) = *(state+4);
    bigint_add64((state+4)->v, (state+3)->v, t1.v);   /* e = d + t1 */
    *(state+3) = *(state+2);
    *(state+2) = *(state+1);
    *(state+1) = *(state+0);
    bigint_add64((state+0)->v, t1.v, t2.v);           /* a = t1 + t2 */
}
/* Process LEN bytes of BUFFER, accumulating context into CTX.
   It is assumed that LEN % 128 == 0.  The working variables persist in
   a..h across blocks and are written back to CTX->H only at the end.  */
void
sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx)
{
  const uint64_t *words = buffer;
  size_t nwords = len / sizeof (uint64_t);
  uint64_t a = ctx->H[0];
  uint64_t b = ctx->H[1];
  uint64_t c = ctx->H[2];
  uint64_t d = ctx->H[3];
  uint64_t e = ctx->H[4];
  uint64_t f = ctx->H[5];
  uint64_t g = ctx->H[6];
  uint64_t h = ctx->H[7];

  /* First increment the byte count.  FIPS 180-2 specifies the possible
     length of the file up to 2^128 bits.  Here we only compute the
     number of bytes.  Do a double word increment.  */
#ifdef USE_TOTAL128
  ctx->total128 += len;
#else
  uint64_t lolen = len;
  ctx->total[TOTAL128_low] += lolen;
  /* (len >> 31 >> 31 >> 2) is len >> 64 written so it is well-defined even
     when size_t is 64 bits or narrower; the comparison adds the carry.  */
  ctx->total[TOTAL128_high] += ((len >> 31 >> 31 >> 2)
                                + (ctx->total[TOTAL128_low] < lolen));
#endif

  /* Process all bytes in the buffer with 128 bytes in each round of
     the loop.  */
  while (nwords > 0)
    {
      uint64_t W[80];
      uint64_t a_save = a;
      uint64_t b_save = b;
      uint64_t c_save = c;
      uint64_t d_save = d;
      uint64_t e_save = e;
      uint64_t f_save = f;
      uint64_t g_save = g;
      uint64_t h_save = h;

      /* Operators defined in FIPS 180-2:4.1.2.  */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39))
#define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41))
#define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7))
#define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6))

      /* It is unfortunate that C does not provide an operator for
         cyclic rotation.  Hope the C compiler is smart enough.  */
#define CYCLIC(w, s) ((w >> s) | (w << (64 - s)))

      /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
      for (unsigned int t = 0; t < 16; ++t)
        {
          W[t] = SWAP (*words);
          ++words;
        }
      for (unsigned int t = 16; t < 80; ++t)
        W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];

      /* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
      for (unsigned int t = 0; t < 80; ++t)
        {
          uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
          uint64_t T2 = S0 (a) + Maj (a, b, c);
          h = g;
          g = f;
          f = e;
          e = d + T1;
          d = c;
          c = b;
          b = a;
          a = T1 + T2;
        }

      /* Add the starting values of the context according to FIPS 180-2:6.3.2
         step 4.  */
      a += a_save;
      b += b_save;
      c += c_save;
      d += d_save;
      e += e_save;
      f += f_save;
      g += g_save;
      h += h_save;

      /* Prepare for the next round.  */
      nwords -= 16;
    }

  /* Put checksum in context given as argument.  */
  ctx->H[0] = a;
  ctx->H[1] = b;
  ctx->H[2] = c;
  ctx->H[3] = d;
  ctx->H[4] = e;
  ctx->H[5] = f;
  ctx->H[6] = g;
  ctx->H[7] = h;
}
/* SHA-512 compression of one 128-byte block from `buf` into
 * md->sha512.state; always returns CRYPT_OK.  The stray #endif after the
 * signature closes a prototype variant selected earlier in the file. */
static int sha512_compress(hash_state * md, unsigned char *buf)
#endif
{
    ulong64 S[8], W[80], t0, t1;
    int i;

    /* copy state into S */
    for (i = 0; i < 8; i++) {
        S[i] = md->sha512.state[i];
    }

    /* load the 1024-bit block into W[0..15] (LOAD64H reads big-endian) */
    for (i = 0; i < 16; i++) {
        LOAD64H(W[i], buf + (8*i));
    }

    /* fill W[16..79] */
    for (i = 16; i < 80; i++) {
        W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
    }

    /* Compress */
#ifdef LTC_SMALL_CODE
    /* Compact rotate-by-copy round loop. */
    for (i = 0; i < 80; i++) {
        t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
        t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
        S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; S[4] = S[3] + t0;
        S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t0 + t1;
    }
#else
    /* Unrolled x8 with rotated register naming: avoids the copy chain. */
#define RND(a,b,c,d,e,f,g,h,i) \
    t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
    t1 = Sigma0(a) + Maj(a, b, c); \
    d += t0; \
    h = t0 + t1;
    for (i = 0; i < 80; i += 8) {
        RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
        RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
        RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
        RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
        RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
        RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
        RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
        RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
    }
#endif

    /* feedback */
    for (i = 0; i < 8; i++) {
        md->sha512.state[i] = md->sha512.state[i] + S[i];
    }
    return CRYPT_OK;
}
/* Compress one 128-byte block into the SHA-512 state.  CONFIG_SMALL
 * selects a compact loop; otherwise the 80 rounds are fully unrolled via
 * the ROUND512_* macros with rotated argument order.  `block` is the
 * schedule storage consumed by the blk0/blk/ROUND512_* macros, which are
 * defined elsewhere in this file. */
static void sha512_transform(uint64_t *state, const uint8_t buffer[128])
{
    uint64_t a, b, c, d, e, f, g, h;
    uint64_t block[80];
    uint64_t T1;
    int i;

    a = state[0]; b = state[1]; c = state[2]; d = state[3];
    e = state[4]; f = state[5]; g = state[6]; h = state[7];
#if CONFIG_SMALL
    for (i = 0; i < 80; i++) {
        uint64_t T2;
        if (i < 16)
            T1 = blk0(i);   /* rounds 0-15: words from the input buffer */
        else
            T1 = blk(i);    /* rounds 16-79: schedule extension */
        T1 += h + Sigma1_512(e) + Ch(e, f, g) + K512[i];
        T2 = Sigma0_512(a) + Maj(a, b, c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }
#else

#define R512_0 \
    ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \
    ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \
    ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \
    ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \
    ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \
    ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \
    ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \
    ROUND512_0_TO_15(b, c, d, e, f, g, h, a)

    /* 2 x 8 = rounds 0-15. */
    i = 0;
    R512_0; R512_0;

#define R512_16 \
    ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \
    ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \
    ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \
    ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \
    ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \
    ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \
    ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \
    ROUND512_16_TO_80(b, c, d, e, f, g, h, a)

    /* 8 x 8 = rounds 16-79. */
    R512_16; R512_16; R512_16; R512_16;
    R512_16; R512_16; R512_16; R512_16;
#endif
    state[0] += a; state[1] += b; state[2] += c; state[3] += d;
    state[4] += e; state[5] += f; state[6] += g; state[7] += h;
}
/* Compress one 64-byte block into the SHA-256 state.  CONFIG_SMALL selects
 * a compact loop; otherwise the 64 rounds are unrolled eight at a time via
 * the ROUND256_* macros with rotated arguments.  `block` is the schedule
 * storage consumed by the blk0/blk/ROUND256_* macros, defined elsewhere in
 * this file (they also advance `i` in the unrolled path). */
static void sha256_transform(uint32_t *state, const uint8_t buffer[64])
{
    unsigned int i, a, b, c, d, e, f, g, h;
    uint32_t block[64];
    uint32_t T1;

    a = state[0]; b = state[1]; c = state[2]; d = state[3];
    e = state[4]; f = state[5]; g = state[6]; h = state[7];
#if CONFIG_SMALL
    for (i = 0; i < 64; i++) {
        uint32_t T2;
        if (i < 16)
            T1 = blk0(i);   /* rounds 0-15: words from the input buffer */
        else
            T1 = blk(i);    /* rounds 16-63: schedule extension */
        T1 += h + Sigma1_256(e) + Ch(e, f, g) + K256[i];
        T2 = Sigma0_256(a) + Maj(a, b, c);
        h = g; g = f; f = e; e = d + T1;
        d = c; c = b; b = a; a = T1 + T2;
    }
#else
    for (i = 0; i < 16;) {
        ROUND256_0_TO_15(a, b, c, d, e, f, g, h);
        ROUND256_0_TO_15(h, a, b, c, d, e, f, g);
        ROUND256_0_TO_15(g, h, a, b, c, d, e, f);
        ROUND256_0_TO_15(f, g, h, a, b, c, d, e);
        ROUND256_0_TO_15(e, f, g, h, a, b, c, d);
        ROUND256_0_TO_15(d, e, f, g, h, a, b, c);
        ROUND256_0_TO_15(c, d, e, f, g, h, a, b);
        ROUND256_0_TO_15(b, c, d, e, f, g, h, a);
    }
    for (; i < 64;) {
        ROUND256_16_TO_63(a, b, c, d, e, f, g, h);
        ROUND256_16_TO_63(h, a, b, c, d, e, f, g);
        ROUND256_16_TO_63(g, h, a, b, c, d, e, f);
        ROUND256_16_TO_63(f, g, h, a, b, c, d, e);
        ROUND256_16_TO_63(e, f, g, h, a, b, c, d);
        ROUND256_16_TO_63(d, e, f, g, h, a, b, c);
        ROUND256_16_TO_63(c, d, e, f, g, h, a, b);
        ROUND256_16_TO_63(b, c, d, e, f, g, h, a);
    }
#endif
    state[0] += a; state[1] += b; state[2] += c; state[3] += d;
    state[4] += e; state[5] += f; state[6] += g; state[7] += h;
}
/****************
 * Transform the message W which consists of 16 64-bit-words:
 * compress one 128-byte block at DATA into the chaining variables of HD.
 */
static void
transform (SHA512_CONTEXT *hd, const unsigned char *data)
{
  u64 a, b, c, d, e, f, g, h;
  u64 w[80];
  int t;
  /* SHA-512 round constants (FIPS 180-4, sec. 4.2.3). */
  static const u64 k[] =
    {
      U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
      U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
      U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
      U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
      U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
      U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
      U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
      U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
      U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
      U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
      U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
      U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
      U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
      U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
      U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
      U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
      U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
      U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
      U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
      U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
      U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
      U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
      U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
      U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
      U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
      U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
      U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
      U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
      U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
      U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
      U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
      U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
      U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
      U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
      U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
      U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
      U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
      U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
      U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
      U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
    };

  /* get values from the chaining vars */
  a = hd->h0;
  b = hd->h1;
  c = hd->h2;
  d = hd->h3;
  e = hd->h4;
  f = hd->h5;
  g = hd->h6;
  h = hd->h7;

#ifdef WORDS_BIGENDIAN
  memcpy (w, data, 128);
#else
  {
    int i;
    byte *p2;

    /* Byte-swap the 16 input words into w[0..15]. */
    for (i = 0, p2 = (byte *) w; i < 16; i++, p2 += 8)
      {
        p2[7] = *data++;
        p2[6] = *data++;
        p2[5] = *data++;
        p2[4] = *data++;
        p2[3] = *data++;
        p2[2] = *data++;
        p2[1] = *data++;
        p2[0] = *data++;
      }
  }
#endif

#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

  for (t = 16; t < 80; t++)
    w[t] = S1 (w[t - 2]) + w[t - 7] + S0 (w[t - 15]) + w[t - 16];

  for (t = 0; t < 80; )
    {
      u64 t1, t2;

      /* Performance on a AMD Athlon(tm) Dual Core Processor 4050e
         with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes
         initialized to 0,1,2,3...255,0,...  and 1000 iterations:

         Not unrolled with macros:  440ms
         Unrolled with macros:      350ms
         Unrolled with inline:      330ms
       */
#if 1 /* Not unrolled.  */
      t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
      t2 = Sum0 (a) + Maj (a, b, c);
      h = g;
      g = f;
      f = e;
      e = d + t1;
      d = c;
      c = b;
      b = a;
      a = t1 + t2;
      t++;
#else /* Unrolled to interweave the chain variables.  */
      t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
      t2 = Sum0 (a) + Maj (a, b, c);
      d += t1;
      h = t1 + t2;

      t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[t+1];
      t2 = Sum0 (h) + Maj (h, a, b);
      c += t1;
      g = t1 + t2;

      t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[t+2];
      t2 = Sum0 (g) + Maj (g, h, a);
      b += t1;
      f = t1 + t2;

      t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[t+3];
      t2 = Sum0 (f) + Maj (f, g, h);
      a += t1;
      e = t1 + t2;

      t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[t+4];
      t2 = Sum0 (e) + Maj (e, f, g);
      h += t1;
      d = t1 + t2;

      t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[t+5];
      t2 = Sum0 (d) + Maj (d, e, f);
      g += t1;
      c = t1 + t2;

      t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[t+6];
      t2 = Sum0 (c) + Maj (c, d, e);
      f += t1;
      b = t1 + t2;

      t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[t+7];
      t2 = Sum0 (b) + Maj (b, c, d);
      e += t1;
      a = t1 + t2;

      t += 8;
#endif
    }

  /* Update chaining vars.  */
  hd->h0 += a;
  hd->h1 += b;
  hd->h2 += c;
  hd->h3 += d;
  hd->h4 += e;
  hd->h5 += f;
  hd->h6 += g;
  hd->h7 += h;
}
/*
 * sha256_transform - mix one 64-byte input block into the SHA-256 state.
 *
 * Same computation as the fully unrolled original: load the 16 message
 * words (byte order handled by LOAD_OP), extend the schedule to 64 words
 * (BLEND_OP), run the 64 compression rounds, then add the working
 * variables back into @state.  The round constants are the standard
 * FIPS 180-4 K values, kept here in a table instead of being inlined
 * into each unrolled round.
 */
static void sha256_transform(u32 *state, const u8 *input)
{
	static const u32 sha256_k[64] = {
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
	};
	u32 v[8];		/* working variables a..h as v[0]..v[7] */
	u32 t1, t2, W[64];
	int i;

	/* load the input */
	for (i = 0; i < 16; i++)
		LOAD_OP(i, W, input);

	/* now blend: extend the message schedule to 64 words */
	for (i = 16; i < 64; i++)
		BLEND_OP(i, W);

	/* load the state into the working variables */
	for (i = 0; i < 8; i++)
		v[i] = state[i];

	/* 64 compression rounds (same order of operations as the unrolled form) */
	for (i = 0; i < 64; i++) {
		t1 = v[7] + e1(v[4]) + Ch(v[4], v[5], v[6]) + sha256_k[i] + W[i];
		t2 = e0(v[0]) + Maj(v[0], v[1], v[2]);
		v[7] = v[6];
		v[6] = v[5];
		v[5] = v[4];
		v[4] = v[3] + t1;
		v[3] = v[2];
		v[2] = v[1];
		v[1] = v[0];
		v[0] = t1 + t2;
	}

	/* fold the working variables back into the chaining state */
	for (i = 0; i < 8; i++)
		state[i] += v[i];

	/* clear any sensitive info...
	 * NOTE(review): a plain memset of dead locals may be elided by the
	 * optimizer; confirm whether an explicit-zeroing helper is available. */
	memset(v, 0, sizeof(v));
	t1 = t2 = 0;
	memset(W, 0, 64 * sizeof(u32));
}
/*
 * SHA1_HashBlock - compress the 64-byte block buffered in @p into p->H.
 *
 * Same computation as the original four-loop version: build the 80-word
 * message schedule from p->block (big-endian word assembly), then run the
 * 80 rounds in a single loop; the round function and additive constant
 * change every 20 rounds exactly as in FIPS 180-4.
 */
static void SHA1_HashBlock(SHA1_CONTEXT *p)
{
	UINT32 W[80];
	UINT32 a, b, c, d, e, tmp, fn, kc;
	int t, j;

	/* Prepare Message Schedule, {W sub t}. */
	for (t = 0, j = 0; t < 16; t++, j += 4) {
		W[t] = (p->block[j    ] << 24) |
		       (p->block[j + 1] << 16) |
		       (p->block[j + 2] <<  8) |
		       (p->block[j + 3]       );
	}
	for (t = 16; t < 80; t++)
		W[t] = ROTL(W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16], 1);

	a = p->H[0];
	b = p->H[1];
	c = p->H[2];
	d = p->H[3];
	e = p->H[4];

	/* One loop for all 80 rounds; f() and K selected by round quarter. */
	for (t = 0; t < 80; t++) {
		if (t < 20) {
			fn = Ch(b, c, d);
			kc = 0x5A827999;
		} else if (t < 40) {
			fn = Parity(b, c, d);
			kc = 0x6ED9EBA1;
		} else if (t < 60) {
			fn = Maj(b, c, d);
			kc = 0x8F1BBCDC;
		} else {
			fn = Parity(b, c, d);
			kc = 0xCA62C1D6;
		}
		tmp = ROTL(a, 5) + fn + e + kc + W[t];
		e = d;
		d = c;
		c = ROTL(b, 30);
		b = a;
		a = tmp;
	}

	p->H[0] += a;
	p->H[1] += b;
	p->H[2] += c;
	p->H[3] += d;
	p->H[4] += e;
}
/*
 * sha256_transform - SSE2 SHA-256 compression over __m128i lanes.
 *
 * Each __m128i carries one 32-bit word position for four independent
 * streams (assumes add2/add4/add5 and the Sigma/sigma/Ch/Maj helpers are
 * lane-wise — defined elsewhere in this file).  Reads the chaining value
 * from @state, compresses the 16 schedule vectors in @block, and writes
 * the updated chaining value to @dst; @state itself is not modified.
 * Identical computation to the fully unrolled original, expressed as
 * loops over a round-constant table (FIPS 180-4 K values).
 */
static inline void sha256_transform(__m128i *state, __m128i *block, __m128i *dst)
{
	static const unsigned int kconst[64] = {
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
	};
	__m128i W[64], t1, t2;
	__m128i a, b, c, d, e, f, g, h;
	int i;

	/* first 16 schedule vectors come straight from the block */
	for (i = 0; i < 16; i++)
		W[i] = block[i];

	/* extend the message schedule to 64 vectors */
	for (i = 16; i < 64; i++)
		W[i] = add4(sigma1(W[i - 2]), W[i - 7],
			    sigma0(W[i - 15]), W[i - 16]);

	/* read existing state */
	a = state[0];
	b = state[1];
	c = state[2];
	d = state[3];
	e = state[4];
	f = state[5];
	g = state[6];
	h = state[7];

	/* 64 compression rounds; variable rotation replaces the unrolled
	 * register renaming of the original — same values each round. */
	for (i = 0; i < 64; i++) {
		t1 = add5(h, Sigma1(e), Ch(e, f, g),
			  _mm_set1_epi32((int)kconst[i]), W[i]);
		t2 = add2(Sigma0(a), Maj(a, b, c));
		h = g;
		g = f;
		f = e;
		e = add2(d, t1);
		d = c;
		c = b;
		b = a;
		a = add2(t1, t2);
	}

	/* write the new chaining value to dst (state is left untouched) */
	dst[0] = add2(state[0], a);
	dst[1] = add2(state[1], b);
	dst[2] = add2(state[2], c);
	dst[3] = add2(state[3], d);
	dst[4] = add2(state[4], e);
	dst[5] = add2(state[5], f);
	dst[6] = add2(state[6], g);
	dst[7] = add2(state[7], h);
}