/*
 * SHA-256 compression step: mixes one message block into the chaining value.
 *
 * iv   - eight 32-bit chaining words; read on entry and updated in place.
 * data - message block. Sixteen words are obtained through BigEndian(), which
 *        is handed the address of the local cursor (presumably it advances
 *        the cursor by one word per call - confirm against its definition).
 */
static void sha256_compress(unsigned int* iv, const uint8_t* data)
{
    unsigned int a = iv[0];
    unsigned int b = iv[1];
    unsigned int c = iv[2];
    unsigned int d = iv[3];
    unsigned int e = iv[4];
    unsigned int f = iv[5];
    unsigned int g = iv[6];
    unsigned int h = iv[7];
    unsigned int schedule[16];  /* rolling window over the last 16 schedule words */
    unsigned int round;

    for (round = 0; round < 64; ++round) {
        unsigned int t1, t2;

        if (round < 16) {
            /* The first sixteen rounds consume the block words directly. */
            const unsigned int word = BigEndian(&data);
            t1 = schedule[round] = word;
        } else {
            /* Later rounds derive the next word from the rolling window. */
            unsigned int s0 = schedule[(round + 1) & 0x0f];
            unsigned int s1 = schedule[(round + 14) & 0x0f];
            s0 = sigma0(s0);
            s1 = sigma1(s1);
            schedule[round & 0xf] += s0 + s1 + schedule[(round + 9) & 0xf];
            t1 = schedule[round & 0xf];
        }

        t1 += h + Sigma1(e) + Ch(e, f, g) + k256[round];
        t2 = Sigma0(a) + Maj(a, b, c);

        /* Rotate the eight working words by one position. */
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
    }

    /* Feed the result forward into the chaining value. */
    iv[0] += a;
    iv[1] += b;
    iv[2] += c;
    iv[3] += d;
    iv[4] += e;
    iv[5] += f;
    iv[6] += g;
    iv[7] += h;
}
/*
 * SHA-256 block function: folds `num` consecutive input blocks into ctx->h.
 *
 * ctx  - context whose h[0..7] chaining words are updated once per block.
 * in   - start of the input data.
 * num  - number of blocks to process.
 * host - non-zero: the sixteen message words of each block are read directly
 *        as SHA_LONG values from the buffer and the cursor is then moved on by
 *        SHA256_CBLOCK; zero: every word is fetched with HOST_c2l(), which is
 *        expected to advance the cursor itself (confirm against the macro).
 */
static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
{
    unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
    SHA_LONG X[16];
    int i;
    const unsigned char *data=in;

    for (; num != 0; --num) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (i = 0; i < 64; i++) {
            if (i >= 16) {
                /* Extend the schedule inside the 16-word ring. */
                s0 = X[(i+1)&0x0f];
                s0 = sigma0(s0);
                s1 = X[(i+14)&0x0f];
                s1 = sigma1(s1);
                X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
                T1 = X[i&0xf];
            } else if (host) {
                /* Input words are used as stored in memory. */
                X[i] = ((const SHA_LONG *)data)[i];
                T1 = X[i];
            } else {
                SHA_LONG l;

                HOST_c2l(data,l);
                X[i] = l;
                T1 = X[i];
            }

            T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
            T2 = Sigma0(a) + Maj(a,b,c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* In host mode nothing above moved the cursor, so step over the block here. */
        if (host) {
            data += SHA256_CBLOCK;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
/**
 * sha512 compression function - 32-bit machines
 *
 * The working variables b,c,d and f,g,h are not shuffled between rounds.
 * They live in a frame inside a scratch array and the frame pointer moves
 * down by one slot per round, so last round's F[0] is this round's F[1],
 * and so on. Slot F[8] keeps the schedule word of the round, which lets
 * later rounds find earlier schedule words at fixed offsets from F.
 *
 * @param res  The resulting hash value (written with PUSH64)
 * @param hash The chaining input value (read with PULL64)
 * @param in   The message input
 */
void sha512_comp (hashblock res, const hashblock hash, const messageblock in)
{
    const uint64_t *W=in;
    uint64_t chain[8];
    uint64_t scratch[9+80];
    uint64_t *F;
    uint64_t A,E,T;
    int i;

    for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
        chain[i]=PULL64(hash[i*8]);
    }

    F = scratch+80;
    A    = chain[0];
    F[1] = chain[1];
    F[2] = chain[2];
    F[3] = chain[3];
    E    = chain[4];
    F[5] = chain[5];
    F[6] = chain[6];
    F[7] = chain[7];

    for (i = 0; i < 80; i++, F--) {
        if (i < 16) {
            /* First sixteen rounds: schedule word comes from the message. */
#ifdef B_ENDIAN
            T = W[i];
#else
            T = PULL64(W[i]);
#endif
        } else {
            /* Offsets are relative to the current frame; F[8] of the round
             * k steps back sits at F[8+k] now. */
            T  = sigma0(F[8+16-1]);
            T += sigma1(F[8+16-14]);
            T += F[8+16] + F[8+16-9];
        }

        F[0] = A;
        F[4] = E;
        F[8] = T;
        T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
        E = F[3] + T;
        A = T + Sigma0(A) + Maj(A,F[1],F[2]);
    }

    chain[0] += A;
    chain[1] += F[1];
    chain[2] += F[2];
    chain[3] += F[3];
    chain[4] += E;
    chain[5] += F[5];
    chain[6] += F[6];
    chain[7] += F[7];

    for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
        PUSH64(chain[i],res[i*8]);
    }
}
/*
 * This code should give better results on 32-bit CPU with less than
 * ~24 registers, both size and performance wise...
 *
 * Processes `num` blocks of sixteen 64-bit words starting at W and folds each
 * one into state[0..7]. Only A and E are kept in scalars; the remaining
 * working words sit in a frame that slides down the scratch array by one
 * slot per round, with F[8] holding that round's schedule word.
 */
void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num)
{
    uint64_t ring[9 + 80];
    uint64_t *F;
    uint64_t A, E, T;
    int round;

    for (; num != 0; --num, W += 16) {
        F = ring + 80;
        A = state[0];
        F[1] = state[1];
        F[2] = state[2];
        F[3] = state[3];
        E = state[4];
        F[5] = state[5];
        F[6] = state[6];
        F[7] = state[7];

        /* Rounds 0..15: schedule words are the converted input words. */
        round = 0;
        while (round < 16) {
            T = from_be_u64(W[round]);
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[round];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
            round++;
            F--;
        }

        /* Rounds 16..79: schedule words are derived from earlier F[8] slots,
         * addressed relative to the current frame. */
        while (round < 80) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[round];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
            round++;
            F--;
        }

        state[0] += A;
        state[1] += F[1];
        state[2] += F[2];
        state[3] += F[3];
        state[4] += E;
        state[5] += F[5];
        state[6] += F[6];
        state[7] += F[7];
    }
}
/**
 * sha512 compression function - 64-bit machines
 *
 * Runs 80 rounds over eight scalar working words, keeping the message
 * schedule in a 16-entry ring that is overwritten in place.
 *
 * @param res  The resulting hash value (written with PUSH64)
 * @param hash The chaining input value (read with PULL64)
 * @param in   The message input
 */
void sha512_comp (hashblock res, const hashblock hash, const messageblock in)
{
    // CHANGE type casting added due to c++
    const uint64_t *W=reinterpret_cast<const uint64_t*>(in);
    uint64_t a,b,c,d,e,f,g,h,s0,s1,T1,T2;
    uint64_t sched[16];
    uint64_t chain[8];
    int i;

    for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
        chain[i]=PULL64(hash[i*8]);
    }

    a = chain[0];
    b = chain[1];
    c = chain[2];
    d = chain[3];
    e = chain[4];
    f = chain[5];
    g = chain[6];
    h = chain[7];

    for (i = 0; i < 80; i++) {
        if (i < 16) {
            // First sixteen rounds: take the schedule word from the message.
#ifdef B_ENDIAN
            T1 = sched[i] = W[i];
#else
            T1 = sched[i] = PULL64(W[i]);
#endif
        } else {
            // Remaining rounds: extend the schedule inside the ring.
            s0 = sched[(i+1)&0x0f];
            s0 = sigma0(s0);
            s1 = sched[(i+14)&0x0f];
            s1 = sigma1(s1);
            sched[i&0xf] += s0 + s1 + sched[(i+9)&0xf];
            T1 = sched[i&0xf];
        }

        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
        T2 = Sigma0(a) + Maj(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + T1;
        d = c;
        c = b;
        b = a;
        a = T1 + T2;
    }

    chain[0] += a;
    chain[1] += b;
    chain[2] += c;
    chain[3] += d;
    chain[4] += e;
    chain[5] += f;
    chain[6] += g;
    chain[7] += h;

    for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
        PUSH64(chain[i],res[i*8]);
    }
}
// Flattens the trajectory, the initial covariance, part of x0 and the three
// alpha weights into the plain double arrays supplied by the caller.
//
//   XU_arr     receives X[0], U[0], X[1], U[1], ..., X[T-2], U[T-2], X[T-1]
//              (C_DIM entries per X, U_DIM entries per U).
//   Sigma0_arr receives the X_DIM x X_DIM product SqrtSigma0*SqrtSigma0,
//              element (i,j) with the first index varying slowest.
//   l_arr      receives x0[C_DIM .. X_DIM-1].
//   params_arr receives alpha_belief, alpha_control, alpha_final_belief.
//
// The caller must provide arrays large enough for these writes.
void setupCasadiVars(const std::vector<Matrix<C_DIM> >& X, const std::vector<Matrix<U_DIM> >& U, double* XU_arr, double* Sigma0_arr, double* l_arr, double* params_arr)
{
    double* xuCursor = XU_arr;
    for (int t = 0; t < T-1; ++t) {
        for (int i = 0; i < C_DIM; ++i) {
            *xuCursor++ = X[t][i];
        }
        for (int i = 0; i < U_DIM; ++i) {
            *xuCursor++ = U[t][i];
        }
    }
    // The final time step contributes a state but no control.
    for (int i = 0; i < C_DIM; ++i) {
        *xuCursor++ = X[T-1][i];
    }

    Matrix<X_DIM,X_DIM> initialSigma = SqrtSigma0*SqrtSigma0;
    double* sigmaCursor = Sigma0_arr;
    for (int i = 0; i < X_DIM; ++i) {
        for (int j = 0; j < X_DIM; ++j) {
            *sigmaCursor++ = initialSigma(i,j);
        }
    }

    double* lCursor = l_arr;
    for (int i = C_DIM; i < X_DIM; ++i) {
        *lCursor++ = x0[i];
    }

    params_arr[0] = alpha_belief;
    params_arr[1] = alpha_control;
    params_arr[2] = alpha_final_belief;
}
/**
 * One round of SHA-256.
 *
 * Only d and h are passed by reference and modified; the caller is expected
 * to rotate the argument order between successive rounds rather than moving
 * values between variables. k is added into the first temporary alongside
 * h, Sigma1(e) and Ch(e, f, g).
 */
void inline __attribute__((always_inline)) Round(__m256i a, __m256i b, __m256i c, __m256i& d, __m256i e, __m256i f, __m256i g, __m256i& h, __m256i k)
{
    __m256i mixed_in = Add(h, Sigma1(e), Ch(e, f, g), k);
    __m256i majority = Add(Sigma0(a), Maj(a, b, c));

    d = Add(d, mixed_in);
    h = Add(mixed_in, majority);
}
/*****************************************
 * sha256 compression function           *
 *                                       *
 * res  receives the resulting value     *
 * hash points to chaining input         *
 * in   points to the message input      *
 *                                       *
 * The chaining words are read with      *
 * HOST_c2l and the result is written    *
 * with HOST_l2c; both macros are used   *
 * here as if they step their pointer    *
 * argument forward on every use.        *
 *****************************************/
void sha256_comp (hashblock res, const hashblock hash, const void *in)
{
    uint32_t a,b,c,d,e,f,g,h,s0,s1,T1,T2;
    uint32_t chain[8];
    uint32_t sched[16],l;
    int i;
    // CHANGE type casting added due to c++
    const unsigned char *data=static_cast<const unsigned char*>(in);

    for (i = 0; i < SHA256_DIGEST_LENGTH/4; i++) {
        HOST_c2l(hash, chain[i]);
    }

    a = chain[0];
    b = chain[1];
    c = chain[2];
    d = chain[3];
    e = chain[4];
    f = chain[5];
    g = chain[6];
    h = chain[7];

    for (i = 0; i < 64; i++) {
        if (i < 16) {
            // First sixteen rounds: fetch the next message word.
            HOST_c2l(data,l);
            T1 = sched[i] = l;
        } else {
            // Remaining rounds: extend the schedule inside the 16-word ring.
            s0 = sched[(i+1)&0x0f];
            s0 = sigma0(s0);
            s1 = sched[(i+14)&0x0f];
            s1 = sigma1(s1);
            sched[i&0xf] += s0 + s1 + sched[(i+9)&0xf];
            T1 = sched[i&0xf];
        }

        T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
        T2 = Sigma0(a) + Maj(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + T1;
        d = c;
        c = b;
        b = a;
        a = T1 + T2;
    }

    chain[0] += a;
    chain[1] += b;
    chain[2] += c;
    chain[3] += d;
    chain[4] += e;
    chain[5] += f;
    chain[6] += g;
    chain[7] += h;

    for (i = 0; i < SHA256_DIGEST_LENGTH/4; i++) {
        HOST_l2c(chain[i], res);
    }
}
void sha512_compress(psDigestContext_t * md, unsigned char *buf) #endif { uint64 S[8], W[80], t0, t1; int i; /* copy state into S */ for (i = 0; i < 8; i++) { S[i] = md->sha512.state[i]; } /* copy the state into 1024-bits into W[0..15] */ for (i = 0; i < 16; i++) { LOAD64H(W[i], buf + (8*i)); } /* fill W[16..79] */ for (i = 16; i < 80; i++) { W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; } /* Compress */ #ifndef PS_SHA512_IMPROVE_PERF_INCREASE_CODESIZE for (i = 0; i < 80; i++) { t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i]; t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]); S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; S[4] = S[3] + t0; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t0 + t1; } #else #define RND(a,b,c,d,e,f,g,h,i) \ t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ t1 = Sigma0(a) + Maj(a, b, c); \ d += t0; \ h = t0 + t1; for (i = 0; i < 80; i += 8) { RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); } #endif /* PS_SHA512_IMPROVE_PERF_INCREASE_CODESIZE */ /* feedback */ for (i = 0; i < 8; i++) { md->sha512.state[i] = md->sha512.state[i] + S[i]; } }
/*
 * SHA-256 block function for a single block: folds the data at `in` into
 * ctx->h[0..7].
 *
 * Message words are fetched with HOST_c2l(). The schedule lives in a 16-word
 * ring. Values read from the ring, the constant table and ctx->h are staged
 * through temporaries before being combined; the staging is kept exactly as
 * is because the temporaries' width depends on MD32_REG_T.
 */
void sha256_block_data_order (SHA256_CTX *ctx, const void *in)
{
    unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2,tmp;
    SHA_LONG ring[16],word,k_word;
    int i;
    const unsigned char *data=in;

    a = ctx->h[0];
    b = ctx->h[1];
    c = ctx->h[2];
    d = ctx->h[3];
    e = ctx->h[4];
    f = ctx->h[5];
    g = ctx->h[6];
    h = ctx->h[7];

    for (i = 0; i < 64; i++) {
        if (i < 16) {
            /* rounds 0..15: next message word */
            HOST_c2l(data,word);
            ring[i] = word;
            k_word = K256[i];
            T1 = word + h + Sigma1(e) + Ch(e,f,g) + k_word;
        } else {
            /* rounds 16..63: extend the schedule in place */
            s0 = ring[(i+1)&0x0f];
            s0 = sigma0(s0);
            s1 = ring[(i+14)&0x0f];
            s1 = sigma1(s1);

            T1 = ring[i&0xf];
            tmp = ring[(i+9)&0xf];
            T1 += s0 + s1 + tmp;
            ring[i&0xf] = T1;

            k_word = K256[i];
            T1 += h + Sigma1(e) + Ch(e,f,g) + k_word;
        }

        T2 = Sigma0(a) + Maj(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + T1;
        d = c;
        c = b;
        b = a;
        a = T1 + T2;
    }

    /* feed forward, going through the register-width temporary each time */
    tmp = ctx->h[0]; ctx->h[0] = tmp + a;
    tmp = ctx->h[1]; ctx->h[1] = tmp + b;
    tmp = ctx->h[2]; ctx->h[2] = tmp + c;
    tmp = ctx->h[3]; ctx->h[3] = tmp + d;
    tmp = ctx->h[4]; ctx->h[4] = tmp + e;
    tmp = ctx->h[5]; ctx->h[5] = tmp + f;
    tmp = ctx->h[6]; ctx->h[6] = tmp + g;
    tmp = ctx->h[7]; ctx->h[7] = tmp + h;
}
static void sha256_compress(hash_state * md) #endif { unsigned long S[8], W[64], t0, t1; int i; _ARGCHK(md != NULL); /* copy state into S */ for (i = 0; i < 8; i++) S[i] = md->sha256.state[i]; /* copy the state into 512-bits into W[0..15] */ for (i = 0; i < 16; i++) { LOAD32H(W[i], md->sha256.buf + (4*i)); } /* fill W[16..63] */ for (i = 16; i < 64; i++) { W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; } /* Compress */ for (i = 0; i < 64; i++) { t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i]; t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]); S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; S[4] = S[3] + t0; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t0 + t1; } /* feedback */ for (i = 0; i < 8; i++) { md->sha256.state[i] = md->sha256.state[i] + S[i]; } }
static int sha512_compress(hash_state * md, unsigned char *buf) #endif { ulong64 S[8], W[80], t0, t1; int i; /* copy state into S */ for (i = 0; i < 8; i++) { S[i] = md->sha512.state[i]; } /* copy the state into 1024-bits into W[0..15] */ for (i = 0; i < 16; i++) { LOAD64H(W[i], buf + (8*i)); } /* fill W[16..79] */ for (i = 16; i < 80; i++) { W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; } /* Compress */ #ifdef LTC_SMALL_CODE for (i = 0; i < 80; i++) { t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i]; t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]); S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; S[4] = S[3] + t0; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t0 + t1; } #else #define RND(a,b,c,d,e,f,g,h,i) \ t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ t1 = Sigma0(a) + Maj(a, b, c); \ d += t0; \ h = t0 + t1; for (i = 0; i < 80; i += 8) { RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); } #endif /* feedback */ for (i = 0; i < 8; i++) { md->sha512.state[i] = md->sha512.state[i] + S[i]; } return CRYPT_OK; }
/*
 * SHA-256 compression over __m128i vectors.
 *
 * state - eight input chaining vectors (read only).
 * block - sixteen message-schedule input vectors.
 * dst   - eight output vectors; dst[i] = state[i] + working word i after the
 *         64 rounds. Each dst[i] is written after state[i] has been read, so
 *         dst may be the same array as state.
 *
 * The round constants are broadcast with _mm_set1_epi32(), i.e. the same
 * 32-bit constant is used in every lane. All arithmetic goes through the
 * add2/add4/add5, sigma0/sigma1, Sigma0/Sigma1, Ch and Maj helpers.
 */
static inline void sha256_transform(__m128i *state, __m128i *block, __m128i *dst)
{
    /* Round constants in the order the rounds consume them. */
    static const unsigned int round_k[64] = {
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
        0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
        0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
        0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
        0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };
    __m128i W[64], t1, t2;
    int r;

    /* Message schedule: first sixteen entries are the block itself ... */
    for (r = 0; r < 16; ++r) {
        W[r] = block[r];
    }
    /* ... the remaining ones follow from the recurrence. */
    for (r = 16; r < 64; ++r) {
        W[r] = add4(sigma1(W[r - 2]), W[r - 7], sigma0(W[r - 15]), W[r - 16]);
    }

    // read existing state
    __m128i a = state[0];
    __m128i b = state[1];
    __m128i c = state[2];
    __m128i d = state[3];
    __m128i e = state[4];
    __m128i f = state[5];
    __m128i g = state[6];
    __m128i h = state[7];

/* One round. Only D and H are modified; the variable roles are rotated by
 * permuting the argument order from one round to the next. */
#define SHA256_XMM_ROUND(A, B, C, D, E, F, G, H, idx) \
    do { \
        t1 = add5(H, Sigma1(E), Ch(E, F, G), _mm_set1_epi32(round_k[(idx)]), W[(idx)]); \
        t2 = add2(Sigma0(A), Maj(A, B, C)); \
        D = add2(D, t1); \
        H = add2(t1, t2); \
    } while (0)

    /* Eight rounds per pass bring every variable back to its own role. */
    for (r = 0; r < 64; r += 8) {
        SHA256_XMM_ROUND(a, b, c, d, e, f, g, h, r + 0);
        SHA256_XMM_ROUND(h, a, b, c, d, e, f, g, r + 1);
        SHA256_XMM_ROUND(g, h, a, b, c, d, e, f, r + 2);
        SHA256_XMM_ROUND(f, g, h, a, b, c, d, e, r + 3);
        SHA256_XMM_ROUND(e, f, g, h, a, b, c, d, r + 4);
        SHA256_XMM_ROUND(d, e, f, g, h, a, b, c, r + 5);
        SHA256_XMM_ROUND(c, d, e, f, g, h, a, b, r + 6);
        SHA256_XMM_ROUND(b, c, d, e, f, g, h, a, r + 7);
    }

#undef SHA256_XMM_ROUND

    /* Feed forward into the destination. */
    dst[0] = add2(state[0], a);
    dst[1] = add2(state[1], b);
    dst[2] = add2(state[2], c);
    dst[3] = add2(state[3], d);
    dst[4] = add2(state[4], e);
    dst[5] = add2(state[5], f);
    dst[6] = add2(state[6], g);
    dst[7] = add2(state[7], h);
}
/*
 * compress 1024-bits
 *
 * Folds `nblocks` consecutive blocks of CCSHA512_BLOCK_SIZE bytes from `in`
 * into the eight 64-bit words obtained from ccdigest_u64(state). For each
 * block the full 80-word schedule is built first; the rounds then run as a
 * compact loop (CC_SMALL_CODE) or eight at a time through the RND macro.
 */
void ccsha512_ltc_compress(ccdigest_state_t state, unsigned long nblocks, const void *in)
{
    uint64_t work[8], W[80], t0, t1;
    int n;
    uint64_t *s = ccdigest_u64(state);
    const unsigned char *buf = in;

    for (; nblocks != 0; --nblocks, buf += CCSHA512_BLOCK_SIZE) {
        /* working copy of the state */
        for (n = 0; n < 8; n++) {
            work[n] = s[n];
        }

        /* W[0..15] come from the block, W[16..79] from the recurrence */
        for (n = 0; n < 80; n++) {
            if (n < 16) {
                CC_LOAD64_BE(W[n], buf + (8*n));
            } else {
                W[n] = Gamma1(W[n - 2]) + W[n - 7] + Gamma0(W[n - 15]) + W[n - 16];
            }
        }

        /* Compress */
#ifdef CC_SMALL_CODE
        for (n = 0; n < 80; n++) {
            int k;

            t0 = work[7] + Sigma1(work[4]) + Ch(work[4], work[5], work[6]) + K[n] + W[n];
            t1 = Sigma0(work[0]) + Maj(work[0], work[1], work[2]);
            /* shift every word up one slot, then patch in the two new values */
            for (k = 7; k > 0; k--) {
                work[k] = work[k - 1];
            }
            work[4] += t0;
            work[0] = t0 + t1;
        }
#else
/* One round; the caller rotates the argument order instead of moving data. */
#define RND(a,b,c,d,e,f,g,h,i) \
    t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
    t1 = Sigma0(a) + Maj(a, b, c); \
    d += t0; \
    h = t0 + t1;

        for (n = 0; n < 80; n += 8) {
            RND(work[0],work[1],work[2],work[3],work[4],work[5],work[6],work[7],n+0);
            RND(work[7],work[0],work[1],work[2],work[3],work[4],work[5],work[6],n+1);
            RND(work[6],work[7],work[0],work[1],work[2],work[3],work[4],work[5],n+2);
            RND(work[5],work[6],work[7],work[0],work[1],work[2],work[3],work[4],n+3);
            RND(work[4],work[5],work[6],work[7],work[0],work[1],work[2],work[3],n+4);
            RND(work[3],work[4],work[5],work[6],work[7],work[0],work[1],work[2],n+5);
            RND(work[2],work[3],work[4],work[5],work[6],work[7],work[0],work[1],n+6);
            RND(work[1],work[2],work[3],work[4],work[5],work[6],work[7],work[0],n+7);
        }
#endif

        /* feedback */
        for (n = 0; n < 8; n++) {
            s[n] += work[n];
        }
    }
}