示例#1
0
/*
 * SHA-256 compression step: fold one 64-byte message block into the
 * eight-word chaining state.
 *
 * H:  eight 32-bit state words, read on entry and updated in place.
 * cp: the 64 message bytes, consumed as sixteen big-endian 32-bit words.
 */
static void
SHA256Transform(uint32_t *H, const uint8_t *cp)
{
	uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64];

	/*
	 * Widen each byte to uint32_t before shifting.  A bare uint8_t is
	 * promoted to signed int, so "cp[0] << 24" overflows int whenever
	 * the byte is >= 0x80, which is undefined behaviour.
	 */
	for (t = 0; t < 16; t++, cp += 4)
		W[t] = ((uint32_t)cp[0] << 24) | ((uint32_t)cp[1] << 16) |
		    ((uint32_t)cp[2] << 8) | (uint32_t)cp[3];

	/* Expand the 16 input words into the full 64-word schedule. */
	for (t = 16; t < 64; t++)
		W[t] = sigma1(W[t - 2]) + W[t - 7] +
		    sigma0(W[t - 15]) + W[t - 16];

	/* Working variables start out as a copy of the chaining state. */
	a = H[0]; b = H[1]; c = H[2]; d = H[3];
	e = H[4]; f = H[5]; g = H[6]; h = H[7];

	/* 64 rounds; the working variables shift down one slot per round. */
	for (t = 0; t < 64; t++) {
		T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t];
		T2 = SIGMA0(a) + Maj(a, b, c);
		h = g; g = f; f = e; e = d + T1;
		d = c; c = b; b = a; a = T1 + T2;
	}

	/* Feed-forward: add the working variables back into the state. */
	H[0] += a; H[1] += b; H[2] += c; H[3] += d;
	H[4] += e; H[5] += f; H[6] += g; H[7] += h;
}
示例#2
0
文件: sha256.c 项目: 01org/linux-sgx
/*
 * Mix one message block into the eight-word state iv using 64 rounds.
 *
 * The first 16 rounds take their message word from BigEndian(&data) and
 * cache it in a 16-entry window; the remaining 48 rounds derive each word
 * in place from that rolling window.  iv is updated in place at the end.
 */
static void sha256_compress(unsigned int* iv, const uint8_t* data) {
  unsigned int a = iv[0], b = iv[1], c = iv[2], d = iv[3];
  unsigned int e = iv[4], f = iv[5], g = iv[6], h = iv[7];
  unsigned int window[16];
  unsigned int round;

  /* Rounds 0..15: one freshly read word per round. */
  for (round = 0; round < 16; ++round) {
    const unsigned int word = BigEndian(&data);
    unsigned int mix, maj_part;

    window[round] = word;
    mix = word;
    mix += h + Sigma1(e) + Ch(e, f, g) + k256[round];
    maj_part = Sigma0(a) + Maj(a, b, c);

    h = g; g = f; f = e; e = d + mix;
    d = c; c = b; b = a; a = mix + maj_part;
  }

  /* Rounds 16..63: the window slot for this round is updated in place. */
  for (; round < 64; ++round) {
    const unsigned int small0 = sigma0(window[(round + 1) & 0x0f]);
    const unsigned int small1 = sigma1(window[(round + 14) & 0x0f]);
    unsigned int mix, maj_part;

    window[round & 0xf] += small0 + small1 + window[(round + 9) & 0xf];
    mix = window[round & 0xf];
    mix += h + Sigma1(e) + Ch(e, f, g) + k256[round];
    maj_part = Sigma0(a) + Maj(a, b, c);

    h = g; g = f; f = e; e = d + mix;
    d = c; c = b; b = a; a = mix + maj_part;
  }

  /* Add the working variables back into the caller's state. */
  iv[0] += a; iv[1] += b; iv[2] += c; iv[3] += d;
  iv[4] += e; iv[5] += f; iv[6] += g; iv[7] += h;
}
示例#3
0
/*
 * Compress num consecutive message blocks from in into ctx->h.
 *
 * host != 0: the 16 message words of each block are read directly from the
 *            input as SHA_LONG values and the cursor then advances by
 *            SHA256_CBLOCK bytes.
 * host == 0: each message word is fetched through HOST_c2l.
 */
static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
{
	unsigned MD32_REG_T a, b, c, d, e, f, g, h, s0, s1, T1, T2;
	SHA_LONG ring[16];
	const unsigned char *cursor = in;
	int rnd;

	for (; num != 0; num--) {
		a = ctx->h[0];
		b = ctx->h[1];
		c = ctx->h[2];
		d = ctx->h[3];
		e = ctx->h[4];
		f = ctx->h[5];
		g = ctx->h[6];
		h = ctx->h[7];

		/* Rounds 0..15: load the message words into the ring. */
		if (host) {
			const SHA_LONG *words = (const SHA_LONG *)cursor;

			for (rnd = 0; rnd < 16; rnd++) {
				T1 = ring[rnd] = words[rnd];
				T1 += h + Sigma1(e) + Ch(e,f,g) + K256[rnd];
				T2 = Sigma0(a) + Maj(a,b,c);
				h = g;
				g = f;
				f = e;
				e = d + T1;
				d = c;
				c = b;
				b = a;
				a = T1 + T2;
			}

			cursor += SHA256_CBLOCK;
		} else {
			SHA_LONG word;

			for (rnd = 0; rnd < 16; rnd++) {
				HOST_c2l(cursor, word);
				T1 = ring[rnd] = word;
				T1 += h + Sigma1(e) + Ch(e,f,g) + K256[rnd];
				T2 = Sigma0(a) + Maj(a,b,c);
				h = g;
				g = f;
				f = e;
				e = d + T1;
				d = c;
				c = b;
				b = a;
				a = T1 + T2;
			}
		}

		/* Rounds 16..63: extend the schedule in place inside the ring. */
		for (; rnd < 64; rnd++) {
			s0 = ring[(rnd + 1) & 0x0f];
			s0 = sigma0(s0);
			s1 = ring[(rnd + 14) & 0x0f];
			s1 = sigma1(s1);

			T1 = ring[rnd & 0xf] += s0 + s1 + ring[(rnd + 9) & 0xf];
			T1 += h + Sigma1(e) + Ch(e,f,g) + K256[rnd];
			T2 = Sigma0(a) + Maj(a,b,c);
			h = g;
			g = f;
			f = e;
			e = d + T1;
			d = c;
			c = b;
			b = a;
			a = T1 + T2;
		}

		/* Fold the working variables back into the context. */
		ctx->h[0] += a;
		ctx->h[1] += b;
		ctx->h[2] += c;
		ctx->h[3] += d;
		ctx->h[4] += e;
		ctx->h[5] += f;
		ctx->h[6] += g;
		ctx->h[7] += h;
	}
}
/**
 * sha512 compression function - 32-bit machines
 *
 * Loads eight 64-bit chaining words from hash, runs 80 rounds over the
 * sixteen 64-bit message words read from in, adds the round output back
 * into the chaining words and stores them to res.
 *
 * @param res The resulting hash value
 * @param hash The chaining input value
 * @param in The message input
 */
void sha512_comp (hashblock res, const hashblock hash, const messageblock in)
	{
	const uint64_t *W=in;
	uint64_t	A,E,T;
	/* X backs the sliding window F: 9 live slots (F[0..8]) plus one
	 * further slot for each of the 80 rounds the window moves down. */
	uint64_t	X[9+80],*F;
	uint64_t H[8];
	int i;

	/* Load the chaining value.  NOTE(review): PULL64 is defined elsewhere;
	 * it is handed hash[i*8], presumably the first byte of the i-th
	 * 64-bit word - confirm against the macro. */
   for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
	   H[i]=PULL64(hash[i*8]);
	}

	/* Working variables: A and E live in locals, the other six live in
	 * the window at F[1..3] and F[5..7]. */
	F    = X+80;
	A    = H[0];	F[1] = H[1];
	F[2] = H[2];	F[3] = H[3];
	E    = H[4];	F[5] = H[5];
	F[6] = H[6];	F[7] = H[7];

	/* Rounds 0..15.  Each round stores A at F[0], E at F[4] and the message
	 * word at F[8]; the F-- in the loop header then slides the window so
	 * that those values appear one index higher in the next round.  That
	 * slide is what rotates the working variables. */
	for (i=0;i<16;i++,F--)
		{
#ifdef B_ENDIAN
		T = W[i];
#else
		T = PULL64(W[i]);
#endif
		F[0] = A;
		F[4] = E;
		F[8] = T;
		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
		E    = F[3] + T;
		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
		}

	/* Rounds 16..79.  A word stored k rounds ago now sits at F[8+k], so
	 * F[8+16-1], F[8+16-14], F[8+16] and F[8+16-9] are the schedule words
	 * from 15, 2, 16 and 7 rounds back respectively. */
	for (;i<80;i++,F--)
		{
		T    = sigma0(F[8+16-1]);
		T   += sigma1(F[8+16-14]);
		T   += F[8+16] + F[8+16-9];

		F[0] = A;
		F[4] = E;
		F[8] = T;
		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
		E    = F[3] + T;
		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
		}

	/* Feed-forward: F[1..3] and F[5..7] now hold the final values of the
	 * six window-resident working variables. */
	H[0] += A;		H[1] += F[1];
	H[2] += F[2];	H[3] += F[3];
	H[4] += E;		H[5] += F[5];
	H[6] += F[6];	H[7] += F[7];

	/* Store the updated chaining value to res via PUSH64. */
   for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
	   PUSH64(H[i],res[i*8]);
	}

	}
示例#5
0
文件: sha.c 项目: abrauchli/allnet
/*
 * Build the 80-word message schedule in W.  Entries 0..15 come from passing
 * the address of each block word to read_int; entries 16..79 are derived
 * from earlier schedule entries.
 */
static inline void init_w (uint64_t * W, const uint64_t * block)
{
  int idx = 0;

  while (idx < 16) {
    W [idx] = read_int ((char *) (block + idx));
    idx++;
  }

  while (idx < 80) {
    W [idx] = sigma1 (W [idx - 2]) + W [idx - 7] +
              sigma0 (W [idx - 15]) + W [idx - 16];
    idx++;
  }
}
示例#6
0
/*
 * Block function using the K512 round constants: compress len bytes
 * starting at p into the eight-word state s, 128 bytes (sixteen big-endian
 * 64-bit words) at a time.
 *
 * p:   message bytes, consumed in 128-byte blocks.
 * len: byte count.  The loop stops only once p reaches p+len, so len must
 *      be a multiple of 128 or the last block reads past the buffer.
 * s:   eight 64-bit state words, updated in place after every block.
 */
void
_sha2block128(uchar *p, ulong len, uint64 *s)
{
	uint64 a, b, c, d, e, f, g, h, t1, t2;
	uint64 *kp, *wp;
	uint64 w[80];
	uchar *end;

	/* at this point, we have a multiple of 128 bytes */
	for(end = p+len; p < end;){
		a = s[0];
		b = s[1];
		c = s[2];
		d = s[3];
		e = s[4];
		f = s[5];
		g = s[6];
		h = s[7];

		/*
		 * Load 16 big-endian words.  Every byte is widened to uint64
		 * before shifting: an uncast uchar is promoted to signed int,
		 * so "p[4] << 24" goes negative for bytes >= 0x80 and then
		 * sign-extends into the upper 32 bits of the word.
		 */
		for(wp = w; wp < &w[16]; wp++, p += 8)
			wp[0] = ((uint64)p[0])<<56 | ((uint64)p[1])<<48 |
				((uint64)p[2])<<40 | ((uint64)p[3])<<32 |
				((uint64)p[4])<<24 | ((uint64)p[5])<<16 |
				((uint64)p[6])<<8 | (uint64)p[7];

		/* Expand to the full 80-word schedule. */
		for(; wp < &w[80]; wp++) {
			uint64 s0, s1;

			s0 = sigma0(wp[-15]);
			s1 = sigma1(wp[-2]);
			wp[0] = s1 + wp[-7] + s0 + wp[-16];
		}

		/* 80 rounds, walking the constant table and schedule in step. */
		for(kp = K512, wp = w; wp < &w[80]; ) {
			t1 = h + SIGMA1(e) + Ch(e,f,g) + *kp++ + *wp++;
			t2 = SIGMA0(a) + Maj(a,b,c);
			h = g;
			g = f;
			f = e;
			e = d + t1;
			d = c;
			c = b;
			b = a;
			a = t1 + t2;
		}

		/* save state */
		s[0] += a;
		s[1] += b;
		s[2] += c;
		s[3] += d;
		s[4] += e;
		s[5] += f;
		s[6] += g;
		s[7] += h;
	}
}
示例#7
0
文件: sha512.c 项目: placrosse/ring
/*
 * This code should give better results on 32-bit CPU with less than
 * ~24 registers, both size and performance wise...
 *
 * Compresses num consecutive blocks of sixteen 64-bit words from W into the
 * eight-word state.  Only A and E are kept in locals; the other six working
 * variables live in a window F that slides down the array X by one slot per
 * round.
 */
void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num) {
  uint64_t A, E, T;
  /* 9 live window slots (F[0..8]) plus one slot per round of sliding. */
  uint64_t X[9 + 80], *F;
  int i;

  while (num--) {
    /* Reset the window to the top of X and seed it from the state. */
    F = X + 80;
    A = state[0];
    F[1] = state[1];
    F[2] = state[2];
    F[3] = state[3];
    E = state[4];
    F[5] = state[5];
    F[6] = state[6];
    F[7] = state[7];

    /* Rounds 0..15: A goes to F[0], E to F[4], the message word to F[8];
     * the F-- in the loop header then shifts every stored value one index
     * up for the next round, which rotates the working variables. */
    for (i = 0; i < 16; i++, F--) {
      T = from_be_u64(W[i]);
      F[0] = A;
      F[4] = E;
      F[8] = T;
      T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
      E = F[3] + T;
      A = T + Sigma0(A) + Maj(A, F[1], F[2]);
    }

    /* Rounds 16..79: a word stored k rounds ago sits at F[8 + k], so the
     * three lines below combine the words from 15, 2, 16 and 7 rounds
     * back into the next schedule word. */
    for (; i < 80; i++, F--) {
      T = sigma0(F[8 + 16 - 1]);
      T += sigma1(F[8 + 16 - 14]);
      T += F[8 + 16] + F[8 + 16 - 9];

      F[0] = A;
      F[4] = E;
      F[8] = T;
      T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
      E = F[3] + T;
      A = T + Sigma0(A) + Maj(A, F[1], F[2]);
    }

    /* Feed-forward from the locals and the final window position. */
    state[0] += A;
    state[1] += F[1];
    state[2] += F[2];
    state[3] += F[3];
    state[4] += E;
    state[5] += F[5];
    state[6] += F[6];
    state[7] += F[7];

    /* Advance to the next 16-word block. */
    W += 16;
  }
}
/**
 * sha512 compression function - 64-bit machines
 * @param res  Receives the updated chaining value (stored with PUSH64)
 * @param hash The chaining input value (loaded with PULL64)
 * @param in   The message input, consumed as sixteen 64-bit words
 */
void sha512_comp (hashblock res, const hashblock hash, const messageblock in)
{
	// The explicit cast is required because this file is compiled as C++.
	const uint64_t *msg = reinterpret_cast<const uint64_t*>(in);
	uint64_t a, b, c, d, e, f, g, h;
	uint64_t chain[8];
	uint64_t ring[16];
	int rnd;

	// Load the eight chaining words.
	for (rnd = 0; rnd < SHA512_DIGEST_LENGTH/8; rnd++) {
		chain[rnd] = PULL64(hash[rnd*8]);
	}

	a = chain[0];
	b = chain[1];
	c = chain[2];
	d = chain[3];
	e = chain[4];
	f = chain[5];
	g = chain[6];
	h = chain[7];

	// Rounds 0..15: message words enter the 16-entry ring.
	for (rnd = 0; rnd < 16; rnd++) {
#ifdef B_ENDIAN
		ring[rnd] = msg[rnd];
#else
		ring[rnd] = PULL64(msg[rnd]);
#endif
		uint64_t T1 = ring[rnd];
		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[rnd];
		const uint64_t T2 = Sigma0(a) + Maj(a,b,c);

		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	// Rounds 16..79: each ring slot is extended in place.
	for (; rnd < 80; rnd++) {
		const uint64_t s0 = sigma0(ring[(rnd+1)&0x0f]);
		const uint64_t s1 = sigma1(ring[(rnd+14)&0x0f]);

		ring[rnd&0xf] += s0 + s1 + ring[(rnd+9)&0xf];

		uint64_t T1 = ring[rnd&0xf];
		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[rnd];
		const uint64_t T2 = Sigma0(a) + Maj(a,b,c);

		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	// Feed-forward into the chaining words.
	chain[0] += a;
	chain[1] += b;
	chain[2] += c;
	chain[3] += d;
	chain[4] += e;
	chain[5] += f;
	chain[6] += g;
	chain[7] += h;

	// Store the result.
	for (rnd = 0; rnd < SHA512_DIGEST_LENGTH/8; rnd++) {
		PUSH64(chain[rnd],res[rnd*8]);
	}
}
示例#9
0
文件: sha2block64.c 项目: 0intro/vx32
/*
 * Block function using the K256 round constants: compress len bytes
 * starting at p into the eight-word state s, 64 bytes (sixteen big-endian
 * 32-bit words) at a time.
 *
 * p:   message bytes, consumed in 64-byte blocks.
 * len: byte count.  The loop stops only once p reaches p+len, so len must
 *      be a multiple of 64 or the last block reads past the buffer.
 * s:   eight 32-bit state words, updated in place after every block.
 */
void
_sha2block64(uchar *p, ulong len, uint32 *s)
{
	uint32 a, b, c, d, e, f, g, h, t1, t2;
	uint32 *kp, *wp;
	uint32 w[64];
	uchar *end;

	/* at this point, we have a multiple of 64 bytes */
	for(end = p+len; p < end;){
		a = s[0];
		b = s[1];
		c = s[2];
		d = s[3];
		e = s[4];
		f = s[5];
		g = s[6];
		h = s[7];

		/*
		 * Load 16 big-endian words.  Bytes are widened to uint32
		 * before shifting: an uncast uchar is promoted to signed int,
		 * and "p[0] << 24" overflows int for bytes >= 0x80, which is
		 * undefined behaviour.
		 */
		for(wp = w; wp < &w[16]; wp++, p += 4)
			wp[0] = (uint32)p[0] << 24 | (uint32)p[1] << 16 |
				(uint32)p[2] << 8 | (uint32)p[3];

		/* Expand to the full 64-word schedule. */
		for(; wp < &w[64]; wp++)
			wp[0] = sigma1(wp[-2]) + wp[-7] +
				sigma0(wp[-15]) + wp[-16];

		/* 64 rounds, walking the constant table and schedule in step. */
		for(kp = K256, wp = w; wp < &w[64]; ) {
			t1 = h + SIGMA1(e) + Ch(e,f,g) + *kp++ + *wp++;
			t2 = SIGMA0(a) + Maj(a,b,c);
			h = g;
			g = f;
			f = e;
			e = d + t1;
			d = c;
			c = b;
			b = a;
			a = t1 + t2;
		}

		/* save state */
		s[0] += a;
		s[1] += b;
		s[2] += c;
		s[3] += d;
		s[4] += e;
		s[5] += f;
		s[6] += g;
		s[7] += h;
	}
}
/*
 * sha256 compression function
 *
 *   res   receives the updated chaining value (written with HOST_l2c)
 *   hash  the chaining input (read with HOST_c2l)
 *   in    the message input, read as sixteen words with HOST_c2l
 */
void sha256_comp (hashblock res, const hashblock hash, const void *in)
{
	// The explicit cast is required because this file is compiled as C++.
	const unsigned char *msg = static_cast<const unsigned char*>(in);
	uint32_t a, b, c, d, e, f, g, h;
	uint32_t chain[8];
	uint32_t ring[16];
	uint32_t word;
	int rnd;

	// Load the eight chaining words.
	for (rnd = 0; rnd < SHA256_DIGEST_LENGTH/4; rnd++) {
		HOST_c2l(hash, chain[rnd]);
	}

	a = chain[0];
	b = chain[1];
	c = chain[2];
	d = chain[3];
	e = chain[4];
	f = chain[5];
	g = chain[6];
	h = chain[7];

	// Rounds 0..15: message words enter the 16-entry ring.
	for (rnd = 0; rnd < 16; rnd++) {
		HOST_c2l(msg, word);
		ring[rnd] = word;

		uint32_t T1 = word;
		T1 += h + Sigma1(e) + Ch(e,f,g) + K256[rnd];
		const uint32_t T2 = Sigma0(a) + Maj(a,b,c);

		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	// Rounds 16..63: each ring slot is extended in place.
	for (; rnd < 64; rnd++) {
		const uint32_t s0 = sigma0(ring[(rnd+1)&0x0f]);
		const uint32_t s1 = sigma1(ring[(rnd+14)&0x0f]);

		ring[rnd&0xf] += s0 + s1 + ring[(rnd+9)&0xf];

		uint32_t T1 = ring[rnd&0xf];
		T1 += h + Sigma1(e) + Ch(e,f,g) + K256[rnd];
		const uint32_t T2 = Sigma0(a) + Maj(a,b,c);

		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	// Feed-forward into the chaining words.
	chain[0] += a;
	chain[1] += b;
	chain[2] += c;
	chain[3] += d;
	chain[4] += e;
	chain[5] += f;
	chain[6] += g;
	chain[7] += h;

	// Store the result.
	for (rnd = 0; rnd < SHA256_DIGEST_LENGTH/4; rnd++) {
		HOST_l2c(chain[rnd], res);
	}
}
示例#11
0
文件: sha.c 项目: GaloisInc/hacrypto
/*
 * Compress the single message block at in into ctx->h.
 *
 * Message words are fetched with HOST_c2l for the first 16 rounds and kept
 * in a 16-entry ring that is extended in place for the remaining 48.
 */
void sha256_block_data_order (SHA256_CTX *ctx, const void *in)
{
	unsigned MD32_REG_T a, b, c, d, e, f, g, h, s0, s1, T1, T2, tmp;
	SHA_LONG ring[16], word, k;
	const unsigned char *cursor = in;
	int rnd;

	a = ctx->h[0];
	b = ctx->h[1];
	c = ctx->h[2];
	d = ctx->h[3];
	e = ctx->h[4];
	f = ctx->h[5];
	g = ctx->h[6];
	h = ctx->h[7];

	/* Rounds 0..15: load message words. */
	for (rnd = 0; rnd < 16; rnd++) {
		HOST_c2l(cursor, word);
		ring[rnd] = word;
		k = K256[rnd];
		T1 = word + h + Sigma1(e) + Ch(e,f,g) + k;
		T2 = Sigma0(a) + Maj(a,b,c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	/* Rounds 16..63: extend the schedule inside the ring. */
	for (; rnd < 64; rnd++) {
		s0 = ring[(rnd + 1) & 0x0f];
		s0 = sigma0(s0);
		s1 = ring[(rnd + 14) & 0x0f];
		s1 = sigma1(s1);

		T1 = ring[rnd & 0xf];
		tmp = ring[(rnd + 9) & 0xf];
		T1 += s0 + s1 + tmp;
		ring[rnd & 0xf] = T1;
		k = K256[rnd];
		T1 += h + Sigma1(e) + Ch(e,f,g) + k;
		T2 = Sigma0(a) + Maj(a,b,c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	/* Fold the working variables back into the context. */
	tmp = ctx->h[0];
	ctx->h[0] = tmp + a;
	tmp = ctx->h[1];
	ctx->h[1] = tmp + b;
	tmp = ctx->h[2];
	ctx->h[2] = tmp + c;
	tmp = ctx->h[3];
	ctx->h[3] = tmp + d;
	tmp = ctx->h[4];
	ctx->h[4] = tmp + e;
	tmp = ctx->h[5];
	ctx->h[5] = tmp + f;
	tmp = ctx->h[6];
	ctx->h[6] = tmp + g;
	tmp = ctx->h[7];
	ctx->h[7] = tmp + h;
}
示例#12
0
文件: sha.c 项目: abrauchli/allnet
/*
 * Build the 80-word message schedule in W.  The first 16 entries are copied
 * from block unchanged (no byte-order conversion is applied here); entries
 * 16..79 are derived from earlier schedule entries.
 */
static inline void
  init_w_native_byte_order (uint64_t * W, const uint64_t * block)
{
  int idx;

  for (idx = 0; idx < 16; idx++)
    W [idx] = block [idx];

  for (; idx < 80; idx++)
    W [idx] = sigma1 (W [idx - 2]) + W [idx - 7] +
              sigma0 (W [idx - 15]) + W [idx - 16];
}
示例#13
0
 /* Returns sigma0( n ).  NOTE(review): sigma0 is defined elsewhere and is
  * presumably the divisor-counting function - confirm. */
 int num_divisors( const int n ) {
     const int divisor_count = sigma0( n );

     return divisor_count;
 }
示例#14
0
/*
 * One SHA-256-style compression carried out on __m128i values.
 *
 * The 16 vectors in block are expanded to a 64-entry schedule, 64 rounds
 * are run starting from the eight vectors in state, and state[i] plus the
 * i-th working variable is written to dst[i] for i = 0..7.  Nothing is
 * stored through the state pointer itself.
 */
static inline void sha256_transform(__m128i *state, __m128i *block, __m128i *dst)
{
    /* Per-round constants, in round order; each is broadcast to a vector. */
    static const unsigned int round_k[64] = {
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
        0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
        0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
        0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
        0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };
    __m128i W[64], t1, t2;
    int i;

    /* Schedule: copy the 16 input vectors, then extend to 64 entries. */
    for (i = 0; i < 16; i++)
        W[i] = block[i];
    for (i = 16; i < 64; i++)
        W[i] = add4(sigma1(W[i - 2]), W[i - 7], sigma0(W[i - 15]), W[i - 16]);

    /* Working variables start as a copy of the existing state. */
    __m128i a = state[0];
    __m128i b = state[1];
    __m128i c = state[2];
    __m128i d = state[3];
    __m128i e = state[4];
    __m128i f = state[5];
    __m128i g = state[6];
    __m128i h = state[7];

    /* 64 rounds; values shift down one variable per round. */
    for (i = 0; i < 64; i++) {
        t1 = add5(h, Sigma1(e), Ch(e, f, g), _mm_set1_epi32((int)round_k[i]), W[i]);
        t2 = add2(Sigma0(a), Maj(a, b, c));
        h = g;
        g = f;
        f = e;
        e = add2(d, t1);
        d = c;
        c = b;
        b = a;
        a = add2(t1, t2);
    }

    /* Feed-forward into the destination. */
    dst[0] = add2(state[0], a);
    dst[1] = add2(state[1], b);
    dst[2] = add2(state[2], c);
    dst[3] = add2(state[3], d);
    dst[4] = add2(state[4], e);
    dst[5] = add2(state[5], f);
    dst[6] = add2(state[6], g);
    dst[7] = add2(state[7], h);
}
示例#15
0
文件: sha2.cpp 项目: benosa/bitcoin
/**
 * Perform one SHA-512 transformation, processing a 128-byte chunk.
 *
 * @param s      Eight 64-bit state words; read on entry, and each has the
 *               matching working variable added to it on exit.
 * @param chunk  Input bytes; sixteen 64-bit words are read with ReadBE64
 *               at offsets 0, 8, ..., 120.
 *
 * The 80 rounds are fully unrolled. Instead of moving values between the
 * working variables, each call to Round passes a..h rotated by one place.
 * NOTE(review): Round is defined elsewhere; the rotation pattern implies it
 * updates its 4th and 8th arguments in place - confirm against its
 * definition.
 */
void Transform(uint64_t *s, const unsigned char *chunk) {
    uint64_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
    // w0..w15 form a 16-word rolling message schedule.
    uint64_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;

    // Rounds 0..15: each schedule word is loaded from the chunk as it is used.
    Round(a, b, c, d, e, f, g, h, 0x428a2f98d728ae22ull,  w0 = ReadBE64(chunk + 0));
    Round(h, a, b, c, d, e, f, g, 0x7137449123ef65cdull,  w1 = ReadBE64(chunk + 8));
    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcfec4d3b2full,  w2 = ReadBE64(chunk + 16));
    Round(f, g, h, a, b, c, d, e, 0xe9b5dba58189dbbcull,  w3 = ReadBE64(chunk + 24));
    Round(e, f, g, h, a, b, c, d, 0x3956c25bf348b538ull,  w4 = ReadBE64(chunk + 32));
    Round(d, e, f, g, h, a, b, c, 0x59f111f1b605d019ull,  w5 = ReadBE64(chunk + 40));
    Round(c, d, e, f, g, h, a, b, 0x923f82a4af194f9bull,  w6 = ReadBE64(chunk + 48));
    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5da6d8118ull,  w7 = ReadBE64(chunk + 56));
    Round(a, b, c, d, e, f, g, h, 0xd807aa98a3030242ull,  w8 = ReadBE64(chunk + 64));
    Round(h, a, b, c, d, e, f, g, 0x12835b0145706fbeull,  w9 = ReadBE64(chunk + 72));
    Round(g, h, a, b, c, d, e, f, 0x243185be4ee4b28cull, w10 = ReadBE64(chunk + 80));
    Round(f, g, h, a, b, c, d, e, 0x550c7dc3d5ffb4e2ull, w11 = ReadBE64(chunk + 88));
    Round(e, f, g, h, a, b, c, d, 0x72be5d74f27b896full, w12 = ReadBE64(chunk + 96));
    Round(d, e, f, g, h, a, b, c, 0x80deb1fe3b1696b1ull, w13 = ReadBE64(chunk + 104));
    Round(c, d, e, f, g, h, a, b, 0x9bdc06a725c71235ull, w14 = ReadBE64(chunk + 112));
    Round(b, c, d, e, f, g, h, a, 0xc19bf174cf692694ull, w15 = ReadBE64(chunk + 120));

    // Rounds 16..31: each word is replaced in place by itself plus sigma1 of
    // the word 2 back, the word 7 back, and sigma0 of the word 15 back.
    Round(a, b, c, d, e, f, g, h, 0xe49b69c19ef14ad2ull,  w0 += sigma1(w14) +  w9 + sigma0( w1));
    Round(h, a, b, c, d, e, f, g, 0xefbe4786384f25e3ull,  w1 += sigma1(w15) + w10 + sigma0( w2));
    Round(g, h, a, b, c, d, e, f, 0x0fc19dc68b8cd5b5ull,  w2 += sigma1( w0) + w11 + sigma0( w3));
    Round(f, g, h, a, b, c, d, e, 0x240ca1cc77ac9c65ull,  w3 += sigma1( w1) + w12 + sigma0( w4));
    Round(e, f, g, h, a, b, c, d, 0x2de92c6f592b0275ull,  w4 += sigma1( w2) + w13 + sigma0( w5));
    Round(d, e, f, g, h, a, b, c, 0x4a7484aa6ea6e483ull,  w5 += sigma1( w3) + w14 + sigma0( w6));
    Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcbd41fbd4ull,  w6 += sigma1( w4) + w15 + sigma0( w7));
    Round(b, c, d, e, f, g, h, a, 0x76f988da831153b5ull,  w7 += sigma1( w5) +  w0 + sigma0( w8));
    Round(a, b, c, d, e, f, g, h, 0x983e5152ee66dfabull,  w8 += sigma1( w6) +  w1 + sigma0( w9));
    Round(h, a, b, c, d, e, f, g, 0xa831c66d2db43210ull,  w9 += sigma1( w7) +  w2 + sigma0(w10));
    Round(g, h, a, b, c, d, e, f, 0xb00327c898fb213full, w10 += sigma1( w8) +  w3 + sigma0(w11));
    Round(f, g, h, a, b, c, d, e, 0xbf597fc7beef0ee4ull, w11 += sigma1( w9) +  w4 + sigma0(w12));
    Round(e, f, g, h, a, b, c, d, 0xc6e00bf33da88fc2ull, w12 += sigma1(w10) +  w5 + sigma0(w13));
    Round(d, e, f, g, h, a, b, c, 0xd5a79147930aa725ull, w13 += sigma1(w11) +  w6 + sigma0(w14));
    Round(c, d, e, f, g, h, a, b, 0x06ca6351e003826full, w14 += sigma1(w12) +  w7 + sigma0(w15));
    Round(b, c, d, e, f, g, h, a, 0x142929670a0e6e70ull, w15 += sigma1(w13) +  w8 + sigma0( w0));

    // Rounds 32..47.
    Round(a, b, c, d, e, f, g, h, 0x27b70a8546d22ffcull,  w0 += sigma1(w14) +  w9 + sigma0( w1));
    Round(h, a, b, c, d, e, f, g, 0x2e1b21385c26c926ull,  w1 += sigma1(w15) + w10 + sigma0( w2));
    Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc5ac42aedull,  w2 += sigma1( w0) + w11 + sigma0( w3));
    Round(f, g, h, a, b, c, d, e, 0x53380d139d95b3dfull,  w3 += sigma1( w1) + w12 + sigma0( w4));
    Round(e, f, g, h, a, b, c, d, 0x650a73548baf63deull,  w4 += sigma1( w2) + w13 + sigma0( w5));
    Round(d, e, f, g, h, a, b, c, 0x766a0abb3c77b2a8ull,  w5 += sigma1( w3) + w14 + sigma0( w6));
    Round(c, d, e, f, g, h, a, b, 0x81c2c92e47edaee6ull,  w6 += sigma1( w4) + w15 + sigma0( w7));
    Round(b, c, d, e, f, g, h, a, 0x92722c851482353bull,  w7 += sigma1( w5) +  w0 + sigma0( w8));
    Round(a, b, c, d, e, f, g, h, 0xa2bfe8a14cf10364ull,  w8 += sigma1( w6) +  w1 + sigma0( w9));
    Round(h, a, b, c, d, e, f, g, 0xa81a664bbc423001ull,  w9 += sigma1( w7) +  w2 + sigma0(w10));
    Round(g, h, a, b, c, d, e, f, 0xc24b8b70d0f89791ull, w10 += sigma1( w8) +  w3 + sigma0(w11));
    Round(f, g, h, a, b, c, d, e, 0xc76c51a30654be30ull, w11 += sigma1( w9) +  w4 + sigma0(w12));
    Round(e, f, g, h, a, b, c, d, 0xd192e819d6ef5218ull, w12 += sigma1(w10) +  w5 + sigma0(w13));
    Round(d, e, f, g, h, a, b, c, 0xd69906245565a910ull, w13 += sigma1(w11) +  w6 + sigma0(w14));
    Round(c, d, e, f, g, h, a, b, 0xf40e35855771202aull, w14 += sigma1(w12) +  w7 + sigma0(w15));
    Round(b, c, d, e, f, g, h, a, 0x106aa07032bbd1b8ull, w15 += sigma1(w13) +  w8 + sigma0( w0));

    // Rounds 48..63.
    Round(a, b, c, d, e, f, g, h, 0x19a4c116b8d2d0c8ull,  w0 += sigma1(w14) +  w9 + sigma0( w1));
    Round(h, a, b, c, d, e, f, g, 0x1e376c085141ab53ull,  w1 += sigma1(w15) + w10 + sigma0( w2));
    Round(g, h, a, b, c, d, e, f, 0x2748774cdf8eeb99ull,  w2 += sigma1( w0) + w11 + sigma0( w3));
    Round(f, g, h, a, b, c, d, e, 0x34b0bcb5e19b48a8ull,  w3 += sigma1( w1) + w12 + sigma0( w4));
    Round(e, f, g, h, a, b, c, d, 0x391c0cb3c5c95a63ull,  w4 += sigma1( w2) + w13 + sigma0( w5));
    Round(d, e, f, g, h, a, b, c, 0x4ed8aa4ae3418acbull,  w5 += sigma1( w3) + w14 + sigma0( w6));
    Round(c, d, e, f, g, h, a, b, 0x5b9cca4f7763e373ull,  w6 += sigma1( w4) + w15 + sigma0( w7));
    Round(b, c, d, e, f, g, h, a, 0x682e6ff3d6b2b8a3ull,  w7 += sigma1( w5) +  w0 + sigma0( w8));
    Round(a, b, c, d, e, f, g, h, 0x748f82ee5defb2fcull,  w8 += sigma1( w6) +  w1 + sigma0( w9));
    Round(h, a, b, c, d, e, f, g, 0x78a5636f43172f60ull,  w9 += sigma1( w7) +  w2 + sigma0(w10));
    Round(g, h, a, b, c, d, e, f, 0x84c87814a1f0ab72ull, w10 += sigma1( w8) +  w3 + sigma0(w11));
    Round(f, g, h, a, b, c, d, e, 0x8cc702081a6439ecull, w11 += sigma1( w9) +  w4 + sigma0(w12));
    Round(e, f, g, h, a, b, c, d, 0x90befffa23631e28ull, w12 += sigma1(w10) +  w5 + sigma0(w13));
    Round(d, e, f, g, h, a, b, c, 0xa4506cebde82bde9ull, w13 += sigma1(w11) +  w6 + sigma0(w14));
    Round(c, d, e, f, g, h, a, b, 0xbef9a3f7b2c67915ull, w14 += sigma1(w12) +  w7 + sigma0(w15));
    Round(b, c, d, e, f, g, h, a, 0xc67178f2e372532bull, w15 += sigma1(w13) +  w8 + sigma0( w0));

    // Rounds 64..79.
    Round(a, b, c, d, e, f, g, h, 0xca273eceea26619cull,  w0 += sigma1(w14) +  w9 + sigma0( w1));
    Round(h, a, b, c, d, e, f, g, 0xd186b8c721c0c207ull,  w1 += sigma1(w15) + w10 + sigma0( w2));
    Round(g, h, a, b, c, d, e, f, 0xeada7dd6cde0eb1eull,  w2 += sigma1( w0) + w11 + sigma0( w3));
    Round(f, g, h, a, b, c, d, e, 0xf57d4f7fee6ed178ull,  w3 += sigma1( w1) + w12 + sigma0( w4));
    Round(e, f, g, h, a, b, c, d, 0x06f067aa72176fbaull,  w4 += sigma1( w2) + w13 + sigma0( w5));
    Round(d, e, f, g, h, a, b, c, 0x0a637dc5a2c898a6ull,  w5 += sigma1( w3) + w14 + sigma0( w6));
    Round(c, d, e, f, g, h, a, b, 0x113f9804bef90daeull,  w6 += sigma1( w4) + w15 + sigma0( w7));
    Round(b, c, d, e, f, g, h, a, 0x1b710b35131c471bull,  w7 += sigma1( w5) +  w0 + sigma0( w8));
    Round(a, b, c, d, e, f, g, h, 0x28db77f523047d84ull,  w8 += sigma1( w6) +  w1 + sigma0( w9));
    Round(h, a, b, c, d, e, f, g, 0x32caab7b40c72493ull,  w9 += sigma1( w7) +  w2 + sigma0(w10));
    Round(g, h, a, b, c, d, e, f, 0x3c9ebe0a15c9bebcull, w10 += sigma1( w8) +  w3 + sigma0(w11));
    Round(f, g, h, a, b, c, d, e, 0x431d67c49c100d4cull, w11 += sigma1( w9) +  w4 + sigma0(w12));
    Round(e, f, g, h, a, b, c, d, 0x4cc5d4becb3e42b6ull, w12 += sigma1(w10) +  w5 + sigma0(w13));
    Round(d, e, f, g, h, a, b, c, 0x597f299cfc657e2aull, w13 += sigma1(w11) +  w6 + sigma0(w14));
    Round(c, d, e, f, g, h, a, b, 0x5fcb6fab3ad6faecull, w14 += sigma1(w12) +  w7 + sigma0(w15));
    Round(b, c, d, e, f, g, h, a, 0x6c44198c4a475817ull, w15 += sigma1(w13) +  w8 + sigma0( w0));

    // Feed-forward: add the working variables back into the state.
    s[0] += a;
    s[1] += b;
    s[2] += c;
    s[3] += d;
    s[4] += e;
    s[5] += f;
    s[6] += g;
    s[7] += h;
}