示例#1
0
文件: sha256.c 项目: 01org/linux-sgx
/*
 * SHA-256 compression step.
 *
 * Mixes sixteen words fetched from `data` through BigEndian() into the eight
 * chaining words stored in `iv`, running 64 rounds. The message schedule is
 * held in a 16-entry circular buffer: from round 16 onwards each new schedule
 * word overwrites the slot of the word it replaces.
 */
static void sha256_compress(unsigned int* iv, const uint8_t* data) {
  unsigned int a = iv[0];
  unsigned int b = iv[1];
  unsigned int c = iv[2];
  unsigned int d = iv[3];
  unsigned int e = iv[4];
  unsigned int f = iv[5];
  unsigned int g = iv[6];
  unsigned int h = iv[7];
  unsigned int schedule[16];
  unsigned int round;

  for (round = 0; round < 64; ++round) {
    unsigned int t1;
    unsigned int t2;

    if (round < 16) {
      /* The first sixteen schedule words come straight from the input. */
      schedule[round] = BigEndian(&data);
      t1 = schedule[round];
    } else {
      /* Later words are derived in place from the circular buffer. */
      unsigned int s0 = schedule[(round + 1) & 0x0f];
      unsigned int s1 = schedule[(round + 14) & 0x0f];

      s0 = sigma0(s0);
      s1 = sigma1(s1);
      schedule[round & 0xf] += s0 + s1 + schedule[(round + 9) & 0xf];
      t1 = schedule[round & 0xf];
    }

    t1 += h + Sigma1(e) + Ch(e, f, g) + k256[round];
    t2 = Sigma0(a) + Maj(a, b, c);

    /* Shift the working variables down by one position. */
    h = g;
    g = f;
    f = e;
    e = d + t1;
    d = c;
    c = b;
    b = a;
    a = t1 + t2;
  }

  /* Fold the working variables back into the chaining value. */
  iv[0] += a;
  iv[1] += b;
  iv[2] += c;
  iv[3] += d;
  iv[4] += e;
  iv[5] += f;
  iv[6] += g;
  iv[7] += h;
}
示例#2
0
/*
 * Run the SHA-256 compression function over `num` consecutive blocks starting
 * at `in`, accumulating the result into ctx->h.
 *
 * When `host` is non-zero the input is read directly as SHA_LONG words and
 * the cursor is moved forward by SHA256_CBLOCK after the first sixteen
 * rounds. Otherwise every word is fetched with HOST_c2l(), which is relied
 * upon to advance the cursor (nothing else in that path does).
 */
static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
{
	unsigned MD32_REG_T a, b, c, d, e, f, g, h;
	unsigned MD32_REG_T s0, s1, T1, T2;
	SHA_LONG X[16];
	SHA_LONG l;
	const unsigned char *data = in;
	int i;

	for (; num != 0; num--) {
		a = ctx->h[0];
		b = ctx->h[1];
		c = ctx->h[2];
		d = ctx->h[3];
		e = ctx->h[4];
		f = ctx->h[5];
		g = ctx->h[6];
		h = ctx->h[7];

		/* Rounds 0..15: schedule words are the input words themselves. */
		for (i = 0; i < 16; i++) {
			if (host) {
				l = ((const SHA_LONG *)data)[i];
			} else {
				HOST_c2l(data, l);
			}
			X[i] = l;
			T1 = X[i];
			T1 += h + Sigma1(e) + Ch(e, f, g) + K256[i];
			T2 = Sigma0(a) + Maj(a, b, c);
			h = g;
			g = f;
			f = e;
			e = d + T1;
			d = c;
			c = b;
			b = a;
			a = T1 + T2;
		}

		/* The word-wise reads above did not move the cursor. */
		if (host) {
			data += SHA256_CBLOCK;
		}

		/* Rounds 16..63: schedule words are derived in the 16-entry ring X. */
		for (; i < 64; i++) {
			s0 = X[(i + 1) & 0x0f];
			s0 = sigma0(s0);
			s1 = X[(i + 14) & 0x0f];
			s1 = sigma1(s1);

			X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
			T1 = X[i & 0xf];
			T1 += h + Sigma1(e) + Ch(e, f, g) + K256[i];
			T2 = Sigma0(a) + Maj(a, b, c);
			h = g;
			g = f;
			f = e;
			e = d + T1;
			d = c;
			c = b;
			b = a;
			a = T1 + T2;
		}

		/* Feed the working variables back into the context. */
		ctx->h[0] += a;
		ctx->h[1] += b;
		ctx->h[2] += c;
		ctx->h[3] += d;
		ctx->h[4] += e;
		ctx->h[5] += f;
		ctx->h[6] += g;
		ctx->h[7] += h;
	}
}
/**
 * SHA-512 compression function - variant for 32-bit machines.
 *
 * Instead of shuffling eight working variables every round, the state is
 * kept in a window F[0..7] that slides one slot down through X per round
 * (A and E are held in locals and spilled to F[0] / F[4]). Each schedule
 * word is stored at F[8], so earlier words remain reachable at fixed
 * offsets above the window.
 *
 * @param res  Receives the resulting hash value (stored with PUSH64)
 * @param hash The chaining input value (loaded with PULL64)
 * @param in   The message input, consumed as sixteen 64-bit words
 */
void sha512_comp (hashblock res, const hashblock hash, const messageblock in)
{
	const uint64_t *W = in;
	uint64_t A, E, T;
	uint64_t X[9 + 80];
	uint64_t *F;
	uint64_t H[8];
	int i;

	/* Load the chaining value. */
	for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
		H[i] = PULL64(hash[i*8]);
	}

	/* Place the window at the top of X and seed it from H. */
	F = X + 80;
	A = H[0];
	E = H[4];
	for (i = 1; i < 4; i++) {
		F[i] = H[i];
		F[i + 4] = H[i + 4];
	}

	/* Rounds 0..15: schedule words are the message words. */
	for (i = 0; i < 16; i++, F--) {
#ifdef B_ENDIAN
		T = W[i];
#else
		T = PULL64(W[i]);
#endif
		F[0] = A;
		F[4] = E;
		F[8] = T;
		T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
		E = F[3] + T;
		A = T + Sigma0(A) + Maj(A, F[1], F[2]);
	}

	/* Rounds 16..79: schedule words are derived from the ones stored above. */
	for (; i < 80; i++, F--) {
		T = sigma0(F[8 + 16 - 1]);
		T += sigma1(F[8 + 16 - 14]);
		T += F[8 + 16] + F[8 + 16 - 9];

		F[0] = A;
		F[4] = E;
		F[8] = T;
		T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
		E = F[3] + T;
		A = T + Sigma0(A) + Maj(A, F[1], F[2]);
	}

	/* Add the final working variables back into the chaining value. */
	H[0] += A;
	H[4] += E;
	for (i = 1; i < 4; i++) {
		H[i] += F[i];
		H[i + 4] += F[i + 4];
	}

	/* Store the result. */
	for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
		PUSH64(H[i], res[i*8]);
	}
}
示例#4
0
文件: sha512.c 项目: placrosse/ring
/*
 * SHA-512 block function operating on `num` blocks of sixteen 64-bit words.
 *
 * This formulation is meant to give better results, in both size and speed,
 * on 32-bit CPUs with fewer than ~24 registers: the working variables live
 * in a window F[0..7] that slides down through X by one slot per round, and
 * every schedule word is parked at F[8] so older words stay addressable at
 * fixed offsets above the window.
 */
void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num) {
  uint64_t X[9 + 80];
  uint64_t *F;
  uint64_t A, E, T;
  int i;

  for (; num != 0; num--, W += 16) {
    /* Seed the window from the current state. */
    F = X + 80;
    A = state[0];
    E = state[4];
    for (i = 1; i <= 3; i++) {
      F[i] = state[i];
      F[i + 4] = state[i + 4];
    }

    for (i = 0; i < 80; i++, F--) {
      if (i < 16) {
        /* Schedule word taken directly from the input. */
        T = from_be_u64(W[i]);
      } else {
        /* Schedule word derived from the words stored above the window. */
        T = sigma0(F[8 + 16 - 1]);
        T += sigma1(F[8 + 16 - 14]);
        T += F[8 + 16] + F[8 + 16 - 9];
      }

      F[0] = A;
      F[4] = E;
      F[8] = T;
      T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
      E = F[3] + T;
      A = T + Sigma0(A) + Maj(A, F[1], F[2]);
    }

    /* Feed the working variables back into the state. */
    state[0] += A;
    state[4] += E;
    for (i = 1; i <= 3; i++) {
      state[i] += F[i];
      state[i + 4] += F[i + 4];
    }
  }
}
/**
 * SHA-512 compression function - variant for 64-bit machines.
 *
 * Uses eight explicit working variables and a 16-entry circular buffer for
 * the message schedule.
 *
 * @param res  Receives the resulting hash value (stored with PUSH64)
 * @param hash The chaining input value (loaded with PULL64)
 * @param in   The message input, consumed as sixteen 64-bit words
 */
void sha512_comp (hashblock res, const hashblock hash, const messageblock in)
{
	// The explicit cast is needed because this is compiled as C++.
	const uint64_t *W = reinterpret_cast<const uint64_t*>(in);
	uint64_t a, b, c, d, e, f, g, h;
	uint64_t s0, s1, T1, T2;
	uint64_t X[16];
	uint64_t H[8];
	int i;

	// Load the chaining value.
	for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
		H[i] = PULL64(hash[i*8]);
	}

	a = H[0];
	b = H[1];
	c = H[2];
	d = H[3];
	e = H[4];
	f = H[5];
	g = H[6];
	h = H[7];

	for (i = 0; i < 80; i++) {
		if (i < 16) {
			// Rounds 0..15 use the message words directly.
#ifdef B_ENDIAN
			X[i] = W[i];
#else
			X[i] = PULL64(W[i]);
#endif
			T1 = X[i];
		} else {
			// Rounds 16..79 derive the schedule word in place.
			s0 = X[(i + 1) & 0x0f];
			s0 = sigma0(s0);
			s1 = X[(i + 14) & 0x0f];
			s1 = sigma1(s1);

			X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
			T1 = X[i & 0xf];
		}

		T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
		T2 = Sigma0(a) + Maj(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	// Add the working variables back into the chaining value.
	H[0] += a;
	H[1] += b;
	H[2] += c;
	H[3] += d;
	H[4] += e;
	H[5] += f;
	H[6] += g;
	H[7] += h;

	// Store the result.
	for (i = 0; i < SHA512_DIGEST_LENGTH/8; i++) {
		PUSH64(H[i], res[i*8]);
	}
}
示例#6
0
/**
 * Flattens the problem data into plain double arrays.
 *
 * - XU_arr:     X[t] followed by U[t] for t = 0 .. T-2, then X[T-1]
 * - Sigma0_arr: every entry (i, j) of SqrtSigma0*SqrtSigma0, with i varying slowest
 * - l_arr:      entries C_DIM .. X_DIM-1 of x0
 * - params_arr: alpha_belief, alpha_control, alpha_final_belief
 */
void setupCasadiVars(const std::vector<Matrix<C_DIM> >& X, const std::vector<Matrix<U_DIM> >& U, double* XU_arr, double* Sigma0_arr, double* l_arr, double* params_arr)
{
	// Interleaved states and controls for every step except the last.
	double* xuOut = XU_arr;
	for (int t = 0; t < T-1; ++t) {
		for (int i = 0; i < C_DIM; ++i) {
			*xuOut++ = X[t][i];
		}
		for (int i = 0; i < U_DIM; ++i) {
			*xuOut++ = U[t][i];
		}
	}

	// The final state has no matching control.
	for (int i = 0; i < C_DIM; ++i) {
		*xuOut++ = X[T-1][i];
	}

	// Initial covariance, rebuilt from its square root.
	Matrix<X_DIM,X_DIM> fullSigma0 = SqrtSigma0*SqrtSigma0;
	double* sigmaOut = Sigma0_arr;
	for (int i = 0; i < X_DIM; ++i) {
		for (int j = 0; j < X_DIM; ++j) {
			*sigmaOut++ = fullSigma0(i,j);
		}
	}

	// Tail of the initial state vector.
	double* lOut = l_arr;
	for (int i = C_DIM; i < X_DIM; ++i) {
		*lOut++ = x0[i];
	}

	// Cost weights.
	params_arr[0] = alpha_belief;
	params_arr[1] = alpha_control;
	params_arr[2] = alpha_final_belief;
}
示例#7
0
/**
 * One round of SHA-256.
 *
 * Only d and h are written (both are taken by reference); every other
 * argument is read-only input to the round.
 */
void inline __attribute__((always_inline)) Round(__m256i a, __m256i b, __m256i c, __m256i& d, __m256i e, __m256i f, __m256i g, __m256i& h, __m256i k)
{
    // Computed before h is overwritten below.
    __m256i mixed = Add(h, Sigma1(e), Ch(e, f, g), k);

    d = Add(d, mixed);
    h = Add(mixed, Add(Sigma0(a), Maj(a, b, c)));
}
/*****************************************
 *       sha256 compression function     *
 *                                       *
 *   res   receives the resulting value  *
 *   hash  points to chaining input      *
 *   in    points to the message input   *
 *                                       *
 *****************************************/
void sha256_comp (hashblock res, const hashblock hash, const void *in)
{
	uint32_t a, b, c, d, e, f, g, h;
	uint32_t s0, s1, T1, T2;
	uint32_t H[8];
	uint32_t X[16];
	uint32_t l;
	int i;
	// The explicit cast is needed because this is compiled as C++.
	const unsigned char *data = static_cast<const unsigned char*>(in);

	// Load the chaining value; HOST_c2l is handed the pointer itself on
	// every iteration.
	for (i = 0; i < SHA256_DIGEST_LENGTH/4; i++) {
		HOST_c2l(hash, H[i]);
	}

	a = H[0];
	b = H[1];
	c = H[2];
	d = H[3];
	e = H[4];
	f = H[5];
	g = H[6];
	h = H[7];

	// Rounds 0..15: schedule words are read from the message.
	for (i = 0; i < 16; i++) {
		HOST_c2l(data, l);
		X[i] = l;
		T1 = l;
		T1 += h + Sigma1(e) + Ch(e, f, g) + K256[i];
		T2 = Sigma0(a) + Maj(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	// Rounds 16..63: schedule words are derived in the 16-entry ring X.
	for (; i < 64; i++) {
		s0 = X[(i + 1) & 0x0f];
		s0 = sigma0(s0);
		s1 = X[(i + 14) & 0x0f];
		s1 = sigma1(s1);

		X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
		T1 = X[i & 0xf];
		T1 += h + Sigma1(e) + Ch(e, f, g) + K256[i];
		T2 = Sigma0(a) + Maj(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	// Add the working variables back into the chaining value.
	H[0] += a;
	H[1] += b;
	H[2] += c;
	H[3] += d;
	H[4] += e;
	H[5] += f;
	H[6] += g;
	H[7] += h;

	// Store the result.
	for (i = 0; i < SHA256_DIGEST_LENGTH/4; i++) {
		HOST_l2c(H[i], res);
	}
}
示例#9
0
/*
 * SHA-512 compression: mixes the sixteen 64-bit words loaded from buf with
 * LOAD64H into md->sha512.state over 80 rounds.
 */
void  sha512_compress(psDigestContext_t * md, unsigned char *buf)
#endif
{
	uint64 S[8], W[80], t0, t1;
	int i;

	/* Work on a copy of the state; it is added back at the end. */
	for (i = 0; i < 8; i++) {
		S[i] = md->sha512.state[i];
	}

	/*
	 * Message schedule: W[0..15] come from the input buffer, W[16..79] are
	 * derived from earlier entries.
	 */
	for (i = 0; i < 80; i++) {
		if (i < 16) {
			LOAD64H(W[i], buf + (8*i));
		} else {
			W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
		}
	}

#ifndef PS_SHA512_IMPROVE_PERF_INCREASE_CODESIZE
	/* Compact form: one round per iteration, shifting the state each time. */
	for (i = 0; i < 80; i++) {
		int j;

		t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
		t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
		for (j = 7; j > 0; j--) {
			S[j] = S[j - 1];
		}
		S[4] += t0;
		S[0] = t0 + t1;
	}
#else
/* One round; the caller rotates the roles of the state words instead of moving them. */
#define RND(a,b,c,d,e,f,g,h,i) \
	t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
	t1 = Sigma0(a) + Maj(a, b, c); \
	d += t0; \
	h  = t0 + t1;

	/* Unrolled form: eight rounds per iteration. */
	i = 0;
	while (i < 80) {
		RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
		RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
		RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
		RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
		RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
		RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
		RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
		RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
		i += 8;
	}
#endif /* PS_SHA512_IMPROVE_PERF_INCREASE_CODESIZE */

	/* Feedback: add the working copy into the stored state. */
	for (i = 0; i < 8; i++) {
		md->sha512.state[i] += S[i];
	}
}
示例#10
0
文件: sha.c 项目: GaloisInc/hacrypto
/*
 * SHA-256 compression of the sixteen words fetched from `in` with
 * HOST_c2l(), accumulated into ctx->h.
 *
 * The message schedule is kept in the 16-entry ring X. Round constants and
 * schedule operands are staged through the temporaries Ki and t.
 */
void sha256_block_data_order (SHA256_CTX *ctx, const void *in)
{
	unsigned MD32_REG_T a, b, c, d, e, f, g, h;
	unsigned MD32_REG_T s0, s1, T1, T2, t;
	SHA_LONG X[16];
	SHA_LONG l, Ki;
	int i;
	const unsigned char *data = in;

	a = ctx->h[0];
	b = ctx->h[1];
	c = ctx->h[2];
	d = ctx->h[3];
	e = ctx->h[4];
	f = ctx->h[5];
	g = ctx->h[6];
	h = ctx->h[7];

	for (i = 0; i < 64; i++) {
		if (i < 16) {
			/* Rounds 0..15 use the input words directly. */
			HOST_c2l(data, l);
			X[i] = l;
			T1 = l;
		} else {
			/* Rounds 16..63 derive the schedule word in place. */
			s0 = X[(i + 1) & 0x0f];
			s0 = sigma0(s0);
			s1 = X[(i + 14) & 0x0f];
			s1 = sigma1(s1);

			T1 = X[i & 0xf];
			t = X[(i + 9) & 0xf];
			T1 += s0 + s1 + t;
			X[i & 0xf] = T1;
		}

		Ki = K256[i];
		T1 += h + Sigma1(e) + Ch(e, f, g) + Ki;
		T2 = Sigma0(a) + Maj(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}

	/* Feed the working variables back into the context. */
	ctx->h[0] += a;
	ctx->h[1] += b;
	ctx->h[2] += c;
	ctx->h[3] += d;
	ctx->h[4] += e;
	ctx->h[5] += f;
	ctx->h[6] += g;
	ctx->h[7] += h;
}
示例#11
0
/*
 * SHA-256 compression: mixes the sixteen 32-bit words loaded from
 * md->sha256.buf with LOAD32H into md->sha256.state over 64 rounds.
 */
static void sha256_compress(hash_state * md)
#endif
{
    unsigned long S[8], W[64], t0, t1;
    int i;
    int j;

    _ARGCHK(md != NULL);

    /* Work on a copy of the state; it is added back at the end. */
    for (i = 0; i < 8; i++) {
        S[i] = md->sha256.state[i];
    }

    /*
     * Message schedule: W[0..15] come from the buffered block, W[16..63]
     * are derived from earlier entries.
     */
    for (i = 0; i < 64; i++) {
        if (i < 16) {
            LOAD32H(W[i], md->sha256.buf + (4*i));
        } else {
            W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
        }
    }

    /* 64 rounds, shifting the working state down by one slot each time. */
    for (i = 0; i < 64; i++) {
        t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
        t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
        for (j = 7; j > 0; j--) {
            S[j] = S[j - 1];
        }
        S[4] += t0;
        S[0] = t0 + t1;
    }

    /* Feedback: add the working copy into the stored state. */
    for (i = 0; i < 8; i++) {
        md->sha256.state[i] += S[i];
    }
}
示例#12
0
/*
 * SHA-512 compression: mixes the sixteen 64-bit words loaded from buf with
 * LOAD64H into md->sha512.state over 80 rounds. Always returns CRYPT_OK.
 */
static int  sha512_compress(hash_state * md, unsigned char *buf)
#endif
{
    ulong64 S[8], W[80], t0, t1;
    int i;

    /* Work on a copy of the state; it is added back at the end. */
    for (i = 0; i < 8; i++) {
        S[i] = md->sha512.state[i];
    }

    /*
     * Message schedule: W[0..15] come from the input buffer, W[16..79] are
     * derived from earlier entries.
     */
    for (i = 0; i < 80; i++) {
        if (i < 16) {
            LOAD64H(W[i], buf + (8*i));
        } else {
            W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
        }
    }

#ifdef LTC_SMALL_CODE
    /* Compact form: one round per iteration, shifting the state each time. */
    for (i = 0; i < 80; i++) {
        int j;

        t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
        t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
        for (j = 7; j > 0; j--) {
            S[j] = S[j - 1];
        }
        S[4] += t0;
        S[0] = t0 + t1;
    }
#else
/* One round; the caller rotates the roles of the state words instead of moving them. */
#define RND(a,b,c,d,e,f,g,h,i) \
    t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
    t1 = Sigma0(a) + Maj(a, b, c); \
    d += t0; \
    h  = t0 + t1;

    /* Unrolled form: eight rounds per iteration. */
    i = 0;
    while (i < 80) {
        RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
        RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
        RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
        RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
        RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
        RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
        RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
        RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
        i += 8;
    }
#endif

    /* Feedback: add the working copy into the stored state. */
    for (i = 0; i < 8; i++) {
        md->sha512.state[i] += S[i];
    }

    return CRYPT_OK;
}
示例#13
0
/*
 * SHA-256 transform on __m128i vectors.
 *
 * Takes the sixteen schedule words in `block`, expands them to 64, runs the
 * 64 rounds starting from the eight words in `state`, and writes
 * state[i] + (final working word i) to dst[i]. `state` itself is only read.
 */
static inline void sha256_transform(__m128i *state, __m128i *block, __m128i *dst)
{
    /* SHA-256 round constants, in round order. */
    static const unsigned int round_k[64] = {
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
        0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
        0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
        0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
        0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };
    __m128i W[64], t1, t2;
    __m128i v[8];
    int i;

    /* Schedule words 0..15 are the input block. */
    for (i = 0; i < 16; i++) {
        W[i] = block[i];
    }

    /* Schedule words 16..63 are derived from earlier ones. */
    for (i = 16; i < 64; i++) {
        W[i] = add4(sigma1(W[i - 2]), W[i - 7], sigma0(W[i - 15]), W[i - 16]);
    }

    /* Read the existing state into the working words a..h = v[0..7]. */
    for (i = 0; i < 8; i++) {
        v[i] = state[i];
    }

    /*
     * 64 rounds. Each round shifts the working words down by one slot,
     * which yields the same values as rotating the roles of eight named
     * variables through an unrolled sequence.
     */
    for (i = 0; i < 64; i++) {
        t1 = add5(v[7], Sigma1(v[4]), Ch(v[4], v[5], v[6]), _mm_set1_epi32((int)round_k[i]), W[i]);
        t2 = add2(Sigma0(v[0]), Maj(v[0], v[1], v[2]));
        v[7] = v[6];
        v[6] = v[5];
        v[5] = v[4];
        v[4] = add2(v[3], t1);
        v[3] = v[2];
        v[2] = v[1];
        v[1] = v[0];
        v[0] = add2(t1, t2);
    }

    /* Output = input state + working words. */
    for (i = 0; i < 8; i++) {
        dst[i] = add2(state[i], v[i]);
    }
}
示例#14
0
/*
 * Compress `nblocks` consecutive blocks from `in` into `state`.
 *
 * For each block, sixteen 64-bit words are loaded with CC_LOAD64_BE,
 * expanded to an 80-word schedule and mixed into the state over 80 rounds.
 * The input cursor then advances by CCSHA512_BLOCK_SIZE.
 */
void ccsha512_ltc_compress(ccdigest_state_t state, unsigned long nblocks, const void *in)
{
    uint64_t S[8], W[80], t0, t1;
    int i;
    uint64_t *s = ccdigest_u64(state);
    const unsigned char *buf = in;

    for (; nblocks != 0; nblocks--) {
        /* Work on a copy of the state; it is added back at the end. */
        for (i = 0; i < 8; i++) {
            S[i] = s[i];
        }

        /*
         * Message schedule: W[0..15] come from the input buffer,
         * W[16..79] are derived from earlier entries.
         */
        for (i = 0; i < 80; i++) {
            if (i < 16) {
                CC_LOAD64_BE(W[i], buf + (8*i));
            } else {
                W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
            }
        }

    #ifdef CC_SMALL_CODE
        /* Compact form: one round per iteration, shifting the state each time. */
        for (i = 0; i < 80; i++) {
            int j;

            t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i];
            t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);
            for (j = 7; j > 0; j--) {
                S[j] = S[j - 1];
            }
            S[4] += t0;
            S[0] = t0 + t1;
        }
    #else
    /* One round; the caller rotates the roles of the state words instead of moving them. */
    #define RND(a,b,c,d,e,f,g,h,i) \
        t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
        t1 = Sigma0(a) + Maj(a, b, c); \
        d += t0; \
        h  = t0 + t1;

        /* Unrolled form: eight rounds per iteration. */
        i = 0;
        while (i < 80) {
            RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
            RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
            RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
            RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
            RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
            RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
            RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
            RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
            i += 8;
        }
    #endif

        /* Feedback: add the working copy into the caller's state. */
        for (i = 0; i < 8; i++) {
            s[i] += S[i];
        }

        buf += CCSHA512_BLOCK_SIZE;
    }
}