示例#1
0
文件: perf43_8.c 项目: P1K/SAC2014
/*
 * Quick check of M128: applies it once to a fixed 128-bit vector and
 * hands the result back to the caller for inspection.
 */
__m128i mul_check(void)
{
  /*
   * Active input: high 64 bits zero, low 64 bits equal to 0xf0.
   * Other inputs that have been tried here:
   *   _mm_set_epi64x(0x0f0e0d0c0b0a0908, 0x0706050403020100)
   *   _mm_set_epi64x(0x0f0e0d030b0a0908, 0x0706050403020100)
   */
  __m128i v = _mm_set_epi64x(0x0000000000000000, 0x00000000000000f0);

  v = M128(v);
  return v;
}
示例#2
0
文件: hash.c 项目: oleid/cpuminer
/*
 * Processes uBlockCount message blocks starting at pmsg and folds them into
 * the hash context.
 *
 * ctx         - hash context. This function reads uHashSize, uRounds,
 *               uBlockLength, const1536 and state, and writes k and state.
 * pmsg        - message bytes; read with unaligned 128-bit loads and advanced
 *               by ctx->uBlockLength after every block.
 * uBlockCount - number of blocks to process.
 *
 * The working state is a 4x4 array of 128-bit words indexed [i][j]. The first
 * uHashSize / 256 values of j are copied from ctx->state (the "cv" of the
 * comments below); the remaining ones are refilled from the message for every
 * block.
 *
 * NOTE(review): TRANSFORM, SAVESTATE, ECHO_ROUND_UNROLL2 and ECHO_SUB_AND_MIX
 * are defined elsewhere. They presumably refer to locals such as t1..t4,
 * s1..s3, k1, ktemp, r, _state and _state2 by name, which would explain why
 * several of those locals are never mentioned directly in this body. Check
 * the macro definitions before renaming or removing any local.
 */
void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
{
	unsigned int r, b, i, j;
	__m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
	__m128i _state[4][4], _state2[4][4], _statebackup[4][4]; 


	// Copy the chaining-value part of the context state (j < uHashSize / 256)
	// into the local working state; the other entries are filled per block.
	for(i = 0; i < 4; i++)
		for(j = 0; j < ctx->uHashSize / 256; j++)
			_state[i][j] = ctx->state[i][j];


#if HAVE_AES_NI
	// transform cv
	// (applies TRANSFORM with the _k_ipt table to the words copied above)
	for(i = 0; i < 4; i++)
		for(j = 0; j < ctx->uHashSize / 256; j++)
		{
			TRANSFORM(_state[i][j], _k_ipt, t1, t2);
		}
#endif

	for(b = 0; b < uBlockCount; b++)
	{
		// Advance the context counter once per block (64-bit lane-wise add).
		// NOTE(review): presumably const1536 holds the per-block increment; confirm where it is initialised.
		ctx->k = _mm_add_epi64(ctx->k, ctx->const1536);

		// load message
		// Fills the entries with j >= uHashSize / 256. Word i of such an entry is
		// read from the 16-byte slot 4 * (j - uHashSize / 256) + i of pmsg.
		for(j = ctx->uHashSize / 256; j < 4; j++)
		{
			for(i = 0; i < 4; i++)
			{
				_state[i][j] = _mm_loadu_si128((__m128i*)pmsg + 4 * (j - (ctx->uHashSize / 256)) + i);

#if HAVE_AES_NI
				// transform message
				TRANSFORM(_state[i][j], _k_ipt, t1, t2);
#endif
			}
		}

		// save state
		// (kept in _statebackup so it can be XORed back in after the rounds)
		SAVESTATE(_statebackup, _state);


		// Working copy of the counter for this block; not referenced again in this
		// body, so presumably consumed by the round macros.
		k1 = ctx->k;

#if HAVE_AES_NI
		// uRounds / 2 iterations of the macro-defined round pair.
		for(r = 0; r < ctx->uRounds / 2; r++)
		{
			ECHO_ROUND_UNROLL2;
		}

#else
		// Fallback path: each iteration goes _state -> _state2 -> _state, so the
		// result is back in _state when the loop finishes.
		for(r = 0; r < ctx->uRounds / 2; r++)
		{
			// Clear the destination before the 16 ECHO_SUB_AND_MIX calls below
			// (presumably the macro accumulates into it).
			_state2[0][0] = M128(zero); _state2[1][0] = M128(zero); _state2[2][0] = M128(zero); _state2[3][0] = M128(zero);
			_state2[0][1] = M128(zero); _state2[1][1] = M128(zero); _state2[2][1] = M128(zero); _state2[3][1] = M128(zero);
			_state2[0][2] = M128(zero); _state2[1][2] = M128(zero); _state2[2][2] = M128(zero); _state2[3][2] = M128(zero);
			_state2[0][3] = M128(zero); _state2[1][3] = M128(zero); _state2[2][3] = M128(zero); _state2[3][3] = M128(zero);																			

			// First half: one call per source word of _state, writing into _state2.
			ECHO_SUB_AND_MIX(_state, 0, 0, _state2, 0, 0, 1, 2, 3);
			ECHO_SUB_AND_MIX(_state, 1, 0, _state2, 3, 1, 2, 3, 0);
			ECHO_SUB_AND_MIX(_state, 2, 0, _state2, 2, 2, 3, 0, 1);
			ECHO_SUB_AND_MIX(_state, 3, 0, _state2, 1, 3, 0, 1, 2);
			ECHO_SUB_AND_MIX(_state, 0, 1, _state2, 1, 0, 1, 2, 3);
			ECHO_SUB_AND_MIX(_state, 1, 1, _state2, 0, 1, 2, 3, 0);
			ECHO_SUB_AND_MIX(_state, 2, 1, _state2, 3, 2, 3, 0, 1);
			ECHO_SUB_AND_MIX(_state, 3, 1, _state2, 2, 3, 0, 1, 2);
			ECHO_SUB_AND_MIX(_state, 0, 2, _state2, 2, 0, 1, 2, 3);
			ECHO_SUB_AND_MIX(_state, 1, 2, _state2, 1, 1, 2, 3, 0);
			ECHO_SUB_AND_MIX(_state, 2, 2, _state2, 0, 2, 3, 0, 1);
			ECHO_SUB_AND_MIX(_state, 3, 2, _state2, 3, 3, 0, 1, 2);
			ECHO_SUB_AND_MIX(_state, 0, 3, _state2, 3, 0, 1, 2, 3);
			ECHO_SUB_AND_MIX(_state, 1, 3, _state2, 2, 1, 2, 3, 0);
			ECHO_SUB_AND_MIX(_state, 2, 3, _state2, 1, 2, 3, 0, 1);
			ECHO_SUB_AND_MIX(_state, 3, 3, _state2, 0, 3, 0, 1, 2);

			// Second half: same call pattern with the roles of the two arrays swapped.
			_state[0][0] = M128(zero); _state[1][0] = M128(zero); _state[2][0] = M128(zero); _state[3][0] = M128(zero);
			_state[0][1] = M128(zero); _state[1][1] = M128(zero); _state[2][1] = M128(zero); _state[3][1] = M128(zero);
			_state[0][2] = M128(zero); _state[1][2] = M128(zero); _state[2][2] = M128(zero); _state[3][2] = M128(zero);
			_state[0][3] = M128(zero); _state[1][3] = M128(zero); _state[2][3] = M128(zero); _state[3][3] = M128(zero);																			

			ECHO_SUB_AND_MIX(_state2, 0, 0, _state, 0, 0, 1, 2, 3);
			ECHO_SUB_AND_MIX(_state2, 1, 0, _state, 3, 1, 2, 3, 0);
			ECHO_SUB_AND_MIX(_state2, 2, 0, _state, 2, 2, 3, 0, 1);
			ECHO_SUB_AND_MIX(_state2, 3, 0, _state, 1, 3, 0, 1, 2);
			ECHO_SUB_AND_MIX(_state2, 0, 1, _state, 1, 0, 1, 2, 3);
			ECHO_SUB_AND_MIX(_state2, 1, 1, _state, 0, 1, 2, 3, 0);
			ECHO_SUB_AND_MIX(_state2, 2, 1, _state, 3, 2, 3, 0, 1);
			ECHO_SUB_AND_MIX(_state2, 3, 1, _state, 2, 3, 0, 1, 2);
			ECHO_SUB_AND_MIX(_state2, 0, 2, _state, 2, 0, 1, 2, 3);
			ECHO_SUB_AND_MIX(_state2, 1, 2, _state, 1, 1, 2, 3, 0);
			ECHO_SUB_AND_MIX(_state2, 2, 2, _state, 0, 2, 3, 0, 1);
			ECHO_SUB_AND_MIX(_state2, 3, 2, _state, 3, 3, 0, 1, 2);
			ECHO_SUB_AND_MIX(_state2, 0, 3, _state, 3, 0, 1, 2, 3);
			ECHO_SUB_AND_MIX(_state2, 1, 3, _state, 2, 1, 2, 3, 0);
			ECHO_SUB_AND_MIX(_state2, 2, 3, _state, 1, 2, 3, 0, 1);
			ECHO_SUB_AND_MIX(_state2, 3, 3, _state, 0, 3, 0, 1, 2);

		}
#endif

		
		// Fold the round output together with the saved pre-round state to form
		// the next chaining value.
		if(ctx->uHashSize == 256)
		{
			// One chaining entry per i: XOR all four round-output words and all four
			// saved words into _state[i][0].
			for(i = 0; i < 4; i++)
			{
				_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][1]);
				_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
				_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][3]);

				_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
				_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][1]);
				_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
				_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][3]);
			}
		}
		else
		{
			// Two chaining entries per i: entries 0 and 2 (output and saved) fold into
			// _state[i][0], entries 1 and 3 fold into _state[i][1].
			for(i = 0; i < 4; i++)
			{
				_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
				_state[i][1] = _mm_xor_si128(_state[i][1], _state[i][3]);

				_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
				_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);

				_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][1]);
				_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][3]);
			}
		}

		// Step to the next message block.
		pmsg += ctx->uBlockLength;
	}

#if HAVE_AES_NI
	// transform state
	// (applies TRANSFORM with the _k_opt table to all 16 words, including the
	// message entries that are no longer part of the chaining value)
	for(i = 0; i < 4; i++)
		for(j = 0; j < 4; j++)
		{
			TRANSFORM(_state[i][j], _k_opt, t1, t2);
		}
#endif

		// Despite its indentation this statement is outside every loop and #if:
		// it runs exactly once, writing the working state back into the context.
		SAVESTATE(ctx->state, _state);

}
示例#3
0
文件: perf43_8.c 项目: P1K/SAC2014
/*
 * Simulates an implementation of a
 * Littlun-like cipher many times, for speed
 * evaluation.
 *
 * What the function does, in order:
 *   1. runs a 2^31-iteration integer busy loop whose result (dum) is printed
 *      at the end (presumably a warm-up that the compiler cannot discard);
 *   2. pushes a fixed 128-bit block through 16 SB/M128 pairs, untimed;
 *   3. reads the cycle counter twice via rdtsc() and prints the difference.
 *
 * NOTE(review): the rounds that belong between the two rdtsc() calls are
 * commented out, so the printed cycle count currently covers an empty region.
 *
 * Returns the block as it stands after the 16 untimed SB/M128 pairs.
 */
__m128i dummy_cipher_eval(void)
{
  enum { UNTIMED_ROUNDS = 16 };

  __m128i x, k;
  unsigned long long spin, tick1, tick2, dum;
  int round;

  x = _mm_set_epi64x(0x0001020304050607, 0x08090a0b0c0d0e0f);
  k = _mm_set_epi64x(0x0a030d0c0f050607, 0x08090a000c0d0e0f);
  (void)k; /* only consumed by the disabled timed rounds below */

  /* Busy loop; uses its own counter so tick1 is purely a timestamp. */
  dum = 0;
  for (spin = 0; spin < 1ull << 31; spin++)
    dum += 2*spin & (~spin | (spin >> 2));

  /* Untimed SB/M128 pairs applied to the block before measuring. */
  for (round = 0; round < UNTIMED_ROUNDS; round++)
  {
    x = SB(x);
    x = M128(x);
  }

  tick1 = rdtsc();

  /* Disabled timed section: 8 rounds of key-add / SB / M128, then a final
   * key addition. Re-enable to measure the cipher itself. */
//  // r1
//  x = _mm_xor_si128(x,k);
//  x = SB(x);
//  x = M128(x);
//  // r2
//  x = _mm_xor_si128(x,k);
//  x = SB(x);
//  x = M128(x);
//  // r3
//  x = _mm_xor_si128(x,k);
//  x = SB(x);
//  x = M128(x);
//  // r4
//  x = _mm_xor_si128(x,k);
//  x = SB(x);
//  x = M128(x);
//  // r5
//  x = _mm_xor_si128(x,k);
//  x = SB(x);
//  x = M128(x);
//  // r6
//  x = _mm_xor_si128(x,k);
//  x = SB(x);
//  x = M128(x);
//  // r7
//  x = _mm_xor_si128(x,k);
//  x = SB(x);
//  x = M128(x);
//  // r8
//  x = _mm_xor_si128(x,k);
//  x = SB(x);
//  x = M128(x);
//  // final
//  x = _mm_xor_si128(x,k);

  tick2 = rdtsc();

  printf("%llu ~ %llu cycles\n\n", dum, tick2 - tick1); 

  return x;
}