예제 #1
0
/*
 * AEGIS-128 initialization.
 *
 * Fills the five 128-bit state words from the key, the IV and two fixed
 * constants, then runs 10 state-update rounds. After each round state[0]
 * is XORed with a value that alternates between the key (even rounds) and
 * key ^ IV (odd rounds).
 *
 * key   - 16 bytes of key material; no alignment requirement
 * iv    - 16 bytes of IV; no alignment requirement
 * state - array of five __m128i words, completely overwritten
 */
void aegis128_initialization(const unsigned char *key, const unsigned char *iv, __m128i *state)
{
        int i;
        __m128i  tmp;
        const __m128i const0 = _mm_set_epi8(0x62,0x79,0xe9,0x90,0x59,0x37,0x22,0x15,0x0d,0x08,0x05,0x03,0x02,0x01,0x1,0x0);
        const __m128i const1 = _mm_set_epi8(0xdd,0x28,0xb5,0x73,0x42,0x31,0x11,0x20,0xf1,0x2f,0xc2,0x6d,0x55,0x18,0x3d,0xdb);

        /* key/iv are plain byte pointers, so 16-byte alignment cannot be
           assumed: an aligned load would fault on unaligned input. */
        __m128i  keytmp = _mm_loadu_si128((const __m128i*)key);
        __m128i  ivtmp  = _mm_loadu_si128((const __m128i*)iv);

        state[0] = _mm_xor_si128(ivtmp, keytmp);
        state[1] = const1;
        state[2] = const0;
        state[3] = _mm_xor_si128(keytmp, const0);
        state[4] = _mm_xor_si128(keytmp, const1);

        /* keytmp toggles between key and key^iv: each round XORs iv in again. */
        keytmp   = _mm_xor_si128(keytmp, ivtmp);
        for (i = 0; i < 10; i++)  {
             /* state update: every word is AES-rounded with its predecessor,
                state[0] wraps around to the old state[4] saved in tmp */
             tmp = state[4];
             state[4] = _mm_aesenc_si128(state[3], state[4]);
             state[3] = _mm_aesenc_si128(state[2], state[3]);
             state[2] = _mm_aesenc_si128(state[1], state[2]);
             state[1] = _mm_aesenc_si128(state[0], state[1]);
             state[0] = _mm_aesenc_si128(tmp, state[0]);

             /* inject the alternating key / key^iv word into state[0] */
             keytmp   = _mm_xor_si128(keytmp, ivtmp);
             state[0] = _mm_xor_si128(state[0], keytmp);
        }
}
예제 #2
0
/*
 * Encrypt one 128-bit block in place with AES-128.
 *
 * state - block to encrypt; overwritten with the result
 * key   - expanded key schedule, key[0]..key[10]
 */
static inline void aes_enc_128(__m128i *state, __m128i *key)
{
	int round;

	/* round 0: key whitening */
	*state = _mm_xor_si128(*state, key[0]);

	/* rounds 1..9: full AES rounds */
	for (round = 1; round < 10; ++round)
		*state = _mm_aesenc_si128(*state, key[round]);

	/* round 10: final round (round == 10 here) */
	*state = _mm_aesenclast_si128(*state, key[round]);
}
예제 #3
0
/*
 * Hash a 32-byte message into a 32-byte digest.
 *
 * The two 128-bit halves of msg are run through ROUNDS rounds. Each round
 * applies AES_PER_ROUND AESENC steps to both halves (using rcon as round
 * key, shifted left by one bit after every step) and then interleaves the
 * 32-bit words of the two halves. The original message is XORed into the
 * result (feed-forward) before it is written out.
 *
 * hash - receives 32 bytes; no alignment requirement
 * msg  - 32 input bytes; no alignment requirement
 *
 * Returns 0.
 */
int haraka256256(unsigned char *hash, const unsigned char *msg) {
	int i, j;
	__m128i s[2], tmp, rcon;

	/* msg is a plain byte pointer: use unaligned loads, matching the
	   unaligned stores used for hash below. */
	const __m128i m0 = _mm_loadu_si128(&((const __m128i*)msg)[0]);
	const __m128i m1 = _mm_loadu_si128(&((const __m128i*)msg)[1]);

	// set initial round constant
	rcon = _mm_set_epi32(1,1,1,1);

	// initialize state to msg
	s[0] = m0;
	s[1] = m1;

	for (i = 0; i < ROUNDS; ++i) {
		// aes round(s)
		for (j = 0; j < AES_PER_ROUND; ++j) {
			s[0] = _mm_aesenc_si128(s[0], rcon);
			s[1] = _mm_aesenc_si128(s[1], rcon);
			rcon = _mm_slli_epi32(rcon, 1);
		}

		// mixing: interleave the 32-bit words of both halves
		tmp = _mm_unpacklo_epi32(s[0], s[1]);
		s[1] = _mm_unpackhi_epi32(s[0], s[1]);
		s[0] = tmp;
	}

	// xor message to get DM effect (feed-forward)
	s[0] = _mm_xor_si128(s[0], m0);
	s[1] = _mm_xor_si128(s[1], m1);

	// store result
	_mm_storeu_si128((__m128i*)hash, s[0]);
	_mm_storeu_si128((__m128i*)(hash + 16), s[1]);

	/* The function is declared int; falling off the end left the return
	   value undefined for any caller that inspects it. */
	return 0;
}
예제 #4
0
/*
 * Reduced-round AES on one block: key whitening followed by four AESENC
 * rounds (no AESENCLAST round is applied).
 *
 * in  - 16 input bytes (unaligned load)
 * out - 16 output bytes (unaligned store)
 * key - round keys key[0]..key[4]
 */
void AESNI_encrypt4(const uint8_t *in, uint8_t *out, const AUX_KEY key)
{
    __m128i state = _mm_loadu_si128 ((__m128i*)in);
    int round;

    state = _mm_xor_si128 (state, key[0]);
    for (round = 1; round <= 4; ++round) {
        state = _mm_aesenc_si128 (state, key[round]);
    }

    _mm_storeu_si128 ((__m128i*)out, state);
}
예제 #5
0
파일: aes.cpp 프로젝트: mbroz/PHCtest
// Applies AES_ROUNDS AESENC rounds (after an initial XOR with round key 0)
// to the 128-bit value u, in place. Round keys are built from subkeys64.
inline void AES_reduced_opt(int128 &u)
{
	// Build the 128-bit round keys from their two 64-bit halves.
	__m128i rk[AES_ROUNDS + 1];
	for (unsigned k = 0; k<AES_ROUNDS + 1; ++k)
	{
		rk[k] = _mm_set_epi64x(subkeys64[k][1], subkeys64[k][0]);
	}

	// Load u (i1 = high half, i0 = low half) and whiten with round key 0.
	__m128i state = _mm_xor_si128(_mm_set_epi64x(u.i1, u.i0), rk[0]);

	// One AESENC per round, using round keys 1..AES_ROUNDS.
	for (unsigned r = 1; r<AES_ROUNDS + 1; ++r)
	{
		state = _mm_aesenc_si128(state, rk[r]);
	}

	// Write both halves back.
	u.i0 = _mm_extract_epi64(state, 0);
	u.i1 = _mm_extract_epi64(state, 1);
}
예제 #6
0
파일: xts_fast.c 프로젝트: hkerem/dcrypt
/*
 * Reports whether AES-NI can be used.
 *
 * Returns 1 immediately when CPUID leaf 1 advertises AES (ECX bit 25).
 * Otherwise, if ECX bit 31 is set and CPUID leaf 0x40000000 returns the
 * signature "Microsoft Hv", an AESENC instruction is executed under an
 * SEH guard and its output compared with hard-coded expected values.
 *
 * NOTE(review): this excerpt stops before the end of the function (the
 * closing #endif, the final return and the closing brace are not visible),
 * so how `res` is returned cannot be confirmed from here.
 */
int _declspec(noinline) _stdcall xts_aes_ni_available()
{
	int           CPUInfo[4], res = 0;
	__m128i       enc;
#ifdef _M_IX86
	unsigned char fpustate[32]; // buffer handed to save_fpu_state/load_fpu_state (32-bit x86 builds only)
#endif

	// check for AES-NI support via CPUID.01H:ECX.AES[bit 25]
	__cpuid(CPUInfo, 1);
	if ( CPUInfo[2] & 0x02000000 ) return 1;

	// Special workaround for AES-NI on Hyper-V server and virtual machines
	// ECX bit 31 clear -> give up (presumably the "hypervisor present" flag; TODO confirm)
	if ( (CPUInfo[2] & 0x80000000) == 0 ) return 0;
	__cpuid(CPUInfo, 0x40000000);
	// The three multi-character constants spell "Micr" "osof" "t Hv" when laid out little-endian
	if ( CPUInfo[1] != 'rciM' || CPUInfo[2] != 'foso' || CPUInfo[3] != 'vH t' ) return 0;

#ifdef _M_IX86
	// NOTE(review): a negative result from save_fpu_state skips the probe entirely — confirm its contract
	if (save_fpu_state(fpustate) >= 0)
	{
#endif
		// Execute one AESENC on fixed inputs; a fault is caught by the handler below
		__try {
			enc = _mm_aesenc_si128(_mm_set_epi32(0,1,2,3), _mm_set_epi32(4,5,6,7));
			// res becomes 1 only if both 64-bit halves match the hard-coded expected values
			res = enc.m128i_u64[0] == 0x5f77774d4b7b7b54 && enc.m128i_u64[1] == 0x63636367427c7c58;
		}
		__except(EXCEPTION_EXECUTE_HANDLER) {
			res = 0;
		}
#ifdef _M_IX86
		load_fpu_state(fpustate);
	}
예제 #7
0
/*
 * AES batch encryption of a pipeline of blocks, in place.
 *
 * blks - blocks to encrypt; blks[0..3] are always processed, blks[4..7]
 *        only as enabled by the PIPE compile-time setting
 * key  - expanded round keys; key[0] is the whitening key, key[1..ROUND-1]
 *        feed AESENC and the key following the last AESENC round feeds
 *        AESENCLAST
 */
__inline__ static void AES_ecb_encrypt_PIPE(
	__m128i *blks,
	const __m128i *key)
{
	unsigned rnd = 1;

	/* Round 0: XOR every block with the whitening key. */
	blks[0] = _mm_xor_si128(blks[0], key[0]);
	blks[1] = _mm_xor_si128(blks[1], key[0]);
	blks[2] = _mm_xor_si128(blks[2], key[0]);
	blks[3] = _mm_xor_si128(blks[3], key[0]);
#if (PIPE>=5)
	blks[4] = _mm_xor_si128(blks[4], key[0]);
#endif
#if (PIPE>=6)
	blks[5] = _mm_xor_si128(blks[5], key[0]);
#endif
#if (PIPE>=7)
	blks[6] = _mm_xor_si128(blks[6], key[0]);
#endif
#if (PIPE==8)
	blks[7] = _mm_xor_si128(blks[7], key[0]);
#endif

	/* Middle rounds: the same round key is applied to every block before
	   moving on to the next round. */
	while (rnd < ROUND) {
		blks[0] = _mm_aesenc_si128(blks[0], key[rnd]);
		blks[1] = _mm_aesenc_si128(blks[1], key[rnd]);
		blks[2] = _mm_aesenc_si128(blks[2], key[rnd]);
		blks[3] = _mm_aesenc_si128(blks[3], key[rnd]);
#if (PIPE>=5)
		blks[4] = _mm_aesenc_si128(blks[4], key[rnd]);
#endif
#if (PIPE>=6)
		blks[5] = _mm_aesenc_si128(blks[5], key[rnd]);
#endif
#if (PIPE>=7)
		blks[6] = _mm_aesenc_si128(blks[6], key[rnd]);
#endif
#if (PIPE==8)
		blks[7] = _mm_aesenc_si128(blks[7], key[rnd]);
#endif
		++rnd;
	}

	/* Final round: rnd now indexes the key after the last AESENC round. */
	blks[0] = _mm_aesenclast_si128(blks[0], key[rnd]);
	blks[1] = _mm_aesenclast_si128(blks[1], key[rnd]);
	blks[2] = _mm_aesenclast_si128(blks[2], key[rnd]);
	blks[3] = _mm_aesenclast_si128(blks[3], key[rnd]);
#if (PIPE>=5)
	blks[4] = _mm_aesenclast_si128(blks[4], key[rnd]);
#endif
#if (PIPE>=6)
	blks[5] = _mm_aesenclast_si128(blks[5], key[rnd]);
#endif
#if (PIPE>=7)
	blks[6] = _mm_aesenclast_si128(blks[6], key[rnd]);
#endif
#if (PIPE==8)
	blks[7] = _mm_aesenclast_si128(blks[7], key[rnd]);
#endif
}
예제 #8
0
파일: AES.cpp 프로젝트: netromdk/faes
    void Cryptor::cbcEncrypt(const string &plaintext, const Key &key,
                             string *ciphertext,
                             unsigned char *schedule) {
      ciphertext->resize(plaintext.size());

      int blocks = plaintext.size() / 16;
      if (plaintext.size() % 16) {
        blocks++;
      }

      __m128i tmp, tmp2, tmp3;
      __m128i *input = (__m128i*) plaintext.data();
      __m128i *output = (__m128i*) ciphertext->data();      
      __m128i *keySchedule = (__m128i*) schedule;
      int rounds = getRounds(key.size);

      // Load the IV.
      tmp2 = _mm_loadu_si128((__m128i*) key.iv);

      // Swap byte-order => big-endian.
      if (!bigEndian) {        
        reverse_m128i(tmp2); 
      }      
      
      for (int block = 0; block < blocks; block++) {
        // Get next 128-bit block.
        tmp = _mm_loadu_si128(&input[block]);

        // Swap byte-order => big-endian.
        if (!bigEndian) {        
          reverse_m128i(tmp); 
        }

        // XOR IV or last ciphertext with the plaintext.
        tmp2 = _mm_xor_si128(tmp, tmp2);

        // Whitening step.
        tmp2 = _mm_xor_si128(tmp2, keySchedule[0]);

        // Apply the AES rounds.
        int round = 1;
        for (; round < rounds; round++) {
          tmp2 = _mm_aesenc_si128(tmp2, keySchedule[round]);
        }

        // And the last.
        tmp2 = _mm_aesenclast_si128(tmp2, keySchedule[round]);

        // Swap byte-order => little-endian.
        tmp3 = tmp2;
        if (!bigEndian) {        
          reverse_m128i(tmp3); 
        }
        
        // Save the encrypted block.
        _mm_storeu_si128(&output[block], tmp3);
      }
    }
예제 #9
0
파일: siv.c 프로젝트: medsec/riv
/*
 * AES-128 encryption of a single block.
 *
 * in - block to encrypt
 * k  - expanded key schedule, k[0]..k[10]
 *
 * Returns the encrypted block.
 */
static __m128i aes_encrypt(__m128i in, __m128i* k)
{
    __m128i state = _mm_xor_si128(in, k[0]);
    int round;

    for (round = 1; round < 10; ++round) {
        state = _mm_aesenc_si128(state, k[round]);
    }
    return _mm_aesenclast_si128(state, k[10]);
}
예제 #10
0
/*
 * Encrypt a single block in place.
 *
 * blk    - block to encrypt; overwritten with the result
 * aesKey - key object; ROUNDS(aesKey) gives the round count and
 *          aesKey->rd_key holds the round keys
 */
void AES_ecb_encrypt(block *blk,  AES_KEY *aesKey) {
	unsigned nrounds = ROUNDS(aesKey);
	const block *rk = ((block *)(aesKey->rd_key));
	unsigned r = 1;

	*blk = _mm_xor_si128(*blk, rk[0]);
	while (r < nrounds) {
		*blk = _mm_aesenc_si128(*blk, rk[r]);
		++r;
	}
	/* r now indexes the key that follows the last AESENC round */
	*blk = _mm_aesenclast_si128(*blk, rk[r]);
}
예제 #11
0
/*
 * Encrypt one block from *in into *out.
 *
 * in     - source block (aligned load)
 * out    - destination block (aligned store)
 * aesKey - key object; ROUNDS(aesKey) gives the round count and
 *          aesKey->rd_key holds the round keys
 */
void AES_encryptC(block *in, block *out,  AES_KEY *aesKey)
{
	int nrounds = ROUNDS(aesKey);
	const __m128i *rk = ((__m128i *)(aesKey->rd_key));
	__m128i state = _mm_xor_si128(_mm_load_si128((__m128i*)in), rk[0]);
	int r = 1;

	while (r < nrounds) {
		state = _mm_aesenc_si128(state, rk[r]);
		r++;
	}
	state = _mm_aesenclast_si128(state, rk[r]);
	_mm_store_si128((__m128i*)out, state);
}
예제 #12
0
/*
 * Encrypt nblks blocks from in[] to out[].
 *
 * Blocks are handled in groups of eight: for each group, every round is
 * applied to all eight blocks before the next round starts. The blocks
 * that do not fill a whole group are handled the same way afterwards.
 *
 * in     - source blocks
 * out    - destination blocks
 * nblks  - number of blocks
 * aesKey - key object; ROUNDS(aesKey) gives the round count and
 *          aesKey->rd_key holds the round keys
 */
void AES_ecb_encrypt_chunk_in_out(block *in, block *out, unsigned nblks, AES_KEY *aesKey) {
	int fullGroups = nblks / 8;
	int tailBegin = fullGroups * 8;
	int tailCount = nblks - tailBegin;
	int tailEnd = tailBegin + tailCount;

	unsigned r, nrounds = ROUNDS(aesKey);
	const block *rk = ((block *)(aesKey->rd_key));
	int g, lane, idx;

	for (g = 0; g < fullGroups; g++) {
		block *src = in + g * 8;
		block *dst = out + g * 8;

		/* whitening */
		for (lane = 0; lane < 8; lane++)
			dst[lane] = _mm_xor_si128(src[lane], rk[0]);

		/* middle rounds, round-major so the eight lanes interleave */
		for (r = 1; r < nrounds; ++r) {
			for (lane = 0; lane < 8; lane++)
				dst[lane] = _mm_aesenc_si128(dst[lane], rk[r]);
		}

		/* final round */
		for (lane = 0; lane < 8; lane++)
			dst[lane] = _mm_aesenclast_si128(dst[lane], rk[r]);
	}

	/* leftover blocks that did not fill a group of eight */
	for (idx = tailBegin; idx < tailEnd; ++idx)
		out[idx] = _mm_xor_si128(in[idx], rk[0]);
	for (r = 1; r < nrounds; ++r) {
		for (idx = tailBegin; idx < tailEnd; ++idx)
			out[idx] = _mm_aesenc_si128(out[idx], rk[r]);
	}
	for (idx = tailBegin; idx < tailEnd; ++idx)
		out[idx] = _mm_aesenclast_si128(out[idx], rk[r]);
}
예제 #13
0
/*
 * Encrypt four blocks in place, applying each round to all four blocks
 * before moving to the next round.
 *
 * blks   - four blocks; overwritten with the results
 * aesKey - key object; ROUNDS(aesKey) gives the round count and
 *          aesKey->rd_key holds the round keys
 */
void AES_ecb_encrypt_blks_4(block *blks,  AES_KEY *aesKey) {
	unsigned r, nrounds = ROUNDS(aesKey);
	const block *rk = ((block *)(aesKey->rd_key));
	int lane;

	for (lane = 0; lane < 4; lane++)
		blks[lane] = _mm_xor_si128(blks[lane], rk[0]);

	for (r = 1; r < nrounds; ++r) {
		for (lane = 0; lane < 4; lane++)
			blks[lane] = _mm_aesenc_si128(blks[lane], rk[r]);
	}

	for (lane = 0; lane < 4; lane++)
		blks[lane] = _mm_aesenclast_si128(blks[lane], rk[r]);
}
예제 #14
0
/*
 * AES-128 encryption of one block.
 *
 * plaintext       - block to encrypt
 * encryption_keys - round keys, encryption_keys->keys[0..10]
 *
 * Returns the encrypted block.
 */
AES_AES_Block __fastcall aes_AES128_encrypt_block_(
    AES_AES_Block plaintext,
    const AES_AES128_RoundKeys* encryption_keys)
{
    int round;

    plaintext = _mm_xor_si128(plaintext, encryption_keys->keys[0]);
    for (round = 1; round < 10; ++round)
        plaintext = _mm_aesenc_si128(plaintext, encryption_keys->keys[round]);
    return _mm_aesenclast_si128(plaintext, encryption_keys->keys[10]);
}
예제 #15
0
파일: aesni.c 프로젝트: behemot/pm
/*
 * AES-128 encryption of one 16-byte block.
 *
 * ctx - context whose enc_keys[0..10] hold the round keys
 * in  - 16 input bytes (unaligned load)
 * out - 16 output bytes (unaligned store)
 */
void aesni_encrypt(aesni_ctx *ctx, const byte *in, byte *out)
{
	__m128i state = _mm_loadu_si128((__m128i*)in);
	int round = 1;

	state = _mm_xor_si128(state, ctx->enc_keys[0]);
	while (round < 10) {
		state = _mm_aesenc_si128(state, ctx->enc_keys[round]);
		round++;
	}
	state = _mm_aesenclast_si128(state, ctx->enc_keys[10]);
	_mm_storeu_si128((__m128i*)out, state);
}
예제 #16
0
/*
 * Encrypt nblks blocks in place. Each round is applied to every block
 * before the next round starts.
 *
 * blks   - blocks to encrypt; overwritten with the results
 * nblks  - number of blocks
 * aesKey - key object; ROUNDS(aesKey) gives the round count and
 *          aesKey->rd_key holds the round keys
 */
void AES_ecb_encrypt_blks(block *blks, unsigned nblks,  AES_KEY *aesKey) {
	unsigned nrounds = ROUNDS(aesKey);
	const block *rk = ((block *)(aesKey->rd_key));
	block *const end = blks + nblks;
	block *p;
	unsigned r;

	for (p = blks; p != end; ++p)
		*p = _mm_xor_si128(*p, rk[0]);

	for (r = 1; r < nrounds; ++r) {
		for (p = blks; p != end; ++p)
			*p = _mm_aesenc_si128(*p, rk[r]);
	}

	for (p = blks; p != end; ++p)
		*p = _mm_aesenclast_si128(*p, rk[r]);
}
예제 #17
0
/*
 * AES-256 encryption of one 16-byte block.
 *
 * rkeys - the 15 expanded round keys
 * n     - 16 input bytes; no alignment requirement
 * out   - 16 output bytes; no alignment requirement
 */
static inline void aes256ni_encrypt(const __m128i rkeys[15], const unsigned char *n, unsigned char *out) {
  /* n and out are plain byte pointers, so use unaligned load/store: the
     aligned forms fault when the buffers are not 16-byte aligned. */
  __m128i nv = _mm_loadu_si128((const __m128i *)n);
  int i;
  __m128i temp = _mm_xor_si128(nv, rkeys[0]);
#pragma unroll(13)
  for (i = 1 ; i < 14 ; i++) {
    temp = _mm_aesenc_si128(temp, rkeys[i]);
  }
  temp = _mm_aesenclast_si128(temp, rkeys[14]);
  _mm_storeu_si128((__m128i*)(out), temp);
}
예제 #18
0
파일: aes.c 프로젝트: irdan/justGarble
/*
 * Encrypt one 16-byte block.
 *
 * in  - 16 input bytes; no alignment requirement
 * out - 16 output bytes; no alignment requirement
 * key - key object; ROUNDS(key) gives the round count and key->rd_key
 *       holds the round keys
 */
inline void AES_encrypt(const unsigned char *in, unsigned char *out,
		const AES_KEY *key) {
	int j, rnds = ROUNDS(key);
	const __m128i *sched = ((__m128i *) (key->rd_key));
	/* in/out are plain byte pointers, so use unaligned load/store: the
	   aligned forms fault when a buffer is not 16-byte aligned. */
	__m128i tmp = _mm_loadu_si128((const __m128i *) in);
	tmp = _mm_xor_si128(tmp, sched[0]);
	for (j = 1; j < rnds; j++)
		tmp = _mm_aesenc_si128(tmp, sched[j]);
	/* j now indexes the key that follows the last AESENC round */
	tmp = _mm_aesenclast_si128(tmp, sched[j]);
	_mm_storeu_si128((__m128i *) out, tmp);
}
예제 #19
0
/*
 * AES-128 encryption of one block, with byte_swap applied to the input
 * before encryption and to the result afterwards.
 *
 * in     - block to encrypt
 * expkey - expanded key schedule, expkey[0..10]
 *
 * Returns the byte-swapped encrypted block.
 */
static __m128i AES_encrypt(__m128i in,  const __m128i* expkey)
{
	__m128i state = byte_swap(in) ^ expkey[0];
	int round = 1;

	while (round < 10) {
		state = _mm_aesenc_si128 (state, expkey[round]);
		round++;
	}
	state = _mm_aesenclast_si128 (state, expkey[10]);

	return byte_swap(state);
}
예제 #20
0
/*
 * Encrypt four blocks from in[0..3] into out[0..3], applying each round to
 * all four blocks before moving to the next round.
 *
 * in     - four source blocks
 * out    - four destination blocks
 * aesKey - key object; ROUNDS(aesKey) gives the round count and
 *          aesKey->rd_key holds the round keys
 */
void AES_ecb_encrypt_blks_4_in_out(block *in, block *out,  AES_KEY *aesKey) {
	unsigned r, nrounds = ROUNDS(aesKey);
	const block *rk = ((block *)(aesKey->rd_key));
	int lane;

	for (lane = 0; lane < 4; lane++)
		out[lane] = _mm_xor_si128(in[lane], rk[0]);

	for (r = 1; r < nrounds; ++r) {
		for (lane = 0; lane < 4; lane++)
			out[lane] = _mm_aesenc_si128(out[lane], rk[r]);
	}

	for (lane = 0; lane < 4; lane++)
		out[lane] = _mm_aesenclast_si128(out[lane], rk[r]);
}
예제 #21
0
/*
 * AEGIS-128 finalization: computes the authentication tag.
 *
 * A 128-bit word holding adlen*8 in its low 64 bits and msglen*8 in its
 * high 64 bits is XORed with state[3]; that value is injected into
 * state[0] after each of 7 state-update rounds. The tag is the XOR of all
 * five state words. The state array is modified.
 *
 * msglen - message length in bytes
 * adlen  - associated-data length in bytes
 * maclen - number of tag bytes to write (whole bytes; at most 16 are written)
 * mac    - receives the tag bytes
 * state  - the five-word AEGIS state
 */
void aegis128_tag_generation(unsigned long long msglen, unsigned long long adlen, unsigned char maclen, unsigned char *mac, __m128i *state)
{
        int i;

        __m128i  tmp;
        __m128i  msgtmp;
        unsigned char t[16];

        /* Build the length word directly in a register. This avoids the
           aligned load from a byte array that has no alignment guarantee,
           and the aliasing cast to unsigned long long*. */
        msgtmp = _mm_set_epi64x((long long)(msglen << 3), (long long)(adlen << 3));

        msgtmp = _mm_xor_si128(msgtmp, state[3]);

        for (i = 0; i < 7; i++) {
             /* state update function */
             tmp = state[4];
             state[4] = _mm_aesenc_si128(state[3],state[4]);
             state[3] = _mm_aesenc_si128(state[2],state[3]);
             state[2] = _mm_aesenc_si128(state[1],state[2]);
             state[1] = _mm_aesenc_si128(state[0],state[1]);
             state[0] = _mm_aesenc_si128(tmp,state[0]);

             /* xor "msg" with state[0] */
             state[0] = _mm_xor_si128(state[0], msgtmp);
        }

        state[4] = _mm_xor_si128(state[4], state[3]);
        state[4] = _mm_xor_si128(state[4], state[2]);
        state[4] = _mm_xor_si128(state[4], state[1]);
        state[4] = _mm_xor_si128(state[4], state[0]);

        /* t is a byte array, so the store must be the unaligned form */
        _mm_storeu_si128((__m128i*)t, state[4]);
        /* the mac length is assumed to be a whole number of bytes; never
           copy more than the 16 bytes that t actually holds */
        memcpy(mac, t, maclen > sizeof t ? sizeof t : maclen);
}
예제 #22
0
/*
 * One AEGIS-128 decryption step for a full 16-byte block.
 *
 * The plaintext is ciphertext ^ (state[2] & state[3]) ^ state[4] ^ state[1].
 * After it is written out, the state is advanced by one update round and
 * the recovered plaintext is XORed into state[0].
 *
 * plaintextblk  - receives 16 plaintext bytes; no alignment requirement
 * ciphertextblk - 16 ciphertext bytes; no alignment requirement
 * state         - the five-word AEGIS state, updated in place
 */
inline void aegis128_dec_aut_step(unsigned char *plaintextblk,
       const unsigned char *ciphertextblk, __m128i *state)
{
        /* the block pointers are plain byte pointers into caller buffers,
           so use unaligned load/store: the aligned forms fault when a
           buffer is not 16-byte aligned */
        __m128i msg = _mm_loadu_si128((const __m128i*)ciphertextblk);
        __m128i tmp = state[4];

        /* decryption */
        msg = _mm_xor_si128(msg, _mm_and_si128(state[2], state[3]));
        msg = _mm_xor_si128(msg, state[4]);
        msg = _mm_xor_si128(msg, state[1]);

        _mm_storeu_si128((__m128i*)plaintextblk, msg);

        /* state update function */
        state[4] = _mm_aesenc_si128(state[3],state[4]);
        state[3] = _mm_aesenc_si128(state[2],state[3]);
        state[2] = _mm_aesenc_si128(state[1],state[2]);
        state[1] = _mm_aesenc_si128(state[0],state[1]);
        state[0] = _mm_aesenc_si128(tmp,state[0]);

        /* message is used to update the state */
        state[0] = _mm_xor_si128(state[0],msg);
}
예제 #23
0
파일: aesni.c 프로젝트: Yawning/polarssl
/*
 * Encrypt or decrypt a single 16-byte block with the round keys in ctx.
 *
 * ctx    - context; ctx->rk holds the round keys, ctx->nr the round count
 * mode   - AES_ENCRYPT selects encryption, any other value decryption
 * input  - 16 input bytes (unaligned load)
 * output - 16 output bytes (unaligned store)
 *
 * Always returns 0.
 */
int aesni_xcryptecb( aes_context *ctx,
                     int mode,
                     const unsigned char input[16],
                     unsigned char output[16] )
{
    const __m128i *rk = (__m128i *) ctx->rk;
    const int nr = ctx->nr;
    int i = 1;
    __m128i state;

    /* This could be faster if more data was provided at once. */

    state = _mm_xor_si128( _mm_loadu_si128( (__m128i *) input ), rk[0] );

    if( mode != AES_ENCRYPT ) {
        /* Decryption: middle rounds are consumed two at a time. */
        while( i < nr - 1 ) {
            state = _mm_aesdec_si128( state, rk[i] );
            state = _mm_aesdec_si128( state, rk[i + 1] );
            i += 2;
        }

        state = _mm_aesdec_si128( state, rk[nr - 1] );
        state = _mm_aesdeclast_si128( state, rk[nr] );
    } else {
        /* Encryption: middle rounds are consumed two at a time. */
        while( i < nr - 1 ) {
            state = _mm_aesenc_si128( state, rk[i] );
            state = _mm_aesenc_si128( state, rk[i + 1] );
            i += 2;
        }

        state = _mm_aesenc_si128( state, rk[nr - 1] );
        state = _mm_aesenclast_si128( state, rk[nr] );
    }

    _mm_storeu_si128( (__m128i *) output, state );

    return( 0 );
}
예제 #24
0
/*
 * AES-128 encryption of one 16-byte block.
 *
 * in      - 16 input bytes (unaligned load)
 * out     - 16 output bytes (unaligned store)
 * enc_key - round keys enc_key[0]..enc_key[10]
 */
void AESNI_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY enc_key)
{
    __m128i state = _mm_loadu_si128 ((__m128i*)in);
    int round;

    state = _mm_xor_si128 (state, enc_key[0]);
    for (round = 1; round < 10; ++round) {
        state = _mm_aesenc_si128 (state, enc_key[round]);
    }
    state = _mm_aesenclast_si128 (state, enc_key[10]);

    _mm_storeu_si128 ((__m128i*)out, state);
}
예제 #25
0
파일: block.c 프로젝트: amaloz/libgarble
/*
 * Produce the next block of the counter-based generator.
 *
 * Starts from garble_zero_block(), writes the current value of
 * current_rand_index into the first 64-bit word (post-incrementing the
 * counter), then encrypts that block with the round keys
 * rand_aes_key.rd_key[0..10].
 */
inline block
garble_random_block(void)
{
    block state = garble_zero_block();
    uint64_t *words = (uint64_t *) &state;
    int round = 1;

    words[0] = current_rand_index++;
    state = _mm_xor_si128(state, rand_aes_key.rd_key[0]);
    while (round < 10) {
        state = _mm_aesenc_si128(state, rand_aes_key.rd_key[round]);
        ++round;
    }
    /* round == 10 here */
    return _mm_aesenclast_si128(state, rand_aes_key.rd_key[round]);
}
예제 #26
0
파일: siv.c 프로젝트: medsec/riv
/*
 * Apply AES rounds 1..10 to num_blocks blocks in place, one round across
 * all blocks at a time. No XOR with keys[0] is performed here.
 *
 * text       - blocks to process; overwritten with the results
 * num_blocks - number of blocks in text
 * keys       - round keys; keys[1..9] feed AESENC, keys[10] AESENCLAST
 */
static inline void aes_encrypt_n(__m128i *text, int num_blocks,
                                 __m128i *keys)
{
    int round = 1;
    int blk;

    while (round < 10) {
        for (blk = 0; blk < num_blocks; ++blk) {
            text[blk] = _mm_aesenc_si128(text[blk], keys[round]);
        }
        ++round;
    }

    /* round == 10 here */
    for (blk = 0; blk < num_blocks; ++blk) {
        text[blk] = _mm_aesenclast_si128(text[blk], keys[round]);
    }
}
예제 #27
0
파일: AES.cpp 프로젝트: netromdk/faes
    void Cryptor::ecbEncrypt(const string &plaintext, const Key &key,
                             string *ciphertext,
                             unsigned char *schedule) {
      // Right now we just use the same length, but it should just be
      // a multiple of 16.
      ciphertext->resize(plaintext.size());

      int blocks = plaintext.size() / 16;
      if (plaintext.size() % 16) {
        blocks++;
      }

      __m128i tmp;
      __m128i *input = (__m128i*) plaintext.data();
      __m128i *output = (__m128i*) ciphertext->data();      
      __m128i *keySchedule = (__m128i*) schedule;
      int rounds = getRounds(key.size);
      
      for (int block = 0; block < blocks; block++) {
        // Get next 128-bit block.
        tmp = _mm_loadu_si128(&input[block]);

        // Swap byte-order => big-endian.
        if (!bigEndian) {        
          reverse_m128i(tmp); 
        }

        // Whitening step.
        tmp = _mm_xor_si128(tmp, keySchedule[0]);

        // Apply the AES rounds.
        int round = 1;
        for (; round < rounds; round++) {
          tmp = _mm_aesenc_si128(tmp, keySchedule[round]);
        }

        // And the last.
        tmp = _mm_aesenclast_si128(tmp, keySchedule[round]);

        // Swap byte-order => little-endian.        
        if (!bigEndian) {        
          reverse_m128i(tmp); 
        }
        
        // Save the encrypted block.
        _mm_storeu_si128(&output[block], tmp);
      }
    }
예제 #28
0
파일: aesni.c 프로젝트: 6e6f36/hashkill
/*
 * CBC-mode AES encryption.
 *
 * length is rounded up to a whole number of 16-byte blocks, and every
 * block is read and written as a full 16 bytes.
 *
 * in               - input bytes (unaligned loads)
 * out              - output bytes (unaligned stores)
 * ivec             - 16-byte initial chaining value
 * length           - input length in bytes
 * key              - expanded round keys, accessed as an array of __m128i
 * number_of_rounds - round count; the key at this index is used by the
 *                    final AESENCLAST
 */
static void AESNI_CBC_encrypt(const unsigned char *in, unsigned char *out,unsigned char ivec[16],unsigned long length,unsigned char *key,int number_of_rounds)
{
    __m128i *src = (__m128i*)in;
    __m128i *dst = (__m128i*)out;
    __m128i *rk = (__m128i*)key;
    __m128i chain;
    unsigned long nblocks = length / 16;
    int blk, r;

    if (length % 16)
        nblocks++;

    chain = _mm_loadu_si128 ((__m128i*)ivec);
    for (blk = 0; blk < nblocks; blk++)
    {
        /* chain the previous ciphertext (or IV) into this block, then whiten */
        chain = _mm_xor_si128 (_mm_loadu_si128 (&src[blk]), chain);
        chain = _mm_xor_si128 (chain, rk[0]);

        r = 1;
        while (r < number_of_rounds)
        {
            chain = _mm_aesenc_si128 (chain, rk[r]);
            r++;
        }
        chain = _mm_aesenclast_si128 (chain, rk[r]);

        _mm_storeu_si128 (&dst[blk], chain);
    }
}
예제 #29
0
파일: aes.cpp 프로젝트: mbroz/PHCtest
/*inline*/ void AES_reduced_batch_intr(__m128i* batch, uint32_t batch_size) //Encrypts batch_size in parallel
{
	//Round Key initialization
	__m128i roundkey[AES_ROUNDS + 1];

	for (unsigned i = 0; i<AES_ROUNDS + 1; ++i)
	{
		roundkey[i] = _mm_set_epi64x(subkeys64[i][1], subkeys64[i][0]);
	}
	for (unsigned i = 0; i<batch_size; ++i)
	{
		batch[i] = _mm_xor_si128(batch[i], roundkey[0]);
	}

	for (unsigned j = 0; j<AES_ROUNDS; ++j)
	{
		for (unsigned i = 0; i<batch_size; ++i)
		{
			batch[i] = _mm_aesenc_si128(batch[i], roundkey[j + 1]);
		}
	}

}
예제 #30
0
/*
 * AES-GCM authenticated decryption using AES-NI.
 *
 * The tag is recomputed over the additional data and the ciphertext and
 * compared with `tag` before any output is produced. On a mismatch the
 * function returns 0 and `out` is left untouched.
 *
 * in     - ciphertext, nbytes bytes
 * out    - receives nbytes bytes of plaintext
 * addt   - additional authenticated data, abytes bytes
 * ivec   - IV, ibytes bytes (12 bytes takes the dedicated path)
 * tag    - 16-byte tag to verify
 * nbytes - ciphertext length in bytes
 * abytes - additional-data length in bytes
 * ibytes - IV length in bytes
 * key    - expanded key schedule, nr+1 round keys; it is dereferenced as an
 *          array of __m128i, so it should be 16-byte aligned
 * nr     - number of AES rounds
 *
 * Returns 1 on success, 0 when authentication fails.
 */
int AES_GCM_decrypt (const unsigned char *in,
                     unsigned char *out,
                     const unsigned char* addt,
                     const unsigned char* ivec,
                     unsigned char *tag,
                     int nbytes,
                     int abytes,
                     int ibytes,
                     const unsigned char* key,
                     int nr)
{
    int i, j, k;
    __m128i tmp1, tmp2, tmp3, tmp4;
    __m128i H, Y, T;
    __m128i *KEY = (__m128i*)key;
    __m128i ctr1, ctr2, ctr3, ctr4;
    __m128i last_block = _mm_setzero_si128();
    __m128i ONE = _mm_set_epi32(0, 1, 0, 0);
    __m128i FOUR = _mm_set_epi32(0, 4, 0, 0);
    __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7);
    __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
    __m128i X = _mm_setzero_si128();

    if (ibytes == 96/8) {
        /* Copy exactly the 12 IV bytes: a 16-byte load would read 4 bytes
           past the end of ivec. The top 32 bits are set just below. */
        Y = _mm_setzero_si128();
        for (j = 0; j < 96/8; j++)
            ((unsigned char*)&Y)[j] = ivec[j];
        Y = _mm_insert_epi32(Y, 0x1000000, 3);
        /* Compute E[ZERO, KS] and E[Y0, KS] together */
        tmp1 = _mm_xor_si128(X, KEY[0]);
        tmp2 = _mm_xor_si128(Y, KEY[0]);
        for (j = 1; j < nr-1; j += 2) {
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]);
        }
        tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
        tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]);
        H = _mm_aesenclast_si128(tmp1, KEY[nr]);
        T = _mm_aesenclast_si128(tmp2, KEY[nr]);
        H = _mm_shuffle_epi8(H, BSWAP_MASK);
    }
    else {
        /* H = E(K, 0) */
        tmp1 = _mm_xor_si128(X, KEY[0]);
        for (j = 1; j < nr; j++)
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
        H = _mm_aesenclast_si128(tmp1, KEY[nr]);
        H = _mm_shuffle_epi8(H, BSWAP_MASK);

        /* Hash the IV into Y, starting from an explicit zero rather than
           XORing an uninitialised variable with itself. */
        Y = _mm_setzero_si128();
        for (i = 0; i < ibytes/16; i++) {
            tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]);
            tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
            Y = _mm_xor_si128(Y, tmp1);
            gfmul(Y, H, &Y);
        }
        if (ibytes%16) {
            for (j = 0; j < ibytes%16; j++)
                ((unsigned char*)&last_block)[j] = ivec[i*16+j];
            tmp1 = last_block;
            tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
            Y = _mm_xor_si128(Y, tmp1);
            gfmul(Y, H, &Y);
        }
        /* length block: low 64 bits = IV length in bits, high 64 bits = 0 */
        tmp1 = _mm_set_epi64x(0, (long long)ibytes * 8);
        Y = _mm_xor_si128(Y, tmp1);
        gfmul(Y, H, &Y);
        Y = _mm_shuffle_epi8(Y, BSWAP_MASK);
        /* Compute E(K, Y0) */
        tmp1 = _mm_xor_si128(Y, KEY[0]);
        for (j = 1; j < nr; j++)
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
        T = _mm_aesenclast_si128(tmp1, KEY[nr]);
    }

    /* Hash the additional data. */
    for (i = 0; i < abytes/16; i++) {
        tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X = _mm_xor_si128(X, tmp1);
        gfmul(X, H, &X);
    }
    if (abytes%16) {
        last_block = _mm_setzero_si128();
        for (j = 0; j < abytes%16; j++)
            ((unsigned char*)&last_block)[j] = addt[i*16+j];
        tmp1 = last_block;
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X = _mm_xor_si128(X, tmp1);
        gfmul(X, H, &X);
    }

    /* Hash the ciphertext. */
    for (i = 0; i < nbytes/16; i++) {
        tmp1 = _mm_loadu_si128(&((__m128i*)in)[i]);
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X = _mm_xor_si128(X, tmp1);
        gfmul(X, H, &X);
    }
    if (nbytes%16) {
        last_block = _mm_setzero_si128();
        for (j = 0; j < nbytes%16; j++)
            ((unsigned char*)&last_block)[j] = in[i*16+j];
        tmp1 = last_block;
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X = _mm_xor_si128(X, tmp1);
        gfmul(X, H, &X);
    }

    /* length block: low 64 bits = ciphertext bits, high 64 bits = AAD bits;
       widened before multiplying so large lengths cannot overflow int */
    tmp1 = _mm_set_epi64x((long long)abytes * 8, (long long)nbytes * 8);
    X = _mm_xor_si128(X, tmp1);
    gfmul(X, H, &X);
    X = _mm_shuffle_epi8(X, BSWAP_MASK);
    T = _mm_xor_si128(X, T);

    /* Compare all 16 tag bytes at once. */
    if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(T, _mm_loadu_si128((__m128i*)tag))))
        return 0; //in case the authentication failed

    /* Counter-mode decryption, four blocks at a time. */
    ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
    ctr1 = _mm_add_epi32(ctr1, ONE);
    ctr2 = _mm_add_epi32(ctr1, ONE);
    ctr3 = _mm_add_epi32(ctr2, ONE);
    ctr4 = _mm_add_epi32(ctr3, ONE);
    for (i = 0; i < nbytes/16/4; i++) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
        tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
        tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
        ctr1 = _mm_add_epi32(ctr1, FOUR);
        ctr2 = _mm_add_epi32(ctr2, FOUR);
        ctr3 = _mm_add_epi32(ctr3, FOUR);
        ctr4 = _mm_add_epi32(ctr4, FOUR);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        tmp2 = _mm_xor_si128(tmp2, KEY[0]);
        tmp3 = _mm_xor_si128(tmp3, KEY[0]);
        tmp4 = _mm_xor_si128(tmp4, KEY[0]);
        for (j = 1; j < nr-1; j += 2) {
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[j]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[j]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[j]);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[j+1]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[j+1]);
        }
        tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
        tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]);
        tmp3 = _mm_aesenc_si128(tmp3, KEY[nr-1]);
        tmp4 = _mm_aesenc_si128(tmp4, KEY[nr-1]);
        tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
        tmp2 = _mm_aesenclast_si128(tmp2, KEY[nr]);
        tmp3 = _mm_aesenclast_si128(tmp3, KEY[nr]);
        tmp4 = _mm_aesenclast_si128(tmp4, KEY[nr]);
        tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[i*4+0]));
        tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i*)in)[i*4+1]));
        tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i*)in)[i*4+2]));
        tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i*)in)[i*4+3]));
        _mm_storeu_si128(&((__m128i*)out)[i*4+0], tmp1);
        _mm_storeu_si128(&((__m128i*)out)[i*4+1], tmp2);
        _mm_storeu_si128(&((__m128i*)out)[i*4+2], tmp3);
        _mm_storeu_si128(&((__m128i*)out)[i*4+3], tmp4);
    }

    /* Remaining whole blocks, one at a time. */
    for (k = i*4; k < nbytes/16; k++) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        ctr1 = _mm_add_epi32(ctr1, ONE);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        for (j = 1; j < nr-1; j += 2) {
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
        }
        tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
        tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
        tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
        _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
    }

    //If one partial block remains
    if (nbytes%16) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        for (j = 1; j < nr-1; j += 2) {
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j]);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]);
        }
        tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]);
        tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]);
        /* Gather only the valid tail bytes: a 16-byte load here would read
           past the end of the ciphertext buffer. */
        last_block = _mm_setzero_si128();
        for (j = 0; j < nbytes%16; j++)
            ((unsigned char*)&last_block)[j] = in[k*16+j];
        last_block = _mm_xor_si128(tmp1, last_block);
        for (j = 0; j < nbytes%16; j++)
            out[k*16+j] = ((unsigned char*)&last_block)[j];
    }
    return 1; //when successful returns 1
}