Пример #1
0
Файл: aesni.c Проект: behemot/pm
/*
 * Allocate an AES-128 context for the 16-byte key at `key`.
 *
 * The context keeps a private copy of the key, the eleven encryption
 * round keys (enc_keys[0..10]) and the matching decryption round keys
 * (dec_keys[0..10]).
 *
 * Returns the new context, or NULL if `key` is NULL or an allocation
 * fails. The caller owns the returned context and its `key` buffer.
 */
aesni_ctx* aesni_create_ctx(byte *key)
{
	if (key == NULL) {
		return NULL;
	}

	aesni_ctx *ctx = malloc(sizeof *ctx);
	if (ctx == NULL) {
		return NULL;
	}

	ctx->key = malloc(16);
	if (ctx->key == NULL) {
		/* Do not leak the half-built context. */
		free(ctx);
		return NULL;
	}
	memcpy(ctx->key, key, 16);

	__m128i t1, t2;
	t1 = _mm_loadu_si128((__m128i*)ctx->key);
	ctx->enc_keys[0] = t1;

	/*
	 * The second argument of _mm_aeskeygenassist_si128 must be a
	 * compile-time immediate, so the rounds are written out one by one.
	 */
	t2 = _mm_aeskeygenassist_si128(t1, 0x1);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[1] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x2);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[2] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x4);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[3] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x8);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[4] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x10);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[5] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x20);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[6] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x40);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[7] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x80);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[8] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x1b);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[9] = t1;

	t2 = _mm_aeskeygenassist_si128(t1, 0x36);
	t1 = aesni_128_assist(t1, t2);
	ctx->enc_keys[10] = t1;

	/*
	 * Decryption keys are the encryption keys in reverse order; the
	 * inner nine are additionally passed through _mm_aesimc_si128.
	 */
	ctx->dec_keys[0] = ctx->enc_keys[10];
	for (int i = 1; i < 10; i++) {
		ctx->dec_keys[i] = _mm_aesimc_si128(ctx->enc_keys[10 - i]);
	}
	ctx->dec_keys[10] = ctx->enc_keys[0];

	return ctx;
}
Пример #2
0
    void Cryptor::expandKey128(const unsigned char *key,
                               unsigned char *schedule) {
      __m128i *keySchedule = (__m128i*) schedule;

      // The first entry is just the key itself.
      __m128i tmp = _mm_loadu_si128((__m128i*) key);
      if (!bigEndian) {
        reverse_m128i(tmp); // swap byte-order => big-endian.
      }
      keySchedule[0] = tmp;

      // Sadly, these cannot be done in a loop because the second
      // argument of _mm_aeskeygenassist_si128() needs to be a 8-bit
      // immediate!

      // The assist pretty much does the following:
      //   SubWord(RotWord(tmp)) xor RCON[round]
      __m128i tmp2 = _mm_aeskeygenassist_si128(tmp, 0x1);      
      tmp = assistKey128(tmp, tmp2);
      keySchedule[1] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x2);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[2] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x4);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[3] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x8);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[4] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x10);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[5] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x20);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[6] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x40);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[7] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x80);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[8] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x1B);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[9] = tmp;

      tmp2 = _mm_aeskeygenassist_si128(tmp, 0x36);
      tmp = assistKey128(tmp, tmp2);
      keySchedule[10] = tmp;            
    }
Пример #3
0
/*
 * Fill encryption_keys->keys[0..10]: entry 0 is `key` itself, and each
 * later entry is derived from the previous one via
 * aes_aes128_expand_key_assist() and the hardware key-generation assist.
 */
void __fastcall aes_AES128_expand_key_(
    AES_AES_Block key,
    AES_AES128_RoundKeys* encryption_keys)
{
    AES_Block128 prev;

    encryption_keys->keys[0] = key;
    prev = encryption_keys->keys[0];

    encryption_keys->keys[1] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x01));
    prev = encryption_keys->keys[1];

    encryption_keys->keys[2] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x02));
    prev = encryption_keys->keys[2];

    encryption_keys->keys[3] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x04));
    prev = encryption_keys->keys[3];

    encryption_keys->keys[4] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x08));
    prev = encryption_keys->keys[4];

    encryption_keys->keys[5] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x10));
    prev = encryption_keys->keys[5];

    encryption_keys->keys[6] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x20));
    prev = encryption_keys->keys[6];

    encryption_keys->keys[7] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x40));
    prev = encryption_keys->keys[7];

    encryption_keys->keys[8] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x80));
    prev = encryption_keys->keys[8];

    encryption_keys->keys[9] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x1b));
    prev = encryption_keys->keys[9];

    /* Last round key: nothing further is derived from it. */
    encryption_keys->keys[10] = aes_aes128_expand_key_assist(prev, _mm_aeskeygenassist_si128(prev, 0x36));
}
Пример #4
0
/*
 * Expand the 16-byte `key` into eleven round keys, expkey[0..10].
 * Each round key is produced from the previous one by assist128().
 */
static void AES_set_encrypt_key(const unsigned char *key,
                                __m128i *expkey)
{
    __m128i rk = _mm_loadu_si128((__m128i*)key);

    expkey[0] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x1));
    expkey[1] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x2));
    expkey[2] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x4));
    expkey[3] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x8));
    expkey[4] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x10));
    expkey[5] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x20));
    expkey[6] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x40));
    expkey[7] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x80));
    expkey[8] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x1b));
    expkey[9] = rk;

    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x36));
    expkey[10] = rk;
}
Пример #5
0
    void Cryptor::expandKey256(const unsigned char *key,
                               unsigned char *schedule) {
      __m128i *keySchedule = (__m128i*) schedule;

      // Save the first 128 bits of the key as the first one.
      __m128i tmp = _mm_loadu_si128((__m128i*) key);
      if (!bigEndian) {
        reverse_m128i(tmp); // swap byte-order => big-endian.
      }
      keySchedule[0] = tmp;

      // The next 128 bits as the second.
      __m128i tmp3 = _mm_loadu_si128((__m128i*) (key + 16));
      if (!bigEndian) {
        reverse_m128i(tmp3); // swap byte-order => big-endian.
      }
      keySchedule[1] = tmp3;      

      __m128i tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);

      assistKey256_1(&tmp, &tmp2);
      keySchedule[2] = tmp;

      assistKey256_2(&tmp, &tmp3);
      keySchedule[3] = tmp3;

      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
      assistKey256_1(&tmp, &tmp2);
      keySchedule[4] = tmp;
      assistKey256_2(&tmp, &tmp3);
      keySchedule[5] = tmp3;
      
      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
      assistKey256_1(&tmp, &tmp2);
      keySchedule[6] = tmp;
      assistKey256_2(&tmp, &tmp3);
      keySchedule[7] = tmp3;
      
      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
      assistKey256_1(&tmp, &tmp2);
      keySchedule[8] = tmp;
      assistKey256_2(&tmp, &tmp3);
      keySchedule[9] = tmp3;
      
      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
      assistKey256_1(&tmp, &tmp2);
      keySchedule[10] = tmp;
      assistKey256_2(&tmp, &tmp3);
      keySchedule[11] = tmp3;
      
      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
      assistKey256_1(&tmp, &tmp2);
      keySchedule[12] = tmp;
      assistKey256_2(&tmp, &tmp3);
      keySchedule[13] = tmp3;
      
      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
      assistKey256_1(&tmp, &tmp2);
      keySchedule[14] = tmp;
    }
Пример #6
0
/*
 * Returns `key` XORed with its own copies shifted left by 4, 8 and 12
 * bytes (accumulated one shift at a time), XORed with 32-bit lane 2 of
 * _mm_aeskeygenassist_si128(key2, 0) broadcast to all four lanes.
 */
__m128i aes_256_key_expansion(__m128i key, __m128i key2)
   {
   const __m128i assist = _mm_aeskeygenassist_si128(key2, 0x00);
   const __m128i broadcast = _mm_shuffle_epi32(assist, _MM_SHUFFLE(2,2,2,2));

   __m128i acc = key;
   for(int step = 0; step != 3; ++step)
      acc = _mm_xor_si128(acc, _mm_slli_si128(acc, 4));

   return _mm_xor_si128(acc, broadcast);
   }
Пример #7
0
/*
 * Compute the first five round keys (indices 0..4) of the AES-128 key
 * schedule for the 16-byte `userkey` and store them, unaligned, in `key`.
 */
void AESNI_Key_Expansion4(const unsigned char *userkey, AUX_KEY key)
{
    __m128i *round_keys = (__m128i*)key;
    __m128i current, assist;

    current = _mm_loadu_si128((__m128i*)userkey);
    _mm_storeu_si128(&round_keys[0], current);

    assist = _mm_aeskeygenassist_si128(current, 0x1);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[1], current);

    assist = _mm_aeskeygenassist_si128(current, 0x2);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[2], current);

    assist = _mm_aeskeygenassist_si128(current, 0x4);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[3], current);

    assist = _mm_aeskeygenassist_si128(current, 0x8);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[4], current);
}
Пример #8
0
/*
 * Update *temp3 in place: XOR it with its own copies shifted left by
 * 4, 8 and 12 bytes, then with 32-bit lane 2 of
 * _mm_aeskeygenassist_si128(*temp1, 0) broadcast to all lanes.
 * *temp1 is only read.
 */
static inline void KEY_256_ASSIST_2(__m128i* temp1, __m128i * temp3)
{
    const __m128i broadcast =
        _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*temp1, 0x0), 0xaa);
    __m128i acc = *temp3;
    __m128i shifted = acc;

    for (int step = 0; step < 3; step++) {
        shifted = _mm_slli_si128(shifted, 0x4);
        acc = _mm_xor_si128(acc, shifted);
    }

    *temp3 = _mm_xor_si128(acc, broadcast);
}
Пример #9
0
static void AESNI_128_Key_Expansion (const unsigned char *userkey,unsigned char *key)
{
    __m128i temp1, temp2;
    __m128i *Key_Schedule = (__m128i*)key;
    temp1 = _mm_loadu_si128((__m128i*)userkey);
    Key_Schedule[0] = temp1;


    temp2 = _mm_aeskeygenassist_si128 (temp1 ,0x1);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[1] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x2);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[2] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x4);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[3] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x8);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[4] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x10);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[5] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x20);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[6] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x40);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[7] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x80);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[8] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x1b);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[9] = temp1;
    temp2 = _mm_aeskeygenassist_si128 (temp1,0x36);
    temp1 = AESNI_128_ASSIST(temp1, temp2);
    Key_Schedule[10] = temp1;
}
Пример #10
0
/*
 * Expand the 16-byte `userkey` into eleven round keys (indices 0..10)
 * and store them in `enc_key` using unaligned stores.
 */
void AESNI_Key_Expansion(const unsigned char *userkey, AES_KEY enc_key)
{
    __m128i *round_keys = (__m128i*)enc_key;
    __m128i current, assist;

    current = _mm_loadu_si128((__m128i*)userkey);
    _mm_storeu_si128(&round_keys[0], current);

    /* The round constant must be an immediate, hence no loop. */
    assist = _mm_aeskeygenassist_si128(current, 0x1);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[1], current);

    assist = _mm_aeskeygenassist_si128(current, 0x2);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[2], current);

    assist = _mm_aeskeygenassist_si128(current, 0x4);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[3], current);

    assist = _mm_aeskeygenassist_si128(current, 0x8);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[4], current);

    assist = _mm_aeskeygenassist_si128(current, 0x10);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[5], current);

    assist = _mm_aeskeygenassist_si128(current, 0x20);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[6], current);

    assist = _mm_aeskeygenassist_si128(current, 0x40);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[7], current);

    assist = _mm_aeskeygenassist_si128(current, 0x80);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[8], current);

    assist = _mm_aeskeygenassist_si128(current, 0x1b);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[9], current);

    assist = _mm_aeskeygenassist_si128(current, 0x36);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(&round_keys[10], current);
}
/*
 * Update *tmp3 in place: XOR it with its own copies shifted left by
 * 4, 8 and 12 bytes, then with 32-bit lane 2 of
 * _mm_aeskeygenassist_si128(*tmp1, 0) broadcast to all lanes.
 * *tmp1 is only read.
 */
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
{
	const __m128i broadcast =
		_mm_shuffle_epi32(_mm_aeskeygenassist_si128(*tmp1, 0x00), 0xAA);
	__m128i acc = *tmp3;
	__m128i shifted = acc;

	for (int step = 0; step < 3; step++) {
		shifted = _mm_slli_si128(shifted, 0x04);
		acc = _mm_xor_si128(acc, shifted);
	}

	*tmp3 = _mm_xor_si128(acc, broadcast);
}
Пример #12
0
/*
 * Expand a 192-bit key into thirteen round keys stored in `key` as
 * consecutive 128-bit entries (indices 0..12).
 *
 * Note that 16 bytes are loaded from userkey+16, so 32 bytes starting
 * at `userkey` must be readable.
 *
 * Each KEY_192_ASSIST step works on a 128-bit/128-bit register pair;
 * the _mm_shuffle_pd calls splice 64-bit halves of those registers into
 * the 128-bit schedule entries.
 */
void AES_192_Key_Expansion (const unsigned char *userkey,
                            unsigned char *key)
{
    __m128i lo, assist, hi;
    __m128i *round_keys = (__m128i*)key;

    lo = _mm_loadu_si128((__m128i*)userkey);
    hi = _mm_loadu_si128((__m128i*)(userkey+16));

    round_keys[0] = lo;
    round_keys[1] = hi;

    assist = _mm_aeskeygenassist_si128(hi, 0x1);
    KEY_192_ASSIST(&lo, &assist, &hi);
    /* Keep the low half already stored in entry 1, append the low half of lo. */
    round_keys[1] = (__m128i)_mm_shuffle_pd((__m128d)round_keys[1],
                                            (__m128d)lo, 0);
    /* High half of lo followed by the low half of hi. */
    round_keys[2] = (__m128i)_mm_shuffle_pd((__m128d)lo, (__m128d)hi, 1);

    assist = _mm_aeskeygenassist_si128(hi, 0x2);
    KEY_192_ASSIST(&lo, &assist, &hi);
    round_keys[3] = lo;
    round_keys[4] = hi;

    assist = _mm_aeskeygenassist_si128(hi, 0x4);
    KEY_192_ASSIST(&lo, &assist, &hi);
    round_keys[4] = (__m128i)_mm_shuffle_pd((__m128d)round_keys[4],
                                            (__m128d)lo, 0);
    round_keys[5] = (__m128i)_mm_shuffle_pd((__m128d)lo, (__m128d)hi, 1);

    assist = _mm_aeskeygenassist_si128(hi, 0x8);
    KEY_192_ASSIST(&lo, &assist, &hi);
    round_keys[6] = lo;
    round_keys[7] = hi;

    assist = _mm_aeskeygenassist_si128(hi, 0x10);
    KEY_192_ASSIST(&lo, &assist, &hi);
    round_keys[7] = (__m128i)_mm_shuffle_pd((__m128d)round_keys[7],
                                            (__m128d)lo, 0);
    round_keys[8] = (__m128i)_mm_shuffle_pd((__m128d)lo, (__m128d)hi, 1);

    assist = _mm_aeskeygenassist_si128(hi, 0x20);
    KEY_192_ASSIST(&lo, &assist, &hi);
    round_keys[9] = lo;
    round_keys[10] = hi;

    assist = _mm_aeskeygenassist_si128(hi, 0x40);
    KEY_192_ASSIST(&lo, &assist, &hi);
    round_keys[10] = (__m128i)_mm_shuffle_pd((__m128d)round_keys[10],
                                             (__m128d)lo, 0);
    round_keys[11] = (__m128i)_mm_shuffle_pd((__m128d)lo, (__m128d)hi, 1);

    assist = _mm_aeskeygenassist_si128(hi, 0x80);
    KEY_192_ASSIST(&lo, &assist, &hi);
    round_keys[12] = lo;
}
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
static inline void ExpandAESKey256(char *keybuf)
{
	__m128i tmp1, tmp2, tmp3, *keys;
	
	keys = (__m128i *)keybuf;
	
	tmp1 = _mm_load_si128((__m128i *)keybuf);
	tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
	
	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
	ExpandAESKey256_sub1(&tmp1, &tmp2);
	keys[2] = tmp1;
	ExpandAESKey256_sub2(&tmp1, &tmp3);
	keys[3] = tmp3;
	
	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
	ExpandAESKey256_sub1(&tmp1, &tmp2);
	keys[4] = tmp1;
	ExpandAESKey256_sub2(&tmp1, &tmp3);
	keys[5] = tmp3;
	
	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
	ExpandAESKey256_sub1(&tmp1, &tmp2);
	keys[6] = tmp1;
	ExpandAESKey256_sub2(&tmp1, &tmp3);
	keys[7] = tmp3;
	
	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
	ExpandAESKey256_sub1(&tmp1, &tmp2);
	keys[8] = tmp1;
	ExpandAESKey256_sub2(&tmp1, &tmp3);
	keys[9] = tmp3;
	
	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
	ExpandAESKey256_sub1(&tmp1, &tmp2);
	keys[10] = tmp1;
	ExpandAESKey256_sub2(&tmp1, &tmp3);
	keys[11] = tmp3;
	
	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
	ExpandAESKey256_sub1(&tmp1, &tmp2);
	keys[12] = tmp1;
	ExpandAESKey256_sub2(&tmp1, &tmp3);
	keys[13] = tmp3;
	
	tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
	ExpandAESKey256_sub1(&tmp1, &tmp2);
	keys[14] = tmp1;
}
Пример #14
0
    // Updates *tmp2 in place: XORs it with its own copies shifted left
    // by 4, 8 and 12 bytes, then with 32-bit lane 2 of
    // _mm_aeskeygenassist_si128(*tmp, 0) replicated into all four
    // lanes. *tmp is only read.
    void Cryptor::assistKey256_2(__m128i *tmp, __m128i *tmp2) {
      const __m128i assist = _mm_aeskeygenassist_si128(*tmp, 0x0);
      const __m128i broadcast =
        _mm_shuffle_epi32(assist, SHUFFLE4_32(2, 2, 2, 2));

      __m128i acc = *tmp2;
      __m128i shifted = acc;
      for (int step = 0; step < 3; ++step) {
        shifted = _mm_slli_si128(shifted, 0x4);
        acc = _mm_xor_si128(acc, shifted);
      }

      *tmp2 = _mm_xor_si128(acc, broadcast);
    }
Пример #15
0
static void AESNI_256_Key_Expansion (const unsigned char *userkey, unsigned char *key)
{
    __m128i temp1, temp2, temp3;
    __m128i *Key_Schedule = (__m128i*)key;

    temp1 = _mm_loadu_si128((__m128i*)userkey);
    temp3 = _mm_loadu_si128((__m128i*)(userkey+16));
    Key_Schedule[0] = temp1;
    Key_Schedule[1] = temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x01);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[2]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[3]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x02);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[4]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[5]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x04);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[6]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[7]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x08);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[8]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[9]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x10);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[10]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[11]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x20);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[12]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[13]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x40);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[14]=temp1;
}
Пример #16
0
/* Compile-only diagnostic test.  Every statement passes k4 in the
   argument position that the trailing dg-error annotation says must be
   an 8-bit immediate, and the annotation gives the diagnostic text
   expected on that line.  The operands (i1, k1, b1, ...) are declared
   outside this function; k4 is presumably not a compile-time constant
   -- confirm against its declaration.  Keep each statement and its
   annotation on the same line.  */
void
test8bit (void)
{
  i1 = _mm_cmpistrm (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistri (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistra (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrc (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistro (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrs (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrz (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  b1 = _mm256_blend_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  k1 = _cvtss_sh (f1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm256_cvtps_ph (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_dp_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute_ps (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_blend_epi16 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_cvtps_ph (a1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  d1 = _mm_dp_pd (d2, d3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_dp_ps (a2, a3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_insert_ps (a2, a3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_mpsadbw_epu8 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_permute_ps (a2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_slli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_srli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
}
Пример #17
0
/*
 * Fill encryption_keys->keys[0..14]. Entries 0 and 1 are key_lo and
 * key_hi; each later entry is the value returned by
 * aes_aes256_expand_key_assist(), which receives pointers to the running
 * prev_lo/prev_hi pair plus a broadcast lane of the hardware
 * key-generation assist.
 *
 * Even-numbered entries use the assist with a round constant and
 * broadcast lane 3 (shuffle 0xff); odd-numbered entries use a zero
 * constant and broadcast lane 2 (shuffle 0xaa).
 */
void __fastcall aes_AES256_expand_key_(
    AES_AES_Block key_lo,
    AES_AES_Block key_hi,
    AES_AES256_RoundKeys* encryption_keys)
{
    AES_AES_Block prev_lo, prev_hi;
    AES_AES_Block hwgen;

    prev_lo = encryption_keys->keys[0] = key_lo;
    prev_hi = encryption_keys->keys[1] = key_hi;

    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0x01), 0xff);
    encryption_keys->keys[2] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);
    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0), 0xaa);
    encryption_keys->keys[3] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);

    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0x02), 0xff);
    encryption_keys->keys[4] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);
    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0), 0xaa);
    encryption_keys->keys[5] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);

    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0x04), 0xff);
    encryption_keys->keys[6] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);
    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0), 0xaa);
    encryption_keys->keys[7] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);

    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0x08), 0xff);
    encryption_keys->keys[8] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);
    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0), 0xaa);
    encryption_keys->keys[9] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);

    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0x10), 0xff);
    encryption_keys->keys[10] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);
    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0), 0xaa);
    encryption_keys->keys[11] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);

    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0x20), 0xff);
    encryption_keys->keys[12] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);
    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0), 0xaa);
    encryption_keys->keys[13] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);

    hwgen = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(prev_hi, 0x40), 0xff);
    encryption_keys->keys[14] = aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);
}
Пример #18
0
    // Expands a 192-bit (24-byte) `key` into the round-key schedule,
    // written to `schedule` as consecutive 128-bit entries. Entries
    // 0..13 are written, so `schedule` must hold at least 14 * 16 bytes.
    //
    // Only the 24 key bytes are read: the trailing 8 bytes are copied
    // into a zero-padded 16-byte scratch block before being loaded.
    inline void Cryptor::expandKey192(const unsigned char *key,
                                      unsigned char *schedule) {
      __m128i *keySchedule = (__m128i*) schedule;

      // Save the first 128 bits of the key as the first one.
      __m128i tmp = _mm_loadu_si128((__m128i*) key);
      if (!bigEndian) {
        reverse_m128i(tmp); // swap byte-order => big-endian.
      }
      keySchedule[0] = tmp;

      // The remaining 64 bits (8 bytes) of the key, zero-padded to a
      // full 128-bit block. Copying more than 8 bytes would read past
      // the end of a 24-byte key.
      unsigned char buf[16];
      memset(buf, 0, sizeof buf);
      memcpy(buf, key + 16, 8);

      __m128i tmp3 = _mm_loadu_si128((__m128i*) buf);
      if (!bigEndian) {
        reverse_m128i(tmp3); // swap byte-order => big-endian.
      }
      keySchedule[1] = tmp3;

      // The round constant of _mm_aeskeygenassist_si128() must be an
      // immediate, so the steps are written out. After every other
      // step, _mm_shuffle_pd splices 64-bit halves of tmp/tmp3 into the
      // 128-bit schedule entries.
      __m128i tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x1);
      assistKey192(&tmp, &tmp2, &tmp3);
      keySchedule[1] =
        (__m128i) _mm_shuffle_pd((__m128d) keySchedule[1],
                                 (__m128d) tmp, 0);
      keySchedule[2] =
        (__m128i) _mm_shuffle_pd((__m128d) tmp, (__m128d) tmp3, 1);

      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x2);
      assistKey192(&tmp, &tmp2, &tmp3);
      keySchedule[3] = tmp;
      keySchedule[4] = tmp3;

      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x4);
      assistKey192(&tmp, &tmp2, &tmp3);
      keySchedule[4] =
        (__m128i) _mm_shuffle_pd((__m128d) keySchedule[4],
                                 (__m128d) tmp, 0);
      keySchedule[5] = (__m128i) _mm_shuffle_pd((__m128d) tmp,
                                                (__m128d) tmp3, 1);

      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x8);
      assistKey192(&tmp, &tmp2, &tmp3);
      keySchedule[6] = tmp;
      keySchedule[7] = tmp3;

      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
      assistKey192(&tmp, &tmp2, &tmp3);
      keySchedule[7] =
        (__m128i) _mm_shuffle_pd((__m128d) keySchedule[7],
                                 (__m128d) tmp, 0);
      keySchedule[8] = (__m128i) _mm_shuffle_pd((__m128d) tmp,
                                                (__m128d) tmp3, 1);

      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
      assistKey192(&tmp, &tmp2, &tmp3);
      keySchedule[9] = tmp;
      keySchedule[10] = tmp3;

      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
      assistKey192(&tmp, &tmp2, &tmp3);
      keySchedule[10] =
        (__m128i) _mm_shuffle_pd((__m128d) keySchedule[10],
                                 (__m128d) tmp, 0);
      keySchedule[11] = (__m128i) _mm_shuffle_pd((__m128d) tmp,
                                                 (__m128d) tmp3, 1);

      tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x80);
      assistKey192(&tmp, &tmp2, &tmp3);
      keySchedule[12] = tmp;
      // NOTE(review): AES-192 uses 13 round keys (entries 0..12); this
      // extra store is kept for compatibility with existing callers --
      // confirm the schedule buffer really has room for entry 13.
      keySchedule[13] = tmp3;
    }
Пример #19
0
/*
* AES-256 Key Schedule
*/
void AES_256_NI::key_schedule(const byte key[], size_t)
   {
   __m128i K0 = _mm_loadu_si128((const __m128i*)(key));
   __m128i K1 = _mm_loadu_si128((const __m128i*)(key + 16));

   __m128i K2 = aes_128_key_expansion(K0, _mm_aeskeygenassist_si128(K1, 0x01));
   __m128i K3 = aes_256_key_expansion(K1, K2);

   __m128i K4 = aes_128_key_expansion(K2, _mm_aeskeygenassist_si128(K3, 0x02));
   __m128i K5 = aes_256_key_expansion(K3, K4);

   __m128i K6 = aes_128_key_expansion(K4, _mm_aeskeygenassist_si128(K5, 0x04));
   __m128i K7 = aes_256_key_expansion(K5, K6);

   __m128i K8 = aes_128_key_expansion(K6, _mm_aeskeygenassist_si128(K7, 0x08));
   __m128i K9 = aes_256_key_expansion(K7, K8);

   __m128i K10 = aes_128_key_expansion(K8, _mm_aeskeygenassist_si128(K9, 0x10));
   __m128i K11 = aes_256_key_expansion(K9, K10);

   __m128i K12 = aes_128_key_expansion(K10, _mm_aeskeygenassist_si128(K11, 0x20));
   __m128i K13 = aes_256_key_expansion(K11, K12);

   __m128i K14 = aes_128_key_expansion(K12, _mm_aeskeygenassist_si128(K13, 0x40));

   __m128i* EK_mm = (__m128i*)&EK[0];
   _mm_storeu_si128(EK_mm     , K0);
   _mm_storeu_si128(EK_mm +  1, K1);
   _mm_storeu_si128(EK_mm +  2, K2);
   _mm_storeu_si128(EK_mm +  3, K3);
   _mm_storeu_si128(EK_mm +  4, K4);
   _mm_storeu_si128(EK_mm +  5, K5);
   _mm_storeu_si128(EK_mm +  6, K6);
   _mm_storeu_si128(EK_mm +  7, K7);
   _mm_storeu_si128(EK_mm +  8, K8);
   _mm_storeu_si128(EK_mm +  9, K9);
   _mm_storeu_si128(EK_mm + 10, K10);
   _mm_storeu_si128(EK_mm + 11, K11);
   _mm_storeu_si128(EK_mm + 12, K12);
   _mm_storeu_si128(EK_mm + 13, K13);
   _mm_storeu_si128(EK_mm + 14, K14);

   // Now generate decryption keys

   __m128i* DK_mm = (__m128i*)&DK[0];
   _mm_storeu_si128(DK_mm     , K14);
   _mm_storeu_si128(DK_mm +  1, _mm_aesimc_si128(K13));
   _mm_storeu_si128(DK_mm +  2, _mm_aesimc_si128(K12));
   _mm_storeu_si128(DK_mm +  3, _mm_aesimc_si128(K11));
   _mm_storeu_si128(DK_mm +  4, _mm_aesimc_si128(K10));
   _mm_storeu_si128(DK_mm +  5, _mm_aesimc_si128(K9));
   _mm_storeu_si128(DK_mm +  6, _mm_aesimc_si128(K8));
   _mm_storeu_si128(DK_mm +  7, _mm_aesimc_si128(K7));
   _mm_storeu_si128(DK_mm +  8, _mm_aesimc_si128(K6));
   _mm_storeu_si128(DK_mm +  9, _mm_aesimc_si128(K5));
   _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(K4));
   _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(K3));
   _mm_storeu_si128(DK_mm + 12, _mm_aesimc_si128(K2));
   _mm_storeu_si128(DK_mm + 13, _mm_aesimc_si128(K1));
   _mm_storeu_si128(DK_mm + 14, K0);
   }
Пример #20
0
/* Run _mm_aeskeygenassist_si128 with immediate IMM8 over src1 in groups
   of sixteen elements, storing into resdst, then abort if any of the
   first NUM results differs bytewise from the expected value in edst.  */
static void
TEST (void)
{
  int base, lane, idx;

  init_data (src1, edst);

  for (base = 0; base < NUM; base += 16)
    for (lane = 0; lane < 16; lane++)
      resdst[base + lane]
	= _mm_aeskeygenassist_si128 (src1[base + lane], IMM8);

  for (idx = 0; idx < NUM; idx++)
    if (memcmp (edst + idx, resdst + idx, sizeof (__m128i)))
      abort ();
}
Пример #21
0
/*
 * Advance all three key streams by one AESKEYGENASSIST step.  The three
 * streams are kept interleaved, as in the original hand-unrolled code.
 * This has to be a macro rather than a function: the round constant must
 * reach _mm_aeskeygenassist_si128 as a compile-time immediate.
 */
#define PARA3_ADVANCE(rcon)                                              \
    do {                                                                 \
        assist1 = _mm_aeskeygenassist_si128(rk1, (rcon));                \
        assist2 = _mm_aeskeygenassist_si128(tail2, (rcon));              \
        assist3 = _mm_aeskeygenassist_si128(rk3, (rcon));                \
        rk1 = AES_128_ASSIST(rk1, assist1);                              \
        KEY_192_ASSIST(&words2, &assist2, &tail2);                       \
        rk3 = AES_128_ASSIST(rk3, assist3);                              \
    } while (0)

/*
 * Step in which the new 192-bit material starts on a round-key boundary:
 * words2 is stored whole at sched2[r192]; the low half of tail2 is carried
 * into the next PARA3_STEP_SPLIT.
 */
#define PARA3_STEP_WHOLE(rcon, r128, r192)                               \
    do {                                                                 \
        PARA3_ADVANCE(rcon);                                             \
        _mm_storeu_si128(sched1 + (r128), rk1);                          \
        _mm_storeu_si128(sched2 + (r192), words2);                       \
        _mm_storeu_si128(sched3 + (r128), rk3);                          \
    } while (0)

/*
 * Step in which the new 192-bit material straddles two round keys:
 *   sched2[r192]     = { low half of previous tail2, low half of words2 }
 *   sched2[r192 + 1] = { high half of words2,        low half of tail2  }
 */
#define PARA3_STEP_SPLIT(rcon, r128, r192)                               \
    do {                                                                 \
        const __m128d carried = _mm_castsi128_pd(tail2);                 \
        PARA3_ADVANCE(rcon);                                             \
        _mm_storeu_si128(sched1 + (r128), rk1);                          \
        _mm_storeu_si128(sched2 + (r192), _mm_castpd_si128(              \
            _mm_shuffle_pd(carried, _mm_castsi128_pd(words2), 0)));      \
        _mm_storeu_si128(sched2 + (r192) + 1, _mm_castpd_si128(          \
            _mm_shuffle_pd(_mm_castsi128_pd(words2),                     \
                           _mm_castsi128_pd(tail2), 1)));                \
        _mm_storeu_si128(sched3 + (r128), rk3);                          \
    } while (0)

/* Step for the two 128-bit streams only (the 192-bit schedule is done). */
#define PARA3_STEP_128_ONLY(rcon, r128)                                  \
    do {                                                                 \
        assist1 = _mm_aeskeygenassist_si128(rk1, (rcon));                \
        assist3 = _mm_aeskeygenassist_si128(rk3, (rcon));                \
        rk1 = AES_128_ASSIST(rk1, assist1);                              \
        rk3 = AES_128_ASSIST(rk3, assist3);                              \
        _mm_storeu_si128(sched1 + (r128), rk1);                          \
        _mm_storeu_si128(sched3 + (r128), rk3);                          \
    } while (0)

/*
 * Expand three AES user keys in one pass, interleaving the three
 * instruction streams.
 *
 *   userkey1, userkey3  128-bit keys (16 bytes are read from each).
 *   userkey2            192-bit key  (exactly 24 bytes are read).
 *   key1, key3          receive 11 round keys each (176 bytes written).
 *   key2                receives 13 round keys     (208 bytes written).
 *
 * None of the buffers needs any particular alignment: all loads and stores
 * use the unaligned intrinsics.
 *
 * Relies on the file's AES_128_ASSIST and KEY_192_ASSIST helpers.
 */
void AES_Key_Expansion_PARA_3(
        const unsigned char *userkey1,
        const unsigned char *userkey2,
        const unsigned char *userkey3,
        unsigned char *key1,
        unsigned char *key2,
        unsigned char *key3)
{
    __m128i rk1, assist1;            /* stream 1: current 128-bit round key  */
    __m128i words2, assist2, tail2;  /* stream 2: newest four words + tail   */
    __m128i rk3, assist3;            /* stream 3: current 128-bit round key  */

    __m128i *sched1 = (__m128i *)key1;
    __m128i *sched2 = (__m128i *)key2;
    __m128i *sched3 = (__m128i *)key3;

    rk1    = _mm_loadu_si128((const __m128i *)userkey1);
    words2 = _mm_loadu_si128((const __m128i *)userkey2);
    /*
     * Only 8 bytes of the 192-bit key remain after the first 16, so load
     * just those (upper half zeroed) instead of reading 16 bytes and
     * running past the end of the key.  Every store below takes only the
     * low half of tail2, and a correct AES-192 schedule cannot depend on
     * bytes beyond the key, so the resulting schedule is unchanged.
     */
    tail2  = _mm_loadl_epi64((const __m128i *)(userkey2 + 16));
    rk3    = _mm_loadu_si128((const __m128i *)userkey3);

    /* Round key 0 of every schedule is the start of the user key itself. */
    _mm_storeu_si128(sched1, rk1);
    _mm_storeu_si128(sched2, words2);
    _mm_storeu_si128(sched3, rk3);

    /*
     * Each step yields one 128-bit round key for streams 1 and 3 and one and
     * a half round keys' worth of material for stream 2, so the 192-bit
     * stores alternate between straddling two round keys and being aligned.
     */
    PARA3_STEP_SPLIT(0x01, 1, 1);    /* sched2[1], sched2[2]   */
    PARA3_STEP_WHOLE(0x02, 2, 3);    /* sched2[3]              */
    PARA3_STEP_SPLIT(0x04, 3, 4);    /* sched2[4], sched2[5]   */
    PARA3_STEP_WHOLE(0x08, 4, 6);    /* sched2[6]              */
    PARA3_STEP_SPLIT(0x10, 5, 7);    /* sched2[7], sched2[8]   */
    PARA3_STEP_WHOLE(0x20, 6, 9);    /* sched2[9]              */
    PARA3_STEP_SPLIT(0x40, 7, 10);   /* sched2[10], sched2[11] */
    PARA3_STEP_WHOLE(0x80, 8, 12);   /* sched2[12], the last   */

    /* The 128-bit schedules need two more round keys. */
    PARA3_STEP_128_ONLY(0x1b, 9);
    PARA3_STEP_128_ONLY(0x36, 10);
}

#undef PARA3_STEP_128_ONLY
#undef PARA3_STEP_SPLIT
#undef PARA3_STEP_WHOLE
#undef PARA3_ADVANCE