/*
 * One AES-128 key-schedule step for aesni_create_ctx(): derive the next round
 * key from rk_ and record it in ctx_->enc_keys[slot_].  This is a macro
 * because the round constant handed to _mm_aeskeygenassist_si128() has to be
 * a compile-time immediate.
 */
#define AESNI_CTX_EXPAND_STEP(ctx_, rk_, slot_, rcon_)                      \
    do {                                                                    \
        __m128i assist_ = _mm_aeskeygenassist_si128((rk_), (rcon_));        \
        (rk_) = aesni_128_assist((rk_), assist_);                           \
        (ctx_)->enc_keys[(slot_)] = (rk_);                                  \
    } while (0)

/*
 * Allocate a context and derive both AES-128 key schedules from `key`.
 *
 * key: pointer to at least 16 readable bytes; they are copied into the
 *      context, so the caller's buffer is not referenced after the call.
 *
 * On success the returned context holds:
 *   - key:            a malloc()ed 16-byte copy of the input,
 *   - enc_keys[0..10]: the input key followed by ten derived round keys,
 *   - dec_keys[0..10]: enc_keys in reverse order, with _mm_aesimc_si128()
 *                      applied to the nine middle entries.
 *
 * Returns NULL if `key` is NULL or if either allocation fails; nothing is
 * leaked in that case.  Both the context and its `key` member come from
 * malloc().
 */
aesni_ctx* aesni_create_ctx(byte *key)
{
    aesni_ctx *ctx;
    __m128i rk;

    if (key == NULL) {
        return NULL;
    }

    ctx = (aesni_ctx*)malloc(sizeof(aesni_ctx));
    if (ctx == NULL) {
        return NULL;
    }

    ctx->key = (byte*)malloc(16);
    if (ctx->key == NULL) {
        /* Do not hand back a half-built context. */
        free(ctx);
        return NULL;
    }
    memcpy(ctx->key, key, 16);

    /* Round key 0 is the raw key itself. */
    rk = _mm_loadu_si128((__m128i*)ctx->key);
    ctx->enc_keys[0] = rk;

    AESNI_CTX_EXPAND_STEP(ctx, rk, 1, 0x1);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 2, 0x2);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 3, 0x4);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 4, 0x8);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 5, 0x10);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 6, 0x20);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 7, 0x40);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 8, 0x80);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 9, 0x1b);
    AESNI_CTX_EXPAND_STEP(ctx, rk, 10, 0x36);

    /* Decryption schedule: reversed order, inner keys passed through
     * InvMixColumns (_mm_aesimc_si128); the two end keys are used as-is. */
    ctx->dec_keys[0] = ctx->enc_keys[10];
    for (int i = 1; i < 10; i++) {
        ctx->dec_keys[i] = _mm_aesimc_si128(ctx->enc_keys[10 - i]);
    }
    ctx->dec_keys[10] = ctx->enc_keys[0];

    return ctx;
}

#undef AESNI_CTX_EXPAND_STEP
void Cryptor::expandKey128(const unsigned char *key, unsigned char *schedule) { __m128i *keySchedule = (__m128i*) schedule; // The first entry is just the key itself. __m128i tmp = _mm_loadu_si128((__m128i*) key); if (!bigEndian) { reverse_m128i(tmp); // swap byte-order => big-endian. } keySchedule[0] = tmp; // Sadly, these cannot be done in a loop because the second // argument of _mm_aeskeygenassist_si128() needs to be a 8-bit // immediate! // The assist pretty much does the following: // SubWord(RotWord(tmp)) xor RCON[round] __m128i tmp2 = _mm_aeskeygenassist_si128(tmp, 0x1); tmp = assistKey128(tmp, tmp2); keySchedule[1] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x2); tmp = assistKey128(tmp, tmp2); keySchedule[2] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x4); tmp = assistKey128(tmp, tmp2); keySchedule[3] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x8); tmp = assistKey128(tmp, tmp2); keySchedule[4] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x10); tmp = assistKey128(tmp, tmp2); keySchedule[5] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x20); tmp = assistKey128(tmp, tmp2); keySchedule[6] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x40); tmp = assistKey128(tmp, tmp2); keySchedule[7] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x80); tmp = assistKey128(tmp, tmp2); keySchedule[8] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x1B); tmp = assistKey128(tmp, tmp2); keySchedule[9] = tmp; tmp2 = _mm_aeskeygenassist_si128(tmp, 0x36); tmp = assistKey128(tmp, tmp2); keySchedule[10] = tmp; }
/*
 * Derive the AES-128 encryption schedule.
 *
 * key:             the 128-bit cipher key, already in a vector register.
 * encryption_keys: receives keys[0] = key and keys[1..10], each computed from
 *                  its predecessor via aes_aes128_expand_key_assist().
 */
void __fastcall aes_AES128_expand_key_(
    AES_AES_Block key,
    AES_AES128_RoundKeys* encryption_keys)
{
/* One step: next = assist(prev, AESKEYGENASSIST(prev, rcon)).  The round
 * constant must be an immediate, hence a macro rather than a loop. */
#define AES128_NEXT_ROUND_KEY(slot, rcon)                                   \
    do {                                                                    \
        prev = aes_aes128_expand_key_assist(                                \
            prev, _mm_aeskeygenassist_si128(prev, (rcon)));                 \
        encryption_keys->keys[(slot)] = prev;                               \
    } while (0)

    AES_Block128 prev = key;
    encryption_keys->keys[0] = prev;

    AES128_NEXT_ROUND_KEY(1, 0x01);
    AES128_NEXT_ROUND_KEY(2, 0x02);
    AES128_NEXT_ROUND_KEY(3, 0x04);
    AES128_NEXT_ROUND_KEY(4, 0x08);
    AES128_NEXT_ROUND_KEY(5, 0x10);
    AES128_NEXT_ROUND_KEY(6, 0x20);
    AES128_NEXT_ROUND_KEY(7, 0x40);
    AES128_NEXT_ROUND_KEY(8, 0x80);
    AES128_NEXT_ROUND_KEY(9, 0x1b);
    AES128_NEXT_ROUND_KEY(10, 0x36);

#undef AES128_NEXT_ROUND_KEY
}
/*
 * Build the 11-entry AES-128 encryption schedule.
 *
 * key:    16 readable bytes (loaded unaligned).
 * expkey: receives round keys 0..10; written with plain __m128i assignment.
 */
static void AES_set_encrypt_key(const unsigned char *key, __m128i *expkey)
{
    __m128i *slot = expkey;
    __m128i rk = _mm_loadu_si128((__m128i*)key);

    /* Round key 0 is the cipher key; each later key is derived from the one
     * before it, with the round constant passed as an immediate. */
    *slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x1));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x2));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x4));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x8));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x10));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x20));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x40));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x80));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x1b));
    *++slot = rk;
    rk = assist128(rk, _mm_aeskeygenassist_si128(rk, 0x36));
    *++slot = rk;
}
void Cryptor::expandKey256(const unsigned char *key, unsigned char *schedule) { __m128i *keySchedule = (__m128i*) schedule; // Save the first 128 bits of the key as the first one. __m128i tmp = _mm_loadu_si128((__m128i*) key); if (!bigEndian) { reverse_m128i(tmp); // swap byte-order => big-endian. } keySchedule[0] = tmp; // The next 128 bits as the second. __m128i tmp3 = _mm_loadu_si128((__m128i*) (key + 16)); if (!bigEndian) { reverse_m128i(tmp3); // swap byte-order => big-endian. } keySchedule[1] = tmp3; __m128i tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01); assistKey256_1(&tmp, &tmp2); keySchedule[2] = tmp; assistKey256_2(&tmp, &tmp3); keySchedule[3] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02); assistKey256_1(&tmp, &tmp2); keySchedule[4] = tmp; assistKey256_2(&tmp, &tmp3); keySchedule[5] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04); assistKey256_1(&tmp, &tmp2); keySchedule[6] = tmp; assistKey256_2(&tmp, &tmp3); keySchedule[7] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08); assistKey256_1(&tmp, &tmp2); keySchedule[8] = tmp; assistKey256_2(&tmp, &tmp3); keySchedule[9] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10); assistKey256_1(&tmp, &tmp2); keySchedule[10] = tmp; assistKey256_2(&tmp, &tmp3); keySchedule[11] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20); assistKey256_1(&tmp, &tmp2); keySchedule[12] = tmp; assistKey256_2(&tmp, &tmp3); keySchedule[13] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40); assistKey256_1(&tmp, &tmp2); keySchedule[14] = tmp; }
/*
 * Compute an odd-numbered AES-256 round key.
 *
 * key:  the round key two positions back.
 * key2: the immediately preceding round key.
 *
 * Returns the running XOR of key's four 32-bit words (word i becomes
 * w0 ^ ... ^ wi), XORed in every lane with dword 2 of
 * AESKEYGENASSIST(key2, 0), i.e. SubWord of key2's top word, with no rotation
 * and no round constant.
 */
__m128i aes_256_key_expansion(__m128i key, __m128i key2)
{
    /* Broadcast the SubWord() result to all four lanes. */
    const __m128i sub_word = _mm_shuffle_epi32(
        _mm_aeskeygenassist_si128(key2, 0x00), _MM_SHUFFLE(2, 2, 2, 2));
    __m128i acc = key;
    int pass;

    /* Three shift-by-one-word-and-XOR passes produce the prefix XOR. */
    for (pass = 0; pass < 3; ++pass) {
        acc = _mm_xor_si128(acc, _mm_slli_si128(acc, 4));
    }

    return _mm_xor_si128(acc, sub_word);
}
/*
 * Compute only the first five AES-128 round keys (indices 0..4).
 *
 * userkey: 16 readable bytes (loaded unaligned).
 * key:     destination; slots 0..4 are written with unaligned stores.
 */
void AESNI_Key_Expansion4(const unsigned char *userkey, AUX_KEY key)
{
    __m128i *const round_keys = (__m128i*)key;
    __m128i current = _mm_loadu_si128((__m128i*)userkey);
    __m128i assist;

    /* Slot 0 is the user key unchanged. */
    _mm_storeu_si128(round_keys + 0, current);

    /* The round constant must be an immediate, so each step is spelled out. */
    assist  = _mm_aeskeygenassist_si128(current, 0x1);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(round_keys + 1, current);

    assist  = _mm_aeskeygenassist_si128(current, 0x2);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(round_keys + 2, current);

    assist  = _mm_aeskeygenassist_si128(current, 0x4);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(round_keys + 3, current);

    assist  = _mm_aeskeygenassist_si128(current, 0x8);
    current = AES_128_ASSIST(current, assist);
    _mm_storeu_si128(round_keys + 4, current);
}
/*
 * Second half of an AES-256 key-schedule step.
 *
 * temp1: read only; dword 2 of AESKEYGENASSIST(*temp1, 0) is broadcast to
 *        all lanes and used as the mixing word.
 * temp3: updated in place to the running XOR of its four 32-bit words
 *        (word i becomes w0 ^ ... ^ wi), then XORed with the mixing word.
 */
static inline void KEY_256_ASSIST_2(__m128i* temp1, __m128i * temp3)
{
    const __m128i mix =
        _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*temp1, 0x0), 0xaa);
    __m128i shifted = *temp3;
    __m128i acc = *temp3;
    int n;

    /* XOR in the value shifted left by one, two and three 32-bit words. */
    for (n = 0; n < 3; n++) {
        shifted = _mm_slli_si128(shifted, 0x4);
        acc = _mm_xor_si128(acc, shifted);
    }

    *temp3 = _mm_xor_si128(acc, mix);
}
/*
 * Expand a 128-bit key into 11 round keys (176 bytes).
 *
 * userkey: 16 readable bytes; no alignment requirement.
 * key:     receives round keys 0..10 back to back.  It is a plain byte
 *          buffer, so every store is unaligned (_mm_storeu_si128); an aligned
 *          vector assignment here would fault on a buffer that is not
 *          16-byte aligned.
 */
static void AESNI_128_Key_Expansion (const unsigned char *userkey,unsigned char *key)
{
/* One step: derive the next round key and store it in slot `slot`.  The
 * round constant must be a compile-time immediate, hence a macro. */
#define AESNI_128_EXPAND_STEP(slot, rcon)                                   \
    do {                                                                    \
        temp2 = _mm_aeskeygenassist_si128(temp1, (rcon));                   \
        temp1 = AESNI_128_ASSIST(temp1, temp2);                             \
        _mm_storeu_si128(&Key_Schedule[(slot)], temp1);                     \
    } while (0)

    __m128i temp1, temp2;
    __m128i *Key_Schedule = (__m128i*)key;

    /* Round key 0 is the user key itself. */
    temp1 = _mm_loadu_si128((const __m128i*)userkey);
    _mm_storeu_si128(&Key_Schedule[0], temp1);

    AESNI_128_EXPAND_STEP(1, 0x1);
    AESNI_128_EXPAND_STEP(2, 0x2);
    AESNI_128_EXPAND_STEP(3, 0x4);
    AESNI_128_EXPAND_STEP(4, 0x8);
    AESNI_128_EXPAND_STEP(5, 0x10);
    AESNI_128_EXPAND_STEP(6, 0x20);
    AESNI_128_EXPAND_STEP(7, 0x40);
    AESNI_128_EXPAND_STEP(8, 0x80);
    AESNI_128_EXPAND_STEP(9, 0x1b);
    AESNI_128_EXPAND_STEP(10, 0x36);

#undef AESNI_128_EXPAND_STEP
}
/*
 * Expand a 128-bit key into the full 11-entry schedule.
 *
 * userkey: 16 readable bytes (loaded unaligned).
 * enc_key: destination; slots 0..10 are written with unaligned stores.
 */
void AESNI_Key_Expansion(const unsigned char *userkey, AES_KEY enc_key)
{
/* Derive the next round key from `rk` and store it in slot `slot`; the round
 * constant has to be an immediate, so the steps cannot share a loop. */
#define AESNI_EXPAND_STEP(slot, rcon)                                       \
    do {                                                                    \
        assist = _mm_aeskeygenassist_si128(rk, (rcon));                     \
        rk = AES_128_ASSIST(rk, assist);                                    \
        _mm_storeu_si128(&schedule[(slot)], rk);                            \
    } while (0)

    __m128i *schedule = (__m128i*)enc_key;
    __m128i rk = _mm_loadu_si128((__m128i*)userkey);
    __m128i assist;

    /* Slot 0 holds the user key unchanged. */
    _mm_storeu_si128(&schedule[0], rk);

    AESNI_EXPAND_STEP(1, 0x1);
    AESNI_EXPAND_STEP(2, 0x2);
    AESNI_EXPAND_STEP(3, 0x4);
    AESNI_EXPAND_STEP(4, 0x8);
    AESNI_EXPAND_STEP(5, 0x10);
    AESNI_EXPAND_STEP(6, 0x20);
    AESNI_EXPAND_STEP(7, 0x40);
    AESNI_EXPAND_STEP(8, 0x80);
    AESNI_EXPAND_STEP(9, 0x1b);
    AESNI_EXPAND_STEP(10, 0x36);

#undef AESNI_EXPAND_STEP
}
/*
 * Second half of an AES-256 key-schedule step.
 *
 * tmp1: read only; supplies the mixing word (dword 2 of
 *       AESKEYGENASSIST(*tmp1, 0), broadcast to every lane).
 * tmp3: replaced by the running XOR of its own 32-bit words
 *       (word i becomes w0 ^ ... ^ wi) XORed with the mixing word.
 */
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
{
    const __m128i mix =
        _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*tmp1, 0x00), 0xAA);

    /* The input shifted left by one, two and three 32-bit words. */
    const __m128i src = *tmp3;
    const __m128i by1 = _mm_slli_si128(src, 0x04);
    const __m128i by2 = _mm_slli_si128(by1, 0x04);
    const __m128i by3 = _mm_slli_si128(by2, 0x04);

    const __m128i prefix =
        _mm_xor_si128(_mm_xor_si128(src, by1), _mm_xor_si128(by2, by3));

    *tmp3 = _mm_xor_si128(prefix, mix);
}
/*
 * Expand a 192-bit key into 13 round keys (13 * 16 = 208 bytes).
 *
 * userkey: exactly 24 readable bytes are required.  The second load fetches
 *          only the trailing 64 bits (_mm_loadl_epi64) and zero-fills the
 *          rest, so nothing past the key is read.  The upper half of that
 *          register never reaches the final schedule: every slot built from
 *          temp3 takes only its low 64 bits.
 * key:     receives round keys 0..12 back to back; stores are unaligned, so
 *          the buffer does not need to be 16-byte aligned.
 *
 * Each KEY_192_ASSIST step yields 192 bits of schedule, so consecutive steps
 * alternate between filling whole 128-bit slots and straddling two of them.
 */
void AES_192_Key_Expansion (const unsigned char *userkey, unsigned char *key)
{
/* Step whose output straddles slots: the 64 bits carried over from the
 * previous step plus the low half of temp1 go to `slot`; the high half of
 * temp1 plus the low half of temp3 go to `slot + 1`. */
#define AES192_STEP_SPLIT(rcon, slot)                                       \
    do {                                                                    \
        temp2 = _mm_aeskeygenassist_si128(temp3, (rcon));                   \
        KEY_192_ASSIST(&temp1, &temp2, &temp3);                             \
        _mm_storeu_si128(&Key_Schedule[(slot)], _mm_castpd_si128(           \
            _mm_shuffle_pd(_mm_castsi128_pd(carry),                         \
                           _mm_castsi128_pd(temp1), 0)));                   \
        _mm_storeu_si128(&Key_Schedule[(slot) + 1], _mm_castpd_si128(       \
            _mm_shuffle_pd(_mm_castsi128_pd(temp1),                         \
                           _mm_castsi128_pd(temp3), 1)));                   \
    } while (0)
/* Step whose temp1 fills `slot` exactly; the low half of temp3 is carried
 * into the next split step, which writes slot + 1. */
#define AES192_STEP_WHOLE(rcon, slot)                                       \
    do {                                                                    \
        temp2 = _mm_aeskeygenassist_si128(temp3, (rcon));                   \
        KEY_192_ASSIST(&temp1, &temp2, &temp3);                             \
        _mm_storeu_si128(&Key_Schedule[(slot)], temp1);                     \
        carry = temp3;                                                      \
    } while (0)

    __m128i temp1, temp2, temp3;
    __m128i carry;                      /* low 64 bits await the next slot */
    __m128i *Key_Schedule = (__m128i*)key;

    temp1 = _mm_loadu_si128((const __m128i*)userkey);
    temp3 = _mm_loadl_epi64((const __m128i*)(userkey+16));
    _mm_storeu_si128(&Key_Schedule[0], temp1);
    carry = temp3;

    AES192_STEP_SPLIT(0x1, 1);          /* slots 1, 2 */
    AES192_STEP_WHOLE(0x2, 3);          /* slot 3, carry for 4 */
    AES192_STEP_SPLIT(0x4, 4);          /* slots 4, 5 */
    AES192_STEP_WHOLE(0x8, 6);          /* slot 6, carry for 7 */
    AES192_STEP_SPLIT(0x10, 7);         /* slots 7, 8 */
    AES192_STEP_WHOLE(0x20, 9);         /* slot 9, carry for 10 */
    AES192_STEP_SPLIT(0x40, 10);        /* slots 10, 11 */

    /* Final step: only temp1 is needed for the last round key. */
    temp2 = _mm_aeskeygenassist_si128(temp3, 0x80);
    KEY_192_ASSIST(&temp1, &temp2, &temp3);
    _mm_storeu_si128(&Key_Schedule[12], temp1);

#undef AES192_STEP_WHOLE
#undef AES192_STEP_SPLIT
}
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
//
// Expands an AES-256 key in place.  On entry keybuf holds the 32-byte key in
// its first two 16-byte slots; on return slots 2..14 hold the derived round
// keys (slots 0 and 1 are left as they were).  Every access is an aligned
// 128-bit load or store, so keybuf must be 16-byte aligned and provide
// 15 * 16 = 240 bytes.
static inline void ExpandAESKey256(char *keybuf)
{
// Even slot: sub1 updates `lo` using AESKEYGENASSIST(hi, rcon).
// Odd slot:  sub2 updates `hi` from the new `lo`.
// The round constant must be an immediate, hence macros instead of a loop.
#define EXPAND256_EVEN(slot, rcon)                                          \
    do {                                                                    \
        assist = _mm_aeskeygenassist_si128(hi, (rcon));                     \
        ExpandAESKey256_sub1(&lo, &assist);                                 \
        _mm_store_si128(&keys[(slot)], lo);                                 \
    } while (0)
#define EXPAND256_ODD(slot)                                                 \
    do {                                                                    \
        ExpandAESKey256_sub2(&lo, &hi);                                     \
        _mm_store_si128(&keys[(slot)], hi);                                 \
    } while (0)

    __m128i *const keys = (__m128i *)keybuf;
    __m128i lo = _mm_load_si128(&keys[0]);
    __m128i hi = _mm_load_si128(&keys[1]);
    __m128i assist;

    EXPAND256_EVEN(2, 0x01);
    EXPAND256_ODD(3);
    EXPAND256_EVEN(4, 0x02);
    EXPAND256_ODD(5);
    EXPAND256_EVEN(6, 0x04);
    EXPAND256_ODD(7);
    EXPAND256_EVEN(8, 0x08);
    EXPAND256_ODD(9);
    EXPAND256_EVEN(10, 0x10);
    EXPAND256_ODD(11);
    EXPAND256_EVEN(12, 0x20);
    EXPAND256_ODD(13);
    // The schedule ends on an even slot.
    EXPAND256_EVEN(14, 0x40);

#undef EXPAND256_ODD
#undef EXPAND256_EVEN
}
void Cryptor::assistKey256_2(__m128i *tmp, __m128i *tmp2) { __m128i tmp4 = _mm_aeskeygenassist_si128(*tmp, 0x0); // Duplicate 3rd part 4 times. __m128i tmp3 = _mm_shuffle_epi32(tmp4, SHUFFLE4_32(2, 2, 2, 2)); tmp4 = _mm_slli_si128(*tmp2, 0x4); *tmp2 = _mm_xor_si128(*tmp2, tmp4); tmp4 = _mm_slli_si128(tmp4, 0x4); *tmp2 = _mm_xor_si128(*tmp2, tmp4); tmp4 = _mm_slli_si128(tmp4, 0x4); *tmp2 = _mm_xor_si128(*tmp2, tmp4); *tmp2 = _mm_xor_si128(*tmp2, tmp3); }
/*
 * Expand a 256-bit key into 15 round keys (15 * 16 = 240 bytes).
 *
 * userkey: 32 readable bytes; no alignment requirement.
 * key:     receives round keys 0..14 back to back.  It is a plain byte
 *          buffer, so every store is unaligned (_mm_storeu_si128); an aligned
 *          vector assignment here would fault on a buffer that is not
 *          16-byte aligned.
 */
static void AESNI_256_Key_Expansion (const unsigned char *userkey, unsigned char *key)
{
/* Even slot: KEY_256_ASSIST_1 updates temp1 using AESKEYGENASSIST(temp3, rcon).
 * The round constant must be an immediate, hence macros instead of a loop. */
#define AESNI_256_EVEN(slot, rcon)                                          \
    do {                                                                    \
        temp2 = _mm_aeskeygenassist_si128(temp3, (rcon));                   \
        KEY_256_ASSIST_1(&temp1, &temp2);                                   \
        _mm_storeu_si128(&Key_Schedule[(slot)], temp1);                     \
    } while (0)
/* Odd slot: KEY_256_ASSIST_2 updates temp3 from the freshly computed temp1. */
#define AESNI_256_ODD(slot)                                                 \
    do {                                                                    \
        KEY_256_ASSIST_2(&temp1, &temp3);                                   \
        _mm_storeu_si128(&Key_Schedule[(slot)], temp3);                     \
    } while (0)

    __m128i temp1, temp2, temp3;
    __m128i *Key_Schedule = (__m128i*)key;

    /* The first two round keys are the two halves of the user key. */
    temp1 = _mm_loadu_si128((const __m128i*)userkey);
    temp3 = _mm_loadu_si128((const __m128i*)(userkey+16));
    _mm_storeu_si128(&Key_Schedule[0], temp1);
    _mm_storeu_si128(&Key_Schedule[1], temp3);

    AESNI_256_EVEN(2, 0x01);
    AESNI_256_ODD(3);
    AESNI_256_EVEN(4, 0x02);
    AESNI_256_ODD(5);
    AESNI_256_EVEN(6, 0x04);
    AESNI_256_ODD(7);
    AESNI_256_EVEN(8, 0x08);
    AESNI_256_ODD(9);
    AESNI_256_EVEN(10, 0x10);
    AESNI_256_ODD(11);
    AESNI_256_EVEN(12, 0x20);
    AESNI_256_ODD(13);
    /* The schedule ends on an even slot. */
    AESNI_256_EVEN(14, 0x40);

#undef AESNI_256_ODD
#undef AESNI_256_EVEN
}
/* Compile-fail check for intrinsics whose final operand must be an 8-bit
   immediate.  Every call passes k4 in that position, and the dg-error
   directive beside each call names the diagnostic expected for it
   ("third", "fifth" or "last" argument, matching the operand's position).
   The operands (i1, k1, b1, k4, ...) are declared outside this function;
   k4 is presumably a non-constant int -- confirm against the declarations.
   NOTE: a dg-error directive applies to the source line it sits on, so each
   directive must stay on the same line as its call.  */
void test8bit (void) {
  i1 = _mm_cmpistrm (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistri (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistra (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrc (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistro (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrs (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrz (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  b1 = _mm256_blend_ps (b2, b3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  k1 = _cvtss_sh (f1, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm256_cvtps_ph (b2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_dp_ps (b2, b3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute_ps (b2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_blend_epi16 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_cvtps_ph (a1, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  d1 = _mm_dp_pd (d2, d3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_dp_ps (a2, a3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_insert_ps (a2, a3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_mpsadbw_epu8 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_permute_ps (a2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_slli_si128 (i2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_srli_si128 (i2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
}
/*
 * Derive the AES-256 encryption schedule.
 *
 * key_lo, key_hi:  the two 128-bit halves of the cipher key.
 * encryption_keys: receives keys[0] = key_lo, keys[1] = key_hi and
 *                  keys[2..14], each returned by
 *                  aes_aes256_expand_key_assist().
 *
 * Even-numbered keys use AESKEYGENASSIST with a round constant and broadcast
 * dword 3 of the result (shuffle 0xff); odd-numbered keys use a zero round
 * constant and broadcast dword 2 (shuffle 0xaa).
 */
void __fastcall aes_AES256_expand_key_(
    AES_AES_Block key_lo,
    AES_AES_Block key_hi,
    AES_AES256_RoundKeys* encryption_keys)
{
/* One schedule step.  Both the round constant and the shuffle selector must
 * be immediates, so the steps are macro expansions rather than a loop.  The
 * assist word is materialised in `hwgen` before the helper is called, as the
 * helper is given the addresses of prev_lo / prev_hi. */
#define AES256_NEXT_ROUND_KEY(slot, rcon, lane)                             \
    do {                                                                    \
        hwgen = _mm_aeskeygenassist_si128(prev_hi, (rcon));                 \
        hwgen = _mm_shuffle_epi32(hwgen, (lane));                           \
        encryption_keys->keys[(slot)] =                                     \
            aes_aes256_expand_key_assist(&prev_lo, &prev_hi, hwgen);        \
    } while (0)

    AES_AES_Block prev_lo = key_lo;
    AES_AES_Block prev_hi = key_hi;
    AES_AES_Block hwgen;

    encryption_keys->keys[0] = prev_lo;
    encryption_keys->keys[1] = prev_hi;

    AES256_NEXT_ROUND_KEY(2, 0x01, 0xff);
    AES256_NEXT_ROUND_KEY(3, 0, 0xaa);
    AES256_NEXT_ROUND_KEY(4, 0x02, 0xff);
    AES256_NEXT_ROUND_KEY(5, 0, 0xaa);
    AES256_NEXT_ROUND_KEY(6, 0x04, 0xff);
    AES256_NEXT_ROUND_KEY(7, 0, 0xaa);
    AES256_NEXT_ROUND_KEY(8, 0x08, 0xff);
    AES256_NEXT_ROUND_KEY(9, 0, 0xaa);
    AES256_NEXT_ROUND_KEY(10, 0x10, 0xff);
    AES256_NEXT_ROUND_KEY(11, 0, 0xaa);
    AES256_NEXT_ROUND_KEY(12, 0x20, 0xff);
    AES256_NEXT_ROUND_KEY(13, 0, 0xaa);
    AES256_NEXT_ROUND_KEY(14, 0x40, 0xff);

#undef AES256_NEXT_ROUND_KEY
}
inline void Cryptor::expandKey192(const unsigned char *key, unsigned char *schedule) { __m128i *keySchedule = (__m128i*) schedule; // Save the first 128 bits of the key as the first one. __m128i tmp = _mm_loadu_si128((__m128i*) key); if (!bigEndian) { reverse_m128i(tmp); // swap byte-order => big-endian. } keySchedule[0] = tmp; // The next 64 bits as the second. unsigned char buf[128]; memset(buf, 0, 128); memcpy(buf, key + 16, 64); __m128i tmp3 = _mm_loadu_si128((__m128i*) buf); if (!bigEndian) { reverse_m128i(tmp3); // swap byte-order => big-endian. } keySchedule[1] = tmp3; __m128i tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x1); assistKey192(&tmp, &tmp2, &tmp3); keySchedule[1] = (__m128i) _mm_shuffle_pd((__m128d) keySchedule[1], (__m128d) tmp, 0); keySchedule[2] = (__m128i) _mm_shuffle_pd((__m128d) tmp, (__m128d) tmp3, 1); tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x2); assistKey192(&tmp, &tmp2, &tmp3); keySchedule[3] = tmp; keySchedule[4] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x4); assistKey192(&tmp, &tmp2, &tmp3); keySchedule[4] = (__m128i) _mm_shuffle_pd((__m128d) keySchedule[4], (__m128d) tmp, 0); keySchedule[5] = (__m128i) _mm_shuffle_pd((__m128d) tmp, (__m128d) tmp3, 1); tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x8); assistKey192(&tmp, &tmp2, &tmp3); keySchedule[6] = tmp; keySchedule[7] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10); assistKey192(&tmp, &tmp2, &tmp3); keySchedule[7] = (__m128i) _mm_shuffle_pd((__m128d) keySchedule[7], (__m128d) tmp, 0); keySchedule[8] = (__m128i) _mm_shuffle_pd((__m128d) tmp, (__m128d) tmp3, 1); tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20); assistKey192(&tmp, &tmp2, &tmp3); keySchedule[9] = tmp; keySchedule[10] = tmp3; tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40); assistKey192(&tmp, &tmp2, &tmp3); keySchedule[10] = (__m128i) _mm_shuffle_pd((__m128d) keySchedule[10], (__m128d) tmp, 0); keySchedule[11] = (__m128i) _mm_shuffle_pd((__m128d) tmp, (__m128d) tmp3, 1); tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x80); 
assistKey192(&tmp, &tmp2, &tmp3); keySchedule[12] = tmp; keySchedule[13] = tmp3; }
/* * AES-256 Key Schedule */ void AES_256_NI::key_schedule(const byte key[], size_t) { __m128i K0 = _mm_loadu_si128((const __m128i*)(key)); __m128i K1 = _mm_loadu_si128((const __m128i*)(key + 16)); __m128i K2 = aes_128_key_expansion(K0, _mm_aeskeygenassist_si128(K1, 0x01)); __m128i K3 = aes_256_key_expansion(K1, K2); __m128i K4 = aes_128_key_expansion(K2, _mm_aeskeygenassist_si128(K3, 0x02)); __m128i K5 = aes_256_key_expansion(K3, K4); __m128i K6 = aes_128_key_expansion(K4, _mm_aeskeygenassist_si128(K5, 0x04)); __m128i K7 = aes_256_key_expansion(K5, K6); __m128i K8 = aes_128_key_expansion(K6, _mm_aeskeygenassist_si128(K7, 0x08)); __m128i K9 = aes_256_key_expansion(K7, K8); __m128i K10 = aes_128_key_expansion(K8, _mm_aeskeygenassist_si128(K9, 0x10)); __m128i K11 = aes_256_key_expansion(K9, K10); __m128i K12 = aes_128_key_expansion(K10, _mm_aeskeygenassist_si128(K11, 0x20)); __m128i K13 = aes_256_key_expansion(K11, K12); __m128i K14 = aes_128_key_expansion(K12, _mm_aeskeygenassist_si128(K13, 0x40)); __m128i* EK_mm = (__m128i*)&EK[0]; _mm_storeu_si128(EK_mm , K0); _mm_storeu_si128(EK_mm + 1, K1); _mm_storeu_si128(EK_mm + 2, K2); _mm_storeu_si128(EK_mm + 3, K3); _mm_storeu_si128(EK_mm + 4, K4); _mm_storeu_si128(EK_mm + 5, K5); _mm_storeu_si128(EK_mm + 6, K6); _mm_storeu_si128(EK_mm + 7, K7); _mm_storeu_si128(EK_mm + 8, K8); _mm_storeu_si128(EK_mm + 9, K9); _mm_storeu_si128(EK_mm + 10, K10); _mm_storeu_si128(EK_mm + 11, K11); _mm_storeu_si128(EK_mm + 12, K12); _mm_storeu_si128(EK_mm + 13, K13); _mm_storeu_si128(EK_mm + 14, K14); // Now generate decryption keys __m128i* DK_mm = (__m128i*)&DK[0]; _mm_storeu_si128(DK_mm , K14); _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K13)); _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K12)); _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K11)); _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K10)); _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K9)); _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K8)); _mm_storeu_si128(DK_mm + 7, 
_mm_aesimc_si128(K7)); _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K6)); _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K5)); _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(K4)); _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(K3)); _mm_storeu_si128(DK_mm + 12, _mm_aesimc_si128(K2)); _mm_storeu_si128(DK_mm + 13, _mm_aesimc_si128(K1)); _mm_storeu_si128(DK_mm + 14, K0); }
/* Run _mm_aeskeygenassist_si128 with the immediate IMM8 over src1[] in
   groups of sixteen, store the results in resdst[], then compare every
   element against edst[] and abort on the first mismatch.  init_data
   is called with src1 and edst before anything is computed.  NUM is walked
   in steps of 16, exactly as in the unrolled form this replaces.  */
static void
TEST (void)
{
  int base, lane, idx;

  init_data (src1, edst);

  for (base = 0; base < NUM; base += 16)
    for (lane = 0; lane < 16; lane++)
      resdst[base + lane]
	= _mm_aeskeygenassist_si128 (src1[base + lane], IMM8);

  for (idx = 0; idx < NUM; idx++)
    if (memcmp (&edst[idx], &resdst[idx], sizeof (__m128i)) != 0)
      abort ();
}
/*
 * Expand three independent keys in one interleaved pass:
 *   userkey1 -> key1 : 128-bit key, round keys 0..10 (176 bytes)
 *   userkey2 -> key2 : 192-bit key, round keys 0..12 (208 bytes)
 *   userkey3 -> key3 : 128-bit key, round keys 0..10 (176 bytes)
 *
 * userkey1 and userkey3 need 16 readable bytes, userkey2 needs 24.  Only the
 * trailing 64 bits of userkey2 are fetched by the second load
 * (_mm_loadl_epi64), so nothing past the 24-byte key is read; the upper half
 * of that register never reaches the stored schedule.
 *
 * All outputs are plain byte buffers and are written with unaligned stores,
 * so none of them needs 16-byte alignment.
 */
void AES_Key_Expansion_PARA_3(
    const unsigned char *userkey1,
    const unsigned char *userkey2,
    const unsigned char *userkey3,
    unsigned char *key1,
    unsigned char *key2,
    unsigned char *key3)
{
/* Advance all three schedules by one step.  The three AESKEYGENASSISTs are
 * issued back to back, as in the original interleaving.  The round constant
 * must be an immediate, hence macros rather than a loop. */
#define PARA3_ADVANCE(rcon)                                                 \
    do {                                                                    \
        temp2_1 = _mm_aeskeygenassist_si128(temp1_1, (rcon));               \
        temp2_2 = _mm_aeskeygenassist_si128(temp3_2, (rcon));               \
        temp2_3 = _mm_aeskeygenassist_si128(temp1_3, (rcon));               \
        temp1_1 = AES_128_ASSIST(temp1_1, temp2_1);                         \
        KEY_192_ASSIST(&temp1_2, &temp2_2, &temp3_2);                       \
        temp1_3 = AES_128_ASSIST(temp1_3, temp2_3);                         \
    } while (0)
/* Store the current 128-bit-key round keys of schedules 1 and 3. */
#define PARA3_STORE_128(slot)                                               \
    do {                                                                    \
        _mm_storeu_si128(&Key_Schedule1[(slot)], temp1_1);                  \
        _mm_storeu_si128(&Key_Schedule3[(slot)], temp1_3);                  \
    } while (0)
/* 192-bit schedule, output straddling two slots: carried 64 bits plus the
 * low half of temp1_2 -> slot; high half of temp1_2 plus low half of
 * temp3_2 -> slot + 1. */
#define PARA3_STORE_192_SPLIT(slot)                                         \
    do {                                                                    \
        _mm_storeu_si128(&Key_Schedule2[(slot)], _mm_castpd_si128(          \
            _mm_shuffle_pd(_mm_castsi128_pd(carry_2),                       \
                           _mm_castsi128_pd(temp1_2), 0)));                 \
        _mm_storeu_si128(&Key_Schedule2[(slot) + 1], _mm_castpd_si128(      \
            _mm_shuffle_pd(_mm_castsi128_pd(temp1_2),                       \
                           _mm_castsi128_pd(temp3_2), 1)));                 \
    } while (0)
/* 192-bit schedule, temp1_2 fills `slot`; temp3_2 is carried into the next
 * split store, which writes slot + 1. */
#define PARA3_STORE_192_WHOLE(slot)                                         \
    do {                                                                    \
        _mm_storeu_si128(&Key_Schedule2[(slot)], temp1_2);                  \
        carry_2 = temp3_2;                                                  \
    } while (0)

    __m128i temp1_1, temp2_1;
    __m128i temp1_2, temp2_2, temp3_2;
    __m128i temp1_3, temp2_3;
    __m128i carry_2;                    /* low 64 bits await the next slot */
    __m128i *Key_Schedule1 = (__m128i*)key1;
    __m128i *Key_Schedule2 = (__m128i*)key2;
    __m128i *Key_Schedule3 = (__m128i*)key3;

    temp1_1 = _mm_loadu_si128((const __m128i*)userkey1);
    temp1_2 = _mm_loadu_si128((const __m128i*)userkey2);
    temp3_2 = _mm_loadl_epi64((const __m128i*)(userkey2+16));
    temp1_3 = _mm_loadu_si128((const __m128i*)userkey3);

    /* Round key 0 of each schedule is the user key itself. */
    _mm_storeu_si128(&Key_Schedule1[0], temp1_1);
    _mm_storeu_si128(&Key_Schedule2[0], temp1_2);
    _mm_storeu_si128(&Key_Schedule3[0], temp1_3);
    carry_2 = temp3_2;

    PARA3_ADVANCE(0x1);
    PARA3_STORE_128(1);
    PARA3_STORE_192_SPLIT(1);           /* key2 slots 1, 2 */

    PARA3_ADVANCE(0x2);
    PARA3_STORE_128(2);
    PARA3_STORE_192_WHOLE(3);           /* key2 slot 3, carry for 4 */

    PARA3_ADVANCE(0x4);
    PARA3_STORE_128(3);
    PARA3_STORE_192_SPLIT(4);           /* key2 slots 4, 5 */

    PARA3_ADVANCE(0x8);
    PARA3_STORE_128(4);
    PARA3_STORE_192_WHOLE(6);           /* key2 slot 6, carry for 7 */

    PARA3_ADVANCE(0x10);
    PARA3_STORE_128(5);
    PARA3_STORE_192_SPLIT(7);           /* key2 slots 7, 8 */

    PARA3_ADVANCE(0x20);
    PARA3_STORE_128(6);
    PARA3_STORE_192_WHOLE(9);           /* key2 slot 9, carry for 10 */

    PARA3_ADVANCE(0x40);
    PARA3_STORE_128(7);
    PARA3_STORE_192_SPLIT(10);          /* key2 slots 10, 11 */

    /* Last step of the 192-bit schedule: only temp1_2 is stored. */
    PARA3_ADVANCE(0x80);
    PARA3_STORE_128(8);
    _mm_storeu_si128(&Key_Schedule2[12], temp1_2);

    /* The two 128-bit schedules need two more round keys. */
    temp2_1 = _mm_aeskeygenassist_si128(temp1_1, 0x1b);
    temp2_3 = _mm_aeskeygenassist_si128(temp1_3, 0x1b);
    temp1_1 = AES_128_ASSIST(temp1_1, temp2_1);
    temp1_3 = AES_128_ASSIST(temp1_3, temp2_3);
    PARA3_STORE_128(9);

    temp2_1 = _mm_aeskeygenassist_si128(temp1_1, 0x36);
    temp2_3 = _mm_aeskeygenassist_si128(temp1_3, 0x36);
    temp1_1 = AES_128_ASSIST(temp1_1, temp2_1);
    temp1_3 = AES_128_ASSIST(temp1_3, temp2_3);
    PARA3_STORE_128(10);

#undef PARA3_STORE_192_WHOLE
#undef PARA3_STORE_192_SPLIT
#undef PARA3_STORE_128
#undef PARA3_ADVANCE
}