/*
 * Derive a decryption key schedule from an already expanded encryption
 * schedule.
 *
 * The round keys are copied in reverse order. The first and the last round
 * key are copied unchanged; every round key in between is passed through
 * _mm_aesimc_si128 on the way.
 *
 * dkey: receives the decryption schedule.
 * ekey: expanded encryption schedule; its round count is taken from
 *       ROUNDS(ekey).
 */
inline void AES_set_decrypt_key_fast(AES_KEY *dkey, const AES_KEY *ekey)
{
    int src = 0;              /* index into the encryption schedule */
    int dst = ROUNDS(ekey);   /* index into the decryption schedule */

#if (OCB_KEY_LEN == 0)
    dkey->rounds = dst;
#endif

    /* Highest decryption slot <- first encryption round key, unchanged. */
    dkey->rd_key[dst] = ekey->rd_key[src];

    /* Middle round keys: walk dst down and src up, transforming each key. */
    for (--dst, ++src; dst != 0; --dst, ++src)
        dkey->rd_key[dst] = _mm_aesimc_si128(ekey->rd_key[src]);

    /* Slot 0 <- last encryption round key, unchanged. */
    dkey->rd_key[dst] = ekey->rd_key[src];
}
/* * AES-128 Key Schedule */ void AES_128_NI::key_schedule(const byte key[], size_t) { #define AES_128_key_exp(K, RCON) \ aes_128_key_expansion(K, _mm_aeskeygenassist_si128(K, RCON)) __m128i K0 = _mm_loadu_si128((const __m128i*)(key)); __m128i K1 = AES_128_key_exp(K0, 0x01); __m128i K2 = AES_128_key_exp(K1, 0x02); __m128i K3 = AES_128_key_exp(K2, 0x04); __m128i K4 = AES_128_key_exp(K3, 0x08); __m128i K5 = AES_128_key_exp(K4, 0x10); __m128i K6 = AES_128_key_exp(K5, 0x20); __m128i K7 = AES_128_key_exp(K6, 0x40); __m128i K8 = AES_128_key_exp(K7, 0x80); __m128i K9 = AES_128_key_exp(K8, 0x1B); __m128i K10 = AES_128_key_exp(K9, 0x36); __m128i* EK_mm = (__m128i*)&EK[0]; _mm_storeu_si128(EK_mm , K0); _mm_storeu_si128(EK_mm + 1, K1); _mm_storeu_si128(EK_mm + 2, K2); _mm_storeu_si128(EK_mm + 3, K3); _mm_storeu_si128(EK_mm + 4, K4); _mm_storeu_si128(EK_mm + 5, K5); _mm_storeu_si128(EK_mm + 6, K6); _mm_storeu_si128(EK_mm + 7, K7); _mm_storeu_si128(EK_mm + 8, K8); _mm_storeu_si128(EK_mm + 9, K9); _mm_storeu_si128(EK_mm + 10, K10); // Now generate decryption keys __m128i* DK_mm = (__m128i*)&DK[0]; _mm_storeu_si128(DK_mm , K10); _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K9)); _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K8)); _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K7)); _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K6)); _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K5)); _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K4)); _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K3)); _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K2)); _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K1)); _mm_storeu_si128(DK_mm + 10, K0); }
unsigned char *Cryptor::genKeySchedule(const Key &key, bool encryption) { ALIGN16 unsigned char *schedule = new unsigned char[15 * 16]; int upper; switch (key.size) { case _128_BITS: upper = 9; expandKey128(key.key, schedule); break; case _192_BITS: upper = 11; expandKey192(key.key, schedule); break; case _256_BITS: upper = 13; expandKey256(key.key, schedule); break; } // Generate decryption round keys by using aesimc // instruction. This only concerns keys 1-9/11/13. And reverse // the order for all of them! if (!encryption) { __m128i *keySchedule = (__m128i*) schedule; ALIGN16 unsigned char *tempSchedule = new unsigned char[15 * 16]; __m128i *tempKeySchedule = (__m128i*) tempSchedule; tempKeySchedule[upper + 1] = keySchedule[0]; for (int i = 1; i <= upper; i++) { tempKeySchedule[(upper + 1) - i] = _mm_aesimc_si128(keySchedule[i]); } tempKeySchedule[0] = keySchedule[upper + 1]; // Now use the temp. instead! delete[] schedule; return (unsigned char*) tempKeySchedule; } return schedule; }
int AESNI_set_decrypt_key (const unsigned char *userKey, const int bits, AES_KEY *key) { int i,nr;; AES_KEY temp_key; __m128i *Key_Schedule = (__m128i*)key->KEY; __m128i *Temp_Key_Schedule = (__m128i*)temp_key.KEY; mybits=bits; if (!userKey || !key) return -1; if (AESNI_set_encrypt_key(userKey,bits,&temp_key) == -2) return -2; nr = temp_key.nr; key->nr = nr; Key_Schedule[nr] = Temp_Key_Schedule[0]; Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]); Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]); Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]); Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]); Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]); Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]); Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]); Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]); Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]); if(nr>10) { Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]); Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]); } if(nr>12) { Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]); Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]); } Key_Schedule[0] = Temp_Key_Schedule[nr]; return 0; }
/* * AES-256 Key Schedule */ void AES_256_NI::key_schedule(const byte key[], size_t) { __m128i K0 = _mm_loadu_si128((const __m128i*)(key)); __m128i K1 = _mm_loadu_si128((const __m128i*)(key + 16)); __m128i K2 = aes_128_key_expansion(K0, _mm_aeskeygenassist_si128(K1, 0x01)); __m128i K3 = aes_256_key_expansion(K1, K2); __m128i K4 = aes_128_key_expansion(K2, _mm_aeskeygenassist_si128(K3, 0x02)); __m128i K5 = aes_256_key_expansion(K3, K4); __m128i K6 = aes_128_key_expansion(K4, _mm_aeskeygenassist_si128(K5, 0x04)); __m128i K7 = aes_256_key_expansion(K5, K6); __m128i K8 = aes_128_key_expansion(K6, _mm_aeskeygenassist_si128(K7, 0x08)); __m128i K9 = aes_256_key_expansion(K7, K8); __m128i K10 = aes_128_key_expansion(K8, _mm_aeskeygenassist_si128(K9, 0x10)); __m128i K11 = aes_256_key_expansion(K9, K10); __m128i K12 = aes_128_key_expansion(K10, _mm_aeskeygenassist_si128(K11, 0x20)); __m128i K13 = aes_256_key_expansion(K11, K12); __m128i K14 = aes_128_key_expansion(K12, _mm_aeskeygenassist_si128(K13, 0x40)); __m128i* EK_mm = (__m128i*)&EK[0]; _mm_storeu_si128(EK_mm , K0); _mm_storeu_si128(EK_mm + 1, K1); _mm_storeu_si128(EK_mm + 2, K2); _mm_storeu_si128(EK_mm + 3, K3); _mm_storeu_si128(EK_mm + 4, K4); _mm_storeu_si128(EK_mm + 5, K5); _mm_storeu_si128(EK_mm + 6, K6); _mm_storeu_si128(EK_mm + 7, K7); _mm_storeu_si128(EK_mm + 8, K8); _mm_storeu_si128(EK_mm + 9, K9); _mm_storeu_si128(EK_mm + 10, K10); _mm_storeu_si128(EK_mm + 11, K11); _mm_storeu_si128(EK_mm + 12, K12); _mm_storeu_si128(EK_mm + 13, K13); _mm_storeu_si128(EK_mm + 14, K14); // Now generate decryption keys __m128i* DK_mm = (__m128i*)&DK[0]; _mm_storeu_si128(DK_mm , K14); _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K13)); _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K12)); _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K11)); _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K10)); _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K9)); _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K8)); _mm_storeu_si128(DK_mm + 7, 
_mm_aesimc_si128(K7)); _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K6)); _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K5)); _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(K4)); _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(K3)); _mm_storeu_si128(DK_mm + 12, _mm_aesimc_si128(K2)); _mm_storeu_si128(DK_mm + 13, _mm_aesimc_si128(K1)); _mm_storeu_si128(DK_mm + 14, K0); }
/* * AES-192 Key Schedule */ void AES_192_NI::key_schedule(const byte key[], size_t) { __m128i K0 = _mm_loadu_si128((const __m128i*)(key)); __m128i K1 = _mm_loadu_si128((const __m128i*)(key + 8)); K1 = _mm_srli_si128(K1, 8); load_le(&EK[0], key, 6); #define AES_192_key_exp(RCON, EK_OFF) \ aes_192_key_expansion(&K0, &K1, \ _mm_aeskeygenassist_si128(K1, RCON), \ EK + EK_OFF, EK_OFF == 48) AES_192_key_exp(0x01, 6); AES_192_key_exp(0x02, 12); AES_192_key_exp(0x04, 18); AES_192_key_exp(0x08, 24); AES_192_key_exp(0x10, 30); AES_192_key_exp(0x20, 36); AES_192_key_exp(0x40, 42); AES_192_key_exp(0x80, 48); // Now generate decryption keys const __m128i* EK_mm = (const __m128i*)&EK[0]; __m128i* DK_mm = (__m128i*)&DK[0]; _mm_storeu_si128(DK_mm , _mm_loadu_si128(EK_mm + 12)); _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 11))); _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 10))); _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 9))); _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 8))); _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 7))); _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 6))); _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 5))); _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 4))); _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 3))); _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 2))); _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 1))); _mm_storeu_si128(DK_mm + 12, _mm_loadu_si128(EK_mm + 0)); }
/*
 * Derive the 15 AES-256 decryption round keys from the encryption round keys.
 *
 * The keys are written in reverse order: keys[0] and keys[14] are plain
 * copies of the opposite end of the encryption schedule, and keys[1..13]
 * are the corresponding encryption keys passed through _mm_aesimc_si128.
 *
 * encryption_keys: source schedule (keys[0..14] are read).
 * decryption_keys: destination schedule (keys[0..14] are written).
 */
void __fastcall aes_AES256_derive_decryption_keys_(
    const AES_AES256_RoundKeys* encryption_keys,
    AES_AES256_RoundKeys* decryption_keys)
{
    int i;

    decryption_keys->keys[0] = encryption_keys->keys[14];

    for (i = 1; i < 14; ++i)
        decryption_keys->keys[i] = _mm_aesimc_si128(encryption_keys->keys[14 - i]);

    decryption_keys->keys[14] = encryption_keys->keys[0];
}