Пример #1
0
/*
* AES-256 Encryption
*/
void AES_256_NI::encrypt_n(const byte in[], byte out[], size_t blocks) const
   {
   const __m128i* in_mm = (const __m128i*)in;
   __m128i* out_mm = (__m128i*)out;

   const __m128i* key_mm = (const __m128i*)&EK[0];

   __m128i K0  = _mm_loadu_si128(key_mm);
   __m128i K1  = _mm_loadu_si128(key_mm + 1);
   __m128i K2  = _mm_loadu_si128(key_mm + 2);
   __m128i K3  = _mm_loadu_si128(key_mm + 3);
   __m128i K4  = _mm_loadu_si128(key_mm + 4);
   __m128i K5  = _mm_loadu_si128(key_mm + 5);
   __m128i K6  = _mm_loadu_si128(key_mm + 6);
   __m128i K7  = _mm_loadu_si128(key_mm + 7);
   __m128i K8  = _mm_loadu_si128(key_mm + 8);
   __m128i K9  = _mm_loadu_si128(key_mm + 9);
   __m128i K10 = _mm_loadu_si128(key_mm + 10);
   __m128i K11 = _mm_loadu_si128(key_mm + 11);
   __m128i K12 = _mm_loadu_si128(key_mm + 12);
   __m128i K13 = _mm_loadu_si128(key_mm + 13);
   __m128i K14 = _mm_loadu_si128(key_mm + 14);

   while(blocks >= 4)
      {
      __m128i B0 = _mm_loadu_si128(in_mm + 0);
      __m128i B1 = _mm_loadu_si128(in_mm + 1);
      __m128i B2 = _mm_loadu_si128(in_mm + 2);
      __m128i B3 = _mm_loadu_si128(in_mm + 3);

      B0 = _mm_xor_si128(B0, K0);
      B1 = _mm_xor_si128(B1, K0);
      B2 = _mm_xor_si128(B2, K0);
      B3 = _mm_xor_si128(B3, K0);

      AES_ENC_4_ROUNDS(K1);
      AES_ENC_4_ROUNDS(K2);
      AES_ENC_4_ROUNDS(K3);
      AES_ENC_4_ROUNDS(K4);
      AES_ENC_4_ROUNDS(K5);
      AES_ENC_4_ROUNDS(K6);
      AES_ENC_4_ROUNDS(K7);
      AES_ENC_4_ROUNDS(K8);
      AES_ENC_4_ROUNDS(K9);
      AES_ENC_4_ROUNDS(K10);
      AES_ENC_4_ROUNDS(K11);
      AES_ENC_4_ROUNDS(K12);
      AES_ENC_4_ROUNDS(K13);
      AES_ENC_4_LAST_ROUNDS(K14);

      _mm_storeu_si128(out_mm + 0, B0);
      _mm_storeu_si128(out_mm + 1, B1);
      _mm_storeu_si128(out_mm + 2, B2);
      _mm_storeu_si128(out_mm + 3, B3);

      blocks -= 4;
      in_mm += 4;
      out_mm += 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      __m128i B = _mm_loadu_si128(in_mm + i);

      B = _mm_xor_si128(B, K0);

      B = _mm_aesenc_si128(B, K1);
      B = _mm_aesenc_si128(B, K2);
      B = _mm_aesenc_si128(B, K3);
      B = _mm_aesenc_si128(B, K4);
      B = _mm_aesenc_si128(B, K5);
      B = _mm_aesenc_si128(B, K6);
      B = _mm_aesenc_si128(B, K7);
      B = _mm_aesenc_si128(B, K8);
      B = _mm_aesenc_si128(B, K9);
      B = _mm_aesenc_si128(B, K10);
      B = _mm_aesenc_si128(B, K11);
      B = _mm_aesenc_si128(B, K12);
      B = _mm_aesenc_si128(B, K13);
      B = _mm_aesenclast_si128(B, K14);

      _mm_storeu_si128(out_mm + i, B);
      }
   }
Пример #2
0
void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   BOTAN_ASSERT(m_EK.empty() == false, "Key was set");

   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
   const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0);
   const uint8x16_t K1 = vld1q_u8(skey + 16);
   const uint8x16_t K2 = vld1q_u8(skey + 32);
   const uint8x16_t K3 = vld1q_u8(skey + 48);
   const uint8x16_t K4 = vld1q_u8(skey + 64);
   const uint8x16_t K5 = vld1q_u8(skey + 80);
   const uint8x16_t K6 = vld1q_u8(skey + 96);
   const uint8x16_t K7 = vld1q_u8(skey + 112);
   const uint8x16_t K8 = vld1q_u8(skey + 128);
   const uint8x16_t K9 = vld1q_u8(skey + 144);
   const uint8x16_t K10 = vld1q_u8(mkey);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_ENC_4_ROUNDS(K0);
      AES_ENC_4_ROUNDS(K1);
      AES_ENC_4_ROUNDS(K2);
      AES_ENC_4_ROUNDS(K3);
      AES_ENC_4_ROUNDS(K4);
      AES_ENC_4_ROUNDS(K5);
      AES_ENC_4_ROUNDS(K6);
      AES_ENC_4_ROUNDS(K7);
      AES_ENC_4_ROUNDS(K8);
      AES_ENC_4_LAST_ROUNDS(K9, K10);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesmcq_u8(vaeseq_u8(B, K0));
      B = vaesmcq_u8(vaeseq_u8(B, K1));
      B = vaesmcq_u8(vaeseq_u8(B, K2));
      B = vaesmcq_u8(vaeseq_u8(B, K3));
      B = vaesmcq_u8(vaeseq_u8(B, K4));
      B = vaesmcq_u8(vaeseq_u8(B, K5));
      B = vaesmcq_u8(vaeseq_u8(B, K6));
      B = vaesmcq_u8(vaeseq_u8(B, K7));
      B = vaesmcq_u8(vaeseq_u8(B, K8));
      B = veorq_u8(vaeseq_u8(B, K9), K10);
      vst1q_u8(out+16*i, B);
      }
   }