/*
 * Encrypt a single 16-byte block with the ARMv8 AES instructions.
 *
 * rounds   - selects how many key-schedule entries are consumed; the code
 *            reads keysched[0] through keysched[rounds + 1] inclusive.
 * keysched - expanded round keys, one 128-bit vector per entry.
 * from     - the input block.
 *
 * Returns the transformed block.
 *
 * NOTE(review): the pairwise loop and the explicit tail only use each key
 * once when `rounds` is odd (presumably the AES round count minus one, e.g.
 * 9/11/13).  With an even value keysched[rounds - 1] would be applied twice.
 * Confirm against the callers.
 */
static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t state = from;
	int idx;

	/* Full rounds (AESE followed by AESMC), two per iteration. */
	for (idx = 0; idx < rounds - 1; idx += 2) {
		state = vaesmcq_u8(vaeseq_u8(state, keysched[idx]));
		state = vaesmcq_u8(vaeseq_u8(state, keysched[idx + 1]));
	}

	/* One more full round using the key at index rounds - 1. */
	state = vaesmcq_u8(vaeseq_u8(state, keysched[rounds - 1]));

	/* Final round: AESE without AESMC, then XOR in the last key. */
	state = vaeseq_u8(state, keysched[rounds]);

	return (veorq_u8(state, keysched[rounds + 1]));
}
// Codegen test fixture: applies the AES MixColumns intrinsic to `data` and
// returns the result. `key` is accepted but not read by the body.
//
// The two directive comments inside the body look like pattern-matching
// directives for an output-checking tool (presumably LLVM FileCheck - confirm
// against the test's RUN line), so their text is kept verbatim. Each one must
// sit on its own line: if the body is folded onto a single line, the first
// `//` turns the return statement and the closing brace into comment text.
uint8x16_t test_aesmc(uint8x16_t data, uint8x16_t key) {
  // CHECK-LABEL: test_aesmc:
  // CHECK: aesmc.16b v0, v0
  return vaesmcq_u8(data);
}
// Runs one AES encryption round on `block` with round key `rndKey`:
// an AESE step followed by an AESMC step. Returns the resulting state.
uint8x16_t aes_enc_rnd(uint8x16_t block, uint8x16_t rndKey)
{
    // AESE: XOR the round key into the state, then SubBytes and ShiftRows.
    const uint8x16_t substituted = vaeseq_u8(block, rndKey);

    // AESMC: MixColumns over the substituted state.
    return vaesmcq_u8(substituted);
}
// Codegen test fixture: returns the AES MixColumns intrinsic applied to
// `data`.
//
// The directive comments inside the body look like pattern-matching
// directives for an output-checking tool (presumably LLVM FileCheck - confirm
// against the test's RUN line), so their text and relative order are kept
// verbatim. Each one must sit on its own line: if the body is folded onto a
// single line, the first `//` turns the return statement and the closing
// brace into comment text.
uint8x16_t test_vaesmcq_u8(uint8x16_t data) {
  // CHECK: test_vaesmcq_u8
  return vaesmcq_u8(data);
  // CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { BOTAN_ASSERT(m_EK.empty() == false, "Key was set"); const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data()); const uint8x16_t K0 = vld1q_u8(skey + 0); const uint8x16_t K1 = vld1q_u8(skey + 16); const uint8x16_t K2 = vld1q_u8(skey + 32); const uint8x16_t K3 = vld1q_u8(skey + 48); const uint8x16_t K4 = vld1q_u8(skey + 64); const uint8x16_t K5 = vld1q_u8(skey + 80); const uint8x16_t K6 = vld1q_u8(skey + 96); const uint8x16_t K7 = vld1q_u8(skey + 112); const uint8x16_t K8 = vld1q_u8(skey + 128); const uint8x16_t K9 = vld1q_u8(skey + 144); const uint8x16_t K10 = vld1q_u8(mkey); while(blocks >= 4) { uint8x16_t B0 = vld1q_u8(in); uint8x16_t B1 = vld1q_u8(in+16); uint8x16_t B2 = vld1q_u8(in+32); uint8x16_t B3 = vld1q_u8(in+48); AES_ENC_4_ROUNDS(K0); AES_ENC_4_ROUNDS(K1); AES_ENC_4_ROUNDS(K2); AES_ENC_4_ROUNDS(K3); AES_ENC_4_ROUNDS(K4); AES_ENC_4_ROUNDS(K5); AES_ENC_4_ROUNDS(K6); AES_ENC_4_ROUNDS(K7); AES_ENC_4_ROUNDS(K8); AES_ENC_4_LAST_ROUNDS(K9, K10); vst1q_u8(out, B0); vst1q_u8(out+16, B1); vst1q_u8(out+32, B2); vst1q_u8(out+48, B3); in += 16*4; out += 16*4; blocks -= 4; } for(size_t i = 0; i != blocks; ++i) { uint8x16_t B = vld1q_u8(in+16*i); B = vaesmcq_u8(vaeseq_u8(B, K0)); B = vaesmcq_u8(vaeseq_u8(B, K1)); B = vaesmcq_u8(vaeseq_u8(B, K2)); B = vaesmcq_u8(vaeseq_u8(B, K3)); B = vaesmcq_u8(vaeseq_u8(B, K4)); B = vaesmcq_u8(vaeseq_u8(B, K5)); B = vaesmcq_u8(vaeseq_u8(B, K6)); B = vaesmcq_u8(vaeseq_u8(B, K7)); B = vaesmcq_u8(vaeseq_u8(B, K8)); B = veorq_u8(vaeseq_u8(B, K9), K10); vst1q_u8(out+16*i, B); } }