static int ccm_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
	struct skcipher_walk walk;
	u8 __aligned(8) mac[AES_BLOCK_SIZE];
	u8 buf[AES_BLOCK_SIZE];
	u32 len = req->cryptlen;
	int err;

	err = ccm_init_mac(req, mac, len);
	if (err)
		return err;

	if (req->assoclen)
		ccm_calculate_auth_mac(req, mac);

	/* preserve the original iv for the final round */
	memcpy(buf, req->iv, AES_BLOCK_SIZE);

	err = skcipher_walk_aead_encrypt(&walk, req, false);

	if (crypto_simd_usable()) {
		while (walk.nbytes) {
			u32 tail = walk.nbytes % AES_BLOCK_SIZE;

			if (walk.nbytes == walk.total)
				tail = 0;

			kernel_neon_begin();
			ce_aes_ccm_encrypt(walk.dst.virt.addr,
					   walk.src.virt.addr,
					   walk.nbytes - tail, ctx->key_enc,
					   num_rounds(ctx), mac, walk.iv);
			kernel_neon_end();

			err = skcipher_walk_done(&walk, tail);
		}
		if (!err) {
			kernel_neon_begin();
			ce_aes_ccm_final(mac, buf, ctx->key_enc,
					 num_rounds(ctx));
			kernel_neon_end();
		}
	} else {
		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
	}
	if (err)
		return err;

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}
static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	u32 state[16];
	int err;

	if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(desc, dst, src, nbytes);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);

	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);

	kernel_neon_begin();

	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
		err = blkcipher_walk_done(desc, &walk,
					  walk.nbytes % CHACHA20_BLOCK_SIZE);
	}

	if (walk.nbytes) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				walk.nbytes);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	kernel_neon_end();

	return err;
}
static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		/* process a multiple of the walk stride, except on the final pass */
		if (walk.nbytes < walk.total)
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);

		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				  ctx->key.rk, ctx->key.rounds, blocks,
				  walk.iv);
		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
	kernel_neon_begin_partial(16);
	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
	kernel_neon_end();
	return sha1_base_finish(desc, out);
}
static int __ecb_crypt(struct skcipher_request *req,
		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks))
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		/* process a multiple of the walk stride, except on the final pass */
		if (walk.nbytes < walk.total)
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);

		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
		   ctx->rounds, blocks);

		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
static int sha256_neon_update(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
	int res;

	/* Handle the fast case right here */
	if (partial + len < SHA256_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->buf + partial, data, len);
		return 0;
	}

	if (!may_use_simd()) {
		res = __sha256_update(desc, data, len, partial);
	} else {
		kernel_neon_begin();
		res = __sha256_neon_update(desc, data, len, partial);
		kernel_neon_end();
	}

	return res;
}
static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
			 unsigned int len, u8 *out)
{
	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);

	ASM_EXPORT(sha1_ce_offsetof_count,
		   offsetof(struct sha1_ce_state, sst.count));
	ASM_EXPORT(sha1_ce_offsetof_finalize,
		   offsetof(struct sha1_ce_state, finalize));

	/*
	 * Allow the asm code to perform the finalization if there is no
	 * partial data and the input is a round multiple of the block size.
	 */
	sctx->finalize = finalize;

	kernel_neon_begin_partial(16);
	sha1_base_do_update(desc, data, len,
			    (sha1_block_fn *)sha1_ce_transform);
	if (!finalize)
		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
	kernel_neon_end();
	return sha1_base_finish(desc, out);
}
static int chacha20_neon(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
		return crypto_chacha20_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, ctx, walk.iv);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
				nbytes);
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}
	kernel_neon_end();

	return err;
}
static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
			      unsigned int length)
{
	u32 *crc = shash_desc_ctx(desc);
	unsigned int l;

	/* align the start of the data to SCALE_F using the scalar fallback */
	if ((u64)data % SCALE_F) {
		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));

		*crc = fallback_crc32(*crc, data, l);

		data += l;
		length -= l;
	}

	/* process the aligned bulk of the data with PMULL under NEON */
	if (length >= PMULL_MIN_LEN && may_use_simd()) {
		l = round_down(length, SCALE_F);

		kernel_neon_begin();
		*crc = crc32_pmull_le(data, l, *crc);
		kernel_neon_end();

		data += l;
		length -= l;
	}

	/* handle any remaining tail with the scalar fallback */
	if (length > 0)
		*crc = fallback_crc32(*crc, data, length);

	return 0;
}
static int sha256_ce_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_ce_state *sctx = shash_desc_ctx(desc);

	sctx->finalize = 0;
	kernel_neon_begin_partial(28);
	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
	kernel_neon_end();
	return sha256_base_finish(desc, out);
}
static int sm3_ce_final(struct shash_desc *desc, u8 *out)
{
	if (!crypto_simd_usable())
		return crypto_sm3_finup(desc, NULL, 0, out);

	kernel_neon_begin();
	sm3_base_do_finalize(desc, sm3_ce_transform);
	kernel_neon_end();

	return sm3_base_finish(desc, out);
}
static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	if (!crypto_simd_usable())
		return crypto_sm3_finup(desc, data, len, out);

	kernel_neon_begin();
	sm3_base_do_update(desc, data, len, sm3_ce_transform);
	kernel_neon_end();

	return sm3_ce_final(desc, out);
}
static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
{
	struct sha1_ce_state *sctx = shash_desc_ctx(desc);

	sctx->finalize = 0;
	kernel_neon_begin_partial(16);
	sha1_base_do_update(desc, data, len,
			    (sha1_block_fn *)sha1_ce_transform);
	kernel_neon_end();

	return 0;
}
static int sha512_ce_final(struct shash_desc *desc, u8 *out)
{
	if (!may_use_simd()) {
		sha512_base_do_finalize(desc,
				(sha512_block_fn *)sha512_block_data_order);
		return sha512_base_finish(desc, out);
	}

	kernel_neon_begin();
	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform);
	kernel_neon_end();
	return sha512_base_finish(desc, out);
}
static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
	if (!may_use_simd())
		return sha512_base_do_update(desc, data, len,
				(sha512_block_fn *)sha512_block_data_order);

	kernel_neon_begin();
	sha512_base_do_update(desc, data, len,
			      (sha512_block_fn *)sha512_ce_transform);
	kernel_neon_end();

	return 0;
}
static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
			   unsigned int len, u8 *out)
{
	if (!crypto_simd_usable())
		return sha1_finup_arm(desc, data, len, out);

	kernel_neon_begin();
	if (len)
		sha1_base_do_update(desc, data, len,
				    (sha1_block_fn *)sha1_transform_neon);
	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon);
	kernel_neon_end();

	return sha1_base_finish(desc, out);
}
static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
			 unsigned int len, u8 *out)
{
	if (!may_use_simd())
		return crypto_sha256_arm_finup(desc, data, len, out);

	kernel_neon_begin();
	if (len)
		sha256_base_do_update(desc, data, len,
				      (sha256_block_fn *)sha2_ce_transform);
	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
	kernel_neon_end();

	return sha256_base_finish(desc, out);
}
static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
			     struct scatterlist *dst, struct scatterlist *src,
			     unsigned int nbytes)
{
	struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	u32 blocks;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);

	while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
		__be32 *ctr = (__be32 *)walk.iv;
		u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]);

		/* avoid 32 bit counter overflow in the NEON code */
		if (unlikely(headroom < blocks)) {
			blocks = headroom + 1;
			tail = walk.nbytes - blocks * AES_BLOCK_SIZE;
		}

		kernel_neon_begin();
		bsaes_ctr32_encrypt_blocks(walk.src.virt.addr,
					   walk.dst.virt.addr, blocks,
					   &ctx->enc, walk.iv);
		kernel_neon_end();
		inc_be128_ctr(ctr, blocks);

		nbytes -= blocks * AES_BLOCK_SIZE;
		if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE)
			break;

		err = blkcipher_walk_done(desc, &walk, tail);
	}
	if (walk.nbytes) {
		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
		u8 ks[AES_BLOCK_SIZE];

		AES_encrypt(walk.iv, ks, &ctx->enc.rk);
		if (tdst != tsrc)
			memcpy(tdst, tsrc, nbytes);
		crypto_xor(tdst, ks, nbytes);
		err = blkcipher_walk_done(desc, &walk, 0);
	}
	return err;
}
static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	if (!may_use_simd() ||
	    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
		return crypto_sha256_arm_update(desc, data, len);

	kernel_neon_begin();
	sha256_base_do_update(desc, data, len,
			      (sha256_block_fn *)sha2_ce_transform);
	kernel_neon_end();

	return 0;
}
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	if (!crypto_simd_usable() ||
	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
		return sha1_update_arm(desc, data, len);

	kernel_neon_begin();
	sha1_base_do_update(desc, data, len,
			    (sha1_block_fn *)sha1_transform_neon);
	kernel_neon_end();

	return 0;
}
static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	int err, first, rounds = 6 + ctx->key_length / 4;
	struct blkcipher_walk walk;
	int blocks;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);

	first = 1;
	kernel_neon_begin();
	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
				first);
		first = 0;
		nbytes -= blocks * AES_BLOCK_SIZE;
		if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
			break;
		err = blkcipher_walk_done(desc, &walk,
					  walk.nbytes % AES_BLOCK_SIZE);
	}
	if (nbytes) {
		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
		u8 __aligned(8) tail[AES_BLOCK_SIZE];

		/*
		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
		 * to tell aes_ctr_encrypt() to only read half a block.
		 */
		blocks = (nbytes <= 8) ? -1 : 1;

		aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
				blocks, walk.iv, first);
		memcpy(tdst, tail, nbytes);
		err = blkcipher_walk_done(desc, &walk, 0);
	}
	kernel_neon_end();

	return err;
}
static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
				 unsigned int key_len)
{
	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err;

	err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
	if (err)
		return err;

	ctx->key.rounds = 6 + key_len / 4;

	/* convert the expanded key schedule to the bit-sliced layout under NEON */
	kernel_neon_begin();
	aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
	kernel_neon_end();

	return 0;
}
static int aesbs_cbc_decrypt(struct blkcipher_desc *desc,
			     struct scatterlist *dst, struct scatterlist *src,
			     unsigned int nbytes)
{
	struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);

	while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) {
		kernel_neon_begin();
		bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
				  walk.nbytes, &ctx->dec, walk.iv);
		kernel_neon_end();
		err = blkcipher_walk_done(desc, &walk, 0);
	}
	while (walk.nbytes) {
		u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
		u8 *dst = walk.dst.virt.addr;
		u8 *src = walk.src.virt.addr;
		u8 bk[2][AES_BLOCK_SIZE];
		u8 *iv = walk.iv;

		do {
			if (walk.dst.virt.addr == walk.src.virt.addr)
				memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE);

			AES_decrypt(src, dst, &ctx->dec.rk);
			crypto_xor(dst, iv, AES_BLOCK_SIZE);

			if (walk.dst.virt.addr == walk.src.virt.addr)
				iv = bk[blocks & 1];
			else
				iv = src;

			dst += AES_BLOCK_SIZE;
			src += AES_BLOCK_SIZE;
		} while (--blocks);
		err = blkcipher_walk_done(desc, &walk, 0);
	}
	return err;
}
static int ecb_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int blocks;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
		ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
			     unsigned int len, u8 *out)
{
	if (!may_use_simd()) {
		if (len)
			sha256_base_do_update(desc, data, len,
				(sha256_block_fn *)sha256_block_data_order);
		sha256_base_do_finalize(desc,
				(sha256_block_fn *)sha256_block_data_order);
	} else {
		kernel_neon_begin();
		if (len)
			sha256_base_do_update(desc, data, len,
				(sha256_block_fn *)sha256_block_neon);
		sha256_base_do_finalize(desc,
				(sha256_block_fn *)sha256_block_neon);
		kernel_neon_end();
	}

	return sha256_base_finish(desc, out);
}
static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	/*
	 * Stacking and unstacking a substantial slice of the NEON register
	 * file may significantly affect performance for small updates when
	 * executing in interrupt context, so fall back to the scalar code
	 * in that case.
	 */
	if (!may_use_simd())
		return sha256_base_do_update(desc, data, len,
				(sha256_block_fn *)sha256_block_data_order);

	kernel_neon_begin();
	sha256_base_do_update(desc, data, len,
			      (sha256_block_fn *)sha256_block_neon);
	kernel_neon_end();

	return 0;
}
/* Add padding and return the message digest. */
static int sha256_neon_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };

	/* save number of bits */
	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA256_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);

	if (!may_use_simd()) {
		sha256_update(desc, padding, padlen);
		sha256_update(desc, (const u8 *)&bits, sizeof(bits));
	} else {
		kernel_neon_begin();
		/* We need to fill a whole block for __sha256_neon_update() */
		if (padlen <= 56) {
			sctx->count += padlen;
			memcpy(sctx->buf + index, padding, padlen);
		} else {
			__sha256_neon_update(desc, padding, padlen, index);
		}
		__sha256_neon_update(desc, (const u8 *)&bits,
				     sizeof(bits), 56);
		kernel_neon_end();
	}

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
static int xts_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err, first, rounds = num_rounds(&ctx->key1);
	struct skcipher_walk walk;
	unsigned int blocks;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
		ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				   (u8 *)ctx->key1.key_dec, rounds, blocks,
				   walk.iv, (u8 *)ctx->key2.key_enc, first);
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			    unsigned int key_len)
{
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = crypto_aes_expand_key(&rk, in_key, key_len);
	if (err)
		return err;

	ctx->key.rounds = 6 + key_len / 4;

	/* keep a copy of the expanded key for the CBC encryption path */
	memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));

	kernel_neon_begin();
	aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
	kernel_neon_end();

	return 0;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	int err, first, rounds = 6 + ctx->key_length / 4;
	struct blkcipher_walk walk;
	unsigned int blocks;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	kernel_neon_begin();
	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				(u8 *)ctx->key_enc, rounds, blocks, first);
		err = blkcipher_walk_done(desc, &walk,
					  walk.nbytes % AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}