static int chacha20_neon(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	/* Use the scalar fallback for short requests or when NEON is unusable. */
	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
		return crypto_chacha20_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, ctx, walk.iv);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		/* All but the final step must process a multiple of the walk stride. */
		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
				nbytes);
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}
	kernel_neon_end();

	return err;
}

static int sha256_neon_update(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
	int res;

	/* Handle the fast case right here */
	if (partial + len < SHA256_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->buf + partial, data, len);
		return 0;
	}

	if (!may_use_simd()) {
		res = __sha256_update(desc, data, len, partial);
	} else {
		kernel_neon_begin();
		res = __sha256_neon_update(desc, data, len, partial);
		kernel_neon_end();
	}

	return res;
}

static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
			      unsigned int length)
{
	u32 *crc = shash_desc_ctx(desc);
	unsigned int l;

	/*
	 * Unaligned head bytes go through the scalar fallback
	 * (the split is illustrated in the standalone sketch after this
	 * function).
	 */
	if ((u64)data % SCALE_F) {
		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));

		*crc = fallback_crc32(*crc, data, l);

		data += l;
		length -= l;
	}

	/* The aligned bulk is handled by the PMULL/NEON implementation. */
	if (length >= PMULL_MIN_LEN && may_use_simd()) {
		l = round_down(length, SCALE_F);

		kernel_neon_begin();
		*crc = crc32_pmull_le(data, l, *crc);
		kernel_neon_end();

		data += l;
		length -= l;
	}

	/* Mop up any tail bytes with the scalar fallback. */
	if (length > 0)
		*crc = fallback_crc32(*crc, data, length);

	return 0;
}

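crc32_pmull_update() above splits every buffer into an unaligned head (scalar CRC), an aligned bulk that is a multiple of SCALE_F (PMULL between kernel_neon_begin()/kernel_neon_end()), and a scalar tail. The following is a minimal standalone userspace sketch of just that head/bulk/tail arithmetic; it is illustration only, not kernel code, and the names are hypothetical.

/*
 * Illustration only: head/bulk/tail split as done by crc32_pmull_update(),
 * assuming SCALE_F is the alignment granule required by the PMULL code.
 */
#include <stdint.h>
#include <stdio.h>

#define SCALE_F 16u

int main(void)
{
	uintptr_t addr = 0x1003;	/* example: unaligned start address */
	unsigned int length = 100;	/* example: total buffer length */

	/* Head: bytes up to the next SCALE_F boundary (scalar fallback). */
	unsigned int head = (addr % SCALE_F) ?
			(unsigned int)(SCALE_F - addr % SCALE_F) : 0;
	if (head > length)
		head = length;

	/* Bulk: the largest remaining multiple of SCALE_F (PMULL path). */
	unsigned int bulk = ((length - head) / SCALE_F) * SCALE_F;

	/* Tail: whatever is left (scalar fallback again). */
	unsigned int tail = length - head - bulk;

	printf("head=%u bulk=%u tail=%u\n", head, bulk, tail);
	return 0;
}
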
static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	u32 state[16];
	int err;

	if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(desc, dst, src, nbytes);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);

	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);

	kernel_neon_begin();

	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
		err = blkcipher_walk_done(desc, &walk,
					  walk.nbytes % CHACHA20_BLOCK_SIZE);
	}

	if (walk.nbytes) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				walk.nbytes);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	kernel_neon_end();

	return err;
}

static int sha512_ce_final(struct shash_desc *desc, u8 *out)
{
	if (!may_use_simd()) {
		sha512_base_do_finalize(desc,
				(sha512_block_fn *)sha512_block_data_order);
		return sha512_base_finish(desc, out);
	}

	kernel_neon_begin();
	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform);
	kernel_neon_end();

	return sha512_base_finish(desc, out);
}

static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
	if (!may_use_simd())
		return sha512_base_do_update(desc, data, len,
				(sha512_block_fn *)sha512_block_data_order);

	kernel_neon_begin();
	sha512_base_do_update(desc, data, len,
			      (sha512_block_fn *)sha512_ce_transform);
	kernel_neon_end();

	return 0;
}

static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
			 unsigned int len, u8 *out)
{
	if (!may_use_simd())
		return crypto_sha256_arm_finup(desc, data, len, out);

	kernel_neon_begin();
	if (len)
		sha256_base_do_update(desc, data, len,
				      (sha256_block_fn *)sha2_ce_transform);
	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
	kernel_neon_end();

	return sha256_base_finish(desc, out);
}

static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	if (!may_use_simd() ||
	    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
		return crypto_sha256_arm_update(desc, data, len);

	kernel_neon_begin();
	sha256_base_do_update(desc, data, len,
			      (sha256_block_fn *)sha2_ce_transform);
	kernel_neon_end();

	return 0;
}

int ablk_encrypt(struct ablkcipher_request *req)
{
	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);

	if (!may_use_simd()) {
		struct ablkcipher_request *cryptd_req =
			ablkcipher_request_ctx(req);

		/* SIMD is not usable here: defer the request to cryptd. */
		memcpy(cryptd_req, req, sizeof(*req));
		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);

		return crypto_ablkcipher_encrypt(cryptd_req);
	} else {
		return __ablk_encrypt(req);
	}
}

static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	/*
	 * Stacking and unstacking a substantial slice of the NEON register
	 * file may significantly affect performance for small updates when
	 * executing in interrupt context, so fall back to the scalar code
	 * in that case.
	 */
	if (!may_use_simd())
		return sha256_base_do_update(desc, data, len,
				(sha256_block_fn *)sha256_block_data_order);

	kernel_neon_begin();
	sha256_base_do_update(desc, data, len,
			      (sha256_block_fn *)sha256_block_neon);
	kernel_neon_end();

	return 0;
}

static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
			     unsigned int len, u8 *out)
{
	if (!may_use_simd()) {
		if (len)
			sha256_base_do_update(desc, data, len,
				(sha256_block_fn *)sha256_block_data_order);
		sha256_base_do_finalize(desc,
				(sha256_block_fn *)sha256_block_data_order);
	} else {
		kernel_neon_begin();
		if (len)
			sha256_base_do_update(desc, data, len,
				(sha256_block_fn *)sha256_block_neon);
		sha256_base_do_finalize(desc,
				(sha256_block_fn *)sha256_block_neon);
		kernel_neon_end();
	}

	return sha256_base_finish(desc, out);
}

/* Add padding and return the message digest. */
static int sha256_neon_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };

	/* save number of bits */
	bits = cpu_to_be64(sctx->count << 3);

	/*
	 * Pad out to 56 mod 64 and append length
	 * (see the standalone sketch after this function).
	 */
	index = sctx->count % SHA256_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE + 56) - index);

	if (!may_use_simd()) {
		sha256_update(desc, padding, padlen);
		sha256_update(desc, (const u8 *)&bits, sizeof(bits));
	} else {
		kernel_neon_begin();
		/* We need to fill a whole block for __sha256_neon_update() */
		if (padlen <= 56) {
			sctx->count += padlen;
			memcpy(sctx->buf + index, padding, padlen);
		} else {
			__sha256_neon_update(desc, padding, padlen, index);
		}
		__sha256_neon_update(desc, (const u8 *)&bits,
				     sizeof(bits), 56);
		kernel_neon_end();
	}

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

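The padding arithmetic in sha256_neon_final() ("pad out to 56 mod 64, then append the 64-bit bit count") can be sanity-checked in isolation. Below is a minimal standalone userspace sketch of just that length computation; it is illustration only, not kernel code, and the names (sha256_padlen, BLOCK_SIZE) are hypothetical.

/*
 * Illustration only: padding-length computation matching the formula used
 * above, so that (count + padlen) % 64 == 56 always holds.
 */
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE 64u	/* SHA256_BLOCK_SIZE */

static unsigned int sha256_padlen(uint64_t count)
{
	unsigned int index = count % BLOCK_SIZE;

	return (index < 56) ? (56 - index) : ((BLOCK_SIZE + 56) - index);
}

int main(void)
{
	uint64_t counts[] = { 0, 1, 55, 56, 63, 64, 119 };

	for (unsigned int i = 0; i < sizeof(counts) / sizeof(counts[0]); i++)
		printf("count=%llu -> padlen=%u\n",
		       (unsigned long long)counts[i],
		       sha256_padlen(counts[i]));
	return 0;
}
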
static int chacha20_simd(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	u32 *state, state_buf[16 + 2] __aligned(8);
	struct skcipher_walk walk;
	int err;

	BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
	state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);

	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, ctx, walk.iv);

	kernel_fpu_begin();

	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
		err = skcipher_walk_done(&walk,
					 walk.nbytes % CHACHA20_BLOCK_SIZE);
	}

	if (walk.nbytes) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				walk.nbytes);
		err = skcipher_walk_done(&walk, 0);
	}

	kernel_fpu_end();

	return err;
}

int ablk_decrypt(struct ablkcipher_request *req)
{
	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);

	if (!may_use_simd()) {
		struct ablkcipher_request *cryptd_req =
			ablkcipher_request_ctx(req);

		/* SIMD is not usable here: defer the request to cryptd. */
		memcpy(cryptd_req, req, sizeof(*req));
		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);

		return crypto_ablkcipher_decrypt(cryptd_req);
	} else {
		struct blkcipher_desc desc;

		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
		desc.info = req->info;
		desc.flags = 0;

		return crypto_blkcipher_crt(desc.tfm)->decrypt(
			&desc, req->dst, req->src, req->nbytes);
	}
}

static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
{
	u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1];
	struct blkcipher_walk walk;
	int err;

	if (!may_use_simd())
		return crypto_chacha20_crypt(desc, dst, src, nbytes);

	state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);

	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);

	kernel_fpu_begin();

	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
		err = blkcipher_walk_done(desc, &walk,
					  walk.nbytes % CHACHA20_BLOCK_SIZE);
	}

	if (walk.nbytes) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				walk.nbytes);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	kernel_fpu_end();

	return err;
}