static void
fletcher_4_avx2_init(zio_cksum_t *zcp)
{
	kfpu_begin();

	/* clear avx2 registers */
	asm volatile("vpxor %ymm0, %ymm0, %ymm0");
	asm volatile("vpxor %ymm1, %ymm1, %ymm1");
	asm volatile("vpxor %ymm2, %ymm2, %ymm2");
	asm volatile("vpxor %ymm3, %ymm3, %ymm3");
}
static void
fletcher_4_aarch64_neon_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
    uint64_t size)
{
	const uint64_t *ip = buf;
	const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);

#if defined(_KERNEL)
	/*
	 * In-kernel builds pin each vector variable to a specific NEON
	 * register so the inline assembly in the NEON_* macros can refer
	 * to them by name.
	 */
	register unsigned char ZERO asm("v0") __attribute__((vector_size(16)));
	register unsigned char ACC0 asm("v1") __attribute__((vector_size(16)));
	register unsigned char ACC1 asm("v2") __attribute__((vector_size(16)));
	register unsigned char ACC2 asm("v3") __attribute__((vector_size(16)));
	register unsigned char ACC3 asm("v4") __attribute__((vector_size(16)));
	register unsigned char TMP1 asm("v5") __attribute__((vector_size(16)));
	register unsigned char TMP2 asm("v6") __attribute__((vector_size(16)));
	register unsigned char SRC asm("v7") __attribute__((vector_size(16)));
#else
	unsigned char ZERO __attribute__((vector_size(16)));
	unsigned char ACC0 __attribute__((vector_size(16)));
	unsigned char ACC1 __attribute__((vector_size(16)));
	unsigned char ACC2 __attribute__((vector_size(16)));
	unsigned char ACC3 __attribute__((vector_size(16)));
	unsigned char TMP1 __attribute__((vector_size(16)));
	unsigned char TMP2 __attribute__((vector_size(16)));
	unsigned char SRC __attribute__((vector_size(16)));
#endif

	/* enter kernel FPU/SIMD context before touching NEON registers */
	kfpu_begin();

	NEON_INIT_LOOP();

	/* consume two 64-bit words (four 32-bit input values) per iteration */
	for (; ip < ipend; ip += 2) {
		/* byteswap variant: reverse each word before accumulating */
		NEON_MAIN_LOOP(NEON_DO_REVERSE);
	}

	NEON_FINI_LOOP();

	kfpu_end();
}
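/*
 * Illustrative sketch, not part of the original module: the vectorized
 * routines above compute the same four running sums as this scalar
 * Fletcher-4 reference, i.e. 64-bit accumulators over 32-bit input words,
 * with the byteswap variants reversing each word's bytes first. The names
 * below (fletcher_4_scalar_sketch, bswap32_sketch) are hypothetical and
 * only serve to show what the SIMD loops accumulate; fixed-width integer
 * types are assumed to be available from the headers already included in
 * this file.
 */
static inline uint32_t
bswap32_sketch(uint32_t x)
{
	return (((x & 0xff000000u) >> 24) | ((x & 0x00ff0000u) >> 8) |
	    ((x & 0x0000ff00u) << 8) | ((x & 0x000000ffu) << 24));
}

static void
fletcher_4_scalar_sketch(const void *buf, uint64_t size, int byteswap,
    uint64_t sums[4])
{
	const uint32_t *ip = buf;
	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
	uint64_t a = 0, b = 0, c = 0, d = 0;

	for (; ip < ipend; ip++) {
		uint32_t w = byteswap ? bswap32_sketch(*ip) : *ip;

		/* classic Fletcher-4 recurrence: each sum folds in the last */
		a += w;
		b += a;
		c += b;
		d += c;
	}

	sums[0] = a;
	sums[1] = b;
	sums[2] = c;
	sums[3] = d;
}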