/*
 * Calls vsubq_s8 on two int8x16_t locals and stores the result in a third.
 * The operands are never assigned and the result is never read, so the
 * function has no observable output; presumably it exists only to check
 * that the intrinsic call compiles (TODO confirm against the test harness).
 */
void test_vsubQs8 (void)
{
  int8x16_t lhs;
  int8x16_t rhs;
  int8x16_t difference;

  difference = vsubq_s8 (lhs, rhs);
}
/* s8x16 sub */
/*
 * Element-wise subtraction of two signed 8-bit buffers: C[i] = A[i] - B[i]
 * for every i in [0, Row * Col).
 *
 * A, B  input buffers, each holding at least Row * Col elements
 * Row   first dimension; only the product Row * Col is used
 * Col   second dimension; only the product Row * Col is used
 * C     output buffer, at least Row * Col elements
 *
 * Full groups of 16 elements go through vld1q_s8 / vsubq_s8 / vst1q_s8;
 * the remaining (size % 16) elements are handled one at a time.
 * Nothing is read or written when Row * Col <= 0.
 */
void mw_neon_mm_sub_s8x16(signed char * A, int Row, int Col, signed char * B, signed char * C)
{
	int8x16_t neon_a, neon_b, neon_c;
	int size = Row * Col;
	int i = 0;

	/* Vector part: written as "size - i >= 16" so the bound check cannot
	   overflow when size is close to INT_MAX. */
	for (; size - i >= 16; i += 16) {
		neon_a = vld1q_s8(A + i);
		neon_b = vld1q_s8(B + i);
		neon_c = vsubq_s8(neon_a, neon_b);
		vst1q_s8(C + i, neon_c);
	}

	/* Scalar tail: strictly below size. The previous "<= size % 16" bound
	   touched one element past the end of all three buffers. */
	for (; i < size; i++) {
		C[i] = A[i] - B[i];
	}
}
/* Overload of vsubq for int8x16_t: forwards both operands to vsubq_s8
   and returns whatever that intrinsic produces. */
inline int8x16_t vsubq(const int8x16_t & v0, const int8x16_t & v1)
{
	const int8x16_t difference = vsubq_s8(v0, v1);
	return difference;
}