/*
 * Exercises the vqaddq_u16 intrinsic on two uint16x8_t operands.
 *
 * The operands are never initialized and the result is never read, so the
 * function has no observable output.
 * NOTE(review): presumably a compile-only check that the intrinsic is
 * accepted and emitted -- confirm against the harness that builds it.
 */
void test_vqaddQu16 (void)
{
  uint16x8_t out_uint16x8_t;
  uint16x8_t arg0_uint16x8_t, arg1_uint16x8_t;

  out_uint16x8_t = vqaddq_u16 (arg0_uint16x8_t, arg1_uint16x8_t);
}
/*
 * u16x8 saturated add.
 *
 * Computes C[i] = min(A[i] + B[i], 65535) for every i in [0, Row * Col).
 *
 * A, B  input arrays, each holding at least Row * Col elements.
 * Row   first dimension of the matrices.
 * Col   second dimension of the matrices.
 * C     output array with room for at least Row * Col elements.
 *
 * Nothing is read or written when Row * Col is zero or negative.
 */
void mw_neon_mm_qadd_u16x8(unsigned short * A, int Row, int Col, unsigned short * B, unsigned short * C)
{
    /* Widen before multiplying so a large matrix cannot overflow int. */
    const long long size = (long long)Row * (long long)Col;
    long long i = 0;

    /* Vector body: eight 16-bit lanes per iteration. */
    for (; i + 8 <= size; i += 8) {
        uint16x8_t neon_a = vld1q_u16(A + i);
        uint16x8_t neon_b = vld1q_u16(B + i);

        vst1q_u16(C + i, vqaddq_u16(neon_a, neon_b));
    }

    /*
     * Scalar tail for the remaining (size % 8) elements.  The sum is formed
     * in a wider type and clamped so the tail saturates exactly like the
     * vector body instead of wrapping modulo 65536.
     */
    for (; i < size; i++) {
        unsigned int sum = (unsigned int)A[i] + (unsigned int)B[i];

        C[i] = (unsigned short)(sum > 0xFFFFu ? 0xFFFFu : sum);
    }
}
/* Overload of vqaddq for uint16x8_t operands: forwards to vqaddq_u16 and returns its result. */
inline uint16x8_t vqaddq(const uint16x8_t & v0, const uint16x8_t & v1)
{
    const uint16x8_t sum = vqaddq_u16(v0, v1);
    return sum;
}