/*
 * Invokes the vqaddq_u32 intrinsic on two uint32x4_t operands and stores
 * the result in a local that is never read afterwards.
 *
 * NOTE(review): both operands are passed without ever being initialised,
 * so this looks like a compile-only check of the intrinsic rather than
 * something meant to be executed - confirm before calling it at run time.
 */
void test_vqaddQu32 (void)
{
  uint32x4_t lhs;
  uint32x4_t rhs;
  uint32x4_t sum = vqaddq_u32 (lhs, rhs);
}
/**
 * Element-wise saturating add of two unsigned 32-bit matrices stored as
 * flat arrays: for every n in [0, Row * Col), C[n] receives A[n] + B[n],
 * clamped to the maximum unsigned value instead of wrapping around.
 *
 * @param A    first operand; Row * Col elements are read
 * @param Row  number of rows
 * @param Col  number of columns
 * @param B    second operand; Row * Col elements are read
 * @param C    destination; Row * Col elements are written
 *
 * Nothing is read or written when Row * Col is zero or negative.
 * NOTE(review): Row * Col is computed in int; callers presumably keep the
 * product within int range - confirm.
 */
void mw_neon_mm_qadd_u32x4(unsigned int * A, int Row, int Col, unsigned int * B, unsigned int * C)
{
    int size = Row * Col;
    int i = 0;

    /* Vector body: four lanes per iteration while a full group remains.
       The bound is written as `size - i` because `i + 4` could overflow
       for sizes close to INT_MAX. */
    for (; size - i >= 4; i += 4) {
        uint32x4_t neon_a = vld1q_u32(A + i);
        uint32x4_t neon_b = vld1q_u32(B + i);
        vst1q_u32(C + i, vqaddq_u32(neon_a, neon_b));
    }

    /* Scalar tail for the remaining (size % 4) elements. It must saturate
       exactly like vqaddq_u32 does; a plain `+` would wrap around and make
       the result depend on an element's position in the array. */
    for (; i < size; i++) {
        unsigned int sum = A[i] + B[i];
        /* Unsigned wrap-around happened iff the sum is below an operand. */
        C[i] = (sum < A[i]) ? ~0u : sum;
    }
}
// Suffix-free wrapper for uint32x4_t operands: forwards both arguments
// unchanged to vqaddq_u32 and returns whatever that intrinsic produces.
inline uint32x4_t vqaddq(const uint32x4_t & v0, const uint32x4_t & v1)
{
    const uint32x4_t sum = vqaddq_u32(v0, v1);
    return sum;
}