/*
 * Exercises the vand_u16 intrinsic on two uint16x4_t operands and stores the
 * result in a local that is never read afterwards.
 *
 * The operands are never given a value here; only the call itself is of
 * interest. NOTE(review): presumably a compile-only fixture whose generated
 * code is inspected by the test harness - confirm before executing it.
 */
void test_vandu16 (void)
{
  uint16x4_t lhs;
  uint16x4_t rhs;
  uint16x4_t result;

  result = vand_u16 (lhs, rhs);
}
/**
 * Blends two rows of 16-bit pixels, four pixels per iteration, using NEON.
 *
 * Every output pixel is (srcA * k2 + srcB * k1) >> 3. The red/blue bits
 * (ColorMask::kRedBlueMask) and the green bits (ColorMask::kGreenMask) are
 * blended separately: products are accumulated in 32-bit lanes, shifted,
 * narrowed back to 16 bits and masked again before both halves are merged.
 *
 * Only whole groups of four pixels are processed. When width is not a
 * multiple of 4, the trailing (width % 4) pixels of dst are left untouched
 * and must be handled by the caller.
 *
 * @param dst    destination row
 * @param srcA   first source row, weighted by k2
 * @param srcB   second source row, weighted by k1
 * @param width  number of pixels available in each row
 * @param k1     weight applied to srcB
 * @param k2     weight applied to srcA
 *
 * NOTE(review): the fixed ">> 3" suggests k1 + k2 is expected to equal 8 -
 * confirm against the callers.
 */
static void interpolate5LineNeon(uint16 *dst, const uint16 *srcA, const uint16 *srcB, int width, int k1, int k2) {
	const uint16x4_t redBlueMask = vdup_n_u16(ColorMask::kRedBlueMask);
	const uint16x4_t greenMask = vdup_n_u16(ColorMask::kGreenMask);
	const uint16x4_t weightB = vdup_n_u16(k1);
	const uint16x4_t weightA = vdup_n_u16(k2);

	while (width >= 4) {
		const uint16x4_t pixA = vld1_u16(srcA);
		const uint16x4_t pixB = vld1_u16(srcB);

		// Isolate the channel groups of both sources.
		const uint16x4_t bRB = vand_u16(pixB, redBlueMask);
		const uint16x4_t bG = vand_u16(pixB, greenMask);
		const uint16x4_t aRB = vand_u16(pixA, redBlueMask);
		const uint16x4_t aG = vand_u16(pixA, greenMask);

		// Widening multiply-accumulate: srcA * k2 + srcB * k1 in 32-bit lanes.
		const uint32x4_t sumRB = vmlal_u16(vmull_u16(aRB, weightA), bRB, weightB);
		const uint32x4_t sumG = vmlal_u16(vmull_u16(aG, weightA), bG, weightB);

		// Scale down, narrow to 16 bits and drop any bits outside each mask.
		const uint16x4_t outRB = vand_u16(vmovn_u32(vshrq_n_u32(sumRB, 3)), redBlueMask);
		const uint16x4_t outG = vand_u16(vmovn_u32(vshrq_n_u32(sumG, 3)), greenMask);

		// Merge with the intrinsic rather than the compiler-extension '|'
		// operator on vector types, keeping the body intrinsics-only.
		vst1_u16(dst, vorr_u16(outRB, outG));

		dst += 4;
		srcA += 4;
		srcB += 4;
		width -= 4;
	}
}
/**
 * Overload of vand for uint16x4_t operands; forwards to the vand_u16
 * intrinsic and returns its result.
 *
 * @param v0  first operand
 * @param v1  second operand
 * @return    vand_u16(v0, v1)
 */
inline uint16x4_t vand(const uint16x4_t & v0, const uint16x4_t & v1)
{
    const uint16x4_t combined = vand_u16(v0, v1);
    return combined;
}
// Returns vand_u16(a, b). The directive comments inside the body are matched
// against the compiler output by the test harness and must each stay on a
// line of their own, otherwise they comment out the code that follows them.
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
  // CHECK-LABEL: test_vand_u16
  return vand_u16(a, b);
  // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}