// 4-point inverse DCT on four columns at once (10-bit path): transpose in,
// apply the even/odd butterflies with the cosine constants carried in cospis,
// round-shift, then recombine in place.
static INLINE void idct4x4_16_kernel_bd10(const int32x4_t cospis,
                                          int32x4_t *const a0,
                                          int32x4_t *const a1,
                                          int32x4_t *const a2,
                                          int32x4_t *const a3) {
  int32x4_t b0, b1, b2, b3;

  transpose_s32_4x4(a0, a1, a2, a3);

  // Stage 1: even part from a0/a2, odd part from a1/a3.
  b0 = vaddq_s32(*a0, *a2);
  b1 = vsubq_s32(*a0, *a2);
  b0 = vmulq_lane_s32(b0, vget_high_s32(cospis), 0);
  b1 = vmulq_lane_s32(b1, vget_high_s32(cospis), 0);
  b2 = vmulq_lane_s32(*a1, vget_high_s32(cospis), 1);
  b3 = vmulq_lane_s32(*a1, vget_low_s32(cospis), 1);
  b2 = vmlsq_lane_s32(b2, *a3, vget_low_s32(cospis), 1);
  b3 = vmlaq_lane_s32(b3, *a3, vget_high_s32(cospis), 1);

  // Rounding shift by DCT_CONST_BITS after the constant multiplies.
  b0 = vrshrq_n_s32(b0, DCT_CONST_BITS);
  b1 = vrshrq_n_s32(b1, DCT_CONST_BITS);
  b2 = vrshrq_n_s32(b2, DCT_CONST_BITS);
  b3 = vrshrq_n_s32(b3, DCT_CONST_BITS);

  // Stage 2: final butterfly.
  *a0 = vaddq_s32(b0, b3);
  *a1 = vaddq_s32(b1, b2);
  *a2 = vsubq_s32(b1, b2);
  *a3 = vsubq_s32(b0, b3);
}
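// A minimal scalar reference for what the kernel above computes per column,
// assuming the usual libvpx setup in which cospis holds
// { cospi_0_64, cospi_8_64, cospi_16_64, cospi_24_64 } and DCT_CONST_BITS is
// 14 (constant values as in libvpx's txfm_common.h). This is an illustrative
// sketch with hypothetical "sketch_" names, not part of the library; the NEON
// kernel keeps the products in 32 bits, which is safe for the 10-bit range.
#include <stdint.h>

#define SKETCH_COSPI_8_64 15137
#define SKETCH_COSPI_16_64 11585
#define SKETCH_COSPI_24_64 6270
#define SKETCH_DCT_CONST_BITS 14

static int32_t sketch_round_shift(int64_t x) {
  return (int32_t)((x + (1 << (SKETCH_DCT_CONST_BITS - 1))) >>
                   SKETCH_DCT_CONST_BITS);
}

// One 4-point inverse DCT column; io[] is overwritten in place.
static void sketch_idct4_column(int32_t io[4]) {
  const int32_t b0 =
      sketch_round_shift((int64_t)(io[0] + io[2]) * SKETCH_COSPI_16_64);
  const int32_t b1 =
      sketch_round_shift((int64_t)(io[0] - io[2]) * SKETCH_COSPI_16_64);
  const int32_t b2 = sketch_round_shift((int64_t)io[1] * SKETCH_COSPI_24_64 -
                                        (int64_t)io[3] * SKETCH_COSPI_8_64);
  const int32_t b3 = sketch_round_shift((int64_t)io[1] * SKETCH_COSPI_8_64 +
                                        (int64_t)io[3] * SKETCH_COSPI_24_64);
  io[0] = b0 + b3;
  io[1] = b1 + b2;
  io[2] = b1 - b2;
  io[3] = b0 - b3;
}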
/* Compile-only check (GCC testsuite style) that vmlsq_lane_s32 accepts
   Q-register accumulator/multiplicand operands with a D-register lane
   argument.  */
#include <arm_neon.h>

void test_vmlsQ_lanes32 (void)
{
  int32x4_t out_int32x4_t;
  int32x4_t arg0_int32x4_t;
  int32x4_t arg1_int32x4_t;
  int32x2_t arg2_int32x2_t;

  out_int32x4_t = vmlsq_lane_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32x2_t, 1);
}
static INLINE void iadst_butterfly_lane_1_0_bd10_neon(const int32x4_t in0,
                                                      const int32x4_t in1,
                                                      const int32x2_t c,
                                                      int32x4_t *const s0,
                                                      int32x4_t *const s1) {
  // Butterfly: *s0 = in0 * c[1] + in1 * c[0], *s1 = in0 * c[0] - in1 * c[1].
  const int32x4_t t0 = vmulq_lane_s32(in0, c, 1);
  const int32x4_t t1 = vmulq_lane_s32(in0, c, 0);

  *s0 = vmlaq_lane_s32(t0, in1, c, 0);
  *s1 = vmlsq_lane_s32(t1, in1, c, 1);
}
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
  // CHECK: test_vmlsq_lane_s32
  return vmlsq_lane_s32(a, b, v, 1);
  // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
}
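// For reference, a small self-contained sketch of the lane-wise
// multiply-accumulate semantics the snippets above rely on:
// vmlaq_lane_s32(a, b, v, n) computes a[i] + b[i] * v[n] and
// vmlsq_lane_s32(a, b, v, n) computes a[i] - b[i] * v[n] for each 32-bit
// lane i. The example values and names below are illustrative, not taken
// from the original sources.
#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  const int32_t a_vals[4] = { 100, 200, 300, 400 };
  const int32_t b_vals[4] = { 1, 2, 3, 4 };
  const int32_t v_vals[2] = { 10, 7 };
  const int32x4_t a = vld1q_s32(a_vals);
  const int32x4_t b = vld1q_s32(b_vals);
  const int32x2_t v = vld1_s32(v_vals);

  // a - b * v[1]: expected { 93, 186, 279, 372 }.
  const int32x4_t mls = vmlsq_lane_s32(a, b, v, 1);
  // a + b * v[0]: expected { 110, 220, 330, 440 }.
  const int32x4_t mla = vmlaq_lane_s32(a, b, v, 0);

  int32_t out[4];
  vst1q_s32(out, mls);
  printf("mls: %d %d %d %d\n", out[0], out[1], out[2], out[3]);
  vst1q_s32(out, mla);
  printf("mla: %d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}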