// One pass of the 4x4 inverse DCT for 10-bit content: transposes the four
// input vectors in place, then applies the even/odd butterfly and writes the
// recombined rows back through a0..a3.
// NOTE(review): lane layout of cospis is fixed by the caller -- presumably
// {-, cospi_8, cospi_16, cospi_24} as in libvpx; confirm against the
// constant-loading code before changing any lane index below.
static INLINE void idct4x4_16_kernel_bd10(const int32x4_t cospis,
                                          int32x4_t *const a0,
                                          int32x4_t *const a1,
                                          int32x4_t *const a2,
                                          int32x4_t *const a3) {
  const int32x2_t cospis_lo = vget_low_s32(cospis);
  const int32x2_t cospis_hi = vget_high_s32(cospis);
  int32x4_t even0, even1, odd0, odd1;

  transpose_s32_4x4(a0, a1, a2, a3);

  // Even half: scale the sum and difference of rows 0 and 2 by the lane-2
  // constant of cospis.
  even0 = vmulq_lane_s32(vaddq_s32(*a0, *a2), cospis_hi, 0);
  even1 = vmulq_lane_s32(vsubq_s32(*a0, *a2), cospis_hi, 0);

  // Odd half: cross-multiply rows 1 and 3 by the lane-1 and lane-3 constants.
  odd0 = vmulq_lane_s32(*a1, cospis_hi, 1);
  odd1 = vmulq_lane_s32(*a1, cospis_lo, 1);
  odd0 = vmlsq_lane_s32(odd0, *a3, cospis_lo, 1);
  odd1 = vmlaq_lane_s32(odd1, *a3, cospis_hi, 1);

  // Round the fixed-point products back to the working precision.
  even0 = vrshrq_n_s32(even0, DCT_CONST_BITS);
  even1 = vrshrq_n_s32(even1, DCT_CONST_BITS);
  odd0 = vrshrq_n_s32(odd0, DCT_CONST_BITS);
  odd1 = vrshrq_n_s32(odd1, DCT_CONST_BITS);

  // Final butterfly recombination.
  *a0 = vaddq_s32(even0, odd1);
  *a1 = vaddq_s32(even1, odd0);
  *a2 = vsubq_s32(even1, odd0);
  *a3 = vsubq_s32(even0, odd1);
}
static INLINE void iadst_half_butterfly_bd10_neon(int32x4_t *const x, const int32x2_t c) { const int32x4_t sum = vaddq_s32(x[0], x[1]); const int32x4_t sub = vsubq_s32(x[0], x[1]); x[0] = vmulq_lane_s32(sum, c, 0); x[1] = vmulq_lane_s32(sub, c, 0); x[0] = vrshrq_n_s32(x[0], DCT_CONST_BITS); x[1] = vrshrq_n_s32(x[1], DCT_CONST_BITS); }
/* Compile-time exercise of the vrshrq_n_s32 intrinsic (rounding right shift
   by an immediate).  Fix: the argument was read while uninitialized, which is
   undefined behavior; give it a defined value and consume the result so the
   intrinsic call cannot be warned away as set-but-unused. */
void test_vRshrQ_ns32 (void)
{
  int32x4_t out_int32x4_t;
  int32x4_t arg0_int32x4_t = vdupq_n_s32 (0);

  out_int32x4_t = vrshrq_n_s32 (arg0_int32x4_t, 1);
  (void) out_int32x4_t;
}
// Returns round_shift(in0 - in1) with DCT_CONST_BITS of fixed-point
// precision removed (rounding toward nearest).
static INLINE int32x4_t sub_dct_const_round_shift_low_8_bd10(
    const int32x4_t in0, const int32x4_t in1) {
  return vrshrq_n_s32(vsubq_s32(in0, in1), DCT_CONST_BITS);
}
// clang CodeGen/FileCheck test: the CHECK lines below are FileCheck
// directives and must not be edited -- they verify that vrshrq_n_s32(in, 1)
// lowers to the arm64 srshl intrinsic with a splat shift amount of -1
// (a right shift expressed as a negative left shift).
int32x4_t test_vrshrq_n_s32(int32x4_t in) { // CHECK-LABEL: @test_vrshrq_n_s32 // CHECK: call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %in, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) return vrshrq_n_s32(in, 1); }