예제 #1
0
// Kernel step for the 4x4 inverse DCT named in the identifier; works in place
// on the four 32-bit vectors behind a0..a3.
//
//   1. transpose_s32_4x4() is applied to a0..a3 (presumably an in-place 4x4
//      transpose -- the helper is defined elsewhere; confirm).
//   2. (a0 + a2) and (a0 - a2) are each multiplied by lane 0 of the high half
//      of cospis.
//   3. a1 and a3 are combined as
//        a1 * cospis_high[1] - a3 * cospis_low[1]
//        a1 * cospis_low[1]  + a3 * cospis_high[1]
//   4. All four intermediate vectors get a rounding right shift by
//      DCT_CONST_BITS.
//   5. Sums and differences of those results are written back through a0..a3.
//
// cospis: vector of multiplier constants; only lanes 1, 2 and 3 are read here.
static INLINE void idct4x4_16_kernel_bd10(const int32x4_t cospis,
                                          int32x4_t *const a0,
                                          int32x4_t *const a1,
                                          int32x4_t *const a2,
                                          int32x4_t *const a3) {
  int32x4_t even_sum, even_diff, odd_diff, odd_sum;

  transpose_s32_4x4(a0, a1, a2, a3);

  // Inputs 0 and 2: sum and difference, both scaled by the same constant.
  even_sum = vmulq_lane_s32(vaddq_s32(*a0, *a2), vget_high_s32(cospis), 0);
  even_diff = vmulq_lane_s32(vsubq_s32(*a0, *a2), vget_high_s32(cospis), 0);

  // Inputs 1 and 3: multiply, then multiply-subtract / multiply-accumulate
  // with the two constants swapped between the two outputs.
  odd_diff = vmlsq_lane_s32(vmulq_lane_s32(*a1, vget_high_s32(cospis), 1), *a3,
                            vget_low_s32(cospis), 1);
  odd_sum = vmlaq_lane_s32(vmulq_lane_s32(*a1, vget_low_s32(cospis), 1), *a3,
                           vget_high_s32(cospis), 1);

  // Drop the fixed-point scaling with a rounding shift.
  even_sum = vrshrq_n_s32(even_sum, DCT_CONST_BITS);
  even_diff = vrshrq_n_s32(even_diff, DCT_CONST_BITS);
  odd_diff = vrshrq_n_s32(odd_diff, DCT_CONST_BITS);
  odd_sum = vrshrq_n_s32(odd_sum, DCT_CONST_BITS);

  // Final recombination, stored back over the inputs.
  *a0 = vaddq_s32(even_sum, odd_sum);
  *a1 = vaddq_s32(even_diff, odd_diff);
  *a2 = vsubq_s32(even_diff, odd_diff);
  *a3 = vsubq_s32(even_sum, odd_sum);
}
예제 #2
0
파일: vmlsQ_lanes32.c 프로젝트: 0day-ci/gcc
/* Exercises the vmlsq_lane_s32 intrinsic with lane index 1.
 *
 * NOTE(review): the three argument vectors are read without ever being
 * initialized and the result is stored but never used, so nothing is verified
 * at run time. This looks like a compile-only code-generation fixture --
 * confirm against the test harness before reusing it as ordinary code. */
void test_vmlsQ_lanes32 (void)
{
  int32x4_t out_int32x4_t;
  int32x4_t arg0_int32x4_t;
  int32x4_t arg1_int32x4_t;
  int32x2_t arg2_int32x2_t;

  /* Multiply-subtract by lane 1 of arg2; the value is intentionally unused. */
  out_int32x4_t = vmlsq_lane_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32x2_t, 1);
}
// Combines in0 and in1 with the two constants held in c, per 32-bit lane:
//
//   *s0 = in0 * c[1] + in1 * c[0]
//   *s1 = in0 * c[0] - in1 * c[1]
//
// No rounding or shifting is applied here; the raw 32-bit results are stored
// through s0 and s1.
static INLINE void iadst_butterfly_lane_1_0_bd10_neon(const int32x4_t in0,
                                                      const int32x4_t in1,
                                                      const int32x2_t c,
                                                      int32x4_t *const s0,
                                                      int32x4_t *const s1) {
  // in0 and in1 are by-value copies, so storing *s0 first cannot disturb the
  // operands used for *s1.
  *s0 = vmlaq_lane_s32(vmulq_lane_s32(in0, c, 1), in1, c, 0);
  *s1 = vmlsq_lane_s32(vmulq_lane_s32(in0, c, 0), in1, c, 1);
}
예제 #4
0
// Returns vmlsq_lane_s32(a, b, v, 1), i.e. the multiply-subtract intrinsic
// applied with lane 1 of v.
//
// NOTE(review): the two comments inside the body look like FileCheck
// directives that are matched against the generated assembly (a 4s "mls"
// using element .s[1]). Keep them and the single return statement exactly as
// written -- confirm with the test runner before reformatting.
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
  // CHECK: test_vmlsq_lane_s32
  return vmlsq_lane_s32(a, b, v, 1);
  // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
}