// Subtract four 64-bit lanes (two int64x2_t halves), then apply the DCT
// rounding shift and narrow each lane back to 32 bits.
static INLINE int32x4_t sub_dct_const_round_shift_low_8_bd12(
    const int64x2_t *const in0, const int64x2_t *const in1) {
  const int64x2_t sub_lo = vsubq_s64(in0[0], in1[0]);
  const int64x2_t sub_hi = vsubq_s64(in0[1], in1[1]);
  const int32x2_t out_lo = vrshrn_n_s64(sub_lo, DCT_CONST_BITS);
  const int32x2_t out_hi = vrshrn_n_s64(sub_hi, DCT_CONST_BITS);
  return vcombine_s32(out_lo, out_hi);
}
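The pattern above is worth seeing in isolation: vsubq_s64 performs a lane-wise 64-bit subtraction, and vrshrn_n_s64 adds the rounding constant 2^(n-1) before shifting right by n and narrowing to 32 bits. Below is a minimal standalone sketch, assuming DCT_CONST_BITS is 14 as defined in libvpx's vpx_dsp/txfm_common.h:

#include <arm_neon.h>
#include <stdio.h>

#define DCT_CONST_BITS 14  /* assumed; matches libvpx's vpx_dsp/txfm_common.h */

int main(void) {
  /* Lane 0 is exactly 3 << 14; lane 1 sits exactly halfway between
     1 << 14 and 2 << 14, so the rounding shift rounds it up to 2. */
  const int64_t a_vals[2] = { 3LL << DCT_CONST_BITS,
                              (1LL << DCT_CONST_BITS) +
                                  (1LL << (DCT_CONST_BITS - 1)) };
  const int64_t b_vals[2] = { 0, 0 };
  const int64x2_t diff = vsubq_s64(vld1q_s64(a_vals), vld1q_s64(b_vals));
  const int32x2_t out = vrshrn_n_s64(diff, DCT_CONST_BITS);
  printf("%d %d\n", vget_lane_s32(out, 0), vget_lane_s32(out, 1));  /* 3 2 */
  return 0;
}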
Example #2
// Compile-only check in the style of the GCC NEON testsuite: the operands
// are deliberately left uninitialized, since only instruction selection
// for vsubq_s64 is being exercised, not its result.
void test_vsubQs64 (void)
{
  int64x2_t out_int64x2_t;
  int64x2_t arg0_int64x2_t;
  int64x2_t arg1_int64x2_t;

  out_int64x2_t = vsubq_s64 (arg0_int64x2_t, arg1_int64x2_t);
}
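Because the testsuite snippet never initializes its operands, here is a hypothetical runnable counterpart that loads concrete values and prints the lane-wise differences:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  const int64_t a_vals[2] = { 100, -7 };
  const int64_t b_vals[2] = { 40, 5 };
  int64_t out[2];

  /* Lane-wise 64-bit subtraction: {100 - 40, -7 - 5} = {60, -12}. */
  const int64x2_t diff = vsubq_s64(vld1q_s64(a_vals), vld1q_s64(b_vals));
  vst1q_s64(out, diff);
  printf("%lld %lld\n", (long long)out[0], (long long)out[1]);  /* 60 -12 */
  return 0;
}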
Example #3
// One pass of a 4x4 inverse DCT for 12-bit content: transpose, then the
// even/odd butterflies with 64-bit intermediates to avoid overflow.
static INLINE void idct4x4_16_kernel_bd12(const int32x4_t cospis,
                                          int32x4_t *const a0,
                                          int32x4_t *const a1,
                                          int32x4_t *const a2,
                                          int32x4_t *const a3) {
  int32x4_t b0, b1, b2, b3;
  int64x2_t c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11;

  transpose_s32_4x4(a0, a1, a2, a3);
  // Even half: butterfly rows 0 and 2, then scale both results by
  // cospis lane 2.
  b0 = vaddq_s32(*a0, *a2);
  b1 = vsubq_s32(*a0, *a2);
  c0 = vmull_lane_s32(vget_low_s32(b0), vget_high_s32(cospis), 0);
  c1 = vmull_lane_s32(vget_high_s32(b0), vget_high_s32(cospis), 0);
  c2 = vmull_lane_s32(vget_low_s32(b1), vget_high_s32(cospis), 0);
  c3 = vmull_lane_s32(vget_high_s32(b1), vget_high_s32(cospis), 0);
  // Odd half: rows 1 and 3 scaled by cospis lanes 1 and 3, each 32x32
  // product widened to 64 bits, then cross-combined.
  c4 = vmull_lane_s32(vget_low_s32(*a1), vget_high_s32(cospis), 1);
  c5 = vmull_lane_s32(vget_high_s32(*a1), vget_high_s32(cospis), 1);
  c6 = vmull_lane_s32(vget_low_s32(*a1), vget_low_s32(cospis), 1);
  c7 = vmull_lane_s32(vget_high_s32(*a1), vget_low_s32(cospis), 1);
  c8 = vmull_lane_s32(vget_low_s32(*a3), vget_low_s32(cospis), 1);
  c9 = vmull_lane_s32(vget_high_s32(*a3), vget_low_s32(cospis), 1);
  c10 = vmull_lane_s32(vget_low_s32(*a3), vget_high_s32(cospis), 1);
  c11 = vmull_lane_s32(vget_high_s32(*a3), vget_high_s32(cospis), 1);
  c4 = vsubq_s64(c4, c8);
  c5 = vsubq_s64(c5, c9);
  c6 = vaddq_s64(c6, c10);
  c7 = vaddq_s64(c7, c11);
  // Round-shift every 64-bit intermediate back down to 32 bits.
  b0 = vcombine_s32(vrshrn_n_s64(c0, DCT_CONST_BITS),
                    vrshrn_n_s64(c1, DCT_CONST_BITS));
  b1 = vcombine_s32(vrshrn_n_s64(c2, DCT_CONST_BITS),
                    vrshrn_n_s64(c3, DCT_CONST_BITS));
  b2 = vcombine_s32(vrshrn_n_s64(c4, DCT_CONST_BITS),
                    vrshrn_n_s64(c5, DCT_CONST_BITS));
  b3 = vcombine_s32(vrshrn_n_s64(c6, DCT_CONST_BITS),
                    vrshrn_n_s64(c7, DCT_CONST_BITS));
  // Final butterfly produces the four output rows.
  *a0 = vaddq_s32(b0, b3);
  *a1 = vaddq_s32(b1, b2);
  *a2 = vsubq_s32(b1, b2);
  *a3 = vsubq_s32(b0, b3);
}