/* Invokes the vqshluq_n_s16 intrinsic once with an immediate shift of 1 and
   stores the result in a local that is never read back.
   NOTE(review): the input vector is read without ever being initialised;
   presumably this is a compile-only check -- confirm before executing it.  */
void test_vqshluQ_ns16 (void)
{
  int16x8_t src;
  uint16x8_t dst;

  dst = vqshluq_n_s16 (src, 1);
}
// Adds `res` lane-wise to the 32 uint16_t values starting at *dest, replaces
// negative sums with zero, writes the 32 values back in place and then
// advances *dest by `stride` elements.
static INLINE void highbd_idct32x32_1_add_neg_kernel(uint16_t **dest,
                                                     const int stride,
                                                     const int16x8_t res) {
  uint16_t *const row = *dest;

  // Load each group of eight values and add the residual (plain, wrapping
  // signed add). All loads happen before any store.
  const int16x8_t sum0 =
      vaddq_s16(res, vreinterpretq_s16_u16(vld1q_u16(row)));
  const int16x8_t sum1 =
      vaddq_s16(res, vreinterpretq_s16_u16(vld1q_u16(row + 8)));
  const int16x8_t sum2 =
      vaddq_s16(res, vreinterpretq_s16_u16(vld1q_u16(row + 16)));
  const int16x8_t sum3 =
      vaddq_s16(res, vreinterpretq_s16_u16(vld1q_u16(row + 24)));

  // A saturating signed-to-unsigned shift by 0 leaves non-negative lanes
  // untouched and forces negative lanes to 0.
  vst1q_u16(row, vqshluq_n_s16(sum0, 0));
  vst1q_u16(row + 8, vqshluq_n_s16(sum1, 0));
  vst1q_u16(row + 16, vqshluq_n_s16(sum2, 0));
  vst1q_u16(row + 24, vqshluq_n_s16(sum3, 0));

  *dest = row + stride;
}
/* Example #3 */
/* 0 */
// Adds `res` to two consecutive 4-value rows (at *dest and *dest + stride),
// limits each sum to the range [0, max], stores both rows back and advances
// *dest by two strides.
// res is in reverse row order: its low half is applied to the second row and
// its high half to the first row.
static INLINE void highbd_idct4x4_1_add_kernel2(uint16_t **dest,
                                                const int stride,
                                                const int16x8_t res,
                                                const int16x8_t max) {
  uint16_t *const row0 = *dest;
  uint16_t *const row1 = row0 + stride;

  const uint16x4_t first = vld1_u16(row0);
  const uint16x4_t second = vld1_u16(row1);
  // Second row goes in the low half, first row in the high half, matching the
  // layout of res.
  const int16x8_t pixels = vreinterpretq_s16_u16(vcombine_u16(second, first));

  // Note: In some profile tests, res is quite close to +/-32767.
  // We use saturating addition.
  const int16x8_t summed = vqaddq_s16(res, pixels);
  const int16x8_t capped = vminq_s16(summed, max);
  // Saturating signed-to-unsigned shift by 0 turns negative lanes into 0.
  const uint16x8_t out = vqshluq_n_s16(capped, 0);

  vst1_u16(row0, vget_high_u16(out));
  vst1_u16(row1, vget_low_u16(out));
  *dest = row1 + stride;
}
/* Example #4 */
/* 0 */
// Applies the vqshluq_n_s16 intrinsic to `in` with an immediate shift of 1 and
// returns the result as an int16x8_t.
// NOTE(review): the two directive comments inside the body look like
// FileCheck-style expectations (one sqshlu call taking `in` and a vector of
// eight 1s); they are kept verbatim -- confirm against the test harness.
int16x8_t test_vqshluq_n_s16(int16x8_t in) {
    // CHECK-LABEL: @test_vqshluq_n_s16
    // CHECK: call <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x i16> %in, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
    return vqshluq_n_s16(in, 1);
}