void vpx_highbd_idct32x32_1_add_neon(const tran_low_t *input, uint16_t *dest,
                                     int stride, int bd) {
  const tran_low_t out0 = HIGHBD_WRAPLOW(
      dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);
  const tran_low_t out1 = HIGHBD_WRAPLOW(
      dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd);
  const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
  const int16x8_t dc = vdupq_n_s16(a1);
  int i;

  if (a1 >= 0) {
    const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
    for (i = 0; i < 8; ++i) {
      highbd_idct32x32_1_add_pos_kernel(&dest, stride, dc, max);
      highbd_idct32x32_1_add_pos_kernel(&dest, stride, dc, max);
      highbd_idct32x32_1_add_pos_kernel(&dest, stride, dc, max);
      highbd_idct32x32_1_add_pos_kernel(&dest, stride, dc, max);
    }
  } else {
    for (i = 0; i < 8; ++i) {
      highbd_idct32x32_1_add_neg_kernel(&dest, stride, dc);
      highbd_idct32x32_1_add_neg_kernel(&dest, stride, dc);
      highbd_idct32x32_1_add_neg_kernel(&dest, stride, dc);
      highbd_idct32x32_1_add_neg_kernel(&dest, stride, dc);
    }
  }
}
/* Example #2 */
/* 0 */
void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint16_t *dest,
                                   int stride, int bd) {
  const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
  const tran_low_t out0 = HIGHBD_WRAPLOW(
      dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);
  const tran_low_t out1 = HIGHBD_WRAPLOW(
      dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd);
  const int16_t a1 = ROUND_POWER_OF_TWO(out1, 4);
  const int16x8_t dc = vdupq_n_s16(a1);

  highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
  highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
}