/*
 * Two-stage helper: run vp9_convolve8_c() into a local scratch buffer, then
 * pass that scratch buffer and dst to vp9_convolve_avg_c().
 *
 * src, src_stride     - input pixels and their row stride
 * dst, dst_stride     - output pixels and their row stride
 * filter_x, x_step_q4 - forwarded unchanged to vp9_convolve8_c()
 * filter_y, y_step_q4 - forwarded unchanged to vp9_convolve8_c()
 * w, h                - block dimensions, forwarded to both stages
 *
 * The scratch buffer is a fixed 64 * 64 bytes addressed with a stride of 64,
 * so the block must fit inside it. NOTE(review): presumably w <= 64 and
 * h <= 64 are guaranteed by callers; nothing here checks it.
 */
void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int x_step_q4,
                         const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
    /* Row stride used for the scratch buffer in both stages. */
    enum { SCRATCH_STRIDE = 64 };
    DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);

    /* Stage 1: filtered pixels land in the scratch buffer. */
    vp9_convolve8_c(src, src_stride,
                    temp, SCRATCH_STRIDE,
                    filter_x, x_step_q4,
                    filter_y, y_step_q4,
                    w, h);

    /* Stage 2: the second call receives no filters and zero step sizes. */
    vp9_convolve_avg_c(temp, SCRATCH_STRIDE,
                       dst, dst_stride,
                       NULL, 0,
                       NULL, 0,
                       w, h);
}
/* Example #2 */
/*
 * 2-D 8-tap convolution built from the NEON horizontal and vertical passes.
 *
 * src, src_stride     - input pixels and their row stride
 * dst, dst_stride     - output pixels and their row stride
 * filter_x, x_step_q4 - horizontal filter and step
 * filter_y, y_step_q4 - vertical filter and step
 * w, h                - block dimensions
 *
 * Only the case where both steps equal 16 is handled by the NEON passes;
 * any other step is delegated to vp9_convolve8_c(), writing straight to dst.
 */
void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int x_step_q4,
                        const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  enum {
    kTempStride = 64, /* row stride of the intermediate buffer */
    kRowsBefore = 3,  /* rows the vertical pass needs above the block */
    kRowsExtra = 7    /* 3 rows above + 4 rows below */
  };

  /* With w <= 64, h <= 64 and 8 taps, the horizontal pass produces at most
   * 64 + 7 = 71 rows of 64 bytes; one more row is reserved so the row count
   * (72) is divisible by 4.
   */
  DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);

  const int unit_steps = (x_step_q4 == 16) && (y_step_q4 == 16);

  if (!unit_steps) {
    vp9_convolve8_c(src, src_stride, dst, dst_stride,
                    filter_x, x_step_q4, filter_y, y_step_q4, w, h);
    return;
  }

  /* Horizontal pass, starting 3 rows above the block and covering h + 7
   * rows. The neon implementation will ignore the given height and filter a
   * multiple of 4 rows; the surplus lands in spare space in temp and is
   * discarded afterwards, so this is safe if somewhat less than ideal.
   */
  vp9_convolve8_horiz_neon(src - src_stride * kRowsBefore, src_stride,
                           temp, kTempStride,
                           filter_x, x_step_q4, filter_y, y_step_q4,
                           w, h + kRowsExtra);

  /* Vertical pass: skip the 3 lead-in rows so the source pointer sits on the
   * first row of actual block data inside temp.
   */
  vp9_convolve8_vert_neon(temp + kTempStride * kRowsBefore, kTempStride,
                          dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
}