Beispiel #1
0
INLINE static void diff_blocks_avx2(__m128i (*row_diff)[8],
                                                           const kvz_pixel * buf1, unsigned stride1,
                                                           const kvz_pixel * orig, unsigned stride_orig)
{
  (*row_diff)[0] = diff_row_avx2(buf1 + 0 * stride1, orig + 0 * stride_orig);
  (*row_diff)[1] = diff_row_avx2(buf1 + 1 * stride1, orig + 1 * stride_orig);
  (*row_diff)[2] = diff_row_avx2(buf1 + 2 * stride1, orig + 2 * stride_orig);
  (*row_diff)[3] = diff_row_avx2(buf1 + 3 * stride1, orig + 3 * stride_orig);
  (*row_diff)[4] = diff_row_avx2(buf1 + 4 * stride1, orig + 4 * stride_orig);
  (*row_diff)[5] = diff_row_avx2(buf1 + 5 * stride1, orig + 5 * stride_orig);
  (*row_diff)[6] = diff_row_avx2(buf1 + 6 * stride1, orig + 6 * stride_orig);
  (*row_diff)[7] = diff_row_avx2(buf1 + 7 * stride1, orig + 7 * stride_orig);

}
Beispiel #2
0
INLINE static void diff_blocks_and_hor_transform_avx2(__m128i (*row_diff)[8], const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
{
  (*row_diff)[0] = diff_row_avx2(buf1 + 0 * stride1, buf2 + 0 * stride2);
  (*row_diff)[1] = diff_row_avx2(buf1 + 1 * stride1, buf2 + 1 * stride2);
  hor_add_sub_avx2((*row_diff) + 0, (*row_diff) + 1);

  (*row_diff)[2] = diff_row_avx2(buf1 + 2 * stride1, buf2 + 2 * stride2);
  (*row_diff)[3] = diff_row_avx2(buf1 + 3 * stride1, buf2 + 3 * stride2);
  hor_add_sub_avx2((*row_diff) + 2, (*row_diff) + 3);

  (*row_diff)[4] = diff_row_avx2(buf1 + 4 * stride1, buf2 + 4 * stride2);
  (*row_diff)[5] = diff_row_avx2(buf1 + 5 * stride1, buf2 + 5 * stride2);
  hor_add_sub_avx2((*row_diff) + 4, (*row_diff) + 5);

  (*row_diff)[6] = diff_row_avx2(buf1 + 6 * stride1, buf2 + 6 * stride2);
  (*row_diff)[7] = diff_row_avx2(buf1 + 7 * stride1, buf2 + 7 * stride2);
  hor_add_sub_avx2((*row_diff) + 6, (*row_diff) + 7);
}