/*
 * Cost of one 8x8 sub-block, computed from two pixel buffers.
 *
 * Fills an 8-vector scratch block via diff_blocks_avx2() from buf1/buf2
 * (with their respective strides), runs hor_transform_block_avx2() and
 * ver_transform_block_avx2() on it in place, then reduces it to a single
 * value with sum_block_avx2().
 *
 * Returns the reduced sum divided by 4, rounded to nearest: (sum + 2) >> 2.
 *
 * NOTE(review): presumably each __m128i holds one row of eight 16-bit
 * differences and the two transform passes form an 8x8 Hadamard transform;
 * confirm against the helper definitions.
 */
static unsigned satd_8x8_subblock_8bit_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
{
  __m128i rows[8];

  diff_blocks_avx2(&rows, buf1, stride1, buf2, stride2);
  hor_transform_block_avx2(&rows);
  ver_transform_block_avx2(&rows);

  const unsigned total = sum_block_avx2(rows);

  // Add half the divisor before shifting so the division by 4 rounds to nearest.
  return (total + 2) >> 2;
}
/*
 * 8x8 cost of two pixel buffers, using a fused difference + horizontal pass.
 *
 * diff_blocks_and_hor_transform_avx2() fills the first scratch block from
 * buf1/buf2 (with their respective strides); ver_add_sub_avx2() then reads
 * that block and writes the second one, which sum_block_avx2() reduces to a
 * single value.
 *
 * Returns the reduced sum divided by 4, rounded to nearest: (sum + 2) >> 2.
 *
 * NOTE(review): the second scratch block is passed to ver_add_sub_avx2()
 * uninitialized, so that helper is assumed to write all eight vectors;
 * confirm against its definition.
 */
static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
{
  __m128i hor_out[8];
  __m128i ver_out[8];

  diff_blocks_and_hor_transform_avx2(&hor_out, buf1, stride1, buf2, stride2);
  ver_add_sub_avx2(&hor_out, &ver_out);

  const unsigned total = sum_block_avx2(ver_out);

  // Add half the divisor before shifting so the division by 4 rounds to nearest.
  return (total + 2) >> 2;
}