static inline int pixel_sa8d_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height ) { int16_t diff[8][8]; int i_satd = 0; int x, y; for( y = 0; y < i_height; y += 8 ) { for( x = 0; x < i_width; x += 8 ) { int i; pixel_sub_wxh( (int16_t*)diff, 8, pix1+x, i_pix1, pix2+x, i_pix2 ); #define SRC(x) diff[i][x] #define DST(x,rhs) diff[i][x] = (rhs) for( i = 0; i < 8; i++ ) SA8D_1D #undef SRC #undef DST #define SRC(x) diff[x][i] #define DST(x,rhs) i_satd += abs(rhs) for( i = 0; i < 8; i++ ) SA8D_1D #undef SRC #undef DST } pix1 += 8 * i_pix1; pix2 += 8 * i_pix2; } return i_satd; }
static void sub8x8_dct8( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 ) { int i; #ifdef HAVE_SSE2 if(1) { int16_t tmp[8][8]; xavs_sub8x8_dct8_sse2( (int16_t*) dct[8][8], pix1, pix2 ,tmp); } #else { pixel_sub_wxh( (int16_t*)dct, 8, pix1, FENC_STRIDE, pix2, FDEC_STRIDE ); #define SRC(x) dct[i][x] #define DST(x) dct[i][x] for( i = 0; i < 8; i++ ) DCT8_Horizontal_1D #undef SRC #undef DST #define SRC(x) dct[x][i] #define DST(x) dct[x][i] for( i = 0; i < 8; i++ ) DCT8_Vertical_1D #undef SRC #undef DST } #endif }
static void sub8x8_dct8 (int16_t dct[8][8], uint8_t * pix1, uint8_t * pix2) { int i; int16_t tmp[8][8]; #ifdef HAVE_SSE2 xavs_sub8x8_dct8_sse2( dct, pix1, pix2 ,tmp); #else pixel_sub_wxh ((int16_t *) tmp, 8, pix1, FENC_STRIDE, pix2, FDEC_STRIDE); //aloha' i--x #define SRC(x) tmp[i][x] #define DST(x) tmp[i][x] for (i = 0; i < 8; i++) DCT8_Horizontal_1D #undef SRC #undef DST #define SRC(x) tmp[x][i] #define DST(x) dct[x][i] for (i = 0; i < 8; i++) DCT8_Vertical_1D #undef SRC #undef DST #endif }
/****************************************************************************
 * pixel_satd_wxh: sum of absolute 4x4 Hadamard-transformed differences.
 *
 * The i_width x i_height region is processed in 4x4 tiles.  For each tile
 * the difference block is obtained from pixel_sub_wxh() (defined
 * elsewhere), a 4-point butterfly is run over every row, then over every
 * column, and the magnitudes of the column outputs are accumulated.
 * The total is halved before being returned.
 ****************************************************************************/
static int pixel_satd_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
{
    int16_t residual[4][4];
    int16_t row_pass[4][4];
    int total = 0;
    int row, col;

    /* Both plane pointers step down four lines after each band of tiles. */
    for( row = 0; row < i_height; row += 4, pix1 += 4 * i_pix1, pix2 += 4 * i_pix2 )
    {
        for( col = 0; col < i_width; col += 4 )
        {
            int k;

            pixel_sub_wxh( &residual[0][0], 4, pix1 + col, i_pix1, pix2 + col, i_pix2 );

            /* Horizontal butterflies: one row of the tile per iteration. */
            for( k = 0; k < 4; k++ )
            {
                const int sum_lo  = residual[k][0] + residual[k][1];
                const int sum_hi  = residual[k][2] + residual[k][3];
                const int diff_lo = residual[k][0] - residual[k][1];
                const int diff_hi = residual[k][2] - residual[k][3];

                row_pass[k][0] = sum_lo  + sum_hi;
                row_pass[k][1] = sum_lo  - sum_hi;
                row_pass[k][2] = diff_lo - diff_hi;
                row_pass[k][3] = diff_lo + diff_hi;
            }

            /* Vertical butterflies: outputs are only needed as magnitudes,
             * so they are summed instead of stored. */
            for( k = 0; k < 4; k++ )
            {
                const int sum_lo  = row_pass[0][k] + row_pass[1][k];
                const int sum_hi  = row_pass[2][k] + row_pass[3][k];
                const int diff_lo = row_pass[0][k] - row_pass[1][k];
                const int diff_hi = row_pass[2][k] - row_pass[3][k];

                total += abs( sum_lo  + sum_hi );
                total += abs( sum_lo  - sum_hi );
                total += abs( diff_lo - diff_hi );
                total += abs( diff_lo + diff_hi );
            }
        }
    }

    return total / 2;
}