/* Full-pel motion estimation, modern revision (uses the `pixel` typedef and
 * ALIGNED_ARRAY_16, i.e. the high-bit-depth-capable codebase).
 * NOTE(review): this fragment ends inside the function (after the predictor
 * pass); the main full-pel search and the function's closing brace are not
 * visible here.
 *   mvc / i_mvc      : extra candidate motion vectors in qpel units
 *   p_halfpel_thresh : optional early-termination threshold (used later,
 *                      outside this fragment) */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    const int stride = m->i_stride[0];
    int i_me_range = h->param.analyse.i_me_range;
    int bmx, bmy, bcost;
    /* best predictor found by the half-pel predictor pass below */
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    pixel *p_fenc = m->p_fenc[0];
    pixel *p_fref_w = m->p_fref_w;
    ALIGNED_ARRAY_16( pixel, pix,[16*16] );
    int costs[16];
    /* full-pel motion vector bounds for this macroblock */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];
    /* same bounds in quarter-pel units, for clipping qpel predictors */
    int mv_x_min_qpel = mv_x_min << 2;
    int mv_y_min_qpel = mv_y_min << 2;
    int mv_x_max_qpel = mv_x_max << 2;
    int mv_y_max_qpel = mv_y_max << 2;
/* Special version of pack to allow shortcuts in CHECK_MVRANGE:
 * mx and my are packed into one word so that both range comparisons reduce
 * to testing the borrow/sign bits selected by the 0x80004000 mask. */
#define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))
    uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
    uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;
#define CHECK_MVRANGE(mx,my) (!(((pack16to32_mask2(mx,my) + mv_min) | (mv_max - pack16to32_mask2(mx,my))) & 0x80004000))
    /* offset the cost tables by the predictor so that later lookups can be
     * indexed directly with the absolute motion vector */
    const uint16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    uint32_t pmv;
    /* start from the clipped MVP, then derive its full-pel rounding */
    bmx = x264_clip3( m->mvp[0], mv_x_min_qpel, mv_x_max_qpel );
    bmy = x264_clip3( m->mvp[1], mv_y_min_qpel, mv_y_max_qpel );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;
    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        pmv = pack16to32_mask(bmx,bmy);
        if( i_mvc )
            COST_MV_HPEL( bmx, bmy );
        for( int i = 0; i < i_mvc; i++ )
        {
            /* skip the zero vector and exact duplicates of the MVP */
            if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )
            {
                int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );
                int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );
                COST_MV_HPEL( mx, my );
            }
        }
        /* seed the full-pel search with the best half-pel predictor */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
/* Full-pel motion estimation, intermediate revision (uint8_t pixels,
 * COST_MV_PRED predictor pass, UMH-specific MVP clamping).
 * NOTE(review): this fragment ends inside the function after the predictor
 * pass; the main search body is not visible here.
 *   mvc / i_mvc      : extra candidate motion vectors in qpel units
 *   p_halfpel_thresh : optional early-termination threshold (used later) */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    int bmx, bmy, bcost;
    /* best predictor found by the predictor pass below */
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    int i, j;
    int dir;
    int costs[6];
    /* full-pel motion vector bounds for this macroblock */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];
    /* offset the cost tables by the predictor so that later lookups can be
     * indexed directly with the absolute motion vector */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    if( h->mb.i_me_method == X264_ME_UMH )
    {
        /* clamp mvp to inside frame+padding, so that we don't have to check it each iteration */
        p_cost_mvx = m->p_cost_mv - x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
        p_cost_mvy = m->p_cost_mv - x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
    }
    /* start from the clipped MVP (qpel), then derive its full-pel rounding */
    bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 );
    bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;
    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        COST_MV_PRED( bmx, bmy );
        for( i = 0; i < i_mvc; i++ )
        {
            const int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
            const int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
            /* only skip candidates equal to the current best predictor */
            if( mx != bpred_mx || my != bpred_my )
                COST_MV_PRED( mx, my );
        }
        /* seed the full-pel search with the best predictor */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
/* Full-pel motion estimation, intermediate revision (uint8_t pixels,
 * straightforward CHECK_MVRANGE bounds macro, COST_MV_HPEL predictor pass).
 * NOTE(review): this fragment ends inside the function after the predictor
 * pass; the main search body is not visible here.
 *   mvc / i_mvc      : extra candidate motion vectors in qpel units
 *   p_halfpel_thresh : optional early-termination threshold (used later) */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    int bmx, bmy, bcost;
    /* best predictor found by the half-pel predictor pass below */
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    int i, j;
    int dir;
    int costs[6];
    /* full-pel motion vector bounds for this macroblock */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];
/* true iff (mx,my) is a legal full-pel vector for this macroblock */
#define CHECK_MVRANGE(mx,my) ( mx >= mv_x_min && mx <= mv_x_max && my >= mv_y_min && my <= mv_y_max )
    /* offset the cost tables by the predictor so that later lookups can be
     * indexed directly with the absolute motion vector */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    /* start from the clipped MVP (qpel), then derive its full-pel rounding */
    bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 );
    bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;
    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        COST_MV_HPEL( bmx, bmy );
        for( i = 0; i < i_mvc; i++ )
        {
            int mx = mvc[i][0];
            int my = mvc[i][1];
            /* skip the zero vector and exact duplicates of the clipped MVP
             * (bitwise-OR trick: nonzero iff either component differs) */
            if( (mx | my) && ((mx-bmx) | (my-bmy)) )
            {
                mx = x264_clip3( mx, mv_x_min*4, mv_x_max*4 );
                my = x264_clip3( my, mv_y_min*4, mv_y_max*4 );
                COST_MV_HPEL( mx, my );
            }
        }
        /* seed the full-pel search with the best half-pel predictor */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
/* Full-pel motion estimation, intermediate revision (uint8_t pixels,
 * do/while predictor pass keyed on a packed 32-bit MV compare).
 * NOTE(review): this fragment ends inside the function after the predictor
 * pass; the main search body is not visible here.
 *   mvc / i_mvc      : extra candidate motion vectors in qpel units
 *   p_halfpel_thresh : optional early-termination threshold (used later) */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    int bmx, bmy, bcost;
    /* best predictor found by the half-pel predictor pass below */
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    DECLARE_ALIGNED_16( uint8_t pix[16*16] );
    int i = 0, j;
    int dir;
    int costs[6];
    /* full-pel motion vector bounds for this macroblock */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];
/* true iff (mx,my) is a legal full-pel vector for this macroblock */
#define CHECK_MVRANGE(mx,my) ( mx >= mv_x_min && mx <= mv_x_max && my >= mv_y_min && my <= mv_y_max )
    /* offset the cost tables by the predictor so that later lookups can be
     * indexed directly with the absolute motion vector */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    /* start from the clipped MVP (qpel), then derive its full-pel rounding */
    bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 );
    bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;
    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        uint32_t bmv = pack16to32_mask(bmx,bmy);
        COST_MV_HPEL( bmx, bmy );
        /* BUGFIX: the original unguarded do/while executed its body once even
         * when i_mvc == 0, reading mvc[0] out of bounds.  Guard the loop so an
         * empty candidate list is handled safely; behavior is unchanged for
         * i_mvc >= 1 (later revisions use the same `if( i_mvc )` guard). */
        if( i_mvc > 0 )
        {
            do
            {
                /* skip the zero vector and exact duplicates of the MVP.
                 * NOTE(review): reading two int16_t as one uint32_t is a
                 * strict-aliasing/alignment type-pun; kept as-is because this
                 * revision relies on the same packed-MV trick elsewhere
                 * (pack16to32_mask above). */
                if( *(uint32_t*)mvc[i] && (bmv - *(uint32_t*)mvc[i]) )
                {
                    int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
                    int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
                    COST_MV_HPEL( mx, my );
                }
            } while( ++i < i_mvc );
        }
        /* seed the full-pel search with the best half-pel predictor */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
/* Full-pel motion estimation, early revision: evaluates the MVP and extra
 * predictors, runs the configured full-pel search (DIA / HEX / UMH / ESA),
 * then computes the final cost (optionally including chroma) and begins
 * subpel refinement.
 * NOTE(review): this fragment is cut off inside the subpel-refine section;
 * the remainder of the function is not visible here.
 *   mvc / i_mvc       : extra candidate motion vectors in qpel units
 *   p_fullpel_thresh  : optional cross-reference early-termination threshold,
 *                       read and updated near the end of the fragment */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh )
{
    const int i_pixel = m->i_pixel;
    const int i_me_range = h->param.analyse.i_me_range;
    const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
    int bmx, bmy, bcost;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    int i, j;
    /* full-pel motion vector bounds for this macroblock */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];
    /* offset the cost tables by the predictor so that lookups below can be
     * indexed directly with the absolute (qpel) motion vector */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    if( h->mb.i_me_method == X264_ME_UMH )
    {
        /* clamp mvp to inside frame+padding, so that we don't have to check it each iteration */
        p_cost_mvx = m->p_cost_mv - x264_clip3( m->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
        p_cost_mvy = m->p_cost_mv - x264_clip3( m->mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
    }
    /* round the qpel MVP to full-pel and clamp; pmx/pmy keep the rounded
     * predictor for UMH's extra diamond iteration below */
    bmx = pmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max );
    bmy = pmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max );
    bcost = COST_MAX;
    COST_MV( bmx, bmy );
    /* I don't know why this helps */
    bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ];
    /* try extra predictors if provided */
    for( i = 0; i < i_mvc; i++ )
    {
        const int mx = x264_clip3( ( mvc[i][0] + 2 ) >> 2, mv_x_min, mv_x_max );
        const int my = x264_clip3( ( mvc[i][1] + 2 ) >> 2, mv_y_min, mv_y_max );
        if( mx != bmx || my != bmy )
            COST_MV( mx, my );
    }
    COST_MV( 0, 0 );
    /* widen the bounds by 8 before the pattern searches.
     * NOTE(review): presumably safe because the reference frame padding
     * extends past the fpel limits — confirm against the frame allocator. */
    mv_x_max += 8;
    mv_y_max += 8;
    mv_x_min -= 8;
    mv_y_min -= 8;
    switch( h->mb.i_me_method )
    {
    case X264_ME_DIA:
        /* diamond search, radius 1: probe the 4 neighbours of (mx,my);
         * COST_MV updates bmx/bmy, so the loop stops once the centre wins */
#define DIA1_ITER(mx, my)\
        {\
            omx = mx;\
            omy = my;\
            COST_MV( omx , omy-1 );\
            COST_MV( omx , omy+1 );\
            COST_MV( omx-1, omy );\
            COST_MV( omx+1, omy );\
        }
        for( i = 0; i < i_me_range; i++ )
        {
            DIA1_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        break;
    case X264_ME_HEX:
        /* hexagon search, radius 2 */
#define HEX2_ITER(mx, my)\
        {\
            omx = mx;\
            omy = my;\
            COST_MV( omx-2, omy );\
            COST_MV( omx-1, omy+2 );\
            COST_MV( omx+1, omy+2 );\
            COST_MV( omx+2, omy );\
            COST_MV( omx+1, omy-2 );\
            COST_MV( omx-1, omy-2 );\
        }
        for( i = 0; i < i_me_range/2; i++ )
        {
            HEX2_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        /* square refine */
        DIA1_ITER( bmx, bmy );
        COST_MV( omx-1, omy-1 );
        COST_MV( omx-1, omy+1 );
        COST_MV( omx+1, omy-1 );
        COST_MV( omx+1, omy+1 );
        break;
    case X264_ME_UMH:
        /* Uneven-cross Multi-Hexagon-grid Search
         * as in JM, except without early termination */
        DIA1_ITER( pmx, pmy );
        if( pmx || pmy )
            DIA1_ITER( 0, 0 );
        DIA1_ITER( bmx, bmy );
        if(i_pixel == PIXEL_4x4)
            goto umh_small_hex;
        /* cross: horizontal arm twice as long as the vertical one */
        omx = bmx; omy = bmy;
        for( i = 1; i < i_me_range; i+=2 )
        {
            if( omx + i <= mv_x_max )
                COST_MV( omx + i, omy );
            if( omx - i >= mv_x_min )
                COST_MV( omx - i, omy );
        }
        for( i = 1; i < i_me_range/2; i+=2 )
        {
            if( omy + i <= mv_y_max )
                COST_MV( omx, omy + i );
            if( omy - i >= mv_y_min )
                COST_MV( omx, omy - i );
        }
        /* 5x5 ESA: the 24 points around the current best (centre excluded) */
        omx = bmx; omy = bmy;
        for( i = 0; i < 24; i++ )
        {
            static const int square2_x[24] = {1,1,0,-1,-1,-1, 0, 1, 2,2,2,2,1,0,-1,-2,-2,-2,-2,-2,-1, 0, 1, 2};
            static const int square2_y[24] = {0,1,1, 1, 0,-1,-1,-1,-1,0,1,2,2,2, 2, 2, 1, 0,-1,-2,-2,-2,-2,-2};
            COST_MV( omx + square2_x[i], omy + square2_y[i] );
        }
        /* hexagon grid: expanding 16-point hexagons; skip the per-point
         * bounds test when the whole ring is known to fit */
        omx = bmx; omy = bmy;
        for( i = 1; i <= i_me_range/4; i++ )
        {
            int bounds_check = 4*i > X264_MIN4( mv_x_max-omx, mv_y_max-omy, omx-mv_x_min, omy-mv_y_min );
            for( j = 0; j < 16; j++ )
            {
                static const int hex4_x[16] = {0,-2,-4,-4,-4,-4,-4,-2, 0, 2, 4, 4,4,4,4,2};
                static const int hex4_y[16] = {4, 3, 2, 1, 0,-1,-2,-3,-4,-3,-2,-1,0,1,2,3};
                int mx = omx + hex4_x[j]*i;
                int my = omy + hex4_y[j]*i;
                if( !bounds_check || ( mx >= mv_x_min && mx <= mv_x_max && my >= mv_y_min && my <= mv_y_max ) )
                    COST_MV( mx, my );
            }
        }
umh_small_hex:
        /* iterative search */
        for( i = 0; i < i_me_range; i++ )
        {
            HEX2_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        for( i = 0; i < i_me_range; i++ )
        {
            DIA1_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        break;
    case X264_ME_ESA:
        {
            /* exhaustive search over the clamped i_me_range window */
            const int min_x = X264_MAX( bmx - i_me_range, mv_x_min);
            const int min_y = X264_MAX( bmy - i_me_range, mv_y_min);
            const int max_x = X264_MIN( bmx + i_me_range, mv_x_max);
            const int max_y = X264_MIN( bmy + i_me_range, mv_y_max);
            for( omy = min_y; omy <= max_y; omy++ )
                for( omx = min_x; omx <= max_x; omx++ )
                {
                    COST_MV( omx, omy );
                }
        }
        break;
    }
    /* -> qpel mv */
    m->mv[0] = bmx << 2;
    m->mv[1] = bmy << 2;
    /* compute the real cost */
    m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
    m->cost = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], m->i_stride[0], &p_fref[bmy * m->i_stride[0] + bmx], m->i_stride[0] ) + m->cost_mv;
    if( b_chroma_me )
    {
        /* add the comparison cost of both chroma planes at the chosen MV */
        const int bw = x264_pixel_size[m->i_pixel].w;
        const int bh = x264_pixel_size[m->i_pixel].h;
        uint8_t pix[8*8*2];
        h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
        h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix+8*8, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
        m->cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], m->i_stride[1], pix, 8 ) + h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], m->i_stride[1], pix+8*8, 8 );
    }
    /* subpel refine */
    if( h->mb.i_subpel_refine >= 3 )
    {
        int hpel, qpel;
        /* early termination (when examining multiple reference frames)
         * FIXME: this can update fullpel_thresh even if the match
         * ref is rejected after subpel refinement */
        if( p_fullpel_thresh )
        {
            /* give up on this ref if we are already 7/8 over the best */
            if( (m->cost*7)>>3 > *p_fullpel_thresh )
                return;
            else if( m->cost < *p_fullpel_thresh )
                *p_fullpel_thresh = m->cost;
        }