예제 #1
0
파일: me.c 프로젝트: DANHUK/x264
/* Motion search for the block described by m (presumably "me" = motion
 * estimation -- confirm).
 *
 * NOTE(review): this listing is truncated. Only the setup and the
 * predictor-evaluation stage are visible; the function body is not closed here.
 *
 *   h                 encoder context; the visible code reads the ME range, the
 *                     full-pel MV limits (h->mb.mv_{min,max}_fpel) and
 *                     h->mb.i_subpel_refine from it
 *   m                 search state; the visible code reads the block-size index,
 *                     the stride, the source / reference pointers, the MV
 *                     predictor m->mvp and the MV cost table m->p_cost_mv
 *   mvc, i_mvc        array of i_mvc candidate motion vectors, each an (x,y) pair
 *   p_halfpel_thresh  not referenced in the visible part
 */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    /* Block width / height looked up from the block-size index. */
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    const int stride = m->i_stride[0];
    int i_me_range = h->param.analyse.i_me_range;
    /* Best candidate so far (bmx,bmy,bcost) and best predictor-stage candidate (bpred_*). */
    int bmx, bmy, bcost;
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    pixel *p_fenc = m->p_fenc[0];
    pixel *p_fref_w = m->p_fref_w;
    ALIGNED_ARRAY_16( pixel, pix,[16*16] );

    int costs[16];

    /* MV limits taken from the macroblock state, plus the same limits scaled
     * by 4 (full-pel -> quarter-pel, going by the fpel/qpel names). */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];
    int mv_x_min_qpel = mv_x_min << 2;
    int mv_y_min_qpel = mv_y_min << 2;
    int mv_x_max_qpel = mv_x_max << 2;
    int mv_y_max_qpel = mv_y_max << 2;
/* Special version of pack to allow shortcuts in CHECK_MVRANGE:
 * mx goes into the upper 16 bits, the low 15 bits of my into the lower half. */
#define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))
    uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
    uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;

/* Non-zero when neither (packed + mv_min) nor (mv_max - packed) has any of the
 * 0x80004000 bits set, i.e. both components are tested with one expression. */
#define CHECK_MVRANGE(mx,my) (!(((pack16to32_mask2(mx,my) + mv_min) | (mv_max - pack16to32_mask2(mx,my))) & 0x80004000))

    /* Cost table pointers biased by -mvp: p_cost_mvx[x] reads m->p_cost_mv[x - mvp[0]]. */
    const uint16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    uint32_t pmv;
    /* Start from the MV predictor clipped to the scaled limits; (v + 2) >> 2
     * converts it back to whole units with rounding. */
    bmx = x264_clip3( m->mvp[0], mv_x_min_qpel, mv_x_max_qpel );
    bmy = x264_clip3( m->mvp[1], mv_y_min_qpel, mv_y_max_qpel );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;

    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        pmv = pack16to32_mask(bmx,bmy);
        /* COST_MV_HPEL and COST_MV are macros defined outside this listing;
         * presumably they update bpred_* and bcost/bmx/bmy -- confirm. */
        if( i_mvc )
            COST_MV_HPEL( bmx, bmy );
        for( int i = 0; i < i_mvc; i++ )
        {
            /* Skip candidates whose packed 32-bit value is zero or equals pmv. */
            if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )
            {
                int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );
                int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );
                COST_MV_HPEL( mx, my );
            }
        }
        /* Continue from the rounded best predictor; bpred_mx/my are still 0
         * here if no COST_MV_HPEL call ran. */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
예제 #2
0
/* Motion search for the block described by m (variant taking int MV candidates).
 *
 * NOTE(review): this listing is truncated. Only the setup and the
 * predictor-evaluation stage are visible; the function body is not closed here.
 *
 *   h                 encoder context (ME range, full-pel MV limits, i_subpel_refine)
 *   m                 search state (block-size index, reference pointer, mvp, MV cost table)
 *   mvc, i_mvc        array of i_mvc candidate motion vectors, each an (x,y) pair
 *   p_halfpel_thresh  not referenced in the visible part
 */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    /* Block width / height looked up from the block-size index. */
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    /* Best candidate so far (bmx,bmy,bcost) and best predictor-stage candidate (bpred_*). */
    int bmx, bmy, bcost;
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );

    int i, j;
    int dir;
    int costs[6];

    /* MV limits taken from the macroblock state (full-pel, going by the field names). */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];

/* True when (mx,my) lies inside the inclusive limit rectangle above. */
#define CHECK_MVRANGE(mx,my) ( mx >= mv_x_min && mx <= mv_x_max && my >= mv_y_min && my <= mv_y_max )

    /* Cost table pointers biased by -mvp: p_cost_mvx[x] reads m->p_cost_mv[x - mvp[0]]. */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    /* Start from the MV predictor clipped to 4x the limits; (v + 2) >> 2
     * converts it back to whole units with rounding. */
    bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 );
    bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;

    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        /* COST_MV_HPEL and COST_MV are macros defined outside this listing;
         * presumably they update bpred_* and bcost/bmx/bmy -- confirm. */
        COST_MV_HPEL( bmx, bmy );
        for( i = 0; i < i_mvc; i++ )
        {
            int mx = mvc[i][0];
            int my = mvc[i][1];
            /* Skip the zero vector and any candidate equal to the clipped predictor. */
            if( (mx | my) && ((mx-bmx) | (my-bmy)) )
            {
                mx = x264_clip3( mx, mv_x_min*4, mv_x_max*4 );
                my = x264_clip3( my, mv_y_min*4, mv_y_max*4 );
                COST_MV_HPEL( mx, my );
            }
        }
        /* Continue from the rounded best predictor. */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
예제 #3
0
파일: me.c 프로젝트: w-spencer/sagetv
/* Motion search for the block described by m (variant taking int16_t MV candidates).
 *
 * NOTE(review): this listing is truncated. Only the setup and the
 * predictor-evaluation stage are visible; the function body is not closed here.
 *
 *   h                 encoder context (ME range, full-pel MV limits, i_subpel_refine)
 *   m                 search state (block-size index, reference pointer, mvp, MV cost table)
 *   mvc, i_mvc        array of candidate motion vectors, each an (x,y) pair
 *   p_halfpel_thresh  not referenced in the visible part
 */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    /* Block width / height looked up from the block-size index. */
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    /* Best candidate so far (bmx,bmy,bcost) and best predictor-stage candidate (bpred_*). */
    int bmx, bmy, bcost;
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    DECLARE_ALIGNED_16( uint8_t pix[16*16] );

    /* i starts at 0 because the do/while below uses it before any loop initialiser. */
    int i = 0, j;
    int dir;
    int costs[6];

    /* MV limits taken from the macroblock state (full-pel, going by the field names). */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];

/* True when (mx,my) lies inside the inclusive limit rectangle above. */
#define CHECK_MVRANGE(mx,my) ( mx >= mv_x_min && mx <= mv_x_max && my >= mv_y_min && my <= mv_y_max )

    /* Cost table pointers biased by -mvp: p_cost_mvx[x] reads m->p_cost_mv[x - mvp[0]]. */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    /* Start from the MV predictor clipped to 4x the limits; (v + 2) >> 2
     * converts it back to whole units with rounding. */
    bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 );
    bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;

    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        uint32_t bmv = pack16to32_mask(bmx,bmy);
        /* COST_MV_HPEL and COST_MV are macros defined outside this listing;
         * presumably they update bpred_* and bcost/bmx/bmy -- confirm. */
        COST_MV_HPEL( bmx, bmy );
        /* NOTE(review): the do/while body runs once even when i_mvc is 0, so
         * mvc[0] is read unconditionally -- confirm callers always supply at
         * least one candidate. */
        do
        {
            /* Both int16_t components are read as one 32-bit word; skip
             * candidates that are zero or equal to the packed predictor.
             * NOTE(review): the uint32_t cast type-puns the int16_t pair. */
            if( *(uint32_t*)mvc[i] && (bmv - *(uint32_t*)mvc[i]) )
            {
                int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
                int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
                COST_MV_HPEL( mx, my );
            }
        } while( ++i < i_mvc );
        /* Continue from the rounded best predictor. */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
예제 #4
0
파일: speed.c 프로젝트: submux/obe-vod
/* Allocate and initialise the speed-control state of encoder h.
 *
 * The new state is stored in h->sc. All fields are zeroed first and then
 * seeded from h->param: frame rate, buffer size / initial fill, complexity
 * estimate and a copy of the user's parameters (sc->user_param).
 * As side effects, h->param.sc.f_speed is forced to 1 when it is <= 0 and
 * h->param.sc.i_buffer_size is raised to at least 3.
 *
 * If the allocation fails, h->sc is left NULL and nothing else is touched.
 */
void x264_speedcontrol_new( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc = x264_malloc( sizeof(x264_speedcontrol_t) );
    x264_emms();
    /* Do not write through a failed allocation. */
    if( !sc )
        return;
    memset( sc, 0, sizeof(x264_speedcontrol_t) );

    if( h->param.sc.f_speed <= 0 )
        h->param.sc.f_speed = 1;
    /* Divide in floating point: an integer division would truncate fractional
     * rates (30000/1001 -> 29) and turn rates below 1 fps into 0, which makes
     * the 1e6 / sc->fps computations below divide by zero. */
    sc->fps = (double)h->param.i_fps_num / h->param.i_fps_den;
    /* Duration of one frame; 1e6 per second, presumably microseconds -- confirm
     * against the unit returned by x264_mdate(). */
    sc->spf = 1e6 / sc->fps;
    h->param.sc.i_buffer_size = X264_MAX( 3, h->param.sc.i_buffer_size );
    /* i_buffer_size is a frame count; convert it to the same time unit as spf. */
    sc->buffer_size = h->param.sc.i_buffer_size * 1e6 / sc->fps;
    sc->buffer_fill = sc->buffer_size * h->param.sc.f_buffer_init;
    /* Keep the initial fill between one frame and the whole buffer. */
    sc->buffer_fill = x264_clip3( sc->buffer_fill, sc->spf, sc->buffer_size );
    sc->compensation_period = sc->buffer_size/4;
    sc->timestamp = x264_mdate();
    /* -1: no preset selected yet. */
    sc->preset = -1;
    sc->prev_frame = 0;
    sc->cplx_num = 3e3; //FIXME estimate initial complexity
    sc->cplx_den = .1;
    sc->cplx_decay = 1 - 1./h->param.sc.i_buffer_size;
    /* Running min/max start at the opposite extremes. */
    sc->stat.min_buffer = sc->buffer_size;
    sc->stat.max_buffer = 0;
    /* Snapshot of the parameters before speed control modifies any of them. */
    sc->user_param = h->param;
}
예제 #5
0
//{ mb_analyse_int //{ mb_analyse_int //{ mb_analyse_int //{ mb_analyse_int 
/* Initialise the per-macroblock analysis state a and the motion-vector limits
 * stored in h->mb before analysing the current macroblock.
 *
 * NOTE(review): this listing is truncated -- the "inter part" block and the
 * function body are not closed here.
 *
 *   h  encoder context; reads h->mb.i_qp, h->mb.i_mb_x / i_mb_y, h->sps sizes,
 *      h->sh.i_type and h->param.analyse settings; writes h->mb.mv_* limits
 *      and several h->mb flags
 *   a  analysis state to reset
 */
static void dull_mb_analyse_init_P( x264_t *h, x264_mb_analysis_t *a )
{
    x264_mb_analyse_init_qp( h, a, h->mb.i_qp );

    h->mb.b_transform_8x8 = 0;
    h->mb.b_noise_reduction = 0;

    /* I: Intra part */
    /* All intra costs start at COST_MAX. */
    a->i_satd_i16x16 =
    a->i_satd_i8x8   =
    a->i_satd_i4x4   =
    a->i_satd_i8x8chroma = COST_MAX;

    a->b_fast_intra = 0;
    a->b_avoid_topright = 0;
    /* 0 when lossless, 2 when a->i_mbrd is set, otherwise 1 only if both
     * trellis and noise reduction are disabled. */
    h->mb.i_skip_intra =
        h->mb.b_lossless ? 0 :
        a->i_mbrd ? 2 :
        !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;

    /* II: Inter part P/B frame */
    if( h->sh.i_type != SLICE_TYPE_I )
    {
        int i;
        /* Configured MV range scaled by 4 (presumably to quarter-pel units -- confirm). */
        int i_fmv_range = 4 * h->param.analyse.i_mv_range;
        // limit motion search to a slightly smaller range than the theoretical limit,
        // since the search may go a few iterations past its given range
        int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel

        /* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
        /* Horizontal limits depend on the macroblock column: 16 per macroblock
         * plus 24 extra on each side, all scaled by 4. */
        h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
        h->mb.mv_max[0] = 4*( 16*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
        h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
        h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P )
        {
            int max_x = (h->fref0[0]->i_pir_end_col * 16 - 3)*4; /* 3 pixels of hpel border */
            int max_mv = max_x - 4*16*h->mb.i_mb_x;
            /* If we're left of the refresh bar, don't reference right of it. */
            if( max_mv > 0 && h->mb.i_mb_x < h->fdec->i_pir_start_col )
                h->mb.mv_max_spel[0] = X264_MIN( h->mb.mv_max_spel[0], max_mv );
        }
        /* Limits in whole units (>>2), shrunk by the search border on both sides. */
        h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + i_fpel_border;
        h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
        /* Vertical limits are recomputed only for column 0, presumably because
         * they are constant along a macroblock row -- confirm. */
        if( h->mb.i_mb_x == 0 )
        {
            int mb_y = h->mb.i_mb_y;
            int mb_height = h->sps->i_mb_height;
            int thread_mvy_range = i_fmv_range;

            h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
            h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
            /* Unlike CLIP_FMV, the upper clip bound here is i_fmv_range, not i_fmv_range-1. */
            h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
            h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
            /* NOTE(review): thread_mvy_range equals i_fmv_range, so this MIN looks
             * like a no-op after CLIP_FMV for a positive range -- confirm intent. */
            h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
            h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
            h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
        }
예제 #6
0
파일: me.c 프로젝트: TravisKraatz/cinelerra
/* Motion search for the block described by m (variant using COST_MV_PRED).
 *
 * NOTE(review): this listing is truncated. Only the setup and the
 * predictor-evaluation stage are visible; the function body is not closed here.
 *
 *   h                 encoder context (ME method and range, MV limits, i_subpel_refine)
 *   m                 search state (block-size index, reference pointer, mvp, MV cost table)
 *   mvc, i_mvc        array of i_mvc candidate motion vectors, each an (x,y) pair
 *   p_halfpel_thresh  not referenced in the visible part
 */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    /* Block width / height looked up from the block-size index. */
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    /* Best candidate so far (bmx,bmy,bcost) and best predictor-stage candidate (bpred_*). */
    int bmx, bmy, bcost;
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );

    int i, j;
    int dir;
    int costs[6];

    /* MV limits taken from the macroblock state (full-pel, going by the field names). */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];

    /* Cost table pointers biased by -mvp: p_cost_mvx[x] reads m->p_cost_mv[x - mvp[0]]. */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    if( h->mb.i_me_method == X264_ME_UMH )
    {
        /* clamp mvp to inside frame+padding, so that we don't have to check it each iteration */
        p_cost_mvx = m->p_cost_mv - x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
        p_cost_mvy = m->p_cost_mv - x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
    }

    /* Start from the MV predictor clipped to 4x the limits; (v + 2) >> 2
     * converts it back to whole units with rounding. */
    bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 );
    bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;

    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        /* COST_MV_PRED and COST_MV are macros defined outside this listing;
         * presumably they update bpred_* and bcost/bmx/bmy -- confirm. */
        COST_MV_PRED( bmx, bmy );
        for( i = 0; i < i_mvc; i++ )
        {
             /* Clip each candidate first, then skip it if it equals the current best predictor. */
             const int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
             const int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
             if( mx != bpred_mx || my != bpred_my )
                 COST_MV_PRED( mx, my );
        }
        /* Continue from the rounded best predictor. */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
예제 #7
0
파일: speed.c 프로젝트: submux/obe-vod
/* Switch encoder h to the speed-control preset with index `preset`.
 *
 * The index is clamped to [0, PRESETS-1]. The preset's settings are layered on
 * top of the saved user parameters (sc->user_param) and handed to
 * x264_encoder_reconfig(); the preset is reapplied unconditionally, even when
 * it equals sc->preset. The applied index is then stored in sc->preset.
 */
static void apply_preset( x264_t *h, int preset )
{
    x264_speedcontrol_t *sc = h->sc;
    const sc_preset_t *cfg;
    x264_param_t param;

    preset = x264_clip3( preset, 0, PRESETS-1 );
    cfg = &presets[preset];

    /* Start from the user's own parameters and override only the fields a preset controls. */
    param = sc->user_param;
    param.analyse.i_me_method        = cfg->me;
    param.analyse.i_subpel_refine    = cfg->subme;
    param.analyse.b_chroma_me        = cfg->chromame;
    param.analyse.inter              = cfg->partitions;
    param.analyse.b_mixed_references = cfg->mix;
    param.i_frame_reference          = cfg->refs;
    param.analyse.i_trellis          = cfg->trellis;
    param.analyse.f_psy_rd           = cfg->psy_rd;
    param.analyse.f_psy_trellis      = cfg->psy_trellis;

    x264_encoder_reconfig( h, &param );
    sc->preset = preset;
    x264_log( h, X264_LOG_DEBUG, "Applying speedcontrol preset %d.\n", preset );
}
예제 #8
0
파일: me.c 프로젝트: JamesLinus/x264-1
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh )
{
    const int i_pixel = m->i_pixel;
    const int i_me_range = h->param.analyse.i_me_range;
    const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
    int bmx, bmy, bcost;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    int i, j;

    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];

    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    if( h->mb.i_me_method == X264_ME_UMH )
    {
        /* clamp mvp to inside frame+padding, so that we don't have to check it each iteration */
        p_cost_mvx = m->p_cost_mv - x264_clip3( m->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
        p_cost_mvy = m->p_cost_mv - x264_clip3( m->mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
    }

    bmx = pmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max );
    bmy = pmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max );
    bcost = COST_MAX;
    COST_MV( bmx, bmy );
    /* I don't know why this helps */
    bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ];

    /* try extra predictors if provided */
    for( i = 0; i < i_mvc; i++ )
    {
        const int mx = x264_clip3( ( mvc[i][0] + 2 ) >> 2, mv_x_min, mv_x_max );
        const int my = x264_clip3( ( mvc[i][1] + 2 ) >> 2, mv_y_min, mv_y_max );
        if( mx != bmx || my != bmy )
            COST_MV( mx, my );
    }
    
    COST_MV( 0, 0 );

    mv_x_max += 8;
    mv_y_max += 8;
    mv_x_min -= 8;
    mv_y_min -= 8;

    switch( h->mb.i_me_method )
    {
    case X264_ME_DIA:
        /* diamond search, radius 1 */
#define DIA1_ITER(mx, my)\
        {\
            omx = mx;\
            omy = my;\
            COST_MV( omx  , omy-1 );\
            COST_MV( omx  , omy+1 );\
            COST_MV( omx-1, omy   );\
            COST_MV( omx+1, omy   );\
        }

        for( i = 0; i < i_me_range; i++ )
        {
            DIA1_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        break;

    case X264_ME_HEX:
        /* hexagon search, radius 2 */
#define HEX2_ITER(mx, my)\
        {\
            omx = mx;\
            omy = my;\
            COST_MV( omx-2, omy   );\
            COST_MV( omx-1, omy+2 );\
            COST_MV( omx+1, omy+2 );\
            COST_MV( omx+2, omy   );\
            COST_MV( omx+1, omy-2 );\
            COST_MV( omx-1, omy-2 );\
        }

        for( i = 0; i < i_me_range/2; i++ )
        {
            HEX2_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        /* square refine */
        DIA1_ITER( bmx, bmy );
        COST_MV( omx-1, omy-1 );
        COST_MV( omx-1, omy+1 );
        COST_MV( omx+1, omy-1 );
        COST_MV( omx+1, omy+1 );
        break;

    case X264_ME_UMH:
        /* Uneven-cross Multi-Hexagon-grid Search
         * as in JM, except without early termination */

        DIA1_ITER( pmx, pmy );
        if( pmx || pmy )
            DIA1_ITER( 0, 0 );
        DIA1_ITER( bmx, bmy );

        if(i_pixel == PIXEL_4x4)
            goto umh_small_hex;

        /* cross */
        omx = bmx; omy = bmy;
        for( i = 1; i < i_me_range; i+=2 )
        {
            if( omx + i <= mv_x_max )
                COST_MV( omx + i, omy );
            if( omx - i >= mv_x_min )
                COST_MV( omx - i, omy );
        }
        for( i = 1; i < i_me_range/2; i+=2 )
        {
            if( omy + i <= mv_y_max )
                COST_MV( omx, omy + i );
            if( omy - i >= mv_y_min )
                COST_MV( omx, omy - i );
        }

        /* 5x5 ESA */
        omx = bmx; omy = bmy;
        for( i = 0; i < 24; i++ )
        {
            static const int square2_x[24] = {1,1,0,-1,-1,-1, 0, 1, 2,2,2,2,1,0,-1,-2,-2,-2,-2,-2,-1, 0, 1, 2};
            static const int square2_y[24] = {0,1,1, 1, 0,-1,-1,-1,-1,0,1,2,2,2, 2, 2, 1, 0,-1,-2,-2,-2,-2,-2};
            COST_MV( omx + square2_x[i], omy + square2_y[i] );
        }
        /* hexagon grid */
        omx = bmx; omy = bmy;
        for( i = 1; i <= i_me_range/4; i++ )
        {
            int bounds_check = 4*i > X264_MIN4( mv_x_max-omx, mv_y_max-omy, omx-mv_x_min, omy-mv_y_min );
            for( j = 0; j < 16; j++ )
            {
                static const int hex4_x[16] = {0,-2,-4,-4,-4,-4,-4,-2, 0, 2, 4, 4,4,4,4,2};
                static const int hex4_y[16] = {4, 3, 2, 1, 0,-1,-2,-3,-4,-3,-2,-1,0,1,2,3};
                int mx = omx + hex4_x[j]*i;
                int my = omy + hex4_y[j]*i;
                if( !bounds_check || ( mx >= mv_x_min && mx <= mv_x_max
                                    && my >= mv_y_min && my <= mv_y_max ) )
                    COST_MV( mx, my );
            }
        }
umh_small_hex:
        /* iterative search */
        for( i = 0; i < i_me_range; i++ )
        {
            HEX2_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        for( i = 0; i < i_me_range; i++ )
        {
            DIA1_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        break;

    case X264_ME_ESA:
        {
            const int min_x = X264_MAX( bmx - i_me_range, mv_x_min);
            const int min_y = X264_MAX( bmy - i_me_range, mv_y_min);
            const int max_x = X264_MIN( bmx + i_me_range, mv_x_max);
            const int max_y = X264_MIN( bmy + i_me_range, mv_y_max);
            for( omy = min_y; omy <= max_y; omy++ )
                for( omx = min_x; omx <= max_x; omx++ )
                {
                    COST_MV( omx, omy );
                }
        }
        break;
    }

    /* -> qpel mv */
    m->mv[0] = bmx << 2;
    m->mv[1] = bmy << 2;

    /* compute the real cost */
    m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
    m->cost = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], m->i_stride[0],
                    &p_fref[bmy * m->i_stride[0] + bmx], m->i_stride[0] )
            + m->cost_mv;
    if( b_chroma_me )
    {
        const int bw = x264_pixel_size[m->i_pixel].w;
        const int bh = x264_pixel_size[m->i_pixel].h;
        uint8_t pix[8*8*2];
        h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
        h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix+8*8, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
        m->cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], m->i_stride[1], pix, 8 )
                 + h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], m->i_stride[1], pix+8*8, 8 );
    }

    /* subpel refine */
    if( h->mb.i_subpel_refine >= 3 )
    {
        int hpel, qpel;

        /* early termination (when examining multiple reference frames)
         * FIXME: this can update fullpel_thresh even if the match
         *        ref is rejected after subpel refinement */
        if( p_fullpel_thresh )
        {
            if( (m->cost*7)>>3 > *p_fullpel_thresh )
                return;
            else if( m->cost < *p_fullpel_thresh )
                *p_fullpel_thresh = m->cost;
        }