Пример #1
0
/*
 * x264_cpu_restore:
 *   Calls x264_emms() when `cpu` has at least one of the MMX, MMXEXT,
 *   3DNow! or 3DNow!Ext capability flags set; otherwise does nothing.
 *
 *   cpu: bitmask of X264_CPU_* capability flags.
 */
void     x264_cpu_restore( uint32_t cpu )
{
    /* Every flag for which the MMX state has to be reset. */
    const uint32_t mmx_family = X264_CPU_MMX | X264_CPU_MMXEXT
                              | X264_CPU_3DNOW | X264_CPU_3DNOWEXT;

    if( !( cpu & mmx_family ) )
        return;

    x264_emms();
}
Пример #2
0
/*
 * x264_speedcontrol_new:
 *   Allocates the speed-control state, stores it in h->sc and initialises it
 *   from h->param.
 *
 *   Side effects on h->param (done before the snapshot into sc->user_param):
 *     - sc.f_speed <= 0 is replaced by 1
 *     - sc.i_buffer_size is raised to at least 3
 *
 *   If the allocation fails, h->sc is left NULL and nothing else is touched;
 *   the caller has to check h->sc before using speed control.
 *
 *   Times appear to be kept in microseconds (see the 1e6 factors) -- TODO
 *   confirm against x264_mdate().
 */
void x264_speedcontrol_new( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc = x264_malloc( sizeof(x264_speedcontrol_t) );
    /* Do not memset/dereference a failed allocation. */
    if( !sc )
        return;
    x264_emms();
    memset( sc, 0, sizeof(x264_speedcontrol_t) );

    if( h->param.sc.f_speed <= 0 )
        h->param.sc.f_speed = 1;
    /* Divide in floating point so that fractional frame rates (e.g. 30000/1001)
     * are not truncated before being stored. */
    sc->fps = (double)h->param.i_fps_num / h->param.i_fps_den;
    sc->spf = 1e6 / sc->fps;
    h->param.sc.i_buffer_size = X264_MAX( 3, h->param.sc.i_buffer_size );
    /* Buffer capacity: i_buffer_size frames expressed as a duration. */
    sc->buffer_size = h->param.sc.i_buffer_size * 1e6 / sc->fps;
    /* Initial fill is a fraction of the capacity, kept within [spf, buffer_size]. */
    sc->buffer_fill = sc->buffer_size * h->param.sc.f_buffer_init;
    sc->buffer_fill = x264_clip3( sc->buffer_fill, sc->spf, sc->buffer_size );
    sc->compensation_period = sc->buffer_size/4;
    sc->timestamp = x264_mdate();
    /* No preset has been applied yet. */
    sc->preset = -1;
    sc->prev_frame = 0;
    sc->cplx_num = 3e3; //FIXME estimate initial complexity
    sc->cplx_den = .1;
    sc->cplx_decay = 1 - 1./h->param.sc.i_buffer_size;
    /* Start the min/max trackers at opposite extremes so the first update wins. */
    sc->stat.min_buffer = sc->buffer_size;
    sc->stat.max_buffer = 0;
    /* Snapshot of the (already adjusted) parameters. */
    sc->user_param = h->param;
}
Пример #3
0
/****************************************************************************
 * x264_nal_encode:
 *   Writes the unit described by `nal` into `dst`, preceded by the framing
 *   selected below, and updates nal->i_payload to the number of bytes
 *   written (framing included).
 *
 *   h:   encoder context; supplies param.b_annexb and the bsf.nal_escape
 *        callback.
 *   dst: output buffer. No bounds checking is done here, so the caller must
 *        provide enough room.
 *   nal: unit to serialise; p_payload/i_payload give the input bytes.
 ****************************************************************************/
void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
{
    uint8_t *src = nal->p_payload;
    uint8_t *end = nal->p_payload + nal->i_payload;
    uint8_t *orig_dst = dst;

    /* MPEG2 is defined outside this excerpt (presumably a mode switch). */
    if( MPEG2 )
    {
        /* 3-byte start code prefix 00 00 01 ... */
        *dst++ = 0x00;
        *dst++ = 0x00;
        *dst++ = 0x01;
        /* ... followed by the start code value: i_type itself when it lies in
         * (0, 0xb0) -- the slice case per the original comment -- otherwise a
         * value looked up in structure_to_start_code[]. */
        if( nal->i_type > 0 && nal->i_type < 0xb0 )
            *dst++ = nal->i_type;
        else
            *dst++ = structure_to_start_code[nal->i_type];
        /* Payload is copied verbatim (no nal_escape pass in this branch). */
        memcpy( dst, src, nal->i_payload );
        /* Account for the 4 bytes written in front of the payload. */
        nal->i_payload += 4;
    }
    else
    {
        if( h->param.b_annexb )
        {
            /* Start code: 00 00 00 01 when b_long_startcode, else 00 00 01. */
            if( nal->b_long_startcode )
                *dst++ = 0x00;
            *dst++ = 0x00;
            *dst++ = 0x00;
            *dst++ = 0x01;
        }
        else /* save room for size later */
            dst += 4;

        /* nal header: bit 7 is written as 0, i_ref_idc goes in bits 6-5,
         * i_type in the low bits */
        *dst++ = ( 0x00 << 7 ) | ( nal->i_ref_idc << 5 ) | nal->i_type;

        /* nal_escape is a callback not visible here; it is given [src, end)
         * and returns the new write position (presumably after copying the
         * payload with emulation-prevention bytes inserted). */
        dst = h->bsf.nal_escape( dst, src, end );
        /* Bytes written so far minus 4. */
        int size = (dst - orig_dst) - 4;

        /* Write the size header for mp4/etc */
        if( !h->param.b_annexb )
        {
            /* Size doesn't include the size of the header we're writing now. */
            /* Stored big-endian in the 4 bytes reserved above. */
            orig_dst[0] = size>>24;
            orig_dst[1] = size>>16;
            orig_dst[2] = size>> 8;
            orig_dst[3] = size>> 0;
        }

        /* Equals dst - orig_dst, i.e. everything written by this branch. */
        nal->i_payload = size+4;
        x264_emms();
    }
Пример #4
0
/*
 * check_pixel:
 *   Compares the pixel functions selected for `cpu_new` against the plain C
 *   versions (x264_pixel_init with cpu flags 0). A function is only exercised
 *   when its pointer for `cpu_new` differs from the one for `cpu_ref`, so
 *   implementations already covered by `cpu_ref` are skipped.
 *
 *   Relies on buf1/buf2/buf3, pixel_names, set_func_name, call_c/call_a,
 *   call_c2/call_a2 and report, all defined outside this function.
 *
 *   Returns `ret`, which starts at 0 and is never assigned directly here;
 *   presumably report() updates it from `ok`/`used_asm` -- TODO confirm.
 */
static int check_pixel( int cpu_ref, int cpu_new )
{
    x264_pixel_function_t pixel_c;
    x264_pixel_function_t pixel_ref;
    x264_pixel_function_t pixel_asm;
    x264_predict_t predict_16x16[4+3];
    x264_predict_t predict_8x8c[4+3];
    x264_predict_t predict_4x4[9+3];
    x264_predict8x8_t predict_8x8[9+3];
    DECLARE_ALIGNED_16( uint8_t edge[33] );
    uint16_t cost_mv[32];
    int ret = 0, ok, used_asm;
    int i, j;

    /* Three function tables: C reference, baseline cpu, cpu under test.
     * The intra predictors are always the C versions (cpu flags 0). */
    x264_pixel_init( 0, &pixel_c );
    x264_pixel_init( cpu_ref, &pixel_ref );
    x264_pixel_init( cpu_new, &pixel_asm );
    x264_predict_16x16_init( 0, predict_16x16 );
    x264_predict_8x8c_init( 0, predict_8x8c );
    x264_predict_8x8_init( 0, predict_8x8 );
    x264_predict_4x4_init( 0, predict_4x4 );
    /* Fills `edge`, which is later passed to the 8x8 intra test. */
    x264_predict_8x8_filter( buf2+40, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );

/* TEST_PIXEL( name, align ):
 *   For each of the 7 block sizes, run pixel_c.name[i] and pixel_asm.name[i]
 *   on buf1 against buf2 at 64 offsets and require equal results. The offset
 *   is j*!align, so a non-zero `align` keeps the second pointer at buf2+0 for
 *   every iteration. Stops the inner loop at the first mismatch. */
#define TEST_PIXEL( name, align ) \
    for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
    { \
        int res_c, res_asm; \
        if( pixel_asm.name[i] != pixel_ref.name[i] ) \
        { \
            set_func_name( "%s_%s", #name, pixel_names[i] ); \
            for( j=0; j<64; j++ ) \
            { \
                used_asm = 1; \
                res_c   = call_c( pixel_c.name[i], buf1, 16, buf2+j*!align, 64 ); \
                res_asm = call_a( pixel_asm.name[i], buf1, 16, buf2+j*!align, 64 ); \
                if( res_c != res_asm ) \
                { \
                    ok = 0; \
                    fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \
                    break; \
                } \
            } \
        } \
    } \
    report( "pixel " #name " :" );

    TEST_PIXEL( sad, 0 );
    TEST_PIXEL( ssd, 1 );
    TEST_PIXEL( satd, 0 );
    TEST_PIXEL( sa8d, 0 );

/* TEST_PIXEL_X( N ), N = 3 or 4:
 *   Checks the multi-candidate SAD pixel_asm.sad_xN[i] against N separate
 *   calls of the C single SAD at pix2, pix2+6, pix2+1 (and pix2+10 for N==4).
 *   Both result arrays are int[4] zero-initialised, so for N==3 the unused
 *   fourth slot compares equal. Unlike TEST_PIXEL, a mismatch does not break
 *   out of the offset loop. The trailing call_c2 on pixel_c.sad_xN overwrites
 *   res_asm and its result is not checked (presumably only for timing the C
 *   version -- TODO confirm). */
#define TEST_PIXEL_X( N ) \
    for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
    { \
        int res_c[4]={0}, res_asm[4]={0}; \
        if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
        { \
            set_func_name( "sad_x%d_%s", N, pixel_names[i] ); \
            for( j=0; j<64; j++) \
            { \
                uint8_t *pix2 = buf2+j; \
                used_asm = 1; \
                res_c[0] = pixel_c.sad[i]( buf1, 16, pix2, 64 ); \
                res_c[1] = pixel_c.sad[i]( buf1, 16, pix2+6, 64 ); \
                res_c[2] = pixel_c.sad[i]( buf1, 16, pix2+1, 64 ); \
                if(N==4) \
                { \
                    res_c[3] = pixel_c.sad[i]( buf1, 16, pix2+10, 64 ); \
                    call_a( pixel_asm.sad_x4[i], buf1, pix2, pix2+6, pix2+1, pix2+10, 64, res_asm ); \
                } \
                else \
                    call_a( pixel_asm.sad_x3[i], buf1, pix2, pix2+6, pix2+1, 64, res_asm ); \
                if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
                { \
                    ok = 0; \
                    fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
                             i, res_c[0], res_c[1], res_c[2], res_c[3], \
                             res_asm[0], res_asm[1], res_asm[2], res_asm[3] ); \
                } \
                if(N==4) \
                    call_c2( pixel_c.sad_x4[i], buf1, pix2, pix2+6, pix2+1, pix2+10, 64, res_asm ); \
                else \
                    call_c2( pixel_c.sad_x3[i], buf1, pix2, pix2+6, pix2+1, 64, res_asm ); \
            } \
        } \
    } \
    report( "pixel sad_x"#N" :" );

    TEST_PIXEL_X(3);
    TEST_PIXEL_X(4);

/* TEST_PIXEL_VAR( i ):
 *   Single comparison of pixel_c.var[i] and pixel_asm.var[i] on buf1; both the
 *   return value and the value written through the last pointer argument must
 *   match. Does not reset ok/used_asm or call report(); the caller does. */
#define TEST_PIXEL_VAR( i ) \
    if( pixel_asm.var[i] != pixel_ref.var[i] ) \
    { \
        uint32_t res_c, res_asm; \
        uint32_t sad_c, sad_asm; \
        set_func_name( "%s_%s", "var", pixel_names[i] ); \
        used_asm = 1; \
        res_c   = call_c( pixel_c.var[i], buf1, 16, &sad_c ); \
        res_asm = call_a( pixel_asm.var[i], buf1, 16, &sad_asm ); \
        if( (res_c != res_asm) || (sad_c != sad_asm) ) \
        { \
            ok = 0; \
            fprintf( stderr, "var[%d]: %d,%d != %d,%d [FAILED]\n", i, res_c, sad_c, res_asm, sad_asm ); \
        } \
    }

    ok = 1; used_asm = 0;
    TEST_PIXEL_VAR( PIXEL_16x16 );
    TEST_PIXEL_VAR( PIXEL_8x8 );
    report( "pixel var :" );

/* TEST_INTRA_SATD( name, pred, satd, i8x8, ... ):
 *   Reference: copy 1024 bytes of buf2 into buf3, then for each of the first
 *   three predictors in `pred` predict into buf3+40 and measure the cost with
 *   pixel_c.<satd> against buf1+40. The three costs are compared with the
 *   three values produced by one call of pixel_asm.<name>. `i8x8` selects
 *   whether that call receives `edge` or buf3+40 as its second argument; the
 *   variadic part is forwarded to the predictor. Does not reset ok/used_asm
 *   or call report(); the caller does. */
#define TEST_INTRA_SATD( name, pred, satd, i8x8, ... ) \
    if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
    { \
        int res_c[3], res_asm[3]; \
        set_func_name( #name );\
        used_asm = 1; \
        memcpy( buf3, buf2, 1024 ); \
        for( i=0; i<3; i++ ) \
        { \
            pred[i]( buf3+40, ##__VA_ARGS__ ); \
            res_c[i] = pixel_c.satd( buf1+40, 16, buf3+40, 32 ); \
        } \
        call_a( pixel_asm.name, buf1+40, i8x8 ? edge : buf3+40, res_asm ); \
        if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
        { \
            ok = 0; \
            fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \
                     res_c[0], res_c[1], res_c[2], \
                     res_asm[0], res_asm[1], res_asm[2] ); \
        } \
    }

    ok = 1; used_asm = 0;
    TEST_INTRA_SATD( intra_satd_x3_16x16, predict_16x16, satd[PIXEL_16x16], 0 );
    TEST_INTRA_SATD( intra_satd_x3_8x8c, predict_8x8c, satd[PIXEL_8x8], 0 );
    TEST_INTRA_SATD( intra_satd_x3_4x4, predict_4x4, satd[PIXEL_4x4], 0 );
    TEST_INTRA_SATD( intra_sa8d_x3_8x8, predict_8x8, sa8d[PIXEL_8x8], 1, edge );
    report( "intra satd_x3 :" );

    /* SSIM: run only if either SSIM primitive differs from the baseline. */
    if( pixel_asm.ssim_4x4x2_core != pixel_ref.ssim_4x4x2_core ||
        pixel_asm.ssim_end4 != pixel_ref.ssim_end4 )
    {
        float res_c, res_a;
        int sums[5][4] = {{0}};
        used_asm = ok = 1;
        x264_emms();
        /* Whole-image SSIM through both tables; results are floats, so they
         * are compared with a 1e-6 tolerance instead of exact equality. */
        res_c = x264_pixel_ssim_wxh( &pixel_c,   buf1+2, 32, buf2+2, 32, 32, 28 );
        res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );
        if( fabs(res_c - res_a) > 1e-6 )
        {
            ok = 0;
            fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
        }
        /* The primitives are also invoked directly; their outputs are not
         * compared here (presumably for timing only -- TODO confirm). */
        set_func_name( "ssim_core" );
        call_c2( pixel_c.ssim_4x4x2_core,   buf1+2, 32, buf2+2, 32, sums );
        call_a2( pixel_asm.ssim_4x4x2_core, buf1+2, 32, buf2+2, 32, sums );
        set_func_name( "ssim_end" );
        call_c2( pixel_c.ssim_end4,   sums, sums, 4 );
        call_a2( pixel_asm.ssim_end4, sums, sums, 4 );
        report( "ssim :" );
    }

    /* ads: up to 100 rounds with random inputs, cycling through the 4 ads
     * variants (i&3) and stopping after the first failure. */
    ok = 1; used_asm = 0;
    for( i=0; i<32; i++ )
        cost_mv[i] = i*10;
    for( i=0; i<100 && ok; i++ )
        if( pixel_asm.ads[i&3] != pixel_ref.ads[i&3] )
        {
            DECLARE_ALIGNED_16( uint16_t sums[72] );
            DECLARE_ALIGNED_16( int dc[4] );
            int16_t mvs_a[32], mvs_c[32];
            int mvn_a, mvn_c;
            int thresh = rand() & 0x3fff;
            set_func_name( "esa_ads" );
            for( j=0; j<72; j++ )
                sums[j] = rand() & 0x3fff;
            for( j=0; j<4; j++ )
                dc[j] = rand() & 0x3fff;
            used_asm = 1;
            mvn_c = call_c( pixel_c.ads[i&3], dc, sums, 32, cost_mv, mvs_c, 28, thresh );
            mvn_a = call_a( pixel_asm.ads[i&3], dc, sums, 32, cost_mv, mvs_a, 28, thresh );
            /* Both the returned count and the first mvn_c output entries must
             * agree; on failure both lists are dumped to stdout. */
            if( mvn_c != mvn_a || memcmp( mvs_c, mvs_a, mvn_c*sizeof(*mvs_c) ) )
            {
                ok = 0;
                printf("c%d: ", i&3);
                for(j=0; j<mvn_c; j++)
                    printf("%d ", mvs_c[j]);
                printf("\na%d: ", i&3);
                for(j=0; j<mvn_a; j++)
                    printf("%d ", mvs_a[j]);
                printf("\n\n");
            }
        }
    report( "esa ads:" );

    return ret;
}
Пример #5
0
/*
 * x264_speedcontrol_frame:
 *   Per-frame speed-control step. Updates the buffer model in h->sc with the
 *   time that passed and the frames that were produced since the last call,
 *   refreshes the complexity estimate, warns about idle/underflow conditions
 *   and finally selects and applies an encoding preset.
 *
 *   Keeps function-local static state (idle accounting and debug averages),
 *   so those values are shared by every caller of this function.
 */
void x264_speedcontrol_frame( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc;

    x264_emms();

    /* Buffer bookkeeping for the frame(s) encoded and output since last time:
     * frames produced add to the fill, elapsed time drains it. */
    const int64_t now = x264_mdate();
    const int frames_done = h->i_frame - sc->prev_frame;
    const int64_t elapsed = now - sc->timestamp;
    const int64_t fill_change = frames_done * sc->spf / h->param.sc.f_speed - elapsed;

    sc->buffer_fill += fill_change;
    sc->prev_frame = h->i_frame;
    sc->timestamp = now;

    /* Feed the time predictor, but only if at least one frame went by. */
    if( frames_done != 0 )
    {
        int cpu_time = h->param.sc.b_alt_timer ? sc->cpu_time : elapsed;
        float decay = powf( sc->cplx_decay, frames_done );

        /* Decayed running sums: numerator is time normalised by the cost of
         * the preset that was in use, denominator is the frame count. */
        sc->cplx_num *= decay;
        sc->cplx_num += cpu_time / presets[sc->preset].time;
        sc->cplx_den *= decay;
        sc->cplx_den += frames_done;

        sc->stat.avg_preset += sc->preset * frames_done;
        sc->stat.den += frames_done;
    }

    /* Track the extremes of the buffer fill (before any clamping below). */
    sc->stat.max_buffer = X264_MAX( sc->buffer_fill, sc->stat.max_buffer );
    sc->stat.min_buffer = X264_MIN( sc->buffer_fill, sc->stat.min_buffer );

    if( sc->buffer_fill > sc->buffer_size )
    {
        /* The cpu was idle. Not really an error, but a warning is emitted for
         * debugging purposes; idle time is accumulated and reported at most
         * once per 1e6 time units. */
        static int64_t idle_total = 0, last_report = 0;

        idle_total += sc->buffer_fill - sc->buffer_size;
        if( now - last_report > 1e6 )
        {
            x264_log( h, X264_LOG_WARNING, "speedcontrol idle (%.6f sec)\n", idle_total/1e6 );
            last_report = now;
            idle_total = 0;
        }
        sc->buffer_fill = sc->buffer_size;
    }
    else if( sc->buffer_fill < 0 && fill_change < 0 )
    {
        /* We are late. The fullness is deliberately not clipped to 0: the
         * real buffer may be bigger than specified, so catching up may still
         * be possible. If the application had to drop frames, it should
         * override the buffer fullness (FIXME implement this). */
        x264_log( h, X264_LOG_WARNING, "speedcontrol underflow (%.6f sec)\n", sc->buffer_fill/1e6 );
    }

    /* Choose the preset that should bring the buffer back to 3/4-full within
     * the time given by compensation_period. */
    const float target = sc->spf / h->param.sc.f_speed
                       * (sc->buffer_fill + sc->compensation_period)
                       / (sc->buffer_size*3/4 + sc->compensation_period);
    const float cplx = sc->cplx_num / sc->cplx_den;
    const float filled = (float) sc->buffer_fill / sc->buffer_size;

    /* Walk the preset table until the predicted time reaches the target or
     * the last preset is hit; [lo_time, hi_time] brackets the target. */
    int idx = 1;
    float lo_time = presets[0].time * cplx;
    float hi_time = presets[1].time * cplx;
    while( !( hi_time >= target ) && idx != PRESETS-1 )
    {
        lo_time = hi_time;
        idx++;
        hi_time = presets[idx].time * cplx;
    }

    /* Linear interpolation between the two bracketing presets. */
    float level = idx-1 + (target - lo_time) / (hi_time - lo_time);
    /* Even if the time estimates in the presets table are off, this term
     * pushes towards the target fullness. */
    level += (20 * (filled-0.75));
    level = x264_clip3f( level, 0, PRESETS-1 );
    apply_preset( h, dither( sc, level ) );

    // FIXME
    if( h->param.i_log_level < X264_LOG_DEBUG )
        return;

    /* Debug output: exponentially decayed averages of cpu time, wall time and
     * target, printed on a single self-overwriting line. */
    static float avg_cpu, avg_wall, avg_target, avg_weight;
    float smooth = 1-1/100.;

    avg_cpu = avg_cpu*smooth + sc->cpu_time;
    avg_wall = avg_wall*smooth + elapsed;
    avg_target = avg_target*smooth + target;
    avg_weight = avg_weight*smooth + 1;
    fprintf( stderr, "speed: %.2f %d[%.5f] (t/c/w: %6.0f/%6.0f/%6.0f = %.4f) fps=%.2f\r",
             level, sc->preset, (float)sc->buffer_fill / sc->buffer_size,
             avg_target/avg_weight, avg_cpu/avg_weight, avg_wall/avg_weight,
             avg_cpu/avg_wall, 1e6*avg_weight/avg_wall );
}