コード例 #1
0
/*
 * Compute the average SSIM score between two 8-bit planes.
 *
 * The planes are processed in units of 4x4 pixels: width and height are
 * divided by 4 (rounding down), per-block sums are produced one block row at
 * a time by pf->ssim_4x4x2_core (two horizontally adjacent blocks per call),
 * and pf->ssim_end4 combines the current and the previous row of sums into
 * SSIM values that are accumulated.
 *
 *   pf              function table providing ssim_4x4x2_core and ssim_end4
 *   pix1, stride1   first plane and its line stride
 *   pix2, stride2   second plane and its line stride
 *   width, height   plane dimensions in pixels
 *
 * Returns the accumulated SSIM divided by (width/4-1)*(height/4-1), or 0 if
 * the temporary row buffers cannot be allocated.  When fewer than two 4x4
 * blocks fit in either dimension no SSIM value is accumulated and that
 * divisor is zero.
 */
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
                           uint8_t *pix1, int stride1,
                           uint8_t *pix2, int stride2,
                           int width, int height )
{
    int x, y, z;
    float ssim = 0.0;
    /* Two rows of per-block sums (4 ints per 4x4 block, plus 3 spare blocks). */
    int (*sum0)[4] = x264_malloc(4 * (width/4+3) * sizeof(int));
    int (*sum1)[4] = x264_malloc(4 * (width/4+3) * sizeof(int));

    /* Bail out instead of handing NULL buffers to the SSIM kernels. */
    if( !sum0 || !sum1 )
    {
        if( sum0 )
            x264_free(sum0);
        if( sum1 )
            x264_free(sum1);
        return 0.0f;
    }

    /* From here on width/height count 4x4 blocks, not pixels. */
    width >>= 2;
    height >>= 2;
    z = 0;
    for( y = 1; y < height; y++ )
    {
        /* Fill block rows up to and including y; on the first pass this
         * computes rows 0 and 1, afterwards exactly one new row.  The swap
         * keeps the previous row in sum1 and the newest row in sum0. */
        for( ; z <= y; z++ )
        {
            XCHG( void*, sum0, sum1 );
            for( x = 0; x < width; x+=2 )
                pf->ssim_4x4x2_core( &pix1[4*(x+z*stride1)], stride1, &pix2[4*(x+z*stride2)], stride2, &sum0[x] );
        }
        /* Combine the two rows, at most 4 results per call. */
        for( x = 0; x < width-1; x += 4 )
            ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) );
    }
    x264_free(sum0);
    x264_free(sum1);
    return ssim / ((width-1) * (height-1));
}
コード例 #2
0
/*
 * Write a user_data_unregistered SEI message (payload type 5) carrying the
 * x264 version banner and the encoder options string.
 *
 *   h   encoder handle; h->param is serialized with x264_param2string()
 *   s   bitstream to write to
 *
 * The payload is a 16-byte UUID followed by the banner text including its
 * terminating NUL.  If either temporary string cannot be allocated, nothing
 * is written.
 */
void x264_sei_version_write( x264_t *h, bs_t *s )
{
    int i;
    // random ID number generated according to ISO-11578
    static const uint8_t uuid[16] = {
        0xdc, 0x45, 0xe9, 0xbd, 0xe6, 0xd9, 0x48, 0xb7,
        0x96, 0x2c, 0xd8, 0x20, 0xd9, 0x23, 0xee, 0xef
    };
    char *opts = x264_param2string( &h->param, 0 );
    char *version;
    int length;

    /* strlen()/sprintf() on a NULL string would be undefined behavior. */
    if( !opts )
        return;
    version = x264_malloc( 200 + strlen(opts) );
    if( !version )
    {
        x264_free( opts );
        return;
    }

    sprintf( version, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
             "Copyleft 2003-2008 - http://www.videolan.org/x264.html - options: %s",
             X264_BUILD, X264_VERSION, opts );
    /* payload = UUID (16 bytes) + banner + its NUL terminator */
    length = strlen(version)+1+16;

    bs_write( s, 8, 0x5 ); // payload_type = user_data_unregistered
    // payload_size: one 0xFF byte per full 255, then the remainder
    for( i = 0; i <= length-255; i += 255 )
        bs_write( s, 8, 255 );
    bs_write( s, 8, length-i );

    for( i = 0; i < 16; i++ )
        bs_write( s, 8, uuid[i] );
    for( i = 0; i < length-16; i++ )
        bs_write( s, 8, version[i] );

    bs_rbsp_trailing( s );

    x264_free( opts );
    x264_free( version );
}
コード例 #3
0
ファイル: input.c プロジェクト: xkfz007/src.x264
/*
 * Initialize *pic for the given colorspace and dimensions and allocate one
 * buffer per plane, with every plane stride rounded up via ALIGN( stride, align ).
 *
 *   pic            picture to fill in; it is zeroed first
 *   csp            colorspace, including flag bits (masked with X264_CSP_MASK
 *                  to index x264_cli_csps)
 *   width, height  picture dimensions
 *   align          stride alignment passed to ALIGN()
 *
 * Returns 0 on success and -1 if a plane cannot be allocated.  On failure the
 * planes that were already allocated are released and their pointers reset,
 * so the caller has nothing to free.  An invalid colorspace yields zero
 * planes and returns 0.
 */
static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
{
    memset( pic, 0, sizeof(cli_pic_t) );
    int csp_mask = csp & X264_CSP_MASK;
    if( x264_cli_csp_is_invalid( csp ) )
        pic->img.planes = 0;
    else
        pic->img.planes = x264_cli_csps[csp_mask].planes;
    pic->img.csp    = csp;
    pic->img.width  = width;
    pic->img.height = height;
    for( int i = 0; i < pic->img.planes; i++ )
    {
        /* per-plane width scaled by the colorspace table and the depth factor */
        int stride = width * x264_cli_csps[csp_mask].width[i];
        stride *= x264_cli_csp_depth_factor( csp );
        stride = ALIGN( stride, align );
        uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
        pic->img.plane[i] = x264_malloc( size );
        if( !pic->img.plane[i] )
        {
            /* Do not leak the planes that were allocated before this one. */
            while( --i >= 0 )
            {
                x264_free( pic->img.plane[i] );
                pic->img.plane[i] = NULL;
                pic->img.stride[i] = 0;
            }
            return -1;
        }
        pic->img.stride[i] = stride;
    }

    return 0;
}
コード例 #4
0
/****************************************************************************
 * x264_slurp_file:
 *  Read a whole file into a freshly allocated, NUL-terminated buffer.
 *  A trailing '\n' is appended when the file does not already end with one.
 *
 *  filename  path of the file to read (opened in binary mode)
 *
 *  Returns the buffer (to be released with x264_free) or NULL if the file
 *  cannot be opened, is empty, cannot be read completely, or memory cannot
 *  be allocated.  The file handle is closed on every path.
 ****************************************************************************/
char *x264_slurp_file( const char *filename )
{
    int b_error = 0;
    int i_size;
    char *buf;
    FILE *fh = fopen( filename, "rb" );
    if( !fh )
        return NULL;
    /* Determine the file size by seeking to the end and back. */
    b_error |= fseek( fh, 0, SEEK_END ) < 0;
    b_error |= ( i_size = ftell( fh ) ) <= 0;
    b_error |= fseek( fh, 0, SEEK_SET ) < 0;
    if( b_error )
        goto error;
    /* +2: room for an appended newline and the terminating NUL */
    buf = x264_malloc( i_size+2 );
    if( !buf )
        goto error;
    b_error |= fread( buf, 1, i_size, fh ) != (size_t)i_size;
    fclose( fh );
    if( b_error )
    {
        /* Short read: the buffer content is incomplete, do not inspect it. */
        x264_free( buf );
        return NULL;
    }
    if( buf[i_size-1] != '\n' )
        buf[i_size++] = '\n';
    buf[i_size] = 0;
    return buf;

error:
    fclose( fh );
    return NULL;
}
コード例 #5
0
ファイル: speed.c プロジェクト: submux/obe-vod
/*
 * Allocate and initialize the speed-control state of an encoder and store it
 * in h->sc.
 *
 *   h   encoder handle; h->param.sc is read and partly normalized in place
 *       (f_speed is forced to 1 when not positive, i_buffer_size to >= 3)
 *
 * If the state cannot be allocated, h->sc is left NULL and nothing else is
 * touched.
 * NOTE(review): the function cannot report failure; confirm that callers
 * cope with h->sc == NULL.
 */
void x264_speedcontrol_new( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc = x264_malloc( sizeof(x264_speedcontrol_t) );
    x264_emms();
    if( !sc )
        return;
    memset( sc, 0, sizeof(x264_speedcontrol_t) );

    if( h->param.sc.f_speed <= 0 )
        h->param.sc.f_speed = 1;
    /* Divide in floating point so fractional rates such as 30000/1001 are
     * not truncated to an integer before being stored. */
    sc->fps = (double)h->param.i_fps_num / h->param.i_fps_den;
    /* 1e6 / fps; presumably microseconds per frame - TODO confirm units */
    sc->spf = 1e6 / sc->fps;
    h->param.sc.i_buffer_size = X264_MAX( 3, h->param.sc.i_buffer_size );
    sc->buffer_size = h->param.sc.i_buffer_size * 1e6 / sc->fps;
    /* initial fill is a fraction of the buffer, kept within [spf, buffer_size] */
    sc->buffer_fill = sc->buffer_size * h->param.sc.f_buffer_init;
    sc->buffer_fill = x264_clip3( sc->buffer_fill, sc->spf, sc->buffer_size );
    sc->compensation_period = sc->buffer_size/4;
    sc->timestamp = x264_mdate();
    sc->preset = -1;
    sc->prev_frame = 0;
    sc->cplx_num = 3e3; //FIXME estimate initial complexity
    sc->cplx_den = .1;
    sc->cplx_decay = 1 - 1./h->param.sc.i_buffer_size;
    sc->stat.min_buffer = sc->buffer_size;
    sc->stat.max_buffer = 0;
    /* keep a copy of the parameters as they are at this point */
    sc->user_param = h->param;
}
コード例 #6
0
ファイル: x264Encoder.cpp プロジェクト: yujun1703/264
/*
 * Open an x264 encoder for the given parameters and prepare the reusable
 * input picture descriptor.
 *
 *   param   encoder settings; copied into m_stEncParam and turned into an
 *           x264_param_t by ConfigParam()
 *
 * Returns true on success and false if the encoder cannot be opened or the
 * picture descriptor cannot be allocated.
 * NOTE(review): the declared return type is HRESULT but the values returned
 * are the booleans false/true (numerically 0/1); callers testing the result
 * with SUCCEEDED()/FAILED() would presumably misread it - confirm.
 */
HRESULT CX264Encoder::Init( X264ENCPARAM param )
{
    memset(m_level, (unsigned char)tp_lvl_2, sizeof(m_level));

    m_stEncParam = param;

    x264_param_t st264Param;
    ConfigParam( &st264Param );

    x264_t *h;
    if ( ( h = x264_encoder_open( &st264Param ) ) == NULL )
    {
        LOG(stderr, "x264 [error]: x264_encoder_open failed\n");
        return false;
    }
    m_px264Handle = (void*)h;

    x264_picture_t *pic = (x264_picture_t*)x264_malloc(sizeof(x264_picture_t));
    if ( pic == NULL )
    {
        /* Without this check the memset below would write through NULL. */
        LOG(stderr, "x264 [error]: x264_picture_t allocation failed\n");
        return false;
    }

    memset( pic, 0, sizeof( x264_picture_t ) );
    pic->i_type = X264_TYPE_AUTO;
    pic->i_qpplus1 = X264_QP_AUTO;
    /* NOTE(review): a colorspace constant is stored in i_pic_struct - confirm intent */
    pic->i_pic_struct = X264_CSP_I420;

    /* three planes; the two chroma strides are half the luma stride */
    pic->img.i_csp = X264_CSP_I420;
    pic->img.i_plane = 3;
    pic->img.i_stride[0] = m_stEncParam.iWidth;
    pic->img.i_stride[1] = pic->img.i_stride[2] = m_stEncParam.iWidth>>1;

    m_pPic = (void*)pic;
    return true;
}
コード例 #7
0
ファイル: checkasm.c プロジェクト: w-spencer/sagetv
/*
 * checkasm entry point.
 *
 * Usage: [--bench[=pattern]] [seed]
 *   --bench[=pattern]  enable benchmarking, optionally restricted by pattern
 *   seed               random seed; defaults to x264_mdate()
 *
 * Allocates the shared test buffers, fills them, and runs check_all_flags()
 * (once per buffer alignment when benchmarking).
 *
 * Returns 0 if all tests pass, -1 if a test fails or the buffers cannot be
 * allocated, and 1 if --bench is requested on a non-x86 build.
 */
int main(int argc, char *argv[])
{
    int ret = 0;
    int i;

    if( argc > 1 && !strncmp( argv[1], "--bench", 7 ) )
    {
#if !defined(ARCH_X86) && !defined(ARCH_X86_64)
        fprintf( stderr, "no --bench for your cpu until you port rdtsc\n" );
        return 1;
#endif
        do_bench = 1;
        if( argv[1][7] == '=' )
        {
            bench_pattern = argv[1]+8;
            bench_pattern_len = strlen(bench_pattern);
        }
        /* consume the option so the seed, if any, is argv[1] again */
        argc--;
        argv++;
    }

    i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate();
    /* %u requires an unsigned argument */
    fprintf( stderr, "x264: using random seed %u\n", (unsigned)i );
    srand( i );

    buf1 = x264_malloc( 0x3e00 + 16*BENCH_ALIGNS );
    if( !buf1 )
    {
        fprintf( stderr, "malloc failed, unable to initiate tests!\n" );
        return -1;
    }
    /* buf2..buf4 are windows into the same allocation; the first 0x1e00
     * entries are random, the following 0x2000 are zero */
    buf2 = buf1 + 0xf00;
    buf3 = buf2 + 0xf00;
    buf4 = buf3 + 0x1000;
    for( i=0; i<0x1e00; i++ )
        buf1[i] = rand() & 0xFF;
    memset( buf1+0x1e00, 0, 0x2000 );

    /* 16-byte alignment is guaranteed whenever it's useful, but some functions also vary in speed depending on %64 */
    if( do_bench )
        for( i=0; i<BENCH_ALIGNS && !ret; i++ )
        {
            buf2 = buf1 + 0xf00;
            buf3 = buf2 + 0xf00;
            buf4 = buf3 + 0x1000;
            ret |= x264_stack_pagealign( check_all_flags, i*16 );
            /* shift the buffers by 16 for the next alignment pass */
            buf1 += 16;
            quiet = 1;
            fprintf( stderr, "%d/%d\r", i+1, BENCH_ALIGNS );
        }
    else
        ret = check_all_flags();

    if( ret )
    {
        fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" );
        return -1;
    }
    fprintf( stderr, "x264: All tests passed Yeah :)\n" );
    if( do_bench )
        print_bench();
    return 0;
}
コード例 #8
0
ファイル: depth.c プロジェクト: xing2fan/x264
/*
 * Set up the bit-depth conversion filter and, only when a conversion is
 * actually required, insert it at the head of the filter chain.
 *
 *   handle, filter  in: previous filter handle/descriptor;
 *                   out: this filter's handle/descriptor when it is added
 *   info            video properties; info->csp is updated when the filter
 *                   is added
 *   param           encoder parameters; only i_csp is read
 *   opt_string      optional "bit_depth=N" option string, may be NULL
 *
 * Returns 0 on success (whether or not the filter was added) and -1 on an
 * unsupported request or allocation failure.
 */
static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
                 x264_param_t *param, char *opt_string )
{
    int ret = 0;
    /* non-zero when input and encoder disagree on the high-depth flag */
    int depth_flag_differs = (info->csp ^ param->i_csp) & X264_CSP_HIGH_DEPTH;
    /* input colorspace with the high-depth flag toggled if they disagree */
    int out_csp = info->csp ^ depth_flag_differs;
    int bit_depth = 8*x264_cli_csp_depth_factor( out_csp );

    if( opt_string )
    {
        static const char * const optlist[] = { "bit_depth", NULL };
        char **opts = x264_split_options( opt_string, optlist );

        if( !opts )
            ret = 1;
        else
        {
            /* an explicit bit_depth option overrides the derived values */
            bit_depth = x264_otoi( x264_get_option( "bit_depth", opts ), -1 );

            ret = bit_depth < 8 || bit_depth > 16;
            if( bit_depth > 8 )
                out_csp = out_csp | X264_CSP_HIGH_DEPTH;
            else
                out_csp = out_csp & ~X264_CSP_HIGH_DEPTH;
            depth_flag_differs = (info->csp ^ out_csp) & X264_CSP_HIGH_DEPTH;
            free( opts );
        }
    }

    FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this filter supports only bit depth %d\n", BIT_DEPTH );
    FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" );

    /* nothing to convert: leave the filter chain untouched */
    if( !depth_flag_differs && bit_depth == 8 * x264_cli_csp_depth_factor( out_csp ) )
        return 0;

    FAIL_IF_ERROR( !depth_filter_csp_is_supported(out_csp), "unsupported colorspace.\n" );

    /* the handle is followed in the same allocation by width+1 int16_t entries */
    depth_hnd_t *hnd = x264_malloc( sizeof(depth_hnd_t) + (info->width+1)*sizeof(int16_t) );
    if( !hnd )
        return -1;

    hnd->error_buf = (int16_t*)(hnd + 1);
    hnd->dst_csp = out_csp;
    hnd->bit_depth = bit_depth;
    hnd->prev_hnd = *handle;
    hnd->prev_filter = *filter;

    if( x264_cli_pic_alloc( &hnd->buffer, hnd->dst_csp, info->width, info->height ) )
    {
        x264_free( hnd );
        return -1;
    }

    /* publish this filter as the new head of the chain */
    *handle = hnd;
    *filter = depth_filter;
    info->csp = hnd->dst_csp;

    return 0;
}
コード例 #9
0
/****************************************************************************
 * x264_realloc:
 *  Resize a block obtained from x264_malloc.
 *
 *  p       block to resize, or NULL to simply allocate
 *  i_size  requested size in bytes
 *
 *  With HAVE_MALLOC_H this forwards to realloc().  Otherwise a new block is
 *  allocated, min(old size, new size) bytes are copied and the old block is
 *  freed.  If the new block cannot be allocated, NULL is returned and p is
 *  left untouched, as with realloc().
 ****************************************************************************/
void *x264_realloc( void *p, int i_size )
{
#ifdef HAVE_MALLOC_H
    return realloc( p, i_size );
#else
    int       i_old_size = 0;
    uint8_t * p_new;
    if( p )
    {
        /* The header offset is a BYTE offset, so it must be applied to the
         * uint8_t pointer before converting to int*; applying it after the
         * cast scaled it by sizeof(int) and read unrelated memory.
         * Assumes x264_malloc stores the requested size as an int located
         * sizeof(void**) + sizeof(int) bytes before the returned pointer
         * - TODO confirm against x264_malloc. */
        i_old_size = *( (int*) ( (uint8_t*) p - sizeof( void ** ) -
                                 sizeof( int ) ) );
    }
    p_new = x264_malloc( i_size );
    if( !p_new && i_size > 0 )
    {
        /* Allocation failed: keep the caller's data intact. */
        return NULL;
    }
    if( i_old_size > 0 && i_size > 0 )
    {
        memcpy( p_new, p, ( i_old_size < i_size ) ? i_old_size : i_size );
    }
    x264_free( p );
    return p_new;
#endif
}
コード例 #10
0
ファイル: input.c プロジェクト: BOTCrusher/sagetv
/*
 * Initialize *pic for the given colorspace and dimensions and allocate one
 * buffer per plane.
 *
 *   pic            picture to fill in; it is zeroed first
 *   csp            colorspace, including flag bits (masked with X264_CSP_MASK
 *                  to index x264_cli_csps)
 *   width, height  picture dimensions
 *
 * Returns 0 on success and -1 if a plane cannot be allocated.  On failure the
 * planes that were already allocated are released and their pointers reset,
 * so the caller has nothing to free.  An invalid colorspace yields zero
 * planes and returns 0.
 */
int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
{
    memset( pic, 0, sizeof(cli_pic_t) );
    int csp_mask = csp & X264_CSP_MASK;
    if( x264_cli_csp_is_invalid( csp ) )
        pic->img.planes = 0;
    else
        pic->img.planes = x264_cli_csps[csp_mask].planes;
    pic->img.csp    = csp;
    pic->img.width  = width;
    pic->img.height = height;
    for( int i = 0; i < pic->img.planes; i++ )
    {
        pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
        if( !pic->img.plane[i] )
        {
            /* Do not leak the planes that were allocated before this one. */
            while( --i >= 0 )
            {
                x264_free( pic->img.plane[i] );
                pic->img.plane[i] = NULL;
                pic->img.stride[i] = 0;
            }
            return -1;
        }
        /* stride is the width scaled by the per-plane factor of the colorspace */
        pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i];
    }

    return 0;
}
コード例 #11
0
ファイル: visualize.c プロジェクト: 275958081/netfox
/* {{{ [fold] void x264_visualize_init( x264_t *h ) */
/* Allocate h->visualize with one visualize_t entry for each of the
 * i_mb_width * i_mb_height positions described by h->sps.
 * The allocation result is stored as-is and not checked here. */
void x264_visualize_init( x264_t *h )
{
    const int entry_count = h->sps->i_mb_width * h->sps->i_mb_height;

    h->visualize = x264_malloc( entry_count * sizeof(visualize_t) );
}
コード例 #12
0
/****************************************************************************
 * x264_picture_alloc:
 *  Fill in *pic for a picture of i_width x i_height in colorspace i_csp and
 *  allocate its pixel storage as one contiguous block.
 *
 *  Planar formats (I420/YV12, I422, I444) get three plane pointers into that
 *  block; packed formats (YUYV, RGB/BGR, BGRA) get a single plane.  For an
 *  unknown colorspace "invalid CSP" is printed to stderr and i_plane is set
 *  to 0.  The allocation result is not checked here.
 ****************************************************************************/
void x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
    const int i_luma = i_width * i_height;
    int i_total = 0;          /* size of the whole allocation (planar) */
    int i_chroma = 0;         /* size of the first chroma plane (planar) */
    int i_chroma_stride = 0;  /* stride of both chroma planes (planar) */
    int i_packed = 0;         /* bytes per pixel, non-zero for packed formats */

    pic->i_type = X264_TYPE_AUTO;
    pic->i_qpplus1 = 0;
    pic->img.i_csp = i_csp;

    /* Step 1: derive the layout from the colorspace. */
    switch( i_csp & X264_CSP_MASK )
    {
        case X264_CSP_I420:
        case X264_CSP_YV12:
            i_total = 3 * i_luma / 2;
            i_chroma = i_luma / 4;
            i_chroma_stride = i_width / 2;
            break;

        case X264_CSP_I422:
            i_total = 2 * i_luma;
            i_chroma = i_luma / 2;
            i_chroma_stride = i_width / 2;
            break;

        case X264_CSP_I444:
            i_total = 3 * i_luma;
            i_chroma = i_luma;
            i_chroma_stride = i_width;
            break;

        case X264_CSP_YUYV:
            i_packed = 2;
            break;

        case X264_CSP_RGB:
        case X264_CSP_BGR:
            i_packed = 3;
            break;

        case X264_CSP_BGRA:
            i_packed = 4;
            break;

        default:
            fprintf( stderr, "invalid CSP\n" );
            pic->img.i_plane = 0;
            return;
    }

    /* Step 2a: packed formats use a single interleaved plane. */
    if( i_packed )
    {
        pic->img.i_plane = 1;
        pic->img.plane[0] = x264_malloc( i_packed * i_luma );
        pic->img.i_stride[0] = i_packed * i_width;
        return;
    }

    /* Step 2b: planar formats place the two chroma planes right after luma. */
    pic->img.i_plane = 3;
    pic->img.plane[0] = x264_malloc( i_total );
    pic->img.plane[1] = pic->img.plane[0] + i_luma;
    pic->img.plane[2] = pic->img.plane[1] + i_chroma;
    pic->img.i_stride[0] = i_width;
    pic->img.i_stride[1] = i_chroma_stride;
    pic->img.i_stride[2] = i_chroma_stride;
}
コード例 #13
0
ファイル: frame.c プロジェクト: TravisKraatz/cinelerra
/*
 * Allocate a frame together with all of its per-frame buffers.
 *
 *   h   encoder handle; reads h->param (dimensions, colorspace, B-frame
 *       count, motion-estimation method), h->mb.i_mb_count and
 *       h->frames.b_have_lowres
 *
 * Returns the new frame.  The frame structure is zeroed first, so anything
 * not set below stays 0/NULL.
 * NOTE(review): none of the x264_malloc results are checked in this function.
 */
x264_frame_t *x264_frame_new( x264_t *h )
{
    x264_frame_t   *frame = x264_malloc( sizeof( x264_frame_t ) );
    int i, j;

    int i_mb_count = h->mb.i_mb_count;
    int i_stride;
    int i_lines;

    memset( frame, 0, sizeof(x264_frame_t) );

    /* allocate frame data (+64 for extra data for me) */
    /* width and height are rounded up to a multiple of 16 */
    i_stride = ( ( h->param.i_width  + 15 )&0xfffff0 )+ 64;
    i_lines  = ( ( h->param.i_height + 15 )&0xfffff0 );

    /* Planes 0..2: plane 0 at full size, planes 1 and 2 divided according to
     * the colorspace (I420: both directions, I422: horizontally only). */
    frame->i_plane = 3;
    for( i = 0; i < 3; i++ )
    {
        int i_divh = 1;
        int i_divw = 1;
        if( i > 0 )
        {
            if( h->param.i_csp == X264_CSP_I420 )
                i_divh = i_divw = 2;
            else if( h->param.i_csp == X264_CSP_I422 )
                i_divw = 2;
        }
        frame->i_stride[i] = i_stride / i_divw;
        frame->i_lines[i] = i_lines / i_divh;
        /* 64/i_divh extra lines are allocated on top of the plane height */
        frame->buffer[i] = x264_malloc( frame->i_stride[i] *
                                        ( frame->i_lines[i] + 64 / i_divh ) );

        /* plane[] points inside the buffer, 32/i_divh lines down and
         * 32/i_divw columns in */
        frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
                          frame->i_stride[i] * 32 / i_divh + 32 / i_divw;
    }
    /* the fourth plane slot is unused */
    frame->i_stride[3] = 0;
    frame->i_lines[3] = 0;
    frame->buffer[3] = NULL;
    frame->plane[3] = NULL;

    /* filtered[0] aliases plane 0; filtered[1..3] get their own buffers of
     * the same padded size (buffer slots 4..6) */
    frame->filtered[0] = frame->plane[0];
    for( i = 0; i < 3; i++ )
    {
        frame->buffer[4+i] = x264_malloc( frame->i_stride[0] *
                                        ( frame->i_lines[0] + 64 ) );

        frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
                                frame->i_stride[0] * 32 + 32;
    }

    /* Optional half-size planes (buffer slots 7..10). */
    if( h->frames.b_have_lowres )
    {
        frame->i_stride_lowres = frame->i_stride[0]/2 + 32;
        frame->i_lines_lowres = frame->i_lines[0]/2;
        for( i = 0; i < 4; i++ )
        {
            frame->buffer[7+i] = x264_malloc( frame->i_stride_lowres *
                                            ( frame->i_lines[0]/2 + 64 ) );
            frame->lowres[i] = ((uint8_t*)frame->buffer[7+i]) +
                                frame->i_stride_lowres * 32 + 32;
        }
    }

    /* A uint16_t buffer with the padded plane-0 dimensions (buffer slot 11),
     * allocated only for the ESA motion-estimation method. */
    if( h->param.analyse.i_me_method == X264_ME_ESA )
    {
        frame->buffer[11] = x264_malloc( frame->i_stride[0] * (frame->i_lines[0] + 64) * sizeof(uint16_t) );
        frame->integral = (uint16_t*)frame->buffer[11] + frame->i_stride[0] * 32 + 32;
    }

    /* initial values for a frame that has not been assigned yet */
    frame->i_poc = -1;
    frame->i_type = X264_TYPE_AUTO;
    frame->i_qpplus1 = 0;
    frame->i_pts = -1;
    frame->i_frame = -1;
    frame->i_frame_num = -1;

    /* Per-macroblock arrays; the second mv/ref list is allocated only when
     * B-frames are enabled. */
    frame->mb_type= x264_malloc( i_mb_count * sizeof( int8_t) );
    frame->mv[0]  = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) );
    frame->ref[0] = x264_malloc( 4 * i_mb_count * sizeof( int8_t ) );
    if( h->param.i_bframe )
    {
        frame->mv[1]  = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) );
        frame->ref[1] = x264_malloc( 4 * i_mb_count * sizeof( int8_t ) );
    }
    else
    {
        frame->mv[1]  = NULL;
        frame->ref[1] = NULL;
    }

    /* Per-row arrays with one int for every 16 lines; i_row_satds holds one
     * such array for each (i, j) pair below i_bframe + 2. */
    frame->i_row_bits = x264_malloc( i_lines/16 * sizeof( int ) );
    frame->i_row_qp   = x264_malloc( i_lines/16 * sizeof( int ) );
    for( i = 0; i < h->param.i_bframe + 2; i++ )
        for( j = 0; j < h->param.i_bframe + 2; j++ )
            frame->i_row_satds[i][j] = x264_malloc( i_lines/16 * sizeof( int ) );

    return frame;
}
コード例 #14
0
/****************************************************************************
 * x264_param2string:
 *  Serialize the encoder parameters into a human-readable "key=value" string.
 *
 *  p      parameters to describe
 *  b_res  when non-zero, the string starts with "WxH fps=num/den "
 *
 *  Returns a buffer allocated with x264_malloc (release with x264_free), or
 *  NULL if the allocation fails.  The "cabac=" field is not emitted, so every
 *  field is preceded by a space.
 *  NOTE(review): the buffer is a fixed 1000 bytes and is written with
 *  unbounded sprintf; a very long rc.psz_rc_eq could exceed it - confirm the
 *  limits enforced by callers.
 ****************************************************************************/
char *x264_param2string( x264_param_t *p, int b_res )
{
    char *buf = x264_malloc( 1000 );
    char *s = buf;

    /* sprintf() into a NULL buffer would be undefined behavior. */
    if( !buf )
        return NULL;

    if( b_res )
    {
        s += sprintf( s, "%dx%d ", p->i_width, p->i_height );
        s += sprintf( s, "fps=%d/%d ", p->i_fps_num, p->i_fps_den );
    }

    s += sprintf( s, " ref=%d", p->i_frame_reference );
    s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter,
                  p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta );
    s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
    s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
    s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
    s += sprintf( s, " brdo=%d", p->analyse.b_bframe_rdo );
    s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
    s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
    s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
    s += sprintf( s, " trellis=%d", p->analyse.i_trellis );
    s += sprintf( s, " 8x8dct=%d", p->analyse.b_transform_8x8 );
    s += sprintf( s, " cqm=%d", p->i_cqm_preset );
    s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset );
    s += sprintf( s, " slices=%d", p->i_threads );
    s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
    s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );

    /* B-frame details are listed only when B-frames are enabled */
    s += sprintf( s, " bframes=%d", p->i_bframe );
    if( p->i_bframe )
    {
        s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d wpredb=%d bime=%d",
                      p->b_bframe_pyramid, p->b_bframe_adaptive, p->i_bframe_bias,
                      p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred,
                      p->analyse.b_bidir_me );
    }

    s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
                  p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );

    /* rate control: ABR is reported as 2pass / cbr / abr depending on the
     * stats-read flag and the VBV buffer size */
    s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
                               ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
    if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
    {
        if( p->rc.i_rc_method == X264_RC_CRF )
            s += sprintf( s, " crf=%d", p->rc.i_rf_constant );
        else
            s += sprintf( s, " bitrate=%d ratetol=%.1f",
                          p->rc.i_bitrate, p->rc.f_rate_tolerance );
        s += sprintf( s, " rceq='%s' qcomp=%.2f qpmin=%d qpmax=%d qpstep=%d",
                      p->rc.psz_rc_eq, p->rc.f_qcompress,
                      p->rc.i_qp_min, p->rc.i_qp_max, p->rc.i_qp_step );
        if( p->rc.b_stat_read )
            s += sprintf( s, " cplxblur=%.1f qblur=%.1f",
                          p->rc.f_complexity_blur, p->rc.f_qblur );
        if( p->rc.i_vbv_buffer_size )
            s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d",
                          p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size );
    }
    else if( p->rc.i_rc_method == X264_RC_CQP )
        s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
    /* the ratio fields are skipped for constant QP 0 */
    if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
    {
        s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
        if( p->i_bframe )
            s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
        if( p->rc.i_zones )
            s += sprintf( s, " zones" );
    }

    return buf;
}
コード例 #15
0
ファイル: opencl.c プロジェクト: markjreed/vice-emu
/* Allocation callback with the ADL_CALLBACK calling convention: forwards the
 * request for iSize bytes to x264_malloc and returns its result unchanged. */
static void* ADL_CALLBACK adl_malloc_wrapper( int iSize )
{
    void *block = x264_malloc( iSize );

    return block;
}
コード例 #16
0
ファイル: opencl.c プロジェクト: markjreed/vice-emu
/*
 * Select an OpenCL GPU device for the lookahead, create its context and
 * command queue, and then run x264_opencl_lookahead_alloc().
 *
 *   h   encoder handle; h->opencl.ocl supplies the OpenCL entry points.
 *       On success h->opencl.device, h->opencl.queue and h->opencl.context
 *       are set.  h->param.i_opencl_device is decremented for every suitable
 *       device that is skipped, and h->param.psz_clbin_file gets a default
 *       name when it is not set.
 *
 * Returns the result of x264_opencl_lookahead_alloc() when a device was
 * selected, and -1 when the platforms cannot be queried or no suitable
 * device is found.  The temporary platform/device/format arrays and any
 * context that was not handed over to h are released before returning.
 */
int x264_opencl_lookahead_init( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_platform_id *platforms = NULL;
    cl_device_id *devices = NULL;
    cl_image_format *imageType = NULL;
    cl_context context = NULL;
    int ret = -1;
    cl_uint i;

    /* First call only asks how many platforms exist. */
    cl_uint numPlatforms = 0;
    cl_int status = ocl->clGetPlatformIDs( 0, NULL, &numPlatforms );
    if( status != CL_SUCCESS || !numPlatforms )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }
    platforms = (cl_platform_id*)x264_malloc( sizeof(cl_platform_id) * numPlatforms );
    if( !platforms )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: malloc of installed platforms buffer failed\n" );
        goto fail;
    }
    status = ocl->clGetPlatformIDs( numPlatforms, platforms, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }

    /* Select the first OpenCL platform with a GPU device that supports our
     * required image (texture) formats */
    for( i = 0; i < numPlatforms; i++ )
    {
        cl_uint gpu_count = 0;
        cl_uint gpu;

        /* count the GPU devices of this platform; skip platforms without any */
    	status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
        if( status != CL_SUCCESS || !gpu_count )
            continue;

        /* drop the device list of the previous platform before reallocating */
        x264_free( devices );
        devices = x264_malloc( sizeof(cl_device_id) * gpu_count );
        if( !devices )
            continue;

        status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
        if( status != CL_SUCCESS )
            continue;

        /* Find a GPU device that supports our image formats */
        for( gpu = 0; gpu < gpu_count; gpu++ )
        {
            cl_bool image_support;
            cl_uint imagecount;
            int b_has_r;
            int b_has_rgba;
            cl_uint j;

            /* the candidate is stored in h right away, before it is validated */
			h->opencl.device = devices[gpu];

            /* if the user has specified an exact device ID, skip all other
             * GPUs.  If this device matches, allow it to continue through the
             * checks for supported images, etc.  */
            if( h->param.opencl_device_id && devices[gpu] != (cl_device_id)h->param.opencl_device_id )
                continue;

            image_support = 0;
            status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
            if( status != CL_SUCCESS || !image_support )
                continue;

            /* release the context of a previously rejected candidate */
            if( context )
                ocl->clReleaseContext( context );
            context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
            if( status != CL_SUCCESS || !context )
                continue;

            /* query the number of supported 2D image formats, then the list */
            imagecount = 0;
            status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
            if( status != CL_SUCCESS || !imagecount )
                continue;

            x264_free( imageType );
            imageType = x264_malloc( sizeof(cl_image_format) * imagecount );
            if( !imageType )
                continue;

            status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
            if( status != CL_SUCCESS )
                continue;

            /* both CL_R/CL_UNSIGNED_INT32 and CL_RGBA/CL_UNSIGNED_INT8 are required */
            b_has_r = 0;
            b_has_rgba = 0;
            for( j = 0; j < imagecount; j++ )
            {
                if( imageType[j].image_channel_order == CL_R &&
                    imageType[j].image_channel_data_type == CL_UNSIGNED_INT32 )
                    b_has_r = 1;
                else if( imageType[j].image_channel_order == CL_RGBA &&
                         imageType[j].image_channel_data_type == CL_UNSIGNED_INT8 )
                    b_has_rgba = 1;
            }
            if( !b_has_r || !b_has_rgba )
            {
                char dev_name[64];
                status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(dev_name), dev_name, NULL );
                if( status == CL_SUCCESS )
                {
                    /* emit warning if we are discarding the user's explicit choice */
                    int level = h->param.opencl_device_id ? X264_LOG_WARNING : X264_LOG_DEBUG;
                    x264_log( h, level, "OpenCL: %s does not support required image formats\n", dev_name );
                }
                continue;
            }

            /* user selection of GPU device, skip N first matches */
            if( h->param.i_opencl_device )
            {
                h->param.i_opencl_device--;
                continue;
            }

            h->opencl.queue = ocl->clCreateCommandQueue( context, h->opencl.device, 0, &status );
            if( status != CL_SUCCESS || !h->opencl.queue )
                continue;

            /* hand the context over to h; clearing the local keeps the
             * cleanup code below from releasing it */
            h->opencl.context = context;
            context = NULL;

            ret = 0;
            break;
        }

        /* a device was selected: stop scanning platforms */
        if( !ret )
            break;
    }

    if( !h->param.psz_clbin_file )
        h->param.psz_clbin_file = "x264_lookahead.clbin";

    if( ret )
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n" );
    else
        ret = x264_opencl_lookahead_alloc( h );

    /* common cleanup, reached on success as well as on failure */
fail:
    if( context )
        ocl->clReleaseContext( context );
    x264_free( imageType );
    x264_free( devices );
    x264_free( platforms );
    return ret;
}
コード例 #17
0
ファイル: opencl.c プロジェクト: markjreed/vice-emu
/* The OpenCL source under common/opencl will be merged into common/oclobj.h by
 * the Makefile. It defines a x264_opencl_source byte array which we will pass
 * to clCreateProgramWithSource().  We also attempt to use a cache file for the
 * compiled binary, stored in the current working folder.
 *
 * Builds the lookahead program for h->opencl.device.
 *
 *   h   encoder handle; h->opencl.ocl supplies the OpenCL entry points and
 *       h->opencl.context / h->opencl.device must already be set.
 *       h->opencl.b_device_AMD_SI is written.
 *
 * Returns the built program, or NULL if the device cannot be queried,
 * switchable graphics are detected, the program cannot be created, or the
 * build fails.  After a failed build the build log is written to
 * x264_kernel_build_log.txt when it can be retrieved, and the program is
 * released. */
static cl_program x264_opencl_compile( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_program program = NULL;
    char *build_log = NULL;

    char dev_name[64];
    char dev_vendor[64];
    char driver_version[64];
    cl_int status;
    int vectorize;
    const char *buildopts;
    size_t build_log_len;
    FILE *log_file;

    /* the three status codes are OR-ed together, so any failure is caught
     * by the single check below */
	status  = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME,    sizeof(dev_name), dev_name, NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR,  sizeof(dev_vendor), dev_vendor, NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driver_version), driver_version, NULL );
    if( status != CL_SUCCESS )
        return NULL;

    // Most AMD GPUs have vector registers
    vectorize = !strcmp( dev_vendor, "Advanced Micro Devices, Inc." );
    h->opencl.b_device_AMD_SI = 0;

    if( vectorize )
    {
        cl_uint simdwidth;

        /* Disable OpenCL on Intel/AMD switchable graphics devices */
        if( x264_detect_switchable_graphics() )
        {
            x264_log( h, X264_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
            return NULL;
        }

        /* Detect AMD SouthernIsland or newer device (single-width registers) */
        simdwidth = 4;
        status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
        if( status == CL_SUCCESS && simdwidth == 1 )
        {
            vectorize = 0;
            h->opencl.b_device_AMD_SI = 1;
        }
    }

    x264_log( h, X264_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", dev_vendor, dev_name, h->opencl.b_device_AMD_SI ? "(SI)" : "" );

    /* try the cached program first, fall back to creating it from source */
    program = x264_opencl_cache_load( h, dev_name, dev_vendor, driver_version );
    if( !program )
    {
        const char *strptr;
        size_t size;

        /* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x264_opencl_source */
        x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
        strptr = (const char*)x264_opencl_source;
        size = sizeof(x264_opencl_source);
        program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
        if( status != CL_SUCCESS || !program )
        {
            x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
            return NULL;
        }
    }

    /* Build the program binary for the OpenCL device */
    buildopts = vectorize ? "-DVECTORIZE=1" : "";
    status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
    if( status == CL_SUCCESS )
    {
        x264_opencl_cache_save( h, program, dev_name, dev_vendor, driver_version );
        return program;
    }

    /* Compile failure, should not happen with production code. */

    /* first query only the length of the build log, then fetch the text */
    build_log_len = 0;
    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_len );
    if( status != CL_SUCCESS || !build_log_len )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
        goto fail;
    }

    build_log = x264_malloc( build_log_len );
    if( !build_log )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to alloc build log\n" );
        goto fail;
    }

    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
        goto fail;
    }

    log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
    if( !log_file )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
        goto fail;
    }
    fwrite( build_log, 1, build_log_len, log_file );
    fclose( log_file );
    x264_log( h, X264_LOG_WARNING, "OpenCL: kernel build errors written to x264_kernel_build_log.txt\n" );

    /* every path that gets here is a build failure, including the one that
     * wrote the log successfully */
fail:
    x264_free( build_log );
    if( program )
        ocl->clReleaseProgram( program );
    return NULL;
}
コード例 #18
0
ファイル: frame1.c プロジェクト: JamesLinus/x264-1
/*
 * Allocate a frame together with its pixel buffers and per-macroblock arrays.
 *
 *   h   encoder handle; reads h->param (dimensions, colorspace, B-frame
 *       count) and h->mb.i_mb_count
 *
 * Returns the new frame.  Only the fields assigned below are initialized,
 * and none of the allocations are checked here.
 */
x264_frame_t *x264_frame_new( x264_t *h )
{
    x264_frame_t *frame = x264_malloc( sizeof( x264_frame_t ) );
    const int mb_total = h->mb.i_mb_count;
    /* allocate frame data (+64 for extra data for me); both dimensions are
     * rounded up to a multiple of 16 */
    const int padded_stride = ( ( h->param.i_width  + 15 )&0xfffff0 )+ 64;
    const int padded_lines  = ( ( h->param.i_height + 15 )&0xfffff0 );
    int n;

    /* Planes 0..2: plane 0 at full size, planes 1 and 2 divided according to
     * the colorspace (I420: both directions, I422: horizontally only). */
    frame->i_plane = 3;
    for( n = 0; n < 3; n++ )
    {
        const int b_sub = n > 0;
        const int div_h = ( b_sub && h->param.i_csp == X264_CSP_I420 ) ? 2 : 1;
        const int div_w = ( b_sub && ( h->param.i_csp == X264_CSP_I420 ||
                                       h->param.i_csp == X264_CSP_I422 ) ) ? 2 : 1;
        int stride, offset;

        frame->i_stride[n] = stride = padded_stride / div_w;
        frame->i_lines[n] = padded_lines / div_h;
        frame->buffer[n] = x264_malloc( stride * ( frame->i_lines[n] + 64 / div_h ) );

        /* the visible plane starts 32/div_h lines down and 32/div_w columns in */
        offset = stride * 32 / div_h + 32 / div_w;
        frame->plane[n] = ((uint8_t*)frame->buffer[n]) + offset;
    }

    /* the fourth plane slot is unused */
    frame->buffer[3] = NULL;
    frame->plane[3] = NULL;
    frame->i_stride[3] = 0;
    frame->i_lines[3] = 0;

    /* filtered[0] aliases plane 0; filtered[1..3] own buffer slots 4..6 */
    frame->filtered[0] = frame->plane[0];
    for( n = 1; n <= 3; n++ )
    {
        frame->buffer[3+n] = x264_malloc( frame->i_stride[0] * ( frame->i_lines[0] + 64 ) );
        frame->filtered[n] = ((uint8_t*)frame->buffer[3+n]) + frame->i_stride[0] * 32 + 32;
    }

    /* four half-size planes in buffer slots 7..10 */
    frame->i_stride_lowres = frame->i_stride[0]/2 + 32;
    frame->i_lines_lowres = frame->i_lines[0]/2;
    for( n = 0; n < 4; n++ )
    {
        frame->buffer[7+n] = x264_malloc( frame->i_stride_lowres * ( frame->i_lines[0]/2 + 64 ) );
        frame->lowres[n] = ((uint8_t*)frame->buffer[7+n]) + frame->i_stride_lowres * 32 + 32;
    }

    /* initial values for a frame that has not been assigned yet */
    frame->i_type = X264_TYPE_AUTO;
    frame->i_qpplus1 = 0;
    frame->i_poc = -1;
    frame->i_pts = -1;
    frame->i_frame = -1;
    frame->i_frame_num = -1;

    /* per-macroblock arrays; the second mv/ref list exists only with B-frames */
    frame->mb_type= x264_malloc( mb_total * sizeof( int8_t) );
    frame->mv[0]  = x264_malloc( 2*16 * mb_total * sizeof( int16_t ) );
    frame->ref[0] = x264_malloc( 4 * mb_total * sizeof( int8_t ) );
    frame->mv[1]  = h->param.i_bframe ? x264_malloc( 2*16 * mb_total * sizeof( int16_t ) ) : NULL;
    frame->ref[1] = h->param.i_bframe ? x264_malloc( 4 * mb_total * sizeof( int8_t ) ) : NULL;

    return frame;
}