/* Accumulates SSIM between two pictures in units of 4x4-pixel blocks and
 * returns the accumulated score divided by
 * (block columns - 1) * (block rows - 1).
 *
 * pf               supplies the ssim_4x4x2_core / ssim_end4 kernels.
 * pix1, stride1    first picture and its line stride.
 * pix2, stride2    second picture and its line stride.
 * width, height    picture size in pixels.
 *
 * Two scratch rows of int[4] sums are allocated here and released before
 * returning; the allocation results are not checked. */
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
                           uint8_t *pix1, int stride1,
                           uint8_t *pix2, int stride2,
                           int width, int height )
{
    int (*sum0)[4] = x264_malloc(4 * (width/4+3) * sizeof(int));
    int (*sum1)[4] = x264_malloc(4 * (width/4+3) * sizeof(int));
    const int blocks_w = width >> 2;
    const int blocks_h = height >> 2;
    float total = 0.0;
    int summed_rows = 0;   /* block rows already fed to the core kernel */
    int row, col;

    for( row = 1; row < blocks_h; row++ )
    {
        /* Catch up to block row `row`; the buffers are swapped before each
         * new row so sum1 keeps the previous row's sums. */
        while( summed_rows <= row )
        {
            XCHG( void*, sum0, sum1 );
            /* the core kernel is stepped two block columns per call */
            for( col = 0; col < blocks_w; col += 2 )
                pf->ssim_4x4x2_core( &pix1[4*(col+summed_rows*stride1)], stride1,
                                     &pix2[4*(col+summed_rows*stride2)], stride2,
                                     &sum0[col] );
            summed_rows++;
        }

        /* Combine the two rows, at most four columns per call. */
        for( col = 0; col < blocks_w-1; col += 4 )
            total += pf->ssim_end4( sum0+col, sum1+col, X264_MIN(4,blocks_w-col-1) );
    }

    x264_free(sum0);
    x264_free(sum1);
    return total / ((blocks_w-1) * (blocks_h-1));
}
/* Writes an SEI message of type user_data_unregistered to the bitstream `s`.
 * The payload is a fixed 16-byte UUID followed by a NUL-terminated text
 * banner that contains the build number, version string and the option
 * string produced by x264_param2string() for h->param. */
void x264_sei_version_write( x264_t *h, bs_t *s )
{
    /* random ID number generated according to ISO-11578 */
    const uint8_t uuid[16] =
    {
        0xdc, 0x45, 0xe9, 0xbd, 0xe6, 0xd9, 0x48, 0xb7,
        0x96, 0x2c, 0xd8, 0x20, 0xd9, 0x23, 0xee, 0xef
    };
    char *opts = x264_param2string( &h->param, 0 );
    char *banner = x264_malloc( 200 + strlen(opts) );
    int payload_len;
    int remaining;
    int n;

    sprintf( banner, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
             "Copyleft 2003-2008 - http://www.videolan.org/x264.html - options: %s",
             X264_BUILD, X264_VERSION, opts );

    /* banner text + its terminating NUL + the 16 UUID bytes */
    payload_len = strlen(banner)+1+16;

    /* payload_type = user_data_unregistered */
    bs_write( s, 8, 0x5 );

    /* payload_size: one 255 byte for every full 255, then the remainder */
    remaining = payload_len;
    while( remaining >= 255 )
    {
        bs_write( s, 8, 255 );
        remaining -= 255;
    }
    bs_write( s, 8, remaining );

    for( n = 0; n < 16; n++ )
        bs_write( s, 8, uuid[n] );
    for( n = 0; n < payload_len-16; n++ )
        bs_write( s, 8, banner[n] );

    bs_rbsp_trailing( s );

    x264_free( opts );
    x264_free( banner );
}
/* Resets `pic`, records the colorspace and dimensions, and allocates one
 * buffer per plane of the colorspace.
 *
 * Each plane's stride is the plane width scaled by the colorspace depth
 * factor and rounded with ALIGN( stride, align ).
 * Returns 0 on success and -1 as soon as a plane allocation fails (planes
 * allocated before the failure are left in `pic`). An invalid colorspace
 * yields zero planes and a return value of 0. */
static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
{
    int csp_mask = csp & X264_CSP_MASK;
    int plane = 0;

    memset( pic, 0, sizeof(cli_pic_t) );

    pic->img.planes = x264_cli_csp_is_invalid( csp ) ? 0 : x264_cli_csps[csp_mask].planes;
    pic->img.csp    = csp;
    pic->img.width  = width;
    pic->img.height = height;

    while( plane < pic->img.planes )
    {
        int stride = width * x264_cli_csps[csp_mask].width[plane];
        stride *= x264_cli_csp_depth_factor( csp );
        stride = ALIGN( stride, align );

        /* plane height in rows times the aligned stride */
        uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[plane]) * stride;

        pic->img.plane[plane] = x264_malloc( size );
        if( !pic->img.plane[plane] )
            return -1;
        pic->img.stride[plane] = stride;
        plane++;
    }

    return 0;
}
/****************************************************************************
 * x264_slurp_file:
 *   Reads the whole of `filename` into a freshly allocated, NUL-terminated
 *   buffer. A trailing '\n' is appended when the file does not already end
 *   with one.
 *
 *   Returns the buffer (to be released with x264_free), or NULL if the file
 *   cannot be opened, is empty, cannot be sized or read completely, or the
 *   buffer cannot be allocated. The file is closed on every path.
 ****************************************************************************/
char *x264_slurp_file( const char *filename )
{
    int b_error = 0;
    int i_size;
    char *buf = NULL;
    FILE *fh = fopen( filename, "rb" );
    if( !fh )
        return NULL;

    b_error |= fseek( fh, 0, SEEK_END ) < 0;
    b_error |= ( i_size = ftell( fh ) ) <= 0;
    b_error |= fseek( fh, 0, SEEK_SET ) < 0;
    if( b_error )
        goto fail;

    /* +2: room for an appended newline and the terminating NUL */
    buf = x264_malloc( i_size+2 );
    if( buf == NULL )
        goto fail;

    /* i_size is known to be > 0 here, so the cast cannot change its value */
    if( fread( buf, 1, i_size, fh ) != (size_t)i_size )
        goto fail;
    fclose( fh );

    /* Only inspect the buffer once it is known to be fully read. */
    if( buf[i_size-1] != '\n' )
        buf[i_size++] = '\n';
    buf[i_size] = 0;

    return buf;

fail:
    if( buf )
        x264_free( buf );
    fclose( fh );
    return NULL;
}
/* Allocates and initializes the speed-control state h->sc from h->param.
 *
 * Side effects on h->param: a non-positive sc.f_speed is replaced by 1 and
 * sc.i_buffer_size is raised to at least 3. A copy of the resulting
 * parameters is stored in sc->user_param.
 *
 * If the state cannot be allocated, h->sc is left NULL and nothing else is
 * touched. */
void x264_speedcontrol_new( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc = x264_malloc( sizeof(x264_speedcontrol_t) );
    if( !sc )
        return;

    x264_emms();
    memset( sc, 0, sizeof(x264_speedcontrol_t) );

    if( h->param.sc.f_speed <= 0 )
        h->param.sc.f_speed = 1;

    /* Divide in floating point: an integer division would truncate
     * fractional frame rates such as 30000/1001. */
    sc->fps = (double)h->param.i_fps_num / h->param.i_fps_den;
    sc->spf = 1e6 / sc->fps;

    h->param.sc.i_buffer_size = X264_MAX( 3, h->param.sc.i_buffer_size );
    sc->buffer_size = h->param.sc.i_buffer_size * 1e6 / sc->fps;
    sc->buffer_fill = sc->buffer_size * h->param.sc.f_buffer_init;
    /* keep the initial fill between one frame period and the full buffer */
    sc->buffer_fill = x264_clip3( sc->buffer_fill, sc->spf, sc->buffer_size );
    sc->compensation_period = sc->buffer_size/4;

    sc->timestamp = x264_mdate();
    sc->preset = -1;
    sc->prev_frame = 0;

    sc->cplx_num = 3e3; //FIXME estimate initial complexity
    sc->cplx_den = .1;
    sc->cplx_decay = 1 - 1./h->param.sc.i_buffer_size;

    sc->stat.min_buffer = sc->buffer_size;
    sc->stat.max_buffer = 0;

    sc->user_param = h->param;
}
// Stores the encoder settings, opens an x264 encoder with the parameters
// filled in by ConfigParam(), and allocates the input picture descriptor
// kept in m_pPic.
//
// Returns true on success and false when the encoder cannot be opened or the
// picture descriptor cannot be allocated. In the second case the opened
// encoder stays referenced by m_px264Handle.
//
// NOTE(review): the declared return type is HRESULT but bool-style values
// are returned (true = success) - confirm callers test it as a boolean.
HRESULT CX264Encoder::Init( X264ENCPARAM param )
{
    memset(m_level, (unsigned char)tp_lvl_2, sizeof(m_level));
    m_stEncParam = param;

    x264_param_t st264Param;
    ConfigParam( &st264Param );

    x264_t *h;
    if ( ( h = x264_encoder_open( &st264Param ) ) == NULL )
    {
        LOG(stderr, "x264 [error]: x264_encoder_open failed\n");
        return false;
    }
    m_px264Handle = (void*)h;

    x264_picture_t *pic = (x264_picture_t*)x264_malloc(sizeof(x264_picture_t));
    if ( pic == NULL )
    {
        // Do not memset through a failed allocation.
        LOG(stderr, "x264 [error]: x264_malloc of input picture failed\n");
        return false;
    }
    memset( pic, 0, sizeof( x264_picture_t ) );

    pic->i_type = X264_TYPE_AUTO;
    pic->i_qpplus1 = X264_QP_AUTO;
    // NOTE(review): a colorspace constant is stored in i_pic_struct; this
    // looks unintended - confirm the desired picture structure value.
    pic->i_pic_struct = X264_CSP_I420;
    pic->img.i_csp = X264_CSP_I420;
    pic->img.i_plane = 3;
    // full-width first plane, half-width second and third planes
    pic->img.i_stride[0] = m_stEncParam.iWidth;
    pic->img.i_stride[1] = pic->img.i_stride[2] = m_stEncParam.iWidth>>1;

    m_pPic = (void*)pic;
    return true;
}
int main(int argc, char *argv[]) { int ret = 0; int i; if( argc > 1 && !strncmp( argv[1], "--bench", 7 ) ) { #if !defined(ARCH_X86) && !defined(ARCH_X86_64) fprintf( stderr, "no --bench for your cpu until you port rdtsc\n" ); return 1; #endif do_bench = 1; if( argv[1][7] == '=' ) { bench_pattern = argv[1]+8; bench_pattern_len = strlen(bench_pattern); } argc--; argv++; } i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate(); fprintf( stderr, "x264: using random seed %u\n", i ); srand( i ); buf1 = x264_malloc( 0x3e00 + 16*BENCH_ALIGNS ); buf2 = buf1 + 0xf00; buf3 = buf2 + 0xf00; buf4 = buf3 + 0x1000; for( i=0; i<0x1e00; i++ ) buf1[i] = rand() & 0xFF; memset( buf1+0x1e00, 0, 0x2000 ); /* 16-byte alignment is guaranteed whenever it's useful, but some functions also vary in speed depending on %64 */ if( do_bench ) for( i=0; i<BENCH_ALIGNS && !ret; i++ ) { buf2 = buf1 + 0xf00; buf3 = buf2 + 0xf00; buf4 = buf3 + 0x1000; ret |= x264_stack_pagealign( check_all_flags, i*16 ); buf1 += 16; quiet = 1; fprintf( stderr, "%d/%d\r", i+1, BENCH_ALIGNS ); } else ret = check_all_flags(); if( ret ) { fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" ); return -1; } fprintf( stderr, "x264: All tests passed Yeah :)\n" ); if( do_bench ) print_bench(); return 0; }
/* Filter setup: decides whether a bit-depth conversion stage is required
 * between the input (info->csp) and the encoder (param->i_csp) and, if so,
 * inserts it at the head of the filter chain.
 *
 * opt_string may carry a "bit_depth" option overriding the target depth.
 * Returns 0 on success (whether or not the filter was added) and -1 on
 * allocation failure; the FAIL_IF_ERROR checks handle the rejected depths
 * and colorspaces. */
static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x264_param_t *param, char *opt_string )
{
    int ret = 0;
    /* nonzero when input and encoder disagree on the high-depth flag */
    int change_fmt = (info->csp ^ param->i_csp) & X264_CSP_HIGH_DEPTH;
    /* input colorspace with the high-depth flag toggled when they disagree */
    int csp = info->csp ^ change_fmt;
    int bit_depth = 8*x264_cli_csp_depth_factor( csp );

    if( opt_string )
    {
        static const char * const optlist[] = { "bit_depth", NULL };
        char **opts = x264_split_options( opt_string, optlist );

        if( !opts )
            ret = 1;
        else
        {
            char *str_bit_depth = x264_get_option( "bit_depth", opts );
            bit_depth = x264_otoi( str_bit_depth, -1 );

            ret = bit_depth < 8 || bit_depth > 16;
            if( bit_depth > 8 )
                csp = csp | X264_CSP_HIGH_DEPTH;
            else
                csp = csp & ~X264_CSP_HIGH_DEPTH;
            change_fmt = (info->csp ^ csp) & X264_CSP_HIGH_DEPTH;
            free( opts );
        }
    }

    FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this filter supports only bit depth %d\n", BIT_DEPTH );
    FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" );

    /* only add the filter to the chain if it's needed */
    if( !change_fmt && bit_depth == 8 * x264_cli_csp_depth_factor( csp ) )
        return 0;

    FAIL_IF_ERROR( !depth_filter_csp_is_supported(csp), "unsupported colorspace.\n" );

    /* the handle and its int16_t error buffer share one allocation */
    depth_hnd_t *h = x264_malloc( sizeof(depth_hnd_t) + (info->width+1)*sizeof(int16_t) );
    if( !h )
        return -1;

    h->error_buf   = (int16_t*)(h + 1);
    h->dst_csp     = csp;
    h->bit_depth   = bit_depth;
    h->prev_hnd    = *handle;
    h->prev_filter = *filter;

    if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, info->width, info->height ) )
    {
        x264_free( h );
        return -1;
    }

    *handle = h;
    *filter = depth_filter;
    info->csp = h->dst_csp;

    return 0;
}
/****************************************************************************
 * x264_realloc:
 *   Resizes a block obtained from x264_malloc.
 *
 *   With HAVE_MALLOC_H this forwards to realloc(). Otherwise a new block is
 *   allocated, min(old size, new size) bytes are copied over, and the old
 *   block is released.
 *
 *   Returns the new block, or NULL if it cannot be allocated. In the
 *   fallback path `p` is released even when NULL is returned.
 *   NOTE(review): realloc() keeps `p` valid on failure, so the two paths
 *   differ there - confirm which contract callers rely on.
 ****************************************************************************/
void *x264_realloc( void *p, int i_size )
{
#ifdef HAVE_MALLOC_H
    return realloc( p, i_size );
#else
    int       i_old_size = 0;
    uint8_t * p_new;
    if( p )
    {
        /* The offsets are byte counts, so step back on the uint8_t pointer
         * and only then reinterpret the address as int*. Casting first would
         * scale both offsets by sizeof(int).
         * Assumes x264_malloc stores the block size as an int directly below
         * a void* slot in front of the returned pointer - TODO confirm. */
        i_old_size = *( (int*) ( (uint8_t*) p - sizeof( void ** ) - sizeof( int ) ) );
    }
    p_new = x264_malloc( i_size );
    /* never copy into a failed allocation */
    if( p_new && i_old_size > 0 && i_size > 0 )
    {
        memcpy( p_new, p, ( i_old_size < i_size ) ? i_old_size : i_size );
    }
    x264_free( p );
    return p_new;
#endif
}
/* Resets `pic`, stores the colorspace and dimensions, and allocates every
 * plane of the colorspace with the size given by x264_cli_pic_plane_size().
 * Each plane's stride is the picture width scaled by the colorspace's
 * per-plane width factor.
 *
 * Returns 0 on success, -1 as soon as one plane cannot be allocated.
 * An invalid colorspace results in zero planes and a return value of 0. */
int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
{
    int csp_mask = csp & X264_CSP_MASK;
    int n;

    memset( pic, 0, sizeof(cli_pic_t) );

    if( !x264_cli_csp_is_invalid( csp ) )
        pic->img.planes = x264_cli_csps[csp_mask].planes;
    else
        pic->img.planes = 0;

    pic->img.csp    = csp;
    pic->img.width  = width;
    pic->img.height = height;

    n = 0;
    while( n < pic->img.planes )
    {
        pic->img.plane[n] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, n ) );
        if( !pic->img.plane[n] )
            return -1;
        pic->img.stride[n] = width * x264_cli_csps[csp_mask].width[n];
        n++;
    }

    return 0;
}
/* {{{ [fold] void x264_visualize_init( x264_t *h ) */
/* Allocates h->visualize with room for one visualize_t per macroblock of
 * the picture (SPS macroblock width times height). The allocation result
 * is not checked. */
void x264_visualize_init( x264_t *h )
{
    const int mb_cols = h->sps->i_mb_width;
    const int mb_rows = h->sps->i_mb_height;

    h->visualize = x264_malloc( mb_cols * mb_rows * sizeof(visualize_t) );
}
/****************************************************************************
 * x264_picture_alloc:
 *   Initializes `pic` for the given colorspace and size and allocates its
 *   pixel storage with a single x264_malloc call.
 *
 *   Planar formats (I420/YV12, I422, I444) get three planes laid out back to
 *   back inside that allocation; packed formats (YUYV, RGB/BGR, BGRA) get a
 *   single plane. An unknown colorspace prints "invalid CSP" to stderr and
 *   leaves the picture with zero planes.
 *   The allocation result is not checked.
 ****************************************************************************/
void x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
    int n_planes;
    int total_size;          /* size passed to x264_malloc */
    int first_stride;        /* stride of plane 0 */
    int chroma_size = 0;     /* size of plane 1 (planar formats only) */
    int chroma_stride = 0;   /* stride of planes 1 and 2 (planar formats only) */

    pic->i_type = X264_TYPE_AUTO;
    pic->i_qpplus1 = 0;
    pic->img.i_csp = i_csp;

    /* Pick the layout; allocation and plane wiring are shared below. */
    switch( i_csp & X264_CSP_MASK )
    {
        case X264_CSP_I420:
        case X264_CSP_YV12:
            n_planes      = 3;
            total_size    = 3 * i_width * i_height / 2;
            chroma_size   = i_width * i_height / 4;
            first_stride  = i_width;
            chroma_stride = i_width / 2;
            break;

        case X264_CSP_I422:
            n_planes      = 3;
            total_size    = 2 * i_width * i_height;
            chroma_size   = i_width * i_height / 2;
            first_stride  = i_width;
            chroma_stride = i_width / 2;
            break;

        case X264_CSP_I444:
            n_planes      = 3;
            total_size    = 3 * i_width * i_height;
            chroma_size   = i_width * i_height;
            first_stride  = i_width;
            chroma_stride = i_width;
            break;

        case X264_CSP_YUYV:
            n_planes     = 1;
            total_size   = 2 * i_width * i_height;
            first_stride = 2 * i_width;
            break;

        case X264_CSP_RGB:
        case X264_CSP_BGR:
            n_planes     = 1;
            total_size   = 3 * i_width * i_height;
            first_stride = 3 * i_width;
            break;

        case X264_CSP_BGRA:
            n_planes     = 1;
            total_size   = 4 * i_width * i_height;
            first_stride = 4 * i_width;
            break;

        default:
            fprintf( stderr, "invalid CSP\n" );
            pic->img.i_plane = 0;
            return;
    }

    pic->img.i_plane = n_planes;
    pic->img.plane[0] = x264_malloc( total_size );
    pic->img.i_stride[0] = first_stride;

    if( n_planes == 3 )
    {
        /* plane 1 follows the full-size first plane, plane 2 follows plane 1 */
        pic->img.plane[1] = pic->img.plane[0] + i_width * i_height;
        pic->img.plane[2] = pic->img.plane[1] + chroma_size;
        pic->img.i_stride[1] = chroma_stride;
        pic->img.i_stride[2] = chroma_stride;
    }
}
/* Allocates a zeroed frame together with its picture buffers (three planes,
 * three filtered planes, optional lowres planes, optional integral image
 * for ESA motion search) and its per-macroblock and per-row tables, all
 * sized from the parameters in `h`.
 *
 * Returns the new frame. No allocation result is checked here. */
x264_frame_t *x264_frame_new( x264_t *h )
{
    x264_frame_t *frame = x264_malloc( sizeof( x264_frame_t ) );
    const int mb_total = h->mb.i_mb_count;
    /* width and height rounded up to a multiple of 16; the stride gets 64
     * extra columns ("extra data for me" in the original note) */
    const int padded_stride = ( ( h->param.i_width + 15 )&0xfffff0 )+ 64;
    const int padded_lines  = ( ( h->param.i_height + 15 )&0xfffff0 );
    const int mb_rows = padded_lines/16;
    int chroma_divw = 1;
    int chroma_divh = 1;
    int n, a, b;

    memset( frame, 0, sizeof(x264_frame_t) );

    /* subsampling applied to planes 1 and 2 */
    if( h->param.i_csp == X264_CSP_I420 )
        chroma_divw = chroma_divh = 2;
    else if( h->param.i_csp == X264_CSP_I422 )
        chroma_divw = 2;

    frame->i_plane = 3;
    for( n = 0; n < 3; n++ )
    {
        const int divw = n > 0 ? chroma_divw : 1;
        const int divh = n > 0 ? chroma_divh : 1;

        frame->i_stride[n] = padded_stride / divw;
        frame->i_lines[n]  = padded_lines / divh;

        frame->buffer[n] = x264_malloc( frame->i_stride[n] * ( frame->i_lines[n] + 64 / divh ) );
        /* the visible plane starts inside the buffer, past the top/left border */
        frame->plane[n]  = ((uint8_t*)frame->buffer[n]) + frame->i_stride[n] * 32 / divh + 32 / divw;
    }
    frame->i_stride[3] = 0;
    frame->i_lines[3]  = 0;
    frame->buffer[3]   = NULL;
    frame->plane[3]    = NULL;

    /* filtered[0] aliases the first plane; the other three get own buffers */
    frame->filtered[0] = frame->plane[0];
    for( n = 0; n < 3; n++ )
    {
        frame->buffer[4+n]   = x264_malloc( frame->i_stride[0] * ( frame->i_lines[0] + 64 ) );
        frame->filtered[n+1] = ((uint8_t*)frame->buffer[4+n]) + frame->i_stride[0] * 32 + 32;
    }

    if( h->frames.b_have_lowres )
    {
        frame->i_stride_lowres = frame->i_stride[0]/2 + 32;
        frame->i_lines_lowres  = frame->i_lines[0]/2;
        for( n = 0; n < 4; n++ )
        {
            frame->buffer[7+n] = x264_malloc( frame->i_stride_lowres * ( frame->i_lines[0]/2 + 64 ) );
            frame->lowres[n]   = ((uint8_t*)frame->buffer[7+n]) + frame->i_stride_lowres * 32 + 32;
        }
    }

    if( h->param.analyse.i_me_method == X264_ME_ESA )
    {
        frame->buffer[11] = x264_malloc( frame->i_stride[0] * (frame->i_lines[0] + 64) * sizeof(uint16_t) );
        frame->integral   = (uint16_t*)frame->buffer[11] + frame->i_stride[0] * 32 + 32;
    }

    /* bookkeeping starts out as "not assigned yet" */
    frame->i_poc       = -1;
    frame->i_type      = X264_TYPE_AUTO;
    frame->i_qpplus1   = 0;
    frame->i_pts       = -1;
    frame->i_frame     = -1;
    frame->i_frame_num = -1;

    /* per-macroblock tables; list 1 only exists when B-frames are enabled */
    frame->mb_type = x264_malloc( mb_total * sizeof( int8_t) );
    frame->mv[0]   = x264_malloc( 2*16 * mb_total * sizeof( int16_t ) );
    frame->ref[0]  = x264_malloc( 4 * mb_total * sizeof( int8_t ) );
    if( !h->param.i_bframe )
    {
        frame->mv[1]  = NULL;
        frame->ref[1] = NULL;
    }
    else
    {
        frame->mv[1]  = x264_malloc( 2*16 * mb_total * sizeof( int16_t ) );
        frame->ref[1] = x264_malloc( 4 * mb_total * sizeof( int8_t ) );
    }

    /* per-row tables: one int per 16-line row */
    frame->i_row_bits = x264_malloc( mb_rows * sizeof( int ) );
    frame->i_row_qp   = x264_malloc( mb_rows * sizeof( int ) );
    for( a = 0; a < h->param.i_bframe + 2; a++ )
        for( b = 0; b < h->param.i_bframe + 2; b++ )
            frame->i_row_satds[a][b] = x264_malloc( mb_rows * sizeof( int ) );

    return frame;
}
/****************************************************************************
 * x264_param2string:
 *   Formats the encoder parameters as a "key=value key=value ..." string.
 *   When b_res is nonzero the string starts with the resolution and the
 *   frame rate.
 *
 *   Returns a buffer obtained from x264_malloc (release it with x264_free),
 *   or NULL if the buffer cannot be allocated.
 *
 *   The buffer is 1000 bytes plus the length of the rate-control equation,
 *   which is the only caller-supplied string written into it.
 ****************************************************************************/
char *x264_param2string( x264_param_t *p, int b_res )
{
    size_t buf_size = 1000;
    char *buf;
    char *s;

    /* rceq is only printed for ABR/CRF, so only look at it in that case */
    if( ( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
        && p->rc.psz_rc_eq )
        buf_size += strlen( p->rc.psz_rc_eq );

    buf = x264_malloc( buf_size );
    if( !buf )
        return NULL;
    s = buf;

    if( b_res )
    {
        s += sprintf( s, "%dx%d ", p->i_width, p->i_height );
        s += sprintf( s, "fps=%d/%d ", p->i_fps_num, p->i_fps_den );
    }

    /* NOTE(review): the original has a bare `///` marker in front of the
     * next statement; it is treated as a standalone comment, not as
     * disabling the cabac field - confirm. */
    s += sprintf( s, "cabac=%d", p->b_cabac );
    s += sprintf( s, " ref=%d", p->i_frame_reference );
    s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter,
                  p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta );
    s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
    s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
    s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
    s += sprintf( s, " brdo=%d", p->analyse.b_bframe_rdo );
    s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
    s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
    s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
    s += sprintf( s, " trellis=%d", p->analyse.i_trellis );
    s += sprintf( s, " 8x8dct=%d", p->analyse.b_transform_8x8 );
    s += sprintf( s, " cqm=%d", p->i_cqm_preset );
    s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset );
    s += sprintf( s, " slices=%d", p->i_threads );
    s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
    s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );

    s += sprintf( s, " bframes=%d", p->i_bframe );
    if( p->i_bframe )
    {
        s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d wpredb=%d bime=%d",
                      p->b_bframe_pyramid, p->b_bframe_adaptive, p->i_bframe_bias,
                      p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred,
                      p->analyse.b_bidir_me );
    }

    s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
                  p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );

    s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
                               ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
    if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
    {
        if( p->rc.i_rc_method == X264_RC_CRF )
            s += sprintf( s, " crf=%d", p->rc.i_rf_constant );
        else
            s += sprintf( s, " bitrate=%d ratetol=%.1f",
                          p->rc.i_bitrate, p->rc.f_rate_tolerance );
        s += sprintf( s, " rceq='%s' qcomp=%.2f qpmin=%d qpmax=%d qpstep=%d",
                      p->rc.psz_rc_eq, p->rc.f_qcompress,
                      p->rc.i_qp_min, p->rc.i_qp_max, p->rc.i_qp_step );
        if( p->rc.b_stat_read )
            s += sprintf( s, " cplxblur=%.1f qblur=%.1f",
                          p->rc.f_complexity_blur, p->rc.f_qblur );
        if( p->rc.i_vbv_buffer_size )
            s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d",
                          p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size );
    }
    else if( p->rc.i_rc_method == X264_RC_CQP )
        s += sprintf( s, " qp=%d", p->rc.i_qp_constant );

    /* ratios and zones are skipped only for constant QP 0 */
    if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
    {
        s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
        if( p->i_bframe )
            s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
        if( p->rc.i_zones )
            s += sprintf( s, " zones" );
    }

    return buf;
}
/* Allocation callback with the ADL_CALLBACK calling convention: returns
 * iSize bytes obtained from x264_malloc.
 * Presumably handed to the ADL SDK as its memory allocator - confirm at the
 * registration site. */
static void* ADL_CALLBACK adl_malloc_wrapper( int iSize )
{
    void *mem = x264_malloc( iSize );
    return mem;
}
/* Picks an OpenCL GPU device for lookahead and stores the chosen device,
 * command queue and context in h->opencl.
 *
 * Platforms are scanned in order; on each, GPU devices are tried until one
 * reports image support, supports both required image formats (CL_R with
 * CL_UNSIGNED_INT32 and CL_RGBA with CL_UNSIGNED_INT8) and yields a command
 * queue. h->param.opencl_device_id restricts the search to that exact
 * device; h->param.i_opencl_device skips that many otherwise suitable
 * devices (the counter in h->param is decremented while skipping).
 *
 * h->param.psz_clbin_file is given a default name when it is unset.
 *
 * Returns -1 when the platforms cannot be enumerated or no device
 * qualifies; otherwise the result of x264_opencl_lookahead_alloc().
 * All temporary buffers, and any context that was not handed over to
 * h->opencl, are released before returning. */
int x264_opencl_lookahead_init( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_platform_id *platforms = NULL;
    cl_device_id *devices = NULL;
    cl_image_format *imageType = NULL;
    cl_context context = NULL;
    int ret = -1;
    cl_uint i;

    /* First call only asks how many platforms exist. */
    cl_uint numPlatforms = 0;
    cl_int status = ocl->clGetPlatformIDs( 0, NULL, &numPlatforms );
    if( status != CL_SUCCESS || !numPlatforms )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }
    platforms = (cl_platform_id*)x264_malloc( sizeof(cl_platform_id) * numPlatforms );
    if( !platforms )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: malloc of installed platforms buffer failed\n" );
        goto fail;
    }
    status = ocl->clGetPlatformIDs( numPlatforms, platforms, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }

    /* Select the first OpenCL platform with a GPU device that supports our
     * required image (texture) formats */
    for( i = 0; i < numPlatforms; i++ )
    {
        cl_uint gpu_count = 0;
        cl_uint gpu;

        status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
        if( status != CL_SUCCESS || !gpu_count )
            continue;

        /* Drop the device list of the previous platform before replacing it. */
        x264_free( devices );
        devices = x264_malloc( sizeof(cl_device_id) * gpu_count );
        if( !devices )
            continue;

        status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
        if( status != CL_SUCCESS )
            continue;

        /* Find a GPU device that supports our image formats */
        for( gpu = 0; gpu < gpu_count; gpu++ )
        {
            cl_bool image_support;
            cl_uint imagecount;
            int b_has_r;
            int b_has_rgba;
            cl_uint j;

            h->opencl.device = devices[gpu];

            /* if the user has specified an exact device ID, skip all other
             * GPUs. If this device matches, allow it to continue through the
             * checks for supported images, etc. */
            if( h->param.opencl_device_id && devices[gpu] != (cl_device_id)h->param.opencl_device_id )
                continue;

            image_support = 0;
            status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
            if( status != CL_SUCCESS || !image_support )
                continue;

            /* A context left over from a rejected device is released before
             * a new one is created for this device. */
            if( context )
                ocl->clReleaseContext( context );
            context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
            if( status != CL_SUCCESS || !context )
                continue;

            /* Query the number of supported 2D image formats, then fetch them. */
            imagecount = 0;
            status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
            if( status != CL_SUCCESS || !imagecount )
                continue;

            x264_free( imageType );
            imageType = x264_malloc( sizeof(cl_image_format) * imagecount );
            if( !imageType )
                continue;

            status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
            if( status != CL_SUCCESS )
                continue;

            /* Both formats must be present for the device to qualify. */
            b_has_r = 0;
            b_has_rgba = 0;
            for( j = 0; j < imagecount; j++ )
            {
                if( imageType[j].image_channel_order == CL_R &&
                    imageType[j].image_channel_data_type == CL_UNSIGNED_INT32 )
                    b_has_r = 1;
                else if( imageType[j].image_channel_order == CL_RGBA &&
                         imageType[j].image_channel_data_type == CL_UNSIGNED_INT8 )
                    b_has_rgba = 1;
            }
            if( !b_has_r || !b_has_rgba )
            {
                char dev_name[64];
                status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(dev_name), dev_name, NULL );
                if( status == CL_SUCCESS )
                {
                    /* emit warning if we are discarding the user's explicit choice */
                    int level = h->param.opencl_device_id ? X264_LOG_WARNING : X264_LOG_DEBUG;
                    x264_log( h, level, "OpenCL: %s does not support required image formats\n", dev_name );
                }
                continue;
            }

            /* user selection of GPU device, skip N first matches */
            if( h->param.i_opencl_device )
            {
                h->param.i_opencl_device--;
                continue;
            }

            h->opencl.queue = ocl->clCreateCommandQueue( context, h->opencl.device, 0, &status );
            if( status != CL_SUCCESS || !h->opencl.queue )
                continue;

            /* Ownership of the context moves to h->opencl; clearing the
             * local keeps the cleanup below from releasing it. */
            h->opencl.context = context;
            context = NULL;

            ret = 0;
            break;
        }

        /* Stop scanning platforms once a device has been chosen. */
        if( !ret )
            break;
    }

    if( !h->param.psz_clbin_file )
        h->param.psz_clbin_file = "x264_lookahead.clbin";

    if( ret )
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n" );
    else
        ret = x264_opencl_lookahead_alloc( h );

fail:
    /* Shared cleanup for the success and failure paths. */
    if( context )
        ocl->clReleaseContext( context );
    x264_free( imageType );
    x264_free( devices );
    x264_free( platforms );
    return ret;
}
/* Fetches the build log of a program that failed to build and writes it to
 * x264_kernel_build_log.txt. A warning is logged for whichever step fails,
 * or to announce the file once it has been written. */
static void x264_opencl_dump_build_log( x264_t *h, cl_program program )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    char *log_text = NULL;
    size_t log_len = 0;
    FILE *out;
    cl_int err;

    /* first query: length of the log only */
    err = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_len );
    if( err != CL_SUCCESS || !log_len )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
        goto done;
    }

    log_text = x264_malloc( log_len );
    if( !log_text )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to alloc build log\n" );
        goto done;
    }

    err = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, log_len, log_text, NULL );
    if( err != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
        goto done;
    }

    out = x264_fopen( "x264_kernel_build_log.txt", "w" );
    if( !out )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
        goto done;
    }
    fwrite( log_text, 1, log_len, out );
    fclose( out );
    x264_log( h, X264_LOG_WARNING, "OpenCL: kernel build errors written to x264_kernel_build_log.txt\n" );

done:
    x264_free( log_text );
}

/* The Makefile merges the OpenCL source under common/opencl into
 * common/oclobj.h, which defines the x264_opencl_source byte array handed to
 * clCreateProgramWithSource(). A cache file holding the compiled binary,
 * stored in the current working folder, is tried first.
 *
 * Returns the built program, or NULL when device information is unavailable,
 * switchable graphics are detected, the program cannot be created, or the
 * build fails (in which case the build log is dumped to a file). */
static cl_program x264_opencl_compile( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_program program = NULL;
    char device_name[64];
    char vendor_name[64];
    char driver[64];
    const char *build_flags;
    int use_vectors;
    cl_int status;

    status  = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME,    sizeof(device_name), device_name, NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR,  sizeof(vendor_name), vendor_name, NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driver),      driver,      NULL );
    if( status != CL_SUCCESS )
        return NULL;

    /* Most AMD GPUs have vector registers */
    use_vectors = !strcmp( vendor_name, "Advanced Micro Devices, Inc." );
    h->opencl.b_device_AMD_SI = 0;

    if( use_vectors )
    {
        cl_uint simd_width = 4;

        /* Disable OpenCL on Intel/AMD switchable graphics devices */
        if( x264_detect_switchable_graphics() )
        {
            x264_log( h, X264_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
            return NULL;
        }

        /* Detect AMD SouthernIsland or newer device (single-width registers) */
        status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simd_width, NULL );
        if( status == CL_SUCCESS && simd_width == 1 )
        {
            use_vectors = 0;
            h->opencl.b_device_AMD_SI = 1;
        }
    }

    x264_log( h, X264_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", vendor_name, device_name, h->opencl.b_device_AMD_SI ? "(SI)" : "" );

    program = x264_opencl_cache_load( h, device_name, vendor_name, driver );
    if( !program )
    {
        /* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x264_opencl_source */
        const char *src;
        size_t src_len;

        x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
        src = (const char*)x264_opencl_source;
        src_len = sizeof(x264_opencl_source);
        program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &src, &src_len, &status );
        if( status != CL_SUCCESS || !program )
        {
            x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
            return NULL;
        }
    }

    /* Build the program binary for the OpenCL device */
    if( use_vectors )
        build_flags = "-DVECTORIZE=1";
    else
        build_flags = "";
    status = ocl->clBuildProgram( program, 1, &h->opencl.device, build_flags, NULL, NULL );
    if( status != CL_SUCCESS )
    {
        /* Compile failure, should not happen with production code. */
        x264_opencl_dump_build_log( h, program );
        if( program )
            ocl->clReleaseProgram( program );
        return NULL;
    }

    x264_opencl_cache_save( h, program, device_name, vendor_name, driver );
    return program;
}
/* Allocates a frame together with its picture buffers (three planes, three
 * filtered planes, four lowres planes) and its per-macroblock tables, all
 * sized from the parameters in `h`.
 *
 * Returns the new frame. The frame structure is not zeroed and no
 * allocation result is checked here. */
x264_frame_t *x264_frame_new( x264_t *h )
{
    x264_frame_t *frame = x264_malloc( sizeof( x264_frame_t ) );
    const int mb_total = h->mb.i_mb_count;
    /* width and height rounded up to a multiple of 16; the stride gets 64
     * extra columns ("extra data for me" in the original note) */
    const int padded_stride = ( ( h->param.i_width + 15 )&0xfffff0 )+ 64;
    const int padded_lines  = ( ( h->param.i_height + 15 )&0xfffff0 );
    int chroma_divw = 1;
    int chroma_divh = 1;
    int n;

    /* subsampling applied to planes 1 and 2 */
    if( h->param.i_csp == X264_CSP_I420 )
        chroma_divw = chroma_divh = 2;
    else if( h->param.i_csp == X264_CSP_I422 )
        chroma_divw = 2;

    frame->i_plane = 3;
    for( n = 0; n < 3; n++ )
    {
        const int divw = n > 0 ? chroma_divw : 1;
        const int divh = n > 0 ? chroma_divh : 1;

        frame->i_stride[n] = padded_stride / divw;
        frame->i_lines[n]  = padded_lines / divh;

        frame->buffer[n] = x264_malloc( frame->i_stride[n] * ( frame->i_lines[n] + 64 / divh ) );
        /* the visible plane starts inside the buffer, past the top/left border */
        frame->plane[n]  = ((uint8_t*)frame->buffer[n]) + frame->i_stride[n] * 32 / divh + 32 / divw;
    }
    frame->i_stride[3] = 0;
    frame->i_lines[3]  = 0;
    frame->buffer[3]   = NULL;
    frame->plane[3]    = NULL;

    /* filtered[0] aliases the first plane; the other three get own buffers */
    frame->filtered[0] = frame->plane[0];
    for( n = 0; n < 3; n++ )
    {
        frame->buffer[4+n]   = x264_malloc( frame->i_stride[0] * ( frame->i_lines[0] + 64 ) );
        frame->filtered[n+1] = ((uint8_t*)frame->buffer[4+n]) + frame->i_stride[0] * 32 + 32;
    }

    /* half-resolution planes */
    frame->i_stride_lowres = frame->i_stride[0]/2 + 32;
    frame->i_lines_lowres  = frame->i_lines[0]/2;
    for( n = 0; n < 4; n++ )
    {
        frame->buffer[7+n] = x264_malloc( frame->i_stride_lowres * ( frame->i_lines[0]/2 + 64 ) );
        frame->lowres[n]   = ((uint8_t*)frame->buffer[7+n]) + frame->i_stride_lowres * 32 + 32;
    }

    /* bookkeeping starts out as "not assigned yet" */
    frame->i_poc       = -1;
    frame->i_type      = X264_TYPE_AUTO;
    frame->i_qpplus1   = 0;
    frame->i_pts       = -1;
    frame->i_frame     = -1;
    frame->i_frame_num = -1;

    /* per-macroblock tables; list 1 only exists when B-frames are enabled */
    frame->mb_type = x264_malloc( mb_total * sizeof( int8_t) );
    frame->mv[0]   = x264_malloc( 2*16 * mb_total * sizeof( int16_t ) );
    frame->ref[0]  = x264_malloc( 4 * mb_total * sizeof( int8_t ) );
    if( !h->param.i_bframe )
    {
        frame->mv[1]  = NULL;
        frame->ref[1] = NULL;
    }
    else
    {
        frame->mv[1]  = x264_malloc( 2*16 * mb_total * sizeof( int16_t ) );
        frame->ref[1] = x264_malloc( 4 * mb_total * sizeof( int8_t ) );
    }

    return frame;
}