/* Warn when the stream configuration exceeds the limits of the selected level.
 *
 * The level is looked up by h->param.i_level_idc in x264_levels, a table that
 * ends with an entry whose level_idc is 0. One X264_LOG_WARNING is emitted per
 * exceeded limit; nothing is modified and nothing is returned. */
void x264_validate_levels( x264_t *h )
{
    int mbs;
    const x264_level_t *l = x264_levels;

    while( l->level_idc != 0 && l->level_idc != h->param.i_level_idc )
        l++;

    /* The search ran into the table terminator: the requested level is not a
     * known one, so there are no limits to validate against. Say so once
     * instead of comparing every value with the terminator's fields. */
    if( l->level_idc == 0 )
    {
        x264_log( h, X264_LOG_WARNING, "invalid level_idc (%d), level limits not checked\n",
                  h->param.i_level_idc );
        return;
    }

    mbs = h->sps->i_mb_width * h->sps->i_mb_height;
    /* Besides the total macroblock count, neither dimension squared may
     * exceed 8 * frame_size. */
    if( l->frame_size < mbs
        || l->frame_size*8 < h->sps->i_mb_width * h->sps->i_mb_width
        || l->frame_size*8 < h->sps->i_mb_height * h->sps->i_mb_height )
        x264_log( h, X264_LOG_WARNING, "frame MB size (%dx%d) > level limit (%d)\n",
                  h->sps->i_mb_width, h->sps->i_mb_height, l->frame_size );

/* Single-statement form so the macro is safe after an unbraced `if`. */
#define CHECK( name, limit, val ) \
    do \
    { \
        if( (val) > (limit) ) \
            x264_log( h, X264_LOG_WARNING, name " (%d) > level limit (%d)\n", (int)(val), (limit) ); \
    } while( 0 )

    CHECK( "DPB size", l->dpb, mbs * 384 * h->sps->i_num_ref_frames );
    CHECK( "VBV bitrate", l->bitrate, h->param.rc.i_vbv_max_bitrate );
    CHECK( "VBV buffer", l->cpb, h->param.rc.i_vbv_buffer_size );
    CHECK( "MV range", l->mv_range, h->param.analyse.i_mv_range );

    /* The macroblock rate needs a valid frame rate; skip it otherwise. */
    if( h->param.i_fps_den > 0 )
        CHECK( "MB rate", l->mbps, (int64_t)mbs * h->param.i_fps_num / h->param.i_fps_den );

#undef CHECK

    if( h->sps->b_direct8x8_inference < l->direct8x8 )
        x264_log( h, X264_LOG_WARNING, "direct 8x8 inference (0) < level requirement (1)\n" );

    /* TODO check the rest of the limits */
}
/* Error callback handed to clCreateContext(); user_data is the encoder handle.
 *
 * Every notification is treated as fatal for the OpenCL context: OpenCL use is
 * switched off and the fatal-error flag is raised before anything is logged,
 * so that the context stops being used immediately. */
static void CL_CALLBACK x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
{
    x264_t *encoder = user_data;

    encoder->param.b_opencl = 0;
    encoder->opencl.b_fatal_error = 1;

    x264_log( encoder, X264_LOG_ERROR, "OpenCL: %s\n", errinfo );
    x264_log( encoder, X264_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
}
/* Save the compiled program binary to a file for later reuse. Device context
 * is also saved in the cache file so we do not reuse stale binaries.
 *
 * File layout: device name, device vendor, driver version and the OpenCL
 * source hash, one per line, followed by the raw program binary.
 *
 * The binary is queried before the cache file is opened, so a failed query or
 * allocation no longer truncates an existing cache file or leaves an empty
 * one behind. All failures are reported at X264_LOG_INFO and are non-fatal. */
static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    FILE *fp = NULL;
    uint8_t *binary = NULL;
    size_t size = 0;
    cl_int status;
    int b_write_error;

    status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
    if( status != CL_SUCCESS || !size )
    {
        x264_log( h, X264_LOG_INFO, "OpenCL: Unable to query program binary size, no cache file generated\n" );
        goto fail;
    }

    CHECKED_MALLOC( binary, size );

    /* CL_PROGRAM_BINARIES fills an array of buffer pointers; a single
     * pointer is passed here. */
    status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_INFO, "OpenCL: Unable to query program binary, no cache file generated\n" );
        goto fail;
    }

    fp = x264_fopen( h->param.psz_clbin_file, "wb" );
    if( !fp )
    {
        x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
        goto fail;
    }

    fputs( dev_name, fp );
    fputc( '\n', fp );
    fputs( dev_vendor, fp );
    fputc( '\n', fp );
    fputs( driver_version, fp );
    fputc( '\n', fp );
    fputs( x264_opencl_source_hash, fp );
    fputc( '\n', fp );

    /* ferror() also catches a failure of any of the header writes above;
     * fclose() flushes and can itself report a write error. */
    b_write_error = fwrite( binary, 1, size, fp ) != size || ferror( fp );
    if( fclose( fp ) )
        b_write_error = 1;
    fp = NULL;

    if( b_write_error )
        x264_log( h, X264_LOG_INFO, "OpenCL: error writing clbin file, cache may be incomplete\n" );

fail:
    if( fp )
        fclose( fp );
    x264_free( binary );
    return;
}
/* Log the speedcontrol summary statistics and release the speedcontrol state.
 *
 * Does nothing when h->sc is NULL. After the state is freed h->sc is reset to
 * NULL, so the handle no longer holds a dangling pointer and a repeated call
 * is caught by the NULL check. */
void x264_speedcontrol_delete( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc;
    if( !sc )
        return;

    /* Average preset over the accumulated frame count, and the extreme buffer
     * fill levels as a fraction of the buffer size. */
    x264_log( h, X264_LOG_INFO, "speedcontrol: avg preset=%.3f  buffer min=%.3f max=%.3f\n",
              sc->stat.avg_preset / sc->stat.den,
              (float)sc->stat.min_buffer / sc->buffer_size,
              (float)sc->stat.max_buffer / sc->buffer_size );
//  x264_log( h, X264_LOG_INFO, "speedcontrol: avg cplx=%.5f\n", sc->cplx_num / sc->cplx_den );

    x264_free( sc );
    h->sc = NULL;
}
/* Copy a user-supplied picture into an internal frame.
 *
 * The frame type, qpplus1 and pts are copied as-is; the pixel data is
 * converted by the h->csp routine that matches the picture's colorspace
 * (src->img.i_csp masked with X264_CSP_MASK), using the configured width and
 * height. An unrecognised colorspace is logged as an error and no pixel data
 * is copied. */
void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
{
/* Every converter is called with the same argument list. */
#define CONVERT_WITH( fn ) \
    h->csp.fn( dst, &src->img, h->param.i_width, h->param.i_height )

    dst->i_type    = src->i_type;
    dst->i_qpplus1 = src->i_qpplus1;
    dst->i_pts     = src->i_pts;

    switch( src->img.i_csp & X264_CSP_MASK )
    {
        case X264_CSP_I420: CONVERT_WITH( i420 ); break;
        case X264_CSP_YV12: CONVERT_WITH( yv12 ); break;
        case X264_CSP_I422: CONVERT_WITH( i422 ); break;
        case X264_CSP_I444: CONVERT_WITH( i444 ); break;
        case X264_CSP_YUYV: CONVERT_WITH( yuyv ); break;
        case X264_CSP_RGB:  CONVERT_WITH( rgb );  break;
        case X264_CSP_BGR:  CONVERT_WITH( bgr );  break;
        case X264_CSP_BGRA: CONVERT_WITH( bgra ); break;

        default:
            x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
            break;
    }

#undef CONVERT_WITH
}
/* Reconfigure the encoder to run with speedcontrol preset number `preset`.
 *
 * The index is clipped to [0, PRESETS-1]. The new parameter set starts from
 * the saved user parameters (sc->user_param) and overrides only the fields
 * that the preset table controls. The reconfiguration is applied on every
 * call, even when the preset did not change (the "only if changed" test is
 * deliberately left disabled, as in the original). */
static void apply_preset( x264_t *h, int preset )
{
    x264_speedcontrol_t *sc = h->sc;
    const sc_preset_t *entry;
    x264_param_t param;

    preset = x264_clip3( preset, 0, PRESETS-1 );
    entry = &presets[preset];

    //if( preset != sc->preset )
    param = sc->user_param;

    param.i_frame_reference = entry->refs;

    param.analyse.inter              = entry->partitions;
    param.analyse.i_subpel_refine    = entry->subme;
    param.analyse.i_me_method        = entry->me;
    param.analyse.i_trellis          = entry->trellis;
    param.analyse.b_mixed_references = entry->mix;
    param.analyse.b_chroma_me        = entry->chromame;
    param.analyse.f_psy_rd           = entry->psy_rd;
    param.analyse.f_psy_trellis      = entry->psy_trellis;

    x264_encoder_reconfig( h, &param );
    sc->preset = preset;

    x264_log( h, X264_LOG_DEBUG, "Applying speedcontrol preset %d.\n", preset );
}
/**************************************************************************** * x264_malloc: ****************************************************************************/ void *x264_malloc( int i_size ) { uint8_t *align_buf = NULL; #if HAVE_MALLOC_H #if HAVE_THP #define HUGE_PAGE_SIZE 2*1024*1024 #define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */ /* Attempt to allocate huge pages to reduce TLB misses. */ if( i_size >= HUGE_PAGE_THRESHOLD ) { align_buf = memalign( HUGE_PAGE_SIZE, i_size ); if( align_buf ) { /* Round up to the next huge page boundary if we are close enough. */ size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1); madvise( align_buf, madv_size, MADV_HUGEPAGE ); } } else #undef HUGE_PAGE_SIZE #undef HUGE_PAGE_THRESHOLD #endif align_buf = memalign( NATIVE_ALIGN, i_size ); #else uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) ); if( buf ) { align_buf = buf + (NATIVE_ALIGN-1) + sizeof(void **); align_buf -= (intptr_t) align_buf & (NATIVE_ALIGN-1); *( (void **) ( align_buf - sizeof(void **) ) ) = buf; } #endif if( !align_buf ) x264_log( NULL, X264_LOG_ERROR, "malloc of size %d failed\n", i_size ); return align_buf; }
/* Select an OpenCL GPU device for the lookahead and create its context and
 * command queue, then allocate the lookahead resources.
 *
 * Platforms and their GPU devices are scanned in order. A device is accepted
 * when it reports image support and its 2D read/write image formats include
 * both CL_R / CL_UNSIGNED_INT32 and CL_RGBA / CL_UNSIGNED_INT8. User
 * selection is honoured through h->param.opencl_device_id (exact device) and
 * h->param.i_opencl_device (number of acceptable devices to skip; the counter
 * is decremented in place).
 *
 * Returns the result of x264_opencl_lookahead_alloc() once a device has been
 * selected, and -1 otherwise. */
int x264_opencl_lookahead_init( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_platform_id *platform_ids = NULL;
    cl_device_id *gpu_ids = NULL;
    cl_image_format *formats = NULL;
    cl_context ctx = NULL;
    cl_uint platform_count = 0;
    cl_uint p;
    cl_int status;
    int ret = -1;

    status = ocl->clGetPlatformIDs( 0, NULL, &platform_count );
    if( status != CL_SUCCESS || !platform_count )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }

    platform_ids = (cl_platform_id*)x264_malloc( sizeof(cl_platform_id) * platform_count );
    if( !platform_ids )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: malloc of installed platforms buffer failed\n" );
        goto fail;
    }

    status = ocl->clGetPlatformIDs( platform_count, platform_ids, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }

    /* Walk the platforms until one yields a usable GPU device. */
    for( p = 0; p < platform_count; p++ )
    {
        cl_uint gpu_count = 0;
        cl_uint d;

        status = ocl->clGetDeviceIDs( platform_ids[p], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
        if( status != CL_SUCCESS || !gpu_count )
            continue;

        /* Drop the previous platform's device list before fetching this one. */
        x264_free( gpu_ids );
        gpu_ids = x264_malloc( sizeof(cl_device_id) * gpu_count );
        if( !gpu_ids )
            continue;

        status = ocl->clGetDeviceIDs( platform_ids[p], CL_DEVICE_TYPE_GPU, gpu_count, gpu_ids, NULL );
        if( status != CL_SUCCESS )
            continue;

        /* Look for a GPU on this platform that supports our image formats. */
        for( d = 0; d < gpu_count; d++ )
        {
            cl_bool image_support = 0;
            cl_uint format_count = 0;
            int has_r_u32 = 0;
            int has_rgba_u8 = 0;
            cl_uint f;

            h->opencl.device = gpu_ids[d];

            /* When the user named an exact device ID, every other GPU is
             * skipped; the named one still has to pass the checks below. */
            if( h->param.opencl_device_id &&
                gpu_ids[d] != (cl_device_id)h->param.opencl_device_id )
                continue;

            status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
            if( status != CL_SUCCESS || !image_support )
                continue;

            /* A context left over from a rejected device is released first. */
            if( ctx )
                ocl->clReleaseContext( ctx );
            ctx = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
            if( status != CL_SUCCESS || !ctx )
                continue;

            status = ocl->clGetSupportedImageFormats( ctx, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &format_count );
            if( status != CL_SUCCESS || !format_count )
                continue;

            x264_free( formats );
            formats = x264_malloc( sizeof(cl_image_format) * format_count );
            if( !formats )
                continue;

            status = ocl->clGetSupportedImageFormats( ctx, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, format_count, formats, NULL );
            if( status != CL_SUCCESS )
                continue;

            for( f = 0; f < format_count; f++ )
            {
                if( formats[f].image_channel_order == CL_R &&
                    formats[f].image_channel_data_type == CL_UNSIGNED_INT32 )
                    has_r_u32 = 1;
                else if( formats[f].image_channel_order == CL_RGBA &&
                         formats[f].image_channel_data_type == CL_UNSIGNED_INT8 )
                    has_rgba_u8 = 1;
            }

            if( !has_r_u32 || !has_rgba_u8 )
            {
                char dev_name[64];
                status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(dev_name), dev_name, NULL );
                if( status == CL_SUCCESS )
                {
                    /* Discarding the user's explicit choice deserves a
                     * warning; otherwise this is only debug information. */
                    int level;
                    if( h->param.opencl_device_id )
                        level = X264_LOG_WARNING;
                    else
                        level = X264_LOG_DEBUG;
                    x264_log( h, level, "OpenCL: %s does not support required image formats\n", dev_name );
                }
                continue;
            }

            /* User selection by index: skip the first N acceptable devices. */
            if( h->param.i_opencl_device )
            {
                h->param.i_opencl_device--;
                continue;
            }

            h->opencl.queue = ocl->clCreateCommandQueue( ctx, h->opencl.device, 0, &status );
            if( status != CL_SUCCESS || !h->opencl.queue )
                continue;

            /* Ownership of the context moves to the encoder handle, so the
             * cleanup below must not release it. */
            h->opencl.context = ctx;
            ctx = NULL;

            ret = 0;
            break;
        }

        if( ret == 0 )
            break;
    }

    if( !h->param.psz_clbin_file )
        h->param.psz_clbin_file = "x264_lookahead.clbin";

    if( ret == 0 )
        ret = x264_opencl_lookahead_alloc( h );
    else
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n" );

fail:
    if( ctx )
        ocl->clReleaseContext( ctx );
    x264_free( formats );
    x264_free( gpu_ids );
    x264_free( platform_ids );
    return ret;
}
/* Build the lookahead program, create all of its kernels and set up the
 * page-locked transfer buffer.
 *
 * Returns 0 on success. Returns -1 straight away when lookahead is disabled
 * (h->param.rc.i_lookahead == 0); on any later failure
 * x264_opencl_lookahead_delete() is called before returning -1. */
static int x264_opencl_lookahead_alloc( x264_t *h )
{
    /* Kernel entry point name paired with the handle slot that receives it. */
    const struct
    {
        const char *name;
        cl_kernel *slot;
    } kernel_table[] =
    {
        { "mb_intra_cost_satd_8x8", &h->opencl.intra_kernel },
        { "sum_intra_cost",         &h->opencl.rowsum_intra_kernel },
        { "downscale_hpel",         &h->opencl.downscale_hpel_kernel },
        { "downscale1",             &h->opencl.downscale_kernel1 },
        { "downscale2",             &h->opencl.downscale_kernel2 },
        { "memset_int16",           &h->opencl.memset_kernel },
        { "weightp_scaled_images",  &h->opencl.weightp_scaled_images_kernel },
        { "weightp_hpel",           &h->opencl.weightp_hpel_kernel },
        { "hierarchical_motion",    &h->opencl.hme_kernel },
        { "subpel_refine",          &h->opencl.subpel_refine_kernel },
        { "mode_selection",         &h->opencl.mode_select_kernel },
        { "sum_inter_cost",         &h->opencl.rowsum_inter_kernel }
    };
    x264_opencl_function_t *ocl;
    cl_int status;
    int k;

    if( !h->param.rc.i_lookahead )
        return -1;

    ocl = h->opencl.ocl;

    h->opencl.lookahead_program = x264_opencl_compile( h );
    if( !h->opencl.lookahead_program )
        goto fail;

    for( k = 0; k < ARRAY_SIZE(kernel_table); k++ )
    {
        *kernel_table[k].slot = ocl->clCreateKernel( h->opencl.lookahead_program, kernel_table[k].name, &status );
        if( status != CL_SUCCESS )
        {
            x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernel_table[k].name, status );
            goto fail;
        }
    }

    /* Host-allocated buffer, mapped once (blocking) for both reading and
     * writing; the mapped pointer is kept in the handle. */
    h->opencl.page_locked_buffer = ocl->clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
        goto fail;
    }

    h->opencl.page_locked_ptr = ocl->clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
                                                         0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
        goto fail;
    }

    return 0;

fail:
    x264_opencl_lookahead_delete( h );
    return -1;
}
/* The OpenCL source under common/opencl will be merged into common/oclobj.h by
 * the Makefile.  It defines a x264_opencl_source byte array which we will pass
 * to clCreateProgramWithSource().  We also attempt to use a cache file for the
 * compiled binary, stored in the current working folder.
 *
 * Returns the built program, or NULL on failure. When the build itself fails,
 * the build log is written to x264_kernel_build_log.txt if it can be
 * retrieved. h->opencl.b_device_AMD_SI is set as a side effect. */
static cl_program x264_opencl_compile( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_program program = NULL;
    char *log_text = NULL;
    size_t log_len;
    FILE *log_fp;
    char dev_name[64];
    char dev_vendor[64];
    char driver_version[64];
    const char *buildopts;
    int vectorize;
    cl_int status;

    /* All three identification strings are required. */
    status  = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME,    sizeof(dev_name),       dev_name,       NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR,  sizeof(dev_vendor),     dev_vendor,     NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driver_version), driver_version, NULL );
    if( status != CL_SUCCESS )
        return NULL;

    // Most AMD GPUs have vector registers
    vectorize = !strcmp( dev_vendor, "Advanced Micro Devices, Inc." );
    h->opencl.b_device_AMD_SI = 0;

    if( vectorize )
    {
        cl_uint simd_width;

        /* Disable OpenCL on Intel/AMD switchable graphics devices */
        if( x264_detect_switchable_graphics() )
        {
            x264_log( h, X264_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
            return NULL;
        }

        /* Detect AMD SouthernIsland or newer device (single-width registers) */
        simd_width = 4;
        status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simd_width, NULL );
        if( status == CL_SUCCESS && simd_width == 1 )
        {
            vectorize = 0;
            h->opencl.b_device_AMD_SI = 1;
        }
    }

    x264_log( h, X264_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", dev_vendor, dev_name, h->opencl.b_device_AMD_SI ? "(SI)" : "" );

    /* Prefer a cached binary; fall back to the embedded source. */
    program = x264_opencl_cache_load( h, dev_name, dev_vendor, driver_version );
    if( !program )
    {
        /* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x264_opencl_source */
        const char *source_ptr;
        size_t source_len;

        x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
        source_ptr = (const char*)x264_opencl_source;
        source_len = sizeof(x264_opencl_source);
        program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &source_ptr, &source_len, &status );
        if( status != CL_SUCCESS || !program )
        {
            x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
            return NULL;
        }
    }

    /* Build the program binary for the OpenCL device */
    buildopts = vectorize ? "-DVECTORIZE=1" : "";
    status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
    if( status == CL_SUCCESS )
    {
        x264_opencl_cache_save( h, program, dev_name, dev_vendor, driver_version );
        return program;
    }

    /* Compile failure, should not happen with production code. */

    log_len = 0;
    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_len );
    if( status != CL_SUCCESS || !log_len )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
        goto fail;
    }

    log_text = x264_malloc( log_len );
    if( !log_text )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to alloc build log\n" );
        goto fail;
    }

    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, log_len, log_text, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
        goto fail;
    }

    log_fp = x264_fopen( "x264_kernel_build_log.txt", "w" );
    if( !log_fp )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
        goto fail;
    }
    fwrite( log_text, 1, log_len, log_fp );
    fclose( log_fp );
    x264_log( h, X264_LOG_WARNING, "OpenCL: kernel build errors written to x264_kernel_build_log.txt\n" );

    /* The build failed either way: fall through to the cleanup. */
fail:
    x264_free( log_text );
    if( program )
        ocl->clReleaseProgram( program );
    return NULL;
}
/* Write the type of the current macroblock (h->mb.i_type) to the CABAC
 * stream `cb`.
 *
 * The coding depends on the slice type (h->sh.i_type):
 *  - I slices: the intra type is coded with contexts starting at 3.
 *  - P slices: a prefix on contexts 14..17 selects the inter partition or
 *    announces an intra type, which follows as a suffix on contexts 17+.
 *  - B slices: a bit string on contexts 27..32, taken from lookup tables for
 *    the two-partition/16x16 inter types.
 * Any other slice type is reported as an error and nothing is written.
 *
 * The order of the x264_cabac_encode_decision() calls is the order of the
 * bins in the stream and must not be changed. */
static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
{
    const int i_mb_type = h->mb.i_type;

    /* With MBAFF enabled, the interlaced flag is written for a macroblock in
     * an even row, or when the macroblock one stride above is a skip. */
    if( h->sh.b_mbaff && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
    {
        x264_cabac_encode_decision( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
    }

    if( h->sh.i_type == SLICE_TYPE_I )
    {
        /* Context offset 0..2: one for each of the left/top neighbours that
         * is available (type >= 0) and is not I_4x4. */
        int ctx = 0;
        if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
        {
            ctx++;
        }
        if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
        {
            ctx++;
        }

        x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
    }
    else if( h->sh.i_type == SLICE_TYPE_P )
    {
        /* prefix: 14, suffix: 17 */
        if( i_mb_type == P_L0 )
        {
            /* A P_L0 macroblock with any other partition writes nothing. */
            if( h->mb.i_partition == D_16x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 0 );
                x264_cabac_encode_decision( cb, 16, 0 );
            }
            else if( h->mb.i_partition == D_16x8 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, 1 );
            }
            else if( h->mb.i_partition == D_8x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, 0 );
            }
        }
        else if( i_mb_type == P_8x8 )
        {
            x264_cabac_encode_decision( cb, 14, 0 );
            x264_cabac_encode_decision( cb, 15, 0 );
            x264_cabac_encode_decision( cb, 16, 1 );
        }
        else /* intra */
        {
            /* prefix */
            x264_cabac_encode_decision( cb, 14, 1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
        }
    }
    else if( h->sh.i_type == SLICE_TYPE_B )
    {
        /* Context offset 0..2: one for each of the left/top neighbours that
         * is available and is neither B_SKIP nor B_DIRECT. */
        int ctx = 0;
        if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
        {
            ctx++;
        }
        if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
        {
            ctx++;
        }

        if( i_mb_type == B_DIRECT )
        {
            x264_cabac_encode_decision( cb, 27+ctx, 0 );
        }
        else if( i_mb_type == B_8x8 )
        {
            /* Fixed bin string 1 1 1 1 1 1 */
            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
        }
        else if( IS_INTRA( i_mb_type ) )
        {
            /* prefix */
            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   0 );
            x264_cabac_encode_decision( cb, 27+5,   1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
        }
        else
        {
            /* Both tables hold three entries per macroblock type, indexed by
             * (i_partition - D_16x8); rows follow the type order given in the
             * row comments. A length of 0 marks a combination that has no
             * bin string in this table. */
            static const int i_mb_len[9*3] =
            {
                6, 6, 3,    /* L0 L0 */
                6, 6, 0,    /* L0 L1 */
                7, 7, 0,    /* L0 BI */
                6, 6, 0,    /* L1 L0 */
                6, 6, 3,    /* L1 L1 */
                7, 7, 0,    /* L1 BI */
                7, 7, 0,    /* BI L0 */
                7, 7, 0,    /* BI L1 */
                7, 7, 6,    /* BI BI */
            };
            static const int i_mb_bits[9*3][7] =
            {
                { 1,1,0,0,0,1   }, { 1,1,0,0,1,0,  }, { 1,0,0 },       /* L0 L0 */
                { 1,1,0,1,0,1   }, { 1,1,0,1,1,0   }, {0},             /* L0 L1 */
                { 1,1,1,0,0,0,0 }, { 1,1,1,0,0,0,1 }, {0},             /* L0 BI */
                { 1,1,0,1,1,1   }, { 1,1,1,1,1,0   }, {0},             /* L1 L0 */
                { 1,1,0,0,1,1   }, { 1,1,0,1,0,0   }, { 1,0,1 },       /* L1 L1 */
                { 1,1,1,0,0,1,0 }, { 1,1,1,0,0,1,1 }, {0},             /* L1 BI */
                { 1,1,1,0,1,0,0 }, { 1,1,1,0,1,0,1 }, {0},             /* BI L0 */
                { 1,1,1,0,1,1,0 }, { 1,1,1,0,1,1,1 }, {0},             /* BI L1 */
                { 1,1,1,1,0,0,0 }, { 1,1,1,1,0,0,1 }, { 1,1,0,0,0,0 }, /* BI BI */
            };

            /* NOTE(review): this index relies on the B_* type constants being
             * consecutive from B_L0_L0 in the row order above, and on the
             * partition constants D_16x8, D_8x16 and D_16x16 mapping to
             * offsets 0, 1 and 2 (the column order the 3-bin entries imply);
             * confirm against the enum definitions. */
            const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
            int i;

            /* The first three bins use dedicated contexts; the context of the
             * third bin is 27+4 when the second bin is 1 and 27+5 when it
             * is 0. All remaining bins use context 27+5. */
            x264_cabac_encode_decision( cb, 27+ctx, i_mb_bits[idx][0] );
            x264_cabac_encode_decision( cb, 27+3,   i_mb_bits[idx][1] );
            x264_cabac_encode_decision( cb, 27+5-i_mb_bits[idx][1], i_mb_bits[idx][2] );
            for( i = 3; i < i_mb_len[idx]; i++ )
                x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][i] );
        }
    }
    else
    {
        x264_log(h, X264_LOG_ERROR, "unknown SLICE_TYPE unsupported in x264_macroblock_write_cabac\n" );
    }
}
/* Write the type of the current macroblock (h->mb.i_type) to the CABAC
 * stream `cb`.
 *
 * The coding depends on the slice type (h->sh.i_type):
 *  - I slices: the intra type is coded with contexts starting at 3.
 *  - P slices: a prefix on contexts 14..17 selects the inter partition or
 *    announces an intra type, which follows as a suffix on contexts 17+.
 *  - B slices: a bit string on contexts 27..32, taken from lookup tables for
 *    the two-partition/16x16 inter types.
 * Any other slice type is reported as an error and nothing is written.
 *
 * The order of the x264_cabac_encode_decision() calls is the order of the
 * bins in the stream and must not be changed. */
static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
{
    const int i_mb_type = h->mb.i_type;

    if( h->sh.i_type == SLICE_TYPE_I )
    {
        /* Context offset 0..2: one for each of the left/top neighbours that
         * is available (type >= 0) and is not I_4x4. */
        int ctx = 0;
        if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
        {
            ctx++;
        }
        if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
        {
            ctx++;
        }

        x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
    }
    else if( h->sh.i_type == SLICE_TYPE_P )
    {
        /* prefix: 14, suffix: 17 */
        if( i_mb_type == P_L0 )
        {
            /* A P_L0 macroblock with any other partition writes nothing. */
            if( h->mb.i_partition == D_16x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 0 );
                x264_cabac_encode_decision( cb, 16, 0 );
            }
            else if( h->mb.i_partition == D_16x8 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, 1 );
            }
            else if( h->mb.i_partition == D_8x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, 0 );
            }
        }
        else if( i_mb_type == P_8x8 )
        {
            x264_cabac_encode_decision( cb, 14, 0 );
            x264_cabac_encode_decision( cb, 15, 0 );
            x264_cabac_encode_decision( cb, 16, 1 );
        }
        else /* intra */
        {
            /* prefix */
            x264_cabac_encode_decision( cb, 14, 1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
        }
    }
    else if( h->sh.i_type == SLICE_TYPE_B )
    {
        /* Context offset 0..2: one for each of the left/top neighbours that
         * is available and is neither B_SKIP nor B_DIRECT. */
        int ctx = 0;
        if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
        {
            ctx++;
        }
        if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
        {
            ctx++;
        }

        if( i_mb_type == B_DIRECT )
        {
            x264_cabac_encode_decision( cb, 27+ctx, 0 );
        }
        else if( i_mb_type == B_8x8 )
        {
            /* Fixed bin string 1 1 1 1 1 1 */
            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
        }
        else if( IS_INTRA( i_mb_type ) )
        {
            /* prefix */
            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   0 );
            x264_cabac_encode_decision( cb, 27+5,   1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
        }
        else
        {
            /* Table layout (21 entries): the first three rows hold three
             * entries each, in the order 16x16, 16x8, 8x16; the remaining
             * six rows hold two entries each, in the order 16x8, 8x16. */
            static const int i_mb_len[21] =
            {
                3, 6, 6,    /* L0 L0 */
                3, 6, 6,    /* L1 L1 */
                6, 7, 7,    /* BI BI */

                6, 6,       /* L0 L1 */
                6, 6,       /* L1 L0 */
                7, 7,       /* L0 BI */
                7, 7,       /* L1 BI */
                7, 7,       /* BI L0 */
                7, 7,       /* BI L1 */
            };
            static const int i_mb_bits[21][7] =
            {
                { 1, 0, 0, },            { 1, 1, 0, 0, 0, 1, },    { 1, 1, 0, 0, 1, 0, },   /* L0 L0 */
                { 1, 0, 1, },            { 1, 1, 0, 0, 1, 1, },    { 1, 1, 0, 1, 0, 0, },   /* L1 L1 */
                { 1, 1, 0, 0, 0, 0 ,},   { 1, 1, 1, 1, 0, 0 , 0 }, { 1, 1, 1, 1, 0, 0 , 1 },/* BI BI */

                { 1, 1, 0, 1, 0, 1, },   { 1, 1, 0, 1, 1, 0, },     /* L0 L1 */
                { 1, 1, 0, 1, 1, 1, },   { 1, 1, 1, 1, 1, 0, },     /* L1 L0 */
                { 1, 1, 1, 0, 0, 0, 0 }, { 1, 1, 1, 0, 0, 0, 1 },   /* L0 BI */
                { 1, 1, 1, 0, 0, 1, 0 }, { 1, 1, 1, 0, 0, 1, 1 },   /* L1 BI */
                { 1, 1, 1, 0, 1, 0, 0 }, { 1, 1, 1, 0, 1, 0, 1 },   /* BI L0 */
                { 1, 1, 1, 0, 1, 1, 0 }, { 1, 1, 1, 0, 1, 1, 1 }    /* BI L1 */
            };

            const int i_partition = h->mb.i_partition;
            int idx = 0;
            int i;

            /* Build the table index. Every case deliberately falls through
             * to the ones below it, so the increments accumulate: the first
             * group steps by 3 per row, the second group by 2 per row on top
             * of the 3*3 entries of the first group. */
            switch( i_mb_type )
            {
                /* D_16x16, D_16x8, D_8x16 */
                case B_BI_BI: idx += 3; /* fallthrough */
                case B_L1_L1: idx += 3; /* fallthrough */
                case B_L0_L0:
                    if( i_partition == D_16x8 )
                        idx += 1;
                    else if( i_partition == D_8x16 )
                        idx += 2;
                    break;

                /* D_16x8, D_8x16 */
                case B_BI_L1: idx += 2; /* fallthrough */
                case B_BI_L0: idx += 2; /* fallthrough */
                case B_L1_BI: idx += 2; /* fallthrough */
                case B_L0_BI: idx += 2; /* fallthrough */
                case B_L1_L0: idx += 2; /* fallthrough */
                case B_L0_L1:
                    idx += 3*3;
                    if( i_partition == D_8x16 )
                        idx++;
                    break;
                default:
                    x264_log(h, X264_LOG_ERROR, "error in B mb type\n" );
                    return;
            }

            /* The first three bins use dedicated contexts; the context of the
             * third bin is 27+4 when the second bin is non-zero and 27+5
             * otherwise. All remaining bins use context 27+5. */
            x264_cabac_encode_decision( cb, 27+ctx,                         i_mb_bits[idx][0] );
            x264_cabac_encode_decision( cb, 27+3,                           i_mb_bits[idx][1] );
            x264_cabac_encode_decision( cb, 27+(i_mb_bits[idx][1] != 0 ? 4 : 5), i_mb_bits[idx][2] );
            for( i = 3; i < i_mb_len[idx]; i++ )
            {
                x264_cabac_encode_decision( cb, 27+5,                       i_mb_bits[idx][i] );
            }
        }
    }
    else
    {
        x264_log(h, X264_LOG_ERROR, "unknown SLICE_TYPE unsupported in x264_macroblock_write_cabac\n" );
    }
}
/* Per-frame speedcontrol step: account for the time spent since the previous
 * call, update the complexity predictor and the statistics, then choose and
 * apply the preset for the next frame.
 *
 * The virtual buffer (sc->buffer_fill) gains the time budget of the frames
 * finished since the last call (sc->spf per frame, scaled by
 * h->param.sc.f_speed) and loses the wall-clock time that passed. A fill
 * above sc->buffer_size means the CPU was idle and is clipped; a negative,
 * still falling fill means the encoder is late and is only reported. */
void x264_speedcontrol_frame( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc;
    int64_t now, elapsed, fill_change;
    int frames;

    x264_emms();

    // update buffer state after encoding and outputting the previous frame(s)
    now = x264_mdate();
    frames = h->i_frame - sc->prev_frame;
    elapsed = now - sc->timestamp;
    fill_change = frames * sc->spf / h->param.sc.f_speed - elapsed;
    sc->buffer_fill += fill_change;
    sc->prev_frame = h->i_frame;
    sc->timestamp = now;

    // update the time predictor
    if( frames )
    {
        // time source: the alternative timer's cpu time, or wall-clock time
        int spent = h->param.sc.b_alt_timer ? sc->cpu_time : elapsed;
        float decay = powf( sc->cplx_decay, frames );

        sc->cplx_num *= decay;
        sc->cplx_den *= decay;
        sc->cplx_num += spent / presets[sc->preset].time;
        sc->cplx_den += frames;

        sc->stat.avg_preset += sc->preset * frames;
        sc->stat.den += frames;
    }

    sc->stat.min_buffer = X264_MIN( sc->buffer_fill, sc->stat.min_buffer );
    sc->stat.max_buffer = X264_MAX( sc->buffer_fill, sc->stat.max_buffer );

    if( sc->buffer_fill > sc->buffer_size ) // oops, cpu was idle
    {
        // not really an error, but we'll warn for debugging purposes
        // (idle time is accumulated and reported at most once per 1e6 ticks)
        static int64_t idle_total = 0, last_report = 0;

        idle_total += sc->buffer_fill - sc->buffer_size;
        if( now - last_report > 1e6 )
        {
            x264_log( h, X264_LOG_WARNING, "speedcontrol idle (%.6f sec)\n", idle_total/1e6 );
            last_report = now;
            idle_total = 0;
        }
        sc->buffer_fill = sc->buffer_size;
    }
    else if( sc->buffer_fill < 0 && fill_change < 0 ) // oops, we're late
    {
        // don't clip fullness to 0; we'll hope the real buffer was bigger than
        // specified, and maybe we can catch up. if the application had to drop
        // frames, then it should override the buffer fullness (FIXME implement this).
        x264_log( h, X264_LOG_WARNING, "speedcontrol underflow (%.6f sec)\n", sc->buffer_fill/1e6 );
    }

    {
        // pick the preset that should return the buffer to 3/4-full within a time
        // specified by compensation_period
        float target = sc->spf / h->param.sc.f_speed
                     * (sc->buffer_fill + sc->compensation_period)
                     / (sc->buffer_size*3/4 + sc->compensation_period);
        float cplx = sc->cplx_num / sc->cplx_den;
        float filled = (float) sc->buffer_fill / sc->buffer_size;
        float set, below, above;
        int idx;

        // find the first preset whose predicted time reaches the target,
        // stopping at the last preset at the latest
        below = presets[0].time * cplx;
        idx = 1;
        for( ;; )
        {
            above = presets[idx].time * cplx;
            if( above >= target || idx == PRESETS-1 )
                break;
            below = above;
            idx++;
        }

        // linear interpolation between states
        set = idx-1 + (target - below) / (above - below);
        // Even if our time estimations in the PRESETS array are off
        // this will push us towards our target fullness
        set += (20 * (filled-0.75));
        set = x264_clip3f(set,0,PRESETS-1);
        apply_preset( h, dither( sc, set ) );

        // FIXME
        if (h->param.i_log_level >= X264_LOG_DEBUG)
        {
            // exponentially decayed running sums kept across calls
            static float cpu, wall, tgt, den;
            float decay = 1-1/100.;

            cpu = cpu*decay + sc->cpu_time;
            wall = wall*decay + elapsed;
            tgt = tgt*decay + target;
            den = den*decay + 1;
            fprintf( stderr, "speed: %.2f %d[%.5f] (t/c/w: %6.0f/%6.0f/%6.0f = %.4f) fps=%.2f\r",
                     set, sc->preset, (float)sc->buffer_fill / sc->buffer_size,
                     tgt/den, cpu/den, wall/den, cpu/wall, 1e6*den/wall );
        }
    }
}
/*****************************************************************************
 * dull_macroblock_analyse_P_BEST:
 *  Mode decision for one macroblock of a P slice.
 *
 *  Steps, in order:
 *   1. 16x16 inter search; return early if that search left the macroblock
 *      typed as P_SKIP.
 *   2. Optional 8x8 search (X264_ANALYSE_PSUB16x16) and, when 8x8 wins,
 *      optional 4x4 search per 8x8 block (X264_ANALYSE_PSUB8x8).
 *   3. Quarter-pel refinement of the chosen partition, unless it is deferred.
 *   4. Intra analysis; the cheapest of inter / I_16x16 / I_8x8 / I_4x4 wins.
 *   5. Optional RD refinement when analysis.i_mbrd >= 2.
 *   6. dull_analyse_update_cache_P() stores the result.
 *
 *  Only 16x16 and 8x8 partitions are ever selected by the code in this
 *  function, so its D_16x8 / D_8x16 branches are not reached from here.
 *****************************************************************************/
void dull_macroblock_analyse_P_BEST( x264_t *h )
{
    x264_mb_analysis_t analysis;
    int i_cost = COST_MAX;
    int i;

    dull_mb_analyse_init_P( h, &analysis );

    /*--------------------------- Do the analysis ---------------------------*/
    //{ macroblock_analyse_P
    //{ macroblock_analyse_P
    //{ macroblock_analyse_P
    //{ macroblock_analyse_P
    {
        /* Skip detection is disabled in this variant: b_skip is a constant 0,
         * so the first branch below never executes. */
        int b_skip = 0;

        analysis.b_try_skip = 0;
        if( b_skip )
        {
            h->mb.i_type = P_SKIP;
            h->mb.i_partition = D_16x16;
            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
            /* Set up MVs for future predictors */
            for( i = 0; i < h->mb.pic.i_fref[0]; i++ )
                M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
        }
        else
        {
            const unsigned int flags = h->param.analyse.inter;
            int i_type;
            int i_partition;

            x264_mb_analyse_load_costs( h, &analysis );

            dull_mb_analyse_inter_p16x16_2( h, &analysis );

            /* NOTE(review): presumably the 16x16 search can set P_SKIP
             * itself; in that case the stored MVs of references 1.. are
             * zeroed and the analysis ends here, without the final cache
             * update. */
            if( h->mb.i_type == P_SKIP )
            {
                for( i = 1; i < h->mb.pic.i_fref[0]; i++ )
                    M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
                return;
            }

            if( flags & X264_ANALYSE_PSUB16x16 )
            {
                if( h->param.analyse.b_mixed_references )
                    x264_mb_analyse_inter_p8x8_mixed_ref( h, &analysis );
                else
                    dull_mb_analyse_inter_p8x8_2( h, &analysis );
            }

            /* Select best inter mode */
            i_type = P_L0;
            i_partition = D_16x16;
            i_cost = analysis.l0.me16x16.cost;

            if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
            {
                i_type = P_8x8;
                i_partition = D_8x8;
                i_cost = analysis.l0.i_cost8x8;

                /* Do sub 8x8 */
                if( flags & X264_ANALYSE_PSUB8x8 )
                {
                    for( i = 0; i < 4; i++ )
                    {
                        x264_mb_analyse_inter_p4x4( h, &analysis, i );
                        /* Switch this 8x8 block to 4x4 when that is cheaper
                         * and adjust the running cost by the difference. */
                        if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
                        {
                            int i_cost8x8 = analysis.l0.i_cost4x4[i];
                            h->mb.i_sub_partition[i] = D_L0_4x4;

                            i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
                        }
                        x264_mb_cache_mv_p8x8( h, &analysis, i );
                    }
                    analysis.l0.i_cost8x8 = i_cost;
                }
            }

            h->mb.i_partition = i_partition;

            /* refine qpel */
            //FIXME mb_type costs?
            if( analysis.i_mbrd || !h->mb.i_subpel_refine )
            {
                /* refine later */
            }
            else if( i_partition == D_16x16 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me16x16 );
                i_cost = analysis.l0.me16x16.cost;
            }
            else if( i_partition == D_16x8 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
                x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
                i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
            }
            else if( i_partition == D_8x16 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
                x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
                i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
            }
            else if( i_partition == D_8x8 )
            {
                /* Refine every sub-block and rebuild the total cost from the
                 * refined sub-block costs. */
                int i8x8;
                i_cost = 0;
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    switch( h->mb.i_sub_partition[i8x8] )
                    {
                        case D_L0_8x8:
                            x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
                            i_cost += analysis.l0.me8x8[i8x8].cost;
                            break;
                        case D_L0_8x4:
                            x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
                            i_cost += analysis.l0.me8x4[i8x8][0].cost +
                                      analysis.l0.me8x4[i8x8][1].cost;
                            break;
                        case D_L0_4x8:
                            x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
                            i_cost += analysis.l0.me4x8[i8x8][0].cost +
                                      analysis.l0.me4x8[i8x8][1].cost;
                            break;

                        case D_L0_4x4:
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
                            i_cost += analysis.l0.me4x4[i8x8][0].cost +
                                      analysis.l0.me4x4[i8x8][1].cost +
                                      analysis.l0.me4x4[i8x8][2].cost +
                                      analysis.l0.me4x4[i8x8][3].cost;
                            break;
                        default:
                            x264_log( h, X264_LOG_ERROR, "internal error (!8x8 && !4x4)\n" );
                            break;
                    }
                }
            }

            /* Intra analysis, with the current inter cost as its third
             * argument. With chroma ME the chroma intra cost is taken off
             * that argument first and then added to each luma intra cost. */
            if( h->mb.b_chroma_me )
            {
                x264_mb_analyse_intra_chroma( h, &analysis );
                x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_i8x8chroma );
                analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
                analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
                analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
            }
            else
                x264_mb_analyse_intra( h, &analysis, i_cost );

            /* An intra type replaces the inter choice only when it is
             * strictly cheaper. */
            COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
            COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 );
            COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 );

            h->mb.i_type = i_type;

            /* RD refinement of the final choice. */
            if( analysis.i_mbrd >= 2 && h->mb.i_type != I_PCM )
            {
                if( IS_INTRA( h->mb.i_type ) )
                {
                    x264_intra_rd_refine( h, &analysis );
                }
                else if( i_partition == D_16x16 )
                {
                    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref );
                    analysis.l0.me16x16.cost = i_cost;
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );
                }
                else if( i_partition == D_16x8 )
                {
                    h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
                    h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
                    x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].i_ref );
                    x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].i_ref );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], analysis.i_lambda2, 0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 8, 0 );
                }
                else if( i_partition == D_8x16 )
                {
                    h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
                    h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
                    x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].i_ref );
                    x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].i_ref );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 4, 0 );
                }
                else if( i_partition == D_8x8 )
                {
                    int i8x8;
                    x264_analyse_update_cache( h, &analysis );
                    /* The fourth argument of the RD refinement is
                     * i8x8*4 plus the offset of the part inside the 8x8
                     * block. */
                    for( i8x8 = 0; i8x8 < 4; i8x8++ )
                    {
                        if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8*4, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_8x4 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][1], analysis.i_lambda2, i8x8*4+2, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_4x8 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_4x4 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][2], analysis.i_lambda2, i8x8*4+2, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][3], analysis.i_lambda2, i8x8*4+3, 0 );
                        }
                    }
                }
            }
        }
    }
    //} macroblock_analyse_P
    //} macroblock_analyse_P
    //} macroblock_analyse_P
    //} macroblock_analyse_P

    dull_analyse_update_cache_P( h, &analysis );
}