Пример #1
0
void x264_validate_levels( x264_t *h )
{
    int mbs;

    const x264_level_t *l = x264_levels;
    while( l->level_idc != 0 && l->level_idc != h->param.i_level_idc )
        l++;

    mbs = h->sps->i_mb_width * h->sps->i_mb_height;
    if( l->frame_size < mbs
        || l->frame_size*8 < h->sps->i_mb_width * h->sps->i_mb_width
        || l->frame_size*8 < h->sps->i_mb_height * h->sps->i_mb_height )
        x264_log( h, X264_LOG_WARNING, "frame MB size (%dx%d) > level limit (%d)\n",
                  h->sps->i_mb_width, h->sps->i_mb_height, l->frame_size );

#define CHECK( name, limit, val ) \
    if( (val) > (limit) ) \
        x264_log( h, X264_LOG_WARNING, name " (%d) > level limit (%d)\n", (int)(val), (limit) );

    CHECK( "DPB size", l->dpb, mbs * 384 * h->sps->i_num_ref_frames );
    CHECK( "VBV bitrate", l->bitrate, h->param.rc.i_vbv_max_bitrate );
    CHECK( "VBV buffer", l->cpb, h->param.rc.i_vbv_buffer_size );
    CHECK( "MV range", l->mv_range, h->param.analyse.i_mv_range );

    if( h->param.i_fps_den > 0 )
        CHECK( "MB rate", l->mbps, (int64_t)mbs * h->param.i_fps_num / h->param.i_fps_den );
    if( h->sps->b_direct8x8_inference < l->direct8x8 )
        x264_log( h, X264_LOG_WARNING, "direct 8x8 inference (0) < level requirement (1)\n" );

    /* TODO check the rest of the limits */
}
Пример #2
0
static void CL_CALLBACK x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
{
    /* Any error notification can be assumed to be fatal to the OpenCL context.
     * We need to stop using it immediately to prevent further damage. */
    x264_t *h = (x264_t*)user_data;
    h->param.b_opencl = 0;
    h->opencl.b_fatal_error = 1;
    x264_log( h, X264_LOG_ERROR, "OpenCL: %s\n", errinfo );
    x264_log( h, X264_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
}
Пример #3
0
/* Save the compiled program binary to a file for later reuse.  Device context
 * is also saved in the cache file so we do not reuse stale binaries */
static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
{
    FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
    x264_opencl_function_t *ocl;
    uint8_t *binary;

    size_t size;
    cl_int status;

	if( !fp )
    {
        x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
        return;
    }

    ocl = h->opencl.ocl;
    binary = NULL;

    size = 0;
    status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
    if( status != CL_SUCCESS || !size )
    {
        x264_log( h, X264_LOG_INFO, "OpenCL: Unable to query program binary size, no cache file generated\n" );
        goto fail;
    }

    CHECKED_MALLOC( binary, size );
    status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_INFO, "OpenCL: Unable to query program binary, no cache file generated\n" );
        goto fail;
    }

    fputs( dev_name, fp );
    fputc( '\n', fp );
    fputs( dev_vendor, fp );
    fputc( '\n', fp );
    fputs( driver_version, fp );
    fputc( '\n', fp );
    fputs( x264_opencl_source_hash, fp );
    fputc( '\n', fp );
    fwrite( binary, 1, size, fp );

fail:
    fclose( fp );
    x264_free( binary );
    return;
}
Пример #4
0
void x264_speedcontrol_delete( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc;
    if( !sc )
        return;
    x264_log( h, X264_LOG_INFO, "speedcontrol: avg preset=%.3f  buffer min=%.3f max=%.3f\n",
              sc->stat.avg_preset / sc->stat.den,
              (float)sc->stat.min_buffer / sc->buffer_size,
              (float)sc->stat.max_buffer / sc->buffer_size );
//  x264_log( h, X264_LOG_INFO, "speedcontrol: avg cplx=%.5f\n", sc->cplx_num / sc->cplx_den );
    x264_free( sc );
}
Пример #5
0
void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
{
    dst->i_type     = src->i_type;
    dst->i_qpplus1  = src->i_qpplus1;
    dst->i_pts      = src->i_pts;

    switch( src->img.i_csp & X264_CSP_MASK )
    {
        case X264_CSP_I420:
            h->csp.i420( dst, &src->img, h->param.i_width, h->param.i_height );
            break;
        case X264_CSP_YV12:
            h->csp.yv12( dst, &src->img, h->param.i_width, h->param.i_height );
            break;
        case X264_CSP_I422:
            h->csp.i422( dst, &src->img, h->param.i_width, h->param.i_height );
            break;
        case X264_CSP_I444:
            h->csp.i444( dst, &src->img, h->param.i_width, h->param.i_height );
            break;
        case X264_CSP_YUYV:
            h->csp.yuyv( dst, &src->img, h->param.i_width, h->param.i_height );
            break;
        case X264_CSP_RGB:
            h->csp.rgb( dst, &src->img, h->param.i_width, h->param.i_height );
            break;
        case X264_CSP_BGR:
            h->csp.bgr( dst, &src->img, h->param.i_width, h->param.i_height );
            break;
        case X264_CSP_BGRA:
            h->csp.bgra( dst, &src->img, h->param.i_width, h->param.i_height );
            break;

        default:
            x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
            break;
    }
}
Пример #6
0
static void apply_preset( x264_t *h, int preset )
{
    x264_speedcontrol_t *sc = h->sc;
    preset = x264_clip3( preset, 0, PRESETS-1 );
    //if( preset != sc->preset )
    {
        const sc_preset_t *s = &presets[preset];
        x264_param_t p = sc->user_param;

        p.i_frame_reference = s->refs;
        p.analyse.inter = s->partitions;
        p.analyse.i_subpel_refine = s->subme;
        p.analyse.i_me_method = s->me;
        p.analyse.i_trellis = s->trellis;
        p.analyse.b_mixed_references = s->mix;
        p.analyse.b_chroma_me = s->chromame;
        p.analyse.f_psy_rd = s->psy_rd;
        p.analyse.f_psy_trellis = s->psy_trellis;
        x264_encoder_reconfig( h, &p );
        sc->preset = preset;
        x264_log( h, X264_LOG_DEBUG, "Applying speedcontrol preset %d.\n", preset );
    }
}
Пример #7
0
/****************************************************************************
 * x264_malloc:
 ****************************************************************************/
void *x264_malloc( int i_size )
{
    uint8_t *align_buf = NULL;
#if HAVE_MALLOC_H
#if HAVE_THP
#define HUGE_PAGE_SIZE 2*1024*1024
#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
    /* Attempt to allocate huge pages to reduce TLB misses. */
    if( i_size >= HUGE_PAGE_THRESHOLD )
    {
        align_buf = memalign( HUGE_PAGE_SIZE, i_size );
        if( align_buf )
        {
            /* Round up to the next huge page boundary if we are close enough. */
            size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1);
            madvise( align_buf, madv_size, MADV_HUGEPAGE );
        }
    }
    else
#undef HUGE_PAGE_SIZE
#undef HUGE_PAGE_THRESHOLD
#endif
        align_buf = memalign( NATIVE_ALIGN, i_size );
#else
    uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) );
    if( buf )
    {
        align_buf = buf + (NATIVE_ALIGN-1) + sizeof(void **);
        align_buf -= (intptr_t) align_buf & (NATIVE_ALIGN-1);
        *( (void **) ( align_buf - sizeof(void **) ) ) = buf;
    }
#endif
    if( !align_buf )
        x264_log( NULL, X264_LOG_ERROR, "malloc of size %d failed\n", i_size );
    return align_buf;
}
Пример #8
0
int x264_opencl_lookahead_init( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_platform_id *platforms = NULL;
    cl_device_id *devices = NULL;
    cl_image_format *imageType = NULL;
    cl_context context = NULL;
    int ret = -1;
    cl_uint i;

    cl_uint numPlatforms = 0;
    cl_int status = ocl->clGetPlatformIDs( 0, NULL, &numPlatforms );
    if( status != CL_SUCCESS || !numPlatforms )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }
    platforms = (cl_platform_id*)x264_malloc( sizeof(cl_platform_id) * numPlatforms );
    if( !platforms )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: malloc of installed platforms buffer failed\n" );
        goto fail;
    }
    status = ocl->clGetPlatformIDs( numPlatforms, platforms, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }

    /* Select the first OpenCL platform with a GPU device that supports our
     * required image (texture) formats */
    for( i = 0; i < numPlatforms; i++ )
    {
        cl_uint gpu_count = 0;
        cl_uint gpu;

    	status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
        if( status != CL_SUCCESS || !gpu_count )
            continue;

        x264_free( devices );
        devices = x264_malloc( sizeof(cl_device_id) * gpu_count );
        if( !devices )
            continue;

        status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
        if( status != CL_SUCCESS )
            continue;

        /* Find a GPU device that supports our image formats */
        for( gpu = 0; gpu < gpu_count; gpu++ )
        {
            cl_bool image_support;
            cl_uint imagecount;
            int b_has_r;
            int b_has_rgba;
            cl_uint j;

			h->opencl.device = devices[gpu];

            /* if the user has specified an exact device ID, skip all other
             * GPUs.  If this device matches, allow it to continue through the
             * checks for supported images, etc.  */
            if( h->param.opencl_device_id && devices[gpu] != (cl_device_id)h->param.opencl_device_id )
                continue;

            image_support = 0;
            status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
            if( status != CL_SUCCESS || !image_support )
                continue;

            if( context )
                ocl->clReleaseContext( context );
            context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
            if( status != CL_SUCCESS || !context )
                continue;

            imagecount = 0;
            status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
            if( status != CL_SUCCESS || !imagecount )
                continue;

            x264_free( imageType );
            imageType = x264_malloc( sizeof(cl_image_format) * imagecount );
            if( !imageType )
                continue;

            status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
            if( status != CL_SUCCESS )
                continue;

            b_has_r = 0;
            b_has_rgba = 0;
            for( j = 0; j < imagecount; j++ )
            {
                if( imageType[j].image_channel_order == CL_R &&
                    imageType[j].image_channel_data_type == CL_UNSIGNED_INT32 )
                    b_has_r = 1;
                else if( imageType[j].image_channel_order == CL_RGBA &&
                         imageType[j].image_channel_data_type == CL_UNSIGNED_INT8 )
                    b_has_rgba = 1;
            }
            if( !b_has_r || !b_has_rgba )
            {
                char dev_name[64];
                status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(dev_name), dev_name, NULL );
                if( status == CL_SUCCESS )
                {
                    /* emit warning if we are discarding the user's explicit choice */
                    int level = h->param.opencl_device_id ? X264_LOG_WARNING : X264_LOG_DEBUG;
                    x264_log( h, level, "OpenCL: %s does not support required image formats\n", dev_name );
                }
                continue;
            }

            /* user selection of GPU device, skip N first matches */
            if( h->param.i_opencl_device )
            {
                h->param.i_opencl_device--;
                continue;
            }

            h->opencl.queue = ocl->clCreateCommandQueue( context, h->opencl.device, 0, &status );
            if( status != CL_SUCCESS || !h->opencl.queue )
                continue;

            h->opencl.context = context;
            context = NULL;

            ret = 0;
            break;
        }

        if( !ret )
            break;
    }

    if( !h->param.psz_clbin_file )
        h->param.psz_clbin_file = "x264_lookahead.clbin";

    if( ret )
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n" );
    else
        ret = x264_opencl_lookahead_alloc( h );

fail:
    if( context )
        ocl->clReleaseContext( context );
    x264_free( imageType );
    x264_free( devices );
    x264_free( platforms );
    return ret;
}
Пример #9
0
static int x264_opencl_lookahead_alloc( x264_t *h )
{
    static const char *kernelnames[] = {
        "mb_intra_cost_satd_8x8",
        "sum_intra_cost",
        "downscale_hpel",
        "downscale1",
        "downscale2",
        "memset_int16",
        "weightp_scaled_images",
        "weightp_hpel",
        "hierarchical_motion",
        "subpel_refine",
        "mode_selection",
        "sum_inter_cost"
    };

    cl_kernel *kernels[] = {
        &h->opencl.intra_kernel,
        &h->opencl.rowsum_intra_kernel,
        &h->opencl.downscale_hpel_kernel,
        &h->opencl.downscale_kernel1,
        &h->opencl.downscale_kernel2,
        &h->opencl.memset_kernel,
        &h->opencl.weightp_scaled_images_kernel,
        &h->opencl.weightp_hpel_kernel,
        &h->opencl.hme_kernel,
        &h->opencl.subpel_refine_kernel,
        &h->opencl.mode_select_kernel,
        &h->opencl.rowsum_inter_kernel
    };

    int i;

	x264_opencl_function_t *ocl;
    cl_int status;

	if( !h->param.rc.i_lookahead )
        return -1;

	ocl = h->opencl.ocl;

	h->opencl.lookahead_program = x264_opencl_compile( h );
    if( !h->opencl.lookahead_program )
        goto fail;

    for( i = 0; i < ARRAY_SIZE(kernelnames); i++ )
    {
        *kernels[i] = ocl->clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
        if( status != CL_SUCCESS )
        {
            x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status );
            goto fail;
        }
    }

    h->opencl.page_locked_buffer = ocl->clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
        goto fail;
    }
    h->opencl.page_locked_ptr = ocl->clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
                                                         0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
        goto fail;
    }

    return 0;
fail:
    x264_opencl_lookahead_delete( h );
    return -1;
}
Пример #10
0
/* The OpenCL source under common/opencl will be merged into common/oclobj.h by
 * the Makefile. It defines a x264_opencl_source byte array which we will pass
 * to clCreateProgramWithSource().  We also attempt to use a cache file for the
 * compiled binary, stored in the current working folder. */
static cl_program x264_opencl_compile( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_program program = NULL;
    char *build_log = NULL;

    char dev_name[64];
    char dev_vendor[64];
    char driver_version[64];
    cl_int status;
    int vectorize;
    const char *buildopts;
    size_t build_log_len;
    FILE *log_file;

	status  = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME,    sizeof(dev_name), dev_name, NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR,  sizeof(dev_vendor), dev_vendor, NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driver_version), driver_version, NULL );
    if( status != CL_SUCCESS )
        return NULL;

    // Most AMD GPUs have vector registers
    vectorize = !strcmp( dev_vendor, "Advanced Micro Devices, Inc." );
    h->opencl.b_device_AMD_SI = 0;

    if( vectorize )
    {
        cl_uint simdwidth;

		/* Disable OpenCL on Intel/AMD switchable graphics devices */
        if( x264_detect_switchable_graphics() )
        {
            x264_log( h, X264_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
            return NULL;
        }

        /* Detect AMD SouthernIsland or newer device (single-width registers) */
        simdwidth = 4;
        status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
        if( status == CL_SUCCESS && simdwidth == 1 )
        {
            vectorize = 0;
            h->opencl.b_device_AMD_SI = 1;
        }
    }

    x264_log( h, X264_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", dev_vendor, dev_name, h->opencl.b_device_AMD_SI ? "(SI)" : "" );

    program = x264_opencl_cache_load( h, dev_name, dev_vendor, driver_version );
    if( !program )
    {
        const char *strptr;
        size_t size;

		/* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x264_opencl_source */
        x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
        strptr = (const char*)x264_opencl_source;
        size = sizeof(x264_opencl_source);
        program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
        if( status != CL_SUCCESS || !program )
        {
            x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
            return NULL;
        }
    }

    /* Build the program binary for the OpenCL device */
    buildopts = vectorize ? "-DVECTORIZE=1" : "";
    status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
    if( status == CL_SUCCESS )
    {
        x264_opencl_cache_save( h, program, dev_name, dev_vendor, driver_version );
        return program;
    }

    /* Compile failure, should not happen with production code. */

    build_log_len = 0;
    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_len );
    if( status != CL_SUCCESS || !build_log_len )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
        goto fail;
    }

    build_log = x264_malloc( build_log_len );
    if( !build_log )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to alloc build log\n" );
        goto fail;
    }

    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
        goto fail;
    }

    log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
    if( !log_file )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
        goto fail;
    }
    fwrite( build_log, 1, build_log_len, log_file );
    fclose( log_file );
    x264_log( h, X264_LOG_WARNING, "OpenCL: kernel build errors written to x264_kernel_build_log.txt\n" );

fail:
    x264_free( build_log );
    if( program )
        ocl->clReleaseProgram( program );
    return NULL;
}
Пример #11
0
static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
{
    const int i_mb_type = h->mb.i_type;

    if( h->sh.b_mbaff &&
        (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
    {
        x264_cabac_encode_decision( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
    }

    if( h->sh.i_type == SLICE_TYPE_I )
    {
        int ctx = 0;
        if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
        {
            ctx++;
        }
        if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
        {
            ctx++;
        }

        x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
    }
    else if( h->sh.i_type == SLICE_TYPE_P )
    {
        /* prefix: 14, suffix: 17 */
        if( i_mb_type == P_L0 )
        {
            if( h->mb.i_partition == D_16x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 0 );
                x264_cabac_encode_decision( cb, 16, 0 );
            }
            else if( h->mb.i_partition == D_16x8 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, 1 );
            }
            else if( h->mb.i_partition == D_8x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, 0 );
            }
        }
        else if( i_mb_type == P_8x8 )
        {
            x264_cabac_encode_decision( cb, 14, 0 );
            x264_cabac_encode_decision( cb, 15, 0 );
            x264_cabac_encode_decision( cb, 16, 1 );
        }
        else /* intra */
        {
            /* prefix */
            x264_cabac_encode_decision( cb, 14, 1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
        }
    }
    else if( h->sh.i_type == SLICE_TYPE_B )
    {
        int ctx = 0;
        if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
        {
            ctx++;
        }
        if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
        {
            ctx++;
        }

        if( i_mb_type == B_DIRECT )
        {
            x264_cabac_encode_decision( cb, 27+ctx, 0 );
        }
        else if( i_mb_type == B_8x8 )
        {
            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );

            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
        }
        else if( IS_INTRA( i_mb_type ) )
        {
            /* prefix */
            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );

            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   0 );
            x264_cabac_encode_decision( cb, 27+5,   1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
        }
        else
        {
            static const int i_mb_len[9*3] =
            {
                6, 6, 3,    /* L0 L0 */
                6, 6, 0,    /* L0 L1 */
                7, 7, 0,    /* L0 BI */
                6, 6, 0,    /* L1 L0 */
                6, 6, 3,    /* L1 L1 */
                7, 7, 0,    /* L1 BI */
                7, 7, 0,    /* BI L0 */
                7, 7, 0,    /* BI L1 */
                7, 7, 6,    /* BI BI */
            };
            static const int i_mb_bits[9*3][7] =
            {
                { 1,1,0,0,0,1   }, { 1,1,0,0,1,0,  }, { 1,0,0 },       /* L0 L0 */
                { 1,1,0,1,0,1   }, { 1,1,0,1,1,0   }, {0},             /* L0 L1 */
                { 1,1,1,0,0,0,0 }, { 1,1,1,0,0,0,1 }, {0},             /* L0 BI */
                { 1,1,0,1,1,1   }, { 1,1,1,1,1,0   }, {0},             /* L1 L0 */
                { 1,1,0,0,1,1   }, { 1,1,0,1,0,0   }, { 1,0,1 },       /* L1 L1 */
                { 1,1,1,0,0,1,0 }, { 1,1,1,0,0,1,1 }, {0},             /* L1 BI */
                { 1,1,1,0,1,0,0 }, { 1,1,1,0,1,0,1 }, {0},             /* BI L0 */
                { 1,1,1,0,1,1,0 }, { 1,1,1,0,1,1,1 }, {0},             /* BI L1 */
                { 1,1,1,1,0,0,0 }, { 1,1,1,1,0,0,1 }, { 1,1,0,0,0,0 }, /* BI BI */
            };

            const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
            int i;

            x264_cabac_encode_decision( cb, 27+ctx, i_mb_bits[idx][0] );
            x264_cabac_encode_decision( cb, 27+3,   i_mb_bits[idx][1] );
            x264_cabac_encode_decision( cb, 27+5-i_mb_bits[idx][1], i_mb_bits[idx][2] );
            for( i = 3; i < i_mb_len[idx]; i++ )
                x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][i] );
        }
    }
    else
    {
        x264_log(h, X264_LOG_ERROR, "unknown SLICE_TYPE unsupported in x264_macroblock_write_cabac\n" );
    }
}
Пример #12
0
static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
{
    const int i_mb_type = h->mb.i_type;

    if( h->sh.i_type == SLICE_TYPE_I )
    {
        int ctx = 0;
        if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
        {
            ctx++;
        }
        if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
        {
            ctx++;
        }

        x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
    }
    else if( h->sh.i_type == SLICE_TYPE_P )
    {
        /* prefix: 14, suffix: 17 */
        if( i_mb_type == P_L0 )
        {
            if( h->mb.i_partition == D_16x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 0 );
                x264_cabac_encode_decision( cb, 16, 0 );
            }
            else if( h->mb.i_partition == D_16x8 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, 1 );
            }
            else if( h->mb.i_partition == D_8x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, 0 );
            }
        }
        else if( i_mb_type == P_8x8 )
        {
            x264_cabac_encode_decision( cb, 14, 0 );
            x264_cabac_encode_decision( cb, 15, 0 );
            x264_cabac_encode_decision( cb, 16, 1 );
        }
        else /* intra */
        {
            /* prefix */
            x264_cabac_encode_decision( cb, 14, 1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
        }
    }
    else if( h->sh.i_type == SLICE_TYPE_B )
    {
        int ctx = 0;
        if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
        {
            ctx++;
        }
        if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
        {
            ctx++;
        }

        if( i_mb_type == B_DIRECT )
        {
            x264_cabac_encode_decision( cb, 27+ctx, 0 );
        }
        else if( i_mb_type == B_8x8 )
        {
            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );

            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
        }
        else if( IS_INTRA( i_mb_type ) )
        {
            /* prefix */
            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );

            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   0 );
            x264_cabac_encode_decision( cb, 27+5,   1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
        }
        else
        {
            static const int i_mb_len[21] =
            {
                3, 6, 6,    /* L0 L0 */
                3, 6, 6,    /* L1 L1 */
                6, 7, 7,    /* BI BI */

                6, 6,       /* L0 L1 */
                6, 6,       /* L1 L0 */
                7, 7,       /* L0 BI */
                7, 7,       /* L1 BI */
                7, 7,       /* BI L0 */
                7, 7,       /* BI L1 */
            };
            static const int i_mb_bits[21][7] =
            {
                { 1, 0, 0, },            { 1, 1, 0, 0, 0, 1, },    { 1, 1, 0, 0, 1, 0, },   /* L0 L0 */
                { 1, 0, 1, },            { 1, 1, 0, 0, 1, 1, },    { 1, 1, 0, 1, 0, 0, },   /* L1 L1 */
                { 1, 1, 0, 0, 0, 0 ,},   { 1, 1, 1, 1, 0, 0 , 0 }, { 1, 1, 1, 1, 0, 0 , 1 },/* BI BI */

                { 1, 1, 0, 1, 0, 1, },   { 1, 1, 0, 1, 1, 0, },     /* L0 L1 */
                { 1, 1, 0, 1, 1, 1, },   { 1, 1, 1, 1, 1, 0, },     /* L1 L0 */
                { 1, 1, 1, 0, 0, 0, 0 }, { 1, 1, 1, 0, 0, 0, 1 },   /* L0 BI */
                { 1, 1, 1, 0, 0, 1, 0 }, { 1, 1, 1, 0, 0, 1, 1 },   /* L1 BI */
                { 1, 1, 1, 0, 1, 0, 0 }, { 1, 1, 1, 0, 1, 0, 1 },   /* BI L0 */
                { 1, 1, 1, 0, 1, 1, 0 }, { 1, 1, 1, 0, 1, 1, 1 }    /* BI L1 */
            };

            const int i_partition = h->mb.i_partition;
            int idx = 0;
            int i;
            switch( i_mb_type )
            {
                /* D_16x16, D_16x8, D_8x16 */
                case B_BI_BI: idx += 3;
                case B_L1_L1: idx += 3;
                case B_L0_L0:
                    if( i_partition == D_16x8 )
                        idx += 1;
                    else if( i_partition == D_8x16 )
                        idx += 2;
                    break;

                /* D_16x8, D_8x16 */
                case B_BI_L1: idx += 2;
                case B_BI_L0: idx += 2;
                case B_L1_BI: idx += 2;
                case B_L0_BI: idx += 2;
                case B_L1_L0: idx += 2;
                case B_L0_L1:
                    idx += 3*3;
                    if( i_partition == D_8x16 )
                        idx++;
                    break;
                default:
                    x264_log(h, X264_LOG_ERROR, "error in B mb type\n" );
                    return;
            }

            x264_cabac_encode_decision( cb, 27+ctx, i_mb_bits[idx][0] );
            x264_cabac_encode_decision( cb, 27+3,   i_mb_bits[idx][1] );
            x264_cabac_encode_decision( cb, 27+(i_mb_bits[idx][1] != 0 ? 4 : 5), i_mb_bits[idx][2] );
            for( i = 3; i < i_mb_len[idx]; i++ )
            {
                x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][i] );
            }
        }
    }
    else
    {
        x264_log(h, X264_LOG_ERROR, "unknown SLICE_TYPE unsupported in x264_macroblock_write_cabac\n" );
    }
}
Пример #13
0
void x264_speedcontrol_frame( x264_t *h )
{
    x264_speedcontrol_t *sc = h->sc;
    int64_t t, delta_t, delta_buffer;
    int delta_f;

    x264_emms();

    // update buffer state after encoding and outputting the previous frame(s)
    t = x264_mdate();
    delta_f = h->i_frame - sc->prev_frame;
    delta_t = t - sc->timestamp;
    delta_buffer = delta_f * sc->spf / h->param.sc.f_speed - delta_t;
    sc->buffer_fill += delta_buffer;
    sc->prev_frame = h->i_frame;
    sc->timestamp = t;

    // update the time predictor
    if( delta_f )
    {
        int cpu_time = h->param.sc.b_alt_timer ? sc->cpu_time : delta_t;
        float decay = powf( sc->cplx_decay, delta_f );
        sc->cplx_num *= decay;
        sc->cplx_den *= decay;
        sc->cplx_num += cpu_time / presets[sc->preset].time;
        sc->cplx_den += delta_f;

        sc->stat.avg_preset += sc->preset * delta_f;
        sc->stat.den += delta_f;
    }
    sc->stat.min_buffer = X264_MIN( sc->buffer_fill, sc->stat.min_buffer );
    sc->stat.max_buffer = X264_MAX( sc->buffer_fill, sc->stat.max_buffer );

    if( sc->buffer_fill > sc->buffer_size ) // oops, cpu was idle
    {
        // not really an error, but we'll warn for debugging purposes
        static int64_t idle_t = 0, print_interval = 0;
        idle_t += sc->buffer_fill - sc->buffer_size;
        if( t - print_interval > 1e6 )
        {
            x264_log( h, X264_LOG_WARNING, "speedcontrol idle (%.6f sec)\n", idle_t/1e6 );
            print_interval = t;
            idle_t = 0;
        }
        sc->buffer_fill = sc->buffer_size;
    }
    else if( sc->buffer_fill < 0 && delta_buffer < 0 ) // oops, we're late
    {
        // don't clip fullness to 0; we'll hope the real buffer was bigger than
        // specified, and maybe we can catch up. if the application had to drop
        // frames, then it should override the buffer fullness (FIXME implement this).
        x264_log( h, X264_LOG_WARNING, "speedcontrol underflow (%.6f sec)\n", sc->buffer_fill/1e6 );
    }

    {
        // pick the preset that should return the buffer to 3/4-full within a time
        // specified by compensation_period
        float target = sc->spf / h->param.sc.f_speed
                     * (sc->buffer_fill + sc->compensation_period)
                     / (sc->buffer_size*3/4 + sc->compensation_period);
        float cplx = sc->cplx_num / sc->cplx_den;
        float set, t0, t1;
	float filled = (float) sc->buffer_fill / sc->buffer_size;
        int i;
        t0 = presets[0].time * cplx;
        for( i=1;; i++ )
        {
            t1 = presets[i].time * cplx;
            if( t1 >= target || i == PRESETS-1 )
                break;
            t0 = t1;
        }
        // linear interpolation between states
        set = i-1 + (target - t0) / (t1 - t0);
        // Even if our time estimations in the PRESETS array are off
        // this will push us towards our target fullness
        set += (20 * (filled-0.75));
        set = x264_clip3f(set,0,PRESETS-1);
        apply_preset( h, dither( sc, set ) );

        // FIXME
        if (h->param.i_log_level >= X264_LOG_DEBUG)
        {
            static float cpu, wall, tgt, den;
            float decay = 1-1/100.;
            cpu = cpu*decay + sc->cpu_time;
            wall = wall*decay + delta_t;
            tgt = tgt*decay + target;
            den = den*decay + 1;
            fprintf( stderr, "speed: %.2f %d[%.5f] (t/c/w: %6.0f/%6.0f/%6.0f = %.4f) fps=%.2f\r",
                     set, sc->preset, (float)sc->buffer_fill / sc->buffer_size,
                     tgt/den, cpu/den, wall/den, cpu/wall, 1e6*den/wall );
        }
    }

}
/*****************************************************************************
 * x264_macroblock_analyse:
 *****************************************************************************/
void dull_macroblock_analyse_P_BEST( x264_t *h )
{
    x264_mb_analysis_t analysis;
    int i_cost = COST_MAX;
    int i;

    dull_mb_analyse_init_P( h, &analysis );

    /*--------------------------- Do the analysis ---------------------------*/
//{ macroblock_analyse_P //{ macroblock_analyse_P //{ macroblock_analyse_P //{ macroblock_analyse_P 
    {
        int b_skip = 0;

        analysis.b_try_skip = 0;

        if( b_skip )
        {
            h->mb.i_type = P_SKIP;
            h->mb.i_partition = D_16x16;
            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
            /* Set up MVs for future predictors */
            for( i = 0; i < h->mb.pic.i_fref[0]; i++ )
                M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
        }
        else
        {
            const unsigned int flags = h->param.analyse.inter;
            int i_type;
            int i_partition;

            x264_mb_analyse_load_costs( h, &analysis );

            dull_mb_analyse_inter_p16x16_2( h, &analysis );

            if( h->mb.i_type == P_SKIP )
            {
                for( i = 1; i < h->mb.pic.i_fref[0]; i++ )
                    M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
                return;
            }

            if( flags & X264_ANALYSE_PSUB16x16 )
            {
                if( h->param.analyse.b_mixed_references )
                    x264_mb_analyse_inter_p8x8_mixed_ref( h, &analysis );
                else
                    dull_mb_analyse_inter_p8x8_2( h, &analysis );
            }

            /* Select best inter mode */
            i_type = P_L0;
            i_partition = D_16x16;
            i_cost = analysis.l0.me16x16.cost;

            if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
            {
                i_type = P_8x8;
                i_partition = D_8x8;
                i_cost = analysis.l0.i_cost8x8;

                /* Do sub 8x8 */
                if( flags & X264_ANALYSE_PSUB8x8 )
                {
                    for( i = 0; i < 4; i++ )
                    {
                        x264_mb_analyse_inter_p4x4( h, &analysis, i );
                        if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
                        {
                            int i_cost8x8 = analysis.l0.i_cost4x4[i];
                            h->mb.i_sub_partition[i] = D_L0_4x4;

                            i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
                        }
                        x264_mb_cache_mv_p8x8( h, &analysis, i );
                    }
                    analysis.l0.i_cost8x8 = i_cost;
                }
            }

            h->mb.i_partition = i_partition;

            /* refine qpel */
            //FIXME mb_type costs?
            if( analysis.i_mbrd || !h->mb.i_subpel_refine )
            {
                /* refine later */
            }
            else if( i_partition == D_16x16 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me16x16 );
                i_cost = analysis.l0.me16x16.cost;
            }
            else if( i_partition == D_16x8 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
                x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
                i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
            }
            else if( i_partition == D_8x16 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
                x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
                i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
            }
            else if( i_partition == D_8x8 )
            {
                int i8x8;
                i_cost = 0;
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    switch( h->mb.i_sub_partition[i8x8] )
                    {
                        case D_L0_8x8:
                            x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
                            i_cost += analysis.l0.me8x8[i8x8].cost;
                            break;
                        case D_L0_8x4:
                            x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
                            i_cost += analysis.l0.me8x4[i8x8][0].cost +
                                      analysis.l0.me8x4[i8x8][1].cost;
                            break;
                        case D_L0_4x8:
                            x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
                            i_cost += analysis.l0.me4x8[i8x8][0].cost +
                                      analysis.l0.me4x8[i8x8][1].cost;
                            break;

                        case D_L0_4x4:
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
                            i_cost += analysis.l0.me4x4[i8x8][0].cost +
                                      analysis.l0.me4x4[i8x8][1].cost +
                                      analysis.l0.me4x4[i8x8][2].cost +
                                      analysis.l0.me4x4[i8x8][3].cost;
                            break;
                        default:
                            x264_log( h, X264_LOG_ERROR, "internal error (!8x8 && !4x4)\n" );
                            break;
                    }
                }
            }

            if( h->mb.b_chroma_me )
            {
                x264_mb_analyse_intra_chroma( h, &analysis );
                x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_i8x8chroma );
                analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
                analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
                analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
            }
            else
                x264_mb_analyse_intra( h, &analysis, i_cost );

            COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
            COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 );
            COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 );

            h->mb.i_type = i_type;

            if( analysis.i_mbrd >= 2 && h->mb.i_type != I_PCM )
            {
                if( IS_INTRA( h->mb.i_type ) )
                {
                    x264_intra_rd_refine( h, &analysis );
                }
                else if( i_partition == D_16x16 )
                {
                    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref );
                    analysis.l0.me16x16.cost = i_cost;
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );
                }
                else if( i_partition == D_16x8 )
                {
                    h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
                    h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
                    x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].i_ref );
                    x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].i_ref );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], analysis.i_lambda2, 0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 8, 0 );
                }
                else if( i_partition == D_8x16 )
                {
                    h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
                    h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
                    x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].i_ref );
                    x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].i_ref );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 4, 0 );
                }
                else if( i_partition == D_8x8 )
                {
                    int i8x8;
                    x264_analyse_update_cache( h, &analysis );
                    for( i8x8 = 0; i8x8 < 4; i8x8++ )
                    {
                        if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8*4, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_8x4 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][1], analysis.i_lambda2, i8x8*4+2, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_4x8 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_4x4 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][2], analysis.i_lambda2, i8x8*4+2, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][3], analysis.i_lambda2, i8x8*4+3, 0 );
                        }
                    }
                }
            }
        }
    }
//} macroblock_analyse_P //} macroblock_analyse_P //} macroblock_analyse_P //} macroblock_analyse_P 

    dull_analyse_update_cache_P( h, &analysis );
}