Example #1
0
int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) {
    cl_int status;
    if (os->xscale != xscale || os->width < width) {
        cl_float *xweights = hb_bicubic_weights(xscale, width);
        CL_FREE(os->bicubic_x_weights);
        CREATEBUF(os->bicubic_x_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * width * 4);
        OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL );
        os->width = width;
        os->xscale = xscale;
        free(xweights);
    }

    if ((os->yscale != yscale) || (os->height < height)) {
        cl_float *yweights = hb_bicubic_weights(yscale, height);
        CL_FREE(os->bicubic_y_weights);
        CREATEBUF(os->bicubic_y_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * height * 4);
        OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL );
        os->height = height;
        os->yscale = yscale;
        free(yweights);
    }
    return 0;
}
Example #2
0
int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda )
{
    if( fenc->b_intra_calculated )
        return 0;
    fenc->b_intra_calculated = 1;

    x264_opencl_function_t *ocl = h->opencl.ocl;
    int luma_length = fenc->i_stride[0] * fenc->i_lines[0];

#define CREATEBUF( out, flags, size )\
    out = ocl->clCreateBuffer( h->opencl.context, (flags), (size), NULL, &status );\
    if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateBuffer error '%d'\n", status ); return -1; }
#define CREATEIMAGE( out, flags, pf, width, height )\
    out = ocl->clCreateImage2D( h->opencl.context, (flags), &pf, width, height, 0, NULL, &status );\
    if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateImage2D error '%d'\n", status ); return -1; }

    int mb_count = h->mb.i_mb_count;
    cl_int status;

    if( !h->opencl.lowres_mv_costs )
    {
        /* Allocate shared memory buffers */
        int width = h->mb.i_mb_width * 8 * sizeof(pixel);
        int height = h->mb.i_mb_height * 8 * sizeof(pixel);

        cl_image_format pixel_format;
        pixel_format.image_channel_order = CL_R;
        pixel_format.image_channel_data_type = CL_UNSIGNED_INT32;
        CREATEIMAGE( h->opencl.weighted_luma_hpel, CL_MEM_READ_WRITE, pixel_format, width, height );

        for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
        {
            pixel_format.image_channel_order = CL_RGBA;
            pixel_format.image_channel_data_type = CL_UNSIGNED_INT8;
            CREATEIMAGE( h->opencl.weighted_scaled_images[i], CL_MEM_READ_WRITE, pixel_format, width, height );
            width >>= 1;
            height >>= 1;
        }

        CREATEBUF( h->opencl.lowres_mv_costs,     CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) );
        CREATEBUF( h->opencl.lowres_costs[0],     CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) );
        CREATEBUF( h->opencl.lowres_costs[1],     CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) );
        CREATEBUF( h->opencl.mv_buffers[0],       CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 );
        CREATEBUF( h->opencl.mv_buffers[1],       CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 );
        CREATEBUF( h->opencl.mvp_buffer,          CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 );
        CREATEBUF( h->opencl.frame_stats[0],      CL_MEM_WRITE_ONLY, 4 * sizeof(int) );
        CREATEBUF( h->opencl.frame_stats[1],      CL_MEM_WRITE_ONLY, 4 * sizeof(int) );
        CREATEBUF( h->opencl.row_satds[0],        CL_MEM_WRITE_ONLY, h->mb.i_mb_height * sizeof(int) );
        CREATEBUF( h->opencl.row_satds[1],        CL_MEM_WRITE_ONLY, h->mb.i_mb_height * sizeof(int) );
        CREATEBUF( h->opencl.luma_16x16_image[0], CL_MEM_READ_ONLY,  luma_length );
        CREATEBUF( h->opencl.luma_16x16_image[1], CL_MEM_READ_ONLY,  luma_length );
    }