int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) { cl_int status; if (os->xscale != xscale || os->width < width) { cl_float *xweights = hb_bicubic_weights(xscale, width); CL_FREE(os->bicubic_x_weights); CREATEBUF(os->bicubic_x_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * width * 4); OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL ); os->width = width; os->xscale = xscale; free(xweights); } if ((os->yscale != yscale) || (os->height < height)) { cl_float *yweights = hb_bicubic_weights(yscale, height); CL_FREE(os->bicubic_y_weights); CREATEBUF(os->bicubic_y_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * height * 4); OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL ); os->height = height; os->yscale = yscale; free(yweights); } return 0; }
int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda ) { if( fenc->b_intra_calculated ) return 0; fenc->b_intra_calculated = 1; x264_opencl_function_t *ocl = h->opencl.ocl; int luma_length = fenc->i_stride[0] * fenc->i_lines[0]; #define CREATEBUF( out, flags, size )\ out = ocl->clCreateBuffer( h->opencl.context, (flags), (size), NULL, &status );\ if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateBuffer error '%d'\n", status ); return -1; } #define CREATEIMAGE( out, flags, pf, width, height )\ out = ocl->clCreateImage2D( h->opencl.context, (flags), &pf, width, height, 0, NULL, &status );\ if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateImage2D error '%d'\n", status ); return -1; } int mb_count = h->mb.i_mb_count; cl_int status; if( !h->opencl.lowres_mv_costs ) { /* Allocate shared memory buffers */ int width = h->mb.i_mb_width * 8 * sizeof(pixel); int height = h->mb.i_mb_height * 8 * sizeof(pixel); cl_image_format pixel_format; pixel_format.image_channel_order = CL_R; pixel_format.image_channel_data_type = CL_UNSIGNED_INT32; CREATEIMAGE( h->opencl.weighted_luma_hpel, CL_MEM_READ_WRITE, pixel_format, width, height ); for( int i = 0; i < NUM_IMAGE_SCALES; i++ ) { pixel_format.image_channel_order = CL_RGBA; pixel_format.image_channel_data_type = CL_UNSIGNED_INT8; CREATEIMAGE( h->opencl.weighted_scaled_images[i], CL_MEM_READ_WRITE, pixel_format, width, height ); width >>= 1; height >>= 1; } CREATEBUF( h->opencl.lowres_mv_costs, CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) ); CREATEBUF( h->opencl.lowres_costs[0], CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) ); CREATEBUF( h->opencl.lowres_costs[1], CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) ); CREATEBUF( h->opencl.mv_buffers[0], CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 ); CREATEBUF( h->opencl.mv_buffers[1], CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 ); CREATEBUF( h->opencl.mvp_buffer, CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 ); CREATEBUF( h->opencl.frame_stats[0], CL_MEM_WRITE_ONLY, 4 * sizeof(int) ); CREATEBUF( h->opencl.frame_stats[1], CL_MEM_WRITE_ONLY, 4 * sizeof(int) ); CREATEBUF( h->opencl.row_satds[0], CL_MEM_WRITE_ONLY, h->mb.i_mb_height * sizeof(int) ); CREATEBUF( h->opencl.row_satds[1], CL_MEM_WRITE_ONLY, h->mb.i_mb_height * sizeof(int) ); CREATEBUF( h->opencl.luma_16x16_image[0], CL_MEM_READ_ONLY, luma_length ); CREATEBUF( h->opencl.luma_16x16_image[1], CL_MEM_READ_ONLY, luma_length ); }