Ejemplo n.º 1
0
int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) {
    cl_int status;
    if (os->xscale != xscale || os->width < width) {
        cl_float *xweights = hb_bicubic_weights(xscale, width);
        CL_FREE(os->bicubic_x_weights);
        CREATEBUF(os->bicubic_x_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * width * 4);
        OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL );
        os->width = width;
        os->xscale = xscale;
        free(xweights);
    }

    if ((os->yscale != yscale) || (os->height < height)) {
        cl_float *yweights = hb_bicubic_weights(yscale, height);
        CL_FREE(os->bicubic_y_weights);
        CREATEBUF(os->bicubic_y_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * height * 4);
        OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL );
        os->height = height;
        os->yscale = yscale;
        free(yweights);
    }
    return 0;
}
Ejemplo n.º 2
0
static int to3d_ocl_fillHole(void **userdata, KernelEnv *kenv)
{
	cl_mem view = (cl_mem)userdata[0];
	cl_mem hole = (cl_mem)userdata[1];
	cl_mem holeU = (cl_mem)userdata[2];
	cl_mem holeV = (cl_mem)userdata[3];
	int w = (int)userdata[4];
	int h = (int)userdata[5];
	int base = (int)userdata[6];
	cl_kernel kernel = (cl_kernel)userdata[7];
	int flag = (int)userdata[8];
	int lr = (int)userdata[9];

	int status = 0;
    OCLCHECK( clSetKernelArg, kernel, 0, sizeof(cl_mem), &view );
	OCLCHECK( clSetKernelArg, kernel, 1, sizeof(cl_mem), &hole );
	OCLCHECK( clSetKernelArg, kernel, 2, sizeof(cl_mem), &holeU );
	OCLCHECK( clSetKernelArg, kernel, 3, sizeof(cl_mem), &holeV );
	OCLCHECK( clSetKernelArg, kernel, 4, sizeof(int), &w );
	OCLCHECK( clSetKernelArg, kernel, 5, sizeof(int), &h );
	OCLCHECK( clSetKernelArg, kernel, 6, sizeof(int), &base );

	//NDRangekernel
	kenv->kernel = kernel;
	size_t globalThreads[2], offset[2];
	if(flag == 0)
	{
		if(lr == 0)//left
		{
		 globalThreads[0] = w - (tx / 2 * F / Znear);
		 globalThreads[1] = h;
		 offset[0] = tx / 2 * F / Znear;
		 offset[1] = 0;
		}
		else
		{
			globalThreads[0] = w - (tx / 2 * F / Znear);
			globalThreads[1] = h;
			offset[0] = 0;
			offset[1] = 0;
		}
	}
	else
	{
		if(lr == 0)
		{
			globalThreads[0] = tx / 2 * F / Znear;
			globalThreads[1] = h;
			offset[0] = 0;
			offset[1] = 0; 
		}
		else
		{
			globalThreads[0] = tx / 2 * F / Znear;
			globalThreads[1] = h;
			offset[0] = w - (tx / 2 * F / Znear);
			offset[1] = 0; 
		}
	}
	 
	OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel,
						  2, offset,globalThreads, NULL, 0, NULL, NULL );
	return TO3D_SUCCESS;
}
Ejemplo n.º 3
0
/**
* executive scale using opencl
* get filter args
* create output buffer
* create horizontal filter buffer
* create vertical filter buffer
* create  kernels
*/
int hb_ocl_scale_func( void **data, KernelEnv *kenv )
{
    cl_int status;

    cl_mem in_buf = data[0];
    cl_mem out_buf = data[1];
    int crop_top = data[2];
    int crop_bottom = data[3];
    int crop_left = data[4];
    int crop_right = data[5];
    cl_int in_frame_w = (int)data[6];
    cl_int in_frame_h = (int)data[7];
    cl_int out_frame_w = (int)data[8];
    cl_int out_frame_h = (int)data[9];
    hb_oclscale_t  *os = data[10];
    hb_buffer_t *in = data[11];
    hb_buffer_t *out = data[12];

    if (os->initialized == 0)
    {
        hb_log( "Scaling With OpenCL" );
        if (kenv->isAMD != 0)
            hb_log( "Using Zero Copy");
        // create the block kernel
        cl_int status;
        os->m_kernel = clCreateKernel( kenv->program, "frame_scale", &status );

        os->initialized = 1;
    }

    {
        // Use the new kernel
        cl_event events[5];
        int eventCount = 0;

        if (kenv->isAMD == 0) {
            status = clEnqueueUnmapMemObject(kenv->command_queue, in->cl.buffer, in->data, 0, NULL, &events[eventCount++]);
            status = clEnqueueUnmapMemObject(kenv->command_queue, out->cl.buffer, out->data, 0, NULL, &events[eventCount++]);
        }

        cl_int srcPlaneOffset0 = in->plane[0].data - in->data;
        cl_int srcPlaneOffset1 = in->plane[1].data - in->data;
        cl_int srcPlaneOffset2 = in->plane[2].data - in->data;
        cl_int srcRowWords0 = in->plane[0].stride;
        cl_int srcRowWords1 = in->plane[1].stride;
        cl_int srcRowWords2 = in->plane[2].stride;
        cl_int dstPlaneOffset0 = out->plane[0].data - out->data;
        cl_int dstPlaneOffset1 = out->plane[1].data - out->data;
        cl_int dstPlaneOffset2 = out->plane[2].data - out->data;
        cl_int dstRowWords0 = out->plane[0].stride;
        cl_int dstRowWords1 = out->plane[1].stride;
        cl_int dstRowWords2 = out->plane[2].stride;

        if (crop_top != 0 || crop_bottom != 0 || crop_left != 0 || crop_right != 0) {
            srcPlaneOffset0 += crop_left + crop_top * srcRowWords0;
            srcPlaneOffset1 += crop_left / 2 + (crop_top / 2) * srcRowWords1;
            srcPlaneOffset2 += crop_left / 2 + (crop_top / 2) * srcRowWords2;
            in_frame_w = in_frame_w - crop_right - crop_left;
            in_frame_h = in_frame_h - crop_bottom - crop_top;
        }

        cl_float xscale = (out_frame_w * 1.0f) / in_frame_w;
        cl_float yscale = (out_frame_h * 1.0f) / in_frame_h;
        setupScaleWeights(xscale, yscale, out_frame_w, out_frame_h, os, kenv);

        OCLCHECK( clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf );
        OCLCHECK( clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf );
        OCLCHECK( clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale );
        OCLCHECK( clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale );
        OCLCHECK( clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2 );
        OCLCHECK( clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w );
        OCLCHECK( clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h );
        OCLCHECK( clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w );
        OCLCHECK( clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h );
        OCLCHECK( clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights );
        OCLCHECK( clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights );

        size_t workOffset[] = { 0, 0, 0 };
        size_t globalWorkSize[] = { 1, 1, 1 };
        size_t localWorkSize[] = { 1, 1, 1 };

        int xgroups = (out_frame_w + 63) / 64;
        int ygroups = (out_frame_h + 15) / 16;

        localWorkSize[0] = 64;
        localWorkSize[1] = 1;
        localWorkSize[2] = 1;
        globalWorkSize[0] = xgroups * 64;
        globalWorkSize[1] = ygroups;
        globalWorkSize[2] = 3;

        OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize, eventCount, (eventCount == 0) ? NULL : &events[0], &events[eventCount] );
        ++eventCount;

        if (kenv->isAMD == 0) {
            in->data = clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, in->alloc, (eventCount == 0) ? 0 : 1, (eventCount == 0) ? NULL : &events[eventCount - 1], &events[eventCount], &status);
            out->data = clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, out->alloc, (eventCount == 0) ? 0 : 1, (eventCount == 0) ? NULL : &events[eventCount - 1], &events[eventCount + 1], &status);
            eventCount += 2;
        }

        clFlush(kenv->command_queue);
        clWaitForEvents(eventCount, &events[0]);
        int i;
        for (i = 0; i < eventCount; ++i)
            clReleaseEvent(events[i]);
    }

    return 1;
}