コード例 #1
0
ファイル: CVC_cl.cpp プロジェクト: celesius/PRiMEStereoMatch
int CVC_cl::buildCV(const Mat& lImg, const Mat& rImg, cl_mem *memoryObjects)
{
	lImgRGB = new Mat[lImg.channels()];
    rImgRGB = new Mat[rImg.channels()];
    split(lImg, lImgRGB);
    split(rImg, rImgRGB);

	cvtColor(lImg, lGray, CV_RGB2GRAY);
	cvtColor(rImg, rGray, CV_RGB2GRAY);

	/* Map the input memory objects to host side pointers. */
	bool EnqueueMapBufferSuccess = true;
//	if(imgType == CV_32F)
//	{
	    //Sobel filter to compute X gradient     <-- investigate Mali Sobel OpenCL kernel
		Sobel( lGray, lGrdX, CV_32F, 1, 0, 1 ); // ex time 16 -17ms
		Sobel( rGray, rGrdX, CV_32F, 1, 0, 1 ); // for both
		lGrdX += 0.5;
		rGrdX += 0.5;

		cl_float *clbuffer_lImgRGB[3], *clbuffer_rImgRGB[3];
		for (int i = 0; i < channels; i++)
		{
			clbuffer_lImgRGB[i] = (cl_float*)clEnqueueMapBuffer(*commandQueue, memoryObjects[i], CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, bufferSize_2D, 0, NULL, NULL, &errorNumber);
			EnqueueMapBufferSuccess &= checkSuccess(errorNumber);
			clbuffer_rImgRGB[i] = (cl_float*)clEnqueueMapBuffer(*commandQueue, memoryObjects[i+channels], CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, bufferSize_2D, 0, NULL, NULL, &errorNumber);
			EnqueueMapBufferSuccess &= checkSuccess(errorNumber);

			memcpy(clbuffer_lImgRGB[i], lImgRGB[i].data, bufferSize_2D);
			memcpy(clbuffer_rImgRGB[i], rImgRGB[i].data, bufferSize_2D);
		}
//	}
//	else if(imgType == CV_8U)
//	{
//		//Sobel filter to compute X gradient
//		Sobel( lGray, lGrdX, CV_8U, 1, 0, 1 );
//		Sobel( rGray, rGrdX, CV_8U, 1, 0, 1 );
//		lGrdX += 0.5;
//		rGrdX += 0.5;
//
//		cl_uchar *clbuffer_lImgRGB[3], *clbuffer_rImgRGB[3];
//		//Six 1-channel 2D buffers W*H
//		for (int i = 0; i < channels; i++)
//		{
//			clbuffer_lImgRGB[i] = (cl_uchar*)clEnqueueMapBuffer(*commandQueue, memoryObjects[i], CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, bufferSize_2D, 0, NULL, NULL, &errorNumber);
//			EnqueueMapBufferSuccess &= checkSuccess(errorNumber);
//			clbuffer_rImgRGB[i] = (cl_uchar*)clEnqueueMapBuffer(*commandQueue, memoryObjects[i+channels], CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, bufferSize_2D, 0, NULL, NULL, &errorNumber);
//			EnqueueMapBufferSuccess &= checkSuccess(errorNumber);
//
//			memcpy(clbuffer_lImgRGB[i], lImgRGB[i].data, bufferSize_2D);
//			memcpy(clbuffer_rImgRGB[i], rImgRGB[i].data, bufferSize_2D);
//		}
//	}

	//Two 1-channel 2D buffers W*H
	cl_uchar *clbuffer_lGrdX = (cl_uchar*)clEnqueueMapBuffer(*commandQueue, memoryObjects[CVC_LGRDX], CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, bufferSize_2D, 0, NULL, NULL, &errorNumber);
	EnqueueMapBufferSuccess &= checkSuccess(errorNumber);
	cl_uchar *clbuffer_rGrdX = (cl_uchar*)clEnqueueMapBuffer(*commandQueue, memoryObjects[CVC_RGRDX], CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, bufferSize_2D, 0, NULL, NULL, &errorNumber);
	EnqueueMapBufferSuccess &= checkSuccess(errorNumber);
	if (!EnqueueMapBufferSuccess)
	{
	   cleanUpOpenCL(NULL, NULL, program, NULL, NULL, 0);
	   cerr << "Mapping memory objects failed " << __FILE__ << ":"<< __LINE__ << endl;
	}

    //printf("CVC_cl: Copying data to OpenCL memory space\n");
	memcpy(clbuffer_lGrdX, lGrdX.data, bufferSize_2D);
	memcpy(clbuffer_rGrdX, rGrdX.data, bufferSize_2D);

    int arg_num = 0;
    /* Setup the kernel arguments. */
    bool setKernelArgumentsSuccess = true;
	setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CVC_LIMGR]));
	setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CVC_LIMGG]));
	setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CVC_LIMGB]));
	setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CVC_RIMGR]));
	setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CVC_RIMGG]));
	setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CVC_RIMGB]));
    setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CVC_LGRDX]));
    setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CVC_RGRDX]));
    setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_int), &height));
    setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_int), &width));
    setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CV_LCV]));
    setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, arg_num++, sizeof(cl_mem), &memoryObjects[CV_RCV]));
    if (!setKernelArgumentsSuccess)
    {
		cleanUpOpenCL(NULL, NULL, NULL, NULL, NULL, 0);
        cerr << "Failed setting OpenCL kernel arguments. " << __FILE__ << ":"<< __LINE__ << endl;
    }

    if(OCL_STATS) printf("CVC_cl: Running CVC Kernels\n");
    /* Enqueue the kernel */
    if (!checkSuccess(clEnqueueNDRangeKernel(*commandQueue, kernel, 3, NULL, globalWorksize, NULL, 0, NULL, &event)))
    {
        cleanUpOpenCL(NULL, NULL, NULL, NULL, NULL, 0);
        cerr << "Failed enqueuing the kernel. " << __FILE__ << ":"<< __LINE__ << endl;
        return 1;
    }

    /* Wait for completion */
    if (!checkSuccess(clFinish(*commandQueue)))
    {
        cleanUpOpenCL(NULL, NULL, NULL, NULL, NULL, 0);
        cerr << "Failed waiting for kernel execution to finish. " << __FILE__ << ":"<< __LINE__ << endl;
        return 1;
    }

    /* Print the profiling information for the event. */
    if(OCL_STATS) printProfilingInfo(event);
    /* Release the event object. */
    if (!checkSuccess(clReleaseEvent(event)))
    {
        cleanUpOpenCL(*context, *commandQueue, program, NULL, NULL, 0);
        cerr << "Failed releasing the event object. " << __FILE__ << ":"<< __LINE__ << endl;
        return 1;
    }

    return 0;
}
コード例 #2
0
float sgemmMain(int rowa,int cola,int colb)
{
	 cl_context context = 0;
	 cl_command_queue commandQueue = 0;
	 cl_program program = 0;
	 cl_device_id device = 0;
	 cl_kernel kernel = 0;
	 const unsigned int numberOfMemoryObjects = 3;
	 cl_mem memoryObjectsa = 0;
	 cl_mem memoryObjectsb = 0;
	 cl_mem memoryObjectsc = 0;
	 cl_int errorNumber;
	 cl_uint clrowa = rowa;
	 cl_uint clcola = cola;
	 cl_uint clcolb = colb;
	 int err;
	 err = createContext(&context);
	 LOGD("create context");
	 err = createCommandQueue(context, &commandQueue, &device);
	 err = createProgram(context, device, "/mnt/sdcard/kernel/sgemm.cl", &program);
	 kernel = clCreateKernel(program, "sgemm", &errorNumber);
	 LOGD("createKernel code %d",errorNumber);
	 LOGD("start computing");
	 float alpha = 1;
	 float beta = 0.1;

	 /* Create the matrices. */
	 size_t matrixSizea = rowa * cola;
	 size_t matrixSizeb = cola * colb;
	 size_t matrixSizec = rowa * colb;

	 /* As all the matrices have the same size, the buffer size is common. */
	 size_t bufferSizea = matrixSizea * sizeof(float);
	 size_t bufferSizeb = matrixSizeb * sizeof(float);
	 size_t bufferSizec = matrixSizec * sizeof(float);

	 /* Create buffers for the matrices used in the kernel. */
	 int createMemoryObjectsSuccess = 0;
	 memoryObjectsa = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSizea, NULL, &errorNumber);
	 createMemoryObjectsSuccess &= errorNumber;
	 memoryObjectsb = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSizeb, NULL, &errorNumber);
	 createMemoryObjectsSuccess &= errorNumber;
	 memoryObjectsc = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufferSizec, NULL, &errorNumber);
	 createMemoryObjectsSuccess &= errorNumber;
	 LOGD("create memory err %d",createMemoryObjectsSuccess);
	 int mapMemoryObjectsSuccess = 0;
	 cl_float* matrixA = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjectsa, CL_TRUE, CL_MAP_WRITE, 0, bufferSizea, 0, NULL, NULL, &errorNumber);
	 mapMemoryObjectsSuccess &= errorNumber;
	 cl_float* matrixB = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjectsb, CL_TRUE, CL_MAP_WRITE, 0, bufferSizeb, 0, NULL, NULL, &errorNumber);
	 mapMemoryObjectsSuccess &= errorNumber;
	 cl_float* matrixC = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjectsc, CL_TRUE, CL_MAP_WRITE, 0, bufferSizec, 0, NULL, NULL, &errorNumber);
	 mapMemoryObjectsSuccess &= errorNumber;
	 LOGD("map memory err %d",mapMemoryObjectsSuccess);

	 sgemmInitialize(rowa,cola,colb, matrixA, matrixB, matrixC);
	 LOGD("data initial finish");
	 int unmapMemoryObjectsSuccess = 0;
	 errorNumber = clEnqueueUnmapMemObject(commandQueue, memoryObjectsa, matrixA, 0, NULL, NULL);
	 LOGD("memory code %d",errorNumber);
	 unmapMemoryObjectsSuccess &= errorNumber;
	 errorNumber = clEnqueueUnmapMemObject(commandQueue, memoryObjectsb, matrixB, 0, NULL, NULL);
	 LOGD("memory code %d",errorNumber);
	 unmapMemoryObjectsSuccess &= errorNumber;
	 errorNumber = clEnqueueUnmapMemObject(commandQueue, memoryObjectsc, matrixC, 0, NULL, NULL);
	 LOGD("memory code %d",errorNumber);
	 unmapMemoryObjectsSuccess &= errorNumber;
	 LOGD("unmap memory err %d",unmapMemoryObjectsSuccess);

	 int setKernelArgumentsSuccess = 0;
	 errorNumber = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memoryObjectsa);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memoryObjectsb);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 2, sizeof(cl_mem), &memoryObjectsc);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 3, sizeof(cl_uint), &clrowa);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 4, sizeof(cl_uint), &clcola);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 5, sizeof(cl_uint), &clcolb);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 6, sizeof(cl_float), &alpha);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 7, sizeof(cl_float), &beta);
	 setKernelArgumentsSuccess &= errorNumber;
	 LOGD("setKernel err %d",setKernelArgumentsSuccess);

	 LOGD("start running kernel");
	 clock_t start_t,end_t;
	 float cost_time;
	 start_t = clock();
	 cl_event event = 0;
	 size_t globalWorksize[2] = {rowa, colb};
	 errorNumber = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalWorksize, NULL, 0, NULL, &event);
	 //LOGD("Enqueue err code %d",errorNumber);
	 errorNumber = clFinish(commandQueue);
	 end_t = clock();
	 cost_time = (float)(end_t-start_t)/CLOCKS_PER_SEC*1000;
	 LOGD("Finish err code %d",errorNumber);
	 float time;
	 time = printProfilingInfo(event);
	 LOGT("using CPU clock: %f ms",cost_time);
	 LOGT("using GPU clock: %f ms",time);
	 clReleaseEvent(event);
	 matrixC = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjectsc, CL_TRUE, CL_MAP_READ, 0, bufferSizec, 0, NULL, NULL, &errorNumber);
	 clEnqueueUnmapMemObject(commandQueue, memoryObjectsc, matrixC, 0, NULL, NULL);
	 LOGD("read out matrixC finish");
	 LOGD("matrixC value C(0,0): %f",matrixC[0]);
	 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjectsa, memoryObjectsb,memoryObjectsc,numberOfMemoryObjects);
	 LOGD("RUNNING finsh");
	 return time;
}