示例#1
0
/*
 * Create and build OpenCL program from its source code.
 *
 * Reads "Template.cl" through ReadSourceFromFile, creates the program object
 * in ocl->context (stored in ocl->program) and builds it for ocl->device.
 * When the build fails with CL_BUILD_PROGRAM_FAILURE the build log is
 * retrieved and reported through LogError.
 *
 * Returns CL_SUCCESS on success, otherwise the error code of the first
 * failing step. ocl->program is not released here when the build fails.
 */
int CreateAndBuildProgram(ocl_args_d_t *ocl)
{
    cl_int err = CL_SUCCESS;

    // Upload the OpenCL C source code from the input file to source
    // The size of the C program is returned in src_size
    char* source = NULL;
    size_t src_size = 0;
    err = ReadSourceFromFile("Template.cl", &source, &src_size);
    if (CL_SUCCESS != err)
    {
        LogError("Error: ReadSourceFromFile returned %s.\n", TranslateOpenCLError(err));
        goto Finish;
    }

    // The source is passed as a single string together with its explicit length.
    ocl->program = clCreateProgramWithSource(ocl->context, 1, (const char**)&source, &src_size, &err);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clCreateProgramWithSource returned %s.\n", TranslateOpenCLError(err));
        goto Finish;
    }

    // Build the program
    // During creation a program is not built. You need to explicitly call build function.
    // Here you just use create-build sequence,
    // but there are also other possibilities when program consist of several parts,
    // some of which are libraries, and you may want to consider using clCompileProgram and clLinkProgram as
    // alternatives.
    err = clBuildProgram(ocl->program, 1, &ocl->device, "", NULL, NULL);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clBuildProgram() for source program returned %s.\n", TranslateOpenCLError(err));

        // In case of a compilation failure report the build log.
        // First query the size of the log, then fetch the log itself.
        if (err == CL_BUILD_PROGRAM_FAILURE)
        {
            size_t log_size = 0;
            cl_int log_err = clGetProgramBuildInfo(ocl->program, ocl->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
            if (CL_SUCCESS != log_err)
            {
                // Do not trust a size written by a failed query.
                log_size = 0;
            }

            // One extra zero-initialized byte keeps the buffer non-empty and
            // terminated even when the log is empty or could not be queried,
            // so &build_log[0] is always a valid C string.
            std::vector<char> build_log(log_size + 1);
            if (log_size > 0)
            {
                log_err = clGetProgramBuildInfo(ocl->program, ocl->device, CL_PROGRAM_BUILD_LOG, log_size, &build_log[0], NULL);
                if (CL_SUCCESS != log_err)
                {
                    // Drop whatever a failed call may have partially written.
                    build_log[0] = '\0';
                }
            }

            if (CL_SUCCESS != log_err)
            {
                LogError("Error: clGetProgramBuildInfo returned %s.\n", TranslateOpenCLError(log_err));
            }

            LogError("Error happened during the build of OpenCL program.\nBuild log:%s", &build_log[0]);
        }
    }

Finish:
    // delete[] on a null pointer is a no-op, so no guard is needed.
    delete[] source;
    source = NULL;

    return err;
}
示例#2
0
/*
 * Load "ray_algorithm.cl", create an OpenCL program from it and build it.
 *
 * Uses the `ocl` object defined outside this function: the program is created
 * in ocl.context, stored in ocl.program and built for 2 devices taken from
 * ocl.device.
 *
 * Returns the status of the last OpenCL/helper call that ran (CL_SUCCESS when
 * every step succeeded), converted to cl_uint.
 */
cl_uint CreateAndBuildProgram()
{
    char*  kernel_text = NULL;   // source text filled in by ReadSourceFromFile
    size_t kernel_len  = 0;      // length of kernel_text

    cl_int status = ReadSourceFromFile("ray_algorithm.cl", &kernel_text, &kernel_len);
    if (status != CL_SUCCESS)
    {
        printf("Error: ReadSourceFromFile returned %s.\n", TranslateOpenCLError(status));
    }
    else
    {
        // Wrap the loaded text in a program object.
        ocl.program = clCreateProgramWithSource(ocl.context, 1, (const char**)&kernel_text, &kernel_len, &status);
        if (status != CL_SUCCESS)
        {
            printf("Error: clCreateProgramWithSource returned %s.\n", TranslateOpenCLError(status));
        }
        else
        {
            // A freshly created program is not built yet; build it explicitly.
            status = clBuildProgram(ocl.program, 2, ocl.device, "", NULL, NULL);
            if (status != CL_SUCCESS)
            {
                printf("Error: clBuildProgram() for source program returned %s.\n", TranslateOpenCLError(status));
            }
        }
    }

    // Every path ends here; delete[] accepts a null pointer.
    delete[] kernel_text;

    return status;
}
示例#3
0
void imgdiff(size_t N, size_t width, size_t height, double* diff_matrix, unsigned char* images) 
{

	//// we need to fill in ////
	cl_platform_id *platform;
	cl_device_type dev_type = CL_DEVICE_TYPE_GPU;
	cl_device_id *devs;
	cl_context context;
	cl_command_queue *cmd_queues;
	cl_program program;
	cl_kernel *kernels;
	cl_uint num_platforms;
	cl_uint num_devs;

	cl_mem* m_image1;
	cl_mem* m_image2;
	cl_mem* m_result;

	cl_event* ev_kernels;

	int err = CL_SUCCESS;

	int i, j, k;
	
	// modify version
	err = clGetPlatformIDs(0, NULL, &num_platforms);
	if(err != CL_SUCCESS)
	{
		printf("Error: platform error\n");
		return 0;
	}

	if(num_platforms == 0)
	{
		printf("Error: platform no count\n");
		return 0;
	}

	platform = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platforms);
	err = clGetPlatformIDs(num_platforms, platform, NULL);
	if(err != CL_SUCCESS)
	{
		printf("Error: clGetPlatformIDs error\n");
		return 0;
	}

	for(i = 0; i<num_platforms; i++)
	{
		err = clGetDeviceIDs(platform[i], dev_type, 0, NULL, &num_devs);
		if(err != CL_SUCCESS)
		{
			printf("Error: clGetDevice\n");
			return 0;
		}
		if(num_devs >= 1)
		{
			devs = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devs);

			clGetDeviceIDs(platform[i], dev_type, num_devs, devs, NULL);
			break;
		}
	}

	context = clCreateContext(NULL, num_devs, devs, NULL, NULL, &err);
	if(err != CL_SUCCESS)
	{
		printf("Error: clCreateContext error\n");
		return 0;
	}

	char* source = NULL;
	size_t src_size = 0;
	err = ReadSourceFromFile("./imgdiff_cal.cl", &source, &src_size);
	if (CL_SUCCESS != err)
	{
		printf("Error: ReadSourceFromFile returned %s.\n", err);
		free(source);
		return 0;
	}

	program = clCreateProgramWithSource(context, 1, (const char**)&source, &src_size, &err);
	if(err != CL_SUCCESS)
	{
		printf("Error: clCreateProgram error\n");
		return 0;
	}

	free(source);
	printf("Create Program Success\n");

#if DBG
	// Measure clBuildProgram -@henry added
	gettimeofday(&start_m, NULL );
#endif
	err = clBuildProgram(program, num_devs, devs, "", NULL, NULL);
#if DBG
	gettimeofday(&end_m, NULL );

	double time = (end_m.tv_usec - start_m.tv_usec)*1e-6 + (end_m.tv_sec - start_m.tv_sec);
	printf("[Debug] Elapsed Time of clBuildProgram() : %lf s\n",time); 
#endif
	if(err != CL_SUCCESS)
	{
		printf("Error: clBuildProgram\n");
		return 0;
	}

	printf("Build Program Success\n");

	kernels = (cl_kernel*)malloc(sizeof(cl_kernel)*num_devs);
	for(i = 0; i<num_devs; i++)
	{
		kernels[i] = clCreateKernel(program, "imgdiff_cal", NULL);
	}


	printf("Create Kernel Success\n");

	cmd_queues = (cl_command_queue*)malloc(sizeof(cl_command_queue)*num_devs);
	for(i=0; i<num_devs; i++)
	{
		cmd_queues[i] = clCreateCommandQueue(context, devs[i], 0, &err);
		if(err != CL_SUCCESS)
		{
			printf("Error: clCreateCommandQueue error\n");
			return 0;
		}

	}

	printf("Create commandQueue Success\n");
	int LOCAL_WIDTH = 16;
	int LOCAL_HEIGHT = 16;


	int WORK_WIDTH = ceil((double)width / LOCAL_WIDTH)*LOCAL_WIDTH;
	int WORK_HEIGHT = ceil((double)height/LOCAL_HEIGHT) * LOCAL_HEIGHT;
	int WORK_AMOUNT = width * height;
	int WORK_GROUP_COUNT = ceil(((double)WORK_WIDTH * WORK_HEIGHT) / (LOCAL_WIDTH * LOCAL_HEIGHT));
	
	int WORK_GROUP_WIDTH = width;
	int WORK_GROUP_HEIGHT = height;

	int SAMPLE_COUNT = 16;
	int WORK_COUNT[num_devs];
	double tmp_result_data[WORK_GROUP_COUNT*SAMPLE_COUNT];

	printf("WORK_WIDTH %d\tWORK_HEIGHT %d\t WORK_AMOUNT %d\t WORK_GROUP_COUNT %d\n", 
			WORK_WIDTH, WORK_HEIGHT, WORK_AMOUNT, WORK_GROUP_COUNT);

	m_image1 = (cl_mem*)malloc(sizeof(cl_mem)* num_devs);
	m_image2 = (cl_mem*)malloc(sizeof(cl_mem)* num_devs);
	
	m_result = (cl_mem*)malloc(sizeof(cl_mem)* num_devs);


	for(i=0; i<num_devs; i++)
	{
		m_image1[i] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned char) * WORK_AMOUNT * 3, NULL, NULL);
		m_image2[i] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned char) * WORK_AMOUNT*SAMPLE_COUNT * 3, NULL, NULL);
			
		m_result[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(double) * WORK_GROUP_COUNT * SAMPLE_COUNT, NULL, NULL);
		clSetKernelArg(kernels[i], 0, sizeof(cl_mem), (void*)&m_image1[i]);
		clSetKernelArg(kernels[i], 1, sizeof(cl_mem), (void*)&m_image2[i]);
		clSetKernelArg(kernels[i], 2, sizeof(cl_mem), (void*)&m_result[i]);
		clSetKernelArg(kernels[i], 3, sizeof(cl_int), &WORK_GROUP_WIDTH);
		clSetKernelArg(kernels[i], 4, sizeof(cl_int), &WORK_GROUP_HEIGHT);
	}

	ev_kernels  = (cl_event*)malloc(sizeof(cl_event)*num_devs);

	int row, col;

	row = 0;
	col = 1;

	for(row = 0; row < N; row++)
	{
		if( (N-row-1) < (SAMPLE_COUNT*4) && SAMPLE_COUNT > 1)
			SAMPLE_COUNT = SAMPLE_COUNT / 2;
		
		int remain_count = N - (row + 1);

		for(i=0; i<num_devs; i++)
		{

			clEnqueueWriteBuffer(cmd_queues[i], m_image1[i], CL_FALSE, 0, 
					sizeof(unsigned char) * WORK_AMOUNT * 3, (void*)(images + 
					(row * width*height)*3), 0, NULL, NULL);
		}
			
		diff_matrix[row*N + row] = 0;
		col = row + 1;
		while( col< N)
		{
			size_t lws[2] = { LOCAL_WIDTH, LOCAL_HEIGHT };
			size_t gws[2] = { WORK_WIDTH, WORK_HEIGHT};
			
			for(i=0; i<num_devs; i++)
			{
				if((remain_count - SAMPLE_COUNT) < 0)
				{
					WORK_COUNT[i] = remain_count;
					remain_count = 0;
				}
				else
				{
					WORK_COUNT[i] = SAMPLE_COUNT;
					remain_count = remain_count - SAMPLE_COUNT;
				}
				
				if(WORK_COUNT[i] != 0)
				{
					
					clSetKernelArg(kernels[i], 5, sizeof(cl_int), &WORK_COUNT[i]);
					
					int offset = 0;
					for(j=0; j<i; j++)
						offset += WORK_COUNT[j];


					err = clEnqueueWriteBuffer(cmd_queues[i], m_image2[i], CL_FALSE, 0, 
							sizeof(unsigned char)*WORK_AMOUNT*WORK_COUNT[i]*3, 
							(void*)(images +((col * width*height) + (WORK_AMOUNT * 
										offset))*3), 0, NULL, NULL);

				}


			}

			for( i=0; i < num_devs; i++ )
			{
				if(WORK_COUNT[i] != 0)
				{
					err = clEnqueueNDRangeKernel(cmd_queues[i], kernels[i], 2, NULL, gws, lws, 0, NULL, NULL);
					if(err != CL_SUCCESS)
					{
						printf("Error: clEnqueueNDRangeKernel %d error\n", i);
						printf("%s\n", TranslateOpenCLError(err));
						return 0;
					}
				}
			}
			double tmp_sum = 0;
			i = 0;
			for( i = num_devs -1; i >= 0; i-- )
			{
				
				if(WORK_COUNT[i] != 0)
				{
					err = clEnqueueReadBuffer( cmd_queues[i], m_result[i], CL_TRUE, 0, 
						sizeof(double) * WORK_GROUP_COUNT * WORK_COUNT[i], 
						tmp_result_data, 0, NULL, NULL); 
					if(err != CL_SUCCESS)
					{
						printf("Error: clEnqueueReadBuffer%d error\n", i);
						return 0;
					}
					//printf("receive......");

					for(j = 0; j<WORK_COUNT[i]; j++)
					{	
						tmp_sum = 0;
						for(k = 0; k<WORK_GROUP_COUNT; k++)
						{
							tmp_sum += tmp_result_data[k + j*WORK_GROUP_COUNT];
							//printf("%lf\t", tmp_result_data[k+j*WORK_GROUP_COUNT]);

						}
						//printf("%lf %lf\n", tmp_sum, tmp_result_data[j*WORK_GROUP_COUNT]);
						
						int offset = 0;
						for(k=0; k<i; k++)
							offset += WORK_COUNT[k];
						diff_matrix[row*N+col+j+offset] = diff_matrix[(col+j+offset)*N+row] = tmp_sum;
					}

				}
				
			}
			
			for( i = 0; i < num_devs; i++ )
			{
				col += WORK_COUNT[i];
			} 
		}
	}

}