示例#1
0
int openclInit()
{
    cl_int ret;
    openclRetTackle( clGetPlatformIDs(1, &cpPlatform, NULL), "clGetPlatFormIDs");
    openclRetTackle( clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU,1,&cdDevice,NULL), "clGetDeviceIDs");
    cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ret);
    #if (defined CL_DEBUG) || (defined CL_VERBOSE)
    printCLDeviceInfo();
    #endif
    openclRetTackle( ret , "clCreateContext" );
    cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ret);
    openclRetTackle( ret , "clCreateCommandQueue");
    return CL_SUCCESS;
}
示例#2
0
int main() {
	std::ios::sync_with_stdio();
	cl_int err = 0;
	cl_uint num_devices;
	cl_uint num_plats;
	cl_platform_id *plat;
	cl_device_id final_device;

	err = clGetPlatformIDs(0, NULL, &num_plats);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);
	if (num_plats > 1)		printf("There are %u platforms\n", num_plats);
	else if (num_plats == 1)	printf("There's exactly one platform, namely:\n");
	else				printf("There are no available platforms"), exit(0);

	plat = (cl_platform_id*) malloc(sizeof(cl_platform_id) * num_plats);
	err = clGetPlatformIDs(num_plats, plat, NULL);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);

	for (int i = 0; i < num_plats; i++) {
		char plat_info[1024];

		err = clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, 1024, plat_info, NULL);
		if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);
		printf("Platform %d:\n", i);
		printf(" * %s\n", plat_info);

		err = clGetDeviceIDs(plat[i], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
		if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);
		printf("There are %u devices found, namely:\n", num_devices);

		cl_device_id devices[num_devices];
		err = clGetDeviceIDs(plat[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
		if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);
		
		for (int j = 0; j < num_devices; j++) {
			cl_device_type current_type;
			clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(cl_device_type), &current_type, NULL);
			if (current_type == CL_DEVICE_TYPE_GPU) {
				printf("Found GPU\n");
				final_device = devices[j];
			}

			printCLDeviceInfo(devices[j],false);
			if (j != num_devices - 1) putchar('\n');
		}
	}
	printf("\n");

	size_t global = 8092;
	unsigned int count = global * N;

	// Fill the array
	float* input_data = (float*)malloc(count * sizeof(float));
	srand(time(NULL));
	for (int i = 0; i < count; i++) input_data[i] = rand() / (float)RAND_MAX;

	// Load the kernel
	const char* program_source = readFile("kernel/square.cl");
	printf("Loaded kernel program source:\n%s\n", program_source);

	// Set OpenCL context
	cl_context context = clCreateContext(0, 1, &final_device, NULL, NULL, &err);
	if (err != CL_SUCCESS)		printf("ERROR at line %u: Failed to make OpenCL context\n", __LINE__);

	// Create command queue
	cl_command_queue commands = clCreateCommandQueue(context, final_device, 0, &err);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);

	// Create the program from source and build it.
	cl_program program = clCreateProgramWithSource(context, 1, (const char **) &program_source, NULL, &err);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);
	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (err != CL_SUCCESS) {
		size_t len;
		char buffer[2048];

		printf("Error: Failed to build program executable!\n");
		clGetProgramBuildInfo(program, final_device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		printf("%s\n", buffer);
	}
	cl_kernel kernel = clCreateKernel(program, "square", &err);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);

	// Create the buffers for input and output arrays.
	cl_mem read_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*count, NULL, &err);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);
	cl_mem write_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*count, NULL, &err);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);

	// Copy the input data (blocking)
	err = clEnqueueWriteBuffer(commands, read_buffer, CL_TRUE, 0, sizeof(float)*count, input_data, 0, NULL, NULL);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);

	// Set the kernel arguments
	err = 0;
	err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &read_buffer);
	err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &write_buffer);
	err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);	
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);

	std::clock_t c_start = std::clock();
	// Execute the kernel
	err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);

	// Give it time to finish
	clFinish(commands);

	float* output_data = (float*)malloc(count * sizeof(float));
	err = clEnqueueReadBuffer(commands, write_buffer, CL_TRUE, 0, sizeof(float)*count, output_data, 0, NULL, NULL);
	if (err != CL_SUCCESS)		printf("ERROR at line %u\n", __LINE__);
	std::clock_t c_end = std::clock();
	printf("Test duration (OpenCL): %f ms\n", 1000.0 * (c_end - c_start) / CLOCKS_PER_SEC);

	float *test_result = (float*)malloc(count * sizeof(float));
	c_start = std::clock();
	for (int i = 0; i < count; i++) {
		float num = input_data[i];
		test_result[i] = num*num;
	}
	c_end = std::clock();
	printf("Test duration (regular): %f ms\n", 1000.0 * (c_end - c_start) / CLOCKS_PER_SEC);
	printf("Validating..:\n");
	// Validate everything
	int correct_opencl = 0;
	int correct_regular = 0;
	for (int i = 0; i < count; i++) {
		if (output_data[i] == input_data[i]*input_data[i]) correct_opencl++;
		if (test_result[i] == input_data[i]*input_data[i]) correct_regular++;
	}
	printf("%d/%d correct results (OpenCL)\n", correct_opencl, count);
	printf("%d/%d correct results (regular)\n", correct_regular, count);
}