void OpenCLPrinter::printPlatformAndDeviceInfo() { VECTOR_CLASS<cl::Platform> platforms; cl::Platform::get(&platforms); VECTOR_CLASS<cl::Device> devices; for(unsigned int i = 0; i < platforms.size(); i++) { printPlatformInfo(platforms[i]); platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices); for(unsigned int j = 0; j < devices.size(); j++) { printDeviceInfo(devices[j]); } } print("Number of platforms", platforms.size()); print("Number of devices", devices.size()); }
// Serial ray casting unsigned char* raycast_serial(unsigned char* data, unsigned char* region){ unsigned char* image = (unsigned char*)malloc(sizeof(unsigned char)*IMAGE_DIM*IMAGE_DIM); // Camera/eye position, and direction of viewing. These can be changed to look // at the volume from different angles. float3 camera = {.x=1000,.y=1000,.z=1000}; float3 forward = {.x=-1, .y=-1, .z=-1}; float3 z_axis = {.x=0, .y=0, .z = 1}; // Finding vectors aligned with the axis of the image float3 right = cross(forward, z_axis); float3 up = cross(right, forward); // Creating unity lenght vectors forward = normalize(forward); right = normalize(right); up = normalize(up); float fov = 3.14/4; float pixel_width = tan(fov/2.0)/(IMAGE_DIM/2); float step_size = 0.5; // For each pixel for(int y = -(IMAGE_DIM/2); y < (IMAGE_DIM/2); y++){ for(int x = -(IMAGE_DIM/2); x < (IMAGE_DIM/2); x++){ // Find the ray for this pixel float3 screen_center = add(camera, forward); float3 ray = add(add(screen_center, scale(right, x*pixel_width)), scale(up, y*pixel_width)); ray = add(ray, scale(camera, -1)); ray = normalize(ray); float3 pos = camera; // Move along the ray, we stop if the color becomes completely white, // or we've done 5000 iterations (5000 is a bit arbitrary, it needs // to be big enough to let rays pass through the entire volume) int i = 0; float color = 0; while(color < 255 && i < 5000){ i++; pos = add(pos, scale(ray, step_size)); // Update position int r = value_at(pos, region); // Check if we're in the region color += value_at(pos, data)*(0.01 + r) ; // Update the color based on data value, and if we're in the region } // Write final color to image image[(y+(IMAGE_DIM/2)) * IMAGE_DIM + (x+(IMAGE_DIM/2))] = color > 255 ? 255 : color; } } return image; } // Check if two values are similar, threshold can be changed. int similar(unsigned char* data, int3 a, int3 b){ unsigned char va = data[a.z * DATA_DIM*DATA_DIM + a.y*DATA_DIM + a.x]; unsigned char vb = data[b.z * DATA_DIM*DATA_DIM + b.y*DATA_DIM + b.x]; int i = abs(va-vb) < 1; return i; } // Serial region growing, same algorithm as in assignment 2 unsigned char* grow_region_serial(unsigned char* data){ unsigned char* region = (unsigned char*)calloc(sizeof(unsigned char), DATA_DIM*DATA_DIM*DATA_DIM); stack_t* stack = new_stack(); int3 seed = {.x=50, .y=300, .z=300}; push(stack, seed); region[seed.z *DATA_DIM*DATA_DIM + seed.y*DATA_DIM + seed.x] = 1; int dx[6] = {-1,1,0,0,0,0}; int dy[6] = {0,0,-1,1,0,0}; int dz[6] = {0,0,0,0,-1,1}; while(stack->size > 0){ int3 pixel = pop(stack); for(int n = 0; n < 6; n++){ int3 candidate = pixel; candidate.x += dx[n]; candidate.y += dy[n]; candidate.z += dz[n]; if(!inside_int(candidate)){ continue; } if(region[candidate.z * DATA_DIM*DATA_DIM + candidate.y*DATA_DIM + candidate.x]){ continue; } if(similar(data, pixel, candidate)){ push(stack, candidate); region[candidate.z * DATA_DIM*DATA_DIM + candidate.y*DATA_DIM + candidate.x] = 1; } } } return region; } unsigned char* grow_region_gpu(unsigned char* data){ cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue queue; cl_kernel kernel; cl_int err; char *source; int i; clGetPlatformIDs(1, &platform, NULL); clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); printPlatformInfo(platform); queue = clCreateCommandQueue(context, device, 0, &err); kernel = buildKernel("region.cl", "region", NULL, context, device); //Host variables unsigned char* host_region = (unsigned char*)calloc(sizeof(unsigned char), DATA_SIZE); int host_unfinished; cl_mem device_region = clCreateBuffer(context, CL_MEM_READ_WRITE, DATA_SIZE * sizeof(cl_uchar) ,NULL,&err); cl_mem device_data = clCreateBuffer(context, CL_MEM_READ_ONLY, DATA_SIZE * sizeof(cl_uchar), NULL,&err); cl_mem device_unfinished = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL,&err); clError("Error allocating memory", err); //plant seed int3 seed = {.x=50, .y=300, .z=300}; host_region[index(seed.z, seed.y, seed.x)] = 2; //Copy data to the device clEnqueueWriteBuffer(queue, device_data , CL_FALSE, 0, DATA_SIZE * sizeof(cl_uchar), data , 0, NULL, NULL); clEnqueueWriteBuffer(queue, device_region, CL_FALSE, 0, DATA_SIZE * sizeof(cl_uchar), host_region, 0, NULL, NULL); //Calculate block and grid sizes size_t global[] = { 512, 512, 512 }; size_t local[] = { 8, 8, 8 }; //Run kernel untill completion do{ host_unfinished = 0; clEnqueueWriteBuffer(queue, device_unfinished, CL_FALSE, 0, sizeof(cl_int), &host_unfinished , 0, NULL, NULL); clFinish(queue); err = clSetKernelArg(kernel, 0, sizeof(device_data), (void*)&device_data); err = clSetKernelArg(kernel, 1, sizeof(device_region), (void*)&device_region); err = clSetKernelArg(kernel, 2, sizeof(device_unfinished), (void*)&device_unfinished); clError("Error setting arguments", err); //Run the kernel clEnqueueNDRangeKernel(queue, kernel, 3, NULL, &global, &local, 0, NULL, NULL); clFinish(queue); clError("Error running kernel", err); err = clEnqueueReadBuffer(queue, device_unfinished, CL_TRUE, 0, sizeof(cl_int), &host_unfinished, 0, NULL, NULL); clFinish(queue); clError("Error reading buffer 1", err); }while(host_unfinished); //Copy result to host err = clEnqueueReadBuffer(queue, device_region, CL_TRUE, 0, DATA_SIZE * sizeof(cl_uchar), host_region, 0, NULL, NULL); clFinish(queue); clError("Error reading buffer 2", err); return host_region; } unsigned char* raycast_gpu(unsigned char* data, unsigned char* region){ cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue queue; cl_kernel kernel; cl_int err; char *source; int i; clGetPlatformIDs(1, &platform, NULL); clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); printPlatformInfo(platform); printDeviceInfo(device); queue = clCreateCommandQueue(context, device, 0, &err); kernel = buildKernel("raycast.cl", "raycast", NULL, context, device); cl_mem device_region = clCreateBuffer(context, CL_MEM_READ_ONLY, DATA_SIZE * sizeof(cl_uchar) ,NULL,&err); cl_mem device_data = clCreateBuffer(context, CL_MEM_READ_ONLY, DATA_SIZE * sizeof(cl_uchar), NULL,&err); cl_mem device_image = clCreateBuffer(context, CL_MEM_READ_WRITE, IMAGE_SIZE * sizeof(cl_uchar),NULL,&err); clError("Error allocating memory", err); //Copy data to the device clEnqueueWriteBuffer(queue, device_data , CL_FALSE, 0, DATA_SIZE * sizeof(cl_uchar), data , 0, NULL, NULL); clEnqueueWriteBuffer(queue, device_region, CL_FALSE, 0, DATA_SIZE * sizeof(cl_uchar), region, 0, NULL, NULL); int grid_size = IMAGE_DIM; int block_size = IMAGE_DIM; //Set up kernel arguments err = clSetKernelArg(kernel, 0, sizeof(device_data), (void*)&device_data); err = clSetKernelArg(kernel, 1, sizeof(device_region), (void*)&device_region); err = clSetKernelArg(kernel, 2, sizeof(device_image), (void*)&device_image); clError("Error setting arguments", err); //Run the kernel const size_t globalws[2] = {IMAGE_DIM, IMAGE_DIM}; const size_t localws[2] = {8, 8}; clEnqueueNDRangeKernel(queue, kernel, 2, NULL, &globalws, &localws, 0, NULL, NULL); clFinish(queue); //Allocate memory for the result unsigned char* host_image = (unsigned char*)malloc(IMAGE_SIZE_BYTES); //Copy result from device err = clEnqueueReadBuffer(queue, device_image, CL_TRUE, 0, IMAGE_SIZE * sizeof(cl_uchar), host_image, 0, NULL, NULL); clFinish(queue); //Free device memory return host_image; } int main(int argc, char** argv){ unsigned char* data = create_data(); unsigned char* region = grow_region_gpu(data); unsigned char* image = raycast_gpu(data, region); write_bmp(image, IMAGE_DIM, IMAGE_DIM); }
/** * Loads the OpenCL program by loading the source code, setting up devices and context, * as well as building the actual kernel */ void CL_Program::loadProgram() { const std::string hw("Hello World\n"); std::vector<cl::Platform> platforms; error = cl::Platform::get(&platforms); print_errors("cl::Platform::get", error); std::cout << "Available platforms: " << platforms.size() << std::endl; if (platforms.size() == 0) { std::cout << "-OpenCL: There are no available platforms. This probably means proper GPU drivers aren't installed." << std::endl; } std::string platformVendor; if (app.getApplicationFlags()->opencl_devices_debug) { std::remove("gpu_debug.txt"); for (auto iter : platforms) { printPlatformInfo(iter); } } device_used = 0; cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; if (app.getApplicationFlags()->use_GPU) { std::cout << "+OpenCL: Attempting to use GPU as OpenCL device" << std::endl; std::cout << "+OpenCL: If this causes errors, switch to CPU by changing \"use_GPU\" to \"no\" in config.json" << std::endl; try { context = cl::Context(CL_DEVICE_TYPE_GPU, properties); } catch (cl::Error e) { std::cout << "----------------------------------------" << std::endl; std::cout << e.what() << ", " << e.err() << std::endl; std::cout << "-OpenCL: Could not use GPU as OpenCL device. Most of the time this is due to GPU drivers not having the required functionality." << std::endl; std::cout << "-OpenCL: I'm switching to CPU OpenCL. This is slower, but should work" << std::endl; std::cout << "----------------------------------------" << std::endl; try { context = cl::Context(CL_DEVICE_TYPE_CPU, properties); std::cout << "+OpenCL: I was able to create a backup context using the CPU as OpenCL device" << std::endl; std::cout << "+OpenCL: Consider tweaking your GPU drivers later so that the program runs faster." << std::endl; app.getApplicationFlags()->use_GPU = false; } catch (cl::Error e2) { std::cout << "----------------------------------------" << std::endl; std::cout << e.what() << ", " << e.err() << std::endl; std::cout << "-OpenCL: I was not able to use CPU as a backup OpenCL device. Something real bad is going on.\nAborting.\nContact the software author!" << std::endl; std::cout << "----------------------------------------" << std::endl; app.exit(); return; } } } else { std::cout << "+OpenCL: Attempting to use CPU as OpenCL device" << std::endl; std::cout << "+OpenCL: If you have modern GPU drivers, please switch to GPU for better performance" << std::endl; std::cout << "+OpenCL: This can be done by changing \"use_GPU\" to \"yes\" in config.json" << std::endl; try { context = cl::Context(CL_DEVICE_TYPE_CPU, properties); } catch (cl::Error e) { std::cout << "----------------------------------------" << std::endl; std::cout << e.what() << ", " << e.err() << std::endl; std::cout << "-OpenCL: Error at creating context with CPU as OpenCL device" << std::endl; std::cout << "-OpenCL: This should not happen, but it did. Trying GPU as a backup device" << std::endl; std::cout << "----------------------------------------" << std::endl; try { context = cl::Context(CL_DEVICE_TYPE_GPU, properties); } catch (cl::Error e2) { std::cout << "----------------------------------------" << std::endl; std::cout << e2.what() << ", " << e.err() << std::endl; std::cout << "-OpenCL: Using GPU as a backup device failed. This is probably due to problems with the GPU drivers" << std::endl; std::cout << "-OpenCL: There were no OpenCL capable devices. The program cannot continue :(" << std::endl; std::cout << "----------------------------------------" << std::endl; app.exit(); return; } } } devices = context.getInfo<CL_CONTEXT_DEVICES>(); std::cout << "+OpenCL: Devices available: " << devices.size() << std::endl; commandQueue = cl::CommandQueue(context, devices[device_used], 0, &error); print_errors("cl::CommandQueue", error); programSourceRaw = readSource(sourcepath); if (app.getApplicationFlags()->print_cl_programs) { std::cout << "+OpenCL: Kernel size: " << programSourceRaw.size() << std::endl; std::cout << "+OpenCL: Kernel: " << programSourceRaw << std::endl; } try { programSource = cl::Program::Sources(1, std::make_pair(programSourceRaw.c_str(), programSourceRaw.size())); program = cl::Program(context, programSource); } catch (cl::Error er) { std::cout << "-OpenCL Exception: " << er.what() << ", " << er.err() << std::endl; } try { error = program.build(devices); } catch (cl::Error err) { std::cout << "-OpenCL Exception: " << err.what() << ", " << err.err() << std::endl; print_errors("program.build()", error); } std::cout << "Build status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[0]) << std::endl; std::cout << "Build Options:\t" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[0]) << std::endl; std::cout << "Build Log:\t " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl; }
int main (int argc, char *argv[]) { cl_int error; struct arguments arguments; /* Default values. */ arguments.silent = 0; arguments.verbose = 0; arguments.nocharext = 0; arguments.mandeliterations = 25; arguments.info = 0; argp_parse (&argp, argc, argv, 0, 0, &arguments); // CL initialisation error = initialisecl(arguments.verbose); if(error != CL_SUCCESS) { fprintf (stdout, "initialisecl() returned %s\n", errorMessageCL(error)); return 1; } if (arguments.info) { // Print all the info about the CL device printPlatformInfo(); printDevInfo(); return CL_SUCCESS; } // Some general information fprintf (stdout, "========= CL DEVICE =========\n"); printDeviceName(); printDevExt(); // SIN sinTest(); // OCCOIDS occoidsTest(); // MANDELBROT VIS TEST mandelbrotVisTest(); // check for --nocharext if (!arguments.nocharext) { // Check device supports extensions we need for rot13 & mandelbrot if (getCorrectDevice("cl_khr_byte_addressable_store")) { fprintf (stdout, "No devices supporting cl_khr_byte_addressable_store found - bypassing rot13, mandelbrot and modulot tests.\n"); return 2; } // ROT13 rot13Test(); // MANDELBROT mandelbrotTest(arguments.verbose, arguments.mandeliterations); if (getCorrectDevice("cl_khr_fp64")) { fprintf (stdout, "No devices supporting cl_khr_fp64 found - bypassing modulo tests.\n"); return 3; } // MODULO PRECISION moduloTest(); } // cleaup cl destroycl(); return error; }