void SetUpOpenCL() { //---------------------------------------------------------------------- // Compile kernel //---------------------------------------------------------------------- const std::string &kernelFileName = commandLineOpts["kernel"].as<std::string>(); OCLTOY_LOG("Compile OpenCL kernel: " << kernelFileName); // Read the kernel const std::string kernelSource = ReadSources(kernelFileName, "jugCLer"); // Create the kernel program cl::Device &oclDevice = selectedDevices[0]; cl::Context &oclContext = deviceContexts[0]; cl::Program program = cl::Program(oclContext, kernelSource); try { VECTOR_CLASS<cl::Device> buildDevice; buildDevice.push_back(oclDevice); program.build(buildDevice); } catch (cl::Error err) { cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice); OCLTOY_LOG("Kernel compilation error:\n" << strError.c_str()); throw err; } kernelsJugCLer = cl::Kernel(program, "render_gpu"); kernelsJugCLer.getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &kernelsWorkGroupSize); if (commandLineOpts.count("workgroupsize")) kernelsWorkGroupSize = commandLineOpts["workgroupsize"].as<size_t>(); OCLTOY_LOG("Using workgroup size: " << kernelsWorkGroupSize); //---------------------------------------------------------------------- // Allocate buffer //---------------------------------------------------------------------- AllocateBuffers(); //---------------------------------------------------------------------- // Set kernel arguments //---------------------------------------------------------------------- kernelsJugCLer.setArg(0, *sceneBuff); kernelsJugCLer.setArg(1, *pixelsBuff); }
int Setup_OpenCL( const char *program_source ) { cl_device_id devices[16]; size_t cb; cl_uint size_ret = 0; cl_int err; int num_cores; cl_device_id device_ID; char device_name[128] = {0}; if(g_bRunOnPG) { printf("Trying to run on a Processor Graphics \n"); } else { printf("Trying to run on a CPU \n"); } cl_platform_id intel_platform_id = GetIntelOCLPlatform(); if( intel_platform_id == NULL ) { printf("ERROR: Failed to find Intel OpenCL platform.\n"); return -1; } cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)intel_platform_id, NULL }; // create the OpenCL context on a CPU/PG if(g_bRunOnPG) { g_context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL); } else { g_context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_CPU, NULL, NULL, NULL); } if (g_context == (cl_context)0) return -1; // get the list of CPU devices associated with context err = clGetContextInfo(g_context, CL_CONTEXT_DEVICES, 0, NULL, &cb); clGetContextInfo(g_context, CL_CONTEXT_DEVICES, cb, devices, NULL); g_cmd_queue = clCreateCommandQueue(g_context, devices[0], 0, NULL); if (g_cmd_queue == (cl_command_queue)0) { Cleanup_OpenCL(); return -1; } char *sources = ReadSources(program_source); //read program .cl source file g_program = clCreateProgramWithSource(g_context, 1, (const char**)&sources, NULL, NULL); if (g_program == (cl_program)0) { printf("ERROR: Failed to create Program with source...\n"); Cleanup_OpenCL(); free(sources); return -1; } err = clBuildProgram(g_program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { printf("ERROR: Failed to build program...\n"); BuildFailLog(g_program, devices[0]); Cleanup_OpenCL(); free(sources); return -1; } #ifdef PER_PIXEL g_kernel = clCreateKernel(g_program, "ToneMappingPerPixel", NULL); #else g_kernel = clCreateKernel(g_program, "ToneMappingLine", NULL); #endif if (g_kernel == (cl_kernel)0) { printf("ERROR: Failed to create kernel...\n"); Cleanup_OpenCL(); 
free(sources); return -1; } free(sources); // retrieve platform information // use first device ID device_ID = devices[0]; err = clGetDeviceInfo(device_ID, CL_DEVICE_NAME, 128, device_name, NULL); if (err!=CL_SUCCESS) { printf("ERROR: Failed to get device information (device name)...\n"); Cleanup_OpenCL(); return -1; } printf("Using device %s...\n", device_name); err = clGetDeviceInfo(device_ID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &num_cores, NULL); if (err!=CL_SUCCESS) { printf("ERROR: Failed to get device information (max compute units)...\n"); Cleanup_OpenCL(); return -1; } printf("Using %d compute units...\n", num_cores); err = clGetDeviceInfo(device_ID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &g_min_align, NULL); if (err!=CL_SUCCESS) { printf("ERROR: Failed to get device information (max memory base address align size)...\n"); Cleanup_OpenCL(); return -1; } g_min_align /= 8; //in bytes printf("Expected min alignment for buffers is %d bytes...\n", g_min_align); return 0; // success... }
/**
 * Builds the per-device OpenCL state for rendering: a context and a
 * profiling-enabled command queue on `device`, the "RadianceGPU" kernel
 * compiled from `kernelFileName`, and read-only buffers for the camera and
 * the sphere array.
 *
 * @param device            device to create the context, queue and kernel for
 * @param kernelFileName    passed to ReadSources() to load the kernel source
 * @param forceGPUWorkSize  when > 0 and `device` is a GPU, replaces the
 *                          work-group size reported for the kernel
 * @param camera            host pointer handed to the camera buffer
 * @param spheres           host pointer handed to the sphere buffer
 * @param sceneSphereCount  number of Sphere elements behind `spheres`
 *
 * NOTE(review): outside __APPLE__ both buffers are created with
 * CL_MEM_USE_HOST_PTR, so `camera` and `spheres` presumably have to outlive
 * this object - confirm against the callers.
 *
 * @throws cl::Error (or whatever else a step throws) on failure. Everything
 *         allocated here so far is released before the exception propagates,
 *         because the destructor does not run for a constructor that throws.
 */
RenderDevice::RenderDevice(const cl::Device &device, const string &kernelFileName,
		const unsigned int forceGPUWorkSize,
		Camera *camera, Sphere *spheres, const unsigned int sceneSphereCount/*,
		boost::barrier *startBarrier, boost::barrier *endBarrier*/) :
	/*renderThread(NULL), threadStartBarrier(startBarrier), threadEndBarrier(endBarrier),*/
	sphereCount(sceneSphereCount),
	colorBuffer(NULL), pixelBuffer(NULL), seedBuffer(NULL),
	pixels(NULL), colors(NULL), seeds(NULL), exeUnitCount(0.0), exeTime(0.0) {
	deviceName = "anonymouse";//device.getInfo<CL_DEVICE_NAME > ().c_str();

	// These owning pointers are assigned below; null them first so the
	// failure path can delete whichever ones were actually created.
	context = NULL;
	queue = NULL;
	kernel = NULL;
	cameraBuffer = NULL;
	sphereBuffer = NULL;

	try {
		// Allocate a context with the selected device
		cl::Platform platform = device.getInfo<CL_DEVICE_PLATFORM>();
		VECTOR_CLASS<cl::Device> devices;
		devices.push_back(device);
		cl_context_properties cps[3] = {
			CL_CONTEXT_PLATFORM, (cl_context_properties)platform(), 0
		};
		context = new cl::Context(devices, cps);

		// Allocate the queue for this device
		cl_command_queue_properties prop = CL_QUEUE_PROFILING_ENABLE;
		queue = new cl::CommandQueue(*context, device, prop);

		// Create the kernel
		string src = ReadSources(kernelFileName);

		// Compile sources
		cl::Program::Sources source(1, make_pair(src.c_str(), src.length()));
		cl::Program program = cl::Program(*context, source);
		try {
			VECTOR_CLASS<cl::Device> buildDevice;
			buildDevice.push_back(device);
#if defined(__EMSCRIPTEN__)
			program.build(buildDevice, "");
#elif defined(__APPLE__)
			program.build(buildDevice, "-D__APPLE__");
#else
			program.build(buildDevice, "");
#endif
			cl::string result = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
			cerr << "[Device::" << deviceName << "]" << " Compilation result: " << result.c_str() << endl;
		} catch (const cl::Error &) {
			// Report the build log, then rethrow the original exception
			// object (a bare `throw;` avoids copying it).
			cl::string strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
			cerr << "[Device::" << deviceName << "]" << " Compilation error:" << endl << strError.c_str() << endl;

			throw;
		}

		kernel = new cl::Kernel(program, "RadianceGPU");

		kernel->getWorkGroupInfo<size_t>(device, CL_KERNEL_WORK_GROUP_SIZE, &workGroupSize);
		cerr << "[Device::" << deviceName << "]" << " Suggested work group size: " << workGroupSize << endl;

		// Force workgroup size if applicable and required
		if ((forceGPUWorkSize > 0) && (device.getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_GPU)) {
			workGroupSize = forceGPUWorkSize;
			cerr << "[Device::" << deviceName << "]" << " Forced work group size: " << workGroupSize << endl;
		}

		// Create the thread for the rendering
		//renderThread = new boost::thread(boost::bind(RenderDevice::RenderThread, this));

		// Flags shared by the two host-backed input buffers below
#if defined (__APPLE__)
		// CL_MEM_USE_HOST_PTR is very slow with Apple's OpenCL
		const cl_mem_flags hostInputFlags = CL_MEM_READ_ONLY;
#else
		const cl_mem_flags hostInputFlags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR;
#endif

		// Create camera buffer
		cameraBuffer = new cl::Buffer(*context,
				hostInputFlags,
				sizeof(Camera),
				camera);
		cerr << "[Device::" << deviceName << "] Camera buffer size: " << (sizeof(Camera) / 1024) << "Kb" << endl;

		// Create scene (sphere) buffer
		sphereBuffer = new cl::Buffer(*context,
				hostInputFlags,
				sizeof(Sphere) * sphereCount,
				spheres);
		cerr << "[Device::" << deviceName << "] Scene buffer size: " << (sizeof(Sphere) * sphereCount / 1024) << "Kb" << endl;
	} catch (...) {
		// Release in reverse order of creation; deleting a null pointer is a no-op.
		delete sphereBuffer;
		delete cameraBuffer;
		delete kernel;
		delete queue;
		delete context;

		throw;
	}
}