void initSimulation() { // source: http://stackoverflow.com/questions/26517114/how-to-compile-opencl-project-with-kernels try { std::vector<cl::Platform> platforms; cl::Platform::get(&platforms); std::vector<cl::Device> devices; platforms[PLATFORM_ID].getDevices(CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, &devices); context = cl::Context(devices); queue = cl::CommandQueue(context, devices[DEVICE_ID]); std::ifstream sourceFile{"kernels/programs.cl"}; std::string sourceCode(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>())); cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length())); simulationProgram = cl::Program(context, source); simulationProgram.build(devices); visualizationBufferGPU = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(unsigned char) * 4 * fieldWidth * fieldHeight, nullptr, nullptr); randomizeField(); stepKernel = cl::Kernel(simulationProgram, "tick"); stepKernel.setArg(0, fieldWidth); stepKernel.setArg(1, fieldHeight); stepKernel.setArg(3, visualizationBufferGPU); } catch (cl::Error err) { std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl; exit(2); } }
/**
 * Creates `prog` from the shared preamble sources followed by the caller's
 * kernel strings and builds it for the current device.
 *
 * @param prog       Receives the newly created program.
 * @param num_files  Number of entries in `ker_strs` / `ker_lens`.
 * @param ker_strs   Kernel source strings.
 * @param ker_lens   Length of each kernel source string.
 * @param options    Extra build options appended after the default ones.
 *
 * On any exception the build info is shown via SHOW_BUILD_INFO and the
 * exception is rethrown.
 */
void buildProgram(cl::Program &prog, const int num_files, const char **ker_strs, const int *ker_lens, std::string options)
{
    try {
        // Preamble first, then the user-supplied kernels in order.
        Program::Sources sourceList;
        sourceList.emplace_back(USE_DBL_SRC_STR.c_str(), USE_DBL_SRC_STR.length());
        sourceList.emplace_back(KParam_hpp, KParam_hpp_len);
        for (int fileIdx = 0; fileIdx < num_files; ++fileIdx) {
            sourceList.emplace_back(ker_strs[fileIdx], ker_lens[fileIdx]);
        }

        // Default options are assembled once and reused on later calls.
        static std::string defaults = [] {
            std::string flags(" -cl-std=CL1.1");
            flags += " -D dim_type=";
            flags += dtype_traits<dim_type>::getName();
            return flags;
        }();

        prog = cl::Program(getContext(), sourceList);

        std::vector<cl::Device> buildDevices(1, getDevice());
        const std::string buildFlags = defaults + options;
        prog.build(buildDevices, buildFlags.c_str());
    } catch (...) {
        SHOW_BUILD_INFO(prog);
        throw;
    }
}
void initCL() { dumpCLinfo(); EGLDisplay mEglDisplay = eglGetCurrentDisplay(); if (mEglDisplay == EGL_NO_DISPLAY) LOGE("initCL: eglGetCurrentDisplay() returned 'EGL_NO_DISPLAY', error = %x", eglGetError()); EGLContext mEglContext = eglGetCurrentContext(); if (mEglContext == EGL_NO_CONTEXT) LOGE("initCL: eglGetCurrentContext() returned 'EGL_NO_CONTEXT', error = %x", eglGetError()); cl_context_properties props[] = { CL_GL_CONTEXT_KHR, (cl_context_properties) mEglContext, CL_EGL_DISPLAY_KHR, (cl_context_properties) mEglDisplay, CL_CONTEXT_PLATFORM, 0, 0 }; try { cl::Platform p = cl::Platform::getDefault(); std::string ext = p.getInfo<CL_PLATFORM_EXTENSIONS>(); if(ext.find("cl_khr_gl_sharing") == std::string::npos) LOGE("Warning: CL-GL sharing isn't supported by PLATFORM"); props[5] = (cl_context_properties) p(); theContext = cl::Context(CL_DEVICE_TYPE_GPU, props); std::vector<cl::Device> devs = theContext.getInfo<CL_CONTEXT_DEVICES>(); LOGD("Context returned %d devices, taking the 1st one", devs.size()); ext = devs[0].getInfo<CL_DEVICE_EXTENSIONS>(); if(ext.find("cl_khr_gl_sharing") == std::string::npos) LOGE("Warning: CL-GL sharing isn't supported by DEVICE"); theQueue = cl::CommandQueue(theContext, devs[0]); cl::Program::Sources src(1, std::make_pair(oclProgI2I, sizeof(oclProgI2I))); theProgI2I = cl::Program(theContext, src); theProgI2I.build(devs); cv::ocl::attachContext(p.getInfo<CL_PLATFORM_NAME>(), p(), theContext(), devs[0]()); if( cv::ocl::useOpenCL() ) LOGD("OpenCV+OpenCL works OK!"); else LOGE("Can't init OpenCV with OpenCL TAPI"); } catch(cl::Error& e) { LOGE("cl::Error: %s (%d)", e.what(), e.err()); } catch(std::exception& e) { LOGE("std::exception: %s", e.what()); } catch(...) { LOGE( "OpenCL info: unknown error while initializing OpenCL stuff" ); } LOGD("initCL completed"); }
void initCL() { ocl::createContextEx(CL_DEVICE_TYPE_ALL, clPlatform, clDevices, clContext, clQueues); cl_int clError = CL_SUCCESS; std::ifstream t("kmeans.cl"); std::string code((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>()); std::string header = "#define DIM "; header += util::toString(DIM); header += "\n"; header += "#define K "; header += util::toString(K); header += "\n"; header += "#define N "; header += util::toString(N); header += "\n"; header += "#define AM_LWS "; header += util::toString(AM_LWS); header += "\n"; header += "#define RP_LWS "; header += util::toString(RP_LWS); header += "\n\n\n"; code = header + code; try { cl::Program::Sources source(1, std::make_pair(code.c_str(), code.size())); clProgram = cl::Program(clContext, source); clProgram.build(clDevices, "-cl-fast-relaxed-math -cl-unsafe-math-optimizations -cl-mad-enable"); std::string info(""); for (std::vector<cl::Device>::iterator itr = clDevices.begin(); itr != clDevices.end(); ++itr) { clProgram.getBuildInfo(*itr, CL_PROGRAM_BUILD_LOG, &info); if (info.size() > 0) std::cout << "Build log: " << info << std::endl; } for (int i = 0; i < clDevices.size(); ++i) { clClusterAssignment.push_back(cl::Kernel(clProgram, "cluster_assignment", &clError)); clClusterReposition.push_back(cl::Kernel(clProgram, "cluster_reposition", &clError)); clClusterReposition_k.push_back(cl::Kernel(clProgram, "cluster_reposition_k", &clError)); clClusterReposition_k_c.push_back(cl::Kernel(clProgram, "c_cluster_reposition", &clError)); clComputeCost.push_back(cl::Kernel(clProgram, "compute_cost", &clError)); } } catch (const cl::Error& err) { std::cout << "OpenCL Error 4: " << err.what() << " (" << err.err() << ")" << std::endl; std::string info(""); for (std::vector<cl::Device>::iterator itr = clDevices.begin(); itr != clDevices.end(); ++itr) { clProgram.getBuildInfo(*itr, CL_PROGRAM_BUILD_LOG, &info); if (info.size() > 0) std::cout << "Build log: " << info << std::endl; } std::cin.get(); } }
void InitCL() { //cl_int err = CL_SUCCESS; try { //Identify platforms cl::Platform::get(&clPlatformList); //Select first platform with any GPU devices for(unsigned int i=0; i<clPlatformList.size(); i++) { clPlatformList[i].getDevices(CL_DEVICE_TYPE_GPU, &clDeviceList); if(!clDeviceList.empty()) break; } //Set Context Properties: Get associated cl_platform_id using getInfo() on the first GPU //Thus conveniently avoiding previous C++ bindings issues :) cl_context_properties clProps[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties)clDeviceList[0].getInfo<CL_DEVICE_PLATFORM>(), 0 }; //Create interop context from GPU devices clContext = cl::Context(CL_DEVICE_TYPE_GPU, clProps); //Generate program with source and build std::string progFile = ReadKernels(KERNEL_FILE); cl::Program::Sources clSource(1, std::make_pair(progFile.c_str(), progFile.size())); clProgram = cl::Program(clContext, clSource); clProgram.build(clDeviceList); //Initialize kernels for(int i=0; i<NUM_KERNELS; i++) { clKernels[i] = cl::Kernel(clProgram, kernelName[i]); } //Create Command Queue with profiling enabled clQueue = cl::CommandQueue(clContext, clDeviceList[0], CL_QUEUE_PROFILING_ENABLE); } catch(cl::Error e) { cout << "OpenCL initialization failure: " << e.what() << endl << "Error code: " << e.err() << endl; if(e.err() == -11) { std::string clProgLog; clProgram.getBuildInfo(clDeviceList[0], CL_PROGRAM_BUILD_LOG, &clProgLog); cout << clProgLog; system("pause"); exit(EXIT_FAILURE); } throw; } }
int main(int argc, char *argv[]) { cl_int err = CL_SUCCESS; cl::Event evt; std::vector<cl::Platform> platforms; cl::Platform::get(&platforms); if (platforms.size() == 0) { return false; } platform_ = platforms[0]; cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &err); CHECK_CL_ERROR(err, "cl::Context"); std::vector<cl::Device> devices = context_.getInfo<CL_CONTEXT_DEVICES>(); if (devices.size() == 0) { return false; } device_ = devices[0]; sources_.push_back(std::make_pair(source_str.c_str(), source_str.size())); program_ = cl::Program(context_, sources_); err = program_.build(devices); if (err != CL_SUCCESS) { std::string log = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]); std::cout << "program.build() ERROR: " << log.c_str() << std::endl; return false; } kernel_ = cl::Kernel(program_, "hello", &err); CHECK_CL_ERROR(err, "cl::Kernel"); buf_ = cl::Buffer(context_, CL_MEM_READ_ONLY, 1024, NULL, &err); queue_ = cl::CommandQueue(context_, device_, 0, &err); CHECK_CL_ERROR(err, "cl::CommandQueue"); kernel_.setArg(0, buf_); err = queue_.enqueueNDRangeKernel(kernel_, cl::NullRange, cl::NDRange(10, 10), cl::NullRange, NULL, &evt); evt.wait(); CHECK_CL_ERROR(err, "queue.enqueueNDRangeKernel()"); return 0; }
/**
 * Creates `program` from `sources` in `context` and builds it for `device`.
 *
 * Progress and failures are reported through Print(); on a build failure the
 * device's build log is printed as well.
 *
 * @return CL_SUCCESS, or the OpenCL error code of the failing step.
 */
cl_int CLFW::Build(cl::Program &program, cl::Program::Sources &sources, cl::Context &context, cl::Device &device)
{
    cl_int status = CL_SUCCESS;

    // Step 1: create the program object from the sources.
    program = cl::Program(context, sources, &status);
    if (status != CL_SUCCESS) {
        Print("Error creating program:", errorFG, errorBG);
        return status;
    }

    // Step 2: compile and link for the single target device.
    status = program.build({ device });
    if (status == CL_SUCCESS) {
        Print("Success building OpenCL program. ", successFG, successBG);
        return status;
    }

    Print("Error building program:", errorFG, errorBG);
    Print(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device), errorFG, errorBG);
    return status;
}
void CLHelper::compileProgram( cl::Program& program, std::vector<cl::Device>& devices, const char* options, void (CL_CALLBACK * notifyFptr)(cl_program, void *), void* data) { cl_int err; err = program.build(devices, options, NULL, NULL); if(err != CL_SUCCESS) { std::cout << "Build error! Showing build log:" << std::endl << std::endl; std::string errorLog; std::vector<cl::Device>::iterator device; for(device = devices.begin(); device != devices.end(); device++) { errorLog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device); std::cout << errorLog << std::endl; } CHECK_OPENCL_ERROR(err, "cl::Program::build() failed."); } }
void PTWeekend::setup() { /* Scene data */ camera.lookAt(glm::vec3(-2,1,1), vec3(0, 0, -1.0f), vec3(0,1,0)); camera.setPerspective( 45.0f, getWindowAspectRatio(), 0.01f, 100.0f ); cameraUI = CameraUi(&camera, getWindow()); glm::vec3 bottom_sky_color(1.0, 1.0, 1.0); glm::vec3 top_sky_color(0.5, 0.7, 1.0); CGLContextObj glContext = CGLGetCurrentContext(); CGLShareGroupObj shareGroup = CGLGetShareGroup(glContext); GLuint imgTexName; const char* program_file_str = "../../../assets/path_tracing.cl"; /* Obtain a platform */ std::vector<cl::Platform> platforms; clStatus = cl::Platform::get(&platforms); pt_assert(clStatus, "Could not find an OpenCL platform."); /* Obtain a device and determinte max local size */ std::vector<cl::Device> devices; clStatus = platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); pt_assert(clStatus, "Could not find a GPU device."); device = devices[0]; /* Create an OpenCL context for the device */ cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)shareGroup, 0 }; context = cl::Context({device}, properties, NULL, NULL, &clStatus); pt_assert(clStatus, "Could not create a context for device."); /* Load and build a program */ std::ifstream program_file(program_file_str); std::string program_str(std::istreambuf_iterator<char>(program_file), (std::istreambuf_iterator<char>())); cl::Program::Sources sources(1, std::make_pair(program_str.c_str(), program_str.length() + 1)); program = cl::Program(context, sources); clStatus = program.build({device}, "-I ../../../assets/ -cl-denorms-are-zero"); if (clStatus != CL_SUCCESS) { std::string log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device); std::cerr << log << "\n"; exit(EXIT_FAILURE); } /* Create command queue */ cmd_queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &clStatus); pt_assert(clStatus, "Could not create command queue"); /* create kernel and set the kernel arguments */ kernel = cl::Kernel(program, "path_tracing", 
&clStatus); pt_assert(clStatus, "Could not create kernel"); img_width = getWindowWidth(); img_height = getWindowHeight(); true_img_width = getWindowWidth(); true_img_height = getWindowHeight(); local_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); //TODO: throws local_width = (size_t)pow(2, ceilf(log2f((floorf(sqrtf(local_size)))))); local_height = local_size / local_width; img_width = ceilf((float)img_width / (float)local_width) * local_width; img_height = ceilf((float)img_height / (float)local_height) * local_height; unsigned int samples = 16; /* Create GL texture and CL wrapper */ glGenTextures(1, &imgTexName); glBindTexture(GL_TEXTURE_2D, imgTexName); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, img_width, img_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, 0); imgTex = gl::Texture2d::create(GL_TEXTURE_2D, imgTexName, img_width, img_height, true); glFinish(); img_buffer.push_back(cl::Image2DGL(context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, imgTexName, &clStatus)); pt_assert(clStatus, "Could not create buffer"); /* Create all buffers */ cam_buffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(cl_pinhole_cam), NULL, &clStatus); pt_assert(clStatus, "Could not create camera buffer"); primitive_buffer = cl::Buffer (context, CL_MEM_READ_ONLY, MAX_PRIMITIVES * sizeof(cl_sphere), NULL, &clStatus); pt_assert(clStatus, "Could not create primitive buffer"); material_buffer = cl::Buffer (context, CL_MEM_READ_ONLY, MAX_PRIMITIVES * sizeof(cl_material), NULL, &clStatus); pt_assert(clStatus, "Could not create primitive buffer"); sky_buffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(cl_sky_material), NULL, &clStatus); pt_assert(clStatus, "Could not create sky buffer"); /* Upload scene (static) */ 
size_t sceneObjectCount = 5; cl_sphere* primitive_array = (cl_sphere*)malloc(sceneObjectCount * sizeof(cl_sphere)); cl_material* material_array = (cl_material*)malloc(sceneObjectCount * sizeof(cl_material)); primitive_array[0] = cl_make_sphere(glm::vec3(1, 0, -1), 0.5f); material_array[0] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0x730202"), 0, MAT_LAMBERTIAN); primitive_array[1] = cl_make_sphere(glm::vec3(-1, 0, -1), 0.5f); material_array[1] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0xF89000"), 0, MAT_LAMBERTIAN); primitive_array[2] = cl_make_sphere(glm::vec3(0, 0, 0), 0.5f); material_array[2] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0x97A663"), 0.1f, MAT_METALLIC); primitive_array[3] = cl_make_sphere(glm::vec3(0, 0, -2), 0.5f); material_array[3] = cl_make_material(glm::vec3(0.8f, 0.6f, 0.2f), 0.3f, MAT_METALLIC); primitive_array[4] = cl_make_sphere(glm::vec3(0,-100.5f, 1.0f), 100.0f); material_array[4] = cl_make_material(glm::vec3(0.5f), 0, MAT_LAMBERTIAN); clStatus = cmd_queue.enqueueWriteBuffer(primitive_buffer, CL_TRUE, 0, sceneObjectCount * sizeof(cl_sphere), primitive_array, NULL, NULL); pt_assert(clStatus, "Could not fill primitive buffer"); clStatus = cmd_queue.enqueueWriteBuffer(material_buffer, CL_TRUE, 0, sceneObjectCount * sizeof(cl_material), material_array, NULL, NULL); pt_assert(clStatus, "Could not fill material buffer"); pt_assert(cl_set_skycolors(bottom_sky_color, top_sky_color, sky_buffer, cmd_queue), "Could not fill sky buffer"); clStatus = kernel.setArg(1, primitive_buffer); pt_assert(clStatus, "Could not set primitive buffer argument"); clStatus = kernel.setArg(2, material_buffer); pt_assert(clStatus, "Could not set material buffer argument"); clStatus = kernel.setArg(3, sky_buffer); pt_assert(clStatus, "Could not set sky buffer argument"); clStatus = kernel.setArg(4, sceneObjectCount); pt_assert(clStatus, "Could not set primitive count count argument"); clStatus = kernel.setArg(5, img_buffer[0]); 
pt_assert(clStatus, "Could not set img buffer argument"); clStatus = kernel.setArg(6, samples); pt_assert(clStatus, "Could not set samples argument"); clStatus = kernel.setArg(0, cam_buffer); pt_assert(clStatus, "Could not set camera buffer argument"); }
/**
 * Prepares the OpenCL objects for the requested device type.
 *
 * For CL_DEVICE_TYPE_CPU / CL_DEVICE_TYPE_GPU the matching devices of all
 * platforms are collected, a context and profiling-enabled command queue are
 * created on them, the program read from KERNEL_PATH is built and the
 * "maxInt" kernel is created. Any other type selects the plain CPU
 * implementation and needs no OpenCL setup.
 *
 * @param type  Requested OpenCL device type.
 * @return true on success (or when no OpenCL setup is needed), false if no
 *         matching device exists or an error occurred.
 */
bool MaxValueSimple::initialize(cl_device_type type)
{
    if (type == CL_DEVICE_TYPE_CPU)
        TYPE = CLCPU;
    else if (type == CL_DEVICE_TYPE_GPU)
        TYPE = CLGPU;
    else {
        TYPE = CPU;
        return true;
    }

    try {
        /*** Get the OpenCL platforms, e.g. AMD APP, NVIDIA CUDA ***/
        cl::Platform::get(&platforms);

        /*** Get the OpenCL devices of the requested type, e.g. GPU, CPU ***/
        std::vector < cl::Device > devTmp;
        for (std::vector<cl::Platform>::iterator it = platforms.begin(); it != platforms.end(); ++it) {
            try {
                it->getDevices(type, &devTmp);
            } catch (cl::Error & notFound) {
                /* A platform without a device of this type reports
                   CL_DEVICE_NOT_FOUND; skip it instead of aborting the
                   search over the remaining platforms. */
                if (notFound.err() != CL_DEVICE_NOT_FOUND)
                    throw;
                devTmp.clear();
            }
            devices.insert(devices.end(), devTmp.begin(), devTmp.end());
            devTmp.clear();
        }

        if (devices.empty()) {
            std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): "
                      << "no matching OpenCL device found" << std::endl;
            return false;
        }

        std::cerr << "[DEBUG] OpenCL device: " << devices[0].getInfo< CL_DEVICE_NAME> () << std::endl;

        /*** Create the OpenCL context and command queue ***/
        /* NOTE(review): `devices` may hold devices from several platforms;
           confirm that a single context over all of them is intended. */
        context = cl::Context(devices);
        cmdQ = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE);

        /*** Read the OpenCL source code ***/
        std::string src = readFile(KERNEL_PATH);
        cl::Program::Sources source;
        source.push_back(std::make_pair(src.data(), src.length()));

        /*** Create the OpenCL program from the source code ***/
        program = cl::Program(context, source);
        try {
            program.build(devices);
        } catch (cl::Error & err) {
            /* TODO logging
            Logger::logDebug(
                    "initCL",
                    Logger::sStream << err.what() << "\nBuild-Log fuer \""
                    << devices.front().getInfo<CL_DEVICE_NAME> () << "\":\n"
                    << program.getBuildInfo<CL_PROGRAM_BUILD_LOG> (devices.front()));
            */
            throw; // rethrow the original exception object
        }

        kernel = cl::Kernel(program, "maxInt");
        event = cl::Event();
        return true;
    } catch (cl::Error& err) {
        // TODO Logger::logError(METHOD, Logger::sStream << err.what());
        std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): " << err.what()
                  << " (" << err.err() << ")" << std::endl;
        return false;
    } catch (std::exception& err) {
        // TODO Logger::logError(METHOD, Logger::sStream << err.what());
        std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): " << err.what() << std::endl;
        return false;
    }
}
int main(int argc, char **argv) { TS ts; //Time stepper Vec soln; //Holds the solution vector, including all the primitive //variables. DM dmda; //Manages the computational grid and parallelization. int X1Start, X2Start; int X1Size, X2Size; PetscInitialize(&argc, &argv, PETSC_NULL, help); // Create the computational domain. DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_GHOSTED, DM_BOUNDARY_GHOSTED, DMDA_STENCIL_STAR, N1, N2, PETSC_DECIDE, PETSC_DECIDE, DOF, NG, PETSC_NULL, PETSC_NULL, &dmda); // When running in parallel, each process computes from // [X1Start, X1Start+X1Size] x [X2Start, X2Start+X2Size] DMDAGetCorners(dmda, &X1Start, &X2Start, NULL, &X1Size, &X2Size, NULL); // Create the solution vector. DMCreateGlobalVector(dmda, &soln); // Create the time stepper and link it to the computational grid and the // residual evaluation function. TSCreate(PETSC_COMM_WORLD, &ts); TSSetDM(ts, dmda); TSSetIFunction(ts, PETSC_NULL, ComputeResidual, NULL); // OpenCL boilerplate code. clErr = cl::Platform::get(&platforms); CheckCLErrors(clErr, "cl::Platform::get"); // Select computation device here. clErr = platforms.at(1).getDevices(CL_DEVICE_TYPE_CPU, &devices); CheckCLErrors(clErr, "cl::Platform::getDevices"); context = cl::Context(devices, NULL, NULL, NULL, &clErr); CheckCLErrors(clErr, "cl::Context::Context"); queue = cl::CommandQueue(context, devices.at(0), 0, &clErr); CheckCLErrors(clErr, "cl::CommandQueue::CommandQueue"); std::ifstream sourceFile("computeresidual.cl"); std::string sourceCode((std::istreambuf_iterator<char>(sourceFile)), std::istreambuf_iterator<char>()); cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); program = cl::Program(context, source, &clErr); CheckCLErrors(clErr, "cl::Program::Program"); // Pass in constants to the OpenCL kernel as compiler switches. This is an // efficient way to handle constants such as domain sizes in OpenCL. 
std::string BuildOptions("\ -D X1_SIZE=" + std::to_string(X1Size) + " -D X2_SIZE=" + std::to_string(X2Size) + " -D TOTAL_X1_SIZE=" + std::to_string(X1Size+2*NG) + " -D TOTAL_X2_SIZE=" + std::to_string(X2Size+2*NG)); // Compile the OpenCL program and extract the kernel. PetscScalar start = std::clock(); clErr = program.build(devices, BuildOptions.c_str(), NULL, NULL); const char *buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>( devices.at(0), &clErr).c_str(); PetscPrintf(PETSC_COMM_WORLD, "%s\n", buildlog); CheckCLErrors(clErr, "cl::Program::build"); PetscScalar end = std::clock(); PetscScalar time = (end - start)/(PetscScalar)CLOCKS_PER_SEC; PetscPrintf(PETSC_COMM_WORLD, "Time taken for kernel compilation = %f\n", time); kernel = cl::Kernel(program, "ComputeResidual", &clErr); CheckCLErrors(clErr, "cl::Kernel::Kernel"); // How much memory is the kernel using? cl_ulong localMemSize = kernel.getWorkGroupInfo<CL_KERNEL_LOCAL_MEM_SIZE>( devices.at(0), &clErr); cl_ulong privateMemSize = kernel.getWorkGroupInfo<CL_KERNEL_PRIVATE_MEM_SIZE>( devices.at(0), &clErr); printf("Local memory used = %llu\n", (unsigned long long)localMemSize); printf("Private memory used = %llu\n", (unsigned long long)privateMemSize); // Set initial conditions. InitialCondition(ts, soln); TSSetSolution(ts, soln); TSSetType(ts, TSTHETA); TSSetFromOptions(ts); // Finally solve! All time stepping options can be controlled from the // command line. TSSolve(ts, soln); // Delete the data structures in the following order. DMDestroy(&dmda); VecDestroy(&soln); TSDestroy(&ts); PetscFinalize(); return(0); }
void initOpenCL() { // OpenCL try { // Get available platforms vector<cl::Platform> platforms; cl::Platform::get(&platforms); LOG_INFO<<platforms.front().getInfo<CL_PLATFORM_VERSION>(); // context sharing is OS specific #if defined (__APPLE__) || defined(MACOSX) CGLContextObj curCGLContext = CGLGetCurrentContext(); CGLShareGroupObj curCGLShareGroup = CGLGetShareGroup(curCGLContext); cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)curCGLShareGroup, 0 }; #elif defined WIN32 cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; #else cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; #endif m_context = cl::Context( CL_DEVICE_TYPE_GPU, properties); // Get a list of devices on this platform vector<cl::Device> devices = m_context.getInfo<CL_CONTEXT_DEVICES>(); m_device = devices[0]; // Create a command queue and use the first device m_queue = cl::CommandQueue(m_context, devices[0]); // Read source file std::string sourceCode = kinski::readFile("kernels.cl"); // Make program of the source code in the context m_program = cl::Program(m_context, sourceCode); // Build program for these specific devices m_program.build(); m_particleKernel = cl::Kernel(m_program, "updateParticles"); m_imageKernel = cl::Kernel(m_program, "set_colors_from_image"); } catch(cl::Error &error) { LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")"; LOG_ERROR << "Build Status: " << m_program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(m_device); LOG_ERROR << "Build Options:\t" << m_program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(m_device); LOG_ERROR << "Build Log:\t " << 
m_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_device); } }
CL_helper(const char *kernelFileName, std::vector<std::string> kernelNames, bool loadBinary, bool writeBinary, bool usePreProcArgs = false, std::map<std::string, std::string> preProcArgs = std::map<std::string, std::string>()){ cl::Platform::get(&platforms); if(platforms.size()==0) throw std::runtime_error("No OpenCL platforms found.\n"); int selectedPlatform=0; platform=platforms.at(selectedPlatform); platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); if(devices.size()==0) throw std::runtime_error("No opencl devices found.\n"); int selectedDevice=0; device=devices.at(selectedDevice); context = cl::Context(devices); try{ if(loadBinary){ std::string vendor=platform.getInfo<CL_PLATFORM_VENDOR>(); size_t found = vendor.find("NVIDIA"); if(found != std::string::npos){ FILE* fp; fp = fopen("src/kernels/julia_filter.ptx", "r"); if (!fp) { std::cerr << "Error loading kernel binary" << std::endl; std::cerr << "Building kernel from .cl file" << std::endl; loadBinary = false; } else { fseek(fp, 0, SEEK_END); size_t kernel_sz = ftell(fp); rewind(fp); char* kernel_str = (char*)malloc(kernel_sz); unsigned bytes = fread(kernel_str, 1, kernel_sz, fp); fclose(fp); binaries.push_back(std::make_pair((void*)kernel_str,kernel_sz+1)); program = cl::Program(context, devices, binaries); program.build(devices); } } else{ std::cerr << "Vendor not NVIDIA, cannot load .ptx binary" << std::endl; std::cerr << "Building kernel from .cl file" << std::endl; loadBinary = false; } } if(!loadBinary){ std::string kernelSource=CL_helper::LoadSource(kernelFileName); sources.push_back(std::make_pair(kernelSource.c_str(), kernelSource.size()+1)); program = cl::Program(context, sources); if(usePreProcArgs && !preProcArgs.empty()){ std::string preProcArgsString; for(auto& arg : preProcArgs) { preProcArgsString += "-D" + arg.first + "=" + arg.second + " "; } program.build(devices, preProcArgsString.c_str()); } else { //std::string params = "-cl-unsafe-math-optimizations"; program.build(devices); } } }catch 
(cl::Error er) { for(unsigned i=0;i<devices.size();i++){ std::cerr <<"Log for device " << devices[i].getInfo<CL_DEVICE_NAME>().c_str()<<std::endl; std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[i]).c_str() <<std::endl; } std::cerr << "ERROR:" << er.what() << " Code " << er.err()<<std::endl; throw; } for(unsigned i=0; i<kernelNames.size();i++){ kernels.push_back( cl::Kernel(program, kernelNames.at(i).c_str()) ); } queue = cl::CommandQueue(context, device); if(writeBinary){ size_t bin_sz; program.getInfo( CL_PROGRAM_BINARY_SIZES, &bin_sz); unsigned char *bin = (unsigned char *)malloc(bin_sz); program.getInfo(CL_PROGRAM_BINARIES, &bin); FILE* fp = fopen("src/kernels/julia_filter.ptx", "wb"); fwrite(bin, sizeof(char), bin_sz, fp); fclose(fp); free(bin); } }
int main() { try { std::vector<cl::Device> devices; // select platform cl::Platform platform = selectPlatform(); // select device platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); cl::Device device = selectDevice(devices); // create context context = cl::Context(devices); // create command queue queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); // load opencl source std::ifstream cl_file("inclusive_scan.cl"); std::string cl_string{std::istreambuf_iterator<char>(cl_file), std::istreambuf_iterator<char>()}; cl::Program::Sources source(1, std::make_pair(cl_string.c_str(), cl_string.length() + 1)); // create programm program = cl::Program(context, source); // compile opencl source try { program.build(devices); size_t input_size; std::ifstream input_file("input.txt"); input_file >> input_size; std::vector<float> input(input_size); // for (size_t i = 0; i < input_size; ++i) { // input[i] = i % 10; // } for (int i = 0; i < input_size; i++) { input_file >> input[i]; } std::vector<float> output(input_size, 0); cl::Buffer dev_input (context, CL_MEM_READ_ONLY, sizeof(float) * input_size); queue.enqueueWriteBuffer(dev_input, CL_TRUE, 0, sizeof(float) * input_size, &input[0]); cl::Buffer dev_output = inclusive_scan(dev_input, input_size); queue.enqueueReadBuffer(dev_output, CL_TRUE, 0, sizeof(float) * input_size, &output[0]); queue.finish(); cpu_check(input, output); std::ofstream output_file("output.txt"); for (int i = 0; i < input_size; i++) { output_file << output[i] << " "; } } catch (cl::Error const & e) { std::string log_str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device); std::cout << std::endl << e.what() << " : " << e.err() << std::endl; std::cout << log_str; return 0; } } catch (cl::Error const & e) { std::cout << "Error: " << e.what() << " #" << e.err() << std::endl; } return 0; }