void initCL() { ocl::createContextEx(CL_DEVICE_TYPE_ALL, clPlatform, clDevices, clContext, clQueues); cl_int clError = CL_SUCCESS; std::ifstream t("kmeans.cl"); std::string code((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>()); std::string header = "#define DIM "; header += util::toString(DIM); header += "\n"; header += "#define K "; header += util::toString(K); header += "\n"; header += "#define N "; header += util::toString(N); header += "\n"; header += "#define AM_LWS "; header += util::toString(AM_LWS); header += "\n"; header += "#define RP_LWS "; header += util::toString(RP_LWS); header += "\n\n\n"; code = header + code; try { cl::Program::Sources source(1, std::make_pair(code.c_str(), code.size())); clProgram = cl::Program(clContext, source); clProgram.build(clDevices, "-cl-fast-relaxed-math -cl-unsafe-math-optimizations -cl-mad-enable"); std::string info(""); for (std::vector<cl::Device>::iterator itr = clDevices.begin(); itr != clDevices.end(); ++itr) { clProgram.getBuildInfo(*itr, CL_PROGRAM_BUILD_LOG, &info); if (info.size() > 0) std::cout << "Build log: " << info << std::endl; } for (int i = 0; i < clDevices.size(); ++i) { clClusterAssignment.push_back(cl::Kernel(clProgram, "cluster_assignment", &clError)); clClusterReposition.push_back(cl::Kernel(clProgram, "cluster_reposition", &clError)); clClusterReposition_k.push_back(cl::Kernel(clProgram, "cluster_reposition_k", &clError)); clClusterReposition_k_c.push_back(cl::Kernel(clProgram, "c_cluster_reposition", &clError)); clComputeCost.push_back(cl::Kernel(clProgram, "compute_cost", &clError)); } } catch (const cl::Error& err) { std::cout << "OpenCL Error 4: " << err.what() << " (" << err.err() << ")" << std::endl; std::string info(""); for (std::vector<cl::Device>::iterator itr = clDevices.begin(); itr != clDevices.end(); ++itr) { clProgram.getBuildInfo(*itr, CL_PROGRAM_BUILD_LOG, &info); if (info.size() > 0) std::cout << "Build log: " << info << std::endl; } std::cin.get(); } }
void InitCL() { //cl_int err = CL_SUCCESS; try { //Identify platforms cl::Platform::get(&clPlatformList); //Select first platform with any GPU devices for(unsigned int i=0; i<clPlatformList.size(); i++) { clPlatformList[i].getDevices(CL_DEVICE_TYPE_GPU, &clDeviceList); if(!clDeviceList.empty()) break; } //Set Context Properties: Get associated cl_platform_id using getInfo() on the first GPU //Thus conveniently avoiding previous C++ bindings issues :) cl_context_properties clProps[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties)clDeviceList[0].getInfo<CL_DEVICE_PLATFORM>(), 0 }; //Create interop context from GPU devices clContext = cl::Context(CL_DEVICE_TYPE_GPU, clProps); //Generate program with source and build std::string progFile = ReadKernels(KERNEL_FILE); cl::Program::Sources clSource(1, std::make_pair(progFile.c_str(), progFile.size())); clProgram = cl::Program(clContext, clSource); clProgram.build(clDeviceList); //Initialize kernels for(int i=0; i<NUM_KERNELS; i++) { clKernels[i] = cl::Kernel(clProgram, kernelName[i]); } //Create Command Queue with profiling enabled clQueue = cl::CommandQueue(clContext, clDeviceList[0], CL_QUEUE_PROFILING_ENABLE); } catch(cl::Error e) { cout << "OpenCL initialization failure: " << e.what() << endl << "Error code: " << e.err() << endl; if(e.err() == -11) { std::string clProgLog; clProgram.getBuildInfo(clDeviceList[0], CL_PROGRAM_BUILD_LOG, &clProgLog); cout << clProgLog; system("pause"); exit(EXIT_FAILURE); } throw; } }
/**
 * Formats the build information of a program for the currently selected
 * device (_devices[d_index]).
 *
 * @param prog program whose build information is queried.
 * @return multi-line text with the build options, build status and build log.
 */
string Host::buildLog(cl::Program prog)
{
    string options, blog;
    // CL_PROGRAM_BUILD_STATUS yields an integer cl_build_status, not text;
    // reading it into a string produces garbage, so query it as its real
    // type and translate it.
    cl_build_status status = CL_BUILD_NONE;

    prog.getBuildInfo(_devices[d_index], CL_PROGRAM_BUILD_OPTIONS, &options);
    prog.getBuildInfo(_devices[d_index], CL_PROGRAM_BUILD_LOG, &blog);
    prog.getBuildInfo(_devices[d_index], CL_PROGRAM_BUILD_STATUS, &status);

    const char* statusText = "unknown";
    switch (status) {
        case CL_BUILD_SUCCESS:     statusText = "CL_BUILD_SUCCESS";     break;
        case CL_BUILD_NONE:        statusText = "CL_BUILD_NONE";        break;
        case CL_BUILD_ERROR:       statusText = "CL_BUILD_ERROR";       break;
        case CL_BUILD_IN_PROGRESS: statusText = "CL_BUILD_IN_PROGRESS"; break;
        default:                   break;
    }

    ostringstream info;
    info << "Building kernels" << endl
         << "Build Options: " << options << endl
         << "Build Status: " << statusText << " (" << status << ")" << endl
         << endl
         << "Build Log: " << blog << endl;
    return info.str();
}
/**
 * Assembles an OpenCL program from the common preamble plus the given
 * kernel sources and builds it for the current device.
 *
 * The source list is, in order: USE_DBL_SRC_STR, KParam_hpp, then the
 * num_files entries of ker_strs (with lengths from ker_lens).
 *
 * @param prog      receives the newly created program.
 * @param num_files number of entries in ker_strs / ker_lens.
 * @param ker_strs  kernel source strings.
 * @param ker_lens  length of each kernel source string.
 * @param options   extra compiler options, appended directly after the
 *                  default options.
 *
 * If anything throws, SHOW_BUILD_INFO(prog) is invoked and the exception is
 * rethrown unchanged.
 */
void buildProgram(cl::Program &prog, const int num_files, const char **ker_strs, const int *ker_lens, std::string options)
{
    try {
        Program::Sources sources;
        sources.emplace_back(USE_DBL_SRC_STR.c_str(), USE_DBL_SRC_STR.length());
        sources.emplace_back(KParam_hpp, KParam_hpp_len);
        for (int file = 0; file < num_files; ++file) {
            sources.emplace_back(ker_strs[file], ker_lens[file]);
        }

        // Computed on the first call and reused afterwards.
        static std::string defaults = [] {
            std::string flags(" -cl-std=CL1.1");
            flags += std::string(" -D dim_type=");
            flags += std::string(dtype_traits<dim_type>::getName());
            return flags;
        }();

        prog = cl::Program(getContext(), sources);

        std::vector<cl::Device> targetDevices(1, getDevice());
        const std::string buildFlags = defaults + options;
        prog.build(targetDevices, buildFlags.c_str());
    } catch (...) {
        SHOW_BUILD_INFO(prog);
        throw;
    }
}
void initSimulation() { // source: http://stackoverflow.com/questions/26517114/how-to-compile-opencl-project-with-kernels try { std::vector<cl::Platform> platforms; cl::Platform::get(&platforms); std::vector<cl::Device> devices; platforms[PLATFORM_ID].getDevices(CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, &devices); context = cl::Context(devices); queue = cl::CommandQueue(context, devices[DEVICE_ID]); std::ifstream sourceFile{"kernels/programs.cl"}; std::string sourceCode(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>())); cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length())); simulationProgram = cl::Program(context, source); simulationProgram.build(devices); visualizationBufferGPU = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(unsigned char) * 4 * fieldWidth * fieldHeight, nullptr, nullptr); randomizeField(); stepKernel = cl::Kernel(simulationProgram, "tick"); stepKernel.setArg(0, fieldWidth); stepKernel.setArg(1, fieldHeight); stepKernel.setArg(3, visualizationBufferGPU); } catch (cl::Error err) { std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl; exit(2); } }
void initCL() { dumpCLinfo(); EGLDisplay mEglDisplay = eglGetCurrentDisplay(); if (mEglDisplay == EGL_NO_DISPLAY) LOGE("initCL: eglGetCurrentDisplay() returned 'EGL_NO_DISPLAY', error = %x", eglGetError()); EGLContext mEglContext = eglGetCurrentContext(); if (mEglContext == EGL_NO_CONTEXT) LOGE("initCL: eglGetCurrentContext() returned 'EGL_NO_CONTEXT', error = %x", eglGetError()); cl_context_properties props[] = { CL_GL_CONTEXT_KHR, (cl_context_properties) mEglContext, CL_EGL_DISPLAY_KHR, (cl_context_properties) mEglDisplay, CL_CONTEXT_PLATFORM, 0, 0 }; try { cl::Platform p = cl::Platform::getDefault(); std::string ext = p.getInfo<CL_PLATFORM_EXTENSIONS>(); if(ext.find("cl_khr_gl_sharing") == std::string::npos) LOGE("Warning: CL-GL sharing isn't supported by PLATFORM"); props[5] = (cl_context_properties) p(); theContext = cl::Context(CL_DEVICE_TYPE_GPU, props); std::vector<cl::Device> devs = theContext.getInfo<CL_CONTEXT_DEVICES>(); LOGD("Context returned %d devices, taking the 1st one", devs.size()); ext = devs[0].getInfo<CL_DEVICE_EXTENSIONS>(); if(ext.find("cl_khr_gl_sharing") == std::string::npos) LOGE("Warning: CL-GL sharing isn't supported by DEVICE"); theQueue = cl::CommandQueue(theContext, devs[0]); cl::Program::Sources src(1, std::make_pair(oclProgI2I, sizeof(oclProgI2I))); theProgI2I = cl::Program(theContext, src); theProgI2I.build(devs); cv::ocl::attachContext(p.getInfo<CL_PLATFORM_NAME>(), p(), theContext(), devs[0]()); if( cv::ocl::useOpenCL() ) LOGD("OpenCV+OpenCL works OK!"); else LOGE("Can't init OpenCV with OpenCL TAPI"); } catch(cl::Error& e) { LOGE("cl::Error: %s (%d)", e.what(), e.err()); } catch(std::exception& e) { LOGE("std::exception: %s", e.what()); } catch(...) { LOGE( "OpenCL info: unknown error while initializing OpenCL stuff" ); } LOGD("initCL completed"); }
int main(int argc, char *argv[]) { cl_int err = CL_SUCCESS; cl::Event evt; std::vector<cl::Platform> platforms; cl::Platform::get(&platforms); if (platforms.size() == 0) { return false; } platform_ = platforms[0]; cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &err); CHECK_CL_ERROR(err, "cl::Context"); std::vector<cl::Device> devices = context_.getInfo<CL_CONTEXT_DEVICES>(); if (devices.size() == 0) { return false; } device_ = devices[0]; sources_.push_back(std::make_pair(source_str.c_str(), source_str.size())); program_ = cl::Program(context_, sources_); err = program_.build(devices); if (err != CL_SUCCESS) { std::string log = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]); std::cout << "program.build() ERROR: " << log.c_str() << std::endl; return false; } kernel_ = cl::Kernel(program_, "hello", &err); CHECK_CL_ERROR(err, "cl::Kernel"); buf_ = cl::Buffer(context_, CL_MEM_READ_ONLY, 1024, NULL, &err); queue_ = cl::CommandQueue(context_, device_, 0, &err); CHECK_CL_ERROR(err, "cl::CommandQueue"); kernel_.setArg(0, buf_); err = queue_.enqueueNDRangeKernel(kernel_, cl::NullRange, cl::NDRange(10, 10), cl::NullRange, NULL, &evt); evt.wait(); CHECK_CL_ERROR(err, "queue.enqueueNDRangeKernel()"); return 0; }
/**
 * Creates a program from the given sources and builds it for one device.
 *
 * @param program receives the created program.
 * @param sources program source list.
 * @param context context the program is created in.
 * @param device  device the program is built for.
 * @return CL_SUCCESS, or the error code of the step that failed. Progress
 *         and failures (including the build log) are reported via Print.
 */
cl_int CLFW::Build(cl::Program &program, cl::Program::Sources &sources, cl::Context &context, cl::Device &device)
{
    cl_int status;

    program = cl::Program(context, sources, &status);
    if (status != CL_SUCCESS) {
        Print("Error creating program:", errorFG, errorBG);
        return status;
    }

    status = program.build({ device });
    if (status == CL_SUCCESS) {
        Print("Success building OpenCL program. ", successFG, successBG);
        return status;
    }

    Print("Error building program:", errorFG, errorBG);
    Print(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device), errorFG, errorBG);
    return status;
}
/**
 * Creates every kernel contained in a built program and indexes them by
 * function name.
 *
 * @param Kernels cleared, then filled with one entry per kernel.
 * @param program built program to extract the kernels from.
 * @return CL_SUCCESS, or the error code reported by createKernels (in which
 *         case Kernels is left empty).
 */
cl_int CLFW::get(std::unordered_map<cl::STRING_CLASS, cl::Kernel> &Kernels, cl::Program &program)
{
    Kernels.clear();

    std::vector<cl::Kernel> tempKernels;
    const cl_int error = program.createKernels(&tempKernels);
    if (error != CL_SUCCESS) {
        Print("Unable to create kernels.", errorFG, errorBG);
        return error;
    }

    for (const cl::Kernel &kernel : tempKernels) {
        std::string name = std::string(kernel.getInfo<CL_KERNEL_FUNCTION_NAME>());
        // Some versions of the OpenCL C++ headers include the terminating
        // NUL in the returned string. Strip only NULs, never a real
        // character, so the key is correct with either header version.
        while (!name.empty() && name.back() == '\0') {
            name.pop_back();
        }
        Kernels[name] = kernel;
    }

    // Iterate by reference: copying an entry would also copy its cl::Kernel.
    for (const auto &entry : Kernels) {
        Print("Created Kernel " + entry.first, successFG, successBG);
    }
    return CL_SUCCESS;
}
void CLHelper::compileProgram( cl::Program& program, std::vector<cl::Device>& devices, const char* options, void (CL_CALLBACK * notifyFptr)(cl_program, void *), void* data) { cl_int err; err = program.build(devices, options, NULL, NULL); if(err != CL_SUCCESS) { std::cout << "Build error! Showing build log:" << std::endl << std::endl; std::string errorLog; std::vector<cl::Device>::iterator device; for(device = devices.begin(); device != devices.end(); device++) { errorLog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device); std::cout << errorLog << std::endl; } CHECK_OPENCL_ERROR(err, "cl::Program::build() failed."); } }
bool RenderThread::tick() { for(int i = 0; i < RenderThread::taskList.getSize(); i++) { if(!RenderThread::taskList[i]->invoke()) { GlobalThread::stop = true; } } RenderThread::taskList.clear(); if(RenderThread::skipRender) { return true; } GameStates::swapPendingRendering(); GameStates::GameState* state = GameStates::renderingState; if(GLWindow::instance->rescaled) { glViewport(0, 0, GLWindow::instance->width, GLWindow::instance->height); GLWindow::instance->rescaled = false; } normalShaderProgram->bind(); gfxu::Uniforms::camPos.set(state->cam.pos); gfxu::Uniforms::setColor(1.0f, 1.0f, 1.0f, 1.0f); if(GlobalThread::world.getBlock(floorf(state->cam.pos.x), floorf(state->cam.pos.y), floorf(state->cam.pos.z)) != Blocks::water) { glClearColor(0.5f, 0.875f, 1.0f, 1.0f); gfxu::Uniforms::setFogColor(0.5f, 0.875f, 1.0f, 1.0f); gfxu::Uniforms::fogDist.set(16.0f * renderDistance); } else { glClearColor(0.0f, 0.0f, 0.1f, 1.0f); gfxu::Uniforms::setFogColor(0.0f, 0.0f, 0.1f, 1.0f); gfxu::Uniforms::fogDist.set(16.0f); } gfxu::Uniforms::reset(); gfxu::Uniforms::PMS.mult(geom::Matrix::perspective(state->FOV, (float)GLWindow::instance->width / (float)GLWindow::instance->height, 0.1f, 16.0f * renderDistance)); gfxu::Uniforms::PMS.mult(geom::Matrix::rotate(state->cam.rot.x, 1.0f, 0.0f, 0.0f)); gfxu::Uniforms::PMS.mult(geom::Matrix::rotate(state->cam.rot.y, 0.0f, 1.0f, 0.0f)); gfxu::Uniforms::PMS.mult(geom::Matrix::rotate(state->cam.rot.z, 0.0f, 0.0f, 1.0f)); gfxu::Uniforms::PMS.mult(geom::Matrix::translate(-state->cam.pos.x, -state->cam.pos.y, -state->cam.pos.z)); float cScale = 1.0f + 90.0f / state->FOV; int xCam = floorf(state->cam.pos.x / 16.0f); int yCam = floorf(state->cam.pos.y / 16.0f); int zCam = floorf(state->cam.pos.z / 16.0f); geom::Matrix projectionMatrix = gfxu::Uniforms::PMS.getTopmost(); projectionMatrixBuffer.write(commandQueue, projectionMatrix.data); const size_t global_ws_1[] = {renderDistance * 2 + 2, renderDistance * 2 + 2, renderDistance * 2 + 2}; const size_t 
local_ws_1[] = {1, 1, 1}; if(!program.prepare("gridTransform")) return false; if(!program.setArgument(sizeof(const unsigned int), &renderDistance)) return false; if(!program.setArgument(sizeof(cl_mem), &projectionMatrixBuffer)) return false; if(!program.setArgument(sizeof(const int), &xCam)) return false; if(!program.setArgument(sizeof(const int), &yCam)) return false; if(!program.setArgument(sizeof(const int), &zCam)) return false; if(!program.setArgument(sizeof(cl_mem), &boolBuffer)) return false; if(!program.invoke(commandQueue, 3, global_ws_1, local_ws_1)) return false; const size_t global_ws_2[] = {renderDistance * 2 + 1, renderDistance * 2 + 1, renderDistance * 2 + 1}; const size_t local_ws_2[] = {1, 1, 1}; if(!program.prepare("arrayInsideCheck")) return false; if(!program.setArgument(sizeof(const unsigned int), &renderDistance)) return false; if(!program.setArgument(sizeof(cl_mem), &gridBuffer)) return false; if(!program.setArgument(sizeof(cl_mem), &boolBuffer)) return false; if(!program.invoke(commandQueue, 3, global_ws_2, local_ws_2)) return false; if(!gridBuffer.read(commandQueue, bGrid)) return false; getError(); /*for(int i = 0; i <= renderDistance * 2; i++) { int f1 = i - renderDistance; for(int j = 0; j <= renderDistance * 2; j++) { int f2 = j - renderDistance; for(int k = 0; k <= renderDistance * 2; k++) { int f3 = k - renderDistance; bGrid[i][j][k] = cube.inside((gfxu::Uniforms::PMS.getTopmost() * geom::Vector((xCam + f1) * 16.0f, (yCam + f2) * 16.0f, (zCam + f3) * 16.0f)).wDivide()); } } }*/ GlobalThread::world.chunkMapLock.lock(); GlobalThread::world.additionQueueLock.lock(); while(!GlobalThread::world.additionQueue.empty()) { std::shared_ptr<ChunkBase> c = GlobalThread::world.additionQueue.front(); GlobalThread::world.additionQueue.pop(); GlobalThread::world.addChunk(c); } GlobalThread::world.additionQueueLock.unlock(); GlobalThread::world.removalQueueLock.lock(); while(!GlobalThread::world.removalQueue.empty()) { std::shared_ptr<ChunkBase> c = 
GlobalThread::world.removalQueue.front(); c->setUnloaded(); GlobalThread::world.removalQueue.pop(); GlobalThread::world.removeChunk(c->pos); } GlobalThread::world.removalQueueLock.unlock(); std::vector<shared_ptr<ChunkBase>> chunksToRender; for(auto iter = GlobalThread::world.chunkMap.begin(); iter != GlobalThread::world.chunkMap.end(); ++iter) { shared_ptr<ChunkBase> chunk = iter->second; if(!chunk->isEmpty()) { if(chunk->pos.x - renderDistance <= xCam && chunk->pos.x + renderDistance >= xCam && chunk->pos.y - renderDistance <= yCam && chunk->pos.y + renderDistance >= yCam && chunk->pos.z - renderDistance <= zCam && chunk->pos.z + renderDistance > zCam) { if(bGrid[chunk->pos.x - xCam + renderDistance][chunk->pos.y - yCam + renderDistance][chunk->pos.z - zCam + renderDistance]) { chunksToRender.push_back(chunk); } } } } GlobalThread::world.chunkMapLock.unlock(); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); blocksTexture->bind(); glEnable(GL_BLEND); for(int i = 0; i < chunksToRender.size(); i++) { std::shared_ptr<ChunkBase> chunk = chunksToRender[i]; chunk->renderMutex.lock(); if(chunk->isLoaded() && chunk->firstPass != nullptr) { gfxu::Uniforms::MMS.push(geom::Matrix::translate(chunk->pos.x * 16, chunk->pos.y * 16, chunk->pos.z * 16)); chunk->firstPass->draw(); gfxu::Uniforms::MMS.pop(); } chunk->renderMutex.unlock(); } for(int i = 0; i < chunksToRender.size(); i++) { std::shared_ptr<ChunkBase> chunk = chunksToRender[i]; chunk->renderMutex.lock(); if(chunk->secondPass != nullptr) { gfxu::Uniforms::MMS.push(geom::Matrix::translate(chunk->pos.x * 16, chunk->pos.y * 16, chunk->pos.z * 16)); chunk->secondPass->draw(); gfxu::Uniforms::MMS.pop(); } chunk->renderMutex.unlock(); } noTexShaderProgram->bind(); /*gfxu::Uniforms::MMS.push(geom::Matrix::translate(state->cam.pos.x - renderDistance * 16.0f, -0.125f, state->cam.pos.z - renderDistance * 16.0f)); gfxu::Uniforms::MMS.mult(geom::Matrix::scale(32.0f * renderDistance, 1.0f, 32.0f * renderDistance)); 
gfxu::Uniforms::setColor(0.1f, 0.2f, 0.5f, 0.75f); square->draw(); gfxu::Uniforms::MMS.pop();*/ if(state->devEnabled) { gfxu::Uniforms::setFogColor(1.0f, 0.0f, 0.0f, 0.0f); gfxu::Uniforms::fogDist.set(128.0f); gfxu::Uniforms::MMS.push(geom::Matrix::scale(16.0f, 16.0f, 16.0f)); gfxu::Uniforms::MMS.mult(geom::Matrix::translate(xCam, yCam, zCam)); gfxu::Uniforms::setColor(1.0f, 0.0f, 0.0f, 0.25f); grid->draw(GL_LINES); glDepthFunc(GL_GREATER); gfxu::Uniforms::setColor(1.0f, 0.0f, 0.0f, 0.125f); grid->draw(GL_LINES); gfxu::Uniforms::MMS.pop(); glDepthFunc(GL_LEQUAL); } glDisable(GL_BLEND); glFlush(); if(gfxu::getError("Graphics thread loop error")) GlobalThread::stop = true; GLWindow::instance->swapBuffers(); return true; }
void RenderThread::preStart() { const size_t gridSize = sizeof(bool) * (renderDistance * 2 + 1) * (renderDistance * 2 + 1) * (renderDistance * 2 + 1); commandQueue.create(); projectionMatrixBuffer.create(sizeof(float) * 16, CL_MEM_READ_ONLY); gridBuffer.create(gridSize, CL_MEM_WRITE_ONLY); boolBuffer.create(sizeof(unsigned char) * (renderDistance * 2 + 2) * (renderDistance * 2 + 2) * (renderDistance * 2 + 2), CL_MEM_READ_WRITE); std::wstring filePath(IOUtil::EXE_DIR); filePath += L"\\programs\\frustumclip.cl"; program.create(filePath); grid = new VertexStream(); for(int i = -8; i <= 8; i++) { for(int j = -8; j <= 8; j++) { grid->put(Vertex(i, j, -8)); grid->put(Vertex(i, j, 8)); grid->put(Vertex(i, -8, j)); grid->put(Vertex(i, 8, j)); grid->put(Vertex(-8, i, j)); grid->put(Vertex(8, i, j)); } } square = new VertexStream(); square->put(Vertex(0.0f, 0.0f, 0.0f)); square->put(Vertex(1.0f, 0.0f, 0.0f)); square->put(Vertex(1.0f, 0.0f, 1.0f)); square->put(Vertex(1.0f, 0.0f, 1.0f)); square->put(Vertex(0.0f, 0.0f, 1.0f)); square->put(Vertex(0.0f, 0.0f, 0.0f)); GLWindow::instance->initGL(); if(!GLWindow::instance->isOK()) { return; } filePath = IOUtil::EXE_DIR; filePath += L"\\shaders\\normal.vert"; normalVShader = new gfxu::VertexShader(filePath); filePath = IOUtil::EXE_DIR; filePath += L"\\shaders\\normal.frag"; normalFShader = new gfxu::FragmentShader(filePath); normalShaderProgram = new gfxu::ShaderProgram(normalVShader, nullptr, normalFShader); filePath = IOUtil::EXE_DIR; filePath += L"\\shaders\\noTex.vert"; noTexVShader = new gfxu::VertexShader(filePath); filePath = IOUtil::EXE_DIR; filePath += L"\\shaders\\noTex.frag"; noTexFShader = new gfxu::FragmentShader(filePath); noTexShaderProgram = new gfxu::ShaderProgram(noTexVShader, nullptr, noTexFShader); gfxu::Uniforms::reset(); glEnable(GL_DEPTH_TEST); glEnable(GL_CULL_FACE); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); }
void PTWeekend::setup() { /* Scene data */ camera.lookAt(glm::vec3(-2,1,1), vec3(0, 0, -1.0f), vec3(0,1,0)); camera.setPerspective( 45.0f, getWindowAspectRatio(), 0.01f, 100.0f ); cameraUI = CameraUi(&camera, getWindow()); glm::vec3 bottom_sky_color(1.0, 1.0, 1.0); glm::vec3 top_sky_color(0.5, 0.7, 1.0); CGLContextObj glContext = CGLGetCurrentContext(); CGLShareGroupObj shareGroup = CGLGetShareGroup(glContext); GLuint imgTexName; const char* program_file_str = "../../../assets/path_tracing.cl"; /* Obtain a platform */ std::vector<cl::Platform> platforms; clStatus = cl::Platform::get(&platforms); pt_assert(clStatus, "Could not find an OpenCL platform."); /* Obtain a device and determinte max local size */ std::vector<cl::Device> devices; clStatus = platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); pt_assert(clStatus, "Could not find a GPU device."); device = devices[0]; /* Create an OpenCL context for the device */ cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)shareGroup, 0 }; context = cl::Context({device}, properties, NULL, NULL, &clStatus); pt_assert(clStatus, "Could not create a context for device."); /* Load and build a program */ std::ifstream program_file(program_file_str); std::string program_str(std::istreambuf_iterator<char>(program_file), (std::istreambuf_iterator<char>())); cl::Program::Sources sources(1, std::make_pair(program_str.c_str(), program_str.length() + 1)); program = cl::Program(context, sources); clStatus = program.build({device}, "-I ../../../assets/ -cl-denorms-are-zero"); if (clStatus != CL_SUCCESS) { std::string log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device); std::cerr << log << "\n"; exit(EXIT_FAILURE); } /* Create command queue */ cmd_queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &clStatus); pt_assert(clStatus, "Could not create command queue"); /* create kernel and set the kernel arguments */ kernel = cl::Kernel(program, "path_tracing", 
&clStatus); pt_assert(clStatus, "Could not create kernel"); img_width = getWindowWidth(); img_height = getWindowHeight(); true_img_width = getWindowWidth(); true_img_height = getWindowHeight(); local_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); //TODO: throws local_width = (size_t)pow(2, ceilf(log2f((floorf(sqrtf(local_size)))))); local_height = local_size / local_width; img_width = ceilf((float)img_width / (float)local_width) * local_width; img_height = ceilf((float)img_height / (float)local_height) * local_height; unsigned int samples = 16; /* Create GL texture and CL wrapper */ glGenTextures(1, &imgTexName); glBindTexture(GL_TEXTURE_2D, imgTexName); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, img_width, img_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, 0); imgTex = gl::Texture2d::create(GL_TEXTURE_2D, imgTexName, img_width, img_height, true); glFinish(); img_buffer.push_back(cl::Image2DGL(context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, imgTexName, &clStatus)); pt_assert(clStatus, "Could not create buffer"); /* Create all buffers */ cam_buffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(cl_pinhole_cam), NULL, &clStatus); pt_assert(clStatus, "Could not create camera buffer"); primitive_buffer = cl::Buffer (context, CL_MEM_READ_ONLY, MAX_PRIMITIVES * sizeof(cl_sphere), NULL, &clStatus); pt_assert(clStatus, "Could not create primitive buffer"); material_buffer = cl::Buffer (context, CL_MEM_READ_ONLY, MAX_PRIMITIVES * sizeof(cl_material), NULL, &clStatus); pt_assert(clStatus, "Could not create primitive buffer"); sky_buffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(cl_sky_material), NULL, &clStatus); pt_assert(clStatus, "Could not create sky buffer"); /* Upload scene (static) */ 
size_t sceneObjectCount = 5; cl_sphere* primitive_array = (cl_sphere*)malloc(sceneObjectCount * sizeof(cl_sphere)); cl_material* material_array = (cl_material*)malloc(sceneObjectCount * sizeof(cl_material)); primitive_array[0] = cl_make_sphere(glm::vec3(1, 0, -1), 0.5f); material_array[0] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0x730202"), 0, MAT_LAMBERTIAN); primitive_array[1] = cl_make_sphere(glm::vec3(-1, 0, -1), 0.5f); material_array[1] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0xF89000"), 0, MAT_LAMBERTIAN); primitive_array[2] = cl_make_sphere(glm::vec3(0, 0, 0), 0.5f); material_array[2] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0x97A663"), 0.1f, MAT_METALLIC); primitive_array[3] = cl_make_sphere(glm::vec3(0, 0, -2), 0.5f); material_array[3] = cl_make_material(glm::vec3(0.8f, 0.6f, 0.2f), 0.3f, MAT_METALLIC); primitive_array[4] = cl_make_sphere(glm::vec3(0,-100.5f, 1.0f), 100.0f); material_array[4] = cl_make_material(glm::vec3(0.5f), 0, MAT_LAMBERTIAN); clStatus = cmd_queue.enqueueWriteBuffer(primitive_buffer, CL_TRUE, 0, sceneObjectCount * sizeof(cl_sphere), primitive_array, NULL, NULL); pt_assert(clStatus, "Could not fill primitive buffer"); clStatus = cmd_queue.enqueueWriteBuffer(material_buffer, CL_TRUE, 0, sceneObjectCount * sizeof(cl_material), material_array, NULL, NULL); pt_assert(clStatus, "Could not fill material buffer"); pt_assert(cl_set_skycolors(bottom_sky_color, top_sky_color, sky_buffer, cmd_queue), "Could not fill sky buffer"); clStatus = kernel.setArg(1, primitive_buffer); pt_assert(clStatus, "Could not set primitive buffer argument"); clStatus = kernel.setArg(2, material_buffer); pt_assert(clStatus, "Could not set material buffer argument"); clStatus = kernel.setArg(3, sky_buffer); pt_assert(clStatus, "Could not set sky buffer argument"); clStatus = kernel.setArg(4, sceneObjectCount); pt_assert(clStatus, "Could not set primitive count count argument"); clStatus = kernel.setArg(5, img_buffer[0]); 
pt_assert(clStatus, "Could not set img buffer argument"); clStatus = kernel.setArg(6, samples); pt_assert(clStatus, "Could not set samples argument"); clStatus = kernel.setArg(0, cam_buffer); pt_assert(clStatus, "Could not set camera buffer argument"); }
/**
 * Prepares the object for the requested device type.
 *
 * For CL_DEVICE_TYPE_CPU / CL_DEVICE_TYPE_GPU the OpenCL platform list is
 * queried, all devices of that type are collected, a context and a
 * profiling-enabled command queue (on the first device) are created, the
 * program at KERNEL_PATH is built and the "maxInt" kernel is created. Any
 * other type selects the plain CPU implementation and needs no OpenCL setup.
 *
 * @param type requested OpenCL device type.
 * @return true on success (or when no OpenCL setup is needed); false when
 *         no matching device exists or an OpenCL / standard exception
 *         occurred (the error is written to std::cerr).
 */
bool MaxValueSimple::initialize(cl_device_type type)
{
    if (type == CL_DEVICE_TYPE_CPU)
        TYPE = CLCPU;
    else if (type == CL_DEVICE_TYPE_GPU)
        TYPE = CLGPU;
    else {
        TYPE = CPU;
        return true;
    }

    try {
        /*** Get the OpenCL platforms, e.g. AMD APP, NVIDIA CUDA ***/
        cl::Platform::get(&platforms);

        /*** Get the OpenCL devices of the requested type, e.g. GPU, CPU ***/
        std::vector < cl::Device > devTmp;
        for (std::vector<cl::Platform>::iterator it = platforms.begin(); it != platforms.end(); ++it) {
            try {
                it->getDevices(type, &devTmp);
            } catch (cl::Error & err) {
                // A platform without a device of this type reports
                // CL_DEVICE_NOT_FOUND; that must not stop the scan of the
                // remaining platforms.
                if (err.err() != CL_DEVICE_NOT_FOUND)
                    throw;
                devTmp.clear();
            }
            devices.insert(devices.end(), devTmp.begin(), devTmp.end());
            devTmp.clear();
        }

        if (devices.empty()) {
            std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): "
                      << "no OpenCL device of the requested type found" << std::endl;
            return false;
        }
        std::cerr << "[DEBUG] OpenCL device: " << devices[0].getInfo<CL_DEVICE_NAME> () << std::endl;

        /*** Create the OpenCL context and command queue ***/
        // NOTE(review): devices may come from more than one platform here;
        // confirm that a single context over all of them is intended.
        context = cl::Context(devices);
        cmdQ = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE);

        /*** Read the OpenCL source code ***/
        std::string src = readFile(KERNEL_PATH);
        cl::Program::Sources source;
        source.push_back(std::make_pair(src.data(), src.length()));

        /*** Create the OpenCL program from the source code ***/
        program = cl::Program(context, source);
        try {
            program.build(devices);
        } catch (cl::Error &) {
            /* TODO logging
               Logger::logDebug(
               "initCL",
               Logger::sStream << err.what() << "\nBuild log for \""
               << devices.front().getInfo<CL_DEVICE_NAME> () << "\":\n"
               << program.getBuildInfo<CL_PROGRAM_BUILD_LOG> (devices.front()));
             */
            throw; // rethrow the original exception object
        }

        kernel = cl::Kernel(program, "maxInt");
        event = cl::Event();
        return true;
    } catch (cl::Error& err) {
        // TODO Logger::logError(METHOD, Logger::sStream << err.what());
        std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): "
                  << err.what() << " (" << err.err() << ")" << std::endl;
        return false;
    } catch (std::exception& err) {
        // TODO Logger::logError(METHOD, Logger::sStream << err.what());
        std::cerr << "[ERROR] MaxValueSimple::initialize(cl_device_type): "
                  << err.what() << std::endl;
        return false;
    }
}
int main() { try { std::vector<cl::Device> devices; // select platform cl::Platform platform = selectPlatform(); // select device platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); cl::Device device = selectDevice(devices); // create context context = cl::Context(devices); // create command queue queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); // load opencl source std::ifstream cl_file("inclusive_scan.cl"); std::string cl_string{std::istreambuf_iterator<char>(cl_file), std::istreambuf_iterator<char>()}; cl::Program::Sources source(1, std::make_pair(cl_string.c_str(), cl_string.length() + 1)); // create programm program = cl::Program(context, source); // compile opencl source try { program.build(devices); size_t input_size; std::ifstream input_file("input.txt"); input_file >> input_size; std::vector<float> input(input_size); // for (size_t i = 0; i < input_size; ++i) { // input[i] = i % 10; // } for (int i = 0; i < input_size; i++) { input_file >> input[i]; } std::vector<float> output(input_size, 0); cl::Buffer dev_input (context, CL_MEM_READ_ONLY, sizeof(float) * input_size); queue.enqueueWriteBuffer(dev_input, CL_TRUE, 0, sizeof(float) * input_size, &input[0]); cl::Buffer dev_output = inclusive_scan(dev_input, input_size); queue.enqueueReadBuffer(dev_output, CL_TRUE, 0, sizeof(float) * input_size, &output[0]); queue.finish(); cpu_check(input, output); std::ofstream output_file("output.txt"); for (int i = 0; i < input_size; i++) { output_file << output[i] << " "; } } catch (cl::Error const & e) { std::string log_str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device); std::cout << std::endl << e.what() << " : " << e.err() << std::endl; std::cout << log_str; return 0; } } catch (cl::Error const & e) { std::cout << "Error: " << e.what() << " #" << e.err() << std::endl; } return 0; }
int main(int argc, char **argv) { TS ts; //Time stepper Vec soln; //Holds the solution vector, including all the primitive //variables. DM dmda; //Manages the computational grid and parallelization. int X1Start, X2Start; int X1Size, X2Size; PetscInitialize(&argc, &argv, PETSC_NULL, help); // Create the computational domain. DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_GHOSTED, DM_BOUNDARY_GHOSTED, DMDA_STENCIL_STAR, N1, N2, PETSC_DECIDE, PETSC_DECIDE, DOF, NG, PETSC_NULL, PETSC_NULL, &dmda); // When running in parallel, each process computes from // [X1Start, X1Start+X1Size] x [X2Start, X2Start+X2Size] DMDAGetCorners(dmda, &X1Start, &X2Start, NULL, &X1Size, &X2Size, NULL); // Create the solution vector. DMCreateGlobalVector(dmda, &soln); // Create the time stepper and link it to the computational grid and the // residual evaluation function. TSCreate(PETSC_COMM_WORLD, &ts); TSSetDM(ts, dmda); TSSetIFunction(ts, PETSC_NULL, ComputeResidual, NULL); // OpenCL boilerplate code. clErr = cl::Platform::get(&platforms); CheckCLErrors(clErr, "cl::Platform::get"); // Select computation device here. clErr = platforms.at(1).getDevices(CL_DEVICE_TYPE_CPU, &devices); CheckCLErrors(clErr, "cl::Platform::getDevices"); context = cl::Context(devices, NULL, NULL, NULL, &clErr); CheckCLErrors(clErr, "cl::Context::Context"); queue = cl::CommandQueue(context, devices.at(0), 0, &clErr); CheckCLErrors(clErr, "cl::CommandQueue::CommandQueue"); std::ifstream sourceFile("computeresidual.cl"); std::string sourceCode((std::istreambuf_iterator<char>(sourceFile)), std::istreambuf_iterator<char>()); cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); program = cl::Program(context, source, &clErr); CheckCLErrors(clErr, "cl::Program::Program"); // Pass in constants to the OpenCL kernel as compiler switches. This is an // efficient way to handle constants such as domain sizes in OpenCL. 
std::string BuildOptions("\ -D X1_SIZE=" + std::to_string(X1Size) + " -D X2_SIZE=" + std::to_string(X2Size) + " -D TOTAL_X1_SIZE=" + std::to_string(X1Size+2*NG) + " -D TOTAL_X2_SIZE=" + std::to_string(X2Size+2*NG)); // Compile the OpenCL program and extract the kernel. PetscScalar start = std::clock(); clErr = program.build(devices, BuildOptions.c_str(), NULL, NULL); const char *buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>( devices.at(0), &clErr).c_str(); PetscPrintf(PETSC_COMM_WORLD, "%s\n", buildlog); CheckCLErrors(clErr, "cl::Program::build"); PetscScalar end = std::clock(); PetscScalar time = (end - start)/(PetscScalar)CLOCKS_PER_SEC; PetscPrintf(PETSC_COMM_WORLD, "Time taken for kernel compilation = %f\n", time); kernel = cl::Kernel(program, "ComputeResidual", &clErr); CheckCLErrors(clErr, "cl::Kernel::Kernel"); // How much memory is the kernel using? cl_ulong localMemSize = kernel.getWorkGroupInfo<CL_KERNEL_LOCAL_MEM_SIZE>( devices.at(0), &clErr); cl_ulong privateMemSize = kernel.getWorkGroupInfo<CL_KERNEL_PRIVATE_MEM_SIZE>( devices.at(0), &clErr); printf("Local memory used = %llu\n", (unsigned long long)localMemSize); printf("Private memory used = %llu\n", (unsigned long long)privateMemSize); // Set initial conditions. InitialCondition(ts, soln); TSSetSolution(ts, soln); TSSetType(ts, TSTHETA); TSSetFromOptions(ts); // Finally solve! All time stepping options can be controlled from the // command line. TSSolve(ts, soln); // Delete the data structures in the following order. DMDestroy(&dmda); VecDestroy(&soln); TSDestroy(&ts); PetscFinalize(); return(0); }
CL_helper(const char *kernelFileName, std::vector<std::string> kernelNames, bool loadBinary, bool writeBinary, bool usePreProcArgs = false, std::map<std::string, std::string> preProcArgs = std::map<std::string, std::string>()){ cl::Platform::get(&platforms); if(platforms.size()==0) throw std::runtime_error("No OpenCL platforms found.\n"); int selectedPlatform=0; platform=platforms.at(selectedPlatform); platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); if(devices.size()==0) throw std::runtime_error("No opencl devices found.\n"); int selectedDevice=0; device=devices.at(selectedDevice); context = cl::Context(devices); try{ if(loadBinary){ std::string vendor=platform.getInfo<CL_PLATFORM_VENDOR>(); size_t found = vendor.find("NVIDIA"); if(found != std::string::npos){ FILE* fp; fp = fopen("src/kernels/julia_filter.ptx", "r"); if (!fp) { std::cerr << "Error loading kernel binary" << std::endl; std::cerr << "Building kernel from .cl file" << std::endl; loadBinary = false; } else { fseek(fp, 0, SEEK_END); size_t kernel_sz = ftell(fp); rewind(fp); char* kernel_str = (char*)malloc(kernel_sz); unsigned bytes = fread(kernel_str, 1, kernel_sz, fp); fclose(fp); binaries.push_back(std::make_pair((void*)kernel_str,kernel_sz+1)); program = cl::Program(context, devices, binaries); program.build(devices); } } else{ std::cerr << "Vendor not NVIDIA, cannot load .ptx binary" << std::endl; std::cerr << "Building kernel from .cl file" << std::endl; loadBinary = false; } } if(!loadBinary){ std::string kernelSource=CL_helper::LoadSource(kernelFileName); sources.push_back(std::make_pair(kernelSource.c_str(), kernelSource.size()+1)); program = cl::Program(context, sources); if(usePreProcArgs && !preProcArgs.empty()){ std::string preProcArgsString; for(auto& arg : preProcArgs) { preProcArgsString += "-D" + arg.first + "=" + arg.second + " "; } program.build(devices, preProcArgsString.c_str()); } else { //std::string params = "-cl-unsafe-math-optimizations"; program.build(devices); } } }catch 
(cl::Error er) { for(unsigned i=0;i<devices.size();i++){ std::cerr <<"Log for device " << devices[i].getInfo<CL_DEVICE_NAME>().c_str()<<std::endl; std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[i]).c_str() <<std::endl; } std::cerr << "ERROR:" << er.what() << " Code " << er.err()<<std::endl; throw; } for(unsigned i=0; i<kernelNames.size();i++){ kernels.push_back( cl::Kernel(program, kernelNames.at(i).c_str()) ); } queue = cl::CommandQueue(context, device); if(writeBinary){ size_t bin_sz; program.getInfo( CL_PROGRAM_BINARY_SIZES, &bin_sz); unsigned char *bin = (unsigned char *)malloc(bin_sz); program.getInfo(CL_PROGRAM_BINARIES, &bin); FILE* fp = fopen("src/kernels/julia_filter.ptx", "wb"); fwrite(bin, sizeof(char), bin_sz, fp); fclose(fp); free(bin); } }
void initOpenCL() { // OpenCL try { // Get available platforms vector<cl::Platform> platforms; cl::Platform::get(&platforms); LOG_INFO<<platforms.front().getInfo<CL_PLATFORM_VERSION>(); // context sharing is OS specific #if defined (__APPLE__) || defined(MACOSX) CGLContextObj curCGLContext = CGLGetCurrentContext(); CGLShareGroupObj curCGLShareGroup = CGLGetShareGroup(curCGLContext); cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)curCGLShareGroup, 0 }; #elif defined WIN32 cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; #else cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; #endif m_context = cl::Context( CL_DEVICE_TYPE_GPU, properties); // Get a list of devices on this platform vector<cl::Device> devices = m_context.getInfo<CL_CONTEXT_DEVICES>(); m_device = devices[0]; // Create a command queue and use the first device m_queue = cl::CommandQueue(m_context, devices[0]); // Read source file std::string sourceCode = kinski::readFile("kernels.cl"); // Make program of the source code in the context m_program = cl::Program(m_context, sourceCode); // Build program for these specific devices m_program.build(); m_particleKernel = cl::Kernel(m_program, "updateParticles"); m_imageKernel = cl::Kernel(m_program, "set_colors_from_image"); } catch(cl::Error &error) { LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")"; LOG_ERROR << "Build Status: " << m_program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(m_device); LOG_ERROR << "Build Options:\t" << m_program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(m_device); LOG_ERROR << "Build Log:\t " << 
m_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_device); } }