int main(int argc, char ** argv) { // Load image SIPL::Image<float> * image = new SIPL::Image<float>("images/sunset.jpg"); // Create OpenCL context Context context = createCLContextFromArguments(argc, argv); // Compile OpenCL code Program program = buildProgramFromSource(context, "gaussian_blur.cl"); // Select device and create a command queue for it VECTOR_CLASS<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); CommandQueue queue = CommandQueue(context, devices[0]); // Create an OpenCL Image / texture and transfer data to the device Image2D clImage = Image2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ImageFormat(CL_R, CL_FLOAT), image->getWidth(), image->getHeight(), 0, (void*)image->getData()); // Create a buffer for the result Buffer clResult = Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*image->getWidth()*image->getHeight()); // Create Gaussian mask int maskSize; float * mask = createBlurMask(10.0f, &maskSize); // Create buffer for mask and transfer it to the device Buffer clMask = Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float)*(maskSize*2+1)*(maskSize*2+1), mask); // Run Gaussian kernel Kernel gaussianBlur = Kernel(program, "gaussian_blur"); gaussianBlur.setArg(0, clImage); gaussianBlur.setArg(1, clMask); gaussianBlur.setArg(2, clResult); gaussianBlur.setArg(3, maskSize); queue.enqueueNDRangeKernel( gaussianBlur, NullRange, NDRange(image->getWidth(), image->getHeight()), NullRange ); // Transfer image back to host float* data = new float[image->getWidth()*image->getHeight()]; queue.enqueueReadBuffer(clResult, CL_TRUE, 0, sizeof(float)*image->getWidth()*image->getHeight(), data); image->setData(data); // Save image to disk image->save("images/result.jpg", "jpeg"); image->display(); }
void run() { NDRange global; Event event; boost::array<boost::uint32_t,4> size; size.assign(2*256*64); global = NDRange(256,64); // run kernel A _kernel.setArg(0,size); _kernel.setArg(1,_input); _kernel.setArg(2,_output); _queue.enqueueNDRangeKernel( _kernel, global, &event ); _queue.flush(); _queue.waitForEvent(event); }
void run( int workgroup_size ) { NDRange global,local; Event event; local = NDRange(workgroup_size); global = NDRange(_nr_groups*workgroup_size); posix_time::ptime t1 = posix_time::microsec_clock::local_time(); _kernel.setArg(0,_output); _queue.enqueueNDRangeKernel( _kernel, global, local, &event ); _queue.flush(); _queue.waitForEvent(event); posix_time::ptime t2 = posix_time::microsec_clock::local_time(); _exec_time = posix_time::time_period(t1,t2).length().total_microseconds(); }
int main(int argc, char **argv) { srand((unsigned)time(NULL)); Kernel kernel; CommandQueue queue; Context context; { std::vector<Platform> platformList; Platform::get(&platformList); clog << "Platform number is: " << platformList.size() << endl; std::string platformVendor; platformList[0].getInfo((cl_platform_info)CL_PLATFORM_VENDOR, &platformVendor); clog << "Platform is by: " << platformVendor << "\n"; cl_context_properties cprops[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platformList[0](), 0 }; context = Context(GET_TARGET_PLATFORM, cprops); std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); queue = CommandQueue(context, devices[0]); std::string sourceCode = "#include \"es.cl\"\n"; Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); Program program = Program(context, source); try { program.build(devices, "-I."); } catch (Error &) { std::string errors; program.getBuildInfo(devices[0], CL_PROGRAM_BUILD_LOG, &errors); std::cerr << "Build log: " << endl << errors << endl; return 1; } kernel = Kernel(program, "es"); } individual *individuals = new individual[LAMBDA]; for (int i = 0; i < LAMBDA; i++) { for (int j = 0; j < DIM; ++j) { individuals[i].x[j] = (rand()/((float)RAND_MAX)) * (XMAX-XMIN) + XMIN; individuals[i].s[j] = (XMAX-XMIN) / 6.f; } for (int j = 0; j < DIM_A; ++j) { individuals[i].a[j] = (rand()/((float)RAND_MAX)) * (2*PI) - PI; } individuals[i].fitness = 0; } float gbest = std::numeric_limits<float>::infinity(), xbest[DIM]; Buffer esBuffer = Buffer(context, 0, INDIVIDUALS_SIZE); Event ev; queue.enqueueMapBuffer(esBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, INDIVIDUALS_SIZE); for (int i = 0; i < 1000; i++) { queue.enqueueWriteBuffer(esBuffer, CL_TRUE, 0, INDIVIDUALS_SIZE, individuals); kernel.setArg(1, (cl_ulong)rand()); kernel.setArg(0, esBuffer); queue.enqueueNDRangeKernel(kernel, NullRange, NDRange(LAMBDA), NDRange(1), NULL, &ev); ev.wait(); queue.enqueueReadBuffer(esBuffer, CL_TRUE, 0, INDIVIDUALS_SIZE, individuals); std::sort(individuals, individuals + LAMBDA, individual_comp); individual mean = get_mean(individuals); for (int j = 0; j < LAMBDA; ++j) { individuals[j] = mean; } } gbest = individuals[0].fitness; for (int i = 0; i < DIM; ++i) xbest[i] = individuals[0].x[i]; clog << "Best value " << gbest << " found at ("; for (int i = 0; i < DIM; ++i) clog << xbest[i] << (i == DIM-1 ? ")" : ", "); clog << "\n"; clog << "Our computation estemates it: f(" << xbest[0] << ", ..., " << xbest[DIM-1] << ") = " << es_f(xbest) << endl; delete[] individuals; return 0; }
/** * Prepare and execute the OpenCL Kernel */ void executeCL(void) { Event wait; try { // Stage stageExecuteCL(); // Execute cl_int err = m_queue.enqueueNDRangeKernel(m_kernel, NullRange, m_global, m_local, NULL, &wait); clPrintErr(err, "Execute Error -> ", stdout); wait.wait(); m_queue.finish(); // Collate collateExecuteCL(); } catch(Error error) { std::cout << std::endl << error.what() << "(" << error.err() << ")" << std::endl; fail("OpenCL Error"); } }