void Convolution3DCLBuffer::createProgram(const std::string& source, size_t const* fs) { cl::Program::Sources program_source(1, std::make_pair(source.c_str(), source.length())); program = cl::Program(context, program_source, &status); CHECK_ERROR(status, "cl::Program"); std::string defines = std::string("-D FILTER_SIZE_X=") + std::to_string(fs[2]) + std::string(" -D FILTER_SIZE_Y=") + std::to_string(fs[1]) + std::string(" -D FILTER_SIZE_Z=") + std::to_string(fs[0]) + std::string(" -D FILTER_SIZE_X_HALF=") + std::to_string(fs[2]/2) + std::string(" -D FILTER_SIZE_Y_HALF=") + std::to_string(fs[1]/2) + std::string(" -D FILTER_SIZE_Z_HALF=") + std::to_string(fs[0]/2); status = program.build(devices, defines.c_str(), nullptr, nullptr); std::string log; program.getBuildInfo(devices[0],CL_PROGRAM_BUILD_LOG,&log); if(log.size() > 0) { std::cout << log << std::endl; } CHECK_ERROR(status, "cl::Program::Build"); }
cl::Program load_program(const std::string &filename, const cl::Context &context, const std::vector<cl::Device> &devices) { std::ifstream program_in(filename); std::string program_code(std::istreambuf_iterator<char>(program_in), (std::istreambuf_iterator<char>())); cl::Program::Sources program_source(1, std::make_pair(program_code.c_str(), program_code.length() + 1)); cl::Program program(context, program_source); program.build(devices); return program; }
int main() { try { std::vector<cl::Platform> platforms; cl::Platform::get(&platforms); if (platforms.empty()) throw std::runtime_error("No suitable platform found"); cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; cl::Context context(CL_DEVICE_TYPE_GPU, properties); std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); if (devices.empty()) throw std::runtime_error("No suitable device found"); std::ifstream program_in("program.cl"); std::string program_code(std::istreambuf_iterator<char>(program_in), (std::istreambuf_iterator<char>())); cl::Program::Sources program_source(1, std::make_pair(program_code.c_str(), program_code.length() + 1)); cl::Program program(context, program_source); program.build(devices); cl::CommandQueue queue(context, devices[0], 0); size_t n, m; std::vector<float> fst_matrix, snd_matrix; std::ifstream in(INPUT_FILE); in >> n >> m; read_matrix(in, fst_matrix, n); read_matrix(in, snd_matrix, m); cl::Buffer fst_buf(context, std::begin(fst_matrix), std::end(fst_matrix), true, false); cl::Buffer snd_buf(context, std::begin(snd_matrix), std::end(snd_matrix), true, false); cl::Buffer result_buf(context, CL_MEM_WRITE_ONLY, sizeof(float) * n * n); cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, int, int> convolution_fn(program, "convolution"); size_t local_size = 16; size_t global_size = ((n + local_size - 1) / local_size) * local_size; // TODO: revise cl::EnqueueArgs args(queue, cl::NDRange(global_size, global_size), cl::NDRange(local_size, local_size)); convolution_fn(args, fst_buf, snd_buf, result_buf, n, m).wait(); std::vector<float> result(n*n); queue.enqueueReadBuffer(result_buf, CL_TRUE, 0, sizeof(float) * n * n, &result[0]); std::ofstream out(OUTPUT_FILE); out << std::fixed << std::setprecision(3); for (size_t i = 0; i < n; i++) { for (size_t j = 0; j < n; j++) out << result[i * n + j] << " "; out << std::endl; } } catch (cl::Error &e) { std::cerr << "ERROR: " << e.what() << " (" << e.err() << ")" << std::endl; } catch (std::runtime_error &e) { std::cerr << e.what() << std::endl; } return 0; }