/**
 * Assemble an OpenCL program from several source fragments and build it.
 *
 * The source list handed to OpenCL is, in order: USE_DBL_SRC_STR, the
 * KParam_hpp header text, and then the `num_files` caller fragments.
 *
 * @param prog      Receives the newly created program (assigned before build).
 * @param num_files Number of entries read from `ker_strs` / `ker_lens`.
 * @param ker_strs  Source text of each fragment.
 * @param ker_lens  Length of each fragment, parallel to `ker_strs`.
 * @param options   Extra compiler flags, appended after the default flags.
 *
 * Any exception raised along the way triggers SHOW_BUILD_INFO(prog) and is
 * then rethrown unchanged to the caller.
 */
void buildProgram(cl::Program &prog,
                  const int num_files,
                  const char **ker_strs,
                  const int *ker_lens,
                  std::string options)
{
    try {
        Program::Sources sources;
        sources.emplace_back(USE_DBL_SRC_STR.c_str(), USE_DBL_SRC_STR.length());
        sources.emplace_back(KParam_hpp, KParam_hpp_len);

        int fileIdx = 0;
        while (fileIdx < num_files) {
            sources.emplace_back(ker_strs[fileIdx], ker_lens[fileIdx]);
            ++fileIdx;
        }

        // Flags common to every build; computed once on first use.
        static const std::string baseFlags = [] {
            std::string flags(" -cl-std=CL1.1");
            flags += std::string(" -D dim_type=");
            flags += std::string(dtype_traits<dim_type>::getName());
            return flags;
        }();

        prog = cl::Program(getContext(), sources);

        // Build only for the device returned by getDevice().
        std::vector<cl::Device> buildTargets(1, getDevice());

        const std::string buildFlags = baseFlags + options;
        prog.build(buildTargets, buildFlags.c_str());
    } catch (...) {
        SHOW_BUILD_INFO(prog);
        throw;
    }
}
/**
 * Enqueue the "random" kernel to fill `out` with `elements` values.
 *
 * NOTE(review): `T` and `isRandu` are not declared in this block; they are
 * presumably supplied by an enclosing template declaration -- confirm.
 *
 * The program and kernel are compiled once per device id (guarded by a
 * std::once_flag per device) and cached in function-local statics. The
 * compile options define T, repeat, the name from random_name<T, isRandu>,
 * and, depending on T, USE_DOUBLE / IS_64 (double) or IS_BOOL (char).
 *
 * A single static `counter` (shared by all devices) is advanced on every
 * call and passed to the kernel together with random_seed[0..1].
 * NOTE(review): `counter` is updated without any synchronization visible
 * in this block.
 *
 * @param out      Destination buffer passed to the kernel.
 * @param elements Number of elements to generate; values below 1 are a no-op.
 *
 * OpenCL errors (cl::Error) are routed through CL_TO_AF_ERROR.
 */
void random(cl::Buffer out, dim_type elements)
{
    // Nothing to generate. Without this guard `groups` below would be 0
    // (assuming divup is a ceiling division -- it is defined elsewhere), the
    // following divup would divide by THREADS * 0, and the kernel would be
    // launched with an empty global range.
    if (elements < 1) {
        return;
    }

    try {
        static unsigned counter;

        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
        static Program        ranProgs[DeviceManager::MAX_DEVICES];
        static Kernel         ranKernels[DeviceManager::MAX_DEVICES];

        int device = getActiveDeviceId();

        // Compile and cache the kernel the first time this device is used.
        std::call_once(compileFlags[device], [device] () {
            Program::Sources setSrc;
            setSrc.emplace_back(random_cl, random_cl_len);

            std::ostringstream options;
            options << " -D T=" << dtype_traits<T>::getName()
                    << " -D repeat="<< REPEAT
                    << " -D " << random_name<T, isRandu>().name();

            if (std::is_same<T, double>::value) {
                options << " -D USE_DOUBLE";
                options << " -D IS_64";
            }

            if (std::is_same<T, char>::value) {
                options << " -D IS_BOOL";
            }

            buildProgram(ranProgs[device], random_cl, random_cl_len, options.str());
            ranKernels[device] = Kernel(ranProgs[device], "random");
        });

        auto randomOp = make_kernel<cl::Buffer, uint, uint, uint, uint>(ranKernels[device]);

        // Each work-group has THREADS work-items; `groups` is sized so that
        // THREADS * REPEAT elements are assigned per group.
        uint groups = divup(elements, THREADS * REPEAT);

        // Advance the shared counter before the launch so this call hands the
        // kernel a different counter value than the previous call did.
        counter += divup(elements, THREADS * groups);

        NDRange local(THREADS, 1);
        NDRange global(THREADS * groups, 1);

        randomOp(EnqueueArgs(getQueue(), global, local),
                 out, elements, counter, random_seed[0], random_seed[1]);
    } catch (const cl::Error &ex) {
        // Caught by const reference: no copy of the exception, no slicing.
        CL_TO_AF_ERROR(ex);
    }
}
/**
 * Enqueue the "set" kernel over `elements` work-items, passing it `ptr`,
 * `val` and `elements` as arguments.
 *
 * NOTE(review): `T` is not declared in this block; it is presumably supplied
 * by an enclosing template declaration -- confirm.
 *
 * The program is created from set_cl and built with "-D T=<type name>" the
 * first time each device id is seen; program and kernel are then cached in
 * function-local statics indexed by device id.
 *
 * @param ptr      Buffer handed to the kernel as its first argument.
 * @param val      Value handed to the kernel as its second argument.
 * @param elements Global work size, also passed as the third kernel argument.
 */
void set(Buffer &ptr, T val, const size_t &elements)
{
    static std::once_flag buildOnce[DeviceManager::MAX_DEVICES];
    static Program        programs[DeviceManager::MAX_DEVICES];
    static Kernel         kernels[DeviceManager::MAX_DEVICES];

    const int devId = getActiveDeviceId();

    // One-time compilation for this device id.
    std::call_once(buildOnce[devId], [devId] () {
        Program::Sources kernelSrc;
        kernelSrc.emplace_back(set_cl, set_cl_len);

        Program &program = programs[devId];
        program = Program(getContext(), kernelSrc);

        const string buildOpts = string("-D T=") + dtype_traits<T>::getName();
        program.build(buildOpts.c_str());

        kernels[devId] = Kernel(program, "set");
    });

    auto launcher = make_kernel<Buffer, T, const unsigned long>(kernels[devId]);

    // One-dimensional launch with no explicit local size.
    const EnqueueArgs launchCfg(getQueue(), NDRange(elements));
    launcher(launchCfg, ptr, val, elements);
}