// Python-facing wrapper around FiltImg.
//
// in_obj and kernel_obj are reinterpreted as PyArrayObject* without any type
// check in this function, so callers are expected to pass NumPy arrays
// (NOTE(review): confirm that validation happens before this call).
// channel_obj must be convertible to int through bp::extract<int>.
//
// Returns a copy of the array produced from FiltImg's output image.
bp::object Filt(bp::object in_obj, bp::object kernel_obj, bp::object channel_obj) {
    Img source;
    Img filter;
    Img result;

    source.FromPyArrayObject(reinterpret_cast<PyArrayObject*>(in_obj.ptr()));
    filter.FromPyArrayObject(reinterpret_cast<PyArrayObject*>(kernel_obj.ptr()));

    int channel = bp::extract<int>(channel_obj);

    FiltImg(source, filter, channel, result);

    // bp::handle<> assumes ownership of the reference it is given
    // (presumably ToPyArrayObject() hands back a new reference -- confirm).
    PyObject* raw_result = (PyObject*) result.ToPyArrayObject();
    bp::handle<> result_handle(raw_result);
    bp::numeric::array result_array(result_handle);

    return result_array.copy();
}
// Python-facing wrapper around FiltMaxImg.
//
// in_obj is reinterpreted as a PyArrayObject* without any type check in this
// function. k_size is indexed as (height, width); both entries and
// channel_obj must be convertible to int through bp::extract<int>.
//
// NOTE(review): this wrapper loads its input with CopyFromPyArrayObject while
// Filt uses FromPyArrayObject -- confirm the difference is intentional.
//
// Returns a copy of the array produced from FiltMaxImg's output image.
bp::object FiltMax(bp::object in_obj, bp::object k_size, bp::object channel_obj) {
    Img source;
    Img result;

    source.CopyFromPyArrayObject(reinterpret_cast<PyArrayObject*>(in_obj.ptr()));

    int window_height = bp::extract<int>(k_size[0]);
    int window_width = bp::extract<int>(k_size[1]);
    int channel = bp::extract<int>(channel_obj);

    // FiltMaxImg takes the width before the height, the reverse of k_size's order.
    FiltMaxImg(source, window_width, window_height, channel, result);

    // bp::handle<> assumes ownership of the reference it is given
    // (presumably ToPyArrayObject() hands back a new reference -- confirm).
    PyObject* raw_result = (PyObject*) result.ToPyArrayObject();
    bp::handle<> result_handle(raw_result);
    bp::numeric::array result_array(result_handle);

    return result_array.copy();
}
int main() { try { cl::Context context; std::vector<cl::Device> devices; std::tie(context, devices) = init_open_cl(); cl::CommandQueue queue(context, devices[0]); cl::Program program = load_program("program.cl", context, devices); cl_fn reduce_fn(program, "do_reduce"); cl_fn sweep_fn(program, "do_sweep"); std::ifstream in(INPUT_FILE); size_t n, npow2; in >> n; npow2 = pow(2.0, ceil(log2(n))); std::vector<float> in_array(npow2); for (size_t i = 0; i < n; ++i) in >> in_array[i]; cl::Buffer out_buf(context, std::begin(in_array), std::end(in_array), false); std::vector<cl::Event> events; for (size_t offset = 1; npow2 / (offset * 2) >= WORKGROUP_SIZE; offset *= 2) exec_fn(reduce_fn, out_buf, npow2, offset, npow2 / offset, events, queue); if (npow2 < 512) exec_fn(reduce_fn, out_buf, npow2, 1, WORKGROUP_SIZE, events, queue); exec_fn(sweep_fn, out_buf, npow2, npow2 / 2, WORKGROUP_SIZE, events, queue); for (size_t offset = npow2 / 1024; offset > 0; offset /= 2) exec_fn(sweep_fn, out_buf, npow2, offset, npow2 / offset, events, queue); std::vector<float> out_array(n); queue.enqueueReadBuffer(out_buf, CL_TRUE, 0, sizeof(float) * n, &out_array[0]); std::ofstream out(OUTPUT_FILE); out << std::fixed << std::setprecision(3); for (size_t i = 0; i < n; i++) out << out_array[i] << " "; out << std::endl; } catch (cl::Error &e) { std::cerr << "ERROR: " << e.what() << " (" << e.err() << ")" << std::endl; } catch (std::runtime_error &e) { std::cerr << e.what() << std::endl; } return 0; }
/*
 * Emits output according to args->size:
 *   - when it is 0, writes the single character '0' via out_c();
 *   - otherwise calls out_array() followed by out_c('\n') once per unit of
 *     args->size.
 * Always returns 1.
 */
static char get_dynamic(struct args_t *args)
{
    uint16_t remaining = args->size;

    if (remaining == 0) {
        out_c('0');
        return 1;
    }

    for (; remaining > 0; --remaining) {
        out_array();
        out_c('\n');
    }

    return 1;
}