void add_arguments_to_kernel_rec(evnt_vec& events, args_vec& arguments, T0& arg0, Ts&... args) { cl_int err{0}; size_t buffer_size = sizeof(typename T0::value_type) * arg0.size(); auto buffer = clCreateBuffer(m_context.get(), CL_MEM_READ_ONLY, buffer_size, nullptr, &err); if (err != CL_SUCCESS) { CPPA_LOGMF(CPPA_ERROR, "clCreateBuffer: " << get_opencl_error(err)); return; } cl_event event; err = clEnqueueWriteBuffer(m_queue.get(), buffer, CL_FALSE, 0, buffer_size, arg0.data(), 0, nullptr, &event); if (err != CL_SUCCESS) { CPPA_LOGMF(CPPA_ERROR, "clEnqueueWriteBuffer: " << get_opencl_error(err)); return; } events.push_back(std::move(event)); mem_ptr tmp; tmp.adopt(std::move(buffer)); arguments.push_back(tmp); add_arguments_to_kernel_rec(events, arguments, args...); }
~command() { cl_int err{0}; for(auto& e : m_events) { err = clReleaseEvent(e); if (err != CL_SUCCESS) { CPPA_LOGMF(CPPA_ERROR, "clReleaseEvent: " << get_opencl_error(err)); } } }
void add_arguments_to_kernel_rec(evnt_vec&, args_vec& arguments) { cl_int err{0}; // rotate left (output buffer to the end) rotate(begin(arguments), begin(arguments) + 1, end(arguments)); for(size_t i = 0; i < arguments.size(); ++i) { err = clSetKernelArg(m_kernel.get(), i, sizeof(cl_mem), static_cast<void*>(&arguments[i])); CPPA_LOG_ERROR_IF(err != CL_SUCCESS, "clSetKernelArg: " << get_opencl_error(err)); } clFlush(m_queue.get()); }
void add_arguments_to_kernel(evnt_vec& events, args_vec& arguments, size_t ret_size, Ts&&... args) { arguments.clear(); cl_int err{ 0 }; auto buf = clCreateBuffer(m_context.get(), CL_MEM_WRITE_ONLY, sizeof(typename R::value_type) * ret_size, nullptr, &err); if (err != CL_SUCCESS) { CPPA_LOGMF(CPPA_ERROR, "clCreateBuffer: " << get_opencl_error(err)); return; } mem_ptr tmp; tmp.adopt(std::move(buf)); arguments.push_back(tmp); add_arguments_to_kernel_rec(events, arguments, std::forward<Ts>(args)...); }
static intrusive_ptr<actor_facade> create(const program& prog, const char* kernel_name, arg_mapping map_args, result_mapping map_result, const dim_vec& global_dims, const dim_vec& offsets, const dim_vec& local_dims, size_t result_size) { if (global_dims.empty()) { auto str = "OpenCL kernel needs at least 1 global dimension."; CPPA_LOGM_ERROR(detail::demangle(typeid(actor_facade)).c_str(), str); throw std::runtime_error(str); } auto check_vec = [&](const dim_vec& vec, const char* name) { if (!vec.empty() && vec.size() != global_dims.size()) { std::ostringstream oss; oss << name << " vector is not empty, but " << "its size differs from global dimensions vector's size"; CPPA_LOGM_ERROR(detail::demangle<actor_facade>().c_str(), oss.str()); throw std::runtime_error(oss.str()); } }; check_vec(offsets, "offsets"); check_vec(local_dims, "local dimensions"); cl_int err{ 0 }; kernel_ptr kernel; kernel.adopt(clCreateKernel(prog.m_program.get(), kernel_name, &err)); if (err != CL_SUCCESS) { std::ostringstream oss; oss << "clCreateKernel: " << get_opencl_error(err); CPPA_LOGM_ERROR(detail::demangle<actor_facade>().c_str(), oss.str()); throw std::runtime_error(oss.str()); } if (result_size == 0) { result_size = std::accumulate(global_dims.begin(), global_dims.end(), 1, std::multiplies<size_t>{}); } return new actor_facade<Ret (Args...)>{ prog , kernel , global_dims , offsets, local_dims, std::move(map_args), std::move(map_result), result_size }; }
void enqueue_read_buffers(cl_event& kernel_done, detail::int_list<I, Is...>) { using container_type = typename std::tuple_element<I, std::tuple<Ts...>>::type; using value_type = typename container_type::value_type; cl_event event; auto size = result_sizes_[I]; auto buffer_size = sizeof(value_type) * result_sizes_[I]; std::get<I>(result_buffers_).resize(size); auto err = clEnqueueReadBuffer(queue_.get(), output_buffers_[I].get(), CL_FALSE, 0, buffer_size, std::get<I>(result_buffers_).data(), 1, &kernel_done, &event); if (err != CL_SUCCESS) { this->deref(); // failed to enqueue command throw std::runtime_error("clEnqueueReadBuffer: " + get_opencl_error(err)); } mem_out_events_.push_back(std::move(event)); enqueue_read_buffers(kernel_done, detail::int_list<Is...>{}); }
void enqueue() { // Errors in this function can not be handled by opencl_err.hpp // because they require non-standard error handling CAF_LOG_TRACE("command::enqueue()"); this->ref(); // reference held by the OpenCL comand queue cl_event event_k; auto data_or_nullptr = [](const dim_vec& vec) { return vec.empty() ? nullptr : vec.data(); }; // OpenCL expects cl_uint (unsigned int), hence the cast cl_int err = clEnqueueNDRangeKernel( queue_.get(), actor_facade_->kernel_.get(), static_cast<cl_uint>(actor_facade_->config_.dimensions().size()), data_or_nullptr(actor_facade_->config_.offsets()), data_or_nullptr(actor_facade_->config_.dimensions()), data_or_nullptr(actor_facade_->config_.local_dimensions()), static_cast<cl_uint>(mem_in_events_.size()), (mem_in_events_.empty() ? nullptr : mem_in_events_.data()), &event_k ); if (err != CL_SUCCESS) { CAF_LOGMF(CAF_ERROR, "clEnqueueNDRangeKernel: " << get_opencl_error(err)); clReleaseEvent(event_k); this->deref(); return; } else { enqueue_read_buffers(event_k, detail::get_indices(result_buffers_)); cl_event marker; #if defined(__APPLE__) err = clEnqueueMarkerWithWaitList( queue_.get(), static_cast<cl_uint>(mem_out_events_.size()), mem_out_events_.data(), &marker ); #else err = clEnqueueMarker(queue_.get(), &marker); #endif if (err != CL_SUCCESS) { CAF_LOGMF(CAF_ERROR, "clSetEventCallback: " << get_opencl_error(err)); clReleaseEvent(marker); clReleaseEvent(event_k); this->deref(); // callback is not set return; } err = clSetEventCallback(marker, CL_COMPLETE, [](cl_event, cl_int, void* data) { auto cmd = reinterpret_cast<command*>(data); cmd->handle_results(); cmd->deref(); }, this); if (err != CL_SUCCESS) { CAF_LOGMF(CAF_ERROR, "clSetEventCallback: " << get_opencl_error(err)); clReleaseEvent(marker); clReleaseEvent(event_k); this->deref(); // callback is not set return; } err = clFlush(queue_.get()); if (err != CL_SUCCESS) { CAF_LOGMF(CAF_ERROR, "clFlush: " << get_opencl_error(err)); } mem_out_events_.push_back(std::move(event_k)); mem_out_events_.push_back(std::move(marker)); } }
void enqueue () { CPPA_LOG_TRACE("command::enqueue()"); this->ref(); // reference held by the OpenCL comand queue cl_int err{0}; cl_event event_k; auto data_or_nullptr = [](const dim_vec& vec) { return vec.empty() ? nullptr : vec.data(); }; err = clEnqueueNDRangeKernel(m_queue.get(), m_actor_facade->m_kernel.get(), m_actor_facade->m_global_dimensions.size(), data_or_nullptr(m_actor_facade->m_global_offsets), data_or_nullptr(m_actor_facade->m_global_dimensions), data_or_nullptr(m_actor_facade->m_local_dimensions), m_events.size(), (m_events.empty() ? nullptr : m_events.data()), &event_k); if (err != CL_SUCCESS) { CPPA_LOGMF(CPPA_ERROR, "clEnqueueNDRangeKernel: " << get_opencl_error(err)); this->deref(); // or can anything actually happen? return; } else { cl_event event_r; err = clEnqueueReadBuffer(m_queue.get(), m_arguments.back().get(), CL_FALSE, 0, sizeof(typename R::value_type) * m_result_size, m_result.data(), 1, &event_k, &event_r); if (err != CL_SUCCESS) { throw std::runtime_error("clEnqueueReadBuffer: " + get_opencl_error(err)); this->deref(); // failed to enqueue command return; } err = clSetEventCallback(event_r, CL_COMPLETE, [](cl_event, cl_int, void* data) { auto cmd = reinterpret_cast<command*>(data); cmd->handle_results(); cmd->deref(); }, this); if (err != CL_SUCCESS) { CPPA_LOGMF(CPPA_ERROR, "clSetEventCallback: " << get_opencl_error(err)); this->deref(); // callback is not set return; } err = clFlush(m_queue.get()); if (err != CL_SUCCESS) { CPPA_LOGMF(CPPA_ERROR, "clFlush: " << get_opencl_error(err)); } m_events.push_back(std::move(event_k)); m_events.push_back(std::move(event_r)); } }