Beispiel #1
0
 void add_arguments_to_kernel_rec(evnt_vec& events, args_vec& arguments, 
                                  T0& arg0, Ts&... args) {
     cl_int err{0};
     size_t buffer_size = sizeof(typename T0::value_type) * arg0.size();
     auto buffer = clCreateBuffer(m_context.get(),
                                  CL_MEM_READ_ONLY,
                                  buffer_size,
                                  nullptr,
                                  &err);
     if (err != CL_SUCCESS) {
         CPPA_LOGMF(CPPA_ERROR, "clCreateBuffer: " << get_opencl_error(err));
         return;
     }
     cl_event event;
     err = clEnqueueWriteBuffer(m_queue.get(), buffer, CL_FALSE, 0,
                                buffer_size, arg0.data(),
                                0, nullptr, &event);
     if (err != CL_SUCCESS) {
         CPPA_LOGMF(CPPA_ERROR, "clEnqueueWriteBuffer: "
                                << get_opencl_error(err));
         return;
     }
     events.push_back(std::move(event));
     mem_ptr tmp;
     tmp.adopt(std::move(buffer));
     arguments.push_back(tmp);
     add_arguments_to_kernel_rec(events, arguments, args...);
 }
Beispiel #2
0
 ~command() {
     cl_int err{0};
     for(auto& e : m_events) {
         err = clReleaseEvent(e);
         if (err != CL_SUCCESS) {
             CPPA_LOGMF(CPPA_ERROR, "clReleaseEvent: "
                                    << get_opencl_error(err));
         }
     }
 }
Beispiel #3
0
 void add_arguments_to_kernel_rec(evnt_vec&, args_vec& arguments) {
     cl_int err{0};
     // rotate left (output buffer to the end)
     rotate(begin(arguments), begin(arguments) + 1, end(arguments));
     for(size_t i = 0; i < arguments.size(); ++i) {
         err = clSetKernelArg(m_kernel.get(), i, sizeof(cl_mem),
                              static_cast<void*>(&arguments[i]));
         CPPA_LOG_ERROR_IF(err != CL_SUCCESS,
                           "clSetKernelArg: " << get_opencl_error(err));
     }
     clFlush(m_queue.get());
 }
Beispiel #4
0
 void add_arguments_to_kernel(evnt_vec& events, args_vec& arguments,
                              size_t ret_size, Ts&&... args) {
     arguments.clear();
     cl_int err{ 0 };
     auto buf = clCreateBuffer(m_context.get(), CL_MEM_WRITE_ONLY,
                               sizeof(typename R::value_type) * ret_size,
                               nullptr, &err);
     if (err != CL_SUCCESS) {
         CPPA_LOGMF(CPPA_ERROR, "clCreateBuffer: " << get_opencl_error(err));
         return;
     }
     mem_ptr tmp;
     tmp.adopt(std::move(buf));
     arguments.push_back(tmp);
     add_arguments_to_kernel_rec(events, arguments, std::forward<Ts>(args)...);
 }
Beispiel #5
0
 static intrusive_ptr<actor_facade>
 create(const program& prog, const char* kernel_name,
        arg_mapping map_args, result_mapping map_result,
        const dim_vec& global_dims, const dim_vec& offsets,
        const dim_vec& local_dims, size_t result_size) {
     if (global_dims.empty()) {
         auto str = "OpenCL kernel needs at least 1 global dimension.";
         CPPA_LOGM_ERROR(detail::demangle(typeid(actor_facade)).c_str(),
                         str);
         throw std::runtime_error(str);
     }
     auto check_vec = [&](const dim_vec& vec, const char* name) {
         if (!vec.empty() && vec.size() != global_dims.size()) {
             std::ostringstream oss;
             oss << name << " vector is not empty, but "
                 << "its size differs from global dimensions vector's size";
             CPPA_LOGM_ERROR(detail::demangle<actor_facade>().c_str(),
                             oss.str());
             throw std::runtime_error(oss.str());
         }
     };
     check_vec(offsets, "offsets");
     check_vec(local_dims, "local dimensions");
     cl_int err{ 0 };
     kernel_ptr kernel;
     kernel.adopt(clCreateKernel(prog.m_program.get(), kernel_name, &err));
     if (err != CL_SUCCESS) {
         std::ostringstream oss;
         oss << "clCreateKernel: " << get_opencl_error(err);
         CPPA_LOGM_ERROR(detail::demangle<actor_facade>().c_str(),
                         oss.str());
         throw std::runtime_error(oss.str());
     }
     if (result_size == 0) {
         result_size = std::accumulate(global_dims.begin(),
                                       global_dims.end(),
                                       1,
                                       std::multiplies<size_t>{});
     }
     return new actor_facade<Ret (Args...)>{
         prog      , kernel             , global_dims          , offsets,
         local_dims, std::move(map_args), std::move(map_result), result_size
     };
 }
Beispiel #6
0
 void enqueue_read_buffers(cl_event& kernel_done, detail::int_list<I, Is...>) {
   using container_type =
     typename std::tuple_element<I, std::tuple<Ts...>>::type;
   using value_type = typename container_type::value_type;
   cl_event event;
   auto size = result_sizes_[I];
   auto buffer_size = sizeof(value_type) * result_sizes_[I];
   std::get<I>(result_buffers_).resize(size);
   auto err = clEnqueueReadBuffer(queue_.get(), output_buffers_[I].get(),
                                  CL_FALSE, 0, buffer_size,
                                  std::get<I>(result_buffers_).data(),
                                  1, &kernel_done, &event);
   if (err != CL_SUCCESS) {
     this->deref(); // failed to enqueue command
     throw std::runtime_error("clEnqueueReadBuffer: " +
                              get_opencl_error(err));
   }
   mem_out_events_.push_back(std::move(event));
   enqueue_read_buffers(kernel_done, detail::int_list<Is...>{});
 }
Beispiel #7
0
  void enqueue() {
    // Errors in this function can not be handled by opencl_err.hpp
    // because they require non-standard error handling
    CAF_LOG_TRACE("command::enqueue()");
    this->ref(); // reference held by the OpenCL comand queue
    cl_event event_k;
    auto data_or_nullptr = [](const dim_vec& vec) {
      return vec.empty() ? nullptr : vec.data();
    };
    // OpenCL expects cl_uint (unsigned int), hence the cast
    cl_int err = clEnqueueNDRangeKernel(
      queue_.get(), actor_facade_->kernel_.get(),
      static_cast<cl_uint>(actor_facade_->config_.dimensions().size()),
      data_or_nullptr(actor_facade_->config_.offsets()),
      data_or_nullptr(actor_facade_->config_.dimensions()),
      data_or_nullptr(actor_facade_->config_.local_dimensions()),
      static_cast<cl_uint>(mem_in_events_.size()),
      (mem_in_events_.empty() ? nullptr : mem_in_events_.data()), &event_k
    );
    if (err != CL_SUCCESS) {
      CAF_LOGMF(CAF_ERROR, "clEnqueueNDRangeKernel: " << get_opencl_error(err));
      clReleaseEvent(event_k);
      this->deref();
      return;
    } else {
      enqueue_read_buffers(event_k, detail::get_indices(result_buffers_));
      cl_event marker;
#if defined(__APPLE__)
      err = clEnqueueMarkerWithWaitList(
        queue_.get(),
        static_cast<cl_uint>(mem_out_events_.size()),
        mem_out_events_.data(), &marker
      );
#else
      err = clEnqueueMarker(queue_.get(), &marker);
#endif
      if (err != CL_SUCCESS) {
        CAF_LOGMF(CAF_ERROR, "clSetEventCallback: " << get_opencl_error(err));
        clReleaseEvent(marker);
        clReleaseEvent(event_k);
        this->deref(); // callback is not set
        return;
      }
      err = clSetEventCallback(marker, CL_COMPLETE,
                               [](cl_event, cl_int, void* data) {
                                 auto cmd = reinterpret_cast<command*>(data);
                                 cmd->handle_results();
                                 cmd->deref();
                               },
                               this);
      if (err != CL_SUCCESS) {
        CAF_LOGMF(CAF_ERROR, "clSetEventCallback: " << get_opencl_error(err));
        clReleaseEvent(marker);
        clReleaseEvent(event_k);
        this->deref(); // callback is not set
        return;
      }
      err = clFlush(queue_.get());
      if (err != CL_SUCCESS) {
        CAF_LOGMF(CAF_ERROR, "clFlush: " << get_opencl_error(err));
      }
      mem_out_events_.push_back(std::move(event_k));
      mem_out_events_.push_back(std::move(marker));
    }
  }
Beispiel #8
0
    void enqueue () {
        CPPA_LOG_TRACE("command::enqueue()");
        this->ref(); // reference held by the OpenCL comand queue
        cl_int err{0};
        cl_event event_k;
        auto data_or_nullptr = [](const dim_vec& vec) {
            return vec.empty() ? nullptr : vec.data();
        };
        err = clEnqueueNDRangeKernel(m_queue.get(),
                                     m_actor_facade->m_kernel.get(),
                                     m_actor_facade->m_global_dimensions.size(),
                                     data_or_nullptr(m_actor_facade->m_global_offsets),
                                     data_or_nullptr(m_actor_facade->m_global_dimensions),
                                     data_or_nullptr(m_actor_facade->m_local_dimensions),
                                     m_events.size(),
                                     (m_events.empty() ? nullptr : m_events.data()),
                                     &event_k);
        if (err != CL_SUCCESS) {
            CPPA_LOGMF(CPPA_ERROR, "clEnqueueNDRangeKernel: " 
                                   << get_opencl_error(err));
            this->deref(); // or can anything actually happen?
            return;
        }
        else {
            cl_event event_r;
            err = clEnqueueReadBuffer(m_queue.get(),
                                      m_arguments.back().get(),
                                      CL_FALSE,
                                      0,
                                      sizeof(typename R::value_type) * m_result_size,
                                      m_result.data(),
                                      1,
                                      &event_k,
                                      &event_r);
            if (err != CL_SUCCESS) {
                throw std::runtime_error("clEnqueueReadBuffer: "
                                         + get_opencl_error(err));
                this->deref(); // failed to enqueue command
                return;
            }
            err = clSetEventCallback(event_r,
                                     CL_COMPLETE,
                                     [](cl_event, cl_int, void* data) {
                                         auto cmd = reinterpret_cast<command*>(data);
                                         cmd->handle_results();
                                         cmd->deref();
                                     },
                                     this);
            if (err != CL_SUCCESS) {
                CPPA_LOGMF(CPPA_ERROR, "clSetEventCallback: "
                                       << get_opencl_error(err));
                this->deref(); // callback is not set
                return;
            }

            err = clFlush(m_queue.get());
            if (err != CL_SUCCESS) {
                CPPA_LOGMF(CPPA_ERROR, "clFlush: " << get_opencl_error(err));
            }
            m_events.push_back(std::move(event_k));
            m_events.push_back(std::move(event_r));
        }
    }