Beispiel #1
0
/**
 * Specialization of CData::getValue for Uint8ClampedArray.
 *
 * If m_theUint8ClampedArray already holds a typed array, that array is
 * returned. Otherwise a new array is requested from
 * m_parent->createAlignedTA(), filled through enqueueReadBuffer(), kept in
 * m_theUint8ClampedArray and returned.
 *
 * @return the typed array, or 0 if the array could not be created or the
 *         buffer read reported an error (in the latter case the member is
 *         cleared again before returning).
 */
template<> Uint8ClampedArray* CData::getValue<Uint8ClampedArray>()
{
    cl_int err_code = CL_SUCCESS;

    // A typed array is already attached: hand it out as is.
    if (m_theUint8ClampedArray.get()) {
#ifdef PREALLOCATE_IN_JS_HEAP
        // The leading `false &&` keeps this mapping path switched off, so the
        // body below is currently never executed. It is retained unchanged
        // (apart from dropping a shadowed, unused outer `mem` declaration).
        if (false && !m_isMapped) {
            DEBUG_LOG_STATUS("getValue", "memory is " << m_theUint8ClampedArray.get());
            // Blocking read-map of the whole buffer (offset 0, m_size bytes, no wait list, no event).
            void* mem = clEnqueueMapBuffer(m_queue, m_memObj, CL_TRUE, CL_MAP_READ, 0, m_size, 0, 0, 0, &err_code);

            if (err_code != CL_SUCCESS) {
                DEBUG_LOG_ERROR("getValue", err_code);
                return 0;
            }
#ifndef DEBUG_OFF
            // Only reports a mismatch between the mapped pointer and the array storage.
            if (mem != m_theUint8ClampedArray->data()) {
                DEBUG_LOG_STATUS("getValue", "EnqueueMap returned wrong pointer");
            }
#endif // DEBUG_OFF
            m_isMapped = true;
        }
#endif // PREALLOCATE_IN_JS_HEAP
        return m_theUint8ClampedArray.get();
    }

    // No typed array yet: create one and copy the buffer contents into it.
#ifdef INCREMENTAL_MEM_RELEASE
    checkFree();
#endif // INCREMENTAL_MEM_RELEASE

    if (m_parent->createAlignedTA<Uint8ClampedArray, unsigned char>(m_type, m_length, m_theUint8ClampedArray) != RT_OK)
        return 0;

    // createAlignedTA reported RT_OK but may still have left the member empty.
    if (!m_theUint8ClampedArray) {
        DEBUG_LOG_STATUS("getValue", "Cannot create typed array");
        return 0;
    }

    err_code = enqueueReadBuffer(m_size, m_theUint8ClampedArray->data());

    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("getValue", err_code);
        // Drop the array again so a later call retries from scratch.
        m_theUint8ClampedArray.clear();
        return 0;
    }

    DEBUG_LOG_STATUS("getValue", "materialized typed array");

    return m_theUint8ClampedArray.get();
}
Beispiel #2
0
// Example use of the double-precision Xtrsm routine DTRSM, solving A*X = alpha*B, storing the
// result in the memory of matrix B. Uses row-major storage (C-style).
//
// Returns 1 when the selected OpenCL platform or device does not exist, 0 otherwise. The CLBlast
// status code is printed together with the contents of B read back from the device.
int main() {

  // OpenCL platform/device settings. Declared as size_t so that the bounds checks against
  // std::vector::size() below compare values of the same (unsigned) type.
  const auto platform_id = size_t{0};
  const auto device_id = size_t{0};

  // Example TRSM arguments: A is m-by-m (left side), B is m-by-n. With row-major storage the
  // leading dimensions equal the row lengths.
  const size_t m = 4;
  const size_t n = 3;
  const double alpha = 1.0;
  const auto a_ld = m;
  const auto b_ld = n;

  // Initializes the OpenCL platform
  auto platforms = std::vector<cl::Platform>();
  cl::Platform::get(&platforms);
  if (platforms.size() == 0 || platform_id >= platforms.size()) { return 1; }
  auto platform = platforms[platform_id];

  // Initializes the OpenCL device
  auto devices = std::vector<cl::Device>();
  platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
  if (devices.size() == 0 || device_id >= devices.size()) { return 1; }
  auto device = devices[device_id];

  // Creates the OpenCL context, queue, and an event
  auto device_as_vector = std::vector<cl::Device>{device};
  auto context = cl::Context(device_as_vector);
  auto queue = cl::CommandQueue(context, device);
  auto event = cl_event{nullptr};

  // Populate host matrices with some example data
  auto host_a = std::vector<double>({1.0,  2.0,  1.0, -2.0,
                                    0.0, -1.0, -2.0,  0.0,
                                    0.0,  0.0,  1.0,  1.0,
                                    0.0,  0.0,  0.0, -1.0});
  auto host_b = std::vector<double>({-1.0, -1.0,  3.0,
                                     1.0, -3.0,  2.0,
                                     1.0,  1.0, -1.0,
                                     4.0, -1.0, -2.0});
  // Expected result:
  //   8 -5  2
  // -11  3  4
  //   5  0 -3
  //  -4  1  2

  // Copy the matrices to the device (blocking writes)
  auto device_a = cl::Buffer(context, CL_MEM_READ_WRITE, host_a.size()*sizeof(double));
  auto device_b = cl::Buffer(context, CL_MEM_READ_WRITE, host_b.size()*sizeof(double));
  queue.enqueueWriteBuffer(device_a, CL_TRUE, 0, host_a.size()*sizeof(double), host_a.data());
  queue.enqueueWriteBuffer(device_b, CL_TRUE, 0, host_b.size()*sizeof(double), host_b.data());

  // Call the DTRSM routine. Note that the type of alpha (double) determines the precision.
  auto queue_plain = queue();
  auto status = clblast::Trsm(clblast::Layout::kRowMajor, clblast::Side::kLeft,
                              clblast::Triangle::kUpper, clblast::Transpose::kNo,
                              clblast::Diagonal::kNonUnit,
                              m, n,
                              alpha,
                              device_a(), 0, a_ld,
                              device_b(), 0, b_ld,
                              &queue_plain, &event);

  // Waits for the routine to finish; the event is only waited on and released when the call
  // reported success.
  if (status == clblast::StatusCode::kSuccess) {
    clWaitForEvents(1, &event);
    clReleaseEvent(event);
  }

  // Retrieves the results (blocking read of B back into host memory)
  queue.enqueueReadBuffer(device_b, CL_TRUE, 0, host_b.size()*sizeof(double), host_b.data());

  // Example completed. See "clblast.h" for status codes (0 -> success).
  printf("Completed TRSM with status %d and results:\n", static_cast<int>(status));
  for (auto i = size_t{0}; i < m; ++i) {
    for (auto j = size_t{0}; j < n; ++j) {
      printf("%3.0f ", host_b[i * b_ld + j]);
    }
    printf("\n");
  }
  return 0;
}
/**
 * Enqueues a non-blocking read of `count` floats from this array's buffer into host memory.
 *
 * The read starts at element `sourceBeginIndex` of the buffer and writes to
 * `targetPtr + targetBeginIndex`. Completion is reported through the returned future: it
 * becomes ready when the OpenCL event completes, and carries an ocl_error ("Cannot read
 * memory.") when the event finishes with any status other than CL_COMPLETE.
 *
 * The arguments are checked with verify_arg() and the object with verify_if_accessible()
 * before anything is enqueued (their failure behaviour is defined elsewhere).
 *
 * @param sourceBeginIndex  index of the first float to read from the buffer (>= 0)
 * @param count             number of floats to read (> 0)
 * @param targetPtr         destination host memory (non-null); presumably it must stay valid
 *                          until the returned future is ready, since the read is non-blocking
 * @param targetBeginIndex  element offset into targetPtr at which writing starts (>= 0)
 * @return a shared future that is satisfied by the event completion callback
 * @throws whatever as_ocl_error() produces when enqueueing, registering the callback or
 *         flushing the queue raises an exception
 */
boost::shared_future<void> ocl_data_array::read(idx_t sourceBeginIndex, idx_t count, float* targetPtr, idx_t targetBeginIndex)
{
    verify_arg(sourceBeginIndex >= 0, "sourceBeginIndex");
    verify_arg(count > 0, "count");
    verify_arg(targetPtr != null, "targetPtr");
    verify_arg(targetBeginIndex >= 0, "targetBeginIndex");

    verify_if_accessible();

    auto& ctx = context();

    // The promise lives on the heap because it has to outlive this call: the completion
    // callback fulfils and deletes it.
    auto promise = new boost::promise<void>();
    auto future = boost::shared_future<void>(promise->get_future());

    // Set once the callback has been registered. From that point on the callback is the sole
    // owner of `promise`; deleting it here as well would be a double delete / use-after-free.
    bool callbackOwnsPromise = false;
    try
    {
        cl::Event e;
        auto queue = ctx->cl_queue();

        queue.enqueueReadBuffer(
            buffer(),
            false, // blocking
            sourceBeginIndex * sizeof(float), // offset
            count * sizeof(float), // size
            targetPtr + targetBeginIndex,
            nullptr,
            &e);

        e.setCallback(
        CL_COMPLETE,
        [](cl_event /*event*/, cl_int status, void* userData)
        {
            auto promise = static_cast<boost::promise<void>*>(userData);
            try
            {
                if (status == CL_COMPLETE)
                {
                    // Done
                    promise->set_value();
                }
                else
                {
                    // cl::Error
                    promise->set_exception(std::make_exception_ptr(ocl_error(status, "Cannot read memory.")));
                }
            }
            catch (...)
            {
                try
                {
                    promise->set_exception(std::current_exception());
                }
                catch (...)
                {
                    // Nothing more can be reported. An exception must not escape into the
                    // OpenCL runtime that invoked this callback, and the promise below still
                    // has to be freed.
                }
            }
            delete promise;
        },
        promise);
        callbackOwnsPromise = true;

        queue.flush();
    }
    catch (exception& ex)
    {
        // Only free the promise if the callback was never registered; otherwise the callback
        // will (or already did) delete it.
        if (!callbackOwnsPromise)
        {
            delete promise;
        }
        throw as_ocl_error(ex);
    }

    return future;
}