예제 #1
0
  inline std::vector< platform > get_platforms()
  {
    std::vector< platform > ret;
    cl_int err;
    cl_uint num_platforms;
    cl_platform_id ids[42];   //no more than 42 platforms supported...
#if defined(VIENNACL_DEBUG_ALL)
    Rcpp::Rcout << "ViennaCL: Getting platform..." << std::endl;
#endif
    err = clGetPlatformIDs(42, ids, &num_platforms);
    VIENNACL_ERR_CHECK(err);

    for (cl_uint i = 0; i < num_platforms; ++i)
      ret.push_back( platform(ids[i]) );

    return ret;
  }
예제 #2
0
      /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory starting at address 'dst_buffer + dst_offset' in the same OpenCL context.
       *
       *  @param src_buffer     A smart pointer to the begin of an allocated OpenCL buffer
       *  @param dst_buffer     A smart pointer to the end of an allocated OpenCL buffer
       *  @param src_offset     Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
       *  @param dst_offset     Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
       *  @param bytes_to_copy  Number of bytes to be copied
       */
      inline void memory_copy(viennacl::ocl::handle<cl_mem> const & src_buffer,
                       viennacl::ocl::handle<cl_mem> & dst_buffer,
                       vcl_size_t src_offset,
                       vcl_size_t dst_offset,
                       vcl_size_t bytes_to_copy)
      {
        assert( &src_buffer.context() == &dst_buffer.context() && bool("Transfer between memory buffers in different contexts not supported yet!"));

        viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context());
        cl_int err = clEnqueueCopyBuffer(memory_context.get_queue().handle().get(),
                                         src_buffer.get(),
                                         dst_buffer.get(),
                                         src_offset,
                                         dst_offset,
                                         bytes_to_copy,
                                         0, NULL, NULL);  //events
        VIENNACL_ERR_CHECK(err);
      }
예제 #3
0
        /** @brief Initializes the class from a given device ID */
        void init(cl_device_id dev)
        {
          cl_int err;

          //query a little bit of info:
          err = clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(std::size_t), &max_work_group_size_, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &compute_units_, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(cl_device_type), &type_, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &global_memory_, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_memory_alloc_, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &local_memory_, NULL);
          VIENNACL_ERR_CHECK(err);
        }
예제 #4
0
    void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue)
    {
      // 1D kernel:
      if (k.local_work_size(1) == 0)
      {
        #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
        std::cout << "ViennaCL: Starting 1D-kernel '" << k.name() << "'..." << std::endl;
        std::cout << "ViennaCL: Global work size: '"  << k.global_work_size() << "'..." << std::endl;
        std::cout << "ViennaCL: Local work size: '"   << k.local_work_size() << "'..." << std::endl;
        #endif
      
        size_t tmp_global = k.global_work_size();
        size_t tmp_local = k.local_work_size();
        
        cl_int err;
        if (tmp_global == 1 && tmp_local == 1)
          err = clEnqueueTask(queue.handle().get(), k.handle().get(), 0, NULL, NULL);
        else
          err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), 1, NULL, &tmp_global, &tmp_local, 0, NULL, NULL);

        if (err != CL_SUCCESS)  //if not successful, try to start with smaller work size
        {
          //std::cout << "FAIL: " << std::endl; exit(0);
          while (err != CL_SUCCESS && tmp_local > 1)
          {
            //std::cout << "Flushing queue, then enqueuing again with half the size..." << std::endl;
            //std::cout << "Error code: " << err << std::endl;
            
            tmp_global /= 2;
            tmp_local /= 2;

            #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
            std::cout << "ViennaCL: Kernel start failed for '" << k.name() << "'." << std::endl;
            std::cout << "ViennaCL: Global work size: '"  << tmp_global << "'..." << std::endl;
            std::cout << "ViennaCL: Local work size: '"   << tmp_local << "'..." << std::endl;
            #endif
            
            queue.finish();
            err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), 1, NULL, &tmp_global, &tmp_local, 0, NULL, NULL);
          }
          
          if (err != CL_SUCCESS)
          {
            //could not start kernel with any parameters
            std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl;
            std::cerr << "ViennaCL: Smaller work sizes could not solve the problem. " << std::endl;
            VIENNACL_ERR_CHECK(err);
          }
          else
          {
            //remember parameters:
            k.local_work_size(0, tmp_local);
            k.global_work_size(0, tmp_global);
            #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
            std::cout << "ViennaCL: Kernel '" << k.name() << "' now uses global work size " << tmp_global << " and local work size " << tmp_local << "."  << std::endl;
            #endif
          }          
        }
      }
      else //2D kernel
      {
        #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
        std::cout << "ViennaCL: Starting 2D-kernel '" << k.name() << "'..." << std::endl;
        std::cout << "ViennaCL: Global work size: '"  << k.global_work_size(0) << ", " << k.global_work_size(1) << "'..." << std::endl;
        std::cout << "ViennaCL: Local work size: '"   << k.local_work_size(0) << ", " << k.local_work_size(1) << "'..." << std::endl;
        #endif

        size_t tmp_global[2]; 
        tmp_global[0] = k.global_work_size(0);
        tmp_global[1] = k.global_work_size(1);
        
        size_t tmp_local[2];
        tmp_local[0] = k.local_work_size(0);
        tmp_local[1] = k.local_work_size(1);
        
        cl_int err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), 2, NULL, tmp_global, tmp_local, 0, NULL, NULL);

        if (err != CL_SUCCESS)
        {
          //could not start kernel with any parameters
          std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl;
          VIENNACL_ERR_CHECK(err);
        }
        
      }
            
      #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
      queue.finish();
      std::cout << "ViennaCL: Kernel " << k.name() << " finished!" << std::endl;
      #endif
    } //enqueue()
예제 #5
0
 static void inc(cl_kernel & something)
 {
   cl_int err = clRetainKernel(something);
   VIENNACL_ERR_CHECK(err);
 }
예제 #6
0
 static void inc(cl_program & something)
 {
   cl_int err = clRetainProgram(something);
   VIENNACL_ERR_CHECK(err);
 }
예제 #7
0
 static void inc(cl_mem & something)
 {
   cl_int err = clRetainMemObject(something);
   VIENNACL_ERR_CHECK(err);
 }
예제 #8
0
 static void inc(cl_context & something)
 {
   cl_int err = clRetainContext(something);
   VIENNACL_ERR_CHECK(err);
 }
예제 #9
0
 static void inc(cl_command_queue & something)
 {
   cl_int err = clRetainCommandQueue(something);
   VIENNACL_ERR_CHECK(err);
 }
예제 #10
0
 static void get(cl_program context, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
     cl_int err = clGetProgramInfo(context,param_name,param_value_size,param_value,param_value_size_ret);
     VIENNACL_ERR_CHECK(err);
 }
예제 #11
0
 static void get(cl_kernel kernel, cl_device_id dev_id, cl_kernel_work_group_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
     cl_int err = clGetKernelWorkGroupInfo(kernel, dev_id, param_name,param_value_size,param_value,param_value_size_ret);
     VIENNACL_ERR_CHECK(err);
 }
예제 #12
0
 static void get(cl_kernel kernel, cl_kernel_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
     cl_int err = clGetKernelInfo(kernel,param_name,param_value_size,param_value,param_value_size_ret);
     VIENNACL_ERR_CHECK(err);
 }
예제 #13
0
 static void get(cl_device_id device, cl_device_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
     cl_int err = clGetDeviceInfo(device,param_name,param_value_size,param_value,param_value_size_ret);
     VIENNACL_ERR_CHECK(err);
 }
예제 #14
0
 static void get(cl_mem mem, cl_mem_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
     cl_int err = clGetMemObjectInfo(mem,param_name,param_value_size,param_value,param_value_size_ret);
     VIENNACL_ERR_CHECK(err);
 }
예제 #15
0
/**
* With this let us go right to main():
**/
int main()
{
  typedef float       ScalarType;


  /**
  * <h2>Part 1: Set up a custom context</h2>
  *
  * The following is rather lengthy because OpenCL is a fairly low-level framework.
  * For comparison, the subsequent code explicitly performs the OpenCL setup that is done
  * in the background within the 'custom_kernels'-tutorial
  **/

  //manually set up a custom OpenCL context:
  std::vector<cl_device_id> device_id_array;

  //get all available devices
  viennacl::ocl::platform pf;
  std::cout << "Platform info: " << pf.info() << std::endl;
  std::vector<viennacl::ocl::device> devices = pf.devices(CL_DEVICE_TYPE_DEFAULT);
  std::cout << devices[0].name() << std::endl;
  std::cout << "Number of devices for custom context: " << devices.size() << std::endl;

  //set up context using all found devices:
  for (std::size_t i=0; i<devices.size(); ++i)
  {
      device_id_array.push_back(devices[i].id());
  }

  std::cout << "Creating context..." << std::endl;
  cl_int err;
  cl_context my_context = clCreateContext(0, cl_uint(device_id_array.size()), &(device_id_array[0]), NULL, NULL, &err);
  VIENNACL_ERR_CHECK(err);


  //create two Vectors:
  unsigned int vector_size = 10;
  std::vector<ScalarType> vec1(vector_size);
  std::vector<ScalarType> vec2(vector_size);
  std::vector<ScalarType> result(vector_size);

  //
  // fill the operands vec1 and vec2:
  //
  for (unsigned int i=0; i<vector_size; ++i)
  {
    vec1[i] = static_cast<ScalarType>(i);
    vec2[i] = static_cast<ScalarType>(vector_size-i);
  }

  //
  // create memory in OpenCL context:
  //
  cl_mem mem_vec1 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(vec1[0]), &err);
  VIENNACL_ERR_CHECK(err);
  cl_mem mem_vec2 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(vec2[0]), &err);
  VIENNACL_ERR_CHECK(err);
  cl_mem mem_result = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(result[0]), &err);
  VIENNACL_ERR_CHECK(err);

  //
  // create a command queue for each device:
  //

  std::vector<cl_command_queue> queues(devices.size());
  for (std::size_t i=0; i<devices.size(); ++i)
  {
    queues[i] = clCreateCommandQueue(my_context, devices[i].id(), 0, &err);
    VIENNACL_ERR_CHECK(err);
  }

  //
  // create and build a program in the context:
  //
  std::size_t source_len = std::string(my_compute_program).length();
  cl_program my_prog = clCreateProgramWithSource(my_context, 1, &my_compute_program, &source_len, &err);
  err = clBuildProgram(my_prog, 0, NULL, NULL, NULL, NULL);

/*            char buffer[1024];
            cl_build_status status;
            clGetProgramBuildInfo(my_prog, devices[1].id(), CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL);
            clGetProgramBuildInfo(my_prog, devices[1].id(), CL_PROGRAM_BUILD_LOG, sizeof(char)*1024, &buffer, NULL);
            std::cout << "Build Scalar: Err = " << err << " Status = " << status << std::endl;
            std::cout << "Log: " << buffer << std::endl;*/

  VIENNACL_ERR_CHECK(err);

  //
  // create a kernel from the program:
  //
  const char * kernel_name = "elementwise_prod";
  cl_kernel my_kernel = clCreateKernel(my_prog, kernel_name, &err);
  VIENNACL_ERR_CHECK(err);


  //
  // Execute elementwise_prod kernel on first queue: result = vec1 .* vec2;
  //
  err = clSetKernelArg(my_kernel, 0, sizeof(cl_mem), (void*)&mem_vec1);
  VIENNACL_ERR_CHECK(err);
  err = clSetKernelArg(my_kernel, 1, sizeof(cl_mem), (void*)&mem_vec2);
  VIENNACL_ERR_CHECK(err);
  err = clSetKernelArg(my_kernel, 2, sizeof(cl_mem), (void*)&mem_result);
  VIENNACL_ERR_CHECK(err);
  err = clSetKernelArg(my_kernel, 3, sizeof(unsigned int), (void*)&vector_size);
  VIENNACL_ERR_CHECK(err);
  std::size_t global_size = vector_size;
  std::size_t local_size = vector_size;
  err = clEnqueueNDRangeKernel(queues[0], my_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
  VIENNACL_ERR_CHECK(err);


  //
  // Read and output result:
  //
  err = clEnqueueReadBuffer(queues[0], mem_vec1, CL_TRUE, 0, sizeof(ScalarType)*vector_size, &(vec1[0]), 0, NULL, NULL);
  VIENNACL_ERR_CHECK(err);
  err = clEnqueueReadBuffer(queues[0], mem_result, CL_TRUE, 0, sizeof(ScalarType)*vector_size, &(result[0]), 0, NULL, NULL);
  VIENNACL_ERR_CHECK(err);

  std::cout << "vec1  : ";
  for (std::size_t i=0; i<vec1.size(); ++i)
    std::cout << vec1[i] << " ";
  std::cout << std::endl;

  std::cout << "vec2  : ";
  for (std::size_t i=0; i<vec2.size(); ++i)
    std::cout << vec2[i] << " ";
  std::cout << std::endl;

  std::cout << "result: ";
  for (std::size_t i=0; i<result.size(); ++i)
    std::cout << result[i] << " ";
  std::cout << std::endl;

  /**
  * <h2>Part 2: Reuse Custom OpenCL Context with ViennaCL</h2>
  *
  * To let ViennaCL reuse the previously created context, we need to make it known to ViennaCL \em before any ViennaCL objects are created.
  * We inject the custom context as the context with default id '0' when using viennacl::ocl::switch_context().
  **/
  viennacl::ocl::setup_context(0, my_context, device_id_array, queues);
  viennacl::ocl::switch_context(0); //activate the new context (only mandatory with context-id not equal to zero)

  /**
  * Check that ViennaCL really uses the new context:
  **/
  std::cout << "Existing context: " << my_context << std::endl;
  std::cout << "ViennaCL uses context: " << viennacl::ocl::current_context().handle().get() << std::endl;

  /**
  * Wrap existing OpenCL objects into ViennaCL:
  **/
  viennacl::vector<ScalarType> vcl_vec1(mem_vec1, vector_size);
  viennacl::vector<ScalarType> vcl_vec2(mem_vec2, vector_size);
  viennacl::vector<ScalarType> vcl_result(mem_result, vector_size);
  viennacl::scalar<ScalarType> vcl_s = 2.0;

  std::cout << "Standard vector operations within ViennaCL:" << std::endl;
  vcl_result = vcl_s * vcl_vec1 + vcl_vec2;

  std::cout << "vec1  : ";
  std::cout << vcl_vec1 << std::endl;

  std::cout << "vec2  : ";
  std::cout << vcl_vec2 << std::endl;

  std::cout << "result: ";
  std::cout << vcl_result << std::endl;

  /**
  * We can also reuse the existing elementwise_prod kernel.
  * Therefore, we first have to make the existing program known to ViennaCL
  * For more details on the three lines, see tutorial 'custom-kernels'
  **/
  std::cout << "Using existing kernel within the OpenCL backend of ViennaCL:" << std::endl;
  viennacl::ocl::program & my_vcl_prog = viennacl::ocl::current_context().add_program(my_prog, "my_compute_program");
  viennacl::ocl::kernel & my_vcl_kernel = my_vcl_prog.add_kernel(my_kernel, "elementwise_prod");
  viennacl::ocl::enqueue(my_vcl_kernel(vcl_vec1, vcl_vec2, vcl_result, static_cast<cl_uint>(vcl_vec1.size())));  //Note that std::size_t might differ between host and device. Thus, a cast to cl_uint is necessary here.

  std::cout << "vec1  : ";
  std::cout << vcl_vec1 << std::endl;

  std::cout << "vec2  : ";
  std::cout << vcl_vec2 << std::endl;

  std::cout << "result: ";
  std::cout << vcl_result << std::endl;


  /**
  * Since a linear piece of memory can be interpreted in several ways,
  * we will now create a 3x3 row-major matrix out of the linear memory in mem_vec1/
  * The first three entries in vcl_vec2 and vcl_result are used to carry out matrix-vector products:
  **/
  viennacl::matrix<ScalarType> vcl_matrix(mem_vec1, 3, 3);

  vcl_vec2.resize(3);   //note that the resize operation leads to new memory, thus vcl_vec2 is now at a different memory location (values are copied)
  vcl_result.resize(3); //note that the resize operation leads to new memory, thus vcl_vec2 is now at a different memory location (values are copied)
  vcl_result = viennacl::linalg::prod(vcl_matrix, vcl_vec2);

  std::cout << "result of matrix-vector product: ";
  std::cout << vcl_result << std::endl;

  /**
  *  Any further operations can be carried out in the same way.
  *  Just keep in mind that any resizing of vectors or matrices leads to a reallocation of the underlying memory buffer, through which the 'wrapper' is lost.
  **/
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}