예제 #1
0
int main()
{
  typedef float       ScalarType;

  //
  // Initialize OpenCL vectors:
  //
  unsigned int vector_size = 10;
  viennacl::scalar<ScalarType>  s = 1.0; //dummy
  viennacl::vector<ScalarType>  vec1(vector_size);
  viennacl::vector<ScalarType>  vec2(vector_size);
  viennacl::vector<ScalarType>  result_mul(vector_size);
  viennacl::vector<ScalarType>  result_div(vector_size);

  //
  // fill the operands vec1 and vec2:
  //
  for (unsigned int i=0; i<vector_size; ++i)
  {
    vec1[i] = static_cast<ScalarType>(i);
    vec2[i] = static_cast<ScalarType>(vector_size-i);
  }

  //
  // Set up the OpenCL program given in my_compute_kernel:
  // A program is one compilation unit and can hold many different compute kernels.
  //
  viennacl::ocl::program & my_prog = viennacl::ocl::current_context().add_program(my_compute_program, "my_compute_program");
  my_prog.add_kernel("elementwise_prod");  //register elementwise product kernel
  my_prog.add_kernel("elementwise_div");   //register elementwise division kernel
  
  //
  // Now we can get the kernels from the program 'my_program'.
  // (Note that first all kernels need to be registered via add_kernel() before get_kernel() can be called,
  // otherwise existing references might be invalidated)
  //
  viennacl::ocl::kernel & my_kernel_mul = my_prog.get_kernel("elementwise_prod");
  viennacl::ocl::kernel & my_kernel_div = my_prog.get_kernel("elementwise_div");
  
  //
  // Launch the kernel with 'vector_size' threads in one work group
  // Note that size_t might differ between host and device. Thus, a cast to cl_uint is necessary for the forth argument.
  //
  viennacl::ocl::enqueue(my_kernel_mul(vec1, vec2, result_mul, static_cast<cl_uint>(vec1.size())));  
  viennacl::ocl::enqueue(my_kernel_div(vec1, vec2, result_div, static_cast<cl_uint>(vec1.size())));
  
  //
  // Print the result:
  //
  std::cout << "        vec1: " << vec1 << std::endl;
  std::cout << "        vec2: " << vec2 << std::endl;
  std::cout << "vec1 .* vec2: " << result_mul << std::endl;
  std::cout << "vec1 /* vec2: " << result_div << std::endl;
  std::cout << "norm_2(vec1 .* vec2): " << viennacl::linalg::norm_2(result_mul) << std::endl;
  std::cout << "norm_2(vec1 /* vec2): " << viennacl::linalg::norm_2(result_div) << std::endl;
  
  //
  //  That's it.
  //
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
  
  return 0;
}
예제 #2
0
/**
*   Since no auxiliary routines are needed, we can directly start with main().
**/
int main()
{
  typedef float       ScalarType;

  /**
  * Initialize OpenCL vectors:
  **/
  unsigned int vector_size = 10;
  viennacl::vector<ScalarType>  vec1(vector_size);
  viennacl::vector<ScalarType>  vec2(vector_size);
  viennacl::vector<ScalarType>  result_mul(vector_size);
  viennacl::vector<ScalarType>  result_div(vector_size);

  /**
  * Fill the operands vec1 and vec2 with some numbers.
  **/
  for (unsigned int i=0; i<vector_size; ++i)
  {
    vec1[i] = static_cast<ScalarType>(i);
    vec2[i] = static_cast<ScalarType>(vector_size-i);
  }

  /**
  * Set up the OpenCL program given in my_compute_kernel:
  * A program is one compilation unit and can hold many different compute kernels.
  **/
  viennacl::ocl::program & my_prog = viennacl::ocl::current_context().add_program(my_compute_program, "my_compute_program");
  // Note: Releases older than ViennaCL 1.5.0 required calls to add_kernel(). This is no longer needed, the respective interface has been removed.

  /**
  * Now we can get the kernels from the program 'my_program'.
  * (Note that first all kernels need to be registered via add_kernel() before get_kernel() can be called,
  * otherwise existing references might be invalidated)
  **/
  viennacl::ocl::kernel & my_kernel_mul = my_prog.get_kernel("elementwise_prod");
  viennacl::ocl::kernel & my_kernel_div = my_prog.get_kernel("elementwise_div");

  /**
  * Launch the kernel with 'vector_size' threads in one work group
  * Note that std::size_t might differ between host and device. Thus, a cast to cl_uint is necessary for the forth argument.
  **/
  viennacl::ocl::enqueue(my_kernel_mul(vec1, vec2, result_mul, static_cast<cl_uint>(vec1.size())));
  viennacl::ocl::enqueue(my_kernel_div(vec1, vec2, result_div, static_cast<cl_uint>(vec1.size())));

  /**
  * Print the result:
  **/
  std::cout << "        vec1: " << vec1 << std::endl;
  std::cout << "        vec2: " << vec2 << std::endl;
  std::cout << "vec1 .* vec2: " << result_mul << std::endl;
  std::cout << "vec1 /* vec2: " << result_div << std::endl;
  std::cout << "norm_2(vec1 .* vec2): " << viennacl::linalg::norm_2(result_mul) << std::endl;
  std::cout << "norm_2(vec1 /* vec2): " << viennacl::linalg::norm_2(result_div) << std::endl;

  /**
  *  We are already done. We only needed a few lines of code by letting ViennaCL deal with the details :-)
  **/
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}