int main() { typedef float ScalarType; // // Initialize OpenCL vectors: // unsigned int vector_size = 10; viennacl::scalar<ScalarType> s = 1.0; //dummy viennacl::vector<ScalarType> vec1(vector_size); viennacl::vector<ScalarType> vec2(vector_size); viennacl::vector<ScalarType> result_mul(vector_size); viennacl::vector<ScalarType> result_div(vector_size); // // fill the operands vec1 and vec2: // for (unsigned int i=0; i<vector_size; ++i) { vec1[i] = static_cast<ScalarType>(i); vec2[i] = static_cast<ScalarType>(vector_size-i); } // // Set up the OpenCL program given in my_compute_kernel: // A program is one compilation unit and can hold many different compute kernels. // viennacl::ocl::program & my_prog = viennacl::ocl::current_context().add_program(my_compute_program, "my_compute_program"); my_prog.add_kernel("elementwise_prod"); //register elementwise product kernel my_prog.add_kernel("elementwise_div"); //register elementwise division kernel // // Now we can get the kernels from the program 'my_program'. // (Note that first all kernels need to be registered via add_kernel() before get_kernel() can be called, // otherwise existing references might be invalidated) // viennacl::ocl::kernel & my_kernel_mul = my_prog.get_kernel("elementwise_prod"); viennacl::ocl::kernel & my_kernel_div = my_prog.get_kernel("elementwise_div"); // // Launch the kernel with 'vector_size' threads in one work group // Note that size_t might differ between host and device. Thus, a cast to cl_uint is necessary for the forth argument. // viennacl::ocl::enqueue(my_kernel_mul(vec1, vec2, result_mul, static_cast<cl_uint>(vec1.size()))); viennacl::ocl::enqueue(my_kernel_div(vec1, vec2, result_div, static_cast<cl_uint>(vec1.size()))); // // Print the result: // std::cout << " vec1: " << vec1 << std::endl; std::cout << " vec2: " << vec2 << std::endl; std::cout << "vec1 .* vec2: " << result_mul << std::endl; std::cout << "vec1 /* vec2: " << result_div << std::endl; std::cout << "norm_2(vec1 .* vec2): " << viennacl::linalg::norm_2(result_mul) << std::endl; std::cout << "norm_2(vec1 /* vec2): " << viennacl::linalg::norm_2(result_div) << std::endl; // // That's it. // std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return 0; }
/** * Since no auxiliary routines are needed, we can directly start with main(). **/ int main() { typedef float ScalarType; /** * Initialize OpenCL vectors: **/ unsigned int vector_size = 10; viennacl::vector<ScalarType> vec1(vector_size); viennacl::vector<ScalarType> vec2(vector_size); viennacl::vector<ScalarType> result_mul(vector_size); viennacl::vector<ScalarType> result_div(vector_size); /** * Fill the operands vec1 and vec2 with some numbers. **/ for (unsigned int i=0; i<vector_size; ++i) { vec1[i] = static_cast<ScalarType>(i); vec2[i] = static_cast<ScalarType>(vector_size-i); } /** * Set up the OpenCL program given in my_compute_kernel: * A program is one compilation unit and can hold many different compute kernels. **/ viennacl::ocl::program & my_prog = viennacl::ocl::current_context().add_program(my_compute_program, "my_compute_program"); // Note: Releases older than ViennaCL 1.5.0 required calls to add_kernel(). This is no longer needed, the respective interface has been removed. /** * Now we can get the kernels from the program 'my_program'. * (Note that first all kernels need to be registered via add_kernel() before get_kernel() can be called, * otherwise existing references might be invalidated) **/ viennacl::ocl::kernel & my_kernel_mul = my_prog.get_kernel("elementwise_prod"); viennacl::ocl::kernel & my_kernel_div = my_prog.get_kernel("elementwise_div"); /** * Launch the kernel with 'vector_size' threads in one work group * Note that std::size_t might differ between host and device. Thus, a cast to cl_uint is necessary for the forth argument. **/ viennacl::ocl::enqueue(my_kernel_mul(vec1, vec2, result_mul, static_cast<cl_uint>(vec1.size()))); viennacl::ocl::enqueue(my_kernel_div(vec1, vec2, result_div, static_cast<cl_uint>(vec1.size()))); /** * Print the result: **/ std::cout << " vec1: " << vec1 << std::endl; std::cout << " vec2: " << vec2 << std::endl; std::cout << "vec1 .* vec2: " << result_mul << std::endl; std::cout << "vec1 /* vec2: " << result_div << std::endl; std::cout << "norm_2(vec1 .* vec2): " << viennacl::linalg::norm_2(result_mul) << std::endl; std::cout << "norm_2(vec1 /* vec2): " << viennacl::linalg::norm_2(result_div) << std::endl; /** * We are already done. We only needed a few lines of code by letting ViennaCL deal with the details :-) **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }