/** * In this tutorial we do not need additional auxiliary functions, allowing us to start right with main(): **/ int main() { //Change this type definition to double if your gpu supports that typedef float ScalarType; /** * <h2> Scalar Operations </h2> * * Although usually not very efficient because of PCI-Express latency, ViennaCL enables you to directly manipulate individual scalar values. * As such, a viennacl::scalar<double> behaves very similar to a normal `double`. * * Let us define a few CPU and ViennaCL scalars: * **/ ScalarType s1 = ScalarType(3.1415926); //note: writing ScalarType s1 = 3.1415926; leads to warnings with some compilers if ScalarType is 'float'. ScalarType s2 = ScalarType(2.71763); ScalarType s3 = ScalarType(42.0); viennacl::scalar<ScalarType> vcl_s1; viennacl::scalar<ScalarType> vcl_s2 = ScalarType(1.0); viennacl::scalar<ScalarType> vcl_s3 = ScalarType(1.0); /** * CPU scalars can be transparently assigned to GPU scalars and vice versa: **/ std::cout << "Copying a few scalars..." << std::endl; vcl_s1 = s1; s2 = vcl_s2; vcl_s3 = s3; /** * Operations between GPU scalars work just as for CPU scalars: * (Note that such single compute kernels on the GPU are considerably slower than on the CPU) **/ std::cout << "Manipulating a few scalars..." << std::endl; std::cout << "operator +=" << std::endl; s1 += s2; vcl_s1 += vcl_s2; std::cout << "operator *=" << std::endl; s1 *= s2; vcl_s1 *= vcl_s2; std::cout << "operator -=" << std::endl; s1 -= s2; vcl_s1 -= vcl_s2; std::cout << "operator /=" << std::endl; s1 /= s2; vcl_s1 /= vcl_s2; std::cout << "operator +" << std::endl; s1 = s2 + s3; vcl_s1 = vcl_s2 + vcl_s3; std::cout << "multiple operators" << std::endl; s1 = s2 + s3 * s2 - s3 / s1; vcl_s1 = vcl_s2 + vcl_s3 * vcl_s2 - vcl_s3 / vcl_s1; /** * Operations can also be mixed: **/ std::cout << "mixed operations" << std::endl; vcl_s1 = s1 * vcl_s2 + s3 - vcl_s3; /** * The output stream is overloaded as well for direct printing to e.g. a terminal: **/ std::cout << "CPU scalar s3: " << s3 << std::endl; std::cout << "GPU scalar vcl_s3: " << vcl_s3 << std::endl; /** * <h2>Vector Operations * * Define a few vectors (from STL and plain C) and viennacl::vectors **/ std::vector<ScalarType> std_vec1(10); std::vector<ScalarType> std_vec2(10); ScalarType plain_vec3[10]; //plain C array viennacl::vector<ScalarType> vcl_vec1(10); viennacl::vector<ScalarType> vcl_vec2(10); viennacl::vector<ScalarType> vcl_vec3(10); /** * Let us fill the CPU vectors with random values: * (random<> is a helper function from Random.hpp) **/ for (unsigned int i = 0; i < 10; ++i) { std_vec1[i] = random<ScalarType>(); vcl_vec2(i) = random<ScalarType>(); //also works for GPU vectors, but is MUCH slower (approx. factor 10.000) than the CPU analogue plain_vec3[i] = random<ScalarType>(); } /** * Copy the CPU vectors to the GPU vectors and vice versa **/ viennacl::copy(std_vec1.begin(), std_vec1.end(), vcl_vec1.begin()); //either the STL way viennacl::copy(vcl_vec2.begin(), vcl_vec2.end(), std_vec2.begin()); //either the STL way viennacl::copy(vcl_vec2, std_vec2); //using the short hand notation for objects that provide .begin() and .end() members viennacl::copy(vcl_vec2.begin(), vcl_vec2.end(), plain_vec3); //copy to plain C vector /** * Also partial copies by providing the corresponding iterators are possible: **/ viennacl::copy(std_vec1.begin() + 4, std_vec1.begin() + 8, vcl_vec1.begin() + 4); //cpu to gpu viennacl::copy(vcl_vec1.begin() + 4, vcl_vec1.begin() + 8, vcl_vec2.begin() + 1); //gpu to gpu viennacl::copy(vcl_vec1.begin() + 4, vcl_vec1.begin() + 8, std_vec1.begin() + 1); //gpu to cpu /** * Compute the inner product of two GPU vectors and write the result to either CPU or GPU **/ vcl_s1 = viennacl::linalg::inner_prod(vcl_vec1, vcl_vec2); s1 = viennacl::linalg::inner_prod(vcl_vec1, vcl_vec2); s2 = viennacl::linalg::inner_prod(std_vec1, std_vec2); //inner prod can also be used with std::vector (computations are carried out on CPU then) /** * Compute norms: **/ s1 = viennacl::linalg::norm_1(vcl_vec1); vcl_s2 = viennacl::linalg::norm_2(vcl_vec2); s3 = viennacl::linalg::norm_inf(vcl_vec3); /** * Plane rotation of two vectors: **/ viennacl::linalg::plane_rotation(vcl_vec1, vcl_vec2, 1.1f, 2.3f); /** * Use viennacl::vector via the overloaded operators just as you would write it on paper: **/ //simple expression: vcl_vec1 = vcl_s1 * vcl_vec2 / vcl_s3; //more complicated expression: vcl_vec1 = vcl_vec2 / vcl_s3 + vcl_s2 * (vcl_vec1 - vcl_s2 * vcl_vec2); /** * Swap the content of two vectors without a temporary vector: **/ viennacl::swap(vcl_vec1, vcl_vec2); //swaps all entries in memory viennacl::fast_swap(vcl_vec1, vcl_vec2); //swaps OpenCL memory handles only /** * The vectors can also be cleared directly: **/ vcl_vec1.clear(); vcl_vec2.clear(); /** * That's it, the tutorial is completed. **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int run_matrix_benchmark(test_config & config, viennacl::io::parameter_database& paras) { typedef viennacl::scalar<ScalarType> VCLScalar; typedef viennacl::vector<ScalarType> VCLVector; typedef viennacl::matrix<ScalarType> VCLMatrix; //////////////////////////////////////////////////////////////////// //set up a little bit of data to play with: //ScalarType std_result = 0; ScalarType std_factor1 = static_cast<ScalarType>(3.1415); ScalarType std_factor2 = static_cast<ScalarType>(42.0); viennacl::scalar<ScalarType> vcl_factor1(std_factor1); viennacl::scalar<ScalarType> vcl_factor2(std_factor2); std::vector<ScalarType> std_vec1(BENCHMARK_MATRIX_SIZE); //used to set all values to zero std::vector< std::vector<ScalarType> > stl_mat(BENCHMARK_MATRIX_SIZE); //store identity matrix here VCLVector vcl_vec1(BENCHMARK_MATRIX_SIZE); VCLVector vcl_vec2(BENCHMARK_MATRIX_SIZE); VCLMatrix vcl_mat(BENCHMARK_MATRIX_SIZE, BENCHMARK_MATRIX_SIZE); for (int i=0; i<BENCHMARK_MATRIX_SIZE; ++i) { stl_mat[i].resize(BENCHMARK_MATRIX_SIZE); stl_mat[i][i] = 1.0; } copy(std_vec1, vcl_vec1); //initialize vectors with all zeros (no need to worry about overflows then) copy(std_vec1, vcl_vec2); //initialize vectors with all zeros (no need to worry about overflows then) copy(stl_mat, vcl_mat); typedef test_data<VCLScalar, VCLVector, VCLMatrix> TestDataType; test_data<VCLScalar, VCLVector, VCLMatrix> data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_mat); ////////////////////////////////////////////////////////// ///////////// Start parameter recording ///////////////// ////////////////////////////////////////////////////////// typedef std::map< double, std::pair<unsigned int, unsigned int> > TimingType; std::map< std::string, TimingType > all_timings; std::cout << "------- Related to direct solution algorithms ----------" << std::endl; config.kernel_name("trans_lower_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_trans_lower_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("lower_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_lower_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("unit_lower_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_unit_lower_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("upper_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_upper_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("trans_upper_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_trans_upper_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("unit_upper_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_unit_upper_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("lu_factorize"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_lu_factorize<TestDataType>, config, data); //other kernels: std::cout << "------- Related to other operations ----------" << std::endl; config.kernel_name("rank1_update"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_rank1_update<TestDataType>, config, data); config.kernel_name("scaled_rank1_update"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_scaled_rank1_update<TestDataType>, config, data); config.kernel_name("vec_mul"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_vec_mul<TestDataType>, config, data); config.kernel_name("trans_vec_mul"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_trans_vec_mul<TestDataType>, config, data); return 0; }
int run_vector_benchmark(test_config & config, viennacl::io::parameter_database& paras) { typedef viennacl::scalar<ScalarType> VCLScalar; typedef viennacl::vector<ScalarType> VCLVector; //////////////////////////////////////////////////////////////////// //set up a little bit of data to play with: //ScalarType std_result = 0; ScalarType std_factor1 = static_cast<ScalarType>(3.1415); ScalarType std_factor2 = static_cast<ScalarType>(42.0); viennacl::scalar<ScalarType> vcl_factor1(std_factor1); viennacl::scalar<ScalarType> vcl_factor2(std_factor2); std::vector<ScalarType> std_vec1(BENCHMARK_VECTOR_SIZE); //used to set all values to zero VCLVector vcl_vec1(BENCHMARK_VECTOR_SIZE); VCLVector vcl_vec2(BENCHMARK_VECTOR_SIZE); VCLVector vcl_vec3(BENCHMARK_VECTOR_SIZE); viennacl::copy(std_vec1, vcl_vec1); //initialize vectors with all zeros (no need to worry about overflows then) viennacl::copy(std_vec1, vcl_vec2); //initialize vectors with all zeros (no need to worry about overflows then) typedef test_data<VCLScalar, VCLVector> TestDataType; test_data<VCLScalar, VCLVector> data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_vec3); ////////////////////////////////////////////////////////// ///////////// Start parameter recording ///////////////// ////////////////////////////////////////////////////////// typedef std::map< double, std::pair<unsigned int, unsigned int> > TimingType; std::map< std::string, TimingType > all_timings; // vector addition std::cout << "------- Related to vector addition ----------" << std::endl; config.kernel_name("add"); optimize_full(paras, all_timings[config.kernel_name()], vector_add<TestDataType>, config, data); config.kernel_name("inplace_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_add<TestDataType>, config, data); config.kernel_name("mul_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_mul_add<TestDataType>, config, data); config.kernel_name("cpu_mul_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_mul_add<TestDataType>, config, data); config.kernel_name("inplace_mul_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mul_add<TestDataType>, config, data); config.kernel_name("cpu_inplace_mul_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_inplace_mul_add<TestDataType>, config, data); config.kernel_name("inplace_div_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_div_add<TestDataType>, config, data); std::cout << "------- Related to vector subtraction ----------" << std::endl; config.kernel_name("sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_sub<TestDataType>, config, data); config.kernel_name("inplace_sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_sub<TestDataType>, config, data); config.kernel_name("mul_sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_mul_sub<TestDataType>, config, data); config.kernel_name("inplace_mul_sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mul_sub<TestDataType>, config, data); config.kernel_name("inplace_div_sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_div_sub<TestDataType>, config, data); std::cout << "------- Related to vector scaling (mult/div) ----------" << std::endl; config.kernel_name("mult"); optimize_full(paras, all_timings[config.kernel_name()], vector_mult<TestDataType>, config, data); config.kernel_name("inplace_mult"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mult<TestDataType>, config, data); config.kernel_name("cpu_mult"); optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_mult<TestDataType>, config, data); config.kernel_name("cpu_inplace_mult"); optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_inplace_mult<TestDataType>, config, data); config.kernel_name("divide"); optimize_full(paras, all_timings[config.kernel_name()], vector_divide<TestDataType>, config, data); config.kernel_name("inplace_divide"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_divide<TestDataType>, config, data); std::cout << "------- Others ----------" << std::endl; config.kernel_name("inner_prod"); optimize_full(paras, all_timings[config.kernel_name()], vector_inner_prod<TestDataType>, config, data); config.kernel_name("swap"); optimize_full(paras, all_timings[config.kernel_name()], vector_swap<TestDataType>, config, data); config.kernel_name("clear"); optimize_full(paras, all_timings[config.kernel_name()], vector_clear<TestDataType>, config, data); config.kernel_name("plane_rotation"); optimize_full(paras, all_timings[config.kernel_name()], vector_plane_rotation<TestDataType>, config, data); //config.max_work_groups(32); //otherwise failures on 8500 GT config.kernel_name("norm_1"); optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_1<TestDataType>, config, data); config.kernel_name("norm_2"); optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_2<TestDataType>, config, data); config.kernel_name("norm_inf"); optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_inf<TestDataType>, config, data); //restricted optimizations: config.kernel_name("index_norm_inf"); optimize_restricted(paras, all_timings[config.kernel_name()], vector_index_norm_inf<TestDataType>, config, data); return 0; }
int run_matrix_benchmark(test_config & config, viennacl::io::parameter_database& paras) { typedef viennacl::scalar<ScalarType> VCLScalar; typedef viennacl::vector<ScalarType> VCLVector; typedef viennacl::compressed_matrix<ScalarType> VCLMatrix; //////////////////////////////////////////////////////////////////// //set up a little bit of data to play with: //ScalarType std_result = 0; ScalarType std_factor1 = static_cast<ScalarType>(3.1415); ScalarType std_factor2 = static_cast<ScalarType>(42.0); viennacl::scalar<ScalarType> vcl_factor1(std_factor1); viennacl::scalar<ScalarType> vcl_factor2(std_factor2); std::vector<ScalarType> std_vec1(BENCHMARK_MATRIX_SIZE); //used to set all values to zero std::vector< std::map< unsigned int, ScalarType> > stl_mat(BENCHMARK_MATRIX_SIZE); //store identity matrix here VCLVector vcl_vec1(BENCHMARK_MATRIX_SIZE); VCLVector vcl_vec2(BENCHMARK_MATRIX_SIZE); VCLMatrix vcl_mat(BENCHMARK_MATRIX_SIZE, BENCHMARK_MATRIX_SIZE); for (int i=0; i<BENCHMARK_MATRIX_SIZE; ++i) { if (i > 10) { stl_mat[i][i - 10] = 1.0; stl_mat[i][i - 7] = 1.0; stl_mat[i][i - 4] = 1.0; stl_mat[i][i - 2] = 1.0; } stl_mat[i][i] = 1.0; if (i + 10 < BENCHMARK_MATRIX_SIZE) { stl_mat[i][i + 5] = 1.0; stl_mat[i][i + 7] = 1.0; stl_mat[i][i + 9] = 1.0; stl_mat[i][i + 10] = 1.0; } } viennacl::copy(std_vec1, vcl_vec1); //initialize vectors with all zeros (no need to worry about overflows then) viennacl::copy(std_vec1, vcl_vec2); //initialize vectors with all zeros (no need to worry about overflows then) viennacl::copy(stl_mat, vcl_mat); typedef test_data<VCLScalar, VCLVector, VCLMatrix> TestDataType; test_data<VCLScalar, VCLVector, VCLMatrix> data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_mat); ////////////////////////////////////////////////////////// ///////////// Start parameter recording ///////////////// ////////////////////////////////////////////////////////// typedef std::map< double, std::pair<unsigned int, unsigned int> > TimingType; std::map< std::string, TimingType > all_timings; //other kernels: std::cout << "------- Related to other operations ----------" << std::endl; config.kernel_name("vec_mul"); optimize_full(paras, all_timings[config.kernel_name()], matrix_vec_mul<TestDataType>, config, data); return 0; }
int main(int argc, char * argv[]) { TAU_INIT(&argc, &argv) TAU_PROFILE_TIMER(orio_maintimer, "main()", "int (int, char **)", TAU_USER); TAU_PROFILE_START(orio_maintimer); TAU_PROFILE_SET_NODE(0); std::vector<viennacl::ocl::device> devices = viennacl::ocl::platform().devices(); std::vector<cl_device_id> my_devices; my_devices.push_back(devices[0].id()); viennacl::ocl::setup_context(0L, my_devices); //Change this type definition to double if your gpu supports that typedef double ScalarType; ///////////////////////////////////////////////// ///////////// Vector operations ///////////////// ///////////////////////////////////////////////// void * orio_profiler; TAU_PROFILER_CREATE(orio_profiler, "orio_generated_code", "", TAU_USER); for(int i = 0; i < 3; ++i) { // // Define a few vectors (from STL and plain C) and viennacl::vectors // std::vector<ScalarType> std_vec1(1000000); std::vector<ScalarType> std_vec2(1000000); viennacl::vector<ScalarType> vcl_vec1(1000000); viennacl::vector<ScalarType> vcl_vec2(1000000); viennacl::scalar<ScalarType> vcl_s1 = ScalarType(5.0); // // Let us fill the CPU vectors with random values: // (random<> is a helper function from Random.hpp) // for (unsigned int i = 0; i < 1000000; ++i) { std_vec1[i] = random<ScalarType>(); std_vec2[i] = 0.0; } // // Copy the CPU vectors to the GPU vectors and vice versa // TAU_PROFILER_START(orio_profiler); viennacl::copy(std_vec1.begin(), std_vec1.end(), vcl_vec1.begin()); //either the STL way viennacl::copy(std_vec2.begin(), std_vec2.end(), vcl_vec2.begin()); //either the STL way vcl_vec2 += vcl_s1 * vcl_vec1; viennacl::copy(vcl_vec2.begin(), vcl_vec2.end(), std_vec2.begin()); TAU_PROFILER_STOP(orio_profiler); } double orio_inclusive[TAU_MAX_COUNTERS]; TAU_PROFILER_GET_INCLUSIVE_VALUES(orio_profiler, &orio_inclusive); printf("{'/*@ coordinate @*/' : %g}\n", orio_inclusive[0]); return EXIT_SUCCESS; }
int main() { //Change this type definition to double if your gpu supports that typedef float ScalarType; // Choose the Phi (WORKS) viennacl::ocl::set_context_device_type(0, viennacl::ocl::accelerator_tag()); ///////////////////////////////////////////////// ///////////// Scalar operations ///////////////// ///////////////////////////////////////////////// // // Define a few CPU scalars: // ScalarType s1 = static_cast<ScalarType>(3.1415926); ScalarType s2 = static_cast<ScalarType>(2.71763); ScalarType s3 = static_cast<ScalarType>(42.0); // // ViennaCL scalars are defined in the same way: // viennacl::scalar<ScalarType> vcl_s1; viennacl::scalar<ScalarType> vcl_s2 = static_cast<ScalarType>(1.0); viennacl::scalar<ScalarType> vcl_s3 = static_cast<ScalarType>(1.0); // // CPU scalars can be transparently assigned to GPU scalars and vice versa: // vcl_s1 = s1; s2 = vcl_s2; vcl_s3 = s3; // // Operations between GPU scalars work just as for CPU scalars: // (Note that such single compute kernels on the GPU are considerably slower than on the CPU) // s1 += s2; vcl_s1 += vcl_s2; s1 *= s2; vcl_s1 *= vcl_s2; s1 -= s2; vcl_s1 -= vcl_s2; s1 /= s2; vcl_s1 /= vcl_s2; s1 = s2 + s3; vcl_s1 = vcl_s2 + vcl_s3; s1 = s2 + s3 * s2 - s3 / s1; vcl_s1 = vcl_s2 + vcl_s3 * vcl_s2 - vcl_s3 / vcl_s1; // // Operations can also be mixed: // vcl_s1 = s1 * vcl_s2 + s3 - vcl_s3; // // Output stream is overloaded as well: // std::cout << "CPU scalar s2: " << s2 << std::endl; std::cout << "GPU scalar vcl_s2: " << vcl_s2 << std::endl; std::vector< viennacl::ocl::device > devices = viennacl::ocl::platform().devices(); for (int i = 0; i < devices.size(); i++) { std::cout << devices[i].info() << "\n"; } std::cout << "SELECTED DEVICE: \n"; std::cout << viennacl::ocl::current_context().current_device().info() << "\n"; ///////////////////////////////////////////////// ///////////// Vector operations ///////////////// ///////////////////////////////////////////////// // // Define a few vectors (from STL and plain C) and viennacl::vectors // std::vector<ScalarType> std_vec1(10); std::vector<ScalarType> std_vec2(10); ScalarType plain_vec3[10]; //plain C array viennacl::vector<ScalarType> vcl_vec1(10); viennacl::vector<ScalarType> vcl_vec2(10); viennacl::vector<ScalarType> vcl_vec3(10); // // Let us fill the CPU vectors with random values: // (random<> is a helper function from Random.hpp) // for (unsigned int i = 0; i < 10; ++i) { std_vec1[i] = random<ScalarType>(); vcl_vec2(i) = random<ScalarType>(); //also works for GPU vectors, but is MUCH slower (approx. factor 10.000) than the CPU analogue plain_vec3[i] = random<ScalarType>(); } // // Copy the CPU vectors to the GPU vectors and vice versa // copy(std_vec1.begin(), std_vec1.end(), vcl_vec1.begin()); //either the STL way copy(vcl_vec2.begin(), vcl_vec2.end(), std_vec2.begin()); //either the STL way copy(vcl_vec2, std_vec2); //using the short hand notation for objects that provide .begin() and .end() members copy(vcl_vec2.begin(), vcl_vec2.end(), plain_vec3); //copy to plain C vector // // Compute the inner product of two GPU vectors and write the result to either CPU or GPU // vcl_s1 = viennacl::linalg::inner_prod(vcl_vec1, vcl_vec2); s1 = viennacl::linalg::inner_prod(vcl_vec1, vcl_vec2); // // Compute norms: // s1 = viennacl::linalg::norm_1(vcl_vec1); vcl_s2 = viennacl::linalg::norm_2(vcl_vec2); s3 = viennacl::linalg::norm_inf(vcl_vec3); // // Plane rotation of two vectors: // viennacl::linalg::plane_rotation(vcl_vec1, vcl_vec2, 1.1f, 2.3f); // // Use viennacl::vector via the overloaded operators just as you would write it on paper: // //simple expression: vcl_vec1 = vcl_s1 * vcl_vec2 / vcl_s3; //more complicated expression: vcl_vec1 = vcl_vec2 / vcl_s1 + vcl_s2 * (vcl_vec1 - vcl_s2 * vcl_vec2); // // Swap the content of two vectors without a temporary vector: // swap(vcl_vec1, vcl_vec2); // // That's it. Move on to the second tutorial, where dense matrices are explained. // std::cout << "!!!! TUTORIAL 1 COMPLETED SUCCESSFULLY !!!!" << std::endl; exit(EXIT_SUCCESS); return 0; }