void viennacl_gmres(double * result, //output vector mwIndex * cols, //input vector holding column jumpers mwIndex * rows, //input vector holding row indices double *entries, double *rhs, mwSize num_cols, mwSize nnzmax ) { viennacl::vector<double> vcl_rhs(num_cols); viennacl::vector<double> vcl_result(num_cols); viennacl::compressed_matrix<double> vcl_matrix(num_cols, num_cols); //convert from column-wise storage to row-wise storage std::vector< std::map< unsigned int, double > > stl_matrix(num_cols); for (mwIndex j=0; j<num_cols; ++j) { for (mwIndex i = cols[j]; i<cols[j+1]; ++i) stl_matrix[rows[i]][j] = entries[i]; } //now copy matrix to GPU: copy(stl_matrix, vcl_matrix); copy(rhs, rhs + num_cols, vcl_rhs.begin()); stl_matrix.clear(); //clean up this temporary storage //solve it: vcl_result = solve(vcl_matrix, vcl_rhs, viennacl::linalg::gmres_tag(1e-8, 30, 20)); //relative tolerance of 1e-8, krylov space of dimension 30, 20 restarts max. ///////////// copy back to CPU: /////////////////// copy(vcl_result.begin(), vcl_result.end(), result); return; }
void run_tutorial() { /** * Get Eigen matrix and vector types for the provided ScalarType. * Involves a little bit of template-metaprogramming. **/ typedef typename Eigen_dense_matrix<ScalarType>::type EigenMatrix; typedef typename Eigen_vector<ScalarType>::type EigenVector; /** * Create and fill dense matrices from the Eigen library: **/ EigenMatrix eigen_densemat(6, 5); EigenMatrix eigen_densemat2(6, 5); eigen_densemat(0,0) = 2.0; eigen_densemat(0,1) = -1.0; eigen_densemat(1,0) = -1.0; eigen_densemat(1,1) = 2.0; eigen_densemat(1,2) = -1.0; eigen_densemat(2,1) = -1.0; eigen_densemat(2,2) = -1.0; eigen_densemat(2,3) = -1.0; eigen_densemat(3,2) = -1.0; eigen_densemat(3,3) = 2.0; eigen_densemat(3,4) = -1.0; eigen_densemat(5,4) = -1.0; eigen_densemat(4,4) = -1.0; Eigen::Map<EigenMatrix> eigen_densemat_map(eigen_densemat.data(), 6, 5); // same as eigen_densemat, but emulating user-provided buffer /** * Create and fill sparse matrices from the Eigen library: **/ Eigen::SparseMatrix<ScalarType, Eigen::RowMajor> eigen_sparsemat(6, 5); Eigen::SparseMatrix<ScalarType, Eigen::RowMajor> eigen_sparsemat2(6, 5); eigen_sparsemat.reserve(5*2); eigen_sparsemat.insert(0,0) = 2.0; eigen_sparsemat.insert(0,1) = -1.0; eigen_sparsemat.insert(1,1) = 2.0; eigen_sparsemat.insert(1,2) = -1.0; eigen_sparsemat.insert(2,2) = -1.0; eigen_sparsemat.insert(2,3) = -1.0; eigen_sparsemat.insert(3,3) = 2.0; eigen_sparsemat.insert(3,4) = -1.0; eigen_sparsemat.insert(5,4) = -1.0; //eigen_sparsemat.endFill(); /** * Create and fill a few vectors from the Eigen library: **/ EigenVector eigen_rhs(5); Eigen::Map<EigenVector> eigen_rhs_map(eigen_rhs.data(), 5); EigenVector eigen_result(6); EigenVector eigen_temp(6); eigen_rhs(0) = 10.0; eigen_rhs(1) = 11.0; eigen_rhs(2) = 12.0; eigen_rhs(3) = 13.0; eigen_rhs(4) = 14.0; /** * Create the corresponding ViennaCL objects: **/ viennacl::vector<ScalarType> vcl_rhs(5); viennacl::vector<ScalarType> vcl_result(6); viennacl::matrix<ScalarType> vcl_densemat(6, 5); 
viennacl::compressed_matrix<ScalarType> vcl_sparsemat(6, 5); /** * Directly copy the Eigen objects to ViennaCL objects **/ viennacl::copy(&(eigen_rhs[0]), &(eigen_rhs[0]) + 5, vcl_rhs.begin()); // Method 1: via iterator interface (cf. std::copy()) viennacl::copy(eigen_rhs, vcl_rhs); // Method 2: via built-in wrappers (convenience layer) viennacl::copy(eigen_rhs_map, vcl_rhs); // Same as method 2, but for a mapped vector viennacl::copy(eigen_densemat, vcl_densemat); viennacl::copy(eigen_densemat_map, vcl_densemat); //same as above, using mapped matrix viennacl::copy(eigen_sparsemat, vcl_sparsemat); std::cout << "VCL sparsematrix dimensions: " << vcl_sparsemat.size1() << ", " << vcl_sparsemat.size2() << std::endl; // For completeness: Copy matrices from ViennaCL back to Eigen: viennacl::copy(vcl_densemat, eigen_densemat2); viennacl::copy(vcl_sparsemat, eigen_sparsemat2); /** * Run dense matrix-vector products and compare results: **/ eigen_result = eigen_densemat * eigen_rhs; vcl_result = viennacl::linalg::prod(vcl_densemat, vcl_rhs); viennacl::copy(vcl_result, eigen_temp); std::cout << "Difference for dense matrix-vector product: " << (eigen_result - eigen_temp).norm() << std::endl; std::cout << "Difference for dense matrix-vector product (Eigen->ViennaCL->Eigen): " << (eigen_densemat2 * eigen_rhs - eigen_temp).norm() << std::endl; /** * Run sparse matrix-vector products and compare results: **/ eigen_result = eigen_sparsemat * eigen_rhs; vcl_result = viennacl::linalg::prod(vcl_sparsemat, vcl_rhs); viennacl::copy(vcl_result, eigen_temp); std::cout << "Difference for sparse matrix-vector product: " << (eigen_result - eigen_temp).norm() << std::endl; std::cout << "Difference for sparse matrix-vector product (Eigen->ViennaCL->Eigen): " << (eigen_sparsemat2 * eigen_rhs - eigen_temp).norm() << std::endl; }
/** * We first setup the respective matrices in uBLAS matrices and then copy them over to the respective ViennaCL objects. * After that we run a couple of operations (mostly matrix-vector products). **/ int main() { typedef float ScalarType; std::size_t size = 4; /** * Set up ublas objects **/ boost::numeric::ublas::vector<ScalarType> ublas_vec(size); boost::numeric::ublas::matrix<ScalarType> ublas_circulant(size, size); boost::numeric::ublas::matrix<ScalarType> ublas_hankel(size, size); boost::numeric::ublas::matrix<ScalarType> ublas_toeplitz(size, size); boost::numeric::ublas::matrix<ScalarType> ublas_vandermonde(size, size); for (std::size_t i = 0; i < size; i++) for (std::size_t j = 0; j < size; j++) { ublas_circulant(i,j) = static_cast<ScalarType>((i - j + size) % size); ublas_hankel(i,j) = static_cast<ScalarType>((i + j) % (2 * size)); ublas_toeplitz(i,j) = static_cast<ScalarType>(i) - static_cast<ScalarType>(j); ublas_vandermonde(i,j) = std::pow(ScalarType(1.0) + ScalarType(i)/ScalarType(1000.0), ScalarType(j)); } /** * Set up ViennaCL objects **/ viennacl::vector<ScalarType> vcl_vec(size); viennacl::vector<ScalarType> vcl_result(size); viennacl::circulant_matrix<ScalarType> vcl_circulant(size, size); viennacl::hankel_matrix<ScalarType> vcl_hankel(size, size); viennacl::toeplitz_matrix<ScalarType> vcl_toeplitz(size, size); viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde(size, size); // copy matrices: viennacl::copy(ublas_circulant, vcl_circulant); viennacl::copy(ublas_hankel, vcl_hankel); viennacl::copy(ublas_toeplitz, vcl_toeplitz); viennacl::copy(ublas_vandermonde, vcl_vandermonde); // fill vectors: for (std::size_t i = 0; i < size; i++) { ublas_vec[i] = ScalarType(i); vcl_vec[i] = ScalarType(i); } /** * Add circulant matrices using operator overloads: **/ std::cout << "Circulant matrix before addition: " << vcl_circulant << std::endl << std::endl; vcl_circulant += vcl_circulant; std::cout << "Circulant matrix after addition: " << vcl_circulant << 
std::endl << std::endl; /** * Manipulate single entries of structured matrices. * These manipulations are structure-preserving, so any other affected entries are manipulated as well. **/ std::cout << "Hankel matrix before manipulation: " << vcl_hankel << std::endl << std::endl; vcl_hankel(1, 2) = ScalarType(3.14); std::cout << "Hankel matrix after manipulation: " << vcl_hankel << std::endl << std::endl; std::cout << "Vandermonde matrix before manipulation: " << vcl_vandermonde << std::endl << std::endl; vcl_vandermonde(1) = ScalarType(1.1); //NOTE: Write access only via row index std::cout << "Vandermonde matrix after manipulation: " << vcl_vandermonde << std::endl << std::endl; /** * Compute matrix-vector product for a Toeplitz matrix (FFT-accelerated). Similarly for the other matrices. **/ std::cout << "Toeplitz matrix: " << vcl_toeplitz << std::endl; std::cout << "Vector: " << vcl_vec << std::endl << std::endl; vcl_result = viennacl::linalg::prod(vcl_toeplitz, vcl_vec); std::cout << "Result of matrix-vector product: " << vcl_result << std::endl << std::endl; /** * That's it. Print success message and exit. **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
void run_tutorial() { typedef mtl::dense2D<ScalarType> MTL4DenseMatrix; typedef mtl::compressed2D<ScalarType> MTL4SparseMatrix; /** * Create and fill dense matrices from the MTL4 library: **/ mtl::dense2D<ScalarType> mtl4_densemat(5, 5); mtl::dense2D<ScalarType> mtl4_densemat2(5, 5); mtl4_densemat(0,0) = 2.0; mtl4_densemat(0,1) = -1.0; mtl4_densemat(1,0) = -1.0; mtl4_densemat(1,1) = 2.0; mtl4_densemat(1,2) = -1.0; mtl4_densemat(2,1) = -1.0; mtl4_densemat(2,2) = -1.0; mtl4_densemat(2,3) = -1.0; mtl4_densemat(3,2) = -1.0; mtl4_densemat(3,3) = 2.0; mtl4_densemat(3,4) = -1.0; mtl4_densemat(4,4) = -1.0; mtl4_densemat(4,4) = -1.0; /** * Create and fill sparse matrices from the MTL4 library: **/ MTL4SparseMatrix mtl4_sparsemat; set_to_zero(mtl4_sparsemat); mtl4_sparsemat.change_dim(5, 5); MTL4SparseMatrix mtl4_sparsemat2; set_to_zero(mtl4_sparsemat2); mtl4_sparsemat2.change_dim(5, 5); { mtl::matrix::inserter< MTL4SparseMatrix > ins(mtl4_sparsemat); typedef typename mtl::Collection<MTL4SparseMatrix>::value_type ValueType; ins(0,0) << ValueType(2.0); ins(0,1) << ValueType(-1.0); ins(1,1) << ValueType(2.0); ins(1,2) << ValueType(-1.0); ins(2,2) << ValueType(-1.0); ins(2,3) << ValueType(-1.0); ins(3,3) << ValueType(2.0); ins(3,4) << ValueType(-1.0); ins(4,4) << ValueType(-1.0); } /** * Create and fill a few vectors from the MTL4 library: **/ mtl::dense_vector<ScalarType> mtl4_rhs(5, 0.0); mtl::dense_vector<ScalarType> mtl4_result(5, 0.0); mtl::dense_vector<ScalarType> mtl4_temp(5, 0.0); mtl4_rhs(0) = 10.0; mtl4_rhs(1) = 11.0; mtl4_rhs(2) = 12.0; mtl4_rhs(3) = 13.0; mtl4_rhs(4) = 14.0; /** * Create the corresponding ViennaCL objects: **/ viennacl::vector<ScalarType> vcl_rhs(5); viennacl::vector<ScalarType> vcl_result(5); viennacl::matrix<ScalarType> vcl_densemat(5, 5); viennacl::compressed_matrix<ScalarType> vcl_sparsemat(5, 5); /** * Directly copy the MTL4 objects to ViennaCL objects **/ viennacl::copy(&(mtl4_rhs[0]), &(mtl4_rhs[0]) + 5, vcl_rhs.begin()); //method 1: via 
iterator interface (cf. std::copy()) viennacl::copy(mtl4_rhs, vcl_rhs); //method 2: via built-in wrappers (convenience layer) viennacl::copy(mtl4_densemat, vcl_densemat); viennacl::copy(mtl4_sparsemat, vcl_sparsemat); // For completeness: Copy matrices from ViennaCL back to Eigen: viennacl::copy(vcl_densemat, mtl4_densemat2); viennacl::copy(vcl_sparsemat, mtl4_sparsemat2); /** * Run dense matrix-vector products and compare results: **/ mtl4_result = mtl4_densemat * mtl4_rhs; vcl_result = viennacl::linalg::prod(vcl_densemat, vcl_rhs); viennacl::copy(vcl_result, mtl4_temp); mtl4_result -= mtl4_temp; std::cout << "Difference for dense matrix-vector product: " << mtl::two_norm(mtl4_result) << std::endl; mtl4_result = mtl4_densemat2 * mtl4_rhs - mtl4_temp; std::cout << "Difference for dense matrix-vector product (MTL4->ViennaCL->MTL4): " << mtl::two_norm(mtl4_result) << std::endl; /** * Run sparse matrix-vector products and compare results: **/ mtl4_result = mtl4_sparsemat * mtl4_rhs; vcl_result = viennacl::linalg::prod(vcl_sparsemat, vcl_rhs); viennacl::copy(vcl_result, mtl4_temp); mtl4_result -= mtl4_temp; std::cout << "Difference for sparse matrix-vector product: " << mtl::two_norm(mtl4_result) << std::endl; mtl4_result = mtl4_sparsemat2 * mtl4_rhs - mtl4_temp; std::cout << "Difference for sparse matrix-vector product (MTL4->ViennaCL->MTL4): " << mtl::two_norm(mtl4_result) << std::endl; }
void run_test() { // // get Eigen matrix and vector types for the provided ScalarType: // typedef typename Eigen_dense_matrix<ScalarType>::type EigenMatrix; typedef typename Eigen_vector<ScalarType>::type EigenVector; // // Create and fill dense matrices from the Eigen library: // EigenMatrix eigen_densemat(6, 5); EigenMatrix eigen_densemat2(6, 5); eigen_densemat(0,0) = 2.0; eigen_densemat(0,1) = -1.0; eigen_densemat(1,0) = -1.0; eigen_densemat(1,1) = 2.0; eigen_densemat(1,2) = -1.0; eigen_densemat(2,1) = -1.0; eigen_densemat(2,2) = -1.0; eigen_densemat(2,3) = -1.0; eigen_densemat(3,2) = -1.0; eigen_densemat(3,3) = 2.0; eigen_densemat(3,4) = -1.0; eigen_densemat(5,4) = -1.0; eigen_densemat(4,4) = -1.0; // // Create and fill sparse matrices from the Eigen library: // Eigen::SparseMatrix<ScalarType, Eigen::RowMajor> eigen_sparsemat(6, 5); Eigen::SparseMatrix<ScalarType, Eigen::RowMajor> eigen_sparsemat2(6, 5); eigen_sparsemat.reserve(5*2); eigen_sparsemat.insert(0,0) = 2.0; eigen_sparsemat.insert(0,1) = -1.0; eigen_sparsemat.insert(1,1) = 2.0; eigen_sparsemat.insert(1,2) = -1.0; eigen_sparsemat.insert(2,2) = -1.0; eigen_sparsemat.insert(2,3) = -1.0; eigen_sparsemat.insert(3,3) = 2.0; eigen_sparsemat.insert(3,4) = -1.0; eigen_sparsemat.insert(5,4) = -1.0; //eigen_sparsemat.endFill(); // // Create and fill a few vectors from the Eigen library: // EigenVector eigen_rhs(5); EigenVector eigen_result(6); EigenVector eigen_temp(6); eigen_rhs(0) = 10.0; eigen_rhs(1) = 11.0; eigen_rhs(2) = 12.0; eigen_rhs(3) = 13.0; eigen_rhs(4) = 14.0; // // Let us create the ViennaCL analogues: // viennacl::vector<ScalarType> vcl_rhs(5); viennacl::vector<ScalarType> vcl_result(6); viennacl::matrix<ScalarType> vcl_densemat(6, 5); viennacl::compressed_matrix<ScalarType> vcl_sparsemat(6, 5); // // Directly copy the Eigen objects to ViennaCL objects // viennacl::copy(&(eigen_rhs[0]), &(eigen_rhs[0]) + 5, vcl_rhs.begin()); //method 1: via iterator interface (cf. 
std::copy()) viennacl::copy(eigen_rhs, vcl_rhs); //method 2: via built-in wrappers (convenience layer) viennacl::copy(eigen_densemat, vcl_densemat); viennacl::copy(eigen_sparsemat, vcl_sparsemat); std::cout << "VCL sparsematrix dimensions: " << vcl_sparsemat.size1() << ", " << vcl_sparsemat.size2() << std::endl; // For completeness: Copy matrices from ViennaCL back to Eigen: viennacl::copy(vcl_densemat, eigen_densemat2); viennacl::copy(vcl_sparsemat, eigen_sparsemat2); // // Run matrix-vector products and compare results: // eigen_result = eigen_densemat * eigen_rhs; vcl_result = viennacl::linalg::prod(vcl_densemat, vcl_rhs); viennacl::copy(vcl_result, eigen_temp); std::cout << "Difference for dense matrix-vector product: " << (eigen_result - eigen_temp).norm() << std::endl; std::cout << "Difference for dense matrix-vector product (Eigen->ViennaCL->Eigen): " << (eigen_densemat2 * eigen_rhs - eigen_temp).norm() << std::endl; // // Same for sparse matrix: // eigen_result = eigen_sparsemat * eigen_rhs; vcl_result = viennacl::linalg::prod(vcl_sparsemat, vcl_rhs); viennacl::copy(vcl_result, eigen_temp); std::cout << "Difference for sparse matrix-vector product: " << (eigen_result - eigen_temp).norm() << std::endl; std::cout << "Difference for sparse matrix-vector product (Eigen->ViennaCL->Eigen): " << (eigen_sparsemat2 * eigen_rhs - eigen_temp).norm() << std::endl; // // Please have a look at the other tutorials on how to use the ViennaCL types // }
//
// Checks viennacl::hankel_matrix against a dense host reference: matrix-vector product,
// in-place addition, and write access to a single entry.
// Returns EXIT_SUCCESS if every deviation stays below epsilon, EXIT_FAILURE otherwise.
//
int hankel_test(ScalarType epsilon)
{
  std::size_t dim = 7;

  viennacl::hankel_matrix<ScalarType> vcl_hankel1(dim, dim);
  viennacl::hankel_matrix<ScalarType> vcl_hankel2(dim, dim);
  viennacl::vector<ScalarType>        vcl_input(dim);
  viennacl::vector<ScalarType>        vcl_result(dim);

  std::vector<ScalarType> host_vec(dim);
  std::vector<ScalarType> expected(dim);

  dense_matrix<ScalarType> host_a(vcl_hankel1.size1(), vcl_hankel1.size2());
  dense_matrix<ScalarType> host_b(host_a.size1(), host_a.size2());

  // Host reference data:
  for (std::size_t row = 0; row < host_a.size1(); ++row)
  {
    for (std::size_t col = 0; col < host_a.size2(); ++col)
    {
      host_a(row, col) = static_cast<ScalarType>((row + col) % (2 * host_a.size1()));
      host_b(row, col) = host_a(row, col) * host_a(row, col) + ScalarType(1);
    }
  }

  for (std::size_t k = 0; k < host_vec.size(); ++k)
    host_vec[k] = ScalarType(k);

  // Transfer to ViennaCL
  viennacl::copy(host_a, vcl_hankel1);
  viennacl::copy(host_b, vcl_hankel2);
  viennacl::copy(host_vec, vcl_input);

  //
  // Matrix-Vector product:
  //
  vcl_result = viennacl::linalg::prod(vcl_hankel1, vcl_input);

  for (std::size_t row = 0; row < host_a.size1(); ++row)   // reference calculation
  {
    ScalarType acc = 0;
    for (std::size_t col = 0; col < host_a.size2(); ++col)
      acc += host_a(row, col) * host_vec[col];

    expected[row] = acc;
  }

  // From here on host_vec holds the ViennaCL result:
  viennacl::copy(vcl_result, host_vec);
  std::cout << "Matrix-Vector Product: " << diff_max(host_vec, expected);
  if (!(diff_max(host_vec, expected) < epsilon))
  {
    for (std::size_t k = 0; k < host_vec.size(); ++k)
      std::cout << "Should: " << expected[k] << ", is: " << host_vec[k] << std::endl;

    std::cout << " [FAILED]" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << " [OK]" << std::endl;

  //
  // Matrix addition:
  //
  vcl_hankel1 += vcl_hankel2;

  for (std::size_t row = 0; row < host_a.size1(); ++row)   // reference calculation
  {
    for (std::size_t col = 0; col < host_a.size2(); ++col)
      host_a(row, col) += host_b(row, col);
  }

  viennacl::copy(vcl_hankel1, host_b);
  std::cout << "Matrix Addition: " << diff(host_a, host_b);
  if (!(diff(host_a, host_b) < epsilon))
  {
    std::cout << " [FAILED]" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << " [OK]" << std::endl;

  //
  // Per-Element access:
  //
  vcl_hankel1(4,2) = 42;

  // Reference: every entry with the same index sum (4 + 2 = 6) receives the new value.
  for (std::size_t row = 0; row < host_a.size1(); ++row)
  {
    for (std::size_t col = 0; col < host_a.size2(); ++col)
    {
      if ((row + col) % (2 * host_a.size1()) == 6)
        host_a(row, col) = 42;
    }
  }

  viennacl::copy(vcl_hankel1, host_b);
  std::cout << "Element manipulation: " << diff(host_a, host_b);
  if (!(diff(host_a, host_b) < epsilon))
  {
    std::cout << " [FAILED]" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << " [OK]" << std::endl;

  return EXIT_SUCCESS;
}
//
// Checks viennacl::vandermonde_matrix against a dense host reference: matrix-vector
// product and write access via the row index.
// Returns EXIT_SUCCESS if every deviation stays below epsilon, EXIT_FAILURE otherwise.
//
int vandermonde_test(ScalarType epsilon)
{
  std::size_t dim = 61;

  viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde1(dim, dim);
  viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde2(dim, dim);
  viennacl::vector<ScalarType>             vcl_input(dim);
  viennacl::vector<ScalarType>             vcl_result(dim);

  std::vector<ScalarType> host_vec(dim);
  std::vector<ScalarType> expected(dim);

  dense_matrix<ScalarType> host_a(vcl_vandermonde1.size1(), vcl_vandermonde1.size2());
  dense_matrix<ScalarType> host_b(host_a.size1(), host_a.size2());

  // Host reference data: row i holds the powers of a base that depends on i.
  for (std::size_t row = 0; row < host_a.size1(); ++row)
  {
    for (std::size_t col = 0; col < host_a.size2(); ++col)
    {
      host_a(row, col) = std::pow(ScalarType(1.0 + row/1000.0), ScalarType(col));
      host_b(row, col) = std::pow(ScalarType(1.0 - row/2000.0), ScalarType(col));
    }
  }

  for (std::size_t k = 0; k < host_vec.size(); ++k)
    host_vec[k] = ScalarType(k);

  // Transfer to ViennaCL
  viennacl::copy(host_a, vcl_vandermonde1);
  viennacl::copy(host_b, vcl_vandermonde2);
  viennacl::copy(host_vec, vcl_input);

  //
  // Matrix-Vector product:
  //
  vcl_result = viennacl::linalg::prod(vcl_vandermonde1, vcl_input);

  for (std::size_t row = 0; row < host_a.size1(); ++row)   // reference calculation
  {
    ScalarType acc = 0;
    for (std::size_t col = 0; col < host_a.size2(); ++col)
      acc += host_a(row, col) * host_vec[col];

    expected[row] = acc;
  }

  // From here on host_vec holds the ViennaCL result:
  viennacl::copy(vcl_result, host_vec);
  std::cout << "Matrix-Vector Product: " << diff_max(host_vec, expected);
  if (!(diff_max(host_vec, expected) < epsilon))
  {
    for (std::size_t k = 0; k < host_vec.size(); ++k)
      std::cout << "Should: " << expected[k] << ", is: " << host_vec[k] << std::endl;

    std::cout << " [FAILED]" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << " [OK]" << std::endl;

  //
  // Note: Matrix addition does not make sense for a Vandermonde matrix
  //

  //
  // Per-Element access:
  //
  vcl_vandermonde1(4) = static_cast<ScalarType>(1.0001);

  // Reference: row 4 becomes the powers of the new base.
  for (std::size_t col = 0; col < host_a.size2(); ++col)
    host_a(4, col) = std::pow(ScalarType(1.0001), ScalarType(col));

  viennacl::copy(vcl_vandermonde1, host_b);
  std::cout << "Element manipulation: " << diff(host_a, host_b);
  if (!(diff(host_a, host_b) < epsilon))
  {
    std::cout << " [FAILED]" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << " [OK]" << std::endl;

  return EXIT_SUCCESS;
}
//
// Checks viennacl::toeplitz_matrix against a dense host reference: matrix-vector product,
// in-place addition, and write access to a single entry.
// Returns EXIT_SUCCESS if every deviation stays below epsilon, EXIT_FAILURE otherwise.
//
int toeplitz_test(ScalarType epsilon)
{
  std::size_t dim = 47;

  viennacl::toeplitz_matrix<ScalarType> vcl_toeplitz1(dim, dim);
  viennacl::toeplitz_matrix<ScalarType> vcl_toeplitz2(dim, dim);
  viennacl::vector<ScalarType>          vcl_input(dim);
  viennacl::vector<ScalarType>          vcl_result(dim);

  std::vector<ScalarType> host_vec(dim);
  std::vector<ScalarType> expected(dim);

  dense_matrix<ScalarType> host_a(dim, dim);
  dense_matrix<ScalarType> host_b(dim, dim);

  // Host reference data:
  for (std::size_t row = 0; row < dim; ++row)
  {
    for (std::size_t col = 0; col < dim; ++col)
    {
      host_a(row, col) = static_cast<ScalarType>(row) - static_cast<ScalarType>(col);
      host_b(row, col) = host_a(row, col) * host_a(row, col) + ScalarType(1);
    }
  }

  for (std::size_t k = 0; k < dim; ++k)
    host_vec[k] = ScalarType(k);

  // Transfer to ViennaCL
  viennacl::copy(host_a, vcl_toeplitz1);
  viennacl::copy(host_b, vcl_toeplitz2);
  viennacl::copy(host_vec, vcl_input);

  //
  // Matrix-Vector product:
  //
  vcl_result = viennacl::linalg::prod(vcl_toeplitz1, vcl_input);

  for (std::size_t row = 0; row < host_a.size1(); ++row)   // reference calculation
  {
    ScalarType acc = 0;
    for (std::size_t col = 0; col < host_a.size2(); ++col)
      acc += host_a(row, col) * host_vec[col];

    expected[row] = acc;
  }

  // From here on host_vec holds the ViennaCL result:
  viennacl::copy(vcl_result, host_vec);
  std::cout << "Matrix-Vector Product: " << diff_max(host_vec, expected);
  if (!(diff_max(host_vec, expected) < epsilon))
  {
    for (std::size_t k = 0; k < host_vec.size(); ++k)
      std::cout << "Should: " << expected[k] << ", is: " << host_vec[k] << std::endl;

    std::cout << " [FAILED]" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << " [OK]" << std::endl;

  //
  // Matrix addition:
  //
  vcl_toeplitz1 += vcl_toeplitz2;

  for (std::size_t row = 0; row < host_a.size1(); ++row)   // reference calculation
  {
    for (std::size_t col = 0; col < host_a.size2(); ++col)
      host_a(row, col) += host_b(row, col);
  }

  viennacl::copy(vcl_toeplitz1, host_b);
  std::cout << "Matrix Addition: " << diff(host_a, host_b);
  if (!(diff(host_a, host_b) < epsilon))
  {
    std::cout << " [FAILED]" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << " [OK]" << std::endl;

  //
  // Per-Element access:
  //
  vcl_toeplitz1(2,4) = 42;

  // Reference: every entry two columns right of the main diagonal receives the new value.
  for (std::size_t row = 0; row < host_a.size1(); ++row)
  {
    if (row + 2 < host_a.size2())
      host_a(row, row + 2) = 42;
  }

  viennacl::copy(vcl_toeplitz1, host_b);
  std::cout << "Element manipulation: " << diff(host_a, host_b);
  if (!(diff(host_a, host_b) < epsilon))
  {
    std::cout << " [FAILED]" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << " [OK]" << std::endl;

  return EXIT_SUCCESS;
}
int main() { // // Print some device info // std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << " Device Info" << std::endl; std::cout << "----------------------------------------------" << std::endl; #ifdef VIENNACL_WITH_OPENCL std::cout << viennacl::ocl::current_device().info() << std::endl; #endif typedef float ScalarType; // feel free to change this to double if supported by your device // // Set up the matrices and vectors for the iterative solvers (cf. iterative.cpp) // boost::numeric::ublas::vector<ScalarType> ublas_vec, ublas_result; boost::numeric::ublas::compressed_matrix<ScalarType> ublas_matrix; viennacl::linalg::cg_tag cg_solver; viennacl::linalg::amg_tag amg_tag; viennacl::linalg::amg_precond<boost::numeric::ublas::compressed_matrix<ScalarType> > ublas_amg; // Read matrix if (!viennacl::io::read_matrix_market_file(ublas_matrix, "../examples/testdata/mat65k.mtx")) { std::cout << "Error reading Matrix file" << std::endl; return EXIT_FAILURE; } // Set up rhs and result vector if (!readVectorFromFile("../examples/testdata/rhs65025.txt", ublas_vec)) { std::cout << "Error reading RHS file" << std::endl; return 0; } if (!readVectorFromFile("../examples/testdata/result65025.txt", ublas_result)) { std::cout << "Error reading Result file" << std::endl; return 0; } viennacl::vector<ScalarType> vcl_vec(ublas_vec.size()); viennacl::vector<ScalarType> vcl_result(ublas_vec.size()); viennacl::compressed_matrix<ScalarType> vcl_compressed_matrix(ublas_vec.size(), ublas_vec.size()); // Copy to GPU viennacl::copy(ublas_matrix, vcl_compressed_matrix); viennacl::copy(ublas_vec, vcl_vec); viennacl::copy(ublas_result, vcl_result); // // Run solver without preconditioner // std::cout << "-- CG solver (CPU, no preconditioner) --" << std::endl; run_solver(ublas_matrix, ublas_vec, ublas_result, cg_solver, viennacl::linalg::no_precond()); std::cout << "-- CG solver (GPU, no preconditioner) --" << std::endl; 
run_solver(vcl_compressed_matrix, vcl_vec, vcl_result, cg_solver, viennacl::linalg::no_precond()); // // With AMG Preconditioner RS+DIRECT // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS, // coarsening strategy VIENNACL_AMG_INTERPOL_DIRECT, // interpolation strategy 0.25, // strength of dependence threshold 0.2, // interpolation weight 0.67, // jacobi smoother weight 3, // presmoothing steps 3, // postsmoothing steps 0); // number of coarse levels to be used (0: automatically use as many as reasonable) run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS COARSENING, DIRECT INTERPOLATION", amg_tag); // // With AMG Preconditioner RS+CLASSIC // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS, VIENNACL_AMG_INTERPOL_CLASSIC, 0.25, 0.2, 0.67, 3, 3, 0); run_amg ( cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS COARSENING, CLASSIC INTERPOLATION", amg_tag); // // With AMG Preconditioner ONEPASS+DIRECT // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_ONEPASS, VIENNACL_AMG_INTERPOL_DIRECT,0.25, 0.2, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "ONEPASS COARSENING, DIRECT INTERPOLATION", amg_tag); // // With AMG Preconditioner RS0+DIRECT // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS0, VIENNACL_AMG_INTERPOL_DIRECT, 0.25, 0.2, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS0 COARSENING, DIRECT INTERPOLATION", amg_tag); // // With AMG Preconditioner RS3+DIRECT // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS3, VIENNACL_AMG_INTERPOL_DIRECT, 0.25, 0.2, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS3 COARSENING, DIRECT INTERPOLATION", amg_tag); // // With AMG Preconditioner AG // amg_tag = 
viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_AG, VIENNACL_AMG_INTERPOL_AG, 0.08, 0, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "AG COARSENING, AG INTERPOLATION", amg_tag); // // With AMG Preconditioner SA // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_AG, VIENNACL_AMG_INTERPOL_SA, 0.08, 0.67, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "AG COARSENING, SA INTERPOLATION",amg_tag); // // That's it. // std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int main() { typedef float ScalarType; std::size_t size = 4; // // Set up ublas objects // boost::numeric::ublas::vector<ScalarType> ublas_vec(size); boost::numeric::ublas::matrix<ScalarType> ublas_circulant(size, size); boost::numeric::ublas::matrix<ScalarType> ublas_hankel(size, size); boost::numeric::ublas::matrix<ScalarType> ublas_toeplitz(size, size); boost::numeric::ublas::matrix<ScalarType> ublas_vandermonde(size, size); for(std::size_t i = 0; i < size; i++) for(std::size_t j = 0; j < size; j++) { ublas_circulant(i,j) = static_cast<ScalarType>((i - j + size) % size); ublas_hankel(i,j) = static_cast<ScalarType>((i + j) % (2 * size)); ublas_toeplitz(i,j) = static_cast<ScalarType>(i) - static_cast<ScalarType>(j); ublas_vandermonde(i,j) = pow(ScalarType(1.0 + i/1000.0), ScalarType(j)); } // // Set up ViennaCL objects // viennacl::vector<ScalarType> vcl_vec(size); viennacl::vector<ScalarType> vcl_result(size); viennacl::circulant_matrix<ScalarType> vcl_circulant(size, size); viennacl::hankel_matrix<ScalarType> vcl_hankel(size, size); viennacl::toeplitz_matrix<ScalarType> vcl_toeplitz(size, size); viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde(size, size); // copy matrices: viennacl::copy(ublas_circulant, vcl_circulant); viennacl::copy(ublas_hankel, vcl_hankel); viennacl::copy(ublas_toeplitz, vcl_toeplitz); viennacl::copy(ublas_vandermonde, vcl_vandermonde); // fill vectors: for(std::size_t i = 0; i < size; i++) { ublas_vec[i] = ScalarType(i); vcl_vec[i] = ScalarType(i); } // // Add matrices: // std::cout << "Circulant matrix before addition: " << vcl_circulant << std::endl << std::endl; vcl_circulant += vcl_circulant; std::cout << "Circulant matrix after addition: " << vcl_circulant << std::endl << std::endl; // // Manipulate single entry // std::cout << "Hankel matrix before manipulation: " << vcl_hankel << std::endl << std::endl; vcl_hankel(1, 2) = ScalarType(3.14); std::cout << "Hankel matrix after manipulation: " << vcl_hankel << std::endl << 
std::endl; std::cout << "Vandermonde matrix before manipulation: " << vcl_vandermonde << std::endl << std::endl; vcl_vandermonde(1) = ScalarType(1.1); //NOTE: Write access only via row index std::cout << "Vandermonde matrix after manipulation: " << vcl_vandermonde << std::endl << std::endl; // // Compute matrix-vector product (FFT-accelerated) // std::cout << "Toeplitz matrix: " << vcl_toeplitz << std::endl; std::cout << "Vector: " << vcl_vec << std::endl << std::endl; vcl_result = viennacl::linalg::prod(vcl_toeplitz, vcl_vec); std::cout << "Result of matrix-vector product: " << vcl_result << std::endl << std::endl; // // That's it. // std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
/** * With this let us go right to main(): **/ int main() { typedef float ScalarType; /** * <h2>Part 1: Set up a custom context</h2> * * The following is rather lengthy because OpenCL is a fairly low-level framework. * For comparison, the subsequent code explicitly performs the OpenCL setup that is done * in the background within the 'custom_kernels'-tutorial **/ //manually set up a custom OpenCL context: std::vector<cl_device_id> device_id_array; //get all available devices viennacl::ocl::platform pf; std::cout << "Platform info: " << pf.info() << std::endl; std::vector<viennacl::ocl::device> devices = pf.devices(CL_DEVICE_TYPE_DEFAULT); std::cout << devices[0].name() << std::endl; std::cout << "Number of devices for custom context: " << devices.size() << std::endl; //set up context using all found devices: for (std::size_t i=0; i<devices.size(); ++i) { device_id_array.push_back(devices[i].id()); } std::cout << "Creating context..." << std::endl; cl_int err; cl_context my_context = clCreateContext(0, cl_uint(device_id_array.size()), &(device_id_array[0]), NULL, NULL, &err); VIENNACL_ERR_CHECK(err); //create two Vectors: unsigned int vector_size = 10; std::vector<ScalarType> vec1(vector_size); std::vector<ScalarType> vec2(vector_size); std::vector<ScalarType> result(vector_size); // // fill the operands vec1 and vec2: // for (unsigned int i=0; i<vector_size; ++i) { vec1[i] = static_cast<ScalarType>(i); vec2[i] = static_cast<ScalarType>(vector_size-i); } // // create memory in OpenCL context: // cl_mem mem_vec1 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(vec1[0]), &err); VIENNACL_ERR_CHECK(err); cl_mem mem_vec2 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(vec2[0]), &err); VIENNACL_ERR_CHECK(err); cl_mem mem_result = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(result[0]), &err); 
VIENNACL_ERR_CHECK(err); // // create a command queue for each device: // std::vector<cl_command_queue> queues(devices.size()); for (std::size_t i=0; i<devices.size(); ++i) { queues[i] = clCreateCommandQueue(my_context, devices[i].id(), 0, &err); VIENNACL_ERR_CHECK(err); } // // create and build a program in the context: // std::size_t source_len = std::string(my_compute_program).length(); cl_program my_prog = clCreateProgramWithSource(my_context, 1, &my_compute_program, &source_len, &err); err = clBuildProgram(my_prog, 0, NULL, NULL, NULL, NULL); /* char buffer[1024]; cl_build_status status; clGetProgramBuildInfo(my_prog, devices[1].id(), CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL); clGetProgramBuildInfo(my_prog, devices[1].id(), CL_PROGRAM_BUILD_LOG, sizeof(char)*1024, &buffer, NULL); std::cout << "Build Scalar: Err = " << err << " Status = " << status << std::endl; std::cout << "Log: " << buffer << std::endl;*/ VIENNACL_ERR_CHECK(err); // // create a kernel from the program: // const char * kernel_name = "elementwise_prod"; cl_kernel my_kernel = clCreateKernel(my_prog, kernel_name, &err); VIENNACL_ERR_CHECK(err); // // Execute elementwise_prod kernel on first queue: result = vec1 .* vec2; // err = clSetKernelArg(my_kernel, 0, sizeof(cl_mem), (void*)&mem_vec1); VIENNACL_ERR_CHECK(err); err = clSetKernelArg(my_kernel, 1, sizeof(cl_mem), (void*)&mem_vec2); VIENNACL_ERR_CHECK(err); err = clSetKernelArg(my_kernel, 2, sizeof(cl_mem), (void*)&mem_result); VIENNACL_ERR_CHECK(err); err = clSetKernelArg(my_kernel, 3, sizeof(unsigned int), (void*)&vector_size); VIENNACL_ERR_CHECK(err); std::size_t global_size = vector_size; std::size_t local_size = vector_size; err = clEnqueueNDRangeKernel(queues[0], my_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); VIENNACL_ERR_CHECK(err); // // Read and output result: // err = clEnqueueReadBuffer(queues[0], mem_vec1, CL_TRUE, 0, sizeof(ScalarType)*vector_size, &(vec1[0]), 0, NULL, NULL); 
VIENNACL_ERR_CHECK(err); err = clEnqueueReadBuffer(queues[0], mem_result, CL_TRUE, 0, sizeof(ScalarType)*vector_size, &(result[0]), 0, NULL, NULL); VIENNACL_ERR_CHECK(err); std::cout << "vec1 : "; for (std::size_t i=0; i<vec1.size(); ++i) std::cout << vec1[i] << " "; std::cout << std::endl; std::cout << "vec2 : "; for (std::size_t i=0; i<vec2.size(); ++i) std::cout << vec2[i] << " "; std::cout << std::endl; std::cout << "result: "; for (std::size_t i=0; i<result.size(); ++i) std::cout << result[i] << " "; std::cout << std::endl; /** * <h2>Part 2: Reuse Custom OpenCL Context with ViennaCL</h2> * * To let ViennaCL reuse the previously created context, we need to make it known to ViennaCL \em before any ViennaCL objects are created. * We inject the custom context as the context with default id '0' when using viennacl::ocl::switch_context(). **/ viennacl::ocl::setup_context(0, my_context, device_id_array, queues); viennacl::ocl::switch_context(0); //activate the new context (only mandatory with context-id not equal to zero) /** * Check that ViennaCL really uses the new context: **/ std::cout << "Existing context: " << my_context << std::endl; std::cout << "ViennaCL uses context: " << viennacl::ocl::current_context().handle().get() << std::endl; /** * Wrap existing OpenCL objects into ViennaCL: **/ viennacl::vector<ScalarType> vcl_vec1(mem_vec1, vector_size); viennacl::vector<ScalarType> vcl_vec2(mem_vec2, vector_size); viennacl::vector<ScalarType> vcl_result(mem_result, vector_size); viennacl::scalar<ScalarType> vcl_s = 2.0; std::cout << "Standard vector operations within ViennaCL:" << std::endl; vcl_result = vcl_s * vcl_vec1 + vcl_vec2; std::cout << "vec1 : "; std::cout << vcl_vec1 << std::endl; std::cout << "vec2 : "; std::cout << vcl_vec2 << std::endl; std::cout << "result: "; std::cout << vcl_result << std::endl; /** * We can also reuse the existing elementwise_prod kernel. 
* Therefore, we first have to make the existing program known to ViennaCL * For more details on the three lines, see tutorial 'custom-kernels' **/ std::cout << "Using existing kernel within the OpenCL backend of ViennaCL:" << std::endl; viennacl::ocl::program & my_vcl_prog = viennacl::ocl::current_context().add_program(my_prog, "my_compute_program"); viennacl::ocl::kernel & my_vcl_kernel = my_vcl_prog.add_kernel(my_kernel, "elementwise_prod"); viennacl::ocl::enqueue(my_vcl_kernel(vcl_vec1, vcl_vec2, vcl_result, static_cast<cl_uint>(vcl_vec1.size()))); //Note that std::size_t might differ between host and device. Thus, a cast to cl_uint is necessary here. std::cout << "vec1 : "; std::cout << vcl_vec1 << std::endl; std::cout << "vec2 : "; std::cout << vcl_vec2 << std::endl; std::cout << "result: "; std::cout << vcl_result << std::endl; /** * Since a linear piece of memory can be interpreted in several ways, * we will now create a 3x3 row-major matrix out of the linear memory in mem_vec1/ * The first three entries in vcl_vec2 and vcl_result are used to carry out matrix-vector products: **/ viennacl::matrix<ScalarType> vcl_matrix(mem_vec1, 3, 3); vcl_vec2.resize(3); //note that the resize operation leads to new memory, thus vcl_vec2 is now at a different memory location (values are copied) vcl_result.resize(3); //note that the resize operation leads to new memory, thus vcl_vec2 is now at a different memory location (values are copied) vcl_result = viennacl::linalg::prod(vcl_matrix, vcl_vec2); std::cout << "result of matrix-vector product: "; std::cout << vcl_result << std::endl; /** * Any further operations can be carried out in the same way. * Just keep in mind that any resizing of vectors or matrices leads to a reallocation of the underlying memory buffer, through which the 'wrapper' is lost. **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }