/** @brief Constructor * @param A matrix whose approximate inverse is calculated. Must be quadratic. * @param tag SPAI configuration tag */ fspai_precond(const MatrixType & A, const fspai_tag & tag) : tag_(tag), L(viennacl::traits::context(A)), L_trans(viennacl::traits::context(A)), temp_apply_vec_(A.size1(), viennacl::traits::context(A)) { //UBLASSparseMatrixType ubls_A; UBLASSparseMatrixType ublas_A(A.size1(), A.size2()); UBLASSparseMatrixType pA(A.size1(), A.size2()); UBLASSparseMatrixType ublas_L(A.size1(), A.size2()); UBLASSparseMatrixType ublas_L_trans(A.size1(), A.size2()); viennacl::copy(A, ublas_A); //viennacl::copy(ubls_A, vcl_A); //vcl_At = viennacl::linalg::prod(vcl_A, vcl_A); //vcl_pA = viennacl::linalg::prod(vcl_A, vcl_At); //viennacl::copy(vcl_pA, pA); pA = ublas_A; //execute SPAI with ublas matrix types viennacl::linalg::detail::spai::computeFSPAI(ublas_A, pA, ublas_L, ublas_L_trans, tag_); //copy back to GPU viennacl::copy(ublas_L, L); viennacl::copy(ublas_L_trans, L_trans); }
int main (int, const char **) { typedef float ScalarType; //feel free to change this to 'double' if supported by your hardware typedef boost::numeric::ublas::matrix<ScalarType> MatrixType; typedef viennacl::matrix<ScalarType, viennacl::row_major> VCLMatrixType; std::size_t dim_large = 5; std::size_t dim_small = 3; // // Setup ublas objects and fill with data: // MatrixType ublas_A(dim_large, dim_large); MatrixType ublas_B(dim_small, dim_small); MatrixType ublas_C(dim_large, dim_small); MatrixType ublas_D(dim_small, dim_large); for (std::size_t i=0; i<ublas_A.size1(); ++i) for (std::size_t j=0; j<ublas_A.size2(); ++j) ublas_A(i,j) = static_cast<ScalarType>((i+1) + (j+1)*(i+1)); for (std::size_t i=0; i<ublas_B.size1(); ++i) for (std::size_t j=0; j<ublas_B.size2(); ++j) ublas_B(i,j) = static_cast<ScalarType>((i+1) + (j+1)*(i+1)); for (std::size_t i=0; i<ublas_C.size1(); ++i) for (std::size_t j=0; j<ublas_C.size2(); ++j) ublas_C(i,j) = static_cast<ScalarType>((j+2) + (j+1)*(i+1)); for (std::size_t i=0; i<ublas_D.size1(); ++i) for (std::size_t j=0; j<ublas_D.size2(); ++j) ublas_D(i,j) = static_cast<ScalarType>((j+2) + (j+1)*(i+1)); // // Extract submatrices using the ranges in ublas // boost::numeric::ublas::range ublas_r1(0, dim_small); //the first 'dim_small' entries boost::numeric::ublas::range ublas_r2(dim_large - dim_small, dim_large); //the last 'dim_small' entries boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub1(ublas_A, ublas_r1, ublas_r1); //upper left part of A boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub2(ublas_A, ublas_r2, ublas_r2); //lower right part of A boost::numeric::ublas::matrix_range<MatrixType> ublas_C_sub(ublas_C, ublas_r1, ublas_r1); //upper left part of C boost::numeric::ublas::matrix_range<MatrixType> ublas_D_sub(ublas_D, ublas_r1, ublas_r1); //upper left part of D // // Setup ViennaCL objects // VCLMatrixType vcl_A(dim_large, dim_large); VCLMatrixType vcl_B(dim_small, dim_small); VCLMatrixType vcl_C(dim_large, dim_small); VCLMatrixType vcl_D(dim_small, dim_large); viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); viennacl::copy(ublas_D, vcl_D); // // Extract submatrices using the ranges in ViennaCL // viennacl::range vcl_r1(0, dim_small); //the first 'dim_small' entries viennacl::range vcl_r2(dim_large - dim_small, dim_large); //the last 'dim_small' entries viennacl::matrix_range<VCLMatrixType> vcl_A_sub1(vcl_A, vcl_r1, vcl_r1); //upper left part of A viennacl::matrix_range<VCLMatrixType> vcl_A_sub2(vcl_A, vcl_r2, vcl_r2); //lower right part of A viennacl::matrix_range<VCLMatrixType> vcl_C_sub(vcl_C, vcl_r1, vcl_r1); //upper left part of C viennacl::matrix_range<VCLMatrixType> vcl_D_sub(vcl_D, vcl_r1, vcl_r1); //upper left part of D // // Copy from ublas to submatrices and back: // ublas_A_sub1 = ublas_B; viennacl::copy(ublas_B, vcl_A_sub1); viennacl::copy(vcl_A_sub1, ublas_B); // // Addition: // // range to range: ublas_A_sub2 += ublas_A_sub2; vcl_A_sub2 += vcl_A_sub2; // range to matrix: ublas_B += ublas_A_sub2; vcl_B += vcl_A_sub2; // // use matrix range with matrix-matrix product: // ublas_A_sub1 += prod(ublas_C_sub, ublas_D_sub); vcl_A_sub1 += viennacl::linalg::prod(vcl_C_sub, vcl_D_sub); // // Print result matrices: // std::cout << "Result ublas: " << ublas_A << std::endl; std::cout << "Result ViennaCL: " << vcl_A << std::endl; // // That's it. // std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int main (int, const char **) { typedef double ScalarType; //feel free to change this to 'double' if supported by your hardware typedef boost::numeric::ublas::matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::matrix<ScalarType, viennacl::column_major> VCLMatrixType; typedef viennacl::vector<ScalarType> VCLVectorType; std::size_t rows = 113; //number of rows in the matrix std::size_t cols = 54; //number of columns // // Create matrices with some data // MatrixType ublas_A(rows, cols); MatrixType Q(rows, rows); MatrixType R(rows, cols); // Some random data with a bit of extra weight on the diagonal for (std::size_t i=0; i<rows; ++i) { for (std::size_t j=0; j<cols; ++j) { ublas_A(i,j) = -1.0 + (i+1)*(j+1) + ( (rand() % 1000) - 500.0) / 1000.0; if (i == j) ublas_A(i,j) += 10.0; R(i,j) = 0.0; } for (std::size_t j=0; j<rows; ++j) Q(i,j) = 0.0; } // keep initial input matrix for comparison MatrixType ublas_A_backup(ublas_A); // // Setup the matrix in ViennaCL: // VCLVectorType dummy(10); VCLMatrixType vcl_A(ublas_A.size1(), ublas_A.size2()); viennacl::copy(ublas_A, vcl_A); // // Compute QR factorization of A. A is overwritten with Householder vectors. Coefficients are returned and a block size of 3 is used. // Note that at the moment the number of columns of A must be divisible by the block size // std::cout << "--- Boost.uBLAS ---" << std::endl; std::vector<ScalarType> ublas_betas = viennacl::linalg::inplace_qr(ublas_A); //computes the QR factorization // // A check for the correct result: // viennacl::linalg::recoverQ(ublas_A, ublas_betas, Q, R); MatrixType ublas_QR = prod(Q, R); double ublas_error = check(ublas_QR, ublas_A_backup); std::cout << "Max rel error (ublas): " << ublas_error << std::endl; // // QR factorization in ViennaCL using Boost.uBLAS for the panel factorization // std::cout << "--- Hybrid (default) ---" << std::endl; viennacl::copy(ublas_A_backup, vcl_A); std::vector<ScalarType> hybrid_betas = viennacl::linalg::inplace_qr(vcl_A); // // A check for the correct result: // viennacl::copy(vcl_A, ublas_A); Q.clear(); R.clear(); viennacl::linalg::recoverQ(ublas_A, hybrid_betas, Q, R); double hybrid_error = check(ublas_QR, ublas_A_backup); std::cout << "Max rel error (hybrid): " << hybrid_error << std::endl; // // That's it. // std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
/** * We set up a random matrix using Boost.uBLAS and use it to initialize a ViennaCL matrix. * Then we compute the QR factorization directly for the uBLAS matrix as well as the ViennaCL matrix. **/ int main (int, const char **) { typedef double ScalarType; //feel free to change this to 'double' if supported by your hardware typedef boost::numeric::ublas::matrix<ScalarType> MatrixType; typedef viennacl::matrix<ScalarType, viennacl::column_major> VCLMatrixType; std::size_t rows = 113; // number of rows in the matrix std::size_t cols = 54; // number of columns /** * Create uBLAS matrices with some random input data. **/ MatrixType ublas_A(rows, cols); MatrixType Q(rows, rows); MatrixType R(rows, cols); // Some random data with a bit of extra weight on the diagonal for (std::size_t i=0; i<rows; ++i) { for (std::size_t j=0; j<cols; ++j) { ublas_A(i,j) = ScalarType(-1.0) + ScalarType((i+1)*(j+1)) + ScalarType( (rand() % 1000) - 500.0) / ScalarType(1000.0); if (i == j) ublas_A(i,j) += ScalarType(10.0); R(i,j) = 0.0; } for (std::size_t j=0; j<rows; ++j) Q(i,j) = ScalarType(0.0); } // keep initial input matrix for comparison MatrixType ublas_A_backup(ublas_A); /** * Setup the matrix in ViennaCL and copy the data from the uBLAS matrix: **/ VCLMatrixType vcl_A(ublas_A.size1(), ublas_A.size2()); viennacl::copy(ublas_A, vcl_A); /** * <h2>QR Factorization with Boost.uBLAS Matrices</h2> * Compute QR factorization of A. A is overwritten with Householder vectors. Coefficients are returned and a block size of 3 is used. * Note that at the moment the number of columns of A must be divisible by the block size **/ std::cout << "--- Boost.uBLAS ---" << std::endl; std::vector<ScalarType> ublas_betas = viennacl::linalg::inplace_qr(ublas_A); //computes the QR factorization /** * Let us check for the correct result: **/ viennacl::linalg::recoverQ(ublas_A, ublas_betas, Q, R); MatrixType ublas_QR = prod(Q, R); double ublas_error = check(ublas_QR, ublas_A_backup); std::cout << "Maximum relative error (ublas): " << ublas_error << std::endl; /** * <h2>QR Factorization with Boost.uBLAS Matrices</h2> * We now compute the QR factorization from a ViennaCL matrix. Internally it uses Boost.uBLAS for the panel factorization. **/ std::cout << "--- Hybrid (default) ---" << std::endl; viennacl::copy(ublas_A_backup, vcl_A); std::vector<ScalarType> hybrid_betas = viennacl::linalg::inplace_qr(vcl_A); /** * Let us check for the correct result: **/ viennacl::copy(vcl_A, ublas_A); Q.clear(); R.clear(); viennacl::linalg::recoverQ(ublas_A, hybrid_betas, Q, R); double hybrid_error = check(ublas_QR, ublas_A_backup); std::cout << "Maximum relative error (hybrid): " << hybrid_error << std::endl; /** * That's it. Print a success message and exit. **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int main (int, const char **) { typedef float ScalarType; //feel free to change this to 'double' if supported by your hardware typedef boost::numeric::ublas::matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::matrix<ScalarType, viennacl::column_major> VCLMatrixType; typedef viennacl::vector<ScalarType> VCLVectorType; // // Create vectors and matrices with data, cf. http://tutorial.math.lamar.edu/Classes/LinAlg/QRDecomposition.aspx // VectorType ublas_b(4); ublas_b(0) = -4; ublas_b(1) = 2; ublas_b(2) = 5; ublas_b(3) = -1; MatrixType ublas_A(4, 3); MatrixType Q = boost::numeric::ublas::zero_matrix<ScalarType>(4, 4); MatrixType R = boost::numeric::ublas::zero_matrix<ScalarType>(4, 3); ublas_A(0, 0) = 2; ublas_A(0, 1) = -1; ublas_A(0, 2) = 1; ublas_A(1, 0) = 1; ublas_A(1, 1) = -5; ublas_A(1, 2) = 2; ublas_A(2, 0) = -3; ublas_A(2, 1) = 1; ublas_A(2, 2) = -4; ublas_A(3, 0) = 1; ublas_A(3, 1) = -1; ublas_A(3, 2) = 1; // // Setup the matrix in ViennaCL: // VCLVectorType vcl_b(ublas_b.size()); VCLMatrixType vcl_A(ublas_A.size1(), ublas_A.size2()); viennacl::copy(ublas_b, vcl_b); viennacl::copy(ublas_A, vcl_A); //////////// Part 1: Use Boost.uBLAS for all computations //////////////// std::cout << "--- Boost.uBLAS ---" << std::endl; std::vector<ScalarType> ublas_betas = viennacl::linalg::inplace_qr(ublas_A); //computes the QR factorization // compute modified RHS of the minimization problem: // b' := Q^T b viennacl::linalg::inplace_qr_apply_trans_Q(ublas_A, ublas_betas, ublas_b); // Final step: triangular solve: Rx = b'', where b'' are the first three entries in b' // We only need the upper left square part of A, which defines the upper triangular matrix R boost::numeric::ublas::range ublas_range(0, 3); boost::numeric::ublas::matrix_range<MatrixType> ublas_R(ublas_A, ublas_range, ublas_range); boost::numeric::ublas::vector_range<VectorType> ublas_b2(ublas_b, ublas_range); boost::numeric::ublas::inplace_solve(ublas_R, ublas_b2, boost::numeric::ublas::upper_tag()); std::cout << "Result: " << ublas_b2 << std::endl; //////////// Part 2: Use ViennaCL types for BLAS 3 computations, but use Boost.uBLAS for the panel factorization //////////////// std::cout << "--- ViennaCL (hybrid implementation) ---" << std::endl; std::vector<ScalarType> hybrid_betas = viennacl::linalg::inplace_qr(vcl_A); // compute modified RHS of the minimization problem: // b := Q^T b viennacl::linalg::inplace_qr_apply_trans_Q(vcl_A, hybrid_betas, vcl_b); // Final step: triangular solve: Rx = b'. // We only need the upper part of A such that R is a square matrix viennacl::range vcl_range(0, 3); viennacl::matrix_range<VCLMatrixType> vcl_R(vcl_A, vcl_range, vcl_range); viennacl::vector_range<VCLVectorType> vcl_b2(vcl_b, vcl_range); viennacl::linalg::inplace_solve(vcl_R, vcl_b2, viennacl::linalg::upper_tag()); std::cout << "Result: " << vcl_b2 << std::endl; // // That's it. // std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
/** * The minimization problem of finding x such that \f$ \Vert Ax - b \Vert \f$ is solved as follows: * - Compute the QR-factorization of A = QR. * - Compute \f$ b' = Q^{\mathrm{T}} b \f$ for the equivalent minimization problem \f$ \Vert Rx - Q^{\mathrm{T}} b \f$. * - Solve the triangular system \f$ \tilde{R} x = b' \f$, where \f$ \tilde{R} \f$ is the upper square matrix of R. * **/ int main (int, const char **) { typedef float ScalarType; //feel free to change this to 'double' if supported by your hardware typedef boost::numeric::ublas::matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::matrix<ScalarType, viennacl::column_major> VCLMatrixType; typedef viennacl::vector<ScalarType> VCLVectorType; /** * Create vectors and matrices with data: **/ VectorType ublas_b(4); ublas_b(0) = -4; ublas_b(1) = 2; ublas_b(2) = 5; ublas_b(3) = -1; MatrixType ublas_A(4, 3); ublas_A(0, 0) = 2; ublas_A(0, 1) = -1; ublas_A(0, 2) = 1; ublas_A(1, 0) = 1; ublas_A(1, 1) = -5; ublas_A(1, 2) = 2; ublas_A(2, 0) = -3; ublas_A(2, 1) = 1; ublas_A(2, 2) = -4; ublas_A(3, 0) = 1; ublas_A(3, 1) = -1; ublas_A(3, 2) = 1; /** * Setup the matrix and vector with ViennaCL objects and copy the data from the uBLAS objects: **/ VCLVectorType vcl_b(ublas_b.size()); VCLMatrixType vcl_A(ublas_A.size1(), ublas_A.size2()); viennacl::copy(ublas_b, vcl_b); viennacl::copy(ublas_A, vcl_A); /** * <h2>Option 1: Using Boost.uBLAS</h2> * * The implementation in ViennaCL accepts both uBLAS and ViennaCL types. * We start with a single-threaded implementation using Boost.uBLAS. **/ std::cout << "--- Boost.uBLAS ---" << std::endl; /** * The first (and computationally most expensive) step is to compute the QR factorization of A. * Since we do not need A later, we directly overwrite A with the householder reflectors and the upper triangular matrix R. * The returned vector holds the scalar coefficients (betas) for the Householder reflections \f$ I - \beta v v^{\mathrm{T}} \f$ **/ std::vector<ScalarType> ublas_betas = viennacl::linalg::inplace_qr(ublas_A); /** * Compute the modified RHS of the minimization problem from the QR factorization, but do not form \f$ Q^{\mathrm{T}} \f$ explicitly: * b' := Q^T b **/ viennacl::linalg::inplace_qr_apply_trans_Q(ublas_A, ublas_betas, ublas_b); /** * Final step: triangular solve: Rx = b'', where b'' are the first three entries in b' * We only need the upper left square part of A, which defines the upper triangular matrix R **/ boost::numeric::ublas::range ublas_range(0, 3); boost::numeric::ublas::matrix_range<MatrixType> ublas_R(ublas_A, ublas_range, ublas_range); boost::numeric::ublas::vector_range<VectorType> ublas_b2(ublas_b, ublas_range); boost::numeric::ublas::inplace_solve(ublas_R, ublas_b2, boost::numeric::ublas::upper_tag()); std::cout << "Result: " << ublas_b2 << std::endl; /** * <h2>Option 2: Use ViennaCL types</h2> * * ViennaCL is used for the computationally intensive BLAS 3 computations. * Boost.uBLAS is used for the panel factorization on the host (CPU). */ std::cout << "--- ViennaCL (hybrid implementation) ---" << std::endl; std::vector<ScalarType> hybrid_betas = viennacl::linalg::inplace_qr(vcl_A); /** * compute modified RHS of the minimization problem: \f$ b' := Q^T b \f$ **/ viennacl::linalg::inplace_qr_apply_trans_Q(vcl_A, hybrid_betas, vcl_b); /** * Final step: triangular solve: Rx = b'. * We only need the upper part of A such that R is a square matrix **/ viennacl::range vcl_range(0, 3); viennacl::matrix_range<VCLMatrixType> vcl_R(vcl_A, vcl_range, vcl_range); viennacl::vector_range<VCLVectorType> vcl_b2(vcl_b, vcl_range); viennacl::linalg::inplace_solve(vcl_R, vcl_b2, viennacl::linalg::upper_tag()); std::cout << "Result: " << vcl_b2 << std::endl; /** * That's it. **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int run_test() { //typedef float ScalarType; typedef boost::numeric::ublas::matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::matrix<ScalarType, T> VCLMatrixType; typedef viennacl::vector<ScalarType> VCLVectorType; viennacl::scalar<ScalarType> gpu_pi = ScalarType(3.1415); std::size_t dim_large = 151; std::size_t dim_small = 37; //std::size_t dim_large = 35; //std::size_t dim_small = 11; //setup ublas objects: MatrixType ublas_A(dim_large, dim_large); for (std::size_t i=0; i<ublas_A.size1(); ++i) for (std::size_t j=0; j<ublas_A.size2(); ++j) ublas_A(i,j) = ScalarType((i+1) + (j+1)*(i+1)); MatrixType ublas_B(dim_small, dim_small); for (std::size_t i=0; i<ublas_B.size1(); ++i) for (std::size_t j=0; j<ublas_B.size2(); ++j) ublas_B(i,j) = ScalarType((i+1) + (j+1)*(i+1)); MatrixType ublas_C(dim_large, dim_small); for (std::size_t i=0; i<ublas_C.size1(); ++i) for (std::size_t j=0; j<ublas_C.size2(); ++j) ublas_C(i,j) = ScalarType((j+2) + (j+1)*(i+1)); MatrixType ublas_D(dim_small, dim_large); for (std::size_t i=0; i<ublas_D.size1(); ++i) for (std::size_t j=0; j<ublas_D.size2(); ++j) ublas_D(i,j) = ScalarType((j+2) + (j+1)*(i+1)); boost::numeric::ublas::range ublas_r1(0, dim_small); boost::numeric::ublas::range ublas_r2(dim_large - dim_small, dim_large); boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub1(ublas_A, ublas_r1, ublas_r1); boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub2(ublas_A, ublas_r2, ublas_r2); boost::numeric::ublas::matrix_range<MatrixType> ublas_C_sub(ublas_C, ublas_r1, ublas_r1); boost::numeric::ublas::matrix_range<MatrixType> ublas_D_sub(ublas_D, ublas_r1, ublas_r1); //Setup ViennaCL objects VCLMatrixType vcl_A(dim_large, dim_large); viennacl::copy(ublas_A, vcl_A); VCLMatrixType vcl_B(dim_small, dim_small); viennacl::copy(ublas_B, vcl_B); VCLMatrixType vcl_C(dim_large, dim_small); viennacl::copy(ublas_C, vcl_C); VCLMatrixType vcl_D(dim_small, dim_large); viennacl::copy(ublas_D, vcl_D); viennacl::range vcl_r1(0, dim_small); viennacl::range vcl_r2(dim_large - dim_small, dim_large); viennacl::matrix_range<VCLMatrixType> vcl_A_sub1(vcl_A, vcl_r1, vcl_r1); viennacl::matrix_range<VCLMatrixType> vcl_A_sub2(vcl_A, vcl_r2, vcl_r2); viennacl::matrix_range<VCLMatrixType> vcl_C_sub(vcl_C, vcl_r1, vcl_r1); viennacl::matrix_range<VCLMatrixType> vcl_D_sub(vcl_D, vcl_r1, vcl_r1); std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test: Copy CTOR //////////" << std::endl; std::cout << "//" << std::endl; { std::cout << "Testing matrix created from range... "; ublas_B = ublas_A_sub1; VCLMatrixType vcl_temp = vcl_A_sub1; if (check_for_equality(ublas_B, vcl_temp)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing range created from range... "; //ublas_A_sub1 = ublas_A_sub1; VCLMatrixType vcl_ctor_sub1 = vcl_A_sub1; //Note: This is mostly a compilation test only if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } } std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test: Assignments //////////" << std::endl; std::cout << "//" << std::endl; std::cout << "Testing matrix assigned to range... "; ublas_A_sub1 = ublas_B; vcl_A_sub1 = vcl_B; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing range assigned to matrix... "; ublas_B = ublas_A_sub2; vcl_B = vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing range assigned to range... "; ublas_A_sub1 = ublas_C_sub; vcl_A_sub1 = vcl_C_sub; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test 1: Copy to GPU //////////" << std::endl; std::cout << "//" << std::endl; ublas_A_sub1 = ublas_B; viennacl::copy(ublas_B, vcl_A_sub1); std::cout << "Testing upper left copy to A... "; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } ublas_A_sub2 = ublas_B; viennacl::copy(ublas_B, vcl_A_sub2); std::cout << "Testing lower right copy to A... "; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } ublas_C_sub = ublas_B; viennacl::copy(ublas_B, vcl_C_sub); std::cout << "Testing upper copy to C... "; if (check_for_equality(ublas_C, vcl_C)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } ublas_D_sub = ublas_B; viennacl::copy(ublas_B, vcl_D_sub); std::cout << "Testing left copy to D... "; if (check_for_equality(ublas_D, vcl_D)) std::cout << "PASSED!" << std::endl; else std::cout << std::endl << "TEST failed!"; std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test 2: Copy from GPU //////////" << std::endl; std::cout << "//" << std::endl; std::cout << "Testing upper left copy to A... "; if (check_for_equality(ublas_A_sub1, vcl_A_sub1)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing lower right copy to A... "; if (check_for_equality(ublas_A_sub2, vcl_A_sub2)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing upper copy to C... "; if (check_for_equality(ublas_C_sub, vcl_C_sub)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing left copy to D... "; if (check_for_equality(ublas_D_sub, vcl_D_sub)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 3: Addition //////////" << std::endl; std::cout << "//" << std::endl; viennacl::copy(ublas_A_sub2, vcl_A_sub2); std::cout << "Inplace add to submatrix: "; ublas_A_sub2 += ublas_A_sub2; vcl_A_sub2 += vcl_A_sub2; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace add to matrix: "; ublas_B += ublas_A_sub2; vcl_B += vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace add of matrix: "; ublas_A_sub2 += ublas_B; vcl_A_sub2 += vcl_B; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Add to submatrix: "; ublas_A_sub2 = ublas_A_sub2 + ublas_A_sub2; vcl_A_sub2 = vcl_A_sub2 + vcl_A_sub2; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Add to matrix: "; ublas_B = ublas_A_sub2 + ublas_A_sub2; vcl_B = vcl_A_sub2 + vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 4: Subtraction //////////" << std::endl; std::cout << "//" << std::endl; viennacl::copy(ublas_A_sub2, vcl_A_sub2); std::cout << "Inplace sub to submatrix: "; ublas_A_sub2 -= ublas_A_sub2; vcl_A_sub2 -= vcl_A_sub2; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace sub to matrix: "; ublas_B -= ublas_A_sub2; vcl_B -= vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace sub of matrix: "; ublas_A_sub2 -= ublas_B; vcl_A_sub2 -= vcl_B; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Sub from submatrix: "; ublas_A_sub2 = ublas_A_sub2 - ublas_A_sub2; vcl_A_sub2 = vcl_A_sub2 - vcl_A_sub2; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Sub from matrix: "; ublas_B = ublas_A_sub2 - ublas_A_sub2; vcl_B = vcl_A_sub2 - vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 5: Scaling //////////" << std::endl; std::cout << "//" << std::endl; viennacl::copy(ublas_A, vcl_A); std::cout << "Multiplication with CPU scalar: "; ublas_A_sub2 *= ScalarType(3.1415); vcl_A_sub2 *= ScalarType(3.1415); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Multiplication with GPU scalar: "; ublas_A_sub2 *= gpu_pi; vcl_A_sub2 *= gpu_pi; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Division with CPU scalar: "; ublas_A_sub2 /= ScalarType(3.1415); vcl_A_sub2 /= ScalarType(3.1415); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Division with GPU scalar: "; ublas_A_sub2 /= gpu_pi; vcl_A_sub2 /= gpu_pi; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 6: Matrix-Matrix Products //////////" << std::endl; std::cout << "//" << std::endl; std::cout << "Assigned C = A * B: "; ublas_A_sub1 = prod(ublas_C_sub, ublas_D_sub); vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, vcl_D_sub); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Assigned C = A^T * B: "; ublas_A_sub1 = prod(trans(ublas_C_sub), ublas_D_sub); vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), vcl_D_sub); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Assigned C = A * B^T: "; ublas_A_sub1 = prod(ublas_C_sub, trans(ublas_D_sub)); vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, trans(vcl_D_sub)); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Assigned C = A^T * B^T: "; ublas_A_sub1 = prod(trans(ublas_C_sub), trans(ublas_D_sub)); vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), trans(vcl_D_sub)); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace add of prod(): "; ublas_A_sub1 += prod(ublas_C_sub, ublas_D_sub); vcl_A_sub1 += viennacl::linalg::prod(vcl_C_sub, vcl_D_sub); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 7: Matrix-Vector Products //////////" << std::endl; std::cout << "//" << std::endl; VectorType ublas_v1(dim_large); for (std::size_t i=0; i<ublas_v1.size(); ++i) ublas_v1(i) = i; boost::numeric::ublas::vector_range<VectorType> ublas_v1_sub(ublas_v1, ublas_r1); VectorType ublas_v2(dim_large); for (std::size_t i=0; i<ublas_v2.size(); ++i) ublas_v2(i) = i - 5; boost::numeric::ublas::vector_range<VectorType> ublas_v2_sub(ublas_v2, ublas_r1); VCLVectorType vcl_v1(ublas_v1.size()); viennacl::vector_range<VCLVectorType> vcl_v1_sub(vcl_v1, vcl_r1); VCLVectorType vcl_v2(ublas_v2.size()); viennacl::vector_range<VCLVectorType> vcl_v2_sub(vcl_v2, vcl_r1); viennacl::copy(ublas_v1, vcl_v1); viennacl::copy(ublas_v2, vcl_v2); viennacl::copy(ublas_A_sub1, vcl_A_sub1); ublas_v2_sub = prod(ublas_A_sub1, ublas_v1_sub); vcl_v2_sub = viennacl::linalg::prod(vcl_A_sub1, vcl_v1_sub); if (check_for_equality_vector(ublas_v2, vcl_v2)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; }