Example #1
0
 /** @brief Constructor
 * @param A matrix whose approximate inverse is calculated. Must be quadratic.
 * @param tag SPAI configuration tag
 */
 fspai_precond(const MatrixType & A,
               const fspai_tag & tag) : tag_(tag), L(viennacl::traits::context(A)), L_trans(viennacl::traits::context(A)), temp_apply_vec_(A.size1(), viennacl::traits::context(A))
 {
     //UBLASSparseMatrixType ubls_A;
     UBLASSparseMatrixType ublas_A(A.size1(), A.size2());
     UBLASSparseMatrixType pA(A.size1(), A.size2());
     UBLASSparseMatrixType ublas_L(A.size1(), A.size2());
     UBLASSparseMatrixType ublas_L_trans(A.size1(), A.size2());
     viennacl::copy(A, ublas_A);
     //viennacl::copy(ubls_A, vcl_A);
     //vcl_At = viennacl::linalg::prod(vcl_A, vcl_A);
     //vcl_pA = viennacl::linalg::prod(vcl_A, vcl_At);
     //viennacl::copy(vcl_pA, pA);
     pA = ublas_A;
     //execute SPAI with ublas matrix types
     viennacl::linalg::detail::spai::computeFSPAI(ublas_A, pA, ublas_L, ublas_L_trans, tag_);
     //copy back to GPU
     viennacl::copy(ublas_L, L);
     viennacl::copy(ublas_L_trans, L_trans);
 }
Example #2
0
/*
 * Tutorial: working with submatrices (matrix ranges).
 * Every operation is carried out twice, once on Boost.uBLAS objects and once on the
 * corresponding ViennaCL objects, and both versions of A are printed at the end.
 */
int main (int, const char **)
{
  typedef float                                                ScalarType;    // may be switched to 'double' if your hardware supports it
  typedef boost::numeric::ublas::matrix<ScalarType>            MatrixType;
  typedef viennacl::matrix<ScalarType, viennacl::row_major>    VCLMatrixType;

  std::size_t n_large = 5;
  std::size_t n_small = 3;

  //
  // Host side: create each uBLAS matrix and fill it right away
  //
  MatrixType ublas_A(n_large, n_large);
  for (std::size_t row = 0; row < ublas_A.size1(); ++row)
  {
    for (std::size_t col = 0; col < ublas_A.size2(); ++col)
      ublas_A(row, col) = static_cast<ScalarType>((row+1) + (col+1)*(row+1));
  }

  MatrixType ublas_B(n_small, n_small);
  for (std::size_t row = 0; row < ublas_B.size1(); ++row)
  {
    for (std::size_t col = 0; col < ublas_B.size2(); ++col)
      ublas_B(row, col) = static_cast<ScalarType>((row+1) + (col+1)*(row+1));
  }

  MatrixType ublas_C(n_large, n_small);
  for (std::size_t row = 0; row < ublas_C.size1(); ++row)
  {
    for (std::size_t col = 0; col < ublas_C.size2(); ++col)
      ublas_C(row, col) = static_cast<ScalarType>((col+2) + (col+1)*(row+1));
  }

  MatrixType ublas_D(n_small, n_large);
  for (std::size_t row = 0; row < ublas_D.size1(); ++row)
  {
    for (std::size_t col = 0; col < ublas_D.size2(); ++col)
      ublas_D(row, col) = static_cast<ScalarType>((col+2) + (col+1)*(row+1));
  }

  //
  // uBLAS index ranges and the submatrix views built from them
  //
  boost::numeric::ublas::range ublas_r1(0, n_small);                  // leading 'n_small' indices
  boost::numeric::ublas::range ublas_r2(n_large - n_small, n_large);  // trailing 'n_small' indices

  boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub1(ublas_A, ublas_r1, ublas_r1); // upper left block of A
  boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub2(ublas_A, ublas_r2, ublas_r2); // lower right block of A
  boost::numeric::ublas::matrix_range<MatrixType> ublas_C_sub(ublas_C, ublas_r1, ublas_r1);  // upper left block of C
  boost::numeric::ublas::matrix_range<MatrixType> ublas_D_sub(ublas_D, ublas_r1, ublas_r1);  // upper left block of D

  //
  // ViennaCL counterparts, initialized from the uBLAS data
  //
  VCLMatrixType vcl_A(n_large, n_large);
  VCLMatrixType vcl_B(n_small, n_small);
  VCLMatrixType vcl_C(n_large, n_small);
  VCLMatrixType vcl_D(n_small, n_large);

  viennacl::copy(ublas_A, vcl_A);
  viennacl::copy(ublas_B, vcl_B);
  viennacl::copy(ublas_C, vcl_C);
  viennacl::copy(ublas_D, vcl_D);

  //
  // ViennaCL index ranges and submatrix views, mirroring the uBLAS ones
  //
  viennacl::range vcl_r1(0, n_small);                  // leading 'n_small' indices
  viennacl::range vcl_r2(n_large - n_small, n_large);  // trailing 'n_small' indices

  viennacl::matrix_range<VCLMatrixType> vcl_A_sub1(vcl_A, vcl_r1, vcl_r1); // upper left block of A
  viennacl::matrix_range<VCLMatrixType> vcl_A_sub2(vcl_A, vcl_r2, vcl_r2); // lower right block of A
  viennacl::matrix_range<VCLMatrixType> vcl_C_sub(vcl_C, vcl_r1, vcl_r1);  // upper left block of C
  viennacl::matrix_range<VCLMatrixType> vcl_D_sub(vcl_D, vcl_r1, vcl_r1);  // upper left block of D

  //
  // Transfer: uBLAS matrix -> ViennaCL submatrix, then ViennaCL submatrix -> uBLAS matrix
  //
  ublas_A_sub1 = ublas_B;
  viennacl::copy(ublas_B, vcl_A_sub1);
  viennacl::copy(vcl_A_sub1, ublas_B);

  //
  // In-place addition
  //

  // submatrix added to itself:
  ublas_A_sub2 += ublas_A_sub2;
  vcl_A_sub2   += vcl_A_sub2;

  // submatrix added to a full matrix:
  ublas_B += ublas_A_sub2;
  vcl_B   += vcl_A_sub2;

  //
  // Matrix-matrix product of two submatrices, accumulated into a third submatrix
  //
  ublas_A_sub1 += prod(ublas_C_sub, ublas_D_sub);
  vcl_A_sub1   += viennacl::linalg::prod(vcl_C_sub, vcl_D_sub);

  //
  // Show both results
  //
  std::cout << "Result ublas:    " << ublas_A << std::endl;
  std::cout << "Result ViennaCL: " << vcl_A << std::endl;

  //
  //  Done.
  //
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}
Example #3
0
int main (int, const char **)
{
  typedef double               ScalarType;     //feel free to change this to 'double' if supported by your hardware
  typedef boost::numeric::ublas::matrix<ScalarType>        MatrixType;
  typedef boost::numeric::ublas::vector<ScalarType>        VectorType;
  typedef viennacl::matrix<ScalarType, viennacl::column_major>        VCLMatrixType;
  typedef viennacl::vector<ScalarType>        VCLVectorType;

  std::size_t rows = 113;   //number of rows in the matrix
  std::size_t cols = 54;   //number of columns

  //
  // Create matrices with some data
  //
  MatrixType ublas_A(rows, cols);
  MatrixType Q(rows, rows);
  MatrixType R(rows, cols);

  // Some random data with a bit of extra weight on the diagonal
  for (std::size_t i=0; i<rows; ++i)
  {
    for (std::size_t j=0; j<cols; ++j)
    {
      ublas_A(i,j) = -1.0 + (i+1)*(j+1)
                     + ( (rand() % 1000) - 500.0) / 1000.0;

      if (i == j)
        ublas_A(i,j) += 10.0;

      R(i,j) = 0.0;
    }

    for (std::size_t j=0; j<rows; ++j)
      Q(i,j) = 0.0;
  }

  // keep initial input matrix for comparison
  MatrixType ublas_A_backup(ublas_A);


  //
  // Setup the matrix in ViennaCL:
  //
  VCLVectorType dummy(10);
  VCLMatrixType vcl_A(ublas_A.size1(), ublas_A.size2());

  viennacl::copy(ublas_A, vcl_A);

  //
  // Compute QR factorization of A. A is overwritten with Householder vectors. Coefficients are returned and a block size of 3 is used.
  // Note that at the moment the number of columns of A must be divisible by the block size
  //

  std::cout << "--- Boost.uBLAS ---" << std::endl;
  std::vector<ScalarType> ublas_betas = viennacl::linalg::inplace_qr(ublas_A);  //computes the QR factorization

  //
  // A check for the correct result:
  //
  viennacl::linalg::recoverQ(ublas_A, ublas_betas, Q, R);
  MatrixType ublas_QR = prod(Q, R);
  double ublas_error = check(ublas_QR, ublas_A_backup);
  std::cout << "Max rel error (ublas): " << ublas_error << std::endl;

  //
  // QR factorization in ViennaCL using Boost.uBLAS for the panel factorization
  //
  std::cout << "--- Hybrid (default) ---" << std::endl;
  viennacl::copy(ublas_A_backup, vcl_A);
  std::vector<ScalarType> hybrid_betas = viennacl::linalg::inplace_qr(vcl_A);


  //
  // A check for the correct result:
  //
  viennacl::copy(vcl_A, ublas_A);
  Q.clear(); R.clear();
  viennacl::linalg::recoverQ(ublas_A, hybrid_betas, Q, R);
  double hybrid_error = check(ublas_QR, ublas_A_backup);
  std::cout << "Max rel error (hybrid): " << hybrid_error << std::endl;


  //
  //  That's it.
  //
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}
Example #4
0
/**
*  We set up a random matrix using Boost.uBLAS and use it to initialize a ViennaCL matrix.
*  Then we compute the QR factorization directly for the uBLAS matrix as well as the ViennaCL matrix.
**/
int main (int, const char **)
{
  typedef double               ScalarType;     //feel free to change this to 'double' if supported by your hardware
  typedef boost::numeric::ublas::matrix<ScalarType>              MatrixType;
  typedef viennacl::matrix<ScalarType, viennacl::column_major>   VCLMatrixType;

  std::size_t rows = 113;   // number of rows in the matrix
  std::size_t cols = 54;    // number of columns

  /**
  * Create uBLAS matrices with some random input data.
  **/
  MatrixType ublas_A(rows, cols);
  MatrixType Q(rows, rows);
  MatrixType R(rows, cols);

  // Some random data with a bit of extra weight on the diagonal
  for (std::size_t i=0; i<rows; ++i)
  {
    for (std::size_t j=0; j<cols; ++j)
    {
      ublas_A(i,j) = ScalarType(-1.0) + ScalarType((i+1)*(j+1))
                     + ScalarType( (rand() % 1000) - 500.0) / ScalarType(1000.0);

      if (i == j)
        ublas_A(i,j) += ScalarType(10.0);

      R(i,j) = 0.0;
    }

    for (std::size_t j=0; j<rows; ++j)
      Q(i,j) = ScalarType(0.0);
  }

  // keep initial input matrix for comparison
  MatrixType ublas_A_backup(ublas_A);


  /**
  *   Setup the matrix in ViennaCL and copy the data from the uBLAS matrix:
  **/
  VCLMatrixType vcl_A(ublas_A.size1(), ublas_A.size2());

  viennacl::copy(ublas_A, vcl_A);

  /**
  *  <h2>QR Factorization with Boost.uBLAS Matrices</h2>
  * Compute QR factorization of A. A is overwritten with Householder vectors. Coefficients are returned and a block size of 3 is used.
  * Note that at the moment the number of columns of A must be divisible by the block size
  **/

  std::cout << "--- Boost.uBLAS ---" << std::endl;
  std::vector<ScalarType> ublas_betas = viennacl::linalg::inplace_qr(ublas_A);  //computes the QR factorization

  /**
  *  Let us check for the correct result:
  **/
  viennacl::linalg::recoverQ(ublas_A, ublas_betas, Q, R);
  MatrixType ublas_QR = prod(Q, R);
  double ublas_error = check(ublas_QR, ublas_A_backup);
  std::cout << "Maximum relative error (ublas): " << ublas_error << std::endl;

  /**
  *  <h2>QR Factorization with Boost.uBLAS Matrices</h2>
  *  We now compute the QR factorization from a ViennaCL matrix. Internally it uses Boost.uBLAS for the panel factorization.
  **/
  std::cout << "--- Hybrid (default) ---" << std::endl;
  viennacl::copy(ublas_A_backup, vcl_A);
  std::vector<ScalarType> hybrid_betas = viennacl::linalg::inplace_qr(vcl_A);

  /**
  *  Let us check for the correct result:
  **/
  viennacl::copy(vcl_A, ublas_A);
  Q.clear(); R.clear();
  viennacl::linalg::recoverQ(ublas_A, hybrid_betas, Q, R);
  double hybrid_error = check(ublas_QR, ublas_A_backup);
  std::cout << "Maximum relative error (hybrid): " << hybrid_error << std::endl;


  /**
  *  That's it. Print a success message and exit.
  **/
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}
/*
 * Tutorial: least-squares solution of an overdetermined system A x = b via QR factorization.
 * The computation is carried out twice: with Boost.uBLAS types only, and with ViennaCL types.
 * Steps in both cases:
 *   1. in-place QR factorization of A,
 *   2. b' := Q^T b without forming Q explicitly,
 *   3. triangular solve with the upper square part of the factored A.
 */
int main (int, const char **)
{
  typedef float               ScalarType;     //feel free to change this to 'double' if supported by your hardware
  typedef boost::numeric::ublas::matrix<ScalarType>              MatrixType;
  typedef boost::numeric::ublas::vector<ScalarType>              VectorType;
  typedef viennacl::matrix<ScalarType, viennacl::column_major>   VCLMatrixType;
  typedef viennacl::vector<ScalarType>                           VCLVectorType;

  //
  // Create vectors and matrices with data, cf. http://tutorial.math.lamar.edu/Classes/LinAlg/QRDecomposition.aspx
  //
  VectorType ublas_b(4);
  ublas_b(0) = -4;
  ublas_b(1) =  2;
  ublas_b(2) =  5;
  ublas_b(3) = -1;

  MatrixType ublas_A(4, 3);

  ublas_A(0, 0) =  2; ublas_A(0, 1) = -1; ublas_A(0, 2) =  1;
  ublas_A(1, 0) =  1; ublas_A(1, 1) = -5; ublas_A(1, 2) =  2;
  ublas_A(2, 0) = -3; ublas_A(2, 1) =  1; ublas_A(2, 2) = -4;
  ublas_A(3, 0) =  1; ublas_A(3, 1) = -1; ublas_A(3, 2) =  1;

  //
  // Setup the matrix and the right hand side in ViennaCL:
  //
  VCLVectorType vcl_b(ublas_b.size());
  VCLMatrixType vcl_A(ublas_A.size1(), ublas_A.size2());

  viennacl::copy(ublas_b, vcl_b);
  viennacl::copy(ublas_A, vcl_A);



  //////////// Part 1: Use Boost.uBLAS for all computations ////////////////

  std::cout << "--- Boost.uBLAS ---" << std::endl;
  std::vector<ScalarType> ublas_betas = viennacl::linalg::inplace_qr(ublas_A);  //computes the QR factorization

  // compute modified RHS of the minimization problem:
  // b' := Q^T b
  viennacl::linalg::inplace_qr_apply_trans_Q(ublas_A, ublas_betas, ublas_b);

  // Final step: triangular solve: Rx = b'', where b'' are the leading entries in b'
  // We only need the upper left square part of A, which defines the upper triangular matrix R.
  // Its dimension equals the number of columns of A (three for the data above).
  boost::numeric::ublas::range ublas_range(0, ublas_A.size2());
  boost::numeric::ublas::matrix_range<MatrixType> ublas_R(ublas_A, ublas_range, ublas_range);
  boost::numeric::ublas::vector_range<VectorType> ublas_b2(ublas_b, ublas_range);
  boost::numeric::ublas::inplace_solve(ublas_R, ublas_b2, boost::numeric::ublas::upper_tag());

  std::cout << "Result: " << ublas_b2 << std::endl;

  //////////// Part 2: Use ViennaCL types for BLAS 3 computations, but use Boost.uBLAS for the panel factorization ////////////////

  std::cout << "--- ViennaCL (hybrid implementation)  ---" << std::endl;
  std::vector<ScalarType> hybrid_betas = viennacl::linalg::inplace_qr(vcl_A);

  // compute modified RHS of the minimization problem:
  // b := Q^T b
  viennacl::linalg::inplace_qr_apply_trans_Q(vcl_A, hybrid_betas, vcl_b);

  // Final step: triangular solve: Rx = b'.
  // We only need the upper part of A such that R is a square matrix
  viennacl::range vcl_range(0, vcl_A.size2());
  viennacl::matrix_range<VCLMatrixType> vcl_R(vcl_A, vcl_range, vcl_range);
  viennacl::vector_range<VCLVectorType> vcl_b2(vcl_b, vcl_range);
  viennacl::linalg::inplace_solve(vcl_R, vcl_b2, viennacl::linalg::upper_tag());

  std::cout << "Result: " << vcl_b2 << std::endl;



  //
  //  That's it.
  //
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}
Example #6
0
/**
*  Least-squares tutorial: find x such that \f$ \Vert Ax - b \Vert \f$ is minimized. The procedure is:
*   - Compute the QR-factorization A = QR.
*   - Compute \f$ b' = Q^{\mathrm{T}} b \f$ for the equivalent minimization problem \f$ \Vert Rx - Q^{\mathrm{T}} b \Vert \f$.
*   - Solve the triangular system \f$ \tilde{R} x = b' \f$, where \f$ \tilde{R} \f$ is the upper square matrix of R.
*
**/
int main (int, const char **)
{
    typedef float               ScalarType;     // may be switched to 'double' if your hardware supports it

    typedef boost::numeric::ublas::matrix<ScalarType>              MatrixType;
    typedef boost::numeric::ublas::vector<ScalarType>              VectorType;
    typedef viennacl::matrix<ScalarType, viennacl::column_major>   VCLMatrixType;
    typedef viennacl::vector<ScalarType>                           VCLVectorType;

    /**
    *  Problem dimensions and input data, kept in plain tables:
    **/
    const std::size_t num_rows = 4;
    const std::size_t num_cols = 3;

    const ScalarType rhs_entries[num_rows] = { -4, 2, 5, -1 };

    const ScalarType matrix_entries[num_rows][num_cols] = {
        {  2, -1,  1 },
        {  1, -5,  2 },
        { -3,  1, -4 },
        {  1, -1,  1 }
    };

    /**
    *  Fill the uBLAS right hand side and system matrix from the tables:
    **/
    VectorType ublas_b(num_rows);
    MatrixType ublas_A(num_rows, num_cols);

    for (std::size_t row = 0; row < num_rows; ++row)
    {
        ublas_b(row) = rhs_entries[row];

        for (std::size_t col = 0; col < num_cols; ++col)
            ublas_A(row, col) = matrix_entries[row][col];
    }

    /**
    * Create the ViennaCL vector and matrix of matching size and copy the uBLAS data over:
    **/
    VCLVectorType vcl_b(ublas_b.size());
    VCLMatrixType vcl_A(ublas_A.size1(), ublas_A.size2());

    viennacl::copy(ublas_b, vcl_b);
    viennacl::copy(ublas_A, vcl_A);


    /**
    * <h2>Option 1: Using Boost.uBLAS</h2>
    *
    * The ViennaCL routines used below are called with uBLAS types first
    * and with ViennaCL types afterwards.
    **/

    std::cout << "--- Boost.uBLAS ---" << std::endl;

    /**
    * Step one: QR factorization of A, carried out in-place since A is not needed afterwards.
    * The returned vector holds the scalar coefficients (betas) for the Householder reflections \f$ I - \beta v v^{\mathrm{T}} \f$
    **/
    std::vector<ScalarType> ublas_betas = viennacl::linalg::inplace_qr(ublas_A);

    /**
    * Step two: obtain the modified right hand side b' := Q^T b from the factorization,
    * without forming \f$ Q^{\mathrm{T}} \f$ explicitly.
    **/
    viennacl::linalg::inplace_qr_apply_trans_Q(ublas_A, ublas_betas, ublas_b);

    /**
    * Step three: triangular solve Rx = b'', where b'' consists of the first 'num_cols' entries of b'.
    * Only the upper left square part of A is needed, which defines the upper triangular matrix R.
    **/
    boost::numeric::ublas::range ublas_range(0, num_cols);
    boost::numeric::ublas::vector_range<VectorType> ublas_b2(ublas_b, ublas_range);
    boost::numeric::ublas::matrix_range<MatrixType> ublas_R(ublas_A, ublas_range, ublas_range);
    boost::numeric::ublas::inplace_solve(ublas_R, ublas_b2, boost::numeric::ublas::upper_tag());

    std::cout << "Result: " << ublas_b2 << std::endl;

    /**
    *  <h2>Option 2: Use ViennaCL types</h2>
    *
    *  ViennaCL is used for the computationally intensive BLAS 3 computations.
    *  Boost.uBLAS is used for the panel factorization on the host (CPU).
    */

    std::cout << "--- ViennaCL (hybrid implementation)  ---" << std::endl;
    std::vector<ScalarType> hybrid_betas = viennacl::linalg::inplace_qr(vcl_A);

    /**
    * Modified right hand side of the minimization problem: \f$ b' := Q^T b \f$
    **/
    viennacl::linalg::inplace_qr_apply_trans_Q(vcl_A, hybrid_betas, vcl_b);

    /**
    * Triangular solve Rx = b', restricted to the upper part of A so that R is a square matrix.
    **/
    viennacl::range vcl_range(0, num_cols);
    viennacl::vector_range<VCLVectorType> vcl_b2(vcl_b, vcl_range);
    viennacl::matrix_range<VCLMatrixType> vcl_R(vcl_A, vcl_range, vcl_range);
    viennacl::linalg::inplace_solve(vcl_R, vcl_b2, viennacl::linalg::upper_tag());

    std::cout << "Result: " << vcl_b2 << std::endl;

    /**
    *  Done.
    **/
    std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

    return EXIT_SUCCESS;
}
/*
 * Test driver for matrix ranges (submatrices).
 *
 * Each operation is carried out on Boost.uBLAS objects and on the corresponding ViennaCL
 * objects; the results are then passed to check_for_equality() / check_for_equality_vector()
 * (defined elsewhere; presumably they compare the uBLAS data with the ViennaCL data).
 *
 * Returns EXIT_FAILURE as soon as one of the checks fails and EXIT_SUCCESS if all pass.
 *
 * NOTE(review): 'ScalarType' and 'T' are not declared inside this function. They are
 * presumably template parameters (scalar type and matrix layout tag) declared on a
 * template header that is not visible here -- confirm.
 */
int run_test()
{
    //typedef float               ScalarType;
    typedef boost::numeric::ublas::matrix<ScalarType>       MatrixType;
    typedef boost::numeric::ublas::vector<ScalarType>       VectorType;

    typedef viennacl::matrix<ScalarType, T>    VCLMatrixType;
    typedef viennacl::vector<ScalarType>       VCLVectorType;

    viennacl::scalar<ScalarType> gpu_pi = ScalarType(3.1415);

    std::size_t dim_large = 151;
    std::size_t dim_small = 37;
    //std::size_t dim_large = 35;
    //std::size_t dim_small = 11;

    //setup ublas objects:
    MatrixType ublas_A(dim_large, dim_large);
    for (std::size_t i=0; i<ublas_A.size1(); ++i)
      for (std::size_t j=0; j<ublas_A.size2(); ++j)
        ublas_A(i,j) = ScalarType((i+1) + (j+1)*(i+1));

    MatrixType ublas_B(dim_small, dim_small);
    for (std::size_t i=0; i<ublas_B.size1(); ++i)
      for (std::size_t j=0; j<ublas_B.size2(); ++j)
        ublas_B(i,j) = ScalarType((i+1) + (j+1)*(i+1));

    MatrixType ublas_C(dim_large, dim_small);
    for (std::size_t i=0; i<ublas_C.size1(); ++i)
      for (std::size_t j=0; j<ublas_C.size2(); ++j)
        ublas_C(i,j) = ScalarType((j+2) + (j+1)*(i+1));

    MatrixType ublas_D(dim_small, dim_large);
    for (std::size_t i=0; i<ublas_D.size1(); ++i)
      for (std::size_t j=0; j<ublas_D.size2(); ++j)
        ublas_D(i,j) = ScalarType((j+2) + (j+1)*(i+1));

    // r1: leading 'dim_small' indices, r2: trailing 'dim_small' indices
    boost::numeric::ublas::range ublas_r1(0, dim_small);
    boost::numeric::ublas::range ublas_r2(dim_large - dim_small, dim_large);
    boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub1(ublas_A, ublas_r1, ublas_r1);
    boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub2(ublas_A, ublas_r2, ublas_r2);

    boost::numeric::ublas::matrix_range<MatrixType> ublas_C_sub(ublas_C, ublas_r1, ublas_r1);
    boost::numeric::ublas::matrix_range<MatrixType> ublas_D_sub(ublas_D, ublas_r1, ublas_r1);

    //Setup ViennaCL objects
    VCLMatrixType vcl_A(dim_large, dim_large);
    viennacl::copy(ublas_A, vcl_A);
    VCLMatrixType vcl_B(dim_small, dim_small);
    viennacl::copy(ublas_B, vcl_B);
    VCLMatrixType vcl_C(dim_large, dim_small);
    viennacl::copy(ublas_C, vcl_C);
    VCLMatrixType vcl_D(dim_small, dim_large);
    viennacl::copy(ublas_D, vcl_D);

    viennacl::range vcl_r1(0, dim_small);
    viennacl::range vcl_r2(dim_large - dim_small, dim_large);
    viennacl::matrix_range<VCLMatrixType>   vcl_A_sub1(vcl_A, vcl_r1, vcl_r1);
    viennacl::matrix_range<VCLMatrixType>   vcl_A_sub2(vcl_A, vcl_r2, vcl_r2);

    viennacl::matrix_range<VCLMatrixType>   vcl_C_sub(vcl_C, vcl_r1, vcl_r1);
    viennacl::matrix_range<VCLMatrixType>   vcl_D_sub(vcl_D, vcl_r1, vcl_r1);

    std::cout << std::endl;
    std::cout << "//" << std::endl;
    std::cout << "////////// Test: Copy CTOR //////////" << std::endl;
    std::cout << "//" << std::endl;

    {
      std::cout << "Testing matrix created from range... ";
      ublas_B = ublas_A_sub1;
      VCLMatrixType vcl_temp = vcl_A_sub1;
      if (check_for_equality(ublas_B, vcl_temp))
        std::cout << "PASSED!" << std::endl;
      else
      {
        std::cout << std::endl << "TEST failed!";
        return EXIT_FAILURE;
      }

      std::cout << "Testing range created from range... ";
      //ublas_A_sub1 = ublas_A_sub1;
      // NOTE(review): despite the label, a full matrix (not a range) is constructed here.
      VCLMatrixType vcl_ctor_sub1 = vcl_A_sub1;  //Note: This is mostly a compilation test only
      if (check_for_equality(ublas_A, vcl_A))
        std::cout << "PASSED!" << std::endl;
      else
      {
        std::cout << std::endl << "TEST failed!";
        return EXIT_FAILURE;
      }
    }


    std::cout << std::endl;
    std::cout << "//" << std::endl;
    std::cout << "////////// Test: Assignments //////////" << std::endl;
    std::cout << "//" << std::endl;

    std::cout << "Testing matrix assigned to range... ";
    ublas_A_sub1 = ublas_B;
    vcl_A_sub1 = vcl_B;
    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Testing range assigned to matrix... ";
    ublas_B = ublas_A_sub2;
    vcl_B = vcl_A_sub2;
    if (check_for_equality(ublas_B, vcl_B))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Testing range assigned to range... ";
    ublas_A_sub1 = ublas_C_sub;
    vcl_A_sub1 = vcl_C_sub;
    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }



    std::cout << std::endl;
    std::cout << "//" << std::endl;
    std::cout << "////////// Test 1: Copy to GPU //////////" << std::endl;
    std::cout << "//" << std::endl;

    ublas_A_sub1 = ublas_B;
    viennacl::copy(ublas_B, vcl_A_sub1);
    std::cout << "Testing upper left copy to A... ";
    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }


    ublas_A_sub2 = ublas_B;
    viennacl::copy(ublas_B, vcl_A_sub2);
    std::cout << "Testing lower right copy to A... ";
    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }



    ublas_C_sub = ublas_B;
    viennacl::copy(ublas_B, vcl_C_sub);
    std::cout << "Testing upper copy to C... ";
    if (check_for_equality(ublas_C, vcl_C))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }


    ublas_D_sub = ublas_B;
    viennacl::copy(ublas_B, vcl_D_sub);
    std::cout << "Testing left copy to D... ";
    if (check_for_equality(ublas_D, vcl_D))
      std::cout << "PASSED!" << std::endl;
    else
    {
      // a failure here must abort the test run like every other failed check
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << std::endl;
    std::cout << "//" << std::endl;
    std::cout << "////////// Test 2: Copy from GPU //////////" << std::endl;
    std::cout << "//" << std::endl;

    std::cout << "Testing upper left copy to A... ";
    if (check_for_equality(ublas_A_sub1, vcl_A_sub1))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Testing lower right copy to A... ";
    if (check_for_equality(ublas_A_sub2, vcl_A_sub2))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Testing upper copy to C... ";
    if (check_for_equality(ublas_C_sub, vcl_C_sub))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Testing left copy to D... ";
    if (check_for_equality(ublas_D_sub, vcl_D_sub))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }


    std::cout << "//" << std::endl;
    std::cout << "////////// Test 3: Addition //////////" << std::endl;
    std::cout << "//" << std::endl;
    viennacl::copy(ublas_A_sub2, vcl_A_sub2);

    std::cout << "Inplace add to submatrix: ";
    ublas_A_sub2 += ublas_A_sub2;
    vcl_A_sub2 += vcl_A_sub2;

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Inplace add to matrix: ";
    ublas_B += ublas_A_sub2;
    vcl_B += vcl_A_sub2;

    if (check_for_equality(ublas_B, vcl_B))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Inplace add of matrix: ";
    ublas_A_sub2 += ublas_B;
    vcl_A_sub2 += vcl_B;

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Add to submatrix: ";
    ublas_A_sub2 = ublas_A_sub2 + ublas_A_sub2;
    vcl_A_sub2 = vcl_A_sub2 + vcl_A_sub2;

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Add to matrix: ";
    ublas_B = ublas_A_sub2 + ublas_A_sub2;
    vcl_B = vcl_A_sub2 + vcl_A_sub2;

    if (check_for_equality(ublas_B, vcl_B))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }



    std::cout << "//" << std::endl;
    std::cout << "////////// Test 4: Subtraction //////////" << std::endl;
    std::cout << "//" << std::endl;
    viennacl::copy(ublas_A_sub2, vcl_A_sub2);

    std::cout << "Inplace sub to submatrix: ";
    ublas_A_sub2 -= ublas_A_sub2;
    vcl_A_sub2 -= vcl_A_sub2;

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Inplace sub to matrix: ";
    ublas_B -= ublas_A_sub2;
    vcl_B -= vcl_A_sub2;

    if (check_for_equality(ublas_B, vcl_B))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Inplace sub of matrix: ";
    ublas_A_sub2 -= ublas_B;
    vcl_A_sub2 -= vcl_B;

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Sub from submatrix: ";
    ublas_A_sub2 = ublas_A_sub2 - ublas_A_sub2;
    vcl_A_sub2 = vcl_A_sub2 - vcl_A_sub2;

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Sub from matrix: ";
    ublas_B = ublas_A_sub2 - ublas_A_sub2;
    vcl_B = vcl_A_sub2 - vcl_A_sub2;

    if (check_for_equality(ublas_B, vcl_B))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }


    std::cout << "//" << std::endl;
    std::cout << "////////// Test 5: Scaling //////////" << std::endl;
    std::cout << "//" << std::endl;
    viennacl::copy(ublas_A, vcl_A);

    std::cout << "Multiplication with CPU scalar: ";
    ublas_A_sub2 *= ScalarType(3.1415);
    vcl_A_sub2 *= ScalarType(3.1415);

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Multiplication with GPU scalar: ";
    ublas_A_sub2 *= gpu_pi;
    vcl_A_sub2 *= gpu_pi;

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }


    std::cout << "Division with CPU scalar: ";
    ublas_A_sub2 /= ScalarType(3.1415);
    vcl_A_sub2 /= ScalarType(3.1415);

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Division with GPU scalar: ";
    ublas_A_sub2 /= gpu_pi;
    vcl_A_sub2 /= gpu_pi;

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }



    std::cout << "//" << std::endl;
    std::cout << "////////// Test 6: Matrix-Matrix Products //////////" << std::endl;
    std::cout << "//" << std::endl;

    std::cout << "Assigned C = A * B: ";
    ublas_A_sub1 = prod(ublas_C_sub, ublas_D_sub);
    vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, vcl_D_sub);

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Assigned C = A^T * B: ";
    ublas_A_sub1 = prod(trans(ublas_C_sub), ublas_D_sub);
    vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), vcl_D_sub);

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Assigned C = A * B^T: ";
    ublas_A_sub1 = prod(ublas_C_sub, trans(ublas_D_sub));
    vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, trans(vcl_D_sub));

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Assigned C = A^T * B^T: ";
    ublas_A_sub1 = prod(trans(ublas_C_sub), trans(ublas_D_sub));
    vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), trans(vcl_D_sub));

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << "Inplace add of prod(): ";
    ublas_A_sub1 += prod(ublas_C_sub, ublas_D_sub);
    vcl_A_sub1 += viennacl::linalg::prod(vcl_C_sub, vcl_D_sub);

    if (check_for_equality(ublas_A, vcl_A))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }


    std::cout << "//" << std::endl;
    std::cout << "////////// Test 7: Matrix-Vector Products //////////" << std::endl;
    std::cout << "//" << std::endl;

    VectorType ublas_v1(dim_large);
    for (std::size_t i=0; i<ublas_v1.size(); ++i)
      ublas_v1(i) = ScalarType(i);
    boost::numeric::ublas::vector_range<VectorType> ublas_v1_sub(ublas_v1, ublas_r1);

    VectorType ublas_v2(dim_large);
    for (std::size_t i=0; i<ublas_v2.size(); ++i)
      ublas_v2(i) = ScalarType(i) - ScalarType(5);  // subtract in ScalarType: 'i - 5' on std::size_t wraps around for i < 5
    boost::numeric::ublas::vector_range<VectorType> ublas_v2_sub(ublas_v2, ublas_r1);


    VCLVectorType vcl_v1(ublas_v1.size());
    viennacl::vector_range<VCLVectorType> vcl_v1_sub(vcl_v1, vcl_r1);
    VCLVectorType vcl_v2(ublas_v2.size());
    viennacl::vector_range<VCLVectorType> vcl_v2_sub(vcl_v2, vcl_r1);
    viennacl::copy(ublas_v1, vcl_v1);
    viennacl::copy(ublas_v2, vcl_v2);
    viennacl::copy(ublas_A_sub1, vcl_A_sub1);


    ublas_v2_sub = prod(ublas_A_sub1, ublas_v1_sub);
    vcl_v2_sub = viennacl::linalg::prod(vcl_A_sub1, vcl_v1_sub);

    if (check_for_equality_vector(ublas_v2, vcl_v2))
      std::cout << "PASSED!" << std::endl;
    else
    {
      std::cout << std::endl << "TEST failed!";
      return EXIT_FAILURE;
    }

    std::cout << std::endl;
    std::cout << "----------------------------------------------" << std::endl;
    std::cout << std::endl;


    return EXIT_SUCCESS;
}