コード例 #1
0
ファイル: viennacl_gmres.cpp プロジェクト: 10imaging/viennacl
void viennacl_gmres(double	* result,   //output vector
  		            mwIndex	* cols,    //input vector holding column jumpers
    	            mwIndex	* rows,    //input vector holding row indices
                    double *entries,
                    double *rhs,
                    mwSize     num_cols,
                    mwSize     nnzmax
		   )
{
    viennacl::vector<double>                    vcl_rhs(num_cols);
    viennacl::vector<double>                    vcl_result(num_cols);
    viennacl::compressed_matrix<double>     vcl_matrix(num_cols, num_cols);

    //convert from column-wise storage to row-wise storage
    std::vector< std::map< unsigned int, double > >  stl_matrix(num_cols);

    for (mwIndex j=0; j<num_cols; ++j)
    {
       for (mwIndex i = cols[j]; i<cols[j+1]; ++i)
         stl_matrix[rows[i]][j] = entries[i];
    }

    //now copy matrix to GPU:
    copy(stl_matrix, vcl_matrix);
    copy(rhs, rhs + num_cols, vcl_rhs.begin());
    stl_matrix.clear(); //clean up this temporary storage

	//solve it:
    vcl_result = solve(vcl_matrix,
                       vcl_rhs,
                       viennacl::linalg::gmres_tag(1e-8, 30, 20));    //relative tolerance of 1e-8, krylov space of dimension 30, 20 restarts max.

    ///////////// copy back to CPU: ///////////////////
    copy(vcl_result.begin(), vcl_result.end(), result);

    return;
}
コード例 #2
0
void run_tutorial()
{
  /**
  * Get Eigen matrix and vector types for the provided ScalarType.
  * Involves a little bit of template-metaprogramming.
  **/
  typedef typename Eigen_dense_matrix<ScalarType>::type  EigenMatrix;
  typedef typename Eigen_vector<ScalarType>::type        EigenVector;

  /**
  * Create and fill dense matrices from the Eigen library:
  **/
  EigenMatrix eigen_densemat(6, 5);
  EigenMatrix eigen_densemat2(6, 5);
  eigen_densemat(0,0) = 2.0;   eigen_densemat(0,1) = -1.0;
  eigen_densemat(1,0) = -1.0;  eigen_densemat(1,1) =  2.0;  eigen_densemat(1,2) = -1.0;
  eigen_densemat(2,1) = -1.0;  eigen_densemat(2,2) = -1.0;  eigen_densemat(2,3) = -1.0;
  eigen_densemat(3,2) = -1.0;  eigen_densemat(3,3) =  2.0;  eigen_densemat(3,4) = -1.0;
                               eigen_densemat(5,4) = -1.0;  eigen_densemat(4,4) = -1.0;
  Eigen::Map<EigenMatrix> eigen_densemat_map(eigen_densemat.data(), 6, 5); // same as eigen_densemat, but emulating user-provided buffer

  /**
  * Create and fill sparse matrices from the Eigen library:
  **/
  Eigen::SparseMatrix<ScalarType, Eigen::RowMajor> eigen_sparsemat(6, 5);
  Eigen::SparseMatrix<ScalarType, Eigen::RowMajor> eigen_sparsemat2(6, 5);
  eigen_sparsemat.reserve(5*2);
  eigen_sparsemat.insert(0,0) = 2.0;   eigen_sparsemat.insert(0,1) = -1.0;
  eigen_sparsemat.insert(1,1) = 2.0;   eigen_sparsemat.insert(1,2) = -1.0;
  eigen_sparsemat.insert(2,2) = -1.0;  eigen_sparsemat.insert(2,3) = -1.0;
  eigen_sparsemat.insert(3,3) = 2.0;   eigen_sparsemat.insert(3,4) = -1.0;
  eigen_sparsemat.insert(5,4) = -1.0;
  //eigen_sparsemat.endFill();

  /**
  * Create and fill a few vectors from the Eigen library:
  **/
  EigenVector eigen_rhs(5);
  Eigen::Map<EigenVector> eigen_rhs_map(eigen_rhs.data(), 5);
  EigenVector eigen_result(6);
  EigenVector eigen_temp(6);

  eigen_rhs(0) = 10.0;
  eigen_rhs(1) = 11.0;
  eigen_rhs(2) = 12.0;
  eigen_rhs(3) = 13.0;
  eigen_rhs(4) = 14.0;


  /**
  * Create the corresponding ViennaCL objects:
  **/
  viennacl::vector<ScalarType> vcl_rhs(5);
  viennacl::vector<ScalarType> vcl_result(6);
  viennacl::matrix<ScalarType> vcl_densemat(6, 5);
  viennacl::compressed_matrix<ScalarType> vcl_sparsemat(6, 5);


  /**
  * Directly copy the Eigen objects to ViennaCL objects
  **/
  viennacl::copy(&(eigen_rhs[0]), &(eigen_rhs[0]) + 5, vcl_rhs.begin());  // Method 1: via iterator interface (cf. std::copy())
  viennacl::copy(eigen_rhs, vcl_rhs);                                     // Method 2: via built-in wrappers (convenience layer)
  viennacl::copy(eigen_rhs_map, vcl_rhs);                                 // Same as method 2, but for a mapped vector

  viennacl::copy(eigen_densemat, vcl_densemat);
  viennacl::copy(eigen_densemat_map, vcl_densemat); //same as above, using mapped matrix
  viennacl::copy(eigen_sparsemat, vcl_sparsemat);
  std::cout << "VCL sparsematrix dimensions: " << vcl_sparsemat.size1() << ", " << vcl_sparsemat.size2() << std::endl;

  // For completeness: Copy matrices from ViennaCL back to Eigen:
  viennacl::copy(vcl_densemat, eigen_densemat2);
  viennacl::copy(vcl_sparsemat, eigen_sparsemat2);


  /**
  * Run dense matrix-vector products and compare results:
  **/
  eigen_result = eigen_densemat * eigen_rhs;
  vcl_result = viennacl::linalg::prod(vcl_densemat, vcl_rhs);
  viennacl::copy(vcl_result, eigen_temp);
  std::cout << "Difference for dense matrix-vector product: " << (eigen_result - eigen_temp).norm() << std::endl;
  std::cout << "Difference for dense matrix-vector product (Eigen->ViennaCL->Eigen): "
            << (eigen_densemat2 * eigen_rhs - eigen_temp).norm() << std::endl;

  /**
  * Run sparse matrix-vector products and compare results:
  **/
  eigen_result = eigen_sparsemat * eigen_rhs;
  vcl_result = viennacl::linalg::prod(vcl_sparsemat, vcl_rhs);
  viennacl::copy(vcl_result, eigen_temp);
  std::cout << "Difference for sparse matrix-vector product: " << (eigen_result - eigen_temp).norm() << std::endl;
  std::cout << "Difference for sparse matrix-vector product (Eigen->ViennaCL->Eigen): "
            << (eigen_sparsemat2 * eigen_rhs - eigen_temp).norm() << std::endl;
}
コード例 #3
0
/**
*   We first setup the respective matrices in uBLAS matrices and then copy them over to the respective ViennaCL objects.
*   After that we run a couple of operations (mostly matrix-vector products).
**/
int main()
{
  typedef float      ScalarType;

  std::size_t size = 4;

  /**
  * Set up ublas objects
  **/
  boost::numeric::ublas::vector<ScalarType> ublas_vec(size);
  boost::numeric::ublas::matrix<ScalarType> ublas_circulant(size, size);
  boost::numeric::ublas::matrix<ScalarType> ublas_hankel(size, size);
  boost::numeric::ublas::matrix<ScalarType> ublas_toeplitz(size, size);
  boost::numeric::ublas::matrix<ScalarType> ublas_vandermonde(size, size);

  for (std::size_t i = 0; i < size; i++)
    for (std::size_t j = 0; j < size; j++)
    {
      ublas_circulant(i,j)   = static_cast<ScalarType>((i - j + size) % size);
      ublas_hankel(i,j)      = static_cast<ScalarType>((i + j) % (2 * size));
      ublas_toeplitz(i,j)    = static_cast<ScalarType>(i) - static_cast<ScalarType>(j);
      ublas_vandermonde(i,j) = std::pow(ScalarType(1.0) + ScalarType(i)/ScalarType(1000.0), ScalarType(j));
    }

  /**
  * Set up ViennaCL objects
  **/
  viennacl::vector<ScalarType>             vcl_vec(size);
  viennacl::vector<ScalarType>             vcl_result(size);
  viennacl::circulant_matrix<ScalarType>   vcl_circulant(size, size);
  viennacl::hankel_matrix<ScalarType>      vcl_hankel(size, size);
  viennacl::toeplitz_matrix<ScalarType>    vcl_toeplitz(size, size);
  viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde(size, size);

  // copy matrices:
  viennacl::copy(ublas_circulant, vcl_circulant);
  viennacl::copy(ublas_hankel, vcl_hankel);
  viennacl::copy(ublas_toeplitz, vcl_toeplitz);
  viennacl::copy(ublas_vandermonde, vcl_vandermonde);

  // fill vectors:
  for (std::size_t i = 0; i < size; i++)
  {
    ublas_vec[i] = ScalarType(i);
    vcl_vec[i] = ScalarType(i);
  }

  /**
  * Add circulant matrices using operator overloads:
  **/
  std::cout << "Circulant matrix before addition: " << vcl_circulant << std::endl << std::endl;
  vcl_circulant += vcl_circulant;
  std::cout << "Circulant matrix after addition: " << vcl_circulant << std::endl << std::endl;

  /**
  * Manipulate single entries of structured matrices.
  * These manipulations are structure-preserving, so any other affected entries are manipulated as well.
  **/
  std::cout << "Hankel matrix before manipulation: " << vcl_hankel << std::endl << std::endl;
  vcl_hankel(1, 2) = ScalarType(3.14);
  std::cout << "Hankel matrix after manipulation: " << vcl_hankel << std::endl << std::endl;

  std::cout << "Vandermonde matrix before manipulation: " << vcl_vandermonde << std::endl << std::endl;
  vcl_vandermonde(1) = ScalarType(1.1); //NOTE: Write access only via row index
  std::cout << "Vandermonde matrix after manipulation: " << vcl_vandermonde << std::endl << std::endl;

  /**
  * Compute matrix-vector product for a Toeplitz matrix (FFT-accelerated). Similarly for the other matrices.
  **/
  std::cout << "Toeplitz matrix: " << vcl_toeplitz << std::endl;
  std::cout << "Vector: " << vcl_vec << std::endl << std::endl;
  vcl_result = viennacl::linalg::prod(vcl_toeplitz, vcl_vec);
  std::cout << "Result of matrix-vector product: " << vcl_result << std::endl << std::endl;

  /**
  *  That's it. Print success message and exit.
  **/
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}
コード例 #4
0
void run_tutorial()
{
  typedef mtl::dense2D<ScalarType>        MTL4DenseMatrix;
  typedef mtl::compressed2D<ScalarType>   MTL4SparseMatrix;

  /**
  * Create and fill dense matrices from the MTL4 library:
  **/
  mtl::dense2D<ScalarType>   mtl4_densemat(5, 5);
  mtl::dense2D<ScalarType>   mtl4_densemat2(5, 5);
  mtl4_densemat(0,0) = 2.0;   mtl4_densemat(0,1) = -1.0;
  mtl4_densemat(1,0) = -1.0;  mtl4_densemat(1,1) =  2.0;  mtl4_densemat(1,2) = -1.0;
  mtl4_densemat(2,1) = -1.0;  mtl4_densemat(2,2) = -1.0;  mtl4_densemat(2,3) = -1.0;
  mtl4_densemat(3,2) = -1.0;  mtl4_densemat(3,3) =  2.0;  mtl4_densemat(3,4) = -1.0;
                              mtl4_densemat(4,4) = -1.0;  mtl4_densemat(4,4) = -1.0;


  /**
  * Create and fill sparse matrices from the MTL4 library:
  **/
  MTL4SparseMatrix mtl4_sparsemat;
  set_to_zero(mtl4_sparsemat);
  mtl4_sparsemat.change_dim(5, 5);

  MTL4SparseMatrix mtl4_sparsemat2;
  set_to_zero(mtl4_sparsemat2);
  mtl4_sparsemat2.change_dim(5, 5);

  {
    mtl::matrix::inserter< MTL4SparseMatrix >  ins(mtl4_sparsemat);
    typedef typename mtl::Collection<MTL4SparseMatrix>::value_type  ValueType;
    ins(0,0) <<  ValueType(2.0);   ins(0,1) << ValueType(-1.0);
    ins(1,1) <<  ValueType(2.0);   ins(1,2) << ValueType(-1.0);
    ins(2,2) << ValueType(-1.0);   ins(2,3) << ValueType(-1.0);
    ins(3,3) <<  ValueType(2.0);   ins(3,4) << ValueType(-1.0);
    ins(4,4) << ValueType(-1.0);
  }

  /**
  * Create and fill a few vectors from the MTL4 library:
  **/
  mtl::dense_vector<ScalarType> mtl4_rhs(5, 0.0);
  mtl::dense_vector<ScalarType> mtl4_result(5, 0.0);
  mtl::dense_vector<ScalarType> mtl4_temp(5, 0.0);


  mtl4_rhs(0) = 10.0;
  mtl4_rhs(1) = 11.0;
  mtl4_rhs(2) = 12.0;
  mtl4_rhs(3) = 13.0;
  mtl4_rhs(4) = 14.0;

  /**
  * Create the corresponding ViennaCL objects:
  **/
  viennacl::vector<ScalarType> vcl_rhs(5);
  viennacl::vector<ScalarType> vcl_result(5);
  viennacl::matrix<ScalarType> vcl_densemat(5, 5);
  viennacl::compressed_matrix<ScalarType> vcl_sparsemat(5, 5);

  /**
  * Directly copy the MTL4 objects to ViennaCL objects
  **/
  viennacl::copy(&(mtl4_rhs[0]), &(mtl4_rhs[0]) + 5, vcl_rhs.begin());  //method 1: via iterator interface (cf. std::copy())
  viennacl::copy(mtl4_rhs, vcl_rhs);  //method 2: via built-in wrappers (convenience layer)

  viennacl::copy(mtl4_densemat, vcl_densemat);
  viennacl::copy(mtl4_sparsemat, vcl_sparsemat);

  // For completeness: Copy matrices from ViennaCL back to Eigen:
  viennacl::copy(vcl_densemat, mtl4_densemat2);
  viennacl::copy(vcl_sparsemat, mtl4_sparsemat2);

  /**
  * Run dense matrix-vector products and compare results:
  **/
  mtl4_result = mtl4_densemat * mtl4_rhs;
  vcl_result = viennacl::linalg::prod(vcl_densemat, vcl_rhs);
  viennacl::copy(vcl_result, mtl4_temp);
  mtl4_result -= mtl4_temp;
  std::cout << "Difference for dense matrix-vector product: " << mtl::two_norm(mtl4_result) << std::endl;
  mtl4_result = mtl4_densemat2 * mtl4_rhs - mtl4_temp;
  std::cout << "Difference for dense matrix-vector product (MTL4->ViennaCL->MTL4): "
            << mtl::two_norm(mtl4_result) << std::endl;

  /**
  * Run sparse matrix-vector products and compare results:
  **/
  mtl4_result = mtl4_sparsemat * mtl4_rhs;
  vcl_result = viennacl::linalg::prod(vcl_sparsemat, vcl_rhs);
  viennacl::copy(vcl_result, mtl4_temp);
  mtl4_result -= mtl4_temp;
  std::cout << "Difference for sparse matrix-vector product: " << mtl::two_norm(mtl4_result) << std::endl;
  mtl4_result = mtl4_sparsemat2 * mtl4_rhs - mtl4_temp;
  std::cout << "Difference for sparse matrix-vector product (MTL4->ViennaCL->MTL4): "
            << mtl::two_norm(mtl4_result) << std::endl;

}
コード例 #5
0
void run_test()
{
  //
  // get Eigen matrix and vector types for the provided ScalarType:
  //
  typedef typename Eigen_dense_matrix<ScalarType>::type  EigenMatrix;
  typedef typename Eigen_vector<ScalarType>::type        EigenVector;
  
  //
  // Create and fill dense matrices from the Eigen library:
  //
  EigenMatrix eigen_densemat(6, 5);
  EigenMatrix eigen_densemat2(6, 5);
  eigen_densemat(0,0) = 2.0;   eigen_densemat(0,1) = -1.0;
  eigen_densemat(1,0) = -1.0;  eigen_densemat(1,1) =  2.0;  eigen_densemat(1,2) = -1.0;
  eigen_densemat(2,1) = -1.0;  eigen_densemat(2,2) = -1.0;  eigen_densemat(2,3) = -1.0;
  eigen_densemat(3,2) = -1.0;  eigen_densemat(3,3) =  2.0;  eigen_densemat(3,4) = -1.0;
                               eigen_densemat(5,4) = -1.0;  eigen_densemat(4,4) = -1.0;

  //
  // Create and fill sparse matrices from the Eigen library:
  //
  Eigen::SparseMatrix<ScalarType, Eigen::RowMajor> eigen_sparsemat(6, 5);
  Eigen::SparseMatrix<ScalarType, Eigen::RowMajor> eigen_sparsemat2(6, 5);
  eigen_sparsemat.reserve(5*2);
  eigen_sparsemat.insert(0,0) = 2.0;   eigen_sparsemat.insert(0,1) = -1.0;
  eigen_sparsemat.insert(1,1) = 2.0;   eigen_sparsemat.insert(1,2) = -1.0;
  eigen_sparsemat.insert(2,2) = -1.0;  eigen_sparsemat.insert(2,3) = -1.0;
  eigen_sparsemat.insert(3,3) = 2.0;   eigen_sparsemat.insert(3,4) = -1.0;
  eigen_sparsemat.insert(5,4) = -1.0;
  //eigen_sparsemat.endFill();
  
  //
  // Create and fill a few vectors from the Eigen library:
  //
  EigenVector eigen_rhs(5);
  EigenVector eigen_result(6);
  EigenVector eigen_temp(6);

  eigen_rhs(0) = 10.0;
  eigen_rhs(1) = 11.0;
  eigen_rhs(2) = 12.0;
  eigen_rhs(3) = 13.0;
  eigen_rhs(4) = 14.0;
  
  
  //
  // Let us create the ViennaCL analogues:
  //
  viennacl::vector<ScalarType> vcl_rhs(5);
  viennacl::vector<ScalarType> vcl_result(6);
  viennacl::matrix<ScalarType> vcl_densemat(6, 5);
  viennacl::compressed_matrix<ScalarType> vcl_sparsemat(6, 5);
  
  
  //
  // Directly copy the Eigen objects to ViennaCL objects
  //
  viennacl::copy(&(eigen_rhs[0]), &(eigen_rhs[0]) + 5, vcl_rhs.begin());  //method 1: via iterator interface (cf. std::copy())
  viennacl::copy(eigen_rhs, vcl_rhs);  //method 2: via built-in wrappers (convenience layer)
  
  viennacl::copy(eigen_densemat, vcl_densemat);
  viennacl::copy(eigen_sparsemat, vcl_sparsemat);
  std::cout << "VCL sparsematrix dimensions: " << vcl_sparsemat.size1() << ", " << vcl_sparsemat.size2() << std::endl;
  
  // For completeness: Copy matrices from ViennaCL back to Eigen:
  viennacl::copy(vcl_densemat, eigen_densemat2);
  viennacl::copy(vcl_sparsemat, eigen_sparsemat2);
  
  
  //
  // Run matrix-vector products and compare results:
  //
  eigen_result = eigen_densemat * eigen_rhs;
  vcl_result = viennacl::linalg::prod(vcl_densemat, vcl_rhs);
  viennacl::copy(vcl_result, eigen_temp);
  std::cout << "Difference for dense matrix-vector product: " << (eigen_result - eigen_temp).norm() << std::endl;
  std::cout << "Difference for dense matrix-vector product (Eigen->ViennaCL->Eigen): "
            << (eigen_densemat2 * eigen_rhs - eigen_temp).norm() << std::endl;
  
  //
  // Same for sparse matrix:
  //          
  eigen_result = eigen_sparsemat * eigen_rhs;
  vcl_result = viennacl::linalg::prod(vcl_sparsemat, vcl_rhs);
  viennacl::copy(vcl_result, eigen_temp);
  std::cout << "Difference for sparse matrix-vector product: " << (eigen_result - eigen_temp).norm() << std::endl;
  std::cout << "Difference for sparse matrix-vector product (Eigen->ViennaCL->Eigen): "
            << (eigen_sparsemat2 * eigen_rhs - eigen_temp).norm() << std::endl;
            
  //
  // Please have a look at the other tutorials on how to use the ViennaCL types
  //
}
コード例 #6
0
int hankel_test(ScalarType epsilon)
{
    std::size_t HANKEL_SIZE = 7;
    viennacl::hankel_matrix<ScalarType> vcl_hankel1(HANKEL_SIZE, HANKEL_SIZE);
    viennacl::hankel_matrix<ScalarType> vcl_hankel2(HANKEL_SIZE, HANKEL_SIZE);

    viennacl::vector<ScalarType> vcl_input(HANKEL_SIZE);
    viennacl::vector<ScalarType> vcl_result(HANKEL_SIZE);

    std::vector<ScalarType> input_ref(HANKEL_SIZE);
    std::vector<ScalarType> result_ref(HANKEL_SIZE);

    dense_matrix<ScalarType> m1(vcl_hankel1.size1(), vcl_hankel1.size2());
    dense_matrix<ScalarType> m2(m1.size1(), m1.size2());

    for (std::size_t i = 0; i < m1.size1(); i++)
      for (std::size_t j = 0; j < m1.size2(); j++)
      {
        m1(i,j) = static_cast<ScalarType>((i + j) % (2 * m1.size1()));
        m2(i,j) = m1(i,j) * m1(i,j) + ScalarType(1);
      }

    for (std::size_t i = 0; i < input_ref.size(); i++)
      input_ref[i] = ScalarType(i);

    // Copy to ViennaCL
    viennacl::copy(m1, vcl_hankel1);
    viennacl::copy(m2, vcl_hankel2);
    viennacl::copy(input_ref, vcl_input);

    //
    // Matrix-Vector product:
    //
    vcl_result = viennacl::linalg::prod(vcl_hankel1, vcl_input);

    for (std::size_t i = 0; i < m1.size1(); i++)     //reference calculation
    {
      ScalarType entry = 0;
      for (std::size_t j = 0; j < m1.size2(); j++)
        entry += m1(i,j) * input_ref[j];

      result_ref[i] = entry;
    }

    viennacl::copy(vcl_result, input_ref);
    std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref);
    if (diff_max(input_ref, result_ref) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      for (std::size_t i=0; i<input_ref.size(); ++i)
        std::cout << "Should: " << result_ref[i] << ", is: " << input_ref[i] << std::endl;
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }


    //
    // Matrix addition:
    //
    vcl_hankel1 += vcl_hankel2;

    for (std::size_t i = 0; i < m1.size1(); i++)    //reference calculation
      for (std::size_t j = 0; j < m1.size2(); j++)
        m1(i,j) += m2(i,j);

    viennacl::copy(vcl_hankel1, m2);
    std::cout << "Matrix Addition: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    //
    // Per-Element access:
    //
    vcl_hankel1(4,2) = 42;

    for (std::size_t i = 0; i < m1.size1(); i++)    //reference calculation
      for (std::size_t j = 0; j < m1.size2(); j++)
      {
        if ((i + j) % (2*m1.size1()) == 6)
          m1(i, j) = 42;
      }

    viennacl::copy(vcl_hankel1, m2);
    std::cout << "Element manipulation: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
コード例 #7
0
int vandermonde_test(ScalarType epsilon)
{
    std::size_t VANDERMONDE_SIZE = 61;

    viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde1(VANDERMONDE_SIZE, VANDERMONDE_SIZE);
    viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde2(VANDERMONDE_SIZE, VANDERMONDE_SIZE);

    viennacl::vector<ScalarType> vcl_input(VANDERMONDE_SIZE);
    viennacl::vector<ScalarType> vcl_result(VANDERMONDE_SIZE);

    std::vector<ScalarType> input_ref(VANDERMONDE_SIZE);
    std::vector<ScalarType> result_ref(VANDERMONDE_SIZE);

    dense_matrix<ScalarType> m1(vcl_vandermonde1.size1(), vcl_vandermonde1.size2());
    dense_matrix<ScalarType> m2(m1.size1(), m1.size2());

    for (std::size_t i = 0; i < m1.size1(); i++)
      for (std::size_t j = 0; j < m1.size2(); j++)
      {
        m1(i,j) = std::pow(ScalarType(1.0 + i/1000.0), ScalarType(j));
        m2(i,j) = std::pow(ScalarType(1.0 - i/2000.0), ScalarType(j));
      }

    for (std::size_t i = 0; i < input_ref.size(); i++)
      input_ref[i] = ScalarType(i);

    // Copy to ViennaCL
    viennacl::copy(m1, vcl_vandermonde1);
    viennacl::copy(m2, vcl_vandermonde2);
    viennacl::copy(input_ref, vcl_input);

    //
    // Matrix-Vector product:
    //
    vcl_result = viennacl::linalg::prod(vcl_vandermonde1, vcl_input);

    for (std::size_t i = 0; i < m1.size1(); i++)     //reference calculation
    {
      ScalarType entry = 0;
      for (std::size_t j = 0; j < m1.size2(); j++)
        entry += m1(i,j) * input_ref[j];

      result_ref[i] = entry;
    }

    viennacl::copy(vcl_result, input_ref);
    std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref);
    if (diff_max(input_ref, result_ref) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      for (std::size_t i=0; i<input_ref.size(); ++i)
        std::cout << "Should: " << result_ref[i] << ", is: " << input_ref[i] << std::endl;
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }


    //
    // Note: Matrix addition does not make sense for a Vandermonde matrix
    //


    //
    // Per-Element access:
    //
    vcl_vandermonde1(4) = static_cast<ScalarType>(1.0001);

    for (std::size_t j = 0; j < m1.size2(); j++)
    {
      m1(4, j) = std::pow(ScalarType(1.0001), ScalarType(j));
    }

    viennacl::copy(vcl_vandermonde1, m2);
    std::cout << "Element manipulation: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
コード例 #8
0
int toeplitz_test(ScalarType epsilon)
{
    std::size_t TOEPLITZ_SIZE = 47;
    viennacl::toeplitz_matrix<ScalarType> vcl_toeplitz1(TOEPLITZ_SIZE, TOEPLITZ_SIZE);
    viennacl::toeplitz_matrix<ScalarType> vcl_toeplitz2(TOEPLITZ_SIZE, TOEPLITZ_SIZE);

    viennacl::vector<ScalarType> vcl_input(TOEPLITZ_SIZE);
    viennacl::vector<ScalarType> vcl_result(TOEPLITZ_SIZE);

    std::vector<ScalarType> input_ref(TOEPLITZ_SIZE);
    std::vector<ScalarType> result_ref(TOEPLITZ_SIZE);

    dense_matrix<ScalarType> m1(TOEPLITZ_SIZE, TOEPLITZ_SIZE);
    dense_matrix<ScalarType> m2(TOEPLITZ_SIZE, TOEPLITZ_SIZE);

    for (std::size_t i = 0; i < TOEPLITZ_SIZE; i++)
      for (std::size_t j = 0; j < TOEPLITZ_SIZE; j++)
      {
        m1(i,j) = static_cast<ScalarType>(i) - static_cast<ScalarType>(j);
        m2(i,j) = m1(i,j) * m1(i,j) + ScalarType(1);
      }

    for (std::size_t i = 0; i < TOEPLITZ_SIZE; i++)
      input_ref[i] = ScalarType(i);

    // Copy to ViennaCL
    viennacl::copy(m1, vcl_toeplitz1);
    viennacl::copy(m2, vcl_toeplitz2);
    viennacl::copy(input_ref, vcl_input);

    //
    // Matrix-Vector product:
    //
    vcl_result = viennacl::linalg::prod(vcl_toeplitz1, vcl_input);

    for (std::size_t i = 0; i < m1.size1(); i++)     //reference calculation
    {
      ScalarType entry = 0;
      for (std::size_t j = 0; j < m1.size2(); j++)
        entry += m1(i,j) * input_ref[j];

      result_ref[i] = entry;
    }

    viennacl::copy(vcl_result, input_ref);
    std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref);
    if (diff_max(input_ref, result_ref) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      for (std::size_t i=0; i<input_ref.size(); ++i)
        std::cout << "Should: " << result_ref[i] << ", is: " << input_ref[i] << std::endl;
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }


    //
    // Matrix addition:
    //
    vcl_toeplitz1 += vcl_toeplitz2;

    for (std::size_t i = 0; i < m1.size1(); i++)    //reference calculation
      for (std::size_t j = 0; j < m1.size2(); j++)
        m1(i,j) += m2(i,j);

    viennacl::copy(vcl_toeplitz1, m2);
    std::cout << "Matrix Addition: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    //
    // Per-Element access:
    //
    vcl_toeplitz1(2,4) = 42;

    for (std::size_t i=0; i<m1.size1(); ++i)    //reference calculation
    {
      if (i + 2 < m1.size2())
        m1(i, i+2) = 42;
    }

    viennacl::copy(vcl_toeplitz1, m2);
    std::cout << "Element manipulation: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
コード例 #9
0
ファイル: amg.cpp プロジェクト: jayavanth/viennacl-dev
int main()
{
  //
  // Print some device info
  //
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "               Device Info" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  
#ifdef VIENNACL_WITH_OPENCL
  std::cout << viennacl::ocl::current_device().info() << std::endl;
#endif
  
  typedef float    ScalarType;  // feel free to change this to double if supported by your device


  //
  // Set up the matrices and vectors for the iterative solvers (cf. iterative.cpp)
  //
  boost::numeric::ublas::vector<ScalarType> ublas_vec, ublas_result;
  boost::numeric::ublas::compressed_matrix<ScalarType> ublas_matrix;
  
  viennacl::linalg::cg_tag cg_solver;
  viennacl::linalg::amg_tag amg_tag;
  viennacl::linalg::amg_precond<boost::numeric::ublas::compressed_matrix<ScalarType> > ublas_amg;
    
  // Read matrix
  if (!viennacl::io::read_matrix_market_file(ublas_matrix, "../examples/testdata/mat65k.mtx"))
  {
    std::cout << "Error reading Matrix file" << std::endl;
    return EXIT_FAILURE;
  }
  
  // Set up rhs and result vector
  if (!readVectorFromFile("../examples/testdata/rhs65025.txt", ublas_vec))
  {
    std::cout << "Error reading RHS file" << std::endl;
    return 0;
  }

  if (!readVectorFromFile("../examples/testdata/result65025.txt", ublas_result))
  {
    std::cout << "Error reading Result file" << std::endl;
    return 0;
  }
  
  viennacl::vector<ScalarType> vcl_vec(ublas_vec.size());
  viennacl::vector<ScalarType> vcl_result(ublas_vec.size());
  viennacl::compressed_matrix<ScalarType> vcl_compressed_matrix(ublas_vec.size(), ublas_vec.size());

  // Copy to GPU
  viennacl::copy(ublas_matrix, vcl_compressed_matrix);
  viennacl::copy(ublas_vec, vcl_vec);
  viennacl::copy(ublas_result, vcl_result);

  //
  // Run solver without preconditioner
  //
  std::cout << "-- CG solver (CPU, no preconditioner) --" << std::endl;
  run_solver(ublas_matrix, ublas_vec, ublas_result, cg_solver, viennacl::linalg::no_precond());
  
  std::cout << "-- CG solver (GPU, no preconditioner) --" << std::endl;   
  run_solver(vcl_compressed_matrix, vcl_vec, vcl_result, cg_solver, viennacl::linalg::no_precond());
  
  //
  // With AMG Preconditioner RS+DIRECT
  //
  amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS,       // coarsening strategy
                                      VIENNACL_AMG_INTERPOL_DIRECT, // interpolation strategy
                                      0.25, // strength of dependence threshold
                                      0.2,  // interpolation weight
                                      0.67, // jacobi smoother weight
                                      3,    // presmoothing steps
                                      3,    // postsmoothing steps
                                      0);   // number of coarse levels to be used (0: automatically use as many as reasonable)
  run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS COARSENING, DIRECT INTERPOLATION", amg_tag);
  
  //
  // With AMG Preconditioner RS+CLASSIC
  //
  amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS, VIENNACL_AMG_INTERPOL_CLASSIC, 0.25, 0.2, 0.67, 3, 3, 0);
  run_amg ( cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS COARSENING, CLASSIC INTERPOLATION", amg_tag);
  
  //
  // With AMG Preconditioner ONEPASS+DIRECT
  //
  amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_ONEPASS, VIENNACL_AMG_INTERPOL_DIRECT,0.25, 0.2, 0.67, 3, 3, 0);
  run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "ONEPASS COARSENING, DIRECT INTERPOLATION", amg_tag);
  
  //
  // With AMG Preconditioner RS0+DIRECT
  //
  amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS0, VIENNACL_AMG_INTERPOL_DIRECT, 0.25, 0.2, 0.67, 3, 3, 0);
  run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS0 COARSENING, DIRECT INTERPOLATION", amg_tag);
  
  //
  // With AMG Preconditioner RS3+DIRECT
  //
  amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS3, VIENNACL_AMG_INTERPOL_DIRECT, 0.25, 0.2, 0.67, 3, 3, 0);
  run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS3 COARSENING, DIRECT INTERPOLATION", amg_tag);
  
  //
  // With AMG Preconditioner AG
  //
  amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_AG, VIENNACL_AMG_INTERPOL_AG, 0.08, 0, 0.67, 3, 3, 0);
  run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "AG COARSENING, AG INTERPOLATION", amg_tag);
  
  //
  // With AMG Preconditioner SA
  //
  amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_AG, VIENNACL_AMG_INTERPOL_SA, 0.08, 0.67, 0.67, 3, 3, 0);
  run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "AG COARSENING, SA INTERPOLATION",amg_tag);
  
  
  //
  //  That's it.
  //
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
  
  return EXIT_SUCCESS;
}
コード例 #10
0
int main() 
{
  typedef float      ScalarType;
    
  std::size_t size = 4;
  
  //
  // Set up ublas objects
  //
  boost::numeric::ublas::vector<ScalarType> ublas_vec(size);
  boost::numeric::ublas::matrix<ScalarType> ublas_circulant(size, size);
  boost::numeric::ublas::matrix<ScalarType> ublas_hankel(size, size);
  boost::numeric::ublas::matrix<ScalarType> ublas_toeplitz(size, size);
  boost::numeric::ublas::matrix<ScalarType> ublas_vandermonde(size, size);

  for(std::size_t i = 0; i < size; i++) 
    for(std::size_t j = 0; j < size; j++) 
    {
      ublas_circulant(i,j)   = static_cast<ScalarType>((i - j + size) % size);
      ublas_hankel(i,j)      = static_cast<ScalarType>((i + j) % (2 * size));
      ublas_toeplitz(i,j)    = static_cast<ScalarType>(i) - static_cast<ScalarType>(j);
      ublas_vandermonde(i,j) = pow(ScalarType(1.0 + i/1000.0), ScalarType(j));
    }

  //
  // Set up ViennaCL objects
  //
  viennacl::vector<ScalarType>             vcl_vec(size);
  viennacl::vector<ScalarType>             vcl_result(size);
  viennacl::circulant_matrix<ScalarType>   vcl_circulant(size, size);
  viennacl::hankel_matrix<ScalarType>      vcl_hankel(size, size);
  viennacl::toeplitz_matrix<ScalarType>    vcl_toeplitz(size, size);
  viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde(size, size);

  // copy matrices:
  viennacl::copy(ublas_circulant, vcl_circulant);
  viennacl::copy(ublas_hankel, vcl_hankel);
  viennacl::copy(ublas_toeplitz, vcl_toeplitz);
  viennacl::copy(ublas_vandermonde, vcl_vandermonde);
  
  // fill vectors:
  for(std::size_t i = 0; i < size; i++)
  {
    ublas_vec[i] = ScalarType(i);
    vcl_vec[i] = ScalarType(i);
  }
  
  //
  // Add matrices:
  //
  std::cout << "Circulant matrix before addition: " << vcl_circulant << std::endl << std::endl;
  vcl_circulant += vcl_circulant;
  std::cout << "Circulant matrix after addition: " << vcl_circulant << std::endl << std::endl;
  
  //
  // Manipulate single entry
  //
  std::cout << "Hankel matrix before manipulation: " << vcl_hankel << std::endl << std::endl;
  vcl_hankel(1, 2) = ScalarType(3.14);
  std::cout << "Hankel matrix after manipulation: " << vcl_hankel << std::endl << std::endl;
  
  std::cout << "Vandermonde matrix before manipulation: " << vcl_vandermonde << std::endl << std::endl;
  vcl_vandermonde(1) = ScalarType(1.1); //NOTE: Write access only via row index
  std::cout << "Vandermonde matrix after manipulation: " << vcl_vandermonde << std::endl << std::endl;
  
  //
  // Compute matrix-vector product (FFT-accelerated)
  //
  std::cout << "Toeplitz matrix: " << vcl_toeplitz << std::endl;
  std::cout << "Vector: " << vcl_vec << std::endl << std::endl;
  vcl_result = viennacl::linalg::prod(vcl_toeplitz, vcl_vec);
  std::cout << "Result of matrix-vector product: " << vcl_result << std::endl << std::endl;
  
  //
  //  That's it.
  //
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}
コード例 #11
0
/**
* With this let us go right to main():
**/
int main()
{
  typedef float       ScalarType;


  /**
  * <h2>Part 1: Set up a custom context</h2>
  *
  * The following is rather lengthy because OpenCL is a fairly low-level framework.
  * For comparison, the subsequent code explicitly performs the OpenCL setup that is done
  * in the background within the 'custom_kernels'-tutorial
  **/

  //manually set up a custom OpenCL context:
  std::vector<cl_device_id> device_id_array;

  //get all available devices
  viennacl::ocl::platform pf;
  std::cout << "Platform info: " << pf.info() << std::endl;
  std::vector<viennacl::ocl::device> devices = pf.devices(CL_DEVICE_TYPE_DEFAULT);
  std::cout << devices[0].name() << std::endl;
  std::cout << "Number of devices for custom context: " << devices.size() << std::endl;

  //set up context using all found devices:
  for (std::size_t i=0; i<devices.size(); ++i)
  {
      device_id_array.push_back(devices[i].id());
  }

  std::cout << "Creating context..." << std::endl;
  cl_int err;
  cl_context my_context = clCreateContext(0, cl_uint(device_id_array.size()), &(device_id_array[0]), NULL, NULL, &err);
  VIENNACL_ERR_CHECK(err);


  //create two Vectors:
  unsigned int vector_size = 10;
  std::vector<ScalarType> vec1(vector_size);
  std::vector<ScalarType> vec2(vector_size);
  std::vector<ScalarType> result(vector_size);

  //
  // fill the operands vec1 and vec2:
  //
  for (unsigned int i=0; i<vector_size; ++i)
  {
    vec1[i] = static_cast<ScalarType>(i);
    vec2[i] = static_cast<ScalarType>(vector_size-i);
  }

  //
  // create memory in OpenCL context:
  //
  cl_mem mem_vec1 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(vec1[0]), &err);
  VIENNACL_ERR_CHECK(err);
  cl_mem mem_vec2 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(vec2[0]), &err);
  VIENNACL_ERR_CHECK(err);
  cl_mem mem_result = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(result[0]), &err);
  VIENNACL_ERR_CHECK(err);

  //
  // create a command queue for each device:
  //

  std::vector<cl_command_queue> queues(devices.size());
  for (std::size_t i=0; i<devices.size(); ++i)
  {
    queues[i] = clCreateCommandQueue(my_context, devices[i].id(), 0, &err);
    VIENNACL_ERR_CHECK(err);
  }

  //
  // create and build a program in the context:
  //
  std::size_t source_len = std::string(my_compute_program).length();
  cl_program my_prog = clCreateProgramWithSource(my_context, 1, &my_compute_program, &source_len, &err);
  err = clBuildProgram(my_prog, 0, NULL, NULL, NULL, NULL);

/*            char buffer[1024];
            cl_build_status status;
            clGetProgramBuildInfo(my_prog, devices[1].id(), CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL);
            clGetProgramBuildInfo(my_prog, devices[1].id(), CL_PROGRAM_BUILD_LOG, sizeof(char)*1024, &buffer, NULL);
            std::cout << "Build Scalar: Err = " << err << " Status = " << status << std::endl;
            std::cout << "Log: " << buffer << std::endl;*/

  VIENNACL_ERR_CHECK(err);

  //
  // create a kernel from the program:
  //
  const char * kernel_name = "elementwise_prod";
  cl_kernel my_kernel = clCreateKernel(my_prog, kernel_name, &err);
  VIENNACL_ERR_CHECK(err);


  //
  // Execute elementwise_prod kernel on first queue: result = vec1 .* vec2;
  //
  err = clSetKernelArg(my_kernel, 0, sizeof(cl_mem), (void*)&mem_vec1);
  VIENNACL_ERR_CHECK(err);
  err = clSetKernelArg(my_kernel, 1, sizeof(cl_mem), (void*)&mem_vec2);
  VIENNACL_ERR_CHECK(err);
  err = clSetKernelArg(my_kernel, 2, sizeof(cl_mem), (void*)&mem_result);
  VIENNACL_ERR_CHECK(err);
  err = clSetKernelArg(my_kernel, 3, sizeof(unsigned int), (void*)&vector_size);
  VIENNACL_ERR_CHECK(err);
  std::size_t global_size = vector_size;
  std::size_t local_size = vector_size;
  err = clEnqueueNDRangeKernel(queues[0], my_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
  VIENNACL_ERR_CHECK(err);


  //
  // Read and output result:
  //
  err = clEnqueueReadBuffer(queues[0], mem_vec1, CL_TRUE, 0, sizeof(ScalarType)*vector_size, &(vec1[0]), 0, NULL, NULL);
  VIENNACL_ERR_CHECK(err);
  err = clEnqueueReadBuffer(queues[0], mem_result, CL_TRUE, 0, sizeof(ScalarType)*vector_size, &(result[0]), 0, NULL, NULL);
  VIENNACL_ERR_CHECK(err);

  std::cout << "vec1  : ";
  for (std::size_t i=0; i<vec1.size(); ++i)
    std::cout << vec1[i] << " ";
  std::cout << std::endl;

  std::cout << "vec2  : ";
  for (std::size_t i=0; i<vec2.size(); ++i)
    std::cout << vec2[i] << " ";
  std::cout << std::endl;

  std::cout << "result: ";
  for (std::size_t i=0; i<result.size(); ++i)
    std::cout << result[i] << " ";
  std::cout << std::endl;

  /**
  * <h2>Part 2: Reuse Custom OpenCL Context with ViennaCL</h2>
  *
  * To let ViennaCL reuse the previously created context, we need to make it known to ViennaCL \em before any ViennaCL objects are created.
  * We inject the custom context as the context with default id '0' when using viennacl::ocl::switch_context().
  **/
  viennacl::ocl::setup_context(0, my_context, device_id_array, queues);
  viennacl::ocl::switch_context(0); //activate the new context (only mandatory with context-id not equal to zero)

  /**
  * Check that ViennaCL really uses the new context:
  **/
  std::cout << "Existing context: " << my_context << std::endl;
  std::cout << "ViennaCL uses context: " << viennacl::ocl::current_context().handle().get() << std::endl;

  /**
  * Wrap existing OpenCL objects into ViennaCL:
  **/
  viennacl::vector<ScalarType> vcl_vec1(mem_vec1, vector_size);
  viennacl::vector<ScalarType> vcl_vec2(mem_vec2, vector_size);
  viennacl::vector<ScalarType> vcl_result(mem_result, vector_size);
  viennacl::scalar<ScalarType> vcl_s = 2.0;

  std::cout << "Standard vector operations within ViennaCL:" << std::endl;
  vcl_result = vcl_s * vcl_vec1 + vcl_vec2;

  std::cout << "vec1  : ";
  std::cout << vcl_vec1 << std::endl;

  std::cout << "vec2  : ";
  std::cout << vcl_vec2 << std::endl;

  std::cout << "result: ";
  std::cout << vcl_result << std::endl;

  /**
  * We can also reuse the existing elementwise_prod kernel.
  * Therefore, we first have to make the existing program known to ViennaCL
  * For more details on the three lines, see tutorial 'custom-kernels'
  **/
  std::cout << "Using existing kernel within the OpenCL backend of ViennaCL:" << std::endl;
  viennacl::ocl::program & my_vcl_prog = viennacl::ocl::current_context().add_program(my_prog, "my_compute_program");
  viennacl::ocl::kernel & my_vcl_kernel = my_vcl_prog.add_kernel(my_kernel, "elementwise_prod");
  viennacl::ocl::enqueue(my_vcl_kernel(vcl_vec1, vcl_vec2, vcl_result, static_cast<cl_uint>(vcl_vec1.size())));  //Note that std::size_t might differ between host and device. Thus, a cast to cl_uint is necessary here.

  std::cout << "vec1  : ";
  std::cout << vcl_vec1 << std::endl;

  std::cout << "vec2  : ";
  std::cout << vcl_vec2 << std::endl;

  std::cout << "result: ";
  std::cout << vcl_result << std::endl;


  /**
  * Since a linear piece of memory can be interpreted in several ways,
  * we will now create a 3x3 row-major matrix out of the linear memory in mem_vec1/
  * The first three entries in vcl_vec2 and vcl_result are used to carry out matrix-vector products:
  **/
  viennacl::matrix<ScalarType> vcl_matrix(mem_vec1, 3, 3);

  vcl_vec2.resize(3);   //note that the resize operation leads to new memory, thus vcl_vec2 is now at a different memory location (values are copied)
  vcl_result.resize(3); //note that the resize operation leads to new memory, thus vcl_vec2 is now at a different memory location (values are copied)
  vcl_result = viennacl::linalg::prod(vcl_matrix, vcl_vec2);

  std::cout << "result of matrix-vector product: ";
  std::cout << vcl_result << std::endl;

  /**
  *  Any further operations can be carried out in the same way.
  *  Just keep in mind that any resizing of vectors or matrices leads to a reallocation of the underlying memory buffer, through which the 'wrapper' is lost.
  **/
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}