Example #1
0
//
// Compares a series of vector operations evaluated on uBLAS vectors (host reference)
// against the same operations evaluated on ViennaCL vectors.
//
//   epsilon             Tolerance object, forwarded unchanged to every check() call.
//   ublas_v1, ublas_v2  Reference vectors; both are overwritten repeatedly.
//   vcl_v1, vcl_v2      ViennaCL vectors; both are overwritten repeatedly.
//
// Returns EXIT_SUCCESS if every check() call reported EXIT_SUCCESS. The first
// mismatch returns EXIT_FAILURE immediately; the remaining checks are skipped.
//
// NumericT, the vector types, check() and random<>() are declared outside this block.
// NOTE(review): the fill loops index both uBLAS vectors using ublas_v1.size() and the
// unit_vector check uses index 5, so all vectors presumably share one size > 5 - confirm
// against the caller.
//
int test(Epsilon const& epsilon,
         UblasVectorType     & ublas_v1, UblasVectorType     & ublas_v2,
         ViennaCLVectorType1 &   vcl_v1, ViennaCLVectorType2 &   vcl_v2)
{
  int retval = EXIT_SUCCESS;

  // Deliberately different start values for the host and the device scalar.
  NumericT                    cpu_result = 42.0;
  viennacl::scalar<NumericT>  gpu_result = 43.0;

  //
  // Initializer:
  //
  std::cout << "Checking for zero_vector initializer..." << std::endl;
  ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size());
  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for scalar_vector initializer..." << std::endl;
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), cpu_result);
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Same initializer, this time fed from the ViennaCL scalar.
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), gpu_result);
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for unit_vector initializer..." << std::endl;
  ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5);
  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  // Fill both reference vectors with 1 + random<NumericT>().
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
  {
    ublas_v1[i] = NumericT(1.0) + random<NumericT>();
    ublas_v2[i] = NumericT(1.0) + random<NumericT>();
  }

  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  //
  // Part 1: Norms and inner product
  //

  // --------------------------------------------------------------------------
  std::cout << "Testing inner_prod..." << std::endl;
  cpu_result = viennacl::linalg::inner_prod(ublas_v1, ublas_v2);
  // The ViennaCL result is assigned once to a host scalar and once to a ViennaCL scalar.
  NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
  gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);

  if (check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  std::cout << "Testing norm_1..." << std::endl;
  cpu_result = ublas::norm_1(ublas_v1);
  gpu_result = viennacl::linalg::norm_1(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Second pass with swapped targets: the uBLAS value goes into the ViennaCL scalar
  // and the ViennaCL norm into the host scalar. gpu_result is first overwritten with
  // a different value (2 * cpu_result).
  gpu_result = 2 * cpu_result; //reset
  gpu_result = ublas::norm_1(ublas_v1);
  cpu_result = viennacl::linalg::norm_1(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  std::cout << "Testing norm_2..." << std::endl;
  cpu_result = ublas::norm_2(ublas_v1);
  gpu_result = viennacl::linalg::norm_2(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Swapped targets, as for norm_1.
  gpu_result = 2 * cpu_result; //reset
  gpu_result = ublas::norm_2(ublas_v1);
  cpu_result = viennacl::linalg::norm_2(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  std::cout << "Testing norm_inf..." << std::endl;
  cpu_result = ublas::norm_inf(ublas_v1);
  gpu_result = viennacl::linalg::norm_inf(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Swapped targets, as for norm_1.
  gpu_result = 2 * cpu_result; //reset
  gpu_result = ublas::norm_inf(ublas_v1);
  cpu_result = viennacl::linalg::norm_inf(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  std::cout << "Testing index_norm_inf..." << std::endl;
  std::size_t cpu_index = ublas::index_norm_inf(ublas_v1);
  std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1);

  // Indices are converted to NumericT so that the scalar check() can be reused.
  if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  // Element access at the index reported by index_norm_inf.
  cpu_result = ublas_v1[index_norm_inf(ublas_v1)];
  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)];

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // Plane rotation and assignments
  //

  // --------------------------------------------------------------------------

  // Host reference for the rotation:
  //   x_new =  1.1 * x + 2.3 * y
  //   y_new = -2.3 * x + 1.1 * y
  // t holds x_new until y has been computed from the old x.
  ublas::vector<NumericT> x = ublas_v1;
  ublas::vector<NumericT> y = ublas_v2;
  ublas::vector<NumericT> t = ublas_v1;
  t.assign (NumericT(1.1) * x + NumericT(2.3) * y);
  y.assign (- NumericT(2.3) * x + NumericT(1.1) * y);
  x.assign (t);

  viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1.1), NumericT(2.3));

  if (check(x, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(y, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------

  std::cout << "Testing assignments..." << std::endl;
  NumericT val = static_cast<NumericT>(1e-3);
  for (size_t i=0; i < ublas_v1.size(); ++i)
    ublas_v1(i) = val;

  // Element-wise write access on the ViennaCL vector.
  for (size_t i=0; i < vcl_v1.size(); ++i)
    vcl_v1(i) = val;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // multiplication and division of vectors by scalars
  //
  std::cout << "Testing scaling with CPU scalar..." << std::endl;
  NumericT alpha = static_cast<NumericT>(2.7182);
  viennacl::scalar<NumericT> gpu_alpha = alpha;

  ublas_v1  *= alpha;
  vcl_v1    *= alpha;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing scaling with GPU scalar..." << std::endl;
  ublas_v1  *= alpha;
  vcl_v1    *= gpu_alpha;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  NumericT beta  = static_cast<NumericT>(1.4153);
  viennacl::scalar<NumericT> gpu_beta = beta;

  std::cout << "Testing shrinking with CPU scalar..." << std::endl;
  ublas_v1 /= beta;
  vcl_v1   /= beta;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing shrinking with GPU scalar..." << std::endl;
  ublas_v1 /= beta;
  vcl_v1   /= gpu_beta;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;



  //
  // add and inplace_add of vectors
  //

  // From here on most cases start by setting ublas_v2 to a multiple of ublas_v1
  // and copying both host vectors to the ViennaCL vectors.
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing add on vector..." << std::endl;

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1     = ublas_v1 + ublas_v2;
  vcl_v1       =   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace-add on vector..." << std::endl;
  ublas_v1 += ublas_v2;
  vcl_v1   +=   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  //
  // subtract and inplace_subtract of vectors
  //
  std::cout << "Testing sub on vector..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1     = ublas_v1 - ublas_v2;
  vcl_v1       =   vcl_v1 -   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace-sub on vector..." << std::endl;
  ublas_v1 -= ublas_v2;
  vcl_v1   -= vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;



  //
  // multiply-add
  //
  std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 + alpha * ublas_v2;
  vcl_v1   = vcl_v1   + alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 + ublas_v2;
  vcl_v1   = alpha *   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 + beta * ublas_v2;
  vcl_v1   = alpha *   vcl_v1 + beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 += alpha * ublas_v2;
  vcl_v1   += alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 +     alpha * ublas_v2;
  vcl_v1   = vcl_v1   + gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  // The scalar multiplies the LEFT operand here, mirroring the CPU scalar (left) case.
  ublas_v1 =     alpha * ublas_v1 + ublas_v2;
  vcl_v1   = gpu_alpha *   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 =     alpha * ublas_v1 +     beta * ublas_v2;
  vcl_v1   = gpu_alpha *   vcl_v1 + gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 +=     alpha * ublas_v1 +     beta * ublas_v2;
  vcl_v1   += gpu_alpha *   vcl_v1 + gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 +=     alpha * ublas_v1 -     beta * ublas_v2;
  vcl_v1   += gpu_alpha *   vcl_v1 - gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;



  std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 +=     alpha * ublas_v2;
  vcl_v1   += gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;



  //
  // multiply-subtract
  //
  std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 - alpha * ublas_v2;
  vcl_v1   = vcl_v1   - alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 - ublas_v2;
  vcl_v1   = alpha * vcl_v1   -   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 - beta * ublas_v2;
  vcl_v1   = alpha * vcl_v1   - beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 -= alpha * ublas_v2;
  vcl_v1   -= alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 -     alpha * ublas_v2;
  vcl_v1   = vcl_v1   - gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  // The scalar multiplies the LEFT operand here, mirroring the CPU scalar (left) case.
  ublas_v1 =     alpha * ublas_v1 - ublas_v2;
  vcl_v1   = gpu_alpha *   vcl_v1 -   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 =     alpha * ublas_v1 -     beta * ublas_v2;
  vcl_v1   = gpu_alpha * vcl_v1   - gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 -=     alpha * ublas_v1 +     beta * ublas_v2;
  vcl_v1   -= gpu_alpha * vcl_v1   + gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 -=     alpha * ublas_v1 -     beta * ublas_v2;
  vcl_v1   -= gpu_alpha * vcl_v1   - gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 -=     alpha * ublas_v2;
  vcl_v1   -= gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;



  //
  // More complicated expressions (for ensuring the operator overloads work correctly)
  //

  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing three vector additions..." << std::endl;
  ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2;
  vcl_v1   =   vcl_v2 +   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl;
  ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2);
  vcl_v1   = beta * (vcl_v1   - alpha * vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl;
  ublas_v1 =     beta * (ublas_v1 -     alpha * ublas_v2);
  vcl_v1   = gpu_beta * (vcl_v1   - gpu_alpha * vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing swap..." << std::endl;
  swap(ublas_v1, ublas_v2);
  swap(vcl_v1, vcl_v2);

  // Verify both halves of the exchange, not only the first vector.
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  // Fresh values of the form 1 + random<NumericT>() for the element-wise operations.
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
  {
    ublas_v1[i] = NumericT(1.0) + random<NumericT>();
    ublas_v2[i] = NumericT(1.0) + random<NumericT>();
  }

  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  // Each element-wise operation is run with plain vectors and with a sum expression
  // as first, second, and both arguments.
  std::cout << "Testing elementwise multiplication..." << std::endl;
  ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing elementwise division..." << std::endl;
  ublas_v1 = ublas::element_div(ublas_v1, ublas_v2);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_div(ublas_v1, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing another complicated vector expression with CPU scalars..." << std::endl;
  ublas_v1 = ublas_v2 / alpha + beta * (ublas_v1 - alpha*ublas_v2);
  vcl_v1   = vcl_v2 / alpha   + beta * (vcl_v1   - alpha*vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing another complicated vector expression with GPU scalars..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v2 / alpha   +     beta * (ublas_v1 - alpha*ublas_v2);
  vcl_v1   = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1   - gpu_alpha*vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing lenghty sum of scaled vectors..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  // The ViennaCL expression mixes GPU scalars (first two terms) with CPU scalars.
  ublas_v1 = ublas_v2 / alpha   +     beta * ublas_v1 - alpha * ublas_v2 + beta * ublas_v1 - alpha * ublas_v1;
  vcl_v1   = vcl_v2 / gpu_alpha + gpu_beta *   vcl_v1 - alpha *   vcl_v2 + beta *   vcl_v1 - alpha *   vcl_v1;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  return retval;
}
//
// Exercises rank-1 updates and matrix-vector products on a uBLAS matrix/vector set
// and on the corresponding ViennaCL objects, comparing results via diff().
//
// All arguments are modified. A result counts as correct only if diff() compares
// equal to 0. A failing rank-1 update aborts immediately with EXIT_FAILURE; a failing
// matrix-vector product is recorded and the remaining product tests still run.
//
// Returns EXIT_SUCCESS if no comparison failed, EXIT_FAILURE otherwise.
// NumericT, the matrix/vector types and diff() are declared outside this block.
//
int test_prod_rank1(UblasMatrixType & ublas_m1, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2,
                    VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1, VCLVectorType2 & vcl_v2)
{
  int status = EXIT_SUCCESS;

  // Scale factors used by the matrix-vector product tests further below.
  NumericT prod_scale = NumericT(2);
  NumericT add_scale  = NumericT(3);

  //
  // Setup: constant vectors (all 2 and all 3), mirrored to the ViennaCL objects.
  //
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), NumericT(2));
  ublas_v2 = ublas::scalar_vector<NumericT>(ublas_v2.size(), NumericT(3));
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  viennacl::copy(ublas_m1, vcl_m1);

  //
  // Plain rank-1 update
  //
  std::cout << "Rank 1 update" << std::endl;

  ublas_m1 += ublas::outer_prod(ublas_v1, ublas_v2);
  vcl_m1   += viennacl::linalg::outer_prod(vcl_v1, vcl_v2);

  if ( diff(ublas_m1, vcl_m1) != 0 )
  {
    std::cout << "# Error at operation: rank 1 update" << std::endl;
    std::cout << "  diff: " << diff(ublas_m1, vcl_m1) << std::endl;
    return EXIT_FAILURE;
  }

  //
  // Scaled rank-1 update, host scalar.
  // The reference adds 4 * outer_prod once; the ViennaCL side adds 2 * outer_prod
  // twice, once with the scalar on the left and once on the right.
  //
  std::cout << "Scaled rank 1 update - CPU Scalar" << std::endl;

  ublas_m1 += NumericT(4) * ublas::outer_prod(ublas_v1, ublas_v2);
  vcl_m1   += NumericT(2) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2);
  vcl_m1   += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * NumericT(2);  // scalar on the right must compile as well

  if ( diff(ublas_m1, vcl_m1) != 0 )
  {
    std::cout << "# Error at operation: scaled rank 1 update - CPU Scalar" << std::endl;
    std::cout << "  diff: " << diff(ublas_m1, vcl_m1) << std::endl;
    return EXIT_FAILURE;
  }

  //
  // Scaled rank-1 update, ViennaCL scalar (same scheme as above).
  //
  std::cout << "Scaled rank 1 update - GPU Scalar" << std::endl;

  ublas_m1 += NumericT(4) * ublas::outer_prod(ublas_v1, ublas_v2);
  vcl_m1   += viennacl::scalar<NumericT>(2) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2);
  vcl_m1   += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * viennacl::scalar<NumericT>(2);  // scalar on the right must compile as well

  if ( diff(ublas_m1, vcl_m1) != 0 )
  {
    std::cout << "# Error at operation: scaled rank 1 update - GPU Scalar" << std::endl;
    std::cout << "  diff: " << diff(ublas_m1, vcl_m1) << std::endl;
    return EXIT_FAILURE;
  }

  // Bring the ViennaCL matrix back in line with the reference matrix.
  viennacl::copy(ublas_m1, vcl_m1);

  //
  // Matrix-vector product
  //
  std::cout << "Matrix-Vector product" << std::endl;

  ublas_v1 = viennacl::linalg::prod(ublas_m1, ublas_v2);
  vcl_v1   = viennacl::linalg::prod(vcl_m1, vcl_v2);

  if ( diff(ublas_v1, vcl_v1) != 0 )
  {
    std::cout << "# Error at operation: matrix-vector product" << std::endl;
    std::cout << "  diff: " << diff(ublas_v1, vcl_v1) << std::endl;
    status = EXIT_FAILURE;
  }

  //
  // Matrix-vector product combined with a scaled vector addition
  //
  std::cout << "Matrix-Vector product with scaled add" << std::endl;

  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = prod_scale * viennacl::linalg::prod(ublas_m1, ublas_v2) + add_scale * ublas_v1;
  vcl_v1   = prod_scale * viennacl::linalg::prod(vcl_m1, vcl_v2) + add_scale * vcl_v1;

  if ( diff(ublas_v1, vcl_v1) != 0 )
  {
    std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl;
    std::cout << "  diff: " << diff(ublas_v1, vcl_v1) << std::endl;
    status = EXIT_FAILURE;
  }

  //
  // Transposed matrix-vector product
  //
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Transposed Matrix-Vector product" << std::endl;

  ublas_v2 = prod_scale * viennacl::linalg::prod(trans(ublas_m1), ublas_v1);
  vcl_v2   = prod_scale * viennacl::linalg::prod(trans(vcl_m1), vcl_v1);

  if ( diff(ublas_v2, vcl_v2) != 0 )
  {
    std::cout << "# Error at operation: transposed matrix-vector product" << std::endl;
    std::cout << "  diff: " << diff(ublas_v2, vcl_v2) << std::endl;
    status = EXIT_FAILURE;
  }

  //
  // Transposed matrix-vector product combined with a scaled vector addition
  //
  std::cout << "Transposed Matrix-Vector product with scaled add" << std::endl;

  ublas_v2 = prod_scale * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + add_scale * ublas_v2;
  vcl_v2   = prod_scale * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + add_scale * vcl_v2;

  if ( diff(ublas_v2, vcl_v2) != 0 )
  {
    std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl;
    std::cout << "  diff: " << diff(ublas_v2, vcl_v2) << std::endl;
    status = EXIT_FAILURE;
  }

  return status;
}
//
// Checks vector initializers and then runs a handful of assignments through
// viennacl::scheduler statements, comparing each result with the uBLAS reference.
//
//   epsilon             Tolerance object, forwarded unchanged to every check() call.
//   ublas_v1, ublas_v2  Reference vectors (overwritten).
//   vcl_v1, vcl_v2      ViennaCL vectors (overwritten).
//
// Returns EXIT_FAILURE at the first check() that does not report EXIT_SUCCESS,
// and EXIT_SUCCESS if all checks pass.
// NumericT, the vector types, check() and random<>() are declared outside this block.
//
int test(Epsilon const& epsilon,
         UblasVectorType     & ublas_v1, UblasVectorType     & ublas_v2,
         ViennaCLVectorType1 &   vcl_v1, ViennaCLVectorType2 &   vcl_v2)
{
  int status = EXIT_SUCCESS;

  // Host and device scalars start out with different values.
  NumericT                    host_value   = 42.0;
  viennacl::scalar<NumericT>  device_value = 43.0;

  // ---------------------------- initializers --------------------------------

  std::cout << "Checking for zero_vector initializer..." << std::endl;
  ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size());
  vcl_v1   = viennacl::zero_vector<NumericT>(vcl_v1.size());
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  std::cout << "Checking for scalar_vector initializer..." << std::endl;
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), host_value);
  vcl_v1   = viennacl::scalar_vector<NumericT>(vcl_v1.size(), host_value);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // Same initializer, value taken from the ViennaCL scalar this time.
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), device_value);
  vcl_v1   = viennacl::scalar_vector<NumericT>(vcl_v1.size(), device_value);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  std::cout << "Checking for unit_vector initializer..." << std::endl;
  ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5);
  vcl_v1   = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // ---------------------------- test data -----------------------------------

  // Entries of the form 1 + random<NumericT>() for both reference vectors.
  for (std::size_t idx = 0; idx < ublas_v1.size(); ++idx)
  {
    ublas_v1[idx] = NumericT(1.0) + random<NumericT>();
    ublas_v2[idx] = NumericT(1.0) + random<NumericT>();
  }

  // Mirror the reference data to the ViennaCL vectors.
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // ---------------------------- simple statements ---------------------------

  std::cout << "Testing simple assignments..." << std::endl;

  {
    // Scheduler equivalent of: vcl_v1 = vcl_v2;
    ublas_v1 = ublas_v2;
    viennacl::scheduler::statement assign_stmt(vcl_v1, viennacl::op_assign(), vcl_v2);
    viennacl::scheduler::execute(assign_stmt);

    if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    {
      return EXIT_FAILURE;
    }
  }

  {
    // Scheduler equivalent of: vcl_v1 += vcl_v2;
    ublas_v1 += ublas_v2;
    viennacl::scheduler::statement add_stmt(vcl_v1, viennacl::op_inplace_add(), vcl_v2);
    viennacl::scheduler::execute(add_stmt);

    if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    {
      return EXIT_FAILURE;
    }
  }

  {
    // Scheduler equivalent of: vcl_v1 -= vcl_v2;
    ublas_v1 -= ublas_v2;
    viennacl::scheduler::statement sub_stmt(vcl_v1, viennacl::op_inplace_sub(), vcl_v2);
    viennacl::scheduler::execute(sub_stmt);

    if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    {
      return EXIT_FAILURE;
    }
  }

  // ---------------------------- composite statements ------------------------

  std::cout << "Testing composite assignments..." << std::endl;

  {
    // Scheduler equivalent of: vcl_v1 = vcl_v1 + vcl_v2;
    ublas_v1 = ublas_v1 + ublas_v2;
    viennacl::scheduler::statement sum_stmt(vcl_v1, viennacl::op_assign(), vcl_v1 + vcl_v2);
    viennacl::scheduler::execute(sum_stmt);

    if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    {
      return EXIT_FAILURE;
    }
  }

  {
    // Scheduler equivalent of: vcl_v1 = vcl_v1 - vcl_v2;
    ublas_v1 = ublas_v1 - ublas_v2;
    viennacl::scheduler::statement difference_stmt(vcl_v1, viennacl::op_assign(), vcl_v1 - vcl_v2);
    viennacl::scheduler::execute(difference_stmt);

    if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    {
      return EXIT_FAILURE;
    }
  }

  return status;
}
Example #4
0
/**
 * Exercises ViennaCL vector operations against a uBLAS reference.
 *
 * Every operation is applied to the uBLAS vectors and to the ViennaCL vectors
 * and the results are compared through check() (defined elsewhere in this
 * file; it is called without a tolerance argument and is treated here as
 * returning EXIT_SUCCESS on agreement).
 *
 * NumericT and the vector types are presumably template parameters declared
 * above this function -- the declaration is not part of this excerpt.
 *
 * @param ublas_v1  Reference vector, overwritten repeatedly during the test.
 * @param ublas_v2  Second reference vector, overwritten repeatedly.
 * @param vcl_v1    ViennaCL counterpart of ublas_v1 (same size expected).
 * @param vcl_v2    ViennaCL counterpart of ublas_v2 (same size expected).
 * @return EXIT_SUCCESS if all comparisons pass, EXIT_FAILURE at the first
 *         comparison that fails.
 *
 * NOTE(review): the unit_vector test uses index 5, so the vectors are assumed
 * to hold more than five entries -- confirm against the caller.
 */
int test(UblasVectorType     & ublas_v1, UblasVectorType     & ublas_v2,
         ViennaCLVectorType1 &   vcl_v1, ViennaCLVectorType2 &   vcl_v2)
{
  int retval = EXIT_SUCCESS;

  NumericT                    cpu_result = 42;
  viennacl::scalar<NumericT>  gpu_result = 43;

  //
  // Initializer:
  //
  std::cout << "Checking for zero_vector initializer..." << std::endl;
  //ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size());
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    ublas_v1[i] = 0;
  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for scalar_vector initializer..." << std::endl;
  //ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), cpu_result);
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    ublas_v1[i] = cpu_result;
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  //ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), gpu_result);
  // gpu_result was initialized to 43 == cpu_result + 1, hence the reference value below.
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    ublas_v1[i] = cpu_result + 1;
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for unit_vector initializer..." << std::endl;
  //ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5);
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    ublas_v1[i] = (i == 5) ? 1 : 0;
  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Deterministic, non-negative test data: v1[i] = i, v2[i] = i + 42.
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
  {
    ublas_v1[i] = NumericT(i);
    ublas_v2[i] = NumericT(i+42);
  }

  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  //
  // Part 1: Norms and inner product
  //

  // --------------------------------------------------------------------------
  std::cout << "Testing inner_prod..." << std::endl;
  cpu_result = viennacl::linalg::inner_prod(ublas_v1, ublas_v2);
  // The same ViennaCL expression is assigned to a host scalar and to a device scalar.
  NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
  gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result = inner_prod(ublas_v1 + ublas_v2, 2*ublas_v2);
  NumericT cpu_result3 = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, 2*vcl_v2);
  gpu_result = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, 2*vcl_v2);

  if (check(cpu_result, cpu_result3) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  // The reference sums below add the raw entries (no absolute value); the
  // entries were set to i and i+42 above.
  std::cout << "Testing norm_1..." << std::endl;
  cpu_result = 0;
  for (std::size_t i=0; i<ublas_v1.size(); ++i)   //note: norm_1 broken for unsigned ints on MacOS
    cpu_result += ublas_v1[i];
  gpu_result = viennacl::linalg::norm_1(vcl_v1);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result2 = 0; //reset
  for (std::size_t i=0; i<ublas_v1.size(); ++i)   //note: norm_1 broken for unsigned ints on MacOS
    cpu_result2 += ublas_v1[i];
  cpu_result = viennacl::linalg::norm_1(vcl_v1);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result2 = 0;
  for (std::size_t i=0; i<ublas_v1.size(); ++i)   //note: norm_1 broken for unsigned ints on MacOS
    cpu_result2 += ublas_v1[i] + ublas_v2[i];
  cpu_result = viennacl::linalg::norm_1(vcl_v1 + vcl_v2);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  std::cout << "Testing norm_inf..." << std::endl;
  cpu_result = 0;
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    if (ublas_v1[i] > cpu_result)
      cpu_result = ublas_v1[i];
  gpu_result = viennacl::linalg::norm_inf(vcl_v1);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result2 = 0;
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    if (ublas_v1[i] > cpu_result2)
      cpu_result2 = ublas_v1[i];
  cpu_result = viennacl::linalg::norm_inf(vcl_v1);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result2 = 0;
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    if (ublas_v1[i] + ublas_v2[i] > cpu_result2)
      cpu_result2 = ublas_v1[i] + ublas_v2[i];
  cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  std::cout << "Testing index_norm_inf..." << std::endl;

  std::size_t cpu_index = 0;
  cpu_result = 0;
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    if (ublas_v1[i] > cpu_result)
    {
      cpu_result = ublas_v1[i];
      cpu_index = i;
    }
  std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1);

  if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index)) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)];

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Reference for index_norm_inf(v1 + v2): locate the largest element-wise sum
  // and remember the entry of v1 at that position. The running maximum of the
  // sums is kept separately from cpu_result so that sums are compared with
  // sums (and not with an entry of v1).
  cpu_index = 0;
  cpu_result = 0;
  NumericT cpu_max_sum = 0;
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
    if (ublas_v1[i] + ublas_v2[i] > cpu_max_sum)
    {
      cpu_max_sum = ublas_v1[i] + ublas_v2[i];
      cpu_result = ublas_v1[i];
      cpu_index = i;
    }
  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)];

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  // --------------------------------------------------------------------------

  std::cout << "Testing assignments..." << std::endl;
  NumericT val = static_cast<NumericT>(1);
  for (std::size_t i=0; i < ublas_v1.size(); ++i)
    ublas_v1(i) = val;

  // Element-wise write access on the ViennaCL vector.
  for (std::size_t i=0; i < vcl_v1.size(); ++i)
    vcl_v1(i) = val;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // multiplication and division of vectors by scalars
  //
  std::cout << "Testing scaling with CPU scalar..." << std::endl;
  NumericT alpha = static_cast<NumericT>(3);
  viennacl::scalar<NumericT> gpu_alpha = alpha;

  ublas_v1  *= alpha;
  vcl_v1    *= alpha;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing scaling with GPU scalar..." << std::endl;
  ublas_v1  *= alpha;
  vcl_v1    *= gpu_alpha;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  NumericT beta  = static_cast<NumericT>(2);
  viennacl::scalar<NumericT> gpu_beta = beta;

  std::cout << "Testing shrinking with CPU scalar..." << std::endl;
  ublas_v1 /= beta;
  vcl_v1   /= beta;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing shrinking with GPU scalar..." << std::endl;
  ublas_v1 /= beta;
  vcl_v1   /= gpu_beta;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // add and inplace_add of vectors
  //
  for (std::size_t i=0; i < ublas_v1.size(); ++i)
    ublas_v1(i) = NumericT(i);
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing add on vector..." << std::endl;

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1     = ublas_v1 + ublas_v2;
  vcl_v1       =   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace-add on vector..." << std::endl;
  ublas_v1 += ublas_v2;
  vcl_v1   +=   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // multiply-add
  //
  // Each case below refreshes v2 = 3 * v1 on the host and re-copies both
  // vectors to the ViennaCL side before applying the operation under test.
  //
  std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
  for (std::size_t i=0; i < ublas_v1.size(); ++i)
    ublas_v1(i) = NumericT(i);
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 + alpha * ublas_v2;
  vcl_v1   = vcl_v1   + alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 + ublas_v2;
  vcl_v1   = alpha *   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 + beta * ublas_v2;
  vcl_v1   = alpha *   vcl_v1 + beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 += alpha * ublas_v2;
  vcl_v1   += alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 +     alpha * ublas_v2;
  vcl_v1   = vcl_v1   + gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  // The scalar scales the left operand here, mirroring the CPU-scalar "(left)" case.
  ublas_v1 =     alpha * ublas_v1 + ublas_v2;
  vcl_v1   = gpu_alpha *   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 =     alpha * ublas_v1 +     beta * ublas_v2;
  vcl_v1   = gpu_alpha *   vcl_v1 + gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 +=     alpha * ublas_v1 +     beta * ublas_v2;
  vcl_v1   += gpu_alpha *   vcl_v1 + gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 +=     alpha * ublas_v2;
  vcl_v1   += gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // division-add
  //
  std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl;
  for (std::size_t i=0; i < ublas_v1.size(); ++i)
    ublas_v1(i) = NumericT(i);
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 + ublas_v2 / alpha;
  vcl_v1   = vcl_v1   + vcl_v2 / alpha;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 / alpha + ublas_v2;
  vcl_v1   =   vcl_v1 / alpha +   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta;
  vcl_v1   =   vcl_v1 / alpha +   vcl_v2 / beta;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 / alpha + ublas_v2 * beta;
  vcl_v1   =   vcl_v1 / alpha +   vcl_v2 * beta;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 * alpha + ublas_v2 / beta;
  vcl_v1   =   vcl_v1 * alpha +   vcl_v2 / beta;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;



  std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 += ublas_v2 / alpha;
  vcl_v1   += vcl_v2 / alpha;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 + ublas_v2 / alpha;
  vcl_v1   = vcl_v1   +   vcl_v2 / gpu_alpha;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  // The division applies to the left operand here, mirroring the CPU-scalar "(left)" case.
  ublas_v1 = ublas_v1 / alpha     + ublas_v2;
  vcl_v1   =   vcl_v1 / gpu_alpha +   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 / alpha     + ublas_v2 / beta;
  vcl_v1   =   vcl_v1 / gpu_alpha +   vcl_v2 / gpu_beta;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 += ublas_v1 / alpha     + ublas_v2 / beta;
  vcl_v1   +=   vcl_v1 / gpu_alpha +   vcl_v2 / gpu_beta;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 += ublas_v1 / alpha     + ublas_v2 * beta;
  vcl_v1   +=   vcl_v1 / gpu_alpha +   vcl_v2 * gpu_beta;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl;
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  // Division (not multiplication), matching the CPU-scalar inplace division-add case.
  ublas_v1 += ublas_v2 / alpha;
  vcl_v1   +=   vcl_v2 / gpu_alpha;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  //
  // More complicated expressions (for ensuring the operator overloads work correctly)
  //
  for (std::size_t i=0; i < ublas_v1.size(); ++i)
    ublas_v1(i) = NumericT(i);
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing three vector additions..." << std::endl;
  ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2;
  vcl_v1   =   vcl_v2 +   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  ublas_v2 = 3 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing swap..." << std::endl;
  swap(ublas_v1, ublas_v2);
  swap(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing elementwise multiplication..." << std::endl;
  std::cout << " v1 = element_prod(v1, v2);" << std::endl;
  ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << " v1 += element_prod(v1, v2);" << std::endl;
  ublas_v1 += ublas::element_prod(ublas_v1, ublas_v2);
  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl;
  ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl;
  ublas_v1 += ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2);
  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl;
  ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl;
  ublas_v1 += ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1);
  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
  ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
  ublas_v1 += ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1);
  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing elementwise division..." << std::endl;
  // Fresh data with strictly positive entries in v2 (5 + i), which serves as the divisor below.
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
  {
    ublas_v1[i] = NumericT(1 + i);
    ublas_v2[i] = NumericT(5 + i);
  }

  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas::element_div(ublas_v1, ublas_v2);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 += ublas::element_div(ublas_v1, ublas_v2);
  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 += ublas::element_div(ublas_v1 + ublas_v2, ublas_v2);
  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  ublas_v1 = ublas::element_div(ublas_v1, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 += ublas::element_div(ublas_v1, ublas_v2 + ublas_v1);
  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 += ublas::element_div(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1);
  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  return retval;
}
Example #5
0
int test(Epsilon const& epsilon,
         UblasVectorType     & ublas_v1, UblasVectorType     & ublas_v2,
         ViennaCLVectorType1 &   vcl_v1, ViennaCLVectorType2 &   vcl_v2)
{
  int retval = EXIT_SUCCESS;

  NumericT                    cpu_result = 42.0;
  viennacl::scalar<NumericT>  gpu_result = 43.0;
  NumericT                    alpha      = NumericT(3.1415);
  NumericT                    beta       = NumericT(2.7172);

  //
  // Initializer:
  //
  std::cout << "Checking for zero_vector initializer..." << std::endl;
  ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size());
  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for scalar_vector initializer..." << std::endl;
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), cpu_result);
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), gpu_result);
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for unit_vector initializer..." << std::endl;
  ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5);
  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  for (std::size_t i=0; i<ublas_v1.size(); ++i)
  {
    ublas_v1[i] = NumericT(1.0) + random<NumericT>();
    ublas_v2[i] = NumericT(1.0) + random<NumericT>();
  }

  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  // --------------------------------------------------------------------------

  std::cout << "Testing simple assignments..." << std::endl;

  {
  ublas_v1 = ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), vcl_v2); // same as vcl_v1 = vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  ublas_v1 += ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_add(), vcl_v2); // same as vcl_v1 += vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  ublas_v1 -= ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_sub(), vcl_v2); // same as vcl_v1 -= vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "Testing composite assignments..." << std::endl;
  {
  ublas_v1 = ublas_v1 + ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 + vcl_v2); // same as vcl_v1 = vcl_v1 + vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }
  {
  ublas_v1 += alpha * ublas_v1 - beta * ublas_v2 + ublas_v1 / beta - ublas_v2 / alpha;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_add(), alpha * vcl_v1 - beta * vcl_v2 + vcl_v1 / beta - vcl_v2 / alpha); // same as vcl_v1 += alpha * vcl_v1 - beta * vcl_v2 + beta * vcl_v1 - alpha * vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  ublas_v1 = ublas_v1 - ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 - vcl_v2); // same as vcl_v1 = vcl_v1 - vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "--- Testing reductions ---" << std::endl;
  std::cout << "inner_prod..." << std::endl;
  {
  cpu_result = inner_prod(ublas_v1, ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2)); // same as gpu_result = inner_prod(vcl_v1, vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = inner_prod(ublas_v1 + ublas_v2, ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2)); // same as gpu_result = inner_prod(vcl_v1 + vcl_v2, vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = inner_prod(ublas_v1, ublas_v2 - ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2 - vcl_v1)); // same as gpu_result = inner_prod(vcl_v1, vcl_v2 - vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = inner_prod(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1)); // same as gpu_result = inner_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "norm_1..." << std::endl;
  {
  cpu_result = norm_1(ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_1(vcl_v1)); // same as gpu_result = norm_1(vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = norm_1(ublas_v1 + ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_1(vcl_v1 + vcl_v2)); // same as gpu_result = norm_1(vcl_v1 + vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "norm_2..." << std::endl;
  {
  cpu_result = norm_2(ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_2(vcl_v1)); // same as gpu_result = norm_2(vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = norm_2(ublas_v1 + ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_2(vcl_v1 + vcl_v2)); // same as gpu_result = norm_2(vcl_v1 + vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "norm_inf..." << std::endl;
  {
  cpu_result = norm_inf(ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_inf(vcl_v1)); // same as gpu_result = norm_inf(vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = norm_inf(ublas_v1 - ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_inf(vcl_v1 - vcl_v2)); // same as gpu_result = norm_inf(vcl_v1 - vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "--- Testing elementwise operations (binary) ---" << std::endl;
  std::cout << "x = element_prod(x, y)... ";
  {
  ublas_v1 = element_prod(ublas_v1, ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1, vcl_v2));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_prod(x + y, y)... ";
  {
  ublas_v1 = element_prod(ublas_v1 + ublas_v2, ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_prod(x, x + y)... ";
  {
  ublas_v1 = element_prod(ublas_v1, ublas_v1 + ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_prod(x - y, y + x)... ";
  {
  ublas_v1 = element_prod(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }



  std::cout << "x = element_div(x, y)... ";
  {
  ublas_v1 = element_div(ublas_v1, ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1, vcl_v2));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_div(x + y, y)... ";
  {
  ublas_v1 = element_div(ublas_v1 + ublas_v2, ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_div(x, x + y)... ";
  {
  ublas_v1 = element_div(ublas_v1, ublas_v1 + ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_div(x - y, y + x)... ";
  {
  ublas_v1 = element_div(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "--- Testing elementwise operations (unary) ---" << std::endl;
// GENERATE_UNARY_OP_TEST(OPNAME): expands to a two-stage check of the unary
// elementwise operation OPNAME (cos, exp, sqrt, ...).
//
// The expansion expects ublas_v1, ublas_v2, vcl_v1, vcl_v2, epsilon and NumericT
// to be visible at the point of use, and executes 'return EXIT_FAILURE;' in the
// enclosing function as soon as check() reports a mismatch.
//
// Steps performed by the expansion:
//   1. ublas_v1 is filled with the constant 0.21, ublas_v2 is set to
//      3.1415 * ublas_v1, and both are copied to vcl_v1 / vcl_v2.
//   2. x = OPNAME(y): reference values are computed entry by entry with
//      std::OPNAME and compared against the scheduler statement using
//      viennacl::linalg::element_OPNAME(vcl_v2).
//   3. x = OPNAME(y / 2): same comparison with the argument halved.
//
// Both reference loops call std::OPNAME so that the <cmath> overload matching
// NumericT is selected in both stages, independent of whether the standard
// library also exports the function into the global namespace.
#define GENERATE_UNARY_OP_TEST(OPNAME) \
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), NumericT(0.21)); \
  ublas_v2 = NumericT(3.1415) * ublas_v1; \
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); \
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); \
  { \
  for (std::size_t i=0; i<ublas_v1.size(); ++i) \
    ublas_v1[i] = std::OPNAME(ublas_v2[i]); \
  viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_##OPNAME(vcl_v2)); \
  viennacl::scheduler::execute(my_statement); \
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
    return EXIT_FAILURE; \
  } \
  { \
  for (std::size_t i=0; i<ublas_v1.size(); ++i) \
    ublas_v1[i] = std::OPNAME(ublas_v2[i] / NumericT(2)); \
  viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_##OPNAME(vcl_v2 / NumericT(2))); \
  viennacl::scheduler::execute(my_statement); \
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
    return EXIT_FAILURE; \
  }

  GENERATE_UNARY_OP_TEST(cos);
  GENERATE_UNARY_OP_TEST(cosh);
  GENERATE_UNARY_OP_TEST(exp);
  GENERATE_UNARY_OP_TEST(floor);
  GENERATE_UNARY_OP_TEST(fabs);
  GENERATE_UNARY_OP_TEST(log);
  GENERATE_UNARY_OP_TEST(log10);
  GENERATE_UNARY_OP_TEST(sin);
  GENERATE_UNARY_OP_TEST(sinh);
  GENERATE_UNARY_OP_TEST(fabs);
  //GENERATE_UNARY_OP_TEST(abs); //OpenCL allows abs on integers only
  GENERATE_UNARY_OP_TEST(sqrt);
  GENERATE_UNARY_OP_TEST(tan);
  GENERATE_UNARY_OP_TEST(tanh);

#undef GENERATE_UNARY_OP_TEST

  std::cout << "--- Testing complicated composite operations ---" << std::endl;
  std::cout << "x = inner_prod(x, y) * y..." << std::endl;
  {
  ublas_v1 = inner_prod(ublas_v1, ublas_v2) * ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2) * vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = y / norm_1(x)..." << std::endl;
  {
  ublas_v1 = ublas_v2 / norm_1(ublas_v1);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), vcl_v2 / viennacl::linalg::norm_1(vcl_v1) );
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }


  // --------------------------------------------------------------------------
  return retval;
}
// Reports a deviation that exceeds the tolerance.
//
// deviation  Non-negative difference measure; callers pass std::fabs(diff(...)).
// epsilon    Tolerance the deviation is compared against.
// operation  Text appended to "# Error at operation: " in the diagnostic.
//
// Returns true if 'deviation > epsilon' is false. Otherwise prints the operation
// name and the deviation to std::cout and returns false.
template <typename DeviationT, typename ToleranceT>
bool prod_result_within_tolerance(DeviationT const & deviation, ToleranceT const & epsilon, char const * operation)
{
   if (deviation > epsilon)
   {
      std::cout << "# Error at operation: " << operation << std::endl;
      std::cout << "  diff: " << deviation << std::endl;
      return false;
   }
   return true;
}

// Runs the matrix-vector product checks through the ViennaCL scheduler and
// compares every result with the same operation applied to the ublas_* operands.
//
// epsilon              Tolerance for std::fabs(diff(reference, vcl)).
// ublas_m1             Reference matrix; copied to vcl_m1, not modified here.
// ublas_v1, ublas_v2   Reference vectors; overwritten by the individual tests.
// vcl_m1               ViennaCL matrix; overwritten with the contents of ublas_m1.
// vcl_v1, vcl_v2       ViennaCL vectors; overwritten by the individual tests.
//
// Returns EXIT_SUCCESS if every enabled check stays within epsilon, EXIT_FAILURE
// otherwise. A failing check does not stop the run; the remaining checks are
// still executed. Statement order matters, because each test consumes the vector
// contents left behind by the previous one.
int test_prod_rank1(Epsilon const & epsilon,
                    UblasMatrixType & ublas_m1, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2,
                    VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1, VCLVectorType2 & vcl_v2)
{
   int retval = EXIT_SUCCESS;

   // sync data:
   viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
   viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
   viennacl::copy(ublas_m1, vcl_m1);

   /* TODO: Add rank-1 operations here */

   //reset vcl_matrix:
   viennacl::copy(ublas_m1, vcl_m1);

   // --------------------------------------------------------------------------
   // y = A * x, followed by the in-place variants y += A * x and y -= A * x
   std::cout << "Matrix-Vector product" << std::endl;
   ublas_v1 = viennacl::linalg::prod(ublas_m1, ublas_v2);
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::prod(vcl_m1, vcl_v2));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product"))
      retval = EXIT_FAILURE;

   std::cout << "Matrix-Vector product with inplace-add" << std::endl;
   ublas_v1 += viennacl::linalg::prod(ublas_m1, ublas_v2);
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_add(), viennacl::linalg::prod(vcl_m1, vcl_v2));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product with inplace-add"))
      retval = EXIT_FAILURE;

   std::cout << "Matrix-Vector product with inplace-sub" << std::endl;
   ublas_v1 -= viennacl::linalg::prod(ublas_m1, ublas_v2);
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_sub(), viennacl::linalg::prod(vcl_m1, vcl_v2));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product with inplace-sub"))
      retval = EXIT_FAILURE;

   // --------------------------------------------------------------------------
   // Disabled test (reason not recorded here). Its headline is kept inside the
   // comment so that the log only announces tests that actually run.
   /*
   std::cout << "Matrix-Vector product with scaled matrix" << std::endl;
   ublas_v1 = viennacl::linalg::prod(NumericT(2.0) * ublas_m1, ublas_v2);
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::prod(NumericT(2.0) * vcl_m1, vcl_v2));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product with scaled matrix"))
      retval = EXIT_FAILURE;
   */

   // --------------------------------------------------------------------------
   // Disabled test (reason not recorded here).
   /*
   std::cout << "Matrix-Vector product with scaled vector" << std::endl;
   ublas_v1 = viennacl::linalg::prod(ublas_m1, NumericT(2.0) * ublas_v2);
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::prod(vcl_m1, NumericT(2.0) * vcl_v2));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product with scaled vector"))
      retval = EXIT_FAILURE;
   */

   // --------------------------------------------------------------------------
   // Disabled test (reason not recorded here).
   /*
   std::cout << "Matrix-Vector product with scaled matrix and scaled vector" << std::endl;
   ublas_v1 = viennacl::linalg::prod(NumericT(2.0) * ublas_m1, NumericT(2.0) * ublas_v2);
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::prod(NumericT(2.0) * vcl_m1, NumericT(2.0) * vcl_v2));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product with scaled matrix and scaled vector"))
      retval = EXIT_FAILURE;
   */


   // --------------------------------------------------------------------------
   // y = alpha * A * x - beta * y, followed by the in-place variants.
   // The vectors are re-copied before each test so that both sides start from
   // identical operands even if an earlier check deviated.
   std::cout << "Matrix-Vector product with scaled add" << std::endl;
   NumericT alpha = static_cast<NumericT>(2.786);
   NumericT beta = static_cast<NumericT>(3.1415);
   viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
   viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

   ublas_v1 = alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) - beta * ublas_v1;
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) - beta * vcl_v1);
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product with scaled additions"))
      retval = EXIT_FAILURE;

   std::cout << "Matrix-Vector product with scaled add, inplace-add" << std::endl;
   viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
   viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

   ublas_v1 += alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) - beta * ublas_v1;
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_add(), alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) - beta * vcl_v1);
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product with scaled additions, inplace-add"))
      retval = EXIT_FAILURE;

   std::cout << "Matrix-Vector product with scaled add, inplace-sub" << std::endl;
   viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
   viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

   ublas_v1 -= alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) - beta * ublas_v1;
   {
   viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_sub(), alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) - beta * vcl_v1);
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v1, vcl_v1)), epsilon, "matrix-vector product with scaled additions, inplace-sub"))
      retval = EXIT_FAILURE;

   // --------------------------------------------------------------------------
   // x = trans(A) * y, followed by the in-place variants. Note that the roles of
   // the two vectors are swapped here: v2 receives the result, v1 is the operand.

   viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
   viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

   std::cout << "Transposed Matrix-Vector product" << std::endl;
   ublas_v2 = viennacl::linalg::prod(trans(ublas_m1), ublas_v1);
   {
   viennacl::scheduler::statement   my_statement(vcl_v2, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_m1), vcl_v1));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v2, vcl_v2)), epsilon, "transposed matrix-vector product"))
      retval = EXIT_FAILURE;

   std::cout << "Transposed Matrix-Vector product, inplace-add" << std::endl;
   ublas_v2 += viennacl::linalg::prod(trans(ublas_m1), ublas_v1);
   {
   viennacl::scheduler::statement   my_statement(vcl_v2, viennacl::op_inplace_add(), viennacl::linalg::prod(trans(vcl_m1), vcl_v1));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v2, vcl_v2)), epsilon, "transposed matrix-vector product, inplace-add"))
      retval = EXIT_FAILURE;

   std::cout << "Transposed Matrix-Vector product, inplace-sub" << std::endl;
   ublas_v2 -= viennacl::linalg::prod(trans(ublas_m1), ublas_v1);
   {
   viennacl::scheduler::statement   my_statement(vcl_v2, viennacl::op_inplace_sub(), viennacl::linalg::prod(trans(vcl_m1), vcl_v1));
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v2, vcl_v2)), epsilon, "transposed matrix-vector product, inplace-sub"))
      retval = EXIT_FAILURE;

   // --------------------------------------------------------------------------
   // x = alpha * trans(A) * y + beta * x, followed by the in-place variants
   std::cout << "Transposed Matrix-Vector product with scaled add" << std::endl;
   ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2;
   {
   viennacl::scheduler::statement   my_statement(vcl_v2, viennacl::op_assign(), alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2);
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v2, vcl_v2)), epsilon, "transposed matrix-vector product with scaled additions"))
      retval = EXIT_FAILURE;

   std::cout << "Transposed Matrix-Vector product with scaled add, inplace-add" << std::endl;
   ublas_v2 += alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2;
   {
   viennacl::scheduler::statement   my_statement(vcl_v2, viennacl::op_inplace_add(), alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2);
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v2, vcl_v2)), epsilon, "transposed matrix-vector product with scaled additions, inplace-add"))
      retval = EXIT_FAILURE;

   std::cout << "Transposed Matrix-Vector product with scaled add, inplace-sub" << std::endl;
   ublas_v2 -= alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2;
   {
   viennacl::scheduler::statement   my_statement(vcl_v2, viennacl::op_inplace_sub(), alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2);
   viennacl::scheduler::execute(my_statement);
   }

   if (!prod_result_within_tolerance(std::fabs(diff(ublas_v2, vcl_v2)), epsilon, "transposed matrix-vector product with scaled additions, inplace-sub"))
      retval = EXIT_FAILURE;

   // --------------------------------------------------------------------------

   return retval;
}