Esempio n. 1
0
/**
 * Runs a sequence of vector operations twice -- once on Boost.uBLAS vectors
 * (reference) and once on ViennaCL vectors -- and compares the results via
 * check() after every step.
 *
 * All four vectors are overwritten. ublas_v1/vcl_v1 and ublas_v2/vcl_v2 are
 * treated as pairs that must hold the same values after each operation.
 *
 * NOTE(review): the unit_vector test sets index 5, so the vectors presumably
 * need more than 5 entries -- confirm against the caller.
 *
 * @param epsilon   Forwarded unchanged to every check() call (presumably the comparison tolerance).
 * @param ublas_v1  First uBLAS reference vector (modified).
 * @param ublas_v2  Second uBLAS reference vector (modified).
 * @param vcl_v1    ViennaCL counterpart of ublas_v1 (modified).
 * @param vcl_v2    ViennaCL counterpart of ublas_v2 (modified).
 * @return EXIT_SUCCESS if every check() succeeded, EXIT_FAILURE at the first check() that did not.
 */
int test(Epsilon const& epsilon, 
         UblasVectorType     & ublas_v1, UblasVectorType     & ublas_v2,
         ViennaCLVectorType1 &   vcl_v1, ViennaCLVectorType2 &   vcl_v2)
{
  int retval = EXIT_SUCCESS;
  
  // Two different start values so that the host scalar and the ViennaCL scalar
  // initializer tests below cannot pass by accident.
  NumericT                    cpu_result = 42.0;
  viennacl::scalar<NumericT>  gpu_result = 43.0;
  
  //
  // Initializer:
  //
  std::cout << "Checking for zero_vector initializer..." << std::endl;
  ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size());
  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  std::cout << "Checking for scalar_vector initializer..." << std::endl;
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), cpu_result);
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  // Same initializer, but fed from the viennacl::scalar value
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), gpu_result);
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for unit_vector initializer..." << std::endl;
  ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5);
  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  // Fill both reference vectors with 1 + random<NumericT>()
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
  {
    ublas_v1[i] = NumericT(1.0) + random<NumericT>();
    ublas_v2[i] = NumericT(1.0) + random<NumericT>();
  }
  
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  //
  // Part 1: Norms and inner product
  //
  
  // --------------------------------------------------------------------------
  std::cout << "Testing inner_prod..." << std::endl;
  cpu_result = viennacl::linalg::inner_prod(ublas_v1, ublas_v2);
  // The ViennaCL result is stored once in a host scalar and once in a viennacl::scalar
  NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
  gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);

  if (check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  std::cout << "Testing norm_1..." << std::endl;
  cpu_result = ublas::norm_1(ublas_v1);
  gpu_result = viennacl::linalg::norm_1(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  // Second pass with the destinations exchanged: the uBLAS value goes into the
  // viennacl::scalar and the ViennaCL value into the host scalar.
  gpu_result = 2 * cpu_result; //reset
  gpu_result = ublas::norm_1(ublas_v1);
  cpu_result = viennacl::linalg::norm_1(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  // --------------------------------------------------------------------------
  std::cout << "Testing norm_2..." << std::endl;
  cpu_result = ublas::norm_2(ublas_v1);
  gpu_result = viennacl::linalg::norm_2(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  gpu_result = 2 * cpu_result; //reset
  gpu_result = ublas::norm_2(ublas_v1);
  cpu_result = viennacl::linalg::norm_2(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  std::cout << "Testing norm_inf..." << std::endl;
  cpu_result = ublas::norm_inf(ublas_v1);
  gpu_result = viennacl::linalg::norm_inf(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  gpu_result = 2 * cpu_result; //reset
  gpu_result = ublas::norm_inf(ublas_v1);
  cpu_result = viennacl::linalg::norm_inf(vcl_v1);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  std::cout << "Testing index_norm_inf..." << std::endl;
  std::size_t cpu_index = ublas::index_norm_inf(ublas_v1);
  std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1);

  // Indices are cast to NumericT so the scalar overload of check() can be reused
  if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  // Use the index directly for element access
  cpu_result = ublas_v1[index_norm_inf(ublas_v1)];
  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)];

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  //
  // Plane rotation and assignments
  //
  
  // --------------------------------------------------------------------------
    
  // Reference computed on copies x, y:
  //   x <- 1.1 * x + 2.3 * y,   y <- -2.3 * x + 1.1 * y   (both using the old x and y)
  ublas::vector<NumericT> x = ublas_v1;
  ublas::vector<NumericT> y = ublas_v2;
  ublas::vector<NumericT> t = ublas_v1;
  t.assign (NumericT(1.1) * x + NumericT(2.3) * y);
  y.assign (- NumericT(2.3) * x + NumericT(1.1) * y);
  x.assign (t);

  viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1.1), NumericT(2.3));

  if (check(x, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(y, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  
  std::cout << "Testing assignments..." << std::endl;
  NumericT val = static_cast<NumericT>(1e-3);
  for (size_t i=0; i < ublas_v1.size(); ++i)
    ublas_v1(i) = val;

  // Element-wise write access through operator() on the ViennaCL vector
  for (size_t i=0; i < vcl_v1.size(); ++i)
    vcl_v1(i) = val;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  //
  // multiplication and division of vectors by scalars
  //
  std::cout << "Testing scaling with CPU scalar..." << std::endl;
  NumericT alpha = static_cast<NumericT>(2.7182);
  viennacl::scalar<NumericT> gpu_alpha = alpha;

  ublas_v1  *= alpha;
  vcl_v1    *= alpha;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing scaling with GPU scalar..." << std::endl;
  ublas_v1  *= alpha;
  vcl_v1    *= gpu_alpha;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  NumericT beta  = static_cast<NumericT>(1.4153);
  viennacl::scalar<NumericT> gpu_beta = beta;

  std::cout << "Testing shrinking with CPU scalar..." << std::endl;
  ublas_v1 /= beta;
  vcl_v1   /= beta;  

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  std::cout << "Testing shrinking with GPU scalar..." << std::endl;
  ublas_v1 /= beta;
  vcl_v1   /= gpu_beta;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  


  //
  // add and inplace_add of vectors
  //
  // From here on, each section first sets v2 = 3.1415 * v1 on the uBLAS side
  // and copies both vectors to their ViennaCL counterparts, so every operation
  // starts from identical inputs on both sides.
    
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
    
  std::cout << "Testing add on vector..." << std::endl;
  
  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  ublas_v1     = ublas_v1 + ublas_v2;
  vcl_v1       =   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace-add on vector..." << std::endl;
  ublas_v1 += ublas_v2;
  vcl_v1   +=   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  //
  // subtract and inplace_subtract of vectors
  //
  std::cout << "Testing sub on vector..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1     = ublas_v1 - ublas_v2;
  vcl_v1       =   vcl_v1 -   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace-sub on vector..." << std::endl;
  ublas_v1 -= ublas_v2;
  vcl_v1   -= vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  
  //
  // multiply-add
  //
  std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 + alpha * ublas_v2;
  vcl_v1   = vcl_v1   + alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 + ublas_v2;
  vcl_v1   = alpha *   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 + beta * ublas_v2;
  vcl_v1   = alpha *   vcl_v1 + beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 += alpha * ublas_v2;
  vcl_v1   += alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  
  std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  ublas_v1 = ublas_v1 +     alpha * ublas_v2;
  vcl_v1   = vcl_v1   + gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  // Scale the LEFT operand here; scaling the right operand is already covered
  // by the "(right)" case above.
  ublas_v1 =     alpha * ublas_v1 + ublas_v2;
  vcl_v1   = gpu_alpha *   vcl_v1 +   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 =     alpha * ublas_v1 +     beta * ublas_v2;
  vcl_v1   = gpu_alpha *   vcl_v1 + gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  
  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 +=     alpha * ublas_v1 +     beta * ublas_v2;
  vcl_v1   += gpu_alpha *   vcl_v1 + gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 +=     alpha * ublas_v1 -     beta * ublas_v2;
  vcl_v1   += gpu_alpha *   vcl_v1 - gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  
  std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  ublas_v1 +=     alpha * ublas_v2;
  vcl_v1   += gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  


  //
  // multiply-subtract
  //
  std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = ublas_v1 - alpha * ublas_v2;
  vcl_v1   = vcl_v1   - alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 - ublas_v2;
  vcl_v1   = alpha * vcl_v1   -   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 = alpha * ublas_v1 - beta * ublas_v2;
  vcl_v1   = alpha * vcl_v1   - beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 -= alpha * ublas_v2;
  vcl_v1   -= alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  
  std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  ublas_v1 = ublas_v1 -     alpha * ublas_v2;
  vcl_v1   = vcl_v1   - gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  // Scale the LEFT operand here; scaling the right operand is already covered
  // by the "(right)" case above.
  ublas_v1 =     alpha * ublas_v1 - ublas_v2;
  vcl_v1   = gpu_alpha *   vcl_v1 -   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 =     alpha * ublas_v1 -     beta * ublas_v2;
  vcl_v1   = gpu_alpha * vcl_v1   - gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 -=     alpha * ublas_v1 +     beta * ublas_v2;
  vcl_v1   -= gpu_alpha * vcl_v1   + gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  ublas_v1 -=     alpha * ublas_v1 -     beta * ublas_v2;
  vcl_v1   -= gpu_alpha * vcl_v1   - gpu_beta *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  ublas_v1 -=     alpha * ublas_v2;
  vcl_v1   -= gpu_alpha *   vcl_v2;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  
  //
  // More complicated expressions (for ensuring the operator overloads work correctly)
  //
  
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing three vector additions..." << std::endl;
  ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2;
  vcl_v1   =   vcl_v2 +   vcl_v1 +   vcl_v2;
  
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl;
  ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2);
  vcl_v1   = beta * (vcl_v1   - alpha * vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  // No resync here: this step continues from the result of the previous one
  std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl;
  ublas_v1 =     beta * (ublas_v1 -     alpha * ublas_v2);
  vcl_v1   = gpu_beta * (vcl_v1   - gpu_alpha * vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  // --------------------------------------------------------------------------      
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing swap..." << std::endl;
  swap(ublas_v1, ublas_v2);
  swap(vcl_v1, vcl_v2);

  // A swap modifies both operands, so both pairs are verified
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------      
  // Fresh values of the form 1 + random<NumericT>() before the element-wise tests
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
  {
    ublas_v1[i] = NumericT(1.0) + random<NumericT>();
    ublas_v2[i] = NumericT(1.0) + random<NumericT>();
  }
  
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  // Each element-wise operation is tested with plain vectors and with a sum
  // expression as first, second, and both arguments.
  std::cout << "Testing elementwise multiplication..." << std::endl;
  ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  
  std::cout << "Testing elementwise division..." << std::endl;
  ublas_v1 = ublas::element_div(ublas_v1, ublas_v2);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_div(ublas_v1, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  // --------------------------------------------------------------------------         
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Testing another complicated vector expression with CPU scalars..." << std::endl;
  ublas_v1 = ublas_v2 / alpha + beta * (ublas_v1 - alpha*ublas_v2);
  vcl_v1   = vcl_v2 / alpha   + beta * (vcl_v1   - alpha*vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  std::cout << "Testing another complicated vector expression with GPU scalars..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  ublas_v1 = ublas_v2 / alpha   +     beta * (ublas_v1 - alpha*ublas_v2);
  vcl_v1   = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1   - gpu_alpha*vcl_v2);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  
  std::cout << "Testing lenghty sum of scaled vectors..." << std::endl;
  ublas_v2 = 3.1415 * ublas_v1;
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  
  // The ViennaCL expression deliberately mixes host scalars and viennacl::scalar objects
  ublas_v1 = ublas_v2 / alpha   +     beta * ublas_v1 - alpha * ublas_v2 + beta * ublas_v1 - alpha * ublas_v1;
  vcl_v1   = vcl_v2 / gpu_alpha + gpu_beta *   vcl_v1 - alpha *   vcl_v2 + beta *   vcl_v1 - alpha *   vcl_v1;

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  
  // --------------------------------------------------------------------------            
  return retval;
}
/**
 * Exercises the ViennaCL scheduler on vectors: each operation is applied to
 * the uBLAS vectors directly and to the ViennaCL vectors through a
 * viennacl::scheduler::statement that is then executed, and both results are
 * compared with check().
 *
 * All four vectors are overwritten.
 *
 * @param epsilon   Forwarded unchanged to every check() call.
 * @param ublas_v1  First uBLAS reference vector (modified).
 * @param ublas_v2  Second uBLAS reference vector (modified).
 * @param vcl_v1    ViennaCL counterpart of ublas_v1 (modified).
 * @param vcl_v2    ViennaCL counterpart of ublas_v2 (modified).
 * @return EXIT_SUCCESS if every check() succeeded, EXIT_FAILURE at the first one that did not.
 */
int test(Epsilon const& epsilon,
         UblasVectorType     & ublas_v1, UblasVectorType     & ublas_v2,
         ViennaCLVectorType1 &   vcl_v1, ViennaCLVectorType2 &   vcl_v2)
{
  NumericT                    host_value   = 42.0;
  viennacl::scalar<NumericT>  device_value = 43.0;

  // The initializer assignments below never change the vector sizes,
  // so they are read once up front.
  std::size_t const ublas_len = ublas_v1.size();
  std::size_t const vcl_len   = vcl_v1.size();

  //
  // Vector initializers
  //
  std::cout << "Checking for zero_vector initializer..." << std::endl;
  ublas_v1 = ublas::zero_vector<NumericT>(ublas_len);
  vcl_v1   = viennacl::zero_vector<NumericT>(vcl_len);
  if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
  {
    return EXIT_FAILURE;
  }

  std::cout << "Checking for scalar_vector initializer..." << std::endl;
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_len, host_value);
  vcl_v1   = viennacl::scalar_vector<NumericT>(vcl_len, host_value);
  if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
  {
    return EXIT_FAILURE;
  }

  // Same initializer again, this time fed from the viennacl::scalar
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_len, device_value);
  vcl_v1   = viennacl::scalar_vector<NumericT>(vcl_len, device_value);
  if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
  {
    return EXIT_FAILURE;
  }

  std::cout << "Checking for unit_vector initializer..." << std::endl;
  ublas_v1 = ublas::unit_vector<NumericT>(ublas_len, 5);
  vcl_v1   = viennacl::unit_vector<NumericT>(vcl_len, 5);
  if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
  {
    return EXIT_FAILURE;
  }

  //
  // Fill the reference vectors with 1 + random<NumericT>() and mirror them to ViennaCL
  //
  for (std::size_t k = 0; k != ublas_len; ++k)
  {
    ublas_v1[k] = NumericT(1.0) + random<NumericT>();
    ublas_v2[k] = NumericT(1.0) + random<NumericT>();
  }

  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Checking for successful copy..." << std::endl;
  if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
  {
    return EXIT_FAILURE;
  }
  if (EXIT_SUCCESS != check(ublas_v2, vcl_v2, epsilon))
  {
    return EXIT_FAILURE;
  }

  //
  // Statements whose right-hand side is a plain vector
  //
  std::cout << "Testing simple assignments..." << std::endl;

  {
    // vcl_v1 = vcl_v2
    ublas_v1 = ublas_v2;
    viennacl::scheduler::statement assign_stmt(vcl_v1, viennacl::op_assign(), vcl_v2);
    viennacl::scheduler::execute(assign_stmt);

    if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
    {
      return EXIT_FAILURE;
    }
  }

  {
    // vcl_v1 += vcl_v2
    ublas_v1 += ublas_v2;
    viennacl::scheduler::statement add_stmt(vcl_v1, viennacl::op_inplace_add(), vcl_v2);
    viennacl::scheduler::execute(add_stmt);

    if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
    {
      return EXIT_FAILURE;
    }
  }

  {
    // vcl_v1 -= vcl_v2
    ublas_v1 -= ublas_v2;
    viennacl::scheduler::statement sub_stmt(vcl_v1, viennacl::op_inplace_sub(), vcl_v2);
    viennacl::scheduler::execute(sub_stmt);

    if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
    {
      return EXIT_FAILURE;
    }
  }

  //
  // Statements whose right-hand side is a vector expression
  //
  std::cout << "Testing composite assignments..." << std::endl;

  {
    // vcl_v1 = vcl_v1 + vcl_v2
    ublas_v1 = ublas_v1 + ublas_v2;
    viennacl::scheduler::statement sum_stmt(vcl_v1, viennacl::op_assign(), vcl_v1 + vcl_v2);
    viennacl::scheduler::execute(sum_stmt);

    if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
    {
      return EXIT_FAILURE;
    }
  }

  {
    // vcl_v1 = vcl_v1 - vcl_v2
    ublas_v1 = ublas_v1 - ublas_v2;
    viennacl::scheduler::statement diff_stmt(vcl_v1, viennacl::op_assign(), vcl_v1 - vcl_v2);
    viennacl::scheduler::execute(diff_stmt);

    if (EXIT_SUCCESS != check(ublas_v1, vcl_v1, epsilon))
    {
      return EXIT_FAILURE;
    }
  }

  // Every comparison passed
  return EXIT_SUCCESS;
}
Esempio n. 3
0
//
// Runs the vector test sequence: every operation is evaluated once on the host
// reference vectors (std_v1, std_v2) and once on the ViennaCL vectors
// (vcl_v1, vcl_v2); the two results are then compared via check(), which is
// called without a tolerance argument here.
//
// Covered: initializers, inner_prod, norm_1, norm_inf, index_norm_inf, max, min, sum,
// element access, scaling by host and device scalars, (in-place) vector addition,
// multiply-add and division-add combinations, swap, element_prod and element_div.
//
// All four vectors are overwritten. The loops index std_v2 by std_v1.size() and copy
// the host data into vcl_v1/vcl_v2, so the four vectors are assumed to have the same
// length (and more than five entries for the unit_vector check) -- TODO confirm
// against the callers. NumericT and the vector types are presumably template
// parameters declared outside this excerpt.
//
// Returns EXIT_FAILURE at the first mismatch reported by check(), EXIT_SUCCESS otherwise.
//
int test(STLVectorType       & std_v1, STLVectorType       & std_v2,
         ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
{
  int retval = EXIT_SUCCESS;

  NumericT                    cpu_result = 42;
  viennacl::scalar<NumericT>  gpu_result = 43;

  //
  // Initializer:
  //
  std::cout << "Checking for zero_vector initializer..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = 0;
  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for scalar_vector initializer..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = cpu_result;
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // gpu_result was initialized to 43 == cpu_result + 1
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = cpu_result + 1;
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for unit_vector initializer..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = (i == 5) ? 1 : 0;
  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  for (std::size_t i=0; i<std_v1.size(); ++i)
  {
    std_v1[i] = NumericT(i);
    std_v2[i] = NumericT(i+42);
  }

  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(std_v2, vcl_v2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  //
  // Part 1: Norms and inner product
  //

  // --------------------------------------------------------------------------
  std::cout << "Testing inner_prod..." << std::endl;
  cpu_result = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result += std_v1[i] * std_v2[i];
  NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
  gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result += (std_v1[i] + std_v2[i]) * (2*std_v2[i]);
  NumericT cpu_result3 = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, 2*vcl_v2);
  gpu_result = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, 2*vcl_v2);

  if (check(cpu_result, cpu_result3) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  // The host references for norm_1 / norm_inf below take no absolute value;
  // the entries written above (i and i+42) are non-negative.
  std::cout << "Testing norm_1..." << std::endl;
  cpu_result = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)   //note: norm_1 broken for unsigned ints on MacOS
    cpu_result += std_v1[i];
  gpu_result = viennacl::linalg::norm_1(vcl_v1);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result2 = 0; //reset
  for (std::size_t i=0; i<std_v1.size(); ++i)   //note: norm_1 broken for unsigned ints on MacOS
    cpu_result2 += std_v1[i];
  cpu_result = viennacl::linalg::norm_1(vcl_v1);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result2 = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)   //note: norm_1 broken for unsigned ints on MacOS
    cpu_result2 += std_v1[i] + std_v2[i];
  cpu_result = viennacl::linalg::norm_1(vcl_v1 + vcl_v2);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  std::cout << "Testing norm_inf..." << std::endl;
  cpu_result = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    if (std_v1[i] > cpu_result)
      cpu_result = std_v1[i];
  gpu_result = viennacl::linalg::norm_inf(vcl_v1);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result2 = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    if (std_v1[i] > cpu_result2)
      cpu_result2 = std_v1[i];
  cpu_result = viennacl::linalg::norm_inf(vcl_v1);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result2 = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    if (std_v1[i] + std_v2[i] > cpu_result2)
      cpu_result2 = std_v1[i] + std_v2[i];
  cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  std::cout << "Testing index_norm_inf..." << std::endl;

  std::size_t cpu_index = 0;
  cpu_result = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    if (std_v1[i] > cpu_result)
    {
      cpu_result = std_v1[i];
      cpu_index = i;
    }
  std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1);

  if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index)) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  // --------------------------------------------------------------------------
  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)];

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Reference for index_norm_inf(v1 + v2): locate the first maximum of the sum,
  // then read v1 at that position. The running maximum has to hold the sum itself;
  // storing v1[i] in it would compare sums against unrelated values.
  cpu_index = 0;
  NumericT cpu_max_sum = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    if (std_v1[i] + std_v2[i] > cpu_max_sum)
    {
      cpu_max_sum = std_v1[i] + std_v2[i];
      cpu_index = i;
    }
  cpu_result = std_v1[cpu_index];
  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)];

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  std::cout << "Testing max..." << std::endl;
  cpu_result = std_v1[0];
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
  gpu_result = viennacl::linalg::max(vcl_v1);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Same test with the result assigned to a host scalar: the reference is parked in
  // gpu_result and cpu_result is deliberately spoiled before being overwritten.
  cpu_result = std_v1[0];
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
  gpu_result = cpu_result;
  cpu_result *= 2; //reset
  cpu_result = viennacl::linalg::max(vcl_v1);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result = std_v1[0] + std_v2[0];
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result = std::max<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
  gpu_result = cpu_result;
  cpu_result *= 2; //reset
  cpu_result = viennacl::linalg::max(vcl_v1 + vcl_v2);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  // --------------------------------------------------------------------------
  std::cout << "Testing min..." << std::endl;
  cpu_result = std_v1[0];
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
  gpu_result = viennacl::linalg::min(vcl_v1);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result = std_v1[0];
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
  gpu_result = cpu_result;
  cpu_result *= 2; //reset
  cpu_result = viennacl::linalg::min(vcl_v1);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result = std_v1[0] + std_v2[0];
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result = std::min<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
  gpu_result = cpu_result;
  cpu_result *= 2; //reset
  cpu_result = viennacl::linalg::min(vcl_v1 + vcl_v2);

  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  std::cout << "Testing sum..." << std::endl;
  cpu_result = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result += std_v1[i];
  cpu_result2 = viennacl::linalg::sum(vcl_v1);
  gpu_result = viennacl::linalg::sum(vcl_v1);

  if (check(cpu_result, cpu_result2) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  cpu_result = 0;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    cpu_result += std_v1[i] + std_v2[i];
  cpu_result3 = viennacl::linalg::sum(vcl_v1 + vcl_v2);
  gpu_result = viennacl::linalg::sum(vcl_v1 + vcl_v2);

  if (check(cpu_result, cpu_result3) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(cpu_result, gpu_result) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  // --------------------------------------------------------------------------

  std::cout << "Testing assignments..." << std::endl;
  NumericT val = static_cast<NumericT>(1);
  for (std::size_t i=0; i < std_v1.size(); ++i)
    std_v1[i] = val;

  // entry-wise write access through operator()
  for (std::size_t i=0; i < vcl_v1.size(); ++i)
    vcl_v1(i) = val;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // multiplication and division of vectors by scalars
  //
  std::cout << "Testing scaling with CPU scalar..." << std::endl;
  NumericT alpha = static_cast<NumericT>(3);
  viennacl::scalar<NumericT> gpu_alpha = alpha;

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] *= alpha;
  vcl_v1 *= alpha;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing scaling with GPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] *= alpha;
  vcl_v1 *= gpu_alpha;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  NumericT beta  = static_cast<NumericT>(2);
  viennacl::scalar<NumericT> gpu_beta = beta;

  std::cout << "Testing shrinking with CPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] /= beta;
  vcl_v1 /= beta;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing shrinking with GPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] /= beta;
  vcl_v1 /= gpu_beta;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // add and inplace_add of vectors
  //
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = NumericT(i);
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  std::cout << "Testing add on vector..." << std::endl;

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(std_v2, vcl_v2) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] + std_v2[i];
  vcl_v1 = vcl_v1 + vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace-add on vector..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v2[i];
  vcl_v1 += vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // multiply-add
  //
  // From here on every sub-test first sets v2 = 3 * v1 on the host and re-uploads
  // both vectors, so host and device start each sub-test from identical data.
  std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
  for (std::size_t i=0; i < std_v1.size(); ++i)
    std_v1[i] = NumericT(i);
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] + alpha * std_v2[i];
  vcl_v1 = vcl_v1 + alpha * vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = alpha * std_v1[i] + std_v2[i];
  vcl_v1 = alpha * vcl_v1 + vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
  vcl_v1 = alpha * vcl_v1 + beta * vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += alpha * std_v2[i];
  vcl_v1 += alpha * vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] + alpha * std_v2[i];
  vcl_v1   = vcl_v1   + gpu_alpha *   vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // "left": the device scalar scales the first operand (mirrors the CPU-scalar case above)
  std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = alpha * std_v1[i] + std_v2[i];
  vcl_v1 = gpu_alpha * vcl_v1 + vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
  vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += alpha * std_v1[i] + beta * std_v2[i];
  vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += alpha * std_v2[i];
  vcl_v1 += gpu_alpha * vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  //
  // division-add
  //
  std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl;
  for (std::size_t i=0; i < std_v1.size(); ++i)
    std_v1[i] = NumericT(i);
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] + std_v2[i] / alpha;
  vcl_v1 = vcl_v1 + vcl_v2 / alpha;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] / alpha + std_v2[i];
  vcl_v1 = vcl_v1 / alpha + vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
  vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] / alpha + std_v2[i] * beta;
  vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] * alpha + std_v2[i] / beta;
  vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;



  std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v2[i] / alpha;
  vcl_v1 += vcl_v2 / alpha;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] + std_v2[i] / alpha;
  vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // "left": the device scalar divides the first operand (mirrors the CPU-scalar case above)
  std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] / alpha + std_v2[i];
  vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
  vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v1[i] / alpha + std_v2[i] / beta;
  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v1[i] / alpha + std_v2[i] * beta;
  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  // In-place division by the device scalar, as announced by the message below
  std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v2[i] / alpha;
  vcl_v1 += vcl_v2 / gpu_alpha;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  //
  // More complicated expressions (for ensuring the operator overloads work correctly)
  //
  for (std::size_t i=0; i < std_v1.size(); ++i)
    std_v1[i] = NumericT(i);
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  std::cout << "Testing three vector additions..." << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v2[i] + std_v1[i] + std_v2[i];
  vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v2[i] = 3 * std_v1[i];
  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  std::cout << "Testing swap..." << std::endl;
  swap(std_v1, std_v2);
  swap(vcl_v1, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Testing elementwise multiplication..." << std::endl;
  std::cout << " v1 = element_prod(v1, v2);" << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] * std_v2[i];
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << " v1 += element_prod(v1, v2);" << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v1[i] * std_v2[i];
  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = (std_v1[i] + std_v2[i]) * std_v2[i];
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += (std_v1[i] + std_v2[i]) * std_v2[i];
  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] * (std_v2[i] + std_v1[i]);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v1[i] * (std_v2[i] + std_v1[i]);
  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  std::cout << "Testing elementwise division..." << std::endl;
  // fresh data with strictly positive divisors (v2[i] = 5 + i)
  for (std::size_t i=0; i<std_v1.size(); ++i)
  {
    std_v1[i] = NumericT(1 + i);
    std_v2[i] = NumericT(5 + i);
  }

  viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin());
  viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin());

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] / std_v2[i];
  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v1[i] / std_v2[i];
  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = (std_v1[i] + std_v2[i]) / std_v2[i];
  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += (std_v1[i] + std_v2[i]) / std_v2[i];
  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = std_v1[i] / (std_v2[i] + std_v1[i]);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += std_v1[i] / (std_v2[i] + std_v1[i]);
  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  ///////
  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] = (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  for (std::size_t i=0; i<std_v1.size(); ++i)
    std_v1[i] += (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);

  if (check(std_v1, vcl_v1) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // --------------------------------------------------------------------------
  return retval;
}
//
// Compares viennacl::linalg::inner_prod(x, <tuple of vectors>) with entry-wise ublas::inner_prod().
//
// The four host vectors are filled with NumericT(1.0) + random<NumericT>() and copied to their
// ViennaCL counterparts. Each stage then writes reference inner products into selected entries
// of a 40-entry host vector and assigns the ViennaCL tuple result to a viennacl::slice projection
// of a 40-entry ViennaCL vector, after which both vectors are compared with check().
// The slice arguments appear to be (start, stride, size); this matches the reference indices
// written right before each projection.
//
// epsilon            : passed through to every check() call
// ublas_v1..ublas_v4 : host vectors (overwritten); indexed up to ublas_v1.size(), so they are
//                      presumably all of the same length
// vcl_v1..vcl_v4     : ViennaCL vectors receiving copies of the host data
//
// Returns EXIT_FAILURE as soon as one check() fails, EXIT_SUCCESS otherwise.
//
int test(Epsilon const& epsilon,
         UblasVectorType1    & ublas_v1, UblasVectorType2    & ublas_v2, UblasVectorType3    & ublas_v3, UblasVectorType4    & ublas_v4,
         ViennaCLVectorType1 &   vcl_v1, ViennaCLVectorType2 &   vcl_v2, ViennaCLVectorType3 &   vcl_v3, ViennaCLVectorType4 &   vcl_v4)
{
  // Fresh input data; the order of the random<>() calls per index is kept (v1, v2, v3, v4).
  for (std::size_t idx = 0; idx < ublas_v1.size(); ++idx)
  {
    ublas_v1[idx] = NumericT(1.0) + random<NumericT>();
    ublas_v2[idx] = NumericT(1.0) + random<NumericT>();
    ublas_v3[idx] = NumericT(1.0) + random<NumericT>();
    ublas_v4[idx] = NumericT(1.0) + random<NumericT>();
  }

  // Mirror the host data on the ViennaCL side.
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());
  viennacl::copy(ublas_v3.begin(), ublas_v3.end(), vcl_v3.begin());
  viennacl::copy(ublas_v4.begin(), ublas_v4.end(), vcl_v4.begin());

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v3, vcl_v3, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v4, vcl_v4, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Accumulators: entries are overwritten stage by stage and never reset in between.
  ublas::vector<NumericT>    expected = ublas::scalar_vector<NumericT>(40, 0.0);
  viennacl::vector<NumericT> computed = viennacl::scalar_vector<NumericT>(40, 0.0);

  //
  // Two vectors per tuple
  //
  std::cout << "Testing inner_prod with two vectors..." << std::endl;
  expected[2] = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[5] = ublas::inner_prod(ublas_v1, ublas_v2);
  viennacl::project(computed, viennacl::slice(2, 3, 2)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2));
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  expected[3] = ublas::inner_prod(ublas_v1, ublas_v3);
  expected[7] = ublas::inner_prod(ublas_v1, ublas_v4);
  viennacl::project(computed, viennacl::slice(3, 4, 2)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v4));
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  //
  // Three vectors per tuple
  //
  std::cout << "Testing inner_prod with three vectors..." << std::endl;
  expected[1] = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[3] = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[5] = ublas::inner_prod(ublas_v1, ublas_v3);
  viennacl::project(computed, viennacl::slice(1, 2, 3)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3));
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  expected[2]  = ublas::inner_prod(ublas_v1, ublas_v3);
  expected[6]  = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[10] = ublas::inner_prod(ublas_v1, ublas_v4);
  viennacl::project(computed, viennacl::slice(2, 4, 3)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4));
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  //
  // Four vectors per tuple
  //
  std::cout << "Testing inner_prod with four vectors..." << std::endl;
  expected[4] = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[5] = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[6] = ublas::inner_prod(ublas_v1, ublas_v3);
  expected[7] = ublas::inner_prod(ublas_v1, ublas_v4);
  viennacl::project(computed, viennacl::slice(4, 1, 4)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3, vcl_v4));
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  expected[3]  = ublas::inner_prod(ublas_v1, ublas_v3);
  expected[6]  = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[9]  = ublas::inner_prod(ublas_v1, ublas_v4);
  expected[12] = ublas::inner_prod(ublas_v1, ublas_v1);
  viennacl::project(computed, viennacl::slice(3, 3, 4)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4, vcl_v1));
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  //
  // Five vectors per tuple
  //
  std::cout << "Testing inner_prod with five vectors..." << std::endl;
  expected[1] = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[3] = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[5] = ublas::inner_prod(ublas_v1, ublas_v3);
  expected[7] = ublas::inner_prod(ublas_v1, ublas_v4);
  expected[9] = ublas::inner_prod(ublas_v1, ublas_v2);
  viennacl::project(computed, viennacl::slice(1, 2, 5)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3, vcl_v4, vcl_v2));
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  expected[2]  = ublas::inner_prod(ublas_v1, ublas_v3);
  expected[4]  = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[6]  = ublas::inner_prod(ublas_v1, ublas_v4);
  expected[8]  = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[10] = ublas::inner_prod(ublas_v1, ublas_v2);
  viennacl::project(computed, viennacl::slice(2, 2, 5)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4, vcl_v1, vcl_v2));
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  //
  // Eight vectors per tuple: the tuple is built from an explicit list of vector_base pointers.
  //
  std::cout << "Testing inner_prod with eight vectors..." << std::endl;
  expected[1]  = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[5]  = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[9]  = ublas::inner_prod(ublas_v1, ublas_v3);
  expected[13] = ublas::inner_prod(ublas_v1, ublas_v4);
  expected[17] = ublas::inner_prod(ublas_v1, ublas_v3);
  expected[21] = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[25] = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[29] = ublas::inner_prod(ublas_v1, ublas_v2);

  std::vector<viennacl::vector_base<NumericT> const *> first_operands;
  first_operands.reserve(8);
  first_operands.push_back(&vcl_v1);
  first_operands.push_back(&vcl_v2);
  first_operands.push_back(&vcl_v3);
  first_operands.push_back(&vcl_v4);
  first_operands.push_back(&vcl_v3);
  first_operands.push_back(&vcl_v2);
  first_operands.push_back(&vcl_v1);
  first_operands.push_back(&vcl_v2);
  viennacl::vector_tuple<NumericT> first_tuple(first_operands);

  viennacl::project(computed, viennacl::slice(1, 4, 8)) = viennacl::linalg::inner_prod(vcl_v1, first_tuple);
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  expected[3]  = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[5]  = ublas::inner_prod(ublas_v1, ublas_v4);
  expected[7]  = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[9]  = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[11] = ublas::inner_prod(ublas_v1, ublas_v2);
  expected[13] = ublas::inner_prod(ublas_v1, ublas_v1);
  expected[15] = ublas::inner_prod(ublas_v1, ublas_v4);
  expected[17] = ublas::inner_prod(ublas_v1, ublas_v2);

  std::vector<viennacl::vector_base<NumericT> const *> second_operands;
  second_operands.reserve(8);
  second_operands.push_back(&vcl_v2);
  second_operands.push_back(&vcl_v4);
  second_operands.push_back(&vcl_v1);
  second_operands.push_back(&vcl_v2);
  second_operands.push_back(&vcl_v2);
  second_operands.push_back(&vcl_v1);
  second_operands.push_back(&vcl_v4);
  second_operands.push_back(&vcl_v2);
  viennacl::vector_tuple<NumericT> second_tuple(second_operands);

  viennacl::project(computed, viennacl::slice(3, 2, 8)) = viennacl::linalg::inner_prod(vcl_v1, second_tuple);
  if (check(expected, computed, epsilon) != EXIT_SUCCESS)
  {
    std::cout << expected << std::endl << computed << std::endl;
    return EXIT_FAILURE;
  }

  // --------------------------------------------------------------------------
  // Every comparison succeeded.
  return EXIT_SUCCESS;
}
Esempio n. 5
0
//
// Runs vector operations through the ViennaCL scheduler and compares them with ublas.
//
// Every stage performs an operation on the host vectors with ublas (or a plain loop), builds
// the matching viennacl::scheduler::statement, runs it with viennacl::scheduler::execute(),
// and compares host and ViennaCL data via check(). Stages covered, in order: vector
// initializers, host-to-ViennaCL copy, simple and composite assignments, reductions
// (inner_prod, norm_1, norm_2, norm_inf), binary and unary element-wise operations, and two
// composite expressions mixing a reduction with a vector operation.
//
// epsilon            : passed through to every check() call
// ublas_v1, ublas_v2 : host vectors, overwritten throughout; ublas_v2 is indexed up to
//                      ublas_v1.size(), so both are presumably of equal length
// vcl_v1, vcl_v2     : ViennaCL vectors, overwritten throughout
//
// NOTE(review): the unit_vector stage uses index 5, which presumably requires vectors with
// more than five entries - confirm against the callers.
//
// Returns EXIT_FAILURE as soon as one check() fails, EXIT_SUCCESS otherwise.
//
int test(Epsilon const& epsilon,
         UblasVectorType     & ublas_v1, UblasVectorType     & ublas_v2,
         ViennaCLVectorType1 &   vcl_v1, ViennaCLVectorType2 &   vcl_v2)
{
  int retval = EXIT_SUCCESS;

  NumericT                    cpu_result = 42.0;
  viennacl::scalar<NumericT>  gpu_result = 43.0;
  NumericT                    alpha      = NumericT(3.1415);
  NumericT                    beta       = NumericT(2.7172);

  //
  // Initializer:
  //
  std::cout << "Checking for zero_vector initializer..." << std::endl;
  ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size());
  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for scalar_vector initializer..." << std::endl;
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), cpu_result);
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  // Same initializer, this time fed from a viennacl::scalar instead of a host scalar.
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), gpu_result);
  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "Checking for unit_vector initializer..." << std::endl;
  ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5);
  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  // Fresh input data for all following stages.
  for (std::size_t i=0; i<ublas_v1.size(); ++i)
  {
    ublas_v1[i] = NumericT(1.0) + random<NumericT>();
    ublas_v2[i] = NumericT(1.0) + random<NumericT>();
  }

  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin());  //resync
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin());

  std::cout << "Checking for successful copy..." << std::endl;
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;


  // --------------------------------------------------------------------------

  std::cout << "Testing simple assignments..." << std::endl;

  {
  ublas_v1 = ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), vcl_v2); // same as vcl_v1 = vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  ublas_v1 += ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_add(), vcl_v2); // same as vcl_v1 += vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  ublas_v1 -= ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_sub(), vcl_v2); // same as vcl_v1 -= vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "Testing composite assignments..." << std::endl;
  {
  ublas_v1 = ublas_v1 + ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 + vcl_v2); // same as vcl_v1 = vcl_v1 + vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }
  {
  ublas_v1 += alpha * ublas_v1 - beta * ublas_v2 + ublas_v1 / beta - ublas_v2 / alpha;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_inplace_add(), alpha * vcl_v1 - beta * vcl_v2 + vcl_v1 / beta - vcl_v2 / alpha); // same as vcl_v1 += alpha * vcl_v1 - beta * vcl_v2 + vcl_v1 / beta - vcl_v2 / alpha;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  ublas_v1 = ublas_v1 - ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 - vcl_v2); // same as vcl_v1 = vcl_v1 - vcl_v2;
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "--- Testing reductions ---" << std::endl;
  std::cout << "inner_prod..." << std::endl;
  {
  cpu_result = inner_prod(ublas_v1, ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2)); // same as gpu_result = inner_prod(vcl_v1, vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = inner_prod(ublas_v1 + ublas_v2, ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2)); // same as gpu_result = inner_prod(vcl_v1 + vcl_v2, vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = inner_prod(ublas_v1, ublas_v2 - ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2 - vcl_v1)); // same as gpu_result = inner_prod(vcl_v1, vcl_v2 - vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = inner_prod(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1)); // same as gpu_result = inner_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "norm_1..." << std::endl;
  {
  cpu_result = norm_1(ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_1(vcl_v1)); // same as gpu_result = norm_1(vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = norm_1(ublas_v1 + ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_1(vcl_v1 + vcl_v2)); // same as gpu_result = norm_1(vcl_v1 + vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "norm_2..." << std::endl;
  {
  cpu_result = norm_2(ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_2(vcl_v1)); // same as gpu_result = norm_2(vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = norm_2(ublas_v1 + ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_2(vcl_v1 + vcl_v2)); // same as gpu_result = norm_2(vcl_v1 + vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "norm_inf..." << std::endl;
  {
  cpu_result = norm_inf(ublas_v1);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_inf(vcl_v1)); // same as gpu_result = norm_inf(vcl_v1);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  {
  cpu_result = norm_inf(ublas_v1 - ublas_v2);
  viennacl::scheduler::statement   my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_inf(vcl_v1 - vcl_v2)); // same as gpu_result = norm_inf(vcl_v1 - vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "--- Testing elementwise operations (binary) ---" << std::endl;
  std::cout << "x = element_prod(x, y)... ";
  {
  ublas_v1 = element_prod(ublas_v1, ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1, vcl_v2));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_prod(x + y, y)... ";
  {
  ublas_v1 = element_prod(ublas_v1 + ublas_v2, ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_prod(x, x + y)... ";
  {
  ublas_v1 = element_prod(ublas_v1, ublas_v1 + ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_prod(x - y, y + x)... ";
  {
  ublas_v1 = element_prod(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }



  std::cout << "x = element_div(x, y)... ";
  {
  ublas_v1 = element_div(ublas_v1, ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1, vcl_v2));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_div(x + y, y)... ";
  {
  ublas_v1 = element_div(ublas_v1 + ublas_v2, ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_div(x, x + y)... ";
  {
  ublas_v1 = element_div(ublas_v1, ublas_v1 + ublas_v2);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = element_div(x - y, y + x)... ";
  {
  ublas_v1 = element_div(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1));
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "--- Testing elementwise operations (unary) ---" << std::endl;
  // Expands to two checks of element_<OPNAME>: one on a plain vector, one on the expression
  // vcl_v2 / NumericT(2). Inputs are reset on every expansion (ublas_v1 = 0.21, ublas_v2 =
  // 3.1415 * ublas_v1), so expansions do not depend on each other. The host reference is
  // always computed with the std::-qualified function so that the overload matching NumericT
  // is selected in both branches.
#define GENERATE_UNARY_OP_TEST(OPNAME) \
  ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), NumericT(0.21)); \
  ublas_v2 = NumericT(3.1415) * ublas_v1; \
  viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); \
  viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); \
  { \
  for (std::size_t i=0; i<ublas_v1.size(); ++i) \
    ublas_v1[i] = std::OPNAME(ublas_v2[i]); \
  viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_##OPNAME(vcl_v2)); \
  viennacl::scheduler::execute(my_statement); \
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
    return EXIT_FAILURE; \
  } \
  { \
  for (std::size_t i=0; i<ublas_v1.size(); ++i) \
    ublas_v1[i] = std::OPNAME(ublas_v2[i] / NumericT(2)); \
  viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_##OPNAME(vcl_v2 / NumericT(2))); \
  viennacl::scheduler::execute(my_statement); \
  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
    return EXIT_FAILURE; \
  }

  GENERATE_UNARY_OP_TEST(cos);
  GENERATE_UNARY_OP_TEST(cosh);
  GENERATE_UNARY_OP_TEST(exp);
  GENERATE_UNARY_OP_TEST(floor);
  GENERATE_UNARY_OP_TEST(fabs);
  GENERATE_UNARY_OP_TEST(log);
  GENERATE_UNARY_OP_TEST(log10);
  GENERATE_UNARY_OP_TEST(sin);
  GENERATE_UNARY_OP_TEST(sinh);
  //GENERATE_UNARY_OP_TEST(abs); //OpenCL allows abs on integers only
  GENERATE_UNARY_OP_TEST(sqrt);
  GENERATE_UNARY_OP_TEST(tan);
  GENERATE_UNARY_OP_TEST(tanh);

#undef GENERATE_UNARY_OP_TEST

  std::cout << "--- Testing complicated composite operations ---" << std::endl;
  std::cout << "x = inner_prod(x, y) * y..." << std::endl;
  {
  ublas_v1 = inner_prod(ublas_v1, ublas_v2) * ublas_v2;
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2) * vcl_v2);
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }

  std::cout << "x = y / norm_1(x)..." << std::endl;
  {
  ublas_v1 = ublas_v2 / norm_1(ublas_v1);
  viennacl::scheduler::statement   my_statement(vcl_v1, viennacl::op_assign(), vcl_v2 / viennacl::linalg::norm_1(vcl_v1) );
  viennacl::scheduler::execute(my_statement);

  if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
    return EXIT_FAILURE;
  }


  // --------------------------------------------------------------------------
  return retval;
}