int test(Epsilon const& epsilon, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42.0; viennacl::scalar<NumericT> gpu_result = 43.0; // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size()); vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size()); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." << std::endl; ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), cpu_result); vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), gpu_result); vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." << std::endl; ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5); vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<ublas_v1.size(); ++i) { ublas_v1[i] = NumericT(1.0) + random<NumericT>(); ublas_v2[i] = NumericT(1.0) + random<NumericT>(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // Part 1: Norms and inner product // // -------------------------------------------------------------------------- std::cout << "Testing inner_prod..." << std::endl; cpu_result = viennacl::linalg::inner_prod(ublas_v1, ublas_v2); NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2); gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2); if (check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing norm_1..." << std::endl; cpu_result = ublas::norm_1(ublas_v1); gpu_result = viennacl::linalg::norm_1(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = 2 * cpu_result; //reset gpu_result = ublas::norm_1(ublas_v1); cpu_result = viennacl::linalg::norm_1(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing norm_2..." << std::endl; cpu_result = ublas::norm_2(ublas_v1); gpu_result = viennacl::linalg::norm_2(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = 2 * cpu_result; //reset gpu_result = ublas::norm_2(ublas_v1); cpu_result = viennacl::linalg::norm_2(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing norm_inf..." << std::endl; cpu_result = ublas::norm_inf(ublas_v1); gpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = 2 * cpu_result; //reset gpu_result = ublas::norm_inf(ublas_v1); cpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing index_norm_inf..." << std::endl; std::size_t cpu_index = ublas::index_norm_inf(ublas_v1); std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1); if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- cpu_result = ublas_v1[index_norm_inf(ublas_v1)]; gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)]; if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // Plane rotation and assignments // // -------------------------------------------------------------------------- ublas::vector<NumericT> x = ublas_v1; ublas::vector<NumericT> y = ublas_v2; ublas::vector<NumericT> t = ublas_v1; t.assign (NumericT(1.1) * x + NumericT(2.3) * y), y.assign (- NumericT(2.3) * x + NumericT(1.1) * y), x.assign (t); viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1.1), NumericT(2.3)); if (check(x, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(y, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing assignments..." << std::endl; NumericT val = static_cast<NumericT>(1e-3); for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = val; for (size_t i=0; i < vcl_v1.size(); ++i) vcl_v1(i) = val; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiplication and division of vectors by scalars // std::cout << "Testing scaling with CPU scalar..." << std::endl; NumericT alpha = static_cast<NumericT>(2.7182); viennacl::scalar<NumericT> gpu_alpha = alpha; ublas_v1 *= alpha; vcl_v1 *= alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing scaling with GPU scalar..." << std::endl; ublas_v1 *= alpha; vcl_v1 *= gpu_alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; NumericT beta = static_cast<NumericT>(1.4153); viennacl::scalar<NumericT> gpu_beta = beta; std::cout << "Testing shrinking with CPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing shrinking with GPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // add and inplace_add of vectors // ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing add on vector..." << std::endl; std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas_v1 + ublas_v2; vcl_v1 = vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-add on vector..." << std::endl; ublas_v1 += ublas_v2; vcl_v1 += vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // subtract and inplace_subtract of vectors // std::cout << "Testing sub on vector..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2; vcl_v1 = vcl_v1 - vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-sub on vector..." << std::endl; ublas_v1 -= ublas_v2; vcl_v1 -= vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-add // std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + ublas_v2; vcl_v1 = alpha * vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = alpha * vcl_v1 + beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 + beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 - beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-subtract // std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - ublas_v2; vcl_v1 = alpha * vcl_v1 - vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - beta * ublas_v2; vcl_v1 = alpha * vcl_v1 - beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v1 + beta * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v1 - beta * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // More complicated expressions (for ensuring the operator overloads work correctly) // ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing three vector additions..." << std::endl; ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2; vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl; ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2); vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl; ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2); vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing swap..." << std::endl; swap(ublas_v1, ublas_v2); swap(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- for (std::size_t i=0; i<ublas_v1.size(); ++i) { ublas_v1[i] = NumericT(1.0) + random<NumericT>(); ublas_v2[i] = NumericT(1.0) + random<NumericT>(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing elementwise multiplication..." << std::endl; ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise division..." << std::endl; ublas_v1 = ublas::element_div(ublas_v1, ublas_v2); vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::element_div(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing another complicated vector expression with CPU scalars..." << std::endl; ublas_v1 = ublas_v2 / alpha + beta * (ublas_v1 - alpha*ublas_v2); vcl_v1 = vcl_v2 / alpha + beta * (vcl_v1 - alpha*vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing another complicated vector expression with GPU scalars..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v2 / alpha + beta * (ublas_v1 - alpha*ublas_v2); vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1 - gpu_alpha*vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing lenghty sum of scaled vectors..." << std::endl; ublas_v2 = 3.1415 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v2 / alpha + beta * ublas_v1 - alpha * ublas_v2 + beta * ublas_v1 - alpha * ublas_v1; vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- return retval; }
int test(Epsilon const& epsilon, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42.0; viennacl::scalar<NumericT> gpu_result = 43.0; // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size()); vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size()); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." << std::endl; ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), cpu_result); vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), gpu_result); vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." << std::endl; ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5); vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<ublas_v1.size(); ++i) { ublas_v1[i] = NumericT(1.0) + random<NumericT>(); ublas_v2[i] = NumericT(1.0) + random<NumericT>(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing simple assignments..." << std::endl; { ublas_v1 = ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v2); // same as vcl_v1 = vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 += ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_add(), vcl_v2); // same as vcl_v1 += vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 -= ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_sub(), vcl_v2); // same as vcl_v1 -= vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "Testing composite assignments..." << std::endl; { ublas_v1 = ublas_v1 + ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 + vcl_v2); // same as vcl_v1 = vcl_v1 + vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 = ublas_v1 - ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 - vcl_v2); // same as vcl_v1 = vcl_v1 - vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; }
int test(STLVectorType & std_v1, STLVectorType & std_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42; viennacl::scalar<NumericT> gpu_result = 43; // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = 0; vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size()); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = cpu_result; vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = cpu_result + 1; vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = (i == 5) ? 1 : 0; vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<std_v1.size(); ++i) { std_v1[i] = NumericT(i); std_v2[i] = NumericT(i+42); } viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); //resync viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); std::cout << "Checking for successful copy..." << std::endl; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(std_v2, vcl_v2) != EXIT_SUCCESS) return EXIT_FAILURE; // // Part 1: Norms and inner product // // -------------------------------------------------------------------------- std::cout << "Testing inner_prod..." << std::endl; cpu_result = 0; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result += std_v1[i] * std_v2[i]; NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2); gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = 0; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result += (std_v1[i] + std_v2[i]) * (2*std_v2[i]); NumericT cpu_result3 = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, 2*vcl_v2); gpu_result = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, 2*vcl_v2); if (check(cpu_result, cpu_result3) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing norm_1..." << std::endl; cpu_result = 0; for (std::size_t i=0; i<std_v1.size(); ++i) //note: norm_1 broken for unsigned ints on MacOS cpu_result += std_v1[i]; gpu_result = viennacl::linalg::norm_1(vcl_v1); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result2 = 0; //reset for (std::size_t i=0; i<std_v1.size(); ++i) //note: norm_1 broken for unsigned ints on MacOS cpu_result2 += std_v1[i]; cpu_result = viennacl::linalg::norm_1(vcl_v1); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result2 = 0; for (std::size_t i=0; i<std_v1.size(); ++i) //note: norm_1 broken for unsigned ints on MacOS cpu_result2 += std_v1[i] + std_v2[i]; cpu_result = viennacl::linalg::norm_1(vcl_v1 + vcl_v2); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing norm_inf..." << std::endl; cpu_result = 0; for (std::size_t i=0; i<std_v1.size(); ++i) if (std_v1[i] > cpu_result) cpu_result = std_v1[i]; gpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result2 = 0; for (std::size_t i=0; i<std_v1.size(); ++i) if (std_v1[i] > cpu_result2) cpu_result2 = std_v1[i]; cpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result2 = 0; for (std::size_t i=0; i<std_v1.size(); ++i) if (std_v1[i] + std_v2[i] > cpu_result2) cpu_result2 = std_v1[i] + std_v2[i]; cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing index_norm_inf..." << std::endl; std::size_t cpu_index = 0; cpu_result = 0; for (std::size_t i=0; i<std_v1.size(); ++i) if (std_v1[i] > cpu_result) { cpu_result = std_v1[i]; cpu_index = i; } std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1); if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index)) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_index = 0; cpu_result = 0; for (std::size_t i=0; i<std_v1.size(); ++i) if (std_v1[i] + std_v2[i] > cpu_result) { cpu_result = std_v1[i]; cpu_index = i; } gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing max..." << std::endl; cpu_result = std_v1[0]; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result = std::max<NumericT>(cpu_result, std_v1[i]); gpu_result = viennacl::linalg::max(vcl_v1); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = std_v1[0]; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result = std::max<NumericT>(cpu_result, std_v1[i]); gpu_result = cpu_result; cpu_result *= 2; //reset cpu_result = viennacl::linalg::max(vcl_v1); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = std_v1[0] + std_v2[0]; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result = std::max<NumericT>(cpu_result, std_v1[i] + std_v2[i]); gpu_result = cpu_result; cpu_result *= 2; //reset cpu_result = viennacl::linalg::max(vcl_v1 + vcl_v2); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing min..." << std::endl; cpu_result = std_v1[0]; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result = std::min<NumericT>(cpu_result, std_v1[i]); gpu_result = viennacl::linalg::min(vcl_v1); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = std_v1[0]; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result = std::min<NumericT>(cpu_result, std_v1[i]); gpu_result = cpu_result; cpu_result *= 2; //reset cpu_result = viennacl::linalg::min(vcl_v1); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = std_v1[0] + std_v2[0]; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result = std::min<NumericT>(cpu_result, std_v1[i] + std_v2[i]); gpu_result = cpu_result; cpu_result *= 2; //reset cpu_result = viennacl::linalg::min(vcl_v1 + vcl_v2); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing sum..." << std::endl; cpu_result = 0; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result += std_v1[i]; cpu_result2 = viennacl::linalg::sum(vcl_v1); gpu_result = viennacl::linalg::sum(vcl_v1); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = 0; for (std::size_t i=0; i<std_v1.size(); ++i) cpu_result += std_v1[i] + std_v2[i]; cpu_result3 = viennacl::linalg::sum(vcl_v1 + vcl_v2); gpu_result = viennacl::linalg::sum(vcl_v1 + vcl_v2); if (check(cpu_result, cpu_result3) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing assignments..." << std::endl; NumericT val = static_cast<NumericT>(1); for (size_t i=0; i < std_v1.size(); ++i) std_v1[i] = val; for (size_t i=0; i < vcl_v1.size(); ++i) vcl_v1(i) = val; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiplication and division of vectors by scalars // std::cout << "Testing scaling with CPU scalar..." << std::endl; NumericT alpha = static_cast<NumericT>(3); viennacl::scalar<NumericT> gpu_alpha = alpha; for (size_t i=0; i<std_v1.size(); ++i) std_v1[i] *= alpha; vcl_v1 *= alpha; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing scaling with GPU scalar..." << std::endl; for (size_t i=0; i<std_v1.size(); ++i) std_v1[i] *= alpha; vcl_v1 *= gpu_alpha; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; NumericT beta = static_cast<NumericT>(2); viennacl::scalar<NumericT> gpu_beta = beta; std::cout << "Testing shrinking with CPU scalar..." << std::endl; for (size_t i=0; i<std_v1.size(); ++i) std_v1[i] /= beta; vcl_v1 /= beta; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing shrinking with GPU scalar..." << std::endl; for (size_t i=0; i<std_v1.size(); ++i) std_v1[i] /= beta; vcl_v1 /= gpu_beta; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // add and inplace_add of vectors // for (size_t i=0; i<std_v1.size(); ++i) std_v1[i] = NumericT(i); for (size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); //resync viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); std::cout << "Testing add on vector..." << std::endl; std::cout << "Checking for successful copy..." << std::endl; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(std_v2, vcl_v2) != EXIT_SUCCESS) return EXIT_FAILURE; for (size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] + std_v2[i]; vcl_v1 = vcl_v1 + vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-add on vector..." << std::endl; for (size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v2[i]; vcl_v1 += vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-add // std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < std_v1.size(); ++i) std_v1[i] = NumericT(i); for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] + alpha * std_v2[i]; vcl_v1 = vcl_v1 + alpha * vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = alpha * std_v1[i] + std_v2[i]; vcl_v1 = alpha * vcl_v1 + vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = alpha * std_v1[i] + beta * std_v2[i]; vcl_v1 = alpha * vcl_v1 + beta * vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += alpha * std_v2[i]; vcl_v1 += alpha * vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] + alpha * std_v2[i]; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] + alpha * std_v2[i]; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = alpha * std_v1[i] + beta * std_v2[i]; vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += alpha * std_v1[i] + beta * std_v2[i]; vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += alpha * std_v2[i]; vcl_v1 += gpu_alpha * vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-add // std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < std_v1.size(); ++i) std_v1[i] = NumericT(i); for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] + std_v2[i] / alpha; vcl_v1 = vcl_v1 + vcl_v2 / alpha; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] / alpha + std_v2[i]; vcl_v1 = vcl_v1 / alpha + vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] / alpha + std_v2[i] * beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] * alpha + std_v2[i] / beta; vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v2[i] / alpha; vcl_v1 += vcl_v2 / alpha; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] + std_v2[i] / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] + std_v2[i] / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta; vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v1[i] / alpha + std_v2[i] / beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v1[i] / alpha + std_v2[i] * beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v2[i] * alpha; vcl_v1 += vcl_v2 * gpu_alpha; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // More complicated expressions (for ensuring the operator overloads work correctly) // for (size_t i=0; i < std_v1.size(); ++i) std_v1[i] = NumericT(i); for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); std::cout << "Testing three vector additions..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v2[i] + std_v1[i] + std_v2[i]; vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2; if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- for (std::size_t i=0; i<std_v1.size(); ++i) std_v2[i] = 3 * std_v1[i]; viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); std::cout << "Testing swap..." << std::endl; swap(std_v1, std_v2); swap(vcl_v1, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise multiplication..." << std::endl; std::cout << " v1 = element_prod(v1, v2);" << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] * std_v2[i]; vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1, v2);" << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v1[i] * std_v2[i]; vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = (std_v1[i] + std_v2[i]) * std_v2[i]; vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += (std_v1[i] + std_v2[i]) * std_v2[i]; vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] * (std_v2[i] + std_v1[i]); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v1[i] * (std_v2[i] + std_v1[i]); vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]); vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise division..." << std::endl; for (std::size_t i=0; i<std_v1.size(); ++i) { std_v1[i] = NumericT(1 + i); std_v2[i] = NumericT(5 + i); } viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] / std_v2[i]; vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v1[i] / std_v2[i]; vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = (std_v1[i] + std_v2[i]) / std_v2[i]; vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += (std_v1[i] + std_v2[i]) / std_v2[i]; vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = std_v1[i] / (std_v2[i] + std_v1[i]); vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += std_v1[i] / (std_v2[i] + std_v1[i]); vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] = (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]); vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<std_v1.size(); ++i) std_v1[i] += (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]); vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(std_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- return retval; }
int test(Epsilon const& epsilon, UblasVectorType1 & ublas_v1, UblasVectorType2 & ublas_v2, UblasVectorType3 & ublas_v3, UblasVectorType4 & ublas_v4, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2, ViennaCLVectorType3 & vcl_v3, ViennaCLVectorType4 & vcl_v4) { int retval = EXIT_SUCCESS; for (std::size_t i=0; i<ublas_v1.size(); ++i) { ublas_v1[i] = NumericT(1.0) + random<NumericT>(); ublas_v2[i] = NumericT(1.0) + random<NumericT>(); ublas_v3[i] = NumericT(1.0) + random<NumericT>(); ublas_v4[i] = NumericT(1.0) + random<NumericT>(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_v3.begin(), ublas_v3.end(), vcl_v3.begin()); viennacl::copy(ublas_v4.begin(), ublas_v4.end(), vcl_v4.begin()); std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v3, vcl_v3, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v4, vcl_v4, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas::vector<NumericT> ref_result = ublas::scalar_vector<NumericT>(40, 0.0); viennacl::vector<NumericT> result = viennacl::scalar_vector<NumericT>(40, 0.0); std::cout << "Testing inner_prod with two vectors..." << std::endl; ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(2, 3, 2)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(3, 4, 2)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with three vectors..." << std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v3); viennacl::project(result, viennacl::slice(1, 2, 3)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(10) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(2, 4, 3)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with four vectors..." << std::endl; ref_result(4) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(4, 1, 4)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(12) = ublas::inner_prod(ublas_v1, ublas_v1); viennacl::project(result, viennacl::slice(3, 3, 4)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4, vcl_v1)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with five vectors..." << std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(1, 2, 5)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3, vcl_v4, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(4) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(8) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(10) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(2, 2, 5)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4, vcl_v1, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with eight vectors..." << std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(13) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(17) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(21) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(25) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(29) = ublas::inner_prod(ublas_v1, ublas_v2); std::vector<viennacl::vector_base<NumericT> const *> vecs1(8); vecs1[0] = &vcl_v1; vecs1[1] = &vcl_v2; vecs1[2] = &vcl_v3; vecs1[3] = &vcl_v4; vecs1[4] = &vcl_v3; vecs1[5] = &vcl_v2; vecs1[6] = &vcl_v1; vecs1[7] = &vcl_v2; viennacl::vector_tuple<NumericT> tuple1(vecs1); viennacl::project(result, viennacl::slice(1, 4, 8)) = viennacl::linalg::inner_prod(vcl_v1, tuple1); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(11) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(13) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(15) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(17) = ublas::inner_prod(ublas_v1, ublas_v2); std::vector<viennacl::vector_base<NumericT> const *> vecs2(8); vecs2[0] = &vcl_v2; vecs2[1] = &vcl_v4; vecs2[2] = &vcl_v1; vecs2[3] = &vcl_v2; vecs2[4] = &vcl_v2; vecs2[5] = &vcl_v1; vecs2[6] = &vcl_v4; vecs2[7] = &vcl_v2; viennacl::vector_tuple<NumericT> tuple2(vecs2); viennacl::project(result, viennacl::slice(3, 2, 8)) = viennacl::linalg::inner_prod(vcl_v1, tuple2); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; }
int test(Epsilon const& epsilon, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42.0; viennacl::scalar<NumericT> gpu_result = 43.0; NumericT alpha = NumericT(3.1415); NumericT beta = NumericT(2.7172); // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; ublas_v1 = ublas::zero_vector<NumericT>(ublas_v1.size()); vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size()); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." << std::endl; ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), cpu_result); vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), gpu_result); vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." << std::endl; ublas_v1 = ublas::unit_vector<NumericT>(ublas_v1.size(), 5); vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i<ublas_v1.size(); ++i) { ublas_v1[i] = NumericT(1.0) + random<NumericT>(); ublas_v2[i] = NumericT(1.0) + random<NumericT>(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing simple assignments..." << std::endl; { ublas_v1 = ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v2); // same as vcl_v1 = vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 += ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_add(), vcl_v2); // same as vcl_v1 += vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 -= ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_sub(), vcl_v2); // same as vcl_v1 -= vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "Testing composite assignments..." << std::endl; { ublas_v1 = ublas_v1 + ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 + vcl_v2); // same as vcl_v1 = vcl_v1 + vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 += alpha * ublas_v1 - beta * ublas_v2 + ublas_v1 / beta - ublas_v2 / alpha; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_add(), alpha * vcl_v1 - beta * vcl_v2 + vcl_v1 / beta - vcl_v2 / alpha); // same as vcl_v1 += alpha * vcl_v1 - beta * vcl_v2 + beta * vcl_v1 - alpha * vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 = ublas_v1 - ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 - vcl_v2); // same as vcl_v1 = vcl_v1 - vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "--- Testing reductions ---" << std::endl; std::cout << "inner_prod..." << std::endl; { cpu_result = inner_prod(ublas_v1, ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2)); // same as gpu_result = inner_prod(vcl_v1, vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = inner_prod(ublas_v1 + ublas_v2, ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2)); // same as gpu_result = inner_prod(vcl_v1 + vcl_v2, vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = inner_prod(ublas_v1, ublas_v2 - ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2 - vcl_v1)); // same as gpu_result = inner_prod(vcl_v1, vcl_v2 - vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = inner_prod(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1)); // same as gpu_result = inner_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "norm_1..." << std::endl; { cpu_result = norm_1(ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_1(vcl_v1)); // same as gpu_result = norm_1(vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = norm_1(ublas_v1 + ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_1(vcl_v1 + vcl_v2)); // same as gpu_result = norm_1(vcl_v1 + vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "norm_2..." << std::endl; { cpu_result = norm_2(ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_2(vcl_v1)); // same as gpu_result = norm_2(vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = norm_2(ublas_v1 + ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_2(vcl_v1 + vcl_v2)); // same as gpu_result = norm_2(vcl_v1 + vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "norm_inf..." << std::endl; { cpu_result = norm_inf(ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_inf(vcl_v1)); // same as gpu_result = norm_inf(vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = norm_inf(ublas_v1 - ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_inf(vcl_v1 - vcl_v2)); // same as gpu_result = norm_inf(vcl_v1 - vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "--- Testing elementwise operations (binary) ---" << std::endl; std::cout << "x = element_prod(x, y)... "; { ublas_v1 = element_prod(ublas_v1, ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1, vcl_v2)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x + y, y)... "; { ublas_v1 = element_prod(ublas_v1 + ublas_v2, ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x, x + y)... "; { ublas_v1 = element_prod(ublas_v1, ublas_v1 + ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x - y, y + x)... "; { ublas_v1 = element_prod(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x, y)... "; { ublas_v1 = element_div(ublas_v1, ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1, vcl_v2)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x + y, y)... "; { ublas_v1 = element_div(ublas_v1 + ublas_v2, ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x, x + y)... "; { ublas_v1 = element_div(ublas_v1, ublas_v1 + ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x - y, y + x)... "; { ublas_v1 = element_div(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "--- Testing elementwise operations (unary) ---" << std::endl; #define GENERATE_UNARY_OP_TEST(OPNAME) \ ublas_v1 = ublas::scalar_vector<NumericT>(ublas_v1.size(), NumericT(0.21)); \ ublas_v2 = NumericT(3.1415) * ublas_v1; \ viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); \ viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); \ { \ for (std::size_t i=0; i<ublas_v1.size(); ++i) \ ublas_v1[i] = OPNAME(ublas_v2[i]); \ viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_##OPNAME(vcl_v2)); \ viennacl::scheduler::execute(my_statement); \ if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \ return EXIT_FAILURE; \ } \ { \ for (std::size_t i=0; i<ublas_v1.size(); ++i) \ ublas_v1[i] = std::OPNAME(ublas_v2[i] / NumericT(2)); \ viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_##OPNAME(vcl_v2 / NumericT(2))); \ viennacl::scheduler::execute(my_statement); \ if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \ return EXIT_FAILURE; \ } GENERATE_UNARY_OP_TEST(cos); GENERATE_UNARY_OP_TEST(cosh); GENERATE_UNARY_OP_TEST(exp); GENERATE_UNARY_OP_TEST(floor); GENERATE_UNARY_OP_TEST(fabs); GENERATE_UNARY_OP_TEST(log); GENERATE_UNARY_OP_TEST(log10); GENERATE_UNARY_OP_TEST(sin); GENERATE_UNARY_OP_TEST(sinh); GENERATE_UNARY_OP_TEST(fabs); //GENERATE_UNARY_OP_TEST(abs); //OpenCL allows abs on integers only GENERATE_UNARY_OP_TEST(sqrt); GENERATE_UNARY_OP_TEST(tan); GENERATE_UNARY_OP_TEST(tanh); #undef GENERATE_UNARY_OP_TEST std::cout << "--- Testing complicated composite operations ---" << std::endl; std::cout << "x = inner_prod(x, y) * y..." << std::endl; { ublas_v1 = inner_prod(ublas_v1, ublas_v2) * ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2) * vcl_v2); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = y / norm_1(x)..." << std::endl; { ublas_v1 = ublas_v2 / norm_1(ublas_v1); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v2 / viennacl::linalg::norm_1(vcl_v1) ); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; }