int main() { std::size_t size = 10; // at least 7 float eps_float = 1e-5; double eps_double = 1e-12; float ref_float_alpha; double ref_double_alpha; std::vector<float> ref_float_x(size, 1.0f); std::vector<float> ref_float_y(size, 2.0f); std::vector<double> ref_double_x(size, 1.0); std::vector<double> ref_double_y(size, 2.0); // Host setup ViennaCLHostBackend my_host_backend = NULL; float host_float_alpha = 0; viennacl::vector<float> host_float_x = viennacl::scalar_vector<float>(size, 1.0, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::vector<float> host_float_y = viennacl::scalar_vector<float>(size, 2.0, viennacl::context(viennacl::MAIN_MEMORY)); double host_double_alpha = 0; viennacl::vector<double> host_double_x = viennacl::scalar_vector<double>(size, 1.0, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::vector<double> host_double_y = viennacl::scalar_vector<double>(size, 2.0, viennacl::context(viennacl::MAIN_MEMORY)); // CUDA setup #ifdef VIENNACL_WITH_CUDA ViennaCLCUDABackend my_cuda_backend = NULL; float cuda_float_alpha = 0; viennacl::vector<float> cuda_float_x = viennacl::scalar_vector<float>(size, 1.0, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::vector<float> cuda_float_y = viennacl::scalar_vector<float>(size, 2.0, viennacl::context(viennacl::CUDA_MEMORY)); double cuda_double_alpha = 0; viennacl::vector<double> cuda_double_x = viennacl::scalar_vector<double>(size, 1.0, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::vector<double> cuda_double_y = viennacl::scalar_vector<double>(size, 2.0, viennacl::context(viennacl::CUDA_MEMORY)); #endif // OpenCL setup #ifdef VIENNACL_WITH_OPENCL std::size_t context_id = 0; float opencl_float_alpha = 0; viennacl::vector<float> opencl_float_x = viennacl::scalar_vector<float>(size, 1.0, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::vector<float> opencl_float_y = viennacl::scalar_vector<float>(size, 2.0, viennacl::context(viennacl::ocl::get_context(context_id))); double opencl_double_alpha = 0; viennacl::vector<double> *opencl_double_x = NULL; viennacl::vector<double> *opencl_double_y = NULL; if( viennacl::ocl::current_device().double_support() ) { *opencl_double_x = viennacl::scalar_vector<double>(size, 1.0, viennacl::context(viennacl::ocl::get_context(context_id))); *opencl_double_y = viennacl::scalar_vector<double>(size, 2.0, viennacl::context(viennacl::ocl::get_context(context_id))); } ViennaCLOpenCLBackend_impl my_opencl_backend_impl; my_opencl_backend_impl.context_id = context_id; ViennaCLOpenCLBackend my_opencl_backend = &my_opencl_backend_impl; #endif // consistency checks: check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // ASUM std::cout << std::endl << "-- Testing xASUM..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i<size/4; ++i) { ref_float_alpha += std::fabs(ref_float_x[2 + 3*i]); ref_double_alpha += std::fabs(ref_double_x[2 + 3*i]); } std::cout << std::endl << "Host: "; ViennaCLHostSasum(my_host_backend, size/4, &host_float_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 2, 3); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDasum(my_host_backend, size/4, &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 2, 3); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASasum(my_cuda_backend, size/4, &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 2, 3); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADasum(my_cuda_backend, size/4, &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 2, 3); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSasum(my_opencl_backend, size/4, &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 3); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDasum(my_opencl_backend, size/4, &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 3); check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // AXPY std::cout << std::endl << "-- Testing xAXPY..."; for (std::size_t i=0; i<size/3; ++i) { ref_float_y[1 + 2*i] += 2.0f * ref_float_x[0 + 2*i]; ref_double_y[1 + 2*i] += 2.0 * ref_double_x[0 + 2*i]; } std::cout << std::endl << "Host: "; ViennaCLHostSaxpy(my_host_backend, size/3, 2.0f, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 0, 2, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 2); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDaxpy(my_host_backend, size/3, 2.0, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 0, 2, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 2); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASaxpy(my_cuda_backend, size/3, 2.0f, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 0, 2, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_y), 1, 2); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADaxpy(my_cuda_backend, size/3, 2.0, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 0, 2, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_y), 1, 2); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSaxpy(my_opencl_backend, size/3, 2.0f, viennacl::traits::opencl_handle(opencl_float_x).get(), 0, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2); check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDaxpy(my_opencl_backend, size/3, 2.0, viennacl::traits::opencl_handle(*opencl_double_x).get(), 0, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // COPY std::cout << std::endl << "-- Testing xCOPY..."; for (std::size_t i=0; i<size/3; ++i) { ref_float_y[0 + 2*i] = ref_float_x[1 + 2*i]; ref_double_y[0 + 2*i] = ref_double_x[1 + 2*i]; } std::cout << std::endl << "Host: "; ViennaCLHostScopy(my_host_backend, size/3, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 2, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 0, 2); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDcopy(my_host_backend, size/3, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 2, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 0, 2); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDAScopy(my_cuda_backend, size/3, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 1, 2, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_y), 0, 2); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADcopy(my_cuda_backend, size/3, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 1, 2, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_y), 0, 2); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLScopy(my_opencl_backend, size/3, viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 0, 2); check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDcopy(my_opencl_backend, size/3, viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 0, 2); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // DOT std::cout << std::endl << "-- Testing xDOT..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i<size/2; ++i) { ref_float_alpha += ref_float_y[3 + 2*i] * ref_float_x[2 + 2*i]; ref_double_alpha += ref_double_y[3 + 2*i] * ref_double_x[2 + 2*i]; } std::cout << std::endl << "Host: "; ViennaCLHostSdot(my_host_backend, size/2, &host_float_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 2, 1, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 3, 1); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDdot(my_host_backend, size/2, &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 2, 1, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 3, 1); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASdot(my_cuda_backend, size/2, &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 2, 1, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_y), 3, 1); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADdot(my_cuda_backend, size/2, &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 2, 1, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_y), 3, 1); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSdot(my_opencl_backend, size/2, &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 1, viennacl::traits::opencl_handle(opencl_float_y).get(), 3, 1); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDdot(my_opencl_backend, size/2, &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 1, viennacl::traits::opencl_handle(*opencl_double_y).get(), 3, 1); check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // NRM2 std::cout << std::endl << "-- Testing xNRM2..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i<size/3; ++i) { ref_float_alpha += ref_float_x[1 + 2*i] * ref_float_x[1 + 2*i]; ref_double_alpha += ref_double_x[1 + 2*i] * ref_double_x[1 + 2*i]; } ref_float_alpha = std::sqrt(ref_float_alpha); ref_double_alpha = std::sqrt(ref_double_alpha); std::cout << std::endl << "Host: "; ViennaCLHostSnrm2(my_host_backend, size/3, &host_float_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 2); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDnrm2(my_host_backend, size/3, &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 2); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASnrm2(my_cuda_backend, size/3, &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 1, 2); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADnrm2(my_cuda_backend, size/3, &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 1, 2); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSnrm2(my_opencl_backend, size/3, &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 2); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDnrm2(my_opencl_backend, size/3, &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 2); check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // ROT std::cout << std::endl << "-- Testing xROT..."; for (std::size_t i=0; i<size/4; ++i) { float tmp = 0.6 * ref_float_x[2 + 3*i] + 0.8 * ref_float_y[1 + 2*i]; ref_float_y[1 + 2*i] = -0.8 * ref_float_x[2 + 3*i] + 0.6 * ref_float_y[1 + 2*i];; ref_float_x[2 + 3*i] = tmp; double tmp2 = 0.6 * ref_double_x[2 + 3*i] + 0.8 * ref_double_y[1 + 2*i]; ref_double_y[1 + 2*i] = -0.8 * ref_double_x[2 + 3*i] + 0.6 * ref_double_y[1 + 2*i];; ref_double_x[2 + 3*i] = tmp2; } std::cout << std::endl << "Host: "; ViennaCLHostSrot(my_host_backend, size/4, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 2, 3, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 2, 0.6f, 0.8f); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDrot(my_host_backend, size/4, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 2, 3, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 2, 0.6, 0.8); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASrot(my_cuda_backend, size/4, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 2, 3, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_y), 1, 2, 0.6f, 0.8f); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADrot(my_cuda_backend, size/4, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 2, 3, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_y), 1, 2, 0.6, 0.8); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSrot(my_opencl_backend, size/4, viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 3, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2, 0.6f, 0.8f); check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDrot(my_opencl_backend, size/4, viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 3, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2, 0.6, 0.8); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // SCAL std::cout << std::endl << "-- Testing xSCAL..."; for (std::size_t i=0; i<size/4; ++i) { ref_float_x[1 + 3*i] *= 2.0f; ref_double_x[1 + 3*i] *= 2.0; } std::cout << std::endl << "Host: "; ViennaCLHostSscal(my_host_backend, size/4, 2.0f, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 3); check(ref_float_x, host_float_x, eps_float); ViennaCLHostDscal(my_host_backend, size/4, 2.0, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 3); check(ref_double_x, host_double_x, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASscal(my_cuda_backend, size/4, 2.0f, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 1, 3); check(ref_float_x, cuda_float_x, eps_float); ViennaCLCUDADscal(my_cuda_backend, size/4, 2.0, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 1, 3); check(ref_double_x, cuda_double_x, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSscal(my_opencl_backend, size/4, 2.0f, viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 3); check(ref_float_x, opencl_float_x, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDscal(my_opencl_backend, size/4, 2.0, viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 3); check(ref_double_x, *opencl_double_x, eps_double); } #endif // SWAP std::cout << std::endl << "-- Testing xSWAP..."; for (std::size_t i=0; i<size/3; ++i) { float tmp = ref_float_x[2 + 2*i]; ref_float_x[2 + 2*i] = ref_float_y[1 + 2*i]; ref_float_y[1 + 2*i] = tmp; double tmp2 = ref_double_x[2 + 2*i]; ref_double_x[2 + 2*i] = ref_double_y[1 + 2*i]; ref_double_y[1 + 2*i] = tmp2; } std::cout << std::endl << "Host: "; ViennaCLHostSswap(my_host_backend, size/3, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 2, 2, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 2); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDswap(my_host_backend, size/3, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 2, 2, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 2); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASswap(my_cuda_backend, size/3, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 2, 2, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_y), 1, 2); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADswap(my_cuda_backend, size/3, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 2, 2, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_y), 1, 2); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSswap(my_opencl_backend, size/3, viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDswap(my_opencl_backend, size/3, viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2); check(ref_double_y, *opencl_double_y, eps_double); } #endif // IAMAX std::cout << std::endl << "-- Testing IxASUM..."; size_t ref_index = 0; ref_float_alpha = 0; for (std::size_t i=0; i<size/3; ++i) { if (ref_float_x[0 + 2*i] > std::fabs(ref_float_alpha)) { ref_index = i; ref_float_alpha = std::fabs(ref_float_x[0 + 2*i]); } } std::cout << std::endl << "Host: "; size_t idx = 0; ViennaCLHostiSamax(my_host_backend, size/3, &idx, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 0, 2); check(ref_index, idx, eps_float); idx = 0; ViennaCLHostiDamax(my_host_backend, size/3, &idx, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 0, 2); check(ref_index, idx, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; idx = 0; ViennaCLCUDAiSamax(my_cuda_backend, size/3, &idx, viennacl::linalg::cuda::detail::cuda_arg<float>(cuda_float_x), 0, 2); check(ref_float_x[2*ref_index], ref_float_x[2*idx], eps_float); idx = 0; ViennaCLCUDAiDamax(my_cuda_backend, size/3, &idx, viennacl::linalg::cuda::detail::cuda_arg<double>(cuda_double_x), 0, 2); check(ref_double_x[2*ref_index], ref_double_x[2*idx], eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; idx = 0; ViennaCLOpenCLiSamax(my_opencl_backend, size/3, &idx, viennacl::traits::opencl_handle(opencl_float_x).get(), 0, 2); check(ref_float_x[2*ref_index], ref_float_x[2*idx], eps_float); idx = 0; if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLiDamax(my_opencl_backend, size/3, &idx, viennacl::traits::opencl_handle(*opencl_double_x).get(), 0, 2); check(ref_double_x[2*ref_index], ref_double_x[2*idx], eps_double); } #endif // // That's it. // std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int main() { std::size_t size1 = 13; // at least 7 std::size_t size2 = 11; // at least 7 float eps_float = 1e-5f; double eps_double = 1e-12; ViennaCLBackend my_backend; ViennaCLBackendCreate(&my_backend); std::vector<float> ref_float_x(size1); for (std::size_t i=0; i<size1; ++i) ref_float_x[i] = static_cast<float>(i); std::vector<float> ref_float_y(size2); for (std::size_t i=0; i<size2; ++i) ref_float_y[i] = static_cast<float>(size2 - i); std::vector<float> ref_float_A(size1*size2); for (std::size_t i=0; i<size1*size2; ++i) ref_float_A[i] = static_cast<float>(3*i); std::vector<float> ref_float_B(size1*size2); for (std::size_t i=0; i<size1*size2; ++i) ref_float_B[i] = static_cast<float>(2*i); std::vector<double> ref_double_x(size1, 1.0); for (std::size_t i=0; i<size1; ++i) ref_double_x[i] = static_cast<double>(i); std::vector<double> ref_double_y(size2, 2.0); for (std::size_t i=0; i<size2; ++i) ref_double_y[i] = static_cast<double>(size2 - i); std::vector<double> ref_double_A(size1*size2, 3.0); for (std::size_t i=0; i<size1*size2; ++i) ref_double_A[i] = static_cast<double>(3*i); std::vector<double> ref_double_B(size1*size2, 4.0); for (std::size_t i=0; i<size1*size2; ++i) ref_double_B[i] = static_cast<double>(2*i); // Host setup viennacl::vector<float> host_float_x = viennacl::scalar_vector<float>(size1, 1.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1; ++i) host_float_x[i] = float(i); viennacl::vector<float> host_float_y = viennacl::scalar_vector<float>(size2, 2.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size2; ++i) host_float_y[i] = float(size2 - i); viennacl::vector<float> host_float_A = viennacl::scalar_vector<float>(size1*size2, 3.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) host_float_A[i] = float(3*i); viennacl::vector<float> host_float_B = viennacl::scalar_vector<float>(size1*size2, 4.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) host_float_B[i] = float(2*i); viennacl::vector<double> host_double_x = viennacl::scalar_vector<double>(size1, 1.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1; ++i) host_double_x[i] = double(i); viennacl::vector<double> host_double_y = viennacl::scalar_vector<double>(size2, 2.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size2; ++i) host_double_y[i] = double(size2 - i); viennacl::vector<double> host_double_A = viennacl::scalar_vector<double>(size1*size2, 3.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) host_double_A[i] = double(3*i); viennacl::vector<double> host_double_B = viennacl::scalar_vector<double>(size1*size2, 4.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) host_double_B[i] = double(2*i); // CUDA setup #ifdef VIENNACL_WITH_CUDA viennacl::vector<float> cuda_float_x = viennacl::scalar_vector<float>(size1, 1.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1; ++i) cuda_float_x[i] = float(i); viennacl::vector<float> cuda_float_y = viennacl::scalar_vector<float>(size2, 2.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size2; ++i) cuda_float_y[i] = float(size2 - i); viennacl::vector<float> cuda_float_A = viennacl::scalar_vector<float>(size1*size2, 3.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) cuda_float_A[i] = float(3*i); viennacl::vector<float> cuda_float_B = viennacl::scalar_vector<float>(size1*size2, 4.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) cuda_float_B[i] = float(2*i); viennacl::vector<double> cuda_double_x = viennacl::scalar_vector<double>(size1, 1.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1; ++i) cuda_double_x[i] = double(i); viennacl::vector<double> cuda_double_y = viennacl::scalar_vector<double>(size2, 2.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size2; ++i) cuda_double_y[i] = double(size2 - i); viennacl::vector<double> cuda_double_A = viennacl::scalar_vector<double>(size1*size2, 3.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) cuda_double_A[i] = double(3*i); viennacl::vector<double> cuda_double_B = viennacl::scalar_vector<double>(size1*size2, 4.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i<size1*size2; ++i) cuda_double_B[i] = double(2*i); #endif // OpenCL setup #ifdef VIENNACL_WITH_OPENCL ViennaCLInt context_id = 0; viennacl::vector<float> opencl_float_x = viennacl::scalar_vector<float>(size1, 1.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i<size1; ++i) opencl_float_x[i] = float(i); viennacl::vector<float> opencl_float_y = viennacl::scalar_vector<float>(size2, 2.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i<size2; ++i) opencl_float_y[i] = float(size2 - i); viennacl::vector<float> opencl_float_A = viennacl::scalar_vector<float>(size1*size2, 3.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i<size1*size2; ++i) opencl_float_A[i] = float(3*i); viennacl::vector<float> opencl_float_B = viennacl::scalar_vector<float>(size1*size2, 4.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i<size1*size2; ++i) opencl_float_B[i] = float(2*i); viennacl::vector<double> *opencl_double_x = NULL; viennacl::vector<double> *opencl_double_y = NULL; viennacl::vector<double> *opencl_double_A = NULL; viennacl::vector<double> *opencl_double_B = NULL; if ( viennacl::ocl::current_device().double_support() ) { opencl_double_x = new viennacl::vector<double>(viennacl::scalar_vector<double>(size1, 1.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i<size1; ++i) (*opencl_double_x)[i] = double(i); opencl_double_y = new viennacl::vector<double>(viennacl::scalar_vector<double>(size2, 2.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i<size2; ++i) (*opencl_double_y)[i] = double(size2 - i); opencl_double_A = new viennacl::vector<double>(viennacl::scalar_vector<double>(size1*size2, 3.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i<size1*size2; ++i) (*opencl_double_A)[i] = double(3*i); opencl_double_B = new viennacl::vector<double>(viennacl::scalar_vector<double>(size1*size2, 4.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i<size1*size2; ++i) (*opencl_double_B)[i] = double(2*i); } ViennaCLBackendSetOpenCLContextID(my_backend, context_id); #endif // consistency checks: check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); check(ref_float_A, host_float_A, eps_float); check(ref_float_B, host_float_B, eps_float); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); check(ref_double_A, host_double_A, eps_double); check(ref_double_B, host_double_B, eps_double); #ifdef VIENNACL_WITH_CUDA check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); check(ref_float_A, cuda_float_A, eps_float); check(ref_float_B, cuda_float_B, eps_float); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); check(ref_double_A, cuda_double_A, eps_double); check(ref_double_B, cuda_double_B, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); check(ref_float_A, opencl_float_A, eps_float); check(ref_float_B, opencl_float_B, eps_float); if ( viennacl::ocl::current_device().double_support() ) { check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); check(ref_double_A, *opencl_double_A, eps_double); check(ref_double_B, *opencl_double_B, eps_double); } #endif // GEMV std::cout << std::endl << "-- Testing xGEMV..."; for (std::size_t i=0; i<size1/3; ++i) { ref_float_x[i * 2 + 1] *= 0.1234f; ref_double_x[i * 2 + 1] *= 0.1234; for (std::size_t j=0; j<size2/4; ++j) { ref_float_x[i * 2 + 1] += 3.1415f * ref_float_A[(2*i+2) * size2 + 3 * j + 1] * ref_float_y[j * 3 + 1]; ref_double_x[i * 2 + 1] += 3.1415 * ref_double_A[(2*i+2) * size2 + 3 * j + 1] * ref_double_y[j * 3 + 1]; } } std::cout << std::endl << "Host: "; ViennaCLHostSgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 3, 0.1234f, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 2); check(ref_float_x, host_float_x, eps_float); ViennaCLHostDgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 3, 0.1234, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 2); check(ref_double_x, host_double_x, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::cuda_arg(cuda_float_A), 2, 1, 2, 3, size2, viennacl::cuda_arg(cuda_float_y), 1, 3, 0.1234f, viennacl::cuda_arg(cuda_float_x), 1, 2); check(ref_float_x, cuda_float_x, eps_float); ViennaCLCUDADgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::cuda_arg(cuda_double_A), 2, 1, 2, 3, size2, viennacl::cuda_arg(cuda_double_y), 1, 3, 0.1234, viennacl::cuda_arg(cuda_double_x), 1, 2); check(ref_double_x, cuda_double_x, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::traits::opencl_handle(opencl_float_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::traits::opencl_handle(opencl_float_y), 1, 3, 0.1234f, viennacl::traits::opencl_handle(opencl_float_x), 1, 2); check(ref_float_x, opencl_float_x, eps_float); if ( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::traits::opencl_handle(*opencl_double_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::traits::opencl_handle(*opencl_double_y), 1, 3, 0.1234, viennacl::traits::opencl_handle(*opencl_double_x), 1, 2); check(ref_double_x, *opencl_double_x, eps_double); } #endif #ifdef VIENNACL_WITH_OPENCL delete opencl_double_x; delete opencl_double_y; delete opencl_double_A; delete opencl_double_B; #endif ViennaCLBackendDestroy(&my_backend); // // That's it. // std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }