// Solves the linear system described by A, b and x with the configured solver kernel.
//
// The system is first passed to row_normalize_system(A, b). Afterwards the solver kernel is
// picked according to solver_id_: a matching ViennaCL solver tag is constructed from
// break_tolerance_ and max_iterations_ and handed to solve_intern(), which (per the original
// note in this method) determines the preconditioner and calls the solver backend.
//
// Supported kernels: bicgstab, gmres, cg. Any other solver id only results in an error
// message on std::cerr; solve_intern() is not called in that case.
//
// A, b: system matrix and right-hand side (non-const: presumably rescaled in place by
//       row_normalize_system() -- confirm against its definition)
// x:    forwarded to solve_intern() together with A and b
void operator()(MatrixT& A, VectorT& b, VectorT& x)
{
  row_normalize_system(A, b);

  // BiCGStab
  if (solver_id_ == viennafvm::linsolv::viennacl::solver_ids::bicgstab)
  {
    ::viennacl::linalg::bicgstab_tag bicgstab_settings(break_tolerance_, max_iterations_);
    solve_intern(A, b, x, bicgstab_settings);
    return;
  }

  // GMRES
  if (solver_id_ == viennafvm::linsolv::viennacl::solver_ids::gmres)
  {
    ::viennacl::linalg::gmres_tag gmres_settings(break_tolerance_, max_iterations_);
    solve_intern(A, b, x, gmres_settings);
    return;
  }

  // Conjugate gradients
  if (solver_id_ == viennafvm::linsolv::viennacl::solver_ids::cg)
  {
    ::viennacl::linalg::cg_tag cg_settings(break_tolerance_, max_iterations_);
    solve_intern(A, b, x, cg_settings);
    return;
  }

  // None of the known solver ids matched: report it, nothing is solved.
  std::cerr << "[ERROR] ViennaFVM::LinearSolver: solver not supported .. " << std::endl;
}
// Single-threaded solve of the linear system given by system_matrix and rhs.
//
// Steps:
//   1. Copy the right-hand side and the system matrix into ViennaCL containers.
//   2. Set up an ILU0 preconditioner and run BiCGStab, using config.tolerance() and
//      config.max_iters() for the solver tag.
//   3. Copy the solution back into a VectorType, pass it to
//      viennashe::util::check_vector_for_valid_entries(), and log the relative residual
//      together with the iteration count.
//
// system_matrix: system matrix; its size1()/size2() fix the dimensions of the ViennaCL copies
// rhs:           right-hand side; rhs.size() entries are copied starting at rhs[0]
// config:        provides tolerance() and max_iters()
// (unnamed tag): not used inside the body; presumably selects this overload
//
// Returns the solution vector.
VectorType solve(MatrixType const & system_matrix,
                 VectorType const & rhs,
                 viennashe::solvers::linear_solver_config const & config,
                 viennashe::solvers::serial_linear_solver_tag)
{
  typedef typename VectorType::value_type          NumericT;
  typedef viennacl::compressed_matrix<NumericT>    VCLMatrixType;
  typedef viennacl::vector<NumericT>               VCLVectorType;

  //
  // Step 1: Convert data to ViennaCL types:
  //
  VCLMatrixType vcl_matrix(system_matrix.size1(), system_matrix.size2());
  VCLVectorType vcl_rhs(system_matrix.size1());

  NumericT const * rhs_data = &(rhs[0]);
  viennacl::fast_copy(rhs_data, rhs_data + rhs.size(), vcl_rhs.begin());
  detail::copy(system_matrix, vcl_matrix);

  //
  // Step 2: Setup preconditioner and run solver
  //
  log::info<log_linear_solver>() << "* solve(): Computing preconditioner (single-threaded)... " << std::endl;
  viennacl::linalg::ilu0_tag ilu0_config;
  viennacl::linalg::ilu0_precond<VCLMatrixType> ilu0(vcl_matrix, ilu0_config);

  log::info<log_linear_solver>() << "* solve(): Solving system (single-threaded)... " << std::endl;
  viennacl::linalg::bicgstab_tag bicgstab_config(config.tolerance(), config.max_iters());
  VCLVectorType vcl_solution = viennacl::linalg::solve(vcl_matrix, vcl_rhs, bicgstab_config, ilu0);

  //
  // Step 3: Convert data back:
  //
  VectorType solution(vcl_solution.size());
  viennacl::fast_copy(vcl_solution.begin(), vcl_solution.end(), &(solution[0]));

  viennashe::util::check_vector_for_valid_entries(solution);

  //
  // As a check, compute the relative residual |A x - b| / |b|:
  //
  log::info<log_linear_solver>() << "* solve(): residual: "
                                 << viennacl::linalg::norm_2(viennacl::linalg::prod(vcl_matrix, vcl_solution) - vcl_rhs) / viennacl::linalg::norm_2(vcl_rhs)
                                 << " after " << bicgstab_config.iters() << " iterations."
                                 << std::endl;

  return solution;
}
/** * The main steps in this tutorial are the following: * - Setup the systems * - Run solvers without preconditioner and with ILUT preconditioner for comparison * - Run solver with SPAI preconditioner on CPU * - Run solver with SPAI preconditioner on GPU * - Run solver with factored SPAI preconditioner on CPU * - Run solver with factored SPAI preconditioner on GPU * **/ int main (int, const char **) { typedef float ScalarType; typedef boost::numeric::ublas::compressed_matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::compressed_matrix<ScalarType> GPUMatrixType; typedef viennacl::vector<ScalarType> GPUVectorType; /** * If you have multiple OpenCL-capable devices in your system, we pick the second device for this tutorial. **/ #ifdef VIENNACL_WITH_OPENCL // Optional: Customize OpenCL backend viennacl::ocl::platform pf = viennacl::ocl::get_platforms()[0]; std::vector<viennacl::ocl::device> const & devices = pf.devices(); // Optional: Set first device to first context: viennacl::ocl::setup_context(0, devices[0]); // Optional: Set second device for second context (use the same device for the second context if only one device available): if (devices.size() > 1) viennacl::ocl::setup_context(1, devices[1]); else viennacl::ocl::setup_context(1, devices[0]); std::cout << viennacl::ocl::current_device().info() << std::endl; viennacl::context ctx(viennacl::ocl::get_context(1)); #else viennacl::context ctx; #endif /** * Create uBLAS-based sparse matrix and read system matrix from file **/ MatrixType M; if (!viennacl::io::read_matrix_market_file(M, "../examples/testdata/mat65k.mtx")) { std::cerr<<"ERROR: Could not read matrix file " << std::endl; exit(EXIT_FAILURE); } std::cout << "Size of matrix: " << M.size1() << std::endl; std::cout << "Avg. 
Entries per row: " << double(M.nnz()) / static_cast<double>(M.size1()) << std::endl; /** * Use a constant load vector for simplicity **/ VectorType rhs(M.size2()); for (std::size_t i=0; i<rhs.size(); ++i) rhs(i) = ScalarType(1); /** * Create the ViennaCL matrix and vector and initialize with uBLAS data: **/ GPUMatrixType gpu_M(M.size1(), M.size2(), ctx); GPUVectorType gpu_rhs(M.size1(), ctx); viennacl::copy(M, gpu_M); viennacl::copy(rhs, gpu_rhs); /** * <h2>Solver Runs</h2> * We use a relative tolerance of \f$ 10^{-10} \f$ with a maximum of 50 iterations for each use case. * Usually more than 50 solver iterations are required for convergence, but this choice ensures shorter execution times and suffices for this tutorial. **/ viennacl::linalg::bicgstab_tag solver_tag(1e-10, 50); //for simplicity and reasonably short execution times we use only 50 iterations here /** * The first reference is to use no preconditioner (CPU and GPU): **/ std::cout << "--- Reference 1: Pure BiCGStab on CPU ---" << std::endl; VectorType result = viennacl::linalg::solve(M, rhs, solver_tag); std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl; VectorType residual = viennacl::linalg::prod(M, result) - rhs; std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl; std::cout << "--- Reference 2: Pure BiCGStab on GPU ---" << std::endl; GPUVectorType gpu_result = viennacl::linalg::solve(gpu_M, gpu_rhs, solver_tag); std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl; GPUVectorType gpu_residual = viennacl::linalg::prod(gpu_M, gpu_result); gpu_residual -= gpu_rhs; std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(gpu_residual) / viennacl::linalg::norm_2(gpu_rhs) << std::endl; /** * The second reference is a standard ILUT preconditioner (only CPU): **/ std::cout << "--- Reference 2: BiCGStab with ILUT on CPU ---" << std::endl; std::cout << " * Preconditioner setup..." 
<< std::endl; viennacl::linalg::ilut_precond<MatrixType> ilut(M, viennacl::linalg::ilut_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, ilut); /** * <h2>Step 1: SPAI with CPU</h2> **/ std::cout << "--- Test 1: CPU-based SPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::spai_precond<MatrixType> spai_cpu(M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, spai_cpu); /** * <h2>Step 2: FSPAI with CPU</h2> **/ std::cout << "--- Test 2: CPU-based FSPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::fspai_precond<MatrixType> fspai_cpu(M, viennacl::linalg::fspai_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, fspai_cpu); /** * <h2>Step 3: SPAI with GPU</h2> **/ std::cout << "--- Test 3: GPU-based SPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::spai_precond<GPUMatrixType> spai_gpu(gpu_M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); std::cout << " * Iterative solver run..." << std::endl; run_solver(gpu_M, gpu_rhs, solver_tag, spai_gpu); /** * <h2>Step 4: FSPAI with GPU</h2> **/ std::cout << "--- Test 4: GPU-based FSPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::fspai_precond<GPUMatrixType> fspai_gpu(gpu_M, viennacl::linalg::fspai_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(gpu_M, gpu_rhs, solver_tag, fspai_gpu); /** * That's it! Print success message and exit. **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int main (int argc, const char * argv[]) { typedef float ScalarType; typedef boost::numeric::ublas::compressed_matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::compressed_matrix<ScalarType> GPUMatrixType; typedef viennacl::vector<ScalarType> GPUVectorType; MatrixType M; // // Read system matrix from file // #ifdef _MSC_VER if (!viennacl::io::read_matrix_market_file(M, "../../examples/testdata/mat65k.mtx")) #else if (!viennacl::io::read_matrix_market_file(M, "../examples/testdata/mat65k.mtx")) #endif { std::cerr<<"ERROR: Could not read matrix file " << std::endl; exit(EXIT_FAILURE); } std::cout << "Size of matrix: " << M.size1() << std::endl; std::cout << "Avg. Entries per row: " << M.nnz() / static_cast<double>(M.size1()) << std::endl; // // Use uniform load vector: // VectorType rhs(M.size2()); for (size_t i=0; i<rhs.size(); ++i) rhs(i) = 1; GPUMatrixType gpu_M(M.size1(), M.size2()); GPUVectorType gpu_rhs(M.size1()); viennacl::copy(M, gpu_M); viennacl::copy(rhs, gpu_rhs); ///////////////////////////////// Tests to follow ///////////////////////////// viennacl::linalg::bicgstab_tag solver_tag(1e-10, 50); //for simplicity and reasonably short execution times we use only 50 iterations here // // Reference: No preconditioner: // std::cout << "--- Reference 1: Pure BiCGStab on CPU ---" << std::endl; VectorType result = viennacl::linalg::solve(M, rhs, solver_tag); std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl; VectorType residual = viennacl::linalg::prod(M, result) - rhs; std::cout << " * Rel. 
Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl; std::cout << "--- Reference 2: Pure BiCGStab on GPU ---" << std::endl; GPUVectorType gpu_result = viennacl::linalg::solve(gpu_M, gpu_rhs, solver_tag); std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl; GPUVectorType gpu_residual = viennacl::linalg::prod(gpu_M, gpu_result) - gpu_rhs; std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(gpu_residual) / viennacl::linalg::norm_2(gpu_rhs) << std::endl; // // Reference: ILUT preconditioner: // std::cout << "--- Reference 2: BiCGStab with ILUT on CPU ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::ilut_precond<MatrixType> ilut(M, viennacl::linalg::ilut_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, ilut); // // Test 1: SPAI with CPU: // std::cout << "--- Test 1: CPU-based SPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::spai_precond<MatrixType> spai_cpu(M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, spai_cpu); // // Test 2: FSPAI with CPU: // std::cout << "--- Test 2: CPU-based FSPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::fspai_precond<MatrixType> fspai_cpu(M, viennacl::linalg::fspai_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, fspai_cpu); // // Test 3: SPAI with GPU: // std::cout << "--- Test 3: GPU-based SPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::spai_precond<GPUMatrixType> spai_gpu(gpu_M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); std::cout << " * Iterative solver run..." << std::endl; run_solver(gpu_M, gpu_rhs, solver_tag, spai_gpu); return EXIT_SUCCESS; }