void run_amg(viennacl::linalg::cg_tag & cg_solver, boost::numeric::ublas::vector<ScalarType> & ublas_vec, boost::numeric::ublas::vector<ScalarType> & ublas_result, boost::numeric::ublas::compressed_matrix<ScalarType> & ublas_matrix, viennacl::vector<ScalarType> & vcl_vec, viennacl::vector<ScalarType> & vcl_result, viennacl::compressed_matrix<ScalarType> & vcl_compressed_matrix, std::string info, viennacl::linalg::amg_tag & amg_tag) { viennacl::linalg::amg_precond<boost::numeric::ublas::compressed_matrix<ScalarType> > ublas_amg = viennacl::linalg::amg_precond<boost::numeric::ublas::compressed_matrix<ScalarType> > (ublas_matrix, amg_tag); boost::numeric::ublas::vector<ScalarType> avgstencil; unsigned int coarselevels = amg_tag.get_coarselevels(); std::cout << "-- CG with AMG preconditioner, " << info << " --" << std::endl; std::cout << " * Setup phase (ublas types)..." << std::endl; // Coarse level measure might have been changed during setup. Reload! ublas_amg.tag().set_coarselevels(coarselevels); ublas_amg.setup(); std::cout << " * Operator complexity: " << ublas_amg.calc_complexity(avgstencil) << std::endl; amg_tag.set_coarselevels(coarselevels); viennacl::linalg::amg_precond<viennacl::compressed_matrix<ScalarType> > vcl_amg = viennacl::linalg::amg_precond<viennacl::compressed_matrix<ScalarType> > (vcl_compressed_matrix, amg_tag); std::cout << " * Setup phase (ViennaCL types)..." << std::endl; vcl_amg.tag().set_coarselevels(coarselevels); vcl_amg.setup(); std::cout << " * CG solver (ublas types)..." << std::endl; run_solver(ublas_matrix, ublas_vec, ublas_result, cg_solver, ublas_amg); std::cout << " * CG solver (ViennaCL types)..." << std::endl; run_solver(vcl_compressed_matrix, vcl_vec, vcl_result, cg_solver, vcl_amg); }
/**
 * cb_dialog_solve_clicked:
 * @button: the widget that triggered the callback (unused)
 * @state: the solver dialog state
 *
 * Reads the dialog settings into the sheet's solver parameters, validates
 * them and, when they are valid, runs the solver.  A result whose quality is
 * optimal or feasible is stored as a scenario if the parameters ask for it.
 * An invalid parameter set is reported through a notice dialog instead.
 **/
static void
cb_dialog_solve_clicked (G_GNUC_UNUSED GtkWidget *button,
			 SolverState *state)
{
	GnmSolverParameters *param = state->sheet->solver_parameters;
	GError *err = NULL;

	/* Get rid of a warning dialog left over from a previous click. */
	if (state->warning_dialog != NULL) {
		gtk_widget_destroy (state->warning_dialog);
		state->warning_dialog = NULL;
	}

	extract_settings (state);

	if (gnm_solver_param_valid (param, &err)) {
		GnmSolverResult *res;

		check_for_changed_options (state);
		res = run_solver (state, param);
		gnm_app_recalc ();

		if (res == NULL) {
			if (err)
				go_gtk_notice_nonmodal_dialog
					(GTK_WINDOW (state->dialog),
					 &(state->warning_dialog),
					 GTK_MESSAGE_ERROR,
					 "%s", err->message);
		} else {
			switch (res->quality) {
			case GNM_SOLVER_RESULT_OPTIMAL:
			case GNM_SOLVER_RESULT_FEASIBLE:
				/* Only usable results may become scenarios. */
				if (param->options.add_scenario)
					solver_add_scenario (state, res,
							     param->options.scenario_name);
				break;
			default:
				break;
			}
			g_object_unref (res);
		}
	} else {
		/* Validation failed: show the reason on the toplevel window. */
		GtkWidget *top = gtk_widget_get_toplevel (state->dialog);

		go_gtk_notice_dialog (GTK_WINDOW (top), GTK_MESSAGE_ERROR,
				      "%s", err->message);
	}

	if (err)
		g_error_free (err);
}
int main(int argc, char **argv) { u32 nthreads = 0; u32 ntrims = 0; u32 nonce = 0; u32 range = 1; #ifdef SAVEEDGES bool showcycle = 1; #else bool showcycle = 0; #endif char header[HEADERLEN]; u32 len; bool allrounds = false; int c; memset(header, 0, sizeof(header)); while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) { switch (c) { case 'a': allrounds = true; break; case 'h': len = strlen(optarg); assert(len <= sizeof(header)); memcpy(header, optarg, len); break; case 'x': len = strlen(optarg)/2; assert(len == sizeof(header)); for (u32 i=0; i<len; i++) sscanf(optarg+2*i, "%2hhx", header+i); break; case 'n': nonce = atoi(optarg); break; case 'r': range = atoi(optarg); break; case 'm': ntrims = atoi(optarg) & -2; // make even as required by solve() break; case 's': showcycle = true; break; case 't': nthreads = atoi(optarg); break; } } SolverParams params; params.nthreads = nthreads; params.ntrims = ntrims; params.showcycle = showcycle; params.allrounds = allrounds; SolverCtx* ctx = create_solver_ctx(¶ms); print_log("Looking for %d-cycle on cuckatoo%d(\"%s\",%d", PROOFSIZE, EDGEBITS, header, nonce); if (range > 1) print_log("-%d", nonce+range-1); print_log(") with 50%% edges\n"); u64 sbytes = ctx->sharedbytes(); u32 tbytes = ctx->threadbytes(); int sunit,tunit; for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ; print_log("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets); print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets); print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX); run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL); destroy_solver_ctx(ctx); }
int main() { // // Print some device info // std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << " Device Info" << std::endl; std::cout << "----------------------------------------------" << std::endl; #ifdef VIENNACL_WITH_OPENCL std::cout << viennacl::ocl::current_device().info() << std::endl; #endif typedef float ScalarType; // feel free to change this to double if supported by your device // // Set up the matrices and vectors for the iterative solvers (cf. iterative.cpp) // boost::numeric::ublas::vector<ScalarType> ublas_vec, ublas_result; boost::numeric::ublas::compressed_matrix<ScalarType> ublas_matrix; viennacl::linalg::cg_tag cg_solver; viennacl::linalg::amg_tag amg_tag; viennacl::linalg::amg_precond<boost::numeric::ublas::compressed_matrix<ScalarType> > ublas_amg; // Read matrix if (!viennacl::io::read_matrix_market_file(ublas_matrix, "../examples/testdata/mat65k.mtx")) { std::cout << "Error reading Matrix file" << std::endl; return EXIT_FAILURE; } // Set up rhs and result vector if (!readVectorFromFile("../examples/testdata/rhs65025.txt", ublas_vec)) { std::cout << "Error reading RHS file" << std::endl; return 0; } if (!readVectorFromFile("../examples/testdata/result65025.txt", ublas_result)) { std::cout << "Error reading Result file" << std::endl; return 0; } viennacl::vector<ScalarType> vcl_vec(ublas_vec.size()); viennacl::vector<ScalarType> vcl_result(ublas_vec.size()); viennacl::compressed_matrix<ScalarType> vcl_compressed_matrix(ublas_vec.size(), ublas_vec.size()); // Copy to GPU viennacl::copy(ublas_matrix, vcl_compressed_matrix); viennacl::copy(ublas_vec, vcl_vec); viennacl::copy(ublas_result, vcl_result); // // Run solver without preconditioner // std::cout << "-- CG solver (CPU, no preconditioner) --" << std::endl; run_solver(ublas_matrix, ublas_vec, ublas_result, cg_solver, viennacl::linalg::no_precond()); std::cout << "-- CG solver (GPU, no preconditioner) --" << std::endl; 
run_solver(vcl_compressed_matrix, vcl_vec, vcl_result, cg_solver, viennacl::linalg::no_precond()); // // With AMG Preconditioner RS+DIRECT // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS, // coarsening strategy VIENNACL_AMG_INTERPOL_DIRECT, // interpolation strategy 0.25, // strength of dependence threshold 0.2, // interpolation weight 0.67, // jacobi smoother weight 3, // presmoothing steps 3, // postsmoothing steps 0); // number of coarse levels to be used (0: automatically use as many as reasonable) run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS COARSENING, DIRECT INTERPOLATION", amg_tag); // // With AMG Preconditioner RS+CLASSIC // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS, VIENNACL_AMG_INTERPOL_CLASSIC, 0.25, 0.2, 0.67, 3, 3, 0); run_amg ( cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS COARSENING, CLASSIC INTERPOLATION", amg_tag); // // With AMG Preconditioner ONEPASS+DIRECT // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_ONEPASS, VIENNACL_AMG_INTERPOL_DIRECT,0.25, 0.2, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "ONEPASS COARSENING, DIRECT INTERPOLATION", amg_tag); // // With AMG Preconditioner RS0+DIRECT // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS0, VIENNACL_AMG_INTERPOL_DIRECT, 0.25, 0.2, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS0 COARSENING, DIRECT INTERPOLATION", amg_tag); // // With AMG Preconditioner RS3+DIRECT // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_RS3, VIENNACL_AMG_INTERPOL_DIRECT, 0.25, 0.2, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "RS3 COARSENING, DIRECT INTERPOLATION", amg_tag); // // With AMG Preconditioner AG // amg_tag = 
viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_AG, VIENNACL_AMG_INTERPOL_AG, 0.08, 0, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "AG COARSENING, AG INTERPOLATION", amg_tag); // // With AMG Preconditioner SA // amg_tag = viennacl::linalg::amg_tag(VIENNACL_AMG_COARSE_AG, VIENNACL_AMG_INTERPOL_SA, 0.08, 0.67, 0.67, 3, 3, 0); run_amg (cg_solver, ublas_vec, ublas_result, ublas_matrix, vcl_vec, vcl_result, vcl_compressed_matrix, "AG COARSENING, SA INTERPOLATION",amg_tag); // // That's it. // std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
/** * The main steps in this tutorial are the following: * - Setup the systems * - Run solvers without preconditioner and with ILUT preconditioner for comparison * - Run solver with SPAI preconditioner on CPU * - Run solver with SPAI preconditioner on GPU * - Run solver with factored SPAI preconditioner on CPU * - Run solver with factored SPAI preconditioner on GPU * **/ int main (int, const char **) { typedef float ScalarType; typedef boost::numeric::ublas::compressed_matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::compressed_matrix<ScalarType> GPUMatrixType; typedef viennacl::vector<ScalarType> GPUVectorType; /** * If you have multiple OpenCL-capable devices in your system, we pick the second device for this tutorial. **/ #ifdef VIENNACL_WITH_OPENCL // Optional: Customize OpenCL backend viennacl::ocl::platform pf = viennacl::ocl::get_platforms()[0]; std::vector<viennacl::ocl::device> const & devices = pf.devices(); // Optional: Set first device to first context: viennacl::ocl::setup_context(0, devices[0]); // Optional: Set second device for second context (use the same device for the second context if only one device available): if (devices.size() > 1) viennacl::ocl::setup_context(1, devices[1]); else viennacl::ocl::setup_context(1, devices[0]); std::cout << viennacl::ocl::current_device().info() << std::endl; viennacl::context ctx(viennacl::ocl::get_context(1)); #else viennacl::context ctx; #endif /** * Create uBLAS-based sparse matrix and read system matrix from file **/ MatrixType M; if (!viennacl::io::read_matrix_market_file(M, "../examples/testdata/mat65k.mtx")) { std::cerr<<"ERROR: Could not read matrix file " << std::endl; exit(EXIT_FAILURE); } std::cout << "Size of matrix: " << M.size1() << std::endl; std::cout << "Avg. 
Entries per row: " << double(M.nnz()) / static_cast<double>(M.size1()) << std::endl; /** * Use a constant load vector for simplicity **/ VectorType rhs(M.size2()); for (std::size_t i=0; i<rhs.size(); ++i) rhs(i) = ScalarType(1); /** * Create the ViennaCL matrix and vector and initialize with uBLAS data: **/ GPUMatrixType gpu_M(M.size1(), M.size2(), ctx); GPUVectorType gpu_rhs(M.size1(), ctx); viennacl::copy(M, gpu_M); viennacl::copy(rhs, gpu_rhs); /** * <h2>Solver Runs</h2> * We use a relative tolerance of \f$ 10^{-10} \f$ with a maximum of 50 iterations for each use case. * Usually more than 50 solver iterations are required for convergence, but this choice ensures shorter execution times and suffices for this tutorial. **/ viennacl::linalg::bicgstab_tag solver_tag(1e-10, 50); //for simplicity and reasonably short execution times we use only 50 iterations here /** * The first reference is to use no preconditioner (CPU and GPU): **/ std::cout << "--- Reference 1: Pure BiCGStab on CPU ---" << std::endl; VectorType result = viennacl::linalg::solve(M, rhs, solver_tag); std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl; VectorType residual = viennacl::linalg::prod(M, result) - rhs; std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl; std::cout << "--- Reference 2: Pure BiCGStab on GPU ---" << std::endl; GPUVectorType gpu_result = viennacl::linalg::solve(gpu_M, gpu_rhs, solver_tag); std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl; GPUVectorType gpu_residual = viennacl::linalg::prod(gpu_M, gpu_result); gpu_residual -= gpu_rhs; std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(gpu_residual) / viennacl::linalg::norm_2(gpu_rhs) << std::endl; /** * The second reference is a standard ILUT preconditioner (only CPU): **/ std::cout << "--- Reference 2: BiCGStab with ILUT on CPU ---" << std::endl; std::cout << " * Preconditioner setup..." 
<< std::endl; viennacl::linalg::ilut_precond<MatrixType> ilut(M, viennacl::linalg::ilut_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, ilut); /** * <h2>Step 1: SPAI with CPU</h2> **/ std::cout << "--- Test 1: CPU-based SPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::spai_precond<MatrixType> spai_cpu(M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, spai_cpu); /** * <h2>Step 2: FSPAI with CPU</h2> **/ std::cout << "--- Test 2: CPU-based FSPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::fspai_precond<MatrixType> fspai_cpu(M, viennacl::linalg::fspai_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, fspai_cpu); /** * <h2>Step 3: SPAI with GPU</h2> **/ std::cout << "--- Test 3: GPU-based SPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::spai_precond<GPUMatrixType> spai_gpu(gpu_M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); std::cout << " * Iterative solver run..." << std::endl; run_solver(gpu_M, gpu_rhs, solver_tag, spai_gpu); /** * <h2>Step 4: FSPAI with GPU</h2> **/ std::cout << "--- Test 4: GPU-based FSPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::fspai_precond<GPUMatrixType> fspai_gpu(gpu_M, viennacl::linalg::fspai_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(gpu_M, gpu_rhs, solver_tag, fspai_gpu); /** * That's it! Print success message and exit. **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int main (int argc, const char * argv[]) { typedef float ScalarType; typedef boost::numeric::ublas::compressed_matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::compressed_matrix<ScalarType> GPUMatrixType; typedef viennacl::vector<ScalarType> GPUVectorType; MatrixType M; // // Read system matrix from file // #ifdef _MSC_VER if (!viennacl::io::read_matrix_market_file(M, "../../examples/testdata/mat65k.mtx")) #else if (!viennacl::io::read_matrix_market_file(M, "../examples/testdata/mat65k.mtx")) #endif { std::cerr<<"ERROR: Could not read matrix file " << std::endl; exit(EXIT_FAILURE); } std::cout << "Size of matrix: " << M.size1() << std::endl; std::cout << "Avg. Entries per row: " << M.nnz() / static_cast<double>(M.size1()) << std::endl; // // Use uniform load vector: // VectorType rhs(M.size2()); for (size_t i=0; i<rhs.size(); ++i) rhs(i) = 1; GPUMatrixType gpu_M(M.size1(), M.size2()); GPUVectorType gpu_rhs(M.size1()); viennacl::copy(M, gpu_M); viennacl::copy(rhs, gpu_rhs); ///////////////////////////////// Tests to follow ///////////////////////////// viennacl::linalg::bicgstab_tag solver_tag(1e-10, 50); //for simplicity and reasonably short execution times we use only 50 iterations here // // Reference: No preconditioner: // std::cout << "--- Reference 1: Pure BiCGStab on CPU ---" << std::endl; VectorType result = viennacl::linalg::solve(M, rhs, solver_tag); std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl; VectorType residual = viennacl::linalg::prod(M, result) - rhs; std::cout << " * Rel. 
Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl; std::cout << "--- Reference 2: Pure BiCGStab on GPU ---" << std::endl; GPUVectorType gpu_result = viennacl::linalg::solve(gpu_M, gpu_rhs, solver_tag); std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl; GPUVectorType gpu_residual = viennacl::linalg::prod(gpu_M, gpu_result) - gpu_rhs; std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(gpu_residual) / viennacl::linalg::norm_2(gpu_rhs) << std::endl; // // Reference: ILUT preconditioner: // std::cout << "--- Reference 2: BiCGStab with ILUT on CPU ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::ilut_precond<MatrixType> ilut(M, viennacl::linalg::ilut_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, ilut); // // Test 1: SPAI with CPU: // std::cout << "--- Test 1: CPU-based SPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::spai_precond<MatrixType> spai_cpu(M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, spai_cpu); // // Test 2: FSPAI with CPU: // std::cout << "--- Test 2: CPU-based FSPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::fspai_precond<MatrixType> fspai_cpu(M, viennacl::linalg::fspai_tag()); std::cout << " * Iterative solver run..." << std::endl; run_solver(M, rhs, solver_tag, fspai_cpu); // // Test 3: SPAI with GPU: // std::cout << "--- Test 3: GPU-based SPAI ---" << std::endl; std::cout << " * Preconditioner setup..." << std::endl; viennacl::linalg::spai_precond<GPUMatrixType> spai_gpu(gpu_M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); std::cout << " * Iterative solver run..." << std::endl; run_solver(gpu_M, gpu_rhs, solver_tag, spai_gpu); return EXIT_SUCCESS; }