bool SystemSolver_FASTCG::solve(const SparseMatrix& A_in, Vector& x_in, const Vector& b_in) { typedef double CoeffType ; typedef Array1d<CoeffType> VectorType ; typedef SparseMatrixBCRS<CoeffType, 2, 2> MatrixType ; unsigned int N0 = A_in.n() ; std::cerr << "N0 = " << N0 << std::endl ; Permutation permutation; compute_permutation(A_in, permutation) ; MatrixType A ; ::OGF::convert_matrix(A_in, A, permutation) ; // ::OGF::compress_indices(A) ; std::cerr << "filling ratio:" << (A.filling_ratio() * 100.0) << "%" << std::endl ; if(false) { std::cerr << "Saving matrix to disk (matrix.dat)" << std::endl ; std::ofstream out("matrix.dat") ; // A.print(out) ; ::OGF::output_matrix(A, out) ; } unsigned int N = A.n() ; // Can be greater than N0 due to blocking N = QuickBLAS::aligned_size(N, sizeof(CoeffType)) ; std::cerr <<"N = " << N << std::endl ; int max_iter = (nb_iters_ == 0) ? 5 * N : nb_iters_ ; double eps = threshold_ ; std::cerr << "nb iters = " << max_iter << " threshold = " << eps << std::endl ; VectorType b(N, alignment_for_SSE2) ; VectorType x(N, alignment_for_SSE2) ; permutation.invert_permute_vector(b_in, b) ; permutation.invert_permute_vector(x_in, x) ; solve_cg(A, x, b, eps, max_iter) ; permutation.permute_vector(x, x_in) ; return true ; }
/*
 * bicg: iterates a geometric-series update on k and falls back to
 * conjugate gradient when that does not reach the requested tolerance.
 *
 * Parameters:
 *   k      - field that is updated in place and holds the result.
 *   l      - input field; gamma5() of it is stored in
 *            g_spinor_field[DUM_SOLVER+1] and used as the reference for the
 *            residual.
 *   eps_sq - tolerance compared against the value returned by
 *            diff_and_square_norm().
 *
 * Returns:
 *   - the 1-based iteration at which the residual test succeeded, or
 *   - when the fallback runs: the loop counter (or 0 if ITER_MAX_BCG == 0)
 *     plus the value returned by solve_cg(), minus 1000000 if
 *     ITER_MAX_BCG != 0.
 *
 * Side effects: overwrites g_spinor_field[DUM_SOLVER .. DUM_SOLVER+2] and,
 * on process 0, appends one line per phase to the file named by solvout.
 */
int bicg(spinor * const k, spinor * const l, double eps_sq) {
  int iteration;
  double xxx = 0.0;

  gamma5(g_spinor_field[DUM_SOLVER+1], l, VOLUME/2);

  /* main loop */
  for(iteration = 1; iteration <= ITER_MAX_BCG; iteration++) {
    /* compute the residual */
    M_psi(DUM_SOLVER, k, q_off);
    xxx = diff_and_square_norm(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+1], VOLUME/2);

    /* apply the solver step for the residual */
    M_psi(DUM_SOLVER+2, DUM_SOLVER, q_off-(2.+2.*q_off));
    assign_add_mul_r(k, -1./((1.+q_off)*(1.+q_off)), g_spinor_field[DUM_SOLVER+2], VOLUME/2);

    if(xxx <= eps_sq) {
      break;
    }
  }

  if(g_proc_id == 0) {
    sout = fopen(solvout, "a");
    /* fopen() returns NULL on failure; never hand that to fprintf(). */
    if(sout != NULL) {
      fprintf(sout, "%d %e %f\n", iteration, xxx, g_mu);
      fclose(sout);
    }
    else {
      fprintf(stderr, "bicg: could not open solver log %s\n", solvout);
    }
  }

  /* if the geometric series fails, redo with conjugate gradient.
   * The loop counter is 1-based, so exhaustion leaves it at
   * ITER_MAX_BCG + 1. Testing with ">" keeps a solve that converged on the
   * very last allowed iteration instead of discarding it. It still triggers
   * when ITER_MAX_BCG == 0 (the counter is then 1). */
  if(iteration > ITER_MAX_BCG) {
    if(ITER_MAX_BCG == 0) {
      iteration = 0;
    }
    zero_spinor_field(k, VOLUME/2);
    iteration += solve_cg(k, l, q_off, eps_sq);
    Q_psi(k, k, q_off);
    if(ITER_MAX_BCG != 0) {
      /* offset that marks the count as coming from the fallback */
      iteration -= 1000000;
    }
    if(g_proc_id == 0) {
      sout = fopen(solvout, "a");
      if(sout != NULL) {
        fprintf(sout, "%d %e\n", iteration, g_mu);
        fclose(sout);
      }
      else {
        fprintf(stderr, "bicg: could not open solver log %s\n", solvout);
      }
    }
  }

  return iteration;
}
/**
 * Forwards the solve to the algorithm selected by def_cg_solver.
 *
 * All five arguments are passed through unchanged to solve_cg(),
 * solve_bicgstab() or solve_cabicgstab(), and that routine's return value
 * is returned as-is.
 *
 * If def_cg_solver matches none of CG, BiCGStab or CABiCGStab,
 * BoxLib::Error() is called and -1 is returned should that call come back.
 */
int CGSolver::solve (MultiFab&       sol,
                     const MultiFab& rhs,
                     Real            eps_rel,
                     Real            eps_abs,
                     LinOp::BC_Mode  bc_mode)
{
    if (def_cg_solver == CG)
    {
        return solve_cg(sol, rhs, eps_rel, eps_abs, bc_mode);
    }

    if (def_cg_solver == BiCGStab)
    {
        return solve_bicgstab(sol, rhs, eps_rel, eps_abs, bc_mode);
    }

    if (def_cg_solver == CABiCGStab)
    {
        return solve_cabicgstab(sol, rhs, eps_rel, eps_abs, bc_mode);
    }

    // No known solver selected.
    BoxLib::Error("CGSolver::solve(): unknown solver");
    return -1;
}
/*
 * bicg: k output , l input
 *
 * Iterative solve that labels itself "BiCGstab" in its debug output, with a
 * fallback to solve_cg().
 *
 * Parameters:
 *   k        - output field. It is also handed to Mtm_plus_psi() as P before
 *              the loop and is the first argument of
 *              assign_add_mul_add_mul() in every iteration (presumably the
 *              in-place solution update - TODO confirm).
 *   l        - input field (aliased as Q).
 *   eps_sq   - tolerance compared against err = square_norm(r, N, 1).
 *   rel_prec - 0: stop when err <= eps_sq;
 *              1: stop when err <= eps_sq * square_norm(l).
 *              Any other value never satisfies the stopping test, so the
 *              loop runs ITER_MAX_BCG times and the fallback is taken.
 *
 * Returns the loop index at which the stopping test succeeded or, when the
 * fallback is taken, whatever solve_cg() returns (the loop count is then
 * overwritten).
 *
 * When ITER_MAX_BCG > 0, g_spinor_field[DUM_SOLVER] through
 * g_spinor_field[DUM_SOLVER+5] are used as work fields.
 */
int bicg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {
  double err, d1, squarenorm=0.;
  complex rho0, rho1, omega, alpha, beta, nom, denom;
  int iteration, N=VOLUME/2;
  spinor * r, * p, * v, *hatr, * s, * t, * P, * Q;

  if(ITER_MAX_BCG > 0) {
    /* Name the work fields. */
    hatr = g_spinor_field[DUM_SOLVER];
    r = g_spinor_field[DUM_SOLVER+1];
    v = g_spinor_field[DUM_SOLVER+2];
    p = g_spinor_field[DUM_SOLVER+3];
    s = g_spinor_field[DUM_SOLVER+4];
    t = g_spinor_field[DUM_SOLVER+5];
    P = k;
    Q = l;

    /* Norm of the input, used only by the rel_prec == 1 stopping test. */
    squarenorm = square_norm(Q, VOLUME/2, 1);

    /* Set-up. Note that hatr is the same field as
     * g_spinor_field[DUM_SOLVER], so the gamma5() call below fills hatr
     * (assuming its first argument is the destination, as in the else
     * branch further down). p = hatr - r is then copied into both r and
     * hatr - presumably the initial residual; TODO confirm the argument
     * order of diff(). */
    Mtm_plus_psi(r, P);
    gamma5(g_spinor_field[DUM_SOLVER], l, VOLUME/2);
    diff(p, hatr, r, N);
    assign(r, p, N);
    assign(hatr, p, N);
    rho0 = scalar_prod(hatr, r, N, 1);

    for(iteration = 0; iteration < ITER_MAX_BCG; iteration++){
      err = square_norm(r, N, 1);
      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
        printf("BiCGstab: iterations: %d res^2 %e\n", iteration, err);
        fflush(stdout);
      }

      /* Stopping test: absolute (rel_prec == 0) or relative to the norm of
       * the input (rel_prec == 1). */
      if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
        break;
      }

      /* alpha = rho0 / <hatr, v> with v obtained from p. */
      Mtm_plus_psi(v, p);
      denom = scalar_prod(hatr, v, N, 1);
      _div_complex(alpha, rho0, denom);

      /* s starts as a copy of r and is then combined with v and alpha. */
      assign(s, r, N);
      assign_diff_mul(s, v, alpha, N);

      /* omega = <t, s> / |t|^2 with t obtained from s.
       * NOTE(review): d1 is not checked for zero before the divisions. */
      Mtm_plus_psi(t, s);
      omega = scalar_prod(t,s, N, 1);
      d1 = square_norm(t, N, 1);
      omega.re/=d1; omega.im/=d1;

      /* Update P (which is k) from p and s, then rebuild r from s and t. */
      assign_add_mul_add_mul(P, p, s, alpha, omega, N);
      assign(r, s, N);
      assign_diff_mul(r, t, omega, N);

      /* beta = (alpha * rho1) / (omega * rho0). */
      rho1 = scalar_prod(hatr, r, N, 1);
      _mult_assign_complex(nom, alpha, rho1);
      _mult_assign_complex(denom, omega, rho0);
      _div_complex(beta, nom, denom);

      /* omega is negated before being passed on; it is recomputed from
       * scratch in the next iteration, so the sign flip does not persist. */
      omega.re=-omega.re; omega.im=-omega.im;
      assign_mul_bra_add_mul_ket_add(p, v, r, omega, beta, N);

      /* Carry rho1 over as rho0 for the next iteration. */
      rho0.re = rho1.re; rho0.im = rho1.im;
    }

    if(g_proc_id==0 && g_debug_level > 0) {
      printf("BiCGstab: iterations: %d eps_sq: %1.4e\n", iteration, eps_sq);
    }
  }
  else{
    /* Loop disabled: force the fallback below and prepare k from l. */
    iteration = ITER_MAX_BCG;
    gamma5(k, l, VOLUME/2);
  }

  /* if bicg fails, redo with conjugate gradient */
  if(iteration>=ITER_MAX_BCG){
    iteration = solve_cg(k,l,eps_sq, rel_prec);
    /* Save the solution for reuse!
       not needed since Chronological inverter is there */
    /* assign(g_spinor_field[DUM_DERI+6], k, VOLUME/2); */
    Qtm_minus_psi(k, k);;
  }
  return iteration;
}
int main(int argc, char *argv[]) { int n; double length; pstruct model; /* primary model data structure */ int solver; /* Parse command-line */ if(argc < 3) { printf("\nUsage: laplacian [num_pts] [length] [gauss/cg]\n\n"); printf("\"num_pts\" is the desired number of mesh points \n"); printf("\"length\" is the physical length-scale dimension in one direction\n"); printf("\"gauss/cg\" is which method is used: \n"); printf(" 0 --> Gauss-Seidel \n"); printf(" 1 --> Conjugate Gradient. \n\n"); exit(1); } else if(argc == 3) { n = atoi(argv[1]); length = (double) atof(argv[2]); solver = 0; // default to gauss } else { n = atoi(argv[1]); length = (double) atof(argv[2]); solver = atoi(argv[3]); if(solver > 1 || solver < 0) { printf("solver only accepts 0,1 for input"); exit(1); } } /* Problem Initialization */ problem_initialize (n,length,&model); assemble_matrix (central_2nd_order,&model); //assemble_matrix (central_4th_order,&model); init_masa (&model); apply_bcs (&model); print_matrix(&model); /* Solve */ if(solver == 1) { solve_cg (&model); } else { solve_gauss(&model); } /* Compute Error */ printf("\n** Error Analysis\n"); printf(" --> npts = %i\n",model.npts); printf(" --> h = %12.5e\n",model.h); printf(" --> l2 error = %12.5e\n",compute_l2_error(&model)); return 0; }