int MultiGrid::solve_ (MultiFab& _sol, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode, Real bnorm, Real resnorm0) { BL_PROFILE("MultiGrid::solve_()"); // // If do_fixed_number_of_iters = 1, then do maxiter iterations without checking for convergence // // If do_fixed_number_of_iters = 0, then relax system maxiter times, // and stop if relative error <= _eps_rel or if absolute err <= _abs_eps // const Real strt_time = ParallelDescriptor::second(); const int level = 0; // // We take the max of the norms of the initial RHS and the initial residual in order to capture both cases // Real norm_to_test_against; bool using_bnorm; if (bnorm >= resnorm0) { norm_to_test_against = bnorm; using_bnorm = true; } else { norm_to_test_against = resnorm0; using_bnorm = false; } int returnVal = 0; Real error = resnorm0; // // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed // if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 ) { std::cout << "MultiGrid: Tolerance " << eps_rel << " < 1e-16 is probably set too low" << '\n'; } // // We initially define norm_cor based on the initial solution only so we can use it in the very first iteration // to decide whether the problem is already solved (this is relevant if the previous solve used was only solved // according to the Anorm test and not the bnorm test). // Real norm_cor = norm_inf(*initialsolution,true); ParallelDescriptor::ReduceRealMax(norm_cor); int nit = 1; const Real norm_Lp = Lp.norm(0, level); Real cg_time = 0; if ( use_Anorm_for_convergence == 1 ) { // // Don't need to go any further -- no iterations are required // if (error <= eps_abs || error < eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) { if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { std::cout << " Problem is already converged -- no iterations required\n"; } return 1; } for ( ; ( (error > eps_abs && error > eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) || (do_fixed_number_of_iters == 1) ) && nit <= maxiter; ++nit) { relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time); Real tmp[2] = { norm_inf(*cor[level],true), errorEstimate(level,bc_mode,true) }; ParallelDescriptor::ReduceRealMax(tmp,2); norm_cor = tmp[0]; error = tmp[1]; if ( ParallelDescriptor::IOProcessor() && verbose > 1 ) { const Real rel_error = error / norm_to_test_against; Spacer(std::cout, level); if (using_bnorm) { std::cout << "MultiGrid: Iteration " << nit << " resid/bnorm = " << rel_error << '\n'; } else { std::cout << "MultiGrid: Iteration " << nit << " resid/resid0 = " << rel_error << '\n'; } } } } else { // // Don't need to go any further -- no iterations are required // if (error <= eps_abs || error < eps_rel*norm_to_test_against) { if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { std::cout << " Problem is already converged -- no iterations required\n"; } return 1; } for ( ; ( (error > eps_abs && error > eps_rel*norm_to_test_against) || (do_fixed_number_of_iters == 1) ) && nit <= maxiter; ++nit) { relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time); error = errorEstimate(level, bc_mode); if ( ParallelDescriptor::IOProcessor() && verbose > 1 ) { const Real rel_error = error / norm_to_test_against; Spacer(std::cout, level); if (using_bnorm) { std::cout << "MultiGrid: Iteration " << nit << " resid/bnorm = " << rel_error << '\n'; } else { std::cout << "MultiGrid: Iteration " << nit << " resid/resid0 = " << rel_error << '\n'; } } } } Real run_time = (ParallelDescriptor::second() - strt_time); if ( verbose > 0 ) { if ( ParallelDescriptor::IOProcessor() ) { const Real rel_error = error / norm_to_test_against; Spacer(std::cout, level); if (using_bnorm) { std::cout << "MultiGrid: Iteration " << nit-1 << " resid/bnorm = " << rel_error << '\n'; } else { std::cout << "MultiGrid: Iteration " << nit-1 << " resid/resid0 = " << rel_error << '\n'; } } if ( verbose > 1 ) { Real tmp[2] = { run_time, cg_time }; ParallelDescriptor::ReduceRealMax(tmp,2,ParallelDescriptor::IOProcessorNumber()); if ( ParallelDescriptor::IOProcessor() ) std::cout << ", Solve time: " << tmp[0] << ", CG time: " << tmp[1]; } if ( ParallelDescriptor::IOProcessor() ) std::cout << '\n'; } if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { if ( do_fixed_number_of_iters == 1) { std::cout << " Did fixed number of iterations: " << maxiter << std::endl; } else if ( error < eps_rel*norm_to_test_against ) { std::cout << " Converged res < eps_rel*max(bnorm,res_norm)\n"; } else if ( (use_Anorm_for_convergence == 1) && (error < eps_rel*norm_Lp*norm_cor) ) { std::cout << " Converged res < eps_rel*Anorm*sol\n"; } else if ( error < eps_abs ) { std::cout << " Converged res < eps_abs\n"; } } // // Omit ghost update since maybe not initialized in calling routine. // Add to boundary values stored in initialsolution. // _sol.copy(*cor[level]); _sol.plus(*initialsolution,0,_sol.nComp(),0); if ( use_Anorm_for_convergence == 1 ) { if ( do_fixed_number_of_iters == 1 || error <= eps_rel*(norm_Lp*norm_cor+norm_to_test_against) || error <= eps_abs ) returnVal = 1; } else { if ( do_fixed_number_of_iters == 1 || error <= eps_rel*(norm_to_test_against) || error <= eps_abs ) returnVal = 1; } // // Otherwise, failed to solve satisfactorily // return returnVal; }
int CGSolver::solve_bicgstab (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_bicgstab()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab ph(ba, ncomp, nghost, dm); MultiFab sh(ba, ncomp, nghost, dm); MultiFab sorig(ba, ncomp, 0, dm); MultiFab p (ba, ncomp, 0, dm); MultiFab r (ba, ncomp, 0, dm); MultiFab s (ba, ncomp, 0, dm); MultiFab rh (ba, ncomp, 0, dm); MultiFab v (ba, ncomp, 0, dm); MultiFab t (ba, ncomp, 0, dm); Lp.residual(r, rhs, sol, lev, bc_mode); MultiFab::Copy(sorig,sol,0,0,1,0); MultiFab::Copy(rh, r, 0,0,1,0); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC; #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA Real rnorm = norm_inf(r); #else // // Calculate the local values of these norms & reduce their values together. // Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) }; ParallelDescriptor::ReduceRealMax(vals,2,color()); Real rnorm = vals[0]; const Real Lp_norm = vals[1]; Real sol_norm = 0; #endif const Real rnorm0 = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Initial error (error0) = " << rnorm0 << '\n'; } int ret = 0, nit = 1; Real rho_1 = 0, alpha = 0, omega = 0; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return ret; } for (; nit <= maxiter; ++nit) { const Real rho = dotxy(rh,r); if ( rho == 0 ) { ret = 1; break; } if ( nit == 1 ) { MultiFab::Copy(p,r,0,0,1,0); } else { const Real beta = (rho/rho_1)*(alpha/omega); sxay(p, p, -omega, v); sxay(p, r, beta, p); } if ( use_mg_precond ) { ph.setVal(0); mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode); } else if ( use_jacobi_precond ) { ph.setVal(0); Lp.jacobi_smooth(ph, p, lev, temp_bc_mode); } else { MultiFab::Copy(ph,p,0,0,1,0); } Lp.apply(v, ph, lev, temp_bc_mode); if ( Real rhTv = dotxy(rh,v) ) { alpha = rho/rhTv; } else { ret = 2; break; } sxay(sol, sol, alpha, ph); sxay(s, r, -alpha, v); rnorm = norm_inf(s); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Half Iter " << std::setw(11) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else sol_norm = norm_inf(sol); if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break; #endif if ( use_mg_precond ) { sh.setVal(0); mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode); } else if ( use_jacobi_precond ) { sh.setVal(0); Lp.jacobi_smooth(sh, s, lev, temp_bc_mode); } else { MultiFab::Copy(sh,s,0,0,1,0); } Lp.apply(t, sh, lev, temp_bc_mode); // // This is a little funky. I want to elide one of the reductions // in the following two dotxy()s. We do that by calculating the "local" // values and then reducing the two local values at the same time. // Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) }; ParallelDescriptor::ReduceRealSum(vals,2,color()); if ( vals[0] ) { omega = vals[1]/vals[0]; } else { ret = 3; break; } sxay(sol, sol, omega, sh); sxay(r, s, -omega, t); rnorm = norm_inf(r); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Iteration " << std::setw(11) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else sol_norm = norm_inf(sol); if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break; #endif if ( omega == 0 ) { ret = 4; break; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Final: Iteration " << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int CGSolver::solve_cg (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_cg()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); MultiFab r1(ba, ncomp, nghost, dm); MultiFab z1(ba, ncomp, nghost, dm); MultiFab r2(ba, ncomp, nghost, dm); MultiFab z2(ba, ncomp, nghost, dm); MultiFab::Copy(sorig,sol,0,0,1,0); Lp.residual(r, rhs, sorig, lev, bc_mode); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC; Real rnorm = norm_inf(r); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev); Real sol_norm = 0; Real rho_1 = 0; int ret = 0; int nit = 1; if ( rnorm == 0 || rnorm < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: niter = 0," << ", rnorm = " << rnorm << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" << eps_rel*(Lp_norm*sol_norm + rnorm0 ) << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1) { z.setVal(0); jbb_precond(z,r,lev,Lp); } else { MultiFab::Copy(z,r,0,0,1,0); } Real rho = dotxy(z,r); if (nit == 1) { MultiFab::Copy(p,z,0,0,1,0); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev, temp_bc_mode); Real alpha; if ( Real pw = dotxy(p,q) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_cg:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r); sol_norm = norm_inf(sol); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break; #endif if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs ) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_cg: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int CGSolver::jbb_precond (MultiFab& sol, const MultiFab& rhs, int lev, LinOp& Lp) { // // This is a local routine. No parallel is allowed to happen here. // int lev_loc = lev; const Real eps_rel = 1.e-2; const Real eps_abs = 1.e-16; const int nghost = sol.nGrow(); const int ncomp = sol.nComp(); const bool local = true; const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC; BL_ASSERT(ncomp == 1 ); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc)); const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); sorig.copy(sol); Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local); sol.setVal(0); Real rnorm = norm_inf(r,local); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << " jbb_precond: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev_loc, local); Real sol_norm = 0; int ret = 0; // will return this value if all goes well Real rho_1 = 0; int nit = 1; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { z.copy(r); Real rho = dotxy(z,r,local); if (nit == 1) { p.copy(z); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev_loc, bc_mode, local); Real alpha; if ( Real pw = dotxy(p,q,local) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r, local); sol_norm = norm_inf(sol, local); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) { break; } if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) { if ( ParallelDescriptor::IOProcessor(color()) ) { BoxLib::Warning("jbb_precond:: failed to converge!"); } ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int MCMultiGrid::solve_ (MultiFab& _sol, Real eps_rel, Real eps_abs, MCBC_Mode bc_mode, int level) { // // Relax system maxiter times, stop if relative error <= _eps_rel or // if absolute err <= _abs_eps // const Real strt_time = ParallelDescriptor::second(); // // Elide a reduction by doing these together. // Real tmp[2] = { norm_inf(*rhs[level],true), errorEstimate(level,bc_mode,true) }; ParallelDescriptor::ReduceRealMax(tmp,2); const Real norm_rhs = tmp[0]; const Real error0 = tmp[1]; int returnVal = 0; Real error = error0; if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { Spacer(std::cout, level); std::cout << "MCMultiGrid: Initial rhs = " << norm_rhs << '\n'; std::cout << "MCMultiGrid: Initial error (error0) = " << error0 << '\n'; } if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 ) { std::cout << "MCMultiGrid: Tolerance " << eps_rel << " < 1e-16 is probably set too low" << '\n'; } // // Initialize correction to zero at this level (auto-filled at levels below) // (*cor[level]).setVal(0.0); // // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed. // int nit = 1; const Real new_error_0 = norm_rhs; //const Real norm_Lp = Lp.norm(0, level); for ( ; error > eps_abs && error > eps_rel*norm_rhs && nit <= maxiter; ++nit) { relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode); error = errorEstimate(level,bc_mode); if ( ParallelDescriptor::IOProcessor() && verbose > 1 ) { const Real rel_error = (error0 != 0) ? error/new_error_0 : 0; Spacer(std::cout, level); std::cout << "MCMultiGrid: Iteration " << nit << " error/error0 = " << rel_error << '\n'; } } Real run_time = (ParallelDescriptor::second() - strt_time); if ( verbose > 0 ) { if ( ParallelDescriptor::IOProcessor() ) { const Real rel_error = (error0 != 0) ? error/error0 : 0; Spacer(std::cout, level); std::cout << "MCMultiGrid: Final Iter. " << nit-1 << " error/error0 = " << rel_error; } if ( verbose > 1 ) { ParallelDescriptor::ReduceRealMax(run_time); if ( ParallelDescriptor::IOProcessor() ) std::cout << ", Solve time: " << run_time << '\n'; } } if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { if ( error < eps_rel*norm_rhs ) { std::cout << " Converged res < eps_rel*bnorm\n"; } else if ( error < eps_abs ) { std::cout << " Converged res < eps_abs\n"; } } // // Omit ghost update since maybe not initialized in calling routine. // Add to boundary values stored in initialsolution. // _sol.copy(*cor[level]); _sol.plus(*initialsolution,0,_sol.nComp(),0); if ( error <= eps_rel*(norm_rhs) || error <= eps_abs ) returnVal = 1; // // Otherwise, failed to solve satisfactorily // return returnVal; }