void solve(MultiFab& soln, const MultiFab& anaSoln, Real a, Real b, MultiFab& alpha, MultiFab beta[], MultiFab& rhs, const BoxArray& bs, const Geometry& geom, solver_t solver) { BL_PROFILE("solve"); soln.setVal(0.0); const Real run_strt = ParallelDescriptor::second(); BndryData bd(bs, 1, geom); set_boundary(bd, rhs); ABecLaplacian abec_operator(bd, dx); abec_operator.setScalars(a, b); abec_operator.setCoefficients(alpha, beta); MultiGrid mg(abec_operator); mg.setMaxIter(maxiter); mg.setVerbose(verbose); mg.setFixedIter(1); mg.solve(soln, rhs, tolerance_rel, tolerance_abs); Real run_time = ParallelDescriptor::second() - run_strt; ParallelDescriptor::ReduceRealMax(run_time, ParallelDescriptor::IOProcessorNumber()); if (ParallelDescriptor::IOProcessor()) { std::cout << "Run time : " << run_time << std::endl; } }
void MultiGrid::coarsestSmooth (MultiFab& solL, MultiFab& rhsL, int level, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode, int local_usecg, Real& cg_time) { BL_PROFILE("MultiGrid::coarsestSmooth()"); prepareForLevel(level); if ( local_usecg == 0 ) { Real error0 = 0; if ( verbose > 0 ) { error0 = errorEstimate(level, bc_mode); if ( ParallelDescriptor::IOProcessor() ) std::cout << " Bottom Smoother: Initial error (error0) = " << error0 << '\n'; } for (int i = finalSmooth(); i > 0; i--) { Lp.smooth(solL, rhsL, level, bc_mode); if ( verbose > 1 || (i == 1 && verbose) ) { Real error = errorEstimate(level, bc_mode); const Real rel_error = (error0 != 0) ? error/error0 : 0; if ( ParallelDescriptor::IOProcessor() ) std::cout << " Bottom Smoother: Iteration " << i << " error/error0 = " << rel_error << '\n'; } } } else { bool use_mg_precond = false; CGSolver cg(Lp, use_mg_precond, level); cg.setMaxIter(maxiter_b); const Real stime = ParallelDescriptor::second(); int ret = cg.solve(solL, rhsL, rtol_b, atol_b, bc_mode); // // The whole purpose of cg_time is to accumulate time spent in CGSolver. // cg_time += (ParallelDescriptor::second() - stime); if ( ret != 0 ) { if ( smooth_on_cg_unstable ) { // // If the CG solver returns a nonzero value indicating // the problem is unstable. Assume this is not an accuracy // issue and pound on it with the smoother. // if ret == 8, then you have failure to converge // if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) std::cout << "MultiGrid::coarsestSmooth(): CGSolver returns nonzero. Smoothing ...\n"; coarsestSmooth(solL, rhsL, level, eps_rel, eps_abs, bc_mode, 0, cg_time); } else { // // cg failure probably indicates loss of precision accident. // if ret == 8, then you have failure to converge // setting solL to 0 should be ok. // solL.setVal(0); if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { std::cout << "MultiGrid::coarsestSmooth(): setting coarse corr to zero" << '\n'; } } } for (int i = 0; i < nu_b; i++) { Lp.smooth(solL, rhsL, level, bc_mode); } } }
void MCLinOp::makeCoefficients (MultiFab& cs, const MultiFab& fn, int level) { const int nc = fn.nComp(); // // Determine index type of incoming MultiFab. // const IndexType iType(fn.boxArray().ixType()); const IndexType cType(D_DECL(IndexType::CELL, IndexType::CELL, IndexType::CELL)); const IndexType xType(D_DECL(IndexType::NODE, IndexType::CELL, IndexType::CELL)); const IndexType yType(D_DECL(IndexType::CELL, IndexType::NODE, IndexType::CELL)); #if (BL_SPACEDIM == 3) const IndexType zType(D_DECL(IndexType::CELL, IndexType::CELL, IndexType::NODE)); #endif int cdir; if (iType == cType) { cdir = -1; } else if (iType == xType) { cdir = 0; } else if (iType == yType) { cdir = 1; } #if (BL_SPACEDIM == 3) else if (iType == zType) { cdir = 2; } #endif else BoxLib::Abort("MCLinOp::makeCoeffients(): Bad index type"); BoxArray d(gbox[level]); if (cdir >= 0) d.surroundingNodes(cdir); int nGrow=0; cs.define(d, nc, nGrow, Fab_allocate); cs.setVal(0.0); const BoxArray& grids = gbox[level]; for (MFIter csmfi(cs); csmfi.isValid(); ++csmfi) { const Box& grd = grids[csmfi.index()]; FArrayBox& csfab = cs[csmfi]; const FArrayBox& fnfab = fn[csmfi]; switch(cdir) { case -1: FORT_AVERAGECC( csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()), fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()), grd.loVect(), grd.hiVect(), &nc); break; case 0: case 1: case 2: if ( harmavg ) { FORT_HARMONIC_AVERAGEEC( csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()), fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()), grd.loVect(), grd.hiVect(), &nc, &cdir); } else { FORT_AVERAGEEC( csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()), fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()), grd.loVect(), grd.hiVect(), &nc, &cdir); } break; default: BoxLib::Error("MCLinOp::makeCoeffients(): bad coefficient coarsening direction!"); } } }
void solve(MultiFab& soln, const MultiFab& anaSoln, MultiFab& gphi, Real a, Real b, MultiFab& alpha, PArray<MultiFab>& beta, MultiFab& beta_cc, MultiFab& rhs, const BoxArray& bs, const Geometry& geom, solver_t solver) { BL_PROFILE("solve()"); std::string ss; soln.setVal(0.0); const Real run_strt = ParallelDescriptor::second(); if (solver == BoxLib_C) { ss = "CPP"; solve_with_Cpp(soln, gphi, a, b, alpha, beta, rhs, bs, geom); } #ifdef USE_F90_SOLVERS else if (solver == BoxLib_F) { ss = "F90"; solve_with_F90(soln, gphi, a, b, alpha, beta, rhs, bs, geom); } #endif #ifdef USEHYPRE else if (solver == Hypre) { ss = "Hyp"; solve_with_hypre(soln, a, b, alpha, beta, rhs, bs, geom); } #endif #ifdef USEHPGMG else if (solver == HPGMG) { ss = "HPGMG"; solve_with_HPGMG(soln, gphi, a, b, alpha, beta, beta_cc, rhs, bs, geom, n_cell); } #endif else { BoxLib::Error("Invalid solver"); } Real run_time = ParallelDescriptor::second() - run_strt; ParallelDescriptor::ReduceRealMax(run_time, ParallelDescriptor::IOProcessorNumber()); if (ParallelDescriptor::IOProcessor()) { std::cout << " Run time : " << run_time << std::endl; } if (plot_soln) { writePlotFile("SOLN-"+ss, soln, geom); writePlotFile("GPHI-"+ss, gphi, geom); } if (plot_err || comp_norm) { soln.minus(anaSoln, 0, Ncomp, 0); // soln contains errors now MultiFab& err = soln; if (plot_err) { writePlotFile("ERR-"+ss, soln, geom); } if (comp_norm) { Real twoNorm = err.norm2(); Real maxNorm = err.norm0(); err.setVal(1.0); Real vol = err.norm2(); twoNorm /= vol; if (ParallelDescriptor::IOProcessor()) { std::cout << " 2 norm error : " << twoNorm << std::endl; std::cout << " max norm error: " << maxNorm << std::endl; } } } }
void solve4(MultiFab& soln, const MultiFab& anaSoln, Real a, Real b, MultiFab& alpha, MultiFab& beta, MultiFab& rhs, const BoxArray& bs, const Geometry& geom) { std::string ss = "CPP"; soln.setVal(0.0); const Real run_strt = ParallelDescriptor::second(); BndryData bd(bs, 1, geom); set_boundary(bd, rhs, 0); ABec4 abec_operator(bd, dx); abec_operator.setScalars(a, b); MultiFab betaca(bs,1,2); ABec4::cc2ca(beta,betaca,0,0,1); MultiFab alphaca(bs,1,2); ABec4::cc2ca(alpha,alphaca,0,0,1); abec_operator.setCoefficients(alphaca, betaca); MultiFab rhsca(bs,1,0); ABec4::cc2ca(rhs,rhsca,0,0,1); MultiFab out(bs,1,0); compute_analyticSolution(soln,Array<Real>(BL_SPACEDIM,0.5)); MultiFab solnca(bs,1,2); solnca.setVal(0); MultiGrid mg(abec_operator); mg.setVerbose(verbose); mg.solve(solnca, rhsca, tolerance_rel, tolerance_abs); Real run_time = ParallelDescriptor::second() - run_strt; ParallelDescriptor::ReduceRealMax(run_time, ParallelDescriptor::IOProcessorNumber()); if (ParallelDescriptor::IOProcessor()) { std::cout << " Run time : " << run_time << std::endl; } if (plot_soln) { writePlotFile("SOLN-"+ss, solnca, geom); } if (plot_err || comp_norm) { soln.minus(anaSoln, 0, Ncomp, 0); // soln contains errors now MultiFab& err = soln; if (plot_err) { writePlotFile("ERR-"+ss, soln, geom); } if (comp_norm) { Real twoNorm = err.norm2(); Real maxNorm = err.norm0(); err.setVal(1.0); Real vol = err.norm2(); twoNorm /= vol; if (ParallelDescriptor::IOProcessor()) { std::cout << " 2 norm error : " << twoNorm << std::endl; std::cout << " max norm error: " << maxNorm << std::endl; } } } }
int CGSolver::solve_bicgstab (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_bicgstab()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab ph(ba, ncomp, nghost, dm); MultiFab sh(ba, ncomp, nghost, dm); MultiFab sorig(ba, ncomp, 0, dm); MultiFab p (ba, ncomp, 0, dm); MultiFab r (ba, ncomp, 0, dm); MultiFab s (ba, ncomp, 0, dm); MultiFab rh (ba, ncomp, 0, dm); MultiFab v (ba, ncomp, 0, dm); MultiFab t (ba, ncomp, 0, dm); Lp.residual(r, rhs, sol, lev, bc_mode); MultiFab::Copy(sorig,sol,0,0,1,0); MultiFab::Copy(rh, r, 0,0,1,0); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC; #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA Real rnorm = norm_inf(r); #else // // Calculate the local values of these norms & reduce their values together. // Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) }; ParallelDescriptor::ReduceRealMax(vals,2,color()); Real rnorm = vals[0]; const Real Lp_norm = vals[1]; Real sol_norm = 0; #endif const Real rnorm0 = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Initial error (error0) = " << rnorm0 << '\n'; } int ret = 0, nit = 1; Real rho_1 = 0, alpha = 0, omega = 0; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return ret; } for (; nit <= maxiter; ++nit) { const Real rho = dotxy(rh,r); if ( rho == 0 ) { ret = 1; break; } if ( nit == 1 ) { MultiFab::Copy(p,r,0,0,1,0); } else { const Real beta = (rho/rho_1)*(alpha/omega); sxay(p, p, -omega, v); sxay(p, r, beta, p); } if ( use_mg_precond ) { ph.setVal(0); mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode); } else if ( use_jacobi_precond ) { ph.setVal(0); Lp.jacobi_smooth(ph, p, lev, temp_bc_mode); } else { MultiFab::Copy(ph,p,0,0,1,0); } Lp.apply(v, ph, lev, temp_bc_mode); if ( Real rhTv = dotxy(rh,v) ) { alpha = rho/rhTv; } else { ret = 2; break; } sxay(sol, sol, alpha, ph); sxay(s, r, -alpha, v); rnorm = norm_inf(s); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Half Iter " << std::setw(11) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else sol_norm = norm_inf(sol); if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break; #endif if ( use_mg_precond ) { sh.setVal(0); mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode); } else if ( use_jacobi_precond ) { sh.setVal(0); Lp.jacobi_smooth(sh, s, lev, temp_bc_mode); } else { MultiFab::Copy(sh,s,0,0,1,0); } Lp.apply(t, sh, lev, temp_bc_mode); // // This is a little funky. I want to elide one of the reductions // in the following two dotxy()s. We do that by calculating the "local" // values and then reducing the two local values at the same time. // Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) }; ParallelDescriptor::ReduceRealSum(vals,2,color()); if ( vals[0] ) { omega = vals[1]/vals[0]; } else { ret = 3; break; } sxay(sol, sol, omega, sh); sxay(r, s, -omega, t); rnorm = norm_inf(r); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Iteration " << std::setw(11) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else sol_norm = norm_inf(sol); if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break; #endif if ( omega == 0 ) { ret = 4; break; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Final: Iteration " << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int CGSolver::jbb_precond (MultiFab& sol, const MultiFab& rhs, int lev, LinOp& Lp) { // // This is a local routine. No parallel is allowed to happen here. // int lev_loc = lev; const Real eps_rel = 1.e-2; const Real eps_abs = 1.e-16; const int nghost = sol.nGrow(); const int ncomp = sol.nComp(); const bool local = true; const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC; BL_ASSERT(ncomp == 1 ); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc)); const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); sorig.copy(sol); Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local); sol.setVal(0); Real rnorm = norm_inf(r,local); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << " jbb_precond: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev_loc, local); Real sol_norm = 0; int ret = 0; // will return this value if all goes well Real rho_1 = 0; int nit = 1; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { z.copy(r); Real rho = dotxy(z,r,local); if (nit == 1) { p.copy(z); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev_loc, bc_mode, local); Real alpha; if ( Real pw = dotxy(p,q,local) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r, local); sol_norm = norm_inf(sol, local); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) { break; } if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) { if ( ParallelDescriptor::IOProcessor(color()) ) { BoxLib::Warning("jbb_precond:: failed to converge!"); } ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int CGSolver::solve_cg (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_cg()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); MultiFab r1(ba, ncomp, nghost, dm); MultiFab z1(ba, ncomp, nghost, dm); MultiFab r2(ba, ncomp, nghost, dm); MultiFab z2(ba, ncomp, nghost, dm); MultiFab::Copy(sorig,sol,0,0,1,0); Lp.residual(r, rhs, sorig, lev, bc_mode); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC; Real rnorm = norm_inf(r); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev); Real sol_norm = 0; Real rho_1 = 0; int ret = 0; int nit = 1; if ( rnorm == 0 || rnorm < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: niter = 0," << ", rnorm = " << rnorm << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" << eps_rel*(Lp_norm*sol_norm + rnorm0 ) << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1) { z.setVal(0); jbb_precond(z,r,lev,Lp); } else { MultiFab::Copy(z,r,0,0,1,0); } Real rho = dotxy(z,r); if (nit == 1) { MultiFab::Copy(p,z,0,0,1,0); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev, temp_bc_mode); Real alpha; if ( Real pw = dotxy(p,q) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_cg:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r); sol_norm = norm_inf(sol); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break; #endif if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs ) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_cg: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int main (int argc, char* argv[]) { BoxLib::Initialize(argc,argv); std::cout << std::setprecision(10); if (argc < 2) { std::cerr << "usage: " << argv[0] << " inputsfile [options]" << '\n'; exit(-1); } ParmParse pp; int n; BoxArray bs; #if BL_SPACEDIM == 2 Box domain(IntVect(0,0),IntVect(11,11)); std::string boxfile("gr.2_small_a") ; #elif BL_SPACEDIM == 3 Box domain(IntVect(0,0,0),IntVect(11,11,11)); std::string boxfile("grids/gr.3_2x3x4") ; #endif pp.query("boxes", boxfile); std::ifstream ifs(boxfile.c_str(), std::ios::in); if (!ifs) { std::string msg = "problem opening grids file: "; msg += boxfile.c_str(); BoxLib::Abort(msg.c_str()); } ifs >> domain; if (ParallelDescriptor::IOProcessor()) std::cout << "domain: " << domain << std::endl; bs.readFrom(ifs); if (ParallelDescriptor::IOProcessor()) std::cout << "grids:\n" << bs << std::endl; Geometry geom(domain); const Real* H = geom.CellSize(); int ratio=2; pp.query("ratio", ratio); // allocate/init soln and rhs int Ncomp=BL_SPACEDIM; int Nghost=0; int Ngrids=bs.size(); MultiFab soln(bs, Ncomp, Nghost, Fab_allocate); soln.setVal(0.0); MultiFab out(bs, Ncomp, Nghost, Fab_allocate); MultiFab rhs(bs, Ncomp, Nghost, Fab_allocate); rhs.setVal(0.0); for(MFIter rhsmfi(rhs); rhsmfi.isValid(); ++rhsmfi) { FORT_FILLRHS(rhs[rhsmfi].dataPtr(), ARLIM(rhs[rhsmfi].loVect()),ARLIM(rhs[rhsmfi].hiVect()), H,&Ncomp); } // Create the boundary object MCViscBndry vbd(bs,geom); BCRec phys_bc; Array<int> lo_bc(BL_SPACEDIM), hi_bc(BL_SPACEDIM); pp.getarr("lo_bc",lo_bc,0,BL_SPACEDIM); pp.getarr("hi_bc",hi_bc,0,BL_SPACEDIM); for (int i = 0; i < BL_SPACEDIM; i++) { phys_bc.setLo(i,lo_bc[i]); phys_bc.setHi(i,hi_bc[i]); } // Create the BCRec's interpreted by ViscBndry objects #if BL_SPACEDIM==2 Array<BCRec> pbcarray(4); pbcarray[0] = BCRec(D_DECL(REFLECT_ODD,REFLECT_EVEN,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[1] = BCRec(D_DECL(REFLECT_EVEN,REFLECT_ODD,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[2] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[3] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); #elif BL_SPACEDIM==3 Array<BCRec> pbcarray(12); #if 1 pbcarray[0] = BCRec(EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR); pbcarray[1] = BCRec(EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR); pbcarray[2] = BCRec(EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR,EXT_DIR); pbcarray[3] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[4] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[5] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[6] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[7] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[8] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[9] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[10] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); pbcarray[11] = BCRec(D_DECL(EXT_DIR,EXT_DIR,EXT_DIR), D_DECL(EXT_DIR,EXT_DIR,EXT_DIR)); #else for (int i = 0; i < 12; i++) pbcarray[i] = phys_bc; #endif #endif Nghost = 1; // need space for bc info MultiFab fine(bs,Ncomp,Nghost,Fab_allocate); for(MFIter finemfi(fine); finemfi.isValid(); ++finemfi) { FORT_FILLFINE(fine[finemfi].dataPtr(), ARLIM(fine[finemfi].loVect()),ARLIM(fine[finemfi].hiVect()), H,&Ncomp); } // Create "background coarse data" Box crse_bx = Box(domain).coarsen(ratio).grow(1); BoxArray cba(crse_bx); cba.maxSize(32); Real h_crse[BL_SPACEDIM]; for (n=0; n<BL_SPACEDIM; n++) h_crse[n] = H[n]*ratio; MultiFab crse_mf(cba, Ncomp, 0); // FArrayBox crse_fab(crse_bx,Ncomp); for (MFIter mfi(crse_mf); mfi.isValid(); ++mfi) { FORT_FILLCRSE(crse_mf[mfi].dataPtr(), ARLIM(crse_mf[mfi].loVect()),ARLIM(crse_mf[mfi].hiVect()), h_crse,&Ncomp); } // Create coarse boundary register, fill w/data from coarse FAB int bndry_InRad=0; int bndry_OutRad=1; int bndry_Extent=1; BoxArray cbs = BoxArray(bs).coarsen(ratio); BndryRegister cbr(cbs,bndry_InRad,bndry_OutRad,bndry_Extent,Ncomp); for (OrientationIter face; face; ++face) { Orientation f = face(); FabSet& bnd_fs(cbr[f]); bnd_fs.copyFrom(crse_mf, 0, 0, 0, Ncomp); } // Interpolate crse data to fine boundary, where applicable int cbr_Nstart=0; int fine_Nstart=0; int bndry_Nstart=0; vbd.setBndryValues(cbr,cbr_Nstart,fine,fine_Nstart, bndry_Nstart,Ncomp,ratio,pbcarray); Nghost = 1; // other variables don't need extra space DivVis lp(vbd,H); Real a = 0.0; Real b[BL_SPACEDIM]; b[0] = 1.0; b[1] = 1.0; #if BL_SPACEDIM>2 b[2] = 1.0; #endif MultiFab acoefs; int NcompA = (BL_SPACEDIM == 2 ? 2 : 1); acoefs.define(bs, NcompA, Nghost, Fab_allocate); acoefs.setVal(a); MultiFab bcoefs[BL_SPACEDIM]; for (n=0; n<BL_SPACEDIM; ++n) { BoxArray bsC(bs); bcoefs[n].define(bsC.surroundingNodes(n), 1, Nghost, Fab_allocate); #if 1 for(MFIter bmfi(bcoefs[n]); bmfi.isValid(); ++bmfi) { FORT_MAKEMU(bcoefs[n][bmfi].dataPtr(), ARLIM(bcoefs[n][bmfi].loVect()),ARLIM(bcoefs[n][bmfi].hiVect()),H,n); } #else bcoefs[n].setVal(b[n]); #endif } // -->> over dimension lp.setCoefficients(acoefs, bcoefs); #if 1 lp.maxOrder(4); #endif Nghost = 1; MultiFab tsoln(bs, Ncomp, Nghost, Fab_allocate); tsoln.setVal(0.0); #if 1 tsoln.copy(fine); #endif #if 0 // testing apply lp.apply(out,tsoln); Box subbox = out[0].box(); Real n1 = out[0].norm(subbox,1,0,BL_SPACEDIM)*pow(H[0],BL_SPACEDIM); ParallelDescriptor::ReduceRealSum(n1); if (ParallelDescriptor::IOProcessor()) { cout << "n1 output is "<<n1<<std::endl; } out.minus(rhs,0,BL_SPACEDIM,0); // special to single grid prob Real n2 = out[0].norm(subbox,1,0,BL_SPACEDIM)*pow(H[0],BL_SPACEDIM); ParallelDescriptor::ReduceRealSum(n2); if (ParallelDescriptor::IOProcessor()) { cout << "n2 difference is "<<n2<<std::endl; } #if 0 subbox.grow(-1); Real n3 = out[0].norm(subbox,0,0,BL_SPACEDIM)*pow(H[0],BL_SPACEDIM); ParallelDescriptor::ReduceRealMax(n3); if (ParallelDescriptor::IOProcessor()) { cout << "n3 difference is "<<n3<<std::endl; } #endif #endif const IntVect refRatio(D_DECL(2,2,2)); const Real bgVal = 1.0; #if 1 #ifndef NDEBUG // testing flux computation BoxArray xfluxbox(bs); xfluxbox.surroundingNodes(0); MultiFab xflux(xfluxbox,Ncomp,Nghost,Fab_allocate); xflux.setVal(1.e30); BoxArray yfluxbox(bs); yfluxbox.surroundingNodes(1); MultiFab yflux(yfluxbox,Ncomp,Nghost,Fab_allocate); yflux.setVal(1.e30); #if BL_SPACEDIM>2 BoxArray zfluxbox(bs); zfluxbox.surroundingNodes(2); MultiFab zflux(zfluxbox,Ncomp,Nghost,Fab_allocate); zflux.setVal(1.e30); #endif lp.compFlux(xflux, yflux, #if BL_SPACEDIM>2 zflux, #endif tsoln); // Write fluxes //writeMF(&xflux,"xflux.mfab"); //writeMF(&yflux,"yflux.mfab"); #if BL_SPACEDIM>2 //writeMF(&zflux,"zflux.mfab"); #endif #endif #endif Real tolerance = 1.0e-10; pp.query("tol", tolerance); Real tolerance_abs = 1.0e-10; pp.query("tol_abs", tolerance_abs); #if 0 cout << "Bndry Data object:" << std::endl; cout << lp.bndryData() << std::endl; #endif #if 0 bool use_mg_pre = false; MCCGSolver cg(lp,use_mg_pre); cg.solve(soln,rhs,tolerance,tolerance_abs); #else MCMultiGrid mg(lp); mg.solve(soln,rhs,tolerance,tolerance_abs); #endif #if 0 cout << "MCLinOp object:" << std::endl; cout << lp << std::endl; #endif VisMF::Write(soln,"soln"); #if 0 // apply operator to soln to see if really satisfies eqn tsoln.copy(soln); lp.apply(out,tsoln); soln.copy(out); // Output "apply" results on soln VisMF::Write(soln,"apply"); // Compute truncation for (MFIter smfi(soln); smfi.isValid(); ++smfi) { soln[smfi] -= fine[smfi]; } for( int icomp=0; icomp < BL_SPACEDIM ; icomp++ ) { Real solnMin = soln.min(icomp); Real solnMax = soln.max(icomp); ParallelDescriptor::ReduceRealMin(solnMin); ParallelDescriptor::ReduceRealMax(solnMax); if (ParallelDescriptor::IOProcessor()) { cout << icomp << " "<<solnMin << " " << solnMax <<std::endl; } } // Output truncation VisMF::Write(soln,"trunc"); #endif int dumpLp=0; pp.query("dumpLp",dumpLp); bool write_lp = (dumpLp == 1 ? true : false); if (write_lp) std::cout << lp << std::endl; // Output trunc ParallelDescriptor::EndParallel(); }