void Nyx::strang_second_step (Real time, Real dt, MultiFab& S_new, MultiFab& D_new) { BL_PROFILE("Nyx::strang_second_step()"); Real half_dt = 0.5*dt; int min_iter = 100000; int max_iter = 0; int min_iter_grid; int max_iter_grid; // Set a at the half of the time step in the second strang const Real a = get_comoving_a(time-half_dt); MultiFab reset_e_src(S_new.boxArray(), S_new.DistributionMap(), 1, NUM_GROW); reset_e_src.setVal(0.0); reset_internal_energy(S_new,D_new,reset_e_src); compute_new_temp (S_new,D_new); #ifndef FORCING { const Real z = 1.0/a - 1.0; fort_interp_to_this_z(&z); } #endif #ifdef _OPENMP #pragma omp parallel private(min_iter_grid,max_iter_grid) reduction(min:min_iter) reduction(max:max_iter) #endif for (MFIter mfi(S_new,true); mfi.isValid(); ++mfi) { // Here bx is just the valid region const Box& bx = mfi.tilebox(); min_iter_grid = 100000; max_iter_grid = 0; integrate_state (bx.loVect(), bx.hiVect(), BL_TO_FORTRAN(S_new[mfi]), BL_TO_FORTRAN(D_new[mfi]), &a, &half_dt, &min_iter_grid, &max_iter_grid); if (S_new[mfi].contains_nan(bx,0,S_new.nComp())) { std::cout << "NANS IN THIS GRID " << bx << std::endl; } min_iter = std::min(min_iter,min_iter_grid); max_iter = std::max(max_iter,max_iter_grid); } ParallelDescriptor::ReduceIntMax(max_iter); ParallelDescriptor::ReduceIntMin(min_iter); if (heat_cool_type == 1) if (ParallelDescriptor::IOProcessor()) std::cout << "Min/Max Number of Iterations in Second Strang: " << min_iter << " " << max_iter << std::endl; }
//
// Fill the ghost cells of `inout` at the given level by calling the Fortran
// routine FORT_APPLYBC once per grid and per face.
//
//   inout    MultiFab whose ghost cells are filled; must have at least
//            MCLinOp_grow ghost cells and numcomp components (asserted).
//   level    index into the per-level arrays (geomarray, gbox, maskvals,
//            undrrelxr, tangderiv, h).
//   bc_mode  MCHomogeneous_BC passes flagbc = 0 to Fortran, otherwise 1.
//            MCInhomogeneous_BC is only allowed at level 0 (asserted).
//
// Before the per-face loop the ghost cells are set to -1.e30, then
// FillBoundary(), prepareForLevel(level) and FillPeriodicBoundary() run.
//
void MCLinOp::applyBC (MultiFab& inout, int level, MCBC_Mode bc_mode)
{
    //
    // The inout MultiFab must have at least MCLinOp_grow ghost cells
    // for applyBC()
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // The inout MultiFab must have at least Periodic_BC_grow cells for the
    // algorithms taking care of periodic boundary conditions.
    //
    // NOTE(review): this assert repeats the MCLinOp_grow check above even
    // though the comment talks about Periodic_BC_grow -- confirm which
    // constant was intended.
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // No coarsened boundary values, cannot apply inhomog at lev>0.
    //
    BL_ASSERT(!(level>0 && bc_mode == MCInhomogeneous_BC));

    int flagden = 1; // fill in the bndry data and undrrelxr
    int flagbc = 1; // with values
    if (bc_mode == MCHomogeneous_BC)
        flagbc = 0; // nodata if homog
    int nc = inout.nComp();
    BL_ASSERT(nc == numcomp );

    // Mark every ghost cell with a sentinel before any of them is filled.
    inout.setBndry(-1.e30);
    inout.FillBoundary();
    prepareForLevel(level);

    geomarray[level].FillPeriodicBoundary(inout,0,nc);
    //
    // Fill boundary cells.
    //
    // NOTE(review): the loop body writes into the per-face FabSets
    // (undrrelxr, tangderiv) and into inout[gn]; presumably each grid index
    // is visited by exactly one thread -- confirm against MFIter semantics.
    //
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(inout); mfi.isValid(); ++mfi)
    {
        const int gn = mfi.index();

        BL_ASSERT(gbox[level][gn] == inout.box(gn));

        // Per-grid boundary locations, boundary-condition types and masks.
        const BndryData::RealTuple& bdl = bgb.bndryLocs(gn);
        const Array< Array<BoundCond> >& bdc = bgb.bndryConds(gn);
        const MaskTuple& msk = maskvals[level][gn];

        for (OrientationIter oitr; oitr; ++oitr)
        {
            const Orientation face = oitr();
            FabSet& f = (*undrrelxr[level])[face];
            FabSet& td = (*tangderiv[level])[face];
            // Integer form of the face orientation, passed to Fortran.
            int cdr(face);
            const FabSet& fs = bgb.bndryValues(face);
            Real bcl = bdl[face];
            const Array<BoundCond>& bc = bdc[face];
            // The BoundCond array is handed to Fortran as a plain int array.
            const int *bct = (const int*) bc.dataPtr();
            const FArrayBox& fsfab = fs[gn];
            const Real* bcvalptr = fsfab.dataPtr();
            //
            // Way external derivs stored: they start at component numcomp
            // of the same boundary-value fab.
            //
            const Real* exttdptr = fsfab.dataPtr(numcomp);
            const int* fslo = fsfab.loVect();
            const int* fshi = fsfab.hiVect();
            FArrayBox& inoutfab = inout[gn];
            FArrayBox& denfab = f[gn];
            FArrayBox& tdfab = td[gn];
#if BL_SPACEDIM==2
            // In 2-D pass the mask of this face plus the masks of the high
            // and low faces in the perpendicular direction.
            int cdir = face.coordDir(), perpdir = -1;
            if (cdir == 0)
                perpdir = 1;
            else if (cdir == 1)
                perpdir = 0;
            else
                BoxLib::Abort("MCLinOp::applyBC(): bad logic");

            const Mask& m = *msk[face];
            const Mask& mphi = *msk[Orientation(perpdir,Orientation::high)];
            const Mask& mplo = *msk[Orientation(perpdir,Orientation::low)];
            FORT_APPLYBC(
                &flagden, &flagbc, &maxorder,
                inoutfab.dataPtr(),
                ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
                &cdr, bct, &bcl,
                bcvalptr, ARLIM(fslo), ARLIM(fshi),
                m.dataPtr(), ARLIM(m.loVect()), ARLIM(m.hiVect()),
                mphi.dataPtr(), ARLIM(mphi.loVect()), ARLIM(mphi.hiVect()),
                mplo.dataPtr(), ARLIM(mplo.loVect()), ARLIM(mplo.hiVect()),
                denfab.dataPtr(),
                ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
                exttdptr, ARLIM(fslo), ARLIM(fshi),
                tdfab.dataPtr(),ARLIM(tdfab.loVect()),ARLIM(tdfab.hiVect()),
                inout.box(gn).loVect(), inout.box(gn).hiVect(),
                &nc, h[level]);
#elif BL_SPACEDIM==3
            // In 3-D pass the masks of all six faces.
            const Mask& mn = *msk[Orientation(1,Orientation::high)];
            const Mask& me = *msk[Orientation(0,Orientation::high)];
            const Mask& mw = *msk[Orientation(0,Orientation::low)];
            const Mask& ms = *msk[Orientation(1,Orientation::low)];
            const Mask& mt = *msk[Orientation(2,Orientation::high)];
            const Mask& mb = *msk[Orientation(2,Orientation::low)];
            FORT_APPLYBC(
                &flagden, &flagbc, &maxorder,
                inoutfab.dataPtr(),
                ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
                &cdr, bct, &bcl,
                bcvalptr, ARLIM(fslo), ARLIM(fshi),
                mn.dataPtr(),ARLIM(mn.loVect()),ARLIM(mn.hiVect()),
                me.dataPtr(),ARLIM(me.loVect()),ARLIM(me.hiVect()),
                mw.dataPtr(),ARLIM(mw.loVect()),ARLIM(mw.hiVect()),
                ms.dataPtr(),ARLIM(ms.loVect()),ARLIM(ms.hiVect()),
                mt.dataPtr(),ARLIM(mt.loVect()),ARLIM(mt.hiVect()),
                mb.dataPtr(),ARLIM(mb.loVect()),ARLIM(mb.hiVect()),
                denfab.dataPtr(),
                ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
                exttdptr, ARLIM(fslo), ARLIM(fshi),
                tdfab.dataPtr(),ARLIM(tdfab.loVect()),ARLIM(tdfab.hiVect()),
                inout.box(gn).loVect(), inout.box(gn).hiVect(),
                &nc, h[level]);
#endif
        }
    }

#if 0
    // This "probably" works, but is not strictly needed just because of the way Bill
    // coded up the tangential derivative stuff. It's handy code though, so I want to
    // keep it around.

    // Clean up corners:
    // The problem here is that APPLYBC fills only grow cells normal to the boundary.
    // As a result, any corner cell on the boundary (either coarse-fine or fine-fine)
    // is not filled. For coarse-fine, the operator adjusts itself, sliding away from
    // the box edge to avoid referencing that corner point. On the physical boundary
    // though, the corner point is needed. Particularly if a fine-fine boundary intersects
    // the physical boundary, since we want the stencil to be independent of the box
    // blocking. FillBoundary operations won't fix the problem because the "good"
    // data we need is living in the grow region of adjacent fabs. So, here we play
    // the usual games to treat the newly filled grow cells as "valid" data.

    // Note that we only need to do something where the grids touch the physical boundary.

    const Geometry& geomlev = geomarray[level];
    const BoxArray& grids = inout.boxArray();
    const Box& domain = geomlev.Domain();
    int nGrow = 1;
    int src_comp = 0;
    int num_comp = BL_SPACEDIM;

    // Lets do a quick check to see if we need to do anything at all here
    BoxArray BIGba = BoxArray(grids).grow(nGrow);

    if (! (domain.contains(BIGba.minimalBox())) ) {

        BoxArray boundary_pieces;
        Array<int> proc_idxs;
        Array<Array<int> > old_to_new(grids.size());
        const DistributionMapping& dmap=inout.DistributionMap();

        // Collect, per grid, the pieces of its grown box that lie outside the
        // domain in each non-periodic direction.
        for (int d=0; d<BL_SPACEDIM; ++d) {
            if (! (geomlev.isPeriodic(d)) ) {

                BoxArray gba = BoxArray(grids).grow(d,nGrow);
                for (int i=0; i<gba.size(); ++i) {
                    BoxArray new_pieces = BoxLib::boxComplement(gba[i],domain);
                    int size_new = new_pieces.size();
                    if (size_new>0) {
                        int size_old = boundary_pieces.size();
                        boundary_pieces.resize(size_old+size_new);
                        proc_idxs.resize(boundary_pieces.size());
                        for (int j=0; j<size_new; ++j) {
                            boundary_pieces.set(size_old+j,new_pieces[j]);
                            proc_idxs[size_old+j] = dmap[i];
                            old_to_new[i].push_back(size_old+j);
                        }
                    }
                }
            }
        }

        proc_idxs.push_back(ParallelDescriptor::MyProc());

        MultiFab boundary_data(boundary_pieces,num_comp,nGrow,
                               DistributionMapping(proc_idxs));

        for (MFIter mfi(inout); mfi.isValid(); ++mfi) {
            const FArrayBox& src_fab = inout[mfi];
            for (int j=0; j<old_to_new[mfi.index()].size(); ++j) {
                int new_box_idx = old_to_new[mfi.index()][j];
                boundary_data[new_box_idx].copy(src_fab,src_comp,0,num_comp);
            }
        }

        boundary_data.FillBoundary();

        // Use a hacked Geometry object to handle the periodic intersections for us.
        // Here, the "domain" is the plane of cells on non-periodic boundary faces.
        // and there may be cells over the periodic boundary in the remaining directions.
        // We do a Geometry::PFB on each non-periodic face to sync these up.
        if (geomlev.isAnyPeriodic()) {
            Array<int> is_per(BL_SPACEDIM,0);
            for (int d=0; d<BL_SPACEDIM; ++d) {
                is_per[d] = geomlev.isPeriodic(d);
            }
            for (int d=0; d<BL_SPACEDIM; ++d) {
                if (! is_per[d]) {
                    Box tmpLo = BoxLib::adjCellLo(geomlev.Domain(),d,1);
                    Geometry tmpGeomLo(tmpLo,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
                    tmpGeomLo.FillPeriodicBoundary(boundary_data);

                    Box tmpHi = BoxLib::adjCellHi(geomlev.Domain(),d,1);
                    Geometry tmpGeomHi(tmpHi,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
                    tmpGeomHi.FillPeriodicBoundary(boundary_data);
                }
            }
        }

        // Copy the synced data back into the parts of each fab that lie
        // outside the domain.
        for (MFIter mfi(inout); mfi.isValid(); ++mfi) {
            int idx = mfi.index();
            FArrayBox& dst_fab = inout[mfi];
            for (int j=0; j<old_to_new[idx].size(); ++j) {
                int new_box_idx = old_to_new[mfi.index()][j];
                const FArrayBox& src_fab = boundary_data[new_box_idx];
                const Box& src_box = src_fab.box();

                BoxArray pieces_outside_domain = BoxLib::boxComplement(src_box,domain);
                for (int k=0; k<pieces_outside_domain.size(); ++k) {
                    const Box& outside = pieces_outside_domain[k] & dst_fab.box();
                    if (outside.ok()) {
                        dst_fab.copy(src_fab,outside,0,outside,src_comp,num_comp);
                    }
                }
            }
        }
    }
#endif
}
void Nyx::sdc_reactions (MultiFab& S_old, MultiFab& S_new, MultiFab& D_new, MultiFab& hydro_src, MultiFab& IR, Real delta_time, Real a_old, Real a_new, int sdc_iter) { BL_PROFILE("Nyx::sdc_reactions()"); const Real* dx = geom.CellSize(); // First reset internal energy before call to compute_temp MultiFab reset_e_src(S_new.boxArray(), S_new.DistributionMap(), 1, NUM_GROW); reset_e_src.setVal(0.0); reset_internal_energy(S_new,D_new,reset_e_src); compute_new_temp (S_new,D_new); #ifndef FORCING { const Real z = 1.0/a_old - 1.0; fort_interp_to_this_z(&z); } #endif int min_iter = 100000; int max_iter = 0; int min_iter_grid, max_iter_grid; /////////////////////Consider adding ifdefs for whether CVODE is compiled in for these statements if(heat_cool_type == 3) { #ifdef _OPENMP #pragma omp parallel #endif for (MFIter mfi(S_old,true); mfi.isValid(); ++mfi) { // Note that this "bx" is only the valid region (unlike for Strang) const Box& bx = mfi.tilebox(); min_iter_grid = 100000; max_iter_grid = 0; integrate_state_with_source (bx.loVect(), bx.hiVect(), BL_TO_FORTRAN(S_old[mfi]), BL_TO_FORTRAN(S_new[mfi]), BL_TO_FORTRAN(D_new[mfi]), BL_TO_FORTRAN(hydro_src[mfi]), BL_TO_FORTRAN(reset_e_src[mfi]), BL_TO_FORTRAN(IR[mfi]), &a_old, &delta_time, &min_iter_grid, &max_iter_grid); min_iter = std::min(min_iter,min_iter_grid); max_iter = std::max(max_iter,max_iter_grid); } } else if(heat_cool_type == 5) { #ifdef _OPENMP #pragma omp parallel #endif for (MFIter mfi(S_old,true); mfi.isValid(); ++mfi) { // Note that this "bx" is only the valid region (unlike for Strang) const Box& bx = mfi.tilebox(); min_iter_grid = 100000; max_iter_grid = 0; integrate_state_fcvode_with_source (bx.loVect(), bx.hiVect(), BL_TO_FORTRAN(S_old[mfi]), BL_TO_FORTRAN(S_new[mfi]), BL_TO_FORTRAN(D_new[mfi]), BL_TO_FORTRAN(hydro_src[mfi]), BL_TO_FORTRAN(reset_e_src[mfi]), BL_TO_FORTRAN(IR[mfi]), &a_old, &delta_time, &min_iter_grid, &max_iter_grid); min_iter = std::min(min_iter,min_iter_grid); max_iter = 
std::max(max_iter,max_iter_grid); } } ParallelDescriptor::ReduceIntMax(max_iter); ParallelDescriptor::ReduceIntMin(min_iter); amrex::Print() << "Min/Max Number of Iterations in SDC: " << min_iter << " " << max_iter << std::endl; }
int CGSolver::solve_bicgstab (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_bicgstab()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab ph(ba, ncomp, nghost, dm); MultiFab sh(ba, ncomp, nghost, dm); MultiFab sorig(ba, ncomp, 0, dm); MultiFab p (ba, ncomp, 0, dm); MultiFab r (ba, ncomp, 0, dm); MultiFab s (ba, ncomp, 0, dm); MultiFab rh (ba, ncomp, 0, dm); MultiFab v (ba, ncomp, 0, dm); MultiFab t (ba, ncomp, 0, dm); Lp.residual(r, rhs, sol, lev, bc_mode); MultiFab::Copy(sorig,sol,0,0,1,0); MultiFab::Copy(rh, r, 0,0,1,0); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC; #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA Real rnorm = norm_inf(r); #else // // Calculate the local values of these norms & reduce their values together. 
// Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) }; ParallelDescriptor::ReduceRealMax(vals,2,color()); Real rnorm = vals[0]; const Real Lp_norm = vals[1]; Real sol_norm = 0; #endif const Real rnorm0 = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Initial error (error0) = " << rnorm0 << '\n'; } int ret = 0, nit = 1; Real rho_1 = 0, alpha = 0, omega = 0; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return ret; } for (; nit <= maxiter; ++nit) { const Real rho = dotxy(rh,r); if ( rho == 0 ) { ret = 1; break; } if ( nit == 1 ) { MultiFab::Copy(p,r,0,0,1,0); } else { const Real beta = (rho/rho_1)*(alpha/omega); sxay(p, p, -omega, v); sxay(p, r, beta, p); } if ( use_mg_precond ) { ph.setVal(0); mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode); } else if ( use_jacobi_precond ) { ph.setVal(0); Lp.jacobi_smooth(ph, p, lev, temp_bc_mode); } else { MultiFab::Copy(ph,p,0,0,1,0); } Lp.apply(v, ph, lev, temp_bc_mode); if ( Real rhTv = dotxy(rh,v) ) { alpha = rho/rhTv; } else { ret = 2; break; } sxay(sol, sol, alpha, ph); sxay(s, r, -alpha, v); rnorm = norm_inf(s); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Half Iter " << std::setw(11) << nit << " rel. err. 
" << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else sol_norm = norm_inf(sol); if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break; #endif if ( use_mg_precond ) { sh.setVal(0); mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode); } else if ( use_jacobi_precond ) { sh.setVal(0); Lp.jacobi_smooth(sh, s, lev, temp_bc_mode); } else { MultiFab::Copy(sh,s,0,0,1,0); } Lp.apply(t, sh, lev, temp_bc_mode); // // This is a little funky. I want to elide one of the reductions // in the following two dotxy()s. We do that by calculating the "local" // values and then reducing the two local values at the same time. // Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) }; ParallelDescriptor::ReduceRealSum(vals,2,color()); if ( vals[0] ) { omega = vals[1]/vals[0]; } else { ret = 3; break; } sxay(sol, sol, omega, sh); sxay(r, s, -omega, t); rnorm = norm_inf(r); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Iteration " << std::setw(11) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else sol_norm = norm_inf(sol); if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break; #endif if ( omega == 0 ) { ret = 4; break; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Final: Iteration " << std::setw(4) << nit << " rel. err. 
" << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int CGSolver::solve_cabicgstab (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_cabicgstab()"); BL_ASSERT(sol.nComp() == 1); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); Real temp1[4*SSS_MAX+1]; Real temp2[4*SSS_MAX+1]; Real temp3[4*SSS_MAX+1]; Real Tp[4*SSS_MAX+1][4*SSS_MAX+1]; Real Tpp[4*SSS_MAX+1][4*SSS_MAX+1]; Real aj[4*SSS_MAX+1]; Real cj[4*SSS_MAX+1]; Real ej[4*SSS_MAX+1]; Real Tpaj[4*SSS_MAX+1]; Real Tpcj[4*SSS_MAX+1]; Real Tppaj[4*SSS_MAX+1]; Real G[4*SSS_MAX+1][4*SSS_MAX+1]; // Extracted from first 4*SSS+1 columns of Gg[][]. indexed as [row][col] Real g[4*SSS_MAX+1]; // Extracted from last [4*SSS+1] column of Gg[][]. Real Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul(). indexed as [row*(4*SSS+2) + col] // // If variable_SSS we "telescope" SSS. // We start with 1 and increase it up to SSS_MAX on the outer iterations. // if (variable_SSS) SSS = 1; zero( aj, 4*SSS_MAX+1); zero( cj, 4*SSS_MAX+1); zero( ej, 4*SSS_MAX+1); zero( Tpaj, 4*SSS_MAX+1); zero( Tpcj, 4*SSS_MAX+1); zero(Tppaj, 4*SSS_MAX+1); zero(temp1, 4*SSS_MAX+1); zero(temp2, 4*SSS_MAX+1); zero(temp3, 4*SSS_MAX+1); SetMonomialBasis(Tp,Tpp,SSS); const int ncomp = 1, nghost = sol.nGrow(); // // Contains the matrix powers of p[] and r[]. // // First 2*SSS+1 components are powers of p[]. // Next 2*SSS components are powers of r[]. 
// const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); MultiFab PR(ba, 4*SSS_MAX+1, 0, dm); MultiFab p(ba, ncomp, 0, dm); MultiFab r(ba, ncomp, 0, dm); MultiFab rt(ba, ncomp, 0, dm); MultiFab tmp(ba, 4, nghost, dm); Lp.residual(r, rhs, sol, lev, bc_mode); BL_ASSERT(!r.contains_nan()); MultiFab::Copy(rt,r,0,0,1,0); MultiFab::Copy( p,r,0,0,1,0); const Real rnorm0 = norm_inf(r); Real delta = dotxy(r,rt); const Real L2_norm_of_rt = sqrt(delta); const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_CABiCGStab: Initial error (error0) = " << rnorm0 << '\n'; } if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_CABiCGStab: niter = 0," << ", rnorm = " << rnorm0 << ", delta = " << delta << ", eps_abs = " << eps_abs << '\n'; } return 0; } int niters = 0, ret = 0; Real L2_norm_of_resid = 0, atime = 0, gtime = 0; bool BiCGStabFailed = false, BiCGStabConverged = false; for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; ) { const Real time1 = ParallelDescriptor::second(); // // Compute the matrix powers on p[] & r[] (monomial basis). // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[]. // MultiFab::Copy(PR,p,0,0,1,0); MultiFab::Copy(PR,r,0,2*SSS+1,1,0); BL_ASSERT(!PR.contains_nan(0, 1)); BL_ASSERT(!PR.contains_nan(2*SSS+1,1)); // // We use "tmp" to minimize the number of Lp.apply()s. // We do this by doing p & r together in a single call. 
// MultiFab::Copy(tmp,p,0,0,1,0); MultiFab::Copy(tmp,r,0,1,1,0); for (int n = 1; n < 2*SSS; n++) { Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2); MultiFab::Copy(tmp,tmp,2,0,2,0); MultiFab::Copy(PR,tmp,0, n,1,0); MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0); BL_ASSERT(!PR.contains_nan(n, 1)); BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1)); } MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0); Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1); MultiFab::Copy(PR,tmp,1,2*SSS,1,0); BL_ASSERT(!PR.contains_nan(2*SSS-1,1)); BL_ASSERT(!PR.contains_nan(2*SSS, 1)); Real time2 = ParallelDescriptor::second(); atime += (time2-time1); BuildGramMatrix(Gg, PR, rt, SSS); const Real time3 = ParallelDescriptor::second(); gtime += (time3-time2); // // Form G[][] and g[] from Gg. // for (int i = 0, k = 0; i < 4*SSS+1; i++) { for (int j = 0; j < 4*SSS+1; j++) // // First 4*SSS+1 elements in each row go to G[][]. // G[i][j] = Gg[k++]; // // Last element in row goes to g[]. // g[i] = Gg[k++]; } zero(aj, 4*SSS+1); aj[0] = 1; zero(cj, 4*SSS+1); cj[2*SSS+1] = 1; zero(ej, 4*SSS+1); for (int nit = 0; nit < SSS; nit++) { gemv( Tpaj, Tp, aj, 4*SSS+1, 4*SSS+1); gemv( Tpcj, Tp, cj, 4*SSS+1, 4*SSS+1); gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1); const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1); if ( g_dot_Tpaj == 0 ) { if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n'; BiCGStabFailed = true; ret = 1; break; } const Real alpha = delta / g_dot_Tpaj; if ( std::isinf(alpha) ) { if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n'; BiCGStabFailed = true; ret = 2; break; } axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1); gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1); axpy(temp3, cj, -alpha, Tpaj, 4*SSS+1); const Real omega_numerator = dot(temp3, temp2, 4*SSS+1); const Real omega_denominator = dot(temp1, temp2, 4*SSS+1); // // NOTE: omega_numerator/omega_denominator can be 
0/x or 0/0, but should never be x/0. // // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj. // If omega_numerator==0, and ||s||!=0, then stabilization breakdown. // // Partial update of ej must happen before the check on omega to ensure forward progress !!! // axpy(ej, ej, alpha, aj, 4*SSS+1); // // ej has been updated so consider that we've done an iteration since // even if we break out of the loop we'll be able to update both sol. // niters++; // // Calculate the norm of Saad's vector 's' to check intra s-step convergence. // axpy(temp1, cj,-alpha, Tpaj, 4*SSS+1); gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1); const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1); L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s)); if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt ) { if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n'; BiCGStabConverged = true; break; } if ( omega_denominator == 0 ) { if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n'; BiCGStabFailed = true; ret = 3; break; } const Real omega = omega_numerator / omega_denominator; if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) { if ( omega == 0 ) std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n'; if ( std::isinf(omega) ) std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n'; } if ( omega == 0 ) { BiCGStabFailed = true; ret = 4; break; } if ( std::isinf(omega) ) { BiCGStabFailed = true; ret = 4; break; } // // Complete the update of ej & cj now that omega is known to be ok. // axpy(ej, ej, omega, cj, 4*SSS+1); axpy(ej, ej,-omega*alpha, Tpaj, 4*SSS+1); axpy(cj, cj, -omega, Tpcj, 4*SSS+1); axpy(cj, cj, -alpha, Tpaj, 4*SSS+1); axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1); // // Do an early check of the residual to determine convergence. 
// gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1); // // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic. // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel). // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged. // const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1); L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0); if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt ) { if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n'; BiCGStabConverged = true; break; } const Real delta_next = dot(g, cj, 4*SSS+1); if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) { if ( delta_next == 0 ) std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n'; if ( std::isinf(delta_next) ) std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n'; } if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf? if ( delta_next == 0 ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown... const Real beta = (delta_next/delta)*(alpha/omega); if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) { if ( beta == 0 ) std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n'; if ( std::isinf(beta) ) std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n'; } if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf? if ( beta == 0 ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0? can't make further progress(?) axpy(aj, cj, beta, aj, 4*SSS+1); axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1); delta = delta_next; } // // Update iterates. 
// for (int i = 0; i < 4*SSS+1; i++) sxay(sol,sol,ej[i],PR,i); MultiFab::Copy(p,PR,0,0,1,0); p.mult(aj[0],0,1); for (int i = 1; i < 4*SSS+1; i++) sxay(p,p,aj[i],PR,i); MultiFab::Copy(r,PR,0,0,1,0); r.mult(cj[0],0,1); for (int i = 1; i < 4*SSS+1; i++) sxay(r,r,cj[i],PR,i); if ( !BiCGStabFailed && !BiCGStabConverged ) { m += SSS; if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); } } } if ( verbose > 0 ) { if ( ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_CABiCGStab: Final: Iteration " << std::setw(4) << niters << " rel. err. " << L2_norm_of_resid << '\n'; } if ( verbose > 1 ) { Real tmp[2] = { atime, gtime }; ParallelDescriptor::ReduceRealMax(tmp,2,color()); if ( ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0] << ", gram time: " << tmp[1] << '\n'; } } } if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged) { if ( L2_norm_of_resid > L2_norm_of_rt ) { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!"); // // Return code 8 tells the MultiGrid driver to zero out the solution! // ret = 8; } else { // // Return codes 1-7 tells the MultiGrid driver to smooth the solution! // ret = 7; } } return ret; }
int CGSolver::jbb_precond (MultiFab& sol, const MultiFab& rhs, int lev, LinOp& Lp) { // // This is a local routine. No parallel is allowed to happen here. // int lev_loc = lev; const Real eps_rel = 1.e-2; const Real eps_abs = 1.e-16; const int nghost = sol.nGrow(); const int ncomp = sol.nComp(); const bool local = true; const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC; BL_ASSERT(ncomp == 1 ); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc)); const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); sorig.copy(sol); Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local); sol.setVal(0); Real rnorm = norm_inf(r,local); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << " jbb_precond: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev_loc, local); Real sol_norm = 0; int ret = 0; // will return this value if all goes well Real rho_1 = 0; int nit = 1; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { z.copy(r); Real rho = dotxy(z,r,local); if (nit == 1) { p.copy(z); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev_loc, bc_mode, local); Real alpha; if ( Real pw = dotxy(p,q,local) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } 
sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r, local); sol_norm = norm_inf(sol, local); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) { break; } if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) { if ( ParallelDescriptor::IOProcessor(color()) ) { BoxLib::Warning("jbb_precond:: failed to converge!"); } ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int CGSolver::solve_cg (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_cg()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); MultiFab r1(ba, ncomp, nghost, dm); MultiFab z1(ba, ncomp, nghost, dm); MultiFab r2(ba, ncomp, nghost, dm); MultiFab z2(ba, ncomp, nghost, dm); MultiFab::Copy(sorig,sol,0,0,1,0); Lp.residual(r, rhs, sorig, lev, bc_mode); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC; Real rnorm = norm_inf(r); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev); Real sol_norm = 0; Real rho_1 = 0; int ret = 0; int nit = 1; if ( rnorm == 0 || rnorm < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: niter = 0," << ", rnorm = " << rnorm << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" << eps_rel*(Lp_norm*sol_norm + rnorm0 ) << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1) { z.setVal(0); jbb_precond(z,r,lev,Lp); } else { MultiFab::Copy(z,r,0,0,1,0); } Real rho = dotxy(z,r); if (nit == 1) { MultiFab::Copy(p,z,0,0,1,0); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev, temp_bc_mode); Real alpha; if ( Real pw = dotxy(p,q) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 2 && 
ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_cg:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r); sol_norm = norm_inf(sol); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break; #endif if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs ) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_cg: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }