//
// What's the slowest way I can think of to compute all the norms??
//
Real
MFNorm (const MultiFab& mfab,
        const int       exponent,
        const int       srcComp,
        const int       numComp,
        const int       numGrow)
{
    BL_ASSERT (numGrow <= mfab.nGrow());

    BoxArray boxes = mfab.boxArray();
    boxes.grow(numGrow);
    //
    // Get a copy of the multifab.  The copy needs numGrow ghost cells,
    // since MultiFab::Copy below fills numGrow ghost cells.
    //
    MultiFab mftmp(mfab.boxArray(), numComp, numGrow);
    MultiFab::Copy(mftmp,mfab,srcComp,0,numComp,numGrow);
    //
    // Calculate the norms.
    //
    Real myNorm = 0;
    if ( exponent == 0 )
    {
        for ( MFIter mftmpmfi(mftmp); mftmpmfi.isValid(); ++mftmpmfi)
        {
            mftmp[mftmpmfi].abs(boxes[mftmpmfi.index()], 0, numComp);
            myNorm = std::max(myNorm, mftmp[mftmpmfi].norm(0, 0, numComp));
        }
        ParallelDescriptor::ReduceRealMax(myNorm);
    }
    else if ( exponent == 1 )
    {
        for ( MFIter mftmpmfi(mftmp); mftmpmfi.isValid(); ++mftmpmfi)
        {
            mftmp[mftmpmfi].abs(boxes[mftmpmfi.index()], 0, numComp);
            myNorm += mftmp[mftmpmfi].norm(1, 0, numComp);
        }
        ParallelDescriptor::ReduceRealSum(myNorm);
    }
    else if ( exponent == 2 )
    {
        for ( MFIter mftmpmfi(mftmp); mftmpmfi.isValid(); ++mftmpmfi)
        {
            mftmp[mftmpmfi].abs(boxes[mftmpmfi.index()], 0, numComp);
            myNorm += pow(mftmp[mftmpmfi].norm(2, 0, numComp), 2);
        }
        ParallelDescriptor::ReduceRealSum(myNorm);
        myNorm = sqrt( myNorm );
    }
    else
    {
        BoxLib::Error("Invalid exponent to norm function");
    }

    return myNorm;
}
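//
// A minimal usage sketch (not from the original source): computing the
// max-, 1- and 2-norms of the first component of a hypothetical MultiFab
// "phi" that carries at least one ghost cell.
//
// Real max_norm = MFNorm(phi, 0, 0, 1, 1); // max of |phi| over valid + 1 ghost cell
// Real one_norm = MFNorm(phi, 1, 0, 1, 1); // sum of |phi|
// Real two_norm = MFNorm(phi, 2, 0, 1, 1); // sqrt of sum of phi^2
//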
void
MultiGrid::average (MultiFab& c, const MultiFab& f)
{
    BL_PROFILE("MultiGrid::average()");
    //
    // Use Fortran function to average down (restrict) f to c.
    //
    const bool tiling = true;
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter cmfi(c,tiling); cmfi.isValid(); ++cmfi)
    {
        BL_ASSERT(c.boxArray().get(cmfi.index()) == cmfi.validbox());

        const int        nc   = c.nComp();
        const Box&       bx   = cmfi.tilebox();
        FArrayBox&       cfab = c[cmfi];
        const FArrayBox& ffab = f[cmfi];

        FORT_AVERAGE(cfab.dataPtr(), ARLIM(cfab.loVect()), ARLIM(cfab.hiVect()),
                     ffab.dataPtr(), ARLIM(ffab.loVect()), ARLIM(ffab.hiVect()),
                     bx.loVect(), bx.hiVect(), &nc);
    }
}
void
MultiGrid::interpolate (MultiFab& f, const MultiFab& c)
{
    BL_PROFILE("MultiGrid::interpolate()");
    //
    // Use Fortran function to interpolate up (prolong) c to f.
    // Note: returns f = f + P(c), i.e. ADDS the interpolated c to f.
    //
    // OMP over boxes.
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(c); mfi.isValid(); ++mfi)
    {
        const int        k    = mfi.index();
        const Box&       bx   = c.boxArray()[k];
        const int        nc   = f.nComp();
        const FArrayBox& cfab = c[mfi];
        FArrayBox&       ffab = f[mfi];

        FORT_INTERP(ffab.dataPtr(), ARLIM(ffab.loVect()), ARLIM(ffab.hiVect()),
                    cfab.dataPtr(), ARLIM(cfab.loVect()), ARLIM(cfab.hiVect()),
                    bx.loVect(), bx.hiVect(), &nc);
    }
}
void
average_down (MultiFab& S_fine, MultiFab& S_crse,
              int scomp, int ncomp, const IntVect& ratio)
{
    BL_ASSERT(S_crse.nComp() == S_fine.nComp());
    //
    // Coarsen() the fine stuff on processors owning the fine data.
    //
    BoxArray crse_S_fine_BA = S_fine.boxArray();
    crse_S_fine_BA.coarsen(ratio);

    MultiFab crse_S_fine(crse_S_fine_BA,ncomp,0);

#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(crse_S_fine,true); mfi.isValid(); ++mfi)
    {
        // NOTE: The tilebox is defined at the coarse level.
        const Box& tbx = mfi.tilebox();

        // NOTE: We copy from component scomp of the fine fab into component 0
        //       of the crse fab because the crse fab is a temporary which was
        //       made starting at comp 0; it is not part of the actual crse
        //       multifab which came in.
        BL_FORT_PROC_CALL(BL_AVGDOWN,bl_avgdown)
            (tbx.loVect(), tbx.hiVect(),
             BL_TO_FORTRAN_N(S_fine[mfi],scomp),
             BL_TO_FORTRAN_N(crse_S_fine[mfi],0),
             ratio.getVect(),&ncomp);
    }

    S_crse.copy(crse_S_fine,0,scomp,ncomp);
}
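//
// A minimal usage sketch (hypothetical MultiFab names): average a fine-level
// field down to a coarse level with a 2:1 refinement ratio in every direction.
//
// const IntVect ratio(D_DECL(2,2,2));
// average_down(S_fine, S_crse, 0, S_fine.nComp(), ratio);
//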
void
solve_with_Cpp(MultiFab& soln, MultiFab& gphi,
               Real a, Real b, MultiFab& alpha,
               PArray<MultiFab>& beta, MultiFab& rhs,
               const BoxArray& bs, const Geometry& geom)
{
    BL_PROFILE("solve_with_Cpp()");

    BndryData bd(bs, 1, geom);
    set_boundary(bd, rhs, 0);

    ABecLaplacian abec_operator(bd, dx);
    abec_operator.setScalars(a, b);
    abec_operator.setCoefficients(alpha, beta);

    MultiGrid mg(abec_operator);
    mg.setVerbose(verbose);
    mg.solve(soln, rhs, tolerance_rel, tolerance_abs);

    PArray<MultiFab> grad_phi(BL_SPACEDIM, PArrayManage);
    for (int n = 0; n < BL_SPACEDIM; ++n)
        grad_phi.set(n, new MultiFab(BoxArray(soln.boxArray()).surroundingNodes(n), 1, 0));

#if (BL_SPACEDIM == 2)
    abec_operator.compFlux(grad_phi[0],grad_phi[1],soln);
#elif (BL_SPACEDIM == 3)
    abec_operator.compFlux(grad_phi[0],grad_phi[1],grad_phi[2],soln);
#endif

    // Average edge-centered gradients to cell centers.
    BoxLib::average_face_to_cellcenter(gphi, grad_phi, geom);
}
void
Nyx::strang_second_step (Real time, Real dt, MultiFab& S_new, MultiFab& D_new)
{
    BL_PROFILE("Nyx::strang_second_step()");

    Real half_dt = 0.5*dt;

    int min_iter = 100000;
    int max_iter = 0;

    int min_iter_grid;
    int max_iter_grid;

    // Set a at the half of the time step in the second Strang.
    const Real a = get_comoving_a(time-half_dt);

    MultiFab reset_e_src(S_new.boxArray(), S_new.DistributionMap(), 1, NUM_GROW);
    reset_e_src.setVal(0.0);
    reset_internal_energy(S_new,D_new,reset_e_src);
    compute_new_temp     (S_new,D_new);

#ifndef FORCING
    {
        const Real z = 1.0/a - 1.0;
        fort_interp_to_this_z(&z);
    }
#endif

#ifdef _OPENMP
#pragma omp parallel private(min_iter_grid,max_iter_grid) reduction(min:min_iter) reduction(max:max_iter)
#endif
    for (MFIter mfi(S_new,true); mfi.isValid(); ++mfi)
    {
        // Here bx is just the valid region.
        const Box& bx = mfi.tilebox();

        min_iter_grid = 100000;
        max_iter_grid = 0;

        integrate_state(bx.loVect(), bx.hiVect(),
                        BL_TO_FORTRAN(S_new[mfi]),
                        BL_TO_FORTRAN(D_new[mfi]),
                        &a, &half_dt, &min_iter_grid, &max_iter_grid);

        if (S_new[mfi].contains_nan(bx,0,S_new.nComp()))
        {
            std::cout << "NANS IN THIS GRID " << bx << std::endl;
        }

        min_iter = std::min(min_iter,min_iter_grid);
        max_iter = std::max(max_iter,max_iter_grid);
    }

    ParallelDescriptor::ReduceIntMax(max_iter);
    ParallelDescriptor::ReduceIntMin(min_iter);

    if (heat_cool_type == 1)
        if (ParallelDescriptor::IOProcessor())
            std::cout << "Min/Max Number of Iterations in Second Strang: "
                      << min_iter << " " << max_iter << std::endl;
}
void
ABec4::aCoefficients (const MultiFab& _a)
{
    BL_ASSERT(_a.ok());
    BL_ASSERT(_a.boxArray() == (acoefs[0])->boxArray());
    invalidate_a_to_level(0);
    MultiFab::Copy(*acoefs[0],_a,0,0,acoefs[0]->nComp(),acoefs[0]->nGrow());
}
void
ABec4::bCoefficients (const MultiFab& _b)
{
    BL_ASSERT(_b.ok());
    BL_ASSERT(_b.boxArray() == (bcoefs[0])->boxArray());
    invalidate_b_to_level(0);
    MultiFab::Copy(*bcoefs[0],_b,0,0,bcoefs[0]->nComp(),bcoefs[0]->nGrow());
}
static
void
Write_N_Read (const MultiFab& mf, const std::string& mf_name)
{
    if (ParallelDescriptor::IOProcessor())
    {
        std::cout << "Writing the MultiFab to disk ...\n";
    }

    double start, end;

    ParallelDescriptor::Barrier();

    if (ParallelDescriptor::IOProcessor())
    {
        start = BoxLib::wsecond();
    }

    // Write the MultiFab to disk; this is the operation being timed.
    VisMF::Write(mf, mf_name);

    ParallelDescriptor::Barrier();

    if (ParallelDescriptor::IOProcessor())
    {
        end = BoxLib::wsecond();
        std::cout << "\nWallclock time for MF write: " << (end-start) << '\n';
        std::cout << "Reading the MultiFab from disk ...\n";
    }

    VisMF vmf(mf_name);
    BL_ASSERT(vmf.size() == mf.boxArray().size());

    for (MFIter mfi(mf); mfi.isValid(); ++mfi)
    {
        //const FArrayBox& fab = vmf[mfi.index()];
        const FArrayBox& fab = vmf.GetFab(mfi.index(), 0);
        std::cout << "\tCPU #" << ParallelDescriptor::MyProc()
                  << " read FAB #" << mfi.index() << '\n';
    }

    ParallelDescriptor::Barrier();

    if (ParallelDescriptor::IOProcessor())
    {
        std::cout << "Building new MultiFab from disk version ....\n\n";
    }

    MultiFab new_mf;
    VisMF::Read(new_mf, mf_name);
}
//
// Do a one-component dot product of r & z using supplied components.
//
static
Real
dotxy (const MultiFab& r, int rcomp,
       const MultiFab& z, int zcomp,
       bool local)
{
    BL_PROFILE("CGSolver::dotxy()");

    BL_ASSERT(r.nComp() > rcomp);
    BL_ASSERT(z.nComp() > zcomp);
    BL_ASSERT(r.boxArray() == z.boxArray());

    const int ncomp  = 1;
    const int nghost = 0;
    return MultiFab::Dot(r,rcomp,z,zcomp,ncomp,nghost,local);
}
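//
// The solvers below also use an sxay() helper that is not shown in this
// listing.  A minimal sketch of its conventional meaning, s = x + a*y,
// written with MultiFab operations (an assumption, not the original code;
// the solvers also call a component-indexed overload):
//
// static void sxay (MultiFab& s, const MultiFab& x, Real a, const MultiFab& y)
// {
//     // s = 1*x + a*y, one component, no ghost cells.
//     MultiFab::LinComb(s, 1.0, x, 0, a, y, 0, 0, 1, 0);
// }
//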
void
average_down (MultiFab& S_fine, MultiFab& S_crse,
              const Geometry& fgeom, const Geometry& cgeom,
              int scomp, int ncomp, const IntVect& ratio)
{
    if (S_fine.is_nodal() || S_crse.is_nodal())
    {
        BoxLib::Error("Can't use BoxLib::average_down for nodal MultiFab!");
    }

#if (BL_SPACEDIM == 3)
    BoxLib::average_down(S_fine, S_crse, scomp, ncomp, ratio);
    return;
#else

    BL_ASSERT(S_crse.nComp() == S_fine.nComp());
    //
    // Coarsen() the fine stuff on processors owning the fine data.
    //
    const BoxArray& fine_BA = S_fine.boxArray();
    BoxArray crse_S_fine_BA = fine_BA;
    crse_S_fine_BA.coarsen(ratio);

    MultiFab crse_S_fine(crse_S_fine_BA,ncomp,0);

    MultiFab fvolume;
    fgeom.GetVolume(fvolume, fine_BA, 0);

#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(crse_S_fine,true); mfi.isValid(); ++mfi)
    {
        // NOTE: The tilebox is defined at the coarse level.
        const Box& tbx = mfi.tilebox();

        // NOTE: We copy from component scomp of the fine fab into component 0
        //       of the crse fab because the crse fab is a temporary which was
        //       made starting at comp 0; it is not part of the actual crse
        //       multifab which came in.
        BL_FORT_PROC_CALL(BL_AVGDOWN_WITH_VOL,bl_avgdown_with_vol)
            (tbx.loVect(), tbx.hiVect(),
             BL_TO_FORTRAN_N(S_fine[mfi],scomp),
             BL_TO_FORTRAN_N(crse_S_fine[mfi],0),
             BL_TO_FORTRAN(fvolume[mfi]),
             ratio.getVect(),&ncomp);
    }

    S_crse.copy(crse_S_fine,0,scomp,ncomp);
#endif
}
void
solve_with_F90(MultiFab& soln, MultiFab& gphi,
               Real a, Real b, MultiFab& alpha,
               PArray<MultiFab>& beta, MultiFab& rhs,
               const BoxArray& bs, const Geometry& geom)
{
    BL_PROFILE("solve_with_F90()");

    FMultiGrid fmg(geom);

    int mg_bc[2*BL_SPACEDIM];
    if (bc_type == Periodic)
    {
        // Define the type of boundary conditions to be periodic.
        for ( int n = 0; n < BL_SPACEDIM; ++n )
        {
            mg_bc[2*n + 0] = MGT_BC_PER;
            mg_bc[2*n + 1] = MGT_BC_PER;
        }
    }
    else if (bc_type == Neumann)
    {
        // Define the type of boundary conditions to be Neumann.
        for ( int n = 0; n < BL_SPACEDIM; ++n )
        {
            mg_bc[2*n + 0] = MGT_BC_NEU;
            mg_bc[2*n + 1] = MGT_BC_NEU;
        }
    }
    else if (bc_type == Dirichlet)
    {
        // Define the type of boundary conditions to be Dirichlet.
        for ( int n = 0; n < BL_SPACEDIM; ++n )
        {
            mg_bc[2*n + 0] = MGT_BC_DIR;
            mg_bc[2*n + 1] = MGT_BC_DIR;
        }
    }

    fmg.set_bc(mg_bc);
    fmg.set_maxorder(maxorder);
    fmg.set_scalars(a, b);
    fmg.set_coefficients(alpha, beta);

    int always_use_bnorm = 0;
    int need_grad_phi = 1;
    fmg.solve(soln, rhs, tolerance_rel, tolerance_abs, always_use_bnorm, need_grad_phi);

    PArray<MultiFab> grad_phi(BL_SPACEDIM, PArrayManage);
    for (int n = 0; n < BL_SPACEDIM; ++n)
        grad_phi.set(n, new MultiFab(BoxArray(soln.boxArray()).surroundingNodes(n), 1, 0));

    fmg.get_fluxes(grad_phi);

    // Average edge-centered gradients to cell centers.
    BoxLib::average_face_to_cellcenter(gphi, grad_phi, geom);
}
void
MultiFab_C_to_F::share (MultiFab& cmf, const std::string& fmf_name)
{
    const Box& bx = cmf.boxArray()[0];

    int nodal[BL_SPACEDIM];
    for ( int i = 0; i < BL_SPACEDIM; ++i ) {
        nodal[i] = (bx.type(i) == IndexType::NODE) ? 1 : 0;
    }

    share_multifab_with_f (fmf_name.c_str(), cmf.nComp(), cmf.nGrow(), nodal);

    for (MFIter mfi(cmf); mfi.isValid(); ++mfi)
    {
        int li = mfi.LocalIndex();
        const FArrayBox& fab = cmf[mfi];
        share_fab_with_f (li, fab.dataPtr());
    }
}
void
MCMultiGrid::interpolate (MultiFab& f, const MultiFab& c)
{
    //
    // Use Fortran function to interpolate up (prolong) c to f.
    // Note: returns f = f + P(c), i.e. ADDS the interpolated c to f.
    //
    for (MFIter fmfi(f); fmfi.isValid(); ++fmfi)
    {
        const Box&       bx   = c.boxArray()[fmfi.index()];
        int              nc   = f.nComp();
        const FArrayBox& cfab = c[fmfi];
        FArrayBox&       ffab = f[fmfi];

        FORT_INTERP(ffab.dataPtr(),ARLIM(ffab.loVect()),ARLIM(ffab.hiVect()),
                    cfab.dataPtr(),ARLIM(cfab.loVect()),ARLIM(cfab.hiVect()),
                    bx.loVect(), bx.hiVect(), &nc);
    }
}
void
MacOperator::setCoefficients (MultiFab*   area,
                              MultiFab&   rho,
                              int         rho_comp,
                              const Real* dx)
{
    //
    // Should check that all BoxArrays are consistent.
    //
    const BoxArray& ba = gbox[0];
    BL_ASSERT(rho.boxArray() == ba);
    //
    // First set scalar coefficients.
    //
    setScalars(0.0,1.0);
    //
    // Don't need to set a because alpha is set to zero.
    //
    const int n_grow = 0;

    D_TERM(MultiFab bxcoef(area[0].boxArray(),area[0].nComp(),n_grow);,
           MultiFab bycoef(area[1].boxArray(),area[1].nComp(),n_grow);,
           MultiFab bzcoef(area[2].boxArray(),area[2].nComp(),n_grow););
int
CGSolver::jbb_precond (MultiFab& sol, const MultiFab& rhs, int lev, LinOp& Lp)
{
    //
    // This is a local routine.  No parallel is allowed to happen here.
    //
    int                  lev_loc = lev;
    const Real           eps_rel = 1.e-2;
    const Real           eps_abs = 1.e-16;
    const int            nghost  = sol.nGrow();
    const int            ncomp   = sol.nComp();
    const bool           local   = true;
    const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC;

    BL_ASSERT(ncomp == 1);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc));

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab sorig(ba, ncomp, nghost, dm);
    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    sorig.copy(sol);

    Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local);

    sol.setVal(0);

    Real       rnorm    = norm_inf(r,local);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << " jbb_precond: Initial error : " << rnorm0 << '\n';
    }

    const Real Lp_norm  = Lp.norm(0, lev_loc, local);
    Real       sol_norm = 0;
    int        ret      = 0; // will return this value if all goes well
    Real       rho_1    = 0;
    int        nit      = 1;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond: niter = 0"
                      << ", rnorm = " << rnorm
                      << ", eps_abs = " << eps_abs << std::endl;
        }
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        z.copy(r);

        Real rho = dotxy(z,r,local);
        if (nit == 1)
        {
            p.copy(z);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }

        Lp.apply(q, p, lev_loc, bc_mode, local);

        Real alpha;
        if ( Real pw = dotxy(p,q,local) )
        {
            alpha = rho/pw;
        }
        else
        {
            ret = 1; break;
        }

        if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:" << " nit " << nit
                      << " rho " << rho << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm    = norm_inf(r,   local);
        sol_norm = norm_inf(sol, local);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond: Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs )
            break;

        if ( rnorm > def_unstable_criterion*minrnorm )
        {
            ret = 2; break;
        }
        else if ( rnorm < minrnorm )
        {
            minrnorm = rnorm;
        }

        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "jbb_precond: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
        {
            BoxLib::Warning("jbb_precond:: failed to converge!");
        }
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    }
    else
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
void
MCLinOp::makeCoefficients (MultiFab& cs, const MultiFab& fn, int level)
{
    const int nc = fn.nComp();
    //
    // Determine index type of incoming MultiFab.
    //
    const IndexType iType(fn.boxArray().ixType());
    const IndexType cType(D_DECL(IndexType::CELL, IndexType::CELL, IndexType::CELL));
    const IndexType xType(D_DECL(IndexType::NODE, IndexType::CELL, IndexType::CELL));
    const IndexType yType(D_DECL(IndexType::CELL, IndexType::NODE, IndexType::CELL));
#if (BL_SPACEDIM == 3)
    const IndexType zType(D_DECL(IndexType::CELL, IndexType::CELL, IndexType::NODE));
#endif
    int cdir;
    if (iType == cType)
    {
        cdir = -1;
    }
    else if (iType == xType)
    {
        cdir = 0;
    }
    else if (iType == yType)
    {
        cdir = 1;
    }
#if (BL_SPACEDIM == 3)
    else if (iType == zType)
    {
        cdir = 2;
    }
#endif
    else
        BoxLib::Abort("MCLinOp::makeCoefficients(): Bad index type");

    BoxArray d(gbox[level]);
    if (cdir >= 0)
        d.surroundingNodes(cdir);

    int nGrow=0;
    cs.define(d, nc, nGrow, Fab_allocate);
    cs.setVal(0.0);

    const BoxArray& grids = gbox[level];

    for (MFIter csmfi(cs); csmfi.isValid(); ++csmfi)
    {
        const Box&       grd   = grids[csmfi.index()];
        FArrayBox&       csfab = cs[csmfi];
        const FArrayBox& fnfab = fn[csmfi];

        switch(cdir)
        {
        case -1:
            FORT_AVERAGECC(csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
                           fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
                           grd.loVect(), grd.hiVect(), &nc);
            break;
        case 0:
        case 1:
        case 2:
            if ( harmavg )
            {
                FORT_HARMONIC_AVERAGEEC(csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
                                        fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
                                        grd.loVect(), grd.hiVect(), &nc, &cdir);
            }
            else
            {
                FORT_AVERAGEEC(csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
                               fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
                               grd.loVect(), grd.hiVect(), &nc, &cdir);
            }
            break;
        default:
            BoxLib::Error("MCLinOp::makeCoefficients(): bad coefficient coarsening direction!");
        }
    }
}
void
MCLinOp::applyBC (MultiFab& inout, int level, MCBC_Mode bc_mode)
{
    //
    // The inout MultiFab must have at least MCLinOp_grow ghost cells
    // for applyBC().
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // The inout MultiFab must have at least Periodic_BC_grow cells for the
    // algorithms taking care of periodic boundary conditions.
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // No coarsened boundary values, cannot apply inhomog at lev>0.
    //
    BL_ASSERT(!(level > 0 && bc_mode == MCInhomogeneous_BC));

    int flagden = 1; // fill in the bndry data and undrrelxr
    int flagbc  = 1; // with values
    if (bc_mode == MCHomogeneous_BC)
        flagbc = 0; // nodata if homog

    int nc = inout.nComp();
    BL_ASSERT(nc == numcomp);

    inout.setBndry(-1.e30);
    inout.FillBoundary();
    prepareForLevel(level);

    geomarray[level].FillPeriodicBoundary(inout,0,nc);
    //
    // Fill boundary cells.
    //
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(inout); mfi.isValid(); ++mfi)
    {
        const int gn = mfi.index();

        BL_ASSERT(gbox[level][gn] == inout.box(gn));

        const BndryData::RealTuple&      bdl = bgb.bndryLocs(gn);
        const Array< Array<BoundCond> >& bdc = bgb.bndryConds(gn);
        const MaskTuple&                 msk = maskvals[level][gn];

        for (OrientationIter oitr; oitr; ++oitr)
        {
            const Orientation face = oitr();
            FabSet& f  = (*undrrelxr[level])[face];
            FabSet& td = (*tangderiv[level])[face];
            int cdr(face);
            const FabSet& fs = bgb.bndryValues(face);
            Real bcl = bdl[face];
            const Array<BoundCond>& bc = bdc[face];
            const int* bct = (const int*) bc.dataPtr();
            const FArrayBox& fsfab = fs[gn];
            const Real* bcvalptr = fsfab.dataPtr();
            //
            // Way external derivs stored.
            //
            const Real* exttdptr = fsfab.dataPtr(numcomp);
            const int* fslo      = fsfab.loVect();
            const int* fshi      = fsfab.hiVect();
            FArrayBox& inoutfab  = inout[gn];
            FArrayBox& denfab    = f[gn];
            FArrayBox& tdfab     = td[gn];
#if BL_SPACEDIM==2
            int cdir = face.coordDir(), perpdir = -1;
            if (cdir == 0)
                perpdir = 1;
            else if (cdir == 1)
                perpdir = 0;
            else
                BoxLib::Abort("MCLinOp::applyBC(): bad logic");

            const Mask& m    = *msk[face];
            const Mask& mphi = *msk[Orientation(perpdir,Orientation::high)];
            const Mask& mplo = *msk[Orientation(perpdir,Orientation::low)];
            FORT_APPLYBC(&flagden, &flagbc, &maxorder,
                         inoutfab.dataPtr(), ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
                         &cdr, bct, &bcl,
                         bcvalptr, ARLIM(fslo), ARLIM(fshi),
                         m.dataPtr(),    ARLIM(m.loVect()),    ARLIM(m.hiVect()),
                         mphi.dataPtr(), ARLIM(mphi.loVect()), ARLIM(mphi.hiVect()),
                         mplo.dataPtr(), ARLIM(mplo.loVect()), ARLIM(mplo.hiVect()),
                         denfab.dataPtr(), ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
                         exttdptr, ARLIM(fslo), ARLIM(fshi),
                         tdfab.dataPtr(), ARLIM(tdfab.loVect()), ARLIM(tdfab.hiVect()),
                         inout.box(gn).loVect(), inout.box(gn).hiVect(),
                         &nc, h[level]);
#elif BL_SPACEDIM==3
            const Mask& mn = *msk[Orientation(1,Orientation::high)];
            const Mask& me = *msk[Orientation(0,Orientation::high)];
            const Mask& mw = *msk[Orientation(0,Orientation::low)];
            const Mask& ms = *msk[Orientation(1,Orientation::low)];
            const Mask& mt = *msk[Orientation(2,Orientation::high)];
            const Mask& mb = *msk[Orientation(2,Orientation::low)];
            FORT_APPLYBC(&flagden, &flagbc, &maxorder,
                         inoutfab.dataPtr(), ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
                         &cdr, bct, &bcl,
                         bcvalptr, ARLIM(fslo), ARLIM(fshi),
                         mn.dataPtr(), ARLIM(mn.loVect()), ARLIM(mn.hiVect()),
                         me.dataPtr(), ARLIM(me.loVect()), ARLIM(me.hiVect()),
                         mw.dataPtr(), ARLIM(mw.loVect()), ARLIM(mw.hiVect()),
                         ms.dataPtr(), ARLIM(ms.loVect()), ARLIM(ms.hiVect()),
                         mt.dataPtr(), ARLIM(mt.loVect()), ARLIM(mt.hiVect()),
                         mb.dataPtr(), ARLIM(mb.loVect()), ARLIM(mb.hiVect()),
                         denfab.dataPtr(), ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
                         exttdptr, ARLIM(fslo), ARLIM(fshi),
                         tdfab.dataPtr(), ARLIM(tdfab.loVect()), ARLIM(tdfab.hiVect()),
                         inout.box(gn).loVect(), inout.box(gn).hiVect(),
                         &nc, h[level]);
#endif
        }
    }

#if 0
    // This "probably" works, but is not strictly needed just because of the way Bill
    // coded up the tangential derivative stuff.  It's handy code though, so I want to
    // keep it around.

    // Clean up corners:
    // The problem here is that APPLYBC fills only grow cells normal to the boundary.
    // As a result, any corner cell on the boundary (either coarse-fine or fine-fine)
    // is not filled.  For coarse-fine, the operator adjusts itself, sliding away from
    // the box edge to avoid referencing that corner point.  On the physical boundary
    // though, the corner point is needed.  Particularly if a fine-fine boundary intersects
    // the physical boundary, since we want the stencil to be independent of the box
    // blocking.  FillBoundary operations won't fix the problem because the "good"
    // data we need is living in the grow region of adjacent fabs.  So, here we play
    // the usual games to treat the newly filled grow cells as "valid" data.

    // Note that we only need to do something where the grids touch the physical boundary.

    const Geometry& geomlev  = geomarray[level];
    const BoxArray& grids    = inout.boxArray();
    const Box&      domain   = geomlev.Domain();
    int             nGrow    = 1;
    int             src_comp = 0;
    int             num_comp = BL_SPACEDIM;

    // Let's do a quick check to see if we need to do anything at all here.
    BoxArray BIGba = BoxArray(grids).grow(nGrow);

    if (! (domain.contains(BIGba.minimalBox())) )
    {
        BoxArray boundary_pieces;
        Array<int> proc_idxs;
        Array<Array<int> > old_to_new(grids.size());
        const DistributionMapping& dmap = inout.DistributionMap();

        for (int d=0; d<BL_SPACEDIM; ++d)
        {
            if (! (geomlev.isPeriodic(d)) )
            {
                BoxArray gba = BoxArray(grids).grow(d,nGrow);
                for (int i=0; i<gba.size(); ++i)
                {
                    BoxArray new_pieces = BoxLib::boxComplement(gba[i],domain);
                    int size_new = new_pieces.size();
                    if (size_new>0)
                    {
                        int size_old = boundary_pieces.size();
                        boundary_pieces.resize(size_old+size_new);
                        proc_idxs.resize(boundary_pieces.size());
                        for (int j=0; j<size_new; ++j)
                        {
                            boundary_pieces.set(size_old+j,new_pieces[j]);
                            proc_idxs[size_old+j] = dmap[i];
                            old_to_new[i].push_back(size_old+j);
                        }
                    }
                }
            }
        }

        proc_idxs.push_back(ParallelDescriptor::MyProc());

        MultiFab boundary_data(boundary_pieces,num_comp,nGrow,
                               DistributionMapping(proc_idxs));

        for (MFIter mfi(inout); mfi.isValid(); ++mfi)
        {
            const FArrayBox& src_fab = inout[mfi];
            for (int j=0; j<old_to_new[mfi.index()].size(); ++j)
            {
                int new_box_idx = old_to_new[mfi.index()][j];
                boundary_data[new_box_idx].copy(src_fab,src_comp,0,num_comp);
            }
        }

        boundary_data.FillBoundary();

        // Use a hacked Geometry object to handle the periodic intersections for us.
        // Here, the "domain" is the plane of cells on non-periodic boundary faces,
        // and there may be cells over the periodic boundary in the remaining directions.
        // We do a Geometry::FillPeriodicBoundary on each non-periodic face to sync these up.
        if (geomlev.isAnyPeriodic())
        {
            Array<int> is_per(BL_SPACEDIM,0);
            for (int d=0; d<BL_SPACEDIM; ++d) {
                is_per[d] = geomlev.isPeriodic(d);
            }
            for (int d=0; d<BL_SPACEDIM; ++d)
            {
                if (! is_per[d])
                {
                    Box tmpLo = BoxLib::adjCellLo(geomlev.Domain(),d,1);
                    Geometry tmpGeomLo(tmpLo,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
                    tmpGeomLo.FillPeriodicBoundary(boundary_data);

                    Box tmpHi = BoxLib::adjCellHi(geomlev.Domain(),d,1);
                    Geometry tmpGeomHi(tmpHi,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
                    tmpGeomHi.FillPeriodicBoundary(boundary_data);
                }
            }
        }

        for (MFIter mfi(inout); mfi.isValid(); ++mfi)
        {
            int idx = mfi.index();
            FArrayBox& dst_fab = inout[mfi];
            for (int j=0; j<old_to_new[idx].size(); ++j)
            {
                int new_box_idx = old_to_new[mfi.index()][j];
                const FArrayBox& src_fab = boundary_data[new_box_idx];
                const Box& src_box = src_fab.box();

                BoxArray pieces_outside_domain = BoxLib::boxComplement(src_box,domain);
                for (int k=0; k<pieces_outside_domain.size(); ++k)
                {
                    const Box& outside = pieces_outside_domain[k] & dst_fab.box();
                    if (outside.ok())
                    {
                        dst_fab.copy(src_fab,outside,0,outside,src_comp,num_comp);
                    }
                }
            }
        }
    }
#endif
}
void
writePlotFile (const std::string& dir,
               const MultiFab&    mf,
               const Geometry&    geom)
{
    //
    // Only let 64 CPUs be writing at any one time.
    //
    VisMF::SetNOutFiles(64);
    //
    // Only the I/O processor makes the directory if it doesn't already exist.
    //
    if (ParallelDescriptor::IOProcessor())
        if (!BoxLib::UtilCreateDirectory(dir, 0755))
            BoxLib::CreateDirectoryFailed(dir);
    //
    // Force other processors to wait till directory is built.
    //
    ParallelDescriptor::Barrier();

    std::string HeaderFileName = dir + "/Header";

    VisMF::IO_Buffer io_buffer(VisMF::IO_Buffer_Size);

    std::ofstream HeaderFile;

    HeaderFile.rdbuf()->pubsetbuf(io_buffer.dataPtr(), io_buffer.size());

    if (ParallelDescriptor::IOProcessor())
    {
        //
        // Only the IOProcessor() writes to the header file.
        //
        HeaderFile.open(HeaderFileName.c_str(), std::ios::out|std::ios::trunc|std::ios::binary);
        if (!HeaderFile.good())
            BoxLib::FileOpenFailed(HeaderFileName);
        HeaderFile << "NavierStokes-V1.1\n";
        HeaderFile << mf.nComp() << '\n';
        for (int ivar = 1; ivar <= mf.nComp(); ivar++) {
            HeaderFile << "Variable " << ivar << "\n";
        }
        HeaderFile << BL_SPACEDIM << '\n';
        HeaderFile << 0 << '\n';
        HeaderFile << 0 << '\n';
        for (int i = 0; i < BL_SPACEDIM; i++)
            HeaderFile << geom.ProbLo(i) << ' ';
        HeaderFile << '\n';
        for (int i = 0; i < BL_SPACEDIM; i++)
            HeaderFile << geom.ProbHi(i) << ' ';
        HeaderFile << '\n';
        HeaderFile << '\n';
        HeaderFile << geom.Domain() << ' ';
        HeaderFile << '\n';
        HeaderFile << 0 << ' ';
        HeaderFile << '\n';
        for (int k = 0; k < BL_SPACEDIM; k++)
            HeaderFile << geom.CellSize()[k] << ' ';
        HeaderFile << '\n';
        HeaderFile << geom.Coord() << '\n';
        HeaderFile << "0\n";
    }
    //
    // Build the directory to hold the MultiFab at this level.
    // The name is relative to the directory containing the Header file.
    //
    static const std::string BaseName = "/Cell";

    std::string Level = BoxLib::Concatenate("Level_", 0, 1);
    //
    // Now for the full pathname of that directory.
    //
    std::string FullPath = dir;
    if (!FullPath.empty() && FullPath[FullPath.length()-1] != '/')
        FullPath += '/';
    FullPath += Level;
    //
    // Only the I/O processor makes the directory if it doesn't already exist.
    //
    if (ParallelDescriptor::IOProcessor())
        if (!BoxLib::UtilCreateDirectory(FullPath, 0755))
            BoxLib::CreateDirectoryFailed(FullPath);
    //
    // Force other processors to wait till directory is built.
    //
    ParallelDescriptor::Barrier();

    if (ParallelDescriptor::IOProcessor())
    {
        HeaderFile << 0 << ' ' << mf.boxArray().size() << ' ' << 0 << '\n';
        HeaderFile << 0 << '\n';

        for (int i = 0; i < mf.boxArray().size(); ++i)
        {
            RealBox loc = RealBox(mf.boxArray()[i],geom.CellSize(),geom.ProbLo());
            for (int n = 0; n < BL_SPACEDIM; n++)
                HeaderFile << loc.lo(n) << ' ' << loc.hi(n) << '\n';
        }

        std::string PathNameInHeader = Level;
        PathNameInHeader += BaseName;
        HeaderFile << PathNameInHeader << '\n';
    }
    //
    // Use the full pathname when naming the MultiFab.
    //
    std::string TheFullPath = FullPath;
    TheFullPath += BaseName;
    VisMF::Write(mf,TheFullPath);
}
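//
// A minimal usage sketch (hypothetical plotfile name): write a single-level
// plotfile for a MultiFab "soln" defined on geometry "geom".
//
// writePlotFile("plt00000", soln, geom);
//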
int
CGSolver::solve_cg (MultiFab&       sol,
                    const MultiFab& rhs,
                    Real            eps_rel,
                    Real            eps_abs,
                    LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cg()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab sorig(ba, ncomp, nghost, dm);
    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    MultiFab r1(ba, ncomp, nghost, dm);
    MultiFab z1(ba, ncomp, nghost, dm);
    MultiFab r2(ba, ncomp, nghost, dm);
    MultiFab z2(ba, ncomp, nghost, dm);

    MultiFab::Copy(sorig,sol,0,0,1,0);

    Lp.residual(r, rhs, sorig, lev, bc_mode);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC;

    Real       rnorm    = norm_inf(r);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << " CG: Initial error : " << rnorm0 << '\n';
    }

    const Real Lp_norm  = Lp.norm(0, lev);
    Real       sol_norm = 0;
    Real       rho_1    = 0;
    int        ret      = 0;
    int        nit      = 1;

    if ( rnorm == 0 || rnorm < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << " CG: niter = 0"
                      << ", rnorm = " << rnorm
                      << ", eps_rel*(Lp_norm*sol_norm + rnorm0 ) = " << eps_rel*(Lp_norm*sol_norm + rnorm0 )
                      << ", eps_abs = " << eps_abs << std::endl;
        }
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1)
        {
            z.setVal(0);

            jbb_precond(z,r,lev,Lp);
        }
        else
        {
            MultiFab::Copy(z,r,0,0,1,0);
        }

        Real rho = dotxy(z,r);

        if (nit == 1)
        {
            MultiFab::Copy(p,z,0,0,1,0);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }

        Lp.apply(q, p, lev, temp_bc_mode);

        Real alpha;
        if ( Real pw = dotxy(p,q) )
        {
            alpha = rho/pw;
        }
        else
        {
            ret = 1; break;
        }

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_cg:"
                      << " nit " << nit
                      << " rho " << rho
                      << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm    = norm_inf(r);
        sol_norm = norm_inf(sol);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << " CG: Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break;
#endif
        if ( rnorm > def_unstable_criterion*minrnorm )
        {
            ret = 2; break;
        }
        else if ( rnorm < minrnorm )
        {
            minrnorm = rnorm;
        }

        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << " CG: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs )
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_cg: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    }
    else
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
int
CGSolver::solve_cabicgstab (MultiFab&       sol,
                            const MultiFab& rhs,
                            Real            eps_rel,
                            Real            eps_abs,
                            LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cabicgstab()");

    BL_ASSERT(sol.nComp() == 1);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    Real  temp1[4*SSS_MAX+1];
    Real  temp2[4*SSS_MAX+1];
    Real  temp3[4*SSS_MAX+1];
    Real     Tp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real    Tpp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real     aj[4*SSS_MAX+1];
    Real     cj[4*SSS_MAX+1];
    Real     ej[4*SSS_MAX+1];
    Real   Tpaj[4*SSS_MAX+1];
    Real   Tpcj[4*SSS_MAX+1];
    Real  Tppaj[4*SSS_MAX+1];
    Real      G[4*SSS_MAX+1][4*SSS_MAX+1];    // Extracted from first 4*SSS+1 columns of Gg[][].  Indexed as [row][col].
    Real      g[4*SSS_MAX+1];                 // Extracted from last [4*SSS+1] column of Gg[][].
    Real     Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul().  Indexed as [row*(4*SSS+2) + col].
    //
    // If variable_SSS we "telescope" SSS.
    // We start with 1 and increase it up to SSS_MAX on the outer iterations.
    //
    if (variable_SSS) SSS = 1;

    zero(   aj, 4*SSS_MAX+1);
    zero(   cj, 4*SSS_MAX+1);
    zero(   ej, 4*SSS_MAX+1);
    zero( Tpaj, 4*SSS_MAX+1);
    zero( Tpcj, 4*SSS_MAX+1);
    zero(Tppaj, 4*SSS_MAX+1);
    zero(temp1, 4*SSS_MAX+1);
    zero(temp2, 4*SSS_MAX+1);
    zero(temp3, 4*SSS_MAX+1);

    SetMonomialBasis(Tp,Tpp,SSS);

    const int ncomp = 1, nghost = sol.nGrow();
    //
    // PR contains the matrix powers of p[] and r[].
    //
    // First 2*SSS+1 components are powers of p[].
    // Next  2*SSS   components are powers of r[].
    //
    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab PR(ba, 4*SSS_MAX+1, 0, dm);
    MultiFab  p(ba, ncomp, 0, dm);
    MultiFab  r(ba, ncomp, 0, dm);
    MultiFab rt(ba, ncomp, 0, dm);

    MultiFab tmp(ba, 4, nghost, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    BL_ASSERT(!r.contains_nan());

    MultiFab::Copy(rt,r,0,0,1,0);
    MultiFab::Copy( p,r,0,0,1,0);

    const Real           rnorm0        = norm_inf(r);
    Real                 delta         = dotxy(r,rt);
    const Real           L2_norm_of_rt = sqrt(delta);
    const LinOp::BC_Mode temp_bc_mode  = LinOp::Homogeneous_BC;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_CABiCGStab: Initial error (error0) = " << rnorm0 << '\n';
    }

    if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: niter = 0"
                      << ", rnorm = " << rnorm0
                      << ", delta = " << delta
                      << ", eps_abs = " << eps_abs << '\n';
        }
        return 0;
    }

    int niters = 0, ret = 0;

    Real L2_norm_of_resid = 0, atime = 0, gtime = 0;

    bool BiCGStabFailed = false, BiCGStabConverged = false;

    for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; )
    {
        const Real time1 = ParallelDescriptor::second();
        //
        // Compute the matrix powers on p[] & r[] (monomial basis).
        // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[].
        //
        MultiFab::Copy(PR,p,0,0,1,0);
        MultiFab::Copy(PR,r,0,2*SSS+1,1,0);
        BL_ASSERT(!PR.contains_nan(0,      1));
        BL_ASSERT(!PR.contains_nan(2*SSS+1,1));
        //
        // We use "tmp" to minimize the number of Lp.apply()s.
        // We do this by doing p & r together in a single call.
        //
        MultiFab::Copy(tmp,p,0,0,1,0);
        MultiFab::Copy(tmp,r,0,1,1,0);

        for (int n = 1; n < 2*SSS; n++)
        {
            Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2);

            MultiFab::Copy(tmp,tmp,2,0,2,0);

            MultiFab::Copy(PR,tmp,0,        n,1,0);
            MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0);

            BL_ASSERT(!PR.contains_nan(n,        1));
            BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1));
        }

        MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0);
        Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1);
        MultiFab::Copy(PR,tmp,1,2*SSS,1,0);

        BL_ASSERT(!PR.contains_nan(2*SSS-1,1));
        BL_ASSERT(!PR.contains_nan(2*SSS,  1));

        Real time2 = ParallelDescriptor::second();

        atime += (time2-time1);

        BuildGramMatrix(Gg, PR, rt, SSS);

        const Real time3 = ParallelDescriptor::second();

        gtime += (time3-time2);
        //
        // Form G[][] and g[] from Gg.
        //
        for (int i = 0, k = 0; i < 4*SSS+1; i++)
        {
            //
            // First 4*SSS+1 elements in each row go to G[][].
            //
            for (int j = 0; j < 4*SSS+1; j++)
                G[i][j] = Gg[k++];
            //
            // Last element in row goes to g[].
            //
            g[i] = Gg[k++];
        }

        zero(aj, 4*SSS+1); aj[0]       = 1;
        zero(cj, 4*SSS+1); cj[2*SSS+1] = 1;
        zero(ej, 4*SSS+1);

        for (int nit = 0; nit < SSS; nit++)
        {
            gemv( Tpaj,  Tp, aj, 4*SSS+1, 4*SSS+1);
            gemv( Tpcj,  Tp, cj, 4*SSS+1, 4*SSS+1);
            gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1);

            const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1);

            if ( g_dot_Tpaj == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 1; break;
            }

            const Real alpha = delta / g_dot_Tpaj;

            if ( std::isinf(alpha) )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 2; break;
            }

            axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            axpy(temp3, cj, -alpha, Tpaj, 4*SSS+1);

            const Real omega_numerator   = dot(temp3, temp2, 4*SSS+1);
            const Real omega_denominator = dot(temp1, temp2, 4*SSS+1);
            //
            // NOTE: omega_numerator/omega_denominator can be 0/x or 0/0, but should never be x/0.
            //
            // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj.
            // If omega_numerator==0, and ||s||!=0, then stabilization breakdown.
            //
            // Partial update of ej must happen before the check on omega to ensure forward progress !!!
            //
            axpy(ej, ej, alpha, aj, 4*SSS+1);
            //
            // ej has been updated, so consider that we've done an iteration since
            // even if we break out of the loop we'll be able to update the solution.
            //
            niters++;
            //
            // Calculate the norm of Saad's vector 's' to check intra s-step convergence.
            //
            axpy(temp1, cj,-alpha, Tpaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s));

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n';
                BiCGStabConverged = true; break;
            }

            if ( omega_denominator == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 3; break;
            }

            const Real omega = omega_numerator / omega_denominator;

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( omega == 0 )
                    std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n';
                if ( std::isinf(omega) )
                    std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n';
            }

            if ( omega == 0        ) { BiCGStabFailed = true; ret = 4; break; }
            if ( std::isinf(omega) ) { BiCGStabFailed = true; ret = 4; break; }
            //
            // Complete the update of ej & cj now that omega is known to be ok.
            //
            axpy(ej, ej,       omega,    cj, 4*SSS+1);
            axpy(ej, ej,-omega*alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj,      -omega,  Tpcj, 4*SSS+1);
            axpy(cj, cj,      -alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1);
            //
            // Do an early check of the residual to determine convergence.
            //
            gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1);
            //
            // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic.
            // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel).
            // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged.
            //
            const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0);

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n';
                BiCGStabConverged = true; break;
            }

            const Real delta_next = dot(g, cj, 4*SSS+1);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( delta_next == 0 )
                    std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n';
                if ( std::isinf(delta_next) )
                    std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf?
            if ( delta_next == 0         ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown...

            const Real beta = (delta_next/delta)*(alpha/omega);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( beta == 0 )
                    std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n';
                if ( std::isinf(beta) )
                    std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf?
            if ( beta == 0        ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0?  can't make further progress(?)

            axpy(aj, cj,        beta,   aj, 4*SSS+1);
            axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1);

            delta = delta_next;
        }
        //
        // Update iterates.
        //
        for (int i = 0; i < 4*SSS+1; i++)
            sxay(sol,sol,ej[i],PR,i);

        MultiFab::Copy(p,PR,0,0,1,0);
        p.mult(aj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(p,p,aj[i],PR,i);

        MultiFab::Copy(r,PR,0,0,1,0);
        r.mult(cj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(r,r,cj[i],PR,i);

        if ( !BiCGStabFailed && !BiCGStabConverged )
        {
            m += SSS;

            if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); }
        }
    }

    if ( verbose > 0 )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: Final: Iteration "
                      << std::setw(4) << niters
                      << " rel. err. " << L2_norm_of_resid << '\n';
        }

        if ( verbose > 1 )
        {
            Real tmp[2] = { atime, gtime };

            ParallelDescriptor::ReduceRealMax(tmp,2,color());

            if ( ParallelDescriptor::IOProcessor(color()) )
            {
                Spacer(std::cout, lev);
                std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0]
                          << ", gram time: " << tmp[1] << '\n';
            }
        }
    }

    if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged)
    {
        if ( L2_norm_of_resid > L2_norm_of_rt )
        {
            if ( ParallelDescriptor::IOProcessor(color()) )
                BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!");
            //
            // Return code 8 tells the MultiGrid driver to zero out the solution!
            //
            ret = 8;
        }
        else
        {
            //
            // Return codes 1-7 tell the MultiGrid driver to smooth the solution!
            //
            ret = 7;
        }
    }

    return ret;
}
void
Nyx::sdc_reactions (MultiFab& S_old, MultiFab& S_new, MultiFab& D_new,
                    MultiFab& hydro_src, MultiFab& IR,
                    Real delta_time, Real a_old, Real a_new, int sdc_iter)
{
    BL_PROFILE("Nyx::sdc_reactions()");

    const Real* dx = geom.CellSize();

    // First reset internal energy before the call to compute_temp.
    MultiFab reset_e_src(S_new.boxArray(), S_new.DistributionMap(), 1, NUM_GROW);
    reset_e_src.setVal(0.0);

    reset_internal_energy(S_new,D_new,reset_e_src);
    compute_new_temp     (S_new,D_new);

#ifndef FORCING
    {
        const Real z = 1.0/a_old - 1.0;
        fort_interp_to_this_z(&z);
    }
#endif

    int min_iter = 100000;
    int max_iter = 0;

    int min_iter_grid, max_iter_grid;

    // TODO: consider adding ifdefs for whether CVODE is compiled in for these statements.
    if (heat_cool_type == 3)
    {
#ifdef _OPENMP
#pragma omp parallel private(min_iter_grid,max_iter_grid) reduction(min:min_iter) reduction(max:max_iter)
#endif
        for (MFIter mfi(S_old,true); mfi.isValid(); ++mfi)
        {
            // Note that this "bx" is only the valid region (unlike for Strang).
            const Box& bx = mfi.tilebox();

            min_iter_grid = 100000;
            max_iter_grid = 0;

            integrate_state_with_source(bx.loVect(), bx.hiVect(),
                                        BL_TO_FORTRAN(S_old[mfi]),
                                        BL_TO_FORTRAN(S_new[mfi]),
                                        BL_TO_FORTRAN(D_new[mfi]),
                                        BL_TO_FORTRAN(hydro_src[mfi]),
                                        BL_TO_FORTRAN(reset_e_src[mfi]),
                                        BL_TO_FORTRAN(IR[mfi]),
                                        &a_old, &delta_time,
                                        &min_iter_grid, &max_iter_grid);

            min_iter = std::min(min_iter,min_iter_grid);
            max_iter = std::max(max_iter,max_iter_grid);
        }
    }
    else if (heat_cool_type == 5)
    {
#ifdef _OPENMP
#pragma omp parallel private(min_iter_grid,max_iter_grid) reduction(min:min_iter) reduction(max:max_iter)
#endif
        for (MFIter mfi(S_old,true); mfi.isValid(); ++mfi)
        {
            // Note that this "bx" is only the valid region (unlike for Strang).
            const Box& bx = mfi.tilebox();

            min_iter_grid = 100000;
            max_iter_grid = 0;

            integrate_state_fcvode_with_source(bx.loVect(), bx.hiVect(),
                                               BL_TO_FORTRAN(S_old[mfi]),
                                               BL_TO_FORTRAN(S_new[mfi]),
                                               BL_TO_FORTRAN(D_new[mfi]),
                                               BL_TO_FORTRAN(hydro_src[mfi]),
                                               BL_TO_FORTRAN(reset_e_src[mfi]),
                                               BL_TO_FORTRAN(IR[mfi]),
                                               &a_old, &delta_time,
                                               &min_iter_grid, &max_iter_grid);

            min_iter = std::min(min_iter,min_iter_grid);
            max_iter = std::max(max_iter,max_iter_grid);
        }
    }

    ParallelDescriptor::ReduceIntMax(max_iter);
    ParallelDescriptor::ReduceIntMin(min_iter);

    amrex::Print() << "Min/Max Number of Iterations in SDC: "
                   << min_iter << " " << max_iter << std::endl;
}
void
solve_with_HPGMG(MultiFab& soln, MultiFab& gphi,
                 Real a, Real b, MultiFab& alpha,
                 PArray<MultiFab>& beta, MultiFab& beta_cc, MultiFab& rhs,
                 const BoxArray& bs, const Geometry& geom, int n_cell)
{
    BndryData bd(bs, 1, geom);
    set_boundary(bd, rhs, 0);

    ABecLaplacian abec_operator(bd, dx);
    abec_operator.setScalars(a, b);
    abec_operator.setCoefficients(alpha, beta);

    int minCoarseDim;
    if (domain_boundary_condition == BC_PERIODIC)
    {
        minCoarseDim = 2; // avoid problems with black box calculation of D^{-1} for Poisson with periodic BC's on a 1^3 grid
    }
    else
    {
        minCoarseDim = 1; // assumes you can drop order on the boundaries
    }

    level_type level_h;
    mg_type MG_h;

    int numVectors = 12;

    int my_rank = 0, num_ranks = 1;
#ifdef BL_USE_MPI
    MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
#endif /* BL_USE_MPI */

    const double h0 = dx[0];

    // Create the geometric structure of the HPGMG grid using the RHS MultiFab
    // as a template.  This doesn't copy any actual data.
    CreateHPGMGLevel(&level_h, rhs, n_cell, max_grid_size, my_rank, num_ranks,
                     domain_boundary_condition, numVectors, h0);

    // Set up the coefficients for the linear operator L.
    SetupHPGMGCoefficients(a, b, alpha, beta_cc, &level_h);

    // Now that the HPGMG grid is built, populate it with RHS data.
    ConvertToHPGMGLevel(rhs, n_cell, max_grid_size, &level_h, VECTOR_F);

#ifdef USE_HELMHOLTZ
    if (ParallelDescriptor::IOProcessor()) {
        std::cout << "Creating Helmholtz (a=" << a << ", b=" << b << ") test problem" << std::endl;
    }
#else
    if (ParallelDescriptor::IOProcessor()) {
        std::cout << "Creating Poisson (a=" << a << ", b=" << b << ") test problem" << std::endl;
    }
#endif /* USE_HELMHOLTZ */

    if (level_h.boundary_condition.type == BC_PERIODIC)
    {
        double average_value_of_f = mean(&level_h, VECTOR_F);
        if (average_value_of_f != 0.0)
        {
            if (ParallelDescriptor::IOProcessor())
            {
                std::cerr << "WARNING: Periodic boundary conditions, but f does not sum to zero... mean(f)="
                          << average_value_of_f << std::endl;
            }
            //shift_vector(&level_h,VECTOR_F,VECTOR_F,-average_value_of_f);
        }
    }
    //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    rebuild_operator(&level_h,NULL,a,b); // i.e. calculate Dinv and lambda_max
    MGBuild(&MG_h,&level_h,a,b,minCoarseDim,ParallelDescriptor::Communicator()); // build the multigrid hierarchy
    //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    if (ParallelDescriptor::IOProcessor())
        std::cout << std::endl << std::endl << "===== STARTING SOLVE =====" << std::endl << std::flush;

    MGResetTimers(&MG_h);
    zero_vector(MG_h.levels[0], VECTOR_U);
#ifdef USE_FCYCLES
    FMGSolve(&MG_h, 0, VECTOR_U, VECTOR_F, a, b, tolerance_abs, tolerance_rel);
#else
    MGSolve(&MG_h, 0, VECTOR_U, VECTOR_F, a, b, tolerance_abs, tolerance_rel);
#endif /* USE_FCYCLES */

    MGPrintTiming(&MG_h, 0); // don't include the error check in the timing results
    //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    if (ParallelDescriptor::IOProcessor())
        std::cout << std::endl << std::endl
                  << "===== Performing Richardson error analysis ==========================" << std::endl;
    // Solve A^h u^h = f^h, A^2h u^2h = f^2h, and A^4h u^4h = f^4h,
    // then do the error analysis.
    MGResetTimers(&MG_h);
    const double dtol = tolerance_abs;
    const double rtol = tolerance_rel;
    for (int l = 0; l < 3; l++)
    {
        if (l > 0)
            restriction(MG_h.levels[l],VECTOR_F,MG_h.levels[l-1],VECTOR_F,RESTRICT_CELL);
        zero_vector(MG_h.levels[l],VECTOR_U);
#ifdef USE_FCYCLES
        FMGSolve(&MG_h,l,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
#else
        MGSolve(&MG_h,l,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
#endif
    }
    richardson_error(&MG_h,0,VECTOR_U);

    // Now convert the solution from HPGMG back into the soln MultiFab.
    ConvertFromHPGMGLevel(soln, &level_h, VECTOR_U);

    const double norm_from_HPGMG = norm(&level_h, VECTOR_U);
    const double mean_from_HPGMG = mean(&level_h, VECTOR_U);
    const Real norm0 = soln.norm0();
    const Real norm2 = soln.norm2();
    if (ParallelDescriptor::IOProcessor())
    {
        std::cout << "mean from HPGMG: " << mean_from_HPGMG << std::endl;
        std::cout << "norm from HPGMG: " << norm_from_HPGMG << std::endl;
        std::cout << "norm0 of soln copied to MF: " << norm0 << std::endl;
        std::cout << "norm2 of soln copied to MF: " << norm2 << std::endl;
    }

    // Write the MF to disk for comparison with the in-house solver.
    if (plot_soln)
    {
        writePlotFile("SOLN-HPGMG", soln, geom);
    }
    //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    MGDestroy(&MG_h);
    destroy_level(&level_h);
    //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    PArray<MultiFab> grad_phi(BL_SPACEDIM, PArrayManage);
    for (int n = 0; n < BL_SPACEDIM; ++n)
        grad_phi.set(n, new MultiFab(BoxArray(soln.boxArray()).surroundingNodes(n), 1, 0));

#if (BL_SPACEDIM == 2)
    abec_operator.compFlux(grad_phi[0],grad_phi[1],soln);
#elif (BL_SPACEDIM == 3)
    abec_operator.compFlux(grad_phi[0],grad_phi[1],grad_phi[2],soln);
#endif

    // Average edge-centered gradients to cell centers.
    BoxLib::average_face_to_cellcenter(gphi, grad_phi, geom);
}
void
LinOp::makeCoefficients (MultiFab& cs, const MultiFab& fn, int level)
{
    BL_PROFILE("LinOp::makeCoefficients()");

    int nc = 1;
    //
    // Determine index type of incoming MultiFab.
    //
    const IndexType iType(fn.boxArray().ixType());
    const IndexType cType(D_DECL(IndexType::CELL, IndexType::CELL, IndexType::CELL));
    const IndexType xType(D_DECL(IndexType::NODE, IndexType::CELL, IndexType::CELL));
    const IndexType yType(D_DECL(IndexType::CELL, IndexType::NODE, IndexType::CELL));
#if (BL_SPACEDIM == 3)
    const IndexType zType(D_DECL(IndexType::CELL, IndexType::CELL, IndexType::NODE));
#endif
    int cdir;
    if (iType == cType)
    {
        cdir = -1;
    }
    else if (iType == xType)
    {
        cdir = 0;
    }
    else if (iType == yType)
    {
        cdir = 1;
#if (BL_SPACEDIM == 3)
    }
    else if (iType == zType)
    {
        cdir = 2;
#endif
    }
    else
    {
        BoxLib::Error("LinOp::makeCoefficients: Bad index type");
    }

    BoxArray d(gbox[level]);
    if (cdir >= 0)
        d.surroundingNodes(cdir);
    //
    // Only single-component solves supported (verified) by this class.
    //
    const int nComp=1;
    const int nGrow=0;
    cs.define(d, nComp, nGrow, Fab_allocate);

    const bool tiling = true;

    switch (cdir)
    {
    case -1:
#ifdef _OPENMP
#pragma omp parallel
#endif
        for (MFIter csmfi(cs,tiling); csmfi.isValid(); ++csmfi)
        {
            const Box&       tbx   = csmfi.tilebox();
            FArrayBox&       csfab = cs[csmfi];
            const FArrayBox& fnfab = fn[csmfi];

            FORT_AVERAGECC(csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
                           fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
                           tbx.loVect(), tbx.hiVect(), &nc);
        }
        break;
    case 0:
    case 1:
    case 2:
        if (harmavg)
        {
#ifdef _OPENMP
#pragma omp parallel
#endif
            for (MFIter csmfi(cs,tiling); csmfi.isValid(); ++csmfi)
            {
                const Box&       tbx   = csmfi.tilebox();
                FArrayBox&       csfab = cs[csmfi];
                const FArrayBox& fnfab = fn[csmfi];

                FORT_HARMONIC_AVERAGEEC(csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
                                        fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
                                        tbx.loVect(), tbx.hiVect(), &nc, &cdir);
            }
        }
        else
        {
#ifdef _OPENMP
#pragma omp parallel
#endif
            for (MFIter csmfi(cs,tiling); csmfi.isValid(); ++csmfi)
            {
                const Box&       tbx   = csmfi.tilebox();
                FArrayBox&       csfab = cs[csmfi];
                const FArrayBox& fnfab = fn[csmfi];

                FORT_AVERAGEEC(csfab.dataPtr(), ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
                               fnfab.dataPtr(), ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
                               tbx.loVect(), tbx.hiVect(), &nc, &cdir);
            }
        }
        break;
    default:
        BoxLib::Error("LinOp:: bad coefficient coarsening direction!");
    }
}
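//
// A minimal sketch (not from the original source) of the index-type
// convention used above: a cell-centered BoxArray becomes face-centered in
// direction d via surroundingNodes(d), which is how the coefficient arrays
// for direction d get their staggering.  "cell_centered_grids" is a
// hypothetical BoxArray.
//
// BoxArray edges_x(cell_centered_grids);
// edges_x.surroundingNodes(0); // now NODE in x, CELL in the other directions
//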
int
CGSolver::solve_bicgstab (MultiFab&       sol,
                          const MultiFab& rhs,
                          Real            eps_rel,
                          Real            eps_abs,
                          LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_bicgstab()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab ph(ba, ncomp, nghost, dm);
    MultiFab sh(ba, ncomp, nghost, dm);

    MultiFab sorig(ba, ncomp, 0, dm);
    MultiFab p    (ba, ncomp, 0, dm);
    MultiFab r    (ba, ncomp, 0, dm);
    MultiFab s    (ba, ncomp, 0, dm);
    MultiFab rh   (ba, ncomp, 0, dm);
    MultiFab v    (ba, ncomp, 0, dm);
    MultiFab t    (ba, ncomp, 0, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    MultiFab::Copy(sorig,sol,0,0,1,0);
    MultiFab::Copy(rh,   r,  0,0,1,0);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC;

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    Real rnorm = norm_inf(r);
#else
    //
    // Calculate the local values of these norms & reduce their values together.
    //
    Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) };

    ParallelDescriptor::ReduceRealMax(vals,2,color());

    Real       rnorm    = vals[0];
    const Real Lp_norm  = vals[1];
    Real       sol_norm = 0;
#endif
    const Real rnorm0 = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Initial error (error0) = " << rnorm0 << '\n';
    }
    int ret = 0, nit = 1;
    Real rho_1 = 0, alpha = 0, omega = 0;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: niter = 0"
                      << ", rnorm = " << rnorm
                      << ", eps_abs = " << eps_abs << std::endl;
        }
        return ret;
    }

    for (; nit <= maxiter; ++nit)
    {
        const Real rho = dotxy(rh,r);
        if ( rho == 0 )
        {
            ret = 1; break;
        }
        if ( nit == 1 )
        {
            MultiFab::Copy(p,r,0,0,1,0);
        }
        else
        {
            const Real beta = (rho/rho_1)*(alpha/omega);
            sxay(p, p, -omega, v);
            sxay(p, r,   beta, p);
        }
        if ( use_mg_precond )
        {
            ph.setVal(0);
            mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            ph.setVal(0);
            Lp.jacobi_smooth(ph, p, lev, temp_bc_mode);
        }
        else
        {
            MultiFab::Copy(ph,p,0,0,1,0);
        }
        Lp.apply(v, ph, lev, temp_bc_mode);

        if ( Real rhTv = dotxy(rh,v) )
        {
            alpha = rho/rhTv;
        }
        else
        {
            ret = 2; break;
        }
        sxay(sol, sol,  alpha, ph);
        sxay(s,   r,   -alpha,  v);

        rnorm = norm_inf(s);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Half Iter "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( use_mg_precond )
        {
            sh.setVal(0);
            mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            sh.setVal(0);
            Lp.jacobi_smooth(sh, s, lev, temp_bc_mode);
        }
        else
        {
            MultiFab::Copy(sh,s,0,0,1,0);
        }
        Lp.apply(t, sh, lev, temp_bc_mode);
        //
        // This is a little funky.  I want to elide one of the reductions
        // in the following two dotxy()s.  We do that by calculating the "local"
        // values and then reducing the two local values at the same time.
        //
        Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) };

        ParallelDescriptor::ReduceRealSum(vals,2,color());

        if ( vals[0] )
        {
            omega = vals[1]/vals[0];
        }
        else
        {
            ret = 3; break;
        }
        sxay(sol, sol,  omega, sh);
        sxay(r,   s,   -omega,  t);

        rnorm = norm_inf(r);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Iteration "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( omega == 0 )
        {
            ret = 4; break;
        }
        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Final: Iteration "
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs)
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    }
    else
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}