Example No. 1
    void average_down (MultiFab& S_fine, MultiFab& S_crse, 
                       int scomp, int ncomp, const IntVect& ratio)
    {
        BL_ASSERT(S_crse.nComp() == S_fine.nComp());

        //
        // Coarsen() the fine stuff on processors owning the fine data.
        //
        BoxArray crse_S_fine_BA = S_fine.boxArray();
        crse_S_fine_BA.coarsen(ratio);

        MultiFab crse_S_fine(crse_S_fine_BA,ncomp,0);

#ifdef _OPENMP
#pragma omp parallel
#endif
        for (MFIter mfi(crse_S_fine,true); mfi.isValid(); ++mfi)
        {
            //  NOTE: The tilebox is defined at the coarse level.
            const Box& tbx = mfi.tilebox();

            //  NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
            //        because the crse fab is a temporary which was made starting at comp 0, it is
            //        not part of the actual crse multifab which came in.

            BL_FORT_PROC_CALL(BL_AVGDOWN,bl_avgdown)
                (tbx.loVect(), tbx.hiVect(),
                 BL_TO_FORTRAN_N(S_fine[mfi],scomp),
                 BL_TO_FORTRAN_N(crse_S_fine[mfi],0),
                 ratio.getVect(),&ncomp);
        }

        S_crse.copy(crse_S_fine,0,scomp,ncomp);
    }
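
A minimal calling sketch for the routine above (the names fine and crse and the choice of ratio are assumptions for illustration, not from the source; it presumes the fine BoxArray coarsens evenly by ratio):

    // Hypothetical two-level setup: average every component of fine down to crse.
    IntVect ratio(D_DECL(2,2,2));               // refinement ratio between the levels
    BoxLib::average_down(fine, crse, 0, fine.nComp(), ratio);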
Example No. 2
    void average_down (MultiFab& S_fine, MultiFab& S_crse, const Geometry& fgeom, const Geometry& cgeom, 
                       int scomp, int ncomp, const IntVect& ratio)
    {
  
        if (S_fine.is_nodal() || S_crse.is_nodal())
        {
            BoxLib::Error("Can't use BoxLib::average_down for nodal MultiFab!");
        }

#if (BL_SPACEDIM == 3)
	BoxLib::average_down(S_fine, S_crse, scomp, ncomp, ratio);
	return;
#else

        BL_ASSERT(S_crse.nComp() == S_fine.nComp());

        //
        // Coarsen() the fine stuff on processors owning the fine data.
        //
	const BoxArray& fine_BA = S_fine.boxArray();
        BoxArray crse_S_fine_BA = fine_BA; 
	crse_S_fine_BA.coarsen(ratio);

        MultiFab crse_S_fine(crse_S_fine_BA,ncomp,0);

	MultiFab fvolume;
	fgeom.GetVolume(fvolume, fine_BA, 0);

#ifdef _OPENMP
#pragma omp parallel
#endif
        for (MFIter mfi(crse_S_fine,true); mfi.isValid(); ++mfi)
        {
            //  NOTE: The tilebox is defined at the coarse level.
            const Box& tbx = mfi.tilebox();

            //  NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
            //        because the crse fab is a temporary which was made starting at comp 0, it is
            //        not part of the actual crse multifab which came in.
            BL_FORT_PROC_CALL(BL_AVGDOWN_WITH_VOL,bl_avgdown_with_vol)
                (tbx.loVect(), tbx.hiVect(),
                 BL_TO_FORTRAN_N(S_fine[mfi],scomp),
                 BL_TO_FORTRAN_N(crse_S_fine[mfi],0),
                 BL_TO_FORTRAN(fvolume[mfi]),
                 ratio.getVect(),&ncomp);
	}

        S_crse.copy(crse_S_fine,0,scomp,ncomp);
#endif
    }
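
The early return above is a design choice worth spelling out: the volume-weighted form only matters where cell volumes vary across the domain (e.g. 2D r-z coordinates), and in 3D, where the coordinates are presumably Cartesian with uniform cell volumes, it simply falls through to the unweighted version of Example No. 1. A hedged usage sketch (fgeom/cgeom are the per-level Geometry objects, assumed built elsewhere):

    // Volume-weighted average down; in 3D this dispatches to the
    // unweighted BoxLib::average_down automatically.
    BoxLib::average_down(fine, crse, fgeom, cgeom, 0, fine.nComp(), ratio);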
Example No. 3
void
MultiGrid::average (MultiFab&       c,
                    const MultiFab& f)
{
    BL_PROFILE("MultiGrid::average()");
    //
    // Use Fortran function to average down (restrict) f to c.
    //
    const bool tiling = true;
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter cmfi(c,tiling); cmfi.isValid(); ++cmfi)
    {
        BL_ASSERT(c.boxArray().get(cmfi.index()) == cmfi.validbox());

        const int        nc   = c.nComp();
        const Box&       bx   = cmfi.tilebox();
        FArrayBox&       cfab = c[cmfi];
        const FArrayBox& ffab = f[cmfi];

        FORT_AVERAGE(cfab.dataPtr(),
                     ARLIM(cfab.loVect()), ARLIM(cfab.hiVect()),
                     ffab.dataPtr(),
                     ARLIM(ffab.loVect()), ARLIM(ffab.hiVect()),
                     bx.loVect(), bx.hiVect(), &nc);
    }
}
Example No. 4
void
MultiGrid::interpolate (MultiFab&       f,
                        const MultiFab& c)
{
    BL_PROFILE("MultiGrid::interpolate()");
    //
    // Use fortran function to interpolate up (prolong) c to f
    // Note: returns f=f+P(c) , i.e. ADDS interp'd c to f.
    //
    // OMP over boxes
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(c); mfi.isValid(); ++mfi)
    {
        const int           k = mfi.index();
        const Box&         bx = c.boxArray()[k];
        const int          nc = f.nComp();
        const FArrayBox& cfab = c[mfi];
        FArrayBox&       ffab = f[mfi];

        FORT_INTERP(ffab.dataPtr(),
                    ARLIM(ffab.loVect()), ARLIM(ffab.hiVect()),
                    cfab.dataPtr(),
                    ARLIM(cfab.loVect()), ARLIM(cfab.hiVect()),
                    bx.loVect(), bx.hiVect(), &nc);
    }
}
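
Taken together, average (Example No. 3) and interpolate (Example No. 4) are the restriction and additive prolongation operators of the multigrid cycle; schematically, in my notation rather than the source's:

    c \leftarrow R f          % average: restrict fine onto coarse
    f \leftarrow f + P c      % interpolate: add the prolonged correction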
Example No. 5
    void average_face_to_cellcenter (MultiFab& cc, int dcomp, const std::vector<MultiFab*>& fc)
    {
	BL_ASSERT(cc.nComp() >= dcomp + BL_SPACEDIM);
	BL_ASSERT(fc.size() == BL_SPACEDIM);
	BL_ASSERT(fc[0]->nComp() == 1);

	Real dx[3] = {1.0,1.0,1.0};
	Real problo[3] = {0.,0.,0.};
	int coord_type = 0;

#ifdef _OPENMP
#pragma omp parallel
#endif
	for (MFIter mfi(cc,true); mfi.isValid(); ++mfi) 
	{
	    const Box& bx = mfi.tilebox();

	    BL_FORT_PROC_CALL(BL_AVG_FC_TO_CC,bl_avg_fc_to_cc)
		(bx.loVect(), bx.hiVect(),
		 BL_TO_FORTRAN_N(cc[mfi],dcomp),
		 D_DECL(BL_TO_FORTRAN((*fc[0])[mfi]),
			BL_TO_FORTRAN((*fc[1])[mfi]),
			BL_TO_FORTRAN((*fc[2])[mfi])),
		 dx, problo, coord_type);
	}
    }
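
A sketch of how the face-centered arguments might be built (everything here, including the names ba and cc and the raw news, is illustrative only):

    // A face-centered BoxArray is the cell-centered one made nodal in one
    // direction; one single-component MultiFab per direction.
    std::vector<MultiFab*> fc(BL_SPACEDIM);
    for (int d = 0; d < BL_SPACEDIM; ++d) {
        BoxArray fba = ba;               // ba: the cell-centered BoxArray
        fba.surroundingNodes(d);         // nodal in direction d == face-centered
        fc[d] = new MultiFab(fba, 1, 0);
    }
    // ... fill the face data, then average onto cc starting at component 0:
    BoxLib::average_face_to_cellcenter(cc, 0, fc);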
Example No. 6
    void average_face_to_cellcenter (MultiFab& cc, const PArray<MultiFab>& fc, const Geometry& geom)
    {
	BL_ASSERT(cc.nComp() >= BL_SPACEDIM);
	BL_ASSERT(fc.size() == BL_SPACEDIM);
	BL_ASSERT(fc[0].nComp() == 1); // We only expect fc to have the gradient perpendicular to the face

	const Real* dx     = geom.CellSize();
	const Real* problo = geom.ProbLo();
	int coord_type = Geometry::Coord();

#ifdef _OPENMP
#pragma omp parallel
#endif
	for (MFIter mfi(cc,true); mfi.isValid(); ++mfi) 
	{
	    const Box& bx = mfi.tilebox();

	    BL_FORT_PROC_CALL(BL_AVG_FC_TO_CC,bl_avg_fc_to_cc)
		(bx.loVect(), bx.hiVect(),
		 BL_TO_FORTRAN(cc[mfi]),
		 D_DECL(BL_TO_FORTRAN(fc[0][mfi]),
			BL_TO_FORTRAN(fc[1][mfi]),
			BL_TO_FORTRAN(fc[2][mfi])),
		 dx, problo, coord_type);
	}
    }
Example No. 7
void
MCLinOp::residual (MultiFab&       residL,
		   const MultiFab& rhsL,
		   MultiFab&       solnL,
		   int             level,
		   MCBC_Mode       bc_mode)
{
    apply(residL, solnL, level, bc_mode);

    for (MFIter solnLmfi(solnL); solnLmfi.isValid(); ++solnLmfi)
    {
	int              nc     = residL.nComp();
        const Box&       vbox   = solnLmfi.validbox();
        FArrayBox&       resfab = residL[solnLmfi];
        const FArrayBox& rhsfab = rhsL[solnLmfi];
	FORT_RESIDL(
	    resfab.dataPtr(), 
            ARLIM(resfab.loVect()), ARLIM(resfab.hiVect()),
	    rhsfab.dataPtr(), 
            ARLIM(rhsfab.loVect()), ARLIM(rhsfab.hiVect()),
	    resfab.dataPtr(), 
            ARLIM(resfab.loVect()), ARLIM(resfab.hiVect()),
	    vbox.loVect(), vbox.hiVect(), &nc);
    }
}
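
Since apply() leaves L(solnL) in residL, and resfab is then handed to the kernel as both an input and the output, FORT_RESIDL presumably performs the in-place update

    residL \leftarrow rhsL - L(solnL)

i.e. the standard residual r = b - L x.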
Example No. 8
void
Nyx::strang_second_step (Real time, Real dt, MultiFab& S_new, MultiFab& D_new)
{
    BL_PROFILE("Nyx::strang_second_step()");
    Real half_dt = 0.5*dt;
    int  min_iter = 100000;
    int  max_iter =      0;

    int min_iter_grid;
    int max_iter_grid;

    // Set a at the half of the time step in the second strang
    const Real a = get_comoving_a(time-half_dt);

    MultiFab reset_e_src(S_new.boxArray(), S_new.DistributionMap(), 1, NUM_GROW);
    reset_e_src.setVal(0.0);
    reset_internal_energy(S_new,D_new,reset_e_src);
    compute_new_temp     (S_new,D_new);

#ifndef FORCING
    {
      const Real z = 1.0/a - 1.0;
      fort_interp_to_this_z(&z);
    }
#endif

#ifdef _OPENMP
#pragma omp parallel private(min_iter_grid,max_iter_grid) reduction(min:min_iter) reduction(max:max_iter)
#endif
    for (MFIter mfi(S_new,true); mfi.isValid(); ++mfi)
    {
        // Here bx is the tile region (tiling is enabled in the MFIter above)
        const Box& bx = mfi.tilebox();

        min_iter_grid = 100000;
        max_iter_grid =      0;

        integrate_state
            (bx.loVect(), bx.hiVect(), 
             BL_TO_FORTRAN(S_new[mfi]),
             BL_TO_FORTRAN(D_new[mfi]),
             &a, &half_dt, &min_iter_grid, &max_iter_grid);

        if (S_new[mfi].contains_nan(bx,0,S_new.nComp()))
        {
            std::cout << "NANS IN THIS GRID " << bx << std::endl;
        }

        min_iter = std::min(min_iter,min_iter_grid);
        max_iter = std::max(max_iter,max_iter_grid);
    }

    ParallelDescriptor::ReduceIntMax(max_iter);
    ParallelDescriptor::ReduceIntMin(min_iter);

    if (heat_cool_type == 1)
        if (ParallelDescriptor::IOProcessor())
            std::cout << "Min/Max Number of Iterations in Second Strang: " << min_iter << " " << max_iter << std::endl;
}
Example No. 9
void advance (MultiFab& old_phi, MultiFab& new_phi, PArray<MultiFab>& flux,
	      Real time, Real dt, const Geometry& geom, PhysBCFunct& physbcf,
	      BCRec& bcr)
{
  // Fill the ghost cells of each grid from the other grids,
  // including periodic domain boundaries.
  old_phi.FillBoundary(geom.periodicity());

  // Fill physical boundaries
  physbcf.FillBoundary(old_phi, time);

  int Ncomp = old_phi.nComp();
  int ng_p = old_phi.nGrow();
  int ng_f = flux[0].nGrow();

  const Real* dx = geom.CellSize();

  //
  // Note that this simple example is not optimized.
  // The following two MFIter loops could be merged,
  // in which case the flux MultiFab would not be needed.
  // 

  // Compute fluxes one grid at a time
  for ( MFIter mfi(old_phi); mfi.isValid(); ++mfi )
  {
    const Box& bx = mfi.validbox();

    compute_flux(old_phi[mfi].dataPtr(),
		 &ng_p,
		 flux[0][mfi].dataPtr(),
		 flux[1][mfi].dataPtr(),
#if (BL_SPACEDIM == 3)   
		 flux[2][mfi].dataPtr(),
#endif
		 &ng_f, bx.loVect(), bx.hiVect(), 
		 (geom.Domain()).loVect(),
		 (geom.Domain()).hiVect(),
		 bcr.vect(),
		 &dx[0]);
  }

  // Advance the solution one grid at a time
  for ( MFIter mfi(old_phi); mfi.isValid(); ++mfi )
  {
    const Box& bx = mfi.validbox();

    update_phi(old_phi[mfi].dataPtr(),
	       new_phi[mfi].dataPtr(),
	       &ng_p,
	       flux[0][mfi].dataPtr(),
	       flux[1][mfi].dataPtr(),
#if (BL_SPACEDIM == 3)   
	       flux[2][mfi].dataPtr(),
#endif
	       &ng_f, bx.loVect(), bx.hiVect(), &dx[0] , &dt);
  }
}
Example No. 10
//
// Do a one-component dot product of r & z using supplied components.
//
static
Real
dotxy (const MultiFab& r,
       int             rcomp,
       const MultiFab& z,
       int             zcomp,
       bool            local)
{
    BL_PROFILE("CGSolver::dotxy()");

    BL_ASSERT(r.nComp() > rcomp);
    BL_ASSERT(z.nComp() > zcomp);
    BL_ASSERT(r.boxArray() == z.boxArray());

    const int ncomp = 1;
    const int nghost = 0;
    return MultiFab::Dot(r,rcomp,z,zcomp,ncomp,nghost,local);
}
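
Passing local = true skips the global reduction and returns only this processor's partial sum, which lets a caller batch several dot products into a single collective. A sketch against the signature above (t and s are hypothetical MultiFabs on the same BoxArray):

    // Two local dot products, one ReduceRealSum for both.
    Real vals[2] = { dotxy(t, 0, t, 0, true), dotxy(t, 0, s, 0, true) };
    ParallelDescriptor::ReduceRealSum(vals, 2);
    // vals[0] is now the global (t,t), vals[1] the global (t,s).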
Example No. 11
static
Real
norm_inf (const MultiFab& res, bool local = false)
{
    Real restot = 0.0;
    for (MFIter mfi(res); mfi.isValid(); ++mfi) 
    {
      restot = std::max(restot, res[mfi].norm(mfi.validbox(), 0, 0, res.nComp()));
    }
    if ( !local )
        ParallelDescriptor::ReduceRealMax(restot);
    return restot;
}
Example No. 12
void 
MultiFab_C_to_F::share (MultiFab& cmf, const std::string& fmf_name)
{
    const Box& bx = cmf.boxArray()[0];
    int nodal[BL_SPACEDIM];
    for ( int i = 0; i < BL_SPACEDIM; ++i ) {
	nodal[i] = (bx.type(i) == IndexType::NODE) ? 1 : 0;
    }

    share_multifab_with_f (fmf_name.c_str(), cmf.nComp(), cmf.nGrow(), nodal);

    for (MFIter mfi(cmf); mfi.isValid(); ++mfi)
    {
	int li = mfi.LocalIndex();
	const FArrayBox& fab = cmf[mfi];
	share_fab_with_f (li, fab.dataPtr());
    }
}
Example No. 13
void
MCMultiGrid::average (MultiFab&       c,
		      const MultiFab& f)
{
    //
    // Use Fortran function to average down (restrict) f to c.
    //
    for (MFIter cmfi(c); cmfi.isValid(); ++cmfi)
    {
        const Box&       bx   = cmfi.validbox();
	int              nc   = c.nComp();
        FArrayBox&       cfab = c[cmfi];
        const FArrayBox& ffab = f[cmfi];
	FORT_AVERAGE(
	    cfab.dataPtr(),ARLIM(cfab.loVect()),ARLIM(cfab.hiVect()),
	    ffab.dataPtr(),ARLIM(ffab.loVect()),ARLIM(ffab.hiVect()),
	    bx.loVect(), bx.hiVect(), &nc);
    }
}
Example No. 14
void
MCMultiGrid::solve (MultiFab&       _sol,
		    const MultiFab& _rhs,
		    Real            _eps_rel,
		    Real            _eps_abs,
		    MCBC_Mode       bc_mode)
{
    BL_ASSERT(numcomps == _sol.nComp());
    //
    // Prepare memory for new level, and solve the general boundary
    // value problem to within relative error _eps_rel.  Customized
    // to solve at level=0
    //
    int level = 0;
    prepareForLevel(level);
    residualCorrectionForm(*rhs[level],_rhs,*cor[level],_sol,bc_mode,level);
    if (!solve_(_sol, _eps_rel, _eps_abs, MCHomogeneous_BC, level))
      BoxLib::Error("MCMultiGrid::solve(): failed to converge!");
}
Example No. 15
void
LinOp::residual (MultiFab&       residL,
                 const MultiFab& rhsL,
                 MultiFab&       solnL,
                 int             level,
                 LinOp::BC_Mode  bc_mode,
                 bool            local)
{
    BL_PROFILE("LinOp::residual()");

    apply(residL, solnL, level, bc_mode, local);

    const bool tiling = true;

#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter solnLmfi(solnL,tiling); solnLmfi.isValid(); ++solnLmfi)
    {
        const int nc = residL.nComp();
        //
        // Only single-component solves supported (verified) by this class.
        //
        BL_ASSERT(nc == 1);

        const Box& tbx = solnLmfi.tilebox();

        BL_ASSERT(gbox[level][solnLmfi.index()] == solnLmfi.validbox());

        FArrayBox& residfab = residL[solnLmfi];
        const FArrayBox& rhsfab = rhsL[solnLmfi];

        FORT_RESIDL(
            residfab.dataPtr(), 
            ARLIM(residfab.loVect()), ARLIM(residfab.hiVect()),
            rhsfab.dataPtr(), 
            ARLIM(rhsfab.loVect()), ARLIM(rhsfab.hiVect()),
            residfab.dataPtr(), 
            ARLIM(residfab.loVect()), ARLIM(residfab.hiVect()),
            tbx.loVect(), tbx.hiVect(), &nc);
    }
}
Example No. 16
void
MCMultiGrid::interpolate (MultiFab&       f,
			  const MultiFab& c)
{
    //
    // Use fortran function to interpolate up (prolong) c to f
    // Note: returns f=f+P(c) , i.e. ADDS interp'd c to f
    //
    for (MFIter fmfi(f); fmfi.isValid(); ++fmfi)
    {
        const Box&       bx   = c.boxArray()[fmfi.index()];
	int              nc   = f.nComp();
        const FArrayBox& cfab = c[fmfi];
        FArrayBox&       ffab = f[fmfi];
	FORT_INTERP(
	    ffab.dataPtr(),ARLIM(ffab.loVect()),ARLIM(ffab.hiVect()),
	    cfab.dataPtr(),ARLIM(cfab.loVect()),ARLIM(cfab.hiVect()),
	    bx.loVect(), bx.hiVect(), &nc);
    }
}
Example No. 17
    void average_edge_to_cellcenter (MultiFab& cc, int dcomp, const std::vector<MultiFab*>& edge)
    {
	BL_ASSERT(cc.nComp() >= dcomp + BL_SPACEDIM);
	BL_ASSERT(edge.size() == BL_SPACEDIM);
	BL_ASSERT(edge[0]->nComp() == 1);
#ifdef _OPENMP
#pragma omp parallel
#endif
	for (MFIter mfi(cc,true); mfi.isValid(); ++mfi) 
	{
	    const Box& bx = mfi.tilebox();

	    BL_FORT_PROC_CALL(BL_AVG_EG_TO_CC,bl_avg_eg_to_cc)
		(bx.loVect(), bx.hiVect(),
		 BL_TO_FORTRAN_N(cc[mfi],dcomp),
		 D_DECL(BL_TO_FORTRAN((*edge[0])[mfi]),
			BL_TO_FORTRAN((*edge[1])[mfi]),
			BL_TO_FORTRAN((*edge[2])[mfi])));
	}	
    }
Example No. 18
    void average_cellcenter_to_face (PArray<MultiFab>& fc, const MultiFab& cc, const Geometry& geom)
    {
	BL_ASSERT(cc.nComp() == 1);
	BL_ASSERT(cc.nGrow() >= 1);
	BL_ASSERT(fc.size() == BL_SPACEDIM);
	BL_ASSERT(fc[0].nComp() == 1); // We only expect fc to have the gradient perpendicular to the face

	const Real* dx     = geom.CellSize();
	const Real* problo = geom.ProbLo();
	int coord_type = Geometry::Coord();

#ifdef _OPENMP
#pragma omp parallel
#endif
	for (MFIter mfi(cc,true); mfi.isValid(); ++mfi) 
	{
	    const Box& xbx = mfi.nodaltilebox(0);
#if (BL_SPACEDIM > 1)
	    const Box& ybx = mfi.nodaltilebox(1);
#endif
#if (BL_SPACEDIM == 3)
	    const Box& zbx = mfi.nodaltilebox(2);
#endif
	    
	    BL_FORT_PROC_CALL(BL_AVG_CC_TO_FC,bl_avg_cc_to_fc)
		(xbx.loVect(), xbx.hiVect(),
#if (BL_SPACEDIM > 1)
		 ybx.loVect(), ybx.hiVect(),
#endif
#if (BL_SPACEDIM == 3)
		 zbx.loVect(), zbx.hiVect(),
#endif
		 D_DECL(BL_TO_FORTRAN(fc[0][mfi]),
			BL_TO_FORTRAN(fc[1][mfi]),
			BL_TO_FORTRAN(fc[2][mfi])),
		 BL_TO_FORTRAN(cc[mfi]),
		 dx, problo, coord_type);
	}
    }
Example No. 19
    void fill_boundary(MultiFab& mf, const Geometry& geom, bool cross)
    {
	BoxLib::fill_boundary(mf, 0, mf.nComp(), geom, cross);
    }
Example No. 20
int
CGSolver::solve_cg (MultiFab&       sol,
		    const MultiFab& rhs,
		    Real            eps_rel,
		    Real            eps_abs,
		    LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cg()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab sorig(ba, ncomp, nghost, dm);
    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    MultiFab r1(ba, ncomp, nghost, dm);
    MultiFab z1(ba, ncomp, nghost, dm);
    MultiFab r2(ba, ncomp, nghost, dm);
    MultiFab z2(ba, ncomp, nghost, dm);

    MultiFab::Copy(sorig,sol,0,0,1,0);

    Lp.residual(r, rhs, sorig, lev, bc_mode);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC;

    Real       rnorm    = norm_inf(r);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "              CG: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev);
    Real sol_norm      = 0;
    Real rho_1         = 0;
    int  ret           = 0;
    int  nit           = 1;

    if ( rnorm == 0 || rnorm < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "       CG: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" <<  eps_rel*(Lp_norm*sol_norm + rnorm0 ) 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1)
        {
            z.setVal(0);

            jbb_precond(z,r,lev,Lp);
        }
        else
        {
            MultiFab::Copy(z,r,0,0,1,0);
        }

        Real rho = dotxy(z,r);

        if (nit == 1)
        {
            MultiFab::Copy(p,z,0,0,1,0);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }
        Lp.apply(q, p, lev, temp_bc_mode);

        Real alpha;
        if ( Real pw = dotxy(p,q) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_cg:"
                      << " nit " << nit
                      << " rho " << rho
                      << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm = norm_inf(r);
        sol_norm = norm_inf(sol);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "       CG:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break;
#endif
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "       CG: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 &&  rnorm > eps_rel*rnorm0 && rnorm > eps_abs )
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_cg: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
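
Stripped of the preconditioner, verbosity, and stability bookkeeping, the loop above is the standard preconditioned conjugate-gradient recurrence, in conventional notation (q = A p in the code):

    \rho_k = (z_k, r_k), \qquad \beta_k = \rho_k/\rho_{k-1}, \qquad p_k = z_k + \beta_k p_{k-1}
    \alpha_k = \rho_k/(p_k, A p_k), \qquad x_{k+1} = x_k + \alpha_k p_k, \qquad r_{k+1} = r_k - \alpha_k A p_k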
Example No. 21
int
CGSolver::solve_cabicgstab (MultiFab&       sol,
                            const MultiFab& rhs,
                            Real            eps_rel,
                            Real            eps_abs,
                            LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cabicgstab()");

    BL_ASSERT(sol.nComp() == 1);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    Real  temp1[4*SSS_MAX+1];
    Real  temp2[4*SSS_MAX+1];
    Real  temp3[4*SSS_MAX+1];
    Real     Tp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real    Tpp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real     aj[4*SSS_MAX+1];
    Real     cj[4*SSS_MAX+1];
    Real     ej[4*SSS_MAX+1];
    Real   Tpaj[4*SSS_MAX+1];
    Real   Tpcj[4*SSS_MAX+1];
    Real  Tppaj[4*SSS_MAX+1];
    Real      G[4*SSS_MAX+1][4*SSS_MAX+1];    // Extracted from first 4*SSS+1 columns of Gg[][].  indexed as [row][col]
    Real      g[4*SSS_MAX+1];                 // Extracted from last [4*SSS+1] column of Gg[][].
    Real     Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul().  indexed as [row*(4*SSS+2) + col]
    //
    // If variable_SSS we "telescope" SSS.
    // We start with 1 and increase it up to SSS_MAX on the outer iterations.
    //
    if (variable_SSS) SSS = 1;

    zero(   aj, 4*SSS_MAX+1);
    zero(   cj, 4*SSS_MAX+1);
    zero(   ej, 4*SSS_MAX+1);
    zero( Tpaj, 4*SSS_MAX+1);
    zero( Tpcj, 4*SSS_MAX+1);
    zero(Tppaj, 4*SSS_MAX+1);
    zero(temp1, 4*SSS_MAX+1);
    zero(temp2, 4*SSS_MAX+1);
    zero(temp3, 4*SSS_MAX+1);

    SetMonomialBasis(Tp,Tpp,SSS);

    const int ncomp = 1, nghost = sol.nGrow();
    //
    // Contains the matrix powers of p[] and r[].
    //
    // First 2*SSS+1 components are powers of p[].
    // Next  2*SSS   components are powers of r[].
    //
    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab PR(ba, 4*SSS_MAX+1, 0, dm);

    MultiFab  p(ba, ncomp, 0, dm);
    MultiFab  r(ba, ncomp, 0, dm);
    MultiFab rt(ba, ncomp, 0, dm);
    
    MultiFab tmp(ba, 4, nghost, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    BL_ASSERT(!r.contains_nan());

    MultiFab::Copy(rt,r,0,0,1,0);
    MultiFab::Copy( p,r,0,0,1,0);

    const Real           rnorm0        = norm_inf(r);
    Real                 delta         = dotxy(r,rt);
    const Real           L2_norm_of_rt = sqrt(delta);
    const LinOp::BC_Mode temp_bc_mode  = LinOp::Homogeneous_BC;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_CABiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }

    if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: niter = 0,"
                      << ", rnorm = "   << rnorm0
                      << ", delta = "   << delta
                      << ", eps_abs = " << eps_abs << '\n';
	}
        return 0;
    }

    int niters = 0, ret = 0;

    Real L2_norm_of_resid = 0, atime = 0, gtime = 0;

    bool BiCGStabFailed = false, BiCGStabConverged = false;

    for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; )
    {
        const Real time1 = ParallelDescriptor::second();
        //
        // Compute the matrix powers on p[] & r[] (monomial basis).
        // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[].
        //
        MultiFab::Copy(PR,p,0,0,1,0);
        MultiFab::Copy(PR,r,0,2*SSS+1,1,0);
	
        BL_ASSERT(!PR.contains_nan(0,      1));
        BL_ASSERT(!PR.contains_nan(2*SSS+1,1));
        //
        // We use "tmp" to minimize the number of Lp.apply()s.
        // We do this by doing p & r together in a single call.
        //
        MultiFab::Copy(tmp,p,0,0,1,0);
        MultiFab::Copy(tmp,r,0,1,1,0);

        for (int n = 1; n < 2*SSS; n++)
        {
            Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2);

            MultiFab::Copy(tmp,tmp,2,0,2,0);

            MultiFab::Copy(PR,tmp,0,        n,1,0);
            MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0);

            BL_ASSERT(!PR.contains_nan(n,        1));
            BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1));
        }

        MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0);
        Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1);
        MultiFab::Copy(PR,tmp,1,2*SSS,1,0);

        BL_ASSERT(!PR.contains_nan(2*SSS-1,1));
        BL_ASSERT(!PR.contains_nan(2*SSS,  1));

        Real time2 = ParallelDescriptor::second();

        atime += (time2-time1);

        BuildGramMatrix(Gg, PR, rt, SSS);

        const Real time3 = ParallelDescriptor::second();

        gtime += (time3-time2);
        //
        // Form G[][] and g[] from Gg.
        //
        for (int i = 0, k = 0; i < 4*SSS+1; i++)
        {
            for (int j = 0; j < 4*SSS+1; j++)
                //
                // First 4*SSS+1 elements in each row go to G[][].
                //
                G[i][j] = Gg[k++];
            //
            // Last element in row goes to g[].
            //
            g[i] = Gg[k++];
        }

        zero(aj, 4*SSS+1); aj[0]       = 1;
        zero(cj, 4*SSS+1); cj[2*SSS+1] = 1;
        zero(ej, 4*SSS+1);

        for (int nit = 0; nit < SSS; nit++)
        {
            gemv( Tpaj,  Tp, aj, 4*SSS+1, 4*SSS+1);
            gemv( Tpcj,  Tp, cj, 4*SSS+1, 4*SSS+1);
            gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1);

            const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1);

            if ( g_dot_Tpaj == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 1; break;
            }

            const Real alpha = delta / g_dot_Tpaj;

            if ( std::isinf(alpha) )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 2; break;
            }

            axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            axpy(temp3,   cj, -alpha,  Tpaj, 4*SSS+1);

            const Real omega_numerator   = dot(temp3, temp2, 4*SSS+1);
            const Real omega_denominator = dot(temp1, temp2, 4*SSS+1);
            //
            // NOTE: omega_numerator/omega_denominator can be 0/x or 0/0, but should never be x/0.
            //
            // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj.
            // If omega_numerator==0, and ||s||!=0, then stabilization breakdown.
            //
            // Partial update of ej must happen before the check on omega to ensure forward progress !!!
            //
            axpy(ej, ej, alpha, aj, 4*SSS+1);
            //
            // ej has been updated, so count this as an iteration: even if we
            // break out of the loop we can still use ej to update sol below.
            //
            niters++;
            //
            // Calculate the norm of Saad's vector 's' to check intra s-step convergence.
            //
            axpy(temp1, cj,-alpha,  Tpaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s));

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n';
                BiCGStabConverged = true; break;
            }

            if ( omega_denominator == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 3; break;
            }

            const Real omega = omega_numerator / omega_denominator;

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( omega == 0   ) std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n';
                if ( std::isinf(omega) ) std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n';
            }

            if ( omega == 0   ) { BiCGStabFailed = true; ret = 4; break; }
            if ( std::isinf(omega) ) { BiCGStabFailed = true; ret = 4; break; }
            //
            // Complete the update of ej & cj now that omega is known to be ok.
            //
            axpy(ej, ej,       omega,    cj, 4*SSS+1);
            axpy(ej, ej,-omega*alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj,      -omega,  Tpcj, 4*SSS+1);
            axpy(cj, cj,      -alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1);
            //
            // Do an early check of the residual to determine convergence.
            //
            gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1);
            //
            // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic.
            // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel).
            // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged.
            //
            const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0);

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n';
                BiCGStabConverged = true; break;
            }

            const Real delta_next = dot(g, cj, 4*SSS+1);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( delta_next == 0   ) std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n';
                if ( std::isinf(delta_next) ) std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf?
            if ( delta_next  == 0  ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown...

            const Real beta = (delta_next/delta)*(alpha/omega);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( beta == 0   ) std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n';
                if ( std::isinf(beta) ) std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf?
            if ( beta == 0   ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0?  can't make further progress(?)

            axpy(aj, cj,        beta,   aj, 4*SSS+1);
            axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1);

            delta = delta_next;
        }
        //
        // Update iterates.
        //
        for (int i = 0; i < 4*SSS+1; i++)
            sxay(sol,sol,ej[i],PR,i);

        MultiFab::Copy(p,PR,0,0,1,0);
        p.mult(aj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(p,p,aj[i],PR,i);

        MultiFab::Copy(r,PR,0,0,1,0);
        r.mult(cj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(r,r,cj[i],PR,i);

        if ( !BiCGStabFailed && !BiCGStabConverged )
        {
            m += SSS;

            if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); }
        }
    }

    if ( verbose > 0 )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: Final: Iteration "
                      << std::setw(4) << niters
                      << " rel. err. "
                      << L2_norm_of_resid << '\n';
        }

        if ( verbose > 1 )
        {
            Real tmp[2] = { atime, gtime };

            ParallelDescriptor::ReduceRealMax(tmp,2,color());

            if ( ParallelDescriptor::IOProcessor(color()) )
            {
                Spacer(std::cout, lev);
                std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0] << ", gram time: " << tmp[1] << '\n';
            }
        }
    }

    if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged)
    {
        if ( L2_norm_of_resid > L2_norm_of_rt )
        {
            if ( ParallelDescriptor::IOProcessor(color()) )
                BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!");
            //
            // Return code 8 tells the MultiGrid driver to zero out the solution!
            //
            ret = 8;
        }
        else
        {
            //
            // Return codes 1-7 tell the MultiGrid driver to smooth the solution!
            //
            ret = 7;
        }
    }

    return ret;
}
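
The coefficient bookkeeping above is easier to follow with the layout of PR written out; per the comments, with s = SSS its 4s+1 components hold the monomial Krylov basis (my notation):

    PR = [ p, Ap, ..., A^{2s} p,  r, Ar, ..., A^{2s-1} r ]

Components 0..2s are powers applied to p and components 2s+1..4s are powers applied to r, which is exactly how the length-(4s+1) coefficient vectors aj, cj, and ej are interpreted.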
Example No. 22
int
CGSolver::jbb_precond (MultiFab&       sol,
		       const MultiFab& rhs,
                       int             lev,
		       LinOp&          Lp)
{
    //
    // This is a local routine.  No parallel is allowed to happen here.
    //
    int                  lev_loc = lev;
    const Real           eps_rel = 1.e-2;
    const Real           eps_abs = 1.e-16;
    const int            nghost  = sol.nGrow();
    const int            ncomp   = sol.nComp();
    const bool           local   = true;
    const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC;

    BL_ASSERT(ncomp == 1 );
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc));

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab sorig(ba, ncomp, nghost, dm);

    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    sorig.copy(sol);

    Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local);

    sol.setVal(0);

    Real       rnorm    = norm_inf(r,local);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "     jbb_precond: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev_loc, local);
    Real sol_norm = 0;
    int  ret      = 0;			// will return this value if all goes well
    Real rho_1    = 0;
    int  nit      = 1;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        z.copy(r);

        Real rho = dotxy(z,r,local);
        if (nit == 1)
        {
            p.copy(z);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }

        Lp.apply(q, p, lev_loc, bc_mode, local);

        Real alpha;
        if ( Real pw = dotxy(p,q,local) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:" << " nit " << nit
                      << " rho " << rho << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm    = norm_inf(r,   local);
        sol_norm = norm_inf(sol, local);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs )
	{
            break;
	}
      
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "jbb_precond: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
	{
            BoxLib::Warning("jbb_precond:: failed to converge!");
	}
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Example No. 23
int
CGSolver::solve_bicgstab (MultiFab&       sol,
                          const MultiFab& rhs,
                          Real            eps_rel,
                          Real            eps_abs,
                          LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_bicgstab()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab ph(ba, ncomp, nghost, dm);
    MultiFab sh(ba, ncomp, nghost, dm);

    MultiFab sorig(ba, ncomp, 0, dm);
    MultiFab p    (ba, ncomp, 0, dm);
    MultiFab r    (ba, ncomp, 0, dm);
    MultiFab s    (ba, ncomp, 0, dm);
    MultiFab rh   (ba, ncomp, 0, dm);
    MultiFab v    (ba, ncomp, 0, dm);
    MultiFab t    (ba, ncomp, 0, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    MultiFab::Copy(sorig,sol,0,0,1,0);
    MultiFab::Copy(rh,   r,  0,0,1,0);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC;

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    Real rnorm = norm_inf(r);
#else
    //
    // Calculate the local values of these norms & reduce their values together.
    //
    Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) };

    ParallelDescriptor::ReduceRealMax(vals,2,color());

    Real       rnorm    = vals[0];
    const Real Lp_norm  = vals[1];
    Real       sol_norm = 0;
#endif
    const Real rnorm0   = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }
    int ret = 0, nit = 1;
    Real rho_1 = 0, alpha = 0, omega = 0;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return ret;
    }

    for (; nit <= maxiter; ++nit)
    {
        const Real rho = dotxy(rh,r);
        if ( rho == 0 ) 
	{
            ret = 1; break;
	}
        if ( nit == 1 )
        {
            MultiFab::Copy(p,r,0,0,1,0);
        }
        else
        {
            const Real beta = (rho/rho_1)*(alpha/omega);
            sxay(p, p, -omega, v);
            sxay(p, r,   beta, p);
        }
        if ( use_mg_precond )
        {
            ph.setVal(0);
            mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            ph.setVal(0);
            Lp.jacobi_smooth(ph, p, lev, temp_bc_mode);
        }
        else 
        {
            MultiFab::Copy(ph,p,0,0,1,0);
        }
        Lp.apply(v, ph, lev, temp_bc_mode);

        if ( Real rhTv = dotxy(rh,v) )
	{
            alpha = rho/rhTv;
	}
        else
	{
            ret = 2; break;
	}
        sxay(sol, sol,  alpha, ph);
        sxay(s,     r, -alpha,  v);

        rnorm = norm_inf(s);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Half Iter "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( use_mg_precond )
        {
            sh.setVal(0);
            mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            sh.setVal(0);
            Lp.jacobi_smooth(sh, s, lev, temp_bc_mode);
        }
        else
        {
            MultiFab::Copy(sh,s,0,0,1,0);
        }
        Lp.apply(t, sh, lev, temp_bc_mode);
        //
        // This is a little funky.  I want to elide one of the reductions
        // in the following two dotxy()s.  We do that by calculating the "local"
        // values and then reducing the two local values at the same time.
        //
        Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) };

        ParallelDescriptor::ReduceRealSum(vals,2,color());

        if ( vals[0] )
	{
            omega = vals[1]/vals[0];
	}
        else
	{
            ret = 3; break;
	}
        sxay(sol, sol,  omega, sh);
        sxay(r,     s, -omega,  t);

        rnorm = norm_inf(r);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Iteration "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( omega == 0 )
	{
            ret = 4; break;
	}
        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Final: Iteration "
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs)
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
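
For reference, the two half-steps above are the classical BiCGStab update in conventional notation, where ph and sh are the (possibly preconditioned) p and s, v = A ph, and t = A sh:

    \alpha = \rho/(\hat r, v), \qquad s = r - \alpha v
    \omega = (t, s)/(t, t), \qquad x \leftarrow x + \alpha\,ph + \omega\,sh, \qquad r = s - \omega t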
Example No. 24
static
void
BuildGramMatrix (Real*           Gg,
                 const MultiFab& PR,
                 const MultiFab& rt,
                 int             sss)
{
    BL_ASSERT(rt.nComp() == 1);
    BL_ASSERT(PR.nComp() >= 4*sss+1);

    const int Nrows = 4*sss+1, Ncols = Nrows + 1;

    //
    // Gg is dimensioned (Ncols*Nrows).
    //
    // First fill the upper triangle into tmp
    //
#ifdef _OPENMP
    const int nthreads = omp_get_max_threads();
#else 
    const int nthreads = 1;
#endif
    const int Ntmp = (Nrows*(Nrows+3))/2;
    PArray<Array<Real> > tmp(nthreads, PArrayManage);

#ifdef _OPENMP
#pragma omp parallel
#endif
    {
#ifdef _OPENMP
	int tid = omp_get_thread_num();
#else
	int tid = 0;
#endif
	tmp.set(tid, new Array<Real>(Ntmp,0.0));

	for (MFIter mfi(PR,true); mfi.isValid(); ++mfi)
	{
	    const Box& bx = mfi.tilebox();
	    const FArrayBox& rfab = PR[mfi];
	    const FArrayBox& tfab = rt[mfi];
	    
	    int cnt = 0;
	    for (int mm = 0; mm < Nrows; mm++) {
		for (int nn = mm; nn < Nrows; nn++) {
		    tmp[tid][cnt++] = rfab.dot(bx,nn,rfab,bx,mm);
		}
		tmp[tid][cnt++] = tfab.dot(bx,0,rfab,bx,mm);
	    }
	}
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
	for (int i = 0; i < Ntmp; ++i) {
	    for (int j = 1; j < nthreads; ++j) {
		tmp[0][i] += tmp[j][i];
	    }
	}
#endif
    }

    ParallelDescriptor::ReduceRealSum(&tmp[0][0], Ntmp, PR.color());

    // Now fill upper triangle with "tmp".
    int cnt = 0;
    for (int mm = 0; mm < Nrows; mm++) {
        for (int nn = mm; nn < Nrows; nn++) {
            Gg[mm*Ncols + nn] = tmp[0][cnt++];
        }
        Gg[mm*Ncols + Nrows] = tmp[0][cnt++];
    }
    // Then fill in strict lower triangle using symmetry.
    for (int mm = 0; mm < Nrows; mm++) {
        for (int nn = 0; nn < mm; nn++) {
            Gg[mm*Ncols + nn] = Gg[nn*Ncols + mm];
        }
    }
}
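
In matrix terms, with N = 4*sss+1, the routine assembles the Gram-like system consumed by solve_cabicgstab (my notation):

    G_{mn} = (PR_m, PR_n), \qquad g_m = (rt, PR_m), \qquad Gg = [ G | g ], an N x (N+1) row-major buffer

which matches the G[i][j] = Gg[k++] / g[i] = Gg[k++] unpacking in Example No. 21.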
Example No. 25
int
MultiGrid::solve_ (MultiFab&      _sol,
                   Real           eps_rel,
                   Real           eps_abs,
                   LinOp::BC_Mode bc_mode,
                   Real           bnorm,
                   Real           resnorm0)
{
    BL_PROFILE("MultiGrid::solve_()");

  //
  // If do_fixed_number_of_iters = 1, then do maxiter iterations without checking for convergence 
  // 
  // If do_fixed_number_of_iters = 0, then relax system maxiter times, 
  //    and stop if relative error <= eps_rel or if absolute err <= eps_abs
  //
  const Real strt_time = ParallelDescriptor::second();

  const int level = 0;

  //
  // We take the max of the norms of the initial RHS and the initial residual in order to capture both cases
  //
  Real norm_to_test_against;
  bool using_bnorm;
  if (bnorm >= resnorm0) 
  {
      norm_to_test_against = bnorm;
      using_bnorm          = true;
  } else {
      norm_to_test_against = resnorm0;
      using_bnorm          = false;
  } 

  int        returnVal = 0;
  Real       error     = resnorm0;

  //
  // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed
  //
  if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 )
  {
      std::cout << "MultiGrid: Tolerance "
                << eps_rel
                << " < 1e-16 is probably set too low" << '\n';
  }

  //
  // We initially define norm_cor based on the initial solution only so we can use it in the very first iteration
  //    to decide whether the problem is already solved (this is relevant if the previous solve used was only solved
  //    according to the Anorm test and not the bnorm test).
  //
  Real       norm_cor    = norm_inf(*initialsolution,true);
  ParallelDescriptor::ReduceRealMax(norm_cor);

  int        nit         = 1;
  const Real norm_Lp     = Lp.norm(0, level);
  Real       cg_time     = 0;

  if ( use_Anorm_for_convergence == 1 ) 
  {
     //
     // Don't need to go any further -- no iterations are required
     //
     if (error <= eps_abs || error < eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) 
     {
         if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
         {
             std::cout << "   Problem is already converged -- no iterations required\n";
         }
         return 1;
     }

     for ( ;
           ( (error > eps_abs &&
              error > eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) ||
             (do_fixed_number_of_iters == 1) )
             && nit <= maxiter;
           ++nit)
     {
         relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time);

         Real tmp[2] = { norm_inf(*cor[level],true), errorEstimate(level,bc_mode,true) };

         ParallelDescriptor::ReduceRealMax(tmp,2);

         norm_cor = tmp[0];
         error    = tmp[1];
	
         if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
         {
             const Real rel_error = error / norm_to_test_against;
             Spacer(std::cout, level);
             if (using_bnorm)
             {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/bnorm = "
                           << rel_error << '\n';
             } else {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/resid0 = "
                           << rel_error << '\n';
             }
         }
     }
  }
  else
  {
     //
     // Don't need to go any further -- no iterations are required
     //
     if (error <= eps_abs || error < eps_rel*norm_to_test_against) 
     {
         if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
         {
             std::cout << "   Problem is already converged -- no iterations required\n";
         }
         return 1;
     }

     for ( ;
           ( (error > eps_abs &&
              error > eps_rel*norm_to_test_against) ||
             (do_fixed_number_of_iters == 1) )
             && nit <= maxiter;
           ++nit)
     {
         relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time);

         error = errorEstimate(level, bc_mode);
	
         if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
         {
             const Real rel_error = error / norm_to_test_against;
             Spacer(std::cout, level);
             if (using_bnorm)
             {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/bnorm = "
                           << rel_error << '\n';
             } else {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/resid0 = "
                           << rel_error << '\n';
             }
         }
     }
  }

  Real run_time = (ParallelDescriptor::second() - strt_time);

  if ( verbose > 0 )
  {
      if ( ParallelDescriptor::IOProcessor() )
      {
          const Real rel_error = error / norm_to_test_against;
          Spacer(std::cout, level);
          if (using_bnorm)
          {
              std::cout << "MultiGrid: Iteration   "
                        << nit-1
                        << " resid/bnorm = "
                        << rel_error << '\n';
          } else {
              std::cout << "MultiGrid: Iteration   "
                        << nit-1
                        << " resid/resid0 = "
                        << rel_error << '\n';
             }
      }

      if ( verbose > 1 )
      {
          Real tmp[2] = { run_time, cg_time };

          ParallelDescriptor::ReduceRealMax(tmp,2,ParallelDescriptor::IOProcessorNumber());

          if ( ParallelDescriptor::IOProcessor() )
              std::cout << ", Solve time: " << tmp[0] << ", CG time: " << tmp[1];
      }

      if ( ParallelDescriptor::IOProcessor() ) std::cout << '\n';
  }

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
      if ( do_fixed_number_of_iters == 1)
      {
          std::cout << "   Did fixed number of iterations: " << maxiter << std::endl;
      } 
      else if ( error < eps_rel*norm_to_test_against )
      {
          std::cout << "   Converged res < eps_rel*max(bnorm,res_norm)\n";
      } 
      else if ( (use_Anorm_for_convergence == 1) && (error < eps_rel*norm_Lp*norm_cor) )
      {
          std::cout << "   Converged res < eps_rel*Anorm*sol\n";
      } 
      else if ( error < eps_abs )
      {
          std::cout << "   Converged res < eps_abs\n";
      }
  }

  //
  // Omit the ghost-cell update since ghosts may not be initialized in the calling routine.
  // Add to boundary values stored in initialsolution.
  //
  _sol.copy(*cor[level]);
  _sol.plus(*initialsolution,0,_sol.nComp(),0);

  if ( use_Anorm_for_convergence == 1 ) 
  {
     if ( do_fixed_number_of_iters == 1                ||
          error <= eps_rel*(norm_Lp*norm_cor+norm_to_test_against) ||
          error <= eps_abs )
       returnVal = 1;
  } 
  else 
  {
     if ( do_fixed_number_of_iters == 1 ||
          error <= eps_rel*(norm_to_test_against)   ||
          error <= eps_abs )
       returnVal = 1;
  } 

  //
  // Otherwise, failed to solve satisfactorily
  //
  return returnVal;
}
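
Written out, the two stopping tests used above are (with error the max-norm residual from errorEstimate and norm_to_test_against = max(bnorm, resnorm0)):

    Anorm test:  error <= eps_abs  or  error <= eps_rel * ( ||Lp|| * ||cor|| + norm_to_test_against )
    plain test:  error <= eps_abs  or  error <= eps_rel * norm_to_test_against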
Example No. 26
void
MCLinOp::makeCoefficients (MultiFab&       cs,
                           const MultiFab& fn,
                           int             level)
{
    const int nc = fn.nComp();
    //
    // Determine index type of incoming MultiFab.
    //
    const IndexType iType(fn.boxArray().ixType());
    const IndexType cType(D_DECL(IndexType::CELL, IndexType::CELL, IndexType::CELL));
    const IndexType xType(D_DECL(IndexType::NODE, IndexType::CELL, IndexType::CELL));
    const IndexType yType(D_DECL(IndexType::CELL, IndexType::NODE, IndexType::CELL));
#if (BL_SPACEDIM == 3)    
    const IndexType zType(D_DECL(IndexType::CELL, IndexType::CELL, IndexType::NODE));
#endif
    int cdir;
    if (iType == cType)
    {
        cdir = -1;
    }
    else if (iType == xType)
    {
        cdir = 0;
    }
    else if (iType == yType)
    {
        cdir = 1;
    }
#if (BL_SPACEDIM == 3)
    else if (iType == zType)
    {
        cdir = 2;
    }
#endif
    else
        BoxLib::Abort("MCLinOp::makeCoeffients(): Bad index type");
    
    BoxArray d(gbox[level]);
    if (cdir >= 0)
	d.surroundingNodes(cdir);

    int nGrow=0;
    cs.define(d, nc, nGrow, Fab_allocate);
    cs.setVal(0.0);

    const BoxArray& grids = gbox[level];

    for (MFIter csmfi(cs); csmfi.isValid(); ++csmfi)
    {
        const Box&       grd   = grids[csmfi.index()];
        FArrayBox&       csfab = cs[csmfi];
        const FArrayBox& fnfab = fn[csmfi];

	switch(cdir)
        {
	case -1:
	    FORT_AVERAGECC(
		csfab.dataPtr(),
                ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
		fnfab.dataPtr(),
                ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
		grd.loVect(),
                grd.hiVect(), &nc);
	    break;
	case 0:
	case 1:
	case 2:
	    if ( harmavg )
            {
		FORT_HARMONIC_AVERAGEEC(
		    csfab.dataPtr(), 
                    ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
		    fnfab.dataPtr(), 
                    ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
		    grd.loVect(),
                    grd.hiVect(), &nc, &cdir);
	    }
            else
            {
		FORT_AVERAGEEC(
		    csfab.dataPtr(), 
                    ARLIM(csfab.loVect()), ARLIM(csfab.hiVect()),
		    fnfab.dataPtr(), 
                    ARLIM(fnfab.loVect()), ARLIM(fnfab.hiVect()),
		    grd.loVect(),
                    grd.hiVect(), &nc, &cdir);
	    }
	    break;
	default:
	    BoxLib::Error("MCLinOp::makeCoefficients(): bad coefficient coarsening direction!");
	}
    }
}
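
The Fortran kernels FORT_AVERAGECC / FORT_AVERAGEEC / FORT_HARMONIC_AVERAGEEC
are not reproduced here.  As a hedged illustration of the arithmetic-versus-
harmonic choice that the harmavg flag selects, here is a 1-D sketch of
restricting coefficients from a fine level to a coarse one at a 2:1 ratio
(coarsenCoefficients is an illustrative name, not part of BoxLib):

    #include <cassert>
    #include <vector>

    // 1-D, ratio-2 sketch only; the real kernels operate on BL_SPACEDIM
    // FArrayBoxes.  The harmonic mean is the usual choice when coefficients
    // jump sharply between cells.
    std::vector<double> coarsenCoefficients (const std::vector<double>& fine,
                                             bool harmonic)
    {
        assert(fine.size() % 2 == 0);
        std::vector<double> crse(fine.size()/2);
        for (std::size_t ic = 0; ic < crse.size(); ++ic) {
            const double a = fine[2*ic], b = fine[2*ic+1];
            crse[ic] = harmonic ? 2.0*a*b/(a + b)  // harmonic mean
                                : 0.5*(a + b);     // arithmetic mean
        }
        return crse;
    }
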
Esempio n. 27
int
MCMultiGrid::solve_ (MultiFab& _sol,
		     Real      eps_rel,
		     Real      eps_abs,
		     MCBC_Mode bc_mode,
		     int       level)
{
  //
  // Relax the system at most maxiter times, stopping early if the relative
  // error <= eps_rel or the absolute error <= eps_abs.
  //
  const Real strt_time = ParallelDescriptor::second();
  //
  // Elide a reduction by doing these together.
  //
  Real tmp[2] = { norm_inf(*rhs[level],true), errorEstimate(level,bc_mode,true) };

  ParallelDescriptor::ReduceRealMax(tmp,2);

  const Real norm_rhs  = tmp[0];
  const Real error0    = tmp[1];
  int        returnVal = 0;
  Real       error     = error0;

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
      Spacer(std::cout, level);
      std::cout << "MCMultiGrid: Initial rhs                = " << norm_rhs << '\n';
      std::cout << "MCMultiGrid: Initial error (error0)     = " << error0 << '\n';
  }
  
  if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 )
  {
      std::cout << "MCMultiGrid: Tolerance "
                << eps_rel
                << " < 1e-16 is probably set too low" << '\n';
  }
  //
  // Initialize correction to zero at this level (auto-filled at levels below)
  //
  (*cor[level]).setVal(0.0);
  //
  // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed.
  //
  int        nit         = 1;
  const Real new_error_0 = norm_rhs;
  //const Real norm_Lp     = Lp.norm(0, level);


  for ( ;
        error > eps_abs &&
          error > eps_rel*norm_rhs &&
          nit <= maxiter;
        ++nit)
  {
    relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode);

    error = errorEstimate(level,bc_mode);
	
    if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
    {
      const Real rel_error = (new_error_0 != 0) ? error/new_error_0 : 0;
      Spacer(std::cout, level);
      std::cout << "MCMultiGrid: Iteration   "
                << nit
                << " error/error0 = "
                << rel_error << '\n';
    }
  }

  Real run_time = (ParallelDescriptor::second() - strt_time);
  if ( verbose > 0 )
  {
      if ( ParallelDescriptor::IOProcessor() )
      {
          const Real rel_error = (error0 != 0) ? error/error0 : 0;
          Spacer(std::cout, level);
          std::cout << "MCMultiGrid: Final Iter. "
                    << nit-1
                    << " error/error0 = "
                    << rel_error;
      }

      if ( verbose > 1 )
      {
        
        ParallelDescriptor::ReduceRealMax(run_time);

        if ( ParallelDescriptor::IOProcessor() )
          std::cout << ", Solve time: " << run_time << '\n';
      }
  }

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
    if ( error < eps_rel*norm_rhs )
    {
      std::cout << "   Converged res < eps_rel*bnorm\n";
    } 
    else if ( error < eps_abs )
    {
      std::cout << "   Converged res < eps_abs\n";
    }
  }
  //
  // Omit the ghost-cell update since the ghost cells may not be initialized
  // in the calling routine.  Add the correction to the boundary values
  // stored in initialsolution.
  //
  _sol.copy(*cor[level]);
  _sol.plus(*initialsolution,0,_sol.nComp(),0);

  if ( error <= eps_rel*(norm_rhs) ||
       error <= eps_abs )
    returnVal = 1;

  //
  // Otherwise, failed to solve satisfactorily
  //
  return returnVal;
}
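
The tmp[2] idiom near the top of this routine computes two norms locally and
combines them in a single max-reduction rather than two.  A hedged sketch of
the same batching in raw MPI, assuming MPI is already initialized
(ParallelDescriptor::ReduceRealMax wraps a call of this kind; reduceBoth is
an illustrative name):

    #include <mpi.h>

    // Sketch only: batch two independent max-reductions into one collective,
    // halving the number of synchronization points.
    void reduceBoth (double local_rhs_norm, double local_err,
                     double& global_rhs_norm, double& global_err)
    {
        double tmp[2] = { local_rhs_norm, local_err };
        MPI_Allreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
        global_rhs_norm = tmp[0];
        global_err      = tmp[1];
    }
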
Esempio n. 28
void
MCLinOp::applyBC (MultiFab& inout,
		  int       level,
		  MCBC_Mode bc_mode)
{
    //
    // The inout MultiFab must have at least MCLinOp_grow ghost cells for
    // applyBC(); the same bound covers the algorithms taking care of
    // periodic boundary conditions.
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // No coarsened boundary values, cannot apply inhomog at lev>0.
    //
    BL_ASSERT(!(level>0 && bc_mode == MCInhomogeneous_BC));
    
    int flagden = 1;    // fill in the bndry data and undrrelxr with values
    int flagbc  = 1;
    if (bc_mode == MCHomogeneous_BC)
        flagbc = 0;     // no boundary data if homogeneous
    int nc = inout.nComp();
    BL_ASSERT(nc == numcomp );

    inout.setBndry(-1.e30);
    inout.FillBoundary();
    prepareForLevel(level);

    geomarray[level].FillPeriodicBoundary(inout,0,nc);
    //
    // Fill boundary cells.
    //
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(inout); mfi.isValid(); ++mfi)
    {
        const int gn = mfi.index();

        BL_ASSERT(gbox[level][gn] == inout.box(gn));

        const BndryData::RealTuple&      bdl = bgb.bndryLocs(gn);
        const Array< Array<BoundCond> >& bdc = bgb.bndryConds(gn);
        const MaskTuple&                 msk = maskvals[level][gn];

        for (OrientationIter oitr; oitr; ++oitr)
        {
            const Orientation face = oitr();
            FabSet& f  = (*undrrelxr[level])[face];
            FabSet& td = (*tangderiv[level])[face];
            int cdr(face);
            const FabSet& fs = bgb.bndryValues(face);
	    Real bcl = bdl[face];
            const Array<BoundCond>& bc = bdc[face];
	    const int *bct = (const int*) bc.dataPtr();
	    const FArrayBox& fsfab = fs[gn];
	    const Real* bcvalptr = fsfab.dataPtr();
            //
	    // Way external derivs stored.
            //
	    const Real* exttdptr = fsfab.dataPtr(numcomp); 
	    const int* fslo      = fsfab.loVect();
	    const int* fshi      = fsfab.hiVect();
	    FArrayBox& inoutfab  = inout[gn];
	    FArrayBox& denfab    = f[gn];
	    FArrayBox& tdfab     = td[gn];
#if BL_SPACEDIM==2
            int cdir = face.coordDir(), perpdir = -1;
	    if (cdir == 0)
                perpdir = 1;
	    else if (cdir == 1)
                perpdir = 0;
	    else
                BoxLib::Abort("MCLinOp::applyBC(): bad logic");

	    const Mask& m    = *msk[face];
	    const Mask& mphi = *msk[Orientation(perpdir,Orientation::high)];
	    const Mask& mplo = *msk[Orientation(perpdir,Orientation::low)];
	    FORT_APPLYBC(
		&flagden, &flagbc, &maxorder,
		inoutfab.dataPtr(), 
                ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
		&cdr, bct, &bcl,
		bcvalptr, ARLIM(fslo), ARLIM(fshi),
		m.dataPtr(),    ARLIM(m.loVect()),    ARLIM(m.hiVect()),
		mphi.dataPtr(), ARLIM(mphi.loVect()), ARLIM(mphi.hiVect()),
		mplo.dataPtr(), ARLIM(mplo.loVect()), ARLIM(mplo.hiVect()),
		denfab.dataPtr(), 
		ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
		exttdptr, ARLIM(fslo), ARLIM(fshi),
		tdfab.dataPtr(),ARLIM(tdfab.loVect()),ARLIM(tdfab.hiVect()),
		inout.box(gn).loVect(), inout.box(gn).hiVect(),
		&nc, h[level]);
#elif BL_SPACEDIM==3
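	    // Masks on the six surrounding faces:
	    //   n/s = y-high/y-low, e/w = x-high/x-low, t/b = z-high/z-low.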
	    const Mask& mn = *msk[Orientation(1,Orientation::high)];
	    const Mask& me = *msk[Orientation(0,Orientation::high)];
	    const Mask& mw = *msk[Orientation(0,Orientation::low)];
	    const Mask& ms = *msk[Orientation(1,Orientation::low)];
	    const Mask& mt = *msk[Orientation(2,Orientation::high)];
	    const Mask& mb = *msk[Orientation(2,Orientation::low)];
	    FORT_APPLYBC(
		&flagden, &flagbc, &maxorder,
		inoutfab.dataPtr(), 
                ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
		&cdr, bct, &bcl,
		bcvalptr, ARLIM(fslo), ARLIM(fshi),
		mn.dataPtr(),ARLIM(mn.loVect()),ARLIM(mn.hiVect()),
		me.dataPtr(),ARLIM(me.loVect()),ARLIM(me.hiVect()),
		mw.dataPtr(),ARLIM(mw.loVect()),ARLIM(mw.hiVect()),
		ms.dataPtr(),ARLIM(ms.loVect()),ARLIM(ms.hiVect()),
		mt.dataPtr(),ARLIM(mt.loVect()),ARLIM(mt.hiVect()),
		mb.dataPtr(),ARLIM(mb.loVect()),ARLIM(mb.hiVect()),
		denfab.dataPtr(), 
		ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
		exttdptr, ARLIM(fslo), ARLIM(fshi),
		tdfab.dataPtr(),ARLIM(tdfab.loVect()),ARLIM(tdfab.hiVect()),
		inout.box(gn).loVect(), inout.box(gn).hiVect(),
		&nc, h[level]);
#endif
	}
    }

#if 0
  // This "probably" works, but is not strictly needed because of the way Bill
  // coded up the tangential derivative stuff.  It's handy code though, so I
  // want to keep it around.

  // Clean up corners:
  // The problem here is that APPLYBC fills only grow cells normal to the
  // boundary.  As a result, any corner cell on the boundary (either coarse-fine
  // or fine-fine) is not filled.  For coarse-fine, the operator adjusts itself,
  // sliding away from the box edge to avoid referencing that corner point.  On
  // the physical boundary, though, the corner point is needed, particularly if
  // a fine-fine boundary intersects the physical boundary, since we want the
  // stencil to be independent of the box blocking.  FillBoundary operations
  // won't fix the problem because the "good" data we need is living in the grow
  // region of adjacent fabs.  So, here we play the usual games to treat the
  // newly filled grow cells as "valid" data.

  // Note that we only need to do something where the grids touch the physical boundary.

  const Geometry& geomlev = geomarray[level];
  const BoxArray& grids = inout.boxArray();
  const Box& domain = geomlev.Domain();
  int nGrow = 1;
  int src_comp = 0;
  int num_comp = BL_SPACEDIM;


  // Lets do a quick check to see if we need to do anything at all here
  BoxArray BIGba = BoxArray(grids).grow(nGrow);

  if (! (domain.contains(BIGba.minimalBox())) ) {

    BoxArray boundary_pieces;
    Array<int> proc_idxs;
    Array<Array<int> > old_to_new(grids.size());
    const DistributionMapping& dmap=inout.DistributionMap();

    for (int d=0; d<BL_SPACEDIM; ++d) {
      if (! (geomlev.isPeriodic(d)) ) {

        BoxArray gba = BoxArray(grids).grow(d,nGrow);
        for (int i=0; i<gba.size(); ++i) {
          BoxArray new_pieces = BoxLib::boxComplement(gba[i],domain);
          int size_new = new_pieces.size();
          if (size_new>0) {
            int size_old = boundary_pieces.size();
            boundary_pieces.resize(size_old+size_new);
            proc_idxs.resize(boundary_pieces.size());
            for (int j=0; j<size_new; ++j) {
              boundary_pieces.set(size_old+j,new_pieces[j]);
              proc_idxs[size_old+j] = dmap[i];
              old_to_new[i].push_back(size_old+j);
            }
          }
        }
      }
    }

    proc_idxs.push_back(ParallelDescriptor::MyProc());

    MultiFab boundary_data(boundary_pieces,num_comp,nGrow,
                           DistributionMapping(proc_idxs));

    for (MFIter mfi(inout); mfi.isValid(); ++mfi) {
      const FArrayBox& src_fab = inout[mfi];
      for (int j=0; j<old_to_new[mfi.index()].size(); ++j) {
        int new_box_idx = old_to_new[mfi.index()][j];
        boundary_data[new_box_idx].copy(src_fab,src_comp,0,num_comp);
      }
    }

    boundary_data.FillBoundary();

    // Use a hacked Geometry object to handle the periodic intersections for us.
    // Here, the "domain" is the plane of cells on non-periodic boundary faces,
    // and there may be cells over the periodic boundary in the remaining
    // directions.  We do a Geometry::FillPeriodicBoundary on each non-periodic
    // face to sync these up.
    if (geomlev.isAnyPeriodic()) {
      Array<int> is_per(BL_SPACEDIM,0);
      for (int d=0; d<BL_SPACEDIM; ++d) {
        is_per[d] = geomlev.isPeriodic(d);
      }
      for (int d=0; d<BL_SPACEDIM; ++d) {
        if (! is_per[d]) {
          Box tmpLo = BoxLib::adjCellLo(geomlev.Domain(),d,1);
          Geometry tmpGeomLo(tmpLo,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
          tmpGeomLo.FillPeriodicBoundary(boundary_data);

          Box tmpHi = BoxLib::adjCellHi(geomlev.Domain(),d,1);
          Geometry tmpGeomHi(tmpHi,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
          tmpGeomHi.FillPeriodicBoundary(boundary_data);
        }
      }
    }

    for (MFIter mfi(inout); mfi.isValid(); ++mfi) {
      int idx = mfi.index();
      FArrayBox& dst_fab = inout[mfi];
      for (int j=0; j<old_to_new[idx].size(); ++j) {
        int new_box_idx = old_to_new[mfi.index()][j];
        const FArrayBox& src_fab = boundary_data[new_box_idx];
        const Box& src_box = src_fab.box();

        BoxArray pieces_outside_domain = BoxLib::boxComplement(src_box,domain);
        for (int k=0; k<pieces_outside_domain.size(); ++k) {
          const Box& outside = pieces_outside_domain[k] & dst_fab.box();
          if (outside.ok()) {
            dst_fab.copy(src_fab,outside,0,outside,src_comp,num_comp);
          }
        }
      }
    }
  }
#endif
}
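
In the 2-D branch above, each boundary face needs the masks of the two faces
perpendicular to it, which is what the cdir/perpdir bookkeeping computes.  A
trivial sketch of that mapping with plain ints standing in for BoxLib's
Orientation (perpendicularDir2D is an illustrative name):

    // In 2-D the only direction tangential to a face's coordinate direction
    // is the other one: x-faces take their tangential masks from y, and
    // vice versa.
    int perpendicularDir2D (int cdir)
    {
        return (cdir == 0) ? 1 : 0;
    }
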
Esempio n. 29
0
void
writePlotFile (const std::string& dir,
               const MultiFab&    mf,
               const Geometry&    geom)
{
    //
    // Only let 64 CPUs be writing at any one time.
    //
    VisMF::SetNOutFiles(64);
    //
    // Only the I/O processor makes the directory if it doesn't already exist.
    //
    if (ParallelDescriptor::IOProcessor())
        if (!BoxLib::UtilCreateDirectory(dir, 0755))
            BoxLib::CreateDirectoryFailed(dir);
    //
    // Force other processors to wait till directory is built.
    //
    ParallelDescriptor::Barrier();

    std::string HeaderFileName = dir + "/Header";

    VisMF::IO_Buffer io_buffer(VisMF::IO_Buffer_Size);

    std::ofstream HeaderFile;

    HeaderFile.rdbuf()->pubsetbuf(io_buffer.dataPtr(), io_buffer.size());

    if (ParallelDescriptor::IOProcessor())
    {
        //
        // Only the IOProcessor() writes to the header file.
        //
        HeaderFile.open(HeaderFileName.c_str(), std::ios::out|std::ios::trunc|std::ios::binary);
        if (!HeaderFile.good())
            BoxLib::FileOpenFailed(HeaderFileName);
        HeaderFile << "NavierStokes-V1.1\n";

        HeaderFile << mf.nComp() << '\n';

        for (int ivar = 1; ivar <= mf.nComp(); ivar++) {
          HeaderFile << "Variable " << ivar << "\n";
        }

        HeaderFile << BL_SPACEDIM << '\n';
        HeaderFile << 0 << '\n';
        HeaderFile << 0 << '\n';
        for (int i = 0; i < BL_SPACEDIM; i++)
            HeaderFile << geom.ProbLo(i) << ' ';
        HeaderFile << '\n';
        for (int i = 0; i < BL_SPACEDIM; i++)
            HeaderFile << geom.ProbHi(i) << ' ';
        HeaderFile << '\n';
        HeaderFile << '\n';
        HeaderFile << geom.Domain() << ' ';
        HeaderFile << '\n';
        HeaderFile << 0 << ' ';
        HeaderFile << '\n';
        for (int k = 0; k < BL_SPACEDIM; k++)
            HeaderFile << geom.CellSize()[k] << ' ';
        HeaderFile << '\n';
        HeaderFile << geom.Coord() << '\n';
        HeaderFile << "0\n";
    }
    //
    // Build the directory to hold the MultiFab at this level.
    // The name is relative to the directory containing the Header file.
    //
    static const std::string BaseName = "/Cell";

    std::string Level = BoxLib::Concatenate("Level_", 0, 1);
    //
    // Now for the full pathname of that directory.
    //
    std::string FullPath = dir;
    if (!FullPath.empty() && FullPath[FullPath.length()-1] != '/')
        FullPath += '/';
    FullPath += Level;
    //
    // Only the I/O processor makes the directory if it doesn't already exist.
    //
    if (ParallelDescriptor::IOProcessor())
        if (!BoxLib::UtilCreateDirectory(FullPath, 0755))
            BoxLib::CreateDirectoryFailed(FullPath);
    //
    // Force other processors to wait till directory is built.
    //
    ParallelDescriptor::Barrier();

    if (ParallelDescriptor::IOProcessor())
    {
        HeaderFile << 0 << ' ' << mf.boxArray().size() << ' ' << 0 << '\n';
        HeaderFile << 0 << '\n';

        for (int i = 0; i < mf.boxArray().size(); ++i)
        {
            RealBox loc = RealBox(mf.boxArray()[i],geom.CellSize(),geom.ProbLo());
            for (int n = 0; n < BL_SPACEDIM; n++)
                HeaderFile << loc.lo(n) << ' ' << loc.hi(n) << '\n';
        }

        std::string PathNameInHeader = Level;
        PathNameInHeader += BaseName;
        HeaderFile << PathNameInHeader << '\n';
    }
    //
    // Use the Full pathname when naming the MultiFab.
    //
    std::string TheFullPath = FullPath;
    TheFullPath += BaseName;

    VisMF::Write(mf,TheFullPath);
}
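
The Header written above is plain ASCII, so it can be read back in exactly
the order it was written.  A hedged sketch of parsing just the leading fields
(readPlotHeader is an illustrative name, not a BoxLib routine; it assumes the
field order used by the writes above):

    #include <fstream>
    #include <string>
    #include <vector>

    // Sketch only: version string, component count, one name per component,
    // then the dimensionality -- mirroring the first writes in writePlotFile.
    bool readPlotHeader (const std::string& dir,
                         std::string& version, int& ncomp,
                         std::vector<std::string>& names, int& spacedim)
    {
        std::ifstream is((dir + "/Header").c_str());
        if (!is) return false;
        std::getline(is, version);      // e.g. "NavierStokes-V1.1"
        if (!(is >> ncomp) || ncomp < 0) return false;
        is.ignore(1);                   // skip the newline after the count
        names.resize(ncomp);
        for (int i = 0; i < ncomp; ++i)
            std::getline(is, names[i]); // "Variable 1", "Variable 2", ...
        is >> spacedim;
        return is.good();
    }
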