Example #1
0
void advance (MultiFab& old_phi, MultiFab& new_phi, PArray<MultiFab>& flux,
	      Real time, Real dt, const Geometry& geom, PhysBCFunct& physbcf,
	      BCRec& bcr)
{
  // Fill the ghost cells of each grid from the other grids
  // includes periodic domain boundaries
  old_phi.FillBoundary(geom.periodicity());

  // Fill physical boundaries
  physbcf.FillBoundary(old_phi, time);

  int Ncomp = old_phi.nComp();
  int ng_p = old_phi.nGrow();
  int ng_f = flux[0].nGrow();

  const Real* dx = geom.CellSize();

  //
  // Note that this simple example is not optimized.
  // The following two MFIter loops could be merged
  // and we do not have to use flux MultiFab.
  // 

  // Compute fluxes one grid at a time
  for ( MFIter mfi(old_phi); mfi.isValid(); ++mfi )
  {
    const Box& bx = mfi.validbox();

    compute_flux(old_phi[mfi].dataPtr(),
		 &ng_p,
		 flux[0][mfi].dataPtr(),
		 flux[1][mfi].dataPtr(),
#if (BL_SPACEDIM == 3)   
		 flux[2][mfi].dataPtr(),
#endif
		 &ng_f, bx.loVect(), bx.hiVect(), 
		 (geom.Domain()).loVect(),
		 (geom.Domain()).hiVect(),
		 bcr.vect(),
		 &dx[0]);
  }

  // Advance the solution one grid at a time
  for ( MFIter mfi(old_phi); mfi.isValid(); ++mfi )
  {
    const Box& bx = mfi.validbox();

    update_phi(old_phi[mfi].dataPtr(),
	       new_phi[mfi].dataPtr(),
	       &ng_p,
	       flux[0][mfi].dataPtr(),
	       flux[1][mfi].dataPtr(),
#if (BL_SPACEDIM == 3)   
	       flux[2][mfi].dataPtr(),
#endif
	       &ng_f, bx.loVect(), bx.hiVect(), &dx[0] , &dt);
  }
}
Example #2
0
//
// What's the slowest way I can think of to compute all the norms??
//
Real
MFNorm (const MultiFab& mfab, 
        const int       exponent,
        const int       srcComp,
        const int       numComp,
        const int       numGrow)
{
    BL_ASSERT (numGrow <= mfab.nGrow());
    BoxArray boxes = mfab.boxArray();
    boxes.grow(numGrow);
    //
    // Get a copy of the multifab
    //
    MultiFab mftmp(mfab.boxArray(), numComp, 0);
    MultiFab::Copy(mftmp,mfab,srcComp,0,numComp,numGrow);
    //
    // Calculate the Norms
    //
    Real myNorm = 0;
    if ( exponent == 0 )
    {
        for ( MFIter mftmpmfi(mftmp); mftmpmfi.isValid(); ++mftmpmfi)
        {
            mftmp[mftmpmfi].abs(boxes[mftmpmfi.index()], 0, numComp);
            myNorm = std::max(myNorm, mftmp[mftmpmfi].norm(0, 0, numComp));
        }
	ParallelDescriptor::ReduceRealMax(myNorm);

    } else if ( exponent == 1 )
    {
        for ( MFIter mftmpmfi(mftmp); mftmpmfi.isValid(); ++mftmpmfi)
        {
            mftmp[mftmpmfi].abs(boxes[mftmpmfi.index()], 0, numComp);

            myNorm += mftmp[mftmpmfi].norm(1, 0, numComp);
        }
	ParallelDescriptor::ReduceRealSum(myNorm);

    } else if ( exponent == 2 )
    {
        for ( MFIter mftmpmfi(mftmp); mftmpmfi.isValid(); ++mftmpmfi)
        {
            mftmp[mftmpmfi].abs(boxes[mftmpmfi.index()], 0, numComp);

            myNorm += pow(mftmp[mftmpmfi].norm(2, 0, numComp), 2);
        }
	ParallelDescriptor::ReduceRealSum(myNorm);
        myNorm = sqrt( myNorm );

    } else {

        BoxLib::Error("Invalid exponent to norm function");
    }
    
    return myNorm;
}
Example #3
0
    void fill_boundary(MultiFab& mf, int scomp, int ncomp, const Geometry& geom, bool cross)
    {
	if (mf.nGrow() <= 0) return;
	
	bool local = false;  // Don't think we ever want it to be true.
	mf.FillBoundary(scomp, ncomp, local, cross);

	bool do_corners = !cross;
	geom.FillPeriodicBoundary(mf, scomp, ncomp, do_corners, local);
    }
Example #4
0
void 
MultiFab_C_to_F::share (MultiFab& cmf, const std::string& fmf_name)
{
    const Box& bx = cmf.boxArray()[0];
    int nodal[BL_SPACEDIM];
    for ( int i = 0; i < BL_SPACEDIM; ++i ) {
	nodal[i] = (bx.type(i) == IndexType::NODE) ? 1 : 0;
    }

    share_multifab_with_f (fmf_name.c_str(), cmf.nComp(), cmf.nGrow(), nodal);

    for (MFIter mfi(cmf); mfi.isValid(); ++mfi)
    {
	int li = mfi.LocalIndex();
	const FArrayBox& fab = cmf[mfi];
	share_fab_with_f (li, fab.dataPtr());
    }
}
Example #5
0
    void average_cellcenter_to_face (PArray<MultiFab>& fc, const MultiFab& cc, const Geometry& geom)
    {
	BL_ASSERT(cc.nComp() == 1);
	BL_ASSERT(cc.nGrow() >= 1);
	BL_ASSERT(fc.size() == BL_SPACEDIM);
	BL_ASSERT(fc[0].nComp() == 1); // We only expect fc to have the gradient perpendicular to the face

	const Real* dx     = geom.CellSize();
	const Real* problo = geom.ProbLo();
	int coord_type = Geometry::Coord();

#ifdef _OPENMP
#pragma omp parallel
#endif
	for (MFIter mfi(cc,true); mfi.isValid(); ++mfi) 
	{
	    const Box& xbx = mfi.nodaltilebox(0);
#if (BL_SPACEDIM > 1)
	    const Box& ybx = mfi.nodaltilebox(1);
#endif
#if (BL_SPACEDIM == 3)
	    const Box& zbx = mfi.nodaltilebox(2);
#endif
	    
	    BL_FORT_PROC_CALL(BL_AVG_CC_TO_FC,bl_avg_cc_to_fc)
		(xbx.loVect(), xbx.hiVect(),
#if (BL_SPACEDIM > 1)
		 ybx.loVect(), ybx.hiVect(),
#endif
#if (BL_SPACEDIM == 3)
		 zbx.loVect(), zbx.hiVect(),
#endif
		 D_DECL(BL_TO_FORTRAN(fc[0][mfi]),
			BL_TO_FORTRAN(fc[1][mfi]),
			BL_TO_FORTRAN(fc[2][mfi])),
		 BL_TO_FORTRAN(cc[mfi]),
		 dx, problo, coord_type);
	}
    }
Example #6
0
void
Nyx::strang_first_step (Real time, Real dt, MultiFab& S_old, MultiFab& D_old)
{
    BL_PROFILE("Nyx::strang_first_step()");
    Real half_dt = 0.5*dt;

    const Real a = get_comoving_a(time);

#ifndef FORCING
    {
      const Real z = 1.0/a - 1.0;
      fort_interp_to_this_z(&z);
    }
#endif

#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(S_old,true); mfi.isValid(); ++mfi)
    {
        // Note that this "bx" includes the grow cells 
        const Box& bx = mfi.growntilebox(S_old.nGrow());

        int  min_iter = 100000;
        int  max_iter =      0;

        integrate_state
                (bx.loVect(), bx.hiVect(), 
                 BL_TO_FORTRAN(S_old[mfi]),
                 BL_TO_FORTRAN(D_old[mfi]),
                 &a, &half_dt, &min_iter, &max_iter);

#ifndef NDEBUG
        if (S_old[mfi].contains_nan())
            amrex::Abort("state has NaNs after the first strang call");
#endif

    }
}
Example #7
0
void
MCLinOp::applyBC (MultiFab& inout,
		  int       level,
		  MCBC_Mode bc_mode)
{
    //
    // The inout MultiFab must have at least MCLinOp_grow ghost cells
    // for applyBC()
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // The inout MultiFab must have at least Periodic_BC_grow cells for the
    // algorithms taking care of periodic boundary conditions.
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // No coarsened boundary values, cannot apply inhomog at lev>0.
    //
    BL_ASSERT(!(level>0 && bc_mode == MCInhomogeneous_BC));
    
    int flagden = 1;	// fill in the bndry data and undrrelxr
    int flagbc  = 1;	// with values
    if (bc_mode == MCHomogeneous_BC)
        flagbc = 0; // nodata if homog
    int nc = inout.nComp();
    BL_ASSERT(nc == numcomp );

    inout.setBndry(-1.e30);
    inout.FillBoundary();
    prepareForLevel(level);

    geomarray[level].FillPeriodicBoundary(inout,0,nc);
    //
    // Fill boundary cells.
    //
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(inout); mfi.isValid(); ++mfi)
    {
        const int gn = mfi.index();

        BL_ASSERT(gbox[level][gn] == inout.box(gn));

        const BndryData::RealTuple&      bdl = bgb.bndryLocs(gn);
        const Array< Array<BoundCond> >& bdc = bgb.bndryConds(gn);
        const MaskTuple&                 msk = maskvals[level][gn];

        for (OrientationIter oitr; oitr; ++oitr)
        {
            const Orientation face = oitr();
            FabSet& f  = (*undrrelxr[level])[face];
            FabSet& td = (*tangderiv[level])[face];
            int cdr(face);
            const FabSet& fs = bgb.bndryValues(face);
	    Real bcl = bdl[face];
            const Array<BoundCond>& bc = bdc[face];
	    const int *bct = (const int*) bc.dataPtr();
	    const FArrayBox& fsfab = fs[gn];
	    const Real* bcvalptr = fsfab.dataPtr();
            //
	    // Way external derivs stored.
            //
	    const Real* exttdptr = fsfab.dataPtr(numcomp); 
	    const int* fslo      = fsfab.loVect();
	    const int* fshi      = fsfab.hiVect();
	    FArrayBox& inoutfab  = inout[gn];
	    FArrayBox& denfab    = f[gn];
	    FArrayBox& tdfab     = td[gn];
#if BL_SPACEDIM==2
            int cdir = face.coordDir(), perpdir = -1;
	    if (cdir == 0)
                perpdir = 1;
	    else if (cdir == 1)
                perpdir = 0;
	    else
                BoxLib::Abort("MCLinOp::applyBC(): bad logic");

	    const Mask& m    = *msk[face];
	    const Mask& mphi = *msk[Orientation(perpdir,Orientation::high)];
	    const Mask& mplo = *msk[Orientation(perpdir,Orientation::low)];
	    FORT_APPLYBC(
		&flagden, &flagbc, &maxorder,
		inoutfab.dataPtr(), 
                ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
		&cdr, bct, &bcl,
		bcvalptr, ARLIM(fslo), ARLIM(fshi),
		m.dataPtr(),    ARLIM(m.loVect()),    ARLIM(m.hiVect()),
		mphi.dataPtr(), ARLIM(mphi.loVect()), ARLIM(mphi.hiVect()),
		mplo.dataPtr(), ARLIM(mplo.loVect()), ARLIM(mplo.hiVect()),
		denfab.dataPtr(), 
		ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
		exttdptr, ARLIM(fslo), ARLIM(fshi),
		tdfab.dataPtr(),ARLIM(tdfab.loVect()),ARLIM(tdfab.hiVect()),
		inout.box(gn).loVect(), inout.box(gn).hiVect(),
		&nc, h[level]);
#elif BL_SPACEDIM==3
	    const Mask& mn = *msk[Orientation(1,Orientation::high)];
	    const Mask& me = *msk[Orientation(0,Orientation::high)];
	    const Mask& mw = *msk[Orientation(0,Orientation::low)];
	    const Mask& ms = *msk[Orientation(1,Orientation::low)];
	    const Mask& mt = *msk[Orientation(2,Orientation::high)];
	    const Mask& mb = *msk[Orientation(2,Orientation::low)];
	    FORT_APPLYBC(
		&flagden, &flagbc, &maxorder,
		inoutfab.dataPtr(), 
                ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
		&cdr, bct, &bcl,
		bcvalptr, ARLIM(fslo), ARLIM(fshi),
		mn.dataPtr(),ARLIM(mn.loVect()),ARLIM(mn.hiVect()),
		me.dataPtr(),ARLIM(me.loVect()),ARLIM(me.hiVect()),
		mw.dataPtr(),ARLIM(mw.loVect()),ARLIM(mw.hiVect()),
		ms.dataPtr(),ARLIM(ms.loVect()),ARLIM(ms.hiVect()),
		mt.dataPtr(),ARLIM(mt.loVect()),ARLIM(mt.hiVect()),
		mb.dataPtr(),ARLIM(mb.loVect()),ARLIM(mb.hiVect()),
		denfab.dataPtr(), 
		ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
		exttdptr, ARLIM(fslo), ARLIM(fshi),
		tdfab.dataPtr(),ARLIM(tdfab.loVect()),ARLIM(tdfab.hiVect()),
		inout.box(gn).loVect(), inout.box(gn).hiVect(),
		&nc, h[level]);
#endif
	}
    }

#if 0
  // This "probably" works, but is not strictly needed just because of the way Bill
  // coded up the tangential derivative stuff.  It's handy code though, so I want to
  // keep it around/

  // Clean up corners:
  // The problem here is that APPLYBC fills only grow cells normal to the boundary.
  // As a result, any corner cell on the boundary (either coarse-fine or fine-fine)
  // is not filled.  For coarse-fine, the operator adjusts itself, sliding away from
  // the box edge to avoid referencing that corner point.  On the physical boundary
  // though, the corner point is needed.  Particularly if a fine-fine boundary intersects
  // the physical boundary, since we want the stencil to be independent of the box
  // blocking.  FillBoundary operations wont fix the problem because the "good"
  // data we need is living in the grow region of adjacent fabs.  So, here we play
  // the usual games to treat the newly filled grow cells as "valid" data.

  // Note that we only need to do something where the grids touch the physical boundary.

  const Geometry& geomlev = geomarray[level];
  const BoxArray& grids = inout.boxArray();
  const Box& domain = geomlev.Domain();
  int nGrow = 1;
  int src_comp = 0;
  int num_comp = BL_SPACEDIM;


  // Lets do a quick check to see if we need to do anything at all here
  BoxArray BIGba = BoxArray(grids).grow(nGrow);

  if (! (domain.contains(BIGba.minimalBox())) ) {

    BoxArray boundary_pieces;
    Array<int> proc_idxs;
    Array<Array<int> > old_to_new(grids.size());
    const DistributionMapping& dmap=inout.DistributionMap();

    for (int d=0; d<BL_SPACEDIM; ++d) {
      if (! (geomlev.isPeriodic(d)) ) {

        BoxArray gba = BoxArray(grids).grow(d,nGrow);
        for (int i=0; i<gba.size(); ++i) {
          BoxArray new_pieces = BoxLib::boxComplement(gba[i],domain);
          int size_new = new_pieces.size();
          if (size_new>0) {
            int size_old = boundary_pieces.size();
            boundary_pieces.resize(size_old+size_new);
            proc_idxs.resize(boundary_pieces.size());
            for (int j=0; j<size_new; ++j) {
              boundary_pieces.set(size_old+j,new_pieces[j]);
              proc_idxs[size_old+j] = dmap[i];
              old_to_new[i].push_back(size_old+j);
            }
          }
        }
      }
    }

    proc_idxs.push_back(ParallelDescriptor::MyProc());

    MultiFab boundary_data(boundary_pieces,num_comp,nGrow,
                           DistributionMapping(proc_idxs));

    for (MFIter mfi(inout); mfi.isValid(); ++mfi) {
      const FArrayBox& src_fab = inout[mfi];
      for (int j=0; j<old_to_new[mfi.index()].size(); ++j) {
        int new_box_idx = old_to_new[mfi.index()][j];
        boundary_data[new_box_idx].copy(src_fab,src_comp,0,num_comp);
      }
    }

    boundary_data.FillBoundary();

    // Use a hacked Geometry object to handle the periodic intersections for us.
    // Here, the "domain" is the plane of cells on non-periodic boundary faces.
    // and there may be cells over the periodic boundary in the remaining directions.
    // We do a Geometry::PFB on each non-periodic face to sync these up.
    if (geomlev.isAnyPeriodic()) {
      Array<int> is_per(BL_SPACEDIM,0);
      for (int d=0; d<BL_SPACEDIM; ++d) {
        is_per[d] = geomlev.isPeriodic(d);
      }
      for (int d=0; d<BL_SPACEDIM; ++d) {
        if (! is_per[d]) {
          Box tmpLo = BoxLib::adjCellLo(geomlev.Domain(),d,1);
          Geometry tmpGeomLo(tmpLo,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
          tmpGeomLo.FillPeriodicBoundary(boundary_data);

          Box tmpHi = BoxLib::adjCellHi(geomlev.Domain(),d,1);
          Geometry tmpGeomHi(tmpHi,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
          tmpGeomHi.FillPeriodicBoundary(boundary_data);
        }
      }
    }

    for (MFIter mfi(inout); mfi.isValid(); ++mfi) {
      int idx = mfi.index();
      FArrayBox& dst_fab = inout[mfi];
      for (int j=0; j<old_to_new[idx].size(); ++j) {
        int new_box_idx = old_to_new[mfi.index()][j];
        const FArrayBox& src_fab = boundary_data[new_box_idx];
        const Box& src_box = src_fab.box();

        BoxArray pieces_outside_domain = BoxLib::boxComplement(src_box,domain);
        for (int k=0; k<pieces_outside_domain.size(); ++k) {
          const Box& outside = pieces_outside_domain[k] & dst_fab.box();
          if (outside.ok()) {
            dst_fab.copy(src_fab,outside,0,outside,src_comp,num_comp);
          }
        }
      }
    }
  }
#endif
}
Example #8
0
void
LinOp::applyBC (MultiFab&      inout,
                int            src_comp,
                int            num_comp,
                int            level,
                LinOp::BC_Mode bc_mode,
                bool           local,
		int            bndry_comp)
{
    BL_PROFILE("LinOp::applyBC()");
    //
    // The inout MultiFab needs at least LinOp_grow ghost cells for applyBC.
    //
    BL_ASSERT(inout.nGrow() >= LinOp_grow);
    //
    // No coarsened boundary values, cannot apply inhomog at lev>0.
    //
    BL_ASSERT(level < numLevels());
    BL_ASSERT(!(level > 0 && bc_mode == Inhomogeneous_BC));

    int flagden = 1; // Fill in undrrelxr.
    int flagbc  = 1; // Fill boundary data.

    if (bc_mode == LinOp::Homogeneous_BC)
        //
        // No data if homogeneous.
        //
        flagbc = 0;

    const bool cross = true;

    inout.FillBoundary(src_comp,num_comp,local,cross);

    prepareForLevel(level);
    //
    // Do periodic fixup.
    //
    BL_ASSERT(level<geomarray.size());
    geomarray[level].FillPeriodicBoundary(inout,src_comp,num_comp,false,local);
    //
    // Fill boundary cells.
    //
    // OMP over boxes
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(inout); mfi.isValid(); ++mfi)
    {
        const int gn = mfi.index();

        BL_ASSERT(gbox[level][gn] == inout.box(gn));

        BL_ASSERT(level<maskvals.size() && maskvals[level].local_index(gn)>=0);
        BL_ASSERT(level<lmaskvals.size() && lmaskvals[level].local_index(gn)>=0);
        BL_ASSERT(level<undrrelxr.size());

        const MaskTuple&                 ma  =  maskvals[level][gn];
        const MaskTuple&                 lma = lmaskvals[level][gn];
        const BndryData::RealTuple&      bdl = bgb->bndryLocs(gn);
        const Array< Array<BoundCond> >& bdc = bgb->bndryConds(gn);

        for (OrientationIter oitr; oitr; ++oitr)
        {
            const Orientation o = oitr();

            FabSet&       f   = (*undrrelxr[level])[o];
            int           cdr = o;
            const FabSet& fs  = bgb->bndryValues(o);
            const Mask&   m   = local ? (*lma[o]) : (*ma[o]);
            Real          bcl = bdl[o];
            BL_ASSERT(bdc[o].size()>bndry_comp);
            int           bct = bdc[o][bndry_comp];

            const Box&       vbx   = inout.box(gn);
            FArrayBox&       iofab = inout[mfi];
            BL_ASSERT(f.size()>gn);
            BL_ASSERT(fs.size()>gn);

            FArrayBox&       ffab  = f[mfi];
            const FArrayBox& fsfab = fs[mfi];

            FORT_APPLYBC(&flagden, &flagbc, &maxorder,
                         iofab.dataPtr(src_comp),
                         ARLIM(iofab.loVect()), ARLIM(iofab.hiVect()),
                         &cdr, &bct, &bcl,
                         fsfab.dataPtr(bndry_comp), 
                         ARLIM(fsfab.loVect()), ARLIM(fsfab.hiVect()),
                         m.dataPtr(),
                         ARLIM(m.loVect()), ARLIM(m.hiVect()),
                         ffab.dataPtr(),
                         ARLIM(ffab.loVect()), ARLIM(ffab.hiVect()),
                         vbx.loVect(),
                         vbx.hiVect(), &num_comp, h[level]);
        }
    }
}
Example #9
0
void
ABec4::applyBC (MultiFab&     inout,
                int            src_comp,
                int            num_comp,
                int            level,
                LinOp::BC_Mode bc_mode,
                bool           local,
		int            bndry_comp)
{
    //
    // The inout MultiFab needs enough ghost cells for applyBC.
    //
    BL_ASSERT(inout.nGrow() >= NumGrow(level));
    //
    // No coarsened boundary values, cannot apply inhomog at lev>0.
    //
    BL_ASSERT(level < numLevelsHO());
    BL_ASSERT(!(level > 0 && bc_mode == Inhomogeneous_BC));

    int flagden = 1; // Fill in undrrelxr.
    int flagbc  = 1; // Fill boundary data.

    if (bc_mode == LinOp::Homogeneous_BC)
        //
        // No data if homogeneous.
        //
        flagbc = 0;

    prepareForLevel(level);

    const bool cross = false;
    inout.FillBoundary(src_comp,num_comp,geomarray[level].periodicity(),cross);

#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(inout); mfi.isValid(); ++mfi)
    {
        const int gn = mfi.index();

        BL_ASSERT(gbox[level][gn] == inout.box(gn));

        BL_ASSERT(level<undrrelxr.size());

        const BndryData::RealTuple&      bdl = bgb->bndryLocs(gn);
        const Array< Array<BoundCond> >& bdc = bgb->bndryConds(gn);

        for (OrientationIter oitr; oitr; ++oitr)
        {
            const Orientation o = oitr();

            FabSet&       f   = (*undrrelxr[level])[o];
            int           cdr = o;
            const FabSet& fs  = bgb->bndryValues(o);
            const Mask&   m   = local ? lmaskvals[level][o][mfi] : maskvals[level][o][mfi];
            Real          bcl = bdl[o];
            BL_ASSERT(bdc[o].size()>bndry_comp);
            int           bct = bdc[o][bndry_comp];

            const Box&       vbx   = inout.box(gn);
            FArrayBox&       iofab = inout[gn];
            BL_ASSERT(f.size()>gn);
            BL_ASSERT(fs.size()>gn);

            FArrayBox&       ffab  = f[gn];
            const FArrayBox& fsfab = fs[gn];

            FORT_APPLYBC4(&flagden, &flagbc, &maxorder,
			  iofab.dataPtr(src_comp),
			  ARLIM(iofab.loVect()), ARLIM(iofab.hiVect()),
			  &cdr, &bct, &bcl,
			  fsfab.dataPtr(bndry_comp), 
			  ARLIM(fsfab.loVect()), ARLIM(fsfab.hiVect()),
			  m.dataPtr(),
			  ARLIM(m.loVect()), ARLIM(m.hiVect()),
			  ffab.dataPtr(),
			  ARLIM(ffab.loVect()), ARLIM(ffab.hiVect()),
			  vbx.loVect(),
			  vbx.hiVect(), &num_comp, h[level]);
        }
    }

  // Clean up corners:
  // The problem here is that APPLYBC fills only grow cells normal to the boundary.
  // As a result, any corner cell on the boundary (either coarse-fine or fine-fine)
  // is not filled.  For coarse-fine, the operator adjusts itself, sliding away from
  // the box edge to avoid referencing that corner point.  On the physical boundary
  // though, the corner point is needed.  Particularly if a fine-fine boundary intersects
  // the physical boundary, since we want the stencil to be independent of the box
  // blocking. 
    inout.EnforcePeriodicity(geomarray[level].periodicity());

#ifdef _OPENMP
#pragma omp parallel
#endif
  for (MFIter mfi(inout); mfi.isValid(); ++mfi) {

    const int gn = mfi.index();

    BL_ASSERT(gbox[level][gn] == inout.box(gn));

    const Box& vbx   = inout.box(gn);
    FArrayBox& iofab = inout[gn];

    FORT_APPLYBC4_TOUCHUP(
      iofab.dataPtr(src_comp),ARLIM(iofab.loVect()), ARLIM(iofab.hiVect()),
      vbx.loVect(), vbx.hiVect(), &num_comp);
  }
}
Example #10
0
int
CGSolver::solve_bicgstab (MultiFab&       sol,
                          const MultiFab& rhs,
                          Real            eps_rel,
                          Real            eps_abs,
                          LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_bicgstab()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab ph(ba, ncomp, nghost, dm);
    MultiFab sh(ba, ncomp, nghost, dm);

    MultiFab sorig(ba, ncomp, 0, dm);
    MultiFab p    (ba, ncomp, 0, dm);
    MultiFab r    (ba, ncomp, 0, dm);
    MultiFab s    (ba, ncomp, 0, dm);
    MultiFab rh   (ba, ncomp, 0, dm);
    MultiFab v    (ba, ncomp, 0, dm);
    MultiFab t    (ba, ncomp, 0, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    MultiFab::Copy(sorig,sol,0,0,1,0);
    MultiFab::Copy(rh,   r,  0,0,1,0);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC;

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    Real rnorm = norm_inf(r);
#else
    //
    // Calculate the local values of these norms & reduce their values together.
    //
    Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) };

    ParallelDescriptor::ReduceRealMax(vals,2,color());

    Real       rnorm    = vals[0];
    const Real Lp_norm  = vals[1];
    Real       sol_norm = 0;
#endif
    const Real rnorm0   = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }
    int ret = 0, nit = 1;
    Real rho_1 = 0, alpha = 0, omega = 0;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return ret;
    }

    for (; nit <= maxiter; ++nit)
    {
        const Real rho = dotxy(rh,r);
        if ( rho == 0 ) 
	{
            ret = 1; break;
	}
        if ( nit == 1 )
        {
            MultiFab::Copy(p,r,0,0,1,0);
        }
        else
        {
            const Real beta = (rho/rho_1)*(alpha/omega);
            sxay(p, p, -omega, v);
            sxay(p, r,   beta, p);
        }
        if ( use_mg_precond )
        {
            ph.setVal(0);
            mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            ph.setVal(0);
            Lp.jacobi_smooth(ph, p, lev, temp_bc_mode);
        }
        else 
        {
            MultiFab::Copy(ph,p,0,0,1,0);
        }
        Lp.apply(v, ph, lev, temp_bc_mode);

        if ( Real rhTv = dotxy(rh,v) )
	{
            alpha = rho/rhTv;
	}
        else
	{
            ret = 2; break;
	}
        sxay(sol, sol,  alpha, ph);
        sxay(s,     r, -alpha,  v);

        rnorm = norm_inf(s);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Half Iter "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( use_mg_precond )
        {
            sh.setVal(0);
            mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            sh.setVal(0);
            Lp.jacobi_smooth(sh, s, lev, temp_bc_mode);
        }
        else
        {
            MultiFab::Copy(sh,s,0,0,1,0);
        }
        Lp.apply(t, sh, lev, temp_bc_mode);
        //
        // This is a little funky.  I want to elide one of the reductions
        // in the following two dotxy()s.  We do that by calculating the "local"
        // values and then reducing the two local values at the same time.
        //
        Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) };

        ParallelDescriptor::ReduceRealSum(vals,2,color());

        if ( vals[0] )
	{
            omega = vals[1]/vals[0];
	}
        else
	{
            ret = 3; break;
	}
        sxay(sol, sol,  omega, sh);
        sxay(r,     s, -omega,  t);

        rnorm = norm_inf(r);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Iteration "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( omega == 0 )
	{
            ret = 4; break;
	}
        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Final: Iteration "
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs)
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Example #11
0
int
CGSolver::solve_cabicgstab (MultiFab&       sol,
                            const MultiFab& rhs,
                            Real            eps_rel,
                            Real            eps_abs,
                            LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cabicgstab()");

    BL_ASSERT(sol.nComp() == 1);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    Real  temp1[4*SSS_MAX+1];
    Real  temp2[4*SSS_MAX+1];
    Real  temp3[4*SSS_MAX+1];
    Real     Tp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real    Tpp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real     aj[4*SSS_MAX+1];
    Real     cj[4*SSS_MAX+1];
    Real     ej[4*SSS_MAX+1];
    Real   Tpaj[4*SSS_MAX+1];
    Real   Tpcj[4*SSS_MAX+1];
    Real  Tppaj[4*SSS_MAX+1];
    Real      G[4*SSS_MAX+1][4*SSS_MAX+1];    // Extracted from first 4*SSS+1 columns of Gg[][].  indexed as [row][col]
    Real      g[4*SSS_MAX+1];                 // Extracted from last [4*SSS+1] column of Gg[][].
    Real     Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul().  indexed as [row*(4*SSS+2) + col]
    //
    // If variable_SSS we "telescope" SSS.
    // We start with 1 and increase it up to SSS_MAX on the outer iterations.
    //
    if (variable_SSS) SSS = 1;

    zero(   aj, 4*SSS_MAX+1);
    zero(   cj, 4*SSS_MAX+1);
    zero(   ej, 4*SSS_MAX+1);
    zero( Tpaj, 4*SSS_MAX+1);
    zero( Tpcj, 4*SSS_MAX+1);
    zero(Tppaj, 4*SSS_MAX+1);
    zero(temp1, 4*SSS_MAX+1);
    zero(temp2, 4*SSS_MAX+1);
    zero(temp3, 4*SSS_MAX+1);

    SetMonomialBasis(Tp,Tpp,SSS);

    const int ncomp = 1, nghost = sol.nGrow();
    //
    // Contains the matrix powers of p[] and r[].
    //
    // First 2*SSS+1 components are powers of p[].
    // Next  2*SSS   components are powers of r[].
    //
    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab PR(ba, 4*SSS_MAX+1, 0, dm);

    MultiFab  p(ba, ncomp, 0, dm);
    MultiFab  r(ba, ncomp, 0, dm);
    MultiFab rt(ba, ncomp, 0, dm);
    
    MultiFab tmp(ba, 4, nghost, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    BL_ASSERT(!r.contains_nan());

    MultiFab::Copy(rt,r,0,0,1,0);
    MultiFab::Copy( p,r,0,0,1,0);

    const Real           rnorm0        = norm_inf(r);
    Real                 delta         = dotxy(r,rt);
    const Real           L2_norm_of_rt = sqrt(delta);
    const LinOp::BC_Mode temp_bc_mode  = LinOp::Homogeneous_BC;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_CABiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }

    if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: niter = 0,"
                      << ", rnorm = "   << rnorm0
                      << ", delta = "   << delta
                      << ", eps_abs = " << eps_abs << '\n';
	}
        return 0;
    }

    int niters = 0, ret = 0;

    Real L2_norm_of_resid = 0, atime = 0, gtime = 0;

    bool BiCGStabFailed = false, BiCGStabConverged = false;

    for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; )
    {
        const Real time1 = ParallelDescriptor::second();
        //
        // Compute the matrix powers on p[] & r[] (monomial basis).
        // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[].
        //
        MultiFab::Copy(PR,p,0,0,1,0);
        MultiFab::Copy(PR,r,0,2*SSS+1,1,0);
	
        BL_ASSERT(!PR.contains_nan(0,      1));
        BL_ASSERT(!PR.contains_nan(2*SSS+1,1));
        //
        // We use "tmp" to minimize the number of Lp.apply()s.
        // We do this by doing p & r together in a single call.
        //
        MultiFab::Copy(tmp,p,0,0,1,0);
        MultiFab::Copy(tmp,r,0,1,1,0);

        for (int n = 1; n < 2*SSS; n++)
        {
            Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2);

            MultiFab::Copy(tmp,tmp,2,0,2,0);

            MultiFab::Copy(PR,tmp,0,        n,1,0);
            MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0);

            BL_ASSERT(!PR.contains_nan(n,        1));
            BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1));
        }

        MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0);
        Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1);
        MultiFab::Copy(PR,tmp,1,2*SSS,1,0);

        BL_ASSERT(!PR.contains_nan(2*SSS-1,1));
        BL_ASSERT(!PR.contains_nan(2*SSS,  1));

        Real time2 = ParallelDescriptor::second();

        atime += (time2-time1);

        BuildGramMatrix(Gg, PR, rt, SSS);

        const Real time3 = ParallelDescriptor::second();

        gtime += (time3-time2);
        //
        // Form G[][] and g[] from Gg.
        //
        for (int i = 0, k = 0; i < 4*SSS+1; i++)
        {
            for (int j = 0; j < 4*SSS+1; j++)
                //
                // First 4*SSS+1 elements in each row go to G[][].
                //
                G[i][j] = Gg[k++];
            //
            // Last element in row goes to g[].
            //
            g[i] = Gg[k++];
        }

        zero(aj, 4*SSS+1); aj[0]       = 1;
        zero(cj, 4*SSS+1); cj[2*SSS+1] = 1;
        zero(ej, 4*SSS+1);

        for (int nit = 0; nit < SSS; nit++)
        {
            gemv( Tpaj,  Tp, aj, 4*SSS+1, 4*SSS+1);
            gemv( Tpcj,  Tp, cj, 4*SSS+1, 4*SSS+1);
            gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1);

            const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1);

            if ( g_dot_Tpaj == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 1; break;
            }

            const Real alpha = delta / g_dot_Tpaj;

            if ( std::isinf(alpha) )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 2; break;
            }

            axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            axpy(temp3,   cj, -alpha,  Tpaj, 4*SSS+1);

            const Real omega_numerator   = dot(temp3, temp2, 4*SSS+1);
            const Real omega_denominator = dot(temp1, temp2, 4*SSS+1);
            //
            // NOTE: omega_numerator/omega_denominator can be 0/x or 0/0, but should never be x/0.
            //
            // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj.
            // If omega_numerator==0, and ||s||!=0, then stabilization breakdown.
            //
            // Partial update of ej must happen before the check on omega to ensure forward progress !!!
            //
            axpy(ej, ej, alpha, aj, 4*SSS+1);
            //
            // ej has been updated so consider that we've done an iteration since
            // even if we break out of the loop we'll be able to update both sol.
            //
            niters++;
            //
            // Calculate the norm of Saad's vector 's' to check intra s-step convergence.
            //
            axpy(temp1, cj,-alpha,  Tpaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s));

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n';
                BiCGStabConverged = true; break;
            }

            if ( omega_denominator == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 3; break;
            }

            const Real omega = omega_numerator / omega_denominator;

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( omega == 0   ) std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n';
                if ( std::isinf(omega) ) std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n';
            }

            if ( omega == 0   ) { BiCGStabFailed = true; ret = 4; break; }
            if ( std::isinf(omega) ) { BiCGStabFailed = true; ret = 4; break; }
            //
            // Complete the update of ej & cj now that omega is known to be ok.
            //
            axpy(ej, ej,       omega,    cj, 4*SSS+1);
            axpy(ej, ej,-omega*alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj,      -omega,  Tpcj, 4*SSS+1);
            axpy(cj, cj,      -alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1);
            //
            // Do an early check of the residual to determine convergence.
            //
            gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1);
            //
            // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic.
            // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel).
            // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged.
            //
            const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0);

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n';
                BiCGStabConverged = true; break;
            }

            const Real delta_next = dot(g, cj, 4*SSS+1);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( delta_next == 0   ) std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n';
                if ( std::isinf(delta_next) ) std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf?
            if ( delta_next  == 0  ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown...

            const Real beta = (delta_next/delta)*(alpha/omega);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( beta == 0   ) std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n';
                if ( std::isinf(beta) ) std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf?
            if ( beta == 0   ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0?  can't make further progress(?)

            axpy(aj, cj,        beta,   aj, 4*SSS+1);
            axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1);

            delta = delta_next;
        }
        //
        // Update iterates.
        //
        for (int i = 0; i < 4*SSS+1; i++)
            sxay(sol,sol,ej[i],PR,i);

        MultiFab::Copy(p,PR,0,0,1,0);
        p.mult(aj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(p,p,aj[i],PR,i);

        MultiFab::Copy(r,PR,0,0,1,0);
        r.mult(cj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(r,r,cj[i],PR,i);

        if ( !BiCGStabFailed && !BiCGStabConverged )
        {
            m += SSS;

            if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); }
        }
    }

    if ( verbose > 0 )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: Final: Iteration "
                      << std::setw(4) << niters
                      << " rel. err. "
                      << L2_norm_of_resid << '\n';
        }

        if ( verbose > 1 )
        {
            Real tmp[2] = { atime, gtime };

            ParallelDescriptor::ReduceRealMax(tmp,2,color());

            if ( ParallelDescriptor::IOProcessor(color()) )
            {
                Spacer(std::cout, lev);
                std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0] << ", gram time: " << tmp[1] << '\n';
            }
        }
    }

    if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged)
    {
        if ( L2_norm_of_resid > L2_norm_of_rt )
        {
            if ( ParallelDescriptor::IOProcessor(color()) )
                BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!");
            //
            // Return code 8 tells the MultiGrid driver to zero out the solution!
            //
            ret = 8;
        }
        else
        {
            //
            // Return codes 1-7 tells the MultiGrid driver to smooth the solution!
            //
            ret = 7;
        }
    }

    return ret;
}
Example #12
0
int
CGSolver::jbb_precond (MultiFab&       sol,
		       const MultiFab& rhs,
                       int             lev,
		       LinOp&          Lp)
{
    //
    // This is a local routine.  No parallel is allowed to happen here.
    //
    int                  lev_loc = lev;
    const Real           eps_rel = 1.e-2;
    const Real           eps_abs = 1.e-16;
    const int            nghost  = sol.nGrow();
    const int            ncomp   = sol.nComp();
    const bool           local   = true;
    const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC;

    BL_ASSERT(ncomp == 1 );
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc));

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab sorig(ba, ncomp, nghost, dm);

    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    sorig.copy(sol);

    Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local);

    sol.setVal(0);

    Real       rnorm    = norm_inf(r,local);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "     jbb_precond: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev_loc, local);
    Real sol_norm = 0;
    int  ret      = 0;			// will return this value if all goes well
    Real rho_1    = 0;
    int  nit      = 1;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        z.copy(r);

        Real rho = dotxy(z,r,local);
        if (nit == 1)
        {
            p.copy(z);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }

        Lp.apply(q, p, lev_loc, bc_mode, local);

        Real alpha;
        if ( Real pw = dotxy(p,q,local) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:" << " nit " << nit
                      << " rho " << rho << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm    = norm_inf(r,   local);
        sol_norm = norm_inf(sol, local);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs )
	{
            break;
	}
      
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "jbb_precond: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
	{
            BoxLib::Warning("jbb_precond:: failed to converge!");
	}
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Example #13
0
int
CGSolver::solve_cg (MultiFab&       sol,
		    const MultiFab& rhs,
		    Real            eps_rel,
		    Real            eps_abs,
		    LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cg()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab sorig(ba, ncomp, nghost, dm);
    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    MultiFab r1(ba, ncomp, nghost, dm);
    MultiFab z1(ba, ncomp, nghost, dm);
    MultiFab r2(ba, ncomp, nghost, dm);
    MultiFab z2(ba, ncomp, nghost, dm);

    MultiFab::Copy(sorig,sol,0,0,1,0);

    Lp.residual(r, rhs, sorig, lev, bc_mode);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC;

    Real       rnorm    = norm_inf(r);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "              CG: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev);
    Real sol_norm      = 0;
    Real rho_1         = 0;
    int  ret           = 0;
    int  nit           = 1;

    if ( rnorm == 0 || rnorm < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "       CG: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" <<  eps_rel*(Lp_norm*sol_norm + rnorm0 ) 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1)
        {
            z.setVal(0);

            jbb_precond(z,r,lev,Lp);
        }
        else
        {
            MultiFab::Copy(z,r,0,0,1,0);
        }

        Real rho = dotxy(z,r);

        if (nit == 1)
        {
            MultiFab::Copy(p,z,0,0,1,0);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }
        Lp.apply(q, p, lev, temp_bc_mode);

        Real alpha;
        if ( Real pw = dotxy(p,q) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_cg:"
                      << " nit " << nit
                      << " rho " << rho
                      << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm = norm_inf(r);
        sol_norm = norm_inf(sol);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "       CG:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break;
#endif
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "       CG: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 &&  rnorm > eps_rel*rnorm0 && rnorm > eps_abs )
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_cg: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}