Beispiel #1
0
void
Nyx::strang_second_step (Real time, Real dt, MultiFab& S_new, MultiFab& D_new)
{
    BL_PROFILE("Nyx::strang_second_step()");
    Real half_dt = 0.5*dt;
    int  min_iter = 100000;
    int  max_iter =      0;

    int min_iter_grid;
    int max_iter_grid;

    // Set a at the half of the time step in the second strang
    const Real a = get_comoving_a(time-half_dt);

    MultiFab reset_e_src(S_new.boxArray(), S_new.DistributionMap(), 1, NUM_GROW);
    reset_e_src.setVal(0.0);
    reset_internal_energy(S_new,D_new,reset_e_src);
    compute_new_temp     (S_new,D_new);

#ifndef FORCING
    {
      const Real z = 1.0/a - 1.0;
      fort_interp_to_this_z(&z);
    }
#endif

#ifdef _OPENMP
#pragma omp parallel private(min_iter_grid,max_iter_grid) reduction(min:min_iter) reduction(max:max_iter)
#endif
    for (MFIter mfi(S_new,true); mfi.isValid(); ++mfi)
    {
        // Here bx is just the valid region
        const Box& bx = mfi.tilebox();

        min_iter_grid = 100000;
        max_iter_grid =      0;

        integrate_state
            (bx.loVect(), bx.hiVect(), 
             BL_TO_FORTRAN(S_new[mfi]),
             BL_TO_FORTRAN(D_new[mfi]),
             &a, &half_dt, &min_iter_grid, &max_iter_grid);

        if (S_new[mfi].contains_nan(bx,0,S_new.nComp()))
        {
            std::cout << "NANS IN THIS GRID " << bx << std::endl;
        }

        min_iter = std::min(min_iter,min_iter_grid);
        max_iter = std::max(max_iter,max_iter_grid);
    }

    ParallelDescriptor::ReduceIntMax(max_iter);
    ParallelDescriptor::ReduceIntMin(min_iter);

    if (heat_cool_type == 1)
        if (ParallelDescriptor::IOProcessor())
            std::cout << "Min/Max Number of Iterations in Second Strang: " << min_iter << " " << max_iter << std::endl;
}
Beispiel #2
0
void
MCLinOp::applyBC (MultiFab& inout,
		  int       level,
		  MCBC_Mode bc_mode)
{
    //
    // The inout MultiFab must have at least MCLinOp_grow ghost cells
    // for applyBC()
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // The inout MultiFab must have at least Periodic_BC_grow cells for the
    // algorithms taking care of periodic boundary conditions.
    //
    BL_ASSERT(inout.nGrow() >= MCLinOp_grow);
    //
    // No coarsened boundary values, cannot apply inhomog at lev>0.
    //
    BL_ASSERT(!(level>0 && bc_mode == MCInhomogeneous_BC));
    
    int flagden = 1;	// fill in the bndry data and undrrelxr
    int flagbc  = 1;	// with values
    if (bc_mode == MCHomogeneous_BC)
        flagbc = 0; // nodata if homog
    int nc = inout.nComp();
    BL_ASSERT(nc == numcomp );

    inout.setBndry(-1.e30);
    inout.FillBoundary();
    prepareForLevel(level);

    geomarray[level].FillPeriodicBoundary(inout,0,nc);
    //
    // Fill boundary cells.
    //
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(inout); mfi.isValid(); ++mfi)
    {
        const int gn = mfi.index();

        BL_ASSERT(gbox[level][gn] == inout.box(gn));

        const BndryData::RealTuple&      bdl = bgb.bndryLocs(gn);
        const Array< Array<BoundCond> >& bdc = bgb.bndryConds(gn);
        const MaskTuple&                 msk = maskvals[level][gn];

        for (OrientationIter oitr; oitr; ++oitr)
        {
            const Orientation face = oitr();
            FabSet& f  = (*undrrelxr[level])[face];
            FabSet& td = (*tangderiv[level])[face];
            int cdr(face);
            const FabSet& fs = bgb.bndryValues(face);
	    Real bcl = bdl[face];
            const Array<BoundCond>& bc = bdc[face];
	    const int *bct = (const int*) bc.dataPtr();
	    const FArrayBox& fsfab = fs[gn];
	    const Real* bcvalptr = fsfab.dataPtr();
            //
	    // Way external derivs stored.
            //
	    const Real* exttdptr = fsfab.dataPtr(numcomp); 
	    const int* fslo      = fsfab.loVect();
	    const int* fshi      = fsfab.hiVect();
	    FArrayBox& inoutfab  = inout[gn];
	    FArrayBox& denfab    = f[gn];
	    FArrayBox& tdfab     = td[gn];
#if BL_SPACEDIM==2
            int cdir = face.coordDir(), perpdir = -1;
	    if (cdir == 0)
                perpdir = 1;
	    else if (cdir == 1)
                perpdir = 0;
	    else
                BoxLib::Abort("MCLinOp::applyBC(): bad logic");

	    const Mask& m    = *msk[face];
	    const Mask& mphi = *msk[Orientation(perpdir,Orientation::high)];
	    const Mask& mplo = *msk[Orientation(perpdir,Orientation::low)];
	    FORT_APPLYBC(
		&flagden, &flagbc, &maxorder,
		inoutfab.dataPtr(), 
                ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
		&cdr, bct, &bcl,
		bcvalptr, ARLIM(fslo), ARLIM(fshi),
		m.dataPtr(),    ARLIM(m.loVect()),    ARLIM(m.hiVect()),
		mphi.dataPtr(), ARLIM(mphi.loVect()), ARLIM(mphi.hiVect()),
		mplo.dataPtr(), ARLIM(mplo.loVect()), ARLIM(mplo.hiVect()),
		denfab.dataPtr(), 
		ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
		exttdptr, ARLIM(fslo), ARLIM(fshi),
		tdfab.dataPtr(),ARLIM(tdfab.loVect()),ARLIM(tdfab.hiVect()),
		inout.box(gn).loVect(), inout.box(gn).hiVect(),
		&nc, h[level]);
#elif BL_SPACEDIM==3
	    const Mask& mn = *msk[Orientation(1,Orientation::high)];
	    const Mask& me = *msk[Orientation(0,Orientation::high)];
	    const Mask& mw = *msk[Orientation(0,Orientation::low)];
	    const Mask& ms = *msk[Orientation(1,Orientation::low)];
	    const Mask& mt = *msk[Orientation(2,Orientation::high)];
	    const Mask& mb = *msk[Orientation(2,Orientation::low)];
	    FORT_APPLYBC(
		&flagden, &flagbc, &maxorder,
		inoutfab.dataPtr(), 
                ARLIM(inoutfab.loVect()), ARLIM(inoutfab.hiVect()),
		&cdr, bct, &bcl,
		bcvalptr, ARLIM(fslo), ARLIM(fshi),
		mn.dataPtr(),ARLIM(mn.loVect()),ARLIM(mn.hiVect()),
		me.dataPtr(),ARLIM(me.loVect()),ARLIM(me.hiVect()),
		mw.dataPtr(),ARLIM(mw.loVect()),ARLIM(mw.hiVect()),
		ms.dataPtr(),ARLIM(ms.loVect()),ARLIM(ms.hiVect()),
		mt.dataPtr(),ARLIM(mt.loVect()),ARLIM(mt.hiVect()),
		mb.dataPtr(),ARLIM(mb.loVect()),ARLIM(mb.hiVect()),
		denfab.dataPtr(), 
		ARLIM(denfab.loVect()), ARLIM(denfab.hiVect()),
		exttdptr, ARLIM(fslo), ARLIM(fshi),
		tdfab.dataPtr(),ARLIM(tdfab.loVect()),ARLIM(tdfab.hiVect()),
		inout.box(gn).loVect(), inout.box(gn).hiVect(),
		&nc, h[level]);
#endif
	}
    }

#if 0
  // This "probably" works, but is not strictly needed just because of the way Bill
  // coded up the tangential derivative stuff.  It's handy code though, so I want to
  // keep it around/

  // Clean up corners:
  // The problem here is that APPLYBC fills only grow cells normal to the boundary.
  // As a result, any corner cell on the boundary (either coarse-fine or fine-fine)
  // is not filled.  For coarse-fine, the operator adjusts itself, sliding away from
  // the box edge to avoid referencing that corner point.  On the physical boundary
  // though, the corner point is needed.  Particularly if a fine-fine boundary intersects
  // the physical boundary, since we want the stencil to be independent of the box
  // blocking.  FillBoundary operations wont fix the problem because the "good"
  // data we need is living in the grow region of adjacent fabs.  So, here we play
  // the usual games to treat the newly filled grow cells as "valid" data.

  // Note that we only need to do something where the grids touch the physical boundary.

  const Geometry& geomlev = geomarray[level];
  const BoxArray& grids = inout.boxArray();
  const Box& domain = geomlev.Domain();
  int nGrow = 1;
  int src_comp = 0;
  int num_comp = BL_SPACEDIM;


  // Lets do a quick check to see if we need to do anything at all here
  BoxArray BIGba = BoxArray(grids).grow(nGrow);

  if (! (domain.contains(BIGba.minimalBox())) ) {

    BoxArray boundary_pieces;
    Array<int> proc_idxs;
    Array<Array<int> > old_to_new(grids.size());
    const DistributionMapping& dmap=inout.DistributionMap();

    for (int d=0; d<BL_SPACEDIM; ++d) {
      if (! (geomlev.isPeriodic(d)) ) {

        BoxArray gba = BoxArray(grids).grow(d,nGrow);
        for (int i=0; i<gba.size(); ++i) {
          BoxArray new_pieces = BoxLib::boxComplement(gba[i],domain);
          int size_new = new_pieces.size();
          if (size_new>0) {
            int size_old = boundary_pieces.size();
            boundary_pieces.resize(size_old+size_new);
            proc_idxs.resize(boundary_pieces.size());
            for (int j=0; j<size_new; ++j) {
              boundary_pieces.set(size_old+j,new_pieces[j]);
              proc_idxs[size_old+j] = dmap[i];
              old_to_new[i].push_back(size_old+j);
            }
          }
        }
      }
    }

    proc_idxs.push_back(ParallelDescriptor::MyProc());

    MultiFab boundary_data(boundary_pieces,num_comp,nGrow,
                           DistributionMapping(proc_idxs));

    for (MFIter mfi(inout); mfi.isValid(); ++mfi) {
      const FArrayBox& src_fab = inout[mfi];
      for (int j=0; j<old_to_new[mfi.index()].size(); ++j) {
        int new_box_idx = old_to_new[mfi.index()][j];
        boundary_data[new_box_idx].copy(src_fab,src_comp,0,num_comp);
      }
    }

    boundary_data.FillBoundary();

    // Use a hacked Geometry object to handle the periodic intersections for us.
    // Here, the "domain" is the plane of cells on non-periodic boundary faces.
    // and there may be cells over the periodic boundary in the remaining directions.
    // We do a Geometry::PFB on each non-periodic face to sync these up.
    if (geomlev.isAnyPeriodic()) {
      Array<int> is_per(BL_SPACEDIM,0);
      for (int d=0; d<BL_SPACEDIM; ++d) {
        is_per[d] = geomlev.isPeriodic(d);
      }
      for (int d=0; d<BL_SPACEDIM; ++d) {
        if (! is_per[d]) {
          Box tmpLo = BoxLib::adjCellLo(geomlev.Domain(),d,1);
          Geometry tmpGeomLo(tmpLo,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
          tmpGeomLo.FillPeriodicBoundary(boundary_data);

          Box tmpHi = BoxLib::adjCellHi(geomlev.Domain(),d,1);
          Geometry tmpGeomHi(tmpHi,&(geomlev.ProbDomain()),(int)geomlev.Coord(),is_per.dataPtr());
          tmpGeomHi.FillPeriodicBoundary(boundary_data);
        }
      }
    }

    for (MFIter mfi(inout); mfi.isValid(); ++mfi) {
      int idx = mfi.index();
      FArrayBox& dst_fab = inout[mfi];
      for (int j=0; j<old_to_new[idx].size(); ++j) {
        int new_box_idx = old_to_new[mfi.index()][j];
        const FArrayBox& src_fab = boundary_data[new_box_idx];
        const Box& src_box = src_fab.box();

        BoxArray pieces_outside_domain = BoxLib::boxComplement(src_box,domain);
        for (int k=0; k<pieces_outside_domain.size(); ++k) {
          const Box& outside = pieces_outside_domain[k] & dst_fab.box();
          if (outside.ok()) {
            dst_fab.copy(src_fab,outside,0,outside,src_comp,num_comp);
          }
        }
      }
    }
  }
#endif
}
Beispiel #3
0
void
Nyx::sdc_reactions (MultiFab& S_old, MultiFab& S_new, MultiFab& D_new, 
                    MultiFab& hydro_src, MultiFab& IR,
                    Real delta_time, Real a_old, Real a_new, int sdc_iter)
{
    BL_PROFILE("Nyx::sdc_reactions()");

    const Real* dx = geom.CellSize();

    // First reset internal energy before call to compute_temp
    MultiFab reset_e_src(S_new.boxArray(), S_new.DistributionMap(), 1, NUM_GROW);
    reset_e_src.setVal(0.0);

    reset_internal_energy(S_new,D_new,reset_e_src);
    compute_new_temp     (S_new,D_new);
    
#ifndef FORCING
    {
      const Real z = 1.0/a_old - 1.0;
      fort_interp_to_this_z(&z);
    }
#endif

    int  min_iter = 100000;
    int  max_iter =      0;

    int  min_iter_grid, max_iter_grid;

    /////////////////////Consider adding ifdefs for whether CVODE is compiled in for these statements
    if(heat_cool_type == 3)
      {
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(S_old,true); mfi.isValid(); ++mfi)
    {
        // Note that this "bx" is only the valid region (unlike for Strang)
      const Box& bx = mfi.tilebox();

        min_iter_grid = 100000;
        max_iter_grid =      0;

        integrate_state_with_source
                (bx.loVect(), bx.hiVect(), 
                 BL_TO_FORTRAN(S_old[mfi]),
                 BL_TO_FORTRAN(S_new[mfi]),
                 BL_TO_FORTRAN(D_new[mfi]),
		 BL_TO_FORTRAN(hydro_src[mfi]),
		 BL_TO_FORTRAN(reset_e_src[mfi]),
		 BL_TO_FORTRAN(IR[mfi]),
                 &a_old, &delta_time, &min_iter_grid, &max_iter_grid);

        min_iter = std::min(min_iter,min_iter_grid);
        max_iter = std::max(max_iter,max_iter_grid);
    }
      }
    else if(heat_cool_type == 5)
      {
#ifdef _OPENMP
#pragma omp parallel
#endif
    for (MFIter mfi(S_old,true); mfi.isValid(); ++mfi)
    {
        // Note that this "bx" is only the valid region (unlike for Strang)
      const Box& bx = mfi.tilebox();

        min_iter_grid = 100000;
        max_iter_grid =      0;
	
        integrate_state_fcvode_with_source
                (bx.loVect(), bx.hiVect(), 
                 BL_TO_FORTRAN(S_old[mfi]),
                 BL_TO_FORTRAN(S_new[mfi]),
                 BL_TO_FORTRAN(D_new[mfi]),
		 BL_TO_FORTRAN(hydro_src[mfi]),
		 BL_TO_FORTRAN(reset_e_src[mfi]),
		 BL_TO_FORTRAN(IR[mfi]),
                 &a_old, &delta_time, &min_iter_grid, &max_iter_grid);

        min_iter = std::min(min_iter,min_iter_grid);
        max_iter = std::max(max_iter,max_iter_grid);
    }

      }

    ParallelDescriptor::ReduceIntMax(max_iter);
    ParallelDescriptor::ReduceIntMin(min_iter);

    amrex::Print() << "Min/Max Number of Iterations in SDC: " << min_iter << " " << max_iter << std::endl;
}
Beispiel #4
0
int
CGSolver::solve_bicgstab (MultiFab&       sol,
                          const MultiFab& rhs,
                          Real            eps_rel,
                          Real            eps_abs,
                          LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_bicgstab()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab ph(ba, ncomp, nghost, dm);
    MultiFab sh(ba, ncomp, nghost, dm);

    MultiFab sorig(ba, ncomp, 0, dm);
    MultiFab p    (ba, ncomp, 0, dm);
    MultiFab r    (ba, ncomp, 0, dm);
    MultiFab s    (ba, ncomp, 0, dm);
    MultiFab rh   (ba, ncomp, 0, dm);
    MultiFab v    (ba, ncomp, 0, dm);
    MultiFab t    (ba, ncomp, 0, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    MultiFab::Copy(sorig,sol,0,0,1,0);
    MultiFab::Copy(rh,   r,  0,0,1,0);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC;

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    Real rnorm = norm_inf(r);
#else
    //
    // Calculate the local values of these norms & reduce their values together.
    //
    Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) };

    ParallelDescriptor::ReduceRealMax(vals,2,color());

    Real       rnorm    = vals[0];
    const Real Lp_norm  = vals[1];
    Real       sol_norm = 0;
#endif
    const Real rnorm0   = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }
    int ret = 0, nit = 1;
    Real rho_1 = 0, alpha = 0, omega = 0;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return ret;
    }

    for (; nit <= maxiter; ++nit)
    {
        const Real rho = dotxy(rh,r);
        if ( rho == 0 ) 
	{
            ret = 1; break;
	}
        if ( nit == 1 )
        {
            MultiFab::Copy(p,r,0,0,1,0);
        }
        else
        {
            const Real beta = (rho/rho_1)*(alpha/omega);
            sxay(p, p, -omega, v);
            sxay(p, r,   beta, p);
        }
        if ( use_mg_precond )
        {
            ph.setVal(0);
            mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            ph.setVal(0);
            Lp.jacobi_smooth(ph, p, lev, temp_bc_mode);
        }
        else 
        {
            MultiFab::Copy(ph,p,0,0,1,0);
        }
        Lp.apply(v, ph, lev, temp_bc_mode);

        if ( Real rhTv = dotxy(rh,v) )
	{
            alpha = rho/rhTv;
	}
        else
	{
            ret = 2; break;
	}
        sxay(sol, sol,  alpha, ph);
        sxay(s,     r, -alpha,  v);

        rnorm = norm_inf(s);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Half Iter "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( use_mg_precond )
        {
            sh.setVal(0);
            mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            sh.setVal(0);
            Lp.jacobi_smooth(sh, s, lev, temp_bc_mode);
        }
        else
        {
            MultiFab::Copy(sh,s,0,0,1,0);
        }
        Lp.apply(t, sh, lev, temp_bc_mode);
        //
        // This is a little funky.  I want to elide one of the reductions
        // in the following two dotxy()s.  We do that by calculating the "local"
        // values and then reducing the two local values at the same time.
        //
        Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) };

        ParallelDescriptor::ReduceRealSum(vals,2,color());

        if ( vals[0] )
	{
            omega = vals[1]/vals[0];
	}
        else
	{
            ret = 3; break;
	}
        sxay(sol, sol,  omega, sh);
        sxay(r,     s, -omega,  t);

        rnorm = norm_inf(r);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Iteration "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( omega == 0 )
	{
            ret = 4; break;
	}
        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Final: Iteration "
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs)
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Beispiel #5
0
int
CGSolver::solve_cabicgstab (MultiFab&       sol,
                            const MultiFab& rhs,
                            Real            eps_rel,
                            Real            eps_abs,
                            LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cabicgstab()");

    BL_ASSERT(sol.nComp() == 1);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    Real  temp1[4*SSS_MAX+1];
    Real  temp2[4*SSS_MAX+1];
    Real  temp3[4*SSS_MAX+1];
    Real     Tp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real    Tpp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real     aj[4*SSS_MAX+1];
    Real     cj[4*SSS_MAX+1];
    Real     ej[4*SSS_MAX+1];
    Real   Tpaj[4*SSS_MAX+1];
    Real   Tpcj[4*SSS_MAX+1];
    Real  Tppaj[4*SSS_MAX+1];
    Real      G[4*SSS_MAX+1][4*SSS_MAX+1];    // Extracted from first 4*SSS+1 columns of Gg[][].  indexed as [row][col]
    Real      g[4*SSS_MAX+1];                 // Extracted from last [4*SSS+1] column of Gg[][].
    Real     Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul().  indexed as [row*(4*SSS+2) + col]
    //
    // If variable_SSS we "telescope" SSS.
    // We start with 1 and increase it up to SSS_MAX on the outer iterations.
    //
    if (variable_SSS) SSS = 1;

    zero(   aj, 4*SSS_MAX+1);
    zero(   cj, 4*SSS_MAX+1);
    zero(   ej, 4*SSS_MAX+1);
    zero( Tpaj, 4*SSS_MAX+1);
    zero( Tpcj, 4*SSS_MAX+1);
    zero(Tppaj, 4*SSS_MAX+1);
    zero(temp1, 4*SSS_MAX+1);
    zero(temp2, 4*SSS_MAX+1);
    zero(temp3, 4*SSS_MAX+1);

    SetMonomialBasis(Tp,Tpp,SSS);

    const int ncomp = 1, nghost = sol.nGrow();
    //
    // Contains the matrix powers of p[] and r[].
    //
    // First 2*SSS+1 components are powers of p[].
    // Next  2*SSS   components are powers of r[].
    //
    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab PR(ba, 4*SSS_MAX+1, 0, dm);

    MultiFab  p(ba, ncomp, 0, dm);
    MultiFab  r(ba, ncomp, 0, dm);
    MultiFab rt(ba, ncomp, 0, dm);
    
    MultiFab tmp(ba, 4, nghost, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    BL_ASSERT(!r.contains_nan());

    MultiFab::Copy(rt,r,0,0,1,0);
    MultiFab::Copy( p,r,0,0,1,0);

    const Real           rnorm0        = norm_inf(r);
    Real                 delta         = dotxy(r,rt);
    const Real           L2_norm_of_rt = sqrt(delta);
    const LinOp::BC_Mode temp_bc_mode  = LinOp::Homogeneous_BC;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_CABiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }

    if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: niter = 0,"
                      << ", rnorm = "   << rnorm0
                      << ", delta = "   << delta
                      << ", eps_abs = " << eps_abs << '\n';
	}
        return 0;
    }

    int niters = 0, ret = 0;

    Real L2_norm_of_resid = 0, atime = 0, gtime = 0;

    bool BiCGStabFailed = false, BiCGStabConverged = false;

    for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; )
    {
        const Real time1 = ParallelDescriptor::second();
        //
        // Compute the matrix powers on p[] & r[] (monomial basis).
        // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[].
        //
        MultiFab::Copy(PR,p,0,0,1,0);
        MultiFab::Copy(PR,r,0,2*SSS+1,1,0);
	
        BL_ASSERT(!PR.contains_nan(0,      1));
        BL_ASSERT(!PR.contains_nan(2*SSS+1,1));
        //
        // We use "tmp" to minimize the number of Lp.apply()s.
        // We do this by doing p & r together in a single call.
        //
        MultiFab::Copy(tmp,p,0,0,1,0);
        MultiFab::Copy(tmp,r,0,1,1,0);

        for (int n = 1; n < 2*SSS; n++)
        {
            Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2);

            MultiFab::Copy(tmp,tmp,2,0,2,0);

            MultiFab::Copy(PR,tmp,0,        n,1,0);
            MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0);

            BL_ASSERT(!PR.contains_nan(n,        1));
            BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1));
        }

        MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0);
        Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1);
        MultiFab::Copy(PR,tmp,1,2*SSS,1,0);

        BL_ASSERT(!PR.contains_nan(2*SSS-1,1));
        BL_ASSERT(!PR.contains_nan(2*SSS,  1));

        Real time2 = ParallelDescriptor::second();

        atime += (time2-time1);

        BuildGramMatrix(Gg, PR, rt, SSS);

        const Real time3 = ParallelDescriptor::second();

        gtime += (time3-time2);
        //
        // Form G[][] and g[] from Gg.
        //
        for (int i = 0, k = 0; i < 4*SSS+1; i++)
        {
            for (int j = 0; j < 4*SSS+1; j++)
                //
                // First 4*SSS+1 elements in each row go to G[][].
                //
                G[i][j] = Gg[k++];
            //
            // Last element in row goes to g[].
            //
            g[i] = Gg[k++];
        }

        zero(aj, 4*SSS+1); aj[0]       = 1;
        zero(cj, 4*SSS+1); cj[2*SSS+1] = 1;
        zero(ej, 4*SSS+1);

        for (int nit = 0; nit < SSS; nit++)
        {
            gemv( Tpaj,  Tp, aj, 4*SSS+1, 4*SSS+1);
            gemv( Tpcj,  Tp, cj, 4*SSS+1, 4*SSS+1);
            gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1);

            const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1);

            if ( g_dot_Tpaj == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 1; break;
            }

            const Real alpha = delta / g_dot_Tpaj;

            if ( std::isinf(alpha) )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 2; break;
            }

            axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            axpy(temp3,   cj, -alpha,  Tpaj, 4*SSS+1);

            const Real omega_numerator   = dot(temp3, temp2, 4*SSS+1);
            const Real omega_denominator = dot(temp1, temp2, 4*SSS+1);
            //
            // NOTE: omega_numerator/omega_denominator can be 0/x or 0/0, but should never be x/0.
            //
            // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj.
            // If omega_numerator==0, and ||s||!=0, then stabilization breakdown.
            //
            // Partial update of ej must happen before the check on omega to ensure forward progress !!!
            //
            axpy(ej, ej, alpha, aj, 4*SSS+1);
            //
            // ej has been updated so consider that we've done an iteration since
            // even if we break out of the loop we'll be able to update both sol.
            //
            niters++;
            //
            // Calculate the norm of Saad's vector 's' to check intra s-step convergence.
            //
            axpy(temp1, cj,-alpha,  Tpaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s));

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n';
                BiCGStabConverged = true; break;
            }

            if ( omega_denominator == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 3; break;
            }

            const Real omega = omega_numerator / omega_denominator;

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( omega == 0   ) std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n';
                if ( std::isinf(omega) ) std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n';
            }

            if ( omega == 0   ) { BiCGStabFailed = true; ret = 4; break; }
            if ( std::isinf(omega) ) { BiCGStabFailed = true; ret = 4; break; }
            //
            // Complete the update of ej & cj now that omega is known to be ok.
            //
            axpy(ej, ej,       omega,    cj, 4*SSS+1);
            axpy(ej, ej,-omega*alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj,      -omega,  Tpcj, 4*SSS+1);
            axpy(cj, cj,      -alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1);
            //
            // Do an early check of the residual to determine convergence.
            //
            gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1);
            //
            // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic.
            // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel).
            // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged.
            //
            const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0);

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n';
                BiCGStabConverged = true; break;
            }

            const Real delta_next = dot(g, cj, 4*SSS+1);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( delta_next == 0   ) std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n';
                if ( std::isinf(delta_next) ) std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf?
            if ( delta_next  == 0  ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown...

            const Real beta = (delta_next/delta)*(alpha/omega);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( beta == 0   ) std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n';
                if ( std::isinf(beta) ) std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf?
            if ( beta == 0   ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0?  can't make further progress(?)

            axpy(aj, cj,        beta,   aj, 4*SSS+1);
            axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1);

            delta = delta_next;
        }
        //
        // Update iterates.
        //
        for (int i = 0; i < 4*SSS+1; i++)
            sxay(sol,sol,ej[i],PR,i);

        MultiFab::Copy(p,PR,0,0,1,0);
        p.mult(aj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(p,p,aj[i],PR,i);

        MultiFab::Copy(r,PR,0,0,1,0);
        r.mult(cj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(r,r,cj[i],PR,i);

        if ( !BiCGStabFailed && !BiCGStabConverged )
        {
            m += SSS;

            if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); }
        }
    }

    if ( verbose > 0 )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: Final: Iteration "
                      << std::setw(4) << niters
                      << " rel. err. "
                      << L2_norm_of_resid << '\n';
        }

        if ( verbose > 1 )
        {
            Real tmp[2] = { atime, gtime };

            ParallelDescriptor::ReduceRealMax(tmp,2,color());

            if ( ParallelDescriptor::IOProcessor(color()) )
            {
                Spacer(std::cout, lev);
                std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0] << ", gram time: " << tmp[1] << '\n';
            }
        }
    }

    if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged)
    {
        if ( L2_norm_of_resid > L2_norm_of_rt )
        {
            if ( ParallelDescriptor::IOProcessor(color()) )
                BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!");
            //
            // Return code 8 tells the MultiGrid driver to zero out the solution!
            //
            ret = 8;
        }
        else
        {
            //
            // Return codes 1-7 tells the MultiGrid driver to smooth the solution!
            //
            ret = 7;
        }
    }

    return ret;
}
Beispiel #6
0
int
CGSolver::jbb_precond (MultiFab&       sol,
		       const MultiFab& rhs,
                       int             lev,
		       LinOp&          Lp)
{
    //
    // This is a local routine.  No parallel is allowed to happen here.
    //
    int                  lev_loc = lev;
    const Real           eps_rel = 1.e-2;
    const Real           eps_abs = 1.e-16;
    const int            nghost  = sol.nGrow();
    const int            ncomp   = sol.nComp();
    const bool           local   = true;
    const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC;

    BL_ASSERT(ncomp == 1 );
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc));

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab sorig(ba, ncomp, nghost, dm);

    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    sorig.copy(sol);

    Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local);

    sol.setVal(0);

    Real       rnorm    = norm_inf(r,local);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "     jbb_precond: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev_loc, local);
    Real sol_norm = 0;
    int  ret      = 0;			// will return this value if all goes well
    Real rho_1    = 0;
    int  nit      = 1;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        z.copy(r);

        Real rho = dotxy(z,r,local);
        if (nit == 1)
        {
            p.copy(z);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }

        Lp.apply(q, p, lev_loc, bc_mode, local);

        Real alpha;
        if ( Real pw = dotxy(p,q,local) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:" << " nit " << nit
                      << " rho " << rho << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm    = norm_inf(r,   local);
        sol_norm = norm_inf(sol, local);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs )
	{
            break;
	}
      
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "jbb_precond: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
	{
            BoxLib::Warning("jbb_precond:: failed to converge!");
	}
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Beispiel #7
0
int
CGSolver::solve_cg (MultiFab&       sol,
		    const MultiFab& rhs,
		    Real            eps_rel,
		    Real            eps_abs,
		    LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cg()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab sorig(ba, ncomp, nghost, dm);
    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    MultiFab r1(ba, ncomp, nghost, dm);
    MultiFab z1(ba, ncomp, nghost, dm);
    MultiFab r2(ba, ncomp, nghost, dm);
    MultiFab z2(ba, ncomp, nghost, dm);

    MultiFab::Copy(sorig,sol,0,0,1,0);

    Lp.residual(r, rhs, sorig, lev, bc_mode);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC;

    Real       rnorm    = norm_inf(r);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "              CG: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev);
    Real sol_norm      = 0;
    Real rho_1         = 0;
    int  ret           = 0;
    int  nit           = 1;

    if ( rnorm == 0 || rnorm < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "       CG: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" <<  eps_rel*(Lp_norm*sol_norm + rnorm0 ) 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1)
        {
            z.setVal(0);

            jbb_precond(z,r,lev,Lp);
        }
        else
        {
            MultiFab::Copy(z,r,0,0,1,0);
        }

        Real rho = dotxy(z,r);

        if (nit == 1)
        {
            MultiFab::Copy(p,z,0,0,1,0);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }
        Lp.apply(q, p, lev, temp_bc_mode);

        Real alpha;
        if ( Real pw = dotxy(p,q) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_cg:"
                      << " nit " << nit
                      << " rho " << rho
                      << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm = norm_inf(r);
        sol_norm = norm_inf(sol);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "       CG:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break;
#endif
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "       CG: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 &&  rnorm > eps_rel*rnorm0 && rnorm > eps_abs )
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_cg: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}