Example #1
0
void
MultiGrid::solve (MultiFab&       _sol,
                  const MultiFab& _rhs,
                  Real            _eps_rel,
                  Real            _eps_abs,
                  LinOp::BC_Mode  bc_mode)
{
    //
    // Prepare memory for new level, and solve the general boundary
    // value problem to within relative error _eps_rel.  Customized
    // to solve at level=0.
    //
    const int level = 0;
    prepareForLevel(level);

    //
    // Copy the initial guess, which may contain inhomogeneous boundray conditions,
    // into both "initialsolution" (to be added back later) and into "cor[0]" which
    // we will only use here to compute the residual, then will set back to 0 below
    //
    initialsolution->copy(_sol);
    cor[level]->copy(_sol);

    //
    // Put the problem in residual-correction form: we will now use "rhs[level
    // the initial residual (rhs[0]) rather than the initial RHS (_rhs)
    // to begin the solve.
    //
    Lp.residual(*rhs[level],_rhs,*cor[level],level,bc_mode);

    //
    // Now initialize correction to zero at this level (auto-filled at levels below)
    //
    (*cor[level]).setVal(0.0); //

    //
    // Elide a reduction by doing these together.
    //
    Real tmp[2] = { norm_inf(_rhs,true), norm_inf(*rhs[level],true) };
    ParallelDescriptor::ReduceRealMax(tmp,2);
    if ( ParallelDescriptor::IOProcessor() && verbose > 0)
    {
        Spacer(std::cout, level);
        std::cout << "MultiGrid: Initial rhs                = " << tmp[0] << '\n';
        std::cout << "MultiGrid: Initial residual           = " << tmp[1] << '\n';
    }

    if (tmp[1] == 0.0)
	return;

    //
    // We can now use homogeneous bc's because we have put the problem into residual-correction form.
    //
    if ( !solve_(_sol, _eps_rel, _eps_abs, LinOp::Homogeneous_BC, tmp[0], tmp[1]) )
        BoxLib::Error("MultiGrid:: failed to converge!");
}
Example #2
0
int
MultiGrid::solve_ (MultiFab&      _sol,
                   Real           eps_rel,
                   Real           eps_abs,
                   LinOp::BC_Mode bc_mode,
                   Real           bnorm,
                   Real           resnorm0)
{
    BL_PROFILE("MultiGrid::solve_()");

  //
  // If do_fixed_number_of_iters = 1, then do maxiter iterations without checking for convergence 
  // 
  // If do_fixed_number_of_iters = 0, then relax system maxiter times, 
  //    and stop if relative error <= _eps_rel or if absolute err <= _abs_eps
  //
  const Real strt_time = ParallelDescriptor::second();

  const int level = 0;

  //
  // We take the max of the norms of the initial RHS and the initial residual in order to capture both cases
  //
  Real norm_to_test_against;
  bool using_bnorm;
  if (bnorm >= resnorm0) 
  {
      norm_to_test_against = bnorm;
      using_bnorm          = true;
  } else {
      norm_to_test_against = resnorm0;
      using_bnorm          = false;
  } 

  int        returnVal = 0;
  Real       error     = resnorm0;

  //
  // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed
  //
  if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 )
  {
      std::cout << "MultiGrid: Tolerance "
                << eps_rel
                << " < 1e-16 is probably set too low" << '\n';
  }

  //
  // We initially define norm_cor based on the initial solution only so we can use it in the very first iteration
  //    to decide whether the problem is already solved (this is relevant if the previous solve used was only solved
  //    according to the Anorm test and not the bnorm test).
  //
  Real       norm_cor    = norm_inf(*initialsolution,true);
  ParallelDescriptor::ReduceRealMax(norm_cor);

  int        nit         = 1;
  const Real norm_Lp     = Lp.norm(0, level);
  Real       cg_time     = 0;

  if ( use_Anorm_for_convergence == 1 ) 
  {
     //
     // Don't need to go any further -- no iterations are required
     //
     if (error <= eps_abs || error < eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) 
     {
         if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
         {
             std::cout << "   Problem is already converged -- no iterations required\n";
         }
         return 1;
     }

     for ( ;
           ( (error > eps_abs &&
              error > eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) ||
             (do_fixed_number_of_iters == 1) )
             && nit <= maxiter;
           ++nit)
     {
         relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time);

         Real tmp[2] = { norm_inf(*cor[level],true), errorEstimate(level,bc_mode,true) };

         ParallelDescriptor::ReduceRealMax(tmp,2);

         norm_cor = tmp[0];
         error    = tmp[1];
	
         if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
         {
             const Real rel_error = error / norm_to_test_against;
             Spacer(std::cout, level);
             if (using_bnorm)
             {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/bnorm = "
                           << rel_error << '\n';
             } else {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/resid0 = "
                           << rel_error << '\n';
             }
         }
     }
  }
  else
  {
     //
     // Don't need to go any further -- no iterations are required
     //
     if (error <= eps_abs || error < eps_rel*norm_to_test_against) 
     {
         if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
         {
             std::cout << "   Problem is already converged -- no iterations required\n";
         }
         return 1;
     }

     for ( ;
           ( (error > eps_abs &&
              error > eps_rel*norm_to_test_against) ||
             (do_fixed_number_of_iters == 1) )
             && nit <= maxiter;
           ++nit)
     {
         relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time);

         error = errorEstimate(level, bc_mode);
	
         if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
         {
             const Real rel_error = error / norm_to_test_against;
             Spacer(std::cout, level);
             if (using_bnorm)
             {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/bnorm = "
                           << rel_error << '\n';
             } else {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/resid0 = "
                           << rel_error << '\n';
             }
         }
     }
  }

  Real run_time = (ParallelDescriptor::second() - strt_time);

  if ( verbose > 0 )
  {
      if ( ParallelDescriptor::IOProcessor() )
      {
          const Real rel_error = error / norm_to_test_against;
          Spacer(std::cout, level);
          if (using_bnorm)
          {
              std::cout << "MultiGrid: Iteration   "
                        << nit-1
                        << " resid/bnorm = "
                        << rel_error << '\n';
          } else {
              std::cout << "MultiGrid: Iteration   "
                        << nit-1
                        << " resid/resid0 = "
                        << rel_error << '\n';
             }
      }

      if ( verbose > 1 )
      {
          Real tmp[2] = { run_time, cg_time };

          ParallelDescriptor::ReduceRealMax(tmp,2,ParallelDescriptor::IOProcessorNumber());

          if ( ParallelDescriptor::IOProcessor() )
              std::cout << ", Solve time: " << tmp[0] << ", CG time: " << tmp[1];
      }

      if ( ParallelDescriptor::IOProcessor() ) std::cout << '\n';
  }

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
      if ( do_fixed_number_of_iters == 1)
      {
          std::cout << "   Did fixed number of iterations: " << maxiter << std::endl;
      } 
      else if ( error < eps_rel*norm_to_test_against )
      {
          std::cout << "   Converged res < eps_rel*max(bnorm,res_norm)\n";
      } 
      else if ( (use_Anorm_for_convergence == 1) && (error < eps_rel*norm_Lp*norm_cor) )
      {
          std::cout << "   Converged res < eps_rel*Anorm*sol\n";
      } 
      else if ( error < eps_abs )
      {
          std::cout << "   Converged res < eps_abs\n";
      }
  }

  //
  // Omit ghost update since maybe not initialized in calling routine.
  // Add to boundary values stored in initialsolution.
  //
  _sol.copy(*cor[level]);
  _sol.plus(*initialsolution,0,_sol.nComp(),0);

  if ( use_Anorm_for_convergence == 1 ) 
  {
     if ( do_fixed_number_of_iters == 1                ||
          error <= eps_rel*(norm_Lp*norm_cor+norm_to_test_against) ||
          error <= eps_abs )
       returnVal = 1;
  } 
  else 
  {
     if ( do_fixed_number_of_iters == 1 ||
          error <= eps_rel*(norm_to_test_against)   ||
          error <= eps_abs )
       returnVal = 1;
  } 

  //
  // Otherwise, failed to solve satisfactorily
  //
  return returnVal;
}
Example #3
0
	void runGui(OrbGui &gui)
	{
		// GUI state
		vec2i wndSize = gui.input->getWindowSize();

		// set up the default projection & modelview matrices
		glMatrixMode(GL_PROJECTION);
		glLoadIdentity();
		glOrtho(0.0, (double)wndSize.x, (double)wndSize.y, 0.0, 10.0, -10.0);
		glMatrixMode(GL_MODELVIEW);
		glLoadIdentity();
		
		ColumnLayout lyt(FixedLayout(10, 10, 200, wndSize.y), 10, 10, 10, 10, 3);

		Label("Ikarus").run(gui, lyt);

		Spacer(vec2i(0, 10)).run(gui, lyt);

		Label("Skeleton:").run(gui, lyt);
		ComboBox skelSel("skeleton-sel", WidgetID(curSkel));
		for (int i = 0; i < (int)skeletons.size(); ++i)
			skelSel.add(WidgetID(i), skeletons[i].name);
		curSkel = skelSel.run(gui, lyt).getIndex();

		if (Button("reload-btn", "Reload").run(gui, lyt))
			skeletons.reset_at(curSkel, new SkeletonItem(skeletons[curSkel].fname, skeletons[curSkel].name));
		
		SkeletonItem &skel = skeletons[curSkel];

		Spacer(vec2i(0, 10)).run(gui, lyt);

		if (Button("reset-btn", "Reset Pose").run(gui, lyt))
		{
			skel.solver->resetPose();
			skel.targetPos = skel.solver->getEffectorPos();
			targetSpeed = 0.0;
		}

		if (Button("reset-all-btn", "Reset All").run(gui, lyt))
		{
			skel.solver->resetAll();
			skel.targetPos = skel.solver->getEffectorPos();
			targetSpeed = 0.0;
		}

		// the grid is always shown
		// (no point turning it off really)
		//showGrid = CheckBox("show-grid-chk", "Show grid", showGrid).run(gui, lyt);
		showJointBasis = CheckBox("show-joint-basis-chk", "Show joint basis vectors", showJointBasis).run(gui, lyt);
		showConstraints = CheckBox("show-constraints-chk", "Show joint constraints", showConstraints).run(gui, lyt);

		ikMode = CheckBox("ik-mode-chk", "IK Mode", ikMode).run(gui, lyt);
		ikEnabled = CheckBox("ik-enabled-chk", "IK Enabled", ikEnabled, ikMode).run(gui, lyt);
		bool constraintsOn = CheckBox("ik-constrained-chk", "Enable Constraints", skel.solver->areConstraintsEnabled(), ikMode).run(gui, lyt);
		skel.solver->enableConstraints(constraintsOn);

		if (Button("solve-btn", "Solve", ikMode && !ikEnabled).run(gui, lyt))
			skel.solver->solveIk(30);

		if (Button("step-btn", "Step IK", ikMode && !ikEnabled).run(gui, lyt))
			skel.solver->iterateIk();

		if (Button("constraint-btn", "Apply Constraints", ikMode).run(gui, lyt))
		{
			skel.solver->applyAllConstraints();
			skel.targetPos = skel.solver->getEffectorPos();
			targetSpeed = 0.0;
		}

		Label("Root bone:").run(gui, lyt);
		ComboBox rootSel("root-sel", WidgetID(&skel.solver->getRootBone()));
		for (int i = 0; i < skel.skeleton.numBones(); ++i)
		{
			const Bone &b = skel.skeleton[i];
			if (! b.isEffector())
				rootSel.add(WidgetID(&b), b.name);
		}
		const Bone *newRootBone = rootSel.run(gui, lyt).getData<const Bone>();
		skel.solver->setRootBone(*newRootBone);

		Label("Effector:").run(gui, lyt);
		ComboBox effectorSel("effector-sel", WidgetID(&skel.solver->getEffector()));
		for (int i = 0; i < skel.skeleton.numBones(); ++i)
		{
			const Bone &b = skel.skeleton[i];
			if (b.isEffector())
				effectorSel.add(WidgetID(&b), b.name);
		}
		const Bone *newEffector = effectorSel.run(gui, lyt).getData<const Bone>();
		if (newEffector != &skel.solver->getEffector())
		{
			skel.solver->setEffector(*newEffector);
			skel.solver->setTargetPos(skel.solver->getEffectorPos());
			skel.targetPos = skel.solver->getEffectorPos();
		}

		int leftRightSplit = 250;
		int topBottomSplit = wndSize.y - 200;
		int a = leftRightSplit + (wndSize.x - leftRightSplit) / 3;
		int b = leftRightSplit + ((wndSize.x - leftRightSplit)*2) / 3;

		FixedLayout mainViewLyt(leftRightSplit, 0, wndSize.x - leftRightSplit, topBottomSplit);
		FixedLayout ortho0Lyt(leftRightSplit, topBottomSplit, a - leftRightSplit, wndSize.y - topBottomSplit);
		FixedLayout ortho1Lyt(a, topBottomSplit, b - a, wndSize.y - topBottomSplit);
		FixedLayout ortho2Lyt(b, topBottomSplit, wndSize.x - b, wndSize.y - topBottomSplit);

		if (ikMode)
		{
			IkSolverDisplay("displayP", &camPerspective, skel.solver.get(), showJointBasis, showConstraints, showGrid ? gridList : 0).run(gui, mainViewLyt);
			IkSolverDisplay("displayX", &camX, skel.solver.get(), showJointBasis, showConstraints).run(gui, ortho0Lyt);
			IkSolverDisplay("displayY", &camY, skel.solver.get(), showJointBasis, showConstraints).run(gui, ortho1Lyt);
			IkSolverDisplay("displayZ", &camZ, skel.solver.get(), showJointBasis, showConstraints).run(gui, ortho2Lyt);
		}
		else
		{
			SkeletonDisplay("displayP", &camPerspective, &skel.skeleton, showJointBasis, showConstraints, showGrid ? gridList : 0).run(gui, mainViewLyt);
			SkeletonDisplay("displayX", &camX, &skel.skeleton, showJointBasis, showConstraints).run(gui, ortho0Lyt);
			SkeletonDisplay("displayY", &camY, &skel.skeleton, showJointBasis, showConstraints).run(gui, ortho1Lyt);
			SkeletonDisplay("displayZ", &camZ, &skel.skeleton, showJointBasis, showConstraints).run(gui, ortho2Lyt);
		}
	}
Example #4
0
int
CGSolver::solve_bicgstab (MultiFab&       sol,
                          const MultiFab& rhs,
                          Real            eps_rel,
                          Real            eps_abs,
                          LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_bicgstab()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab ph(ba, ncomp, nghost, dm);
    MultiFab sh(ba, ncomp, nghost, dm);

    MultiFab sorig(ba, ncomp, 0, dm);
    MultiFab p    (ba, ncomp, 0, dm);
    MultiFab r    (ba, ncomp, 0, dm);
    MultiFab s    (ba, ncomp, 0, dm);
    MultiFab rh   (ba, ncomp, 0, dm);
    MultiFab v    (ba, ncomp, 0, dm);
    MultiFab t    (ba, ncomp, 0, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    MultiFab::Copy(sorig,sol,0,0,1,0);
    MultiFab::Copy(rh,   r,  0,0,1,0);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC;

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    Real rnorm = norm_inf(r);
#else
    //
    // Calculate the local values of these norms & reduce their values together.
    //
    Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) };

    ParallelDescriptor::ReduceRealMax(vals,2,color());

    Real       rnorm    = vals[0];
    const Real Lp_norm  = vals[1];
    Real       sol_norm = 0;
#endif
    const Real rnorm0   = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }
    int ret = 0, nit = 1;
    Real rho_1 = 0, alpha = 0, omega = 0;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return ret;
    }

    for (; nit <= maxiter; ++nit)
    {
        const Real rho = dotxy(rh,r);
        if ( rho == 0 ) 
	{
            ret = 1; break;
	}
        if ( nit == 1 )
        {
            MultiFab::Copy(p,r,0,0,1,0);
        }
        else
        {
            const Real beta = (rho/rho_1)*(alpha/omega);
            sxay(p, p, -omega, v);
            sxay(p, r,   beta, p);
        }
        if ( use_mg_precond )
        {
            ph.setVal(0);
            mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            ph.setVal(0);
            Lp.jacobi_smooth(ph, p, lev, temp_bc_mode);
        }
        else 
        {
            MultiFab::Copy(ph,p,0,0,1,0);
        }
        Lp.apply(v, ph, lev, temp_bc_mode);

        if ( Real rhTv = dotxy(rh,v) )
	{
            alpha = rho/rhTv;
	}
        else
	{
            ret = 2; break;
	}
        sxay(sol, sol,  alpha, ph);
        sxay(s,     r, -alpha,  v);

        rnorm = norm_inf(s);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Half Iter "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( use_mg_precond )
        {
            sh.setVal(0);
            mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            sh.setVal(0);
            Lp.jacobi_smooth(sh, s, lev, temp_bc_mode);
        }
        else
        {
            MultiFab::Copy(sh,s,0,0,1,0);
        }
        Lp.apply(t, sh, lev, temp_bc_mode);
        //
        // This is a little funky.  I want to elide one of the reductions
        // in the following two dotxy()s.  We do that by calculating the "local"
        // values and then reducing the two local values at the same time.
        //
        Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) };

        ParallelDescriptor::ReduceRealSum(vals,2,color());

        if ( vals[0] )
	{
            omega = vals[1]/vals[0];
	}
        else
	{
            ret = 3; break;
	}
        sxay(sol, sol,  omega, sh);
        sxay(r,     s, -omega,  t);

        rnorm = norm_inf(r);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Iteration "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( omega == 0 )
	{
            ret = 4; break;
	}
        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Final: Iteration "
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs)
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Example #5
0
int
CGSolver::solve_cabicgstab (MultiFab&       sol,
                            const MultiFab& rhs,
                            Real            eps_rel,
                            Real            eps_abs,
                            LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cabicgstab()");

    BL_ASSERT(sol.nComp() == 1);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    Real  temp1[4*SSS_MAX+1];
    Real  temp2[4*SSS_MAX+1];
    Real  temp3[4*SSS_MAX+1];
    Real     Tp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real    Tpp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real     aj[4*SSS_MAX+1];
    Real     cj[4*SSS_MAX+1];
    Real     ej[4*SSS_MAX+1];
    Real   Tpaj[4*SSS_MAX+1];
    Real   Tpcj[4*SSS_MAX+1];
    Real  Tppaj[4*SSS_MAX+1];
    Real      G[4*SSS_MAX+1][4*SSS_MAX+1];    // Extracted from first 4*SSS+1 columns of Gg[][].  indexed as [row][col]
    Real      g[4*SSS_MAX+1];                 // Extracted from last [4*SSS+1] column of Gg[][].
    Real     Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul().  indexed as [row*(4*SSS+2) + col]
    //
    // If variable_SSS we "telescope" SSS.
    // We start with 1 and increase it up to SSS_MAX on the outer iterations.
    //
    if (variable_SSS) SSS = 1;

    zero(   aj, 4*SSS_MAX+1);
    zero(   cj, 4*SSS_MAX+1);
    zero(   ej, 4*SSS_MAX+1);
    zero( Tpaj, 4*SSS_MAX+1);
    zero( Tpcj, 4*SSS_MAX+1);
    zero(Tppaj, 4*SSS_MAX+1);
    zero(temp1, 4*SSS_MAX+1);
    zero(temp2, 4*SSS_MAX+1);
    zero(temp3, 4*SSS_MAX+1);

    SetMonomialBasis(Tp,Tpp,SSS);

    const int ncomp = 1, nghost = sol.nGrow();
    //
    // Contains the matrix powers of p[] and r[].
    //
    // First 2*SSS+1 components are powers of p[].
    // Next  2*SSS   components are powers of r[].
    //
    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab PR(ba, 4*SSS_MAX+1, 0, dm);

    MultiFab  p(ba, ncomp, 0, dm);
    MultiFab  r(ba, ncomp, 0, dm);
    MultiFab rt(ba, ncomp, 0, dm);
    
    MultiFab tmp(ba, 4, nghost, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    BL_ASSERT(!r.contains_nan());

    MultiFab::Copy(rt,r,0,0,1,0);
    MultiFab::Copy( p,r,0,0,1,0);

    const Real           rnorm0        = norm_inf(r);
    Real                 delta         = dotxy(r,rt);
    const Real           L2_norm_of_rt = sqrt(delta);
    const LinOp::BC_Mode temp_bc_mode  = LinOp::Homogeneous_BC;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_CABiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }

    if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: niter = 0,"
                      << ", rnorm = "   << rnorm0
                      << ", delta = "   << delta
                      << ", eps_abs = " << eps_abs << '\n';
	}
        return 0;
    }

    int niters = 0, ret = 0;

    Real L2_norm_of_resid = 0, atime = 0, gtime = 0;

    bool BiCGStabFailed = false, BiCGStabConverged = false;

    for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; )
    {
        const Real time1 = ParallelDescriptor::second();
        //
        // Compute the matrix powers on p[] & r[] (monomial basis).
        // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[].
        //
        MultiFab::Copy(PR,p,0,0,1,0);
        MultiFab::Copy(PR,r,0,2*SSS+1,1,0);
	
        BL_ASSERT(!PR.contains_nan(0,      1));
        BL_ASSERT(!PR.contains_nan(2*SSS+1,1));
        //
        // We use "tmp" to minimize the number of Lp.apply()s.
        // We do this by doing p & r together in a single call.
        //
        MultiFab::Copy(tmp,p,0,0,1,0);
        MultiFab::Copy(tmp,r,0,1,1,0);

        for (int n = 1; n < 2*SSS; n++)
        {
            Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2);

            MultiFab::Copy(tmp,tmp,2,0,2,0);

            MultiFab::Copy(PR,tmp,0,        n,1,0);
            MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0);

            BL_ASSERT(!PR.contains_nan(n,        1));
            BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1));
        }

        MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0);
        Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1);
        MultiFab::Copy(PR,tmp,1,2*SSS,1,0);

        BL_ASSERT(!PR.contains_nan(2*SSS-1,1));
        BL_ASSERT(!PR.contains_nan(2*SSS,  1));

        Real time2 = ParallelDescriptor::second();

        atime += (time2-time1);

        BuildGramMatrix(Gg, PR, rt, SSS);

        const Real time3 = ParallelDescriptor::second();

        gtime += (time3-time2);
        //
        // Form G[][] and g[] from Gg.
        //
        for (int i = 0, k = 0; i < 4*SSS+1; i++)
        {
            for (int j = 0; j < 4*SSS+1; j++)
                //
                // First 4*SSS+1 elements in each row go to G[][].
                //
                G[i][j] = Gg[k++];
            //
            // Last element in row goes to g[].
            //
            g[i] = Gg[k++];
        }

        zero(aj, 4*SSS+1); aj[0]       = 1;
        zero(cj, 4*SSS+1); cj[2*SSS+1] = 1;
        zero(ej, 4*SSS+1);

        for (int nit = 0; nit < SSS; nit++)
        {
            gemv( Tpaj,  Tp, aj, 4*SSS+1, 4*SSS+1);
            gemv( Tpcj,  Tp, cj, 4*SSS+1, 4*SSS+1);
            gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1);

            const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1);

            if ( g_dot_Tpaj == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 1; break;
            }

            const Real alpha = delta / g_dot_Tpaj;

            if ( std::isinf(alpha) )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 2; break;
            }

            axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            axpy(temp3,   cj, -alpha,  Tpaj, 4*SSS+1);

            const Real omega_numerator   = dot(temp3, temp2, 4*SSS+1);
            const Real omega_denominator = dot(temp1, temp2, 4*SSS+1);
            //
            // NOTE: omega_numerator/omega_denominator can be 0/x or 0/0, but should never be x/0.
            //
            // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj.
            // If omega_numerator==0, and ||s||!=0, then stabilization breakdown.
            //
            // Partial update of ej must happen before the check on omega to ensure forward progress !!!
            //
            axpy(ej, ej, alpha, aj, 4*SSS+1);
            //
            // ej has been updated so consider that we've done an iteration since
            // even if we break out of the loop we'll be able to update both sol.
            //
            niters++;
            //
            // Calculate the norm of Saad's vector 's' to check intra s-step convergence.
            //
            axpy(temp1, cj,-alpha,  Tpaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s));

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n';
                BiCGStabConverged = true; break;
            }

            if ( omega_denominator == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 3; break;
            }

            const Real omega = omega_numerator / omega_denominator;

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( omega == 0   ) std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n';
                if ( std::isinf(omega) ) std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n';
            }

            if ( omega == 0   ) { BiCGStabFailed = true; ret = 4; break; }
            if ( std::isinf(omega) ) { BiCGStabFailed = true; ret = 4; break; }
            //
            // Complete the update of ej & cj now that omega is known to be ok.
            //
            axpy(ej, ej,       omega,    cj, 4*SSS+1);
            axpy(ej, ej,-omega*alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj,      -omega,  Tpcj, 4*SSS+1);
            axpy(cj, cj,      -alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1);
            //
            // Do an early check of the residual to determine convergence.
            //
            gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1);
            //
            // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic.
            // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel).
            // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged.
            //
            const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0);

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n';
                BiCGStabConverged = true; break;
            }

            const Real delta_next = dot(g, cj, 4*SSS+1);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( delta_next == 0   ) std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n';
                if ( std::isinf(delta_next) ) std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf?
            if ( delta_next  == 0  ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown...

            const Real beta = (delta_next/delta)*(alpha/omega);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( beta == 0   ) std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n';
                if ( std::isinf(beta) ) std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf?
            if ( beta == 0   ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0?  can't make further progress(?)

            axpy(aj, cj,        beta,   aj, 4*SSS+1);
            axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1);

            delta = delta_next;
        }
        //
        // Update iterates.
        //
        for (int i = 0; i < 4*SSS+1; i++)
            sxay(sol,sol,ej[i],PR,i);

        MultiFab::Copy(p,PR,0,0,1,0);
        p.mult(aj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(p,p,aj[i],PR,i);

        MultiFab::Copy(r,PR,0,0,1,0);
        r.mult(cj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(r,r,cj[i],PR,i);

        if ( !BiCGStabFailed && !BiCGStabConverged )
        {
            m += SSS;

            if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); }
        }
    }

    if ( verbose > 0 )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: Final: Iteration "
                      << std::setw(4) << niters
                      << " rel. err. "
                      << L2_norm_of_resid << '\n';
        }

        if ( verbose > 1 )
        {
            Real tmp[2] = { atime, gtime };

            ParallelDescriptor::ReduceRealMax(tmp,2,color());

            if ( ParallelDescriptor::IOProcessor(color()) )
            {
                Spacer(std::cout, lev);
                std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0] << ", gram time: " << tmp[1] << '\n';
            }
        }
    }

    if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged)
    {
        if ( L2_norm_of_resid > L2_norm_of_rt )
        {
            if ( ParallelDescriptor::IOProcessor(color()) )
                BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!");
            //
            // Return code 8 tells the MultiGrid driver to zero out the solution!
            //
            ret = 8;
        }
        else
        {
            //
            // Return codes 1-7 tells the MultiGrid driver to smooth the solution!
            //
            ret = 7;
        }
    }

    return ret;
}
Example #6
0
int
CGSolver::jbb_precond (MultiFab&       sol,
		       const MultiFab& rhs,
                       int             lev,
		       LinOp&          Lp)
{
    //
    // This is a local routine.  No parallel is allowed to happen here.
    //
    int                  lev_loc = lev;
    const Real           eps_rel = 1.e-2;
    const Real           eps_abs = 1.e-16;
    const int            nghost  = sol.nGrow();
    const int            ncomp   = sol.nComp();
    const bool           local   = true;
    const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC;

    BL_ASSERT(ncomp == 1 );
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc));

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab sorig(ba, ncomp, nghost, dm);

    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    sorig.copy(sol);

    Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local);

    sol.setVal(0);

    Real       rnorm    = norm_inf(r,local);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "     jbb_precond: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev_loc, local);
    Real sol_norm = 0;
    int  ret      = 0;			// will return this value if all goes well
    Real rho_1    = 0;
    int  nit      = 1;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        z.copy(r);

        Real rho = dotxy(z,r,local);
        if (nit == 1)
        {
            p.copy(z);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }

        Lp.apply(q, p, lev_loc, bc_mode, local);

        Real alpha;
        if ( Real pw = dotxy(p,q,local) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:" << " nit " << nit
                      << " rho " << rho << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm    = norm_inf(r,   local);
        sol_norm = norm_inf(sol, local);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs )
	{
            break;
	}
      
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "jbb_precond: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
	{
            BoxLib::Warning("jbb_precond:: failed to converge!");
	}
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Example #7
0
int
CGSolver::solve_cg (MultiFab&       sol,
		    const MultiFab& rhs,
		    Real            eps_rel,
		    Real            eps_abs,
		    LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cg()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab sorig(ba, ncomp, nghost, dm);
    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    MultiFab r1(ba, ncomp, nghost, dm);
    MultiFab z1(ba, ncomp, nghost, dm);
    MultiFab r2(ba, ncomp, nghost, dm);
    MultiFab z2(ba, ncomp, nghost, dm);

    MultiFab::Copy(sorig,sol,0,0,1,0);

    Lp.residual(r, rhs, sorig, lev, bc_mode);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC;

    Real       rnorm    = norm_inf(r);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "              CG: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev);
    Real sol_norm      = 0;
    Real rho_1         = 0;
    int  ret           = 0;
    int  nit           = 1;

    if ( rnorm == 0 || rnorm < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "       CG: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" <<  eps_rel*(Lp_norm*sol_norm + rnorm0 ) 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1)
        {
            z.setVal(0);

            jbb_precond(z,r,lev,Lp);
        }
        else
        {
            MultiFab::Copy(z,r,0,0,1,0);
        }

        Real rho = dotxy(z,r);

        if (nit == 1)
        {
            MultiFab::Copy(p,z,0,0,1,0);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }
        Lp.apply(q, p, lev, temp_bc_mode);

        Real alpha;
        if ( Real pw = dotxy(p,q) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_cg:"
                      << " nit " << nit
                      << " rho " << rho
                      << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm = norm_inf(r);
        sol_norm = norm_inf(sol);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "       CG:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break;
#endif
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "       CG: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 &&  rnorm > eps_rel*rnorm0 && rnorm > eps_abs )
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_cg: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Example #8
0
int
MCMultiGrid::solve_ (MultiFab& _sol,
		     Real      eps_rel,
		     Real      eps_abs,
		     MCBC_Mode bc_mode,
		     int       level)
{
  //
  // Relax system maxiter times, stop if relative error <= _eps_rel or
  // if absolute err <= _abs_eps
  //
  const Real strt_time = ParallelDescriptor::second();
  //
  // Elide a reduction by doing these together.
  //
  Real tmp[2] = { norm_inf(*rhs[level],true), errorEstimate(level,bc_mode,true) };

  ParallelDescriptor::ReduceRealMax(tmp,2);

  const Real norm_rhs  = tmp[0];
  const Real error0    = tmp[1];
  int        returnVal = 0;
  Real       error     = error0;

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
      Spacer(std::cout, level);
      std::cout << "MCMultiGrid: Initial rhs                = " << norm_rhs << '\n';
      std::cout << "MCMultiGrid: Initial error (error0)     = " << error0 << '\n';
  }
  
  if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 )
  {
      std::cout << "MCMultiGrid: Tolerance "
                << eps_rel
                << " < 1e-16 is probably set too low" << '\n';
  }
  //
  // Initialize correction to zero at this level (auto-filled at levels below)
  //
  (*cor[level]).setVal(0.0);
  //
  // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed.
  //
  int        nit         = 1;
  const Real new_error_0 = norm_rhs;
  //const Real norm_Lp     = Lp.norm(0, level);


  for ( ;
        error > eps_abs &&
          error > eps_rel*norm_rhs &&
          nit <= maxiter;
        ++nit)
  {
    relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode);

    error = errorEstimate(level,bc_mode);
	
    if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
    {
      const Real rel_error = (error0 != 0) ? error/new_error_0 : 0;
      Spacer(std::cout, level);
      std::cout << "MCMultiGrid: Iteration   "
                << nit
                << " error/error0 = "
                << rel_error << '\n';
    }
  }

  Real run_time = (ParallelDescriptor::second() - strt_time);
  if ( verbose > 0 )
  {
      if ( ParallelDescriptor::IOProcessor() )
      {
          const Real rel_error = (error0 != 0) ? error/error0 : 0;
          Spacer(std::cout, level);
          std::cout << "MCMultiGrid: Final Iter. "
                    << nit-1
                    << " error/error0 = "
                    << rel_error;
      }

      if ( verbose > 1 )
      {
        
        ParallelDescriptor::ReduceRealMax(run_time);

        if ( ParallelDescriptor::IOProcessor() )
          std::cout << ", Solve time: " << run_time << '\n';
      }
  }

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
    if ( error < eps_rel*norm_rhs )
    {
      std::cout << "   Converged res < eps_rel*bnorm\n";
    } 
    else if ( error < eps_abs )
    {
      std::cout << "   Converged res < eps_abs\n";
    }
  }
  //
  // Omit ghost update since maybe not initialized in calling routine.
  // Add to boundary values stored in initialsolution.
  //
  _sol.copy(*cor[level]);
  _sol.plus(*initialsolution,0,_sol.nComp(),0);

  if ( error <= eps_rel*(norm_rhs) ||
       error <= eps_abs )
    returnVal = 1;

  //
  // Otherwise, failed to solve satisfactorily
  //
  return returnVal;
}