Пример #1
0
/* Polar Decomposition of 3x3 matrix in 4x4,
 * M = QS.  See Nicholas Higham and Robert S. Schreiber,
 * Fast Polar Decomposition of An Arbitrary Matrix,
 * Technical Report 88-942, October 1988,
 * Department of Computer Science, Cornell University.
 */
double polar_decomp(HMatrix M, HMatrix Q, HMatrix S)
{
#define TOL 1.0e-6
    HMatrix Mk, MadjTk, Ek;
    double det, M_one, M_inf, MadjT_one, MadjT_inf, E_one, gamma, g1, g2;
    int i, j;
    mat_tpose(Mk,=,M,3);
    M_one = norm_one(Mk);  M_inf = norm_inf(Mk);
    do {
	adjoint_transpose(Mk, MadjTk);
	det = vdot(Mk[0], MadjTk[0]);
	if (det==0.0) {do_rank2(Mk, MadjTk, Mk); break;}
	MadjT_one = norm_one(MadjTk); MadjT_inf = norm_inf(MadjTk);
	gamma = sqrt(sqrt((MadjT_one*MadjT_inf)/(M_one*M_inf))/fabs(det));
	g1 = gamma*0.5;
	g2 = 0.5/(gamma*det);
	mat_copy(Ek,=,Mk,3);
	mat_binop(Mk,=,g1*Mk,+,g2*MadjTk,3);
	mat_copy(Ek,-=,Mk,3);
	E_one = norm_one(Ek);
	M_one = norm_one(Mk);  M_inf = norm_inf(Mk);
    } while (E_one>(M_one*TOL));
    mat_tpose(Q,=,Mk,3); mat_pad(Q);
    mat_mult(Mk, M, S);	 mat_pad(S);
    for (i=0; i<3; i++) for (j=i; j<3; j++)
	S[i][j] = S[j][i] = 0.5*(S[i][j]+S[j][i]);
    return (det);
}
Пример #2
0
void test_smooth_r4<float_type>::test()
{
    // place particles along the x-axis within one half of the box,
    // put every second particle at the origin
    unsigned int npart = particle->nparticle();
    vector_type dx(0);
    dx[0] = box->edges()(0, 0) / npart / 2;

    std::vector<vector_type> r_list(particle->nparticle());
    std::vector<unsigned int> species(particle->nparticle());
    for (unsigned int k = 0; k < r_list.size(); ++k) {
        r_list[k] = (k % 2) ? k * dx : vector_type(0);
        species[k] = (k < npart_list[0]) ? 0U : 1U;  // set particle type for a binary mixture
    }
    BOOST_CHECK( set_position(*particle, r_list.begin()) == r_list.end() );
    BOOST_CHECK( set_species(*particle, species.begin()) == species.end() );

    // read forces and other stuff from device
    std::vector<float> en_pot(particle->nparticle());
    BOOST_CHECK( get_potential_energy(*particle, en_pot.begin()) == en_pot.end() );

    std::vector<vector_type> f_list(particle->nparticle());
    BOOST_CHECK( get_force(*particle, f_list.begin()) == f_list.end() );

    float const eps = std::numeric_limits<float>::epsilon();

    for (unsigned int i = 0; i < npart; ++i) {
        unsigned int type1 = species[i];
        unsigned int type2 = species[(i + 1) % npart];
        vector_type r = r_list[i] - r_list[(i + 1) % npart];
        vector_type f = f_list[i];

        // reference values from host module
        host_float_type fval, en_pot_;
        host_float_type rr = inner_prod(r, r);
        std::tie(fval, en_pot_) = (*host_potential)(rr, type1, type2);

        if (rr < host_potential->rr_cut(type1, type2)) {
            double rcut = host_potential->r_cut(type1, type2);
            // the GPU force module stores only a fraction of these values
            en_pot_ /= 2;

            // the first term is from the smoothing, the second from the potential
            // (see lennard_jones.cpp from unit tests)
            float const tolerance = 8 * eps * (1 + rcut/(rcut - std::sqrt(rr))) + 10 * eps;

            BOOST_CHECK_SMALL(norm_inf(fval * r - f), std::max(norm_inf(fval * r), 1.f) * tolerance * 2);
            BOOST_CHECK_CLOSE_FRACTION(en_pot_, en_pot[i], 2 * tolerance);
        }
        else {
            // when the distance is greater than the cutoff
            // set the force and the pair potential to zero
            fval = en_pot_ = 0;
            BOOST_CHECK_SMALL(norm_inf(f), eps);
            BOOST_CHECK_SMALL(en_pot[i], eps);
        }
    }
}
Пример #3
0
void
MultiGrid::solve (MultiFab&       _sol,
                  const MultiFab& _rhs,
                  Real            _eps_rel,
                  Real            _eps_abs,
                  LinOp::BC_Mode  bc_mode)
{
    //
    // Prepare memory for new level, and solve the general boundary
    // value problem to within relative error _eps_rel.  Customized
    // to solve at level=0.
    //
    const int level = 0;
    prepareForLevel(level);

    //
    // Copy the initial guess, which may contain inhomogeneous boundray conditions,
    // into both "initialsolution" (to be added back later) and into "cor[0]" which
    // we will only use here to compute the residual, then will set back to 0 below
    //
    initialsolution->copy(_sol);
    cor[level]->copy(_sol);

    //
    // Put the problem in residual-correction form: we will now use "rhs[level
    // the initial residual (rhs[0]) rather than the initial RHS (_rhs)
    // to begin the solve.
    //
    Lp.residual(*rhs[level],_rhs,*cor[level],level,bc_mode);

    //
    // Now initialize correction to zero at this level (auto-filled at levels below)
    //
    (*cor[level]).setVal(0.0); //

    //
    // Elide a reduction by doing these together.
    //
    Real tmp[2] = { norm_inf(_rhs,true), norm_inf(*rhs[level],true) };
    ParallelDescriptor::ReduceRealMax(tmp,2);
    if ( ParallelDescriptor::IOProcessor() && verbose > 0)
    {
        Spacer(std::cout, level);
        std::cout << "MultiGrid: Initial rhs                = " << tmp[0] << '\n';
        std::cout << "MultiGrid: Initial residual           = " << tmp[1] << '\n';
    }

    if (tmp[1] == 0.0)
	return;

    //
    // We can now use homogeneous bc's because we have put the problem into residual-correction form.
    //
    if ( !solve_(_sol, _eps_rel, _eps_abs, LinOp::Homogeneous_BC, tmp[0], tmp[1]) )
        BoxLib::Error("MultiGrid:: failed to converge!");
}
Пример #4
0
Real
MultiGrid::errorEstimate (int            level,
                          LinOp::BC_Mode bc_mode,
                          bool           local)
{
    Lp.residual(*res[level], *rhs[level], *cor[level], level, bc_mode);
    return norm_inf(*res[level], local);
}
Пример #5
0
double serial_norm_inf (const boost::numeric::ublas::vector <double> &lhs,
                        int length) {
#ifdef HAVE_BLAS
    int i = from_blas::idamax (length, &lhs(0), 1);
    double ret = lhs(i);
#else
    double ret = norm_inf (subrange (lhs, 0, length));
#endif
    return ret;
}
// ||a||_p
double pnorm(const double *a, int N, double p){
  if (p==Inf){
    return norm_inf(a,N);
  }
  else{
    double r=0;
    for(int k=0;k<N;k++)
      r+=pow(fabs(a[k]),p);
    return pow(r,1.0/p);
  }
}
void assertNodesUnChanged(
    const typename DataStore<Scalar>::DataStoreVector_t & nodes, 
    const typename DataStore<Scalar>::DataStoreVector_t & nodes_copy 
    ) 
{
  typedef Teuchos::ScalarTraits<Scalar> ST;
  int N = nodes.size();
  int Ncopy = nodes_copy.size();
  TEUCHOS_TEST_FOR_EXCEPTION( N != Ncopy, std::logic_error, 
      "Error!  The number of nodes passed in through setNodes has changed!"
      );
  if (N > 0) {
    RCP<Thyra::VectorBase<Scalar> > xdiff = nodes[0].x->clone_v();
    RCP<Thyra::VectorBase<Scalar> > xdotdiff = xdiff->clone_v();
    V_S(outArg(*xdiff),ST::one());
    V_S(outArg(*xdotdiff),ST::one());
    for (int i=0 ; i<N ; ++i) {
      V_StVpStV(outArg(*xdiff),ST::one(),*nodes[i].x,-ST::one(),*nodes_copy[i].x);
      if ((!Teuchos::is_null(nodes[i].xdot)) && (!Teuchos::is_null(nodes_copy[i].xdot))) {
        V_StVpStV(outArg(*xdotdiff),ST::one(),*nodes[i].xdot,-ST::one(),*nodes_copy[i].xdot);
      } else if (Teuchos::is_null(nodes[i].xdot) && Teuchos::is_null(nodes_copy[i].xdot)) {
        V_S(outArg(*xdotdiff),ST::zero());
      }
      Scalar xdiffnorm = norm_inf(*xdiff);
      Scalar xdotdiffnorm = norm_inf(*xdotdiff);
      TEUCHOS_TEST_FOR_EXCEPTION(
          ( ( nodes[i].time != nodes_copy[i].time ) ||
            ( xdiffnorm != ST::zero() ) ||
            ( xdotdiffnorm != ST::zero() ) ||
            ( nodes[i].accuracy != nodes_copy[i].accuracy ) ), 
          std::logic_error,
          "Error!  The data in the nodes passed through setNodes has changed!"
          );
    }
  }
}
c_vector<double,3> CalculateEigenvectorForSmallestNonzeroEigenvalue(c_matrix<double, 3, 3>& rA)
{
    //Check for symmetry
    if (norm_inf( rA - trans(rA)) > 10*DBL_EPSILON)
    {
        EXCEPTION("Matrix should be symmetric");
    }

    // Find the eigenvector by brute-force using the power method.
    // We can't use the inverse method, because the matrix might be singular

    c_matrix<double,3,3> copy_A(rA);
    //Eigenvalue 1
    c_vector<double, 3> eigenvec1 = scalar_vector<double>(3, 1.0);

    double eigen1 = CalculateMaxEigenpair(copy_A, eigenvec1);

    // Take out maximum eigenpair
    c_matrix<double, 3, 3> wielandt_reduce_first_vector = identity_matrix<double>(3,3);
    wielandt_reduce_first_vector -= outer_prod(eigenvec1, eigenvec1);
    copy_A = prod(wielandt_reduce_first_vector, copy_A);

    c_vector<double, 3> eigenvec2 = scalar_vector<double>(3, 1.0);
    double eigen2 = CalculateMaxEigenpair(copy_A, eigenvec2);

    // Take out maximum (second) eigenpair
    c_matrix<double, 3, 3> wielandt_reduce_second_vector = identity_matrix<double>(3,3);
    wielandt_reduce_second_vector -= outer_prod(eigenvec2, eigenvec2);
    copy_A = prod(wielandt_reduce_second_vector, copy_A);

    c_vector<double, 3> eigenvec3 = scalar_vector<double>(3, 1.0);
    double eigen3 = CalculateMaxEigenpair(copy_A, eigenvec3);

    //Look backwards through the eigenvalues, checking that they are non-zero
    if (eigen3 >= DBL_EPSILON)
    {
        return eigenvec3;
    }
    if (eigen2 >= DBL_EPSILON)
    {
        return eigenvec2;
    }
    UNUSED_OPT(eigen1);
    assert( eigen1 > DBL_EPSILON);
    return eigenvec1;
}
double CalculateMaxEigenpair(c_matrix<double, 3, 3>& rA, c_vector<double, 3>& rEigenvector)
{
    double norm = 0.0;
    double step = DBL_MAX;
    while (step > DBL_EPSILON) //Machine precision
    {
        c_vector<double, 3> old_value(rEigenvector);
        rEigenvector = prod(rA, rEigenvector);
        norm = norm_2(rEigenvector);
        rEigenvector /= norm;
        if (norm < DBL_EPSILON)
        {
            //We don't care about a zero eigenvector, so don't polish it
            break;
        }
        step = norm_inf(rEigenvector-old_value);
    }
    return norm;
}
Пример #10
0
ublas::vector<ublas::matrix<double> > wishart_InvA_rnd(const int df, ublas::matrix<double>& S, const int mc) {
  // Generates wishart matrix allowing for singular wishart
  size_t p = S.size1();
  ublas::vector<double> D(p);
  ublas::matrix<double> P(p, p);
  ublas::matrix<double> F(p, p);
  F = ublas::zero_matrix<double>(p, p);

  // make copy of S
  // ublas::matrix<double> SS(S);

  lapack::gesvd('A', 'A', S, D, P, F);
  // svd0(S, P, D, F);
  // P = trans(P);

  //! correct for singular matrix
  std::vector<size_t> ii;
  for (size_t i=0; i<D.size(); ++i)
    if (D(i) > norm_inf(D)*1e-9)
      ii.push_back(i);
  
  size_t r = ii.size();
  ublas::indirect_array<> idx(r);
  for (size_t i=0; i<r; ++i)
    idx(i) = ii[i];

  ublas::indirect_array<> irow(p);
  for (size_t i=0; i<irow.size(); ++ i) 
    irow(i) = i;
  ublas::matrix<double> Q(p, r);
  // Q = prod(project(P, irow, idx), diagm(ublas::apply_to_all<functor::inv_sqrt<double> >(project(D, idx))));
  // rprod does not seem any faster than diagonalizing D before multiplication
  // Q = rprod(project(P, irow, idx), ublas::apply_to_all<functor::inv_sqrt<double> >(D));
  axpy_prod(project(trans(P), irow, idx), diagm(ublas::apply_to_all<functor::inv_sqrt<double> >(project(D, idx))), Q, true);

  // generate mc samples
  ublas::vector<ublas::matrix<double> > K(mc);
  for (int i=0; i<mc; ++i)
    K(i) = wishart_1(df, Q, p, r);
  return K;
}
Пример #11
0
int main() {
    Vec<3,double> v0;
    std::cout << v0 << std::endl;

    Vec<3,double> v1 = Vec<3,double>(0.,1.,2);

    Vec<3,double> v2 = Vec<3,double>(3,4,5);

    Vec<3,double> v3 = v1 * (v2 + v2);

    std::cout << v3 << std::endl;
    std::cout << norm(v3) << std::endl;
    std::cout << norm_2(v3) << std::endl;

    Vec<4, double> v = Vec<4,double>(1, 2.1f, 3.14, 2u);

    std::cout << norm_1(v) << std::endl;
    std::cout << norm_2(v) << std::endl;
    std::cout << norm_inf(v) << std::endl;
    std::cout << v << std::endl;

    return 0;
}
Пример #12
0
int
CGSolver::jbb_precond (MultiFab&       sol,
		       const MultiFab& rhs,
                       int             lev,
		       LinOp&          Lp)
{
    //
    // This is a local routine.  No parallel is allowed to happen here.
    //
    int                  lev_loc = lev;
    const Real           eps_rel = 1.e-2;
    const Real           eps_abs = 1.e-16;
    const int            nghost  = sol.nGrow();
    const int            ncomp   = sol.nComp();
    const bool           local   = true;
    const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC;

    BL_ASSERT(ncomp == 1 );
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc));

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab sorig(ba, ncomp, nghost, dm);

    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    sorig.copy(sol);

    Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local);

    sol.setVal(0);

    Real       rnorm    = norm_inf(r,local);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "     jbb_precond: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev_loc, local);
    Real sol_norm = 0;
    int  ret      = 0;			// will return this value if all goes well
    Real rho_1    = 0;
    int  nit      = 1;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        z.copy(r);

        Real rho = dotxy(z,r,local);
        if (nit == 1)
        {
            p.copy(z);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }

        Lp.apply(q, p, lev_loc, bc_mode, local);

        Real alpha;
        if ( Real pw = dotxy(p,q,local) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:" << " nit " << nit
                      << " rho " << rho << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm    = norm_inf(r,   local);
        sol_norm = norm_inf(sol, local);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev_loc);
            std::cout << "jbb_precond:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs )
	{
            break;
	}
      
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev_loc);
        std::cout << "jbb_precond: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
	{
            BoxLib::Warning("jbb_precond:: failed to converge!");
	}
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
    /**
     * Use Newton's method to solve the given cell for the next timestep.
     *
     * @param rCell  the cell to solve
     * @param time  the current time
     * @param rCurrentGuess  the current guess at a solution.  Will be updated on exit.
     */
    void Solve(CELLTYPE &rCell,
               double time,
               double rCurrentGuess[SIZE])
    {
        unsigned counter = 0;
        const double eps = 1e-6; // JonW tolerance

        // check that the initial guess that was given gives a valid residual
        rCell.ComputeResidual(time, rCurrentGuess, mResidual.data());
        double norm_of_residual = norm_inf(mResidual);
        assert(!std::isnan(norm_of_residual));
        double norm_of_update = 0.0; //Properly initialised in the loop
        do
        {
            // Calculate Jacobian for current guess
            rCell.ComputeJacobian(time, rCurrentGuess, mJacobian);

            // Solve Newton linear system for mUpdate, given mJacobian and mResidual
            SolveLinearSystem();

            // Update norm (JonW style)
            norm_of_update = norm_inf(mUpdate);

            // Update current guess and recalculate residual
            for (unsigned i=0; i<SIZE; i++)
            {
                rCurrentGuess[i] -= mUpdate[i];
            }
            double norm_of_previous_residual = norm_of_residual;
            rCell.ComputeResidual(time, rCurrentGuess, mResidual.data());
            norm_of_residual = norm_inf(mResidual);
            if (norm_of_residual > norm_of_previous_residual && norm_of_update > eps)
            {
                //Second part of guard:
                //Note that if norm_of_update < eps (converged) then it's
                //likely that both the residual and the previous residual were
                //close to the root.

                //Work out where the biggest change in the guess has happened.
                double relative_change_max = 0.0;
                unsigned relative_change_direction = 0;
                for (unsigned i=0; i<SIZE; i++)
                {
                    double relative_change = fabs(mUpdate[i]/rCurrentGuess[i]);
                    if (relative_change > relative_change_max)
                    {
                        relative_change_max = relative_change;
                        relative_change_direction = i;
                    }
                }

                if (relative_change_max > 1.0)
                {
                    //Only walk 0.2 of the way in that direction (put back 0.8)
                    rCurrentGuess[relative_change_direction] += 0.8*mUpdate[relative_change_direction];
                    rCell.ComputeResidual(time, rCurrentGuess, mResidual.data());
                    norm_of_residual = norm_inf(mResidual);
                    WARNING("Residual increasing and one direction changing radically - back tracking in that direction");
                }
            }
            counter++;

            // avoid infinite loops
            if (counter > 15)
            {
#define COVERAGE_IGNORE
                EXCEPTION("Newton method diverged in CardiacNewtonSolver::Solve()");
#undef COVERAGE_IGNORE
            }
        }
        while (norm_of_update > eps);

#define COVERAGE_IGNORE
#ifndef NDEBUG
        if (norm_of_residual > 2e-10)
        { //This line is for correlation - in case we use norm_of_residual as convergence criterion
            WARN_ONCE_ONLY("Newton iteration terminated because update vector norm is small, but residual norm is not small.");
        }
#endif // NDEBUG
#undef COVERAGE_IGNORE
    }
Пример #14
0
  void Sqpmethod::evaluate() {
    if (inputs_check_) checkInputs();
    checkInitialBounds();

    if (gather_stats_) {
      Dict iterations;
      iterations["inf_pr"] = std::vector<double>();
      iterations["inf_du"] = std::vector<double>();
      iterations["ls_trials"] = std::vector<double>();
      iterations["d_norm"] = std::vector<double>();
      iterations["obj"] = std::vector<double>();
      stats_["iterations"] = iterations;
    }


    // Get problem data
    const vector<double>& x_init = input(NLP_SOLVER_X0).data();
    const vector<double>& lbx = input(NLP_SOLVER_LBX).data();
    const vector<double>& ubx = input(NLP_SOLVER_UBX).data();
    const vector<double>& lbg = input(NLP_SOLVER_LBG).data();
    const vector<double>& ubg = input(NLP_SOLVER_UBG).data();

    // Set linearization point to initial guess
    copy(x_init.begin(), x_init.end(), x_.begin());

    // Initialize Lagrange multipliers of the NLP
    copy(input(NLP_SOLVER_LAM_G0).begin(), input(NLP_SOLVER_LAM_G0).end(), mu_.begin());
    copy(input(NLP_SOLVER_LAM_X0).begin(), input(NLP_SOLVER_LAM_X0).end(), mu_x_.begin());

    t_eval_f_ = t_eval_grad_f_ = t_eval_g_ = t_eval_jac_g_ = t_eval_h_ =
        t_callback_fun_ = t_callback_prepare_ = t_mainloop_ = 0;

    n_eval_f_ = n_eval_grad_f_ = n_eval_g_ = n_eval_jac_g_ = n_eval_h_ = 0;

    double time1 = clock();

    // Initial constraint Jacobian
    eval_jac_g(x_, gk_, Jk_);

    // Initial objective gradient
    eval_grad_f(x_, fk_, gf_);

    // Initialize or reset the Hessian or Hessian approximation
    reg_ = 0;
    if (exact_hessian_) {
      eval_h(x_, mu_, 1.0, Bk_);
    } else {
      reset_h();
    }

    // Evaluate the initial gradient of the Lagrangian
    copy(gf_.begin(), gf_.end(), gLag_.begin());
    if (ng_>0) casadi_mv_t(Jk_.ptr(), Jk_.sparsity(), getPtr(mu_), getPtr(gLag_));
    // gLag += mu_x_;
    transform(gLag_.begin(), gLag_.end(), mu_x_.begin(), gLag_.begin(), plus<double>());

    // Number of SQP iterations
    int iter = 0;

    // Number of line-search iterations
    int ls_iter = 0;

    // Last linesearch successfull
    bool ls_success = true;

    // Reset
    merit_mem_.clear();
    sigma_ = 0.;    // NOTE: Move this into the main optimization loop

    // Default stepsize
    double t = 0;

    // MAIN OPTIMIZATION LOOP
    while (true) {

      // Primal infeasability
      double pr_inf = primalInfeasibility(x_, lbx, ubx, gk_, lbg, ubg);

      // inf-norm of lagrange gradient
      double gLag_norminf = norm_inf(gLag_);

      // inf-norm of step
      double dx_norminf = norm_inf(dx_);

      // Print header occasionally
      if (iter % 10 == 0) printIteration(userOut());

      // Printing information about the actual iterate
      printIteration(userOut(), iter, fk_, pr_inf, gLag_norminf, dx_norminf,
                     reg_, ls_iter, ls_success);

      if (gather_stats_) {
        Dict iterations = stats_["iterations"];
        std::vector<double> tmp=iterations["inf_pr"];
        tmp.push_back(pr_inf);
        iterations["inf_pr"] = tmp;

        tmp=iterations["inf_du"];
        tmp.push_back(gLag_norminf);
        iterations["inf_du"] = tmp;

        tmp=iterations["d_norm"];
        tmp.push_back(dx_norminf);
        iterations["d_norm"] = tmp;

        std::vector<int> tmp2=iterations["ls_trials"];
        tmp2.push_back(ls_iter);
        iterations["ls_trials"] = tmp2;

        tmp=iterations["obj"];
        tmp.push_back(fk_);
        iterations["obj"] = tmp;

        stats_["iterations"] = iterations;
      }

      // Call callback function if present
      if (!callback_.isNull()) {
        double time1 = clock();

        if (!output(NLP_SOLVER_F).isempty()) output(NLP_SOLVER_F).set(fk_);
        if (!output(NLP_SOLVER_X).isempty()) output(NLP_SOLVER_X).setNZ(x_);
        if (!output(NLP_SOLVER_LAM_G).isempty()) output(NLP_SOLVER_LAM_G).setNZ(mu_);
        if (!output(NLP_SOLVER_LAM_X).isempty()) output(NLP_SOLVER_LAM_X).setNZ(mu_x_);
        if (!output(NLP_SOLVER_G).isempty()) output(NLP_SOLVER_G).setNZ(gk_);

        Dict iteration;
        iteration["iter"] = iter;
        iteration["inf_pr"] = pr_inf;
        iteration["inf_du"] = gLag_norminf;
        iteration["d_norm"] = dx_norminf;
        iteration["ls_trials"] = ls_iter;
        iteration["obj"] = fk_;
        stats_["iteration"] = iteration;

        double time2 = clock();
        t_callback_prepare_ += (time2-time1)/CLOCKS_PER_SEC;
        time1 = clock();
        int ret = callback_(ref_, user_data_);
        time2 = clock();
        t_callback_fun_ += (time2-time1)/CLOCKS_PER_SEC;
        if (ret) {
          userOut() << endl;
          userOut() << "casadi::SQPMethod: aborted by callback..." << endl;
          stats_["return_status"] = "User_Requested_Stop";
          break;
        }
      }

      // Checking convergence criteria
      if (pr_inf < tol_pr_ && gLag_norminf < tol_du_) {
        userOut() << endl;
        userOut() << "casadi::SQPMethod: Convergence achieved after "
                  << iter << " iterations." << endl;
        stats_["return_status"] = "Solve_Succeeded";
        break;
      }

      if (iter >= max_iter_) {
        userOut() << endl;
        userOut() << "casadi::SQPMethod: Maximum number of iterations reached." << endl;
        stats_["return_status"] = "Maximum_Iterations_Exceeded";
        break;
      }

      if (iter > 0 && dx_norminf <= min_step_size_) {
        userOut() << endl;
        userOut() << "casadi::SQPMethod: Search direction becomes too small without "
            "convergence criteria being met." << endl;
        stats_["return_status"] = "Search_Direction_Becomes_Too_Small";
        break;
      }

      // Start a new iteration
      iter++;

      log("Formulating QP");
      // Formulate the QP
      transform(lbx.begin(), lbx.end(), x_.begin(), qp_LBX_.begin(), minus<double>());
      transform(ubx.begin(), ubx.end(), x_.begin(), qp_UBX_.begin(), minus<double>());
      transform(lbg.begin(), lbg.end(), gk_.begin(), qp_LBA_.begin(), minus<double>());
      transform(ubg.begin(), ubg.end(), gk_.begin(), qp_UBA_.begin(), minus<double>());

      // Solve the QP
      solve_QP(Bk_, gf_, qp_LBX_, qp_UBX_, Jk_, qp_LBA_, qp_UBA_, dx_, qp_DUAL_X_, qp_DUAL_A_);
      log("QP solved");

      // Detecting indefiniteness
      double gain = casadi_quad_form(Bk_.ptr(), Bk_.sparsity(), getPtr(dx_));
      if (gain < 0) {
        casadi_warning("Indefinite Hessian detected...");
      }

      // Calculate penalty parameter of merit function
      sigma_ = std::max(sigma_, 1.01*norm_inf(qp_DUAL_X_));
      sigma_ = std::max(sigma_, 1.01*norm_inf(qp_DUAL_A_));

      // Calculate L1-merit function in the actual iterate
      double l1_infeas = primalInfeasibility(x_, lbx, ubx, gk_, lbg, ubg);

      // Right-hand side of Armijo condition
      double F_sens = inner_prod(dx_, gf_);
      double L1dir = F_sens - sigma_ * l1_infeas;
      double L1merit = fk_ + sigma_ * l1_infeas;

      // Storing the actual merit function value in a list
      merit_mem_.push_back(L1merit);
      if (merit_mem_.size() > merit_memsize_) {
        merit_mem_.pop_front();
      }
      // Stepsize
      t = 1.0;
      double fk_cand;
      // Merit function value in candidate
      double L1merit_cand = 0;

      // Reset line-search counter, success marker
      ls_iter = 0;
      ls_success = true;

      // Line-search
      log("Starting line-search");
      if (max_iter_ls_>0) { // max_iter_ls_== 0 disables line-search

        // Line-search loop
        while (true) {
          for (int i=0; i<nx_; ++i) x_cand_[i] = x_[i] + t * dx_[i];

          try {
            // Evaluating objective and constraints
            eval_f(x_cand_, fk_cand);
            eval_g(x_cand_, gk_cand_);
          } catch(const CasadiException& ex) {
            // Silent ignore; line-search failed
            ls_iter++;
            // Backtracking
            t = beta_ * t;
            continue;
          }

          ls_iter++;

          // Calculating merit-function in candidate
          l1_infeas = primalInfeasibility(x_cand_, lbx, ubx, gk_cand_, lbg, ubg);

          L1merit_cand = fk_cand + sigma_ * l1_infeas;
          // Calculating maximal merit function value so far
          double meritmax = *max_element(merit_mem_.begin(), merit_mem_.end());
          if (L1merit_cand <= meritmax + t * c1_ * L1dir) {
            // Accepting candidate
            log("Line-search completed, candidate accepted");
            break;
          }

          // Line-search not successful, but we accept it.
          if (ls_iter == max_iter_ls_) {
            ls_success = false;
            log("Line-search completed, maximum number of iterations");
            break;
          }

          // Backtracking
          t = beta_ * t;
        }

        // Candidate accepted, update dual variables
        for (int i=0; i<ng_; ++i) mu_[i] = t * qp_DUAL_A_[i] + (1 - t) * mu_[i];
        for (int i=0; i<nx_; ++i) mu_x_[i] = t * qp_DUAL_X_[i] + (1 - t) * mu_x_[i];

        // Candidate accepted, update the primal variable
        copy(x_.begin(), x_.end(), x_old_.begin());
        copy(x_cand_.begin(), x_cand_.end(), x_.begin());

      } else {
        // Full step
        copy(qp_DUAL_A_.begin(), qp_DUAL_A_.end(), mu_.begin());
        copy(qp_DUAL_X_.begin(), qp_DUAL_X_.end(), mu_x_.begin());

        copy(x_.begin(), x_.end(), x_old_.begin());
        // x+=dx
        transform(x_.begin(), x_.end(), dx_.begin(), x_.begin(), plus<double>());
      }

      if (!exact_hessian_) {
        // Evaluate the gradient of the Lagrangian with the old x but new mu (for BFGS)
        copy(gf_.begin(), gf_.end(), gLag_old_.begin());
        if (ng_>0) casadi_mv_t(Jk_.ptr(), Jk_.sparsity(), getPtr(mu_), getPtr(gLag_old_));
        // gLag_old += mu_x_;
        transform(gLag_old_.begin(), gLag_old_.end(), mu_x_.begin(), gLag_old_.begin(),
                  plus<double>());
      }

      // Evaluate the constraint Jacobian
      log("Evaluating jac_g");
      eval_jac_g(x_, gk_, Jk_);

      // Evaluate the gradient of the objective function
      log("Evaluating grad_f");
      eval_grad_f(x_, fk_, gf_);

      // Evaluate the gradient of the Lagrangian with the new x and new mu
      copy(gf_.begin(), gf_.end(), gLag_.begin());
      if (ng_>0) casadi_mv_t(Jk_.ptr(), Jk_.sparsity(), getPtr(mu_), getPtr(gLag_));

      // gLag += mu_x_;
      transform(gLag_.begin(), gLag_.end(), mu_x_.begin(), gLag_.begin(), plus<double>());

      // Updating Lagrange Hessian
      if (!exact_hessian_) {
        log("Updating Hessian (BFGS)");
        // BFGS with careful updates and restarts
        if (iter % lbfgs_memory_ == 0) {
          // Reset Hessian approximation by dropping all off-diagonal entries
          const int* colind = Bk_.colind();      // Access sparsity (column offset)
          int ncol = Bk_.size2();
          const int* row = Bk_.row();            // Access sparsity (row)
          vector<double>& data = Bk_.data();             // Access nonzero elements
          for (int cc=0; cc<ncol; ++cc) {     // Loop over the columns of the Hessian
            for (int el=colind[cc]; el<colind[cc+1]; ++el) {
              // Loop over the nonzero elements of the column
              if (cc!=row[el]) data[el] = 0;               // Remove if off-diagonal entries
            }
          }
        }

        // Pass to BFGS update function
        bfgs_.setInput(Bk_, BFGS_BK);
        bfgs_.setInputNZ(x_, BFGS_X);
        bfgs_.setInputNZ(x_old_, BFGS_X_OLD);
        bfgs_.setInputNZ(gLag_, BFGS_GLAG);
        bfgs_.setInputNZ(gLag_old_, BFGS_GLAG_OLD);

        // Update the Hessian approximation
        bfgs_.evaluate();

        // Get the updated Hessian
        bfgs_.getOutput(Bk_);
        if (monitored("bfgs")) {
          userOut() << "x = " << x_ << endl;
          userOut() << "BFGS = "  << endl;
          Bk_.printSparse();
        }
      } else {
        // Exact Hessian
        log("Evaluating hessian");
        eval_h(x_, mu_, 1.0, Bk_);
      }
    }

    double time2 = clock();
    t_mainloop_ = (time2-time1)/CLOCKS_PER_SEC;

    // Save results to outputs
    output(NLP_SOLVER_F).set(fk_);
    output(NLP_SOLVER_X).setNZ(x_);
    output(NLP_SOLVER_LAM_G).setNZ(mu_);
    output(NLP_SOLVER_LAM_X).setNZ(mu_x_);
    output(NLP_SOLVER_G).setNZ(gk_);

    if (hasOption("print_time") && static_cast<bool>(getOption("print_time"))) {
      // Write timings
      userOut() << "time spent in eval_f: " << t_eval_f_ << " s.";
      if (n_eval_f_>0)
        userOut() << " (" << n_eval_f_ << " calls, " << (t_eval_f_/n_eval_f_)*1000
                  << " ms. average)";
      userOut() << endl;
      userOut() << "time spent in eval_grad_f: " << t_eval_grad_f_ << " s.";
      if (n_eval_grad_f_>0)
        userOut() << " (" << n_eval_grad_f_ << " calls, "
             << (t_eval_grad_f_/n_eval_grad_f_)*1000 << " ms. average)";
      userOut() << endl;
      userOut() << "time spent in eval_g: " << t_eval_g_ << " s.";
      if (n_eval_g_>0)
        userOut() << " (" << n_eval_g_ << " calls, " << (t_eval_g_/n_eval_g_)*1000
                  << " ms. average)";
      userOut() << endl;
      userOut() << "time spent in eval_jac_g: " << t_eval_jac_g_ << " s.";
      if (n_eval_jac_g_>0)
        userOut() << " (" << n_eval_jac_g_ << " calls, "
             << (t_eval_jac_g_/n_eval_jac_g_)*1000 << " ms. average)";
      userOut() << endl;
      userOut() << "time spent in eval_h: " << t_eval_h_ << " s.";
      if (n_eval_h_>1)
        userOut() << " (" << n_eval_h_ << " calls, " << (t_eval_h_/n_eval_h_)*1000
                  << " ms. average)";
      userOut() << endl;
      userOut() << "time spent in main loop: " << t_mainloop_ << " s." << endl;
      userOut() << "time spent in callback function: " << t_callback_fun_ << " s." << endl;
      userOut() << "time spent in callback preparation: " << t_callback_prepare_ << " s." << endl;
    }

    // Save statistics
    stats_["iter_count"] = iter;

    stats_["t_eval_f"] = t_eval_f_;
    stats_["t_eval_grad_f"] = t_eval_grad_f_;
    stats_["t_eval_g"] = t_eval_g_;
    stats_["t_eval_jac_g"] = t_eval_jac_g_;
    stats_["t_eval_h"] = t_eval_h_;
    stats_["t_mainloop"] = t_mainloop_;
    stats_["t_callback_fun"] = t_callback_fun_;
    stats_["t_callback_prepare"] = t_callback_prepare_;
    stats_["n_eval_f"] = n_eval_f_;
    stats_["n_eval_grad_f"] = n_eval_grad_f_;
    stats_["n_eval_g"] = n_eval_g_;
    stats_["n_eval_jac_g"] = n_eval_jac_g_;
    stats_["n_eval_h"] = n_eval_h_;
  }
Пример #15
0
void PolarDecomposer::PolarDecompose(const mat3& A, mat3& Q, mat3& S, double& det, double tolerance)
{
	mat3 At = A.transpose();
	mat3 Aadj;
	mat3 Ek;
	double A_one = norm_one(At);
	double A_inf = norm_inf(At);
	double Aadj_one, Aadj_inf, E_one, gamma, g1, g2;
	
	do
	{
		Aadj = mat3(At[1].Cross(At[2]), At[2].Cross(At[0]), At[0].Cross(At[1]));
		det = At[0][0] * Aadj[0][0] + At[0][1] * Aadj[0][1] + At[0][2] * Aadj[0][2];
		if(det == 0.0)
		{
			//TODO: handle this case
			printf("Warning: zero determinant encountered.\n");
			break;
		}
		Aadj_one = norm_one(Aadj);
		Aadj_inf = norm_inf(Aadj);
		
		gamma = sqrt(sqrt((Aadj_one*Aadj_inf)/(A_one*A_inf))/fabs(det));
		g1 = gamma * 0.5;
		g2 = 0.5/(gamma*det);

		for(unsigned int i = 0; i < 3; i++)
			for(unsigned int j = 0; j < 3; j++)
			{
				Ek[i][j] = At[i][j];
				At[i][j] = g1 * At[i][j] + g2 * Aadj[i][j];
				Ek[i][j] -= At[i][j];
			}

		E_one = norm_one(Ek);

		A_one = norm_one(At);
		A_inf = norm_inf(At);
	}
	while( E_one > A_one * tolerance);

	if(fabs(det) < EPSILON)//edit by Xing
		Q = mat3::Identity();
	else
		Q = At.transpose();

	//TODO: if S is to be used. uncomment this part

	for(unsigned int i = 0; i < 3; i++)
		for(unsigned int j = 0; j < 3; j++)
		{
			S[i][j] = 0;
			for(unsigned int k = 0; k < 3; k++)
				S[i][j] += At[i][k] * A[k][j];
		}
	for(unsigned int i = 0; i < 3; i++)
		for(unsigned int j = i; j < 3; j++)
		{
			S[i][j] = S[j][i] = 0.5*(S[i][j] + S[j][i]);
		}
}
Пример #16
0
void
MultiGrid::relax (MultiFab&      solL,
                  MultiFab&      rhsL,
                  int            level,
                  Real           eps_rel,
                  Real           eps_abs,
                  LinOp::BC_Mode bc_mode,
                  Real&          cg_time)
{
    BL_PROFILE("MultiGrid::relax()");
    //
    // Recursively relax system.  Equivalent to multigrid V-cycle.
    // At coarsest grid, call coarsestSmooth.
    //
    if ( level < numlevels - 1 )
    {
        if ( verbose > 2 )
        {
           Real rnorm = errorEstimate(level, bc_mode);
           if (ParallelDescriptor::IOProcessor())
           {
              std::cout << "  AT LEVEL " << level << '\n';
              std::cout << "    DN:Norm before smooth " << rnorm << '\n';;
           }
        }
        for (int i = preSmooth() ; i > 0 ; i--)
        {
            Lp.smooth(solL, rhsL, level, bc_mode);
        }
        Lp.residual(*res[level], rhsL, solL, level, bc_mode);

        if ( verbose > 2 )
        {
           Real rnorm = norm_inf(*res[level]);
           if (ParallelDescriptor::IOProcessor())
              std::cout << "    DN:Norm after  smooth " << rnorm << '\n';
        }

        prepareForLevel(level+1);
        average(*rhs[level+1], *res[level]);
        cor[level+1]->setVal(0.0);
        for (int i = cntRelax(); i > 0 ; i--)
        {
            relax(*cor[level+1],*rhs[level+1],level+1,eps_rel,eps_abs,bc_mode,cg_time);
        }
        interpolate(solL, *cor[level+1]);

        if ( verbose > 2 )
        {
           Lp.residual(*res[level], rhsL, solL, level, bc_mode);
           Real rnorm = norm_inf(*res[level]);
           if ( ParallelDescriptor::IOProcessor() )
           {
              std::cout << "  AT LEVEL " << level << '\n';
              std::cout << "    UP:Norm before  smooth " << rnorm << '\n';
           }
        }

        for (int i = postSmooth(); i > 0 ; i--)
        {
            Lp.smooth(solL, rhsL, level, bc_mode);
        }
        if ( verbose > 2 )
        {
           Lp.residual(*res[level], rhsL, solL, level, bc_mode);
           Real rnorm = norm_inf(*res[level]);
           if ( ParallelDescriptor::IOProcessor() ) 
             std::cout << "    UP:Norm after  smooth " << rnorm << '\n';
        }
    }
    else
    {
        if ( verbose > 2 )
        {
           Real rnorm = norm_inf(rhsL);
           if ( ParallelDescriptor::IOProcessor() )
           {
              std::cout << "  AT LEVEL " << level << '\n';
              std::cout << "    DN:Norm before bottom " << rnorm << '\n';
           }
        }

        coarsestSmooth(solL, rhsL, level, eps_rel, eps_abs, bc_mode, usecg, cg_time);

        if ( verbose > 2 )
        {
           Lp.residual(*res[level], rhsL, solL, level, bc_mode);
           Real rnorm = norm_inf(*res[level]);
           if ( ParallelDescriptor::IOProcessor() ) 
              std::cout << "    UP:Norm after  bottom " << rnorm << '\n';
        }
    }
}
Пример #17
0
int
CGSolver::solve_cabicgstab (MultiFab&       sol,
                            const MultiFab& rhs,
                            Real            eps_rel,
                            Real            eps_abs,
                            LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cabicgstab()");

    BL_ASSERT(sol.nComp() == 1);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    Real  temp1[4*SSS_MAX+1];
    Real  temp2[4*SSS_MAX+1];
    Real  temp3[4*SSS_MAX+1];
    Real     Tp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real    Tpp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real     aj[4*SSS_MAX+1];
    Real     cj[4*SSS_MAX+1];
    Real     ej[4*SSS_MAX+1];
    Real   Tpaj[4*SSS_MAX+1];
    Real   Tpcj[4*SSS_MAX+1];
    Real  Tppaj[4*SSS_MAX+1];
    Real      G[4*SSS_MAX+1][4*SSS_MAX+1];    // Extracted from first 4*SSS+1 columns of Gg[][].  indexed as [row][col]
    Real      g[4*SSS_MAX+1];                 // Extracted from last [4*SSS+1] column of Gg[][].
    Real     Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul().  indexed as [row*(4*SSS+2) + col]
    //
    // If variable_SSS we "telescope" SSS.
    // We start with 1 and increase it up to SSS_MAX on the outer iterations.
    //
    if (variable_SSS) SSS = 1;

    zero(   aj, 4*SSS_MAX+1);
    zero(   cj, 4*SSS_MAX+1);
    zero(   ej, 4*SSS_MAX+1);
    zero( Tpaj, 4*SSS_MAX+1);
    zero( Tpcj, 4*SSS_MAX+1);
    zero(Tppaj, 4*SSS_MAX+1);
    zero(temp1, 4*SSS_MAX+1);
    zero(temp2, 4*SSS_MAX+1);
    zero(temp3, 4*SSS_MAX+1);

    SetMonomialBasis(Tp,Tpp,SSS);

    const int ncomp = 1, nghost = sol.nGrow();
    //
    // Contains the matrix powers of p[] and r[].
    //
    // First 2*SSS+1 components are powers of p[].
    // Next  2*SSS   components are powers of r[].
    //
    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    MultiFab PR(ba, 4*SSS_MAX+1, 0, dm);

    MultiFab  p(ba, ncomp, 0, dm);
    MultiFab  r(ba, ncomp, 0, dm);
    MultiFab rt(ba, ncomp, 0, dm);
    
    MultiFab tmp(ba, 4, nghost, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    BL_ASSERT(!r.contains_nan());

    MultiFab::Copy(rt,r,0,0,1,0);
    MultiFab::Copy( p,r,0,0,1,0);

    const Real           rnorm0        = norm_inf(r);
    Real                 delta         = dotxy(r,rt);
    const Real           L2_norm_of_rt = sqrt(delta);
    const LinOp::BC_Mode temp_bc_mode  = LinOp::Homogeneous_BC;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_CABiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }

    if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: niter = 0,"
                      << ", rnorm = "   << rnorm0
                      << ", delta = "   << delta
                      << ", eps_abs = " << eps_abs << '\n';
	}
        return 0;
    }

    int niters = 0, ret = 0;

    Real L2_norm_of_resid = 0, atime = 0, gtime = 0;

    bool BiCGStabFailed = false, BiCGStabConverged = false;

    for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; )
    {
        const Real time1 = ParallelDescriptor::second();
        //
        // Compute the matrix powers on p[] & r[] (monomial basis).
        // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[].
        //
        MultiFab::Copy(PR,p,0,0,1,0);
        MultiFab::Copy(PR,r,0,2*SSS+1,1,0);
	
        BL_ASSERT(!PR.contains_nan(0,      1));
        BL_ASSERT(!PR.contains_nan(2*SSS+1,1));
        //
        // We use "tmp" to minimize the number of Lp.apply()s.
        // We do this by doing p & r together in a single call.
        //
        MultiFab::Copy(tmp,p,0,0,1,0);
        MultiFab::Copy(tmp,r,0,1,1,0);

        for (int n = 1; n < 2*SSS; n++)
        {
            Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2);

            MultiFab::Copy(tmp,tmp,2,0,2,0);

            MultiFab::Copy(PR,tmp,0,        n,1,0);
            MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0);

            BL_ASSERT(!PR.contains_nan(n,        1));
            BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1));
        }

        MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0);
        Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1);
        MultiFab::Copy(PR,tmp,1,2*SSS,1,0);

        BL_ASSERT(!PR.contains_nan(2*SSS-1,1));
        BL_ASSERT(!PR.contains_nan(2*SSS,  1));

        Real time2 = ParallelDescriptor::second();

        atime += (time2-time1);

        BuildGramMatrix(Gg, PR, rt, SSS);

        const Real time3 = ParallelDescriptor::second();

        gtime += (time3-time2);
        //
        // Form G[][] and g[] from Gg.
        //
        for (int i = 0, k = 0; i < 4*SSS+1; i++)
        {
            for (int j = 0; j < 4*SSS+1; j++)
                //
                // First 4*SSS+1 elements in each row go to G[][].
                //
                G[i][j] = Gg[k++];
            //
            // Last element in row goes to g[].
            //
            g[i] = Gg[k++];
        }

        zero(aj, 4*SSS+1); aj[0]       = 1;
        zero(cj, 4*SSS+1); cj[2*SSS+1] = 1;
        zero(ej, 4*SSS+1);

        for (int nit = 0; nit < SSS; nit++)
        {
            gemv( Tpaj,  Tp, aj, 4*SSS+1, 4*SSS+1);
            gemv( Tpcj,  Tp, cj, 4*SSS+1, 4*SSS+1);
            gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1);

            const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1);

            if ( g_dot_Tpaj == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 1; break;
            }

            const Real alpha = delta / g_dot_Tpaj;

            if ( std::isinf(alpha) )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 2; break;
            }

            axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            axpy(temp3,   cj, -alpha,  Tpaj, 4*SSS+1);

            const Real omega_numerator   = dot(temp3, temp2, 4*SSS+1);
            const Real omega_denominator = dot(temp1, temp2, 4*SSS+1);
            //
            // NOTE: omega_numerator/omega_denominator can be 0/x or 0/0, but should never be x/0.
            //
            // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj.
            // If omega_numerator==0, and ||s||!=0, then stabilization breakdown.
            //
            // Partial update of ej must happen before the check on omega to ensure forward progress !!!
            //
            axpy(ej, ej, alpha, aj, 4*SSS+1);
            //
            // ej has been updated so consider that we've done an iteration since
            // even if we break out of the loop we'll be able to update both sol.
            //
            niters++;
            //
            // Calculate the norm of Saad's vector 's' to check intra s-step convergence.
            //
            axpy(temp1, cj,-alpha,  Tpaj, 4*SSS+1);

            gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1);

            const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s));

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n';
                BiCGStabConverged = true; break;
            }

            if ( omega_denominator == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 3; break;
            }

            const Real omega = omega_numerator / omega_denominator;

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( omega == 0   ) std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n';
                if ( std::isinf(omega) ) std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n';
            }

            if ( omega == 0   ) { BiCGStabFailed = true; ret = 4; break; }
            if ( std::isinf(omega) ) { BiCGStabFailed = true; ret = 4; break; }
            //
            // Complete the update of ej & cj now that omega is known to be ok.
            //
            axpy(ej, ej,       omega,    cj, 4*SSS+1);
            axpy(ej, ej,-omega*alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj,      -omega,  Tpcj, 4*SSS+1);
            axpy(cj, cj,      -alpha,  Tpaj, 4*SSS+1);
            axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1);
            //
            // Do an early check of the residual to determine convergence.
            //
            gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1);
            //
            // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic.
            // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel).
            // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged.
            //
            const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1);

            L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0);

            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n';
                BiCGStabConverged = true; break;
            }

            const Real delta_next = dot(g, cj, 4*SSS+1);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( delta_next == 0   ) std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n';
                if ( std::isinf(delta_next) ) std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf?
            if ( delta_next  == 0  ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown...

            const Real beta = (delta_next/delta)*(alpha/omega);

            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( beta == 0   ) std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n';
                if ( std::isinf(beta) ) std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n';
            }

            if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf?
            if ( beta == 0   ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0?  can't make further progress(?)

            axpy(aj, cj,        beta,   aj, 4*SSS+1);
            axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1);

            delta = delta_next;
        }
        //
        // Update iterates.
        //
        for (int i = 0; i < 4*SSS+1; i++)
            sxay(sol,sol,ej[i],PR,i);

        MultiFab::Copy(p,PR,0,0,1,0);
        p.mult(aj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(p,p,aj[i],PR,i);

        MultiFab::Copy(r,PR,0,0,1,0);
        r.mult(cj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(r,r,cj[i],PR,i);

        if ( !BiCGStabFailed && !BiCGStabConverged )
        {
            m += SSS;

            if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); }
        }
    }

    if ( verbose > 0 )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: Final: Iteration "
                      << std::setw(4) << niters
                      << " rel. err. "
                      << L2_norm_of_resid << '\n';
        }

        if ( verbose > 1 )
        {
            Real tmp[2] = { atime, gtime };

            ParallelDescriptor::ReduceRealMax(tmp,2,color());

            if ( ParallelDescriptor::IOProcessor(color()) )
            {
                Spacer(std::cout, lev);
                std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0] << ", gram time: " << tmp[1] << '\n';
            }
        }
    }

    if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged)
    {
        if ( L2_norm_of_resid > L2_norm_of_rt )
        {
            if ( ParallelDescriptor::IOProcessor(color()) )
                BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!");
            //
            // Return code 8 tells the MultiGrid driver to zero out the solution!
            //
            ret = 8;
        }
        else
        {
            //
            // Return codes 1-7 tells the MultiGrid driver to smooth the solution!
            //
            ret = 7;
        }
    }

    return ret;
}
int levenberg_marquardt_nllsq_impl(Function f, JacobianFunction fill_jac, 
                                    InputVector& x, const OutputVector& y, 
                                    LinearSolver lin_solve, LimitFunction impose_limits, unsigned int max_iter, 
                                    T tau, T epsj, T epsx, T epsy)
{
  typedef typename vect_traits<InputVector>::value_type ValueType;
  typedef typename vect_traits<InputVector>::size_type SizeType;
  
  /* Check if the problem is defined properly */
  if (y.size() < x.size())
    throw improper_problem("Levenberg-Marquardt requires M > N!");
  
  mat<ValueType,mat_structure::rectangular> J(y.size(),x.size());
  mat<ValueType,mat_structure::square> JtJ(x.size());
  mat<ValueType,mat_structure::diagonal> diag_JtJ(x.size());
  mat<ValueType,mat_structure::scalar> mu(x.size(),0.0);
  InputVector Jte = x;
  InputVector Dp = x; Dp -= x;
  mat_vect_adaptor<InputVector> Dp_mat(Dp);
  InputVector pDp = x;
  impose_limits(x,Dp); // make sure the initial solution is feasible.
  x += Dp;
  
  if(tau <= 0.0) 
    tau = 1E-03;
  if(epsj <= 0.0) 
    epsj = 1E-17;
  if(epsx <= 0.0) 
    epsx = 1E-17;
  ValueType epsx_sq = epsx * epsx;
  if(epsy <= 0.0) 
    epsy = 1E-17;
  if(max_iter <= 1)
    max_iter = 2;

  /* compute e=x - f(p) and its L2 norm */
  OutputVector y_approx = f(x);
  OutputVector e = y; e -= y_approx;
  OutputVector e_tmp = e;
  ValueType p_eL2 = e * e;
  
  unsigned int nu = 2;
  for(unsigned int k = 0; k < max_iter; ++k) {
    
    if(p_eL2 < epsy)
      return 1;  //residual is too small.

    fill_jac(J,x,y_approx);

    /* J^T J, J^T e */
    for(SizeType i = 0; i < J.get_col_count(); ++i) {
      for(SizeType j = i; j < J.get_col_count(); ++j) {
        ValueType tmp(0.0);
        for(SizeType l = 0; l < J.get_row_count(); ++l)
          tmp += J(l,i) * J(l,j);
        JtJ(i,j) = JtJ(j,i) = tmp;
      };
    };
    Jte = e * J;
    
    ValueType p_L2 = x * x;

    /* check for convergence */
    if( norm_inf(mat_vect_adaptor<InputVector>(Jte)) < epsj) 
      return 2;  //Jacobian is too small.

    /* compute initial damping factor */
    if( k == 0 ) {
      ValueType tmp = std::numeric_limits<ValueType>::min();
      for(SizeType i=0; i < JtJ.get_row_count(); ++i)
        if(JtJ(i,i) > tmp) 
          tmp = JtJ(i,i); /* find max diagonal element */
      mu = mat<ValueType,mat_structure::scalar>(x.size(), tau * tmp);
    };

    /* determine increment using adaptive damping */
    while(true) {

      /* solve augmented equations */
      try {
        lin_solve(make_damped_matrix(JtJ,mu),Dp_mat,mat_vect_adaptor<InputVector>(Jte),epsj);
        
        impose_limits(x,Dp);
        ValueType Dp_L2 = Dp * Dp;
        pDp = x; pDp += Dp;

        if(Dp_L2 < epsx_sq * p_L2) /* relative change in p is small, stop */
          return 3;  //steps are too small.

        if( Dp_L2 >= (p_L2 + epsx) / ( std::numeric_limits<ValueType>::epsilon() * std::numeric_limits<ValueType>::epsilon() ) ) 
          throw 42; //signal to throw a singularity-error (see below).
        
        e_tmp = y; e_tmp -= f(pDp);
        ValueType pDp_eL2 = e_tmp * e_tmp;
        ValueType dL = mu(0,0) * Dp_L2 + Dp * Jte;

        ValueType dF = p_eL2 - pDp_eL2;
        
        if( (dL < 0.0) || (dF < 0.0) ) 
          throw singularity_error("reject inc.");

        // reduction in error, increment is accepted
        ValueType tmp = ( ValueType(2.0) * dF / dL - ValueType(1.0));
        tmp = 1.0 - tmp * tmp * tmp;
        mu *= ( ( tmp >= ValueType(1.0 / 3.0) ) ? tmp : ValueType(1.0 / 3.0) );
        nu = 2;

        x = pDp;
        y_approx = y; y_approx -= e_tmp;
        e = e_tmp;
        p_eL2 = pDp_eL2;
        break; //the step is accepted and the loop is broken.
      } catch(singularity_error&) {
        //the increment must be rejected (either by singularity in damped matrix or no-redux by the step.
        mu *= ValueType(nu);
        nu <<= 1; // 2*nu;
        if( nu == 0 ) /* nu has overflown. */
          throw infeasible_problem("Levenberg-Marquardt method cannot reduce the function further, matrix damping has overflown!");
      } catch(int i) {
        if(i == 42)
          throw singularity_error("Levenberg-Marquardt method has detected a near-singularity in the Jacobian matrix!");
        else
          throw i;  //just in case there might be another integer thrown (very unlikely).
      };

    }; /* inner loop */
  };

  //if this point is reached, it means we have reached the maximum iterations.
  throw maximum_iteration(max_iter);

};
Пример #19
0
bool
SolverUnconstrained<Data,Problem>::optimize( vector_type& x )
{
    M_solver_stats.clear();

    // Controlling parameters
    // trust region radius
    value_type Delta = M_options.Delta_init;

    vector_type x_new ( x.size() );
    vector_type stot ( x.size() );
    value_type norm_Tgrad_fx = this->norm_Theta_x_grad_fx( x, Delta );
    value_type norm_s_til = 0;

    M_prob.copy_x0_to_x( x );


    // FIXME: if ( M_options.verbose() )
    //M_prob.print_complete( x );

    if ( M_solver_stats.collectStats() )
        M_solver_stats.push( norm_Tgrad_fx, 0, 0, 0, 0, 0, 0, 0, Delta, 0, 0, 0 );


    try
    {
        int iter = 0;

        //
        // Apply bound constrained Trust region algorithm
        //
        while ( iter == 0 ||
                ( iter < M_options.max_TR_iter && norm_Tgrad_fx > M_options.TR_tol ) )
        {
            iter++;
            int n_CGiter, n_restarts, n_indef,
                n_crosses_def, n_crosses_indef, n_truss_exit_def, n_truss_exit_indef;
            value_type _s_til_x_G_til_x_s_til, phi_til, rho, Delta_used = Delta;

            DVLOG(2) << "\n===================== iter = " << iter << " ===========================";
            //DVLOG(2) << "\nx = " << x << "\n";
            DVLOG(2) << "\n -> norm_Tgrad_fx = " << norm_Tgrad_fx;



            /** find an approximate stot for the step to make
             * solve :
             * Find \f$\tilde{s}^k   = \mathrm{arg}\mathrm{min}_{s \in R^n}{\tilde{\phi}^k : ||s|| < \Delta^k}\f$
             */
            this->CGstep( x, Delta, stot, norm_s_til,
                          n_CGiter,
                          n_restarts, n_indef,
                          n_crosses_def, n_crosses_indef,
                          n_truss_exit_def, n_truss_exit_indef,
                          _s_til_x_G_til_x_s_til, phi_til );

            //
            x_new = x + stot;

            f_type __fx_new;
            M_prob.evaluate( x_new, __fx_new, diff_order<0>() );
            f_type __fx;
            M_prob.evaluate( x, __fx, diff_order<0>() );

            // compute actual merit function reduction
            value_type ared_til = __fx_new.value( 0 ) - __fx.value( 0 ) + 0.5 * _s_til_x_G_til_x_s_til;

            rho = ared_til / phi_til;

            /////////////////////////////////////////////////////////////////////////
            // Trust region radius calculation:
            if ( M_options.allow_Trust_radius_calculations )
            {
                if ( rho <= 1e-8 )
                    Delta = M_options.rho_decrease * Delta;

                else
                {
                    x = x + stot;

                    if     ( rho > M_options.rho_big )
                        Delta = std::max( M_options.rho_increase_big*norm_s_til, Delta );

                    else if ( rho > M_options.rho_small )
                        Delta = std::max( M_options.rho_increase_small*norm_s_til, Delta );
                }

                norm_Tgrad_fx = this->norm_Theta_x_grad_fx( x, Delta );
            }

            else
            {
                x = x + stot;
                norm_Tgrad_fx = this->norm_Theta_x_grad_fx( x, Delta );
            }

            /////////////////////////////////////////////////////////////////////////

            if ( M_solver_stats.collectStats() )
            {
                M_solver_stats.push( norm_Tgrad_fx,
                                      n_CGiter, n_restarts, n_indef,
                                      n_crosses_def, n_crosses_indef,
                                      n_truss_exit_def, n_truss_exit_indef,
                                      Delta_used, ared_til, phi_til, rho );
            }

            if (  norm_Tgrad_fx > 1e-5 )
                M_prob.setAccuracy( std::min( 1e-1, norm_Tgrad_fx ) );

            DVLOG(2) << "norm_Tgrad_fx = " << norm_Tgrad_fx  << "\n";
        }
    }

    catch ( std::exception const& __ex )
    {
        f_type __fx_new;
        M_prob.evaluate ( x, __fx_new, diff_order<2>() );

        if ( norm_inf( __fx_new.gradient( 0 ) ) > 1e-10 )
            throw __ex;
    }

    vector_type __l( _E_n ), __u( _E_n );
    lambda_LS( x, __l, __u );

    if ( M_solver_stats.collectStats() )
        M_solver_stats.push( x, __l, __u );

    return true;
}
Пример #20
0
int
CGSolver::solve_cg (MultiFab&       sol,
		    const MultiFab& rhs,
		    Real            eps_rel,
		    Real            eps_abs,
		    LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_cg()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab sorig(ba, ncomp, nghost, dm);
    MultiFab r(ba, ncomp, nghost, dm);
    MultiFab z(ba, ncomp, nghost, dm);
    MultiFab q(ba, ncomp, nghost, dm);
    MultiFab p(ba, ncomp, nghost, dm);

    MultiFab r1(ba, ncomp, nghost, dm);
    MultiFab z1(ba, ncomp, nghost, dm);
    MultiFab r2(ba, ncomp, nghost, dm);
    MultiFab z2(ba, ncomp, nghost, dm);

    MultiFab::Copy(sorig,sol,0,0,1,0);

    Lp.residual(r, rhs, sorig, lev, bc_mode);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC;

    Real       rnorm    = norm_inf(r);
    const Real rnorm0   = rnorm;
    Real       minrnorm = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "              CG: Initial error :        " << rnorm0 << '\n';
    }

    const Real Lp_norm = Lp.norm(0, lev);
    Real sol_norm      = 0;
    Real rho_1         = 0;
    int  ret           = 0;
    int  nit           = 1;

    if ( rnorm == 0 || rnorm < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "       CG: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" <<  eps_rel*(Lp_norm*sol_norm + rnorm0 ) 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return 0;
    }

    for (; nit <= maxiter; ++nit)
    {
        if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1)
        {
            z.setVal(0);

            jbb_precond(z,r,lev,Lp);
        }
        else
        {
            MultiFab::Copy(z,r,0,0,1,0);
        }

        Real rho = dotxy(z,r);

        if (nit == 1)
        {
            MultiFab::Copy(p,z,0,0,1,0);
        }
        else
        {
            Real beta = rho/rho_1;
            sxay(p, z, beta, p);
        }
        Lp.apply(q, p, lev, temp_bc_mode);

        Real alpha;
        if ( Real pw = dotxy(p,q) )
	{
            alpha = rho/pw;
	}
        else
	{
            ret = 1; break;
	}
        
        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_cg:"
                      << " nit " << nit
                      << " rho " << rho
                      << " alpha " << alpha << '\n';
        }
        sxay(sol, sol, alpha, p);
        sxay(  r,   r,-alpha, q);
        rnorm = norm_inf(r);
        sol_norm = norm_inf(sol);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "       CG:       Iteration"
                      << std::setw(4) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break;
#endif
        if ( rnorm > def_unstable_criterion*minrnorm )
	{
            ret = 2; break;
	}
        else if ( rnorm < minrnorm )
	{
            minrnorm = rnorm;
	}

        rho_1 = rho;
    }
    
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "       CG: Final Iteration"
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 &&  rnorm > eps_rel*rnorm0 && rnorm > eps_abs )
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_cg: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}
Пример #21
0
void SQPInternal::evaluate(int nfdir, int nadir){
  casadi_assert(nfdir==0 && nadir==0);
  
  checkInitialBounds();
  
  // Get problem data
  const vector<double>& x_init = input(NLP_X_INIT).data();
  const vector<double>& lbx = input(NLP_LBX).data();
  const vector<double>& ubx = input(NLP_UBX).data();
  const vector<double>& lbg = input(NLP_LBG).data();
  const vector<double>& ubg = input(NLP_UBG).data();
  
  // Set the static parameter
  if (parametric_) {
    const vector<double>& p = input(NLP_P).data();
    if (!F_.isNull()) F_.setInput(p,F_.getNumInputs()-1);
    if (!G_.isNull()) G_.setInput(p,G_.getNumInputs()-1);
    if (!H_.isNull()) H_.setInput(p,H_.getNumInputs()-1);
    if (!J_.isNull()) J_.setInput(p,J_.getNumInputs()-1);
  }
    
  // Set linearization point to initial guess
  copy(x_init.begin(),x_init.end(),x_.begin());
  
  // Lagrange multipliers of the NLP
  fill(mu_.begin(),mu_.end(),0);
  fill(mu_x_.begin(),mu_x_.end(),0);

  // Initial constraint Jacobian
  eval_jac_g(x_,gk_,Jk_);
  
  // Initial objective gradient
  eval_grad_f(x_,fk_,gf_);
  
  // Initialize or reset the Hessian or Hessian approximation
  reg_ = 0;
  if( hess_mode_ == HESS_BFGS){
    reset_h();
  } else {
    eval_h(x_,mu_,1.0,Bk_);
  }

  // Evaluate the initial gradient of the Lagrangian
  copy(gf_.begin(),gf_.end(),gLag_.begin());
  if(m_>0) DMatrix::mul_no_alloc_tn(Jk_,mu_,gLag_);
  // gLag += mu_x_;
  transform(gLag_.begin(),gLag_.end(),mu_x_.begin(),gLag_.begin(),plus<double>());

  // Number of SQP iterations
  int iter = 0;

  // Number of line-search iterations
  int ls_iter = 0;
  
  // Last linesearch successfull
  bool ls_success = true;
  
  // Reset
  merit_mem_.clear();
  sigma_ = 0.;    // NOTE: Move this into the main optimization loop

  // Default stepsize
  double t = 0;
  
  // MAIN OPTIMIZATION LOOP
  while(true){
    
    // Primal infeasability
    double pr_inf = primalInfeasibility(x_, lbx, ubx, gk_, lbg, ubg);
    
    // 1-norm of lagrange gradient
    double gLag_norm1 = norm_1(gLag_);
    
    // 1-norm of step
    double dx_norm1 = norm_1(dx_);
    
    // Print header occasionally
    if(iter % 10 == 0) printIteration(cout);
    
    // Printing information about the actual iterate
    printIteration(cout,iter,fk_,pr_inf,gLag_norm1,dx_norm1,reg_,ls_iter,ls_success);
    
    // Call callback function if present
    if (!callback_.isNull()) {
      callback_.input(NLP_COST).set(fk_);
      callback_.input(NLP_X_OPT).set(x_);
      callback_.input(NLP_LAMBDA_G).set(mu_);
      callback_.input(NLP_LAMBDA_X).set(mu_x_);
      callback_.input(NLP_G).set(gk_);
      callback_.evaluate();
      
      if (callback_.output(0).at(0)) {
        cout << endl;
        cout << "CasADi::SQPMethod: aborted by callback..." << endl;
        break;
      }
    }
    
    // Checking convergence criteria
    if (pr_inf < tol_pr_ && gLag_norm1 < tol_du_){
      cout << endl;
      cout << "CasADi::SQPMethod: Convergence achieved after " << iter << " iterations." << endl;
      break;
    }
    
    if (iter >= maxiter_){
      cout << endl;
      cout << "CasADi::SQPMethod: Maximum number of iterations reached." << endl;
      break;
    }
    
    // Start a new iteration
    iter++;
    
    // Formulate the QP
    transform(lbx.begin(),lbx.end(),x_.begin(),qp_LBX_.begin(),minus<double>());
    transform(ubx.begin(),ubx.end(),x_.begin(),qp_UBX_.begin(),minus<double>());
    transform(lbg.begin(),lbg.end(),gk_.begin(),qp_LBA_.begin(),minus<double>());
    transform(ubg.begin(),ubg.end(),gk_.begin(),qp_UBA_.begin(),minus<double>());

    // Solve the QP
    solve_QP(Bk_,gf_,qp_LBX_,qp_UBX_,Jk_,qp_LBA_,qp_UBA_,dx_,qp_DUAL_X_,qp_DUAL_A_);
    log("QP solved");

    // Detecting indefiniteness
    double gain = quad_form(dx_,Bk_);
    if (gain < 0){
      casadi_warning("Indefinite Hessian detected...");
    }
        
    // Calculate penalty parameter of merit function
    sigma_ = std::max(sigma_,1.01*norm_inf(qp_DUAL_X_));
    sigma_ = std::max(sigma_,1.01*norm_inf(qp_DUAL_A_));

    // Calculate L1-merit function in the actual iterate
    double l1_infeas = primalInfeasibility(x_, lbx, ubx, gk_, lbg, ubg);

    // Right-hand side of Armijo condition
    double F_sens = inner_prod(dx_, gf_);    
    double L1dir = F_sens - sigma_ * l1_infeas;
    double L1merit = fk_ + sigma_ * l1_infeas;

    // Storing the actual merit function value in a list
    merit_mem_.push_back(L1merit);
    if (merit_mem_.size() > merit_memsize_){
      merit_mem_.pop_front();
    }
    // Stepsize
    t = 1.0;
    double fk_cand;
    // Merit function value in candidate
    double L1merit_cand = 0;

    // Reset line-search counter, success marker
    ls_iter = 0;
    ls_success = true;

    // Line-search
    log("Starting line-search");
    if(maxiter_ls_>0){ // maxiter_ls_== 0 disables line-search
      
      // Line-search loop
      while (true){
        for(int i=0; i<n_; ++i) x_cand_[i] = x_[i] + t * dx_[i];
      
        // Evaluating objective and constraints
        eval_f(x_cand_,fk_cand);
        eval_g(x_cand_,gk_cand_);
        ls_iter++;

        // Calculating merit-function in candidate
        l1_infeas = primalInfeasibility(x_cand_, lbx, ubx, gk_cand_, lbg, ubg);
      
        L1merit_cand = fk_cand + sigma_ * l1_infeas;
        // Calculating maximal merit function value so far
        double meritmax = *max_element(merit_mem_.begin(), merit_mem_.end());
        if (L1merit_cand <= meritmax + t * c1_ * L1dir){
          // Accepting candidate
	  log("Line-search completed, candidate accepted");
          break;
        }
      
        // Line-search not successful, but we accept it.
        if(ls_iter == maxiter_ls_){
          ls_success = false;
	  log("Line-search completed, maximum number of iterations");
          break;
        }
      
        // Backtracking
        t = beta_ * t;
      }
    }

    // Candidate accepted, update dual variables
    for(int i=0; i<m_; ++i) mu_[i] = t * qp_DUAL_A_[i] + (1 - t) * mu_[i];
    for(int i=0; i<n_; ++i) mu_x_[i] = t * qp_DUAL_X_[i] + (1 - t) * mu_x_[i];
    
    if( hess_mode_ == HESS_BFGS){
      // Evaluate the gradient of the Lagrangian with the old x but new mu (for BFGS)
      copy(gf_.begin(),gf_.end(),gLag_old_.begin());
      if(m_>0) DMatrix::mul_no_alloc_tn(Jk_,mu_,gLag_old_);
      // gLag_old += mu_x_;
      transform(gLag_old_.begin(),gLag_old_.end(),mu_x_.begin(),gLag_old_.begin(),plus<double>());
    }
    
    // Candidate accepted, update the primal variable
    copy(x_.begin(),x_.end(),x_old_.begin());
    copy(x_cand_.begin(),x_cand_.end(),x_.begin());

    // Evaluate the constraint Jacobian
    log("Evaluating jac_g");
    eval_jac_g(x_,gk_,Jk_);
    
    // Evaluate the gradient of the objective function
    log("Evaluating grad_f");
    eval_grad_f(x_,fk_,gf_);
    
    // Evaluate the gradient of the Lagrangian with the new x and new mu
    copy(gf_.begin(),gf_.end(),gLag_.begin());
    if(m_>0) DMatrix::mul_no_alloc_tn(Jk_,mu_,gLag_);
    // gLag += mu_x_;
    transform(gLag_.begin(),gLag_.end(),mu_x_.begin(),gLag_.begin(),plus<double>());

    // Updating Lagrange Hessian
    if( hess_mode_ == HESS_BFGS){
      log("Updating Hessian (BFGS)");
      // BFGS with careful updates and restarts
      if (iter % lbfgs_memory_ == 0){
        // Reset Hessian approximation by dropping all off-diagonal entries
        const vector<int>& rowind = Bk_.rowind();      // Access sparsity (row offset)
        const vector<int>& col = Bk_.col();            // Access sparsity (column)
        vector<double>& data = Bk_.data();             // Access nonzero elements
        for(int i=0; i<rowind.size()-1; ++i){          // Loop over the rows of the Hessian
          for(int el=rowind[i]; el<rowind[i+1]; ++el){ // Loop over the nonzero elements of the row
            if(i!=col[el]) data[el] = 0;               // Remove if off-diagonal entries
          }
        }
      }
      
      // Pass to BFGS update function
      bfgs_.setInput(Bk_,BFGS_BK);
      bfgs_.setInput(x_,BFGS_X);
      bfgs_.setInput(x_old_,BFGS_X_OLD);
      bfgs_.setInput(gLag_,BFGS_GLAG);
      bfgs_.setInput(gLag_old_,BFGS_GLAG_OLD);
      
      // Update the Hessian approximation
      bfgs_.evaluate();
      
      // Get the updated Hessian
      bfgs_.getOutput(Bk_);
    } else {
      // Exact Hessian
      log("Evaluating hessian");
      eval_h(x_,mu_,1.0,Bk_);
    }
  }
  
  // Save results to outputs
  output(NLP_COST).set(fk_);
  output(NLP_X_OPT).set(x_);
  output(NLP_LAMBDA_G).set(mu_);
  output(NLP_LAMBDA_X).set(mu_x_);
  output(NLP_G).set(gk_);
  
  // Save statistics
  stats_["iter_count"] = iter;
}
Пример #22
0
int
MultiGrid::solve_ (MultiFab&      _sol,
                   Real           eps_rel,
                   Real           eps_abs,
                   LinOp::BC_Mode bc_mode,
                   Real           bnorm,
                   Real           resnorm0)
{
    BL_PROFILE("MultiGrid::solve_()");

  //
  // If do_fixed_number_of_iters = 1, then do maxiter iterations without checking for convergence 
  // 
  // If do_fixed_number_of_iters = 0, then relax system maxiter times, 
  //    and stop if relative error <= _eps_rel or if absolute err <= _abs_eps
  //
  const Real strt_time = ParallelDescriptor::second();

  const int level = 0;

  //
  // We take the max of the norms of the initial RHS and the initial residual in order to capture both cases
  //
  Real norm_to_test_against;
  bool using_bnorm;
  if (bnorm >= resnorm0) 
  {
      norm_to_test_against = bnorm;
      using_bnorm          = true;
  } else {
      norm_to_test_against = resnorm0;
      using_bnorm          = false;
  } 

  int        returnVal = 0;
  Real       error     = resnorm0;

  //
  // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed
  //
  if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 )
  {
      std::cout << "MultiGrid: Tolerance "
                << eps_rel
                << " < 1e-16 is probably set too low" << '\n';
  }

  //
  // We initially define norm_cor based on the initial solution only so we can use it in the very first iteration
  //    to decide whether the problem is already solved (this is relevant if the previous solve used was only solved
  //    according to the Anorm test and not the bnorm test).
  //
  Real       norm_cor    = norm_inf(*initialsolution,true);
  ParallelDescriptor::ReduceRealMax(norm_cor);

  int        nit         = 1;
  const Real norm_Lp     = Lp.norm(0, level);
  Real       cg_time     = 0;

  if ( use_Anorm_for_convergence == 1 ) 
  {
     //
     // Don't need to go any further -- no iterations are required
     //
     if (error <= eps_abs || error < eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) 
     {
         if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
         {
             std::cout << "   Problem is already converged -- no iterations required\n";
         }
         return 1;
     }

     for ( ;
           ( (error > eps_abs &&
              error > eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) ||
             (do_fixed_number_of_iters == 1) )
             && nit <= maxiter;
           ++nit)
     {
         relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time);

         Real tmp[2] = { norm_inf(*cor[level],true), errorEstimate(level,bc_mode,true) };

         ParallelDescriptor::ReduceRealMax(tmp,2);

         norm_cor = tmp[0];
         error    = tmp[1];
	
         if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
         {
             const Real rel_error = error / norm_to_test_against;
             Spacer(std::cout, level);
             if (using_bnorm)
             {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/bnorm = "
                           << rel_error << '\n';
             } else {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/resid0 = "
                           << rel_error << '\n';
             }
         }
     }
  }
  else
  {
     //
     // Don't need to go any further -- no iterations are required
     //
     if (error <= eps_abs || error < eps_rel*norm_to_test_against) 
     {
         if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
         {
             std::cout << "   Problem is already converged -- no iterations required\n";
         }
         return 1;
     }

     for ( ;
           ( (error > eps_abs &&
              error > eps_rel*norm_to_test_against) ||
             (do_fixed_number_of_iters == 1) )
             && nit <= maxiter;
           ++nit)
     {
         relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time);

         error = errorEstimate(level, bc_mode);
	
         if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
         {
             const Real rel_error = error / norm_to_test_against;
             Spacer(std::cout, level);
             if (using_bnorm)
             {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/bnorm = "
                           << rel_error << '\n';
             } else {
                 std::cout << "MultiGrid: Iteration   "
                           << nit
                           << " resid/resid0 = "
                           << rel_error << '\n';
             }
         }
     }
  }

  Real run_time = (ParallelDescriptor::second() - strt_time);

  if ( verbose > 0 )
  {
      if ( ParallelDescriptor::IOProcessor() )
      {
          const Real rel_error = error / norm_to_test_against;
          Spacer(std::cout, level);
          if (using_bnorm)
          {
              std::cout << "MultiGrid: Iteration   "
                        << nit-1
                        << " resid/bnorm = "
                        << rel_error << '\n';
          } else {
              std::cout << "MultiGrid: Iteration   "
                        << nit-1
                        << " resid/resid0 = "
                        << rel_error << '\n';
             }
      }

      if ( verbose > 1 )
      {
          Real tmp[2] = { run_time, cg_time };

          ParallelDescriptor::ReduceRealMax(tmp,2,ParallelDescriptor::IOProcessorNumber());

          if ( ParallelDescriptor::IOProcessor() )
              std::cout << ", Solve time: " << tmp[0] << ", CG time: " << tmp[1];
      }

      if ( ParallelDescriptor::IOProcessor() ) std::cout << '\n';
  }

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
      if ( do_fixed_number_of_iters == 1)
      {
          std::cout << "   Did fixed number of iterations: " << maxiter << std::endl;
      } 
      else if ( error < eps_rel*norm_to_test_against )
      {
          std::cout << "   Converged res < eps_rel*max(bnorm,res_norm)\n";
      } 
      else if ( (use_Anorm_for_convergence == 1) && (error < eps_rel*norm_Lp*norm_cor) )
      {
          std::cout << "   Converged res < eps_rel*Anorm*sol\n";
      } 
      else if ( error < eps_abs )
      {
          std::cout << "   Converged res < eps_abs\n";
      }
  }

  //
  // Omit ghost update since maybe not initialized in calling routine.
  // Add to boundary values stored in initialsolution.
  //
  _sol.copy(*cor[level]);
  _sol.plus(*initialsolution,0,_sol.nComp(),0);

  if ( use_Anorm_for_convergence == 1 ) 
  {
     if ( do_fixed_number_of_iters == 1                ||
          error <= eps_rel*(norm_Lp*norm_cor+norm_to_test_against) ||
          error <= eps_abs )
       returnVal = 1;
  } 
  else 
  {
     if ( do_fixed_number_of_iters == 1 ||
          error <= eps_rel*(norm_to_test_against)   ||
          error <= eps_abs )
       returnVal = 1;
  } 

  //
  // Otherwise, failed to solve satisfactorily
  //
  return returnVal;
}
Пример #23
0
 typename type_traits<typename V::value_type>::real_type
 amax (const V &v) {
     return norm_inf (v);
 }
Пример #24
0
int
MCMultiGrid::solve_ (MultiFab& _sol,
		     Real      eps_rel,
		     Real      eps_abs,
		     MCBC_Mode bc_mode,
		     int       level)
{
  //
  // Relax system maxiter times, stop if relative error <= _eps_rel or
  // if absolute err <= _abs_eps
  //
  const Real strt_time = ParallelDescriptor::second();
  //
  // Elide a reduction by doing these together.
  //
  Real tmp[2] = { norm_inf(*rhs[level],true), errorEstimate(level,bc_mode,true) };

  ParallelDescriptor::ReduceRealMax(tmp,2);

  const Real norm_rhs  = tmp[0];
  const Real error0    = tmp[1];
  int        returnVal = 0;
  Real       error     = error0;

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
      Spacer(std::cout, level);
      std::cout << "MCMultiGrid: Initial rhs                = " << norm_rhs << '\n';
      std::cout << "MCMultiGrid: Initial error (error0)     = " << error0 << '\n';
  }
  
  if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 )
  {
      std::cout << "MCMultiGrid: Tolerance "
                << eps_rel
                << " < 1e-16 is probably set too low" << '\n';
  }
  //
  // Initialize correction to zero at this level (auto-filled at levels below)
  //
  (*cor[level]).setVal(0.0);
  //
  // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed.
  //
  int        nit         = 1;
  const Real new_error_0 = norm_rhs;
  //const Real norm_Lp     = Lp.norm(0, level);


  for ( ;
        error > eps_abs &&
          error > eps_rel*norm_rhs &&
          nit <= maxiter;
        ++nit)
  {
    relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode);

    error = errorEstimate(level,bc_mode);
	
    if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
    {
      const Real rel_error = (error0 != 0) ? error/new_error_0 : 0;
      Spacer(std::cout, level);
      std::cout << "MCMultiGrid: Iteration   "
                << nit
                << " error/error0 = "
                << rel_error << '\n';
    }
  }

  Real run_time = (ParallelDescriptor::second() - strt_time);
  if ( verbose > 0 )
  {
      if ( ParallelDescriptor::IOProcessor() )
      {
          const Real rel_error = (error0 != 0) ? error/error0 : 0;
          Spacer(std::cout, level);
          std::cout << "MCMultiGrid: Final Iter. "
                    << nit-1
                    << " error/error0 = "
                    << rel_error;
      }

      if ( verbose > 1 )
      {
        
        ParallelDescriptor::ReduceRealMax(run_time);

        if ( ParallelDescriptor::IOProcessor() )
          std::cout << ", Solve time: " << run_time << '\n';
      }
  }

  if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
  {
    if ( error < eps_rel*norm_rhs )
    {
      std::cout << "   Converged res < eps_rel*bnorm\n";
    } 
    else if ( error < eps_abs )
    {
      std::cout << "   Converged res < eps_abs\n";
    }
  }
  //
  // Omit ghost update since maybe not initialized in calling routine.
  // Add to boundary values stored in initialsolution.
  //
  _sol.copy(*cor[level]);
  _sol.plus(*initialsolution,0,_sol.nComp(),0);

  if ( error <= eps_rel*(norm_rhs) ||
       error <= eps_abs )
    returnVal = 1;

  //
  // Otherwise, failed to solve satisfactorily
  //
  return returnVal;
}
Пример #25
0
int
CGSolver::solve_bicgstab (MultiFab&       sol,
                          const MultiFab& rhs,
                          Real            eps_rel,
                          Real            eps_abs,
                          LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_bicgstab()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    MultiFab ph(ba, ncomp, nghost, dm);
    MultiFab sh(ba, ncomp, nghost, dm);

    MultiFab sorig(ba, ncomp, 0, dm);
    MultiFab p    (ba, ncomp, 0, dm);
    MultiFab r    (ba, ncomp, 0, dm);
    MultiFab s    (ba, ncomp, 0, dm);
    MultiFab rh   (ba, ncomp, 0, dm);
    MultiFab v    (ba, ncomp, 0, dm);
    MultiFab t    (ba, ncomp, 0, dm);

    Lp.residual(r, rhs, sol, lev, bc_mode);

    MultiFab::Copy(sorig,sol,0,0,1,0);
    MultiFab::Copy(rh,   r,  0,0,1,0);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC;

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    Real rnorm = norm_inf(r);
#else
    //
    // Calculate the local values of these norms & reduce their values together.
    //
    Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) };

    ParallelDescriptor::ReduceRealMax(vals,2,color());

    Real       rnorm    = vals[0];
    const Real Lp_norm  = vals[1];
    Real       sol_norm = 0;
#endif
    const Real rnorm0   = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }
    int ret = 0, nit = 1;
    Real rho_1 = 0, alpha = 0, omega = 0;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return ret;
    }

    for (; nit <= maxiter; ++nit)
    {
        const Real rho = dotxy(rh,r);
        if ( rho == 0 ) 
	{
            ret = 1; break;
	}
        if ( nit == 1 )
        {
            MultiFab::Copy(p,r,0,0,1,0);
        }
        else
        {
            const Real beta = (rho/rho_1)*(alpha/omega);
            sxay(p, p, -omega, v);
            sxay(p, r,   beta, p);
        }
        if ( use_mg_precond )
        {
            ph.setVal(0);
            mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            ph.setVal(0);
            Lp.jacobi_smooth(ph, p, lev, temp_bc_mode);
        }
        else 
        {
            MultiFab::Copy(ph,p,0,0,1,0);
        }
        Lp.apply(v, ph, lev, temp_bc_mode);

        if ( Real rhTv = dotxy(rh,v) )
	{
            alpha = rho/rhTv;
	}
        else
	{
            ret = 2; break;
	}
        sxay(sol, sol,  alpha, ph);
        sxay(s,     r, -alpha,  v);

        rnorm = norm_inf(s);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Half Iter "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( use_mg_precond )
        {
            sh.setVal(0);
            mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            sh.setVal(0);
            Lp.jacobi_smooth(sh, s, lev, temp_bc_mode);
        }
        else
        {
            MultiFab::Copy(sh,s,0,0,1,0);
        }
        Lp.apply(t, sh, lev, temp_bc_mode);
        //
        // This is a little funky.  I want to elide one of the reductions
        // in the following two dotxy()s.  We do that by calculating the "local"
        // values and then reducing the two local values at the same time.
        //
        Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) };

        ParallelDescriptor::ReduceRealSum(vals,2,color());

        if ( vals[0] )
	{
            omega = vals[1]/vals[0];
	}
        else
	{
            ret = 3; break;
	}
        sxay(sol, sol,  omega, sh);
        sxay(r,     s, -omega,  t);

        rnorm = norm_inf(r);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Iteration "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( omega == 0 )
	{
            ret = 4; break;
	}
        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Final: Iteration "
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs)
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!");
        ret = 8;
    }

    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}