/* Polar Decomposition of the 3x3 part of a 4x4 matrix, M = QS.
 * See Nicholas Higham and Robert S. Schreiber,
 * "Fast Polar Decomposition of An Arbitrary Matrix",
 * Technical Report 88-942, October 1988,
 * Department of Computer Science, Cornell University.
 * On return Q holds the orthogonal factor and S the symmetric factor;
 * the function's value is the determinant of the converged iterate.
 */
double polar_decomp(HMatrix M, HMatrix Q, HMatrix S)
{
#define TOL 1.0e-6
    HMatrix Xk, XadjTk, Err;
    double det, X_one, X_inf, XadjT_one, XadjT_inf, Err_one;
    double gamma, c1, c2;
    int r, c;

    /* Iterate on the transpose of M. */
    mat_tpose(Xk,=,M,3);
    X_one = norm_one(Xk);
    X_inf = norm_inf(Xk);
    do {
        adjoint_transpose(Xk, XadjTk);
        det = vdot(Xk[0], XadjTk[0]);
        if (det == 0.0) {
            /* Singular iterate: fall back to the rank-2 special case. */
            do_rank2(Xk, XadjTk, Xk);
            break;
        }
        XadjT_one = norm_one(XadjTk);
        XadjT_inf = norm_inf(XadjTk);
        /* Acceleration parameter for the scaled Newton step. */
        gamma = sqrt(sqrt((XadjT_one*XadjT_inf)/(X_one*X_inf))/fabs(det));
        c1 = gamma*0.5;
        c2 = 0.5/(gamma*det);
        /* Err = Xk; Xk = c1*Xk + c2*adjoint-transpose(Xk); Err -= Xk */
        mat_copy(Err,=,Xk,3);
        mat_binop(Xk,=,c1*Xk,+,c2*XadjTk,3);
        mat_copy(Err,-=,Xk,3);
        Err_one = norm_one(Err);
        X_one = norm_one(Xk);
        X_inf = norm_inf(Xk);
    } while (Err_one > (X_one*TOL));

    mat_tpose(Q,=,Xk,3); mat_pad(Q);
    /* S = Q^T M, then force exact symmetry. */
    mat_mult(Xk, M, S); mat_pad(S);
    for (r=0; r<3; r++)
        for (c=r; c<3; c++)
            S[r][c] = S[c][r] = 0.5*(S[r][c]+S[c][r]);
    return (det);
}
/**
 * Place particles along the x-axis within one half of the box (every
 * second particle at the origin) and verify the device forces and
 * potential energies against the host reference potential.
 */
void test_smooth_r4<float_type>::test()
{
    unsigned int npart = particle->nparticle();
    vector_type dx(0);
    dx[0] = box->edges()(0, 0) / npart / 2;

    std::vector<vector_type> r_list(particle->nparticle());
    std::vector<unsigned int> species(particle->nparticle());
    for (unsigned int k = 0; k < r_list.size(); ++k) {
        r_list[k] = (k % 2) ? k * dx : vector_type(0);
        species[k] = (k < npart_list[0]) ? 0U : 1U;   // particle type for a binary mixture
    }
    BOOST_CHECK( set_position(*particle, r_list.begin()) == r_list.end() );
    BOOST_CHECK( set_species(*particle, species.begin()) == species.end() );

    // fetch potential energies and forces computed on the device
    std::vector<float> en_pot(particle->nparticle());
    BOOST_CHECK( get_potential_energy(*particle, en_pot.begin()) == en_pot.end() );
    std::vector<vector_type> f_list(particle->nparticle());
    BOOST_CHECK( get_force(*particle, f_list.begin()) == f_list.end() );

    float const eps = std::numeric_limits<float>::epsilon();

    for (unsigned int i = 0; i < npart; ++i) {
        unsigned int type1 = species[i];
        unsigned int type2 = species[(i + 1) % npart];
        vector_type r = r_list[i] - r_list[(i + 1) % npart];
        vector_type f = f_list[i];

        // reference values from the host module
        host_float_type fval, en_pot_;
        host_float_type rr = inner_prod(r, r);
        std::tie(fval, en_pot_) = (*host_potential)(rr, type1, type2);

        if (rr < host_potential->rr_cut(type1, type2)) {
            double rcut = host_potential->r_cut(type1, type2);
            // the GPU force module stores only a fraction of these values
            en_pot_ /= 2;
            // the first term is from the smoothing, the second from the potential
            // (see lennard_jones.cpp from unit tests)
            float const tolerance = 8 * eps * (1 + rcut / (rcut - std::sqrt(rr))) + 10 * eps;
            BOOST_CHECK_SMALL(norm_inf(fval * r - f),
                              std::max(norm_inf(fval * r), 1.f) * tolerance * 2);
            BOOST_CHECK_CLOSE_FRACTION(en_pot_, en_pot[i], 2 * tolerance);
        }
        else {
            // beyond the cutoff both the force and the pair potential vanish
            fval = en_pot_ = 0;
            BOOST_CHECK_SMALL(norm_inf(f), eps);
            BOOST_CHECK_SMALL(en_pot[i], eps);
        }
    }
}
void MultiGrid::solve (MultiFab& _sol, const MultiFab& _rhs, Real _eps_rel, Real _eps_abs, LinOp::BC_Mode bc_mode) { // // Prepare memory for new level, and solve the general boundary // value problem to within relative error _eps_rel. Customized // to solve at level=0. // const int level = 0; prepareForLevel(level); // // Copy the initial guess, which may contain inhomogeneous boundray conditions, // into both "initialsolution" (to be added back later) and into "cor[0]" which // we will only use here to compute the residual, then will set back to 0 below // initialsolution->copy(_sol); cor[level]->copy(_sol); // // Put the problem in residual-correction form: we will now use "rhs[level // the initial residual (rhs[0]) rather than the initial RHS (_rhs) // to begin the solve. // Lp.residual(*rhs[level],_rhs,*cor[level],level,bc_mode); // // Now initialize correction to zero at this level (auto-filled at levels below) // (*cor[level]).setVal(0.0); // // // Elide a reduction by doing these together. // Real tmp[2] = { norm_inf(_rhs,true), norm_inf(*rhs[level],true) }; ParallelDescriptor::ReduceRealMax(tmp,2); if ( ParallelDescriptor::IOProcessor() && verbose > 0) { Spacer(std::cout, level); std::cout << "MultiGrid: Initial rhs = " << tmp[0] << '\n'; std::cout << "MultiGrid: Initial residual = " << tmp[1] << '\n'; } if (tmp[1] == 0.0) return; // // We can now use homogeneous bc's because we have put the problem into residual-correction form. // if ( !solve_(_sol, _eps_rel, _eps_abs, LinOp::Homogeneous_BC, tmp[0], tmp[1]) ) BoxLib::Error("MultiGrid:: failed to converge!"); }
Real MultiGrid::errorEstimate (int level, LinOp::BC_Mode bc_mode, bool local)
{
    // Error estimate = max-norm of the residual of the current correction.
    MultiFab& resid = *res[level];
    Lp.residual(resid, *rhs[level], *cor[level], level, bc_mode);
    return norm_inf(resid, local);
}
/**
 * Infinity norm of the first @p length entries of @p lhs.
 *
 * The norm is max_i |lhs(i)|.  Note the absolute value: BLAS idamax()
 * only locates the element of largest magnitude, whose value may be
 * negative, so the BLAS branch must not return lhs(i) directly (that
 * would disagree with the ublas norm_inf branch for vectors whose
 * extreme entry is negative).
 */
double serial_norm_inf (const boost::numeric::ublas::vector <double> &lhs, int length)
{
#ifdef HAVE_BLAS
    int i = from_blas::idamax (length, &lhs(0), 1);
    // take the magnitude, not the (possibly negative) raw value
    double v = lhs(i);
    double ret = (v < 0.0) ? -v : v;
#else
    double ret = norm_inf (subrange (lhs, 0, length));
#endif
    return ret;
}
// ||a||_p double pnorm(const double *a, int N, double p){ if (p==Inf){ return norm_inf(a,N); } else{ double r=0; for(int k=0;k<N;k++) r+=pow(fabs(a[k]),p); return pow(r,1.0/p); } }
/**
 * Verify that the node vectors passed through setNodes are unchanged:
 * same count and, for every node, identical time stamp, accuracy, x
 * vector and (when present) xdot vector.
 *
 * @param nodes       the nodes as currently held
 * @param nodes_copy  the snapshot taken when setNodes was called
 * @throws std::logic_error if the sizes differ or any node's data differs
 *
 * Bug fix: the previous version left xdotdiff untouched when exactly one
 * of the two xdot pointers was null.  Because xdotdiff persists across
 * loop iterations, an earlier iteration could have zeroed it, letting
 * such a null-ness mismatch escape detection.  It is now explicitly set
 * to ones in that case so the check always fires.
 */
void assertNodesUnChanged( const typename DataStore<Scalar>::DataStoreVector_t & nodes, const typename DataStore<Scalar>::DataStoreVector_t & nodes_copy )
{
  typedef Teuchos::ScalarTraits<Scalar> ST;
  int N = nodes.size();
  int Ncopy = nodes_copy.size();
  TEUCHOS_TEST_FOR_EXCEPTION( N != Ncopy, std::logic_error,
      "Error! The number of nodes passed in through setNodes has changed!"
      );
  if (N > 0) {
    // Difference work vectors; start as ones so a mismatch that skips
    // both branches below is flagged as "changed".
    RCP<Thyra::VectorBase<Scalar> > xdiff = nodes[0].x->clone_v();
    RCP<Thyra::VectorBase<Scalar> > xdotdiff = xdiff->clone_v();
    V_S(outArg(*xdiff),ST::one());
    V_S(outArg(*xdotdiff),ST::one());
    for (int i=0 ; i<N ; ++i) {
      // xdiff = x - x_copy
      V_StVpStV(outArg(*xdiff),ST::one(),*nodes[i].x,-ST::one(),*nodes_copy[i].x);
      if ((!Teuchos::is_null(nodes[i].xdot)) && (!Teuchos::is_null(nodes_copy[i].xdot))) {
        // both sides carry xdot: xdotdiff = xdot - xdot_copy
        V_StVpStV(outArg(*xdotdiff),ST::one(),*nodes[i].xdot,-ST::one(),*nodes_copy[i].xdot);
      } else if (Teuchos::is_null(nodes[i].xdot) && Teuchos::is_null(nodes_copy[i].xdot)) {
        // neither side carries xdot: treat as equal
        V_S(outArg(*xdotdiff),ST::zero());
      } else {
        // exactly one side carries xdot: force a nonzero norm so the
        // mismatch is reported (previously this relied on stale state)
        V_S(outArg(*xdotdiff),ST::one());
      }
      Scalar xdiffnorm = norm_inf(*xdiff);
      Scalar xdotdiffnorm = norm_inf(*xdotdiff);
      TEUCHOS_TEST_FOR_EXCEPTION(
          ( ( nodes[i].time != nodes_copy[i].time ) ||
            ( xdiffnorm != ST::zero() ) ||
            ( xdotdiffnorm != ST::zero() ) ||
            ( nodes[i].accuracy != nodes_copy[i].accuracy ) ),
          std::logic_error,
          "Error! The data in the nodes passed through setNodes has changed!"
          );
    }
  }
}
/**
 * Find, by the power method with Wielandt deflation, the eigenvector
 * belonging to the smallest non-zero eigenvalue of a symmetric 3x3
 * matrix.  The inverse-power method cannot be used because the matrix
 * may be singular.
 */
c_vector<double,3> CalculateEigenvectorForSmallestNonzeroEigenvalue(c_matrix<double, 3, 3>& rA)
{
    // The deflation below is only valid for a symmetric matrix
    if (norm_inf( rA - trans(rA)) > 10*DBL_EPSILON)
    {
        EXCEPTION("Matrix should be symmetric");
    }

    c_matrix<double,3,3> working(rA);

    // Dominant eigenpair
    c_vector<double, 3> evec1 = scalar_vector<double>(3, 1.0);
    double eval1 = CalculateMaxEigenpair(working, evec1);

    // Project the dominant direction out and repeat
    c_matrix<double, 3, 3> deflate1 = identity_matrix<double>(3,3);
    deflate1 -= outer_prod(evec1, evec1);
    working = prod(deflate1, working);

    c_vector<double, 3> evec2 = scalar_vector<double>(3, 1.0);
    double eval2 = CalculateMaxEigenpair(working, evec2);

    // Project the second direction out as well
    c_matrix<double, 3, 3> deflate2 = identity_matrix<double>(3,3);
    deflate2 -= outer_prod(evec2, evec2);
    working = prod(deflate2, working);

    c_vector<double, 3> evec3 = scalar_vector<double>(3, 1.0);
    double eval3 = CalculateMaxEigenpair(working, evec3);

    // Walk backwards through the eigenvalues, returning the eigenvector
    // of the first (i.e. smallest) non-zero one
    if (eval3 >= DBL_EPSILON)
    {
        return evec3;
    }
    if (eval2 >= DBL_EPSILON)
    {
        return evec2;
    }
    UNUSED_OPT(eval1);
    assert( eval1 > DBL_EPSILON);
    return evec1;
}
/**
 * Power-method iteration for the dominant eigenpair of a 3x3 matrix.
 *
 * @param rA  the matrix
 * @param rEigenvector  on entry, the starting vector; on exit, the
 *     normalised dominant eigenvector (left unnormalised if the iterate
 *     vanishes, since a zero eigenvector is not worth polishing)
 * @return the dominant eigenvalue estimate (norm of the last iterate)
 *
 * Bug fix: the zero-norm check now happens *before* dividing by the
 * norm.  Previously the vector was divided by a near-zero norm first,
 * filling it with inf/nan before the loop bailed out.
 */
double CalculateMaxEigenpair(c_matrix<double, 3, 3>& rA, c_vector<double, 3>& rEigenvector)
{
    double norm = 0.0;
    double step = DBL_MAX;
    while (step > DBL_EPSILON) //Machine precision
    {
        c_vector<double, 3> old_value(rEigenvector);
        rEigenvector = prod(rA, rEigenvector);
        norm = norm_2(rEigenvector);
        if (norm < DBL_EPSILON)
        {
            // We don't care about a zero eigenvector, so don't polish it
            // (and, crucially, don't divide by a vanishing norm)
            break;
        }
        rEigenvector /= norm;
        step = norm_inf(rEigenvector-old_value);
    }
    return norm;
}
// wishart_InvA_rnd: draw `mc` samples from a (possibly singular) Wishart
// distribution with `df` degrees of freedom, scale derived from S.
// Steps: SVD of S gives P and singular values D; directions with
// D(i) <= max|D| * 1e-9 are dropped (numerical rank r); then
// Q = P_r * diag(1/sqrt(D_r)) is assembled with axpy_prod, and each of
// the mc samples is produced by wishart_1(df, Q, p, r).
// NOTE(review): lapack::gesvd appears to overwrite its input matrix S
// in place (the commented-out "make copy of S" suggests this was known);
// callers should not rely on S afterwards -- confirm against the
// boost lapack bindings.
ublas::vector<ublas::matrix<double> > wishart_InvA_rnd(const int df, ublas::matrix<double>& S, const int mc) { // Generates wishart matrix allowing for singular wishart size_t p = S.size1(); ublas::vector<double> D(p); ublas::matrix<double> P(p, p); ublas::matrix<double> F(p, p); F = ublas::zero_matrix<double>(p, p); // make copy of S // ublas::matrix<double> SS(S); lapack::gesvd('A', 'A', S, D, P, F); // svd0(S, P, D, F); // P = trans(P); //! correct for singular matrix std::vector<size_t> ii; for (size_t i=0; i<D.size(); ++i) if (D(i) > norm_inf(D)*1e-9) ii.push_back(i); size_t r = ii.size(); ublas::indirect_array<> idx(r); for (size_t i=0; i<r; ++i) idx(i) = ii[i]; ublas::indirect_array<> irow(p); for (size_t i=0; i<irow.size(); ++ i) irow(i) = i; ublas::matrix<double> Q(p, r); // Q = prod(project(P, irow, idx), diagm(ublas::apply_to_all<functor::inv_sqrt<double> >(project(D, idx)))); // rprod does not seem any faster than diagonalizing D before multiplication // Q = rprod(project(P, irow, idx), ublas::apply_to_all<functor::inv_sqrt<double> >(D)); axpy_prod(project(trans(P), irow, idx), diagm(ublas::apply_to_all<functor::inv_sqrt<double> >(project(D, idx))), Q, true); // generate mc samples ublas::vector<ublas::matrix<double> > K(mc); for (int i=0; i<mc; ++i) K(i) = wishart_1(df, Q, p, r); return K; }
int main() { Vec<3,double> v0; std::cout << v0 << std::endl; Vec<3,double> v1 = Vec<3,double>(0.,1.,2); Vec<3,double> v2 = Vec<3,double>(3,4,5); Vec<3,double> v3 = v1 * (v2 + v2); std::cout << v3 << std::endl; std::cout << norm(v3) << std::endl; std::cout << norm_2(v3) << std::endl; Vec<4, double> v = Vec<4,double>(1, 2.1f, 3.14, 2u); std::cout << norm_1(v) << std::endl; std::cout << norm_2(v) << std::endl; std::cout << norm_inf(v) << std::endl; std::cout << v << std::endl; return 0; }
//
// jbb_precond: preconditioning solve via conjugate gradient on the
// level-`lev` system, run rank-locally only (norms and dot products use
// the `local` flag -- no parallel reductions are allowed in here).
// Works in residual-correction form to the loose tolerances
// eps_rel = 1e-2, eps_abs = 1e-16.  z.copy(r) means no inner
// preconditioner is applied (M = I).  Returns 0 on success, 1 on
// p.q breakdown, 2 on instability (residual growth past
// def_unstable_criterion), 8 on non-convergence.
//
int CGSolver::jbb_precond (MultiFab& sol, const MultiFab& rhs, int lev, LinOp& Lp) { // // This is a local routine. No parallel is allowed to happen here. // int lev_loc = lev; const Real eps_rel = 1.e-2; const Real eps_abs = 1.e-16; const int nghost = sol.nGrow(); const int ncomp = sol.nComp(); const bool local = true; const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC; BL_ASSERT(ncomp == 1 ); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc)); const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); sorig.copy(sol); Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local); sol.setVal(0); Real rnorm = norm_inf(r,local); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << " jbb_precond: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev_loc, local); Real sol_norm = 0; int ret = 0; // will return this value if all goes well Real rho_1 = 0; int nit = 1; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { z.copy(r); Real rho = dotxy(z,r,local); if (nit == 1) { p.copy(z); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev_loc, bc_mode, local); Real alpha; if ( Real pw = dotxy(p,q,local) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; }
// Update the solution and residual, then test convergence/stagnation.
sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r, local); sol_norm = norm_inf(sol, local); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) { break; } if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) { if ( ParallelDescriptor::IOProcessor(color()) ) { BoxLib::Warning("jbb_precond:: failed to converge!"); } ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
/**
 * Use Newton's method to solve the given cell for the next timestep.
 *
 * @param rCell the cell to solve
 * @param time the current time
 * @param rCurrentGuess the current guess at a solution.  Will be updated on exit.
 */
void Solve(CELLTYPE &rCell, double time, double rCurrentGuess[SIZE])
{
    const double eps = 1e-6; // JonW tolerance
    unsigned num_iterations = 0;

    // The initial guess must give a valid (non-NaN) residual
    rCell.ComputeResidual(time, rCurrentGuess, mResidual.data());
    double residual_norm = norm_inf(mResidual);
    assert(!std::isnan(residual_norm));

    double update_norm = 0.0; // properly initialised inside the loop
    do
    {
        // Jacobian at the current guess
        rCell.ComputeJacobian(time, rCurrentGuess, mJacobian);

        // Solve the Newton linear system for mUpdate, given mJacobian and mResidual
        SolveLinearSystem();
        update_norm = norm_inf(mUpdate);

        // Take the Newton step and recompute the residual
        for (unsigned i=0; i<SIZE; i++)
        {
            rCurrentGuess[i] -= mUpdate[i];
        }
        double previous_residual_norm = residual_norm;
        rCell.ComputeResidual(time, rCurrentGuess, mResidual.data());
        residual_norm = norm_inf(mResidual);

        if (residual_norm > previous_residual_norm && update_norm > eps)
        {
            // Second part of the guard: if update_norm < eps we have
            // converged, and both residuals were likely already close to
            // the root.  Otherwise find the component whose guess changed
            // the most, relatively speaking.
            double max_relative_change = 0.0;
            unsigned worst_direction = 0;
            for (unsigned i=0; i<SIZE; i++)
            {
                double relative_change = fabs(mUpdate[i]/rCurrentGuess[i]);
                if (relative_change > max_relative_change)
                {
                    max_relative_change = relative_change;
                    worst_direction = i;
                }
            }
            if (max_relative_change > 1.0)
            {
                // Only walk 0.2 of the way in that direction (put back 0.8)
                rCurrentGuess[worst_direction] += 0.8*mUpdate[worst_direction];
                rCell.ComputeResidual(time, rCurrentGuess, mResidual.data());
                residual_norm = norm_inf(mResidual);
                WARNING("Residual increasing and one direction changing radically - back tracking in that direction");
            }
        }

        // Avoid infinite loops
        num_iterations++;
        if (num_iterations > 15)
        {
#define COVERAGE_IGNORE
            EXCEPTION("Newton method diverged in CardiacNewtonSolver::Solve()");
#undef COVERAGE_IGNORE
        }
    }
    while (update_norm > eps);

#define COVERAGE_IGNORE
#ifndef NDEBUG
    if (residual_norm > 2e-10)
    {
        // For correlation - in case the residual norm is ever used as the
        // convergence criterion
        WARN_ONCE_ONLY("Newton iteration terminated because update vector norm is small, but residual norm is not small.");
    }
#endif // NDEBUG
#undef COVERAGE_IGNORE
}
// Sqpmethod::evaluate -- run one complete SQP solve.
// Outline (all grounded in the code below):
//  1. Read NLP data (bounds, initial primal/dual guesses) and reset
//     timing/evaluation counters.
//  2. Evaluate the initial constraint Jacobian Jk_, objective gradient
//     gf_, the (exact or reset BFGS) Lagrangian Hessian Bk_, and the
//     Lagrangian gradient gLag_.
//  3. Main loop: compute primal infeasibility and the inf-norms of the
//     Lagrangian gradient and the step; record/report statistics and run
//     the user callback; stop on convergence (tol_pr_, tol_du_), max_iter_,
//     or a too-small step; otherwise build and solve the QP subproblem
//     for dx_, update the L1 merit penalty sigma_, and either line-search
//     (Armijo test against the worst stored merit value, backtracking by
//     beta_) or take the full step; then refresh Jk_, gf_, gLag_ and
//     update Bk_ (BFGS with periodic diagonal reset, or exact Hessian).
//  4. Write outputs (f, x, lam_g, lam_x, g), optional timing report, and
//     the stats_ dictionary.
void Sqpmethod::evaluate() { if (inputs_check_) checkInputs(); checkInitialBounds(); if (gather_stats_) { Dict iterations; iterations["inf_pr"] = std::vector<double>(); iterations["inf_du"] = std::vector<double>(); iterations["ls_trials"] = std::vector<double>(); iterations["d_norm"] = std::vector<double>(); iterations["obj"] = std::vector<double>(); stats_["iterations"] = iterations; } // Get problem data const vector<double>& x_init = input(NLP_SOLVER_X0).data(); const vector<double>& lbx = input(NLP_SOLVER_LBX).data(); const vector<double>& ubx = input(NLP_SOLVER_UBX).data(); const vector<double>& lbg = input(NLP_SOLVER_LBG).data(); const vector<double>& ubg = input(NLP_SOLVER_UBG).data(); // Set linearization point to initial guess copy(x_init.begin(), x_init.end(), x_.begin()); // Initialize Lagrange multipliers of the NLP copy(input(NLP_SOLVER_LAM_G0).begin(), input(NLP_SOLVER_LAM_G0).end(), mu_.begin()); copy(input(NLP_SOLVER_LAM_X0).begin(), input(NLP_SOLVER_LAM_X0).end(), mu_x_.begin()); t_eval_f_ = t_eval_grad_f_ = t_eval_g_ = t_eval_jac_g_ = t_eval_h_ = t_callback_fun_ = t_callback_prepare_ = t_mainloop_ = 0; n_eval_f_ = n_eval_grad_f_ = n_eval_g_ = n_eval_jac_g_ = n_eval_h_ = 0; double time1 = clock(); // Initial constraint Jacobian eval_jac_g(x_, gk_, Jk_); // Initial objective gradient eval_grad_f(x_, fk_, gf_); // Initialize or reset the Hessian or Hessian approximation reg_ = 0; if (exact_hessian_) { eval_h(x_, mu_, 1.0, Bk_); } else { reset_h(); } // Evaluate the initial gradient of the Lagrangian copy(gf_.begin(), gf_.end(), gLag_.begin()); if (ng_>0) casadi_mv_t(Jk_.ptr(), Jk_.sparsity(), getPtr(mu_), getPtr(gLag_)); // gLag += mu_x_; transform(gLag_.begin(), gLag_.end(), mu_x_.begin(), gLag_.begin(), plus<double>()); // Number of SQP iterations int iter = 0; // Number of line-search iterations int ls_iter = 0; // Last linesearch successfull bool ls_success = true; // Reset merit_mem_.clear(); sigma_ = 0.; // NOTE: Move this into the main
optimization loop // Default stepsize double t = 0; // MAIN OPTIMIZATION LOOP while (true) { // Primal infeasability double pr_inf = primalInfeasibility(x_, lbx, ubx, gk_, lbg, ubg); // inf-norm of lagrange gradient double gLag_norminf = norm_inf(gLag_); // inf-norm of step double dx_norminf = norm_inf(dx_); // Print header occasionally if (iter % 10 == 0) printIteration(userOut()); // Printing information about the actual iterate printIteration(userOut(), iter, fk_, pr_inf, gLag_norminf, dx_norminf, reg_, ls_iter, ls_success); if (gather_stats_) { Dict iterations = stats_["iterations"]; std::vector<double> tmp=iterations["inf_pr"]; tmp.push_back(pr_inf); iterations["inf_pr"] = tmp; tmp=iterations["inf_du"]; tmp.push_back(gLag_norminf); iterations["inf_du"] = tmp; tmp=iterations["d_norm"]; tmp.push_back(dx_norminf); iterations["d_norm"] = tmp; std::vector<int> tmp2=iterations["ls_trials"]; tmp2.push_back(ls_iter); iterations["ls_trials"] = tmp2; tmp=iterations["obj"]; tmp.push_back(fk_); iterations["obj"] = tmp; stats_["iterations"] = iterations; } // Call callback function if present if (!callback_.isNull()) { double time1 = clock(); if (!output(NLP_SOLVER_F).isempty()) output(NLP_SOLVER_F).set(fk_); if (!output(NLP_SOLVER_X).isempty()) output(NLP_SOLVER_X).setNZ(x_); if (!output(NLP_SOLVER_LAM_G).isempty()) output(NLP_SOLVER_LAM_G).setNZ(mu_); if (!output(NLP_SOLVER_LAM_X).isempty()) output(NLP_SOLVER_LAM_X).setNZ(mu_x_); if (!output(NLP_SOLVER_G).isempty()) output(NLP_SOLVER_G).setNZ(gk_); Dict iteration; iteration["iter"] = iter; iteration["inf_pr"] = pr_inf; iteration["inf_du"] = gLag_norminf; iteration["d_norm"] = dx_norminf; iteration["ls_trials"] = ls_iter; iteration["obj"] = fk_; stats_["iteration"] = iteration; double time2 = clock(); t_callback_prepare_ += (time2-time1)/CLOCKS_PER_SEC; time1 = clock(); int ret = callback_(ref_, user_data_); time2 = clock(); t_callback_fun_ += (time2-time1)/CLOCKS_PER_SEC; if (ret) { userOut() << endl; userOut() <<
"casadi::SQPMethod: aborted by callback..." << endl; stats_["return_status"] = "User_Requested_Stop"; break; } } // Checking convergence criteria if (pr_inf < tol_pr_ && gLag_norminf < tol_du_) { userOut() << endl; userOut() << "casadi::SQPMethod: Convergence achieved after " << iter << " iterations." << endl; stats_["return_status"] = "Solve_Succeeded"; break; } if (iter >= max_iter_) { userOut() << endl; userOut() << "casadi::SQPMethod: Maximum number of iterations reached." << endl; stats_["return_status"] = "Maximum_Iterations_Exceeded"; break; } if (iter > 0 && dx_norminf <= min_step_size_) { userOut() << endl; userOut() << "casadi::SQPMethod: Search direction becomes too small without " "convergence criteria being met." << endl; stats_["return_status"] = "Search_Direction_Becomes_Too_Small"; break; } // Start a new iteration iter++; log("Formulating QP"); // Formulate the QP transform(lbx.begin(), lbx.end(), x_.begin(), qp_LBX_.begin(), minus<double>()); transform(ubx.begin(), ubx.end(), x_.begin(), qp_UBX_.begin(), minus<double>()); transform(lbg.begin(), lbg.end(), gk_.begin(), qp_LBA_.begin(), minus<double>()); transform(ubg.begin(), ubg.end(), gk_.begin(), qp_UBA_.begin(), minus<double>()); // Solve the QP solve_QP(Bk_, gf_, qp_LBX_, qp_UBX_, Jk_, qp_LBA_, qp_UBA_, dx_, qp_DUAL_X_, qp_DUAL_A_); log("QP solved"); // Detecting indefiniteness double gain = casadi_quad_form(Bk_.ptr(), Bk_.sparsity(), getPtr(dx_)); if (gain < 0) { casadi_warning("Indefinite Hessian detected..."); } // Calculate penalty parameter of merit function sigma_ = std::max(sigma_, 1.01*norm_inf(qp_DUAL_X_)); sigma_ = std::max(sigma_, 1.01*norm_inf(qp_DUAL_A_)); // Calculate L1-merit function in the actual iterate double l1_infeas = primalInfeasibility(x_, lbx, ubx, gk_, lbg, ubg); // Right-hand side of Armijo condition double F_sens = inner_prod(dx_, gf_); double L1dir = F_sens - sigma_ * l1_infeas; double L1merit = fk_ + sigma_ * l1_infeas; // Storing the actual merit function value
in a list merit_mem_.push_back(L1merit); if (merit_mem_.size() > merit_memsize_) { merit_mem_.pop_front(); } // Stepsize t = 1.0; double fk_cand; // Merit function value in candidate double L1merit_cand = 0; // Reset line-search counter, success marker ls_iter = 0; ls_success = true; // Line-search log("Starting line-search"); if (max_iter_ls_>0) { // max_iter_ls_== 0 disables line-search // Line-search loop while (true) { for (int i=0; i<nx_; ++i) x_cand_[i] = x_[i] + t * dx_[i]; try { // Evaluating objective and constraints eval_f(x_cand_, fk_cand); eval_g(x_cand_, gk_cand_); } catch(const CasadiException& ex) { // Silent ignore; line-search failed ls_iter++; // Backtracking t = beta_ * t; continue; } ls_iter++; // Calculating merit-function in candidate l1_infeas = primalInfeasibility(x_cand_, lbx, ubx, gk_cand_, lbg, ubg); L1merit_cand = fk_cand + sigma_ * l1_infeas; // Calculating maximal merit function value so far double meritmax = *max_element(merit_mem_.begin(), merit_mem_.end()); if (L1merit_cand <= meritmax + t * c1_ * L1dir) { // Accepting candidate log("Line-search completed, candidate accepted"); break; } // Line-search not successful, but we accept it.
if (ls_iter == max_iter_ls_) { ls_success = false; log("Line-search completed, maximum number of iterations"); break; } // Backtracking t = beta_ * t; } // Candidate accepted, update dual variables for (int i=0; i<ng_; ++i) mu_[i] = t * qp_DUAL_A_[i] + (1 - t) * mu_[i]; for (int i=0; i<nx_; ++i) mu_x_[i] = t * qp_DUAL_X_[i] + (1 - t) * mu_x_[i]; // Candidate accepted, update the primal variable copy(x_.begin(), x_.end(), x_old_.begin()); copy(x_cand_.begin(), x_cand_.end(), x_.begin()); } else { // Full step copy(qp_DUAL_A_.begin(), qp_DUAL_A_.end(), mu_.begin()); copy(qp_DUAL_X_.begin(), qp_DUAL_X_.end(), mu_x_.begin()); copy(x_.begin(), x_.end(), x_old_.begin()); // x+=dx transform(x_.begin(), x_.end(), dx_.begin(), x_.begin(), plus<double>()); } if (!exact_hessian_) { // Evaluate the gradient of the Lagrangian with the old x but new mu (for BFGS) copy(gf_.begin(), gf_.end(), gLag_old_.begin()); if (ng_>0) casadi_mv_t(Jk_.ptr(), Jk_.sparsity(), getPtr(mu_), getPtr(gLag_old_)); // gLag_old += mu_x_; transform(gLag_old_.begin(), gLag_old_.end(), mu_x_.begin(), gLag_old_.begin(), plus<double>()); } // Evaluate the constraint Jacobian log("Evaluating jac_g"); eval_jac_g(x_, gk_, Jk_); // Evaluate the gradient of the objective function log("Evaluating grad_f"); eval_grad_f(x_, fk_, gf_); // Evaluate the gradient of the Lagrangian with the new x and new mu copy(gf_.begin(), gf_.end(), gLag_.begin()); if (ng_>0) casadi_mv_t(Jk_.ptr(), Jk_.sparsity(), getPtr(mu_), getPtr(gLag_)); // gLag += mu_x_; transform(gLag_.begin(), gLag_.end(), mu_x_.begin(), gLag_.begin(), plus<double>()); // Updating Lagrange Hessian if (!exact_hessian_) { log("Updating Hessian (BFGS)"); // BFGS with careful updates and restarts if (iter % lbfgs_memory_ == 0) { // Reset Hessian approximation by dropping all off-diagonal entries const int* colind = Bk_.colind(); // Access sparsity (column offset) int ncol = Bk_.size2(); const int* row = Bk_.row(); // Access sparsity (row) vector<double>& data =
Bk_.data(); // Access nonzero elements for (int cc=0; cc<ncol; ++cc) { // Loop over the columns of the Hessian for (int el=colind[cc]; el<colind[cc+1]; ++el) { // Loop over the nonzero elements of the column if (cc!=row[el]) data[el] = 0; // Remove if off-diagonal entries } } } // Pass to BFGS update function bfgs_.setInput(Bk_, BFGS_BK); bfgs_.setInputNZ(x_, BFGS_X); bfgs_.setInputNZ(x_old_, BFGS_X_OLD); bfgs_.setInputNZ(gLag_, BFGS_GLAG); bfgs_.setInputNZ(gLag_old_, BFGS_GLAG_OLD); // Update the Hessian approximation bfgs_.evaluate(); // Get the updated Hessian bfgs_.getOutput(Bk_); if (monitored("bfgs")) { userOut() << "x = " << x_ << endl; userOut() << "BFGS = " << endl; Bk_.printSparse(); } } else { // Exact Hessian log("Evaluating hessian"); eval_h(x_, mu_, 1.0, Bk_); } } double time2 = clock(); t_mainloop_ = (time2-time1)/CLOCKS_PER_SEC; // Save results to outputs output(NLP_SOLVER_F).set(fk_); output(NLP_SOLVER_X).setNZ(x_); output(NLP_SOLVER_LAM_G).setNZ(mu_); output(NLP_SOLVER_LAM_X).setNZ(mu_x_); output(NLP_SOLVER_G).setNZ(gk_); if (hasOption("print_time") && static_cast<bool>(getOption("print_time"))) { // Write timings userOut() << "time spent in eval_f: " << t_eval_f_ << " s."; if (n_eval_f_>0) userOut() << " (" << n_eval_f_ << " calls, " << (t_eval_f_/n_eval_f_)*1000 << " ms. average)"; userOut() << endl; userOut() << "time spent in eval_grad_f: " << t_eval_grad_f_ << " s."; if (n_eval_grad_f_>0) userOut() << " (" << n_eval_grad_f_ << " calls, " << (t_eval_grad_f_/n_eval_grad_f_)*1000 << " ms. average)"; userOut() << endl; userOut() << "time spent in eval_g: " << t_eval_g_ << " s."; if (n_eval_g_>0) userOut() << " (" << n_eval_g_ << " calls, " << (t_eval_g_/n_eval_g_)*1000 << " ms. average)"; userOut() << endl; userOut() << "time spent in eval_jac_g: " << t_eval_jac_g_ << " s."; if (n_eval_jac_g_>0) userOut() << " (" << n_eval_jac_g_ << " calls, " << (t_eval_jac_g_/n_eval_jac_g_)*1000 << " ms.
average)"; userOut() << endl; userOut() << "time spent in eval_h: " << t_eval_h_ << " s."; if (n_eval_h_>1) userOut() << " (" << n_eval_h_ << " calls, " << (t_eval_h_/n_eval_h_)*1000 << " ms. average)"; userOut() << endl; userOut() << "time spent in main loop: " << t_mainloop_ << " s." << endl; userOut() << "time spent in callback function: " << t_callback_fun_ << " s." << endl; userOut() << "time spent in callback preparation: " << t_callback_prepare_ << " s." << endl; } // Save statistics stats_["iter_count"] = iter; stats_["t_eval_f"] = t_eval_f_; stats_["t_eval_grad_f"] = t_eval_grad_f_; stats_["t_eval_g"] = t_eval_g_; stats_["t_eval_jac_g"] = t_eval_jac_g_; stats_["t_eval_h"] = t_eval_h_; stats_["t_mainloop"] = t_mainloop_; stats_["t_callback_fun"] = t_callback_fun_; stats_["t_callback_prepare"] = t_callback_prepare_; stats_["n_eval_f"] = n_eval_f_; stats_["n_eval_grad_f"] = n_eval_grad_f_; stats_["n_eval_g"] = n_eval_g_; stats_["n_eval_jac_g"] = n_eval_jac_g_; stats_["n_eval_h"] = n_eval_h_; }
void PolarDecomposer::PolarDecompose(const mat3& A, mat3& Q, mat3& S, double& det, double tolerance) { mat3 At = A.transpose(); mat3 Aadj; mat3 Ek; double A_one = norm_one(At); double A_inf = norm_inf(At); double Aadj_one, Aadj_inf, E_one, gamma, g1, g2; do { Aadj = mat3(At[1].Cross(At[2]), At[2].Cross(At[0]), At[0].Cross(At[1])); det = At[0][0] * Aadj[0][0] + At[0][1] * Aadj[0][1] + At[0][2] * Aadj[0][2]; if(det == 0.0) { //TODO: handle this case printf("Warning: zero determinant encountered.\n"); break; } Aadj_one = norm_one(Aadj); Aadj_inf = norm_inf(Aadj); gamma = sqrt(sqrt((Aadj_one*Aadj_inf)/(A_one*A_inf))/fabs(det)); g1 = gamma * 0.5; g2 = 0.5/(gamma*det); for(unsigned int i = 0; i < 3; i++) for(unsigned int j = 0; j < 3; j++) { Ek[i][j] = At[i][j]; At[i][j] = g1 * At[i][j] + g2 * Aadj[i][j]; Ek[i][j] -= At[i][j]; } E_one = norm_one(Ek); A_one = norm_one(At); A_inf = norm_inf(At); } while( E_one > A_one * tolerance); if(fabs(det) < EPSILON)//edit by Xing Q = mat3::Identity(); else Q = At.transpose(); //TODO: if S is to be used. uncomment this part for(unsigned int i = 0; i < 3; i++) for(unsigned int j = 0; j < 3; j++) { S[i][j] = 0; for(unsigned int k = 0; k < 3; k++) S[i][j] += At[i][k] * A[k][j]; } for(unsigned int i = 0; i < 3; i++) for(unsigned int j = i; j < 3; j++) { S[i][j] = S[j][i] = 0.5*(S[i][j] + S[j][i]); } }
//
// MultiGrid::relax -- one recursive multigrid V-cycle on (solL, rhsL) at
// `level`: pre-smooth, form the residual, restrict ("average") it to
// level+1, relax the coarse correction (cntRelax times), interpolate it
// back, then post-smooth.  At the coarsest level (level == numlevels-1)
// the work is delegated to coarsestSmooth().  With verbose > 2 the
// residual norms before/after each stage are printed.
//
void MultiGrid::relax (MultiFab& solL, MultiFab& rhsL, int level, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode, Real& cg_time) { BL_PROFILE("MultiGrid::relax()"); // // Recursively relax system. Equivalent to multigrid V-cycle. // At coarsest grid, call coarsestSmooth. // if ( level < numlevels - 1 ) { if ( verbose > 2 ) { Real rnorm = errorEstimate(level, bc_mode); if (ParallelDescriptor::IOProcessor()) { std::cout << " AT LEVEL " << level << '\n'; std::cout << " DN:Norm before smooth " << rnorm << '\n';; } } for (int i = preSmooth() ; i > 0 ; i--) { Lp.smooth(solL, rhsL, level, bc_mode); } Lp.residual(*res[level], rhsL, solL, level, bc_mode); if ( verbose > 2 ) { Real rnorm = norm_inf(*res[level]); if (ParallelDescriptor::IOProcessor()) std::cout << " DN:Norm after smooth " << rnorm << '\n'; } prepareForLevel(level+1); average(*rhs[level+1], *res[level]); cor[level+1]->setVal(0.0); for (int i = cntRelax(); i > 0 ; i--) { relax(*cor[level+1],*rhs[level+1],level+1,eps_rel,eps_abs,bc_mode,cg_time); } interpolate(solL, *cor[level+1]); if ( verbose > 2 ) { Lp.residual(*res[level], rhsL, solL, level, bc_mode); Real rnorm = norm_inf(*res[level]); if ( ParallelDescriptor::IOProcessor() ) { std::cout << " AT LEVEL " << level << '\n'; std::cout << " UP:Norm before smooth " << rnorm << '\n'; } } for (int i = postSmooth(); i > 0 ; i--) { Lp.smooth(solL, rhsL, level, bc_mode); } if ( verbose > 2 ) { Lp.residual(*res[level], rhsL, solL, level, bc_mode); Real rnorm = norm_inf(*res[level]); if ( ParallelDescriptor::IOProcessor() ) std::cout << " UP:Norm after smooth " << rnorm << '\n'; } } else { if ( verbose > 2 ) { Real rnorm = norm_inf(rhsL); if ( ParallelDescriptor::IOProcessor() ) { std::cout << " AT LEVEL " << level << '\n'; std::cout << " DN:Norm before bottom " << rnorm << '\n'; } } coarsestSmooth(solL, rhsL, level, eps_rel, eps_abs, bc_mode, usecg, cg_time); if ( verbose > 2 ) { Lp.residual(*res[level], rhsL, solL, level, bc_mode); Real rnorm =
norm_inf(*res[level]); if ( ParallelDescriptor::IOProcessor() ) std::cout << " UP:Norm after bottom " << rnorm << '\n'; } } }
//
// Communication-avoiding BiCGStab (CA-BiCGStab) solve of Lp(x) = rhs at level lev.
//
// Each outer iteration builds the 4*SSS+1 monomial matrix powers of the search
// direction p and residual r (stored side by side in PR), forms one Gram-like
// matrix against rt (BuildGramMatrix -- the only global reduction), and then
// performs SSS BiCGStab steps purely on small local coefficient vectors
// (aj/cj/ej, length 4*SSS+1) via gemv/axpy/dot.
//
// Returns 0 on immediate convergence, 1-6 on the various breakdowns detected
// below, 7/8 on non-convergence after maxiter (8 => driver zeroes solution).
//
int CGSolver::solve_cabicgstab (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode)
{
    BL_PROFILE("CGSolver::solve_cabicgstab()");
    BL_ASSERT(sol.nComp() == 1);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));
    //
    // Scratch coefficient vectors/matrices, sized for the largest possible SSS.
    //
    Real temp1[4*SSS_MAX+1];
    Real temp2[4*SSS_MAX+1];
    Real temp3[4*SSS_MAX+1];
    Real Tp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real Tpp[4*SSS_MAX+1][4*SSS_MAX+1];
    Real aj[4*SSS_MAX+1];
    Real cj[4*SSS_MAX+1];
    Real ej[4*SSS_MAX+1];
    Real Tpaj[4*SSS_MAX+1];
    Real Tpcj[4*SSS_MAX+1];
    Real Tppaj[4*SSS_MAX+1];
    Real G[4*SSS_MAX+1][4*SSS_MAX+1]; // Extracted from first 4*SSS+1 columns of Gg[][]. indexed as [row][col]
    Real g[4*SSS_MAX+1];              // Extracted from last [4*SSS+1] column of Gg[][].
    Real Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul(). indexed as [row*(4*SSS+2) + col]
    //
    // If variable_SSS we "telescope" SSS.
    // We start with 1 and increase it up to SSS_MAX on the outer iterations.
    //
    if (variable_SSS) SSS = 1;
    zero(   aj, 4*SSS_MAX+1);
    zero(   cj, 4*SSS_MAX+1);
    zero(   ej, 4*SSS_MAX+1);
    zero( Tpaj, 4*SSS_MAX+1);
    zero( Tpcj, 4*SSS_MAX+1);
    zero(Tppaj, 4*SSS_MAX+1);
    zero(temp1, 4*SSS_MAX+1);
    zero(temp2, 4*SSS_MAX+1);
    zero(temp3, 4*SSS_MAX+1);
    SetMonomialBasis(Tp,Tpp,SSS);
    const int ncomp = 1, nghost = sol.nGrow();
    //
    // Contains the matrix powers of p[] and r[].
    //
    // First 2*SSS+1 components are powers of p[].
    // Next 2*SSS components are powers of r[].
    //
    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();
    MultiFab PR(ba, 4*SSS_MAX+1, 0, dm);
    MultiFab  p(ba, ncomp, 0, dm);
    MultiFab  r(ba, ncomp, 0, dm);
    MultiFab rt(ba, ncomp, 0, dm);
    MultiFab tmp(ba, 4, nghost, dm);
    Lp.residual(r, rhs, sol, lev, bc_mode);
    BL_ASSERT(!r.contains_nan());
    // rt is the "shadow" residual, fixed for the whole solve; p starts as r.
    MultiFab::Copy(rt,r,0,0,1,0);
    MultiFab::Copy( p,r,0,0,1,0);
    const Real           rnorm0        = norm_inf(r);
    Real                 delta         = dotxy(r,rt);
    const Real           L2_norm_of_rt = sqrt(delta);
    const LinOp::BC_Mode temp_bc_mode  = LinOp::Homogeneous_BC;
    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_CABiCGStab: Initial error (error0) = " << rnorm0 << '\n';
    }
    if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs )
    {
        // Already converged (or rt orthogonal to r) -- nothing to do.
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: niter = 0," << ", rnorm = " << rnorm0 << ", delta = " << delta << ", eps_abs = " << eps_abs << '\n';
        }
        return 0;
    }
    int niters = 0, ret = 0;
    Real L2_norm_of_resid = 0, atime = 0, gtime = 0;
    bool BiCGStabFailed = false, BiCGStabConverged = false;
    // NOTE: no increment here -- m is advanced by SSS at the bottom of the body.
    for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; )
    {
        const Real time1 = ParallelDescriptor::second();
        //
        // Compute the matrix powers on p[] & r[] (monomial basis).
        // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[].
        //
        MultiFab::Copy(PR,p,0,0,1,0);
        MultiFab::Copy(PR,r,0,2*SSS+1,1,0);
        BL_ASSERT(!PR.contains_nan(0,      1));
        BL_ASSERT(!PR.contains_nan(2*SSS+1,1));
        //
        // We use "tmp" to minimize the number of Lp.apply()s.
        // We do this by doing p & r together in a single call.
        //
        MultiFab::Copy(tmp,p,0,0,1,0);
        MultiFab::Copy(tmp,r,0,1,1,0);
        for (int n = 1; n < 2*SSS; n++)
        {
            Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2);
            MultiFab::Copy(tmp,tmp,2,0,2,0);
            MultiFab::Copy(PR,tmp,0,        n,1,0);
            MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0);
            BL_ASSERT(!PR.contains_nan(n,        1));
            BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1));
        }
        // One final apply to produce the last power of p.
        MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0);
        Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1);
        MultiFab::Copy(PR,tmp,1,2*SSS,1,0);
        BL_ASSERT(!PR.contains_nan(2*SSS-1,1));
        BL_ASSERT(!PR.contains_nan(2*SSS,  1));
        Real time2 = ParallelDescriptor::second();
        atime += (time2-time1);
        // The single global reduction of the outer iteration.
        BuildGramMatrix(Gg, PR, rt, SSS);
        const Real time3 = ParallelDescriptor::second();
        gtime += (time3-time2);
        //
        // Form G[][] and g[] from Gg.
        //
        for (int i = 0, k = 0; i < 4*SSS+1; i++)
        {
            for (int j = 0; j < 4*SSS+1; j++)
                //
                // First 4*SSS+1 elements in each row go to G[][].
                //
                G[i][j] = Gg[k++];
            //
            // Last element in row goes to g[].
            //
            g[i] = Gg[k++];
        }
        // aj/cj select p^0 and r^0 within the PR basis; ej accumulates the solution update.
        zero(aj, 4*SSS+1); aj[0]       = 1;
        zero(cj, 4*SSS+1); cj[2*SSS+1] = 1;
        zero(ej, 4*SSS+1);
        for (int nit = 0; nit < SSS; nit++)
        {
            gemv( Tpaj,  Tp, aj, 4*SSS+1, 4*SSS+1);
            gemv( Tpcj,  Tp, cj, 4*SSS+1, 4*SSS+1);
            gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1);
            const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1);
            if ( g_dot_Tpaj == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 1; break;
            }
            const Real alpha = delta / g_dot_Tpaj;
            if ( std::isinf(alpha) )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 2; break;
            }
            axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1);
            gemv(temp2,    G,  temp1,        4*SSS+1, 4*SSS+1);
            axpy(temp3,   cj, -alpha,  Tpaj, 4*SSS+1);
            const Real omega_numerator   = dot(temp3, temp2, 4*SSS+1);
            const Real omega_denominator = dot(temp1, temp2, 4*SSS+1);
            //
            // NOTE: omega_numerator/omega_denominator can be 0/x or 0/0, but should never be x/0.
            //
            // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj.
            // If omega_numerator==0, and ||s||!=0, then stabilization breakdown.
            //
            // Partial update of ej must happen before the check on omega to ensure forward progress !!!
            //
            axpy(ej, ej, alpha, aj, 4*SSS+1);
            //
            // ej has been updated so consider that we've done an iteration since
            // even if we break out of the loop we'll be able to update both sol.
            //
            niters++;
            //
            // Calculate the norm of Saad's vector 's' to check intra s-step convergence.
            //
            axpy(temp1, cj,-alpha,  Tpaj, 4*SSS+1);
            gemv(temp2,  G, temp1,        4*SSS+1, 4*SSS+1);
            const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1);
            L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s));
            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n';
                BiCGStabConverged = true; break;
            }
            if ( omega_denominator == 0 )
            {
                if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n';
                BiCGStabFailed = true; ret = 3; break;
            }
            const Real omega = omega_numerator / omega_denominator;
            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( omega == 0 )
                    std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n';
                if ( std::isinf(omega) )
                    std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n';
            }
            if ( omega == 0         ) { BiCGStabFailed = true; ret = 4; break; }
            if ( std::isinf(omega)  ) { BiCGStabFailed = true; ret = 4; break; }
            //
            // Complete the update of ej & cj now that omega is known to be ok.
            //
            axpy(ej, ej,       omega,  cj, 4*SSS+1);
            axpy(ej, ej,-omega*alpha, Tpaj, 4*SSS+1);
            axpy(cj, cj,      -omega, Tpcj, 4*SSS+1);
            axpy(cj, cj,      -alpha, Tpaj, 4*SSS+1);
            axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1);
            //
            // Do an early check of the residual to determine convergence.
            //
            gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1);
            //
            // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic.
            // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel).
            // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged.
            //
            const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1);
            L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0);
            if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt )
            {
                if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) )
                    std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n';
                BiCGStabConverged = true; break;
            }
            const Real delta_next = dot(g, cj, 4*SSS+1);
            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( delta_next == 0 )
                    std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n';
                if ( std::isinf(delta_next) )
                    std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n';
            }
            if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf?
            if ( delta_next == 0        ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown...
            const Real beta = (delta_next/delta)*(alpha/omega);
            if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) )
            {
                if ( beta == 0 )
                    std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n';
                if ( std::isinf(beta) )
                    std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n';
            }
            if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf?
            if ( beta == 0        ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0? can't make further progress(?)
            axpy(aj, cj,        beta,   aj, 4*SSS+1);
            axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1);
            delta = delta_next;
        }
        //
        // Update iterates.
        //
        // Project the small coefficient vectors back through the PR basis.
        for (int i = 0; i < 4*SSS+1; i++)
            sxay(sol,sol,ej[i],PR,i);
        MultiFab::Copy(p,PR,0,0,1,0);
        p.mult(aj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(p,p,aj[i],PR,i);
        MultiFab::Copy(r,PR,0,0,1,0);
        r.mult(cj[0],0,1);
        for (int i = 1; i < 4*SSS+1; i++)
            sxay(r,r,cj[i],PR,i);
        if ( !BiCGStabFailed && !BiCGStabConverged )
        {
            m += SSS;
            // Telescoping: grow SSS toward SSS_MAX and rebuild the basis operators.
            if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); }
        }
    }
    if ( verbose > 0 )
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_CABiCGStab: Final: Iteration " << std::setw(4) << niters << " rel. err. " << L2_norm_of_resid << '\n';
        }
        if ( verbose > 1 )
        {
            Real tmp[2] = { atime, gtime };
            ParallelDescriptor::ReduceRealMax(tmp,2,color());
            if ( ParallelDescriptor::IOProcessor(color()) )
            {
                Spacer(std::cout, lev);
                std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0] << ", gram time: " << tmp[1] << '\n';
            }
        }
    }
    if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged)
    {
        if ( L2_norm_of_resid > L2_norm_of_rt )
        {
            if ( ParallelDescriptor::IOProcessor(color()) )
                BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!");
            //
            // Return code 8 tells the MultiGrid driver to zero out the solution!
            //
            ret = 8;
        }
        else
        {
            //
            // Return codes 1-7 tells the MultiGrid driver to smooth the solution!
            //
            ret = 7;
        }
    }
    return ret;
}
//
// Levenberg-Marquardt non-linear least-squares driver.
//
// Minimizes ||y - f(x)||^2 by iterating damped Gauss-Newton steps:
// (J^T J + mu I) Dp = J^T e, with the damping mu adapted from the gain
// ratio between actual and predicted reduction.  impose_limits() clips
// each candidate step so the iterates stay feasible.
//
// Returns a reason code: 1 = residual below epsy, 2 = gradient (J^T e)
// below epsj, 3 = step below epsx (relative).  Throws improper_problem,
// infeasible_problem, singularity_error or maximum_iteration on failure.
//
// NOTE(review): internally a literal 42 is thrown/caught to convert a huge
// step into a singularity_error -- control flow, not an error code leak.
//
int levenberg_marquardt_nllsq_impl(Function f, JacobianFunction fill_jac, InputVector& x, const OutputVector& y, LinearSolver lin_solve, LimitFunction impose_limits, unsigned int max_iter, T tau, T epsj, T epsx, T epsy)
{
  typedef typename vect_traits<InputVector>::value_type ValueType;
  typedef typename vect_traits<InputVector>::size_type SizeType;
  /* Check if the problem is defined properly */
  if (y.size() < x.size())
    throw improper_problem("Levenberg-Marquardt requires M > N!");
  mat<ValueType,mat_structure::rectangular> J(y.size(),x.size());
  mat<ValueType,mat_structure::square> JtJ(x.size());
  mat<ValueType,mat_structure::diagonal> diag_JtJ(x.size());
  mat<ValueType,mat_structure::scalar> mu(x.size(),0.0);
  InputVector Jte = x;
  // Dp = 0 (same shape as x).
  InputVector Dp = x; Dp -= x;
  mat_vect_adaptor<InputVector> Dp_mat(Dp);
  InputVector pDp = x;
  impose_limits(x,Dp); // make sure the initial solution is feasible.
  x += Dp;
  // Replace non-positive tuning parameters with defaults.
  if(tau <= 0.0) tau = 1E-03;
  if(epsj <= 0.0) epsj = 1E-17;
  if(epsx <= 0.0) epsx = 1E-17;
  ValueType epsx_sq = epsx * epsx;
  if(epsy <= 0.0) epsy = 1E-17;
  if(max_iter <= 1) max_iter = 2;
  /* compute e=x - f(p) and its L2 norm */
  OutputVector y_approx = f(x);
  OutputVector e = y; e -= y_approx;
  OutputVector e_tmp = e;
  ValueType p_eL2 = e * e;           // squared L2 norm of the residual
  unsigned int nu = 2;               // damping growth factor (doubles on each rejection)
  for(unsigned int k = 0; k < max_iter; ++k) {
    if(p_eL2 < epsy)
      return 1;  //residual is too small.
    fill_jac(J,x,y_approx);
    /* J^T J, J^T e */
    // Only the upper triangle is computed; mirrored into the lower half.
    for(SizeType i = 0; i < J.get_col_count(); ++i) {
      for(SizeType j = i; j < J.get_col_count(); ++j) {
        ValueType tmp(0.0);
        for(SizeType l = 0; l < J.get_row_count(); ++l)
          tmp += J(l,i) * J(l,j);
        JtJ(i,j) = JtJ(j,i) = tmp;
      };
    };
    Jte = e * J;
    ValueType p_L2 = x * x;
    /* check for convergence */
    if( norm_inf(mat_vect_adaptor<InputVector>(Jte)) < epsj)
      return 2;  //Jacobian is too small.
    /* compute initial damping factor */
    if( k == 0 ) {
      ValueType tmp = std::numeric_limits<ValueType>::min();
      for(SizeType i=0; i < JtJ.get_row_count(); ++i)
        if(JtJ(i,i) > tmp)
          tmp = JtJ(i,i); /* find max diagonal element */
      mu = mat<ValueType,mat_structure::scalar>(x.size(), tau * tmp);
    };
    /* determine increment using adaptive damping */
    while(true) {
      /* solve augmented equations */
      try {
        lin_solve(make_damped_matrix(JtJ,mu),Dp_mat,mat_vect_adaptor<InputVector>(Jte),epsj);
        impose_limits(x,Dp);
        ValueType Dp_L2 = Dp * Dp;
        pDp = x; pDp += Dp;
        if(Dp_L2 < epsx_sq * p_L2) /* relative change in p is small, stop */
          return 3;  //steps are too small.
        if( Dp_L2 >= (p_L2 + epsx) / ( std::numeric_limits<ValueType>::epsilon() * std::numeric_limits<ValueType>::epsilon() ) )
          throw 42; //signal to throw a singularity-error (see below).
        e_tmp = y; e_tmp -= f(pDp);
        ValueType pDp_eL2 = e_tmp * e_tmp;
        // dL: predicted reduction (model); dF: actual reduction.
        ValueType dL = mu(0,0) * Dp_L2 + Dp * Jte;
        ValueType dF = p_eL2 - pDp_eL2;
        if( (dL < 0.0) || (dF < 0.0) )
          throw singularity_error("reject inc.");
        // reduction in error, increment is accepted
        // Shrink mu by at most a factor of 3 based on the gain ratio 2*dF/dL - 1.
        ValueType tmp = ( ValueType(2.0) * dF / dL - ValueType(1.0));
        tmp = 1.0 - tmp * tmp * tmp;
        mu *= ( ( tmp >= ValueType(1.0 / 3.0) ) ? tmp : ValueType(1.0 / 3.0) );
        nu = 2;
        // Commit the accepted step and its cached evaluations.
        x = pDp;
        y_approx = y; y_approx -= e_tmp;
        e = e_tmp;
        p_eL2 = pDp_eL2;
        break; //the step is accepted and the loop is broken.
      } catch(singularity_error&) {
        //the increment must be rejected (either by singularity in damped matrix or no-redux by the step.
        mu *= ValueType(nu);
        nu <<= 1; // 2*nu;
        if( nu == 0 ) /* nu has overflown. */
          throw infeasible_problem("Levenberg-Marquardt method cannot reduce the function further, matrix damping has overflown!");
      } catch(int i) {
        if(i == 42)
          throw singularity_error("Levenberg-Marquardt method has detected a near-singularity in the Jacobian matrix!");
        else
          throw i; //just in case there might be another integer thrown (very unlikely).
      };
    }; /* inner loop */
  };
  //if this point is reached, it means we have reached the maximum iterations.
  throw maximum_iteration(max_iter);
};
//
// Bound-constrained trust-region optimization of M_prob starting from x.
//
// Each iteration approximately solves the trust-region subproblem via
// CGstep(), accepts/rejects the step from the gain ratio rho, and grows or
// shrinks the radius Delta accordingly (when allow_Trust_radius_calculations
// is set).  Statistics are pushed into M_solver_stats when enabled.
// Returns true; on an exception, the optimum is accepted only if the
// gradient is (near) zero, otherwise the exception propagates.
//
bool SolverUnconstrained<Data,Problem>::optimize( vector_type& x )
{
    M_solver_stats.clear();
    // Controlling parameters
    // trust region radius
    value_type Delta = M_options.Delta_init;
    vector_type x_new ( x.size() );
    vector_type stot ( x.size() );
    value_type norm_Tgrad_fx = this->norm_Theta_x_grad_fx( x, Delta );
    value_type norm_s_til = 0;
    M_prob.copy_x0_to_x( x );
    // FIXME: if ( M_options.verbose() )
    //M_prob.print_complete( x );
    if ( M_solver_stats.collectStats() )
        M_solver_stats.push( norm_Tgrad_fx, 0, 0, 0, 0, 0, 0, 0, Delta, 0, 0, 0 );
    try
    {
        int iter = 0;
        //
        // Apply bound constrained Trust region algorithm
        //
        while ( iter == 0 || ( iter < M_options.max_TR_iter && norm_Tgrad_fx > M_options.TR_tol ) )
        {
            iter++;
            int n_CGiter, n_restarts, n_indef, n_crosses_def, n_crosses_indef, n_truss_exit_def, n_truss_exit_indef;
            value_type _s_til_x_G_til_x_s_til, phi_til, rho, Delta_used = Delta;
            DVLOG(2) << "\n===================== iter = " << iter << " ===========================";
            //DVLOG(2) << "\nx = " << x << "\n";
            DVLOG(2) << "\n -> norm_Tgrad_fx = " << norm_Tgrad_fx;
            /** find an approximate stot for the step to make
             * solve :
             * Find \f$\tilde{s}^k = \mathrm{arg}\mathrm{min}_{s \in R^n}{\tilde{\phi}^k : ||s|| < \Delta^k}\f$
             */
            this->CGstep( x, Delta, stot, norm_s_til, n_CGiter, n_restarts, n_indef, n_crosses_def, n_crosses_indef, n_truss_exit_def, n_truss_exit_indef, _s_til_x_G_til_x_s_til, phi_til );
            //
            x_new = x + stot;
            f_type __fx_new;
            M_prob.evaluate( x_new, __fx_new, diff_order<0>() );
            f_type __fx;
            M_prob.evaluate( x, __fx, diff_order<0>() );
            // compute actual merit function reduction
            value_type ared_til = __fx_new.value( 0 ) - __fx.value( 0 ) + 0.5 * _s_til_x_G_til_x_s_til;
            rho = ared_til / phi_til;
            /////////////////////////////////////////////////////////////////////////
            // Trust region radius calculation:
            if ( M_options.allow_Trust_radius_calculations )
            {
                if ( rho <= 1e-8 )
                    Delta = M_options.rho_decrease * Delta;          // poor agreement: shrink radius, reject step
                else
                {
                    x = x + stot;                                    // accept the step
                    if ( rho > M_options.rho_big )
                        Delta = std::max( M_options.rho_increase_big*norm_s_til, Delta );
                    else if ( rho > M_options.rho_small )
                        Delta = std::max( M_options.rho_increase_small*norm_s_til, Delta );
                }
                norm_Tgrad_fx = this->norm_Theta_x_grad_fx( x, Delta );
            }
            else
            {
                x = x + stot;
                norm_Tgrad_fx = this->norm_Theta_x_grad_fx( x, Delta );
            }
            /////////////////////////////////////////////////////////////////////////
            if ( M_solver_stats.collectStats() )
            {
                M_solver_stats.push( norm_Tgrad_fx, n_CGiter, n_restarts, n_indef, n_crosses_def, n_crosses_indef, n_truss_exit_def, n_truss_exit_indef, Delta_used, ared_til, phi_til, rho );
            }
            if ( norm_Tgrad_fx > 1e-5 )
                M_prob.setAccuracy( std::min( 1e-1, norm_Tgrad_fx ) );
            DVLOG(2) << "norm_Tgrad_fx = " << norm_Tgrad_fx << "\n";
        }
    }
    catch ( std::exception const& __ex )
    {
        // Tolerate the failure if we are effectively at a stationary point;
        // otherwise propagate.  Use a bare rethrow so the original dynamic
        // exception type is preserved (``throw __ex;`` would slice it down
        // to std::exception).
        f_type __fx_new;
        M_prob.evaluate ( x, __fx_new, diff_order<2>() );
        if ( norm_inf( __fx_new.gradient( 0 ) ) > 1e-10 )
            throw;
    }
    // Least-squares estimate of the bound multipliers at the final point.
    vector_type __l( _E_n ), __u( _E_n );
    lambda_LS( x, __l, __u );
    if ( M_solver_stats.collectStats() )
        M_solver_stats.push( x, __l, __u );
    return true;
}
int CGSolver::solve_cg (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_cg()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); MultiFab r1(ba, ncomp, nghost, dm); MultiFab z1(ba, ncomp, nghost, dm); MultiFab r2(ba, ncomp, nghost, dm); MultiFab z2(ba, ncomp, nghost, dm); MultiFab::Copy(sorig,sol,0,0,1,0); Lp.residual(r, rhs, sorig, lev, bc_mode); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC; Real rnorm = norm_inf(r); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev); Real sol_norm = 0; Real rho_1 = 0; int ret = 0; int nit = 1; if ( rnorm == 0 || rnorm < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: niter = 0," << ", rnorm = " << rnorm << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" << eps_rel*(Lp_norm*sol_norm + rnorm0 ) << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1) { z.setVal(0); jbb_precond(z,r,lev,Lp); } else { MultiFab::Copy(z,r,0,0,1,0); } Real rho = dotxy(z,r); if (nit == 1) { MultiFab::Copy(p,z,0,0,1,0); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev, temp_bc_mode); Real alpha; if ( Real pw = dotxy(p,q) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 2 && 
ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_cg:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r); sol_norm = norm_inf(sol); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break; #endif if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs ) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_cg: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
//
// Run the SQP method: the main optimization loop of CasADi's SQPMethod.
//
// Starting from NLP_X_INIT, repeatedly (1) forms and solves a QP built from
// the current Hessian (exact or BFGS) and constraint Jacobian, (2) performs
// a non-monotone Armijo backtracking line-search on an L1 merit function,
// (3) updates primal/dual variables and the Hessian approximation.  Stops
// on the KKT tolerances (tol_pr_, tol_du_), maxiter_, or a callback abort.
// Results are written to the NLP_* outputs; nfdir/nadir derivatives are not
// supported (asserted zero).
//
void SQPInternal::evaluate(int nfdir, int nadir){
  casadi_assert(nfdir==0 && nadir==0);
  checkInitialBounds();
  // Get problem data
  const vector<double>& x_init = input(NLP_X_INIT).data();
  const vector<double>& lbx = input(NLP_LBX).data();
  const vector<double>& ubx = input(NLP_UBX).data();
  const vector<double>& lbg = input(NLP_LBG).data();
  const vector<double>& ubg = input(NLP_UBG).data();
  // Set the static parameter
  if (parametric_) {
    const vector<double>& p = input(NLP_P).data();
    if (!F_.isNull()) F_.setInput(p,F_.getNumInputs()-1);
    if (!G_.isNull()) G_.setInput(p,G_.getNumInputs()-1);
    if (!H_.isNull()) H_.setInput(p,H_.getNumInputs()-1);
    if (!J_.isNull()) J_.setInput(p,J_.getNumInputs()-1);
  }
  // Set linearization point to initial guess
  copy(x_init.begin(),x_init.end(),x_.begin());
  // Lagrange multipliers of the NLP
  fill(mu_.begin(),mu_.end(),0);
  fill(mu_x_.begin(),mu_x_.end(),0);
  // Initial constraint Jacobian
  eval_jac_g(x_,gk_,Jk_);
  // Initial objective gradient
  eval_grad_f(x_,fk_,gf_);
  // Initialize or reset the Hessian or Hessian approximation
  reg_ = 0;
  if( hess_mode_ == HESS_BFGS){
    reset_h();
  } else {
    eval_h(x_,mu_,1.0,Bk_);
  }
  // Evaluate the initial gradient of the Lagrangian
  copy(gf_.begin(),gf_.end(),gLag_.begin());
  if(m_>0) DMatrix::mul_no_alloc_tn(Jk_,mu_,gLag_);
  // gLag += mu_x_;
  transform(gLag_.begin(),gLag_.end(),mu_x_.begin(),gLag_.begin(),plus<double>());
  // Number of SQP iterations
  int iter = 0;
  // Number of line-search iterations
  int ls_iter = 0;
  // Last linesearch successfull
  bool ls_success = true;
  // Reset
  merit_mem_.clear();
  sigma_ = 0.;
  // NOTE: Move this into the main optimization loop
  // Default stepsize
  double t = 0;
  // MAIN OPTIMIZATION LOOP
  while(true){
    // Primal infeasability
    double pr_inf = primalInfeasibility(x_, lbx, ubx, gk_, lbg, ubg);
    // 1-norm of lagrange gradient
    double gLag_norm1 = norm_1(gLag_);
    // 1-norm of step
    double dx_norm1 = norm_1(dx_);
    // Print header occasionally
    if(iter % 10 == 0) printIteration(cout);
    // Printing information about the actual iterate
    printIteration(cout,iter,fk_,pr_inf,gLag_norm1,dx_norm1,reg_,ls_iter,ls_success);
    // Call callback function if present
    if (!callback_.isNull()) {
      callback_.input(NLP_COST).set(fk_);
      callback_.input(NLP_X_OPT).set(x_);
      callback_.input(NLP_LAMBDA_G).set(mu_);
      callback_.input(NLP_LAMBDA_X).set(mu_x_);
      callback_.input(NLP_G).set(gk_);
      callback_.evaluate();
      // A nonzero callback output requests early termination.
      if (callback_.output(0).at(0)) {
        cout << endl;
        cout << "CasADi::SQPMethod: aborted by callback..." << endl;
        break;
      }
    }
    // Checking convergence criteria
    if (pr_inf < tol_pr_ && gLag_norm1 < tol_du_){
      cout << endl;
      cout << "CasADi::SQPMethod: Convergence achieved after " << iter << " iterations." << endl;
      break;
    }
    if (iter >= maxiter_){
      cout << endl;
      cout << "CasADi::SQPMethod: Maximum number of iterations reached." << endl;
      break;
    }
    // Start a new iteration
    iter++;
    // Formulate the QP
    // QP bounds are the NLP bounds shifted to the current iterate
    // (the QP variable is the step dx, not x itself).
    transform(lbx.begin(),lbx.end(),x_.begin(),qp_LBX_.begin(),minus<double>());
    transform(ubx.begin(),ubx.end(),x_.begin(),qp_UBX_.begin(),minus<double>());
    transform(lbg.begin(),lbg.end(),gk_.begin(),qp_LBA_.begin(),minus<double>());
    transform(ubg.begin(),ubg.end(),gk_.begin(),qp_UBA_.begin(),minus<double>());
    // Solve the QP
    solve_QP(Bk_,gf_,qp_LBX_,qp_UBX_,Jk_,qp_LBA_,qp_UBA_,dx_,qp_DUAL_X_,qp_DUAL_A_);
    log("QP solved");
    // Detecting indefiniteness
    double gain = quad_form(dx_,Bk_);
    if (gain < 0){
      casadi_warning("Indefinite Hessian detected...");
    }
    // Calculate penalty parameter of merit function
    // sigma_ must dominate the multipliers so the merit function is exact.
    sigma_ = std::max(sigma_,1.01*norm_inf(qp_DUAL_X_));
    sigma_ = std::max(sigma_,1.01*norm_inf(qp_DUAL_A_));
    // Calculate L1-merit function in the actual iterate
    double l1_infeas = primalInfeasibility(x_, lbx, ubx, gk_, lbg, ubg);
    // Right-hand side of Armijo condition
    double F_sens = inner_prod(dx_, gf_);
    double L1dir = F_sens - sigma_ * l1_infeas;
    double L1merit = fk_ + sigma_ * l1_infeas;
    // Storing the actual merit function value in a list
    merit_mem_.push_back(L1merit);
    if (merit_mem_.size() > merit_memsize_){
      merit_mem_.pop_front();
    }
    // Stepsize
    t = 1.0;
    double fk_cand;
    // Merit function value in candidate
    double L1merit_cand = 0;
    // Reset line-search counter, success marker
    ls_iter = 0;
    ls_success = true;
    // Line-search
    log("Starting line-search");
    if(maxiter_ls_>0){ // maxiter_ls_== 0 disables line-search
      // Line-search loop
      while (true){
        for(int i=0; i<n_; ++i) x_cand_[i] = x_[i] + t * dx_[i];
        // Evaluating objective and constraints
        eval_f(x_cand_,fk_cand);
        eval_g(x_cand_,gk_cand_);
        ls_iter++;
        // Calculating merit-function in candidate
        l1_infeas = primalInfeasibility(x_cand_, lbx, ubx, gk_cand_, lbg, ubg);
        L1merit_cand = fk_cand + sigma_ * l1_infeas;
        // Calculating maximal merit function value so far
        // (non-monotone Armijo test against the recent-history maximum).
        double meritmax = *max_element(merit_mem_.begin(), merit_mem_.end());
        if (L1merit_cand <= meritmax + t * c1_ * L1dir){
          // Accepting candidate
          log("Line-search completed, candidate accepted");
          break;
        }
        // Line-search not successful, but we accept it.
        if(ls_iter == maxiter_ls_){
          ls_success = false;
          log("Line-search completed, maximum number of iterations");
          break;
        }
        // Backtracking
        t = beta_ * t;
      }
    }
    // Candidate accepted, update dual variables
    for(int i=0; i<m_; ++i) mu_[i] = t * qp_DUAL_A_[i] + (1 - t) * mu_[i];
    for(int i=0; i<n_; ++i) mu_x_[i] = t * qp_DUAL_X_[i] + (1 - t) * mu_x_[i];
    if( hess_mode_ == HESS_BFGS){
      // Evaluate the gradient of the Lagrangian with the old x but new mu (for BFGS)
      copy(gf_.begin(),gf_.end(),gLag_old_.begin());
      if(m_>0) DMatrix::mul_no_alloc_tn(Jk_,mu_,gLag_old_);
      // gLag_old += mu_x_;
      transform(gLag_old_.begin(),gLag_old_.end(),mu_x_.begin(),gLag_old_.begin(),plus<double>());
    }
    // Candidate accepted, update the primal variable
    copy(x_.begin(),x_.end(),x_old_.begin());
    copy(x_cand_.begin(),x_cand_.end(),x_.begin());
    // Evaluate the constraint Jacobian
    log("Evaluating jac_g");
    eval_jac_g(x_,gk_,Jk_);
    // Evaluate the gradient of the objective function
    log("Evaluating grad_f");
    eval_grad_f(x_,fk_,gf_);
    // Evaluate the gradient of the Lagrangian with the new x and new mu
    copy(gf_.begin(),gf_.end(),gLag_.begin());
    if(m_>0) DMatrix::mul_no_alloc_tn(Jk_,mu_,gLag_);
    // gLag += mu_x_;
    transform(gLag_.begin(),gLag_.end(),mu_x_.begin(),gLag_.begin(),plus<double>());
    // Updating Lagrange Hessian
    if( hess_mode_ == HESS_BFGS){
      log("Updating Hessian (BFGS)");
      // BFGS with careful updates and restarts
      if (iter % lbfgs_memory_ == 0){
        // Reset Hessian approximation by dropping all off-diagonal entries
        const vector<int>& rowind = Bk_.rowind();      // Access sparsity (row offset)
        const vector<int>& col = Bk_.col();            // Access sparsity (column)
        vector<double>& data = Bk_.data();             // Access nonzero elements
        for(int i=0; i<rowind.size()-1; ++i){          // Loop over the rows of the Hessian
          for(int el=rowind[i]; el<rowind[i+1]; ++el){ // Loop over the nonzero elements of the row
            if(i!=col[el]) data[el] = 0;               // Remove if off-diagonal entries
          }
        }
      }
      // Pass to BFGS update function
      bfgs_.setInput(Bk_,BFGS_BK);
      bfgs_.setInput(x_,BFGS_X);
      bfgs_.setInput(x_old_,BFGS_X_OLD);
      bfgs_.setInput(gLag_,BFGS_GLAG);
      bfgs_.setInput(gLag_old_,BFGS_GLAG_OLD);
      // Update the Hessian approximation
      bfgs_.evaluate();
      // Get the updated Hessian
      bfgs_.getOutput(Bk_);
    } else {
      // Exact Hessian
      log("Evaluating hessian");
      eval_h(x_,mu_,1.0,Bk_);
    }
  }
  // Save results to outputs
  output(NLP_COST).set(fk_);
  output(NLP_X_OPT).set(x_);
  output(NLP_LAMBDA_G).set(mu_);
  output(NLP_LAMBDA_X).set(mu_x_);
  output(NLP_G).set(gk_);
  // Save statistics
  stats_["iter_count"] = iter;
}
//
// Drive the multigrid V-cycles until convergence (or maxiter cycles).
//
// Convergence is tested against max(bnorm, resnorm0); when
// use_Anorm_for_convergence is set, the test additionally includes the
// operator-norm term eps_rel*norm_Lp*norm_cor.  With
// do_fixed_number_of_iters == 1 convergence checks are bypassed and exactly
// maxiter cycles run.  On return _sol = correction + initialsolution.
// Returns 1 on success, 0 otherwise.
//
int MultiGrid::solve_ (MultiFab& _sol, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode, Real bnorm, Real resnorm0)
{
    BL_PROFILE("MultiGrid::solve_()");
    //
    // If do_fixed_number_of_iters = 1, then do maxiter iterations without checking for convergence
    //
    // If do_fixed_number_of_iters = 0, then relax system maxiter times,
    // and stop if relative error <= _eps_rel or if absolute err <= _abs_eps
    //
    const Real strt_time = ParallelDescriptor::second();
    const int level = 0;
    //
    // We take the max of the norms of the initial RHS and the initial residual in order to capture both cases
    //
    Real norm_to_test_against;
    bool using_bnorm;
    if (bnorm >= resnorm0)
    {
        norm_to_test_against = bnorm;
        using_bnorm          = true;
    }
    else
    {
        norm_to_test_against = resnorm0;
        using_bnorm          = false;
    }
    int        returnVal = 0;
    Real       error     = resnorm0;
    //
    // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed
    //
    if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 )
    {
        std::cout << "MultiGrid: Tolerance " << eps_rel << " < 1e-16 is probably set too low" << '\n';
    }
    //
    // We initially define norm_cor based on the initial solution only so we can use it in the very first iteration
    // to decide whether the problem is already solved (this is relevant if the previous solve used was only solved
    // according to the Anorm test and not the bnorm test).
    //
    Real norm_cor = norm_inf(*initialsolution,true); // local=true: reduce across ranks below
    ParallelDescriptor::ReduceRealMax(norm_cor);
    int        nit = 1;
    const Real norm_Lp = Lp.norm(0, level);
    Real cg_time = 0;
    if ( use_Anorm_for_convergence == 1 )
    {
        //
        // Don't need to go any further -- no iterations are required
        //
        if (error <= eps_abs || error < eps_rel*(norm_Lp*norm_cor+norm_to_test_against))
        {
            if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
            {
                std::cout << " Problem is already converged -- no iterations required\n";
            }
            return 1;
        }
        for ( ;
              ( (error > eps_abs && error > eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) || (do_fixed_number_of_iters == 1) )
                && nit <= maxiter;
              ++nit)
        {
            relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time);
            // Elide a reduction by reducing norm_cor and the error estimate together.
            Real tmp[2] = { norm_inf(*cor[level],true), errorEstimate(level,bc_mode,true) };
            ParallelDescriptor::ReduceRealMax(tmp,2);
            norm_cor = tmp[0];
            error    = tmp[1];
            if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
            {
                const Real rel_error = error / norm_to_test_against;
                Spacer(std::cout, level);
                if (using_bnorm)
                {
                    std::cout << "MultiGrid: Iteration " << nit << " resid/bnorm = " << rel_error << '\n';
                }
                else
                {
                    std::cout << "MultiGrid: Iteration " << nit << " resid/resid0 = " << rel_error << '\n';
                }
            }
        }
    }
    else
    {
        //
        // Don't need to go any further -- no iterations are required
        //
        if (error <= eps_abs || error < eps_rel*norm_to_test_against)
        {
            if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
            {
                std::cout << " Problem is already converged -- no iterations required\n";
            }
            return 1;
        }
        for ( ;
              ( (error > eps_abs && error > eps_rel*norm_to_test_against) || (do_fixed_number_of_iters == 1) )
                && nit <= maxiter;
              ++nit)
        {
            relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time);
            error = errorEstimate(level, bc_mode);
            if ( ParallelDescriptor::IOProcessor() && verbose > 1 )
            {
                const Real rel_error = error / norm_to_test_against;
                Spacer(std::cout, level);
                if (using_bnorm)
                {
                    std::cout << "MultiGrid: Iteration " << nit << " resid/bnorm = " << rel_error << '\n';
                }
                else
                {
                    std::cout << "MultiGrid: Iteration " << nit << " resid/resid0 = " << rel_error << '\n';
                }
            }
        }
    }
    Real run_time = (ParallelDescriptor::second() - strt_time);
    if ( verbose > 0 )
    {
        if ( ParallelDescriptor::IOProcessor() )
        {
            const Real rel_error = error / norm_to_test_against;
            Spacer(std::cout, level);
            if (using_bnorm)
            {
                std::cout << "MultiGrid: Iteration " << nit-1 << " resid/bnorm = " << rel_error << '\n';
            }
            else
            {
                std::cout << "MultiGrid: Iteration " << nit-1 << " resid/resid0 = " << rel_error << '\n';
            }
        }
        if ( verbose > 1 )
        {
            Real tmp[2] = { run_time, cg_time };
            ParallelDescriptor::ReduceRealMax(tmp,2,ParallelDescriptor::IOProcessorNumber());
            if ( ParallelDescriptor::IOProcessor() )
                std::cout << ", Solve time: " << tmp[0] << ", CG time: " << tmp[1];
        }
        if ( ParallelDescriptor::IOProcessor() ) std::cout << '\n';
    }
    if ( ParallelDescriptor::IOProcessor() && (verbose > 0) )
    {
        if ( do_fixed_number_of_iters == 1)
        {
            std::cout << " Did fixed number of iterations: " << maxiter << std::endl;
        }
        else if ( error < eps_rel*norm_to_test_against )
        {
            std::cout << " Converged res < eps_rel*max(bnorm,res_norm)\n";
        }
        else if ( (use_Anorm_for_convergence == 1) && (error < eps_rel*norm_Lp*norm_cor) )
        {
            std::cout << " Converged res < eps_rel*Anorm*sol\n";
        }
        else if ( error < eps_abs )
        {
            std::cout << " Converged res < eps_abs\n";
        }
    }
    //
    // Omit ghost update since maybe not initialized in calling routine.
    // Add to boundary values stored in initialsolution.
    //
    _sol.copy(*cor[level]);
    _sol.plus(*initialsolution,0,_sol.nComp(),0);
    if ( use_Anorm_for_convergence == 1 )
    {
        if ( do_fixed_number_of_iters == 1 ||
             error <= eps_rel*(norm_Lp*norm_cor+norm_to_test_against) ||
             error <= eps_abs )
            returnVal = 1;
    }
    else
    {
        if ( do_fixed_number_of_iters == 1 ||
             error <= eps_rel*(norm_to_test_against) ||
             error <= eps_abs )
            returnVal = 1;
    }
    //
    // Otherwise, failed to solve satisfactorily
    //
    return returnVal;
}
// Largest absolute element magnitude ("amax") of the vector v.
// This is exactly the infinity norm, so simply delegate to norm_inf().
typename type_traits<typename V::value_type>::real_type amax (const V &v)
{
    const typename type_traits<typename V::value_type>::real_type largest = norm_inf (v);
    return largest;
}
int MCMultiGrid::solve_ (MultiFab& _sol, Real eps_rel, Real eps_abs, MCBC_Mode bc_mode, int level) { // // Relax system maxiter times, stop if relative error <= _eps_rel or // if absolute err <= _abs_eps // const Real strt_time = ParallelDescriptor::second(); // // Elide a reduction by doing these together. // Real tmp[2] = { norm_inf(*rhs[level],true), errorEstimate(level,bc_mode,true) }; ParallelDescriptor::ReduceRealMax(tmp,2); const Real norm_rhs = tmp[0]; const Real error0 = tmp[1]; int returnVal = 0; Real error = error0; if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { Spacer(std::cout, level); std::cout << "MCMultiGrid: Initial rhs = " << norm_rhs << '\n'; std::cout << "MCMultiGrid: Initial error (error0) = " << error0 << '\n'; } if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 ) { std::cout << "MCMultiGrid: Tolerance " << eps_rel << " < 1e-16 is probably set too low" << '\n'; } // // Initialize correction to zero at this level (auto-filled at levels below) // (*cor[level]).setVal(0.0); // // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed. // int nit = 1; const Real new_error_0 = norm_rhs; //const Real norm_Lp = Lp.norm(0, level); for ( ; error > eps_abs && error > eps_rel*norm_rhs && nit <= maxiter; ++nit) { relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode); error = errorEstimate(level,bc_mode); if ( ParallelDescriptor::IOProcessor() && verbose > 1 ) { const Real rel_error = (error0 != 0) ? error/new_error_0 : 0; Spacer(std::cout, level); std::cout << "MCMultiGrid: Iteration " << nit << " error/error0 = " << rel_error << '\n'; } } Real run_time = (ParallelDescriptor::second() - strt_time); if ( verbose > 0 ) { if ( ParallelDescriptor::IOProcessor() ) { const Real rel_error = (error0 != 0) ? error/error0 : 0; Spacer(std::cout, level); std::cout << "MCMultiGrid: Final Iter. 
" << nit-1 << " error/error0 = " << rel_error; } if ( verbose > 1 ) { ParallelDescriptor::ReduceRealMax(run_time); if ( ParallelDescriptor::IOProcessor() ) std::cout << ", Solve time: " << run_time << '\n'; } } if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { if ( error < eps_rel*norm_rhs ) { std::cout << " Converged res < eps_rel*bnorm\n"; } else if ( error < eps_abs ) { std::cout << " Converged res < eps_abs\n"; } } // // Omit ghost update since maybe not initialized in calling routine. // Add to boundary values stored in initialsolution. // _sol.copy(*cor[level]); _sol.plus(*initialsolution,0,_sol.nComp(),0); if ( error <= eps_rel*(norm_rhs) || error <= eps_abs ) returnVal = 1; // // Otherwise, failed to solve satisfactorily // return returnVal; }
//
// Solve Lp(sol) = rhs with the (preconditioned) BiCGStab Krylov method.
//
// The solve is performed on the correction: the initial residual
// r = rhs - Lp(sol) is formed, the incoming sol is saved in sorig, sol is
// zeroed, and homogeneous BCs are used for all operator applications inside
// the iteration; sorig is added back before returning.
//
// Return codes (0 = success):
//   1 - breakdown: rho = (rh, r) == 0
//   2 - breakdown: (rh, v) == 0 (alpha undefined)
//   3 - breakdown: (t, t) == 0 (omega undefined)
//   4 - stagnation: omega == 0
//   8 - ran maxiter iterations without meeting the tolerance
//
// Convergence test: by default rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0)
// or rnorm < eps_abs; with CG_USE_OLD_CONVERGENCE_CRITERIA defined, the
// simpler rnorm < eps_rel*rnorm0 test is used instead.
//
int
CGSolver::solve_bicgstab (MultiFab&       sol,
                          const MultiFab& rhs,
                          Real            eps_rel,
                          Real            eps_abs,
                          LinOp::BC_Mode  bc_mode)
{
    BL_PROFILE("CGSolver::solve_bicgstab()");

    const int nghost = sol.nGrow(), ncomp = 1;

    const BoxArray& ba = sol.boxArray();
    const DistributionMapping& dm = sol.DistributionMap();

    BL_ASSERT(sol.nComp() == ncomp);
    BL_ASSERT(sol.boxArray() == Lp.boxArray(lev));
    BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev));

    // Work vectors of the BiCGStab recurrence.  Only ph and sh (the
    // preconditioned directions, which are arguments to Lp.apply) carry
    // ghost cells; the rest are cell-centered temporaries.
    MultiFab ph(ba, ncomp, nghost, dm);
    MultiFab sh(ba, ncomp, nghost, dm);

    MultiFab sorig(ba, ncomp, 0, dm);   // saved incoming solution
    MultiFab p    (ba, ncomp, 0, dm);   // search direction
    MultiFab r    (ba, ncomp, 0, dm);   // residual
    MultiFab s    (ba, ncomp, 0, dm);   // half-step residual
    MultiFab rh   (ba, ncomp, 0, dm);   // fixed "shadow" residual r-hat_0
    MultiFab v    (ba, ncomp, 0, dm);   // v = A*ph
    MultiFab t    (ba, ncomp, 0, dm);   // t = A*sh

    Lp.residual(r, rhs, sol, lev, bc_mode);

    // Save the initial guess and iterate on the correction with
    // homogeneous boundary conditions; sorig is restored at the end.
    MultiFab::Copy(sorig,sol,0,0,1,0);
    MultiFab::Copy(rh,   r,  0,0,1,0);

    sol.setVal(0);

    const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC;

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    Real rnorm = norm_inf(r);
#else
    //
    // Calculate the local values of these norms & reduce their values together.
    //
    Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) };

    ParallelDescriptor::ReduceRealMax(vals,2,color());

    Real       rnorm    = vals[0];
    const Real Lp_norm  = vals[1];
    Real       sol_norm = 0;
#endif
    const Real rnorm0   = rnorm;

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Initial error (error0) =        " << rnorm0 << '\n';
    }
    int ret = 0, nit = 1;
    Real rho_1 = 0, alpha = 0, omega = 0;

    if ( rnorm0 == 0 || rnorm0 < eps_abs )
    {
        // Already converged (or exactly zero residual) -- nothing to do.
        if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
	{
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: niter = 0,"
                      << ", rnorm = " << rnorm 
                      << ", eps_abs = " << eps_abs << std::endl;
	}
        return ret;
    }

    for (; nit <= maxiter; ++nit)
    {
        const Real rho = dotxy(rh,r);

        if ( rho == 0 ) 
	{
            // Breakdown: shadow residual orthogonal to residual.
            ret = 1; break;
	}
        if ( nit == 1 )
        {
            MultiFab::Copy(p,r,0,0,1,0);
        }
        else
        {
            // p = r + beta*(p - omega*v)
            const Real beta = (rho/rho_1)*(alpha/omega);
            sxay(p, p, -omega, v);
            sxay(p, r,   beta, p);
        }
        // Precondition p into ph (MG, Jacobi, or identity).
        if ( use_mg_precond )
        {
            ph.setVal(0);
            mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            ph.setVal(0);
            Lp.jacobi_smooth(ph, p, lev, temp_bc_mode);
        }
        else 
        {
            MultiFab::Copy(ph,p,0,0,1,0);
        }
        Lp.apply(v, ph, lev, temp_bc_mode);

        if ( Real rhTv = dotxy(rh,v) )
        {
            alpha = rho/rhTv;
        }
        else
        {
            // Breakdown: (rh, v) == 0.
            ret = 2; break;
        }
        // Half step: update solution and form s = r - alpha*v.
        sxay(sol, sol,  alpha, ph);
        sxay(s,   r,   -alpha,  v);

        rnorm = norm_inf(s);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Half Iter "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        // Precondition s into sh (same choice of preconditioner as above).
        if ( use_mg_precond )
        {
            sh.setVal(0);
            mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode);
        }
        else if ( use_jacobi_precond )
        {
            sh.setVal(0);
            Lp.jacobi_smooth(sh, s, lev, temp_bc_mode);
        }
        else
        {
            MultiFab::Copy(sh,s,0,0,1,0);
        }
        Lp.apply(t, sh, lev, temp_bc_mode);
        //
        // This is a little funky.  I want to elide one of the reductions
        // in the following two dotxy()s.  We do that by calculating the "local"
        // values and then reducing the two local values at the same time.
        //
        Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) };

        ParallelDescriptor::ReduceRealSum(vals,2,color());

        if ( vals[0] )
        {
            omega = vals[1]/vals[0];
        }
        else
        {
            // Breakdown: (t, t) == 0.
            ret = 3; break;
        }
        // Full step: sol += omega*sh;  r = s - omega*t.
        sxay(sol, sol,  omega, sh);
        sxay(r,   s,   -omega,  t);

        rnorm = norm_inf(r);

        if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) )
        {
            Spacer(std::cout, lev);
            std::cout << "CGSolver_BiCGStab: Iteration "
                      << std::setw(11) << nit
                      << " rel. err. "
                      << rnorm/(rnorm0) << '\n';
        }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
        if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break;
#else
        sol_norm = norm_inf(sol);
        if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break;
#endif
        if ( omega == 0 )
	{
            // Stagnation: no progress possible with omega == 0.
            ret = 4; break;
	}
        rho_1 = rho;
    }

    if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) )
    {
        Spacer(std::cout, lev);
        std::cout << "CGSolver_BiCGStab: Final: Iteration "
                  << std::setw(4) << nit
                  << " rel. err. "
                  << rnorm/(rnorm0) << '\n';
    }

#ifdef CG_USE_OLD_CONVERGENCE_CRITERIA
    if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs)
#else
    if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs )
#endif
    {
        if ( ParallelDescriptor::IOProcessor(color()) )
            BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!");
        ret = 8;
    }

    // Accept the correction only if the residual actually decreased
    // (ret 0 or 8); otherwise discard it and return the original guess.
    if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
    {
        sol.plus(sorig, 0, 1, 0);
    } 
    else 
    {
        sol.setVal(0);
        sol.plus(sorig, 0, 1, 0);
    }

    return ret;
}