/**
 * Evaluate the objective at the given point and keep the running minimum up
 * to date.
 *
 * @param function Objective function; its Evaluate() is called once.
 * @param iterate Point at which the objective is evaluated.
 * @param minPointIterate Best (point, objective) pair seen so far.  It is
 *     overwritten only when the new objective is strictly smaller than the
 *     stored one.
 * @return The objective value at `iterate`.
 */
double L_BFGS::Evaluate(FunctionType& function,
                        const arma::mat& iterate,
                        std::pair<arma::mat, double>& minPointIterate)
{
  arma::mat& bestPoint = minPointIterate.first;
  double& bestObjective = minPointIterate.second;

  const double objective = function.Evaluate(iterate);

  // Not a strict improvement (this includes a NaN objective, which never
  // compares less): leave the recorded minimum untouched.
  if (!(objective < bestObjective))
    return objective;

  // New best point: remember both where it is and what it is worth.
  bestPoint = iterate;
  bestObjective = objective;

  return objective;
}
double L_BFGS::Optimize(FunctionType& function, arma::mat& iterate) { // Ensure that the cubes holding past iterations' information are the right // size. Also set the current best point value to the maximum. const size_t rows = iterate.n_rows; const size_t cols = iterate.n_cols; arma::mat newIterateTmp(rows, cols); arma::cube s(rows, cols, numBasis); arma::cube y(rows, cols, numBasis); std::pair<arma::mat, double> minPointIterate; minPointIterate.second = std::numeric_limits<double>::max(); // The old iterate to be saved. arma::mat oldIterate; oldIterate.zeros(iterate.n_rows, iterate.n_cols); // Whether to optimize until convergence. bool optimizeUntilConvergence = (maxIterations == 0); // The initial function value. double functionValue = Evaluate(function, iterate, minPointIterate); double prevFunctionValue = functionValue; // The gradient: the current and the old. arma::mat gradient; arma::mat oldGradient; gradient.zeros(iterate.n_rows, iterate.n_cols); oldGradient.zeros(iterate.n_rows, iterate.n_cols); // The search direction. arma::mat searchDirection; searchDirection.zeros(iterate.n_rows, iterate.n_cols); // The initial gradient value. function.Gradient(iterate, gradient); // The main optimization loop. for (size_t itNum = 0; optimizeUntilConvergence || (itNum != maxIterations); ++itNum) { Log::Debug << "L-BFGS iteration " << itNum << "; objective " << function.Evaluate(iterate) << ", gradient norm " << arma::norm(gradient, 2) << ", " << ((prevFunctionValue - functionValue) / std::max(std::max(fabs(prevFunctionValue), fabs(functionValue)), 1.0)) << "." << std::endl; prevFunctionValue = functionValue; // Break when the norm of the gradient becomes too small. // // But don't do this on the first iteration to ensure we always take at // least one descent step. if (itNum > 0 && GradientNormTooSmall(gradient)) { Log::Debug << "L-BFGS gradient norm too small (terminating successfully)." << std::endl; break; } // Break if the objective is not a number. 
if (std::isnan(functionValue)) { Log::Warn << "L-BFGS terminated with objective " << functionValue << "; " << "are the objective and gradient functions implemented correctly?" << std::endl; break; } // Choose the scaling factor. double scalingFactor = ChooseScalingFactor(itNum, gradient, s, y); // Build an approximation to the Hessian and choose the search // direction for the current iteration. SearchDirection(gradient, itNum, scalingFactor, s, y, searchDirection); // Save the old iterate and the gradient before stepping. oldIterate = iterate; oldGradient = gradient; // Do a line search and take a step. if (!LineSearch(function, functionValue, iterate, gradient, newIterateTmp, minPointIterate, searchDirection)) { Log::Debug << "Line search failed. Stopping optimization." << std::endl; break; // The line search failed; nothing else to try. } // It is possible that the difference between the two coordinates is zero. // In this case we terminate successfully. if (accu(iterate != oldIterate) == 0) { Log::Debug << "L-BFGS step size of 0 (terminating successfully)." << std::endl; break; } // If we can't make progress on the gradient, then we'll also accept // a stable function value. const double denom = std::max( std::max(fabs(prevFunctionValue), fabs(functionValue)), 1.0); if ((prevFunctionValue - functionValue) / denom <= factr) { Log::Debug << "L-BFGS function value stable (terminating successfully)." << std::endl; break; } // Overwrite an old basis set. UpdateBasisSet(itNum, iterate, oldIterate, gradient, oldGradient, s, y); } // End of the optimization loop. return function.Evaluate(iterate); }