Beispiel #1
0
bool L_BFGS::LineSearch(FunctionType& function,
                        double& functionValue,
                        arma::mat& iterate,
                        arma::mat& gradient,
                        arma::mat& newIterateTmp,
                        std::pair<arma::mat, double>& minPointIterate,
                        const arma::mat& searchDirection)
{
  // Default first step size of 1.0.
  double stepSize = 1.0;

  // The initial linear term approximation in the direction of the
  // search direction.
  double initialSearchDirectionDotGradient =
      arma::dot(gradient, searchDirection);

  // If it is not a descent direction, just report failure.
  if (initialSearchDirectionDotGradient > 0.0)
  {
    Log::Warn << "L-BFGS line search direction is not a descent direction "
        << "(terminating)!" << std::endl;
    return false;
  }

  // Save the initial function value.
  double initialFunctionValue = functionValue;

  // Unit linear approximation to the decrease in function value.
  double linearApproxFunctionValueDecrease = armijoConstant *
      initialSearchDirectionDotGradient;

  // The number of iteration in the search.
  size_t numIterations = 0;

  // Armijo step size scaling factor for increase and decrease.
  const double inc = 2.1;
  const double dec = 0.5;
  double width = 0;

  while (true)
  {
    // Perform a step and evaluate the gradient and the function values at that
    // point.
    newIterateTmp = iterate;
    newIterateTmp += stepSize * searchDirection;
    functionValue = Evaluate(function, newIterateTmp, minPointIterate);
    function.Gradient(newIterateTmp, gradient);
    numIterations++;

    if (functionValue > initialFunctionValue + stepSize *
        linearApproxFunctionValueDecrease)
    {
      width = dec;
    }
    else
    {
      // Check Wolfe's condition.
      double searchDirectionDotGradient = arma::dot(gradient, searchDirection);

      if (searchDirectionDotGradient < wolfe *
          initialSearchDirectionDotGradient)
      {
        width = inc;
      }
      else
      {
        if (searchDirectionDotGradient > -wolfe *
            initialSearchDirectionDotGradient)
        {
          width = dec;
        }
        else
        {
          break;
        }
      }
    }

    // Terminate when the step size gets too small or too big or it
    // exceeds the max number of iterations.
    const bool cond1 = (stepSize < minStep);
    const bool cond2 = (stepSize > maxStep);
    const bool cond3 = (numIterations >= maxLineSearchTrials);
    if (cond1 || cond2 || cond3)
      break;

    // Scale the step size.
    stepSize *= width;
  }

  // Move to the new iterate.
  iterate = newIterateTmp;
  return true;
}
Beispiel #2
0
double L_BFGS::Optimize(FunctionType& function, arma::mat& iterate)
{
  // Ensure that the cubes holding past iterations' information are the right
  // size.  Also set the current best point value to the maximum.
  const size_t rows = iterate.n_rows;
  const size_t cols = iterate.n_cols;

  arma::mat newIterateTmp(rows, cols);
  arma::cube s(rows, cols, numBasis);
  arma::cube y(rows, cols, numBasis);
  std::pair<arma::mat, double> minPointIterate;
  minPointIterate.second = std::numeric_limits<double>::max();

  // The old iterate to be saved.
  arma::mat oldIterate;
  oldIterate.zeros(iterate.n_rows, iterate.n_cols);

  // Whether to optimize until convergence.
  bool optimizeUntilConvergence = (maxIterations == 0);

  // The initial function value.
  double functionValue = Evaluate(function, iterate, minPointIterate);
  double prevFunctionValue = functionValue;

  // The gradient: the current and the old.
  arma::mat gradient;
  arma::mat oldGradient;
  gradient.zeros(iterate.n_rows, iterate.n_cols);
  oldGradient.zeros(iterate.n_rows, iterate.n_cols);

  // The search direction.
  arma::mat searchDirection;
  searchDirection.zeros(iterate.n_rows, iterate.n_cols);

  // The initial gradient value.
  function.Gradient(iterate, gradient);

  // The main optimization loop.
  for (size_t itNum = 0; optimizeUntilConvergence || (itNum != maxIterations);
       ++itNum)
  {
    Log::Debug << "L-BFGS iteration " << itNum << "; objective " <<
        function.Evaluate(iterate) << ", gradient norm "
        << arma::norm(gradient, 2) << ", "
        << ((prevFunctionValue - functionValue) /
            std::max(std::max(fabs(prevFunctionValue),
                              fabs(functionValue)), 1.0))
        << "." << std::endl;

    prevFunctionValue = functionValue;

    // Break when the norm of the gradient becomes too small.
    //
    // But don't do this on the first iteration to ensure we always take at
    // least one descent step.
    if (itNum > 0 && GradientNormTooSmall(gradient))
    {
      Log::Debug << "L-BFGS gradient norm too small (terminating successfully)."
          << std::endl;
      break;
    }

    // Break if the objective is not a number.
    if (std::isnan(functionValue))
    {
      Log::Warn << "L-BFGS terminated with objective " << functionValue << "; "
          << "are the objective and gradient functions implemented correctly?"
          << std::endl;
      break;
    }

    // Choose the scaling factor.
    double scalingFactor = ChooseScalingFactor(itNum, gradient, s, y);

    // Build an approximation to the Hessian and choose the search
    // direction for the current iteration.
    SearchDirection(gradient, itNum, scalingFactor, s, y, searchDirection);

    // Save the old iterate and the gradient before stepping.
    oldIterate = iterate;
    oldGradient = gradient;

    // Do a line search and take a step.
    if (!LineSearch(function, functionValue, iterate, gradient, newIterateTmp,
        minPointIterate, searchDirection))
    {
      Log::Debug << "Line search failed.  Stopping optimization." << std::endl;
      break; // The line search failed; nothing else to try.
    }

    // It is possible that the difference between the two coordinates is zero.
    // In this case we terminate successfully.
    if (accu(iterate != oldIterate) == 0)
    {
      Log::Debug << "L-BFGS step size of 0 (terminating successfully)."
          << std::endl;
      break;
    }

    // If we can't make progress on the gradient, then we'll also accept
    // a stable function value.
    const double denom = std::max(
        std::max(fabs(prevFunctionValue), fabs(functionValue)), 1.0);
    if ((prevFunctionValue - functionValue) / denom <= factr)
    {
      Log::Debug << "L-BFGS function value stable (terminating successfully)."
          << std::endl;
      break;
    }

    // Overwrite an old basis set.
    UpdateBasisSet(itNum, iterate, oldIterate, gradient, oldGradient, s, y);
  } // End of the optimization loop.

  return function.Evaluate(iterate);
}