Example #1
0
/*
 * More-Thuente line search along the direction s, starting from x.
 *
 * Parameters:
 *   n      Number of variables (length of x, g, s and wa).
 *   x      In: starting point. Out: the last trial point evaluated.
 *   f      In: objective value at the starting point. Out: objective value
 *          at the last trial point (including the L1 term when
 *          param->orthantwise_c is positive).
 *   g      In: gradient at the starting point. Passed to cd->proc_evaluate
 *          on every trial; presumably overwritten with the gradient at the
 *          trial point (the code reads it as such right after the call).
 *   s      Search direction.
 *   stp    In: initial step, must be positive. Out: the last step tried.
 *   wa     Work area of length n; receives a copy of the starting point.
 *   cd     Callback data; proc_evaluate, instance and n are used.
 *   param  Search parameters; ftol, gtol, xtol, min_step, max_step,
 *          max_linesearch, orthantwise_c and orthantwise_start are read.
 *
 * Returns the number of function evaluations when a step satisfying both
 * the sufficient decrease and the curvature condition is found, otherwise
 * one of the LBFGSERR_* codes returned below.
 */
static int line_search_morethuente(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *f,
    lbfgsfloatval_t *g,
    lbfgsfloatval_t *s,
    lbfgsfloatval_t *stp,
    lbfgsfloatval_t *wa,
    callback_data_t *cd,
    const lbfgs_parameter_t *param
    )
{
    int i, count = 0;
    int brackt, stage1, uinfo = 0;
    lbfgsfloatval_t dg, norm;
    lbfgsfloatval_t stx, fx, dgx;
    lbfgsfloatval_t sty, fy, dgy;
    lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm;
    lbfgsfloatval_t finit, ftest1, dginit, dgtest;
    lbfgsfloatval_t width, prev_width;
    lbfgsfloatval_t stmin, stmax;

    /* Check the input parameters for errors: the initial step must be positive. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Compute the initial gradient in the search direction. */
    if (param->orthantwise_c != 0.) {
        dginit = 0.;

        /* Variables before orthantwise_start are not L1-regularized. */
        for (i = 0;i < param->orthantwise_start;++i) {
            dginit += s[i] * g[i];
        }

        /* Use pseudo-gradients for orthant-wise updates. */
        for (i = param->orthantwise_start;i < n;++i) {
            /* Notice that:
                (-s[i] < 0)  <==>  (g[i] < -param->orthantwise_c)
                (-s[i] > 0)  <==>  (param->orthantwise_c < g[i])
               as the result of the lbfgs() function for orthant-wise updates.
             */
            if (s[i] != 0.) {
                if (x[i] < 0.) {
                    /* Differentiable. */
                    dginit += s[i] * (g[i] - param->orthantwise_c);
                } else if (0. < x[i]) {
                    /* Differentiable. */
                    dginit += s[i] * (g[i] + param->orthantwise_c);
                } else if (s[i] < 0.) {
                    /* Take the left partial derivative. */
                    dginit += s[i] * (g[i] - param->orthantwise_c);
                } else if (0. < s[i]) {
                    /* Take the right partial derivative. */
                    dginit += s[i] * (g[i] + param->orthantwise_c);
                }
            }
        }
    } else {
        vecdot(&dginit, g, s, n);
    }

    /* Make sure that s points to a descent direction. */
    if (0 < dginit) {
        return LBFGSERR_INCREASEGRADIENT;
    }

    /* Initialize local variables. */
    brackt = 0;                         /* No bracketing interval found yet. */
    stage1 = 1;                         /* Start in the first stage of the search. */
    finit = *f;
    dgtest = param->ftol * dginit;      /* Slope used by the sufficient decrease test. */
    width = param->max_step - param->min_step;
    prev_width = 2.0 * width;

    /* Copy the value of x to the work area so every trial restarts from it. */
    veccpy(wa, x, n);

    /*
        The variables stx, fx, dgx contain the values of the step,
        function, and directional derivative at the best step.
        The variables sty, fy, dgy contain the value of the step,
        function, and derivative at the other endpoint of
        the interval of uncertainty.
        The variables stp, f, dg contain the values of the step,
        function, and derivative at the current step.
    */
    stx = sty = 0.;
    fx = fy = finit;
    dgx = dgy = dginit;

    /* The loop is left only through one of the return statements inside it. */
    for (;;) {
        /*
            Set the minimum and maximum steps to correspond to the
            present interval of uncertainty.
         */
        if (brackt) {
            stmin = min2(stx, sty);
            stmax = max2(stx, sty);
        } else {
            /* No bracket yet: allow the step to grow beyond the current one. */
            stmin = stx;
            stmax = *stp + 4.0 * (*stp - stx);
        }

        /* Clip the step in the range of [param->min_step, param->max_step]. */
        if (*stp < param->min_step) *stp = param->min_step;
        if (param->max_step < *stp) *stp = param->max_step;

        /*
            If an unusual termination is to occur then let
            stp be the lowest point obtained so far.
            The conditions mirror the error tests performed after the
            evaluation below, so the final evaluation happens at stx.
         */
        if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) {
            *stp = stx;
        }

        /*
            Compute the current value of x:
                x <- x + (*stp) * s.
            (x is first restored from the work area wa.)
         */
        veccpy(x, wa, n);
        vecadd(x, s, *stp, n);

        if (param->orthantwise_c != 0.) {
            /* The current point is projected onto the orthant of the previous one. */
            for (i = param->orthantwise_start;i < n;++i) {
                /* A sign change relative to the starting point clamps the variable to zero. */
                if (x[i] * wa[i] < 0.) {
                    x[i] = 0.;
                }
            }
        }

        /* Evaluate the function and gradient values. */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
        /*
            NOTE(review): the tests above use (orthantwise_c != 0.) while this
            one uses (0. < orthantwise_c); the two differ only for a negative
            coefficient. Presumably the caller rejects negative values - confirm.
         */
        if (0. < param->orthantwise_c) {
            /* Compute L1-regularization factor and add it to the object value. */
            norm = 0.;
            for (i = param->orthantwise_start;i < n;++i) {
                norm += fabs(x[i]);
            }
            *f += norm * param->orthantwise_c;

            /* Directional derivative over the non-regularized variables. */
            dg = 0.;
            for (i = 0;i < param->orthantwise_start;++i) {
                dg += s[i] * g[i];
            }

            /* Use pseudo-gradients for orthant-wise updates. */
            for (i = param->orthantwise_start;i < n;++i) {
                if (x[i] < 0.) {
                    /* Differentiable. */
                    dg += s[i] * (g[i] - param->orthantwise_c);
                } else if (0. < x[i]) {
                    /* Differentiable. */
                    dg += s[i] * (g[i] + param->orthantwise_c);
                } else {
                    if (g[i] < -param->orthantwise_c) {
                        /* Take the right partial derivative. */
                        dg += s[i] * (g[i] + param->orthantwise_c);
                    } else if (param->orthantwise_c < g[i]) {
                        /* Take the left partial derivative. */
                        dg += s[i] * (g[i] - param->orthantwise_c);
                    } else {
                        /* Neither branch applies: this variable contributes nothing to dg. */
                    }
                }
            }
        } else {
            vecdot(&dg, g, s, n);
        }
        /* Threshold the objective must not exceed for sufficient decrease. */
        ftest1 = finit + *stp * dgtest;
        ++count;

        /* Test for errors and convergence. */
        if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) {
            /* Rounding errors prevent further progress. */
            return LBFGSERR_ROUNDING_ERROR;
        }
        if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) {
            /* The step is the maximum value. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) {
            /* The step is the minimum value. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (brackt && (stmax - stmin) <= param->xtol * stmax) {
            /* Relative width of the interval of uncertainty is at most xtol. */
            return LBFGSERR_WIDTHTOOSMALL;
        }
        if (param->max_linesearch <= count) {
            /* Maximum number of iteration. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }
        if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) {
            /* The sufficient decrease condition and the directional derivative condition hold. */
            return count;
        }

        /*
            In the first stage we seek a step for which the modified
            function has a nonpositive value and nonnegative derivative.
         */
        if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) {
            stage1 = 0;
        }

        /*
            A modified function is used to predict the step only if
            we have not obtained a step for which the modified
            function has a nonpositive function value and nonnegative
            derivative, and if a lower function value has been
            obtained but the decrease is not sufficient.
         */
        if (stage1 && ftest1 < *f && *f <= fx) {
            /* Define the modified function and derivative values. */
            fm = *f - *stp * dgtest;
            fxm = fx - stx * dgtest;
            fym = fy - sty * dgtest;
            dgm = dg - dgtest;
            dgxm = dgx - dgtest;
            dgym = dgy - dgtest;

            /*
                Call update_trial_interval() to update the interval of
                uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval(
                &stx, &fxm, &dgxm,
                &sty, &fym, &dgym,
                stp, &fm, &dgm,
                stmin, stmax, &brackt
                );

            /*
                Reset the function and gradient values for f, i.e. undo the
                modification using the (possibly updated) stx and sty.
             */
            fx = fxm + stx * dgtest;
            fy = fym + sty * dgtest;
            dgx = dgxm + dgtest;
            dgy = dgym + dgtest;
        } else {
            /*
                Call update_trial_interval() to update the interval of
                uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval(
                &stx, &fx, &dgx,
                &sty, &fy, &dgy,
                stp, f, &dg,
                stmin, stmax, &brackt
                );
        }

        /*
            Force a sufficient decrease in the interval of uncertainty:
            if the bracket has not shrunk enough compared with the width
            two iterations ago, bisect it.
         */
        if (brackt) {
            if (0.66 * prev_width <= fabs(sty - stx)) {
                *stp = stx + 0.5 * (sty - stx);
            }
            prev_width = width;
            width = fabs(sty - stx);
        }
    }

    /* Not reached: the loop above exits only via return. */
    return LBFGSERR_LOGICERROR;
}
Example #2
0
 int line_search_morethuente(
    int n,
    T *x,
    T *f,
    T *g,
    T *s,
    T *stp,
    const T* xp,
    const T* gp,
    T *wa,
    callback_data_t<T> *cd,
    const lbfgs_parameter_t *param
    )
{
    int count = 0;
    int brackt, stage1, uinfo = 0;
    T dg;
    T stx, fx, dgx;
    T sty, fy, dgy;
    T fxm, dgxm, fym, dgym, fm, dgm;
    T finit, ftest1, dginit, dgtest;
    T width, prev_width;
    T stmin, stmax;

    /* Check the input parameters for errors. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Compute the initial gradient in the search direction. */
    vecdot(&dginit, g, s, n);

    /* Make sure that s points to a descent direction. */
    if (0 < dginit) {
        return LBFGSERR_INCREASEGRADIENT;
    }

    /* Initialize local variables. */
    brackt = 0;
    stage1 = 1;
    finit = *f;
    dgtest = param->ftol * dginit;
    width = param->max_step - param->min_step;
    prev_width = 2.0 * width;

    /*
        The variables stx, fx, dgx contain the values of the step,
        function, and directional derivative at the best step.
        The variables sty, fy, dgy contain the value of the step,
        function, and derivative at the other endpoint of
        the interval of uncertainty.
        The variables stp, f, dg contain the values of the step,
        function, and derivative at the current step.
    */
    stx = sty = 0.;
    fx = fy = finit;
    dgx = dgy = dginit;

    for (;;) {
        /*
            Set the minimum and maximum steps to correspond to the
            present interval of uncertainty.
         */
        if (brackt) {
            stmin = min2(stx, sty);
            stmax = max2(stx, sty);
        } else {
            stmin = stx;
            stmax = *stp + 4.0 * (*stp - stx);
        }

        /* Clip the step in the range of [stpmin, stpmax]. */
        if (*stp < param->min_step) *stp = param->min_step;
        if (param->max_step < *stp) *stp = param->max_step;

        /*
            If an unusual termination is to occur then let
            stp be the lowest point obtained so far.
         */
        if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) {
            *stp = stx;
        }

        /*
            Compute the current value of x:
                x <- x + (*stp) * s.
         */
        veccpy(x, xp, n);
        vecadd(x, s, *stp, n);

        /* Evaluate the function and gradient values. */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
        vecdot(&dg, g, s, n);

        ftest1 = finit + *stp * dgtest;
        ++count;

        /* Test for errors and convergence. */
        if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) {
            /* Rounding errors prevent further progress. */
            return LBFGSERR_ROUNDING_ERROR;
        }
        if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) {
            /* The step is the maximum value. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) {
            /* The step is the minimum value. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (brackt && (stmax - stmin) <= param->xtol * stmax) {
            /* Relative width of the interval of uncertainty is at most xtol. */
            return LBFGSERR_WIDTHTOOSMALL;
        }
        if (param->max_linesearch <= count) {
            /* Maximum number of iteration. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }
        if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) {
            /* The sufficient decrease condition and the directional derivative condition hold. */
            return count;
        }

        /*
            In the first stage we seek a step for which the modified
            function has a nonpositive value and nonnegative derivative.
         */
        if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) {
            stage1 = 0;
        }

        /*
            A modified function is used to predict the step only if
            we have not obtained a step for which the modified
            function has a nonpositive function value and nonnegative
            derivative, and if a lower function value has been
            obtained but the decrease is not sufficient.
         */
        if (stage1 && ftest1 < *f && *f <= fx) {
            /* Define the modified function and derivative values. */
            fm = *f - *stp * dgtest;
            fxm = fx - stx * dgtest;
            fym = fy - sty * dgtest;
            dgm = dg - dgtest;
            dgxm = dgx - dgtest;
            dgym = dgy - dgtest;

            /*
                Call update_trial_interval() to update the interval of
                uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval(
                &stx, &fxm, &dgxm,
                &sty, &fym, &dgym,
                stp, &fm, &dgm,
                stmin, stmax, &brackt
                );

            /* Reset the function and gradient values for f. */
            fx = fxm + stx * dgtest;
            fy = fym + sty * dgtest;
            dgx = dgxm + dgtest;
            dgy = dgym + dgtest;
        } else {
            /*
                Call update_trial_interval() to update the interval of
                uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval(
                &stx, &fx, &dgx,
                &sty, &fy, &dgy,
                stp, f, &dg,
                stmin, stmax, &brackt
                );
        }

        /*
            Force a sufficient decrease in the interval of uncertainty.
         */
        if (brackt) {
            if (0.66 * prev_width <= fabs(sty - stx)) {
                *stp = stx + 0.5 * (sty - stx);
            }
            prev_width = width;
            width = fabs(sty - stx);
        }
    }

    return LBFGSERR_LOGICERROR;
}
Example #3
0
/*
 * More-Thuente line search along `direc`, starting from `param`.
 *
 * Parameters:
 *   param     In: starting point. On success it is swapped with the accepted
 *             trial point; on failure it is left unchanged.
 *   direc     Search direction; direc.dot(grad) must not be positive.
 *   grad      In: gradient at `param`. Passed to `funcgrad` on every trial
 *             and read back as the gradient at the trial point.
 *   finit     Objective value at `param`.
 *   stepsize  In: initial step (must not be negative). Out: last step tried.
 *   funcgrad  Callback invoked as funcgrad(trial_point, grad); its return
 *             value is used as the objective value at the trial point.
 *
 * Returns true when a step satisfies both
 *   fval <= finit + alpha_ * stepsize * dginit   (sufficient decrease) and
 *   |dg| <= beta_ * (-dginit)                    (curvature condition).
 * Returns false on every other termination (rounding error, step stuck at
 * minstep_/maxstep_, interval narrower than parameps_, or maxtries_
 * evaluations used). itercnt_ holds the number of evaluations performed.
 */
bool LineSearcher::MoreThuenteLineSearch(
    DenseVector &param, DenseVector &direc, DenseVector &grad, double finit,
    double &stepsize,
    std::function<double(DenseVector &, DenseVector &)> &funcgrad) {

  itercnt_ = 0;
  int brackt, stage1, uinfo = 0;
  double dg;
  double stx, fx, dgx;
  double sty, fy, dgy;
  double fxm, dgxm, fym, dgym, fm, dgm;
  double ftest1, dginit, dgtest;
  double width, prev_width;
  double stmin, stmax;
  double fval;

  /*
  NOTE(review): a step of exactly 0 passes this check and is then clipped to
  minstep_ inside the loop - confirm that this is intended.
  */
  if (stepsize < 0) {
    LOG(FATAL) << "Stepsize less than 0";
    return false;
  }

  /* Directional derivative at the starting point; must not be positive. */
  dginit = direc.dot(grad);
  if (dginit > 0) {
    LOG(FATAL) << "Direction not decent";
    return false;
  }

  if (tparam_.size() != param.size()) {
    tparam_.resize(param.size());
  }

  /* Initialize local variables. */
  brackt = 0;
  stage1 = 1;
  dgtest = alpha_ * dginit;
  width = maxstep_ - minstep_;
  prev_width = 2.0 * width;

  /*
  stx, fx, dgx: step, value and derivative at the best step so far.
  sty, fy, dgy: the same at the other endpoint of the interval of uncertainty.
  */
  stx = sty = 0.;
  fx = fy = finit;
  dgx = dgy = dginit;

  while (itercnt_ < maxtries_) {
    /*
    Set the minimum and maximum steps to correspond to the
    present interval of uncertainty.
    */
    if (brackt) {
      stmin = std::min(stx, sty);
      /*
      The upper bound is the larger endpoint. Using the smaller one here
      would make stmin == stmax, so every bracketed step would be treated
      as out of range and the search would abort.
      */
      stmax = std::max(stx, sty);
    } else {
      stmin = stx;
      stmax = stepsize + 4.0 * (stepsize - stx);
    }

    /* Clip the step in the range of [minstep_, maxstep_]. */
    if (stepsize < minstep_)
      stepsize = minstep_;
    if (stepsize > maxstep_)
      stepsize = maxstep_;

    /*
    If an unusual termination is to occur then let
    stepsize be the lowest point obtained so far.
    */
    if ((brackt && ((stepsize <= stmin || stepsize >= stmax) ||
                    (itercnt_ + 1 >= maxtries_) || uinfo != 0)) ||
        (brackt && (stmax - stmin <= parameps_ * stmax))) {
      stepsize = stx;
    }

    /* Evaluate the objective and the directional derivative at the trial point. */
    tparam_ = param + stepsize * direc;
    fval = funcgrad(tparam_, grad);
    dg = grad.dot(direc);

    /* Largest objective value that still counts as a sufficient decrease. */
    ftest1 = finit + stepsize * dgtest;
    ++itercnt_;

    /* Test for errors and convergence. */
    if (brackt && ((stepsize <= stmin || stmax <= stepsize) || uinfo != 0)) {
      /* Rounding errors prevent further progress. */
      return false;
    }
    if (stepsize == maxstep_ && fval <= ftest1 && dg <= dgtest) {
      /* The step is the maximum value. */
      return false;
    }
    if (stepsize == minstep_ && (ftest1 < fval || dgtest <= dg)) {
      /* The step is the minimum value. */
      return false;
    }
    if (brackt && (stmax - stmin) <= parameps_ * stmax) {
      /* Relative width of the interval of uncertainty is at most parameps_. */
      return false;
    }
    if (maxtries_ <= itercnt_) {
      /* Maximum number of iteration. */
      return false;
    }

    if (fval <= ftest1 && std::fabs(dg) <= beta_ * (-dginit)) {
      /* The sufficient decrease condition and the directional derivative
       * condition hold: hand the accepted point back through param. */
      param.swap(tparam_);
      return true;
    }

    /*
    In the first stage we seek a step for which the modified
    function has a nonpositive value and nonnegative derivative.
    */
    if (stage1 && fval <= ftest1 && std::min(alpha_, beta_) * dginit <= dg) {
      stage1 = 0;
    }

    /*
    A modified function is used to predict the step only if
    we have not obtained a step for which the modified
    function has a nonpositive function value and nonnegative
    derivative, and if a lower function value has been
    obtained but the decrease is not sufficient.
    */
    if (stage1 && ftest1 < fval && fval <= fx) {
      /* Define the modified function and derivative values. */
      fm = fval - stepsize * dgtest;
      fxm = fx - stx * dgtest;
      fym = fy - sty * dgtest;
      dgm = dg - dgtest;
      dgxm = dgx - dgtest;
      dgym = dgy - dgtest;

      /*
      Call update_trial_interval() to update the interval of
      uncertainty and to compute the new step.
      */
      uinfo =
          update_trial_interval(&stx, &fxm, &dgxm, &sty, &fym, &dgym, &stepsize,
                                &fm, &dgm, stmin, stmax, &brackt);

      /* Reset the function and gradient values for f. */
      fx = fxm + stx * dgtest;
      fy = fym + sty * dgtest;
      dgx = dgxm + dgtest;
      dgy = dgym + dgtest;
    } else {
      /*
      Call update_trial_interval() to update the interval of
      uncertainty and to compute the new step.
      */
      uinfo = update_trial_interval(&stx, &fx, &dgx, &sty, &fy, &dgy, &stepsize,
                                    &fval, &dg, stmin, stmax, &brackt);
    }

    /*
    Force a sufficient decrease in the interval of uncertainty:
    bisect when the bracket has not shrunk enough.
    */
    if (brackt) {
      if (0.66 * prev_width <= std::fabs(sty - stx)) {
        stepsize = stx + 0.5 * (sty - stx);
      }
      prev_width = width;
      width = std::fabs(sty - stx);
    }
  }

  /* Reached only when maxtries_ allows no evaluation at all. */
  return false;
}