/*
 * More-Thuente line search along the direction s, with optional
 * orthant-wise (L1-regularised) handling.
 *
 * Starting from the point in x, trial points wa + (*stp) * s are evaluated
 * through cd->proc_evaluate() until a step satisfies both
 *   - sufficient decrease:  *f <= finit + (*stp) * ftol * dginit
 *   - curvature:            |dg| <= gtol * (-dginit)
 * where finit is the value of *f on entry and dginit is the directional
 * derivative on entry.
 *
 * Parameters:
 *   n     Number of variables; used as the length of x, g, s and wa.
 *   x     In: the starting point.  Out: the last trial point evaluated.
 *   f     In: objective value at the starting point.  Out: value at the
 *         last trial point (including the L1 term when
 *         0 < param->orthantwise_c).
 *   g     In: gradient at the starting point.  Out: whatever
 *         cd->proc_evaluate() wrote for the last trial point.
 *   s     Search direction.
 *   stp   In: initial trial step, must be > 0.  Out: the step of the last
 *         trial point evaluated.
 *   wa    Work area of length n; receives a copy of the starting point.
 *   cd    Callback data; cd->proc_evaluate, cd->instance and cd->n are used.
 *   param Read fields: ftol, gtol, xtol, min_step, max_step,
 *         max_linesearch, orthantwise_c, orthantwise_start.
 *
 * Returns the number of function evaluations performed when both conditions
 * hold, otherwise one of the LBFGSERR_* codes returned below.
 *
 * NOTE(review): the initial derivative uses the pseudo-gradient when
 * orthantwise_c != 0, while the per-trial derivative uses it only when
 * 0 < orthantwise_c.  The two agree as long as orthantwise_c is never
 * negative; presumably the caller validates that -- confirm.
 */
static int line_search_morethuente(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *f,
    lbfgsfloatval_t *g,
    lbfgsfloatval_t *s,
    lbfgsfloatval_t *stp,
    lbfgsfloatval_t *wa,
    callback_data_t *cd,
    const lbfgs_parameter_t *param
    )
{
    int i, count = 0;
    int brackt, stage1, uinfo = 0;
    lbfgsfloatval_t dg, norm;
    lbfgsfloatval_t stx, fx, dgx;
    lbfgsfloatval_t sty, fy, dgy;
    lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm;
    lbfgsfloatval_t finit, ftest1, dginit, dgtest;
    lbfgsfloatval_t width, prev_width;
    lbfgsfloatval_t stmin, stmax;

    /* Check the input parameters for errors: the initial step must be positive. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Compute the initial gradient in the search direction. */
    if (param->orthantwise_c != 0.) {
        /* Variables before orthantwise_start carry no L1 term: plain dot product. */
        dginit = 0.;
        for (i = 0;i < param->orthantwise_start;++i) {
            dginit += s[i] * g[i];
        }

        /* Use pseudo-gradients for orthant-wise updates. */
        for (i = param->orthantwise_start;i < n;++i) {
            /*
             * Notice that:
             *   (-s[i] < 0)  <==>  (g[i] < -param->orthantwise_c)
             *   (-s[i] > 0)  <==>  (param->orthantwise_c < g[i])
             * as the result of the lbfgs() function for orthant-wise updates.
             */
            if (s[i] != 0.) {
                if (x[i] < 0.) {
                    /* Differentiable: d|x|/dx = -1. */
                    dginit += s[i] * (g[i] - param->orthantwise_c);
                } else if (0. < x[i]) {
                    /* Differentiable: d|x|/dx = +1. */
                    dginit += s[i] * (g[i] + param->orthantwise_c);
                } else if (s[i] < 0.) {
                    /* x[i] == 0, moving negative: take the left partial derivative. */
                    dginit += s[i] * (g[i] - param->orthantwise_c);
                } else if (0. < s[i]) {
                    /* x[i] == 0, moving positive: take the right partial derivative. */
                    dginit += s[i] * (g[i] + param->orthantwise_c);
                }
            }
        }
    } else {
        vecdot(&dginit, g, s, n);
    }

    /* Make sure that s points to a descent direction. */
    if (0 < dginit) {
        return LBFGSERR_INCREASEGRADIENT;
    }

    /* Initialize local variables. */
    brackt = 0;     /* Nonzero while [stx, sty] is treated as a bracket (set via update_trial_interval). */
    stage1 = 1;     /* Nonzero while the first stage of the search is active. */
    finit = *f;
    dgtest = param->ftol * dginit;      /* Slope of the sufficient-decrease line. */
    width = param->max_step - param->min_step;
    prev_width = 2.0 * width;

    /* Copy the value of x to the work area so every trial restarts from it. */
    veccpy(wa, x, n);

    /*
     * The variables stx, fx, dgx contain the values of the step,
     * function, and directional derivative at the best step.
     * The variables sty, fy, dgy contain the value of the step,
     * function, and derivative at the other endpoint of
     * the interval of uncertainty.
     * The variables stp, f, dg contain the values of the step,
     * function, and derivative at the current step.
     */
    stx = sty = 0.;
    fx = fy = finit;
    dgx = dgy = dginit;

    for (;;) {
        /*
         * Set the minimum and maximum steps to correspond to the
         * present interval of uncertainty.  Before a bracket exists the
         * upper bound extrapolates beyond the current step.
         */
        if (brackt) {
            stmin = min2(stx, sty);
            stmax = max2(stx, sty);
        } else {
            stmin = stx;
            stmax = *stp + 4.0 * (*stp - stx);
        }

        /* Clip the step in the range of [min_step, max_step]. */
        if (*stp < param->min_step) *stp = param->min_step;
        if (param->max_step < *stp) *stp = param->max_step;

        /*
         * If an unusual termination is to occur then let
         * stp be the lowest point obtained so far.  "Unusual" here means:
         * bracketed and the step lies outside (stmin, stmax), or the next
         * evaluation is the last one allowed, or the previous interval
         * update reported a problem, or the bracket is narrower than xtol.
         */
        if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) {
            *stp = stx;
        }

        /*
         * Compute the current value of x:
         *     x <- wa + (*stp) * s.
         */
        veccpy(x, wa, n);
        vecadd(x, s, *stp, n);
        if (param->orthantwise_c != 0.) {
            /*
             * The current point is projected onto the orthant of the
             * previous one: a component whose sign flipped is set to zero.
             */
            for (i = param->orthantwise_start;i < n;++i) {
                if (x[i] * wa[i] < 0.) {
                    x[i] = 0.;
                }
            }
        }

        /* Evaluate the function and gradient values (g is overwritten). */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
        if (0. < param->orthantwise_c) {
            /* Compute the L1-regularization term and add it to the objective value. */
            norm = 0.;
            for (i = param->orthantwise_start;i < n;++i) {
                norm += fabs(x[i]);
            }
            *f += norm * param->orthantwise_c;

            /* Directional derivative of the unregularised variables. */
            dg = 0.;
            for (i = 0;i < param->orthantwise_start;++i) {
                dg += s[i] * g[i];
            }

            /* Use pseudo-gradients for orthant-wise updates. */
            for (i = param->orthantwise_start;i < n;++i) {
                if (x[i] < 0.) {
                    /* Differentiable. */
                    dg += s[i] * (g[i] - param->orthantwise_c);
                } else if (0. < x[i]) {
                    /* Differentiable. */
                    dg += s[i] * (g[i] + param->orthantwise_c);
                } else {
                    if (g[i] < -param->orthantwise_c) {
                        /* Take the right partial derivative. */
                        dg += s[i] * (g[i] + param->orthantwise_c);
                    } else if (param->orthantwise_c < g[i]) {
                        /* Take the left partial derivative. */
                        dg += s[i] * (g[i] - param->orthantwise_c);
                    } else {
                        /* |g[i]| <= orthantwise_c at x[i] == 0: contributes nothing (dg += 0.). */
                    }
                }
            }
        } else {
            vecdot(&dg, g, s, n);
        }

        /* Value of the sufficient-decrease line at the current step. */
        ftest1 = finit + *stp * dgtest;
        ++count;

        /*
         * Test for errors and convergence.  The order of these checks
         * determines which code is reported when several apply.
         */
        if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) {
            /* Rounding errors prevent further progress. */
            return LBFGSERR_ROUNDING_ERROR;
        }
        if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) {
            /* The step is the maximum value. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) {
            /* The step is the minimum value. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (brackt && (stmax - stmin) <= param->xtol * stmax) {
            /* Relative width of the interval of uncertainty is at most xtol. */
            return LBFGSERR_WIDTHTOOSMALL;
        }
        if (param->max_linesearch <= count) {
            /* Maximum number of iterations reached. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }
        if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) {
            /* The sufficient decrease condition and the directional derivative condition hold. */
            return count;
        }

        /*
         * In the first stage we seek a step for which the modified
         * function has a nonpositive value and nonnegative derivative.
         */
        if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) {
            stage1 = 0;
        }

        /*
         * A modified function is used to predict the step only if
         * we have not obtained a step for which the modified
         * function has a nonpositive function value and nonnegative
         * derivative, and if a lower function value has been
         * obtained but the decrease is not sufficient.
         */
        if (stage1 && ftest1 < *f && *f <= fx) {
            /* Define the modified function and derivative values. */
            fm = *f - *stp * dgtest;
            fxm = fx - stx * dgtest;
            fym = fy - sty * dgtest;
            dgm = dg - dgtest;
            dgxm = dgx - dgtest;
            dgym = dgy - dgtest;

            /*
             * Call update_trial_interval() to update the interval of
             * uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval( &stx, &fxm, &dgxm, &sty, &fym, &dgym, stp, &fm, &dgm, stmin, stmax, &brackt );

            /* Map the endpoint values of the modified function back to f. */
            fx = fxm + stx * dgtest;
            fy = fym + sty * dgtest;
            dgx = dgxm + dgtest;
            dgy = dgym + dgtest;
        } else {
            /*
             * Call update_trial_interval() to update the interval of
             * uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval( &stx, &fx, &dgx, &sty, &fy, &dgy, stp, f, &dg, stmin, stmax, &brackt );
        }

        /*
         * Force a sufficient decrease in the interval of uncertainty:
         * if the bracket has not shrunk below 0.66 of its width two
         * iterations ago, bisect it.
         */
        if (brackt) {
            if (0.66 * prev_width <= fabs(sty - stx)) {
                *stp = stx + 0.5 * (sty - stx);
            }
            prev_width = width;
            width = fabs(sty - stx);
        }
    }

    /* Not reached: the loop above only exits through a return statement. */
    return LBFGSERR_LOGICERROR;
}
/*
 * More-Thuente line search along the direction s (no orthant-wise handling).
 *
 * Trial points xp + (*stp) * s are evaluated through cd->proc_evaluate()
 * until a step satisfies both
 *   - sufficient decrease:  *f <= finit + (*stp) * ftol * dginit
 *   - curvature:            |dg| <= gtol * (-dginit)
 * where finit is the value of *f on entry and dginit = g . s on entry.
 *
 * NOTE(review): T is not declared in this definition; presumably it is a
 * template parameter introduced by an enclosing template -- confirm.
 *
 * Parameters:
 *   n     Number of variables; used as the length of x, g, s and xp.
 *   x     Out: the last trial point evaluated (rebuilt from xp each trial).
 *   f     In: objective value at the starting point.  Out: value at the
 *         last trial point.
 *   g     In: gradient at the starting point.  Out: whatever
 *         cd->proc_evaluate() wrote for the last trial point.
 *   s     Search direction.
 *   stp   In: initial trial step, must be > 0.  Out: the step of the last
 *         trial point evaluated.
 *   xp    Starting point; every trial is computed relative to it.
 *   gp    Not referenced in this function.
 *   wa    Not referenced in this function.
 *   cd    Callback data; cd->proc_evaluate, cd->instance and cd->n are used.
 *   param Read fields: ftol, gtol, xtol, min_step, max_step, max_linesearch.
 *
 * Returns the number of function evaluations performed when both conditions
 * hold, otherwise one of the LBFGSERR_* codes returned below.
 */
int line_search_morethuente(
    int n,
    T *x,
    T *f,
    T *g,
    T *s,
    T *stp,
    const T* xp,
    const T* gp,
    T *wa,
    callback_data_t<T> *cd,
    const lbfgs_parameter_t *param
    )
{
    int count = 0;
    int brackt, stage1, uinfo = 0;
    T dg;
    T stx, fx, dgx;
    T sty, fy, dgy;
    T fxm, dgxm, fym, dgym, fm, dgm;
    T finit, ftest1, dginit, dgtest;
    T width, prev_width;
    T stmin, stmax;

    /* Check the input parameters for errors: the initial step must be positive. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Compute the initial gradient in the search direction. */
    vecdot(&dginit, g, s, n);

    /* Make sure that s points to a descent direction. */
    if (0 < dginit) {
        return LBFGSERR_INCREASEGRADIENT;
    }

    /* Initialize local variables. */
    brackt = 0;     /* Nonzero while [stx, sty] is treated as a bracket (set via update_trial_interval). */
    stage1 = 1;     /* Nonzero while the first stage of the search is active. */
    finit = *f;
    dgtest = param->ftol * dginit;      /* Slope of the sufficient-decrease line. */
    width = param->max_step - param->min_step;
    prev_width = 2.0 * width;

    /*
     * The variables stx, fx, dgx contain the values of the step,
     * function, and directional derivative at the best step.
     * The variables sty, fy, dgy contain the value of the step,
     * function, and derivative at the other endpoint of
     * the interval of uncertainty.
     * The variables stp, f, dg contain the values of the step,
     * function, and derivative at the current step.
     */
    stx = sty = 0.;
    fx = fy = finit;
    dgx = dgy = dginit;

    for (;;) {
        /*
         * Set the minimum and maximum steps to correspond to the
         * present interval of uncertainty.  Before a bracket exists the
         * upper bound extrapolates beyond the current step.
         */
        if (brackt) {
            stmin = min2(stx, sty);
            stmax = max2(stx, sty);
        } else {
            stmin = stx;
            stmax = *stp + 4.0 * (*stp - stx);
        }

        /* Clip the step in the range of [min_step, max_step]. */
        if (*stp < param->min_step) *stp = param->min_step;
        if (param->max_step < *stp) *stp = param->max_step;

        /*
         * If an unusual termination is to occur then let
         * stp be the lowest point obtained so far.  "Unusual" here means:
         * bracketed and the step lies outside (stmin, stmax), or the next
         * evaluation is the last one allowed, or the previous interval
         * update reported a problem, or the bracket is narrower than xtol.
         */
        if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) {
            *stp = stx;
        }

        /*
         * Compute the current value of x:
         *     x <- xp + (*stp) * s.
         */
        veccpy(x, xp, n);
        vecadd(x, s, *stp, n);

        /* Evaluate the function and gradient values (g is overwritten). */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
        vecdot(&dg, g, s, n);

        /* Value of the sufficient-decrease line at the current step. */
        ftest1 = finit + *stp * dgtest;
        ++count;

        /*
         * Test for errors and convergence.  The order of these checks
         * determines which code is reported when several apply.
         */
        if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) {
            /* Rounding errors prevent further progress. */
            return LBFGSERR_ROUNDING_ERROR;
        }
        if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) {
            /* The step is the maximum value. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) {
            /* The step is the minimum value. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (brackt && (stmax - stmin) <= param->xtol * stmax) {
            /* Relative width of the interval of uncertainty is at most xtol. */
            return LBFGSERR_WIDTHTOOSMALL;
        }
        if (param->max_linesearch <= count) {
            /* Maximum number of iterations reached. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }
        if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) {
            /* The sufficient decrease condition and the directional derivative condition hold. */
            return count;
        }

        /*
         * In the first stage we seek a step for which the modified
         * function has a nonpositive value and nonnegative derivative.
         */
        if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) {
            stage1 = 0;
        }

        /*
         * A modified function is used to predict the step only if
         * we have not obtained a step for which the modified
         * function has a nonpositive function value and nonnegative
         * derivative, and if a lower function value has been
         * obtained but the decrease is not sufficient.
         */
        if (stage1 && ftest1 < *f && *f <= fx) {
            /* Define the modified function and derivative values. */
            fm = *f - *stp * dgtest;
            fxm = fx - stx * dgtest;
            fym = fy - sty * dgtest;
            dgm = dg - dgtest;
            dgxm = dgx - dgtest;
            dgym = dgy - dgtest;

            /*
             * Call update_trial_interval() to update the interval of
             * uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval( &stx, &fxm, &dgxm, &sty, &fym, &dgym, stp, &fm, &dgm, stmin, stmax, &brackt );

            /* Map the endpoint values of the modified function back to f. */
            fx = fxm + stx * dgtest;
            fy = fym + sty * dgtest;
            dgx = dgxm + dgtest;
            dgy = dgym + dgtest;
        } else {
            /*
             * Call update_trial_interval() to update the interval of
             * uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval( &stx, &fx, &dgx, &sty, &fy, &dgy, stp, f, &dg, stmin, stmax, &brackt );
        }

        /*
         * Force a sufficient decrease in the interval of uncertainty:
         * if the bracket has not shrunk below 0.66 of its width two
         * iterations ago, bisect it.
         */
        if (brackt) {
            if (0.66 * prev_width <= fabs(sty - stx)) {
                *stp = stx + 0.5 * (sty - stx);
            }
            prev_width = width;
            width = fabs(sty - stx);
        }
    }

    /* Not reached: the loop above only exits through a return statement. */
    return LBFGSERR_LOGICERROR;
}
bool LineSearcher::MoreThuenteLineSearch( DenseVector ¶m, DenseVector &direc, DenseVector &grad, double finit, double &stepsize, std::function<double(DenseVector &, DenseVector &)> &funcgrad) { itercnt_ = 0; int brackt, stage1, uinfo = 0; double dg; double stx, fx, dgx; double sty, fy, dgy; double fxm, dgxm, fym, dgym, fm, dgm; double ftest1, dginit, dgtest; double width, prev_width; double stmin, stmax; double fval; if (stepsize < 0) { LOG(FATAL) << "Stepsize less than 0"; return false; } dginit = direc.dot(grad); if (dginit > 0) { LOG(FATAL) << "Direction not decent"; return false; } if (tparam_.size() != param.size()) { tparam_.resize(param.size()); } /* Initialize local variables. */ brackt = 0; stage1 = 1; dgtest = alpha_ * dginit; width = maxstep_ - minstep_; prev_width = 2.0 * width; stx = sty = 0.; fx = fy = finit; dgx = dgy = dginit; while (itercnt_ < maxtries_) { /* Set the minimum and maximum steps to correspond to the present interval of uncertainty. */ if (brackt) { stmin = std::min(stx, sty); stmax = std::min(stx, sty); } else { stmin = stx; stmax = stepsize + 4.0 * (stepsize - stx); } /* Clip the step in the range of [minstep_, maxstep_]. */ if (stepsize < minstep_) stepsize = minstep_; if (stepsize > maxstep_) stepsize = maxstep_; /* If an unusual termination is to occur then let stepsize be the lowest point obtained so far. */ if ((brackt && ((stepsize <= stmin || stepsize >= stmax) || (itercnt_ + 1 >= maxtries_) || uinfo != 0)) || (brackt && (stmax - stmin <= parameps_ * stmax))) { stepsize = stx; } tparam_ = param + stepsize * direc; fval = funcgrad(tparam_, grad); dg = grad.dot(direc); ftest1 = finit + stepsize * dgtest; ++itercnt_; /* Test for errors and convergence. */ if (brackt && ((stepsize <= stmin || stmax <= stepsize) || uinfo != 0)) { /* Rounding errors prevent further progress. */ return false; } if (stepsize == maxstep_ && fval <= ftest1 && dg <= dgtest) { /* The step is the maximum value. 
*/ return false; } if (stepsize == minstep_ && (ftest1 < fval || dgtest <= dg)) { /* The step is the minimum value. */ return false; } if (brackt && (stmax - stmin) <= parameps_ * stmax) { /* Relative width of the interval of uncertainty is at most xtol. */ return false; } if (maxtries_ <= itercnt_) { /* Maximum number of iteration. */ return false; } if (fval <= ftest1 && std::fabs(dg) <= beta_ * (-dginit)) { /* The sufficient decrease condition and the directional derivative * condition hold. */ param.swap(tparam_); return true; } /* In the first stage we seek a step for which the modified function has a nonpositive value and nonnegative derivative. */ if (stage1 && fval <= ftest1 && std::min(alpha_, beta_) * dginit <= dg) { stage1 = 0; } /* A modified function is used to predict the step only if we have not obtained a step for which the modified function has a nonpositive function value and nonnegative derivative, and if a lower function value has been obtained but the decrease is not sufficient. */ if (stage1 && ftest1 < fval && fval <= fx) { /* Define the modified function and derivative values. */ fm = fval - stepsize * dgtest; fxm = fx - stx * dgtest; fym = fy - sty * dgtest; dgm = dg - dgtest; dgxm = dgx - dgtest; dgym = dgy - dgtest; /* Call update_trial_interval() to update the interval of uncertainty and to compute the new step. */ uinfo = update_trial_interval(&stx, &fxm, &dgxm, &sty, &fym, &dgym, &stepsize, &fm, &dgm, stmin, stmax, &brackt); /* Reset the function and gradient values for f. */ fx = fxm + stx * dgtest; fy = fym + sty * dgtest; dgx = dgxm + dgtest; dgy = dgym + dgtest; } else { /* Call update_trial_interval() to update the interval of uncertainty and to compute the new step. */ uinfo = update_trial_interval(&stx, &fx, &dgx, &sty, &fy, &dgy, &stepsize, &fval, &dg, stmin, stmax, &brackt); } /* Force a sufficient decrease in the interval of uncertainty. 
*/ if (brackt) { if (0.66 * prev_width <= fabs(sty - stx)) { stepsize = stx + 0.5 * (sty - stx); } prev_width = width; width = std::fabs(sty - stx); } } return false; }