int lbfgs( int n, T *x, T *ptr_fx, typename FuncWrapper<T>::lbfgs_evaluate_t proc_evaluate, typename FuncWrapper<T>::lbfgs_progress_t proc_progress, void *instance, lbfgs_parameter_t *_param ) { int ret; int i, j, k, ls, end, bound; T step; /* Constant parameters and their default values. */ lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam; const int m = param.m; T *xp = NULL; T *g = NULL, *gp = NULL, *pg = NULL; T *d = NULL, *w = NULL, *pf = NULL; iteration_data_t<T> *lm = NULL; iteration_data_t<T>*it = NULL; T ys, yy; T xnorm, gnorm, beta; T fx = 0.; T rate = 0.; typename LineSearchWrapper<T>::line_search_proc linesearch = line_search_morethuente; /* Construct a callback data. */ callback_data_t<T> cd; cd.n = n; cd.instance = instance; cd.proc_evaluate = proc_evaluate; cd.proc_progress = proc_progress; #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) /* Round out the number of variables. */ n = round_out_variables(n); #endif/*defined(USE_SSE)*/ /* Check the input parameters for errors. */ if (n <= 0) { return LBFGSERR_INVALID_N; } #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) if (n % 8 != 0) { return LBFGSERR_INVALID_N_SSE; } if ((uintptr_t)(const void*)x % 16 != 0) { return LBFGSERR_INVALID_X_SSE; } #endif/*defined(USE_SSE)*/ if (param.epsilon < 0.) { return LBFGSERR_INVALID_EPSILON; } if (param.past < 0) { return LBFGSERR_INVALID_TESTPERIOD; } if (param.delta < 0.) { return LBFGSERR_INVALID_DELTA; } if (param.min_step < 0.) { return LBFGSERR_INVALID_MINSTEP; } if (param.max_step < param.min_step) { return LBFGSERR_INVALID_MAXSTEP; } if (param.ftol < 0.) { return LBFGSERR_INVALID_FTOL; } if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE || param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { if (param.wolfe <= param.ftol || 1. <= param.wolfe) { return LBFGSERR_INVALID_WOLFE; } } if (param.gtol < 0.) { return LBFGSERR_INVALID_GTOL; } if (param.xtol < 0.) 
{ return LBFGSERR_INVALID_XTOL; } if (param.max_linesearch <= 0) { return LBFGSERR_INVALID_MAXLINESEARCH; } if (param.orthantwise_c < 0.) { return LBFGSERR_INVALID_ORTHANTWISE; } if (param.orthantwise_start < 0 || n < param.orthantwise_start) { return LBFGSERR_INVALID_ORTHANTWISE_START; } if (param.orthantwise_end < 0) { param.orthantwise_end = n; } if (n < param.orthantwise_end) { return LBFGSERR_INVALID_ORTHANTWISE_END; } if (param.orthantwise_c != 0.) { switch (param.linesearch) { case LBFGS_LINESEARCH_BACKTRACKING: linesearch = line_search_backtracking_owlqn; break; default: /* Only the backtracking method is available. */ return LBFGSERR_INVALID_LINESEARCH; } } else { switch (param.linesearch) { case LBFGS_LINESEARCH_MORETHUENTE: linesearch = line_search_morethuente; break; case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: case LBFGS_LINESEARCH_BACKTRACKING_WOLFE: case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: linesearch = line_search_backtracking; break; default: return LBFGSERR_INVALID_LINESEARCH; } } /* Allocate working space. */ xp = (T*)vecalloc(n * sizeof(T)); g = (T*)vecalloc(n * sizeof(T)); gp = (T*)vecalloc(n * sizeof(T)); d = (T*)vecalloc(n * sizeof(T)); w = (T*)vecalloc(n * sizeof(T)); if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } if (param.orthantwise_c != 0.) { /* Allocate working space for OW-LQN. */ pg = (T*)vecalloc(n * sizeof(T)); if (pg == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } } /* Allocate limited memory storage. */ lm = (iteration_data_t<T>*)vecalloc(m * sizeof(iteration_data_t<T>)); if (lm == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } /* Initialize the limited memory. 
*/ for (i = 0;i < m;++i) { it = &lm[i]; it->alpha = 0; it->ys = 0; it->s = (T*)vecalloc(n * sizeof(T)); it->y = (T*)vecalloc(n * sizeof(T)); if (it->s == NULL || it->y == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } } /* Allocate an array for storing previous values of the objective function. */ if (0 < param.past) { pf = (T*)vecalloc(param.past * sizeof(T)); } /* Evaluate the function value and its gradient. */ fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); if (0. != param.orthantwise_c) { /* Compute the L1 norm of the variable and add it to the object value. */ xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end); fx += xnorm * param.orthantwise_c; owlqn_pseudo_gradient( pg, x, g, n, T(param.orthantwise_c), param.orthantwise_start, param.orthantwise_end ); } /* Store the initial value of the objective function. */ if (pf != NULL) { pf[0] = fx; } /* Compute the direction; we assume the initial hessian matrix H_0 as the identity matrix. */ if (param.orthantwise_c == 0.) { vecncpy(d, g, n); } else { vecncpy(d, pg, n); } /* Make sure that the initial variables are not a minimizer. */ vec2norm(&xnorm, x, n); if (param.orthantwise_c == 0.) { vec2norm(&gnorm, g, n); } else { vec2norm(&gnorm, pg, n); } if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { ret = LBFGS_ALREADY_MINIMIZED; goto lbfgs_exit; } /* Compute the initial step: step = 1.0 / sqrt(vecdot(d, d, n)) */ vec2norminv(&step, d, n); k = 1; end = 0; for (;;) { /* Store the current position and gradient vectors. */ veccpy(xp, x, n); veccpy(gp, g, n); /* Search for an optimal step. */ if (param.orthantwise_c == 0.) { ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, ¶m); } else { ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, ¶m); owlqn_pseudo_gradient( pg, x, g, n, T(param.orthantwise_c), param.orthantwise_start, param.orthantwise_end ); } if (ls < 0) { /* Revert to the previous point. 
*/ veccpy(x, xp, n); veccpy(g, gp, n); ret = ls; goto lbfgs_exit; } /* Compute x and g norms. */ vec2norm(&xnorm, x, n); if (param.orthantwise_c == 0.) { vec2norm(&gnorm, g, n); } else { vec2norm(&gnorm, pg, n); } /* Report the progress. */ if (cd.proc_progress) { if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { goto lbfgs_exit; } } /* Convergence test. The criterion is given by the following formula: |g(x)| / \max(1, |x|) < \epsilon */ if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { /* Convergence. */ ret = LBFGS_SUCCESS; break; } /* Test for stopping criterion. The criterion is given by the following formula: (f(past_x) - f(x)) / f(x) < \delta */ if (pf != NULL) { /* We don't test the stopping criterion while k < past. */ if (param.past <= k) { /* Compute the relative improvement from the past. */ rate = (pf[k % param.past] - fx) / fx; /* The stopping criterion. */ if (rate < param.delta) { ret = LBFGS_STOP; break; } } /* Store the current value of the objective function. */ pf[k % param.past] = fx; } if (param.max_iterations != 0 && param.max_iterations < k+1) { /* Maximum number of iterations. */ ret = LBFGSERR_MAXIMUMITERATION; break; } /* Update vectors s and y: s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}. y_{k+1} = g_{k+1} - g_{k}. */ it = &lm[end]; vecdiff(it->s, x, xp, n); vecdiff(it->y, g, gp, n); /* Compute scalars ys and yy: ys = y^t \cdot s = 1 / \rho. yy = y^t \cdot y. Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor). */ vecdot(&ys, it->y, it->s, n); vecdot(&yy, it->y, it->y, n); it->ys = ys; /* Recursive formula to compute dir = -(H \cdot g). This is described in page 779 of: Jorge Nocedal. Updating Quasi-Newton Matrices with Limited Storage. Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. */ bound = (m <= k) ? m : k; ++k; end = (end + 1) % m; /* Compute the steepest direction. */ if (param.orthantwise_c == 0.) { /* Compute the negative of gradients. 
*/ vecncpy(d, g, n); } else { vecncpy(d, pg, n); } j = end; for (i = 0;i < bound;++i) { j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */ it = &lm[j]; /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */ vecdot(&it->alpha, it->s, d, n); it->alpha /= it->ys; /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */ vecadd(d, it->y, -it->alpha, n); } vecscale(d, ys / yy, n); for (i = 0;i < bound;++i) { it = &lm[j]; /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */ vecdot(&beta, it->y, d, n); beta /= it->ys; /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */ vecadd(d, it->s, it->alpha - beta, n); j = (j + 1) % m; /* if (++j == m) j = 0; */ } /* Constrain the search direction for orthant-wise updates. */ if (param.orthantwise_c != 0.) { for (i = param.orthantwise_start;i < param.orthantwise_end;++i) { if (d[i] * pg[i] >= 0) { d[i] = 0; } } } /* Now the search direction d is ready. We try step = 1 first. */ step = 1.0; } lbfgs_exit: /* Return the final value of the objective function. */ if (ptr_fx != NULL) { *ptr_fx = fx; } vecfree(pf); /* Free memory blocks used by this function. */ if (lm != NULL) { for (i = 0;i < m;++i) { vecfree(lm[i].s); vecfree(lm[i].y); } vecfree(lm); } vecfree(pg); vecfree(w); vecfree(d); vecfree(gp); vecfree(g); vecfree(xp); return ret; }
/**
 * Minimise an objective function of n variables with the limited-memory BFGS
 * method, or with its orthant-wise variant (OWL-QN) when
 * param.orthantwise_c is non-zero.
 *
 * @param n         Number of variables; must be positive.
 * @param x         In/out: the variables. Restored to the previous iterate
 *                  when a line search fails.
 * @param pfx       Optional out: receives the last objective value held in fx.
 * @param evaluate  Callback returning the objective value and filling the
 *                  gradient; called here as (instance, n, x, g, 0).
 * @param progress  Optional per-iteration callback; default_lbfgs_progress is
 *                  used when null. A non-zero return cancels the run.
 * @param instance  Opaque pointer forwarded to the callbacks.
 * @param _param    Optional parameters; default_param is used when null.
 *
 * @return LBFGS_CONVERGENCE when |g| / max(1, |x|) <= epsilon,
 *         LBFGS_CONVERGENCE_DELTA when the relative decrease over `past`
 *         iterations drops below delta, LBFGS_ALREADY_MINIMIZED when the
 *         start point already meets the gradient criterion,
 *         LBFGSERR_CANCELED when progress returned non-zero,
 *         LBFGSERR_MAXIMUMITERATION, one of the LBFGSERR_INVALID_* codes for
 *         a rejected parameter, or the negative value returned by a failed
 *         line search.
 *
 * NOTE(review): allocation results are not checked here -- presumably
 * vecalloc()/xalloc() do not return on failure; confirm. param.m is not
 * validated although it is used as a modulus (`% m`).
 */
int lbfgs(
    int n,
    double* x,
    double* pfx,
    lbfgs_evaluate_t evaluate,
    lbfgs_progress_t progress,
    void* instance,
    const lbfgs_parameter_t* _param
)
{
    int ret;
    int i, j, k, ls, end, bound, n_evaluate = 0;
    int enable_owlqn;
    double step;
    /* Private copy: orthantwise_end may be patched below. */
    lbfgs_parameter_t param = (_param) ? (*_param) : default_param;
    const int m = param.m;
    double* xp;
    double* g, *gp, *pg = 0;
    double* d, *w, *pf = 0;
    iteration_data_t* lm = 0, *it = 0;
    double ys, yy;
    double xnorm, gnorm, rate, beta;
    double fx;
    line_search_proc_t linesearch = line_search_morethuente;

    callback_data_t cd;
    cd.n = n;
    cd.instance = instance;
    cd.evaluate = evaluate;
    cd.progress = (progress) ? progress : default_lbfgs_progress;

    /* Check the input parameters for errors (nothing is allocated yet). */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
    if (param.epsilon < 0.0) {
        return LBFGSERR_INVALID_EPSILON;
    }
    if (param.past < 0) {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.0) {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.min_step < 0.0) {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param.max_step < param.min_step) {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (param.ftol < 0.0) {
        return LBFGSERR_INVALID_FTOL;
    }
    if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
        param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
        /* The Wolfe coefficient must lie strictly between ftol and 1. */
        if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
            return LBFGSERR_INVALID_WOLFE;
        }
    }
    if (param.gtol < 0.0) {
        return LBFGSERR_INVALID_GTOL;
    }
    if (param.xtol < 0.0) {
        return LBFGSERR_INVALID_XTOL;
    }
    if (param.max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }
    if (param.orthantwise_c < 0.0) {
        return LBFGSERR_INVALID_ORTHANTWISE;
    }
    if (param.orthantwise_start < 0 || param.orthantwise_start > n) {
        return LBFGSERR_INVALID_ORTHANTWISE_START;
    }
    if (param.orthantwise_end < 0) {
        /* A negative end index means "up to the last variable". */
        param.orthantwise_end = n;
    }
    if (param.orthantwise_end > n) {
        return LBFGSERR_INVALID_ORTHANTWISE_END;
    }

    enable_owlqn = (param.orthantwise_c != 0.0);
    if (enable_owlqn) {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
            linesearch = line_search_backtracking_owlqn;
            break;
        default:
            /* Only the backtracking method is available. */
            return LBFGSERR_INVALID_LINESEARCH;
        }
    } else {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_MORETHUENTE:
            linesearch = line_search_morethuente;
            break;
        case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
        case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
        case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
            linesearch = line_search_backtracking;
            break;
        default:
            return LBFGSERR_INVALID_LINESEARCH;
        }
    }

    /* Allocate working space. */
    xp = vecalloc(n);
    g = vecalloc(n);
    gp = vecalloc(n);
    d = vecalloc(n);
    w = vecalloc(n);

    /* Allocate pseudo gradient. */
    if (enable_owlqn) {
        pg = vecalloc(n);
    }

    /* Allocate and initialize the limited memory storage. */
    lm = (iteration_data_t*)xalloc(m * sizeof(iteration_data_t));
    for (i = 0; i < m; i++) {
        it = &lm[i];
        it->alpha = 0.0;
        it->s = vecalloc(n);
        it->y = vecalloc(n);
        it->ys = 0.0;
    }

    /* Allocate an array for storing previous values of the objective function. */
    if (param.past > 0) {
        pf = vecalloc((size_t)param.past);
    }

    /* Evaluate the objective and its gradient at the start point. */
    fx = cd.evaluate(cd.instance, cd.n, x, g, 0);
    n_evaluate++;

    if (enable_owlqn) {
        /* Add the L1 penalty to the objective and build the pseudo gradient. */
        xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
        fx += xnorm * param.orthantwise_c;
        owlqn_pseudo_gradient(
            pg, x, g, n,
            param.orthantwise_c, param.orthantwise_start, param.orthantwise_end);
    }

    /* Store the initial value of the objective function. */
    if (pf) {
        pf[0] = fx;
    }

    /**
     * Compute the direction.
     * we assume the initial hessian matrix H_0 as the identity matrix.
     */
    if (!enable_owlqn) {
        vecncpy(d, g, n);
    } else {
        vecncpy(d, pg, n);
    }

    /**
     * Make sure that the initial variables are not a minimizer.
     */
    vec2norm(&xnorm, x, n);
    if (!enable_owlqn) {
        vec2norm(&gnorm, g, n);
    } else {
        vec2norm(&gnorm, pg, n);
    }
    if (xnorm < 1.0) {
        xnorm = 1.0;
    }
    if (gnorm / xnorm <= param.epsilon) {
        ret = LBFGS_ALREADY_MINIMIZED;
        goto lbfgs_exit;
    }

    /**
     * Compute the initial step:
     * step = 1.0 / ||d||
     */
    vec2norminv(&step, d, n);

    k = 1;      /* iteration counter (1-based) */
    end = 0;    /* slot of lm[] that receives the next (s, y) pair */
    for (;;) {
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /* Search for an optimal step. */
        if (!enable_owlqn) {
            ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
        } else {
            /* OWL-QN hands the pseudo gradient to the line search and refreshes it afterwards. */
            ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
            owlqn_pseudo_gradient(
                pg, x, g, n,
                param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
            );
        }

        if (ls < 0) {
            /* Revert to the previous point. */
            veccpy(x, xp, n);
            veccpy(g, gp, n);
            ret = ls;
            break;
        }

        /* A non-negative ls is added to the evaluation count reported to progress. */
        n_evaluate += ls;

        /* Compute x and g norms. */
        vec2norm(&xnorm, x, n);
        if (!enable_owlqn) {
            vec2norm(&gnorm, g, n);
        } else {
            vec2norm(&gnorm, pg, n);
        }

        /* Report the progress; any non-zero return is mapped to LBFGSERR_CANCELED. */
        if ((ret = cd.progress(cd.instance, cd.n, x, g, fx, xnorm, gnorm, step, k, n_evaluate)) != 0) {
            ret = LBFGSERR_CANCELED;
            break;
        }

        /* Convergence test: |g| / max(1, |x|) <= epsilon. */
        if (xnorm < 1.0) {
            xnorm = 1.0;
        }
        if (gnorm / xnorm <= param.epsilon) {
            ret = LBFGS_CONVERGENCE;
            break;
        }

        /* Stopping criterion test: (f(past_x) - f(x)) / f(x) < delta. */
        if (pf) {
            /* We don't test the stopping criterion while k < past. */
            if (param.past <= k) {
                /* Compute the relative improvement from the past. */
                rate = (pf[k % param.past] - fx) / fx;
                /* The stopping criterion. */
                if (rate < param.delta) {
                    ret = LBFGS_CONVERGENCE_DELTA;
                    break;
                }
            }
            /* Store the current value of the objective function (ring buffer of size past). */
            pf[k % param.past] = fx;
        }

        if (param.max_iterations != 0 && param.max_iterations < k + 1) {
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /**
         * Update s and y:
         * s_{k+1} = x_{k+1} - x_{k} = step * d_{k}
         * y_{k+1} = g_{k+1} - g_{k}
         */
        it = &lm[end];
        vecdiff(it->s, x, xp, n);
        vecdiff(it->y, g, gp, n);

        /**
         * Compute scalars ys and yy:
         * ys = y^t s = 1 / \rho
         * yy = y^t y
         * Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
         */
        vecdot(&ys, it->y, it->s, n);
        vecdot(&yy, it->y, it->y, n);
        it->ys = ys;

        /**
         * Recursive formula to compute d = -(H g).
         * This is described in page 779 of:
         * Jorge Nocedal.
         * Updating Quasi-Newton Matrices with Limited Storage.
         * Mathematics of Computation, Vol. 35, No. 151,
         * pp. 773--782, 1980.
         */
        bound = (m <= k) ? m : k;   /* number of stored correction pairs to use */
        k++;
        end = (end + 1) % m;

        /* Start from the negative of the (pseudo) gradient. */
        if (!enable_owlqn) {
            vecncpy(d, g, n);
        } else {
            vecncpy(d, pg, n);
        }

        /* First loop: walk backwards from the newest pair to the oldest. */
        j = end;
        for (i = 0; i < bound; i++) {
            j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */
            it = &lm[j];
            /* \alpha_{j} = \rho_{j} s^{t}_{j} q_{k+1} */
            vecdot(&it->alpha, it->s, d, n);
            it->alpha /= it->ys;
            /* q_{i} = q_{i+1} - \alpha_{i} y_{i} */
            vecadd(d, it->y, -it->alpha, n);
        }

        /* Scale by the initial Hessian approximation (ys / yy) * I. */
        vecscale(d, ys / yy, n);

        /* Second loop: walk forwards again, starting at the oldest pair (j left by the first loop). */
        for (i = 0; i < bound; i++) {
            it = &lm[j];
            /* \beta_{j} = \rho_{j} y^t_{j} \gamma_{i} */
            vecdot(&beta, it->y, d, n);
            beta /= it->ys;
            /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j} */
            vecadd(d, it->s, it->alpha - beta, n);
            j = (j + 1) % m; /* if (++j == m) j = 0; */
        }

        /* Constrain the search direction for orthant-wise updates. */
        if (enable_owlqn) {
            owlqn_contrain_line_search(d, pg, param.orthantwise_start, param.orthantwise_end);
        }

        /* Now the search direction d is ready. We try step = 1 first. */
        step = 1.0;
    }

lbfgs_exit:
    /* Return the final value of the objective function. */
    if (pfx) {
        *pfx = fx;
    }

    vecfree(pf);
    if (lm != 0) {
        for (i = 0; i < m; i++) {
            vecfree(lm[i].s);
            vecfree(lm[i].y);
        }
        xfree(lm);
    }
    vecfree(pg);
    vecfree(w);
    vecfree(d);
    vecfree(gp);
    vecfree(g);
    vecfree(xp);

    return ret;
}
/**
 * Store the reciprocal of the value computed by vec2norm() for x:
 *     *s = 1 / ||x||
 *
 * @param s  Out: receives the reciprocal norm.
 * @param x  Input vector of n elements.
 * @param n  Number of elements in x.
 *
 * No guard is applied when vec2norm() yields zero.
 */
static inline void vec2norminv(double* s, const double* x, const int n)
{
    double norm;

    /* Let vec2norm() write into *s first, exactly as callers expect. */
    vec2norm(s, x, n);
    norm = *s;
    *s = 1.0 / norm;
}
/**
 * Minimise an objective function of n variables by gradient descent: every
 * iteration searches along the negative gradient with
 * line_search_backtracking().
 *
 * @param n         Number of variables; must be positive.
 * @param x         In/out: the variables. Restored to the previous iterate
 *                  when a line search fails.
 * @param pfx       Optional out: receives the last objective value held in fx.
 * @param evaluate  Callback returning the objective value and filling the
 *                  gradient; called here as (instance, n, x, g, 0).
 * @param progress  Optional per-iteration callback; default_lbfgs_progress is
 *                  used when null. A non-zero return cancels the run.
 * @param instance  Opaque pointer forwarded to the callbacks.
 * @param _param    Optional parameters; default_param is used when null.
 *
 * @return LBFGS_CONVERGENCE when |g| / max(1, |x|) <= epsilon,
 *         LBFGS_CONVERGENCE_DELTA when the relative decrease over `past`
 *         iterations drops below delta, LBFGS_ALREADY_MINIMIZED when the
 *         start point already meets the gradient criterion,
 *         LBFGSERR_CANCELED when progress returned non-zero,
 *         LBFGSERR_MAXIMUMITERATION, one of the LBFGSERR_INVALID_* codes for
 *         a rejected parameter, or the negative value returned by a failed
 *         line search.
 *
 * NOTE(review): allocation results are not checked here -- presumably
 * vecalloc() does not return on failure; confirm.
 */
int gd(
    int n,
    double* x,
    double* pfx,
    lbfgs_evaluate_t evaluate,
    lbfgs_progress_t progress,
    void* instance,
    const lbfgs_parameter_t* _param
)
{
    int ret, ls;
    int k, n_evaluate = 0;
    lbfgs_parameter_t param = (_param) ? (*_param) : default_param;
    double fx, xnorm, gnorm, rate, step;
    double* g, *d, *xp, *gp;
    double* pf = 0;
    callback_data_t cd;

    if (progress == 0) {
        progress = default_lbfgs_progress;
    }

    /* Callback bundle handed to the line search. */
    cd.n = n;
    cd.instance = instance;
    cd.evaluate = evaluate;
    cd.progress = progress;

    /* Check the input parameters for errors (nothing is allocated yet). */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
    if (param.epsilon < 0.0) {
        return LBFGSERR_INVALID_EPSILON;
    }
    if (param.past < 0) {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.0) {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.min_step < 0.0) {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param.max_step < param.min_step) {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (param.ftol < 0.0) {
        return LBFGSERR_INVALID_FTOL;
    }
    if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
        param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
        /* The Wolfe coefficient must lie strictly between ftol and 1. */
        if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
            return LBFGSERR_INVALID_WOLFE;
        }
    }
    if (param.max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }

    /* Work buffers: gradient, direction, previous point, previous gradient. */
    g = vecalloc(n);
    d = vecalloc(n);
    xp = vecalloc(n);
    gp = vecalloc(n);

    /* History of objective values for the delta stopping test. */
    if (param.past > 0) {
        pf = vecalloc((size_t)param.past);
    }

    /* Evaluate the objective and its gradient at the start point. */
    fx = evaluate(instance, n, x, g, 0);
    n_evaluate++;

    /* Initial direction: the negative gradient. */
    vecncpy(d, g, n);

    if (pf) {
        pf[0] = fx;
    }

    /* Make sure that the initial variables are not a minimizer. */
    vec2norm(&xnorm, x, n);
    vec2norm(&gnorm, g, n);
    if (xnorm < 1.0) {
        xnorm = 1.0;
    }
    if (gnorm / xnorm <= param.epsilon) {
        ret = LBFGS_ALREADY_MINIMIZED;
        goto gd_exit;
    }

    /* initial guess of step length */
    step = 0.01;

    k = 1;  /* iteration counter (1-based) */
    for (;;) {
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /* No work vector is supplied to the line search (argument 0). */
        ls = line_search_backtracking(n, x, &fx, g, d, &step, xp, gp, 0, &cd, &param);

        if (ls < 0) {
            /* Revert to the previous point. */
            veccpy(x, xp, n);
            veccpy(g, gp, n);
            ret = ls;
            break;
        }

        /* A non-negative ls is added to the evaluation count reported to progress. */
        n_evaluate += ls;

        vec2norm(&xnorm, x, n);
        vec2norm(&gnorm, g, n);

        /* Report the progress; any non-zero return is mapped to LBFGSERR_CANCELED. */
        if ((ret = progress(instance, n, x, g, fx, xnorm, gnorm, step, k, n_evaluate)) != 0) {
            ret = LBFGSERR_CANCELED;
            break;
        }

        /* Convergence test: |g| / max(1, |x|) <= epsilon. */
        if (xnorm < 1.0) {
            xnorm = 1.0;
        }
        if (gnorm / xnorm <= param.epsilon) {
            ret = LBFGS_CONVERGENCE;
            break;
        }

        /* Stopping criterion test: (f(past_x) - f(x)) / f(x) < delta. */
        if (pf) {
            /* Not tested while k < past. */
            if (param.past <= k) {
                rate = (pf[k % param.past] - fx) / fx;
                if (rate < param.delta) {
                    ret = LBFGS_CONVERGENCE_DELTA;
                    break;
                }
            }
            /* Ring buffer of size past. */
            pf[k % param.past] = fx;
        }

        if (param.max_iterations != 0 && param.max_iterations < k + 1) {
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /* Next direction: the negative of the new gradient. The step carries over. */
        vecncpy(d, g, n);

        k++;
    }

gd_exit:
    /* Return the final value of the objective function. */
    if (pfx) {
        *pfx = fx;
    }

    vecfree(pf);
    vecfree(gp);
    vecfree(xp);
    vecfree(d);
    vecfree(g);

    return ret;
}
/**
 * Minimise an objective function of n variables with a Polack-Ribiere
 * conjugate-gradient method whose line search combines quadratic/cubic
 * interpolation with cubic extrapolation.
 *
 * @param n         Number of variables; must be positive.
 * @param x         In/out: the variables. Restored to the iterate preceding
 *                  a failed line search.
 * @param pfx       Optional out: receives the objective value that belongs to
 *                  the x returned to the caller.
 * @param evaluate  Callback returning the objective value and filling the
 *                  gradient; called here as (instance, n, x, g, 0).
 * @param progress  Optional callback invoked after each successful line
 *                  search; default_lbfgs_progress is used when null. A
 *                  non-zero return cancels the run.
 * @param instance  Opaque pointer forwarded to the callbacks.
 * @param _param    Optional parameters; default_param is used when null.
 *                  Only epsilon, past, delta, max_linesearch and
 *                  max_iterations are read here.
 *
 * @return LBFGS_CONVERGENCE when |g| / max(1, |x|) <= epsilon,
 *         LBFGS_CONVERGENCE_DELTA when the relative decrease over `past`
 *         iterations drops below delta, LBFGS_ALREADY_MINIMIZED when the
 *         start point already meets the gradient criterion,
 *         LBFGSERR_CANCELED when progress returned non-zero,
 *         LBFGSERR_MAXIMUMITERATION, LBFGSERR_MAXIMUMLINESEARCH when the
 *         evaluation budget runs out while extrapolating,
 *         LBFGSERR_LINE_SEARCH_FAILED when two consecutive line searches
 *         fail, or one of the LBFGSERR_INVALID_* codes.
 *
 * NOTE(review): allocation results are not checked here -- presumably
 * vecalloc() does not return on failure; confirm.
 */
int cg(
    int n,
    double* x,
    double* pfx,
    lbfgs_evaluate_t evaluate,
    lbfgs_progress_t progress,
    void* instance,
    const lbfgs_parameter_t* _param
)
{
    /* RHO and SIG: coefficients of the sufficient-decrease and slope tests below. */
    static const double RHO = 0.01;
    static const double SIG = 0.5;
    /* INT: keep interpolated points at least this fraction away from the bracket ends. */
    static const double INT = 0.1;
    /* EXT: never extrapolate beyond EXT times the current step. */
    static const double EXT = 3.0;
    /* RATIO: upper bound on the slope ratio used to guess the next initial step. */
    static const double RATIO = 100.0;

    int ret;
    int k, ls_count, ls_success, ls_failed = 0, n_evaluate = 0;
    lbfgs_parameter_t param = (_param) ? (*_param) : default_param;
    double f0, f1, f2 = 0.0, f3, d1, d2, d3, z1, z2 = 0.0, z3, limit, A, B, C;
    double xnorm, gnorm, rate;
    double* df0, *df1, *df2, *s, *x0;
    double* pf = 0;

    if (progress == 0) {
        progress = default_lbfgs_progress;
    }

    /* Check the input parameters for errors (nothing is allocated yet). */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
    if (param.epsilon < 0.0) {
        return LBFGSERR_INVALID_EPSILON;
    }
    if (param.past < 0) {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.0) {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }

    /* Work buffers: saved/current/trial gradients, search direction, saved point. */
    df0 = vecalloc(n);
    df1 = vecalloc(n);
    df2 = vecalloc(n);
    s = vecalloc(n);
    x0 = vecalloc(n);

    /* History of objective values for the delta stopping test. */
    if (param.past > 0) {
        pf = vecalloc((size_t)param.past);
    }

    /* Evaluate the objective and its gradient at the start point. */
    f1 = evaluate(instance, n, x, df1, 0);
    n_evaluate++;

    if (pf) {
        pf[0] = f1;
    }

    /* Make sure that the initial variables are not a minimizer. */
    vec2norm(&xnorm, x, n);
    vec2norm(&gnorm, df1, n);
    if (xnorm < 1.0) {
        xnorm = 1.0;
    }
    if (gnorm / xnorm <= param.epsilon) {
        /* Report the value at the start point rather than the placeholder in f2. */
        f2 = f1;
        ret = LBFGS_ALREADY_MINIMIZED;
        goto cg_exit;
    }

    /* Initial direction is steepest descent; d1 is the slope along it. */
    vecncpy(s, df1, n);
    vecdot(&d1, s, s, n);
    d1 = -d1;

    /**
     * Compute the initial step z1:
     */
    z1 = 1.0 / (1.0 - d1);

    k = 1;  /* iteration counter (1-based) */
    for (;;) {
        /* Store the current position and gradient vectors. */
        f0 = f1;
        veccpy(x0, x, n);
        veccpy(df0, df1, n);

        /* update x using current step: x=x+z1*s */
        vecadd(x, s, z1, n);
        f2 = evaluate(instance, n, x, df2, 0);
        n_evaluate++;
        vecdot(&d2, df2, s, n);

        /* set point 3 equal to point 1 */
        f3 = f1;
        d3 = d1;
        z3 = -z1;

        /* begin line search */
        ls_success = 0;
        ls_count = 0;
        limit = -1.0;   /* negative means "no upper limit on the step yet" */
        for (;;) {
            /*
             * Interpolate while the sufficient-decrease or slope test fails.
             * The loop is bounded by max_linesearch; without the bound the
             * failure branch right after it could never be taken.
             */
            while ((f2 > f1 + RHO * z1 * d1 || d2 > -SIG * d1) &&
                   ls_count < param.max_linesearch) {
                limit = z1;
                if (f2 > f1) {
                    /* quadratic fit */
                    z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3);
                } else {
                    /* cubic fit */
                    A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3);
                    B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2);
                    z2 = (sqrt(B * B - A * d2 * z3 * z3) - B) / A;
                }
                if (isinf(z2) || isnan(z2)) {
                    /* if we had a numerical problem then bisect */
                    z2 = z3 / 2.0;
                }
                /* don't accept too close to limits */
                z2 = max2(min2(z2, INT * z3), (1.0 - INT) * z3);
                /* update step and x */
                z1 = z1 + z2;
                vecadd(x, s, z2, n);
                f2 = evaluate(instance, n, x, df2, 0);
                n_evaluate++;
                ls_count++;
                vecdot(&d2, df2, s, n);
                z3 = z3 - z2;
            }

            if (f2 > f1 + z1 * RHO * d1 || d2 > -SIG * d1) {
                /* a line search failure (interpolation budget exhausted) */
                break;
            } else if (d2 > SIG * d1) {
                /* a line search success */
                ls_success = 1;
                break;
            } else if (ls_count >= param.max_linesearch) {
                ret = LBFGSERR_MAXIMUMLINESEARCH;
                goto cg_exit;
            }

            /* cubic extrapolation */
            A = 6.0 * (f2 - f3) / z3 + 3.0 * (d2 + d3);
            B = 3.0 * (f3 - f2) - z3 * (d3 + 2 * d2);
            z2 = -d2 * z3 * z3 / (B + sqrt(B * B - A * d2 * z3 * z3));
            /* adjust current step z2 for many cases */
            if (isnan(z2) || isinf(z2) || z2 < 0.0) {
                /* numerical problem or wrong sign */
                if (limit < -0.5) {
                    z2 = z1 * (EXT - 1.0);
                } else {
                    z2 = (limit - z1) / 2.0;
                }
            } else if (limit > -0.5 && z2 + z1 > limit) {
                /* extrapolation beyond the known limit: bisect */
                z2 = (limit - z1) / 2.0;
            } else if (limit < -0.5 && z2 + z1 > z1 * EXT) {
                /* extrapolation beyond EXT times the current step */
                z2 = z1 * (EXT - 1.0);
            } else if (z2 < -z3 * INT) {
                /* too close to the previous point */
                z2 = -z3 * INT;
            } else if (limit > -0.5 && z2 < (limit - z1) * (1.0 - INT)) {
                /* too close to the limit */
                z2 = (limit - z1) * (1.0 - INT);
            }
            /* set point 3 equal to point 2 */
            f3 = f2;
            d3 = d2;
            z3 = -z2;
            z1 = z1 + z2;
            vecadd(x, s, z2, n);
            f2 = evaluate(instance, n, x, df2, 0);
            n_evaluate++;
            ls_count++;
            vecdot(&d2, df2, s, n);
        }

        if (ls_success) {
            vec2norm(&xnorm, x, n);
            vec2norm(&gnorm, df2, n);
            /* Report the progress; any non-zero return is mapped to LBFGSERR_CANCELED. */
            if ((ret = progress(instance, n, x, df2, f2, xnorm, gnorm, z2, k, n_evaluate)) != 0) {
                ret = LBFGSERR_CANCELED;
                break;
            }

            /* Convergence test: |g| / max(1, |x|) <= epsilon. */
            if (xnorm < 1.0) {
                xnorm = 1.0;
            }
            if (gnorm / xnorm <= param.epsilon) {
                ret = LBFGS_CONVERGENCE;
                break;
            }

            /* Stopping criterion test: (f(past_x) - f(x)) / f(x) < delta. */
            if (pf) {
                /* Not tested while k < past. */
                if (param.past <= k) {
                    rate = (pf[k % param.past] - f2) / f2;
                    if (rate < param.delta) {
                        ret = LBFGS_CONVERGENCE_DELTA;
                        break;
                    }
                }
                /* Ring buffer of size past. */
                pf[k % param.past] = f2;
            }

            if (param.max_iterations != 0 && param.max_iterations < k + 1) {
                ret = LBFGSERR_MAXIMUMITERATION;
                break;
            }

            k++;

            f1 = f2;
            /**
             * Polack-Ribiere direction
             * s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2
             */
            vecdot(&A, df2, df2, n);
            vecdot(&B, df1, df2, n);
            vecdot(&C, df1, df1, n);
            vecscale(s, (A - B) / C, n);
            vecadd(s, df2, -1.0, n);
            vecswap(df1, df2, n);
            vecdot(&d2, df1, s, n);
            if (d2 > 0) {
                /* The new slope is not a descent slope: fall back to steepest descent. */
                vecncpy(s, df1, n);
                vecdot(&d2, s, s, n);
                d2 = -d2;
            }
            /* Guess the next initial step from the slope ratio, capped at RATIO. */
            z1 = z1 * min2(RATIO, d1 / (d2 - DBL_MIN));
            d1 = d2;
            ls_failed = 0;
        } else {
            /* restore previous point */
            f1 = f0;
            veccpy(x, x0, n);
            veccpy(df1, df0, n);
            if (ls_failed) {
                /* line search failed twice */
                /* x was restored above, so report the matching objective value. */
                f2 = f1;
                ret = LBFGSERR_LINE_SEARCH_FAILED;
                break;
            }
            vecswap(df1, df2, n);
            vecncpy(s, df1, n);/* try steepest */
            vecdot(&d1, s, s, n);
            d1 = -d1;
            z1 = 1.0 / (1.0 - d1);
            ls_failed = 1;
        }
    }

cg_exit:
    /* Return the objective value that corresponds to the x handed back. */
    if (pfx) {
        *pfx = f2;
    }

    vecfree(pf);
    vecfree(x0);
    vecfree(s);
    vecfree(df2);
    vecfree(df1);
    vecfree(df0);

    return ret;
}