/* Update L-BFGS approximation of the Hessian. */ static void update(opk_vmlmb_t* opt, const opk_vector_t* x, const opk_vector_t* g) { double sty, yty; opk_index_t k; k = SLOT(0); AXPBY(S(k), 1, x, -1, opt->x0); AXPBY(Y(k), 1, g, -1, opt->g0); if (opt->method != OPK_VMLMB) { /* Compute initial inverse Hessian approximation. */ sty = DOT(S(k), Y(k)); if (sty <= 0) { /* This pair will be skipped. This may however indicate a problem, see Nocedal & Wright "Numerical Optimization", section 8.1, p. 201 (1999). FIXME: restart? */ RHO(k) = 0; } else { /* Compute RHO(k) and GAMMA. */ RHO(k) = 1/sty; yty = DOT(Y(k), Y(k)); if (yty > 0) { opt->gamma = sty/yty; } } } ++opt->updates; if (opt->mp < opt->m) { ++opt->mp; } }
/* Update L-BFGS approximation of the Hessian. */ static void update(opk_vmlmn_t* opt, const opk_vector_t* x, const opk_vector_t* g) { double sty, yty; opk_index_t k; k = slot(opt, 0); AXPBY(S(k), 1, x, -1, opt->x0); AXPBY(Y(k), 1, g, -1, opt->g0); if (opt->method != OPK_VMLMN) { /* Compute initial inverse Hessian approximation. */ sty = DOT(Y(k), S(k)); if (sty <= 0) { RHO(k) = 0; } else { RHO(k) = 1/sty; yty = DOT(Y(k), Y(k)); if (yty > 0) { opt->gamma = sty/yty; } } } ++opt->updates; if (opt->mp < opt->m) { ++opt->mp; } }
opk_task_t opk_iterate_vmlmb(opk_vmlmb_t* opt, opk_vector_t* x, double f, opk_vector_t* g) { double dtg, gtest, stpmin, stpmax; opk_index_t k; opk_status_t status; opk_lnsrch_task_t lnsrch_task; opk_bool_t bounded; if (opt == NULL) { return OPK_TASK_ERROR; } bounded = (opt->box != NULL); switch (opt->task) { case OPK_TASK_COMPUTE_FG: /* Caller has computed the function value and the gradient at the current point. */ ++opt->evaluations; if (opt->evaluations > 1) { /* A line search is in progress, check whether it has converged. */ if (opk_lnsrch_use_deriv(opt->lnsrch)) { if (bounded) { /* Compute the directional derivative as the inner product between the effective step and the gradient divided by the step length. */ #if 0 if (opt->tmp == NULL && (opt->tmp = opk_vcreate(opt->vspace)) == NULL) { return failure(opt, OPK_INSUFFICIENT_MEMORY); } AXPBY(opt->tmp, 1, x, -1, opt->x0); dtg = DOT(opt->tmp, g)/opt->stp; #else dtg = (DOT(x, g) - DOT(opt->x0, g))/opt->stp; #endif } else { /* Compute the directional derivative. */ dtg = -opk_vdot(opt->d, g); } } else { /* Line search does not need directional derivative. */ dtg = 0; } lnsrch_task = opk_lnsrch_iterate(opt->lnsrch, &opt->stp, f, dtg); if (lnsrch_task == OPK_LNSRCH_SEARCH) { /* Line search has not yet converged, break to compute a new trial point along the search direction. */ break; } if (lnsrch_task != OPK_LNSRCH_CONVERGENCE) { /* An error may have occurred during the line search. Figure out whether this error can be safely ignored. */ status = opk_lnsrch_get_status(opt->lnsrch); if (lnsrch_task != OPK_LNSRCH_WARNING || status != OPK_ROUNDING_ERRORS_PREVENT_PROGRESS) { return failure(opt, status); } } ++opt->iterations; } /* The current step is acceptable. Check for global convergence. */ if (bounded) { /* Determine the set of free variables. */ status = opk_get_free_variables(opt->w, x, opt->box, g, OPK_ASCENT); if (status != OPK_SUCCESS) { return failure(opt, status); } } if (opt->method == OPK_VMLMB) { /* Compute the Euclidean norm of the projected gradient. */ opt->gnorm = WNORM2(g); } else if (opt->method == OPK_BLMVM) { /* Compute the projected gradient and its norm. */ opk_vproduct(opt->gp, opt->w, g); opt->gnorm = NORM2(opt->gp); } else { /* Compute the Euclidean norm of the gradient. */ opt->gnorm = NORM2(g); } if (opt->evaluations == 1) { opt->ginit = opt->gnorm; } gtest = max3(0.0, opt->gatol, opt->grtol*opt->ginit); return success(opt, (opt->gnorm <= gtest ? OPK_TASK_FINAL_X : OPK_TASK_NEW_X)); case OPK_TASK_NEW_X: case OPK_TASK_FINAL_X: /* Compute a new search direction. */ if (opt->iterations >= 1) { /* Update L-BFGS approximation of the Hessian. */ update(opt, x, (opt->method == OPK_BLMVM ? opt->gp : g)); } status = apply(opt, g); if (status == OPK_SUCCESS) { /* The L-BFGS approximation produces a search direction D. To warrant convergence, we have to check whether -D is a sufficient descent direction (that is to say that D is a sufficient ascent direction). As shown by Zoutendijk, this is true if cos(theta) = (D/|D|)'.(G/|G|) is larger or equal EPSILON > 0, where G is the gradient at X and D the, ascent for us, search direction. */ if (bounded) { /* Project the search direction produced by the L-BFGS recursion. */ status = opk_project_direction(opt->d, x, opt->box, opt->d, OPK_ASCENT); if (status != OPK_SUCCESS) { return failure(opt, status); } } dtg = -DOT(opt->d, g); if (opt->epsilon > 0 && -dtg < opt->epsilon*NORM2(opt->d)*opt->gnorm) { /* -D is not a sufficient descent direction. Set the directional derivative to zero to force using the steepest descent direction. */ dtg = 0.0; } } else { /* The L-BFGS approximation is not available (first iteration or just after a reset) or failed to produce a direction. Set the directional derivative to zero to use the steepest descent direction. */ dtg = 0.0; } /* Determine the initial step length. */ if (dtg < 0) { /* A sufficient descent direction has been produced by L-BFGS recursion. An initial unit step will be used. */ opt->stp = 1.0; } else { /* First iteration or L-BFGS recursion failed to produce a sufficient descent direction, use the (projected) gradient as a search direction. */ if (opt->mp > 0) { /* L-BFGS recursion did not produce a sufficient descent direction. */ ++opt->restarts; opt->mp = 0; } if (opt->method == OPK_VMLMB) { /* Use the projected gradient. */ opk_vproduct(opt->d, opt->w, g); } else if (opt->method == OPK_BLMVM) { /* Use the projected gradient (which has already been computed and * stored in the scratch vector). */ opk_vcopy(opt->d, opt->gp); } else { /* Use the gradient. */ opk_vcopy(opt->d, g); } dtg = -opt->gnorm*opt->gnorm; if (f != 0) { opt->stp = 2*fabs(f/dtg); } else { /* Use a small step compared to X. */ double dnorm = opt->gnorm; double xnorm = (bounded ? WNORM2(x) : NORM2(x)); if (xnorm > 0) { opt->stp = opt->delta*xnorm/dnorm; } else { opt->stp = opt->delta/dnorm; } } } stpmin = opt->stp*opt->stpmin; stpmax = opt->stp*opt->stpmax; if (bounded) { /* Shortcut the step length. */ double bsmin1, bsmin2, bsmax; status = opk_get_step_limits(&bsmin1, &bsmin2, &bsmax, x, opt->box, opt->d, OPK_ASCENT); if (bsmin1 < 0) { fprintf(stderr, "FIXME: SMIN1 =%g, SMIN2 =%g, SMAX =%g\n", bsmin1, bsmin2, bsmax); } if (status != OPK_SUCCESS) { return failure(opt, status); } if (bsmax <= 0) { return failure(opt, OPK_WOULD_BLOCK); } if (opt->stp > bsmax) { opt->stp = bsmax; } if (stpmax > bsmax) { stpmax = bsmax; } opt->bsmin = bsmin2; } /* Save current point. */ if (opt->save_memory) { k = SLOT(0); opt->x0 = S(k); /* weak reference */ opt->g0 = Y(k); /* weak reference */ if (opt->mp == opt->m) { --opt->mp; } } COPY(opt->x0, x); COPY(opt->g0, (opt->method == OPK_BLMVM ? opt->gp : g)); opt->f0 = f; /* Start the line search and break to take the first step along the line search. */ if (opk_lnsrch_start(opt->lnsrch, f, dtg, opt->stp, stpmin, stpmax) != OPK_LNSRCH_SEARCH) { return failure(opt, opk_lnsrch_get_status(opt->lnsrch)); } break; default: /* There must be something wrong. */ return opt->task; } /* Compute a new trial point along the line search. */ opk_vaxpby(x, 1, opt->x0, -opt->stp, opt->d); if (bounded && opt->stp > opt->bsmin) { opk_status_t status = opk_project_variables(x, x, opt->box); if (status != OPK_SUCCESS) { return failure(opt, status); } } return success(opt, OPK_TASK_COMPUTE_FG); }
opk_task_t opk_iterate_vmlmn(opk_vmlmn_t* opt, opk_vector_t* x, double f, opk_vector_t* g) { double dtg; opk_index_t k; opk_status_t status; opk_lnsrch_task_t lnsrch_task; opk_bool_t bounded, final; bounded = (opt->bounds != 0); switch (opt->task) { case OPK_TASK_COMPUTE_FG: /* Caller has computed the function value and the gradient at the current point. */ ++opt->evaluations; if (opt->evaluations > 1) { /* A line search is in progress, check whether it has converged. */ if (opk_lnsrch_use_deriv(opt->lnsrch)) { if (bounded) { /* Compute the directional derivative as the inner product between the effective step and the gradient. */ #if 0 if (opt->tmp == NULL && (opt->tmp = opk_vcreate(opt->vspace)) == NULL) { return failure(opt, OPK_INSUFFICIENT_MEMORY); } AXPBY(opt->tmp, 1, x, -1, opt->x0); dtg = DOT(opt->tmp, g)/opt->stp; #else dtg = (DOT(x, g) - DOT(opt->x0, g))/opt->stp; #endif } else { /* Compute the directional derivative. */ dtg = -opk_vdot(opt->d, g); } } else { /* Line search does not need directional derivative. */ dtg = 0; } lnsrch_task = opk_lnsrch_iterate(opt->lnsrch, &opt->stp, f, dtg); if (lnsrch_task == OPK_LNSRCH_SEARCH) { /* Line search has not yet converged, break to compute a new trial point along the search direction. */ break; } if (lnsrch_task != OPK_LNSRCH_CONVERGENCE) { status = opk_lnsrch_get_status(opt->lnsrch); if (lnsrch_task != OPK_LNSRCH_WARNING || status != OPK_ROUNDING_ERRORS_PREVENT_PROGRESS) { return failure(opt, status); } } ++opt->iterations; } if (bounded) { /* Determine the set of free variables. */ status = opk_box_get_free_variables(opt->w, x, opt->xl, opt->xu, g, OPK_ASCENT); if (status != OPK_SUCCESS) { return failure(opt, status); } } /* Check for global convergence. */ if (opt->method == OPK_VMLMN) { /* Compute the Euclidean norm of the projected gradient. */ opt->gnorm = WNORM2(g); } else if (opt->method == OPK_BLMVM) { /* Compute the projected gradient and its norm. */ opk_vproduct(opt->tmp, opt->w, g); opt->gnorm = NORM2(opt->tmp); } else { /* Compute the Euclidean norm of the gradient. */ opt->gnorm = NORM2(g); } if (opt->evaluations == 1) { opt->ginit = opt->gnorm; } final = (opt->gnorm <= max3(0.0, opt->gatol, opt->grtol*opt->ginit)); return success(opt, (final ? OPK_TASK_FINAL_X : OPK_TASK_NEW_X)); case OPK_TASK_NEW_X: case OPK_TASK_FINAL_X: /* Compute a new search direction. */ if (opt->iterations >= 1) { /* Update L-BFGS approximation of the Hessian. */ update(opt, x, (opt->method == OPK_BLMVM ? opt->tmp : g)); } if (apply(opt, g) == OPK_SUCCESS) { /* We take care of checking whether -D is a sufficient descent direction (that is to say that D is a sufficient ascent direction). As shown by Zoutendijk, this is true if cos(theta) = (D/|D|)'.(G/|G|) is larger or equal EPSILON > 0, where G is the gradient at X and D the (ascent for us) search direction. */ dtg = -DOT(opt->d, g); if (opt->epsilon > 0 && dtg > -opt->epsilon*NORM2(opt->d)*opt->gnorm) { /* We do not have a sufficient descent direction. Set the directional derivative to zero to force using the steepest descent direction. */ dtg = 0.0; } } else { /* The L-BFGS approximation is unset (first iteration) or failed to produce a direction. Set the directional derivative to zero to use the steepest descent direction. */ dtg = 0.0; } /* Determine the initial step length. */ if (dtg < 0) { /* A sufficient descent direction has been produced by L-BFGS recursion. An initial unit step will be used. */ opt->stp = 1.0; } else { /* First iteration or L-BFGS recursion failed to produce a sufficient descent direction, use the (projected) gradient as a search direction. */ if (opt->mp > 0) { /* L-BFGS recursion did not produce a sufficient descent direction. */ ++opt->restarts; opt->mp = 0; } if (opt->method == OPK_VMLMN) { /* Use the projected gradient. */ opk_vproduct(opt->d, opt->w, g); } else if (opt->method == OPK_BLMVM) { /* Use the projected gradient (which has aready been computed and * stored in the scratch vector). */ opk_vcopy(opt->d, opt->tmp); } else { /* Use the gradient. */ opk_vcopy(opt->d, g); } dtg = -opt->gnorm*opt->gnorm; if (f != 0) { opt->stp = 2*fabs(f/dtg); } else { /* Use a small step compared to X. */ double dnorm = opt->gnorm; double xnorm = (bounded ? WNORM2(x) : NORM2(x)); if (xnorm > 0) { opt->stp = opt->delta*xnorm/dnorm; } else { opt->stp = opt->delta/dnorm; } } } if (bounded) { /* Shortcut the step length. */ double bsmin, bsmax, wolfe; status = opk_box_get_step_limits(&bsmin, &wolfe, &bsmax, x, opt->xl, opt->xu, opt->d, OPK_ASCENT); if (status != OPK_SUCCESS) { return failure(opt, status); } if (bsmax <= 0) { return failure(opt, OPK_WOULD_BLOCK); } if (opt->stp > bsmax) { opt->stp = bsmax; } opt->bsmin = bsmin; } /* Save current point. */ #if SAVE_MEMORY k = slot(opt, 0); opt->x0 = S(k); /* weak reference */ opt->g0 = Y(k); /* weak reference */ if (opt->mp == opt->m) { --opt->mp; } #endif COPY(opt->x0, x); COPY(opt->g0, (opt->method == OPK_BLMVM ? opt->tmp : g)); opt->f0 = f; /* Start the line search and break to take the first step along the line search. */ if (opk_lnsrch_start(opt->lnsrch, f, dtg, opt->stp, opt->stp*opt->stpmin, opt->stp*opt->stpmax) != OPK_LNSRCH_SEARCH) { return failure(opt, opk_lnsrch_get_status(opt->lnsrch)); } break; default: /* There must be something wrong. */ return opt->task; }