Exemple #1
0
/* MMA-type minimizer (the routine and its verbose output are labelled
   "MMA"; the comment in the body cites Svanberg's 1987 paper).

   Minimizes f(x) over n variables with bounds lb/ub and the constraints
   in fc, which are treated as satisfied when their value is <= 0 (NaN
   constraint values are skipped by the feasibility tests).  Each outer
   iteration repeatedly solves a dual problem in the variables y using
   dual_opt (the inner iterations), evaluates f and the constraints at
   the resulting xcur, and increases rho/rhoc until the inner_done test
   passes.

   n         number of variables (length of x, lb, ub)
   f, f_data objective, called as f(n, x, grad, f_data)
   m, fc     number of entries in fc and the entries themselves; entry i
             contributes fc[i].m constraint values
   lb, ub    bounds; used to initialise sigma and handed to dual_func
             through dd
   x         in: starting point; out: best point accepted so far
   minf      out: objective value at x
   stop      stopping criteria; stop->nevals is incremented for every
             evaluation of f made here
   dual_opt  optimizer used for the dual problem.  Its dimension must
             equal the total constraint count, otherwise
             NLOPT_INVALID_ARGS is returned.  Its objective, bounds,
             stopval and constraints are overwritten by this routine.

   Returns the nlopt_result describing why iteration stopped: one of the
   NLOPT_*_REACHED codes, NLOPT_FORCED_STOP, NLOPT_OUT_OF_MEMORY,
   NLOPT_INVALID_ARGS, or a negative code from the dual solve. */
nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data,
			  unsigned m, nlopt_constraint *fc,
			  const double *lb, const double *ub, /* bounds */
			  double *x, /* in: initial guess, out: minimizer */
			  double *minf,
			  nlopt_stopping *stop,
			  nlopt_opt dual_opt)
{
     nlopt_result ret = NLOPT_SUCCESS;
     double *xcur, rho, *sigma, *dfdx, *dfdx_cur, *xprev, *xprevprev, fcur;
     double *dfcdx, *dfcdx_cur;
     double *fcval, *fcval_cur, *rhoc, *gcval, *y, *dual_lb, *dual_ub;
     unsigned i, ifc, j, k = 0;
     dual_data dd;
     int feasible;
     double infeasibility;
     unsigned mfc;

     /* mfc keeps the number of entries in fc; m becomes the total number
	of scalar constraint values across all entries */
     m = nlopt_count_constraints(mfc = m, fc);
     if (nlopt_get_dimension(dual_opt) != m) return NLOPT_INVALID_ARGS;
     /* one allocation, carved up below: 6 vectors of length n, 7 of
	length m, and 2 arrays of m*n; sigma is the base pointer that is
	freed at done */
     sigma = (double *) malloc(sizeof(double) * (6*n + 2*m*n + m*7));
     if (!sigma) return NLOPT_OUT_OF_MEMORY;
     dfdx = sigma + n;
     dfdx_cur = dfdx + n;
     xcur = dfdx_cur + n;
     xprev = xcur + n;
     xprevprev = xprev + n;
     fcval = xprevprev + n;
     fcval_cur = fcval + m;
     rhoc = fcval_cur + m;
     gcval = rhoc + m;
     dual_lb = gcval + m;
     dual_ub = dual_lb + m;
     y = dual_ub + m;
     dfcdx = y + m;
     dfcdx_cur = dfcdx + m*n;

     /* dd is the context handed to dual_func; it aliases the arrays
	above, so updates made here are seen by the dual objective */
     dd.n = n;
     dd.x = x;
     dd.lb = lb;
     dd.ub = ub;
     dd.sigma = sigma;
     dd.dfdx = dfdx;
     dd.dfcdx = dfcdx;
     dd.fcval = fcval;
     dd.rhoc = rhoc;
     dd.xcur = xcur;
     dd.gcval = gcval;

     /* initial sigma: half the bound width, or 1 when a bound is infinite */
     for (j = 0; j < n; ++j) {
	  if (nlopt_isinf(ub[j]) || nlopt_isinf(lb[j]))
	       sigma[j] = 1.0; /* arbitrary default */
	  else
	       sigma[j] = 0.5 * (ub[j] - lb[j]);
     }
     rho = 1.0;
     /* dual variables start at 0 and are constrained to [0, +inf) */
     for (i = 0; i < m; ++i) {
	  rhoc[i] = 1.0;
	  dual_lb[i] = y[i] = 0.0;
	  dual_ub[i] = HUGE_VAL;
     }

     /* objective value and gradient at the starting point */
     dd.fval = fcur = *minf = f(n, x, dfdx, f_data);
     stop->nevals++;
     memcpy(xcur, x, sizeof(double) * n);
     if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }

     /* constraint values and gradients at the starting point; i tracks
	the offset of entry ifc within the flattened arrays */
     feasible = 1; infeasibility = 0;
     for (i = ifc = 0; ifc < mfc; ++ifc) {
	  nlopt_eval_constraint(fcval + i, dfcdx + i*n,
				fc + ifc, n, x);
	  i += fc[ifc].m;
	  if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
     }
     /* infeasibility is the largest positive constraint value (0 if none) */
     for (i = 0; i < m; ++i) {
	  feasible = feasible && (fcval[i] <= 0 || isnan(fcval[i]));
	  if (fcval[i] > infeasibility) infeasibility = fcval[i];
     }
     /* For non-feasible initial points, set a finite (large)
	upper-bound on the dual variables.  What this means is that,
	if no feasible solution is found from the dual problem, it
	will minimize the dual objective with the unfeasible
	constraint weighted by 1e40 -- basically, minimizing the
	unfeasible constraint until it becomes feasible or until we at
	least obtain a step towards a feasible point.
	
	Svanberg suggested a different approach in his 1987 paper, basically
	introducing additional penalty variables for unfeasible constraints,
	but this is easier to implement and at least as efficient. */
     if (!feasible)
	  for (i = 0; i < m; ++i) dual_ub[i] = 1e40;

     /* configure the dual optimizer: bound-constrained only, no stopval */
     nlopt_set_min_objective(dual_opt, dual_func, &dd);
     nlopt_set_lower_bounds(dual_opt, dual_lb);
     nlopt_set_upper_bounds(dual_opt, dual_ub);
     nlopt_set_stopval(dual_opt, -HUGE_VAL);
     nlopt_remove_inequality_constraints(dual_opt);
     nlopt_remove_equality_constraints(dual_opt);

     while (1) { /* outer iterations */
	  double fprev = fcur;
	  if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
	  else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
	  else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
	  else if (feasible && *minf < stop->minf_max) 
	       ret = NLOPT_MINF_MAX_REACHED;
	  if (ret != NLOPT_SUCCESS) goto done;
	  /* keep the two previous iterates; xprevprev is only meaningful
	     from the second outer iteration on (k > 1) */
	  if (++k > 1) memcpy(xprevprev, xprev, sizeof(double) * n);
	  memcpy(xprev, xcur, sizeof(double) * n);

	  while (1) { /* inner iterations */
	       double min_dual, infeasibility_cur;
	       int feasible_cur, inner_done;
	       unsigned save_verbose;
	       int new_infeasible_constraint;
	       nlopt_result reti;

	       /* solve dual problem */
	       dd.rho = rho; dd.count = 0;
	       save_verbose = mma_verbose;
	       mma_verbose = 0; /* no recursive verbosity */
	       /* the dual solve gets no evaluation limit (0) and whatever
		  wall-clock time remains of stop->maxtime */
	       reti = nlopt_optimize_limited(dual_opt, y, &min_dual,
					     0,
					     stop->maxtime - (nlopt_seconds() 
							      - stop->start));
	       mma_verbose = save_verbose;
	       if (reti < 0 || reti == NLOPT_MAXTIME_REACHED) {
		    ret = reti;
		    goto done;
	       }

	       dual_func(m, y, NULL, &dd); /* evaluate final xcur etc. */
	       if (mma_verbose) {
		    printf("MMA dual converged in %d iterations to g=%g:\n",
			   dd.count, dd.gval);
		    for (i = 0; i < MIN(mma_verbose, m); ++i)
			 printf("    MMA y[%d]=%g, gc[%d]=%g\n",
				i, y[i], i, dd.gcval[i]);
	       }

	       /* true objective and constraints at the candidate xcur */
	       fcur = f(n, xcur, dfdx_cur, f_data);
	       stop->nevals++;
	       if (nlopt_stop_forced(stop)) { 
		    ret = NLOPT_FORCED_STOP; goto done; }
	       feasible_cur = 1; infeasibility_cur = 0;
	       new_infeasible_constraint = 0;
	       /* inner_done requires the dual-side values (dd.gval,
		  dd.gcval) to be >= the true values at xcur */
	       inner_done = dd.gval >= fcur;
	       for (i = ifc = 0; ifc < mfc; ++ifc) {
		    nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n,
					  fc + ifc, n, xcur);
		    i += fc[ifc].m;
		    if (nlopt_stop_forced(stop)) { 
			 ret = NLOPT_FORCED_STOP; goto done; }
	       }
	       /* per-component checks; i0 is the first flattened index of
		  entry ifc so that tol[] is indexed per entry */
	       for (i = ifc = 0; ifc < mfc; ++ifc) {
		    unsigned i0 = i, inext = i + fc[ifc].m;
		    for (; i < inext; ++i)
			 if (!isnan(fcval_cur[i])) {
			      feasible_cur = feasible_cur 
				   && (fcval_cur[i] <= fc[ifc].tol[i-i0]);
			      if (!isnan(fcval[i]))
				   inner_done = inner_done && 
					(dd.gcval[i] >= fcval_cur[i]);
			      else if (fcval_cur[i] > 0)
				   new_infeasible_constraint = 1;
			      if (fcval_cur[i] > infeasibility_cur)
				   infeasibility_cur = fcval_cur[i];
			 }
	       }

	       /* accept xcur as the new best point if it lowers f (and is
		  either conservative, feasible, or we have no feasible
		  point yet), or if it reduces infeasibility while we are
		  still infeasible */
	       if ((fcur < *minf && (inner_done || feasible_cur || !feasible))
		    || (!feasible && infeasibility_cur < infeasibility)) {
		    if (mma_verbose && !feasible_cur)
			 printf("MMA - using infeasible point?\n");
		    dd.fval = *minf = fcur;
		    infeasibility = infeasibility_cur;
		    memcpy(fcval, fcval_cur, sizeof(double)*m);
		    memcpy(x, xcur, sizeof(double)*n);
		    memcpy(dfdx, dfdx_cur, sizeof(double)*n);
		    memcpy(dfcdx, dfcdx_cur, sizeof(double)*n*m);
		    
		    /* once we have reached a feasible solution, the
		       algorithm should never make the solution infeasible
		       again (if inner_done), although the constraints may
		       be violated slightly by rounding errors etc. so we
		       must be a little careful about checking feasibility */
		    if (infeasibility_cur == 0) {
			 if (!feasible) { /* reset upper bounds to infin. */
			      for (i = 0; i < m; ++i) dual_ub[i] = HUGE_VAL;
			      nlopt_set_upper_bounds(dual_opt, dual_ub);
			 }
			 feasible = 1;
		    }
		    else if (new_infeasible_constraint) feasible = 0;

	       }
	       if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
	       else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
	       else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
	       else if (feasible && *minf < stop->minf_max) 
		    ret = NLOPT_MINF_MAX_REACHED;
	       if (ret != NLOPT_SUCCESS) goto done;

	       if (inner_done) break;

	       /* not conservative enough: grow rho / rhoc[i] for every
		  function whose true value exceeded its dual-side value,
		  by at most a factor of 10 per inner iteration */
	       if (fcur > dd.gval)
		    rho = MIN(10*rho, 1.1 * (rho + (fcur-dd.gval) / dd.wval));
	       for (i = 0; i < m; ++i)
		    if (!isnan(fcval_cur[i]) && fcval_cur[i] > dd.gcval[i])
			 rhoc[i] = 
			      MIN(10*rhoc[i], 
				  1.1 * (rhoc[i] + (fcval_cur[i]-dd.gcval[i]) 
					 / dd.wval));
	       
	       if (mma_verbose)
		    printf("MMA inner iteration: rho -> %g\n", rho);
	       for (i = 0; i < MIN(mma_verbose, m); ++i)
		    printf("                 MMA rhoc[%d] -> %g\n", i,rhoc[i]);
	  }

	  /* convergence tests on the outer step; the x test is checked
	     last, so XTOL wins when both criteria are met */
	  if (nlopt_stop_ftol(stop, fcur, fprev))
	       ret = NLOPT_FTOL_REACHED;
	  if (nlopt_stop_x(stop, xcur, xprev))
	       ret = NLOPT_XTOL_REACHED;
	  if (ret != NLOPT_SUCCESS) goto done;
	       
	  /* update rho and sigma for iteration k+1 */
	  rho = MAX(0.1 * rho, MMA_RHOMIN);
	  if (mma_verbose)
	       printf("MMA outer iteration: rho -> %g\n", rho);
	  for (i = 0; i < m; ++i)
	       rhoc[i] = MAX(0.1 * rhoc[i], MMA_RHOMIN);
	  for (i = 0; i < MIN(mma_verbose, m); ++i)
	       printf("                 MMA rhoc[%d] -> %g\n", i, rhoc[i]);
	  if (k > 1) {
	       /* shrink sigma[j] (x0.7) when coordinate j reversed
		  direction over the last two steps, grow it (x1.2) when it
		  kept going the same way; for finite bounds clamp it to
		  [0.01, 10] times the bound width */
	       for (j = 0; j < n; ++j) {
		    double dx2 = (xcur[j]-xprev[j]) * (xprev[j]-xprevprev[j]);
		    double gam = dx2 < 0 ? 0.7 : (dx2 > 0 ? 1.2 : 1);
		    sigma[j] *= gam;
		    if (!nlopt_isinf(ub[j]) && !nlopt_isinf(lb[j])) {
			 sigma[j] = MIN(sigma[j], 10*(ub[j]-lb[j]));
			 sigma[j] = MAX(sigma[j], 0.01*(ub[j]-lb[j]));
		    }
	       }
	       for (j = 0; j < MIN(mma_verbose, n); ++j)
		    printf("                 MMA sigma[%d] -> %g\n", 
			   j, sigma[j]);
	  }
     }

 done:
     /* sigma is the base of the single workspace allocation */
     free(sigma);
     return ret;
}
Exemple #2
0
/* Objective-value stopping test: returns 1 when f has reached the target
   value s->minf_max (f <= minf_max) or when nlopt_stop_ftol reports that
   the change from oldf to f satisfies the function tolerance; 0 otherwise.
   The target-value test is tried first, so nlopt_stop_ftol is only
   consulted when it fails. */
int nlopt_stop_f(const nlopt_stopping *s, double f, double oldf)
{
     if (f <= s->minf_max)
	  return 1;
     return nlopt_stop_ftol(s, f, oldf) != 0;
}
Exemple #3
0
/* Augmented-Lagrangian minimizer (the parameter choices follow Birgin &
   Martinez, as noted in the body).

   Minimizes f subject to the inequality constraints fc, the equality
   constraints h and the bounds lb/ub by repeatedly running sub_opt on
   the penalized objective `auglag`, then updating the multipliers
   lambda (equalities) and mu (inequalities) and the penalty weight rho
   from the constraint values at the point sub_opt returned.

   n          number of variables (length of x, lb, ub)
   f, f_data  objective, called as f(n, x, grad, f_data)
   m, fc      number of inequality-constraint entries and the entries
   p, h       number of equality-constraint entries and the entries
   lb, ub     bounds, passed straight to sub_opt
   x          in: starting point; out: best point found
   minf       out: objective value at x
   stop       stopping criteria; *stop->nevals_p is incremented for every
              evaluation of f made directly by this routine
   sub_opt    optimizer for the subproblem; its objective, bounds,
              stopval and constraints are overwritten here
   sub_has_fc nonzero: fc is handed to sub_opt as real inequality
              constraints and only h is penalized; zero: both fc and h
              go into the penalty function

   Returns a negative code if configuring or running sub_opt fails,
   NLOPT_OUT_OF_MEMORY if the workspace cannot be allocated,
   NLOPT_FORCED_STOP, or the NLOPT_*_REACHED code for the criterion that
   ended the iteration. */
nlopt_result auglag_minimize(int n, nlopt_func f, void *f_data,
			     int m, nlopt_constraint *fc,
			     int p, nlopt_constraint *h,
			     const double *lb, const double *ub, /* bounds */
			     double *x, /* in: initial guess, out: minimizer */
			     double *minf,
			     nlopt_stopping *stop,
			     nlopt_opt sub_opt, int sub_has_fc)
{
     auglag_data d;
     nlopt_result ret = NLOPT_SUCCESS;
     double ICM = HUGE_VAL, minf_penalty = HUGE_VAL, penalty;
     double *xcur = NULL, fcur;
     int i, ii, feasible, minf_feasible = 0;
     unsigned int k;
     int auglag_iters = 0;
     int max_constraint_dim;

     /* magic parameters from Birgin & Martinez */
     const double tau = 0.5, gam = 10;
     const double lam_min = -1e20, lam_max = 1e20, mu_max = 1e20;

     d.f = f; d.f_data = f_data;
     d.m = m; d.fc = fc;
     d.p = p; d.h = h;
     d.stop = stop;

     /* whether we handle inequality constraints via the augmented
	Lagrangian penalty function, or directly in the sub-algorithm.
	Afterwards exactly one of d.m (penalized) and m (forwarded to
	sub_opt) is nonzero. */
     if (sub_has_fc)
	  d.m = 0;
     else
	  m = 0;

     max_constraint_dim = MAX(nlopt_max_constraint_dim(d.m, fc),
			      nlopt_max_constraint_dim(d.p, h));

     /* total number of scalar constraint values handled by the penalty */
     d.mm = nlopt_count_constraints(d.m, fc);
     d.pp = nlopt_count_constraints(d.p, h);

     /* configure sub_opt; nothing has been allocated yet, so returning
	directly on failure leaks nothing */
     ret = nlopt_set_min_objective(sub_opt, auglag, &d); if (ret<0) return ret;
     ret = nlopt_set_lower_bounds(sub_opt, lb); if (ret<0) return ret;
     ret = nlopt_set_upper_bounds(sub_opt, ub); if (ret<0) return ret;
     /* the caller's stopval only makes sense for the subproblem when no
	penalty terms are added to the objective */
     ret = nlopt_set_stopval(sub_opt, 
			     d.m==0 && d.p==0 ? stop->minf_max : -HUGE_VAL);
     if (ret<0) return ret;
     ret = nlopt_remove_inequality_constraints(sub_opt); if (ret<0) return ret;
     ret = nlopt_remove_equality_constraints(sub_opt); if (ret<0) return ret;
     /* m is nonzero here only when sub_has_fc: forward fc to sub_opt */
     for (i = 0; i < m; ++i) {
	  if (fc[i].f)
	       ret = nlopt_add_inequality_constraint(sub_opt,
						     fc[i].f, fc[i].f_data,
						     fc[i].tol[0]);
	  else
	       ret = nlopt_add_inequality_mconstraint(sub_opt, fc[i].m, 
						      fc[i].mf, fc[i].f_data,
						      fc[i].tol);
	  if (ret < 0) return ret;
     }

     /* single workspace: xcur[n], restmp[max_constraint_dim],
	gradtmp[n*max_constraint_dim], lambda[pp], mu[mm] */
     xcur = (double *) malloc(sizeof(double) * (n
						+ max_constraint_dim * (1 + n)
						+ d.pp + d.mm));
     if (!xcur) return NLOPT_OUT_OF_MEMORY;
     memcpy(xcur, x, sizeof(double) * n);

     d.restmp = xcur + n;
     d.gradtmp = d.restmp + max_constraint_dim;
     /* the memset runs past gradtmp on purpose: it also zeroes lambda and
	mu, which follow it contiguously */
     memset(d.gradtmp, 0, sizeof(double) * (n*max_constraint_dim + d.pp+d.mm));
     d.lambda = d.gradtmp + n * max_constraint_dim;
     d.mu = d.lambda + d.pp;

     *minf = HUGE_VAL;

     /* starting rho suggested by B & M */
     if (d.p > 0 || d.m > 0) {
	  double con2 = 0; /* sum of squared constraint violations */
	  ++ *(d.stop->nevals_p);
	  fcur = f(n, xcur, NULL, f_data);
	  if (nlopt_stop_forced(stop)) {
	       ret = NLOPT_FORCED_STOP; goto done; }
	  penalty = 0;
	  feasible = 1;
	  for (i = 0; i < d.p; ++i) {
	       nlopt_eval_constraint(d.restmp, NULL, d.h + i, n, xcur);
	       if (nlopt_stop_forced(stop)) {
		    ret = NLOPT_FORCED_STOP; goto done; }
	       for (k = 0; k < d.h[i].m; ++k) {
		    double hi = d.restmp[k];
		    penalty += fabs(hi);
		    feasible = feasible && fabs(hi) <= h[i].tol[k];
		    con2 += hi * hi;
	       }
	  }
	  for (i = 0; i < d.m; ++i) {
	       nlopt_eval_constraint(d.restmp, NULL, d.fc + i, n, xcur);
	       if (nlopt_stop_forced(stop)) {
		    ret = NLOPT_FORCED_STOP; goto done; }
	       for (k = 0; k < d.fc[i].m; ++k) {
		    double fci = d.restmp[k];
		    penalty += fci > 0 ? fci : 0;
		    feasible = feasible && fci <= fc[i].tol[k];
		    if (fci > 0) con2 += fci * fci;
	       }
	  }
	  *minf = fcur;
	  minf_penalty = penalty;
	  minf_feasible = feasible;
	  /* con2 is zero when the starting point violates no constraint.
	     Dividing by it gives +inf (which the clamp turns into 10) or,
	     when *minf is zero as well, 0/0 = NaN -- and comparison-based
	     MIN/MAX macros would hand that NaN on to rho and to every
	     multiplier update.  Use the upper clamp directly in that case;
	     the result is unchanged wherever the old expression was
	     well defined. */
	  if (con2 > 0)
	       d.rho = MAX(1e-6, MIN(10, 2 * fabs(*minf) / con2));
	  else
	       d.rho = 10;
     }
     else
	  d.rho = 1; /* whatever, doesn't matter */

     if (auglag_verbose) {
	  printf("auglag: initial rho=%g\nauglag initial lambda=", d.rho);
	  for (i = 0; i < d.pp; ++i) printf(" %g", d.lambda[i]);
	  printf("\nauglag initial mu = ");
	  for (i = 0; i < d.mm; ++i) printf(" %g", d.mu[i]);
	  printf("\n");
     }

     do {
	  double prev_ICM = ICM;
	  
	  /* run the subproblem with whatever evaluation and time budget
	     is left */
	  ret = nlopt_optimize_limited(sub_opt, xcur, &fcur,
				       stop->maxeval - *(stop->nevals_p),
				       stop->maxtime - (nlopt_seconds() 
							- stop->start));
	  if (auglag_verbose)
	       printf("auglag: subopt return code %d\n", ret);
	  if (ret < 0) break;
	  
	  /* sub_opt reported the penalized value; re-evaluate the plain
	     objective at its result */
	  ++ *(d.stop->nevals_p);
	  fcur = f(n, xcur, NULL, f_data);
	  if (nlopt_stop_forced(stop)) {
	       ret = NLOPT_FORCED_STOP; goto done; }
	  if (auglag_verbose)
	       printf("auglag: fcur = %g\n", fcur);
	  
	  /* multiplier updates; ii walks the flattened lambda / mu arrays
	     while i walks the constraint entries */
	  ICM = 0;
	  penalty = 0;
	  feasible = 1;
	  for (i = ii = 0; i < d.p; ++i) {
	       nlopt_eval_constraint(d.restmp, NULL, d.h + i, n, xcur);
	       if (nlopt_stop_forced(stop)) {
		    ret = NLOPT_FORCED_STOP; goto done; }
	       for (k = 0; k < d.h[i].m; ++k) {
		    double hi = d.restmp[k];
		    double newlam = d.lambda[ii] + d.rho * hi;
		    penalty += fabs(hi);
		    feasible = feasible && fabs(hi) <= h[i].tol[k];
		    ICM = MAX(ICM, fabs(hi));
		    d.lambda[ii++] = MIN(MAX(lam_min, newlam), lam_max);
	       }
	  }
	  for (i = ii = 0; i < d.m; ++i) {
	       nlopt_eval_constraint(d.restmp, NULL, d.fc + i, n, xcur);
	       if (nlopt_stop_forced(stop)) {
		    ret = NLOPT_FORCED_STOP; goto done; }
	       for (k = 0; k < d.fc[i].m; ++k) {
		    double fci = d.restmp[k];
		    double newmu = d.mu[ii] + d.rho * fci;
		    penalty += fci > 0 ? fci : 0;
		    feasible = feasible && fci <= fc[i].tol[k];
		    ICM = MAX(ICM, fabs(MAX(fci, -d.mu[ii] / d.rho)));
		    d.mu[ii++] = MIN(MAX(0.0, newmu), mu_max);
	       }
	  }
	  /* ICM did not shrink by at least the factor tau: raise the
	     penalty weight */
	  if (ICM > tau * prev_ICM) {
	       d.rho *= gam;
	  }

	  auglag_iters++;
	  
	  if (auglag_verbose) {
	       printf("auglag %d: ICM=%g (%sfeasible), rho=%g\nauglag lambda=",
		      auglag_iters, ICM, feasible ? "" : "not ", d.rho);
	       for (i = 0; i < d.pp; ++i) printf(" %g", d.lambda[i]);
	       printf("\nauglag %d: mu = ", auglag_iters);
	       for (i = 0; i < d.mm; ++i) printf(" %g", d.mu[i]);
	       printf("\n");
	  }

	  /* accept xcur as the new best point when it is feasible and
	     improves on the stored point (or the stored point is not
	     feasible), or when neither is feasible but xcur has the
	     smaller total violation */
	  if ((feasible && (!minf_feasible || penalty < minf_penalty
			    || fcur < *minf)) || 
	      (!minf_feasible && penalty < minf_penalty)) {
	       ret = NLOPT_SUCCESS;
	       /* convergence is only tested on feasible points, and
		  against the previous best before it is overwritten */
	       if (feasible) {
		    if (fcur < stop->minf_max) 
			 ret = NLOPT_MINF_MAX_REACHED;
		    else if (nlopt_stop_ftol(stop, fcur, *minf)) 
			 ret = NLOPT_FTOL_REACHED;
		    else if (nlopt_stop_x(stop, xcur, x))
			 ret = NLOPT_XTOL_REACHED;
	       }
	       *minf = fcur;
	       minf_penalty = penalty;
	       minf_feasible = feasible;
	       memcpy(x, xcur, sizeof(double) * n);
	       if (ret != NLOPT_SUCCESS) break;
	  }

	  if (nlopt_stop_forced(stop)) {ret = NLOPT_FORCED_STOP; break;}
	  if (nlopt_stop_evals(stop)) {ret = NLOPT_MAXEVAL_REACHED; break;}
	  if (nlopt_stop_time(stop)) {ret = NLOPT_MAXTIME_REACHED; break;}

	  /* TODO: use some other stopping criterion on ICM? */
	  /* The paper uses ICM <= epsilon and DFM <= epsilon, where
	     DFM is a measure of the size of the Lagrangian gradient.
	     Besides the fact that these kinds of absolute tolerances
	     (non-scale-invariant) are unsatisfying and it is not
	     clear how the user should specify it, the ICM <= epsilon
	     condition seems not too different from requiring feasibility,
	     especially now that the user can provide constraint-specific
	     tolerances analogous to epsilon. */
	  if (ICM == 0) {ret = NLOPT_FTOL_REACHED; break;}
     } while (1);

done:
     /* xcur is the base of the single workspace allocation */
     free(xcur);
     return ret;
}
Exemple #4
0
/* Internal version of nldrmd_minimize, intended to be used as
   a subroutine for the subplex method.  Three differences compared
   to nldrmd_minimize:

   *minf should contain the value of f(x)  (so that we don't have to
   re-evaluate f at the starting x).

   if psi > 0, then it *replaces* xtol and ftol in stop with the condition
   that the simplex diameter |xl - xh| must be reduced by a factor of psi 
   ... this is for when nldrmd is used within the subplex method; for
   ordinary termination tests, set psi = 0. 

   scratch should contain an array of length >= (n+1)*(n+1) + 2*n,
   used as scratch workspace. 

   On output, *fdiff will contain the difference between the high
   and low function values of the last simplex.

   Other arguments:
     n          number of variables
     f, f_data  objective, called as f(n, x, NULL, f_data) (no gradient)
     lb, ub     bounds used when building the initial simplex and passed
                to reflectpt
     x          starting point; the initial simplex is x plus one point
                per coordinate, offset by xstep[i]
     xstep      initial step sizes, one per coordinate
     stop       stopping criteria

   Returns NLOPT_MINF_MAX_REACHED if the starting value is already below
   stop->minf_max, NLOPT_FAILURE if an initial simplex vertex coincides
   with x, NLOPT_OUT_OF_MEMORY if a tree insertion fails, and
   NLOPT_FTOL_REACHED / NLOPT_XTOL_REACHED on convergence.
   NOTE(review): CHECK_EVAL is a macro defined elsewhere; it presumably
   records improved points in x / *minf and may set ret and jump to done
   -- confirm against its definition. */
nlopt_result nldrmd_minimize_(int n, nlopt_func f, void *f_data,
			     const double *lb, const double *ub, /* bounds */
			     double *x, /* in: initial guess, out: minimizer */
			     double *minf,
			     const double *xstep, /* initial step sizes */
			     nlopt_stopping *stop,
			     double psi, double *scratch,
			     double *fdiff)
{
     double *pts; /* (n+1) x (n+1) array of n+1 points plus function val [0] */
     double *c; /* centroid * n */
     double *xcur; /* current point */
     rb_tree t; /* red-black tree of simplex, sorted by f(x) */
     int i, j;
     double ninv = 1.0 / n;
     nlopt_result ret = NLOPT_SUCCESS;
     double init_diam = 0;

     /* carve the caller's scratch buffer: pts, then c[n], then xcur[n] */
     pts = scratch;
     c = scratch + (n+1)*(n+1);
     xcur = c + n;

     rb_tree_init(&t, simplex_compare);

     *fdiff = HUGE_VAL;

     /* initialize the simplex based on the starting xstep */
     /* row 0 of pts is the starting point: [f(x), x[0..n-1]] */
     memcpy(pts+1, x, sizeof(double)*n);
     pts[0] = *minf;
     if (*minf < stop->minf_max) { ret=NLOPT_MINF_MAX_REACHED; goto done; }
     for (i = 0; i < n; ++i) {
	  /* row i+1 is x displaced along coordinate i */
	  double *pt = pts + (i+1)*(n+1);
	  memcpy(pt+1, x, sizeof(double)*n);
	  pt[1+i] += xstep[i];
	  if (pt[1+i] > ub[i]) {
	       if (ub[i] - x[i] > fabs(xstep[i]) * 0.1)
		    pt[1+i] = ub[i];
	       else /* ub is too close to pt, go in other direction */
		    pt[1+i] = x[i] - fabs(xstep[i]);
	  }
	  if (pt[1+i] < lb[i]) {
	       if (x[i] - lb[i] > fabs(xstep[i]) * 0.1)
		    pt[1+i] = lb[i];
	       else {/* lb is too close to pt, go in other direction */
		    pt[1+i] = x[i] + fabs(xstep[i]);
		    if (pt[1+i] > ub[i]) /* go towards further of lb, ub */
			 pt[1+i] = 0.5 * ((ub[i] - x[i] > x[i] - lb[i] ?
					   ub[i] : lb[i]) + x[i]);
	       }
	  }
	  /* NOTE(review): close() takes two doubles here, so it is a
	     project helper (presumably an "approximately equal" test),
	     not POSIX close() -- confirm.  A vertex that coincides with x
	     would make the simplex degenerate. */
	  if (close(pt[1+i], x[i])) { ret=NLOPT_FAILURE; goto done; }
	  pt[0] = f(n, pt+1, NULL, f_data);
	  CHECK_EVAL(pt+1, pt[0]);
     }

 restart:
     /* (re)build the tree from all n+1 rows of pts; the tree keys point
	into pts, so k[0] is the function value and k+1 the coordinates */
     for (i = 0; i < n + 1; ++i)
	  if (!rb_tree_insert(&t, pts + i*(n+1))) {
	       ret = NLOPT_OUT_OF_MEMORY;
	       goto done;
	  }

     while (1) {
	  rb_node *low = rb_tree_min(&t);
	  rb_node *high = rb_tree_max(&t);
	  double fl = low->k[0], *xl = low->k + 1;
	  double fh = high->k[0], *xh = high->k + 1;
	  double fr;

	  *fdiff = fh - fl;

	  if (init_diam == 0) /* initialize diam. for psi convergence test */
	       for (i = 0; i < n; ++i) init_diam += fabs(xl[i] - xh[i]);

	  /* the ftol test is skipped when psi > 0 */
	  if (psi <= 0 && nlopt_stop_ftol(stop, fl, fh)) {
	       ret = NLOPT_FTOL_REACHED;
	       goto done;
	  }

	  /* compute centroid ... if we cared about the performance of this,
	     we could do it iteratively by updating the centroid on
	     each step, but then we would have to be more careful about
	     accumulation of rounding errors... anyway n is unlikely to
	     be very large for Nelder-Mead in practical cases */
	  memset(c, 0, sizeof(double)*n);
	  for (i = 0; i < n + 1; ++i) {
	       double *xi = pts + i*(n+1) + 1;
	       /* pointer comparison: skip the row that is the worst vertex */
	       if (xi != xh)
		    for (j = 0; j < n; ++j)
			 c[j] += xi[j];
	  }
	  for (i = 0; i < n; ++i) c[i] *= ninv;

	  /* x convergence check: find xcur = max radius from centroid */
	  memset(xcur, 0, sizeof(double)*n);
	  for (i = 0; i < n + 1; ++i) {
               double *xi = pts + i*(n+1) + 1;
	       for (j = 0; j < n; ++j) {
		    double dx = fabs(xi[j] - c[j]);
		    if (dx > xcur[j]) xcur[j] = dx;
	       }
	  }
	  /* turn the per-coordinate radius into a point c + radius so it
	     can be compared against c by nlopt_stop_x */
	  for (i = 0; i < n; ++i) xcur[i] += c[i];
	  if (psi > 0) {
	       double diam = 0;
	       for (i = 0; i < n; ++i) diam += fabs(xl[i] - xh[i]);
	       if (diam < psi * init_diam) {
		    ret = NLOPT_XTOL_REACHED;
		    goto done;
	       }
	  }
	  else if (nlopt_stop_x(stop, c, xcur)) {
	       ret = NLOPT_XTOL_REACHED;
	       goto done;
	  }

	  /* reflection */
	  /* NOTE(review): reflectpt, alpha, gamm, beta and delta are defined
	     elsewhere; a zero return from reflectpt is treated as x
	     convergence at every call site below.  xcur is overwritten
	     with the trial point. */
	  if (!reflectpt(n, xcur, c, alpha, xh, lb, ub)) { 
	       ret=NLOPT_XTOL_REACHED; goto done; 
	  }
	  fr = f(n, xcur, NULL, f_data);
	  CHECK_EVAL(xcur, fr);

	  if (fr < fl) { /* new best point, expand simplex */
	       /* the expansion point is written over xh in place */
	       if (!reflectpt(n, xh, c, gamm, xh, lb, ub)) {
		    ret=NLOPT_XTOL_REACHED; goto done; 
	       }
	       fh = f(n, xh, NULL, f_data);
	       CHECK_EVAL(xh, fh);
	       if (fh >= fr) { /* expanding didn't improve */
		    fh = fr;
		    memcpy(xh, xcur, sizeof(double)*n);
	       }
	  }
	  else if (fr < rb_tree_pred(high)->k[0]) { /* accept new point */
	       memcpy(xh, xcur, sizeof(double)*n);
	       fh = fr;
	  }
	  else { /* new worst point, contract */
	       double fc;
	       /* the sign of the contraction factor depends on whether the
		  reflected point was worse than the current worst */
	       if (!reflectpt(n,xcur,c, fh <= fr ? -beta : beta, xh, lb,ub)) {
		    ret=NLOPT_XTOL_REACHED; goto done; 
	       }
	       fc = f(n, xcur, NULL, f_data);
	       CHECK_EVAL(xcur, fc);
	       if (fc < fr && fc < fh) { /* successful contraction */
		    memcpy(xh, xcur, sizeof(double)*n);
		    fh = fc;
	       }
	       else { /* failed contraction, shrink simplex */
		    /* every vertex except the best one moves and is
		       re-evaluated, so the ordering is rebuilt from
		       scratch via restart */
		    rb_tree_destroy(&t);
		    rb_tree_init(&t, simplex_compare);
		    for (i = 0; i < n+1; ++i) {
			 double *pt = pts + i * (n+1);
			 if (pt+1 != xl) {
			      if (!reflectpt(n,pt+1, xl,-delta,pt+1, lb,ub)) {
				   ret = NLOPT_XTOL_REACHED;
				   goto done;
			      }
			      pt[0] = f(n, pt+1, NULL, f_data);
			      CHECK_EVAL(pt+1, pt[0]);
			 }
		    }
		    goto restart;
	       }
	  }

	  /* xh was updated in place above; store its new value in the key
	     and let the tree move the node to its sorted position */
	  high->k[0] = fh;
	  rb_tree_resort(&t, high);
     }
     
done:
     rb_tree_destroy(&t);
     return ret;
}