Ejemplo n.º 1
0
/*
 * Run the local optimizer p->local_alg inside one rectangle.
 *
 * r is the rectangle record.  The n doubles starting at r + 3 are the point
 * x handed to nlopt_minimize, followed by n center coordinates (c) and n
 * widths (w); the search box is c -/+ w/2.  r[1] is set to -minf after the
 * local search.
 *
 * The evaluation count and elapsed time in p->stop are checked first, and
 * the local optimizer is given only what remains of the global budget.
 *
 * Returns:
 *   NLOPT_MAXEVAL_REACHED / NLOPT_MAXTIME_REACHED if the global budget is
 *     already exhausted on entry (the local search is not run);
 *   the (non-positive) nlopt_minimize result if the local search failed;
 *   NLOPT_MINF_MAX_REACHED if the local search improved on p->minf and
 *     stopped because minf_max was reached;
 *   NLOPT_SUCCESS otherwise.
 * When the local search improves on p->minf, p->minf and p->xmin are updated.
 */
static nlopt_result optimize_rect(double *r, params *p)
{
     int i, n = p->n;
     double *lb = p->work, *ub = lb + n;
     double *x = r + 3, *c = x + n, *w = c + n;
     double t = nlopt_seconds();
     double minf;
     nlopt_stopping *stop = p->stop;
     nlopt_result ret;
     int evals_left, local_evals;

     if (stop->maxeval > 0 && stop->nevals >= stop->maxeval)
          return NLOPT_MAXEVAL_REACHED;
     if (stop->maxtime > 0 && t - stop->start >= stop->maxtime)
          return NLOPT_MAXTIME_REACHED;

     /* bounds of this rectangle: center -/+ half the width */
     for (i = 0; i < n; ++i) {
          lb[i] = c[i] - 0.5 * w[i];
          ub[i] = c[i] + 0.5 * w[i];
     }

     /* Evaluation budget for the local search.  After the checks above,
        evals_left is > 0 exactly when a global limit is set; a value <= 0
        means "no limit" to nlopt_minimize.  The per-rectangle cap
        p->local_maxeval must therefore win whenever it is positive and
        either tighter than the global remainder or the only limit present
        (a plain MIN() would pick the non-positive remainder and drop the
        cap when the global limit is disabled). */
     evals_left = stop->maxeval - stop->nevals;
     local_evals = evals_left;
     if (p->local_maxeval > 0
         && (evals_left <= 0 || p->local_maxeval < evals_left))
          local_evals = p->local_maxeval;

     ret = nlopt_minimize(p->local_alg, n, fcount, p,
                          lb, ub, x, &minf,
                          stop->minf_max, stop->ftol_rel, stop->ftol_abs,
                          stop->xtol_rel, stop->xtol_abs,
                          local_evals,
                          stop->maxtime - (t - stop->start));
     r[1] = -minf;
     if (ret > 0) {
          if (minf < p->minf) {
               /* new best point overall */
               p->minf = minf;
               memcpy(p->xmin, x, sizeof(double) * n);
               if (ret == NLOPT_MINF_MAX_REACHED) return ret;
          }
          return NLOPT_SUCCESS;
     }
     return ret;
}
Ejemplo n.º 2
0
/*
 * Objective function handed to nlopt_minimize by
 * material_grids_match_epsilon_fileB.
 *
 *   n     number of optimization variables (length of u, grad and d->work)
 *   u     current variable values; written into the material grids with
 *         material_grids_set before epsilon is reset
 *   grad  if non-NULL, receives the gradient: the per-process contributions
 *         accumulated in d->work, combined by mpi_allreduce with MPI_SUM
 *   data  a match_eps_data * (target data d->eps with dimensions
 *         d->eps_nx/ny/nz, the grids, a work array and an iteration counter)
 *
 * For every grid point visited, epsilon = mean_medium_from_matrix(
 * mdata->eps_inv + index) is compared with eps0, the value interpolated from
 * d->eps at the same fractional position, and (epsilon - eps0)^2 is summed.
 * The sum is scaled by 1/H.N and reduced over processes; that value is
 * returned.  Each call increments d->iter and prints the iteration number
 * and sqrt(val).
 */
static double match_eps_func(int n, const double *u, double *grad, void *data)
{
    match_eps_data *d = (match_eps_data *) data;
    double *eps = d->eps, *work = d->work;
    int eps_nx = d->eps_nx, eps_ny = d->eps_ny, eps_nz = d->eps_nz;
    material_grid *grids = d->grids;
    int ngrids = d->ngrids;
    double scaleby = 1.0 / H.N, val = 0;

    int i, j, k, n1, n2, n3, n_other, n_last, rank, last_dim;
#ifdef HAVE_MPI
    int local_n2, local_y_start, local_n3;
#endif
    real s1, s2, s3, c1, c2, c3;

    /* install the trial variables and rebuild epsilon from them */
    material_grids_set(u, d->grids, d->ngrids);
    reset_epsilon();
    /* the gradient is accumulated in work, so start from zero */
    if (grad) memset(work, 0, sizeof(double) * n);
    d->iter++;

    n1 = mdata->nx;
    n2 = mdata->ny;
    n3 = mdata->nz;
    n_other = mdata->other_dims;
    n_last = mdata->last_dim_size / (sizeof(scalar_complex)/sizeof(scalar));
    last_dim = mdata->last_dim;
    rank = (n3 == 1) ? (n2 == 1 ? 1 : 2) : 3;

    /* s1..s3: lattice size divided by the number of grid points per axis;
       c1..c3: half the lattice size, or 0 along an axis with <= 1 point.
       Both are used below to turn (i2,j2,k2) into the point p. */
    s1 = geometry_lattice.size.x / n1;
    s2 = geometry_lattice.size.y / n2;
    s3 = geometry_lattice.size.z / n3;
    c1 = n1 <= 1 ? 0 : geometry_lattice.size.x * 0.5;
    c2 = n2 <= 1 ? 0 : geometry_lattice.size.y * 0.5;
    c3 = n3 <= 1 ? 0 : geometry_lattice.size.z * 0.5;

    /* Here we have different loops over the coordinates, depending
       upon whether we are using complex or real and serial or
       parallel transforms.  Each loop must define, in its body,
       variables (i2,j2,k2) describing the coordinate of the current
       point, and "index" describing the corresponding index in
       the curfield array (here it is used to index mdata->eps_inv).

       Exactly one of the four loop headers below is compiled; each
       opens the brace of the innermost loop body, which is closed
       after the shared body further down.

       This was all stolen from fields.c...it would be better
       if we didn't have to cut and paste, sigh. */

#ifdef SCALAR_COMPLEX

#  ifndef HAVE_MPI

    for (i = 0; i < n1; ++i)
        for (j = 0; j < n2; ++j)
            for (k = 0; k < n3; ++k)
            {
                int i2 = i, j2 = j, k2 = k;
                int index = ((i * n2 + j) * n3 + k);

#  else /* HAVE_MPI */

    local_n2 = mdata->local_ny;
    local_y_start = mdata->local_y_start;

    /* first two dimensions are transposed in MPI output: */
    for (j = 0; j < local_n2; ++j)
        for (i = 0; i < n1; ++i)
            for (k = 0; k < n3; ++k)
            {
                int i2 = i, j2 = j + local_y_start, k2 = k;
                int index = ((j * n1 + i) * n3 + k);

#  endif /* HAVE_MPI */

#else /* not SCALAR_COMPLEX */

#  ifndef HAVE_MPI

    for (i = 0; i < n_other; ++i)
        for (j = 0; j < n_last; ++j)
        {
            int index = i * n_last + j;
            int i2, j2, k2;
            /* unpack (i, j) into 3d coordinates according to the rank */
            switch (rank) {
            case 2:
                i2 = i;
                j2 = j;
                k2 = 0;
                break;
            case 3:
                i2 = i / n2;
                j2 = i % n2;
                k2 = j;
                break;
            default:
                i2 = j;
                j2 = k2 = 0;
                break;
            }

#  else /* HAVE_MPI */

    local_n2 = mdata->local_ny;
    local_y_start = mdata->local_y_start;

    /* For a real->complex transform, the last dimension is cut in
       half.  For a 2d transform, this is taken into account in local_ny
       already, but for a 3d transform we must compute the new n3: */
    if (n3 > 1)
        local_n3 = mdata->last_dim_size / 2;
    else
        local_n3 = 1;

    /* first two dimensions are transposed in MPI output: */
    for (j = 0; j < local_n2; ++j)
        for (i = 0; i < n1; ++i)
            for (k = 0; k < local_n3; ++k)
            {
                /* i2 and k2 are macros here rather than variables; note
                   that they are not #undef'd anywhere in this function */
#         define i2 i
                int j2 = j + local_y_start;
#         define k2 k
                int index = ((j * n1 + i) * local_n3 + k);

#  endif /* HAVE_MPI */

#endif /* not SCALAR_COMPLEX */

                /* shared loop body: squared mismatch at this point and,
                   if requested, its contribution to the gradient */
                {
                    real epsilon, eps0;
                    double scalegrad;
                    vector3 p;

                    epsilon = mean_medium_from_matrix(mdata->eps_inv + index);
                    eps0 = linear_interpolate((i2 + 0.5) / n1,
                                              (j2 + 0.5) / n2,
                                              (k2 + 0.5) / n3,
                                              eps, eps_nx, eps_ny, eps_nz, 1);
                    val += (epsilon - eps0) * (epsilon - eps0);
                    /* derivative of scaleby * (epsilon - eps0)^2 w.r.t. epsilon */
                    scalegrad = 2.0 * scaleby * (epsilon - eps0);

                    if (grad) {
                        p.x = i2 * s1 - c1;
                        p.y = j2 * s2 - c2;
                        p.z = k2 * s3 - c3;
                        material_grids_addgradient_point(work, p, scalegrad,
                                                         grids, ngrids);
                    }

#ifndef SCALAR_COMPLEX
                    /* Real-scalar build only: unless the index along the
                       last dimension is 0 or exactly half of last_dim, also
                       add the term for the mirrored point (n - i2, n - j2,
                       n - k2), using the same epsilon.  Presumably this
                       accounts for the half of the last dimension that the
                       real->complex layout does not store -- confirm. */
                    {
                        int last_index;
#  ifdef HAVE_MPI
                        if (n3 == 1)
                            last_index = j + local_y_start;
                        else
                            last_index = k;
#  else
                        last_index = j;
#  endif

                        if (last_index != 0 && 2*last_index != last_dim) {
                            int i2c, j2c, k2c;
                            i2c = i2 ? (n1 - i2) : 0;
                            j2c = j2 ? (n2 - j2) : 0;
                            k2c = k2 ? (n3 - k2) : 0;

                            eps0 = linear_interpolate((i2c + 0.5) / n1,
                                                      (j2c + 0.5) / n2,
                                                      (k2c + 0.5) / n3, eps,
                                                      eps_nx, eps_ny, eps_nz, 1);
                            val += (epsilon - eps0) * (epsilon - eps0);

                            /* NOTE(review): scalegrad is not recomputed
                               here; it still holds the value derived from
                               the eps0 of the unmirrored point.  Confirm
                               that this is intended. */
                            if (grad) {
                                p.x = i2c * s1 - c1;
                                p.y = j2c * s2 - c2;
                                p.z = k2c * s3 - c3;

                                material_grids_addgradient_point(work, p,
                                                                 scalegrad,
                                                                 grids, ngrids);
                            }
                        }
                    }
#endif /* !SCALAR_COMPLEX */
                }
            }
    if (grad) /* gradient w.r.t. epsilon needs to be summed over processes */
        mpi_allreduce(work, grad, n, double, MPI_DOUBLE,
                      MPI_SUM, mpb_comm);
    {
        /* scale the local sum, then sum it over processes into val */
        double valtmp = val * scaleby;
        mpi_allreduce(&valtmp, &val, 1, double, MPI_DOUBLE,
                      MPI_SUM, mpb_comm);
    }
    mpi_one_printf("match-epsilon-file:, %d, %g\n", d->iter, sqrt(val));
    return val;
}

/*
 * Adjust the material-grid variables so that epsilon matches the dataset
 * stored in an epsilon file.
 *
 *   filename  file opened with matrixio_open_serial; its real dataset (up to
 *             rank 3) is read into d.eps
 *   eps_tol   absolute tolerance applied to every optimization variable
 *             (passed to nlopt_minimize as xtol_abs)
 *
 * The variables are bounded above by 1 and below by 0, or by 1e-4 if any
 * grid has material_grid_kind == U_PROD; starting values below the lower
 * bound are raised to it.  match_eps_func is minimized with NLOPT_LD_MMA,
 * the result is written back with material_grids_set, epsilon is reset and
 * a summary line is printed.
 *
 * Failures (missing dataset, allocation failure, optimizer failure, or a
 * build without NLopt) are reported through CHECK.
 */
void material_grids_match_epsilon_fileB(string filename, number eps_tol)
{
    int dims[3] = {1,1,1}, rank = 3;
    matrixio_id file_id;
    match_eps_data d;
    int i, n, have_uprod;
    double *u, *lb, *ub, *u_tol, func_min;

    file_id = matrixio_open_serial(filename, 1);
    d.eps = matrixio_read_real_data(file_id, NULL, &rank,dims, 0,0,0,0);
    CHECK(d.eps, "couldn't find dataset in epsilon file");
    matrixio_close(file_id);

    d.eps_nx = dims[0];
    d.eps_ny = dims[1];
    d.eps_nz = dims[2];

    d.grids = get_material_grids(geometry, &d.ngrids);
    d.iter = 0;

    /* one allocation carved into five arrays of n doubles each:
       u | lb | ub | u_tol | d.work */
    n = material_grids_ntot(d.grids, d.ngrids);
    u = (double *) malloc(sizeof(double) * n * 5);
    /* malloc may legitimately return NULL for a zero-size request, so only
       treat NULL as a failure when memory was actually needed */
    CHECK(u != NULL || n == 0, "out of memory in match-epsilon-file");
    lb = u + n;
    ub = lb + n;
    u_tol = ub + n;
    d.work = u_tol + n;

    material_grids_get(u, d.grids, d.ngrids);

    /* is there at least one U_PROD grid? */
    have_uprod = 0;
    for (i = 0; i < d.ngrids; ++i) {
        if (d.grids[i].material_grid_kind == U_PROD) {
            have_uprod = 1;
            break;
        }
    }

    for (i = 0; i < n; ++i) {
        ub[i] = 1;
        u_tol[i] = eps_tol;
        lb[i] = have_uprod ? 1e-4 : 0;
        if (u[i] < lb[i]) u[i] = lb[i];
    }

#if defined(HAVE_NLOPT_H) && defined(HAVE_NLOPT)
    {
        nlopt_result res;
        /* stopping criteria: no minf_max (-HUGE_VAL), no f tolerances,
           no relative x tolerance, absolute x tolerance u_tol, and no
           limit on evaluations or time */
        res = nlopt_minimize(NLOPT_LD_MMA, n, match_eps_func, &d,
                             lb, ub, u, &func_min,
                             -HUGE_VAL, 0,0, 0,u_tol, 0,0);
        CHECK(res > 0, "failure of nlopt_minimize");
    }
#else
    /* func_min is never set on this path; the code below relies on CHECK
       not returning -- NOTE(review): confirm against CHECK's definition */
    CHECK(0, "nlopt library is required for match-epsilon-file");
#endif

    material_grids_set(u, d.grids, d.ngrids);
    reset_epsilon();

    mpi_one_printf("match-epsilon-file converged to %g after %d iterations\n",
                   sqrt(func_min), d.iter);

    free(u);
    free(d.eps);
}
Ejemplo n.º 3
0
// Local search started from the trial point T.xvals.
//
//   T        in: starting point (T.xvals); out: final point and objective
//            value (T.xvals, T.objval)
//   box      box whose dimension fixes n; also queried for previously found
//            stationary points (CloseToMin) and for side lengths
//   domain   passed to box.OutsideBox together with the point being tested
//   eps_cl   tolerance passed to box.CloseToMin
//   mgr      in/out: running maximum of the gradient infinity-norm
//   glob     supplies the objective and gradient (ObjectiveGradient)
//   axis     -1 for a search in all n variables; otherwise x(0) is copied
//            into x_av(axis) and only component 'axis' of the gradient is
//            used (the 1D minimization mentioned below)
//   x_av     point used for the evaluations when axis != -1
//   stop     NLopt stopping data; only part of the signature when
//            NLOPT_UTIL_H is defined
//
// Returns one of the LS_* codes: LS_Old if the start or an accepted iterate
// is close to a known stationary point, LS_MaxIter after max_iter*n
// iterations, LS_Out if an iterate left the domain or the search ended
// while outside the box, otherwise LS_New.  The process is terminated with
// exit(1) if the starting point is outside the boundary or if the search
// becomes unstable (LS_Unstable).
//
// The active code path (the #else branch of "#if 0") is the original STOgo
// trust-region iteration with radius delta and BFGS updates of B and H.
int local(Trial &T, TBox &box, TBox &domain, double eps_cl, double *mgr,
          Global &glob, int axis, RCRVector x_av
#ifdef NLOPT_UTIL_H
      , nlopt_stopping *stop
#endif
      ) {

  int n=box.GetDim();
  RVector x(n);
  double tmp, f;

  x=T.xvals ;

#ifdef LS_DEBUG
  cout << "Local Search, x=" << x << endl;
#endif

  if (box.OutsideBox(x, domain) != 0) {
    cout << "Starting point is not inside the boundary. Exiting...\n" ;
    exit(1) ;
    // not reached: exit() above does not return
    return LS_Out ;
  }

  // Check if we are close to a stationary point located previously
  if (box.CloseToMin(x, &tmp, eps_cl)) {
#ifdef LS_DEBUG
     cout << "Close to a previously located stationary point, exiting" << endl;
#endif
     T.objval=tmp;
     return LS_Old ;
   }

#if 0
  // Disabled alternative: delegate the local search to NLopt's L-BFGS.
  // NOTE(review): the last argument below is stop->maxtime - stop->start,
  // i.e. a time limit minus a start time rather than minus the elapsed
  // time; check this before re-enabling the branch.

  if (axis != -1) {
    cout << "NLopt code only works with axis == -1, exiting...\n" ;
    exit(EXIT_FAILURE);
  }
  f_local_data data;
  data.glob = &glob;
  data.maxgrad = *mgr;
  data.stop = stop;
  nlopt_result ret = nlopt_minimize(NLOPT_LOCAL_LBFGS, n, f_local, &data,
                    box.lb.raw_data(), box.ub.raw_data(),
                    x.raw_data(), &f,
                    stop->minf_max,
                    stop->ftol_rel, stop->ftol_abs,
                    stop->xtol_rel, stop->xtol_abs,
                    stop->maxeval - stop->nevals,
                    stop->maxtime - stop->start);
  *mgr = data.maxgrad;
  T.xvals=x ; T.objval=f ;
  if (ret == NLOPT_MAXEVAL_REACHED || ret == NLOPT_MAXTIME_REACHED)
    return LS_MaxEvalTime;
  else if (ret > 0)
    return LS_New;
  else
    return LS_Out; // failure

#else /* not using NLopt local optimizer ... use original STOgo BFGS code */

  int k_max, info, outside = 0;
  int k, i, good_enough, iTmp ;

  double maxgrad, delta, f_new;
  double alpha, gamma, beta, d2, s2, nom, den, ro ;
  double nrm_sd, nrm_hn, snrm_hn, nrm_dl ;
  // g: gradient; h_sd: steepest-descent step; h_n: Newton step;
  // h_dl: the step actually taken; x_new/g_new: trial point and its gradient
  RVector g(n), h_sd(n), h_dl(n), h_n(n), x_new(n), g_new(n) ;
  RVector s(n),y(n),z(n),w(n) ; // Temporary vectors
  RMatrix B(n), H(n) ;          // Hessian and its inverse

  k_max = max_iter*n ;

  // Initially B and H are equal to the identity matrix
  B=0 ; H=0 ;
  for (i=0 ; i<n ; i++) {
    B(i,i)=1 ;
    H(i,i)=1 ;
  }

  // Objective and gradient at the starting point
  RVector g_av(x_av.GetLength());
  if (axis==-1) {
    f=glob.ObjectiveGradient(x,g,OBJECTIVE_AND_GRADIENT);
  }
  else {
    x_av(axis)=x(0);
    f=glob.ObjectiveGradient(x_av,g_av,OBJECTIVE_AND_GRADIENT);
    g(0)=g_av(axis);
  }
  // IF_NLOPT_CHECK_EVALS is a macro defined outside this function;
  // presumably it can end the search when NLopt's limits are hit -- confirm
  IF_NLOPT_CHECK_EVALS;
  FC++;GC++;

  // Choose the initial trust-region radius delta; which scheme is compiled
  // depends on INI1/INI2/INI3
  if (axis == -1) {
    // Skipping AV
#ifdef INI3
    // Elaborate scheme to initialize delta
    delta=delta_coef*norm2(g) ;
    copy(g,z) ;
    axpy(1.0,x,z) ;
    if (!box.InsideBox(z)) {
      if (box.Intersection(x,g,z)==TRUE) {
    axpy(-1.0,x,z) ;
    delta=min(delta,delta_coef*norm2(z)) ;
      }
      else {
    // Algorithm broke down, use INI1
        delta = (1.0/7)*box.ShortestSide(&iTmp) ;
      }
    }
#endif
#ifdef INI2
    // Use INI2 scheme
    delta = box.ClosestSide(x)*delta_coef ;
    if (delta<MacEpsilon)
      // Patch to avoid trust region with radius close to zero
      delta = (1.0/7)*box.ShortestSide(&iTmp) ;
#endif
#ifdef INI1
    delta = delta_coef*box.ShortestSide(&iTmp) ;
#endif
  }
  else {
    // Use a simple scheme for the 1D minimization (INI1)
    delta = (1.0/7.0)*box.ShortestSide(&iTmp) ;
  }

  k=0 ; good_enough = 0 ; info=LS_New ; outside=0 ;
  maxgrad=*mgr ;
  while (good_enough == 0) {
    k++ ;
    if (k>k_max) {
#ifdef LS_DEBUG
      cout << "Maximum number of iterations reached\n" ;
#endif
      info=LS_MaxIter ;
      break ;
    }

    // Update maximal gradient value
    maxgrad=max(maxgrad,normInf(g)) ;

    // Steepest descent, h_sd = -g
    copy(g,h_sd) ;
    scal(-1.0,h_sd) ;
    nrm_sd=norm2(h_sd) ;

    if (nrm_sd < epsilon) {
      // Stop criterion (gradient) fulfilled
#ifdef LS_DEBUG
      cout << "Gradient small enough" << endl ;
#endif
      good_enough = 1 ;
      break ;
    }

    // Compute Newton step, h_n = -H*g
    gemv('N',-1.0, H, g, 0.0, h_n) ;
    nrm_hn = norm2(h_n) ;

    if (nrm_hn < delta) {
      // Pure Newton step
      copy(h_n, h_dl) ;
#ifdef LS_DEBUG
      cout << "[Newton step]      " ;
#endif
    }
    else {
      // Newton step is longer than delta: scale the steepest-descent step
      // by alpha = |g|^2 / (g'Bg) and decide between it and a mixed step
      gemv('N',1.0,B,g,0.0,z) ;
      tmp=dot(g,z) ;
      if (tmp==0) {
    info = LS_Unstable ;
    break ;
      }
      alpha=(nrm_sd*nrm_sd)/tmp ; // Normalization (N38,eq. 3.30)
      scal(alpha,h_sd) ;
      nrm_sd=fabs(alpha)*nrm_sd ;

      if (nrm_sd >= delta) {
    gamma = delta/nrm_sd ; // Normalization (N38, eq. 3.33)
    copy(h_sd,h_dl) ;
    scal(gamma,h_dl) ;
#ifdef LS_DEBUG
    cout << "[Steepest descent]  " ;
#endif
      }
      else {
    // Combination of Newton and SD steps
    d2 = delta*delta ;
    copy(h_sd,s) ;
    s2=nrm_sd*nrm_sd ;
    nom = d2 - s2 ;
    snrm_hn=nrm_hn*nrm_hn ;
    tmp = dot(h_n,s) ;
        den = tmp-s2 + sqrt((tmp-d2)*(tmp-d2)+(snrm_hn-d2)*(d2-s2)) ;
    if (den==0) {
      info = LS_Unstable ;
      break ;
    }
    // Normalization (N38, eq. 3.31)
    beta = nom/den ;
    // h_dl = beta*h_n + (1-beta)*h_sd
    copy(h_n,h_dl) ;
    scal(beta,h_dl) ;
    axpy((1-beta),h_sd,h_dl) ;
#ifdef LS_DEBUG
    cout << "[Mixed step]        " ;
#endif
      }
    }
    nrm_dl=norm2(h_dl) ;

    //x_new = x+h_dl ;
    copy(x,x_new) ;
    axpy(1.0,h_dl,x_new) ;

    // Check if x_new is inside the box
    // (OutsideBox result: 1 = outside the box, 2 = outside the domain)
    iTmp=box.OutsideBox(x_new, domain) ;
    if (iTmp == 1) {
#ifdef LS_DEBUG
      cout << "x_new is outside the box " << endl ;
#endif
      outside++ ;
      if (outside>max_outside_steps) {
    // Previous point was also outside, exit
    break ;
      }
    }
    else if (iTmp == 2) {
#ifdef LS_DEBUG
      cout << " x_new is outside the domain" << endl ;
#endif
      info=LS_Out ;
      break ;
    }
    else {
      outside=0 ;
    }

    // Compute the gain
    if (axis==-1)
      f_new=glob.ObjectiveGradient(x_new,g_new,OBJECTIVE_AND_GRADIENT);
    else {
      // g_new is filled from g_av further down, only if the step is accepted
      x_av(axis)=x_new(0);
      f_new=glob.ObjectiveGradient(x_av,g_av,OBJECTIVE_AND_GRADIENT);
    }
    IF_NLOPT_CHECK_EVALS;
    FC++; GC++;
    // ro = actual change in f divided by the change g'h + h'(B/2)h
    // predicted by the quadratic model
    gemv('N',0.5,B,h_dl,0.0,z);
    ro = (f_new-f) / (dot(g,h_dl) + dot(h_dl,z)); // Quadratic model
    // Grow the radius after a good prediction, shrink it after a poor one
    if (ro > 0.75) {
      delta = delta*2;
    }
    if (ro < 0.25) {
      delta = delta/3;
    }
    if (ro > 0) {
      // Update the Hessian and its inverse using the BFGS formula
#if 0 // changed by SGJ to compute OBJECTIVE_AND_GRADIENT above
      if (axis==-1)
    glob.ObjectiveGradient(x_new,g_new,GRADIENT_ONLY);
      else {
    x_av(axis)=x_new(0);
    glob.ObjectiveGradient(x_av,g_av,GRADIENT_ONLY);
    g_new(0)=g_av(axis);
      }
      GC++;
      IF_NLOPT_CHECK_EVALS;
#else
      if (axis != -1)
    g_new(0)=g_av(axis);
#endif

      // y=g_new-g
      copy(g_new,y);
      axpy(-1.0,g,y);

      // Check curvature condition
      // (B and H are left unchanged when it is violated)
      alpha=dot(y,h_dl);
      if (alpha <= sqrt(MacEpsilon)*nrm_dl*norm2(y)) {
#ifdef LS_DEBUG
    cout << "Curvature condition violated " ;
#endif
      }
      else {
    // Update Hessian
    gemv('N',1.0,B,h_dl,0.0,z) ; // z=Bh_dl
    beta=-1/dot(h_dl,z) ;
    ger(1/alpha,y,y,B) ;
    ger(beta,z,z,B) ;

        // Update Hessian inverse
        gemv('N',1.0,H,y,0.0,z) ; // z=H*y
        gemv('T',1.0,H,y,0.0,w) ; // w=y'*H
    beta=dot(y,z) ;
    beta=(1+beta/alpha)/alpha ;

    // It should be possible to do this updating more efficiently, by
    // exploiting the fact that (h_dl*y'*H) = transpose(H*y*h_dl')
    ger(beta,h_dl,h_dl,H) ;
    ger(-1/alpha,z,h_dl,H) ;
    ger(-1/alpha,h_dl,w,H) ;
      }

      if (nrm_dl < norm2(x)*epsilon) {
    // Stop criterion (iteration progress) fulfilled
#ifdef LS_DEBUG
    cout << "Progress is marginal" ;
#endif
    good_enough = 1 ;
      }

      // Check if we are close to a stationary point located previously
      if (box.CloseToMin(x_new, &f_new, eps_cl)) {
    // Note that x_new and f_new may be overwritten on exit from CloseToMin
#ifdef LS_DEBUG
    cout << "Close to a previously located stationary point, exiting" << endl;
#endif
    info = LS_Old ;
    good_enough = 1 ;
      }

      // Update x, g and f
      copy(x_new,x) ; copy(g_new,g) ; f=f_new ;

#ifdef LS_DEBUG
      cout << " x=" << x << endl ;
#endif

    }
    else {
      // Step rejected: x, g and f keep their values; only delta has changed
#ifdef LS_DEBUG
      cout << "Step is no good, ro=" << ro << " delta=" << delta << endl ;
#endif
    }

  } // end while

  // Make sure the routine returns correctly...
  // Check if last iterate is outside the boundary
  if (box.OutsideBox(x, domain) != 0) {
    info=LS_Out; f=DBL_MAX;
  }

  if (info == LS_Unstable) {
    cout << "Local search became unstable. No big deal but exiting anyway\n" ;
    exit(1);
  }

  *mgr=maxgrad ;

  T.xvals=x ; T.objval=f ;
  // A nonzero 'outside' count at exit overrides whatever info says
  if (outside>0)
    return LS_Out ;
  else
    return info ;

#endif
}
Ejemplo n.º 4
0
/*
 * Run the selected optimization algorithm on test function number ifunc,
 * `iterations` times, printing the starting point, the result and error
 * statistics for each run.
 *
 * The behaviour is controlled by file-level settings read here: algorithm,
 * iterations, the stopping criteria (minf_max_delta, ftol_rel, ftol_abs,
 * xtol_rel, xtol_abs, maxeval, maxtime), the starting-point options
 * (center_start, xinit_tol) and the bound options (fix_bounds,
 * force_constraints).
 *
 * Returns 1 on success and 0 on any failure: invalid ifunc, allocation
 * failure, a test function that does not attain its stated minimum at
 * xmin, an error code from nlopt_minimize (other than
 * NLOPT_ROUNDOFF_LIMITED or NLOPT_FORCED_STOP), or a returned minf that
 * differs from f evaluated at the returned x.
 */
static int test_function(int ifunc)
{
  testfunc func;
  int i, iter;
  double *x, minf, minf_max, f0, *xtabs, *lb, *ub;
  double f_at_xmin;
  nlopt_result ret;
  double start = nlopt_seconds();
  int total_count = 0, max_count = 0, min_count = 1<<30;
  double total_err = 0, max_err = 0;
  bounds_wrap_data bw;

  if (ifunc < 0 || ifunc >= NTESTFUNCS) {
    fprintf(stderr, "testopt: invalid function %d\n", ifunc);
    listfuncs(stderr);
    return 0;
  }
  func = testfuncs[ifunc];

  /* One allocation of 5*n doubles, used as
       x[0 .. n)     current point
       x[n .. 2n)    gradient scratch (passed as x + n below)
       x[2n .. 3n)   xtabs, absolute x tolerances
       x[3n .. 4n)   lb
       x[4n .. 5n)   ub */
  x = (double *) malloc(sizeof(double) * func.n * 5);
  if (!x) { fprintf(stderr, "testopt: Out of memory!\n"); return 0; }

  lb = x + func.n * 3;
  ub = lb + func.n;
  xtabs = x + func.n * 2;
  bw.lb = lb;
  bw.ub = ub;
  bw.f = func.f;
  bw.f_data = func.f_data;

  for (i = 0; i < func.n; ++i) xtabs[i] = xtol_abs;
  /* minf_max_delta is an offset from the known minimum; -HUGE_VAL disables it */
  minf_max = minf_max_delta > (-HUGE_VAL) ? minf_max_delta + func.minf : (-HUGE_VAL);

  printf("-----------------------------------------------------------\n");
  printf("Optimizing %s (%d dims) using %s algorithm\n",
         func.name, func.n, nlopt_algorithm_name(algorithm));
  printf("lower bounds at lb = [");
  for (i = 0; i < func.n; ++i) printf(" %g", func.lb[i]);
  printf("]\n");
  printf("upper bounds at ub = [");
  for (i = 0; i < func.n; ++i) printf(" %g", func.ub[i]);
  printf("]\n");
  memcpy(lb, func.lb, func.n * sizeof(double));
  memcpy(ub, func.ub, func.n * sizeof(double));
  for (i = 0; i < func.n; ++i) if (fix_bounds[i]) {
      printf("fixing bounds for dim[%d] to xmin[%d]=%g\n",
             i, i, func.xmin[i]);
      lb[i] = ub[i] = func.xmin[i];
  }
  if (force_constraints) {
    /* randomly pull one bound per dimension towards xmin */
    for (i = 0; i < func.n; ++i) {
      if (nlopt_iurand(2) == 0)
        ub[i] = nlopt_urand(lb[i], func.xmin[i]);
      else
        lb[i] = nlopt_urand(func.xmin[i], ub[i]);
    }
    printf("adjusted lower bounds at lb = [");
    for (i = 0; i < func.n; ++i) printf(" %g", lb[i]);
    printf("]\n");
    printf("adjusted upper bounds at ub = [");
    for (i = 0; i < func.n; ++i) printf(" %g", ub[i]);
    printf("]\n");
  }

  /* Sanity check of the test function itself: f(xmin) must equal minf.
     The value is computed once and reused in the diagnostic. */
  f_at_xmin = func.f(func.n, func.xmin, 0, func.f_data);
  if (fabs(f_at_xmin - func.minf) > 1e-8) {
    fprintf(stderr, "BUG: function does not achieve given lower bound!\n");
    fprintf(stderr, "f(%g", func.xmin[0]);
    for (i = 1; i < func.n; ++i) fprintf(stderr, ", %g", func.xmin[i]);
    fprintf(stderr, ") = %0.16g instead of %0.16g, |diff| = %g\n",
            f_at_xmin, func.minf, fabs(f_at_xmin - func.minf));
    free(x);  /* this early return used to leak the work buffer */
    return 0;
  }

  for (iter = 0; iter < iterations; ++iter) {
    double val;
    testfuncs_counter = 0;

    printf("Starting guess x = [");
    for (i = 0; i < func.n; ++i) {
      if (center_start)
        x[i] = (ub[i] + lb[i]) * 0.5;
      else if (xinit_tol < 0) { /* random starting point near center of box */
        double dx = (ub[i] - lb[i]) * 0.25;
        double xm = 0.5 * (ub[i] + lb[i]);
        x[i] = nlopt_urand(xm - dx, xm + dx);
      }
      else {
        /* random perturbation of xmin, clamped to the bounds */
        x[i] = nlopt_urand(-xinit_tol, xinit_tol)
          + (1 + nlopt_urand(-xinit_tol, xinit_tol)) * func.xmin[i];
        if (x[i] > ub[i]) x[i] = ub[i];
        else if (x[i] < lb[i]) x[i] = lb[i];
      }
      printf(" %g", x[i]);
    }
    printf("]\n");
    f0 = func.f(func.n, x, x + func.n, func.f_data);
    printf("Starting function value = %g\n", f0);

    if (iter == 0 && testfuncs_verbose && func.has_gradient) {
      /* compare the analytic gradient stored at x + n with a one-sided
         finite difference using a relative step of 1e-6 */
      printf("checking gradient:\n");
      for (i = 0; i < func.n; ++i) {
        double f;
        x[i] *= 1 + 1e-6;
        f = func.f(func.n, x, NULL, func.f_data);
        x[i] /= 1 + 1e-6;
        printf("  grad[%d] = %g vs. numerical derivative %g\n",
               i, x[i + func.n], (f - f0) / (x[i] * 1e-6));
      }
    }

    /* count only the evaluations made by the optimizer */
    testfuncs_counter = 0;
    ret = nlopt_minimize(algorithm,
                         func.n, bounds_wrap_func, &bw,
                         lb, ub,
                         x, &minf,
                         minf_max, ftol_rel, ftol_abs, xtol_rel, xtabs,
                         maxeval, maxtime);
    printf("finished after %g seconds.\n", nlopt_seconds() - start);
    printf("return code %d from nlopt_minimize\n", ret);
    if (ret < 0 && ret != NLOPT_ROUNDOFF_LIMITED
        && ret != NLOPT_FORCED_STOP) {
      fprintf(stderr, "testopt: error in nlopt_minimize\n");
      free(x);
      return 0;
    }
    printf("Found minimum f = %g after %d evaluations.\n",
           minf, testfuncs_counter);
    total_count += testfuncs_counter;
    if (testfuncs_counter > max_count) max_count = testfuncs_counter;
    if (testfuncs_counter < min_count) min_count = testfuncs_counter;
    printf("Minimum at x = [");
    for (i = 0; i < func.n; ++i) printf(" %g", x[i]);
    printf("]\n");
    if (func.minf == 0)
      printf("|f - minf| = %g\n", fabs(minf - func.minf));
    else
      printf("|f - minf| = %g, |f - minf| / |minf| = %e\n",
             fabs(minf - func.minf), fabs(minf - func.minf) / fabs(func.minf));
    total_err += fabs(minf - func.minf);
    if (fabs(minf - func.minf) > max_err)
      max_err = fabs(minf - func.minf);
    printf("vs. global minimum f = %g at x = [", func.minf);
    for (i = 0; i < func.n; ++i) printf(" %g", func.xmin[i]);
    printf("]\n");

    /* the reported minimum must be exactly f at the reported x */
    val = func.f(func.n, x, NULL, func.f_data);
    if (val != minf) {
      fprintf(stderr, "Mismatch %g between returned minf=%g and f(x) = %g\n",
              minf - val, minf, val);
      free(x);
      return 0;
    }
  }
  if (iterations > 1)
    printf("average #evaluations = %g (%d-%d)\naverage |f-minf| = %g, max |f-minf| = %g\n", total_count * 1.0 / iterations, min_count, max_count, total_err / iterations, max_err);

  free(x);
  return 1;
}