Ejemplo n.º 1
0
/* Function:  esl_hxp_FitCompleteBinned()
 *
 * Purpose:   Given a histogram <g> with binned observations, where each
 *            bin i holds some number of observed samples x with values from 
 *            lower bound l to upper bound u (that is, $l < x \leq u$),
 *            and given a starting guess <h> for hyperexponential parameters;
 *
 *            Find maximum likelihood parameters <h> by conjugate gradient
 *            descent, starting from the initial <h> and leaving the
 *            optimized solution in <h>.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error, and <h> is left in its
 *            initial state.
 */
int 
esl_hxp_FitCompleteBinned(ESL_HISTOGRAM *g, ESL_HYPEREXP *h)
{
  struct hyperexp_binned_data data;
  int     status;
  double *p   = NULL;
  double *u   = NULL;
  double *wrk = NULL;
  double  fx;
  int     i;
  double  tol = 1e-6;
  int     np;

  np = 0;
  if (! h->fixmix) np = h->K-1;  /* K-1 mix coefficients...      */
  for (i = 0; i < h->K; i++)     /* ...and up to K lambdas free. */
    if (! h->fixlambda[i]) np++;

  ESL_ALLOC(p,   sizeof(double) * np);
  ESL_ALLOC(u,   sizeof(double) * np);
  ESL_ALLOC(wrk, sizeof(double) * np * 4);

  /* Copy shared info into the "data" structure  */
  data.g     = g;
  data.h     = h;

  /* From h, create the parameter vector. */
  hyperexp_pack_paramvector(p, np, h);

  /* Define the step size vector u.
   */
  for (i = 0; i < np; i++) u[i] = 1.0;

  /* Feed it all to the mighty optimizer.
   */
  status = esl_min_ConjugateGradientDescent(p, u, np, 
					    &hyperexp_complete_binned_func, 
					    &hyperexp_complete_binned_gradient,
					    (void *) (&data), tol, wrk, &fx);
  if (status != eslOK) goto ERROR;

  /* Convert the final parameter vector back to a hyperexponential
   */
  hyperexp_unpack_paramvector(p, np, h);
  
  free(p);
  free(u);
  free(wrk);
  esl_hyperexp_SortComponents(h);
  return eslOK;

 ERROR:
  if (p   != NULL) free(p);
  if (u   != NULL) free(u);
  if (wrk != NULL) free(wrk);
  return status;
}
Ejemplo n.º 2
0
/* Function:  esl_sxp_FitCompleteBinned()
 *
 * Purpose:   Given a histogram <g> with binned observations, where each
 *            bin i holds some number of observed samples x with values from 
 *            lower bound l to upper bound u (that is, $l < x \leq u$);
 *            find maximum likelihood parameters mu, lambda, tau by conjugate
 *            gradient descent optimization.
 */
int
esl_sxp_FitCompleteBinned(ESL_HISTOGRAM *g,
			  double *ret_mu, double *ret_lambda, double *ret_tau)

{
  struct sxp_binned_data data;
  double p[2], u[2], wrk[8];
  double mu, tau, lambda;
  double tol = 1e-6;
  double fx;
  int    status;
  double ai, mean;
  int    i;

  /* Set the fixed mu.
   * Make a good initial guess of lambda, based on exponential fit.
   * Choose an arbitrary tau.
   */
  if      (g->is_tailfit) mu = g->phi;  /* all x > mu in this case */
  else if (g->is_rounded) mu = esl_histogram_Bin2LBound(g, g->imin);
  else                    mu = g->xmin; 

  mean = 0.;
  for (i = g->cmin; i <= g->imax; i++) 
    { 
      ai = esl_histogram_Bin2LBound(g, i);
      ai += 0.5*g->w;		/* midpoint in bin */
      mean += (double)g->obs[i] * ai;
    }
  mean  /= g->No;
  lambda = 1 / (mean - mu);

  tau    = 0.9;

  /* load data structure, param vector, and step vector */
  data.g  = g;
  data.mu = mu;
  p[0]    = log(lambda);
  p[1]    = log(tau);
  u[0]    = 1.0;
  u[1]    = 1.0;

  /* hand it off */
  status =  esl_min_ConjugateGradientDescent(p, u, 2, 
					     &sxp_complete_binned_func, 
					     NULL,
					     (void *) (&data), tol, wrk, &fx);
  *ret_mu     = mu;
  *ret_lambda = exp(p[0]);
  *ret_tau    = exp(p[1]);
  return status;
}
Ejemplo n.º 3
0
/* Function:  esl_wei_FitComplete()
 *
 * Purpose:   Given an array of <n> samples <x[0]..x[n-1>, fit
 *            them to a stretched exponential distribution starting
 *            at lower bound <mu> (all $x_i > \mu$), and 
 *            return maximum likelihood parameters <ret_lambda>
 *            and <ret_tau>.
 *            
 * Args:      x          - complete GEV-distributed data [0..n-1]
 *            n          - number of samples in <x>
 *            ret_mu     - RETURN: lower bound of the distribution (all x_i >= mu)
 *            ret_lambda - RETURN: maximum likelihood estimate of lambda
 *            ret_tau    - RETURN: maximum likelihood estimate of tau
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslENOHALT> if the fit doesn't converge.
 *
 * Xref:      STL9/136-137
 */
int
esl_wei_FitComplete(double *x, int n, double *ret_mu,
		    double *ret_lambda, double *ret_tau)
{
  struct wei_data data;
  double p[2];			/* parameter vector                  */
  double u[2];			/* max initial step size vector      */
  double wrk[8];		/* 4 tmp vectors of length 2         */
  double mean;
  double mu, lambda, tau;      	/* initial param guesses             */
  double tol = 1e-6;		/* convergence criterion for CG      */
  double fx;			/* f(x) at minimum; currently unused */
  int    status;

  /* Make a good initial guess, based on exponential fit;
   * set an arbitrary tau.
   */
  mu =  esl_vec_DMin(x, n);
  esl_stats_DMean(x, n, &mean, NULL);
  lambda = 1 / (mean - mu);
  tau    = 0.9;

  /* Load the data structure
   */
  data.x   = x;
  data.n   = n;
  data.mu  = mu;

  /* Change of variables;
   *   lambda > 0, so c.o.v.  lambda = exp^w,  w = log(lambda);
   *   tau > 0, same c.o.v.
   */
  p[0] = log(lambda);		
  p[1] = log(tau);

  u[0] = 1.0;
  u[1] = 1.0;

  /* pass problem to the optimizer
   */
  status = esl_min_ConjugateGradientDescent(p, u, 2, 
					    &wei_func, NULL,
					    (void *)(&data),
					    tol, wrk, &fx);
  *ret_mu     = mu;
  *ret_lambda = exp(p[0]);
  *ret_tau    = exp(p[1]);
  return status;
}
Ejemplo n.º 4
0
/* Function:  esl_gumbel_FitTruncated()
* Synopsis:  Estimates $\mu$, $\lambda$ from truncated data.
* Incept:    SRE, Wed Jun 29 14:14:17 2005 [St. Louis]
*
* Purpose:   Given a left-truncated array of Gumbel-distributed
*            samples <x[0]..x[n-1]> and the truncation threshold
*            <phi> (such that all <x[i]> $\geq$ <phi>).
*            Find maximum likelihood parameters <mu> and <lambda>.
*            
*            <phi> should not be much greater than <mu>, the
*            mode of the Gumbel, or the fit will become unstable
*            or may even fail to converge. The problem is
*            that for <phi> $>$ <mu>, the tail of the Gumbel
*            becomes a scale-free exponential, and <mu> becomes
*            undetermined.
*            
* Algorithm: Uses conjugate gradient descent to optimize the
*            log likelihood of the data. Follows a general
*            approach to fitting missing data problems outlined
*            in [Gelman95].
*
* Args:      x          - observed data samples [0..n-1]
*            n          - number of samples
*            phi        - truncation threshold; all x[i] >= phi
*            ret_mu     - RETURN: ML estimate of mu       
*            ret_lambda - RETURN: ML estimate of lambda
*
* Returns:   <eslOK> on success.
*
* Throws:    <eslENOHALT> if the fit doesn't converge.
*/
int
esl_gumbel_FitTruncated(double *x, int n, double phi, 
                        double *ret_mu, double *ret_lambda)
{
    struct tevd_data data;
    double wrk[8];		/* workspace for CG: 4 tmp vectors of size 2 */
    double p[2];			/* mu, w;  lambda = e^w */
    double u[2];			/* max initial step size for mu, lambda */
    int    status;
    double mean, variance;
    double mu, lambda;
    double fx;

    data.x   = x;
    data.n   = n;
    data.phi = phi;

    /* The source of the following magic is Evans/Hastings/Peacock, 
    * Statistical Distributions, 3rd edition (2000), p.86, which gives
    * eq's for the mean and variance of a Gumbel in terms of mu and lambda;
    * we turn them around to get mu and lambda in terms of the mean and variance.
    * These would be reasonable estimators if we had a full set of Gumbel
    * distributed variates. They'll be off for a truncated sample, but
    * close enough to be a useful starting point.
    */
    esl_stats_DMean(x, n, &mean, &variance);
    lambda = eslCONST_PI / sqrt(6.*variance);
    mu     = mean - 0.57722/lambda;

    p[0] = mu;
    p[1] = log(lambda);		/* c.o.v. because lambda is constrained to >0 */

    u[0] = 2.0;
    u[1] = 0.1;

    /* Pass the problem to the optimizer. The work is done by the
    * equations in tevd_func() and tevd_grad().
    */
    status = esl_min_ConjugateGradientDescent(p, u, 2, 
        &tevd_func, &tevd_grad,(void *)(&data),
        1e-4, wrk, &fx);

    *ret_mu     = p[0];
    *ret_lambda = exp(p[1]);	/* reverse the c.o.v. */
    return status;
}
Ejemplo n.º 5
0
/* fitting_engine()
 * Fitting code shared by the FitComplete() and FitCensored() API.
 * 
 * The fitting_engine(), in turn, is just an adaptor wrapped around
 * the conjugate gradient descent minimizer.
 */
static int
fitting_engine(struct gev_data *data, 
	       double *ret_mu, double *ret_lambda, double *ret_alpha)
{
  double p[3];			/* parameter vector                  */
  double u[3];			/* max initial step size vector      */
  double wrk[12];		/* 4 tmp vectors of length 3         */
  double mean, variance;
  double mu, lambda, alpha;	/* initial param guesses             */
  double tol = 1e-6;		/* convergence criterion for CG      */
  double fx;			/* f(x) at minimum; currently unused */
  int    status;

  /* Make an initial guess. 
   * (very good guess for complete data; merely sufficient for censored)
   */
  esl_stats_DMean(data->x, data->n, &mean, &variance);
  lambda = eslCONST_PI / sqrt(6.*variance);
  mu     = mean - 0.57722/lambda;
  alpha  = 0.0001;

  p[0] = mu;
  p[1] = log(lambda);	/* c.o.v. from lambda to w */
  p[2] = alpha;

  /* max initial step sizes: keeps bracketing from exploding */
  u[0] = 1.0;
  u[1] = fabs(log(0.02));
  u[2] = 0.02;

  /* pass problem to the optimizer
   */
  status = esl_min_ConjugateGradientDescent(p, u, 3, 
					    &gev_func, 
					    &gev_gradient,
					    (void *)data,
					    tol, wrk, &fx);
  *ret_mu     = p[0];
  *ret_lambda = exp(p[1]);
  *ret_alpha  = p[2];
  return status;
}
Ejemplo n.º 6
0
/* Function:  esl_sxp_FitComplete()
 *
 * Purpose:   Given a vector of <n> observed data samples <x[]>,
 *            find maximum likelihood parameters by conjugate gradient 
 *            descent optimization.
 */
int
esl_sxp_FitComplete(double *x, int n,
		    double *ret_mu, double *ret_lambda, double *ret_tau)

{
  struct sxp_data data;
  double p[2], u[2], wrk[8];
  double mu, tau, lambda;
  double mean;
  double tol = 1e-6;
  double fx;
  int    status;

  /* initial guesses; mu is definitely = minimum x,
   * and just use arbitrary #'s to init lambda, tau
   */
  mu =  esl_vec_DMin(x, n);
  esl_stats_DMean(x, n, &mean, NULL);
  lambda = 1 / (mean - mu);
  tau    = 0.9;


  /* load data structure, param vector, and step vector */
  data.x  = x;
  data.n  = n;
  data.mu = mu;
  p[0]    = log(lambda);
  p[1]    = log(tau);
  u[0]    = 1.0;
  u[1]    = 1.0;

  /* hand it off */
  status =  esl_min_ConjugateGradientDescent(p, u, 2, 
					     &sxp_complete_func, 
					     NULL,
					     (void *) (&data), tol, wrk, &fx);
  *ret_mu     = mu;
  *ret_lambda = exp(p[0]);
  *ret_tau    = exp(p[1]);
  return status;
}
Ejemplo n.º 7
0
int
main(int argc, char **argv)
{
  int    n = 6;
  double a[6] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 };
  double x[6] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 };
  double u[6] = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
  double wrk[24];
  double fx;
  int    i;

  esl_min_ConjugateGradientDescent(x, u, n, 
				   &example_func, &example_dfunc, (void *) a, 
				   0.0001, wrk, &fx);

  printf("At minimum: f(x) = %g\n", fx);
  printf("vector x = ");
  for (i = 0; i < 6; i++) printf("%g  ", x[i]);
  printf("\n");

  return 0;
}
Ejemplo n.º 8
0
/* Function:  esl_mixgev_FitComplete()
 *
 * Purpose:   Given <n> observed data values <x[0..n-1]>, and
 *            an initial guess at a mixture GEV fit to those data
 *            <mg>, use conjugate gradient descent to perform
 *            a locally optimal maximum likelihood mixture
 *            GEV parameter fit to the data.
 *            
 *            To obtain a reasonable initial guess for <mg>,
 *            see <esl_mixgev_FitGuess()>. 
 *
 * Args:      x   - observed data, <x[0..n-1]>.
 *            n   - number of samples in <x>
 *            mg  - mixture GEV to estimate, w/ params set to
 *                  an initial guess.
 *
 * Returns:   <eslOK> on success, and <mg> contains local
 *            ML estimate for mixture GEV parameters.
 *
 * Throws:    <eslEMEM> on allocation error, and <mg> is unchanged
 *            from its initial state.
 */
int
esl_mixgev_FitComplete(double *x, int n, ESL_MIXGEV *mg)
{
  struct mixgev_data data;
  int     status;
  double *p = NULL;
  double *u = NULL;
  double *wrk = NULL;
  double  tol;
  int     np;
  double  fx;
  int     k;
  int     i;

  tol = 1e-6;

  /* Determine number of free parameters and allocate 
   */
  np = mg->K-1;			/* K-1 mix coefficients free */
  for (k = 0; k < mg->K; k++)
    np += (mg->isgumbel[k])? 2 : 3;
  ESL_ALLOC(p,   sizeof(double) * np);
  ESL_ALLOC(u,   sizeof(double) * np);
  ESL_ALLOC(wrk, sizeof(double) * np * 4);

  /* Copy shared info into the "data" structure
   */
  data.x   = x;
  data.n   = n;
  data.wrk = wrk;
  data.mg  = mg;

  /* From mg, create the parameter vector.
   */
  mixgev_pack_paramvector(p, np, mg);

  /* Define the step size vector u.
   */
  i = 0;
  for (k = 1; k < mg->K; k++) u[i++] = 1.0;
  for (k = 0; k < mg->K; k++)
    {
      u[i++] = 1.0;
      u[i++] = 1.0;
      if (! mg->isgumbel[k]) u[i++] = 0.02;
    }
  ESL_DASSERT1( (np == i) );

  /* Feed it all to the mighty optimizer.
   */

  status = esl_min_ConjugateGradientDescent(p, u, np, &mixgev_complete_func, NULL,
					    (void *) (&data), tol, wrk, &fx);
  if (status != eslOK) goto ERROR;

  /* Convert the final parameter vector back to a mixture GEV
   */
  mixgev_unpack_paramvector(p, np, mg);
  
  free(p);
  free(u);
  free(wrk);
  return eslOK;

 ERROR:
  if (p != NULL)   free(p);
  if (u != NULL)   free(u);
  if (wrk != NULL) free(wrk);
  return status;
}
Ejemplo n.º 9
0
/* Function:  esl_wei_FitCompleteBinned()
 *
 * Purpose:   Given a histogram <g> with binned observations, where each
 *            bin i holds some number of observed samples x with values from 
 *            lower bound l to upper bound u (that is, $l < x \leq u$), and
 *            <mu>, the known offset (minimum value) of the distribution;
 *            return maximum likelihood parameters <ret_lambda>
 *            and <ret_tau>.
 *            
 * Args:      x          - complete GEV-distributed data [0..n-1]
 *            n          - number of samples in <x>
 *            ret_mu     - lower bound of the distribution (all x_i > mu)
 *            ret_lambda - RETURN: maximum likelihood estimate of lambda
 *            ret_tau    - RETURN: maximum likelihood estimate of tau
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslENOHALT> if the fit doesn't converge.
 *
 * Xref:      STL9/136-137
 */
int
esl_wei_FitCompleteBinned(ESL_HISTOGRAM *h, double *ret_mu,
			  double *ret_lambda, double *ret_tau)
{
  struct wei_binned_data data;
  double p[2];			/* parameter vector                  */
  double u[2];			/* max initial step size vector      */
  double wrk[8];		/* 4 tmp vectors of length 2         */
  double mean;
  double mu, lambda, tau;      	/* initial param guesses             */
  double tol = 1e-6;		/* convergence criterion for CG      */
  double fx;			/* f(x) at minimum; currently unused */
  int    status;
  int    i;
  double ai;

  /* Set the fixed mu.
   * Make a good initial guess of lambda, based on exponential fit.
   * Choose an arbitrary tau.
   */
  if      (h->is_tailfit) mu = h->phi;  /* all x > mu in this case */
  else if (h->is_rounded) mu = esl_histogram_Bin2LBound(h, h->imin);
  else                    mu = h->xmin; 

  mean = 0.;
  for (i = h->cmin; i <= h->imax; i++) 
    { 
      ai = esl_histogram_Bin2LBound(h, i);
      ai += 0.5*h->w;		/* midpoint in bin */
      mean += (double)h->obs[i] * ai;
    }
  mean  /= h->No;
  lambda = 1 / (mean - mu);

  tau    = 0.9;

  /* load the data structure */
  data.h   = h;
  data.mu  = mu;

  /* Change of variables;
   *   lambda > 0, so c.o.v.  lambda = exp^w,  w = log(lambda);
   *   tau > 0, same c.o.v.
   */
  p[0] = log(lambda);		
  p[1] = log(tau);

  u[0] = 1.0;
  u[1] = 1.0;

  /* pass problem to the optimizer
   */
  status = esl_min_ConjugateGradientDescent(p, u, 2, 
					    &wei_binned_func, NULL,
					    (void *)(&data),
					    tol, wrk, &fx);
  *ret_mu     = mu;
  *ret_lambda = exp(p[0]);
  *ret_tau    = exp(p[1]);
  return status;
}