示例#1
0
/* direct_mv_fit()
* SRE, Wed Jun 29 08:23:47 2005
* 
* Purely for curiousity: a complete data fit using the
* simple direct method, calculating mu and lambda from mean
* and variance.
*/
static int
direct_mv_fit(double *x, int n, double *ret_mu, double *ret_lambda)
{
    double mean, variance;

    esl_stats_DMean(x, n, &mean, &variance);
    *ret_lambda = eslCONST_PI / sqrt(6.*variance);
    *ret_mu     = mean - 0.57722/(*ret_lambda);
    return eslOK;
}
示例#2
0
/* Function:  esl_wei_FitComplete()
 *
 * Purpose:   Given an array of <n> samples <x[0]..x[n-1>, fit
 *            them to a stretched exponential distribution starting
 *            at lower bound <mu> (all $x_i > \mu$), and 
 *            return maximum likelihood parameters <ret_lambda>
 *            and <ret_tau>.
 *            
 * Args:      x          - complete GEV-distributed data [0..n-1]
 *            n          - number of samples in <x>
 *            ret_mu     - RETURN: lower bound of the distribution (all x_i >= mu)
 *            ret_lambda - RETURN: maximum likelihood estimate of lambda
 *            ret_tau    - RETURN: maximum likelihood estimate of tau
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslENOHALT> if the fit doesn't converge.
 *
 * Xref:      STL9/136-137
 */
int
esl_wei_FitComplete(double *x, int n, double *ret_mu,
		    double *ret_lambda, double *ret_tau)
{
  struct wei_data data;
  double p[2];			/* parameter vector                  */
  double u[2];			/* max initial step size vector      */
  double wrk[8];		/* 4 tmp vectors of length 2         */
  double mean;
  double mu, lambda, tau;      	/* initial param guesses             */
  double tol = 1e-6;		/* convergence criterion for CG      */
  double fx;			/* f(x) at minimum; currently unused */
  int    status;

  /* Make a good initial guess, based on exponential fit;
   * set an arbitrary tau.
   */
  mu =  esl_vec_DMin(x, n);
  esl_stats_DMean(x, n, &mean, NULL);
  lambda = 1 / (mean - mu);
  tau    = 0.9;

  /* Load the data structure
   */
  data.x   = x;
  data.n   = n;
  data.mu  = mu;

  /* Change of variables;
   *   lambda > 0, so c.o.v.  lambda = exp^w,  w = log(lambda);
   *   tau > 0, same c.o.v.
   */
  p[0] = log(lambda);		
  p[1] = log(tau);

  u[0] = 1.0;
  u[1] = 1.0;

  /* pass problem to the optimizer
   */
  status = esl_min_ConjugateGradientDescent(p, u, 2, 
					    &wei_func, NULL,
					    (void *)(&data),
					    tol, wrk, &fx);
  *ret_mu     = mu;
  *ret_lambda = exp(p[0]);
  *ret_tau    = exp(p[1]);
  return status;
}
示例#3
0
/* Function:  esl_gumbel_FitTruncated()
* Synopsis:  Estimates $\mu$, $\lambda$ from truncated data.
* Incept:    SRE, Wed Jun 29 14:14:17 2005 [St. Louis]
*
* Purpose:   Given a left-truncated array of Gumbel-distributed
*            samples <x[0]..x[n-1]> and the truncation threshold
*            <phi> (such that all <x[i]> $\geq$ <phi>).
*            Find maximum likelihood parameters <mu> and <lambda>.
*            
*            <phi> should not be much greater than <mu>, the
*            mode of the Gumbel, or the fit will become unstable
*            or may even fail to converge. The problem is
*            that for <phi> $>$ <mu>, the tail of the Gumbel
*            becomes a scale-free exponential, and <mu> becomes
*            undetermined.
*            
* Algorithm: Uses conjugate gradient descent to optimize the
*            log likelihood of the data. Follows a general
*            approach to fitting missing data problems outlined
*            in [Gelman95].
*
* Args:      x          - observed data samples [0..n-1]
*            n          - number of samples
*            phi        - truncation threshold; all x[i] >= phi
*            ret_mu     - RETURN: ML estimate of mu       
*            ret_lambda - RETURN: ML estimate of lambda
*
* Returns:   <eslOK> on success.
*
* Throws:    <eslENOHALT> if the fit doesn't converge.
*/
int
esl_gumbel_FitTruncated(double *x, int n, double phi, 
                        double *ret_mu, double *ret_lambda)
{
    struct tevd_data data;
    double wrk[8];		/* workspace for CG: 4 tmp vectors of size 2 */
    double p[2];			/* mu, w;  lambda = e^w */
    double u[2];			/* max initial step size for mu, lambda */
    int    status;
    double mean, variance;
    double mu, lambda;
    double fx;

    data.x   = x;
    data.n   = n;
    data.phi = phi;

    /* The source of the following magic is Evans/Hastings/Peacock, 
    * Statistical Distributions, 3rd edition (2000), p.86, which gives
    * eq's for the mean and variance of a Gumbel in terms of mu and lambda;
    * we turn them around to get mu and lambda in terms of the mean and variance.
    * These would be reasonable estimators if we had a full set of Gumbel
    * distributed variates. They'll be off for a truncated sample, but
    * close enough to be a useful starting point.
    */
    esl_stats_DMean(x, n, &mean, &variance);
    lambda = eslCONST_PI / sqrt(6.*variance);
    mu     = mean - 0.57722/lambda;

    p[0] = mu;
    p[1] = log(lambda);		/* c.o.v. because lambda is constrained to >0 */

    u[0] = 2.0;
    u[1] = 0.1;

    /* Pass the problem to the optimizer. The work is done by the
    * equations in tevd_func() and tevd_grad().
    */
    status = esl_min_ConjugateGradientDescent(p, u, 2, 
        &tevd_func, &tevd_grad,(void *)(&data),
        1e-4, wrk, &fx);

    *ret_mu     = p[0];
    *ret_lambda = exp(p[1]);	/* reverse the c.o.v. */
    return status;
}
示例#4
0
/* fitting_engine()
 * Fitting code shared by the FitComplete() and FitCensored() API.
 * 
 * The fitting_engine(), in turn, is just an adaptor wrapped around
 * the conjugate gradient descent minimizer.
 */
static int
fitting_engine(struct gev_data *data, 
	       double *ret_mu, double *ret_lambda, double *ret_alpha)
{
  double p[3];			/* parameter vector                  */
  double u[3];			/* max initial step size vector      */
  double wrk[12];		/* 4 tmp vectors of length 3         */
  double mean, variance;
  double mu, lambda, alpha;	/* initial param guesses             */
  double tol = 1e-6;		/* convergence criterion for CG      */
  double fx;			/* f(x) at minimum; currently unused */
  int    status;

  /* Make an initial guess. 
   * (very good guess for complete data; merely sufficient for censored)
   */
  esl_stats_DMean(data->x, data->n, &mean, &variance);
  lambda = eslCONST_PI / sqrt(6.*variance);
  mu     = mean - 0.57722/lambda;
  alpha  = 0.0001;

  p[0] = mu;
  p[1] = log(lambda);	/* c.o.v. from lambda to w */
  p[2] = alpha;

  /* max initial step sizes: keeps bracketing from exploding */
  u[0] = 1.0;
  u[1] = fabs(log(0.02));
  u[2] = 0.02;

  /* pass problem to the optimizer
   */
  status = esl_min_ConjugateGradientDescent(p, u, 3, 
					    &gev_func, 
					    &gev_gradient,
					    (void *)data,
					    tol, wrk, &fx);
  *ret_mu     = p[0];
  *ret_lambda = exp(p[1]);
  *ret_alpha  = p[2];
  return status;
}
示例#5
0
/* Function:  esl_sxp_FitComplete()
 *
 * Purpose:   Given a vector of <n> observed data samples <x[]>,
 *            find maximum likelihood parameters by conjugate gradient 
 *            descent optimization.
 */
int
esl_sxp_FitComplete(double *x, int n,
		    double *ret_mu, double *ret_lambda, double *ret_tau)

{
  struct sxp_data data;
  double p[2], u[2], wrk[8];
  double mu, tau, lambda;
  double mean;
  double tol = 1e-6;
  double fx;
  int    status;

  /* initial guesses; mu is definitely = minimum x,
   * and just use arbitrary #'s to init lambda, tau
   */
  mu =  esl_vec_DMin(x, n);
  esl_stats_DMean(x, n, &mean, NULL);
  lambda = 1 / (mean - mu);
  tau    = 0.9;


  /* load data structure, param vector, and step vector */
  data.x  = x;
  data.n  = n;
  data.mu = mu;
  p[0]    = log(lambda);
  p[1]    = log(tau);
  u[0]    = 1.0;
  u[1]    = 1.0;

  /* hand it off */
  status =  esl_min_ConjugateGradientDescent(p, u, 2, 
					     &sxp_complete_func, 
					     NULL,
					     (void *) (&data), tol, wrk, &fx);
  *ret_mu     = mu;
  *ret_lambda = exp(p[0]);
  *ret_tau    = exp(p[1]);
  return status;
}
/* Function:  esl_mixgev_FitGuess()
 *
 * Purpose:   Make initial randomized guesses at the parameters
 *            of mixture GEV <mg>, using random number generator
 *            <r> and observed data consisting of <n> values
 *            <x[0..n-1]>. This guess is a suitable starting
 *            point for a parameter optimization routine, such
 *            as <esl_mixgev_FitComplete()>.
 *            
 *            Specifically, we estimate one 'central' guess 
 *            for a single Gumbel fit to the data, using the
 *            method of moments. Then we add $\pm 10\%$ to that 'central' 
 *            $\mu$ and $\lambda$ to get each component 
 *            $\mu_i$ and $\lambda_i$. The $\alpha_i$ parameters
 *            are generated by sampling uniformly from $-0.1..0.1$.
 *            Mixture coefficients $q_i$ are sampled uniformly.
 *
 * Args:      r   - randomness source 
 *            x   - vector of observed data values to fit, 0..n-1
 *            n   - number of values in <x>
 *            mg  - mixture GEV to put guessed params into
 *
 * Returns:   <eslOK> on success.
 */
int
esl_mixgev_FitGuess(ESL_RANDOMNESS *r, double *x, int n, ESL_MIXGEV *mg)
{
  double mean, variance;
  double mu, lambda;
  int    k;

  esl_stats_DMean(x, n, &mean, &variance);
  lambda = eslCONST_PI / sqrt(6.*variance);
  mu     = mean - 0.57722/lambda;

  esl_dirichlet_DSampleUniform(r, mg->K, mg->q);
  for (k = 0; k < mg->K; k++)
    {
      mg->mu[k]     = mu     + 0.2 * mu     * (esl_random(r) - 0.5);
      mg->lambda[k] = lambda + 0.2 * lambda * (esl_random(r) - 0.5);
      if (mg->isgumbel[k]) mg->alpha[k] = 0.;
      else mg->alpha[k] = 0.2 * (esl_random(r) - 0.5);
    }
  return eslOK;
}
示例#7
0
/* Function:  esl_gumbel_FitCompleteLoc()
* Synopsis:  Estimates $\mu$ from complete data, given $\lambda$.
* Incept:    SRE, Thu Nov 24 09:09:17 2005 [St. Louis]
*
* Purpose:   Given an array of Gumbel-distributed samples 
*            <x[0]..x[n-1]> (complete data), and a known
*            (or otherwise fixed) <lambda>, find a maximum
*            likelihood estimate for location parameter <mu>.
*            
* Algorithm: A straightforward simplification of FitComplete().           
*
* Args:     x          - list of Gumbel distributed samples
*           n          - number of samples
*           lambda     - known lambda (scale) parameter
*           ret_mu     : RETURN: ML estimate of mu
*           
* Returns:  <eslOK> on success.
*
* Throws:    (no abnormal error conditions)
* 
* Note:     Here and in FitComplete(), we have a potential
*           under/overflow problem. We ought to be doing the
*           esum in log space.
*/
int
esl_gumbel_FitCompleteLoc(double *x, int n, double lambda, double *ret_mu)
{
    double esum;
    int    i;

    /* Substitute into Lawless 4.1.5 to find mu */
    esum = 0.;
    for (i = 0; i < n; i++)
        esum  += exp(-lambda * x[i]);
    *ret_mu = -log(esum / n) / lambda;
    return eslOK;

#if 0
    /* Replace the code above w/ code below to test the direct method. */
    double mean, variance;
    esl_stats_DMean(x, n, &mean, &variance);
    *ret_mu     = mean - 0.57722/lambda;
    return eslOK;
#endif
}
示例#8
0
/* Function: esl_gumbel_FitCensored()
* Synopsis: Estimates $\mu$, $\lambda$ from censored data.
* Date:     SRE, Mon Nov 17 10:01:05 1997 [St. Louis]
* 
* Purpose: Given a left-censored array of Gumbel-distributed samples
*          <x[0]..x[n-1]>, the number of censored samples <z>, and the
*          censoring value <phi> (all <x[i]> $>$ <phi>).
*          Find maximum likelihood parameters <mu> and <lambda>.
*           
* Algorithm: Uses approach described in [Lawless82]. Solves
*            for lambda using Newton/Raphson iterations;
*            then substitutes lambda into Lawless' equation 4.2.3
*            to get mu. 
*           
* Args:     x          - array of Gumbel-distributed samples, 0..n-1
*           n          - number of observed samples
*           z          - number of censored samples
*           phi        - censoring value (all x_i >= phi)
*           ret_mu     : RETURN: ML estimate of mu
*           ret_lambda : RETURN: ML estimate of lambda
*           
* Returns:  <eslOK> on success.
*
* Throws:   <eslENOHALT> if the fit doesn't converge.
*/
int
esl_gumbel_FitCensored(double *x, int n, int z, double phi, 
                       double *ret_mu, double *ret_lambda)
{
    double variance;
    double lambda, mu;
    double fx;			/* f(x)  */
    double dfx;			/* f'(x) */
    double esum;                  /* \sum e^(-lambda xi) */ 
    double tol = 1e-5;
    int    i;

    /* 1. Find an initial guess at lambda
    *    (Evans/Hastings/Peacock, Statistical Distributions, 2000, p.86)
    */
    esl_stats_DMean(x, n, NULL, &variance);
    lambda = eslCONST_PI / sqrt(6.*variance);

    /* 2. Use Newton/Raphson to solve Lawless 4.2.2 and find ML lambda
    */
    for (i = 0; i < 100; i++)
    {
        lawless422(x, n, z, phi, lambda, &fx, &dfx);
        if (fabs(fx) < tol) break;             /* success */
        lambda = lambda - fx / dfx;	     /* Newton/Raphson is simple */
        if (lambda <= 0.) lambda = 0.001;      /* but be a little careful  */
    }

    /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed.
    *      Resort to a bisection search. Worse convergence speed
    *      but guaranteed to converge (unlike Newton/Raphson).
    *      We assume (!?) that fx is a monotonically decreasing function of x;
    *      i.e. fx > 0 if we are left of the root, fx < 0 if we
    *      are right of the root.
    */ 
    if (i == 100)
    {
        double left, right, mid;
        ESL_DPRINTF1(("esl_gumbel_FitCensored(): Newton/Raphson failed; switched to bisection"));

        /* First bracket the root */
        left  = 0.;		/* we know that's the left bound */
        right = eslCONST_PI / sqrt(6.*variance); /* start from here, move "right"... */
        lawless422(x, n, z, phi, right, &fx, &dfx);
        while (fx > 0.)
        {
            right *= 2.;
            if (right > 100.) /* no reasonable lambda should be > 100, we assert */
                ESL_EXCEPTION(eslENOHALT, "Failed to bracket root in esl_gumbel_FitCensored().");
            lawless422(x, n, z, phi, right, &fx, &dfx);
        }

        /* Now we bisection search in left/right interval */
        for (i = 0; i < 100; i++)
        {
            mid = (left + right) / 2.; 
            lawless422(x, n, z, phi, mid, &fx, &dfx);
            if (fabs(fx) < tol) break;             /* success */
            if (fx > 0.)	left = mid;
            else          right = mid;
        }
        if (i == 100) 
            ESL_EXCEPTION(eslENOHALT, "Even bisection search failed in esl_gumbel_FitCensored().");
        lambda = mid;
    }

    /* 3. Substitute into Lawless 4.2.3 to find mu
    */
    esum = 0.;
    for (i = 0; i < n; i++)
        esum  += exp(-lambda * x[i]);
    esum += z * exp(-1. * lambda * phi);    /* term from censored data */
    mu = -log(esum / n) / lambda;        

    *ret_lambda = lambda;
    *ret_mu     = mu;   
    return eslOK;
}