/* direct_mv_fit() * SRE, Wed Jun 29 08:23:47 2005 * * Purely for curiousity: a complete data fit using the * simple direct method, calculating mu and lambda from mean * and variance. */ static int direct_mv_fit(double *x, int n, double *ret_mu, double *ret_lambda) { double mean, variance; esl_stats_DMean(x, n, &mean, &variance); *ret_lambda = eslCONST_PI / sqrt(6.*variance); *ret_mu = mean - 0.57722/(*ret_lambda); return eslOK; }
/* Function: esl_wei_FitComplete() * * Purpose: Given an array of <n> samples <x[0]..x[n-1>, fit * them to a stretched exponential distribution starting * at lower bound <mu> (all $x_i > \mu$), and * return maximum likelihood parameters <ret_lambda> * and <ret_tau>. * * Args: x - complete GEV-distributed data [0..n-1] * n - number of samples in <x> * ret_mu - RETURN: lower bound of the distribution (all x_i >= mu) * ret_lambda - RETURN: maximum likelihood estimate of lambda * ret_tau - RETURN: maximum likelihood estimate of tau * * Returns: <eslOK> on success. * * Throws: <eslENOHALT> if the fit doesn't converge. * * Xref: STL9/136-137 */ int esl_wei_FitComplete(double *x, int n, double *ret_mu, double *ret_lambda, double *ret_tau) { struct wei_data data; double p[2]; /* parameter vector */ double u[2]; /* max initial step size vector */ double wrk[8]; /* 4 tmp vectors of length 2 */ double mean; double mu, lambda, tau; /* initial param guesses */ double tol = 1e-6; /* convergence criterion for CG */ double fx; /* f(x) at minimum; currently unused */ int status; /* Make a good initial guess, based on exponential fit; * set an arbitrary tau. */ mu = esl_vec_DMin(x, n); esl_stats_DMean(x, n, &mean, NULL); lambda = 1 / (mean - mu); tau = 0.9; /* Load the data structure */ data.x = x; data.n = n; data.mu = mu; /* Change of variables; * lambda > 0, so c.o.v. lambda = exp^w, w = log(lambda); * tau > 0, same c.o.v. */ p[0] = log(lambda); p[1] = log(tau); u[0] = 1.0; u[1] = 1.0; /* pass problem to the optimizer */ status = esl_min_ConjugateGradientDescent(p, u, 2, &wei_func, NULL, (void *)(&data), tol, wrk, &fx); *ret_mu = mu; *ret_lambda = exp(p[0]); *ret_tau = exp(p[1]); return status; }
/* Function: esl_gumbel_FitTruncated() * Synopsis: Estimates $\mu$, $\lambda$ from truncated data. * Incept: SRE, Wed Jun 29 14:14:17 2005 [St. Louis] * * Purpose: Given a left-truncated array of Gumbel-distributed * samples <x[0]..x[n-1]> and the truncation threshold * <phi> (such that all <x[i]> $\geq$ <phi>). * Find maximum likelihood parameters <mu> and <lambda>. * * <phi> should not be much greater than <mu>, the * mode of the Gumbel, or the fit will become unstable * or may even fail to converge. The problem is * that for <phi> $>$ <mu>, the tail of the Gumbel * becomes a scale-free exponential, and <mu> becomes * undetermined. * * Algorithm: Uses conjugate gradient descent to optimize the * log likelihood of the data. Follows a general * approach to fitting missing data problems outlined * in [Gelman95]. * * Args: x - observed data samples [0..n-1] * n - number of samples * phi - truncation threshold; all x[i] >= phi * ret_mu - RETURN: ML estimate of mu * ret_lambda - RETURN: ML estimate of lambda * * Returns: <eslOK> on success. * * Throws: <eslENOHALT> if the fit doesn't converge. */ int esl_gumbel_FitTruncated(double *x, int n, double phi, double *ret_mu, double *ret_lambda) { struct tevd_data data; double wrk[8]; /* workspace for CG: 4 tmp vectors of size 2 */ double p[2]; /* mu, w; lambda = e^w */ double u[2]; /* max initial step size for mu, lambda */ int status; double mean, variance; double mu, lambda; double fx; data.x = x; data.n = n; data.phi = phi; /* The source of the following magic is Evans/Hastings/Peacock, * Statistical Distributions, 3rd edition (2000), p.86, which gives * eq's for the mean and variance of a Gumbel in terms of mu and lambda; * we turn them around to get mu and lambda in terms of the mean and variance. * These would be reasonable estimators if we had a full set of Gumbel * distributed variates. They'll be off for a truncated sample, but * close enough to be a useful starting point. */ esl_stats_DMean(x, n, &mean, &variance); lambda = eslCONST_PI / sqrt(6.*variance); mu = mean - 0.57722/lambda; p[0] = mu; p[1] = log(lambda); /* c.o.v. because lambda is constrained to >0 */ u[0] = 2.0; u[1] = 0.1; /* Pass the problem to the optimizer. The work is done by the * equations in tevd_func() and tevd_grad(). */ status = esl_min_ConjugateGradientDescent(p, u, 2, &tevd_func, &tevd_grad,(void *)(&data), 1e-4, wrk, &fx); *ret_mu = p[0]; *ret_lambda = exp(p[1]); /* reverse the c.o.v. */ return status; }
/* fitting_engine() * Fitting code shared by the FitComplete() and FitCensored() API. * * The fitting_engine(), in turn, is just an adaptor wrapped around * the conjugate gradient descent minimizer. */ static int fitting_engine(struct gev_data *data, double *ret_mu, double *ret_lambda, double *ret_alpha) { double p[3]; /* parameter vector */ double u[3]; /* max initial step size vector */ double wrk[12]; /* 4 tmp vectors of length 3 */ double mean, variance; double mu, lambda, alpha; /* initial param guesses */ double tol = 1e-6; /* convergence criterion for CG */ double fx; /* f(x) at minimum; currently unused */ int status; /* Make an initial guess. * (very good guess for complete data; merely sufficient for censored) */ esl_stats_DMean(data->x, data->n, &mean, &variance); lambda = eslCONST_PI / sqrt(6.*variance); mu = mean - 0.57722/lambda; alpha = 0.0001; p[0] = mu; p[1] = log(lambda); /* c.o.v. from lambda to w */ p[2] = alpha; /* max initial step sizes: keeps bracketing from exploding */ u[0] = 1.0; u[1] = fabs(log(0.02)); u[2] = 0.02; /* pass problem to the optimizer */ status = esl_min_ConjugateGradientDescent(p, u, 3, &gev_func, &gev_gradient, (void *)data, tol, wrk, &fx); *ret_mu = p[0]; *ret_lambda = exp(p[1]); *ret_alpha = p[2]; return status; }
/* Function: esl_sxp_FitComplete() * * Purpose: Given a vector of <n> observed data samples <x[]>, * find maximum likelihood parameters by conjugate gradient * descent optimization. */ int esl_sxp_FitComplete(double *x, int n, double *ret_mu, double *ret_lambda, double *ret_tau) { struct sxp_data data; double p[2], u[2], wrk[8]; double mu, tau, lambda; double mean; double tol = 1e-6; double fx; int status; /* initial guesses; mu is definitely = minimum x, * and just use arbitrary #'s to init lambda, tau */ mu = esl_vec_DMin(x, n); esl_stats_DMean(x, n, &mean, NULL); lambda = 1 / (mean - mu); tau = 0.9; /* load data structure, param vector, and step vector */ data.x = x; data.n = n; data.mu = mu; p[0] = log(lambda); p[1] = log(tau); u[0] = 1.0; u[1] = 1.0; /* hand it off */ status = esl_min_ConjugateGradientDescent(p, u, 2, &sxp_complete_func, NULL, (void *) (&data), tol, wrk, &fx); *ret_mu = mu; *ret_lambda = exp(p[0]); *ret_tau = exp(p[1]); return status; }
/* Function: esl_mixgev_FitGuess() * * Purpose: Make initial randomized guesses at the parameters * of mixture GEV <mg>, using random number generator * <r> and observed data consisting of <n> values * <x[0..n-1]>. This guess is a suitable starting * point for a parameter optimization routine, such * as <esl_mixgev_FitComplete()>. * * Specifically, we estimate one 'central' guess * for a single Gumbel fit to the data, using the * method of moments. Then we add $\pm 10\%$ to that 'central' * $\mu$ and $\lambda$ to get each component * $\mu_i$ and $\lambda_i$. The $\alpha_i$ parameters * are generated by sampling uniformly from $-0.1..0.1$. * Mixture coefficients $q_i$ are sampled uniformly. * * Args: r - randomness source * x - vector of observed data values to fit, 0..n-1 * n - number of values in <x> * mg - mixture GEV to put guessed params into * * Returns: <eslOK> on success. */ int esl_mixgev_FitGuess(ESL_RANDOMNESS *r, double *x, int n, ESL_MIXGEV *mg) { double mean, variance; double mu, lambda; int k; esl_stats_DMean(x, n, &mean, &variance); lambda = eslCONST_PI / sqrt(6.*variance); mu = mean - 0.57722/lambda; esl_dirichlet_DSampleUniform(r, mg->K, mg->q); for (k = 0; k < mg->K; k++) { mg->mu[k] = mu + 0.2 * mu * (esl_random(r) - 0.5); mg->lambda[k] = lambda + 0.2 * lambda * (esl_random(r) - 0.5); if (mg->isgumbel[k]) mg->alpha[k] = 0.; else mg->alpha[k] = 0.2 * (esl_random(r) - 0.5); } return eslOK; }
/* Function: esl_gumbel_FitCompleteLoc() * Synopsis: Estimates $\mu$ from complete data, given $\lambda$. * Incept: SRE, Thu Nov 24 09:09:17 2005 [St. Louis] * * Purpose: Given an array of Gumbel-distributed samples * <x[0]..x[n-1]> (complete data), and a known * (or otherwise fixed) <lambda>, find a maximum * likelihood estimate for location parameter <mu>. * * Algorithm: A straightforward simplification of FitComplete(). * * Args: x - list of Gumbel distributed samples * n - number of samples * lambda - known lambda (scale) parameter * ret_mu : RETURN: ML estimate of mu * * Returns: <eslOK> on success. * * Throws: (no abnormal error conditions) * * Note: Here and in FitComplete(), we have a potential * under/overflow problem. We ought to be doing the * esum in log space. */ int esl_gumbel_FitCompleteLoc(double *x, int n, double lambda, double *ret_mu) { double esum; int i; /* Substitute into Lawless 4.1.5 to find mu */ esum = 0.; for (i = 0; i < n; i++) esum += exp(-lambda * x[i]); *ret_mu = -log(esum / n) / lambda; return eslOK; #if 0 /* Replace the code above w/ code below to test the direct method. */ double mean, variance; esl_stats_DMean(x, n, &mean, &variance); *ret_mu = mean - 0.57722/lambda; return eslOK; #endif }
/* Function: esl_gumbel_FitCensored() * Synopsis: Estimates $\mu$, $\lambda$ from censored data. * Date: SRE, Mon Nov 17 10:01:05 1997 [St. Louis] * * Purpose: Given a left-censored array of Gumbel-distributed samples * <x[0]..x[n-1]>, the number of censored samples <z>, and the * censoring value <phi> (all <x[i]> $>$ <phi>). * Find maximum likelihood parameters <mu> and <lambda>. * * Algorithm: Uses approach described in [Lawless82]. Solves * for lambda using Newton/Raphson iterations; * then substitutes lambda into Lawless' equation 4.2.3 * to get mu. * * Args: x - array of Gumbel-distributed samples, 0..n-1 * n - number of observed samples * z - number of censored samples * phi - censoring value (all x_i >= phi) * ret_mu : RETURN: ML estimate of mu * ret_lambda : RETURN: ML estimate of lambda * * Returns: <eslOK> on success. * * Throws: <eslENOHALT> if the fit doesn't converge. */ int esl_gumbel_FitCensored(double *x, int n, int z, double phi, double *ret_mu, double *ret_lambda) { double variance; double lambda, mu; double fx; /* f(x) */ double dfx; /* f'(x) */ double esum; /* \sum e^(-lambda xi) */ double tol = 1e-5; int i; /* 1. Find an initial guess at lambda * (Evans/Hastings/Peacock, Statistical Distributions, 2000, p.86) */ esl_stats_DMean(x, n, NULL, &variance); lambda = eslCONST_PI / sqrt(6.*variance); /* 2. Use Newton/Raphson to solve Lawless 4.2.2 and find ML lambda */ for (i = 0; i < 100; i++) { lawless422(x, n, z, phi, lambda, &fx, &dfx); if (fabs(fx) < tol) break; /* success */ lambda = lambda - fx / dfx; /* Newton/Raphson is simple */ if (lambda <= 0.) lambda = 0.001; /* but be a little careful */ } /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed. * Resort to a bisection search. Worse convergence speed * but guaranteed to converge (unlike Newton/Raphson). * We assume (!?) that fx is a monotonically decreasing function of x; * i.e. fx > 0 if we are left of the root, fx < 0 if we * are right of the root. */ if (i == 100) { double left, right, mid; ESL_DPRINTF1(("esl_gumbel_FitCensored(): Newton/Raphson failed; switched to bisection")); /* First bracket the root */ left = 0.; /* we know that's the left bound */ right = eslCONST_PI / sqrt(6.*variance); /* start from here, move "right"... */ lawless422(x, n, z, phi, right, &fx, &dfx); while (fx > 0.) { right *= 2.; if (right > 100.) /* no reasonable lambda should be > 100, we assert */ ESL_EXCEPTION(eslENOHALT, "Failed to bracket root in esl_gumbel_FitCensored()."); lawless422(x, n, z, phi, right, &fx, &dfx); } /* Now we bisection search in left/right interval */ for (i = 0; i < 100; i++) { mid = (left + right) / 2.; lawless422(x, n, z, phi, mid, &fx, &dfx); if (fabs(fx) < tol) break; /* success */ if (fx > 0.) left = mid; else right = mid; } if (i == 100) ESL_EXCEPTION(eslENOHALT, "Even bisection search failed in esl_gumbel_FitCensored()."); lambda = mid; } /* 3. Substitute into Lawless 4.2.3 to find mu */ esum = 0.; for (i = 0; i < n; i++) esum += exp(-lambda * x[i]); esum += z * exp(-1. * lambda * phi); /* term from censored data */ mu = -log(esum / n) / lambda; *ret_lambda = lambda; *ret_mu = mu; return eslOK; }