static double sxp_complete_binned_func(double *p, int np, void *dptr) { struct sxp_binned_data *data = (struct sxp_binned_data *) dptr; ESL_HISTOGRAM *g = data->g; double logL = 0.; double ai, bi; /* lower, upper bounds on bin */ double lambda, tau; int i; double tmp; lambda = exp(p[0]); tau = exp(p[1]); ESL_DASSERT1(( ! isnan(lambda) )); ESL_DASSERT1(( ! isnan(tau) )); for (i = g->cmin; i <= g->imax; i++) /* for each occupied bin */ { if (g->obs[i] == 0) continue; ai = esl_histogram_Bin2LBound(g, i); bi = esl_histogram_Bin2UBound(g, i); if (ai < data->mu) ai = data->mu; /* careful at leftmost bound */ tmp = esl_sxp_cdf(bi, data->mu, lambda, tau) - esl_sxp_cdf(ai, data->mu, lambda, tau); if (tmp == 0.) return eslINFINITY; logL += g->obs[i] * log(tmp); } return -logL; /* minimizing NLL */ }
/* Function:  esl_exp_FitCompleteBinned()
 * Incept:    SRE, Sun Aug 21 13:07:22 2005 [St. Louis]
 *
 * Purpose:   Fit a complete exponential distribution to the binned
 *            observations in histogram <g>. Each bin i holds a count
 *            of samples x lying between its lower bound l and upper
 *            bound u (that is, $l < x \leq u$). The maximum
 *            likelihood parameters $\mu,\lambda$ are returned in
 *            <*ret_mu> and <*ret_lambda>.
 *
 *            If <g> was set up to focus on a tail by virtual
 *            censoring, the "complete" exponential is fitted to that
 *            tail. The caller is then responsible for remembering
 *            what fraction of the probability mass was in the tail.
 *
 *            The ML estimate for $\mu$ is the smallest observed
 *            sample. For complete data, <ret_mu> is set to the
 *            smallest observed sample value, except for a "rounded"
 *            complete dataset, where <ret_mu> is set to the lower
 *            bound of the smallest occupied bin. For tails, <ret_mu>
 *            is set to the cutoff threshold <phi>, which is
 *            guaranteed to lie at the lower bound of a bin (by how
 *            the histogram object sets tails).
 *
 *            The ML estimate for <ret_lambda> has an analytical
 *            solution, so this routine is fast.
 *
 *            If all the data fall in a single bin, the ML estimate
 *            of $\lambda$ is $\infty$. That is mathematically
 *            correct, but the caller probably wants to avoid it,
 *            perhaps by choosing smaller bins.
 *
 *            This function currently cannot fit an exponential tail
 *            to truly censored, binned data: it assumes that all
 *            bins have equal width, whereas in true censored data
 *            the lower cutoff <phi> may fall anywhere in the first
 *            bin.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if dataset is true-censored.
 */
int
esl_exp_FitCompleteBinned(ESL_HISTOGRAM *g, double *ret_mu, double *ret_lambda)
{
  double mu     = 0.;           /* stays 0 if no dataset kind below matches */
  double sum_lo = 0.;           /* count-weighted sum of (lower bound - mu) */
  double sum_hi = 0.;           /* count-weighted sum of (upper bound - mu) */
  double lo, hi;                /* lower, upper bounds of the current bin   */
  double width;                 /* bin width                                */
  int    b;

  /* Choose mu according to what kind of dataset the histogram holds. */
  if (g->dataset_is == ESL_HISTOGRAM::COMPLETE)
    {
      mu = (g->is_rounded ? esl_histogram_Bin2LBound(g, g->imin) : g->xmin);
    }
  else if (g->dataset_is == ESL_HISTOGRAM::VIRTUAL_CENSORED)
    {
      mu = g->phi;              /* i.e., we'll fit to the tail */
    }
  else if (g->dataset_is == ESL_HISTOGRAM::TRUE_CENSORED)
    {
      ESL_EXCEPTION(eslEINVAL, "can't fit true censored dataset");
    }

  width = g->w;

  /* Accumulate the two weighted sums over occupied bins only. */
  for (b = g->cmin; b <= g->imax; b++)
    {
      if (g->obs[b] != 0)
        {
          lo = esl_histogram_Bin2LBound(g,b);
          hi = esl_histogram_Bin2UBound(g,b);
          sum_lo += g->obs[b] * (lo-mu);
          sum_hi += g->obs[b] * (hi-mu);
        }
    }

  *ret_mu     = mu;
  *ret_lambda = 1/width * (log(sum_hi) - log(sum_lo));
  return eslOK;
}
/* wei_binned_func(): * Returns the negative log likelihood of a binned data sample, * in the API of the conjugate gradient descent optimizer in esl_minimizer. */ static double wei_binned_func(double *p, int nparam, void *dptr) { struct wei_binned_data *data = (struct wei_binned_data *) dptr; ESL_HISTOGRAM *h = data->h; double lambda, tau; double logL; double ai,bi; int i; double tmp; /* Unpack what the optimizer gave us. */ lambda = exp(p[0]); /* see below for c.o.v. notes */ tau = exp(p[1]); logL = 0.; for (i = h->cmin; i <= h->imax; i++) { if (h->obs[i] == 0) continue; ai = esl_histogram_Bin2LBound(h,i); bi = esl_histogram_Bin2UBound(h,i); if (ai < data->mu) ai = data->mu; tmp = esl_wei_cdf(bi, data->mu, lambda, tau) - esl_wei_cdf(ai, data->mu, lambda, tau); /* for cdf~1.0, numerical roundoff error can create tmp<0 by a * teensy amount; tolerate that, but catch anything worse */ ESL_DASSERT1( (tmp + 1e-7 > 0.)); if (tmp <= 0.) return eslINFINITY; logL += h->obs[i] * log(tmp); } return -logL; /* goal: minimize NLL */ }