Exemple #1
0
/* sxp_complete_binned_func():
 * Objective function returning the negative log likelihood of the
 * binned data in a histogram, where the probability mass of each
 * occupied bin is obtained as a difference of esl_sxp_cdf() values
 * at the bin's upper and lower bounds.
 *
 * Args:     p    - parameter vector; p[0] and p[1] are exponentiated
 *                  to obtain lambda and tau, respectively
 *           np   - number of parameters (not used in this function)
 *           dptr - pointer to a <struct sxp_binned_data>, which
 *                  supplies the histogram <g> and the fixed <mu>
 *
 * Returns:  the negative log likelihood; or <eslINFINITY> if any
 *           occupied bin evaluates to a nonpositive probability mass.
 */
static double 
sxp_complete_binned_func(double *p, int np, void *dptr)
{
  struct sxp_binned_data *data = (struct sxp_binned_data *) dptr;
  ESL_HISTOGRAM          *g    = data->g;
  double logL = 0.;
  double ai, bi;		/* lower, upper bounds on bin */
  double lambda, tau;
  int    i;
  double tmp;			/* probability mass in current bin */

  lambda = exp(p[0]);
  tau    = exp(p[1]);  

  ESL_DASSERT1(( ! isnan(lambda) ));
  ESL_DASSERT1(( ! isnan(tau) ));
  
  for (i = g->cmin; i <= g->imax; i++) /* for each occupied bin */
    {
      if (g->obs[i] == 0) continue;
      
      ai = esl_histogram_Bin2LBound(g, i);
      bi = esl_histogram_Bin2UBound(g, i);
      if (ai < data->mu) ai = data->mu; /* careful at leftmost bound */

      tmp = esl_sxp_cdf(bi, data->mu, lambda, tau) -
            esl_sxp_cdf(ai, data->mu, lambda, tau);

      /* For cdf values near 1.0, roundoff can make the difference come
       * out zero or very slightly negative. log() of either would give
       * -inf or NaN, so treat both as an impossible parameter setting.
       */
      if (tmp <= 0.) return eslINFINITY;
      logL += g->obs[i] * log(tmp);
    }
  return -logL;			/* minimizing NLL */
}
/* Function:  esl_exp_FitCompleteBinned()
 * Incept:    SRE, Sun Aug 21 13:07:22 2005 [St. Louis]
 *
 * Purpose:   Given a histogram <g> of binned observations, compute the
 *            maximum likelihood parameters $\mu$ and $\lambda$ of a
 *            complete exponential distribution and pass them back in
 *            <*ret_mu> and <*ret_lambda>. Bin i is taken to hold
 *            samples x with $l < x \leq u$, where l and u are the
 *            bin's lower and upper bounds.
 *
 *            When <g> has been virtually censored to focus on a
 *            tail, the "complete" exponential is fitted to that tail
 *            only; it is then up to the caller to keep track of how
 *            much of the total probability mass the tail represents.
 *
 *            Choice of $\mu$: the ML estimate is the smallest
 *            observed sample. For complete data, <*ret_mu> is
 *            therefore the smallest observed value, except for a
 *            "rounded" complete dataset, where the lower bound of the
 *            smallest occupied bin is used instead. For a tail,
 *            <*ret_mu> is the cutoff threshold <phi>, which the
 *            histogram object guarantees to lie on the lower bound
 *            of a bin.
 *
 *            Choice of $\lambda$: there is an analytical ML solution,
 *            so no iterative optimization is needed and the routine
 *            is fast.
 *
 *            If every sample falls in a single bin, the ML estimate
 *            of $\lambda$ is $\infty$. That is mathematically correct
 *            but rarely what a caller wants; using smaller bins is
 *            one way to avoid it.
 *
 *            Truly censored binned data are not supported: the
 *            calculation assumes all bins have equal width, whereas
 *            in true censored data the lower cutoff <phi> may fall
 *            anywhere inside the first bin.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if dataset is true-censored.
 */
int
esl_exp_FitCompleteBinned(ESL_HISTOGRAM *g, double *ret_mu, double *ret_lambda)
{
    double origin = 0.;         /* location parameter, mu                */
    double bin_width;           /* common width of all bins              */
    double sum_lo = 0.;         /* count-weighted sum of (lower - mu)    */
    double sum_hi = 0.;         /* count-weighted sum of (upper - mu)    */
    double lo, hi;              /* bounds of the bin being visited       */
    int    b;

    /* Unsupported case first, before any work is done. */
    if (g->dataset_is == ESL_HISTOGRAM::TRUE_CENSORED)
    {
        ESL_EXCEPTION(eslEINVAL, "can't fit true censored dataset");
    }

    /* Pick mu according to what kind of dataset the histogram holds. */
    if (g->dataset_is == ESL_HISTOGRAM::VIRTUAL_CENSORED)
    {
        origin = g->phi;        /* fitting to the tail */
    }
    else if (g->dataset_is == ESL_HISTOGRAM::COMPLETE)
    {
        origin = g->is_rounded ? esl_histogram_Bin2LBound(g, g->imin)
                               : g->xmin;
    }

    bin_width = g->w;

    /* Accumulate over occupied bins only. */
    for (b = g->cmin; b <= g->imax; b++)
    {
        if (g->obs[b] != 0)
        {
            lo = esl_histogram_Bin2LBound(g, b);
            hi = esl_histogram_Bin2UBound(g, b);
            sum_lo += g->obs[b] * (lo - origin);
            sum_hi += g->obs[b] * (hi - origin);
        }
    }

    *ret_mu     = origin;
    *ret_lambda = 1 / bin_width * (log(sum_hi) - log(sum_lo));
    return eslOK;
}
Exemple #3
0
/* wei_binned_func():
 * Objective function in the form expected by the conjugate gradient
 * descent optimizer in esl_minimizer. Evaluates the negative log
 * likelihood of a binned data sample, taking each occupied bin's
 * probability mass as a difference of esl_wei_cdf() values.
 *
 * The optimizer's p[0] and p[1] are exponentiated here to obtain
 * lambda and tau. <dptr> is a <struct wei_binned_data>, which carries
 * the histogram <h> and the fixed <mu>.
 *
 * Returns the negative log likelihood, or <eslINFINITY> when some
 * occupied bin has nonpositive computed mass.
 */
static double
wei_binned_func(double *p, int nparam, void *dptr)
{
  struct wei_binned_data *ctx  = (struct wei_binned_data *) dptr;
  ESL_HISTOGRAM          *hist = ctx->h;
  double lambda = exp(p[0]);	/* optimizer parameters are exponentiated */
  double tau    = exp(p[1]);
  double logL   = 0.;
  double lo, hi;		/* lower, upper bounds of current bin */
  double mass;			/* probability mass in current bin    */
  int    b;

  for (b = hist->cmin; b <= hist->imax; b++)
    {
      if (hist->obs[b] != 0)	/* empty bins contribute nothing */
	{
	  lo = esl_histogram_Bin2LBound(hist, b);
	  hi = esl_histogram_Bin2UBound(hist, b);
	  if (lo < ctx->mu) lo = ctx->mu;  /* don't reach below mu */

	  mass = esl_wei_cdf(hi, ctx->mu, lambda, tau) -
	         esl_wei_cdf(lo, ctx->mu, lambda, tau);

	  /* Near cdf~1.0, roundoff may push the difference a hair
	   * below zero. That much is tolerated; a larger negative
	   * value trips the debugging assertion.
	   */
	  ESL_DASSERT1( (mass + 1e-7 > 0.));
	  if (mass <= 0.) return eslINFINITY;

	  logL += hist->obs[b] * log(mass);
	}
    }
  return -logL;			/* the optimizer minimizes, so hand back NLL */
}