示例#1
0
int
main(int argc, char **argv)
{
  ESL_RANDOMNESS *r;		/* source of random numbers        */
  ESL_HISTOGRAM  *h;		/* histogram to store the data     */
  ESL_HYPEREXP   *hxp;		/* hyperexponential to sample from */
  ESL_HYPEREXP   *ehxp;		/* estimated hyperexponential      */
  double      x;		/* sampled data point              */
  int         n = 100000;	/* number of samples               */
  double     *data;
  int         ndata;
  int         i;

  hxp = esl_hyperexp_Create(3);
  hxp->mu = -2.0;
  hxp->q[0]      = 0.6;    hxp->q[1]      = 0.3;   hxp->q[2]      = 0.1; 
  hxp->lambda[0] = 1.0;    hxp->lambda[1] = 0.3;   hxp->lambda[2] = 0.1;

  r   = esl_randomness_Create(0);
  h   = esl_histogram_CreateFull(hxp->mu, 100, 1.0);

  for (i = 0; i < n; i++)
    {
      x    = esl_hxp_Sample(r, hxp);
      esl_histogram_Add(h, x);
    }
  esl_histogram_GetData(h, &data, &ndata);

  /* Plot the empirical (sampled) and expected survivals */
  esl_histogram_PlotSurvival(stdout, h);
  esl_hxp_Plot(stdout, hxp, &esl_hxp_surv, h->xmin, h->xmax, 0.1);

  /* ML fit to complete data, and plot fitted survival curve */
  ehxp = esl_hyperexp_Create(3);
  esl_hxp_FitGuess(data, ndata, ehxp);
  esl_hxp_FitComplete(data, ndata, ehxp);
  esl_hxp_Plot(stdout, ehxp, &esl_hxp_surv,  h->xmin, h->xmax, 0.1);

  /* ML fit to binned data, plot fitted survival curve  */
  esl_hxp_FitGuessBinned(h, ehxp);
  esl_hxp_FitCompleteBinned(h, ehxp);
  esl_hxp_Plot(stdout, ehxp, &esl_hxp_surv,  h->xmin, h->xmax, 0.1);

  esl_randomness_Destroy(r);
  esl_histogram_Destroy(h);
  esl_hyperexp_Destroy(hxp);
  esl_hyperexp_Destroy(ehxp);
  return 0;
}
示例#2
0
int
main(int argc, char **argv)
{
  double mu         = -50.0;
  double lambda     = 2.5;
  double tau        = 0.7;
  ESL_HISTOGRAM  *h = esl_histogram_CreateFull(mu, 100., 0.1);
  ESL_RANDOMNESS *r = esl_randomness_Create(0);
  int    n          = 10000;
  double *data;
  int     ndata;
  double emu, elam, etau;
  int    i;
  double x;

  for (i = 0; i < n; i++)
    {
      x  =  esl_sxp_Sample(r, mu, lambda, tau);
      esl_histogram_Add(h, x);
    }
  esl_histogram_GetData(h, &data, &ndata);

  /* Plot the empirical (sampled) and expected survivals */
  esl_histogram_PlotSurvival(stdout, h);
  esl_sxp_Plot(stdout, mu, lambda, tau,
	       &esl_sxp_surv,  h->xmin, h->xmax, 0.1);

  /* ML fit to complete data, and plot fitted survival curve */
  esl_sxp_FitComplete(data, ndata, &emu, &elam, &etau);
  esl_sxp_Plot(stdout, emu, elam, etau,
	       &esl_sxp_surv,  h->xmin, h->xmax, 0.1);

  /* ML fit to binned data, plot fitted survival curve  */
  esl_sxp_FitCompleteBinned(h, &emu, &elam, &etau);
  esl_sxp_Plot(stdout, emu, elam, etau,
	       &esl_sxp_surv,  h->xmin, h->xmax, 0.1);

  esl_randomness_Destroy(r);
  esl_histogram_Destroy(h);
  return 0;
}
示例#3
0
static int 
output_result(ESL_GETOPTS *go, struct cfg_s *cfg, char *errbuf, P7_HMM *hmm, double *scores, int *alilens)
{
  ESL_HISTOGRAM *h = NULL;
  int            i;
  double         tailp;
  double         x10;
  double         mu, lambda, E10;
  double         mufix,  E10fix;
  double         mufix2, E10fix2;
  double         E10p;
  double         almean, alvar;	/* alignment length mean and variance (optional output) */
  double         pmu, plambda;
  int            status;

  /* fetch statistical params from HMM for expected distribution */
  if       (esl_opt_GetBoolean(go, "--vit")) { pmu = hmm->evparam[p7_VMU];  plambda = hmm->evparam[p7_VLAMBDA]; }
  else if  (esl_opt_GetBoolean(go, "--msv")) { pmu = hmm->evparam[p7_MMU];  plambda = hmm->evparam[p7_MLAMBDA]; }
  else if  (esl_opt_GetBoolean(go, "--fwd")) { pmu = hmm->evparam[p7_FTAU]; plambda = hmm->evparam[p7_FLAMBDA]; }

  /* Optional output of scores/alignment lengths: */
  if (cfg->xfp)                      fwrite(scores, sizeof(double), cfg->N, cfg->xfp);
  if (cfg->alfp)                     for (i = 0; i < cfg->N; i++) fprintf(cfg->alfp, "%d  %.3f\n", alilens[i], scores[i]);
  if (esl_opt_GetBoolean(go, "-v"))  for (i = 0; i < cfg->N; i++) printf("%.3f\n", scores[i]);

  /* optional "filter power" data file: <hmm name> <# seqs <= P threshold> <fraction of seqs <= P threshold>  */
  if (cfg->ffp)                      output_filter_power(go, cfg, errbuf, hmm, scores);

  /* Count the scores into a histogram object.  */
  if ((h = esl_histogram_CreateFull(-50., 50., 0.2)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "allocation failed");
  for (i = 0; i < cfg->N; i++) esl_histogram_Add(h, scores[i]);

  /* For viterbi, MSV, and hybrid, fit data to a Gumbel, either with known lambda or estimated lambda. */
  if (esl_opt_GetBoolean(go, "--vit")  || esl_opt_GetBoolean(go, "--msv"))
    {
      esl_histogram_GetRank(h, 10, &x10);
      tailp  = 1.0;

      /* mu, lambda, E10 fields are for ML Gumbel fit to the observed data */
      if (esl_gumbel_FitComplete(scores, cfg->N, &mu, &lambda) != eslOK) 	esl_fatal("gumbel complete data fit failed");

      E10    = cfg->N * esl_gumbel_surv(x10, mu, lambda); 

      /* mufix, E10fix fields:   assume lambda = log2; fit an ML mu to the data */
      if (esl_gumbel_FitCompleteLoc(scores, cfg->N, 0.693147, &mufix) != eslOK) esl_fatal("gumbel mu- (location-)only data fit failed for lambda = log2");
      E10fix = cfg->N * esl_gumbel_surv(x10, mufix, 0.693147); 

      /* mufix2, E10fix2 fields: assume H3's own lambda estimate; fit ML mu */
      if (esl_gumbel_FitCompleteLoc(scores, cfg->N, plambda, &mufix2) != eslOK) esl_fatal("gumbel mu- (location-)only data fit failed for fitted lambda");
      E10fix2 = cfg->N * esl_gumbel_surv(x10, mufix2, plambda); 
      
      /* pmu, plambda, E10p:  use H3 expectation estimates (pmu, plambda) */
      E10p    = cfg->N * esl_gumbel_surv(x10, pmu,  plambda); 
      
      fprintf(cfg->ofp, "%-20s  %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f", 
              hmm->name, tailp, mu, lambda, E10, mufix, E10fix, mufix2, E10fix2, pmu, plambda, E10p);

      if (esl_opt_GetBoolean(go, "-a")) {
	esl_stats_IMean(alilens, cfg->N, &almean, &alvar);
	fprintf(cfg->ofp, " %8.4f %8.4f\n", almean, sqrt(alvar));
      } else 
	fprintf(cfg->ofp, "\n");

      if (cfg->survfp != NULL) {
	double xmax = esl_opt_IsOn(go, "--xmax") ? esl_opt_GetReal(go, "--xmax") : h->xmax + 5.;

	esl_histogram_PlotSurvival(cfg->survfp, h);
	esl_gumbel_Plot(cfg->survfp, pmu,   plambda,  esl_gumbel_surv, h->xmin - 5., xmax, 0.1);
	esl_gumbel_Plot(cfg->survfp, mu,    lambda,   esl_gumbel_surv, h->xmin - 5., xmax, 0.1);
	esl_gumbel_Plot(cfg->survfp, mufix, 0.693147, esl_gumbel_surv, h->xmin - 5., xmax, 0.1);
      }

      if (cfg->efp != NULL) {
	double x;

	fprintf(cfg->efp, "# %s\n", hmm->name);
	for (i = 1; i <= 1000 && i <= cfg->N; i++) {
	  esl_histogram_GetRank(h, i, &x);
	  fprintf(cfg->efp, "%d %g\n", i, cfg->N * esl_gumbel_surv(x, pmu, plambda));
	}
	fprintf(cfg->efp, "&\n");
      }
    }

  /* For Forward, fit tail to exponential tails, for a range of tail mass choices. */
  else if (esl_opt_GetBoolean(go, "--fwd"))
    {
      double  tmin      = esl_opt_GetReal(go, "--tmin");
      double  tmax      = esl_opt_GetReal(go, "--tmax");
      double  tpoints   = (double) esl_opt_GetInteger(go, "--tpoints");
      int     do_linear = esl_opt_GetBoolean(go, "--tlinear");
      double *xv;
      double  tau;
      int     n;

      esl_histogram_GetRank(h, 10, &x10);

      tailp = tmin;
      do {
	if (tailp > 1.0)       tailp = 1.0;
	esl_histogram_GetTailByMass(h, tailp, &xv, &n, NULL);
	
	if (esl_exp_FitComplete(xv, n, &mu, &lambda) != eslOK) esl_fatal("exponential fit failed");
	E10    = cfg->N * tailp * esl_exp_surv(x10, mu,  lambda);
	mufix  = mu;
	E10fix = cfg->N * tailp * esl_exp_surv(x10, mu,  0.693147);
	E10p   = cfg->N * esl_exp_surv(x10, pmu, plambda); /* the pmu is relative to a P=1.0 tail origin. */
	
	tau    = mu + log(tailp) / lambda;

	fprintf(cfg->ofp, "%-20s  %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f\n", 
		hmm->name, tailp, mu, lambda, E10, mufix, E10fix, pmu, plambda, E10p);

	if      (tpoints == 1) break;
	else if (do_linear)    tailp += (tmax-tmin) / (tpoints-1);
	else                   tailp *= exp(log(tmax/tmin) / (tpoints-1));
      } while (tailp <= tmax+1e-7);

      if (cfg->survfp) 
	{
	  double xmax = esl_opt_IsOn(go, "--xmax") ? esl_opt_GetReal(go, "--xmax") : h->xmax + 5.;

	  esl_histogram_PlotSurvival(cfg->survfp, h);
	  esl_exp_Plot(cfg->survfp, pmu,  plambda, esl_exp_surv, pmu, xmax, 0.1);
	  esl_exp_Plot(cfg->survfp, tau,   lambda, esl_exp_surv, tau, xmax, 0.1);
	  esl_exp_Plot(cfg->survfp, tau, 0.693147, esl_exp_surv, tau, xmax, 0.1);
	}

      if (cfg->efp != NULL) {
	double x;

	fprintf(cfg->efp, "# %s\n", hmm->name);
	for (i = 1; i <= 1000 && i <= cfg->N; i++) {
	  esl_histogram_GetRank(h, i, &x);
	  fprintf(cfg->efp, "%d %g\n", i, cfg->N * esl_exp_surv(x, pmu, plambda));
	}
	fprintf(cfg->efp, "&\n");
      }

    }

  /* fallthrough: both normal, error cases execute same cleanup code */
  status = eslOK;
 ERROR:
  esl_histogram_Destroy(h);
  return status;
}