Ejemplo n.º 1
0
/* output_filter_power()
 *
 * Diagnostic for checking that a filter score (MSV, Viterbi, or
 * Forward) really has the power its P-value threshold claims. For
 * example, with an MSV filter threshold of P=0.02, about 2% of the
 * sequences should pass, independent of the model or the target
 * sequence length.
 *
 * Each of the <cfg->N> values in <scores> is converted to a P-value
 * using the calibration parameters stored in <hmm> (Gumbel survival for
 * --vit and --msv, exponential survival for --fwd). Scores whose
 * P-value is <= the --pthresh option are counted as passing.
 *
 * One tab-separated line is appended to <cfg->ffp>, suitable for
 * building histograms over many HMMs:
 *    <hmm name>  <# of seqs passing threshold>  <fraction of seqs passing threshold>
 *
 * Returns <eslOK> after writing the line. If none of --vit, --msv,
 * --fwd is set, fails through ESL_FAIL() with <eslEINVAL> and a message
 * for <errbuf>.
 *
 * SRE, Thu Apr  9 08:57:32 2009 [Janelia] xref J4/133
 */
static int
output_filter_power(ESL_GETOPTS *go, struct cfg_s *cfg, char *errbuf, P7_HMM *hmm, double *scores)
{
  double threshold = esl_opt_GetReal(go, "--pthresh"); /* P-value cutoff for the filter score       */
  double loc;                                          /* location parameter: mu (Gumbel) or tau    */
  double lambda;                                       /* slope parameter of the expected tail      */
  int    use_gumbel;                                   /* TRUE: Gumbel survival; FALSE: exponential */
  int    n_passed = 0;                                 /* how many scores had P <= threshold        */
  int    idx;                                          /* index into scores[]                       */

  /* Choose the calibration that matches the score type being tested. */
  if (esl_opt_GetBoolean(go, "--vit"))
    {
      loc        = hmm->evparam[p7_VMU];
      lambda     = hmm->evparam[p7_VLAMBDA];
      use_gumbel = TRUE;
    }
  else if (esl_opt_GetBoolean(go, "--msv"))
    {
      loc        = hmm->evparam[p7_MMU];
      lambda     = hmm->evparam[p7_MLAMBDA];
      use_gumbel = TRUE;
    }
  else if (esl_opt_GetBoolean(go, "--fwd"))
    {
      loc        = hmm->evparam[p7_FTAU];
      lambda     = hmm->evparam[p7_FLAMBDA];
      use_gumbel = FALSE;
    }
  else
    ESL_FAIL(eslEINVAL, errbuf, "can only use --ffile with viterbi, msv, or fwd scores");

  /* Count the scores that get through the filter. */
  for (idx = 0; idx < cfg->N; idx++)
    {
      double pvalue;

      if (use_gumbel) pvalue = esl_gumbel_surv(scores[idx], loc, lambda);
      else            pvalue = esl_exp_surv   (scores[idx], loc, lambda);

      if (pvalue <= threshold) n_passed++;
    }

  fprintf(cfg->ffp, "%s\t%d\t%.4f\n", hmm->name, n_passed, (double) n_passed / (double) cfg->N);
  return eslOK;
}
Ejemplo n.º 2
0
/* Function:  esl_hxp_surv()
 *
 * Purpose:   Returns the survivor function $P(X > x)$ (1-CDF) at
 *            quantile <x> for the hyperexponential described by <h>.
 *
 *            Any <x> below the offset <h->mu> has survival 1.0.
 *            Otherwise the result is the sum, over the <h->K>
 *            components, of the coefficient <h->q[k]> times the
 *            exponential survivor function with offset <h->mu> and
 *            rate <h->lambda[k]>.
 */
double
esl_hxp_surv(double x, ESL_HYPEREXP *h)
{
  const double offset = h->mu;
  double       tail   = 0.;
  int          comp;

  if (x < offset) return 1.0;

  for (comp = 0; comp < h->K; comp++)
    {
      const double weight = h->q[comp];

      tail += weight * esl_exp_surv(x, offset, h->lambda[comp]);
    }
  return tail;
}
Ejemplo n.º 3
0
/* main()
 *
 * Example driver for the Forward/Backward parsers. Takes two
 * command-line arguments, <hmmfile> and <seqfile>. It reads one HMM,
 * then for every sequence in <seqfile> runs the optimized
 * p7_ForwardParser()/p7_BackwardParser() and, for comparison, the
 * generic p7_GForward()/p7_GBackward(). For each sequence it prints raw
 * scores, null-corrected bit scores and exponential-tail P-values in
 * one of three layouts: "-1" (one tabular line), "-P" (layout for the
 * profmark postprocessors), or the default verbose report.
 *
 * Returns 0 when the whole sequence file was processed. Open and read
 * failures are reported through p7_Fail().
 */
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_OMX         *bck     = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  float           fraw, braw, nullsc, fsc;
  float           gfraw, gbraw, gfsc;
  double          P, gP;
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  /* Open sequence file for reading */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);

  /* Create default null model, then create and optimize profile.
   * No sequence has been read yet, so <sq->n> here is whatever
   * esl_sq_CreateDigital() left it at (presumably 0 -- confirm); the
   * real length is configured per sequence inside the read loop.
   */
  bg = p7_bg_Create(abc);               
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc); 
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* p7_oprofile_Dump(stdout, om);  */

  /* allocate DP matrices for O(M+L) parsers */
  fwd = p7_omx_Create(gm->M, 0, sq->n);
  bck = p7_omx_Create(gm->M, 0, sq->n);
  gx  = p7_gmx_Create(gm->M,    sq->n);

  /* allocate DP matrices for O(ML) fills */
  /* fwd = p7_omx_Create(gm->M, sq->n, sq->n); */
  /* bck = p7_omx_Create(gm->M, sq->n, sq->n); */

  /* p7_omx_SetDumpMode(stdout, fwd, TRUE); */     /* makes the fast DP algorithms dump their matrices */
  /* p7_omx_SetDumpMode(stdout, bck, TRUE); */  

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      /* Reconfigure the models and resize the matrices for this sequence's length. */
      p7_oprofile_ReconfigLength(om, sq->n);
      p7_ReconfigLength(gm,          sq->n);
      p7_bg_SetLength(bg,            sq->n);
      p7_omx_GrowTo(fwd, om->M, 0,   sq->n); 
      p7_omx_GrowTo(bck, om->M, 0,   sq->n); 
      p7_gmx_GrowTo(gx,  gm->M,      sq->n); 

      p7_bg_NullOne  (bg, sq->dsq, sq->n, &nullsc);
    
      p7_ForwardParser (sq->dsq, sq->n, om,      fwd, &fraw);
      p7_BackwardParser(sq->dsq, sq->n, om, fwd, bck, &braw);

      /* p7_Forward (sq->dsq, sq->n, om,      fwd, &fsc);        printf("forward:              %.2f nats\n", fsc);  */
      /* p7_Backward(sq->dsq, sq->n, om, fwd, bck, &bsc);        printf("backward:             %.2f nats\n", bsc);  */

      /* Comparison to other F/B implementations */
      p7_GForward     (sq->dsq, sq->n, gm, gx,  &gfraw);
      p7_GBackward    (sq->dsq, sq->n, gm, gx,  &gbraw);

      /* p7_gmx_Dump(stdout, gx);  */

      /* Null-corrected scores, converted from nats to bits, and their P-values. */
      fsc  =  (fraw-nullsc) / eslCONST_LOG2;
      gfsc = (gfraw-nullsc) / eslCONST_LOG2;
      P  = esl_exp_surv(fsc,   om->evparam[p7_FTAU],  om->evparam[p7_FLAMBDA]);
      gP = esl_exp_surv(gfsc,  gm->evparam[p7_FTAU],  gm->evparam[p7_FLAMBDA]);

      if (esl_opt_GetBoolean(go, "-1"))
        {
          printf("%-30s\t%-20s\t%9.2g\t%6.1f\t%9.2g\t%6.1f\n", sq->name, hmm->name, P, fsc, gP, gfsc);
        }
      else if (esl_opt_GetBoolean(go, "-P"))
        { /* output suitable for direct use in profmark benchmark postprocessors: */
          printf("%g\t%.2f\t%s\t%s\n", P, fsc, sq->name, hmm->name);
        }
      else
        {
          printf("target sequence:      %s\n",        sq->name);
          printf("fwd filter raw score: %.2f nats\n", fraw);
          printf("bck filter raw score: %.2f nats\n", braw);
          printf("null score:           %.2f nats\n", nullsc);
          printf("per-seq score:        %.2f bits\n", fsc);
          printf("P-value:              %g\n",        P);
          printf("GForward raw score:   %.2f nats\n", gfraw);
          printf("GBackward raw score:  %.2f nats\n", gbraw);
          printf("GForward seq score:   %.2f bits\n", gfsc);
          printf("GForward P-value:     %g\n",        gP);
        }

      esl_sq_Reuse(sq);
    }

  /* The loop must end on end-of-file. Any other status means the reader
   * stopped early (e.g. a malformed record), and exiting 0 would hide
   * that part of the input was never scored.
   */
  if (status != eslEOF) p7_Fail("Unexpected error %d reading sequence file %s", status, seqfile);

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Ejemplo n.º 4
0
/* main()
 *
 * Prints one line of summary statistics for every HMM in the <hmmfile>
 * given as the single command-line argument: index, name, accession,
 * nseq, eff_nseq, M, and four relative-entropy / information / KL
 * columns.
 *
 * With --eval2score an extra column gives the score corresponding to
 * the E-value from -E; with --score2eval it gives the E-value
 * corresponding to the score from -S. Both use the model's Forward
 * exponential-tail parameters and a database size taken from -Z,
 * --baseZ or --baseZ1.
 *
 * Exits with status 0 on success and 1 on command-line errors. File
 * open/read problems are reported through p7_Fail()/esl_fatal().
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS     *go      = NULL;      /* command line processing                   */
  ESL_ALPHABET    *abc     = NULL;
  char            *hmmfile = NULL;
  P7_HMMFILE      *hfp     = NULL;
  P7_HMM          *hmm     = NULL;
  P7_BG           *bg      = NULL;
  int              nhmm;
  double           x;
  float            KL;
  int              status;
  char             errbuf[eslERRBUFSIZE];
  float            nseq;

  int              do_eval2score = 0;
  int              do_score2eval = 0;
  long long        z_val;               /* wide type: the --baseZ products overflow a 32-bit int */
  float            e_val         = 0.0f;
  float            s_val         = 0.0f;

  /* Process the command line options.
   */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || 
      esl_opt_VerifyConfig(go)               != eslOK)
    {
      printf("Failed to parse command line: %s\n", go->errbuf);
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }
  if (esl_opt_GetBoolean(go, "-h") == TRUE) 
    {
      p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);
      puts("\nOptions:");
      esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=docgroup, 2 = indentation; 80=textwidth*/


      exit(0);
    }
  if (esl_opt_ArgNumber(go) != 1) 
    {
      puts("Incorrect number of command line arguments.");
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  if ((hmmfile = esl_opt_GetArg(go, 1)) == NULL) 
    {
      puts("Failed to read <hmmfile> argument from command line.");
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  output_header(stdout, go);

  /* At most one conversion mode is active; --eval2score wins if both are on.
   * The database-size flags only make sense together with a conversion mode.
   */
  if ( esl_opt_IsOn(go, "--eval2score") ) {
    do_eval2score = TRUE;
    e_val         =  esl_opt_GetReal(go, "-E");
  } else if ( esl_opt_IsOn(go, "--score2eval") ) {
    do_score2eval = TRUE;
    s_val         =  esl_opt_GetReal(go, "-S");
  } else if (  esl_opt_IsUsed(go, "--baseZ") || esl_opt_IsUsed(go, "--baseZ1") || esl_opt_IsUsed(go, "-Z") ) {
    puts("The flags -Z, --baseZ, and --baseZ1 are for use with --eval2score and --score2eval.");
    esl_usage(stdout, argv[0], usage);
    printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
    exit(1);
  }

  /* Database size. --baseZ and --baseZ1 are scaled by one million
   * (presumably megabases -- confirm against the option help), and
   * --baseZ is additionally doubled (presumably to count both strands).
   * The arithmetic is done in long long so that modest option values
   * such as 2000 no longer overflow.
   */
  if (esl_opt_IsUsed(go, "--baseZ") ) {
    z_val    = 2000000LL * (long long) esl_opt_GetInteger(go, "--baseZ");
  } else if (esl_opt_IsUsed(go, "--baseZ1") ) {
    z_val    = 1000000LL * (long long) esl_opt_GetInteger(go, "--baseZ1");
  } else {
    z_val    =  esl_opt_GetInteger(go, "-Z");
  }

  /* Initializations: open the HMM file
   */
  status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, hmmfile, errbuf);  

  /* Main body: read HMMs one at a time, print one line of stats
   */
  printf("#\n");
  printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s", "idx",  "name",                 "accession",    "nseq",     "eff_nseq", "M",      "relent", "info",   "p relE", "compKL");
  if (do_eval2score)
    printf (" %6s %6.2g", "sc for", e_val);
  if (do_score2eval)
    printf (" %6s %6.2f", "E-val for", s_val);

  printf("\n");
  printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s", "----", "--------------------", "------------", "--------", "--------", "------", "------", "------", "------", "------");
  if (do_eval2score)
    printf (" %13s", "-------------");
  if (do_score2eval)
    printf (" %13s", "-------------");
  printf("\n");


  nhmm = 0;
  while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) 
    {
      if      (status == eslEOD)       esl_fatal("read failed, HMM file %s may be truncated?", hmmfile);
      else if (status == eslEFORMAT)   esl_fatal("bad file format in HMM file %s",             hmmfile);
      else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets",   hmmfile);
      else if (status != eslOK)        esl_fatal("Unexpected error in reading HMMs from %s",   hmmfile);
      nhmm++;

      /* The conversion modes are restricted by the model's alphabet. */
      if ( do_eval2score || do_score2eval ) {
        if (esl_opt_IsUsed(go, "--baseZ") || esl_opt_IsUsed(go, "--baseZ1" ) ) {
          if ( hmm->abc->type != eslRNA   && hmm->abc->type != eslDNA) {
            puts("The flags --baseZ and --baseZ1 can't be used with non-nucleotide models.");
            esl_usage(stdout, argv[0], usage);
            printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
            exit(1);
          }
        } else if ( hmm->abc->type != eslAMINO  && hmm->abc->type != eslRNA && hmm->abc->type != eslDNA) {
          puts("The flags --eval2score and --score2eval can't be used with non-sequence models.");
          esl_usage(stdout, argv[0], usage);
          printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
          exit(1);
        }
      }

      /* With a base-count database size, the size is divided by this
       * model's max_length; with -Z it is used as given.
       */
      if (esl_opt_IsUsed(go, "--baseZ") || esl_opt_IsUsed(go, "--baseZ1") ) {
        nseq = (float)z_val / (float)(hmm->max_length);
      } else {
        nseq = (float)z_val;
      }

      if (bg == NULL) bg = p7_bg_Create(abc);

      p7_MeanPositionRelativeEntropy(hmm, bg, &x); 
      p7_hmm_CompositionKLDist(hmm, bg, &KL, NULL);

      printf("%-6d %-20s %-12s %8d %8.2f %6d %6.2f %6.2f %6.2f %6.2f",
             nhmm,
             hmm->name,
             hmm->acc == NULL ? "-" : hmm->acc,
             hmm->nseq,
             hmm->eff_nseq,
             hmm->M,
             p7_MeanMatchRelativeEntropy(hmm, bg),
             p7_MeanMatchInfo(hmm, bg),
             x,
             KL);



      if ( do_eval2score )
        {
          float sc;
          sc = esl_exp_invsurv( e_val / nseq ,  hmm->evparam[p7_FTAU],  hmm->evparam[p7_FLAMBDA]);
          printf("%13.2f", sc);
        }
      else  if ( do_score2eval)  
        {
          float e;
          e = nseq * esl_exp_surv( s_val ,  hmm->evparam[p7_FTAU],  hmm->evparam[p7_FLAMBDA]);
          printf("%13.2g", e);
        }
      printf("\n");

      /* p7_MeanForwardScore(hmm, bg)); */
      p7_hmm_Destroy(hmm);
    }

  p7_bg_Destroy(bg);
  esl_alphabet_Destroy(abc);
  p7_hmmfile_Close(hfp);
  esl_getopts_Destroy(go);
  exit(0);
}
Ejemplo n.º 5
0
/* output_result()
 *
 * Reports on the <cfg->N> simulated <scores> collected for one <hmm>.
 *
 * Optional side outputs, each written only if its stream/option is set:
 *   cfg->xfp   raw binary dump of scores[]
 *   cfg->alfp  "<alignment length>  <score>" lines, using alilens[]
 *   -v         scores echoed to stdout
 *   cfg->ffp   filter-power line, via output_filter_power()
 *
 * Main output to <cfg->ofp>:
 *   --vit/--msv  one line of Gumbel fits (complete ML fit; ML mu with
 *                lambda fixed at 0.693147; ML mu with the HMM's own
 *                lambda; the HMM's own mu and lambda), each with the
 *                expected number of scores above the rank-10 score.
 *   --fwd        one line per tail mass between --tmin and --tmax, each
 *                an exponential fit to that tail.
 * If set, <cfg->survfp> receives survival plots and <cfg->efp> receives
 * rank-versus-expected-count data for up to the first 1000 ranks.
 *
 * Returns <eslOK> on success. On failure returns an error code, with a
 * message left in <errbuf> where the failing step provides one. A
 * failure in output_filter_power() is now propagated rather than
 * ignored. The histogram is destroyed on every path.
 */
static int 
output_result(ESL_GETOPTS *go, struct cfg_s *cfg, char *errbuf, P7_HMM *hmm, double *scores, int *alilens)
{
  ESL_HISTOGRAM *h = NULL;
  int            i;
  double         tailp;
  double         x10;
  double         mu, lambda, E10;
  double         mufix,  E10fix;
  double         mufix2, E10fix2;
  double         E10p;
  double         almean, alvar;	/* alignment length mean and variance (optional output) */
  double         pmu     = 0.;  /* defined even if no score-type option is on */
  double         plambda = 0.;
  int            status;

  /* fetch statistical params from HMM for expected distribution */
  if       (esl_opt_GetBoolean(go, "--vit")) { pmu = hmm->evparam[p7_VMU];  plambda = hmm->evparam[p7_VLAMBDA]; }
  else if  (esl_opt_GetBoolean(go, "--msv")) { pmu = hmm->evparam[p7_MMU];  plambda = hmm->evparam[p7_MLAMBDA]; }
  else if  (esl_opt_GetBoolean(go, "--fwd")) { pmu = hmm->evparam[p7_FTAU]; plambda = hmm->evparam[p7_FLAMBDA]; }

  /* Optional output of scores/alignment lengths: */
  if (cfg->xfp)                      fwrite(scores, sizeof(double), cfg->N, cfg->xfp);
  if (cfg->alfp)                     for (i = 0; i < cfg->N; i++) fprintf(cfg->alfp, "%d  %.3f\n", alilens[i], scores[i]);
  if (esl_opt_GetBoolean(go, "-v"))  for (i = 0; i < cfg->N; i++) printf("%.3f\n", scores[i]);

  /* optional "filter power" data file: <hmm name> <# seqs <= P threshold> <fraction of seqs <= P threshold>
   * A failure here is passed on to the caller; <h> is still NULL at this point,
   * as it is on the allocation-failure path just below.
   */
  if (cfg->ffp && (status = output_filter_power(go, cfg, errbuf, hmm, scores)) != eslOK) goto ERROR;

  /* Count the scores into a histogram object.  */
  if ((h = esl_histogram_CreateFull(-50., 50., 0.2)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "allocation failed");
  for (i = 0; i < cfg->N; i++) esl_histogram_Add(h, scores[i]);

  /* For viterbi and MSV, fit data to a Gumbel, either with known lambda or estimated lambda. */
  if (esl_opt_GetBoolean(go, "--vit")  || esl_opt_GetBoolean(go, "--msv"))
    {
      esl_histogram_GetRank(h, 10, &x10);
      tailp  = 1.0;

      /* mu, lambda, E10 fields are for ML Gumbel fit to the observed data */
      if (esl_gumbel_FitComplete(scores, cfg->N, &mu, &lambda) != eslOK) 	esl_fatal("gumbel complete data fit failed");

      E10    = cfg->N * esl_gumbel_surv(x10, mu, lambda); 

      /* mufix, E10fix fields:   assume lambda = log2; fit an ML mu to the data */
      if (esl_gumbel_FitCompleteLoc(scores, cfg->N, 0.693147, &mufix) != eslOK) esl_fatal("gumbel mu- (location-)only data fit failed for lambda = log2");
      E10fix = cfg->N * esl_gumbel_surv(x10, mufix, 0.693147); 

      /* mufix2, E10fix2 fields: assume H3's own lambda estimate; fit ML mu */
      if (esl_gumbel_FitCompleteLoc(scores, cfg->N, plambda, &mufix2) != eslOK) esl_fatal("gumbel mu- (location-)only data fit failed for fitted lambda");
      E10fix2 = cfg->N * esl_gumbel_surv(x10, mufix2, plambda); 
      
      /* pmu, plambda, E10p:  use H3 expectation estimates (pmu, plambda) */
      E10p    = cfg->N * esl_gumbel_surv(x10, pmu,  plambda); 
      
      fprintf(cfg->ofp, "%-20s  %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f", 
              hmm->name, tailp, mu, lambda, E10, mufix, E10fix, mufix2, E10fix2, pmu, plambda, E10p);

      /* With -a, append mean and standard deviation of the alignment lengths. */
      if (esl_opt_GetBoolean(go, "-a")) {
	esl_stats_IMean(alilens, cfg->N, &almean, &alvar);
	fprintf(cfg->ofp, " %8.4f %8.4f\n", almean, sqrt(alvar));
      } else 
	fprintf(cfg->ofp, "\n");

      if (cfg->survfp != NULL) {
	double xmax = esl_opt_IsOn(go, "--xmax") ? esl_opt_GetReal(go, "--xmax") : h->xmax + 5.;

	esl_histogram_PlotSurvival(cfg->survfp, h);
	esl_gumbel_Plot(cfg->survfp, pmu,   plambda,  esl_gumbel_surv, h->xmin - 5., xmax, 0.1);
	esl_gumbel_Plot(cfg->survfp, mu,    lambda,   esl_gumbel_surv, h->xmin - 5., xmax, 0.1);
	esl_gumbel_Plot(cfg->survfp, mufix, 0.693147, esl_gumbel_surv, h->xmin - 5., xmax, 0.1);
      }

      if (cfg->efp != NULL) {
	double x;

	fprintf(cfg->efp, "# %s\n", hmm->name);
	for (i = 1; i <= 1000 && i <= cfg->N; i++) {
	  esl_histogram_GetRank(h, i, &x);
	  fprintf(cfg->efp, "%d %g\n", i, cfg->N * esl_gumbel_surv(x, pmu, plambda));
	}
	fprintf(cfg->efp, "&\n");
      }
    }

  /* For Forward, fit tail to exponential tails, for a range of tail mass choices. */
  else if (esl_opt_GetBoolean(go, "--fwd"))
    {
      double  tmin      = esl_opt_GetReal(go, "--tmin");
      double  tmax      = esl_opt_GetReal(go, "--tmax");
      double  tpoints   = (double) esl_opt_GetInteger(go, "--tpoints");
      int     do_linear = esl_opt_GetBoolean(go, "--tlinear");
      double *xv;
      double  tau;
      int     n;

      esl_histogram_GetRank(h, 10, &x10);

      /* Step tailp from tmin up to tmax in tpoints steps, linearly or
       * geometrically. The body always runs at least once, so mu, lambda
       * and tau are set before the plotting code below uses them.
       */
      tailp = tmin;
      do {
	if (tailp > 1.0)       tailp = 1.0;
	esl_histogram_GetTailByMass(h, tailp, &xv, &n, NULL);
	
	if (esl_exp_FitComplete(xv, n, &mu, &lambda) != eslOK) esl_fatal("exponential fit failed");
	E10    = cfg->N * tailp * esl_exp_surv(x10, mu,  lambda);
	mufix  = mu;
	E10fix = cfg->N * tailp * esl_exp_surv(x10, mu,  0.693147);
	E10p   = cfg->N * esl_exp_surv(x10, pmu, plambda); /* the pmu is relative to a P=1.0 tail origin. */
	
	tau    = mu + log(tailp) / lambda;

	fprintf(cfg->ofp, "%-20s  %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f\n", 
		hmm->name, tailp, mu, lambda, E10, mufix, E10fix, pmu, plambda, E10p);

	if      (tpoints == 1) break;
	else if (do_linear)    tailp += (tmax-tmin) / (tpoints-1);
	else                   tailp *= exp(log(tmax/tmin) / (tpoints-1));
      } while (tailp <= tmax+1e-7);

      if (cfg->survfp) 
	{
	  double xmax = esl_opt_IsOn(go, "--xmax") ? esl_opt_GetReal(go, "--xmax") : h->xmax + 5.;

	  esl_histogram_PlotSurvival(cfg->survfp, h);
	  esl_exp_Plot(cfg->survfp, pmu,  plambda, esl_exp_surv, pmu, xmax, 0.1);
	  esl_exp_Plot(cfg->survfp, tau,   lambda, esl_exp_surv, tau, xmax, 0.1);
	  esl_exp_Plot(cfg->survfp, tau, 0.693147, esl_exp_surv, tau, xmax, 0.1);
	}

      if (cfg->efp != NULL) {
	double x;

	fprintf(cfg->efp, "# %s\n", hmm->name);
	for (i = 1; i <= 1000 && i <= cfg->N; i++) {
	  esl_histogram_GetRank(h, i, &x);
	  fprintf(cfg->efp, "%d %g\n", i, cfg->N * esl_exp_surv(x, pmu, plambda));
	}
	fprintf(cfg->efp, "&\n");
      }

    }

  /* fallthrough: both normal, error cases execute same cleanup code */
  status = eslOK;
 ERROR:
  esl_histogram_Destroy(h);
  return status;
}
Ejemplo n.º 6
0
/* Function:  esl_exp_generic_surv()
 * Incept:    SRE, Thu Aug 25 07:59:05 2005[St. Louis]
 *
 * Purpose:   Generic-API version of the exponential survival function.
 *            <params> is read as an array of two doubles, which are
 *            passed to esl_exp_surv() as its second and third
 *            arguments, in that order.
 */
double
esl_exp_generic_surv(double x, void *params)
{
    double *theta  = params;
    double  first  = theta[0];
    double  second = theta[1];

    return esl_exp_surv(x, first, second);
}
Ejemplo n.º 7
0
void run_hmmer_pipeline(const char* seq) {
  int index, i, status;
  ESL_SQ* sq = esl_sq_CreateFrom(NULL, seq, NULL, NULL, NULL);
  P7_OPROFILE *om = NULL;
  P7_PROFILE *gm = NULL;
  float usc, vfsc, fwdsc;   /* filter scores                           */
  float filtersc;           /* HMM null filter score                   */
  float nullsc;             /* null model score                        */
  float seqbias;
  float seq_score;          /* the corrected per-seq bit score */
  double P;
  WRAPPER_RESULT* result;

  num_results = 0;
  if(sq->n == 0) {
    esl_sq_Destroy(sq);
    return;
  }

  esl_sq_Digitize(abc, sq);  

  int n = 0;
  float oasc;

  for(index = 0;index < num_models;index++) {
    om = models[index];

    p7_omx_Reuse(oxf);
    p7_omx_Reuse(oxb);

    p7_omx_GrowTo(oxf, om->M, sq->n, sq->n);
    p7_omx_GrowTo(oxb, om->M, sq->n, sq->n);

    p7_oprofile_ReconfigLength(om, sq->n);

    p7_bg_SetFilter(bg, om->M, om->compo);
    p7_bg_SetLength(bg, sq->n);

    //Calibrate null model
    p7_bg_NullOne(bg, sq->dsq, sq->n, &nullsc);

    //MSV Filter
    p7_MSVFilter(sq->dsq, sq->n, om, oxf, &usc);
    seq_score = (usc - nullsc) / eslCONST_LOG2;
    P = esl_gumbel_surv(seq_score,  om->evparam[p7_MMU],  om->evparam[p7_MLAMBDA]);
    if (P > f1) continue;

    //Bias filter (model compo)
    p7_bg_FilterScore(bg, sq->dsq, sq->n, &filtersc);
    seq_score = (usc - filtersc) / eslCONST_LOG2;
    P = esl_gumbel_surv(seq_score,  om->evparam[p7_MMU],  om->evparam[p7_MLAMBDA]);
    if (P > f1) continue;

    //Viterbi filter (Only do if P value from Bias is high)
    if(P > f2) {
      p7_ViterbiFilter(sq->dsq, sq->n, om, oxf, &vfsc);
      seq_score = (vfsc - filtersc) / eslCONST_LOG2;
      P = esl_gumbel_surv(seq_score,  om->evparam[p7_VMU],  om->evparam[p7_VLAMBDA]);
      if (P > f2) continue;
    }

    //Get the real probability (forward)
    p7_Forward(sq->dsq, sq->n, om, oxf, &fwdsc);
    seq_score = (fwdsc - filtersc) / eslCONST_LOG2;
    P = esl_exp_surv(seq_score,  om->evparam[p7_FTAU],  om->evparam[p7_FLAMBDA]);
    if(hmmer_error) {
      fprintf(stderr, "HMM: %s, seq: %s", om->name, seq);
      hmmer_error = 0;
      continue;
    }
    if (P > f3) continue;

    //Real hit, go in to posterior decoding and alignment
    p7_omx_Reuse(oxb);
    p7_trace_Reuse(tr);

    p7_Backward(sq->dsq, sq->n, om, oxf, oxb, NULL);

    status = p7_Decoding(om, oxf, oxb, oxb);

    if(status == eslOK) {
      //And then trace the result
      p7_OptimalAccuracy(om, oxb, oxf, &oasc);
      p7_OATrace(om, oxb, oxf, tr);
    } else if(status == eslERANGE) {
      fprintf(stderr, "Decoding overflow on model %s\n", om->name);
      gm = gmodels[index];
      if(gxf == NULL) {
	gxf = p7_gmx_Create(gm->M, sq->n);
	gxb = p7_gmx_Create(gm->M, sq->n);
      } else {
	p7_gmx_GrowTo(gxf, gm->M, sq->n);
	p7_gmx_GrowTo(gxb, gm->M, sq->n);
      }

      p7_ReconfigLength(gm, sq->n);

      p7_GForward (sq->dsq, sq->n, gm, gxf, &fwdsc);
      p7_GBackward(sq->dsq, sq->n, gm, gxb, NULL);

      p7_GDecoding(gm, gxf, gxb, gxb);
      p7_GOptimalAccuracy(gm, gxb, gxf, &oasc);
      p7_GOATrace        (gm, gxb, gxf, tr);

      p7_gmx_Reuse(gxf);
      p7_gmx_Reuse(gxb);
    }

    if(hmmer_error) {
      fprintf(stderr, "HMM: %s, seq: %s", om->name, seq);
      hmmer_error = 0;
      continue;
    }

    result = wrapper_results[num_results];
    reuse_result(result, tr->N + om->M, om->name); //We're way overallocating here, but it's hard to know at this point how much space we'll need for the alignment (plus leading and trailing gaps)
    trace_into(tr, result, sq, abc, om->M);
    result->bits = seq_score;
    num_results++;
  }

  esl_sq_Destroy(sq);
}