Esempio n. 1
0
/* Function:  p7_oprofile_Sample()
 * Synopsis:  Sample a random profile.
 * Incept:    MSF Tue Nov 3, 2009 [Janelia]
 *
 * Purpose:   Sample a random profile of <M> nodes for alphabet <abc>,
 *            using <r> as the source of random numbers. Parameterize
 *            it for generation of target sequences of mean length
 *            <L>. Calculate its log-odds scores using background
 *            model <bg>.
 *            
 * Args:      r       - random number generator
 *            abc     - emission alphabet 
 *            bg      - background frequency model
 *            M       - size of sampled profile, in nodes
 *            L       - configured target seq mean length
 *            opt_hmm - optRETURN: sampled HMM
 *            opt_gm  - optRETURN: sampled normal profile
 *            opt_om  - RETURN: optimized profile
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_oprofile_Sample(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, const P7_BG *bg, int M, int L,
		   P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_OPROFILE **ret_om)
{
  P7_HMM         *hmm  = NULL;
  P7_PROFILE     *gm   = NULL;
  P7_OPROFILE    *om   = NULL;
  int             status;

  if ((gm = p7_profile_Create (M, abc)) == NULL)  { status = eslEMEM; goto ERROR; }
  if ((om = p7_oprofile_Create(M, abc)) == NULL)  { status = eslEMEM; goto ERROR; }

  if ((status = p7_hmm_Sample(r, M, abc, &hmm))             != eslOK) goto ERROR;
  if ((status = p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL)) != eslOK) goto ERROR;
  if ((status = p7_oprofile_Convert(gm, om))                != eslOK) goto ERROR;
  if ((status = p7_oprofile_ReconfigLength(om, L))          != eslOK) goto ERROR;

  if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm);
  if (opt_gm  != NULL) *opt_gm  = gm;  else p7_profile_Destroy(gm);
  *ret_om = om;
  return eslOK;

 ERROR:
  if (opt_hmm != NULL) *opt_hmm = NULL;
  if (opt_gm  != NULL) *opt_gm  = NULL;
  *ret_om = NULL;
  return status;
}
Esempio n. 2
0
/* Function:  p7_ViterbiMu()
 * Synopsis:  Determines the local Viterbi Gumbel mu parameter for a model.
 * Incept:    SRE, Tue May 19 10:26:19 2009 [Janelia]
 *
 * Purpose:   Identical to p7_MSVMu(), above, except that it fits
 *            Viterbi scores instead of MSV scores. 
 *
 *            The difference between the two mus is small, but can be
 *            up to ~1 bit or so for large, low-info models [J4/126] so
 *            decided to calibrate the two mus separately [J5/8]. 
 *            
 * Args:      r       :  source of random numbers
 *            om      :  score profile (length config is changed upon return!)
 *            bg      :  null model    (length config is changed upon return!)
 *            L       :  length of sequences to simulate
 *            N	      :  number of sequences to simulate		
 *            lambda  :  known Gumbel lambda parameter
 *            ret_vmu :  RETURN: ML estimate of location param mu
 *
 * Returns:   <eslOK> on success, and <ret_mu> contains the ML estimate
 *            of $\mu$.
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_ViterbiMu(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double *ret_vmu)
{
  P7_OMX  *ox      = p7_omx_Create(om->M, 0, 0); /* DP matrix: 1 row version */
  ESL_DSQ *dsq     = NULL;
  double  *xv      = NULL;
  int      i;
  float    sc, nullsc;
#ifndef p7_IMPL_DUMMY
  float    maxsc   = (32767.0 - om->base_w) / om->scale_w; /* if score overflows, use this [J4/139] */
#endif
  int      status;

  if (ox == NULL) { status = eslEMEM; goto ERROR; }
  ESL_ALLOC(xv,  sizeof(double)  * N);
  ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2));

  p7_oprofile_ReconfigLength(om, L);
  p7_bg_SetLength(bg, L);

  for (i = 0; i < N; i++)
    {
      if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR;
      if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc))          != eslOK) goto ERROR;   

      status = p7_ViterbiFilter(dsq, L, om, ox, &sc); 
#ifndef p7_IMPL_DUMMY
      if (status == eslERANGE) { sc = maxsc; status = eslOK; }
#endif
      if (status != eslOK)     goto ERROR;

      xv[i] = (sc - nullsc) / eslCONST_LOG2;
    }

  if ((status = esl_gumbel_FitCompleteLoc(xv, N, lambda, ret_vmu))  != eslOK) goto ERROR;
  p7_omx_Destroy(ox);
  free(xv);
  free(dsq);
  return eslOK;

 ERROR:
  *ret_vmu = 0.0;
  if (ox  != NULL) p7_omx_Destroy(ox);
  if (xv  != NULL) free(xv);
  if (dsq != NULL) free(dsq);
  return status;

}
Esempio n. 3
0
/* Function:  p7_Tau()
 * Synopsis:  Determine Forward tau by brief simulation.
 * Incept:    SRE, Thu Aug  9 15:08:39 2007 [Janelia]
 *
 * Purpose:   Determine the <tau> parameter for an exponential tail fit
 *            to the Forward score distribution for model <om>, on
 *            random sequences with the composition of the background
 *            model <bg>. This <tau> parameter is for an exponential
 *            distribution anchored from $P=1.0$, so it's not really a
 *            tail per se; but it's only an accurate fit in the tail
 *            of the Forward score distribution, from about $P=0.001$
 *            or so.
 *            
 *            The determination of <tau> is done by a brief simulation
 *            in which we fit a Gumbel distribution to a small number
 *            of Forward scores of random sequences, and use that to
 *            predict the location of the tail at probability <tailp>.
 *            
 *            The Gumbel is of course inaccurate, but we can use it
 *            here solely as an empirical distribution to determine
 *            the location of a reasonable <tau> more accurately on a
 *            smaller number of samples than we could do with raw
 *            order statistics. 
 *            
 *            Typical choices are L=100, N=200, tailp=0.04, which
 *            typically yield estimates $\hat{\mu}$ with a precision
 *            (standard deviation) of $\pm$ 0.2 bits, corresponding to
 *            a $\pm$ 15\% error in E-values. See [J1/135].
 *            
 *            The use of Gumbel fitting to a small number of $N$
 *            samples and the extrapolation of $\hat{\mu}$ from the
 *            estimated location of the 0.04 tail mass are both
 *            empirical and carefully optimized against several
 *            tradeoffs. Most importantly, around this choice of tail
 *            probability, a systematic error introduced by the use of
 *            the Gumbel fit is being cancelled by systematic error
 *            introduced by the use of a higher tail probability than
 *            the regime in which the exponential tail is a valid
 *            approximation. See [J1/135] for discussion.
 *            
 *            This function changes the length configuration of both
 *            <om> and <bg>. The caller must remember to reconfigure
 *            both of their length models appropriately for any
 *            subsequent alignments.
 *            
 * Args:      r      : source of randomness
 *            om     : configured profile to sample sequences from
 *            bg     : null model (for background residue frequencies)
 *            L      : mean length model for seq emission from profile
 *            N      : number of sequences to generate
 *            lambda : expected slope of the exponential tail (from p7_Lambda())
 *            tailp  : tail mass from which we will extrapolate mu
 *            ret_mu : RETURN: estimate for the Forward mu (base of exponential tail)
 *
 * Returns:   <eslOK> on success, and <*ret_fv> is the score difference
 *            in bits.
 *
 * Throws:    <eslEMEM> on allocation error, and <*ret_fv> is 0.
 */
int
p7_Tau(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double tailp, double *ret_tau)
{
  P7_OMX  *ox      = p7_omx_Create(om->M, 0, L);     /* DP matrix: for ForwardParser,  L rows */
  ESL_DSQ *dsq     = NULL;
  double  *xv      = NULL;
  float    fsc, nullsc;		                  
  double   gmu, glam;
  int      status;
  int      i;

  ESL_ALLOC(xv,  sizeof(double)  * N);
  ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2));
  if (ox == NULL) { status = eslEMEM; goto ERROR; }

  p7_oprofile_ReconfigLength(om, L);
  p7_bg_SetLength(bg, L);

  for (i = 0; i < N; i++)
    {
      if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR;
      if ((status = p7_ForwardParser(dsq, L, om, ox, &fsc))      != eslOK) goto ERROR;
      if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc))          != eslOK) goto ERROR;   
      xv[i] = (fsc - nullsc) / eslCONST_LOG2;
    }
  if ((status = esl_gumbel_FitComplete(xv, N, &gmu, &glam)) != eslOK) goto ERROR;

  /* Explanation of the eqn below: first find the x at which the Gumbel tail
   * mass is predicted to be equal to tailp. Then back up from that x
   * by log(tailp)/lambda to set the origin of the exponential tail to 1.0
   * instead of tailp.
   */
  *ret_tau =  esl_gumbel_invcdf(1.0-tailp, gmu, glam) + (log(tailp) / lambda);
  
  free(xv);
  free(dsq);
  p7_omx_Destroy(ox);
  return eslOK;

 ERROR:
  *ret_tau = 0.;
  if (xv  != NULL) free(xv);
  if (dsq != NULL) free(dsq);
  if (ox  != NULL) p7_omx_Destroy(ox);
  return status;
}
Esempio n. 4
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_OMX         *bck     = NULL;
  P7_OMX         *pp      = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           fsc, bsc;
  double          Mcs;

  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);                 p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);    p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);    p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  fwd = p7_omx_Create(gm->M, L, L);
  bck = p7_omx_Create(gm->M, L, L);
  pp  = p7_omx_Create(gm->M, L, L);

  esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  p7_Forward (dsq, L, om, fwd,      &fsc);
  p7_Backward(dsq, L, om, fwd, bck, &bsc);

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    p7_Decoding(om, fwd, bck, pp);              
  esl_stopwatch_Stop(w);

  Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  free(dsq);
  p7_omx_Destroy(fwd);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(pp);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return eslOK;
}
Esempio n. 5
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_OMX         *ox      = NULL;
  P7_GMX         *gx      = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           sc1, sc2;
  double          base_time, bench_time, Mcs;

  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  if (esl_opt_GetBoolean(go, "-x")) p7_profile_SameAsVF(om, gm);

  ox = p7_omx_Create(gm->M, 0, 0);
  gx = p7_gmx_Create(gm->M, L);

  /* Get a baseline time: how long it takes just to generate the sequences */
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  esl_stopwatch_Stop(w);
  base_time = w->user;

  /* Run the benchmark */
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    {
      esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
      p7_ViterbiFilter(dsq, L, om, ox, &sc1);   

      if (esl_opt_GetBoolean(go, "-c")) 
	{
	  p7_GViterbi(dsq, L, gm, gx, &sc2); 
	  printf("%.4f %.4f\n", sc1, sc2);  
	}

      if (esl_opt_GetBoolean(go, "-x"))
	{
	  p7_GViterbi(dsq, L, gm, gx, &sc2); 
	  sc2 /= om->scale_w;
	  if (om->mode == p7_UNILOCAL)   sc2 -= 2.0; /* that's ~ L \log \frac{L}{L+2}, for our NN,CC,JJ */
	  else if (om->mode == p7_LOCAL) sc2 -= 3.0; /* that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ */
	  printf("%.4f %.4f\n", sc1, sc2);  
	}
    }
  esl_stopwatch_Stop(w);
  bench_time = w->user - base_time;
  Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  free(dsq);
  p7_omx_Destroy(ox);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
Esempio n. 6
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_OMX         *ox      = NULL;
  P7_GMX         *gx      = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  float           vfraw, nullsc, vfscore;
  float           graw, gscore;
  double          P, gP;
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  /* Read in one sequence */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);

  /* create default null model, then create and optimize profile */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* allocate DP matrices, both a generic and an optimized one */
  ox = p7_omx_Create(gm->M, 0, sq->n);
  gx = p7_gmx_Create(gm->M, sq->n);

  /* Useful to place and compile in for debugging: 
     p7_oprofile_Dump(stdout, om);      dumps the optimized profile
     p7_omx_SetDumpMode(ox, TRUE);      makes the fast DP algorithms dump their matrices
     p7_gmx_Dump(stdout, gx);           dumps a generic DP matrix
  */

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      p7_oprofile_ReconfigLength(om, sq->n);
      p7_ReconfigLength(gm,          sq->n);
      p7_bg_SetLength(bg,            sq->n);
      p7_omx_GrowTo(ox, om->M, 0,    sq->n); 
      p7_gmx_GrowTo(gx, gm->M,       sq->n); 

      p7_ViterbiFilter  (sq->dsq, sq->n, om, ox, &vfraw);
      p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc);
      vfscore = (vfraw - nullsc) / eslCONST_LOG2;
      P        = esl_gumbel_surv(vfscore,  om->evparam[p7_VMU],  om->evparam[p7_VLAMBDA]);

      p7_GViterbi       (sq->dsq, sq->n, gm, gx, &graw); 
      gscore   = (graw - nullsc) / eslCONST_LOG2;
      gP       = esl_gumbel_surv(gscore,  gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA]);

      if (esl_opt_GetBoolean(go, "-1"))
	{
	  printf("%-30s\t%-20s\t%9.2g\t%7.2f\t%9.2g\t%7.2f\n", sq->name, hmm->name, P, vfscore, gP, gscore);
	}
      else if (esl_opt_GetBoolean(go, "-P"))
	{ /* output suitable for direct use in profmark benchmark postprocessors: */
	  printf("%g\t%.2f\t%s\t%s\n", P, vfscore, sq->name, hmm->name);
	}
      else
	{
	  printf("target sequence:      %s\n",        sq->name);
	  printf("vit filter raw score: %.2f nats\n", vfraw);
	  printf("null score:           %.2f nats\n", nullsc);
	  printf("per-seq score:        %.2f bits\n", vfscore);
	  printf("P-value:              %g\n",        P);
	  printf("GViterbi raw score:   %.2f nats\n", graw);
	  printf("GViterbi seq score:   %.2f bits\n", gscore);
	  printf("GViterbi P-value:     %g\n",        gP);
	}
      
      esl_sq_Reuse(sq);
    }

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_omx_Destroy(ox);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Esempio n. 7
0
static void 
scan_thread(void *arg)
{
  int               i;
  int               count;
  int               workeridx;
  WORKER_INFO      *info;
  ESL_THREADS      *obj;

  ESL_STOPWATCH    *w;

  P7_BG            *bg       = NULL;         /* null model                     */
  P7_PIPELINE      *pli      = NULL;         /* work pipeline                  */
  P7_TOPHITS       *th       = NULL;         /* top hit results                */

  obj = (ESL_THREADS *) arg;
  esl_threads_Started(obj, &workeridx);

  info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx);

  w = esl_stopwatch_Create();
  esl_stopwatch_Start(w);

  /* Convert to an optimized model */
  bg = p7_bg_Create(info->abc);

  /* Create processing pipeline and hit list */
  th  = p7_tophits_Create(); 
  pli = p7_pipeline_Create(info->opts, 100, 100, FALSE, p7_SCAN_MODELS);

  p7_pli_NewSeq(pli, info->seq);

  /* loop until all sequences have been processed */
  count = 1;
  while (count > 0) {
    int           inx;
    int          blksz;
    P7_OPROFILE **om;

    /* grab the next block of sequences */
    if (pthread_mutex_lock(info->inx_mutex) != 0) p7_Fail("mutex lock failed");
    inx   = *info->inx;
    blksz = *info->blk_size;
    if (inx > *info->limit) {
      blksz /= 5;
      if (blksz < 1000) {
        *info->limit = info->om_cnt * 2;
      } else {
        *info->limit = inx + (info->om_cnt - inx) * 2 / 3; 
      }
    }
    *info->blk_size = blksz;
    *info->inx += blksz;
    if (pthread_mutex_unlock(info->inx_mutex) != 0) p7_Fail("mutex unlock failed");

    om    = info->om_list + inx;
    count = info->om_cnt - inx;
    if (count > blksz) count = blksz;

    /* Main loop: */
    for (i = 0; i < count; ++i, ++om) {
      p7_pli_NewModel(pli, *om, bg);
      p7_bg_SetLength(bg, info->seq->n);
      p7_oprofile_ReconfigLength(*om, info->seq->n);
	      
      p7_Pipeline(pli, *om, bg, info->seq, th);
      p7_pipeline_Reuse(pli);
    }
  }

  /* make available the pipeline objects to the main thread */
  info->th = th;
  info->pli = pli;

  /* clean up */
  p7_bg_Destroy(bg);

  esl_stopwatch_Stop(w);
  info->elapsed = w->elapsed;

  esl_stopwatch_Destroy(w);

  esl_threads_Finished(obj, workeridx);

  pthread_exit(NULL);
  return;
}
Esempio n. 8
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_OMX         *ox1     = NULL;
  P7_OMX         *ox2     = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  float           null2[p7_MAXCODE];
  int             i,j,d,pos;
  int             nsamples = 200;
  float           fsc, bsc;
  double          Mcs;

  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);                 p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);    p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);    p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  ox1 = p7_omx_Create(gm->M, L, L);
  ox2 = p7_omx_Create(gm->M, L, L);

  esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  p7_Forward (dsq, L, om, ox1,      &fsc);

  if (esl_opt_GetBoolean(go, "-t"))
    {
      P7_TRACE *tr   = p7_trace_Create();
      float    *n2sc = malloc(sizeof(float) * (L+1));

      esl_stopwatch_Start(w);
      for (i = 0; i < N; i++)
	{ /* This is approximately what p7_domaindef.c::region_trace_ensemble() is doing: */
	  for (j = 0; j < nsamples; j++)
	    {
	      p7_StochasticTrace(r, dsq, L, om, ox1, tr);
	      p7_trace_Index(tr);
	      pos = 1; 
	      for (d = 0; d < tr->ndom; d++)
		{
		  p7_Null2_ByTrace(om, tr, tr->tfrom[d], tr->tto[d], ox2, null2);
		  for (; pos <= tr->sqfrom[d]; pos++) n2sc[pos] += 1.0;
		  for (; pos < tr->sqto[d];    pos++) n2sc[pos] += null2[dsq[pos]];
		}
	      for (; pos <= L; pos++)  n2sc[pos] += 1.0;
	      p7_trace_Reuse(tr);
	    }

	  for (pos = 1; pos <= L; pos++)
	    n2sc[pos] = logf(n2sc[pos] / nsamples);
	}
      esl_stopwatch_Stop(w);

      free(n2sc);
      p7_trace_Destroy(tr);
    }
  else
    {
      p7_Backward(dsq, L, om, ox1, ox2, &bsc);
      p7_Decoding(om, ox1, ox2, ox2);              

      esl_stopwatch_Start(w);
      for (i = 0; i < N; i++)
	p7_Null2_ByExpectation(om, ox2, null2);
      esl_stopwatch_Stop(w);
    }


  Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  free(dsq);
  p7_omx_Destroy(ox1);
  p7_omx_Destroy(ox2);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
Esempio n. 9
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx1     = NULL;
  P7_GMX         *gx2     = NULL;
  P7_OMX         *ox1     = NULL;
  P7_OMX         *ox2     = NULL;
  P7_TRACE       *tr      = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           fsc, bsc, accscore;
  float           fsc_g, bsc_g, accscore_g;
  double          Mcs;

  p7_FLogsumInit();

  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);                 p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);    p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);    p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001)
    p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode");

  ox1 = p7_omx_Create(gm->M, L, L);
  ox2 = p7_omx_Create(gm->M, L, L);
  tr  = p7_trace_CreateWithPP();

  esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  p7_Forward (dsq, L, om, ox1,      &fsc);
  p7_Backward(dsq, L, om, ox1, ox2, &bsc);
  p7_Decoding(om, ox1, ox2, ox2);              

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    {
      p7_OptimalAccuracy(om, ox2, ox1, &accscore);

      if (! esl_opt_GetBoolean(go, "--notrace"))
	{
	  p7_OATrace(om, ox2, ox1, tr);
	  p7_trace_Reuse(tr);
	}
    }
  esl_stopwatch_Stop(w);

  Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x") )
    {
      gx1 = p7_gmx_Create(gm->M, L);
      gx2 = p7_gmx_Create(gm->M, L);
      
      p7_GForward (dsq, L, gm, gx1, &fsc_g);
      p7_GBackward(dsq, L, gm, gx2, &bsc_g);
      p7_GDecoding(gm, gx1, gx2, gx2);             
      p7_GOptimalAccuracy(gm, gx2, gx1, &accscore_g);

      printf("generic:  fwd=%8.4f  bck=%8.4f  acc=%8.4f\n", fsc_g, bsc_g, accscore_g);
      printf("VMX:      fwd=%8.4f  bck=%8.4f  acc=%8.4f\n", fsc,   bsc,   accscore);

      p7_gmx_Destroy(gx1);
      p7_gmx_Destroy(gx2);
    }

  free(dsq);
  p7_omx_Destroy(ox1);
  p7_omx_Destroy(ox2);
  p7_trace_Destroy(tr);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
Esempio n. 10
0
static void 
search_thread(void *arg)
{
  int               i;
  int               count;
  int               seed;
  int               status;
  int               workeridx;
  WORKER_INFO      *info;
  ESL_THREADS      *obj;
  ESL_SQ            dbsq;
  ESL_STOPWATCH    *w        = NULL;         /* timing stopwatch               */
  P7_BUILDER       *bld      = NULL;         /* HMM construction configuration */
  P7_BG            *bg       = NULL;         /* null model                     */
  P7_PIPELINE      *pli      = NULL;         /* work pipeline                  */
  P7_TOPHITS       *th       = NULL;         /* top hit results                */
  P7_PROFILE       *gm       = NULL;         /* generic model                  */
  P7_OPROFILE      *om       = NULL;         /* optimized query profile        */

  obj = (ESL_THREADS *) arg;
  esl_threads_Started(obj, &workeridx);

  info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx);
  w    = esl_stopwatch_Create();
  bg   = p7_bg_Create(info->abc);
  esl_stopwatch_Start(w);

  /* set up the dummy description and accession fields */
  dbsq.desc = "";
  dbsq.acc  = "";

  /* process a query sequence or hmm */
  if (info->seq != NULL) {
    bld = p7_builder_Create(NULL, info->abc);
    if ((seed = esl_opt_GetInteger(info->opts, "--seed")) > 0) {
      esl_randomness_Init(bld->r, seed);
      bld->do_reseeding = TRUE;
    }
    bld->EmL = esl_opt_GetInteger(info->opts, "--EmL");
    bld->EmN = esl_opt_GetInteger(info->opts, "--EmN");
    bld->EvL = esl_opt_GetInteger(info->opts, "--EvL");
    bld->EvN = esl_opt_GetInteger(info->opts, "--EvN");
    bld->EfL = esl_opt_GetInteger(info->opts, "--EfL");
    bld->EfN = esl_opt_GetInteger(info->opts, "--EfN");
    bld->Eft = esl_opt_GetReal   (info->opts, "--Eft");

    if (esl_opt_IsOn(info->opts, "--mxfile")) status = p7_builder_SetScoreSystem (bld, esl_opt_GetString(info->opts, "--mxfile"), NULL, esl_opt_GetReal(info->opts, "--popen"), esl_opt_GetReal(info->opts, "--pextend"), bg);
    else                                      status = p7_builder_LoadScoreSystem(bld, esl_opt_GetString(info->opts, "--mx"),           esl_opt_GetReal(info->opts, "--popen"), esl_opt_GetReal(info->opts, "--pextend"), bg); 
    if (status != eslOK) {
      //client_error(info->sock, status, "hmmgpmd: failed to set single query sequence score system: %s", bld->errbuf);
      fprintf(stderr, "hmmpgmd: failed to set single query sequence score system: %s", bld->errbuf);
      pthread_exit(NULL);
      return;
    }
    p7_SingleBuilder(bld, info->seq, bg, NULL, NULL, NULL, &om); /* bypass HMM - only need model */
    p7_builder_Destroy(bld);
  } else {
    gm = p7_profile_Create (info->hmm->M, info->abc);
    om = p7_oprofile_Create(info->hmm->M, info->abc);
    p7_ProfileConfig(info->hmm, bg, gm, 100, p7_LOCAL);
    p7_oprofile_Convert(gm, om);
  }

  /* Create processing pipeline and hit list */
  th  = p7_tophits_Create(); 
  pli = p7_pipeline_Create(info->opts, om->M, 100, FALSE, p7_SEARCH_SEQS);
  p7_pli_NewModel(pli, om, bg);

  if (pli->Z_setby == p7_ZSETBY_NTARGETS) pli->Z = info->db_Z;

  /* loop until all sequences have been processed */
  count = 1;
  while (count > 0) {
    int          inx;
    int          blksz;
    HMMER_SEQ  **sq;

    /* grab the next block of sequences */
    if (pthread_mutex_lock(info->inx_mutex) != 0) p7_Fail("mutex lock failed");
    inx = *info->inx;
    blksz = *info->blk_size;
    if (inx > *info->limit) {
      blksz /= 5;
      if (blksz < 1000) {
        *info->limit = info->sq_cnt * 2;
      } else {
        *info->limit = inx + (info->sq_cnt - inx) * 2 / 3; 
      }
    }
    *info->blk_size = blksz;
    *info->inx += blksz;
    if (pthread_mutex_unlock(info->inx_mutex) != 0) p7_Fail("mutex unlock failed");

    sq = info->sq_list + inx;

    count = info->sq_cnt - inx;
    if (count > blksz) count = blksz;

    /* Main loop: */
    for (i = 0; i < count; ++i, ++sq) {
      if ( !(info->range_list) || hmmpgmd_IsWithinRanges ((*sq)->idx, info->range_list)) {
        dbsq.name  = (*sq)->name;
        dbsq.dsq   = (*sq)->dsq;
        dbsq.n     = (*sq)->n;
        dbsq.idx   = (*sq)->idx;
        if((*sq)->desc != NULL) dbsq.desc  = (*sq)->desc;

        p7_bg_SetLength(bg, dbsq.n);
        p7_oprofile_ReconfigLength(om, dbsq.n);

        p7_Pipeline(pli, om, bg, &dbsq, th);

        p7_pipeline_Reuse(pli);
      }
    }
  }

  /* make available the pipeline objects to the main thread */
  info->th = th;
  info->pli = pli;

  /* clean up */
  p7_bg_Destroy(bg);
  p7_oprofile_Destroy(om);

  if (gm != NULL)  p7_profile_Destroy(gm);

  esl_stopwatch_Stop(w);
  info->elapsed = w->elapsed;

  esl_stopwatch_Destroy(w);

  esl_threads_Finished(obj, workeridx);

  pthread_exit(NULL);
  return;
}
Esempio n. 11
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_OMX         *bck     = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           fsc, bsc;
  float           fsc2, bsc2;
  double          base_time, bench_time, Mcs;

  p7_FLogsumInit();

  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001)
    p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode");

  if (esl_opt_GetBoolean(go, "-P")) {
    fwd = p7_omx_Create(gm->M, 0, L);
    bck = p7_omx_Create(gm->M, 0, L);
  } else {
    fwd = p7_omx_Create(gm->M, L, L);
    bck = p7_omx_Create(gm->M, L, L);
  }
  gx  = p7_gmx_Create(gm->M, L);

  /* Get a baseline time: how long it takes just to generate the sequences */
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  esl_stopwatch_Stop(w);
  base_time = w->user;

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    {
      esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
      if (esl_opt_GetBoolean(go, "-P")) {
	if (! esl_opt_GetBoolean(go, "-B"))  p7_ForwardParser (dsq, L, om,      fwd, &fsc);
	if (! esl_opt_GetBoolean(go, "-F"))  p7_BackwardParser(dsq, L, om, fwd, bck, &bsc);
      } else {
	if (! esl_opt_GetBoolean(go, "-B"))  p7_Forward (dsq, L, om,      fwd, &fsc);
	if (! esl_opt_GetBoolean(go, "-F"))  p7_Backward(dsq, L, om, fwd, bck, &bsc);
      }

      if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x"))
	{
	  p7_GForward (dsq, L, gm, gx, &fsc2); 
	  p7_GBackward(dsq, L, gm, gx, &bsc2); 
	  printf("%.4f %.4f %.4f %.4f\n", fsc, bsc, fsc2, bsc2);  
	}
    }
  esl_stopwatch_Stop(w);
  bench_time = w->user - base_time;
  Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  free(dsq);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
Esempio n. 12
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_OMX         *bck     = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  float           fraw, braw, nullsc, fsc;
  float           gfraw, gbraw, gfsc;
  double          P, gP;
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  /* Open sequence file for reading */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);

  /* create default null model, then create and optimize profile */
  bg = p7_bg_Create(abc);               
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc); 
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* p7_oprofile_Dump(stdout, om);  */

  /* allocate DP matrices for O(M+L) parsers */
  fwd = p7_omx_Create(gm->M, 0, sq->n);
  bck = p7_omx_Create(gm->M, 0, sq->n);
  gx  = p7_gmx_Create(gm->M,    sq->n);

  /* allocate DP matrices for O(ML) fills */
  /* fwd = p7_omx_Create(gm->M, sq->n, sq->n); */
  /* bck = p7_omx_Create(gm->M, sq->n, sq->n); */

  /* p7_omx_SetDumpMode(stdout, fwd, TRUE); */     /* makes the fast DP algorithms dump their matrices */
  /* p7_omx_SetDumpMode(stdout, bck, TRUE); */  

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      p7_oprofile_ReconfigLength(om, sq->n);
      p7_ReconfigLength(gm,          sq->n);
      p7_bg_SetLength(bg,            sq->n);
      p7_omx_GrowTo(fwd, om->M, 0,   sq->n); 
      p7_omx_GrowTo(bck, om->M, 0,   sq->n); 
      p7_gmx_GrowTo(gx,  gm->M,      sq->n); 

      p7_bg_NullOne  (bg, sq->dsq, sq->n, &nullsc);
    
      p7_ForwardParser (sq->dsq, sq->n, om,      fwd, &fraw);
      p7_BackwardParser(sq->dsq, sq->n, om, fwd, bck, &braw);

      /* p7_Forward (sq->dsq, sq->n, om,      fwd, &fsc);        printf("forward:              %.2f nats\n", fsc);  */
      /* p7_Backward(sq->dsq, sq->n, om, fwd, bck, &bsc);        printf("backward:             %.2f nats\n", bsc);  */

      /* Comparison to other F/B implementations */
      p7_GForward     (sq->dsq, sq->n, gm, gx,  &gfraw);
      p7_GBackward    (sq->dsq, sq->n, gm, gx,  &gbraw);

      /* p7_gmx_Dump(stdout, gx);  */

      fsc  =  (fraw-nullsc) / eslCONST_LOG2;
      gfsc = (gfraw-nullsc) / eslCONST_LOG2;
      P  = esl_exp_surv(fsc,   om->evparam[p7_FTAU],  om->evparam[p7_FLAMBDA]);
      gP = esl_exp_surv(gfsc,  gm->evparam[p7_FTAU],  gm->evparam[p7_FLAMBDA]);

      if (esl_opt_GetBoolean(go, "-1"))
	{
	  printf("%-30s\t%-20s\t%9.2g\t%6.1f\t%9.2g\t%6.1f\n", sq->name, hmm->name, P, fsc, gP, gfsc);
	}
      else if (esl_opt_GetBoolean(go, "-P"))
	{ /* output suitable for direct use in profmark benchmark postprocessors: */
	  printf("%g\t%.2f\t%s\t%s\n", P, fsc, sq->name, hmm->name);
	}
      else
	{
	  printf("target sequence:      %s\n",        sq->name);
	  printf("fwd filter raw score: %.2f nats\n", fraw);
	  printf("bck filter raw score: %.2f nats\n", braw);
	  printf("null score:           %.2f nats\n", nullsc);
	  printf("per-seq score:        %.2f bits\n", fsc);
	  printf("P-value:              %g\n",        P);
	  printf("GForward raw score:   %.2f nats\n", gfraw);
	  printf("GBackward raw score:  %.2f nats\n", gbraw);
	  printf("GForward seq score:   %.2f bits\n", gfsc);
	  printf("GForward P-value:     %g\n",        gP);
	}

      esl_sq_Reuse(sq);
    }

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Esempio n. 13
0
static void 
pipeline_thread(void *arg)
{
  int i, j;
  int status;
  int workeridx;
  WORKER_INFO   *info;
  ESL_THREADS   *obj;
  P7_OM_BLOCK   *block;
  void          *newBlock;
  P7_OPROFILE   *om        = NULL;
  P7_SCOREDATA  *scoredata = NULL;   /* hmm-specific data used by nhmmer */

  P7_DOMAIN *dcl;
  int seq_len = 0;
  int prev_hit_cnt = 0;

#ifdef eslAUGMENT_ALPHABET
  ESL_SQ        *sq_revcmp = NULL;
#endif /*eslAUGMENT_ALPHABET*/
  

  impl_ThreadInit();

  obj = (ESL_THREADS *) arg;
  esl_threads_Started(obj, &workeridx);

  info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx);

  status = esl_workqueue_WorkerUpdate(info->queue, NULL, &newBlock);
  if (status != eslOK) esl_fatal("Work queue worker failed");

#ifdef eslAUGMENT_ALPHABET
  //reverse complement
  if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL ) {
    sq_revcmp =  esl_sq_CreateDigital(info->qsq->abc);
    esl_sq_Copy(info->qsq,sq_revcmp);
    esl_sq_ReverseComplement(sq_revcmp);
    info->pli->nres += info->qsq->n;
  }
#endif /*eslAUGMENT_ALPHABET*/


  /* loop until all blocks have been processed */
  block = (P7_OM_BLOCK *) newBlock;
  while (block->count > 0)
  {
      /* Main loop: */
      for (i = 0; i < block->count; ++i)
      {
        om = block->list[i];
        seq_len = 0;

        p7_pli_NewModel(info->pli, om, info->bg);
        p7_bg_SetLength(info->bg, info->qsq->n);
        p7_oprofile_ReconfigLength(om, info->qsq->n);

        scoredata = p7_hmm_ScoreDataCreate(om, FALSE);


#ifdef eslAUGMENT_ALPHABET
        //reverse complement
        if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL )
        {
          p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, sq_revcmp, info->th, 0);
          p7_pipeline_Reuse(info->pli); // prepare for next search

          seq_len = info->qsq->n;
          for (j = prev_hit_cnt; j < info->th->N ; j++)
          {
            dcl = info->th->unsrt[j].dcl;
            // modify hit positions to account for the position of the window in the full sequence
            dcl->ienv = seq_len - dcl->ienv + 1;
            dcl->jenv = seq_len - dcl->jenv + 1;
            dcl->iali = seq_len - dcl->iali + 1;
            dcl->jali = seq_len - dcl->jali + 1;
            dcl->ad->sqfrom = seq_len - dcl->ad->sqfrom + 1;
            dcl->ad->sqto = seq_len - dcl->ad->sqto + 1;
          }

        }
#endif
        if (info->pli->strand != p7_STRAND_BOTTOMONLY) {
          p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, info->qsq, info->th, 0);
          p7_pipeline_Reuse(info->pli);
          seq_len += info->qsq->n;
        }

        for (j = prev_hit_cnt; j < info->th->N ; j++)
        {
          info->th->unsrt[j].lnP         += log((float)seq_len / (float)om->max_length);
          info->th->unsrt[j].dcl[0].lnP   = info->th->unsrt[j].lnP;
          info->th->unsrt[j].sortkey      = -1.0 * info->th->unsrt[j].lnP;
          info->th->unsrt[j].dcl[0].ad->L = om->M;
        }

        prev_hit_cnt = info->th->N;
        p7_hmm_ScoreDataDestroy(scoredata);
        p7_oprofile_Destroy(om);
        block->list[i] = NULL;
      }


      status = esl_workqueue_WorkerUpdate(info->queue, block, &newBlock);
      if (status != eslOK) esl_fatal("Work queue worker failed");

      block = (P7_OM_BLOCK *) newBlock;
  }

#ifdef eslAUGMENT_ALPHABET
  esl_sq_Destroy(sq_revcmp);
#endif


  status = esl_workqueue_WorkerUpdate(info->queue, block, NULL);
  if (status != eslOK) esl_fatal("Work queue worker failed");

  esl_threads_Finished(obj, workeridx);
  return;
}
Esempio n. 14
0
static int
serial_loop(WORKER_INFO *info, P7_HMMFILE *hfp)
{
  int            status;
  int i;
  int seq_len = 0;
  int prev_hit_cnt = 0;
  P7_OPROFILE   *om        = NULL;
  P7_SCOREDATA  *scoredata = NULL;   /* hmm-specific data used by nhmmer */
  ESL_ALPHABET  *abc = NULL;
  P7_DOMAIN *dcl;

#ifdef eslAUGMENT_ALPHABET
  ESL_SQ        *sq_revcmp = NULL;
  if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL ) {
    sq_revcmp =  esl_sq_CreateDigital(info->qsq->abc);
    esl_sq_Copy(info->qsq,sq_revcmp);
    esl_sq_ReverseComplement(sq_revcmp);

    info->pli->nres += info->qsq->n;
  }
#endif /*eslAUGMENT_ALPHABET*/


  /* Main loop: */
  while ((status = p7_oprofile_ReadMSV(hfp, &abc, &om)) == eslOK)
  {
      seq_len = 0;

      p7_pli_NewModel(info->pli, om, info->bg);
      p7_bg_SetLength(info->bg, info->qsq->n);
      p7_oprofile_ReconfigLength(om, info->qsq->n);

      scoredata = p7_hmm_ScoreDataCreate(om, FALSE);

#ifdef eslAUGMENT_ALPHABET
      //reverse complement
      if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL )
      {

        p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, sq_revcmp, info->th, 0);
        p7_pipeline_Reuse(info->pli); // prepare for next search
        seq_len = info->qsq->n;
        for (i = prev_hit_cnt; i < info->th->N ; i++)
        {
          dcl = info->th->unsrt[i].dcl;
          // modify hit positions to account for the position of the window in the full sequence
          dcl->ienv = seq_len - dcl->ienv + 1;
          dcl->jenv = seq_len - dcl->jenv + 1;
          dcl->iali = seq_len - dcl->iali + 1;
          dcl->jali = seq_len - dcl->jali + 1;
          dcl->ad->sqfrom = seq_len - dcl->ad->sqfrom + 1;
          dcl->ad->sqto = seq_len - dcl->ad->sqto + 1;
        }

      }
#endif


      if (info->pli->strand != p7_STRAND_BOTTOMONLY) {
        p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, info->qsq, info->th, 0);
        p7_pipeline_Reuse(info->pli);
        seq_len += info->qsq->n;
      }

      for (i = prev_hit_cnt; i < info->th->N ; i++)
      {
        info->th->unsrt[i].lnP         += log((float)seq_len / (float)om->max_length);
        info->th->unsrt[i].dcl[0].lnP   = info->th->unsrt[i].lnP;
        info->th->unsrt[i].sortkey      = -1.0 * info->th->unsrt[i].lnP;
        info->th->unsrt[i].dcl[0].ad->L =  om->M;
      }

      prev_hit_cnt = info->th->N;

      p7_oprofile_Destroy(om);
      p7_hmm_ScoreDataDestroy(scoredata);

  }

  esl_alphabet_Destroy(abc);
#ifdef eslAUGMENT_ALPHABET
  esl_sq_Destroy(sq_revcmp);
#endif
  return status;
}
Esempio n. 15
0
void run_hmmer_pipeline(const char* seq) {
  int index, i, status;
  ESL_SQ* sq = esl_sq_CreateFrom(NULL, seq, NULL, NULL, NULL);
  P7_OPROFILE *om = NULL;
  P7_PROFILE *gm = NULL;
  float usc, vfsc, fwdsc;   /* filter scores                           */
  float filtersc;           /* HMM null filter score                   */
  float nullsc;             /* null model score                        */
  float seqbias;
  float seq_score;          /* the corrected per-seq bit score */
  double P;
  WRAPPER_RESULT* result;

  num_results = 0;
  if(sq->n == 0) {
    esl_sq_Destroy(sq);
    return;
  }

  esl_sq_Digitize(abc, sq);  

  int n = 0;
  float oasc;

  for(index = 0;index < num_models;index++) {
    om = models[index];

    p7_omx_Reuse(oxf);
    p7_omx_Reuse(oxb);

    p7_omx_GrowTo(oxf, om->M, sq->n, sq->n);
    p7_omx_GrowTo(oxb, om->M, sq->n, sq->n);

    p7_oprofile_ReconfigLength(om, sq->n);

    p7_bg_SetFilter(bg, om->M, om->compo);
    p7_bg_SetLength(bg, sq->n);

    //Calibrate null model
    p7_bg_NullOne(bg, sq->dsq, sq->n, &nullsc);

    //MSV Filter
    p7_MSVFilter(sq->dsq, sq->n, om, oxf, &usc);
    seq_score = (usc - nullsc) / eslCONST_LOG2;
    P = esl_gumbel_surv(seq_score,  om->evparam[p7_MMU],  om->evparam[p7_MLAMBDA]);
    if (P > f1) continue;

    //Bias filter (model compo)
    p7_bg_FilterScore(bg, sq->dsq, sq->n, &filtersc);
    seq_score = (usc - filtersc) / eslCONST_LOG2;
    P = esl_gumbel_surv(seq_score,  om->evparam[p7_MMU],  om->evparam[p7_MLAMBDA]);
    if (P > f1) continue;

    //Viterbi filter (Only do if P value from Bias is high)
    if(P > f2) {
      p7_ViterbiFilter(sq->dsq, sq->n, om, oxf, &vfsc);
      seq_score = (vfsc - filtersc) / eslCONST_LOG2;
      P = esl_gumbel_surv(seq_score,  om->evparam[p7_VMU],  om->evparam[p7_VLAMBDA]);
      if (P > f2) continue;
    }

    //Get the real probability (forward)
    p7_Forward(sq->dsq, sq->n, om, oxf, &fwdsc);
    seq_score = (fwdsc - filtersc) / eslCONST_LOG2;
    P = esl_exp_surv(seq_score,  om->evparam[p7_FTAU],  om->evparam[p7_FLAMBDA]);
    if(hmmer_error) {
      fprintf(stderr, "HMM: %s, seq: %s", om->name, seq);
      hmmer_error = 0;
      continue;
    }
    if (P > f3) continue;

    //Real hit, go in to posterior decoding and alignment
    p7_omx_Reuse(oxb);
    p7_trace_Reuse(tr);

    p7_Backward(sq->dsq, sq->n, om, oxf, oxb, NULL);

    status = p7_Decoding(om, oxf, oxb, oxb);

    if(status == eslOK) {
      //And then trace the result
      p7_OptimalAccuracy(om, oxb, oxf, &oasc);
      p7_OATrace(om, oxb, oxf, tr);
    } else if(status == eslERANGE) {
      fprintf(stderr, "Decoding overflow on model %s\n", om->name);
      gm = gmodels[index];
      if(gxf == NULL) {
	gxf = p7_gmx_Create(gm->M, sq->n);
	gxb = p7_gmx_Create(gm->M, sq->n);
      } else {
	p7_gmx_GrowTo(gxf, gm->M, sq->n);
	p7_gmx_GrowTo(gxb, gm->M, sq->n);
      }

      p7_ReconfigLength(gm, sq->n);

      p7_GForward (sq->dsq, sq->n, gm, gxf, &fwdsc);
      p7_GBackward(sq->dsq, sq->n, gm, gxb, NULL);

      p7_GDecoding(gm, gxf, gxb, gxb);
      p7_GOptimalAccuracy(gm, gxb, gxf, &oasc);
      p7_GOATrace        (gm, gxb, gxf, tr);

      p7_gmx_Reuse(gxf);
      p7_gmx_Reuse(gxb);
    }

    if(hmmer_error) {
      fprintf(stderr, "HMM: %s, seq: %s", om->name, seq);
      hmmer_error = 0;
      continue;
    }

    result = wrapper_results[num_results];
    reuse_result(result, tr->N + om->M, om->name); //We're way overallocating here, but it's hard to know at this point how much space we'll need for the alignment (plus leading and trailing gaps)
    trace_into(tr, result, sq, abc, om->M);
    result->bits = seq_score;
    num_results++;
  }

  esl_sq_Destroy(sq);
}