Ejemplo n.º 1
0
int
main(int argc, char **argv)
{
  char            *hmmfile = argv[1];  /* name of HMM file to read one HMM from   */
  ESL_ALPHABET    *abc     = NULL;     /* sequence alphabet                       */
  ESL_RANDOMNESS  *r       = NULL;     /* source of randomness                    */
  P7_HMMFILE      *hfp     = NULL;     /* open hmmfile                            */
  P7_HMM          *hmm     = NULL;     /* HMM to emit from                        */
  P7_PROFILE      *gm      = NULL;     /* profile HMM (scores)                    */
  P7_BG           *bg      = NULL;     /* null model                              */
  P7_TRACE        *tr      = NULL;     /* sampled trace                           */
  ESL_SQ          *sq      = NULL;     /* sampled digital sequence                */
  int              n       = 1000;
  int              counts[p7T_NSTATETYPES];
  int              i;
  float            sc;
  float            nullsc;
  double           bitscore;

  r  = esl_randomness_CreateFast(0);
  tr = p7_trace_Create();
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("failed to open %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("failed to read HMM");
  sq = esl_sq_CreateDigital(abc);
  bg = p7_bg_Create(abc);
  gm = p7_profile_Create(hmm->M, abc);

  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);

  for (i = 0; i < n; i++) 
    {
      p7_ProfileEmit(r, hmm, gm, bg, sq, tr);
      p7_trace_GetStateUseCounts(tr, counts);

      p7_ReconfigLength(gm, sq->n);
      p7_bg_SetLength(bg, sq->n);
      p7_trace_Score(tr, sq->dsq, gm, &sc);
      p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc);
      bitscore = (sc - nullsc)/ eslCONST_LOG2;

      printf("%d  %8.4f\n",
	     counts[p7T_M] + (counts[p7T_I] + counts[p7T_D])/2,
	     bitscore);
    }

  p7_profile_Destroy(gm);
  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  esl_randomness_Destroy(r);
  esl_alphabet_Destroy(abc);
  p7_hmmfile_Close(hfp);
  p7_hmm_Destroy(hmm);
  return eslOK;
} 
Ejemplo n.º 2
0
/* process_workunit()
 *
 * This is the routine that actually does the work.
 *
 * A work unit consists of one HMM, <hmm>.
 * The result is the <scores> array, which contains an array of N scores;
 * caller provides this memory.
 * How those scores are generated is controlled by the application configuration in <cfg>.
 */
static int
process_workunit(ESL_GETOPTS *go, struct cfg_s *cfg, char *errbuf, P7_HMM *hmm, double *scores, int *alilens)
{
  int             L   = esl_opt_GetInteger(go, "-L");
  P7_PROFILE     *gm  = NULL;
  P7_OPROFILE    *om  = NULL;
  P7_REFMX       *rmx = NULL;
  P7_CHECKPTMX   *cx  = NULL;
  P7_FILTERMX    *fx  = NULL;
  P7_TRACE       *tr  = NULL;
  ESL_DSQ        *dsq = NULL;
  int             i;
  int             scounts[p7T_NSTATETYPES]; /* state usage counts from a trace */
  float           sc;
  float           nullsc;
  int             status;
   P7_HARDWARE *hw;
  if ((hw = p7_hardware_Create ()) == NULL)  p7_Fail("Couldn't get HW information data structure"); 
  /* Optionally set a custom background, determined by model composition;
   * an experimental hack. 
   */
  if (esl_opt_GetBoolean(go, "--bgcomp")) 
    {
      float *p = NULL;
      float  KL;

      p7_hmm_CompositionKLDist(hmm, cfg->bg, &KL, &p);
      esl_vec_FCopy(p, cfg->abc->K, cfg->bg->f);
    }

  /* Create and configure our generic profile, as requested */
  gm = p7_profile_Create(hmm->M, cfg->abc);
  if (esl_opt_GetBoolean(go, "--multi")) 
    {
      if      (esl_opt_GetBoolean(go, "--dual"))   { p7_profile_Config      (gm, hmm, cfg->bg);    }
      else if (esl_opt_GetBoolean(go, "--local"))  { p7_profile_ConfigLocal (gm, hmm, cfg->bg, L); }
      else if (esl_opt_GetBoolean(go, "--glocal")) { p7_profile_ConfigGlocal(gm, hmm, cfg->bg, L); }
    }
  else if (esl_opt_GetBoolean(go, "--uni")) 
    {
      if      (esl_opt_GetBoolean(go, "--dual"))   { p7_profile_ConfigCustom   (gm, hmm, cfg->bg, L, 0.0, 0.5); }
      else if (esl_opt_GetBoolean(go, "--local"))  { p7_profile_ConfigUnilocal (gm, hmm, cfg->bg, L);           }
      else if (esl_opt_GetBoolean(go, "--glocal")) { p7_profile_ConfigUniglocal(gm, hmm, cfg->bg, L);           }
    }
  p7_profile_SetLength(gm, L);
  p7_bg_SetLength(cfg->bg, L);  

  if (esl_opt_GetBoolean(go, "--x-no-lengthmodel")) elide_length_model(gm, cfg->bg);

  /* Allocate DP matrix for <gm>.
   */
  rmx = p7_refmx_Create(gm->M, L);

  /* Create and configure the vectorized profile, if needed;
   * and allocate its DP matrix
   */
  if (esl_opt_GetBoolean(go, "--vector"))
    {
      om = p7_oprofile_Create(gm->M, cfg->abc, om->simd);
      p7_oprofile_Convert(gm, om);
      cx = p7_checkptmx_Create(gm->M, L, ESL_MBYTES(32), om->simd);
      fx = p7_filtermx_Create(gm->M, om->simd);
    }
  
  /* Remaining allocation */
  ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2));
  tr = p7_trace_Create();

  /* Collect scores from N random sequences of length L  */
  for (i = 0; i < cfg->N; i++)
    {
      esl_rsq_xfIID(cfg->r, cfg->bg->f, cfg->abc->K, L, dsq);
      sc = eslINFINITY;

      /* Vectorized implementations of Viterbi, MSV may overflow.
       * In this case, they'll leave sc=eslINFINITY.
       * Then we fail over to the nonvector "generic" implementation.
       * That's why this next block isn't an if/else.
       */
      if (esl_opt_GetBoolean(go, "--vector")) 
	{
	  if      (esl_opt_GetBoolean(go, "--vit")) p7_ViterbiFilter(dsq, L, om, fx, &sc);
	  else if (esl_opt_GetBoolean(go, "--fwd")) p7_ForwardFilter(dsq, L, om, cx, &sc);
	  else if (esl_opt_GetBoolean(go, "--msv")) p7_MSVFilter    (dsq, L, om, fx, &sc);
	} 

      /* If we tried a vector calculation above but it overflowed,
       * or if we're to do --generic DP calculations, sc==eslINFINITY now;
       * hence the if condition here:
       */
      if (sc == eslINFINITY)
	{
	  if      (esl_opt_GetBoolean(go, "--fwd"))  p7_ReferenceForward(dsq, L, gm, rmx,     &sc); /* any mode: dual,local,glocal; gm's config takes care of this */
	  else if (esl_opt_GetBoolean(go, "--vit"))  p7_ReferenceViterbi(dsq, L, gm, rmx, tr, &sc); /* local-only mode. cmdline opts processing has already assured that --local set */
	  else if (esl_opt_GetBoolean(go, "--msv"))  p7_Die("We used to be able to do a generic MSV algorithm - but no longer");
	}

      /* Optional: get Viterbi alignment length too. */
      if (esl_opt_GetBoolean(go, "-a"))  /* -a only works with Viterbi; getopts has checked this already; <tr> must be valid */
	{
	  p7_trace_GetStateUseCounts(tr, scounts);

	  /* there's various ways we could counts "alignment length". 
	   * Here we'll use the total length of model used, in nodes: M+D states.
           * score vs al would gives us relative entropy / model position.
	   */
	  /* alilens[i] = scounts[p7T_D] + scounts[p7T_I]; SRE: temporarily testing this instead */
	  alilens[i] = scounts[p7T_ML] + scounts[p7T_DL] + scounts[p7T_IL] +
	    scounts[p7T_MG] + scounts[p7T_DG] + scounts[p7T_IG];
	  p7_trace_Reuse(tr);
	}

      p7_bg_NullOne(cfg->bg, dsq, L, &nullsc);
      scores[i] = (sc - nullsc) / eslCONST_LOG2;

      if (cx) p7_checkptmx_Reuse(cx);
      if (fx) p7_filtermx_Reuse(fx);
      p7_refmx_Reuse(rmx);
    }
  status      = eslOK;
  /* deliberate flowthru */
 ERROR:
  if (dsq != NULL) free(dsq);
  p7_checkptmx_Destroy(cx);
  p7_filtermx_Destroy(fx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_refmx_Destroy(rmx);
  p7_trace_Destroy(tr);
  if (status == eslEMEM) sprintf(errbuf, "allocation failure");
  return status;
}