Beispiel #1
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = NULL;	               /* application configuration       */
  char         *alifile = NULL;	               /* alignment file name             */
  int           infmt   = eslMSAFILE_UNKNOWN;  /* format code for alifile         */
  int           outfmt  = eslMSAFILE_UNKNOWN;  /* output format for fetched msa's */
  ESLX_MSAFILE *afp     = NULL;	               /* open alignment file             */
  FILE         *ofp     = NULL;	               /* output stream for alignments    */
  int           status;		               /* easel return code               */

  /***********************************************
   * Parse command line
   ***********************************************/

  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) cmdline_failure(argv[0], "Error in configuration: %s\n",       go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") )                   cmdline_help   (argv[0], go);
  if (esl_opt_ArgNumber(go) < 1)                       cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        

  if (esl_opt_IsOn(go, "--informat")) {
    infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"));
    if (infmt == eslMSAFILE_UNKNOWN) esl_fatal("%s is not a valid input alignment file format for --informat", esl_opt_GetString(go, "--informat")); 
  }

  outfmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--outformat")); 
  if (outfmt == eslMSAFILE_UNKNOWN) esl_fatal("%s is not a valid output alignment file format for --outformat", esl_opt_GetString(go, "--outformat")); 

  alifile = esl_opt_GetArg(go, 1);
  
  /* Open the alignment file.  */
  if ( (status  = eslx_msafile_Open(NULL, alifile, NULL, infmt, NULL, &afp)) != eslOK)
    eslx_msafile_OpenFailure(afp, status);

  /* Open the SSI index, if any */
  if (! esl_opt_GetBoolean(go, "--index")) 
    {
      if (afp->bf->mode_is == eslBUFFER_FILE    ||
	  afp->bf->mode_is == eslBUFFER_ALLFILE ||
	  afp->bf->mode_is == eslBUFFER_MMAP)
	{
	  char *ssifile = NULL;
	  esl_sprintf(&ssifile, "%s.ssi", afp->bf->filename);
      
	  status = esl_ssi_Open(ssifile, &(afp->ssi));
	  if      (status == eslERANGE )   esl_fatal("SSI index %s has 64-bit offsets; this system doesn't support them", ssifile);
	  else if (status == eslEFORMAT)   esl_fatal("SSI index %s has an unrecognized format. Try recreating, w/ esl-afetch --index", ssifile);
	  else if (status == eslENOTFOUND) afp->ssi = NULL;
	  else if (status != eslOK)        esl_fatal("SSI index %s: open failed, error code %d\n", ssifile, status);
	  
	  free(ssifile);
	}
    }

  /* Open the output file, if any
   */
  if (esl_opt_GetBoolean(go, "-O")) 
    {
      if ((ofp = fopen(esl_opt_GetArg(go, 2), "w")) == NULL)
	esl_fatal("Failed to open output file %s\n", esl_opt_GetArg(go, 2));
    }
  else if (esl_opt_GetString(go, "-o") != NULL)
    {
      if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL)
	esl_fatal("Failed to open output file %s\n", esl_opt_GetString(go, "-o"));
    }
  else ofp = stdout;

  /* Hand off control flow as appropriate */
  if (esl_opt_GetBoolean(go, "--index")) 
    {
      if (esl_opt_ArgNumber(go) != 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        
      create_ssi_index(go, afp);
    }
  else if (esl_opt_GetBoolean(go, "-f"))
    {
      if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        
      multifetch(go, ofp, outfmt, esl_opt_GetArg(go, 2), afp);
    }
  else 
    {
      if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        
      onefetch(go, ofp, outfmt, esl_opt_GetArg(go, 2), afp);
      if (ofp != stdout) printf("\n\nRetrieved alignment %s.\n",  esl_opt_GetArg(go, 2));
    }

  eslx_msafile_Close(afp);
  esl_getopts_Destroy(go);
  exit(0);
}
Beispiel #2
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx1     = NULL;
  P7_GMX         *gx2     = NULL;
  P7_OMX         *ox1     = NULL;
  P7_OMX         *ox2     = NULL;
  P7_TRACE       *tr      = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           fsc, bsc, accscore;
  float           fsc_g, bsc_g, accscore_g;
  double          Mcs;

  p7_FLogsumInit();

  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);                 p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);    p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);    p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001)
    p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode");

  ox1 = p7_omx_Create(gm->M, L, L);
  ox2 = p7_omx_Create(gm->M, L, L);
  tr  = p7_trace_CreateWithPP();

  esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  p7_Forward (dsq, L, om, ox1,      &fsc);
  p7_Backward(dsq, L, om, ox1, ox2, &bsc);
  p7_Decoding(om, ox1, ox2, ox2);              

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    {
      p7_OptimalAccuracy(om, ox2, ox1, &accscore);

      if (! esl_opt_GetBoolean(go, "--notrace"))
	{
	  p7_OATrace(om, ox2, ox1, tr);
	  p7_trace_Reuse(tr);
	}
    }
  esl_stopwatch_Stop(w);

  Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x") )
    {
      gx1 = p7_gmx_Create(gm->M, L);
      gx2 = p7_gmx_Create(gm->M, L);
      
      p7_GForward (dsq, L, gm, gx1, &fsc_g);
      p7_GBackward(dsq, L, gm, gx2, &bsc_g);
      p7_GDecoding(gm, gx1, gx2, gx2);             
      p7_GOptimalAccuracy(gm, gx2, gx1, &accscore_g);

      printf("generic:  fwd=%8.4f  bck=%8.4f  acc=%8.4f\n", fsc_g, bsc_g, accscore_g);
      printf("VMX:      fwd=%8.4f  bck=%8.4f  acc=%8.4f\n", fsc,   bsc,   accscore);

      p7_gmx_Destroy(gx1);
      p7_gmx_Destroy(gx2);
    }

  free(dsq);
  p7_omx_Destroy(ox1);
  p7_omx_Destroy(ox2);
  p7_trace_Destroy(tr);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #3
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_OMX         *ox1     = NULL;
  P7_OMX         *ox2     = NULL;
  P7_GMX         *gx      = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  P7_TRACE       *tr      = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  char            errbuf[eslERRBUFSIZE];
  float           fsc, bsc;
  float           accscore;
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);
 
  /* Read in one sequence */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);
  if  (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");
  esl_sqfile_Close(sqfp);
 
  /* Configure a profile from the HMM */
  bg = p7_bg_Create(abc);                 p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);    p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); /* multihit local: H3 default */
  om = p7_oprofile_Create(gm->M, abc);    p7_oprofile_Convert(gm, om);

  /* Allocations */
  ox1 = p7_omx_Create(gm->M, sq->n, sq->n);
  ox2 = p7_omx_Create(gm->M, sq->n, sq->n);
  gx  = p7_gmx_Create(gm->M, sq->n);
  tr  = p7_trace_CreateWithPP();
  p7_FLogsumInit();

  /* Run Forward, Backward; do OA fill and trace */
  p7_Forward (sq->dsq, sq->n, om, ox1,      &fsc);
  p7_Backward(sq->dsq, sq->n, om, ox1, ox2, &bsc);
  p7_Decoding(om, ox1, ox2, ox2);                   /* <gx2> is now the posterior decoding matrix */
  p7_OptimalAccuracy(om, ox2, ox1, &accscore);	    /* <gx1> is now the OA matrix */
  p7_OATrace(om, ox2, ox1, tr);
  
  if (esl_opt_GetBoolean(go, "-d")) { p7_omx_FDeconvert(ox2, gx);  p7_gmx_Dump(stdout, gx); }
  if (esl_opt_GetBoolean(go, "-m")) { p7_omx_FDeconvert(ox1, gx);  p7_gmx_Dump(stdout, gx); }

  p7_trace_Dump(stdout, tr, gm, sq->dsq);

  if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace fails validation:\n%s\n", errbuf);

  printf("fwd = %.4f nats\n", fsc);
  printf("bck = %.4f nats\n", bsc);
  printf("acc = %.4f (%.2f%%)\n", accscore, accscore * 100. / (float) sq->n);

  /* Cleanup */
  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  p7_omx_Destroy(ox1);
  p7_omx_Destroy(ox2);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #4
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go    = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  ESL_ALPHABET   *abc   = esl_alphabet_Create(eslAMINO);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *qfile = esl_opt_GetArg(go, 2);
  ESL_SQ         *qsq   = esl_sq_CreateDigital(abc);
  ESL_SQFILE     *qfp   = NULL;
  FILE           *hmmfp = NULL;
  ESL_SCOREMATRIX *S    = esl_scorematrix_Create(abc);
  ESL_DMATRIX     *Q    = NULL;
  P7_BG           *bg   = p7_bg_Create(abc);		
  P7_HMM          *hmm  = NULL;
  double          *fa   = NULL;
  double          popen   = esl_opt_GetReal  (go, "-q");
  double          pextend = esl_opt_GetReal  (go, "-r");
  char            *mxfile = esl_opt_GetString(go, "-m");
  char            errbuf[eslERRBUFSIZE];
  double          slambda;
  int             a,b;
  int             status;

  /* Reverse engineer a scoring matrix to obtain conditional prob's
   * that we'll use for the single-seq query HMM. Because score mx is
   * symmetric, we can set up P[a][b] = P(b | a), so we can use the
   * matrix rows as HMM match emission vectors. This means dividing
   * the joint probs through by f_a.
   */
  if (mxfile == NULL) {
    if (esl_scorematrix_Set("BLOSUM62", S) != eslOK) esl_fatal("failed to set BLOSUM62 scores");
  } else {
    ESL_FILEPARSER *efp = NULL;

    if ( esl_fileparser_Open(mxfile, NULL,  &efp) != eslOK) esl_fatal("failed to open score file %s",  mxfile);
    if ( esl_scorematrix_Read(efp, abc, &S)               != eslOK) esl_fatal("failed to read matrix from %s", mxfile);
    esl_fileparser_Close(efp);
  }

  /* A wasteful conversion of the HMMER single-precision background probs to Easel double-prec */
  ESL_ALLOC(fa, sizeof(double) * bg->abc->K);
  esl_vec_F2D(bg->f, bg->abc->K, fa);

  /* Backcalculate joint probabilities Q, given score matrix S and background frequencies fa */
  status = esl_scorematrix_ProbifyGivenBG(S, fa, fa, &slambda, &Q); 
  if      (status == eslEINVAL)  esl_fatal("built-in score matrix %s has no valid solution for lambda", matrix);
  else if (status == eslENOHALT) esl_fatal("failed to solve score matrix %s for lambda", matrix);
  else if (status != eslOK)      esl_fatal("unexpected error in solving score matrix %s for probability parameters", matrix);

  esl_scorematrix_JointToConditionalOnQuery(abc, Q);

  /* Open the query sequence file in FASTA format */
  status = esl_sqfile_Open(qfile, eslSQFILE_FASTA, NULL, &qfp);
  if      (status == eslENOTFOUND) esl_fatal("No such file %s.", qfile);
  else if (status == eslEFORMAT)   esl_fatal("Format of %s unrecognized.", qfile);
  else if (status == eslEINVAL)    esl_fatal("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        esl_fatal("Open of %s failed, code %d.", qfile, status);

  /* Open the output HMM file */
  if ((hmmfp = fopen(hmmfile, "w")) == NULL) esl_fatal("Failed to open output HMM file %s", hmmfile);

  /* For each sequence, build a model and save it. 
   */
  while ((status = esl_sqio_Read(qfp, qsq)) == eslOK)
    {
      p7_Seqmodel(abc, qsq->dsq, qsq->n, qsq->name, Q, bg->f, popen, pextend, &hmm);
      if ( p7_hmm_Validate(hmm, errbuf, 1e-5)     != eslOK) esl_fatal("HMM validation failed: %s\n", errbuf);
      if ( p7_hmmfile_WriteASCII(hmmfp, -1, hmm)  != eslOK) esl_fatal("HMM save failed");

      p7_hmm_Destroy(hmm);
    }
  if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s line %" PRId64 "):\n%s\n",
					    qfp->filename, qfp->linenumber, qfp->errbuf);     
  else if (status != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s",
					    status, qfp->filename);
  
  esl_dmatrix_Destroy(Q);
  esl_scorematrix_Destroy(S);
  free(fa);
  free(fb);
  esl_sq_Destroy(qsq);
  esl_sqfile_Close(qfp);
  fclose(hmmfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #5
0
int 
main(int argc, char **argv)
{
  ESL_STOPWATCH *w;
  ESL_GETOPTS *go;
  char        *msafile;
  ESL_MSAFILE *afp;
  ESL_MSA     *msa;
  int          do_gsc;
  int          do_pb;
  int          do_blosum;
  int          maxN;
  double       maxid;
  double       cpu;

  /* Process command line
   */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("failed to parse cmd line: %s", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) esl_fatal("failed to parse cmd line: %s", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") == TRUE) {
    puts(usage); 
    puts("\n  where options are:");
    esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */
    return 0;
  }
  do_blosum = esl_opt_GetBoolean(go, "--blosum");
  do_gsc    = esl_opt_GetBoolean(go, "--gsc");
  do_pb     = esl_opt_GetBoolean(go, "--pb");
  maxid     = esl_opt_GetReal   (go, "--id");
  maxN      = esl_opt_GetInteger(go, "--maxN");
  if (esl_opt_ArgNumber(go) != 1) {
    puts("Incorrect number of command line arguments.");
    puts(usage);
    return 1;
  }
  if ((msafile = esl_opt_GetArg(go, 1)) == NULL) esl_fatal("failed to parse cmd line: %s", go->errbuf);
  esl_getopts_Destroy(go);

  w = esl_stopwatch_Create();

  /* Weight one or more alignments from input file
   */
  esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp);
  while (esl_msa_Read(afp, &msa) == eslOK)
    {
      if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; }

      esl_stopwatch_Start(w);

      if      (do_gsc) 	  esl_msaweight_GSC(msa);
      else if (do_pb) 	  esl_msaweight_PB(msa);
      else if (do_blosum) esl_msaweight_BLOSUM(msa, maxid);

      esl_stopwatch_Stop(w);
      cpu = w->user;
      printf("%-20s %6d  %6d  %.3f\n", msa->name, msa->alen, msa->nseq, cpu);
      esl_msa_Destroy(msa);
    } 
  esl_msafile_Close(afp);

  esl_stopwatch_Destroy(w);
  return eslOK;
}
Beispiel #6
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  ESL_SQ         *sq      = NULL;
  float           nullsc, filtersc, H;
  int             status;
 
  /* Read one HMM from <hmmfile> */
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);

  /* Open <seqfile> for reading */
  status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) esl_fatal("No such file.");
  else if (status == eslEFORMAT)   esl_fatal("Format unrecognized.");
  else if (status != eslOK)        esl_fatal("Open failed, code %d.", status);

  sq = esl_sq_CreateDigital(abc);
  bg = p7_bg_Create(abc);

  p7_bg_SetFilter(bg, hmm->M, hmm->compo);

  H = esl_vec_FEntropy(bg->f, bg->abc->K);
  printf("bg iid H = %.4f\n", H);

  H = esl_vec_FEntropy(hmm->compo, bg->abc->K);
  printf("modelcomp H = %.4f\n", H);

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      p7_bg_SetLength(bg, sq->n);

      p7_bg_NullOne    (bg, sq->dsq, sq->n, &nullsc);
      p7_bg_FilterScore(bg, sq->dsq, sq->n, &filtersc);

      printf("%-20s %5d %8.5f %8.5f %8.5f\n", sq->name, (int) sq->n, nullsc, filtersc, filtersc-nullsc);

      esl_sq_Reuse(sq);
    }
  if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s)\n%s\n",
					   sqfp->filename, sqfp->get_error(sqfp));     
  else if (status != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s",
					   status, sqfp->filename);

  esl_sqfile_Close(sqfp);
  esl_sq_Destroy(sq);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #7
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_TRACE       *tr      = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           sc, fsc, vsc;
  float           bestsc  = -eslINFINITY;
  
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);                p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);   p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL);
  om = p7_oprofile_Create(gm->M, abc);   p7_oprofile_Convert(gm, om);

  fwd = p7_omx_Create(gm->M, L, L);
  gx  = p7_gmx_Create(gm->M, L);
  tr  = p7_trace_Create();
  esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);

  p7_GViterbi(dsq, L, gm, gx,  &vsc);
  p7_Forward (dsq, L, om, fwd, &fsc);

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    {
      p7_StochasticTrace(r, dsq, L, om, fwd, tr);
      p7_trace_Score(tr, dsq, gm, &sc);
      bestsc = ESL_MAX(bestsc, sc);
      p7_trace_Reuse(tr);
    }
  esl_stopwatch_Stop(w);
  esl_stopwatch_Display(stdout, w, "# CPU time: ");

  printf("forward sc   = %.4f nats\n", fsc);
  printf("viterbi sc   = %.4f nats\n", vsc);
  printf("max trace sc = %.4f nats\n", bestsc);

  free(dsq);
  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx);
  p7_omx_Destroy(fwd);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_OMX         *bck     = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  float           fraw, braw, nullsc, fsc;
  float           gfraw, gbraw, gfsc;
  double          P, gP;
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

  /* Open sequence file for reading */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);

  /* create default null model, then create and optimize profile */
  bg = p7_bg_Create(abc);               
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc); 
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* p7_oprofile_Dump(stdout, om);  */

  /* allocate DP matrices for O(M+L) parsers */
  fwd = p7_omx_Create(gm->M, 0, sq->n);
  bck = p7_omx_Create(gm->M, 0, sq->n);
  gx  = p7_gmx_Create(gm->M,    sq->n);

  /* allocate DP matrices for O(ML) fills */
  /* fwd = p7_omx_Create(gm->M, sq->n, sq->n); */
  /* bck = p7_omx_Create(gm->M, sq->n, sq->n); */

  /* p7_omx_SetDumpMode(stdout, fwd, TRUE); */     /* makes the fast DP algorithms dump their matrices */
  /* p7_omx_SetDumpMode(stdout, bck, TRUE); */  

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      p7_oprofile_ReconfigLength(om, sq->n);
      p7_ReconfigLength(gm,          sq->n);
      p7_bg_SetLength(bg,            sq->n);
      p7_omx_GrowTo(fwd, om->M, 0,   sq->n); 
      p7_omx_GrowTo(bck, om->M, 0,   sq->n); 
      p7_gmx_GrowTo(gx,  gm->M,      sq->n); 

      p7_bg_NullOne  (bg, sq->dsq, sq->n, &nullsc);
    
      p7_ForwardParser (sq->dsq, sq->n, om,      fwd, &fraw);
      p7_BackwardParser(sq->dsq, sq->n, om, fwd, bck, &braw);

      /* p7_Forward (sq->dsq, sq->n, om,      fwd, &fsc);        printf("forward:              %.2f nats\n", fsc);  */
      /* p7_Backward(sq->dsq, sq->n, om, fwd, bck, &bsc);        printf("backward:             %.2f nats\n", bsc);  */

      /* Comparison to other F/B implementations */
      p7_GForward     (sq->dsq, sq->n, gm, gx,  &gfraw);
      p7_GBackward    (sq->dsq, sq->n, gm, gx,  &gbraw);

      /* p7_gmx_Dump(stdout, gx, p7_DEFAULT);  */

      fsc  =  (fraw-nullsc) / eslCONST_LOG2;
      gfsc = (gfraw-nullsc) / eslCONST_LOG2;
      P  = esl_exp_surv(fsc,   om->evparam[p7_FTAU],  om->evparam[p7_FLAMBDA]);
      gP = esl_exp_surv(gfsc,  gm->evparam[p7_FTAU],  gm->evparam[p7_FLAMBDA]);

      if (esl_opt_GetBoolean(go, "-1"))
	{
	  printf("%-30s\t%-20s\t%9.2g\t%6.1f\t%9.2g\t%6.1f\n", sq->name, hmm->name, P, fsc, gP, gfsc);
	}
      else if (esl_opt_GetBoolean(go, "-P"))
	{ /* output suitable for direct use in profmark benchmark postprocessors: */
	  printf("%g\t%.2f\t%s\t%s\n", P, fsc, sq->name, hmm->name);
	}
      else
	{
	  printf("target sequence:      %s\n",        sq->name);
	  printf("fwd filter raw score: %.2f nats\n", fraw);
	  printf("bck filter raw score: %.2f nats\n", braw);
	  printf("null score:           %.2f nats\n", nullsc);
	  printf("per-seq score:        %.2f bits\n", fsc);
	  printf("P-value:              %g\n",        P);
	  printf("GForward raw score:   %.2f nats\n", gfraw);
	  printf("GBackward raw score:  %.2f nats\n", gbraw);
	  printf("GForward seq score:   %.2f bits\n", gfsc);
	  printf("GForward P-value:     %g\n",        gP);
	}

      esl_sq_Reuse(sq);
    }

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_OMX         *bck     = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           fsc, bsc;
  float           fsc2, bsc2;
  double          base_time, bench_time, Mcs;

  p7_FLogsumInit();

  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001)
    p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode");

  if (esl_opt_GetBoolean(go, "-P")) {
    fwd = p7_omx_Create(gm->M, 0, L);
    bck = p7_omx_Create(gm->M, 0, L);
  } else {
    fwd = p7_omx_Create(gm->M, L, L);
    bck = p7_omx_Create(gm->M, L, L);
  }
  gx  = p7_gmx_Create(gm->M, L);

  /* Get a baseline time: how long it takes just to generate the sequences */
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  esl_stopwatch_Stop(w);
  base_time = w->user;

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    {
      esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
      if (esl_opt_GetBoolean(go, "-P")) {
	if (! esl_opt_GetBoolean(go, "-B"))  p7_ForwardParser (dsq, L, om,      fwd, &fsc);
	if (! esl_opt_GetBoolean(go, "-F"))  p7_BackwardParser(dsq, L, om, fwd, bck, &bsc);
      } else {
	if (! esl_opt_GetBoolean(go, "-B"))  p7_Forward (dsq, L, om,      fwd, &fsc);
	if (! esl_opt_GetBoolean(go, "-F"))  p7_Backward(dsq, L, om, fwd, bck, &bsc);
      }

      if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x"))
	{
	  p7_GForward (dsq, L, gm, gx, &fsc2); 
	  p7_GBackward(dsq, L, gm, gx, &bsc2); 
	  printf("%.4f %.4f %.4f %.4f\n", fsc, bsc, fsc2, bsc2);  
	}
    }
  esl_stopwatch_Stop(w);
  bench_time = w->user - base_time;
  Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  free(dsq);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
int
main(int argc, char **argv)
{
    ESL_GETOPTS    *go       = NULL;	                /* application configuration       */
    char           *seqfile  = NULL;	                /* sequence file name              */
    char           *maskfile = NULL;	                /* mask coordinate file name       */
    int             infmt    = eslSQFILE_UNKNOWN;         /* format code for seqfile         */
    int             outfmt   = eslSQFILE_FASTA;           /* format code for output seqs     */
    ESL_SQFILE     *sqfp     = NULL;                      /* open sequence file              */
    ESL_FILEPARSER *maskefp  = NULL;	                /* open mask coord file            */
    FILE           *ofp      = NULL;	                /* output stream for masked seqs   */
    char           *source   = NULL;			/* name of current seq to mask     */
    char           *p1, *p2;				/* pointers used in parsing        */
    int64_t         start, end;				/* start, end coord for masking    */
    int64_t         i, j, pos;				/* coords in a sequence            */
    int64_t         overmask;				/* # of extra residues to mask     */
    ESL_SQ         *sq       = esl_sq_Create();		/* current sequence                */
    int             do_fetching;
    int             do_lowercase;
    int             maskchar;
    int             status;		                /* easel return code               */


    /****************************************************************************
     * Parse command line
     ****************************************************************************/

    go = esl_getopts_Create(options);
    if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
    if (esl_opt_VerifyConfig(go)               != eslOK) cmdline_failure(argv[0], "Error in configuration: %s\n",       go->errbuf);
    if (esl_opt_GetBoolean(go, "-h") )                   cmdline_help   (argv[0], go);
    if (esl_opt_ArgNumber(go) != 2)                      cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");

    do_fetching  = esl_opt_GetBoolean(go, "-R");
    do_lowercase = esl_opt_GetBoolean(go, "-l");
    overmask     = (esl_opt_IsOn(go, "-x") ? esl_opt_GetInteger(go, "-x") : 0);
    maskchar     = (esl_opt_IsOn(go, "-m") ? esl_opt_GetChar(go, "-m")    : 'X');

    seqfile  = esl_opt_GetArg(go, 1);
    maskfile = esl_opt_GetArg(go, 2);

    /* Open the <seqfile>: text mode, not digital */
    if (esl_opt_GetString(go, "--informat") != NULL) {
        infmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat"));
        if (infmt == eslSQFILE_UNKNOWN) cmdline_failure(argv[0], "%s is not a valid input sequence file format for --informat");
    }
    status = esl_sqfile_Open(seqfile, infmt, NULL, &sqfp);
    if      (status == eslENOTFOUND) cmdline_failure(argv[0], "Sequence file %s not found.\n",     seqfile);
    else if (status == eslEFORMAT)   cmdline_failure(argv[0], "Format of file %s unrecognized.\n", seqfile);
    else if (status == eslEINVAL)    cmdline_failure(argv[0], "Can't autodetect stdin or .gz.\n");
    else if (status != eslOK)        cmdline_failure(argv[0], "Open failed, code %d.\n", status);

    if (do_fetching && sqfp->data.ascii.ssi == NULL)
        cmdline_failure(argv[0], "-R option (random access/fetching) requires %s to be SSI indexed\n", seqfile);

    /* Open the <maskfile> */
    if (esl_fileparser_Open(maskfile, NULL, &maskefp) != eslOK)
        cmdline_failure(argv[0], "Failed to open mask coordinate file %s\n", maskfile);
    esl_fileparser_SetCommentChar(maskefp, '#');

    /* Open the output file, if any */
    if (esl_opt_GetString(go, "-o") != NULL)
    {
        if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL)
            cmdline_failure(argv[0], "Failed to open output file %s\n", esl_opt_GetString(go, "-o"));
    }
    else ofp = stdout;


    /****************************************************************************
     * Main loop over lines in <maskfile>
     ****************************************************************************/

    /* Read one data line at a time from the <maskfile>;
     * parse into data fields <seqname> <start> <end>
     */
    while (esl_fileparser_NextLine(maskefp) == eslOK)
    {
        /* First field is sequence name */
        if (esl_fileparser_GetTokenOnLine(maskefp, &source,  NULL) != eslOK)
            esl_fatal("Failed to read source seq name on line %d of file %s\n", maskefp->linenumber, maskfile);

        /* Get the sequence */
        if (do_fetching)
        {   /* If the <seqfile> is SSI indexed, try to reposition it and read <source> seq by random access */
            status = esl_sqio_Fetch(sqfp, source, sq);
            if      (status == eslENOTFOUND) esl_fatal("seq %s not found in SSI index for file %s\n", source, sqfp->filename);
            else if (status == eslEINVAL)    esl_fatal("No SSI index or can't reposition in file %s\n", sqfp->filename);
            else if (status == eslEFORMAT)   esl_fatal("Parse failed:\n%s\n", esl_sqfile_GetErrorBuf(sqfp));
            else if (status != eslOK)        esl_fatal("Unexpected failure in fetching %s from file %s\n", source, sqfp->filename);
        }
        else
        {   /* else, assume we're reading sequentially; <sqfile> and <maskfile> have seqs in same order */
            status = esl_sqio_Read(sqfp, sq);
            if      (status == eslEOF)      esl_fatal("File %s ended prematurely; didn't find %s\n", sqfp->filename, source);
            else if (status == eslEFORMAT)  esl_fatal("Parse failed:\n%s\n", esl_sqfile_GetErrorBuf(sqfp));
            else if (status != eslOK)       esl_fatal("Unexpected error reading sequence file %s\n", sqfp->filename);

            if ((strcmp(sq->name, source) != 0) && (strcmp(sq->acc, source) != 0))
                esl_fatal("Sequences in <sqfile> and <maskfile> aren't in same order; try -R");
        }

        /* If we're masking by lowercase, first make sure everything's uppercase */
        if (do_lowercase)
            for (pos = 0; pos < sq->n; pos++)
                if (isalpha(sq->seq[pos]))
                    sq->seq[pos] = toupper(sq->seq[pos]);

        /* Next two fields are <start>, <end> for the masking  */
        /* possible future extension: wrap loop around this, enable multiple masked regions */
        if (esl_fileparser_GetTokenOnLine(maskefp, &p1, NULL) != eslOK)
            esl_fatal("Failed to read start coord on line %d of file %s\n", maskefp->linenumber, maskfile);
        start = strtoll(p1, &p2, 0) - 1;

        if (esl_fileparser_GetTokenOnLine(maskefp, &p2, NULL) != eslOK)
            esl_fatal("Failed to read end coord on line %d of file %s\n", maskefp->linenumber, maskfile);
        end   = strtoll(p2, &p1, 0) - 1;

        /* Do the masking */
        if (esl_opt_GetBoolean(go, "-r")) /* Reverse masking */
        {   /* leave start..end unmasked; mask prefix 0..start-1, end+1..L-1 */
            i = 0;
            j = ESL_MIN(sq->n-1, start - 1 + overmask);
            for (pos = i; pos <= j; pos++)
                if (isalpha(sq->seq[pos]))
                    sq->seq[pos] = (do_lowercase ? tolower(sq->seq[pos]) : maskchar);

            i = ESL_MAX(0, end + 1 - overmask);
            j = sq->n-1;
            for (pos = i; pos <= j; pos++)
                if (isalpha(sq->seq[pos]))
                    sq->seq[pos] = (do_lowercase ? tolower(sq->seq[pos]) : maskchar);
        }
        else
        {   /* normal: mask start..end */
            i = ESL_MAX(0,       start - overmask);
            j = ESL_MIN(sq->n-1, end   + overmask);
            for (pos = i; pos <= j; pos++)
                if (isalpha(sq->seq[pos]))
                    sq->seq[pos] = (do_lowercase ? tolower(sq->seq[pos]) : maskchar);
        }

        esl_sqio_Write(ofp, sq, outfmt, FALSE);
        esl_sq_Reuse(sq);
    }

    esl_sq_Destroy(sq);
    esl_fileparser_Close(maskefp);
    esl_sqfile_Close(sqfp);
    esl_getopts_Destroy(go);
    if (ofp != stdout) fclose(ofp);
    return 0;
}
Beispiel #11
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = NULL;	                        /* application configuration       */
  char         *seqfile = NULL;	                        /* sequence file name              */
  int           infmt   = eslSQFILE_UNKNOWN;		/* format code for seqfile         */
  ESL_SQFILE   *sqfp    = NULL;                         /* open sequence file              */
  FILE         *ofp     = NULL;	                        /* output stream for sequences     */
  int           status;		                        /* easel return code               */

  /***********************************************
   * Parse command line
   ***********************************************/

  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) cmdline_failure(argv[0], "Error in configuration: %s\n",       go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") )                   cmdline_help   (argv[0], go);
  if (esl_opt_ArgNumber(go) < 1)                       cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        

  /* Open the sequence file */
  seqfile = esl_opt_GetArg(go, 1);
  if (esl_opt_GetString(go, "--informat") != NULL) {
    infmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat"));
    if (infmt == eslSQFILE_UNKNOWN) esl_fatal("%s is not a valid input sequence file format for --informat", esl_opt_GetString(go, "--informat")); 
  }
  status = esl_sqfile_Open(seqfile, infmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) cmdline_failure(argv[0], "Sequence file %s not found.\n",     seqfile);
  else if (status == eslEFORMAT)   cmdline_failure(argv[0], "Format of file %s unrecognized.\n", seqfile);
  else if (status == eslEINVAL)    cmdline_failure(argv[0], "Can't autodetect stdin or .gz.\n");
  else if (status != eslOK)        cmdline_failure(argv[0], "Open failed, code %d.\n", status);

  /* Open the output file, if any */
  if (esl_opt_GetBoolean(go, "-O")) 
    {
      if ((ofp = fopen(esl_opt_GetArg(go, 2), "w")) == NULL)
	cmdline_failure(argv[0], "Failed to open output file %s\n", esl_opt_GetArg(go, 2));
    }
  else if (esl_opt_GetString(go, "-o") != NULL)
    {
      if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL)
	cmdline_failure(argv[0], "Failed to open output file %s\n", esl_opt_GetString(go, "-o"));
    }
  else ofp = stdout;

  /* Indexing  mode */
  if (esl_opt_GetBoolean(go, "--index")) 
    {
      if (esl_opt_ArgNumber(go) != 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        
      if (sqfp->data.ascii.do_gzip)  cmdline_failure(argv[0], "Can't index a .gz compressed file");
      if (sqfp->data.ascii.do_stdin) cmdline_failure(argv[0], "Can't index a standard input pipe");

      create_ssi_index(go, sqfp);
    }

  /* List retrieval mode */
  else if (esl_opt_GetBoolean(go, "-f"))
    {
      if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        

      /* Open the SSI index for retrieval */
      if (! sqfp->data.ascii.do_gzip && ! sqfp->data.ascii.do_stdin &&  ! esl_sqio_IsAlignment(sqfp->format)) 
	{
	  status = esl_sqfile_OpenSSI(sqfp, NULL);
	  if      (status == eslEFORMAT)   cmdline_failure(argv[0], "SSI index is in incorrect format\n");
	  else if (status == eslERANGE)    cmdline_failure(argv[0], "SSI index is in 64-bit format and we can't read it\n");
	  else if (status != eslOK)        cmdline_failure(argv[0], "Failed to open SSI index\n");
	}

      if (esl_opt_GetBoolean(go, "-C")) multifetch_subseq(go, ofp, esl_opt_GetArg(go, 2), sqfp);
      else              	        multifetch       (go, ofp, esl_opt_GetArg(go, 2), sqfp);
    }

  /* Single sequence retrieval mode */
  else 
    {
      if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        
      char *key     = esl_opt_GetArg(go, 2);
      char *cstring = esl_opt_GetString(go, "-c");
      char *newname = esl_opt_GetString(go, "-n");

      /* Open the SSI index for retrieval */
      if (! sqfp->data.ascii.do_gzip && ! sqfp->data.ascii.do_stdin &&  ! esl_sqio_IsAlignment(sqfp->format)) 
	{
	  status = esl_sqfile_OpenSSI(sqfp, NULL);
	  if      (status == eslEFORMAT)   cmdline_failure(argv[0], "SSI index is in incorrect format\n");
	  else if (status == eslERANGE)    cmdline_failure(argv[0], "SSI index is in 64-bit format and we can't read it\n");
	  else if (status != eslOK)        cmdline_failure(argv[0], "Failed to open SSI index\n");
	}

      /* -c: subsequence retrieval; else full sequence retrieval */
      if (cstring != NULL)
	{
	  uint32_t start, end;

	  status = esl_regexp_ParseCoordString(cstring, &start, &end);
	  if (status == eslESYNTAX) esl_fatal("-c takes arg of subseq coords <from>..<to>; %s not recognized", cstring);
	  if (status == eslFAIL)    esl_fatal("Failed to find <from> or <to> coord in %s", cstring);

	  onefetch_subseq(go, ofp, sqfp, newname, key, start, end);
	  if (ofp != stdout) printf("\n\nRetrieved subsequence %s/%d-%d.\n",  key, start, end);
	}
      else 
	{
	  onefetch(go, ofp, esl_opt_GetArg(go, 2), sqfp);
	  if (ofp != stdout) printf("\n\nRetrieved sequence %s.\n",  esl_opt_GetArg(go, 2));
	}
    }

  esl_sqfile_Close(sqfp);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #12
0
/* seq_shuffling()
 * SRE, Tue Jan 22 08:35:51 2008 [Market Street Cafe, Leesburg]
 *
 * Shuffling of input sequences.
 *
 * Fixed-length (L>0) vs. full-length (L=0) modes handled differently.
 * In fixed-length mode:
 *   <shuff->seq> only needs to be allocated once, for L
 *   <targ> is an allocated copy of a random subseq of length L
 *   sequences < L residues long can't be shuffled
 * In full-length mode:
 *   <shuff->seq> is grown to length <sq->n> for each input seq
 *   <targ> just points to <sq->seq>
 */
static int 
seq_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  char       *seqfile = esl_opt_GetArg(go, 1);
  int         infmt   = eslSQFILE_UNKNOWN;
  ESL_SQFILE *sqfp    = NULL;
  ESL_SQ     *sq      = esl_sq_Create();
  ESL_SQ     *shuff   = esl_sq_Create();
  char       *targ    = NULL;
  int         N       = esl_opt_GetInteger(go, "-N");
  int         L       = esl_opt_GetInteger(go, "-L"); /* L>0 means select random fixed-len subseqs */
  int         kmers   = 0;
  int         i;
  int         status;
  
  if (esl_opt_GetString(go, "--informat") != NULL) {
    infmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat"));
    if (infmt == eslSQFILE_UNKNOWN) esl_fatal("%s is not a valid input sequence file format for --informat"); 
  }

  if (esl_opt_IsOn(go, "-k")) kmers = esl_opt_GetInteger(go, "-k");


  status = esl_sqfile_Open(seqfile, infmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) esl_fatal("No such file %s", seqfile);
  else if (status == eslEFORMAT)   esl_fatal("Format of seqfile %s unrecognized.", seqfile);
  else if (status == eslEINVAL)    esl_fatal("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        esl_fatal("Open failed, code %d.", status);

  if (L>0) { 
    esl_sq_GrowTo(shuff, L);
    shuff->n = L;
    ESL_ALLOC(targ, sizeof(char) * (L+1));
  }

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      if (L == 0) {		     /* shuffling entire sequence   */
	esl_sq_GrowTo(shuff, sq->n); /* make sure shuff can hold sq */	  
	shuff->n = sq->n;
	targ = sq->seq;
      } else {
	if (sq->n < L) continue;     /* reject seqs < L long */
      }

      for (i = 0; i < N; i++)
	{
	  if (L > 0) {		/* fixed-len mode: copy a random subseq */
	    int pos = esl_rnd_Roll(r, sq->n - L + 1);
	    strncpy(targ, sq->seq + pos, L);
	    targ[L] = '\0';	    
	  }

	  /* Do the requested kind of shuffling */
	  if      (esl_opt_GetBoolean(go, "-m"))  esl_rsq_CShuffle     (r, targ,        shuff->seq);  /* monoresidue shuffling */
	  else if (esl_opt_GetBoolean(go, "-d"))  esl_rsq_CShuffleDP   (r, targ,        shuff->seq);  /* diresidue shuffling */
	  else if (esl_opt_IsOn      (go, "-k"))  esl_rsq_CShuffleKmers(r, targ, kmers, shuff->seq);  /* diresidue shuffling */
	  else if (esl_opt_GetBoolean(go, "-0"))  esl_rsq_CMarkov0     (r, targ,        shuff->seq);  /* 0th order Markov */
	  else if (esl_opt_GetBoolean(go, "-1"))  esl_rsq_CMarkov1     (r, targ,        shuff->seq);  /* 1st order Markov */
	  else if (esl_opt_GetBoolean(go, "-r"))  esl_rsq_CReverse     (   targ,        shuff->seq);  /* reverse */
	  else if (esl_opt_IsOn      (go, "-w")) { /* regionally shuffle */	
	    int W= esl_opt_GetInteger(go, "-w"); esl_rsq_CShuffleWindows(r, targ, W, shuff->seq);
	  }

	  /* Set the name of the shuffled sequence */
	  if (N > 1) esl_sq_FormatName(shuff, "%s-shuffled-%d", sq->name, i);
	  else       esl_sq_FormatName(shuff, "%s-shuffled", sq->name);

	  /* Output the resulting sequence */
	  esl_sqio_Write(ofp, shuff, outfmt, FALSE);

	  /* don't need to reuse the shuffled sequence: we will use exactly the same memory */
	}
      esl_sq_Reuse(sq);
    }
  if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n",
					   sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
  else if (status != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s",
					    status, sqfp->filename);

  if (L>0) free(targ);
  esl_sq_Destroy(shuff);
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  return eslOK;

 ERROR:
  if (targ != NULL) free(targ);
  esl_sq_Destroy(shuff);
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  return status;
}
Beispiel #13
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_OMX         *bck     = NULL;
  P7_OMX         *pp      = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           fsc, bsc;
  double          Mcs;

  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);                 p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);    p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);    p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  fwd = p7_omx_Create(gm->M, L, L);
  bck = p7_omx_Create(gm->M, L, L);
  pp  = p7_omx_Create(gm->M, L, L);

  esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  p7_Forward (dsq, L, om, fwd,      &fsc);
  p7_Backward(dsq, L, om, fwd, bck, &bsc);

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    p7_Decoding(om, fwd, bck, pp);              
  esl_stopwatch_Stop(w);

  Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  free(dsq);
  p7_omx_Destroy(fwd);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(pp);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return eslOK;
}
Beispiel #14
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS *go;
  char        *msafile;
  ESL_MSAFILE *afp;
  ESL_MSA     *msa;
  int          do_gsc;
  int          do_pb;
  int          do_blosum;
  int          maxN;
  double       maxid;
  int          nsmall, nbig;
  int          i;


  /* Process command line
   */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("%s", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) esl_fatal("%s", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") == TRUE){
    puts(usage); 
    puts("\n  where options are:");
    esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */
    return 0;
  }
  do_blosum = esl_opt_GetBoolean(go, "--blosum");
  do_gsc    = esl_opt_GetBoolean(go, "--gsc");
  do_pb     = esl_opt_GetBoolean(go, "--pb");
  maxid     = esl_opt_GetReal   (go, "--id");
  maxN      = esl_opt_GetInteger(go, "--maxN");
  if (esl_opt_ArgNumber(go) != 1) {
    puts("Incorrect number of command line arguments.");
    puts(usage);
    return 1;
  }
  if ((msafile = esl_opt_GetArg(go, 1)) == NULL) esl_fatal("%s", go->errbuf);
  esl_getopts_Destroy(go);

  /* Weight one or more alignments from input file
   */
  esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp);
  while (esl_msa_Read(afp, &msa) == eslOK)
    {
      if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; }

      if      (do_gsc) 	  esl_msaweight_GSC(msa);
      else if (do_pb) 	  esl_msaweight_PB(msa);
      else if (do_blosum) esl_msaweight_BLOSUM(msa, maxid);

      for (nsmall = 0, nbig = 0, i = 0; i < msa->nseq; i++) {
	if (msa->wgt[i] < 0.2) nsmall++;
	if (msa->wgt[i] > 5.0) nbig++;
      }

      printf("%-20s  %5d %5d %8.4f  %8.4f  %5d  %5d\n", 
	     msa->name, 
	     msa->nseq, 
	     msa->alen,
	     esl_vec_DMin(msa->wgt, msa->nseq),
	     esl_vec_DMax(msa->wgt, msa->nseq),
	     nsmall,
	     nbig);
      esl_msa_Destroy(msa);
    } 
  esl_msafile_Close(afp);
  return eslOK;
}
Beispiel #15
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_GMX         *fwd     = NULL;
  P7_GMX         *bck     = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           sc;
  double          base_time, bench_time, Mcs;

  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL);
  fwd = p7_gmx_Create(gm->M, L);
  bck = p7_gmx_Create(gm->M, L);

  /* Baseline time. */
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  esl_stopwatch_Stop(w);
  base_time = w->user;

  /* Benchmark time. */
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    {
      esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
      if (! esl_opt_GetBoolean(go, "-B"))  p7_GForward (dsq, L, gm, fwd, &sc);
      if (! esl_opt_GetBoolean(go, "-F"))  p7_GBackward(dsq, L, gm, bck, NULL);
    }
  esl_stopwatch_Stop(w);
  bench_time = w->user - base_time;
  Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  free(dsq);
  p7_gmx_Destroy(bck);
  p7_gmx_Destroy(fwd);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
int
main(int argc, char **argv)
{
  int status;
  ESL_GETOPTS     *go	   = NULL;      /* command line processing                   */
  ESL_ALPHABET    *abc     = NULL;
  char            *hmmfile = NULL;
  char            *outhmmfile = NULL;
  P7_HMMFILE      *hfp     = NULL;
  P7_HMM          *hmm     = NULL;
  P7_BG           *bg      = NULL;
  int              nhmm;	
  double           x;
  float            KL;

  char        errmsg[eslERRBUFSIZE];

  /* Process the command line options.
   */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || 
      esl_opt_VerifyConfig(go)               != eslOK)
    {
      printf("Failed to parse command line: %s\n", go->errbuf);
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }
  if (esl_opt_GetBoolean(go, "-h") == TRUE) 
    {
      profillic_p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);
      puts("\nwhere options are:");
      esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=docgroup, 2 = indentation; 80=textwidth*/
      exit(0);
    }
  if (esl_opt_ArgNumber(go) != 2) 
    {
      puts("Incorrect number of command line arguments.");
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  if ((hmmfile = esl_opt_GetArg(go, 1)) == NULL) 
    {
      puts("Failed to read <input hmmfile> argument from command line.");
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  if ((outhmmfile = esl_opt_GetArg(go, 2)) == NULL) 
    {
      puts("Failed to read <output hmmfile> argument from command line.");
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  profillic_p7_banner(stdout, argv[0], banner);
  
  /* Initializations: open the input HMM file for reading
   */
  status = p7_hmmfile_Open(hmmfile, NULL, &hfp);
  if      (status == eslENOTFOUND) p7_Fail("Failed to open HMM file %s for reading.\n",                   hmmfile);
  else if (status == eslEFORMAT)   p7_Fail("File %s does not appear to be in a recognized HMM format.\n", hmmfile);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n",       status, hmmfile);  

  /* Main body: read HMMs one at a time, print one line of stats
   */
  printf("#\n");
  printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n", "idx",  "name",                 "accession",    "nseq",     "eff_nseq", "M",      "relent", "info",   "p relE", "compKL");
  printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n", "----", "--------------------", "------------", "--------", "--------", "------", "------", "------", "------", "------");

  nhmm = 0;
  if ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) 
    {
      if      (status == eslEOD)       esl_fatal("read failed, HMM file %s may be truncated?", hmmfile);
      else if (status == eslEFORMAT)   esl_fatal("bad file format in HMM file %s",             hmmfile);
      else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets",   hmmfile);
      else if (status != eslOK)        esl_fatal("Unexpected error in reading HMMs from %s",   hmmfile);
      nhmm++;

      if (bg == NULL) bg = p7_bg_Create(abc);

      if( abc->type == eslDNA ) {
        galosh::ProfileTreeRoot<seqan::Dna, floatrealspace> profile;
        if( (status = convert_to_galosh_profile( hmm, profile )) != eslOK ) esl_fatal("Unexpected error in converting HMM from file %s to a dna galosh profile",   hmmfile);
        std::ofstream fs ( outhmmfile );
    
        if( !fs.is_open() ) {
          esl_fatal("Unexpected error in opening the file %s for writing", outhmmfile);
        } else {
          fs << profile;
          fs.close();
        }
      } else if( abc->type == eslAMINO ) {
        galosh::ProfileTreeRoot<seqan::AminoAcid20, floatrealspace> profile;
        if( (status = convert_to_galosh_profile( hmm, profile )) != eslOK ) esl_fatal("Unexpected error in converting HMM from file %s to an amino galosh profile",   hmmfile);
        std::ofstream fs ( outhmmfile );
    
        if( !fs.is_open() ) {
          esl_fatal("Unexpected error in opening the file %s for writing", outhmmfile);
        } else {
          fs << profile;
          fs.close();
        }
      } else {
        ESL_EXCEPTION(eslEUNIMPLEMENTED, "Sorry, at present the profillic-hmmtoprofile software can only handle amino and dna.");
      }
  
      p7_MeanPositionRelativeEntropy(hmm, bg, &x); 
      p7_hmm_CompositionKLDist(hmm, bg, &KL, NULL);

      printf("%-6d %-20s %-12s %8d %8.2f %6d %6.2f %6.2f %6.2f %6.2f\n",
	     nhmm,
	     hmm->name,
	     hmm->acc == NULL ? "-" : hmm->acc,
	     hmm->nseq,
	     hmm->eff_nseq,
	     hmm->M,
	     p7_MeanMatchRelativeEntropy(hmm, bg),
	     p7_MeanMatchInfo(hmm, bg),
	     x,
	     KL);

	     /*	     p7_MeanForwardScore(hmm, bg)); */

      p7_hmm_Destroy(hmm);
    }

  p7_bg_Destroy(bg);
  esl_alphabet_Destroy(abc);
  p7_hmmfile_Close(hfp);
  esl_getopts_Destroy(go);
  exit(0);
}
Beispiel #17
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_GMX         *fwd     = NULL;
  P7_GMX         *bck     = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  float           fsc, bsc;
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);
 
  /* Read in one sequence */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);
  if  (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");
  esl_sqfile_Close(sqfp);
 
  /* Configure a profile from the HMM */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL);
  
  /* Allocate matrices */
  fwd = p7_gmx_Create(gm->M, sq->n);
  bck = p7_gmx_Create(gm->M, sq->n);

  /* Run Forward, Backward */
  p7_GForward (sq->dsq, sq->n, gm, fwd, &fsc);
  p7_GBackward(sq->dsq, sq->n, gm, bck, &bsc);

  printf("fwd = %.4f nats\n", fsc);
  printf("bck = %.4f nats\n", bsc);

  /* Cleanup */
  esl_sq_Destroy(sq);
  p7_gmx_Destroy(fwd);
  p7_gmx_Destroy(bck);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_GMX         *fwd     = NULL;
  P7_GMX         *bck     = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  float           fsc, bsc;
  float           nullsc;
  int             status;

  /* Initialize log-sum calculator */
  p7_FLogsumInit();

  /* Read in one HMM */
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);
 
  /* Read in one sequence */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);
 
  /* Configure a profile from the HMM */
  bg = p7_bg_Create(abc);
  gm = p7_profile_Create(hmm->M, abc);

  /* Now reconfig the models however we were asked to */
  if      (esl_opt_GetBoolean(go, "--fs"))  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);
  else if (esl_opt_GetBoolean(go, "--sw"))  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL);
  else if (esl_opt_GetBoolean(go, "--ls"))  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_GLOCAL);
  else if (esl_opt_GetBoolean(go, "--s"))   p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNIGLOCAL);
  
  /* Allocate matrices */
  fwd = p7_gmx_Create(gm->M, sq->n);
  bck = p7_gmx_Create(gm->M, sq->n);

  printf("%-30s   %-10s %-10s   %-10s %-10s\n", "# seq name",      "fwd (raw)",   "bck (raw) ",  "fwd (bits)",  "bck (bits)");
  printf("%-30s   %10s %10s   %10s %10s\n",     "#--------------", "----------",  "----------",  "----------",  "----------");

  while ( (status = esl_sqio_Read(sqfp, sq)) != eslEOF)
    {
      if      (status == eslEFORMAT) p7_Fail("Parse failed (sequence file %s)\n%s\n", sqfp->filename, sqfp->get_error(sqfp));     
      else if (status != eslOK)      p7_Fail("Unexpected error %d reading sequence file %s", status, sqfp->filename);

      /* Resize the DP matrices if necessary */
      p7_gmx_GrowTo(fwd, gm->M, sq->n);
      p7_gmx_GrowTo(bck, gm->M, sq->n);

      /* Set the profile and null model's target length models */
      p7_bg_SetLength(bg,   sq->n);
      p7_ReconfigLength(gm, sq->n);

      /* Run Forward, Backward */
      p7_GForward (sq->dsq, sq->n, gm, fwd, &fsc);
      p7_GBackward(sq->dsq, sq->n, gm, bck, &bsc);

      p7_gmx_Dump(stdout, fwd, p7_DEFAULT);

      /* Those scores are partial log-odds likelihoods in nats.
       * Subtract off the rest of the null model, convert to bits.
       */
      p7_bg_NullOne(bg, sq->dsq, sq->n, &nullsc);

      printf("%-30s   %10.4f %10.4f   %10.4f %10.4f\n", 
	     sq->name, 
	     fsc, bsc, 
	     (fsc - nullsc) / eslCONST_LOG2, (bsc - nullsc) / eslCONST_LOG2);

      p7_gmx_Reuse(fwd);
      p7_gmx_Reuse(bck);
      esl_sq_Reuse(sq);
    }

  /* Cleanup */
  esl_sqfile_Close(sqfp);
  esl_sq_Destroy(sq);
  p7_gmx_Destroy(fwd);
  p7_gmx_Destroy(bck);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = NULL;	               /* application configuration       */
  ESL_ALPHABET *abc     = NULL;      	       /* biological alphabet             */
  char         *alifile = NULL;	               /* alignment file name             */
  int           fmt     = eslMSAFILE_UNKNOWN;  /* format code for alifile         */
  ESLX_MSAFILE *afp     = NULL;		       /* open msa file                   */
  ESL_MSAFILE2 *old_afp = NULL;	               /* open msa file, legacy (--small) */
  ESL_MSA      *msa     = NULL;	               /* one multiple sequence alignment */
  int           nali;		               /* number of alignments read       */
  int           i;		               /* counter over seqs               */
  int64_t       alen;		               /* alignment length                */
  int           nseq;                          /* number of sequences in the msa */
  int64_t       rlen;		               /* a raw (unaligned) seq length    */
  int64_t       small, large;	               /* smallest, largest sequence      */
  int64_t       nres;		               /* total # of residues in msa      */
  double        avgid;		               /* average fractional pair id      */
  int           max_comparisons;               /* maximum # comparisons for avg id */
  int           do_stall;                      /* used to stall when debugging     */
  double      **abc_ct = NULL;                 /* [0..msa->alen-1][0..abc->K] number of each residue at each position (abc->K is gap) */
  double     ***bp_ct  = NULL;                 /* [0..msa->alen-1][0..abc->Kp-1][0..abc->Kp-1] per (non-pknotted) consensus basepair *
						* count of each possible basepair over all seqs basepairs are indexed by 'i' the minimum *
						* of 'i:j' for a pair between i and j, where i < j. */
  double       **pp_ct = NULL;                 /* [0..msa->alen-1][0..11], count of each posterior probability (PP) code, over all sequences, gap is 11 */  
  int  *i_am_rf = NULL;                        /* [0..i..msa->alen-1]: TRUE if pos i is non-gap RF posn, if msa->rf == NULL remains NULL */
  int  *rf2a_map = NULL;                       /* [0..rfpos..rflen-1] = apos,                     
						* apos is the alignment position (0..msa->alen-1) that     
						* is non-gap RF position rfpos+1 (for rfpos in 0..rflen-1) */
  int rflen = -1;                              /* nongap RF length */
  char          errbuf[eslERRBUFSIZE];
  int           status;		               /* easel return code               */

  /* optional output files */
  FILE *iinfofp  = NULL; /* output file for --iinfo */
  FILE *pcinfofp = NULL; /* output file for --pcinfo */
  FILE *psinfofp = NULL; /* output file for --psinfo */
  FILE *rinfofp  = NULL; /* output file for --rinfo */
  FILE *icinfofp = NULL; /* output file for --icinfo */
  FILE *listfp   = NULL; /* output file for --list */
  FILE *cinfofp  = NULL; /* output file for --cinfo */
  FILE *bpinfofp = NULL; /* output file for --bpinfo */
  int use_weights;       /* TRUE if --weight, reported weighted counts (using msa->wgt) to all output files */
  int weights_exist;     /* TRUE if at least one msa->wgt value differs from 1.0, FALSE if not (or if msa->wgt==NULL) */ 

  /***********************************************
   * Parse command line
   ***********************************************/

  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK ||
      esl_opt_VerifyConfig(go)               != eslOK)
    {
      printf("Failed to parse command line: %s\n", go->errbuf);
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  if (esl_opt_GetBoolean(go, "-h") )
    {
      esl_banner(stdout, argv[0], banner);
      esl_usage (stdout, argv[0], usage);
      puts("\n where options are:");
      esl_opt_DisplayHelp(stdout, go, 1, 2, 80);
      puts("\n small memory mode, requires --amino,--dna, or --rna and --informat pfam:");
      esl_opt_DisplayHelp(stdout, go, 2, 2, 80);
      puts("\n optional output files:");
      esl_opt_DisplayHelp(stdout, go, 3, 2, 80);
      exit(0);
    }

  if (esl_opt_ArgNumber(go) != 1) 
    {
      printf("Incorrect number of command line arguments.\n");
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  alifile = esl_opt_GetArg(go, 1);

  if (esl_opt_IsOn(go, "--informat") &&
      (fmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"))) == eslMSAFILE_UNKNOWN)
    esl_fatal("%s is not a valid input sequence file format for --informat", esl_opt_GetString(go, "--informat")); 
    
  if (esl_opt_GetBoolean(go, "--small") && fmt != eslMSAFILE_PFAM) esl_fatal("--small requires --informat pfam\n"); 

  max_comparisons = 1000;

  do_stall = esl_opt_GetBoolean(go, "--stall"); /* a stall point for attaching gdb */
  while (do_stall); 

  /***********************************************
   * Open the MSA file; determine alphabet; set for digital input
   ***********************************************/

  if      (esl_opt_GetBoolean(go, "--amino"))   abc = esl_alphabet_Create(eslAMINO);
  else if (esl_opt_GetBoolean(go, "--dna"))     abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--rna"))     abc = esl_alphabet_Create(eslRNA);

  /* We'd like to get rid of the legacy msafile interface, but it
   * includes small memory functionality for Pfam format which we have
   * to replace first. For now, use both interfaces, new and legacy
   */
  if ( esl_opt_GetBoolean(go, "--small") )
    {
      if (! abc) esl_fatal("--small requires one of --amino, --dna, --rna be specified.");

      status = esl_msafile2_OpenDigital(abc, alifile, NULL, &old_afp);
      if      (status == eslENOTFOUND) esl_fatal("Alignment file %s doesn't exist or is not readable\n", alifile);
      else if (status == eslEFORMAT)   esl_fatal("Couldn't determine format of alignment %s\n", alifile);
      else if (status != eslOK)        esl_fatal("Alignment file open failed with error %d\n", status);
    }
  else
    {
      if ( (status = eslx_msafile_Open(&abc, alifile, NULL, fmt, NULL, &afp)) != eslOK)
	eslx_msafile_OpenFailure(afp, status);
    }

  /**************************************
   * Open optional output files, as nec *
   **************************************/
  /* determine name for first list file, if nec */
  if( esl_opt_IsOn(go, "--list")) {
    if ((listfp = fopen(esl_opt_GetString(go, "--list"), "w")) == NULL) 
      esl_fatal("Failed to open --list output file %s\n", esl_opt_GetString(go, "--list"));
  }
  if( esl_opt_IsOn(go, "--icinfo")) {
    if ((icinfofp = fopen(esl_opt_GetString(go, "--icinfo"), "w")) == NULL) 
      esl_fatal("Failed to open --icinfo output file %s\n", esl_opt_GetString(go, "--icinfo"));
  }
  if( esl_opt_IsOn(go, "--rinfo")) {
    if ((rinfofp = fopen(esl_opt_GetString(go, "--rinfo"), "w")) == NULL) 
      esl_fatal("Failed to open --rinfo output file %s\n", esl_opt_GetString(go, "--rinfo"));
  }
  if( esl_opt_IsOn(go, "--pcinfo")) {
    if ((pcinfofp = fopen(esl_opt_GetString(go, "--pcinfo"), "w")) == NULL) 
      esl_fatal("Failed to open --pcinfo output file %s\n", esl_opt_GetString(go, "--pcinfo"));
  }
  if( esl_opt_IsOn(go, "--psinfo")) {
    if ((psinfofp = fopen(esl_opt_GetString(go, "--psinfo"), "w")) == NULL) 
      esl_fatal("Failed to open --psinfo output file %s\n", esl_opt_GetString(go, "--psinfo"));
  }
  if( esl_opt_IsOn(go, "--iinfo")) {
    if ((iinfofp = fopen(esl_opt_GetString(go, "--iinfo"), "w")) == NULL) 
      esl_fatal("Failed to open --iinfo output file %s\n", esl_opt_GetString(go, "--iinfo"));
  }
  if( esl_opt_IsOn(go, "--cinfo")) {
    if ((cinfofp = fopen(esl_opt_GetString(go, "--cinfo"), "w")) == NULL) 
      esl_fatal("Failed to open --cinfo output file %s\n", esl_opt_GetString(go, "--cinfo"));
  }
  if( esl_opt_IsOn(go, "--bpinfo")) {
    if ((bpinfofp = fopen(esl_opt_GetString(go, "--bpinfo"), "w")) == NULL) 
      esl_fatal("Failed to open --bpinfo output file %s\n", esl_opt_GetString(go, "--bpinfo"));
  }

  /***********************************************
   * Read MSAs one at a time.
   ***********************************************/

  if (esl_opt_GetBoolean(go, "-1")) {
    puts("#");
    if(! esl_opt_GetBoolean(go, "--small")) { 
      printf("# %-4s %-20s %10s %7s %7s %12s %6s %6s %10s %3s\n", "idx", "name", "format", "nseq", "alen", "nres", "small", "large", "avlen", "%id");
      printf("# %-4s %-20s %10s %7s %7s %12s %6s %6s %10s %3s\n", "----", "--------------------", "----------", "-------", "-------", "------------", "------", "------", "----------", "---");
    }
    else { 
      printf("# %-4s %-20s %10s %7s %7s %12s %10s\n", "idx", "name", "format", "nseq", "alen", "nres", "avlen");
      printf("# %-4s %-20s %10s %7s %7s %12s %10s\n", "----", "--------------------", "----------", "-------", "-------", "------------", "----------");
    }
  }

  nali = 0;
  
  fmt = (esl_opt_GetBoolean(go, "--small") ? old_afp->format : afp->format);

  while ( (status = ( esl_opt_GetBoolean(go, "--small") ? 
		      esl_msafile2_ReadInfoPfam(old_afp, listfp, abc, -1, NULL, NULL, &msa, &nseq, &alen, NULL, NULL, NULL, NULL, NULL, &abc_ct, &pp_ct, NULL, NULL, NULL) :
		      eslx_msafile_Read        (afp, &msa))) == eslOK)
    { 
      nali++;
      nres = 0;

      if (! esl_opt_GetBoolean(go, "--small")) { 
	nseq = msa->nseq;
	alen = msa->alen;
	small = large = -1;
	for (i = 0; i < msa->nseq; i++)
	  {
	    rlen  = esl_abc_dsqrlen(msa->abc, msa->ax[i]);
	    nres += rlen;
	    if (small == -1 || rlen < small) small = rlen;
	    if (large == -1 || rlen > large) large = rlen;
	  }

	esl_dst_XAverageId(abc, msa->ax, msa->nseq, max_comparisons, &avgid);
      }
      else { /* --small invoked */
	for(i = 0; i < alen; i++) nres += (int) esl_vec_DSum(abc_ct[i], abc->K);
      }

      if (esl_opt_GetBoolean(go, "-1")) 
	{
	  printf("%-6d %-20s %10s %7d %7" PRId64 " %12" PRId64, 
		 nali, 
		 msa->name,
		 eslx_msafile_DecodeFormat(fmt),
		 nseq,
		 alen,
		 nres);
	  if (! esl_opt_GetBoolean(go, "--small")) { 
	    printf(" %6" PRId64 " %6" PRId64 " %10.1f %3.0f\n",
		   small,
		   large,
		   (double) nres / (double) msa->nseq,
		   100.*avgid);
	  }
	  else { 
	    printf(" %10.1f\n", (double) nres / (double) nseq);
	  }
	}
      else
	{
	  printf("Alignment number:    %d\n",     nali);
	  if (msa->name != NULL)
	    printf("Alignment name:      %s\n",        msa->name); 
	  printf("Format:              %s\n",          eslx_msafile_DecodeFormat(fmt));
	  printf("Number of sequences: %d\n",          nseq);
	  printf("Alignment length:    %" PRId64 "\n", alen);
	  printf("Total # residues:    %" PRId64 "\n", nres);
	  if(! esl_opt_GetBoolean(go, "--small")) { 
	    printf("Smallest:            %" PRId64 "\n", small);
	    printf("Largest:             %" PRId64 "\n", large);
	  }
	  printf("Average length:      %.1f\n",        (double) nres / (double) nseq);
	  if(! esl_opt_GetBoolean(go, "--small")) { 
	    printf("Average identity:    %.0f%%\n",      100.*avgid); 
	  }
	  printf("//\n");
	}

      /* Dump data to optional output files, if nec */
      if(esl_opt_IsOn(go, "--list")) {
	if(! esl_opt_GetBoolean(go, "--small")) { 
	    /* only print sequence name to list file if ! --small, else we already have in esl_msafile2_ReadInfoPfam() */
	    for(i = 0; i < msa->nseq; i++) fprintf(listfp, "%s\n", msa->sqname[i]);
	}
      }

      /* if RF exists, get i_am_rf array[0..alen] which tells us which positions are non-gap RF positions
       * and rf2a_map, a map of non-gap RF positions to overall alignment positions */
      if(msa->rf != NULL) {
	if((status = map_rfpos_to_apos(msa, abc, errbuf, alen, &i_am_rf, &rf2a_map, &rflen)) != eslOK) esl_fatal(errbuf);
      }
      else i_am_rf = NULL;

      weights_exist = check_msa_weights(msa);
      use_weights   = (weights_exist && esl_opt_GetBoolean(go, "--weight")) ? TRUE : FALSE;
      
      if( (! esl_opt_GetBoolean(go, "--small")) && 
	  (esl_opt_IsOn(go, "--icinfo") || esl_opt_IsOn(go, "--rinfo")  || esl_opt_IsOn(go, "--pcinfo") || 
	   esl_opt_IsOn(go, "--cinfo")  || esl_opt_IsOn(go, "--bpinfo")))  {
	/* collect counts of each residue and PPs (if they exist) from the msa */
	if(esl_opt_GetBoolean(go, "--weight") && msa->wgt == NULL) esl_fatal("--weight requires all alignments have #=GS WT annotation, but aln %d does not", nali);
	if((status = count_msa(msa, errbuf, nali, 
			       esl_opt_GetBoolean(go, "--noambig"), /* ignore ambiguous residues? */
			       esl_opt_GetBoolean(go, "--weight"),  /* use msa->wgt sequence weights? */
			       &abc_ct, 
			       ((bpinfofp != NULL && msa->ss_cons != NULL) ? &bp_ct : NULL), /* get basepair counts? */
			       (msa->pp != NULL ? &pp_ct : NULL)))   /* get PP counts? */
	   != eslOK) esl_fatal(errbuf);
      }

      if( esl_opt_IsOn(go, "--icinfo")) {
	if((status = dump_infocontent_info(icinfofp, abc, abc_ct, use_weights, nali, alen, nseq, i_am_rf, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf);
      }
      if( esl_opt_IsOn(go, "--rinfo")) {
	if((status = dump_residue_info(rinfofp, abc, abc_ct, use_weights, nali, alen, nseq, i_am_rf, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf);
      }
      if(esl_opt_IsOn(go, "--pcinfo")) {
	if(pp_ct == NULL) esl_fatal("Error: --pcinfo requires all alignments have #=GR PP annotation, but alignment %d does not", nali);
	if((status = dump_posterior_column_info(pcinfofp, pp_ct, use_weights, nali, alen, nseq, i_am_rf, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf);
      }
      if(esl_opt_IsOn(go, "--psinfo")) {
	if(msa->pp == NULL) esl_fatal("Error: --psinfo requires all alignments have #=GR PP annotation, but alignment %d does not", nali);
	if((status = dump_posterior_sequence_info(psinfofp, msa, nali, alifile, errbuf) != eslOK)) esl_fatal(errbuf);
      }
      if( esl_opt_IsOn(go, "--iinfo")) {
	if(msa->rf == NULL) esl_fatal("--iinfo requires all alignments have #=GC RF annotation, but alignment %d does not", nali);
	if(esl_opt_GetBoolean(go, "--weight") && msa->wgt == NULL) esl_fatal("--weight requires all alignments have #=GS WT annotation, but aln %d does not", nali);
	if((status = dump_insert_info(iinfofp, msa, use_weights, nali, i_am_rf, alifile, errbuf) != eslOK)) esl_fatal(errbuf);
      }
      if( esl_opt_IsOn(go, "--cinfo")) {
	if((status = dump_column_residue_counts(cinfofp, abc, abc_ct, esl_opt_GetBoolean(go, "--noambig"), use_weights, nali, alen, nseq, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf);
      }
      if( esl_opt_IsOn(go, "--bpinfo")) {
	if(msa->ss_cons == NULL) esl_fatal("--bpinfo requires all alignments have #=GC SS_cons annotation, but alignment %d does not", nali);
	if((status = dump_basepair_counts(bpinfofp, msa, abc, bp_ct, use_weights, nali, nseq, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf);
      }

      esl_msa_Destroy(msa);
      if(abc_ct != NULL)   { esl_Free2D((void **) abc_ct, alen);          abc_ct   = NULL; }
      if(bp_ct != NULL)    { esl_Free3D((void ***) bp_ct, alen, abc->Kp); bp_ct    = NULL; }
      if(pp_ct != NULL)    { esl_Free2D((void **) pp_ct, alen);           pp_ct    = NULL; }
      if(i_am_rf != NULL)  { free(i_am_rf);                               i_am_rf  = NULL; }
      if(rf2a_map != NULL) { free(rf2a_map);                              rf2a_map = NULL; }
    }
  
  /* If an msa read failed, we've dropped out to here with an informative status code. 
   * we have to handle failures from new vs. legacy msa parsing differently 
   */
  if (esl_opt_GetBoolean(go, "--small")) 
    {
      if      (status == eslEFORMAT)  esl_fatal("Alignment file parse error, line %d of file %s:\n%s\nOffending line is:\n%s\n", old_afp->linenumber, old_afp->fname, old_afp->errbuf, old_afp->buf);	
      else if (status != eslEOF)      esl_fatal("Alignment file read failed with error code %d\n", status);
      else if (nali   == 0)           esl_fatal("No alignments found in file %s\n", alifile);
    }
  else
    {
      if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status);
    }

  /* Cleanup, normal return
   */
  if(listfp != NULL) { 
    fclose(listfp);
    printf("# List of sequences in %d alignment(s) saved to file %s\n", nali, esl_opt_GetString(go, "--list"));
  }
  if(icinfofp != NULL) { 
    fclose(icinfofp);
    printf("# Information content data saved to file %s.\n", esl_opt_GetString(go, "--icinfo")); 
  }
  if(rinfofp != NULL) { 
    fclose(rinfofp);
    printf("# Residue data saved to file %s.\n", esl_opt_GetString(go, "--rinfo")); 
  }
  if(pcinfofp != NULL) { 
    fclose(pcinfofp);
    printf("# Per-column posterior probability data saved to file %s.\n", esl_opt_GetString(go, "--pcinfo")); 
  }
  if(psinfofp != NULL) { 
    fclose(psinfofp);
    printf("# Per-sequence posterior probability data saved to file %s.\n", esl_opt_GetString(go, "--psinfo")); 
  }
  if(iinfofp != NULL) { 
    printf("# Insert data saved to file %s.\n", esl_opt_GetString(go, "--iinfo")); 
    fclose(iinfofp);
  }
  if(cinfofp != NULL) { 
    printf("# Per-column counts data saved to file %s.\n", esl_opt_GetString(go, "--cinfo")); 
    fclose(cinfofp);
  }
  if(bpinfofp != NULL) { 
    printf("# Per-column basepair counts data saved to file %s.\n", esl_opt_GetString(go, "--bpinfo")); 
    fclose(bpinfofp);
  }


  if (afp)     eslx_msafile_Close(afp);
  if (old_afp) esl_msafile2_Close(old_afp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #20
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_OMX         *ox      = NULL;
  P7_GMX         *gx      = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
  int             i;
  float           sc1, sc2;
  double          base_time, bench_time, Mcs;

  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);
  p7_oprofile_ReconfigLength(om, L);

  if (esl_opt_GetBoolean(go, "-x")) p7_profile_SameAsMF(om, gm);

  ox = p7_omx_Create(gm->M, 0, 0);
  gx = p7_gmx_Create(gm->M, L);

  /* Get a baseline time: how long it takes just to generate the sequences */
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
  esl_stopwatch_Stop(w);
  base_time = w->user;

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    {
      esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
      p7_MSVFilter    (dsq, L, om, ox, &sc1);   

      /* -c option: compare generic to fast score */
      if (esl_opt_GetBoolean(go, "-c")) 
	{
	  p7_GMSV    (dsq, L, gm, gx, 2.0, &sc2); 
	  printf("%.4f %.4f\n", sc1, sc2);  
	}

      /* -x option: compare generic to fast score in a way that should give exactly the same result */
      if (esl_opt_GetBoolean(go, "-x"))
	{
	  p7_GViterbi(dsq, L, gm, gx, &sc2); 
	  sc2 /= om->scale_b;
	  if (om->mode == p7_UNILOCAL)   sc2 -= 2.0; /* that's ~ L \log \frac{L}{L+2}, for our NN,CC,JJ */
	  else if (om->mode == p7_LOCAL) sc2 -= 3.0; /* that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ */
	  printf("%.4f %.4f\n", sc1, sc2);  
	}
    }
  esl_stopwatch_Stop(w);
  bench_time = w->user - base_time;
  Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
  esl_stopwatch_Display(stdout, w, "# CPU time: ");
  printf("# M    = %d\n",   gm->M);
  printf("# %.1f Mc/s\n", Mcs);

  free(dsq);
  p7_omx_Destroy(ox);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #21
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  ESL_RANDOMNESS *rng     = esl_randomness_CreateFast(0);
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_GMX         *gx      = NULL;
  P7_OMX         *fwd     = NULL;
  P7_TRACE       *tr      = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  int             N       = esl_opt_GetInteger(go, "-N");
  int             i;
  float           vsc, fsc, tsc;
  char            errbuf[eslERRBUFSIZE];
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");

  /* Read in one sequence */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);
  if  (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");

  /* create default null model, then create and optimize profile */
  bg = p7_bg_Create(abc);                p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);   p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);   p7_oprofile_Convert(gm, om);

  if (esl_opt_GetBoolean(go, "-p")) p7_oprofile_Dump(stdout, om);  

  fwd = p7_omx_Create(gm->M, sq->n, sq->n);
  gx  = p7_gmx_Create(gm->M, sq->n);
  tr  = p7_trace_Create();

  if (esl_opt_GetBoolean(go, "-m") == TRUE) p7_omx_SetDumpMode(stdout, fwd, TRUE); 
  p7_GViterbi(sq->dsq, sq->n, gm, gx,  &vsc);
  p7_Forward (sq->dsq, sq->n, om, fwd, &fsc);

  for (i = 0; i < N; i++)
    {
      p7_StochasticTrace(rng, sq->dsq, sq->n, om, fwd, tr);
      p7_trace_Score(tr, sq->dsq, gm, &tsc);
  
      if (esl_opt_GetBoolean(go, "-t") == TRUE) p7_trace_Dump(stdout, tr, gm, sq->dsq);
      if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK)  p7_Die("trace %d fails validation:\n%s\n", i, errbuf);

      printf("Sampled trace:  %.4f nats\n", tsc);
      p7_trace_Reuse(tr);
    }
  printf("Forward score:  %.4f nats\n", fsc);
  printf("Viterbi score:  %.4f nats\n", vsc);

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_trace_Destroy(tr);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_randomness_Destroy(rng);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #22
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  char           *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_OMX         *ox      = NULL;
  P7_GMX         *gx      = NULL;
  ESL_SQ         *sq      = NULL;
  ESL_SQFILE     *sqfp    = NULL;
  int             format  = eslSQFILE_UNKNOWN;
  float           msvraw, nullsc, msvscore;
  float           graw, gscore;
  double          P, gP;
  int             status;

  /* Read in one HMM */
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

  /* Open sequence file for reading */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);

  /* create default null model, then create and optimize profile */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* allocate DP matrices, both a generic and an optimized one */
  ox = p7_omx_Create(gm->M, 0, 0); /* one row version */
  gx = p7_gmx_Create(gm->M, sq->n);

  /* Useful to place and compile in for debugging: 
     p7_oprofile_Dump(stdout, om);              dumps the optimized profile
     p7_omx_SetDumpMode(stdout, ox, TRUE);      makes the fast DP algorithms dump their matrices
     p7_gmx_Dump(stdout, gx, p7_DEFAULT);       dumps a generic DP matrix
     p7_oprofile_SameMSV(om, gm);
  */
  p7_omx_SetDumpMode(stdout, ox, TRUE);    

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      p7_oprofile_ReconfigLength(om, sq->n);
      p7_ReconfigLength(gm,          sq->n);
      p7_bg_SetLength(bg,            sq->n);
      p7_omx_GrowTo(ox, om->M, 0,    sq->n); 
      p7_gmx_GrowTo(gx, gm->M,       sq->n); 

      p7_MSVFilter   (sq->dsq, sq->n, om, ox, &msvraw);  
      p7_bg_NullOne  (bg, sq->dsq, sq->n, &nullsc);
      msvscore = (msvraw - nullsc) / eslCONST_LOG2;
      P        = esl_gumbel_surv(msvscore,  om->evparam[p7_MMU],  om->evparam[p7_MLAMBDA]);

      p7_GMSV(sq->dsq, sq->n, gm, gx, 2.0, &graw);
      gscore   = (graw - nullsc) / eslCONST_LOG2;
      gP       = esl_gumbel_surv(gscore,  gm->evparam[p7_MMU],  gm->evparam[p7_MLAMBDA]);

      if (esl_opt_GetBoolean(go, "-1"))
	{
	  printf("%-30s  %-20s  %9.2g  %7.2f  %9.2g  %7.2f\n", sq->name, hmm->name, P, msvscore, gP, gscore);
	}
      else if (esl_opt_GetBoolean(go, "-P"))
	{ /* output suitable for direct use in profmark benchmark postprocessors: */
	  printf("%g  %.2f  %s  %s\n", P, msvscore, sq->name, hmm->name);
	}
      else
	{
	  printf("target sequence:      %s\n",        sq->name);
	  printf("msv filter raw score: %.2f nats\n", msvraw);
	  printf("null score:           %.2f nats\n", nullsc);
	  printf("per-seq score:        %.2f bits\n", msvscore);
	  printf("P-value:              %g\n",        P);
	  printf("GMSV raw score:       %.2f nats\n", graw);
	  printf("GSMV per-seq score:   %.2f bits\n", gscore);
	  printf("GSMV P-value:         %g\n",        gP);
	}
      
      esl_sq_Reuse(sq);
    }

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_omx_Destroy(ox);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #23
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  ESL_ALPHABET   *abc     = esl_alphabet_Create(eslAMINO);
  char           *qfile   = esl_opt_GetArg(go, 1);
  char           *tfile   = esl_opt_GetArg(go, 2);
  ESL_SQFILE     *qfp     = NULL;
  ESL_SQFILE     *tfp     = NULL;
  ESL_SQ         *qsq     = esl_sq_CreateDigital(abc);
  ESL_SQ         *tsq     = esl_sq_CreateDigital(abc);
  ESL_SCOREMATRIX *S      = esl_scorematrix_Create(abc);
  ESL_DMATRIX     *Q      = NULL;
  P7_BG           *bg     = p7_bg_Create(abc);
  P7_HMM          *hmm    = NULL;
  P7_PROFILE      *gm     = NULL;
  P7_REFMX        *vit    = p7_refmx_Create(200, 400); /* will grow as needed */
  double          *fa     = malloc(sizeof(double) * abc->K);
  double          popen   = 0.02;
  double          pextend = 0.4;
  double          lambda;
  float           vsc;
  float           nullsc;
  int             status;

  esl_composition_BL62(fa);
  esl_vec_D2F(fa, abc->K, bg->f);
  esl_scorematrix_Set("BLOSUM62", S);
  esl_scorematrix_ProbifyGivenBG(S, fa, fa, &lambda, &Q); 
  esl_scorematrix_JointToConditionalOnQuery(abc, Q);

  if (esl_sqfile_OpenDigital(abc, qfile, eslSQFILE_UNKNOWN, NULL, &qfp) != eslOK) esl_fatal("failed to open %s", qfile);
  if (esl_sqio_Read(qfp, qsq) != eslOK) esl_fatal("failed to read query seq");

  p7_Seqmodel(abc, qsq->dsq, qsq->n, qsq->name, Q, bg->f, popen, pextend, &hmm);  
  p7_hmm_SetComposition(hmm);
  p7_hmm_SetConsensus(hmm, qsq); 
  
  gm = p7_profile_Create(hmm->M, abc);
  p7_profile_ConfigUnilocal(gm, hmm, bg, 400);
  
  if (esl_sqfile_OpenDigital(abc, tfile, eslSQFILE_UNKNOWN, NULL, &tfp) != eslOK) esl_fatal("failed to open %s", tfile);
  while ((status = esl_sqio_Read(tfp, tsq)) == eslOK)
    {
      p7_bg_SetLength     (bg, tsq->n);
      p7_profile_SetLength(gm, tsq->n);
      
      p7_ReferenceViterbi(tsq->dsq, tsq->n, gm, vit, NULL, &vsc);    
      p7_bg_NullOne(bg, tsq->dsq, tsq->n, &nullsc);

      printf("%.4f %-25s %-25s\n", (vsc - nullsc) / eslCONST_LOG2, tsq->name, gm->name);
   
      esl_sq_Reuse(tsq);
      p7_refmx_Reuse(vit);
    }
  
  p7_refmx_Destroy(vit);
  p7_profile_Destroy(gm);
  p7_hmm_Destroy(hmm);
  p7_bg_Destroy(bg);
  esl_dmatrix_Destroy(Q);
  esl_scorematrix_Destroy(S);
  free(fa);
  esl_sq_Destroy(qsq);
  esl_sq_Destroy(tsq);
  esl_sqfile_Close(qfp);
  esl_sqfile_Close(tfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Beispiel #24
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS *go;
  char        *msafile;
  ESL_MSAFILE *afp;
  ESL_MSA     *msa;
  float       *sqd;
  int          status;
  int          nbad;
  int          nali    = 0;
  int          nbadali = 0;
  int          nwgt    = 0;
  int          nbadwgt = 0;
  int i;
  int be_quiet;
  int do_gsc;
  int do_pb;
  int do_blosum;
  double maxid;
  double tol;
  int    maxN;

  /* Process command line
   */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("failed to parse cmd line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) esl_fatal("failed to parse cmd line: %s\n", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") == TRUE) {
    puts(usage); 
    puts("\n  where options are:");
    esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */
    return 0;
  }
  be_quiet  = esl_opt_GetBoolean(go, "-q");
  do_blosum = esl_opt_GetBoolean(go, "--blosum");
  do_gsc    = esl_opt_GetBoolean(go, "--gsc");
  do_pb     = esl_opt_GetBoolean(go, "--pb");
  maxid     = esl_opt_GetReal   (go, "--id");
  tol       = esl_opt_GetReal   (go, "--tol");
  maxN      = esl_opt_GetInteger(go, "--maxN");
  if (esl_opt_ArgNumber(go) != 1) {
    puts("Incorrect number of command line arguments.");
    puts(usage);
    return 1;
  }
  msafile = esl_opt_GetArg(go, 1);
  esl_getopts_Destroy(go);

  /* Weight one or more alignments from input file
   */
  esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp);
  while (esl_msa_Read(afp, &msa) == eslOK)
    {
      if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; }

      nali++;
      nwgt += msa->nseq;
      ESL_ALLOC(sqd, sizeof(float) * msa->nseq);

      if (do_gsc) {
	esl_msaweight_GSC(msa);
	GSCWeights(msa->aseq, msa->nseq, msa->alen, sqd);
      } else if (do_pb) {
	esl_msaweight_PB(msa);
	PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, sqd);
      } else if (do_blosum) {
	esl_msaweight_BLOSUM(msa, maxid);
	BlosumWeights(msa->aseq, msa->nseq, msa->alen, maxid, sqd);
	/* workaround SQUID bug: BLOSUM weights weren't renormalized to sum to nseq. */
	esl_vec_FNorm (sqd, msa->nseq);
	esl_vec_FScale(sqd, msa->nseq, (float) msa->nseq);	
      }

      if (! be_quiet) {
	for (i = 0; i < msa->nseq; i++)
	  fprintf(stdout, "%-20s  %.3f  %.3f\n",
		  msa->sqname[i], msa->wgt[i], sqd[i]);
      }
	
      nbad = 0;
      for (i = 0; i < msa->nseq; i++)
	if (esl_DCompare((double) sqd[i], msa->wgt[i], tol) != eslOK) 
	  nbad++;
      if (nbad > 0) nbadali++;
      nbadwgt += nbad;

      if (nbad > 0) printf("%-20s  :: alignment shows %d weights that differ (out of %d) \n", 
			   msa->name, nbad, msa->nseq);

      esl_msa_Destroy(msa);
      free(sqd);
    } 
  esl_msafile_Close(afp);

  if (nbadali == 0) 
    printf("OK: all weights identical between squid and Easel in %d alignment(s)\n", nali);
  else {
    printf("%d of %d weights mismatched at (> %f fractional difference)\n",
	   nbadwgt, nwgt, tol);
    printf("involving %d of %d total alignments\n", nbadali, nali);
  }
  return eslOK;

 ERROR:
  return status;
}