C++ (Cpp) esl_strcmpの例

プログラミング言語: C++ (Cpp)

メソッド/関数: esl_strcmp

hotexamples.comのコード掲載数: 7

C++ (Cpp) esl_strcmp - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のesl_strcmpの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: p7_tophits.c プロジェクト: EddyRivasLab/hmmer

int
p7_hit_Compare(const P7_HIT *h1, const P7_HIT *h2, float tol)
{
  int d;
  int status;

  if (    strcmp(h1->name, h2->name) != 0) return eslFAIL;
  if (esl_strcmp(h1->acc,  h2->acc)  != 0) return eslFAIL;
  if (esl_strcmp(h1->desc, h2->desc) != 0) return eslFAIL;

  if ( h1->window_length != h2->window_length) return eslFAIL;
  if ( h1->ndom          != h2->ndom)          return eslFAIL;
  if ( h1->noverlaps     != h2->noverlaps)     return eslFAIL;
  if ( h1->flags         != h2->flags)         return eslFAIL;
  if ( h1->nreported     != h2->nreported)     return eslFAIL;
  if ( h1->nincluded     != h2->nincluded)     return eslFAIL;
  if ( h1->best_domain   != h2->best_domain)   return eslFAIL;
  if ( h1->seqidx        != h2->seqidx)        return eslFAIL;
  if ( h1->subseq_start  != h2->subseq_start)  return eslFAIL;
  if ( h1->offset        != h2->offset)        return eslFAIL;
  
  if ( esl_DCompare( h1->sortkey,   h2->sortkey,   tol ) != eslOK) return eslFAIL;
  if ( esl_FCompare( h1->score,     h2->score,     tol ) != eslOK) return eslFAIL;
  if ( esl_FCompare( h1->pre_score, h2->pre_score, tol ) != eslOK) return eslFAIL;
  if ( esl_FCompare( h1->sum_score, h2->sum_score, tol ) != eslOK) return eslFAIL;
  if ( esl_DCompare( h1->lnP,       h2->lnP,       tol ) != eslOK) return eslFAIL;
  if ( esl_DCompare( h1->pre_lnP,   h2->pre_lnP,   tol ) != eslOK) return eslFAIL;
  if ( esl_DCompare( h1->sum_lnP,   h2->sum_lnP,   tol ) != eslOK) return eslFAIL;
  if ( esl_DCompare( h1->nexpected, h2->nexpected, tol ) != eslOK) return eslFAIL;

  for (d = 0; d < h1->ndom; d++)
    if (( status = p7_domain_Compare(&(h1->dcl[d]), &(h2->dcl[d]), tol)) != eslOK) return status;
  return eslOK;
}

コード例 #2

ファイルを表示

ファイル: hmmerfm-exactmatch.c プロジェクト: dboudour2002/musicHMMER

/* process_commandline()
 *
 * Build an ESL_GETOPTS object from the file-scope <options> table, then
 * process the environment and the command line and verify the resulting
 * configuration.
 *
 * If "-h" is set, prints the banner, usage, and the help text for option
 * groups 1 and 2 to stdout, then exits with status 0.
 *
 * Exactly two positional arguments are required: argument 1 is returned in
 * <*ret_qfile> and argument 2 in <*ret_fmfile>. At most one of them may be
 * "-".
 *
 * Returns <eslOK> on success, with the getopts object in <*ret_go>.
 * Does not return on failure:
 *   - user errors jump to FAILURE, which prints usage/help and exits(1);
 *   - ERROR destroys <go> and exits with <status> (presumably reached via
 *     ESL_XEXCEPTION_SYS when a write to stdout fails -- the macro is not
 *     defined in this file; confirm).
 */
static int
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_fmfile, char **ret_qfile)
{
  ESL_GETOPTS *go = esl_getopts_Create(options);
  int          status;

  if (esl_opt_ProcessEnvironment(go)         != eslOK)  { if (printf("Failed to process environment: %s\n", go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK)  { if (printf("Failed to parse command line: %s\n", go->errbuf)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if (esl_opt_VerifyConfig(go)               != eslOK)  { if (printf("Failed to parse command line: %s\n", go->errbuf)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }

  /* help format: */
  if (esl_opt_GetBoolean(go, "-h") == TRUE) 
    {
      esl_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);

      if (puts("\nBasic options:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 80=textwidth*/

      if (puts("\nSpecial options:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, go, 2, 2, 80); /* 2= group; 2 = indentation; 80=textwidth*/

      exit(0);
  }

  /* Exactly two positional arguments: <qfile> first, <fmfile> second. */
  if (esl_opt_ArgNumber(go)                  != 2)    { if (puts("Incorrect number of command line arguments.")     < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if ((*ret_qfile  = esl_opt_GetArg(go, 1)) == NULL)  { if (puts("Failed to get <qfile> argument on command line")  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
  if ((*ret_fmfile = esl_opt_GetArg(go, 2)) == NULL)  { if (puts("Failed to get <fmfile> argument on command line") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }

  /* Validate any attempted use of stdin streams */
  if (esl_strcmp(*ret_fmfile, "-") == 0 && esl_strcmp(*ret_qfile, "-") == 0) 
    { if (puts("Either <fmfile> or <qfile> may be '-' (to read from stdin), but not both.") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }

  *ret_go = go;
  return eslOK;

 FAILURE:  /* all errors handled here are user errors, so be polite.  */
  esl_usage(stdout, argv[0], usage);
  puts("\nwhere basic options are:");
  esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 80=textwidth*/
  printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
  esl_getopts_Destroy(go);
  exit(1);

 ERROR:  /* non-user errors: release the getopts object and exit with the recorded status */
  if (go) esl_getopts_Destroy(go);
  exit(status);
}

コード例 #3

ファイルを表示

ファイル: hmmerfm-exactmatch.c プロジェクト: dboudour2002/musicHMMER

/* output_header()
 *
 * Write the run header to <ofp>: application name (tail of go->argv[0])
 * and banner, Easel copyright/license lines, the input file names
 * <fmfile> and <qfile>, any "--out" / "--count_only" options that were
 * used, and the alphabet, bin length and suffix-array sample rate taken
 * from <meta>.
 *
 * The alphabet is printed as "dna" or "amino"; any other <meta->alph_type>
 * is printed as "unknown" (previously <alph> was left uninitialized in that
 * case and then passed to "%s").
 *
 * Returns:  <eslOK> on success.
 *           The status of esl_FileTail() if it fails.
 *           <eslEWRITE> if any write to <ofp> fails.
 *
 * All write failures go through the ERROR label so the <appname> buffer
 * allocated by esl_FileTail() is released on every exit path after it has
 * been obtained.
 */
static int
output_header(FM_METADATA *meta, FILE *ofp, const ESL_GETOPTS *go, char *fmfile, char *qfile)
{
  const char *alph    = "unknown";   /* fallback for alphabet types not named below */
  char       *appname = NULL;
  int         status;

  if      (meta->alph_type == fm_DNA)       alph = "dna";
  else if (meta->alph_type == fm_AMINO)     alph = "amino";

  if ((status = esl_FileTail(go->argv[0], FALSE, &appname)) != eslOK) return status;

  if (fprintf(ofp, "# %s :: %s\n", appname, banner)                                               < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# %s\n", EASEL_COPYRIGHT)                                                     < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# %s\n", EASEL_LICENSE)                                                       < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  if (fprintf(ofp, "# input binary-formatted HMMER database:   %s\n", fmfile) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# input file of query sequences:           %s\n", qfile)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  if (esl_opt_IsUsed(go, "--out")) {
    char *outfile = esl_opt_GetString(go, "--out");
    /* "-" means the hit list goes to stdout */
    if (fprintf(ofp, "# output file containing list of hits:     %s\n", (esl_strcmp(outfile, "-") == 0 ? "stdout" : outfile)) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  }

  if (esl_opt_IsUsed(go, "--count_only") && fprintf(ofp, "# output only counts, not hit locations\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  if (fprintf(ofp, "# alphabet     :                           %s\n", alph)                         < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# bin_length   :                           %d\n", meta->freq_cnt_b)             < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# suffix array sample rate:                %d\n", meta->freq_SA)                < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  free(appname);
  return eslOK;

 ERROR:
  free(appname);
  return status;
}

コード例 #4

ファイルを表示

ファイル: p7_tophits.c プロジェクト: EddyRivasLab/hmmer

static int
hit_sorter_by_modelname_aliposition(const void *vh1, const void *vh2)
{
  P7_HIT *h1 = *((P7_HIT **) vh1);  /* don't ask. don't change. Don't Panic. */
  P7_HIT *h2 = *((P7_HIT **) vh2);

  int res = esl_strcmp( h1->name, h2->name);

  if  ( res != 0 ) return  res; /* first key, seq_idx (unique id for sequences), low to high */

  // if on different strand, the positive strand goes first, else use position
  int dir1 = (h1->dcl[0].ia < h1->dcl[0].ib ? 1 : -1);
  int dir2 = (h2->dcl[0].ia < h2->dcl[0].ib ? 1 : -1);

  if (dir1 != dir2) return dir2; // so if dir1 is pos (1), and dir2 is neg (-1), this will return -1, placing h1 before h2;  otherwise, vice versa

  if ( h1->dcl[0].ia == h2->dcl[0].ia)    return  (h1->dcl[0].ib < h2->dcl[0].ib ? 1 : -1 );
  else                                        return  (h1->dcl[0].ia > h2->dcl[0].ia ? 1 : -1 );
}

コード例 #5

ファイルを表示

ファイル: hmmerfm-exactmatch.c プロジェクト: dboudour2002/musicHMMER

/* Function:  main()
 * Synopsis:  Run set of queries against an FM
 * Purpose:   Read in a FM and a file of query sequences.
 *            For each query, find matching FM interval, then collect positions in
 *            the original text T for the corresponding occurrences. These positions
 *            are 0-based (so first character is position 0).
 *
 *            Each line of the query file is one query. When "--out" is given,
 *            hit locations are written to that file ("-" means stdout); with
 *            "--count_only", only hit/miss counts are collected. Summary
 *            statistics are written to stderr at the end.
 *
 * Returns:   Does not return; exits with <eslOK> on success, or with the
 *            failing status if an allocation fails.
 */
int
main(int argc,  char *argv[]) 
{
  void* tmp; // used for RALLOC calls
  clock_t t1, t2;
  struct tms ts1, ts2;
  char *fname_fm      = NULL;
  char *fname_queries = NULL;
  FM_HIT *hits        = NULL;
  char *line          = NULL;
  int status        = eslOK;
  int hit_cnt       = 0;   // number of queries with at least one (legitimate) hit
  int hit_indiv_cnt = 0;   // number of individual hits
  int miss_cnt      = 0;   // number of queries with no hit
  int hit_num       = 0;   // hits for the current query, over all blocks and both directions
  int hit_num2       = 0;  // hits for the current query that survive validation
  int hits_size     = 0;   // allocated length of hits[]
  int i,j;
  int count_only    = 0;

  FM_INTERVAL interval;
  FM_DATA *fmsf = NULL;
  FM_DATA *fmsb = NULL;
  FILE* fp_fm   = NULL;
  FILE* fp      = NULL;
  FILE* out     = NULL;
  char *outname = NULL;

  ESL_GETOPTS     *go  = NULL;    /* command line processing                 */
  FM_CFG *cfg;
  FM_METADATA *meta;

  ESL_SQ       *tmpseq;  // used for sequence validation
  ESL_ALPHABET *abc = NULL;


  //start timer
  t1 = times(&ts1);

  process_commandline(argc, argv, &go, &fname_fm, &fname_queries);


  if (esl_opt_IsOn(go, "--out")) {
    outname = esl_opt_GetString(go, "--out");
    if ( esl_strcmp ("-", outname) == 0 ) {
      out = stdout;
      outname = "stdout";
    } else {
      out = fopen(outname,"w");
      // without this check a failed open would silently discard every hit
      if (out == NULL)
        esl_fatal("Unable to open file %s\n", outname);
    }
  }

  if (esl_opt_IsOn(go, "--count_only"))
    count_only = 1;


  if((fp_fm = fopen(fname_fm, "rb")) == NULL)
    esl_fatal("Cannot open file `%s': ", fname_fm);


  fm_configAlloc(&cfg);
  cfg->occCallCnt = 0;
  meta = cfg->meta;
  meta->fp = fp_fm;


  fm_readFMmeta( meta);



  if      (meta->alph_type == fm_DNA)   abc     = esl_alphabet_Create(eslDNA);
  else if (meta->alph_type == fm_AMINO) abc     = esl_alphabet_Create(eslAMINO);
  tmpseq = esl_sq_CreateDigital(abc);



  //read in FM-index blocks
  ESL_ALLOC(fmsf, meta->block_count * sizeof(FM_DATA) );
  if (!meta->fwd_only)
    ESL_ALLOC(fmsb, meta->block_count * sizeof(FM_DATA) );

  for (i=0; i<meta->block_count; i++) {
    fm_FM_read( fmsf+i,meta, TRUE );

    if (!meta->fwd_only) {
      fm_FM_read(fmsb+i, meta, FALSE );
      // the backward FM shares the forward FM's SA and T rather than owning copies
      fmsb[i].SA = fmsf[i].SA;
      fmsb[i].T = fmsf[i].T;
    }
  }
  fclose(fp_fm);

  output_header(meta, stdout, go, fname_fm, fname_queries);


  /* initialize a few global variables, then call initGlobals
   * to do architecture-specific initialization
   */
  fm_configInit(cfg, NULL);

  fm_alphabetCreate(meta, NULL); // don't override charBits

  fp = fopen(fname_queries,"r");
  if (fp == NULL)
    esl_fatal("Unable to open file %s\n", fname_queries);

  ESL_ALLOC(line, FM_MAX_LINE * sizeof(char));

  hits_size = 200;
  ESL_ALLOC(hits, hits_size * sizeof(FM_HIT));

  while(fgets(line, FM_MAX_LINE, fp) ) {
    // strip the trailing newline, and record the query length
    int qlen=0;
    while (line[qlen] != '\0' && line[qlen] != '\n')  qlen++;
    if (line[qlen] == '\n')  line[qlen] = '\0';

    hit_num = 0;

    for (i=0; i<meta->block_count; i++) {

      fm_getSARangeReverse(fmsf+i, cfg, line, meta->inv_alph, &interval);
      if (interval.lower>=0 && interval.lower <= interval.upper) {
        int new_hit_num =  interval.upper - interval.lower + 1;
        hit_num += new_hit_num;
        if (!count_only) {
          if (hit_num > hits_size) {
            hits_size = 2*hit_num;
            ESL_RALLOC(hits, tmp, hits_size * sizeof(FM_HIT));
          }
          getFMHits(fmsf+i, cfg, &interval, i, hit_num-new_hit_num, qlen, hits, fm_forward);
        }

      }


      /* find reverse hits, using backward search on the forward FM*/
      if (!meta->fwd_only) {
        fm_getSARangeForward(fmsb+i, cfg, line, meta->inv_alph, &interval);// yes, use the backward fm to produce the equivalent of a forward search on the forward fm
        if (interval.lower>=0 && interval.lower <= interval.upper) {
          int new_hit_num =  interval.upper - interval.lower + 1;
          hit_num += new_hit_num;
          if (!count_only) {
            if (hit_num > hits_size) {
              hits_size = 2*hit_num;
              ESL_RALLOC(hits, tmp, hits_size * sizeof(FM_HIT));
            }
            //even though I used fmsb above, use fmsf here, since we'll now do a backward trace
            //in the FM-index to find the next sampled SA position
            getFMHits(fmsf+i, cfg, &interval, i, hit_num-new_hit_num, qlen, hits, fm_backward);
          }
        }

      }

    }


    if (hit_num > 0) {
      if (count_only) {
        hit_cnt++;
        hit_indiv_cnt += hit_num;
      } else {
        hit_num2 = 0;

        //for each hit, identify the sequence id and position within that sequence
        for (i = 0; i< hit_num; i++) {

          status = fm_getOriginalPosition (fmsf, meta, hits[i].block, hits[i].length, fm_forward, hits[i].start,  &(hits[i].block), &(hits[i].start) );
          hits[i].sortkey = (status==eslERANGE ? -1 : meta->seq_data[ hits[i].block ].target_id);

          //validate match - if any characters in orig sequence were ambiguities, reject
          fm_convertRange2DSQ( fmsf, meta, hits[i].start, hits[i].length, p7_NOCOMPLEMENT, tmpseq, TRUE );
          for (j=1; j<=hits[i].length; j++) {
            if (tmpseq->dsq[j] >= abc->K) {
              hits[i].sortkey = -1; //reject
              j = hits[i].length+1; //quit looking
            }
          }

          if (hits[i].sortkey != -1)
            hit_num2++; // legitimate hit

        }
        if (hit_num2 > 0)
          hit_cnt++;

        //now sort according the the sequence_id corresponding to that seq_offset
        qsort(hits, hit_num, sizeof(FM_HIT), hit_sorter);

        //skim past the skipped entries
        i = 0;
        while ( i < hit_num ) {
          if (hits[i].sortkey != -1 )
            break;  //
          i++;
        }


        if (i < hit_num) {
          if (out != NULL) {
            fprintf (out, "%s\n",line);
            fprintf (out, "    %8ld %s %10s\n", (long)(hits[i].start), (hits[i].direction==fm_forward?"f":"r"), meta->seq_data[ hits[i].block ].name);
          }
          hit_indiv_cnt++;
          i++; // skip the first one, since I'll be comparing each to the previous

          // report each remaining hit only if it differs from the one before it
          for (  ; i< hit_num; i++) {
            if (
                 hits[i].sortkey   != hits[i-1].sortkey ||  //sortkey is seq_data[].id
                 hits[i].direction != hits[i-1].direction ||
                 hits[i].start     != hits[i-1].start )
            {
              if (out != NULL)
                fprintf (out, "    %8ld %s %10s\n", (long)(hits[i].start), (hits[i].direction==fm_forward?"f":"r"), meta->seq_data[ hits[i].block ].name);
              hit_indiv_cnt++;
            }
          }
          if (out != NULL)
            fprintf (out, "\n");
        }
      }
    } else {
      miss_cnt++;
    }


  }

  for (i=0; i<meta->block_count; i++) {
    fm_FM_destroy( fmsf+i, 1 );
    if (!meta->fwd_only)
      fm_FM_destroy( fmsb+i, 0 );
  }


  free (hits);
  free (line);
  fclose(fp);
  if (out != NULL && out != stdout)
    fclose(out);


  // compute and print the elapsed time in seconds
  t2 = times(&ts2);
  {
    double clk_ticks = sysconf(_SC_CLK_TCK);
    double elapsedTime = (t2-t1)/clk_ticks;
    double throughput = cfg->occCallCnt/elapsedTime;

    fprintf (stderr, "hit: %-10d  (%d)\n", hit_cnt, hit_indiv_cnt);
    fprintf (stderr, "miss:%-10d\n", miss_cnt);
    fprintf (stderr, "run time:  %.2f seconds\n", elapsedTime);
    fprintf (stderr, "occ calls: %12s\n", commaprint(cfg->occCallCnt));
    fprintf (stderr, "occ/sec:   %12s\n", commaprint(throughput));
  }

  // cfg must stay alive until the statistics above have read cfg->occCallCnt
  fm_configDestroy(cfg);

  exit(eslOK);


ERROR:
  printf ("failure allocating memory for hits\n");
  exit(status);


}

コード例 #6

ファイルを表示

ファイル: tophits_output.c プロジェクト: EddyRivasLab/hmmer

/* Function:  p7_tophits_RemoveDuplicates()
 * Synopsis:  Remove overlapping hits.
 *
 * Purpose:   After nhmmer pipeline has completed, the TopHits object may
 *               contain duplicates if the target was broken into overlapping
 *               windows. Scan through, and remove duplicates.  Since the
 *               duplicates may be incomplete (one sequence is a partial
 *               hit because its window didn't cover the full length of
 *               the hit; or perhaps it's full-length, but with slightly
 *               different scoring due to the impact of sequence
 *               composition on score model), keep the one with better
 *               p-value.
 *
 *               "Removal" means setting <p7_IS_DUPLICATE> in the hit's
 *               flags; if <using_bit_cutoffs> is nonzero, <p7_IS_REPORTED>
 *               and <p7_IS_INCLUDED> are also cleared. No hit is deleted
 *               from <th>.
 *
 *               Each hit is compared only against the most recent hit that
 *               was not flagged, so presumably the caller has already
 *               sorted <th> so that potential duplicates are adjacent --
 *               confirm against callers.
 *
 * Returns:   <eslOK> on success.
 */
int
p7_tophits_RemoveDuplicates(P7_TOPHITS *th, int using_bit_cutoffs)
{
  int     i;    /* counter over hits */
  int     j;    /* previous un-duplicated hit */
  int     s_i, s_j, e_i, e_j, dir_i, dir_j, len_i, len_j;
  int     intersect_alistart, intersect_aliend, intersect_alilen;
  int     intersect_hmmstart, intersect_hmmend, intersect_hmmlen;
  //int64_t sub_i, sub_j;
  int     tmp;
  double  p_i, p_j;
  int remove;   /* index (i or j) of the hit to be flagged as a duplicate */

  if (th->N<2) return eslOK;

  j=0;
  for (i = 1; i < th->N; i++)
  {

      /* Hit j: lnP, and first-domain coordinates normalized so that s_j <= e_j;
       * dir_j remembers the original orientation.
       */
      //sub_j = th->hit[j]->subseq_start;
      p_j = th->hit[j]->lnP;
      s_j = th->hit[j]->dcl[0].ia;
      e_j = th->hit[j]->dcl[0].ib;
      dir_j = (s_j < e_j ? 1 : -1);
      if (dir_j == -1) {
        tmp = s_j;
        s_j = e_j;
        e_j = tmp;
      }
      len_j = e_j - s_j + 1 ;


      /* Hit i: same normalization. */
      //sub_i = th->hit[i]->subseq_start;
      p_i = th->hit[i]->lnP;
      s_i = th->hit[i]->dcl[0].ia;
      e_i = th->hit[i]->dcl[0].ib;
      dir_i = (s_i < e_i ? 1 : -1);
      if (dir_i == -1) {
        tmp = s_i;
        s_i = e_i;
        e_i = tmp;
      }
      len_i = e_i - s_i + 1 ;


      // these will only matter if seqidx and strand are the same
      intersect_alistart  = s_i>s_j ? s_i : s_j;
      intersect_aliend    = e_i<e_j ? e_i : e_j;
      intersect_alilen    = intersect_aliend - intersect_alistart + 1;

      // overlap of the two hits in model (hmm) coordinates
      intersect_hmmstart = (th->hit[i]->dcl[0].ad->hmmfrom > th->hit[j]->dcl[0].ad->hmmfrom) ? th->hit[i]->dcl[0].ad->hmmfrom : th->hit[j]->dcl[0].ad->hmmfrom;
      intersect_hmmend   = (th->hit[i]->dcl[0].ad->hmmto   < th->hit[j]->dcl[0].ad->hmmto)   ? th->hit[i]->dcl[0].ad->hmmto : th->hit[j]->dcl[0].ad->hmmto;
      intersect_hmmlen = intersect_hmmend - intersect_hmmstart + 1;

      // NOTE(review): name and seqidx are tested against hit[i-1], while the coordinates
      // above come from hit[j]. j only lags behind i-1 inside a run of hits that already
      // passed this same test, so the two appear equivalent -- confirm before changing.
      if ( esl_strcmp(th->hit[i]->name, th->hit[i-1]->name) == 0  && //same model
          th->hit[i]->seqidx ==  th->hit[i-1]->seqidx  && //same source sequence
           dir_i == dir_j && // only bother removing if the overlapping hits are on the same strand
           intersect_hmmlen > 0 && //only if they're both hitting similar parts of the model
           (
               ( s_i >= s_j-3 && s_i <= s_j+3) ||  // at least one side is essentially flush
               ( e_i >= e_j-3 && e_i <= e_j+3) ||
               ( intersect_alilen >= len_i * 0.95) || // or the overlap covers at least 95% of one of the hits
               ( intersect_alilen >= len_j * 0.95)
           )
      )
      {
        /* Force one to go unreported.  We choose to keep the one with the
         * better e-value.  This addresses two issues
         * (1) longer hits sometimes encounter higher bias corrections,
         *     leading to lower scores; seems better to focus on the
         *     high-scoring heart of the alignment, if we have a
         *     choice
         * (2) it is possible that a lower-scoring longer hit (see #1)
         *     that is close to threshold will pass the pipeline in
         *     one condition and not the other (e.g. --toponly, or
         *     single vs multi threaded), and if longer hits obscure
         *     shorter higher-scoring ones, a shorter "hit" might be
         *     lost by being obscured by a longer one that is subsequently
         *     removed due to insufficient score.
         * see late notes in ~wheelert/notebook/2012/0518-dfam-scripts/00NOTES
        */
        // flag j if hit i has the strictly smaller lnP; otherwise (including ties) flag i
        remove = p_i < p_j ? j : i;

        th->hit[remove]->flags |= p7_IS_DUPLICATE;
        if (using_bit_cutoffs) {
          //report/include flags were already included, need to remove them here
          th->hit[remove]->flags &= ~p7_IS_REPORTED;
          th->hit[remove]->flags &= ~p7_IS_INCLUDED;
        }

        // j now tracks whichever of the pair was kept
        j = remove == j ? i : j;
      } else {
        j = i;
      }
  }
  return eslOK;
}

コード例 #7

ファイルを表示

ファイル: makehmmerdb.c プロジェクト: Janelia-Farm-Xfam/Bio-HMM-Logo

/* Function:  main()
 * Synopsis:  break input sequence set into chunks, for each one building the
 *            Burrows-Wheeler transform and corresponding FM-index. Maintain requisite
 *            meta data.
 * Notes:     Currently depends on the divsufsort-lite code of Yuta Mori, though this
 *            could easily be replaced.
 */
int
main(int argc, char **argv) 
{
  char tmp_filename[16] = "fmtmpXXXXXX";
  FILE *fptmp          = NULL;
  FILE *fp             = NULL;
  uint8_t *T           = NULL;
  uint8_t *BWT         = NULL;
  int *SA              = NULL; //what I write will be 32-bit ints, but I need to keep this as int so it'll work with libdivsufsort
  uint32_t *SAsamp     = NULL;
  uint32_t *occCnts_sb = NULL; // same indexing as above
  uint32_t *cnts_sb    = NULL;
  uint16_t *occCnts_b  = NULL; // this is logically a 2D array, but will be indexed as occ_cnts[alph_size*index + char]  (instead of occ_cnts[index][char])
  uint16_t *cnts_b     = NULL;
  FM_METADATA *meta    = NULL;

  clock_t t1, t2;
  struct tms ts1, ts2;

  long i,j,c;
  int status = eslOK;

  int chars_per_byte;
  int num_freq_cnts_sb ;
  int num_freq_cnts_b ;
  int num_SA_samples ;

  int             infmt     = eslSQFILE_UNKNOWN;
  int             alphatype = eslUNKNOWN;
  int             alphaguess =eslUNKNOWN;
  ESL_ALPHABET   *abc       = NULL;
  ESL_SQ         *sq        = NULL;
  ESL_SQFILE     *sqfp      = NULL;

  ESL_SQ       *tmpsq = NULL;
  ESL_SQ_BLOCK *block = NULL;

  char *fname_in = NULL;
  char *fname_out= NULL;
  int block_size = 50000000;
  int sq_cnt = 0;
  int use_tmpsq = 0;
  uint64_t block_length;
  uint64_t total_char_count = 0;

  int max_block_size;

  int numblocks = 0;
  uint32_t numseqs = 0;


  int allocedseqs = 1000;
  uint32_t seq_offset = 0;
  uint32_t ambig_offset = 0;
  uint32_t overlap = 0;
  uint16_t seq_cnt;
  uint16_t ambig_cnt;

  uint32_t prev_numseqs = 0;

  int compressed_bytes;
  uint32_t term_loc;

  ESL_GETOPTS     *go  = NULL;    /* command line processing                 */

  uint8_t        ambig_repl = 0;
  int            in_ambig_run = 0;
  FM_AMBIGLIST   ambig_list;

  ESL_ALLOC (meta, sizeof(FM_METADATA));
  if (meta == NULL)
    esl_fatal("unable to allocate memory to store FM meta data\n");

  ESL_ALLOC (meta->ambig_list, sizeof(FM_AMBIGLIST));
  if (meta->ambig_list == NULL)
      esl_fatal("unable to allocate memory to store FM ambiguity data\n");
  fm_initAmbiguityList(meta->ambig_list);


  meta->alph_type   = fm_DNA;
  meta->freq_SA     = 8;
  meta->freq_cnt_b  = 256;
  meta->freq_cnt_sb = pow(2,16); //65536 - that's the # values in a short
  meta->seq_count = 0;
  ESL_ALLOC (meta->seq_data, allocedseqs * sizeof(FM_SEQDATA));
  if (meta->seq_data == NULL )
    esl_fatal("unable to allocate memory to store FM sequence data\n");


  process_commandline(argc, argv, &go, &fname_in, &fname_out);

  if (esl_opt_IsOn(go, "--bin_length")) meta->freq_cnt_b = esl_opt_GetInteger(go, "--bin_length");
  if ( meta->freq_cnt_b < 32 || meta->freq_cnt_b >4096 ||  (meta->freq_cnt_b & (meta->freq_cnt_b - 1))  ) // test power of 2
    esl_fatal("bin_length must be a power of 2, at least 128, and at most 4096\n");

  if (esl_opt_IsOn(go, "--sa_freq")) meta->freq_SA = esl_opt_GetInteger(go, "--sa_freq");
  if ( (meta->freq_SA & (meta->freq_SA - 1))  )  // test power of 2
    esl_fatal ("SA_freq must be a power of 2\n");


  if (esl_opt_IsOn(go, "--block_size")) block_size = 1000000 * esl_opt_GetInteger(go, "--block_size");
  if ( block_size <=0  )
    esl_fatal ("block_size must be a positive number\n");

  //start timer
  t1 = times(&ts1);

  output_header(stdout, go, fname_in, fname_out);

  if (esl_opt_GetString(go, "--informat") != NULL) {
    infmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat"));
    if (infmt == eslSQFILE_UNKNOWN) esl_fatal("%s is not a valid input sequence file format for --informat");
  }

  status = esl_sqfile_Open(fname_in, infmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) esl_fatal("No such file %s", fname_in);
  else if (status == eslEFORMAT)   esl_fatal("Format of seqfile %s unrecognized.", fname_in);
  else if (status != eslOK)        esl_fatal("Open failed, code %d.", status);

  meta->fwd_only = 0;

  if (esl_opt_IsUsed(go, "--alph")) {
    meta->alph    = esl_opt_GetString(go, "--alph") ;
    if ( esl_strcmp(meta->alph, "dna")==0  ||  esl_strcmp(meta->alph, "rna")==0) {
      meta->alph_type = fm_DNA;
      alphatype = eslDNA;
    } else if (esl_strcmp(meta->alph, "dna_full")==0  || esl_strcmp(meta->alph, "rna_full")==0) {
      meta->alph_type = fm_DNA_full;
      alphatype = eslDNA;
    } else if (esl_strcmp(meta->alph, "amino")==0) {
      meta->alph_type = fm_AMINO;
      alphatype = eslAMINO;
      meta->fwd_only = 1;
    } else {
      esl_fatal("Unknown alphabet type. Try 'dna', 'dna_full', or 'amino'\n%s", "");
    }
  } else {
    esl_sqfile_GuessAlphabet(sqfp, &alphaguess);

    if (alphaguess == eslDNA || alphaguess == eslRNA) {
      meta->alph_type = fm_DNA;
      alphatype = eslDNA;
    } else if (alphaguess == eslAMINO) {
      meta->alph_type = fm_AMINO;
      alphatype = eslAMINO;
      meta->fwd_only = 1;
    } else {
      esl_fatal("Unknown alphabet type. Try 'dna', 'dna_full', or 'amino'\n%s", "");
    }
  }


  if (esl_opt_IsOn(go, "--fwd_only") )
    meta->fwd_only = 1;

  meta->alph = NULL;



  //getInverseAlphabet
  fm_alphabetCreate(meta, &(meta->charBits));
  chars_per_byte = 8/meta->charBits;

    //shift inv_alph up one, to make space for '$' at 0
  for (i=0; i<256; i++)
    if ( meta->inv_alph[i] >= 0)
      meta->inv_alph[i]++;


  abc     = esl_alphabet_Create(alphatype);
  sq      = esl_sq_CreateDigital(abc);
  tmpsq   =  esl_sq_CreateDigital(abc);

  esl_sqfile_SetDigital(sqfp, abc);
  block = esl_sq_CreateDigitalBlock(FM_BLOCK_COUNT, abc);
  block->complete = FALSE;
//  max_block_size = FM_BLOCK_OVERLAP+block_size+1  + block_size*.2; // +1 for the '$'
  max_block_size = FM_BLOCK_OVERLAP+block_size+1  + block_size; // temporary hack to avoid memory over-runs (see end of 1101_fmindex_benchmarking/00NOTES)

  if (alphatype == fm_DNA)
    fm_initAmbiguityList(&ambig_list);


  /* Allocate BWT, Text, SA, and FM-index data structures, allowing storage of maximally large sequence*/
  ESL_ALLOC (T, max_block_size * sizeof(uint8_t));
  ESL_ALLOC (BWT, max_block_size * sizeof(uint8_t));
  ESL_ALLOC (SA, max_block_size * sizeof(int));
  ESL_ALLOC (SAsamp,     (1+floor((double)max_block_size/meta->freq_SA) ) * sizeof(uint32_t));

  ESL_ALLOC (occCnts_sb, (1+ceil((double)max_block_size/meta->freq_cnt_sb)) *  meta->alph_size * sizeof(uint32_t)); // every freq_cnt_sb positions, store an array of ints
  ESL_ALLOC (cnts_sb,    meta->alph_size * sizeof(uint32_t));
  ESL_ALLOC (occCnts_b,  ( 1+ceil((double)max_block_size/meta->freq_cnt_b)) *  meta->alph_size * sizeof(uint16_t)); // every freq_cnt_b positions, store an array of 8-byte ints
  ESL_ALLOC (cnts_b,     meta->alph_size * sizeof(uint16_t));

  if((T == NULL)  || (BWT == NULL)  || (SA==NULL) || (SAsamp==NULL) || (BWT==NULL) || (cnts_b==NULL) || (occCnts_b==NULL) || (cnts_sb==NULL) || (occCnts_sb==NULL) ) {
    esl_fatal( "%s: Cannot allocate memory.\n", argv[0]);
  }


  // Open a temporary file, to which FM-index data will be written
  if (esl_tmpfile(tmp_filename, &fptmp) != eslOK) esl_fatal("unable to open fm-index tmpfile");


  /* Main loop: */
  while (status == eslOK ) {

    //reset block as an empty vessel
    for (i=0; i<block->count; i++)
        esl_sq_Reuse(block->list + i);

    if (use_tmpsq) {
        esl_sq_Copy(tmpsq , block->list);
        block->complete = FALSE;  //this lets ReadBlock know that it needs to append to a small bit of previously-read seqeunce
        block->list->C = FM_BLOCK_OVERLAP; // overload the ->C value, which ReadBlock uses to determine how much
                                               // overlap should be retained in the ReadWindow step
    } else {
        block->complete = TRUE;
    }

    status = esl_sqio_ReadBlock(sqfp, block, block_size, -1, alphatype != eslAMINO);
    if (status == eslEOF) continue;
    if (status != eslOK)  ESL_XEXCEPTION(status, "failure reading sequence block");

    seq_offset = numseqs;
    ambig_offset = meta->ambig_list->count;

    if (block->complete || block->count == 0) {
        use_tmpsq = FALSE;
    } else {
        /* The final sequence on the block was a probably-incomplete window of the active sequence.
         * Grab a copy of the end for use in the next pass, to ensure we don't miss hits crossing
         * the boundary between two blocks.
         */
        esl_sq_Copy(block->list + (block->count - 1) , tmpsq);
        use_tmpsq = TRUE;
    }

    block->first_seqidx = sq_cnt;
    sq_cnt += block->count - (use_tmpsq ? 1 : 0);// if there's an incomplete sequence read into the block wait to count it until it's complete.


    /* Read dseqs from block into text element T.
    *  Convert the dsq from esl-alphabet to fm-alphabet (1..k for alphabet of size k).
    *  (a) collapsing upper/lower case for appropriate sorting.
    *  (b) reserving 0 for '$', which must be lexicographically smallest
    *      (these will later be shifted to 0-based alphabet, once SA has been built)
    *
    */
    block_length = 0;
    for (i=0; i<block->count; i++) {

      //start a new block, with space for the name
      allocateSeqdata(meta, block->list+i, numseqs, &allocedseqs);

      //meta data
      meta->seq_data[numseqs].target_id       = block->first_seqidx + i ;
      meta->seq_data[numseqs].target_start    = block->list[i].start;
      meta->seq_data[numseqs].fm_start        = block_length;

      if (block->list[i].name == NULL) meta->seq_data[numseqs].name[0] = '\0';
          else  strcpy(meta->seq_data[numseqs].name, block->list[i].name );
      if (block->list[i].acc == NULL) meta->seq_data[numseqs].acc[0] = '\0';
          else  strcpy(meta->seq_data[numseqs].acc, block->list[i].acc );
      if (block->list[i].source == NULL) meta->seq_data[numseqs].source[0] = '\0';
          else  strcpy(meta->seq_data[numseqs].source, block->list[i].source );
      if (block->list[i].desc == NULL) meta->seq_data[numseqs].desc[0] = '\0';
          else  strcpy(meta->seq_data[numseqs].desc, block->list[i].desc );

      for (j=1; j<=block->list[i].n; j++) {
        c = abc->sym[block->list[i].dsq[j]];
        if ( meta->alph_type == fm_DNA) {
          if (meta->inv_alph[c] == -1) {
            // replace ambiguity characters by rotating through A,C,G, and T.
            c = meta->alph[ambig_repl];
            ambig_repl = (ambig_repl+1)%4;

            if (!in_ambig_run) {
              fm_addAmbiguityRange(meta->ambig_list, block_length, block_length);
              in_ambig_run=1;
            } else {
              meta->ambig_list->ranges[meta->ambig_list->count - 1].upper = block_length;
            }
          } else {
            in_ambig_run=0;
          }
        } else if (meta->inv_alph[c] == -1) {
          esl_fatal("requested alphabet doesn't match input text\n");
        }

        T[block_length] = meta->inv_alph[c];

        block_length++;
        if (j>block->list[i].C) total_char_count++; // add to total count, only if it's not redundant with earlier read
        meta->seq_data[numseqs].length++;

      }
      numseqs++;
    }

    T[block_length] = 0; // last character 0 is effectively '$' for suffix array
    block_length++;

    seq_cnt = numseqs-seq_offset;
    ambig_cnt = meta->ambig_list->count - ambig_offset;

    //build and write FM-index for T.  This will be a BWT on the reverse of the sequence, required for reverse-traversal of the BWT
    buildAndWriteFMIndex(meta, seq_offset, ambig_offset, seq_cnt, ambig_cnt, (uint32_t)block->list[0].C, T, BWT, SA, SAsamp,
        occCnts_sb, cnts_sb, occCnts_b, cnts_b, block_length, fptmp);


    if ( ! meta->fwd_only ) {
      //build and write FM-index for un-reversed T (used to find reverse hits using forward traversal of the BWT)
      buildAndWriteFMIndex(meta, seq_offset, ambig_offset, seq_cnt, ambig_cnt, 0, T, BWT, SA, NULL,
          occCnts_sb, cnts_sb, occCnts_b, cnts_b, block_length, fptmp);
    }

    prev_numseqs = numseqs;

    numblocks++;
  }


  esl_sqfile_Close(sqfp);
  esl_alphabet_Destroy(abc);
  esl_sq_Destroy(sq);
  esl_sq_Destroy(tmpsq);
  esl_sq_DestroyBlock(block);

  meta->seq_count = numseqs;
  meta->block_count = numblocks;



    /* Finished writing the FM-index data to a temporary file. Now write
     * metadata to fname_out, then append FM-index data from temp file
     */
  if((fp = fopen(fname_out, "wb")) == NULL)
    esl_fatal( "%s: Cannot open file `%s': ", argv[0], fname_out);


    //write out meta data
  if( fwrite(&(meta->fwd_only),     sizeof(meta->fwd_only),     1, fp) != 1 ||
      fwrite(&(meta->alph_type),    sizeof(meta->alph_type),    1, fp) != 1 ||
      fwrite(&(meta->alph_size),    sizeof(meta->alph_size),    1, fp) != 1 ||
      fwrite(&(meta->charBits),     sizeof(meta->charBits),     1, fp) != 1 ||
      fwrite(&(meta->freq_SA),      sizeof(meta->freq_SA),      1, fp) != 1 ||
      fwrite(&(meta->freq_cnt_sb),  sizeof(meta->freq_cnt_sb),  1, fp) != 1 ||
      fwrite(&(meta->freq_cnt_b),   sizeof(meta->freq_cnt_b),   1, fp) != 1 ||
      fwrite(&(meta->block_count),  sizeof(meta->block_count),  1, fp) != 1 ||
      fwrite(&(meta->seq_count),    sizeof(meta->seq_count),    1, fp) != 1 ||
      fwrite(&(meta->ambig_list->count),  sizeof(meta->ambig_list->count),    1, fp) != 1 ||
      fwrite(&total_char_count,     sizeof(total_char_count),   1, fp) != 1
  )
    esl_fatal( "%s: Error writing meta data for FM index.\n", argv[0]);


  for (i=0; i<meta->seq_count; i++) {
    if( fwrite(&(meta->seq_data[i].target_id),    sizeof(meta->seq_data[i].target_id),          1, fp) != 1 ||
        fwrite(&(meta->seq_data[i].target_start), sizeof(meta->seq_data[i].target_start),       1, fp) != 1 ||
        fwrite(&(meta->seq_data[i].fm_start),     sizeof(meta->seq_data[i].fm_start),  1, fp) != 1 ||
        fwrite(&(meta->seq_data[i].length),       sizeof(meta->seq_data[i].length), 1, fp) != 1 ||
        fwrite(&(meta->seq_data[i].name_length),  sizeof(meta->seq_data[i].name_length), 1, fp) != 1 ||
        fwrite(&(meta->seq_data[i].acc_length),   sizeof(meta->seq_data[i].acc_length), 1, fp) != 1 ||
        fwrite(&(meta->seq_data[i].source_length),sizeof(meta->seq_data[i].source_length), 1, fp) != 1 ||
        fwrite(&(meta->seq_data[i].desc_length),  sizeof(meta->seq_data[i].desc_length), 1, fp) != 1 ||
        fwrite(meta->seq_data[i].name,            sizeof(char),    meta->seq_data[i].name_length+1  , fp) !=  meta->seq_data[i].name_length+1 ||
        fwrite(meta->seq_data[i].acc,             sizeof(char),    meta->seq_data[i].acc_length+1   , fp) !=  meta->seq_data[i].acc_length+1 ||
        fwrite(meta->seq_data[i].source,          sizeof(char),    meta->seq_data[i].source_length+1, fp) !=  meta->seq_data[i].source_length+1 ||
        fwrite(meta->seq_data[i].desc,            sizeof(char),    meta->seq_data[i].desc_length+1  , fp) !=  meta->seq_data[i].desc_length+1
    )
      esl_fatal( "%s: Error writing meta data for FM index.\n", argv[0]);
  }

  for (i=0; i<meta->ambig_list->count; i++) {
    if( fwrite(&(meta->ambig_list->ranges[i].lower), sizeof(meta->ambig_list->ranges[i].lower),       1, fp) != 1 ||
        fwrite(&(meta->ambig_list->ranges[i].upper), sizeof(meta->ambig_list->ranges[i].upper),       1, fp) != 1
    )
      esl_fatal( "%s: Error writing ambiguity data for FM index.\n", argv[0]);
  }


  /* now append the FM-index data in fptmp to the desired output file, fp */
  rewind(fptmp);
  for (i=0; i<numblocks; i++) {

    for(j=0; j< (meta->fwd_only?1:2); j++ ) { //do this once or twice, once for forward-T index, and possibly once for reversed
    //first, read
    if(fread(&block_length, sizeof(block_length), 1, fptmp) !=  1)
      esl_fatal( "%s: Error reading block_length in FM index.\n", argv[0]);
    if(fread(&term_loc, sizeof(term_loc), 1, fptmp) !=  1)
      esl_fatal( "%s: Error reading terminal location in FM index.\n", argv[0]);
    if(fread(&seq_offset, sizeof(seq_offset), 1, fptmp) !=  1)
      esl_fatal( "%s: Error reading seq_offset in FM index.\n", argv[0]);
    if(fread(&ambig_offset, sizeof(ambig_offset ), 1, fptmp) !=  1)
      esl_fatal( "%s: Error reading ambig_offset in FM index.\n", argv[0]);
    if(fread(&overlap, sizeof(overlap), 1, fptmp) !=  1)
      esl_fatal( "%s: Error reading overlap in FM index.\n", argv[0]);
    if(fread(&seq_cnt, sizeof(seq_cnt), 1, fptmp) !=  1)
      esl_fatal( "%s: Error reading seq_cnt in FM index.\n", argv[0]);
    if(fread(&ambig_cnt, sizeof(ambig_cnt), 1, fptmp) !=  1)
      esl_fatal( "%s: Error reading ambig_cnt in FM index.\n", argv[0]);


    compressed_bytes =   ((chars_per_byte-1+block_length)/chars_per_byte);
    num_freq_cnts_b  = 1+ceil((double)block_length/meta->freq_cnt_b);
    num_freq_cnts_sb = 1+ceil((double)block_length/meta->freq_cnt_sb);
    num_SA_samples   = 1+floor((double)block_length/meta->freq_SA);


    //the j==0 test causes T and SA to be written only for forward sequence
    if(j==0 && fread(T, sizeof(uint8_t), compressed_bytes, fptmp) != compressed_bytes)
      esl_fatal( "%s: Error reading T in FM index.\n", argv[0]);
    if(fread(BWT, sizeof(uint8_t), compressed_bytes, fptmp) != compressed_bytes)
      esl_fatal( "%s: Error reading BWT in FM index.\n", argv[0]);
    if(j==0 && fread(SAsamp, sizeof(uint32_t), (size_t)num_SA_samples, fptmp) != (size_t)num_SA_samples)
      esl_fatal( "%s: Error reading SA in FM index.\n", argv[0]);
    if(fread(occCnts_b, sizeof(uint16_t)*(meta->alph_size), (size_t)num_freq_cnts_b, fptmp) != (size_t)num_freq_cnts_b)
      esl_fatal( "%s: Error reading occCnts_b in FM index.\n", argv[0]);
    if(fread(occCnts_sb, sizeof(uint32_t)*(meta->alph_size), (size_t)num_freq_cnts_sb, fptmp) != (size_t)num_freq_cnts_sb)
      esl_fatal( "%s: Error reading occCnts_sb in FM index.\n", argv[0]);



    //then, write
    if(fwrite(&block_length, sizeof(block_length), 1, fp) !=  1)
      esl_fatal( "%s: Error writing block_length in FM index.\n", argv[0]);
    if(fwrite(&term_loc, sizeof(term_loc), 1, fp) !=  1)
      esl_fatal( "%s: Error writing terminal location in FM index.\n", argv[0]);
    if(fwrite(&seq_offset, sizeof(seq_offset), 1, fp) !=  1)
      esl_fatal( "%s: Error writing seq_offset in FM index.\n", argv[0]);
    if(fwrite(&ambig_offset, sizeof(ambig_offset), 1, fp) !=  1)
      esl_fatal( "%s: Error writing ambig_offset in FM index.\n", argv[0]);
    if(fwrite(&overlap, sizeof(overlap), 1, fp) !=  1)
      esl_fatal( "%s: Error writing overlap in FM index.\n", argv[0]);
    if(fwrite(&seq_cnt, sizeof(seq_cnt), 1, fp) !=  1)
      esl_fatal( "%s: Error writing seq_cnt in FM index.\n", argv[0]);
    if(fwrite(&ambig_cnt, sizeof(ambig_cnt), 1, fp) !=  1)
      esl_fatal( "%s: Error writing ambig_cnt in FM index.\n", argv[0]);


    if(j==0 && fwrite(T, sizeof(uint8_t), compressed_bytes, fp) != compressed_bytes)
      esl_fatal( "%s: Error writing T in FM index.\n", argv[0]);
    if(fwrite(BWT, sizeof(uint8_t), compressed_bytes, fp) != compressed_bytes)
      esl_fatal( "%s: Error writing BWT in FM index.\n", argv[0]);
    if(j==0 && fwrite(SAsamp, sizeof(uint32_t), (size_t)num_SA_samples, fp) != (size_t)num_SA_samples)
      esl_fatal( "%s: Error writing SA in FM index.\n", argv[0]);
    if(fwrite(occCnts_b, sizeof(uint16_t)*(meta->alph_size), (size_t)num_freq_cnts_b, fp) != (size_t)num_freq_cnts_b)
      esl_fatal( "%s: Error writing occCnts_b in FM index.\n", argv[0]);
    if(fwrite(occCnts_sb, sizeof(uint32_t)*(meta->alph_size), (size_t)num_freq_cnts_sb, fp) != (size_t)num_freq_cnts_sb)
      esl_fatal( "%s: Error writing occCnts_sb in FM index.\n", argv[0]);

    }
  }


  fprintf (stderr, "Number of characters in index:  %ld\n", (long)total_char_count);
  fprintf (stderr, "Number of FM-index blocks:      %ld\n", (long)meta->block_count);


  fclose(fp);
  fclose(fptmp);
  free(T);
  free(BWT);
  free(SA);
  free(SAsamp);
  free(occCnts_b);
  free(cnts_b);
  free(occCnts_sb);
  free(cnts_sb);

  fm_metaDestroy(meta);

  esl_getopts_Destroy(go);


  // compute and print the elapsed time in millisec
  t2 = times(&ts2);
  {
    double clk_ticks = sysconf(_SC_CLK_TCK);
    double elapsedTime = (t2-t1)/clk_ticks;

    fprintf (stderr, "run time:  %.2f seconds\n", elapsedTime);
  }


  return (eslOK);


ERROR:
  /* Deallocate memory. */
  if (fp)         fclose(fp);
  if (T)          free(T);
  if (BWT)        free(BWT);
  if (SA)         free(SA);
  if (SAsamp)     free(SAsamp);
  if (occCnts_b)  free(occCnts_b);
  if (cnts_b)     free(cnts_b);
  if (occCnts_sb) free(occCnts_sb);
  if (cnts_sb)    free(cnts_sb);
  if (ambig_list.ranges) free(ambig_list.ranges);

  fm_metaDestroy(meta);
  esl_getopts_Destroy(go);


  esl_sqfile_Close(sqfp);
  esl_alphabet_Destroy(abc);
  esl_sq_Destroy(sq);
  if (tmpsq) esl_sq_Destroy(tmpsq);
  if (block) esl_sq_DestroyBlock(block);

  fprintf (stderr, "failure during memory allocation\n");

  exit(status);

}