Exemplo n.º 1
0
/*
  Collect the k-mer codes of <encseq> into a temporary list and compare that
  list with <codeliststream> via comparecodelists(). The temporary list is
  released before returning. Returns 0 if the comparison returns 0 and -1
  otherwise; <err> is handed through to comparecodelists().
*/
static int verifycodelists(const GtEncseq *encseq,
                           unsigned int kmersize,
                           unsigned int numofchars,
                           const GtArrayGtCodetype *codeliststream,
                           GtError *err)
{
  GtArrayGtCodetype codes_from_encseq;
  const GtUchar *alphabet_chars;
  GtUword total_len;
  int cmp_result;

  gt_error_check(err);
  total_len = gt_encseq_total_length(encseq);
  alphabet_chars = gt_alphabet_characters(gt_encseq_alphabet(encseq));
  GT_INITARRAY(&codes_from_encseq,GtCodetype);
  collectkmercode(&codes_from_encseq,encseq,kmersize,numofchars,total_len);
  cmp_result = comparecodelists(codeliststream,
                                &codes_from_encseq,
                                kmersize,
                                numofchars,
                                (const char *) alphabet_chars,
                                err);
  /* the list is only needed for the comparison itself */
  GT_FREEARRAY(&codes_from_encseq,GtCodetype);
  return (cmp_result != 0) ? -1 : 0;
}
Exemplo n.º 2
0
/*
  Stream in one suffix array per index name in <indexnametab>, storing the
  i-th one in suffixarraytable[i] and resetting nextpostable[i] to 0.
  <*numofchars> is taken from the alphabet of the first index only.
  Returns 0 on success; returns -1 as soon as streamsuffixarray() fails for
  one of the indexes (entries loaded so far are left as they are).
*/
static int inputthesequences(unsigned int *numofchars,
                             unsigned long *nextpostable,
                             Suffixarray *suffixarraytable,
                             const GtStrArray *indexnametab,
                             unsigned int demand,
                             GtLogger *logger,
                             GtError *err)
{
  unsigned long idx = 0;

  gt_error_check(err);
  while (idx < gt_str_array_size(indexnametab))
  {
    Suffixarray *current = suffixarraytable + idx;
    const char *indexname = gt_str_array_get(indexnametab,idx);

    if (streamsuffixarray(current,demand,indexname,logger,err) != 0)
    {
      return -1;
    }
    if (idx == 0)
    {
      /* the alphabet size is read from the first index only */
      *numofchars
        = gt_alphabet_num_of_chars(gt_encseq_alphabet(current->encseq));
    }
    nextpostable[idx] = 0;
    idx++;
  }
  return 0;
}
Exemplo n.º 3
0
/*
  Load the encoded sequence stored under <seqfile>, optionally mirror it
  (only permitted for DNA alphabets) and hand it to output_sequence().
  Lossless support is requested from the loader when the corresponding
  option in <args> is set. Returns 0 on success and a non-zero value on
  failure. The loader and the encoded sequence are released on every path.
*/
static int decode_sequence_file(const char *seqfile,
                                GtEncseqDecodeArguments *args,
                                GtError *err)
{
  GtEncseqLoader *loader;
  GtEncseq *encseq;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(seqfile);
  loader = gt_encseq_loader_new();
  if (gt_encseq_options_lossless_value(args->eopts)) {
    gt_encseq_loader_require_lossless_support(loader);
  }
  encseq = gt_encseq_loader_load(loader, seqfile, err);
  if (encseq == NULL) {
    had_err = -1;
  }
  else if (gt_encseq_options_mirrored_value(args->eopts)) {
    if (gt_alphabet_is_dna(gt_encseq_alphabet(encseq))) {
      had_err = gt_encseq_mirror(encseq, err);
    }
    else {
      gt_error_set(err, "mirroring is only defined on DNA sequences");
      had_err = -1;
    }
  }
  if (!had_err) {
    had_err = output_sequence(encseq, args, seqfile, err);
  }
  /* encseq is NULL here if loading failed */
  gt_encseq_delete(encseq);
  gt_encseq_loader_delete(loader);
  return had_err;
}
Exemplo n.º 4
0
/*
  Lua binding: takes the encoded sequence found at stack index 1 and pushes
  a new reference to its alphabet. Returns 1.
*/
static int encseq_lua_alphabet(lua_State *L)
{
  GtEncseq **encseq = check_encseq(L, 1);

  gt_assert(*encseq);
  gt_lua_alphabet_push(L, gt_alphabet_ref(gt_encseq_alphabet(*encseq)));
  return 1;
}
Exemplo n.º 5
0
/*
  Build a GtSeq object for sequence number <idx> of <bs>. The sequence
  string obtained from gt_bioseq_get_sequence() is handed to
  gt_seq_new_own() together with its length and the alphabet of <bs>; the
  description of the sequence is attached as well.
  <idx> must be smaller than the number of sequences in <bs>.
*/
GtSeq* gt_bioseq_get_seq(GtBioseq *bs, GtUword idx)
{
  char *sequence;
  GtSeq *result;

  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));
  sequence = gt_bioseq_get_sequence(bs, idx);
  result = gt_seq_new_own(sequence,
                          gt_bioseq_get_sequence_length(bs, idx),
                          gt_encseq_alphabet(bs->encseq));
  gt_seq_set_description(result, gt_bioseq_get_description(bs, idx));
  return result;
}
Exemplo n.º 6
0
/*
  Enumerate the k-mer codes of <encseq>, read in direction <readmode>, and
  report each one through the callback <processkmercode>. The callback
  receives <processkmercodeinfo>, a position and a pointer to the current
  k-mer code.

  If the total length of <encseq> is smaller than <kmersize>, the function
  returns without invoking the callback at all. Otherwise the callback is
  invoked once for the window starting at position 0, once for every
  further character read, and finally <kmersize> more times while the
  window is shifted with WILDCARD symbols.
*/
void getencseqkmers(const GtEncseq *encseq,
                    GtReadmode readmode,
                    unsigned int kmersize,
                    void(*processkmercode)(void *,
                                           unsigned long,
                                           const GtKmercode *),
                    void *processkmercodeinfo)
{
  unsigned long currentposition = 0, totallength;
  Kmerstream *spwp;
  GtUchar charcode;
  GtEncseqReader *esr;
  unsigned int numofchars, overshoot;

  totallength = gt_encseq_total_length(encseq);
  if (totallength < (unsigned long) kmersize)
  {
    /* sequence is too short to contain a single complete k-mer */
    return;
  }
  numofchars = gt_alphabet_num_of_chars(gt_encseq_alphabet(encseq));
  spwp = kmerstream_new(numofchars,kmersize);
  esr = gt_encseq_create_reader_with_readmode(encseq,readmode,0);
  /* fill the window with the first <kmersize> characters */
  for (currentposition = 0; currentposition < (unsigned long) kmersize;
       currentposition++)
  {
    charcode = gt_encseq_reader_next_encoded_char(esr);
    GT_CHECKENCCHAR(charcode,encseq,currentposition,readmode);
    spwp->windowwidth++;
    updatespecialpositions(spwp,charcode,false,0);
    spwp->cyclicwindow[spwp->windowwidth-1] = charcode;
  }
  /* report the k-mer starting at position 0 */
  kmerstream_newcode(&spwp->currentkmercode,spwp);
  processkmercode(processkmercodeinfo,0,&spwp->currentkmercode);
  /* slide the window over the remaining characters; the reported position
     is the start of the window that ends at <currentposition> */
  for (currentposition = (unsigned long) kmersize; currentposition<totallength;
       currentposition++)
  {
    charcode = gt_encseq_reader_next_encoded_char(esr);
    GT_CHECKENCCHAR(charcode,encseq,currentposition,readmode);
    shiftrightwithchar(spwp,charcode);
    kmerstream_newcode(&spwp->currentkmercode,spwp);
    processkmercode(processkmercodeinfo,currentposition + 1 - spwp->kmersize,
                    &spwp->currentkmercode);
  }
  gt_encseq_reader_delete(esr);
  /* <currentposition> equals <totallength> here; keep shifting with
     WILDCARD symbols so that the windows overlapping the end of the
     sequence are reported as well */
  for (overshoot=0; overshoot<kmersize; overshoot++)
  {
    shiftrightwithchar(spwp,(GtUchar) WILDCARD);
    kmerstream_newcode(&spwp->currentkmercode,spwp);
    processkmercode(processkmercodeinfo,
                    overshoot + currentposition + 1 - spwp->kmersize,
                    &spwp->currentkmercode);
  }
  kmerstream_delete(spwp);
}
Exemplo n.º 7
0
/*
  Create an iterator over substring matches between the database sequence
  <dbencseq> (with the suffix table part <suftabpart>) and a set of queries.
  The queries come either from <query_encseq> (when <query_files> is NULL
  or empty) or from the files named in <query_files> (in which case
  <query_encseq> must be NULL). Returns NULL if the sequence iterator for
  <query_files> cannot be created; <err> is passed to that constructor.
*/
GtQuerysubstringmatchiterator *gt_querysubstringmatchiterator_new(
                                     const GtEncseq *dbencseq,
                                     GtUword totallength,
                                     const ESASuffixptr *suftabpart,
                                     GtReadmode db_readmode,
                                     GtUword numberofsuffixes,
                                     const GtStrArray *query_files,
                                     const GtEncseq *query_encseq,
                                     GtReadmode query_readmode,
                                     unsigned int userdefinedleastlength,
                                     GtError *err)
{
  GtQuerysubstringmatchiterator *qsmi = gt_malloc(sizeof *qsmi);

  /* database side */
  qsmi->dbencseq = dbencseq;
  qsmi->db_readmode = db_readmode;
  qsmi->suftabpart = suftabpart;
  qsmi->numberofsuffixes = numberofsuffixes;
  qsmi->totallength = totallength;
  qsmi->userdefinedleastlength = (GtUword) userdefinedleastlength;

  /* query representation */
  qsmi->queryrep.encseq = query_encseq;
  qsmi->queryrep.readmode = query_readmode;
  qsmi->queryrep.sequence = NULL;
  qsmi->queryrep.startpos = 0;
  qsmi->querysubstring.queryrep = &qsmi->queryrep;

  /* iteration state */
  qsmi->queryunitnum = 0;
  qsmi->desc = NULL;
  qsmi->query_for_seqit = NULL;
  qsmi->query_seqlen = 0;
  qsmi->dbstart = 0;
  qsmi->matchlength = 0;
  qsmi->mmsi = gt_mmsearchiterator_new_empty();
  qsmi->mmsi_defined = false;

  if (query_files != NULL && gt_str_array_size(query_files) > 0)
  {
    /* queries are read from files */
    gt_assert(query_encseq == NULL);
    qsmi->seqit = gt_seq_iterator_sequence_buffer_new(query_files, err);
    if (qsmi->seqit == NULL)
    {
      gt_querysubstringmatchiterator_delete(qsmi);
      return NULL;
    }
    gt_seq_iterator_set_symbolmap(qsmi->seqit,
                        gt_alphabet_symbolmap(gt_encseq_alphabet(dbencseq)));
    return qsmi;
  }
  /* queries are taken from an encoded sequence */
  gt_assert(query_encseq != NULL);
  qsmi->seqit = NULL;
  qsmi->query_encseq_numofsequences
    = (uint64_t) gt_encseq_num_of_sequences(query_encseq);
  return qsmi;
}
Exemplo n.º 8
0
/*
  Test driver for the merger trie: stream in the encoded sequence of index
  <indexname>, build a trie over it with maketrie() and then either check
  the trie (<onlyins> is true; only compiled in with WITHTRIEIDENT) or
  successively delete its smallest elements (<onlyins> is false).
  Returns -1 if the index cannot be streamed in, 0 otherwise.
*/
int gt_test_trieins(bool onlyins,const char *indexname,GtError *err)
{
  Suffixarray suffixarray;
  bool haserr;

  gt_error_check(err);
  haserr = (streamsuffixarray(&suffixarray,
                              SARR_ESQTAB,
                              indexname,
                              NULL,
                              err) != 0) ? true : false;
  if (!haserr)
  {
    Mergertrierep trierep;
    const GtUchar *characters;
    const unsigned long totallength
      = gt_encseq_total_length(suffixarray.encseq);

    trierep.encseqreadinfo = gt_malloc(sizeof *trierep.encseqreadinfo);
    trierep.encseqreadinfo->encseqptr = suffixarray.encseq;
    trierep.encseqreadinfo->readmode = suffixarray.readmode;
    characters
      = gt_alphabet_characters(gt_encseq_alphabet(suffixarray.encseq));
    gt_mergertrie_initnodetable(&trierep,totallength,1U);
    maketrie(&trierep,characters,totallength);
    if (!onlyins)
    {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
      showallnoderelations(trierep.root);
#endif
#endif
      successivelydeletesmallest(&trierep,totallength,characters,err);
    } else
    {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
      showtrie(&trierep,characters);
#endif
      checktrie(&trierep,totallength+1,totallength,err);
#endif
    }
    gt_mergertrie_delete(&trierep);
  }
  gt_freesuffixarray(&suffixarray);
  return haserr ? -1 : 0;
}
Exemplo n.º 9
0
/*
  Create a new GtCondenseq for the original encoded sequence <orig_es>:
  start from an empty object using the alphabet of <orig_es>, record the
  number of sequences and the total length, fill the ssptab via
  condenseq_fill_tab() and process the descriptions (<logger> is passed
  on to that step).
*/
GtCondenseq *gt_condenseq_new(const GtEncseq *orig_es, GtLogger *logger)
{
  GtCondenseq *cs = condenseq_new_empty(gt_encseq_alphabet(orig_es));

  cs->orig_num_seq = gt_encseq_num_of_sequences(orig_es);
  cs->ssptab = condenseq_fill_tab(cs, orig_es);
  cs->orig_length = gt_encseq_total_length(orig_es);
  condenseq_process_descriptions(cs, orig_es, logger);
  return cs;
}
Exemplo n.º 10
0
/*
  Build a GtSeq object for the inclusive range <start>..<end> of sequence
  number <idx> of <bs>. The range string obtained from
  gt_bioseq_get_sequence_range() is handed to gt_seq_new_own() together
  with its length (<end> - <start> + 1) and the alphabet of <bs>; the
  description of the whole sequence is attached as well.

  Preconditions (checked by assertions): <idx> is a valid sequence number,
  <end> >= <start>, and the range is not longer than the sequence.
*/
GtSeq* gt_bioseq_get_seq_range(GtBioseq *bs, GtUword idx,
                               GtUword start, GtUword end)
{
  GtSeq *seq;
  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));
  gt_assert(end >= start);
  /* The range has to fit into the sequence. The comparison used to be
     '>', which rejected every valid range (including the full sequence)
     and accepted only ranges longer than the sequence. */
  gt_assert(end - start + 1 <= gt_encseq_seqlength(bs->encseq, idx));
  seq = gt_seq_new_own(gt_bioseq_get_sequence_range(bs, idx, start, end),
                       end - start + 1,
                       gt_encseq_alphabet(bs->encseq));
  gt_seq_set_description(seq, gt_bioseq_get_description(bs, idx));
  return seq;
}
Exemplo n.º 11
0
/*
  Show the GC-content of all sequences in <bs> on <outfp>. Nothing is
  done unless the alphabet of <bs> is a DNA alphabet. The sequences are
  concatenated (without separators) into one string, which is then passed
  to gt_gc_content_show().
*/
void gt_bioseq_show_gc_content(GtBioseq *bs, GtFile *outfp)
{
  GtUword seqnum, numofseqs, GT_UNUSED purecharlen;
  GtStr *str;

  gt_assert(bs);
  if (!gt_alphabet_is_dna(gt_encseq_alphabet(bs->encseq))) {
    return;
  }
  numofseqs = gt_encseq_num_of_sequences(bs->encseq);
  /* total length minus the (numofseqs - 1) positions between sequences */
  purecharlen = gt_encseq_total_length(bs->encseq) - numofseqs + 1;
  str = gt_str_new();
  for (seqnum = 0; seqnum < numofseqs; seqnum++) {
    char *seqcopy = gt_bioseq_get_sequence(bs, seqnum);
    gt_str_append_cstr(str, seqcopy);
    gt_free(seqcopy);
  }
  gt_assert(gt_str_length(str) == purecharlen);
  gt_file_xprintf(outfp, "showing GC-content for sequence file \"%s\"\n",
                  gt_str_get(bs->sequence_file));
  gt_gc_content_show(gt_str_get(str),
                     gt_str_length(str),
                     gt_encseq_alphabet(bs->encseq),
                     outfp);
  gt_str_delete(str);
}
Exemplo n.º 12
0
/*
  Verify the k-mer codes of <encseq> for k = <prefixlength>: the codes are
  first collected by getfastastreamkmers() from the files <encseq> was
  built from and then compared by verifycodelists() with the codes derived
  from <encseq> itself. Returns 0 if both steps succeed and -1 otherwise.
*/
int gt_verifymappedstr(const GtEncseq *encseq,
                       unsigned int prefixlength,
                       GtError *err)
{
  GtArrayGtCodetype codeliststream;
  GtAlphabet *alpha;
  unsigned int numofchars;
  int retcode = -1;

  gt_error_check(err);
  alpha = gt_encseq_alphabet(encseq);
  numofchars = gt_alphabet_num_of_chars(alpha);
  GT_INITARRAY(&codeliststream,GtCodetype);
  /* the second step only runs if the first one succeeded */
  if (getfastastreamkmers(gt_encseq_filenames(encseq),
                          numofchars,
                          prefixlength,
                          gt_alphabet_symbolmap(alpha),
                          false,
                          &codeliststream,
                          err) == 0 &&
      verifycodelists(encseq,
                      prefixlength,
                      numofchars,
                      &codeliststream,
                      err) == 0)
  {
    retcode = 0;
  }
  GT_FREEARRAY(&codeliststream,GtCodetype);
  return retcode;
}
Exemplo n.º 13
0
/*
  Create an iterator over the k-mer codes of <encseq>, read in direction
  <readmode> and beginning at position <startpos>. For reverse directions
  <startpos> must be 0. If fewer than <kmersize> characters remain from
  <startpos> on, the iterator is marked as exhausted and neither a reader
  nor a k-mer stream is created. Otherwise the first <kmersize> characters
  are read into the window of the k-mer stream.
*/
/*@notnull@*/ GtKmercodeiterator *gt_kmercodeiterator_encseq_new(
                                            const GtEncseq *encseq,
                                            GtReadmode readmode,
                                            unsigned int kmersize,
                                            unsigned long startpos)
{
  GtKmercodeiterator *kmercodeiterator;
  unsigned long endpos;
  GtUchar charcode;

  gt_assert(!GT_ISDIRREVERSE(readmode) || startpos == 0);
  kmercodeiterator = gt_malloc(sizeof (*kmercodeiterator));
  kmercodeiterator->totallength = gt_encseq_total_length(encseq);
  kmercodeiterator->startpos = startpos;
  gt_assert(startpos < kmercodeiterator->totallength);
  /* these two fields get the same value in both cases */
  kmercodeiterator->fb = NULL;
  kmercodeiterator->encseq = encseq;
  if (kmercodeiterator->totallength - startpos < (unsigned long) kmersize)
  {
    /* not enough characters left for a single k-mer */
    kmercodeiterator->inputexhausted = true;
    kmercodeiterator->esr = NULL;
    kmercodeiterator->spwp = NULL;
    return kmercodeiterator;
  }
  kmercodeiterator->inputexhausted = false;
  kmercodeiterator->readmode = readmode;
  kmercodeiterator->esr = gt_encseq_create_reader_with_readmode(encseq,
                                                                readmode,
                                                                startpos);
  kmercodeiterator->spwp
    = kmerstream_new(gt_alphabet_num_of_chars(gt_encseq_alphabet(encseq)),
                     kmersize);
  kmercodeiterator->hasprocessedfirst = false;
  /* fill the window with the first <kmersize> characters */
  endpos = startpos+(unsigned long) kmersize;
  kmercodeiterator->currentposition = startpos;
  while (kmercodeiterator->currentposition < endpos)
  {
    charcode = gt_encseq_reader_next_encoded_char(kmercodeiterator->esr);
    kmercodeiterator->spwp->windowwidth++;
    updatespecialpositions(kmercodeiterator->spwp,charcode,false,0);
    kmercodeiterator->spwp->cyclicwindow[kmercodeiterator->
                                         spwp->windowwidth-1] = charcode;
    kmercodeiterator->currentposition++;
  }
  return kmercodeiterator;
}
Exemplo n.º 14
0
/*
  For every sequence in the single query file named in <queryfilenames>,
  add the value of gt_esa2shulengthquery() with respect to <suffixarray>
  to <*totalgmatchlength>. The query sequences are read with the symbol
  map of the alphabet of <suffixarray>. Returns 0 on success and -1 if the
  sequence iterator cannot be created or reading a sequence fails.
*/
int gt_esa2shulengthqueryfiles(unsigned long *totalgmatchlength,
                               const Suffixarray *suffixarray,
                               const GtStrArray *queryfilenames,
                               GtError *err)
{
  GtSeqIterator *seqit;
  GtAlphabet *alphabet;
  const GtUchar *query;
  unsigned long querylen;
  char *desc = NULL;
  int retval;

  gt_error_check(err);
  alphabet = gt_encseq_alphabet(suffixarray->encseq);
  gt_assert(gt_str_array_size(queryfilenames) == 1UL);
  seqit = gt_seq_iterator_sequence_buffer_new(queryfilenames, err);
  if (!seqit)
  {
    return -1;
  }
  gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alphabet));
  /* a positive return value delivers a sequence, 0 ends the iteration
     and a negative value signals an error */
  while ((retval = gt_seq_iterator_next(seqit,
                                        &query,
                                        &querylen,
                                        &desc,
                                        err)) > 0)
  {
    *totalgmatchlength += gt_esa2shulengthquery(suffixarray,query,querylen);
  }
  gt_seq_iterator_delete(seqit);
  return (retval < 0) ? -1 : 0;
}
Exemplo n.º 15
0
/*
  Write the symbols of the <wlen> positions of <encseq> beginning at
  <start>, read in direction <readmode>, to <fpout>.

  A SEPARATOR is written as "\n>\n" and resets the column counter. After
  every <width> consecutive non-separator symbols a newline is written,
  and a final newline follows the last position of the window.
  <width> must be positive. If <wlen> is 0, nothing is written.
*/
void gt_encseq2symbolstring(FILE *fpout,
                            const GtEncseq *encseq,
                            GtReadmode readmode,
                            unsigned long start,
                            unsigned long wlen,
                            unsigned long width)
{
    unsigned long j, idx, lastpos;
    GtUchar currentchar;
    GtEncseqReader *esr;
    const GtAlphabet *alpha;

    gt_assert(width > 0);
    if (wlen == 0)
    {
        /* Empty window: <start + wlen - 1> would lie before <start> (or
           wrap around for <start> == 0), so the termination test in the
           loop below would not trigger inside the requested window. */
        return;
    }
    esr = gt_encseq_create_reader_with_readmode(encseq, readmode, start);
    lastpos = start + wlen - 1;
    alpha = gt_encseq_alphabet(encseq);
    for (idx = start, j = 0; /* Nothing */ ; idx++)
    {
        currentchar = gt_encseq_reader_next_encoded_char(esr);
        if (currentchar == (GtUchar) SEPARATOR)
        {
            fprintf(fpout,"\n>\n");
            j = 0;
        } else
        {
            gt_alphabet_echo_pretty_symbol(alpha,fpout,currentchar);
        }
        if (idx == lastpos)
        {
            /* last position of the window: finish the current line */
            fprintf(fpout,"\n");
            break;
        }
        if (currentchar != (GtUchar) SEPARATOR)
        {
            j++;
            if (j >= width)
            {
                /* line is full */
                fprintf(fpout,"\n");
                j = 0;
            }
        }
    }
    gt_encseq_reader_delete(esr);
}
Exemplo n.º 16
0
/*
  Write the symbols at the <wlen> positions of <encseq> beginning at
  <start> to <fpout>, reading in forward direction. All characters in
  that window are asserted not to be special.
*/
void gt_fprintfencseq(FILE *fpout,
                      const GtEncseq *encseq,
                      unsigned long start,
                      unsigned long wlen)
{
    const GtAlphabet *alpha = gt_encseq_alphabet(encseq);
    const unsigned long end = start + wlen;
    unsigned long idx = start;

    while (idx < end)
    {
        GtUchar currentchar = gt_encseq_get_encoded_char(encseq,
                                                         idx,
                                                         GT_READMODE_FORWARD);

        gt_assert(ISNOTSPECIAL(currentchar));
        gt_alphabet_echo_pretty_symbol(alpha,fpout,currentchar);
        idx++;
    }
}
Exemplo n.º 17
0
/*
  Create a wavelet tree over <encseq>. New references to <encseq> and its
  alphabet are stored in the tree. The bits of all levels are first
  written into a temporary plain bit array, which is then turned into a
  compressed bitsequence and released.
*/
GtWtree* gt_wtree_encseq_new(GtEncseq *encseq)
{
  /* sample rate for the compressed bitsequence */
  const unsigned int samplerate = 32U;
  /* number of bits stored in one GtBitsequence word */
  const size_t bits_per_word = sizeof (GtBitsequence) * CHAR_BIT;
  GtWtree *wtree = gt_wtree_create(gt_wtree_encseq_class());
  GtWtreeEncseq *wtree_encseq = gt_wtree_encseq_cast(wtree);

  wtree_encseq->encseq = gt_encseq_ref(encseq);
  wtree_encseq->alpha = gt_alphabet_ref(gt_encseq_alphabet(encseq));
  /* gt_alphabet_size gives the encoded chars plus the wildcard;
     UNDEFCHAR and SEPARATOR have to be encoded as well */
  wtree_encseq->alpha_size = gt_alphabet_size(wtree_encseq->alpha) + 2;
  wtree->members->num_of_symbols = (GtUword) wtree_encseq->alpha_size;
  /* levels in tree: \lceil log_2(\sigma)\rceil */
  wtree_encseq->levels
    = gt_determinebitspervalue((GtUword) wtree_encseq->alpha_size);
  wtree_encseq->root_fo = gt_wtree_encseq_fill_offset_new();
  wtree_encseq->current_fo = wtree_encseq->root_fo;
  wtree->members->length = gt_encseq_total_length(encseq);
  /* every level holds one bit per symbol of the sequence */
  wtree_encseq->num_of_bits = wtree_encseq->levels * wtree->members->length;
  /* number of words needed, rounded up */
  wtree_encseq->bits_size = wtree_encseq->num_of_bits / bits_per_word;
  if (wtree_encseq->num_of_bits % bits_per_word != 0)
    wtree_encseq->bits_size++;
  wtree_encseq->bits
    = gt_calloc((size_t) wtree_encseq->bits_size, sizeof (GtBitsequence));
  wtree_encseq->node_start = 0;
  gt_wtree_encseq_fill_bits(wtree_encseq);
  wtree_encseq->c_bits
    = gt_compressed_bitsequence_new(wtree_encseq->bits,
                                    samplerate,
                                    wtree_encseq->num_of_bits);
  /* the plain bit array is not kept once the compressed form exists */
  gt_free(wtree_encseq->bits);
  wtree_encseq->bits = NULL;
  return wtree;
}
Exemplo n.º 18
0
/*
  Tool runner for seed extension. <tool_arguments> is interpreted as a
  GtSeedExtendArguments object; argc, argv and parsed_args are unused.

  Steps: load encoded sequence A (and B, if a query name is given; otherwise
  B is a second reference to A), determine the character access mode,
  optionally derive bias dependent parameters from the base composition of
  A, set up the greedy / xdrop extension and output options and finally
  call gt_diagbandseed_run().

  Returns 0 on success and a non-zero value on failure. Note that the
  encoded sequences are released at each point of failure, because the
  common clean up code is only executed when no error occurred before the
  algorithm was started.
*/
static int gt_seed_extend_runner(GT_UNUSED int argc,
                                 GT_UNUSED const char **argv,
                                 GT_UNUSED int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  GtSeedExtendArguments *arguments = tool_arguments;
  GtEncseqLoader *encseq_loader = NULL;
  GtEncseq *aencseq = NULL, *bencseq = NULL;
  GtGreedyextendmatchinfo *grextinfo = NULL;
  GtXdropmatchinfo *xdropinfo = NULL;
  GtQuerymatchoutoptions *querymatchoutopt = NULL;
  GtTimer *seedextendtimer = NULL;
  GtExtendCharAccess cam = GT_EXTEND_CHAR_ACCESS_ANY;
  GtUword errorpercentage = 0UL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  gt_assert(arguments->se_minidentity >= GT_EXTEND_MIN_IDENTITY_PERCENTAGE &&
            arguments->se_minidentity <= 100UL);

  /* Calculate error percentage from minidentity */
  errorpercentage = 100UL - arguments->se_minidentity;

  /* Measure whole running time */
  if (arguments->benchmark || arguments->verbose) {
    gt_showtime_enable();
  }
  if (gt_showtime_enabled())
  {
    seedextendtimer = gt_timer_new();
    gt_timer_start(seedextendtimer);
  }

  /* Load encseq A */
  encseq_loader = gt_encseq_loader_new();
  gt_encseq_loader_enable_autosupport(encseq_loader);
  aencseq = gt_encseq_loader_load(encseq_loader,
                                  gt_str_get(arguments->dbs_indexname),
                                  err);
  if (aencseq == NULL)
    had_err = -1;

  /* If there is a 2nd read set: Load encseq B */
  if (!had_err) {
    if (strcmp(gt_str_get(arguments->dbs_queryname), "") != 0) {
      bencseq = gt_encseq_loader_load(encseq_loader,
                                      gt_str_get(arguments->dbs_queryname),
                                      err);
    } else {
      /* no query name given: compare A with itself */
      bencseq = gt_encseq_ref(aencseq);
    }
    if (bencseq == NULL) {
      had_err = -1;
      /* the clean up at the end is skipped on error, so release A here */
      gt_encseq_delete(aencseq);
    }
  }
  gt_encseq_loader_delete(encseq_loader);

  /* set character access method */
  if (!had_err && (gt_option_is_set(arguments->se_option_greedy) ||
                   gt_option_is_set(arguments->se_option_xdrop) ||
                   arguments->se_alignmentwidth > 0))
  {
    cam = gt_greedy_extend_char_access(gt_str_get
                                       (arguments->se_char_access_mode),
                                       err);
    /* a value of -1 is treated as failure of the mode lookup */
    if ((int) cam == -1) {
      had_err = -1;
      gt_encseq_delete(aencseq);
      gt_encseq_delete(bencseq);
    }
  }

  /* Use bias dependent parameters, adapted from E. Myers' DALIGNER */
  if (!had_err && arguments->bias_parameters) {
    const GtAlphabet *alpha = gt_encseq_alphabet(aencseq);
    const double bias_factor[10] = {.690, .690, .690, .690, .780,
                                    .850, .900, .933, .966, 1.000};

    if (gt_alphabet_is_dna(alpha)) {
      GtUword at, cg;
      at = gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'a'));
      at += gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 't'));
      cg = gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'c'));
      cg += gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'g'));
      if (at + cg > 0) {
        /* fraction of the less frequent base pair class, mapped to an
           index into bias_factor */
        const double ratio = (double)MIN(at, cg) / (at + cg);
        int bias_index = (int)MAX(0.0, (ratio + 0.025) * 20.0 - 1.0);
        gt_assert(bias_index < 10);
        arguments->se_maxalilendiff = 30;
        arguments->se_perc_match_hist = (GtUword)(100.0 - errorpercentage *
                                                  bias_factor[bias_index]);
        if (arguments->verbose) {
          printf("# Base ratio = %4.2lf -> percmathistory = "GT_WU"\n",
                 ratio, arguments->se_perc_match_hist);
        }
      } else {
        /* no a/c/g/t counted at all; reported with the same message as a
           non-DNA alphabet below */
        had_err = -1;
      }
    } else {
      had_err = -1;
    }
    if (had_err) {
      gt_error_set(err, "option \"-bias-parameters\" can only be applied to "
                   "the DNA alphabet");
      gt_encseq_delete(aencseq);
      gt_encseq_delete(bencseq);
    }
  }

  /* Prepare options for greedy extension */
  if (!had_err && gt_option_is_set(arguments->se_option_greedy)) {
    grextinfo = gt_greedy_extend_matchinfo_new(errorpercentage,
                                               arguments->se_maxalilendiff,
                                               arguments->se_historysize,
                                               arguments->se_perc_match_hist,
                                               arguments->se_alignlength,
                                               cam,
                                               arguments->se_extendgreedy);
    if (arguments->benchmark) {
      gt_greedy_extend_matchinfo_silent_set(grextinfo);
    }
  }

  /* Prepare options for xdrop extension */
  if (!had_err && gt_option_is_set(arguments->se_option_xdrop)) {
    xdropinfo = gt_xdrop_matchinfo_new(arguments->se_alignlength,
                                       errorpercentage,
                                       arguments->se_xdropbelowscore,
                                       arguments->se_extendxdrop);
    if (arguments->benchmark) {
      gt_xdrop_matchinfo_silent_set(xdropinfo);
    }
  }

  /* Prepare output options */
  if (!had_err && (arguments->se_alignmentwidth > 0 ||
                   gt_option_is_set(arguments->se_option_xdrop)))
  {
    querymatchoutopt
      = gt_querymatchoutoptions_new(arguments->se_alignmentwidth);

    if (gt_option_is_set(arguments->se_option_xdrop) ||
        gt_option_is_set(arguments->se_option_greedy))
    {
      /* without the greedy option a fixed sensitivity of 100 is used */
      const GtUword sensitivity = gt_option_is_set(arguments->se_option_greedy)
                                    ? arguments->se_extendgreedy : 100;

      gt_querymatchoutoptions_extend(querymatchoutopt,
                                     errorpercentage,
                                     arguments->se_maxalilendiff,
                                     arguments->se_historysize,
                                     arguments->se_perc_match_hist,
                                     cam,
                                     sensitivity);
    }
  }

  /* Start algorithm */
  if (!had_err) {
    GtDiagbandseed dbsarguments;
    dbsarguments.errorpercentage = errorpercentage;
    dbsarguments.userdefinedleastlength = arguments->se_alignlength;
    dbsarguments.seedlength = arguments->dbs_seedlength;
    dbsarguments.logdiagbandwidth = arguments->dbs_logdiagbandwidth;
    dbsarguments.mincoverage = arguments->dbs_mincoverage;
    dbsarguments.maxfreq = arguments->dbs_maxfreq;
    dbsarguments.memlimit = arguments->dbs_memlimit;
    dbsarguments.mirror = arguments->mirror;
    dbsarguments.overlappingseeds = arguments->overlappingseeds;
    dbsarguments.verify = arguments->dbs_verify;
    dbsarguments.verbose = arguments->verbose;
    dbsarguments.debug_kmer = arguments->dbs_debug_kmer;
    dbsarguments.debug_seedpair = arguments->dbs_debug_seedpair;
    dbsarguments.seed_display = arguments->seed_display;
    dbsarguments.extendgreedyinfo = grextinfo;
    dbsarguments.extendxdropinfo = xdropinfo;
    dbsarguments.querymatchoutopt = querymatchoutopt;

    had_err = gt_diagbandseed_run(aencseq, bencseq, &dbsarguments, err);

    /* clean up */
    /* the conditions below mirror those under which the objects were
       created above */
    gt_encseq_delete(aencseq);
    gt_encseq_delete(bencseq);
    if (gt_option_is_set(arguments->se_option_greedy)) {
      gt_greedy_extend_matchinfo_delete(grextinfo);
    }
    if (gt_option_is_set(arguments->se_option_xdrop)) {
      gt_xdrop_matchinfo_delete(xdropinfo);
    }
    if (arguments->se_alignmentwidth > 0 ||
        gt_option_is_set(arguments->se_option_xdrop)) {
      gt_querymatchoutoptions_delete(querymatchoutopt);
    }
  }

  /* the overall time is only reported for successful runs; the timer is
     released in any case */
  if (gt_showtime_enabled()) {
    if (!had_err) {
      char *keystring
        = gt_seed_extend_params_keystring(gt_option_is_set(arguments->
                                                           se_option_greedy),
                                          gt_option_is_set(arguments->
                                                           se_option_xdrop),
                                          arguments->dbs_seedlength,
                                          arguments->se_alignlength,
                                          arguments->se_minidentity,
                                          arguments->se_maxalilendiff,
                                          arguments->se_perc_match_hist,
                                          arguments->se_extendgreedy,
                                          arguments->se_extendxdrop,
                                          arguments->se_xdropbelowscore);
      printf("# TIME seedextend-%s", keystring);
      gt_free(keystring);
      gt_timer_show_formatted(seedextendtimer,
                              " overall " GT_WD ".%06ld\n",
                              stdout);
    }
    gt_timer_delete(seedextendtimer);
  }
  return had_err;
}
Exemplo n.º 19
0
/*
  Return the alphabet of the encoded sequence underlying <bs>. No new
  reference is taken here.
*/
GtAlphabet* gt_bioseq_get_alphabet(GtBioseq *bs)
{
  GtAlphabet *alphabet;

  gt_assert(bs);
  alphabet = gt_encseq_alphabet(bs->encseq);
  return alphabet;
}
Exemplo n.º 20
0
static int gt_matstat_runner(GT_UNUSED int argc, GT_UNUSED const char **argv,
                             GT_UNUSED int parsed_args,
                             void *tool_arguments, GtError *err)
{
  Gfmsubcallinfo *arguments = tool_arguments;
  Fmindex fmindex;
  Suffixarray suffixarray;
  void *packedindex = NULL;
  GtLogger *logger = NULL;
  bool haserr = false;
  const GtAlphabet *alphabet = NULL;
#ifdef WITHBCKTAB
  unsigned int prefixlength = 0;
#endif
  GtUword totallength;
  bool gt_mapfmindexfail = false;
  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(false, GT_LOGGER_DEFLT_PREFIX, stdout);
  if (arguments->indextype == Fmindextype)
  {
    if (gt_mapfmindex(&fmindex,gt_str_get(arguments->indexname),
                      logger, err) != 0)
    {
      haserr = true;
      gt_mapfmindexfail = true;
    } else
    {
      alphabet = fmindex.alphabet;
    }
    totallength = fmindex.bwtlength-1;
  } else
  {
    unsigned int mappedbits;

    if (arguments->indextype == Esaindextype)
    {
      mappedbits = SARR_ESQTAB | SARR_SUFTAB
#undef WITHBCKTAB
#ifdef WITHBCKTAB
                   | SARR_BCKTAB
#endif
                   ;
    } else
    {
      if (dotestsequence(arguments))
      {
        mappedbits = SARR_ESQTAB;
      } else
      {
        mappedbits = 0;
      }
    }
    if (gt_mapsuffixarray(&suffixarray,
                       mappedbits,
                       gt_str_get(arguments->indexname),
                       logger,
                       err) != 0)
    {
      haserr = true;
      totallength = 0;
    } else
    {
      alphabet = gt_encseq_alphabet(suffixarray.encseq);
#ifdef WITHBCKTAB
      prefixlength = suffixarray.prefixlength;
#endif
      totallength = gt_encseq_total_length(suffixarray.encseq);
    }
    if (!haserr)
    {
      if (arguments->indextype == Packedindextype)
      {
        packedindex =
          gt_loadvoidBWTSeqForSA(gt_str_get(arguments->indexname),
                                 false,
                                 err);
        if (packedindex == NULL)
        {
          haserr = true;
        }
      }
    }
  }
  if (!haserr)
  {
    const void *theindex;
    Greedygmatchforwardfunction gmatchforwardfunction;

    if (arguments->indextype == Fmindextype)
    {
      theindex = (const void *) &fmindex;
      if (arguments->doms)
      {
        gmatchforwardfunction = gt_skfmmstats;
      } else
      {
        gmatchforwardfunction = gt_skfmuniqueforward;
      }
    } else
    {
      if (arguments->indextype == Esaindextype)
      {
        theindex = (const void *) &suffixarray;
        if (arguments->doms)
        {
          gmatchforwardfunction = gt_suffixarraymstats;
        } else
        {
          gmatchforwardfunction = gt_suffixarrayuniqueforward;
        }
      } else
      {
        gt_assert(arguments->indextype == Packedindextype);
        theindex = (const void *) packedindex;
        if (arguments->doms)
        {
          gmatchforwardfunction = gt_voidpackedindexmstatsforward;
        } else
        {
          gmatchforwardfunction = gt_voidpackedindexuniqueforward;
        }
      }
    }
    if (!haserr)
    {
#ifdef WITHBCKTAB
      if (prefixlength > 0 &&
          arguments->indextype == Esaindextype &&
          runsubstringiteration(gmatchforwardfunction,
                                theindex,
                                totallength,
                                suffixarray.bcktab,
                                suffixarray.countspecialcodes,
                                alphabet,
                                prefixlength,
                                arguments->queryfilenames,
                                err) != 0)

      {
        haserr = true;
      }
#endif
      if (!haserr &&
          gt_findsubquerygmatchforward(dotestsequence(arguments)
                                      ? suffixarray.encseq
                                      : NULL,
                                      theindex,
                                      totallength,
                                      gmatchforwardfunction,
                                      alphabet,
                                      arguments->queryfilenames,
                                      arguments->minlength,
                                      arguments->maxlength,
                                      (arguments->showmode & SHOWSEQUENCE)
                                             ? true : false,
                                      (arguments->showmode & SHOWQUERYPOS)
                                             ? true : false,
                                      (arguments->showmode & SHOWSUBJECTPOS)
                                             ? true : false,
                                      err) != 0)
      {
        haserr = true;
      }
    }
  }
  if (arguments->indextype == Fmindextype)
  {
    if (!gt_mapfmindexfail)
    {
      gt_freefmindex(&fmindex);
    }
  } else
  {
    if (arguments->indextype == Packedindextype && packedindex != NULL)
    {
      gt_deletevoidBWTSeq(packedindex);
    }
    gt_freesuffixarray(&suffixarray);
  }
  gt_logger_delete(logger);

  return haserr ? -1 : 0;;
}
Exemplo n.º 21
0
/*
  Randomised consistency test for the substring match computations.

  Loads the encoded sequence stored under <indexname> (without description,
  ssp and sds tables), then <samples> times draws two substrings of at most
  <substringlength> symbols (capped to half the total length) and computes
  the matches of length at least <minlength> in two ways: via
  gt_sarrquerysubstringmatch() and via sarrselfsubstringmatch(). Both result
  lists are sorted and compared; if they differ, diagnostic output is printed
  and the program exits with GT_EXIT_PROGRAMMING_ERROR.

  Returns 0 on success and -1 if loading the sequence or one of the match
  computations failed (in which case <err> has been set by the callee).
*/
int gt_testmaxpairs(const char *indexname,
                    GtUword samples,
                    unsigned int minlength,
                    GtUword substringlength,
                    GtLogger *logger,
                    GtError *err)
{
  GtEncseq *encseq;
  GtUword totallength = 0, dblen, querylen;
  GtUchar *dbseq = NULL, *query = NULL;
  bool haserr = false;
  GtUword s;
  GtArray *tabmaxquerymatches;
  Maxmatchselfinfo maxmatchselfinfo;
  GtEncseqLoader *el;

  gt_logger_log(logger,"draw "GT_WU" samples",samples);

  el = gt_encseq_loader_new();
  gt_encseq_loader_do_not_require_des_tab(el);
  gt_encseq_loader_do_not_require_ssp_tab(el);
  gt_encseq_loader_do_not_require_sds_tab(el);
  gt_encseq_loader_set_logger(el, logger);
  encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);

  if (encseq == NULL)
  {
    haserr = true;
  } else
  {
    totallength = gt_encseq_total_length(encseq);
  }
  if (!haserr)
  {
    /* both samples are taken from the same sequence, so each of them may
       cover at most half of it */
    if (substringlength > totallength/2)
    {
      substringlength = totallength/2;
    }
    dbseq = gt_malloc(sizeof *dbseq * substringlength);
    query = gt_malloc(sizeof *query * substringlength);
  }
  for (s=0; s<samples && !haserr; s++)
  {
    dblen = samplesubstring(dbseq,encseq,substringlength);
    querylen = samplesubstring(query,encseq,substringlength);
    gt_logger_log(logger,"run query match for dblen="GT_WU""
                         ",querylen= "GT_WU", minlength=%u",
                         dblen,
                         querylen,
                         minlength);
    tabmaxquerymatches = gt_array_new(sizeof (Substringmatch));
    if (gt_sarrquerysubstringmatch(dbseq,
                                   dblen,
                                   query,
                                   (GtUword) querylen,
                                   minlength,
                                   gt_encseq_alphabet(encseq),
                                   storemaxmatchquery,
                                   tabmaxquerymatches,
                                   logger,
                                   err) != 0)
    {
      /* release the per-sample result list before leaving the loop */
      gt_array_delete(tabmaxquerymatches);
      haserr = true;
      break;
    }
    gt_logger_log(logger,"run self match for dblen="GT_WU""
                         ",querylen= "GT_WU", minlength=%u",
                         dblen,
                         querylen,
                         minlength);
    maxmatchselfinfo.results = gt_array_new(sizeof (Substringmatch));
    maxmatchselfinfo.dblen = dblen;
    maxmatchselfinfo.querylen = querylen;
    maxmatchselfinfo.querymarkpos
      = sequence2markpositions(&maxmatchselfinfo.numofquerysequences,
                               query,querylen);
    if (sarrselfsubstringmatch(dbseq,
                               dblen,
                               query,
                               (GtUword) querylen,
                               minlength,
                               gt_encseq_alphabet(encseq),
                               storemaxmatchself,
                               &maxmatchselfinfo,
                               logger,
                               err) != 0)
    {
      /* release everything allocated for this sample before leaving */
      gt_free(maxmatchselfinfo.querymarkpos);
      gt_array_delete(tabmaxquerymatches);
      gt_array_delete(maxmatchselfinfo.results);
      haserr = true;
      break;
    }
    gt_array_sort(tabmaxquerymatches,orderSubstringmatch);
    gt_array_sort(maxmatchselfinfo.results,orderSubstringmatch);
    if (!gt_array_equal(tabmaxquerymatches,maxmatchselfinfo.results,
                        orderSubstringmatch))
    {
      /* the two methods disagree: dump both result lists and both input
         sequences, then abort since this indicates a programming error */
      const GtUword width = 60UL;
      printf("failure for query of length "GT_WU"\n",(GtUword) querylen);
      printf("querymatches\n");
      (void) gt_array_iterate(tabmaxquerymatches,showSubstringmatch,NULL,
                           err);
      printf("dbmatches\n");
      (void) gt_array_iterate(maxmatchselfinfo.results,showSubstringmatch,
                           NULL,err);
      gt_symbolstring2fasta(stdout,"dbseq",
                         gt_encseq_alphabet(encseq),
                         dbseq,
                         (GtUword) dblen,
                         width);
      gt_symbolstring2fasta(stdout,"queryseq",
                         gt_encseq_alphabet(encseq),
                         query,
                         (GtUword) querylen,
                         width);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    gt_free(maxmatchselfinfo.querymarkpos);
    printf("# numberofmatches="GT_WU"\n",gt_array_size(tabmaxquerymatches));
    gt_array_delete(tabmaxquerymatches);
    gt_array_delete(maxmatchselfinfo.results);
  }
  gt_free(dbseq);
  gt_free(query);
  gt_encseq_delete(encseq);
  encseq = NULL;
  return haserr ? -1 : 0;
}
Exemplo n.º 22
0
/*
  Tool runner for the condenseq compression.

  Loads the encoded sequence named by argv[parsed_args], derives default
  values for -initsize, -alignlength, -windowsize and -kmersize where they
  were left undefined, validates their relations and finally runs the
  GtCondenseqCreator on the input. If <arguments->kdb> is set, the kmer
  database log is written to the file "kmer_db.out".

  Returns 0 on success and a non-zero value on failure, in which case <err>
  is set.
*/
static int gt_condenseq_compress_runner(GT_UNUSED int argc, const char **argv,
                                        int parsed_args, void *tool_arguments,
                                        GtError *err)
{
  GtCondenseqCompressArguments *arguments = tool_arguments;
  GtLogger *logger,
           *kdb_logger;
  FILE *kmer_fp = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);
  kdb_logger = gt_logger_new(arguments->kdb, GT_LOGGER_DEFLT_PREFIX, stderr);
  if (arguments->kdb) {
    kmer_fp = gt_fa_fopen("kmer_db.out", "w", err);
    if (kmer_fp == NULL) {
      /* err has been set by gt_fa_fopen(); do not continue with a logger
         that would write to a NULL stream */
      had_err = -1;
    }
    else {
      gt_logger_set_target(kdb_logger, kmer_fp);
    }
  }

  /* without an explicit index name, use the basename of the input */
  if (gt_str_length(arguments->indexname) == 0UL) {
    char *basenameptr;
    basenameptr = gt_basename(argv[parsed_args]);
    gt_str_set(arguments->indexname, basenameptr);
    gt_free(basenameptr);
  }

  if (!had_err) {
    GtEncseqLoader *es_l = gt_encseq_loader_new();
    arguments->input_es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->input_es == NULL)
      had_err = -1;
    gt_encseq_loader_delete(es_l);
  }

  if (!had_err) {
    /* first pass: derive smaller values from larger ones that were given
       (initsize -> minalignlength -> windowsize) */
    if (arguments->minalignlength == GT_UNDEF_UWORD)
      arguments->minalignlength = arguments->initsize != GT_UNDEF_UWORD ?
                                  arguments->initsize / (GtUword) 3UL :
                                  GT_UNDEF_UWORD;
    if (arguments->windowsize == GT_UNDEF_UINT)
      arguments->windowsize = arguments->minalignlength != GT_UNDEF_UWORD ?
                              (unsigned int) (arguments->minalignlength / 5U) :
                              GT_UNDEF_UINT;
    if (arguments->windowsize < 4U)
      arguments->windowsize = 4U;
    if (arguments->kmersize == GT_UNDEF_UINT) {
      unsigned int size =
        gt_alphabet_num_of_chars(gt_encseq_alphabet(arguments->input_es));
      /* size^k ~= 100000 */
      gt_safe_assign(arguments->kmersize,
                     gt_round_to_long(gt_log_base(100000.0, (double) size)));
      gt_logger_log(logger, "|A|: %u, k: %u",
                    size, arguments->kmersize);
    }

    /* second pass: derive still undefined larger values from smaller ones
       (kmersize -> windowsize -> minalignlength -> initsize) */
    if (arguments->windowsize == GT_UNDEF_UINT) {
      arguments->windowsize = 5U * arguments->kmersize;
    }
    if (arguments->minalignlength == GT_UNDEF_UWORD) {
      arguments->minalignlength = (GtUword) (3UL * arguments->windowsize);
    }
    if (arguments->initsize == GT_UNDEF_UWORD) {
      arguments->initsize = (GtUword) (3UL * arguments->minalignlength);
    }
  }
  if (!had_err &&
      arguments->windowsize <= arguments->kmersize) {
    gt_error_set(err, "-windowsize (%u) must be larger -kmersize (%u)!",
                 arguments->windowsize, arguments->kmersize);
    had_err = -1;
  }
  if (!had_err &&
      arguments->minalignlength < (GtUword) arguments->windowsize) {
    gt_error_set(err, "-alignlength (" GT_WU ") must be at least "
                 "-windowsize (%u)!", arguments->minalignlength,
                 arguments->windowsize);
    had_err = -1;
  }
  if (!had_err && (arguments->initsize < arguments->minalignlength)) {
    gt_error_set(err, "-initsize (" GT_WU ") must be at least "
                 "-alignlength (" GT_WU ")!", arguments->initsize,
                 arguments->minalignlength);
    had_err = -1;
  }

  if (!had_err) {
    GtCondenseqCreator *ces_c;

    ces_c = gt_condenseq_creator_new(arguments->initsize,
                                     arguments->minalignlength,
                                     arguments->xdrop,
                                     &(arguments->scores),
                                     arguments->kmersize,
                                     arguments->windowsize,
                                     logger,
                                     err);
    if (ces_c == NULL)
      had_err = -1;

    if (!had_err) {
      /* transfer the remaining command line options to the creator */
      if (arguments->cutoff_value == GT_UNDEF_UWORD)
        gt_condenseq_creator_use_mean_cutoff(ces_c);
      else if (arguments->cutoff_value == 0)
        gt_condenseq_creator_disable_cutoff(ces_c);
      else
        gt_condenseq_creator_set_cutoff(ces_c, arguments->cutoff_value);
      gt_condenseq_creator_set_mean_fraction(ces_c, arguments->fraction);
      if (arguments->prune)
        gt_condenseq_creator_disable_prune(ces_c);
      if (arguments->brute)
        gt_condenseq_creator_enable_brute_force(ces_c);
      if (!arguments->diags)
        gt_condenseq_creator_disable_diagonals(ces_c);
      if (arguments->full_diags)
        gt_condenseq_creator_enable_full_diagonals(ces_c);
      if (arguments->clean_percent != GT_UNDEF_UINT)
        gt_condenseq_creator_set_diags_clean_limit(ces_c,
                                                   arguments->clean_percent);

      had_err = gt_condenseq_creator_create(ces_c,
                                            arguments->indexname,
                                            arguments->input_es,
                                            logger, kdb_logger, err);

      gt_condenseq_creator_delete(ces_c);
    }
  }

  gt_logger_delete(logger);
  gt_logger_delete(kdb_logger);
  /* only close the kmer log if it was actually opened */
  if (kmer_fp != NULL)
    gt_fa_fclose(kmer_fp);
  return had_err;
}
Exemplo n.º 23
0
/*
  Run local alignment searches of all query sequences against an index.

  Depending on <idxlocalioptions>, the search is done online with
  Smith-Waterman on the encoded sequence (doonline), index based via the
  limited depth first search on a generic index, or both (docompare), in
  which case the matches of both methods are stored and checked against
  each other per query. Otherwise matches are shown via showmatch().

  Returns 0 on success and -1 on failure.
*/
int gt_runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  GtLogger *logger;
  const GtEncseq *encseq = NULL;

  logger = gt_logger_new(idxlocalioptions->verbose,
                         GT_LOGGER_DEFLT_PREFIX, stdout);

  if (idxlocalioptions->doonline)
  {
    /* online mode only needs the encoded sequence itself */
    GtEncseqLoader *el;
    el = gt_encseq_loader_new();
    gt_encseq_loader_require_multiseq_support(el);
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_set_logger(el, logger);
    encseq = gt_encseq_loader_load(el, gt_str_get(idxlocalioptions->indexname),
                                   err);
    gt_encseq_loader_delete(el);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(gt_str_get(idxlocalioptions->indexname),
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    logger,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      encseq = genericindex_getencseq(genericindex);
    }
  }
  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    ProcessIdxMatch processmatch;
    GtAlphabet *a;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    a = gt_encseq_alphabet(encseq);
    if (idxlocalioptions->docompare)
    {
      /* collect the matches of both methods separately for comparison */
      processmatch = storematch;
      gt_initstorematch(&storeonline,encseq);
      gt_initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = gt_alphabet_characters(a);
      showmatchinfo.wildcardshow = gt_alphabet_wildcard_show(a);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = gt_newSWdpresource(idxlocalioptions->matchscore,
                                     idxlocalioptions->mismatchscore,
                                     idxlocalioptions->gapextend,
                                     idxlocalioptions->threshold,
                                     idxlocalioptions->showalignment,
                                     processmatch,
                                     processmatchinfoonline);
    }
    dfst = gt_locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      gt_assert(genericindex != NULL);
      limdfsresources = gt_newLimdfsresources(genericindex,
                                           true,
                                           0,
                                           0,    /* maxpathlength */
                                           true, /* keepexpandedonstack */
                                           processmatch,
                                           processmatchinfooffline,
                                           NULL, /* processresult */
                                           NULL, /* processresult info */
                                           dfst);
    }
    seqit = gt_seq_iterator_sequence_buffer_new(idxlocalioptions->queryfiles,
                                               err);
    if (seqit == NULL)
    {
      haserr = true;
    } else
    {
      gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(a));
      /* queryunit serves as the running number of the current query */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seq_iterator_next(seqit,
                                     &query,
                                     &querylen,
                                     &desc,
                                     err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
                PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          gt_multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          gt_indexbasedlocali(limdfsresources,
                           idxlocalioptions->matchscore,
                           idxlocalioptions->mismatchscore,
                           idxlocalioptions->gapstart,
                           idxlocalioptions->gapextend,
                           idxlocalioptions->threshold,
                           query,
                           querylen,
                           dfst);
        }
        if (idxlocalioptions->docompare)
        {
          gt_checkandresetstorematch(showmatchinfo.queryunit,
                                  &storeonline,&storeoffline);
        }
      }
    }
    /* the search resources are allocated before the iterator is created,
       so they have to be released even if creating the iterator failed */
    if (limdfsresources != NULL)
    {
      gt_freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      gt_freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (seqit != NULL)
    {
      gt_seq_iterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      gt_freestorematch(&storeonline);
      gt_freestorematch(&storeoffline);
    }
  }
  if (genericindex == NULL)
  {
    /* the encoded sequence was loaded directly and is owned here */
    gt_encseq_delete((GtEncseq *) encseq);
    encseq = NULL;
  } else
  {
    genericindex_delete(genericindex);
  }
  gt_logger_delete(logger);
  logger = NULL;
  return haserr ? -1 : 0;
}
Exemplo n.º 24
0
/*
  For every query sequence in <arguments->queryname>, compute the shustring
  lengths of all its suffixes with respect to the packed index of the
  subject <arguments->indexname>.

  With <arguments->shulen_only> only the sum of the lengths is printed.
  Otherwise the average length and the GC content of the query are used to
  compute the divergence and from it the Kr value, which is printed.

  Returns 0 on success and 1 if the index or the query file could not be
  opened, the alphabet is not DNA or reading a query sequence failed.
*/
int gt_genomediff_pck_shu_simple(GtLogger *logger,
                                 const GtGenomediffArguments *arguments,
                                 GtError *err)
{
  int had_err = 0;
  int retval;
  GtSeqIterator *queries = NULL;
  const GtUchar *symbolmap, *currentQuery;
  const GtAlphabet *alphabet;
  GtUchar c_sym = 0,
          g_sym = 0;
  uint64_t queryNo;
  char *description = NULL;
  unsigned long queryLength,
                subjectLength = 0,
                currentSuffix;
  double avgShuLength,
         currentShuLength = 0.0,
         /*gc_subject,*/
         gc_query /*, gc*/;
  const FMindex *subjectindex = NULL;
  Genericindex *genericindexSubject;
  const GtEncseq *encseq = NULL;
  double *ln_n_fac;

  /* get the precalculation of ln(n!) for 0<n<max_ln_n_fac */
  ln_n_fac = gt_get_ln_n_fac(arguments->max_ln_n_fac);
  gt_log_log("ln(max_ln_n_fac!) = %f\n",
             ln_n_fac[arguments->max_ln_n_fac]);

  genericindexSubject = genericindex_new(gt_str_get(
                                           arguments->indexname),
                                         arguments->with_esa,
                                         true,
                                         false,
                                         true,
                                         arguments->user_max_depth,
                                         logger,
                                         err);
  if (genericindexSubject == NULL)
  {
    had_err = 1;
  }
  else
  {
    encseq = genericindex_getencseq(genericindexSubject);
  }

  if (!had_err)
  {
    subjectLength = genericindex_get_totallength(genericindexSubject) - 1;
    /*subjectLength /= 2;*/
    /*gt_log_log("subject length: %lu", subjectLength);*/
    subjectindex = genericindex_get_packedindex(genericindexSubject);

    queries = gt_seqiterator_sequence_buffer_new(
                                          arguments->queryname,
                                          err);
    if (queries == NULL)
    {
      /* opening the query file is a runtime failure, not a programming
         error, so report it instead of asserting */
      had_err = 1;
    }
    else
    {
      alphabet = gt_encseq_alphabet(encseq);
      /* makes assumption that alphabet is dna, it has to calculate the gc! */
      if (!gt_alphabet_is_dna(alphabet))
      {
        fprintf(stderr, "error: Sequences need to be dna");
        had_err = 1;
      }
      else
      {
        symbolmap = gt_alphabet_symbolmap(alphabet);
        gt_seqiterator_set_symbolmap(queries, symbolmap);
        c_sym = gt_alphabet_encode(alphabet, 'c');
        g_sym = gt_alphabet_encode(alphabet, 'g');
      }
    }
  }

  for (queryNo = 0; !had_err; queryNo++)
  {
    retval = gt_seqiterator_next(queries,
                                 &currentQuery,
                                 &queryLength,
                                 &description,
                                 err);
    if ( retval != 1)
    {
      if (retval < 0)
      {
        gt_free(description);
        /* a negative value signals a read error; propagate it instead of
           silently reporting success */
        had_err = 1;
      }
      break;
    }
    gt_logger_log(logger,
                  "found query of length: %lu",
                  queryLength);
    avgShuLength = 0.0;
    gc_query = 0.0;
    /* sum up the shustring lengths over all suffixes of the query and
       count the c/g symbols on the way */
    for (currentSuffix = 0; currentSuffix < queryLength; currentSuffix++)
    {
      currentShuLength = (double) gt_pck_getShuStringLength(
                    subjectindex,
                    &currentQuery[currentSuffix],
                    queryLength - currentSuffix);
      avgShuLength += currentShuLength;
      if (currentQuery[currentSuffix] == c_sym ||
          currentQuery[currentSuffix] == g_sym)
      {
        gc_query++;
      }
    }
    if (arguments->shulen_only)
    {
      printf("# Query %d sum of shulen:\n %.0f\n",
             (int) queryNo, avgShuLength);
    }
    else
    {
      avgShuLength /= (double) queryLength;
      gc_query /= (double) queryLength;

      gt_logger_log(logger, "Query %d has an average SHUstring length "
                            "of\n# shulength: %f",
                            (int) queryNo, avgShuLength);
      gt_logger_log(logger, "Query description: %s", description);
      gt_log_log("Query (i): %s", description);

  /* XXX add error checks */

      if ( !had_err )
      {
        double div, kr;

        gt_logger_log(logger, "shulen:\n%f", avgShuLength);
        gt_log_log("shu: %f, gc: %f, len: %lu",
            avgShuLength, gc_query, subjectLength);
        div =  gt_divergence(arguments->divergence_rel_err,
                             arguments->divergence_abs_err,
                             arguments->divergence_m,
                             arguments->divergence_threshold,
                             avgShuLength,
                             subjectLength,
                             gc_query,
                             ln_n_fac,
                             arguments->max_ln_n_fac);
        gt_logger_log(logger, "divergence:\n%f", div);

        kr = gt_calculateKr(div);

        printf("# Kr:\n%f\n", kr);
      }
    }
  }
  gt_free(ln_n_fac);
  gt_seqiterator_delete(queries);
  genericindex_delete(genericindexSubject);
  return had_err;
}
Exemplo n.º 25
0
/*
  Fill <fmindex> from the files belonging to <indexname>: the file with
  suffix FMASCIIFILESUFFIX is scanned, the bwt encoding is mapped, the
  alphabet is referenced from it, the key values are computed and finally
  the data file with suffix FMDATAFILESUFFIX is mapped.

  Returns 0 on success. If any step fails, gt_freefmindex() is applied to
  <fmindex> and -1 is returned.
*/
int gt_mapfmindex (Fmindex *fmindex,const char *indexname,
                GtLogger *logger,GtError *err)
{
  GtSpecialcharinfo specialcharinfo;
  bool storeindexpos = true, failed;
  FILE *fmafile;

  gt_error_check(err);
  fmindex->mappedptr = NULL;
  fmindex->bwtformatching = NULL;
  fmindex->alphabet = NULL;

  /* step 1: open and scan the ascii file; the scan is only attempted if
     the file could be opened */
  fmafile = gt_fa_fopen_with_suffix(indexname,FMASCIIFILESUFFIX,"rb",err);
  failed = (fmafile == NULL ||
            scanfmafileviafileptr(fmindex,
                                  &specialcharinfo,
                                  &storeindexpos,
                                  indexname,
                                  fmafile,
                                  logger,
                                  err) != 0);
  gt_fa_xfclose(fmafile);

  /* step 2: map the bwt encoding */
  if (!failed)
  {
    fmindex->bwtformatching = mapbwtencoding(indexname,logger,err);
    failed = (fmindex->bwtformatching == NULL);
  }

  /* step 3: reset the special position table and reference the alphabet */
  if (!failed)
  {
    fmindex->specpos.nextfreeGtPairBwtidx
      = (unsigned long) gt_determinenumberofspecialstostore(&specialcharinfo);
    fmindex->specpos.spaceGtPairBwtidx = NULL;
    fmindex->specpos.allocatedGtPairBwtidx = 0;
    fmindex->alphabet = gt_alphabet_ref(
                                  gt_encseq_alphabet(fmindex->bwtformatching));
    failed = (fmindex->alphabet == NULL);
  }

  /* step 4: compute the key values and map the data file */
  if (!failed)
  {
    GtStr *datafilename;

    gt_computefmkeyvalues (fmindex,
                           &specialcharinfo,
                           fmindex->bwtlength,
                           fmindex->log2bsize,
                           fmindex->log2markdist,
                           gt_alphabet_num_of_chars(fmindex->alphabet),
                           fmindex->suffixlength,
                           storeindexpos);
    datafilename = gt_str_new_cstr(indexname);
    gt_str_append_cstr(datafilename,FMDATAFILESUFFIX);
    failed = (gt_fillfmmapspecstartptr(fmindex,storeindexpos,datafilename,
                                       err) != 0);
    gt_str_delete(datafilename);
  }

  if (failed)
  {
    gt_freefmindex(fmindex);
    return -1;
  }
  return 0;
}
Exemplo n.º 26
0
/*
  Advance the Smith-Waterman match iterator <mi> to the next match.

  The iterator walks over all pairs (sequence of es1, sequence of es2),
  aligns each pair with gt_swalign() and stops at the first alignment whose
  length is at least min_len and whose evaluation is at most max_edist. For
  that pair a new match object is stored in <*match>.

  Returns GT_MATCHER_STATUS_OK if a match was stored and
  GT_MATCHER_STATUS_END once all pairs have been processed.
*/
static GtMatchIteratorStatus gt_match_iterator_sw_next(GtMatchIterator *mi,
                                                      GT_UNUSED GtMatch **match,
                                                      GT_UNUSED GtError *err)
{
  GtMatchIteratorSW *mis;
  GtSeq *seq_a, *seq_b;
  char *a, *b;
  const char *adesc, *bdesc;
  GtAlignment *ali = NULL;
  unsigned long seqlen_a, seqlen_b, seqpos;
  GtRange arng, brng;
  gt_assert(mi && match);

  mis = gt_match_iterator_sw_cast(mi);
  while (true) {
    /* the very first pair (0,0) must not be skipped; afterwards step to the
       next sequence of es2 and wrap around to the next sequence of es1 */
    if (!mis->pvt->firstali)
      mis->pvt->seqno_es2++;
    if (mis->pvt->seqno_es2 == gt_encseq_num_of_sequences(mis->pvt->es2)) {
      mis->pvt->seqno_es1++;
      if (mis->pvt->seqno_es1 == gt_encseq_num_of_sequences(mis->pvt->es1))
        return GT_MATCHER_STATUS_END;
      mis->pvt->seqno_es2 = 0;
    }
    seqlen_a = gt_encseq_seqlength(mis->pvt->es1, mis->pvt->seqno_es1);
    seqlen_b = gt_encseq_seqlength(mis->pvt->es2, mis->pvt->seqno_es2);
    /* XXX: reuse buffers for performance improvement */
    a = gt_malloc(seqlen_a * sizeof (char));
    seqpos = gt_encseq_seqstartpos(mis->pvt->es1, mis->pvt->seqno_es1);
    gt_encseq_extract_decoded(mis->pvt->es1, a, seqpos, seqpos + seqlen_a - 1);
    b = gt_malloc(seqlen_b * sizeof (char));
    seqpos = gt_encseq_seqstartpos(mis->pvt->es2, mis->pvt->seqno_es2);
    /* the second sequence has to be decoded from es2, to which its start
       position and length refer */
    gt_encseq_extract_decoded(mis->pvt->es2, b, seqpos, seqpos + seqlen_b - 1);
    seq_a = gt_seq_new(a, seqlen_a, gt_encseq_alphabet(mis->pvt->es1));
    seq_b = gt_seq_new(b, seqlen_b, gt_encseq_alphabet(mis->pvt->es2));
    ali = gt_swalign(seq_a, seq_b, mis->pvt->sf);
    mis->pvt->firstali = false;
    if (ali && gt_alignment_get_length(ali) >= mis->pvt->min_len
          && gt_alignment_eval(ali) <= mis->pvt->max_edist) {
      break;
    }
    /* pair rejected: release its resources and try the next one */
    gt_alignment_delete(ali);
    gt_seq_delete(seq_a);
    gt_seq_delete(seq_b);
    gt_free(a);
    gt_free(b);
  }
  arng = gt_alignment_get_urange(ali);
  brng = gt_alignment_get_vrange(ali);
  /* from here on seqlen_a/seqlen_b hold the lengths of the descriptions */
  adesc = gt_encseq_description(mis->pvt->es1, &seqlen_a, mis->pvt->seqno_es1);
  bdesc = gt_encseq_description(mis->pvt->es2, &seqlen_b, mis->pvt->seqno_es2);
  *match = gt_match_sw_new("", "",
                           mis->pvt->seqno_es1,
                           mis->pvt->seqno_es2,
                           gt_alignment_get_length(ali),
                           gt_alignment_eval(ali),
                           arng.start, brng.start,
                           arng.end, brng.end,
                           GT_MATCH_DIRECT);
  gt_match_set_seqid1_nt(*match, adesc, seqlen_a);
  gt_match_set_seqid2_nt(*match, bdesc, seqlen_b);
  gt_alignment_delete(ali);
  gt_seq_delete(seq_a);
  gt_seq_delete(seq_b);
  gt_free(a);
  gt_free(b);
  return GT_MATCHER_STATUS_OK;
}
/* Runner of the kmer database tool.

   Loads the encoded sequence named by <argv[parsed_args]>, walks over it in
   randomly sized intervals and stores the kmers either in a GtKmerDatabase
   (<db>) or, if <arguments->use_hash> is set, in a hashmap. Unless
   <merge_only>, <use_hash> or <bench> is set, every kmer is additionally
   added one by one to a second database (<compare_db>) which is compared
   against <db> at the end. With <arguments->bench> the consistency checks are
   skipped and a fixed number of random lookups is timed instead.

   <tool_arguments> must point to a GtKmerDatabaseArguments object.
   Returns 0 on success and a non-zero value on failure; failures of the
   called functions are reported via <err>. */
static int gt_kmer_database_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GtKmerDatabaseArguments *arguments = tool_arguments;
  int had_err = 0;
  /* initialised because the cleanup code below is reached on every path */
  GtEncseq       *es = NULL;
  GtUword        es_length = 0,
                 nu_kmer_codes = 0;
  GtKmerDatabase *compare_db = NULL,
                 *db = NULL;
  GtLogger *logger;
  FILE *fp = NULL;
  GtHashmap *kmer_hash = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->use_hash)
    kmer_hash = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                               (GtFree) gt_kmer_database_delete_hash_value);
  if (arguments->bench)
    timer = gt_timer_new_with_progress_description("loading encoded sequence");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  if (arguments->verbose && gt_str_length(arguments->print_filename) > 0UL) {
    fp = gt_fa_fopen(gt_str_get(arguments->print_filename), "w", err);
    if (fp == NULL) {
      /* do not redirect the logger to a NULL stream; report the failure */
      had_err = -1;
    }
    else {
      gt_logger_set_target(logger, fp);
    }
  }

  /* load the encoded sequence */
  if (!had_err) {
    GtEncseqLoader *es_l;
    if (arguments->bench)
      gt_timer_start(timer);
    es_l = gt_encseq_loader_new();
    es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->bench)
      gt_timer_show_progress(timer, "saving kmers (+iterating over file)",
                             stdout);
    if (es == NULL) {
      had_err = -1;
    }
    gt_encseq_loader_delete(es_l);
  }
  if (!had_err) {
    es_length = gt_encseq_total_length(es);
    if (es_length < (GtUword) arguments->kmersize) {
      gt_error_set(err, "Input is too short for used kmersize. File length: "
                   GT_WU " kmersize: %u", es_length, arguments->kmersize);
      had_err = -1;
    }
  }

  /* set up the database(s) */
  if (!had_err) {
    GtAlphabet *alphabet;
    alphabet = gt_encseq_alphabet(es);
    if (arguments->bench) {
      /* number of possible kmer codes; only needed for the random lookups */
      nu_kmer_codes = gt_power_for_small_exponents(
                                            gt_alphabet_num_of_chars(alphabet),
                                            arguments->kmersize);
    }
    if (!arguments->merge_only && !arguments->use_hash && !arguments->bench) {
      compare_db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize, arguments->sb_size, es);
    }
    if (!arguments->use_hash) {
      db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize,
                                arguments->sb_size, es);
      if (arguments->cutoff) {
        if (arguments->mean_cutoff)
          gt_kmer_database_use_mean_cutoff(db, (GtUword) 2,
                                           arguments->cutoff_value);
        else
          gt_kmer_database_set_cutoff(db, arguments->cutoff_value);
        /* NOTE(review): pruning is enabled when <prune> is NOT set; this looks
           inverted but is kept as is -- confirm against the option's
           definition. */
        if (!arguments->prune)
          gt_kmer_database_set_prune(db);
      }
    }
  }

  /* fill the database(s) interval by interval */
  if (!had_err) {
    GtUword startpos = 0,
            endpos;
    GtKmercodeiterator *iter;
    const GtKmercode *kmercode = NULL;
    iter = gt_kmercodeiterator_encseq_new(es, GT_READMODE_FORWARD,
                                          arguments->kmersize, 0);
    while (!had_err && startpos < es_length - (arguments->kmersize - 1)) {
      GtUword startpos_add_kmer = startpos;
      if (arguments->merge_only) {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max((arguments->sb_size - 1) * 2));
        if (endpos > es_length)
          endpos = es_length;
      }
      else {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max(arguments->sb_size - 1));
      }
      gt_kmercodeiterator_reset(iter, GT_READMODE_FORWARD, startpos);
      /* add the kmers of the current interval one by one to the reference
         structure (compare_db or hash) */
      while ((kmercode = gt_kmercodeiterator_encseq_next(iter)) != NULL &&
             startpos_add_kmer <= endpos - (arguments->kmersize - 1)) {
        if (!arguments->merge_only && !arguments->use_hash &&
            !kmercode->definedspecialposition && !arguments->bench) {
          gt_kmer_database_add_kmer(compare_db, kmercode->code,
                                    startpos_add_kmer);
        }
        if (arguments->use_hash && !kmercode->definedspecialposition) {
          gt_kmer_database_add_to_hash(kmer_hash, kmercode->code,
                                       startpos_add_kmer);
        }
        startpos_add_kmer++;
      }
      if (!arguments->use_hash) {
        gt_kmer_database_add_interval(db, startpos, endpos);
        gt_kmer_database_print_buffer(db, logger);
        if (!arguments->bench)
          had_err = gt_kmer_database_check_consistency(db, err);
      }
      startpos = endpos + 1;
    }
    if (!arguments->use_hash) {
      gt_kmer_database_flush(db);
      gt_kmer_database_print_buffer(db, logger);
      if (!had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(db, err);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(compare_db, err);
      if (!arguments->merge_only && !arguments->bench)
        gt_kmer_database_print(compare_db, logger, true);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_compare(compare_db, db, err);
      gt_kmer_database_print(db, logger, true);
    }
    gt_kmercodeiterator_delete(iter);
  }

  if (arguments->bench) {
    /* The random access benchmark needs a filled database/hash and a valid
       <nu_kmer_codes>, so it is skipped after an earlier failure. The timer
       is released in either case. */
    if (!had_err) {
      GtKmerStartpos pos;
      GtArrayGtUword *pos_hash;
      GtUword rand_access = (GtUword) 50000000,
              rand_code,
              i,
              sum = 0;
      gt_timer_show_progress(timer, "random access", stdout);
      for (i = 0; i < rand_access; i++) {
        rand_code = gt_rand_max(nu_kmer_codes - 1);
        if (arguments->use_hash) {
          pos_hash = gt_hashmap_get(kmer_hash, (const void *) rand_code);
          if (pos_hash != NULL)
            sum += pos_hash->spaceGtUword[pos_hash->nextfreeGtUword - 1];
        }
        else {
          pos = gt_kmer_database_get_startpos(db, rand_code);
          if (pos.no_positions > 0)
            sum += pos.startpos[pos.no_positions - 1];
        }
      }
      /* <sum> is printed so the lookups have an observable result */
      printf("sum: " GT_WU "\n", sum);

      gt_timer_show_progress(timer, "", stdout);
      gt_timer_stop(timer);
    }
    gt_timer_delete(timer);
  }

  /* cleanup */
  if (arguments->use_hash)
    gt_hashmap_delete(kmer_hash);
  gt_encseq_delete(es);
  if (!arguments->use_hash)
    gt_kmer_database_delete(db);
  if (!arguments->merge_only && !arguments->bench)
    gt_kmer_database_delete(compare_db);
  gt_logger_delete(logger);
  gt_fa_fclose(fp);

  return had_err;
}
Exemplo n.º 28
0
/* Read a condenseq data structure from file.

   Loads the encoded sequence <indexname> (the unique sequences), then reads
   the condenseq data from the file <indexname> with suffix
   GT_CONDENSEQ_FILE_SUFFIX and finally builds, for every unique entry, the
   array of indices of the link entries referring to it.

   Returns the new GtCondenseq object, or NULL if an error occurred, in which
   case the partially built object is deleted. If <id_len> of the result is
   defined, this is reported via <logger>. */
GtCondenseq *gt_condenseq_new_from_file(const char *indexname,
                                        GtLogger *logger, GtError *err)
{
  int had_err = 0;
  FILE *fp;
  GtEncseqLoader *esl;
  GtEncseq *unique_es;
  GtCondenseq *condenseq = NULL;

  /* load unique_es */
  esl = gt_encseq_loader_new();
  unique_es = gt_encseq_loader_load(esl, indexname, err);
  /* the loader is released on success and on failure alike, so that it is
     not leaked when loading fails */
  gt_encseq_loader_delete(esl);
  if (!unique_es)
    had_err = -1;
  if (!had_err) {
    condenseq = condenseq_new_empty(gt_encseq_alphabet(unique_es));
    condenseq->filename = gt_cstr_dup(indexname);
    condenseq->unique_es = unique_es;
    fp = gt_fa_fopen_with_suffix(indexname, GT_CONDENSEQ_FILE_SUFFIX,
                                 "rb", err);
    if (fp == NULL) {
      had_err = -1;
    }
    else {
      had_err = condenseq_io(condenseq, fp, gt_io_error_fread, err);
      /* the file is not needed any more, whether reading worked or not */
      gt_fa_fclose(fp);
      if (!had_err) {
        GtUword i;
        gt_assert(condenseq->uniques);
        gt_assert(condenseq->links);
        /* create link array for each unique entry */
        for (i = 0; i < condenseq->udb_nelems; i++) {
          GT_INITARRAY(&(condenseq->uniques[i].links),uint32_t);
        }
        /* link indices are stored as uint32_t below, so the number of link
           elements must not exceed the largest uint32_t value */
        if (condenseq->ldb_nelems > (GtUword) ((uint32_t) 0 - (uint32_t) 1)) {
          gt_error_set(err, "Overflow, to many link-elements. Can't be stored");
          had_err = -1;
        }
        /* iterate through link entries and store ids in corresponding unique
           entry array */
        for (i = 0; !had_err && (GtUword) i < condenseq->ldb_nelems; i++) {
          GtUword uid = condenseq->links[i].unique_id;
          gt_assert(uid < condenseq->udb_nelems);
          GT_STOREINARRAY(&(condenseq->uniques[uid].links),
                          uint32_t,
                          10,
                          (uint32_t) i);
        }
      }
    }
  }
  if (!had_err) {
    gt_assert(condenseq != NULL);
    if (condenseq->id_len != GT_UNDEF_UWORD)
      gt_logger_log(logger, "IDs const len: " GT_WU, condenseq->id_len);
    else
      gt_logger_log(logger, "using sdstab to access IDs");
  }
  if (had_err) {
    gt_condenseq_delete(condenseq);
    condenseq = NULL;
  }
  return (condenseq);
}
Exemplo n.º 29
0
/* Runner of the encseq check tool.

   Loads the encoded sequence named by <argv[parsed_args]> and runs a series
   of checks on it: start positions, consistency for each applicable readmode,
   special ranges, mark positions, min/max values and -- if
   <arguments->prefixlength> is positive -- gt_verifymappedstr().

   <tool_arguments> must point to a GtEncseqCheckArguments object.
   Returns 0 if everything succeeded and a non-zero value otherwise. */
static int gt_encseq_check_runner(GT_UNUSED int argc, const char **argv,
                                  int parsed_args, void *tool_arguments,
                                  GtError *err)
{
  GtEncseqCheckArguments *arguments = tool_arguments;
  GtEncseqLoader *loader;
  GtEncseq *encseq;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(loader, argv[parsed_args], err);
  if (encseq == NULL) {
    had_err = -1;
  }
  else {
    int mode_idx;

    gt_encseq_check_startpositions(encseq);

    /* the remaining readmodes are only checked for DNA alphabets; forward
       and reverse are checked for every alphabet */
    for (mode_idx = 0; !had_err && mode_idx < 4; mode_idx++) {
      const GtReadmode readmode = (GtReadmode) mode_idx;
      const bool applicable = gt_alphabet_is_dna(gt_encseq_alphabet(encseq))
                                || readmode == GT_READMODE_FORWARD
                                || readmode == GT_READMODE_REVERSE;

      if (!applicable)
        continue;
      if (gt_encseq_check_consistency(encseq,
                                      gt_encseq_filenames(encseq),
                                      readmode,
                                      arguments->scantrials,
                                      arguments->multicharcmptrials,
                                      gt_encseq_has_multiseq_support(encseq),
                                      err) != 0) {
        had_err = -1;
      }
    }

    if (!had_err) {
      gt_encseq_check_specialranges(encseq);
      gt_encseq_check_markpos(encseq);
      had_err = gt_encseq_check_minmax(encseq, err);
    }

    if (!had_err && arguments->prefixlength > 0
          && gt_verifymappedstr(encseq, arguments->prefixlength, err) != 0) {
      had_err = -1;
    }
  }

  gt_encseq_delete(encseq);
  gt_encseq_loader_delete(loader);
  return had_err;
}
/* Check the search results of a packed index (BWT sequence) against those of
   the suffix array of the same project.

   After parsing the options and verifying the integrity of the index,
   <params.numOfSamples> patterns are enumerated from the encoded sequence.
   For each pattern the matches found via the packed index are compared with
   the matches found by the mmsearch iterator on the suffix array: position by
   position if the index has locate information, otherwise by match count
   only.

   Returns 0 on success (including when option parsing requests an exit) and
   -1 on failure, in which case <err> is set. */
extern int
gt_packedindex_chk_search(int argc, const char *argv[], GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  GtLogger *logger = NULL;
  inputProject = gt_str_new();

  /* do { ... } while (0): a break jumps to the common cleanup code below */
  do {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
      case GT_OPTION_PARSER_OK:
        break;
      case GT_OPTION_PARSER_ERROR:
        had_err = true;
        exitNow = true;
        break;
      case GT_OPTION_PARSER_REQUESTS_EXIT:
        exitNow = true;
        break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    logger = gt_logger_new(params.verboseOutput,
                           GT_LOGGER_DEFLT_PREFIX, stdout);

    bwtSeq = gt_availBWTSeq(&params.idx.final, logger, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        gt_BWTSeqVerifyIntegrity(bwtSeq, gt_str_get(inputProject), params.flags,
                              params.progressInterval, stderr, logger, err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !gt_initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      unsigned long totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           gt_mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                             gt_str_get(inputProject), NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                  " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = gt_encseq_total_length(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                  " min <= max is required.", params.minPatLen,
                  params.maxPatLen);
        break;
      }
      /* a negative pattern length means "not specified": derive it from the
         recommended prefix length */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = gt_alphabet_num_of_chars(
                               gt_encseq_alphabet(suffixarray.encseq));
        if (params.minPatLen < 0)
          params.minPatLen
            = gt_recommendedprefixlength(numofchars,
                                         totalLen,
                                         GT_RECOMMENDED_MULTIPLIER_DEFAULT,
                                         true);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * gt_recommendedprefixlength(numofchars,totalLen,
                                         GT_RECOMMENDED_MULTIPLIER_DEFAULT,
                                         true)/100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      /* the pattern lengths are signed (see the %ld above and the
         comparisons with 0), hence %ld rather than %lu */
      fprintf(stderr, "Using patterns of lengths %ld to %ld\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                          "%lu vs. %lu",  totalLen + 1,
                  BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = gt_newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                         suffixarray.encseq,
                                         err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = gt_nextEnumpatterniterator(&patternLen, epi);
        GtMMsearchiterator *mmsi =
          gt_mmsearchiterator_new_complete_olain(suffixarray.encseq,
                                            suffixarray.suftab,
                                            0,  /* leftbound */
                                            totalLen, /* rightbound */
                                            0, /* offset */
                                            suffixarray.readmode,
                                            pptr,
                                            patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          if ((had_err = !gt_reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          gt_assert(gt_EMINumMatchesTotal(&EMIter) ==
                    gt_BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                        false));
          gt_assert(gt_EMINumMatchesTotal(&EMIter)
                      == gt_mmsearchiterator_count(mmsi));
          /* compare the match positions of both search methods pairwise */
          while (gt_mmsearchiterator_next(&dbstart,mmsi))
          {
            unsigned long matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n%lu vs. %lu\n",
                           matchPos, dbstart);
              /* stop here: otherwise the next iteration would overwrite
                 had_err and the mismatch would go unnoticed */
              break;
            }
          }
          if (!had_err)
          {
            /* mmsearch is exhausted, so the packed index must not deliver
               another match either */
            unsigned long matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err, "matches of mmsearch expired before fmindex!");
            }
          }
        }
        else
        {
          unsigned long numFMIMatches = gt_BWTSeqMatchCount(bwtSeq, pptr,
                                                         patternLen,
                                                         false),
            numMMSearchMatches = gt_mmsearchiterator_count(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            /* argument order follows the message: suffix array (mmsearch)
               count first, fmindex count second */
            gt_error_set(err, "Number of matches not equal for suffix array ("
                              "%lu) and fmindex (%lu).\n",
                      numMMSearchMatches, numFMIMatches);
          }
        }
        /* the search iterator is released on every path before the loop is
           left, so that it is not leaked when a trial fails */
        gt_mmsearchiterator_delete(mmsi);
        mmsi = NULL;
        if (had_err)
          break; /* a failed trial is not counted as finished */
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);
  if (EMIterInitialized) gt_destructEMIterator(&EMIter);
  if (saIsLoaded) gt_freesuffixarray(&suffixarray);
  gt_freeEnumpatterniterator(epi);
  if (bwtSeq) gt_deleteBWTSeq(bwtSeq);
  if (logger) gt_logger_delete(logger);
  if (inputProject) gt_str_delete(inputProject);
  return had_err?-1:0;
}