Пример #1
0
/*
  Extend the seed described by <querymatch> (a match between the database
  <encseq> and <query>) to the left and to the right with the xdrop extension
  and report the extended match.

  <info> must point to a GtXdropmatchinfo; its useq/vseq sequence
  abstractions, best_left/best_right results and querymatchspaceptr are
  overwritten by this call. <query_totallength> is the length of <query>.

  The extended match is stored in xdropmatchinfo->querymatchspaceptr and
  handed to gt_querymatch_output(), whose return value is returned.
*/
static int gt_processxdropquerymatches(void *info,
                                       const GtEncseq *encseq,
                                       const GtQuerymatch *querymatch,
                                       const GtUchar *query,
                                       unsigned long query_totallength,
                                       GtError *err)
{
  GtXdropmatchinfo *xdropmatchinfo = (GtXdropmatchinfo *) info;
  GtXdropscore score;
  unsigned long querystart, dblen, dbstart, querylen;
  unsigned long pos1 = gt_querymatch_dbstart(querymatch);
  unsigned long pos2 = gt_querymatch_querystart(querymatch);
  unsigned long len = gt_querymatch_querylen(querymatch);
  uint64_t queryseqnum;
  unsigned long dbseqnum, dbseqstartpos, dbseqlength;

  dbseqnum = gt_encseq_seqnum(encseq,pos1);
  dbseqstartpos = gt_encseq_seqstartpos(encseq,dbseqnum);
  dbseqlength = gt_encseq_seqlength(encseq,dbseqnum);
  /* Extend to the left only if there is at least one character left of the
     seed in both sequences. The database side is bounded by the start of the
     sequence containing the seed, not by the start of the whole encseq;
     otherwise a seed starting exactly at a sequence start would request an
     extension of length 0. */
  if (pos1 > dbseqstartpos && pos2 > 0)
  {
    gt_seqabstract_reinit_encseq(xdropmatchinfo->useq,encseq,
                                 pos1 - dbseqstartpos,0);
    gt_seqabstract_reinit_ptr(xdropmatchinfo->vseq,query,pos2,0);
    gt_evalxdroparbitscoresextend(false,
                                  &xdropmatchinfo->best_left,
                                  xdropmatchinfo->res,
                                  xdropmatchinfo->useq,
                                  xdropmatchinfo->vseq,
                                  pos1,
                                  pos2,
                                  xdropmatchinfo->belowscore);
  } else
  {
    xdropmatchinfo->best_left.ivalue = 0;
    xdropmatchinfo->best_left.jvalue = 0;
    xdropmatchinfo->best_left.score = 0;
  }
  /* Extend to the right only if there is at least one character right of the
     seed in both sequences; the database side is bounded by the end of the
     sequence containing the seed, which is also what the extension length
     below is measured against. */
  if (pos1 + len < dbseqstartpos + dbseqlength &&
      pos2 + len < query_totallength)
  {
    gt_seqabstract_reinit_encseq(xdropmatchinfo->useq,
                                 encseq,dbseqstartpos + dbseqlength -
                                        (pos1 + len),0);
    gt_seqabstract_reinit_ptr(xdropmatchinfo->vseq,
                              query,query_totallength - (pos2 + len),0);
    gt_evalxdroparbitscoresextend(true,
                                  &xdropmatchinfo->best_right,
                                  xdropmatchinfo->res,
                                  xdropmatchinfo->useq,
                                  xdropmatchinfo->vseq,
                                  pos1 + len,
                                  pos2 + len,
                                  xdropmatchinfo->belowscore);
  } else
  {
    xdropmatchinfo->best_right.ivalue = 0;
    xdropmatchinfo->best_right.jvalue = 0;
    xdropmatchinfo->best_right.score = 0;
  }
  gt_assert(pos1 >= (unsigned long) xdropmatchinfo->best_left.ivalue &&
            pos2 >= (unsigned long) xdropmatchinfo->best_left.jvalue);
  /* combine seed and both extensions into the coordinates of the full match */
  querystart = pos2 - xdropmatchinfo->best_left.jvalue;
  queryseqnum = gt_querymatch_queryseqnum(querymatch);
  dblen = len + xdropmatchinfo->best_left.ivalue
              + xdropmatchinfo->best_right.ivalue;
  dbstart = pos1 - xdropmatchinfo->best_left.ivalue;
  querylen = len + xdropmatchinfo->best_left.jvalue
                 + xdropmatchinfo->best_right.jvalue;
  /* the seed itself contributes one match score per position */
  score = (GtXdropscore) len * xdropmatchinfo->arbitscores.mat +
          xdropmatchinfo->best_left.score +
          xdropmatchinfo->best_right.score;
  /* point both sequence abstractions at the extended match so that
     greedyunitedist() below operates on exactly these two substrings */
  gt_seqabstract_reinit_encseq(xdropmatchinfo->useq,
                               encseq,
                               dblen,
                               dbstart);
  gt_seqabstract_reinit_ptr(xdropmatchinfo->vseq,
                            query,
                            querylen,
                            querystart);
  gt_querymatch_fill(xdropmatchinfo->querymatchspaceptr,
                     dblen,
                     dbstart,
                     GT_READMODE_FORWARD,
                     false,
                     score,
                     greedyunitedist(xdropmatchinfo->frontresource,
                                     xdropmatchinfo->useq,xdropmatchinfo->vseq),
                     false,
                     queryseqnum,
                     querylen,
                     querystart);
  return gt_querymatch_output(info, encseq, xdropmatchinfo->querymatchspaceptr,
                              query, query_totallength,
                              err);
}
/*
  Runner of the readjoiner assembly tool. <tool_arguments> must point to a
  GtReadjoinerAssemblyArguments structure; <argc>, <argv> and <parsed_args>
  are not used.

  Unless arguments->paths2seq is set, the readset is loaded as an encoded
  sequence, the string graph is built (or loaded if arguments->load is set),
  optionally reduced (arguments->redtrans), cleaned (arguments->errors) and
  saved (arguments->save), and finally traversed to spell contig paths.
  Afterwards, if no error occurred, the paths are converted to sequences by
  gt_readjoiner_assembly_paths2seq().

  Returns 0 on success and a non-zero value if one of the steps failed.
*/
static int gt_readjoiner_assembly_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GtError *err)
{
  GtReadjoinerAssemblyArguments *arguments = tool_arguments;
  GtLogger *verbose_logger, *default_logger;
  GtEncseqLoader *el = NULL;
  GtEncseq *reads = NULL;
  GtTimer *timer = NULL;
  GtStrgraph *strgraph = NULL;
  GtBitsequence *contained = NULL;
  const char *readset;
  bool eqlen = true;
  GtUword nreads = 0, tlen = 0, rlen = 0;
  int had_err = 0;

  /* check <arguments> before it is dereferenced for the first time */
  gt_assert(arguments);
  gt_error_check(err);
  readset = gt_str_get(arguments->readset);
  default_logger = gt_logger_new(!arguments->quiet, GT_LOGGER_DEFLT_PREFIX,
      stdout);
  gt_logger_log(default_logger,
      "gt readjoiner assembly (version "GT_READJOINER_VERSION")");
  verbose_logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX,
      stdout);
  gt_logger_log(verbose_logger, "verbose output activated");
  gt_logger_log(verbose_logger, "readset name = %s", readset);
  /* <timer> stays NULL unless showtime is enabled */
  if (gt_showtime_enabled())
  {
    timer = gt_timer_new_with_progress_description(
        GT_READJOINER_ASSEMBLY_MSG_COUNTSPM);
    gt_timer_start(timer);
    gt_timer_show_cpu_time_by_progress(timer);
  }

  if (!arguments->paths2seq)
  {
    el = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    reads = gt_encseq_loader_load(el, readset, err);
    if (reads == NULL)
    {
      had_err = -1;
    }
    if (had_err == 0)
    {
      eqlen = gt_encseq_accesstype_get(reads) == GT_ACCESS_TYPE_EQUALLENGTH;
      nreads = gt_encseq_num_of_sequences(reads);
      gt_logger_log(default_logger, "number of reads in filtered readset = "
                    GT_WU, nreads);
      tlen = gt_encseq_total_length(reads) - nreads + 1;
      gt_logger_log(verbose_logger, "total length of filtered readset = " GT_WU,
          tlen);

      if (eqlen)
      {
        /* equal-length reads: only the common read length is kept and the
           encoded sequence is released right away */
        rlen = gt_encseq_seqlength(reads, 0);
        gt_logger_log(verbose_logger, "read length = " GT_WU, rlen);
        gt_encseq_delete(reads);
        reads = NULL;
      }
      else
      {
        /* variable-length reads: <reads> is kept for the following steps */
        had_err = gt_readjoiner_assembly_build_contained_reads_list(
          arguments, &contained, err);
        rlen = 0;
        gt_logger_log(verbose_logger, "read length = variable");
        gt_assert(reads != NULL);
      }
    }

    if (had_err == 0)
    {
      if (!arguments->load)
      {
        had_err = gt_readjoiner_assembly_build_graph(arguments, &strgraph,
            reads, readset, eqlen, rlen, nreads, contained, default_logger,
            verbose_logger, timer, err);
      }
      else
      {
        gt_readjoiner_assembly_load_graph(&strgraph, reads, readset, rlen,
            default_logger, timer);
      }
    }

    /* release <reads> early unless the error correction step is requested;
       in that case it is released after the save step below */
    if (!eqlen && reads != NULL && !arguments->errors)
    {
      gt_encseq_delete(reads);
      reads = NULL;
      if (had_err == 0)
        gt_strgraph_set_encseq(strgraph, NULL);
    }

    if (had_err == 0 && arguments->redtrans)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_REDTRANS,
            stdout);
      gt_strgraph_sort_edges_by_len(strgraph, false);
      (void)gt_strgraph_redtrans(strgraph, false);
      (void)gt_strgraph_redself(strgraph, false);
      (void)gt_strgraph_redwithrc(strgraph, false);
      gt_strgraph_log_stats(strgraph, verbose_logger);
    }

    if (had_err == 0 && arguments->errors)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_CLEANSG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_CLEANSG);
      had_err = gt_readjoiner_assembly_error_correction(strgraph,
          arguments->bubble, arguments->deadend, arguments->deadend_depth,
          verbose_logger);
    }

    if (had_err == 0 && arguments->save)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_SAVESG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_SAVESG);
      gt_strgraph_show(strgraph, GT_STRGRAPH_BIN,
          gt_str_get(arguments->readset), GT_READJOINER_SUFFIX_SG, false);
    }

    /* release <reads> if it is still alive at this point */
    if (!eqlen && reads != NULL)
    {
      gt_encseq_delete(reads);
      reads = NULL;
      if (had_err == 0)
        gt_strgraph_set_encseq(strgraph, NULL);
    }

    if (had_err == 0)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_TRAVERSESG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_TRAVERSESG);
      gt_readjoiner_assembly_show_current_space("(before traversal)");
      gt_strgraph_spell(strgraph, (GtUword)arguments->depthcutoff,
          (GtUword)arguments->lengthcutoff, arguments->vd, readset,
          GT_READJOINER_SUFFIX_CONTIG_PATHS, NULL, true,
          arguments->show_contigs_info, false, verbose_logger);
    }

    if (contained != NULL)
      gt_free(contained);
    gt_strgraph_delete(strgraph);
    strgraph = NULL;
    gt_assert(reads == NULL);
    gt_encseq_loader_delete(el);
  }

  if (had_err == 0)
  {
    gt_readjoiner_assembly_show_current_space("(before paths2seq)");
    had_err = gt_readjoiner_assembly_paths2seq(readset,
        (GtUword)arguments->lengthcutoff, arguments->vd,
        arguments->astat, arguments->coverage, arguments->copynum,
        arguments->buffersize, default_logger, &timer, err);
  }

  if (gt_showtime_enabled())
  {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(default_logger);
  gt_logger_delete(verbose_logger);
  return had_err;
}
Пример #3
0
/*
  Advance the Smith-Waterman match iterator <mi> to the next pair of
  sequences (one from es1, one from es2) whose local alignment has at least
  min_len columns and an evaluation (gt_alignment_eval()) of at most
  max_edist.

  All pairs are enumerated with the es2 sequence number running fastest. For
  each pair both sequences are decoded into freshly allocated buffers and
  aligned with gt_swalign().

  Returns GT_MATCHER_STATUS_END once every pair has been visited. Otherwise a
  new match describing the alignment is stored in <*match> and
  GT_MATCHER_STATUS_OK is returned. <err> is not used.
*/
static GtMatchIteratorStatus gt_match_iterator_sw_next(GtMatchIterator *mi,
                                                      GT_UNUSED GtMatch **match,
                                                      GT_UNUSED GtError *err)
{
  GtMatchIteratorSW *mis;
  GtSeq *seq_a, *seq_b;
  char *a, *b;
  const char *adesc, *bdesc;
  GtAlignment *ali = NULL;
  unsigned long seqlen_a, seqlen_b, seqpos;
  GtRange arng, brng;
  gt_assert(mi && match);

  mis = gt_match_iterator_sw_cast(mi);
  while (true) {
    /* the very first pair (0,0) is aligned without advancing */
    if (!mis->pvt->firstali)
      mis->pvt->seqno_es2++;
    /* wrap around to the next es1 sequence when es2 is exhausted */
    if (mis->pvt->seqno_es2 == gt_encseq_num_of_sequences(mis->pvt->es2)) {
      mis->pvt->seqno_es1++;
      if (mis->pvt->seqno_es1 == gt_encseq_num_of_sequences(mis->pvt->es1))
        return GT_MATCHER_STATUS_END;
      mis->pvt->seqno_es2 = 0;
    }
    seqlen_a = gt_encseq_seqlength(mis->pvt->es1, mis->pvt->seqno_es1);
    seqlen_b = gt_encseq_seqlength(mis->pvt->es2, mis->pvt->seqno_es2);
    /* XXX: reuse buffers for performance improvement */
    a = gt_malloc(seqlen_a * sizeof (char));
    seqpos = gt_encseq_seqstartpos(mis->pvt->es1, mis->pvt->seqno_es1);
    gt_encseq_extract_decoded(mis->pvt->es1, a, seqpos, seqpos + seqlen_a - 1);
    b = gt_malloc(seqlen_b * sizeof (char));
    seqpos = gt_encseq_seqstartpos(mis->pvt->es2, mis->pvt->seqno_es2);
    /* the second sequence has to be decoded from es2: <seqpos> and
       <seqlen_b> are coordinates in es2, not in es1 */
    gt_encseq_extract_decoded(mis->pvt->es2, b, seqpos, seqpos + seqlen_b - 1);
    seq_a = gt_seq_new(a, seqlen_a, gt_encseq_alphabet(mis->pvt->es1));
    seq_b = gt_seq_new(b, seqlen_b, gt_encseq_alphabet(mis->pvt->es2));
    ali = gt_swalign(seq_a, seq_b, mis->pvt->sf);
    mis->pvt->firstali = false;
    if (ali && gt_alignment_get_length(ali) >= mis->pvt->min_len
          && gt_alignment_eval(ali) <= mis->pvt->max_edist) {
      /* keep ali, seq_a, seq_b, a and b alive; they are released after the
         match has been created below */
      break;
    }
    /* alignment rejected: release everything belonging to this pair */
    gt_alignment_delete(ali);
    gt_seq_delete(seq_a);
    gt_seq_delete(seq_b);
    gt_free(a);
    gt_free(b);
  }
  arng = gt_alignment_get_urange(ali);
  brng = gt_alignment_get_vrange(ali);
  /* NOTE(review): seqlen_a/seqlen_b are reused as out-parameters here and
     are presumably overwritten with the description lengths - confirm
     against gt_encseq_description() */
  adesc = gt_encseq_description(mis->pvt->es1, &seqlen_a, mis->pvt->seqno_es1);
  bdesc = gt_encseq_description(mis->pvt->es2, &seqlen_b, mis->pvt->seqno_es2);
  /* the sequence ids are created empty and set from the descriptions below */
  *match = gt_match_sw_new("", "",
                           mis->pvt->seqno_es1,
                           mis->pvt->seqno_es2,
                           gt_alignment_get_length(ali),
                           gt_alignment_eval(ali),
                           arng.start, brng.start,
                           arng.end, brng.end,
                           GT_MATCH_DIRECT);
  gt_match_set_seqid1_nt(*match, adesc, seqlen_a);
  gt_match_set_seqid2_nt(*match, bdesc, seqlen_b);
  gt_alignment_delete(ali);
  gt_seq_delete(seq_a);
  gt_seq_delete(seq_b);
  gt_free(a);
  gt_free(b);
  return GT_MATCHER_STATUS_OK;
}
Пример #4
0
/*
  Scan <encseq> (which must have a DNA alphabet) once in forward direction
  and count, per sequence, the characters a/A/t/T and c/C/g/G.

  The returned array is allocated with gt_calloc() and has one entry per
  sequence. If <calculate> is true, calculate_gc() is called for each
  sequence with the two counters and <with_special>; otherwise the raw
  number of c/C/g/G characters is stored. For a mirrored encseq the first
  half of the array is copied in reverse order onto the second half before
  returning. <err> is not used.
*/
double *gt_encseq_get_gc(const GtEncseq *encseq,
                         bool with_special,
                         bool calculate,
                         GT_UNUSED GtError *err)
{
  GtEncseqReader *esr;
  GtAlphabet *alpha;
  double *result;
  unsigned long pos, total, numofunits, unitend,
                unit = 0,
                num_at = 0,
                num_gc = 0;
  bool mirrored;
  GtUchar code[8], cc;

  alpha = gt_encseq_alphabet(encseq);
  gt_assert(gt_alphabet_is_dna(alpha));
  /* code[0..3] are the encodings of a, A, t, T; code[4..7] those of
     c, C, g, G */
  gt_alphabet_encode_seq(alpha, code,
                         "aAtTcCgG", 8UL);
  total = gt_encseq_total_length(encseq);
  esr = gt_encseq_create_reader_with_readmode(encseq,
                                              GT_READMODE_FORWARD,
                                              0);
  mirrored = gt_encseq_is_mirrored(encseq);
  if (!mirrored)
  {
    numofunits = gt_encseq_num_of_sequences(encseq);
    result = gt_calloc((size_t) numofunits, sizeof (double));
  }
  else
  {
    /* <numofunits> counts only one half of the mirrored sequences, but the
       array still gets a slot for each of them */
    numofunits = GT_DIV2(gt_encseq_num_of_sequences(encseq));
    result = gt_calloc((size_t) GT_MULT2(numofunits), sizeof (double));
  }

  /* position directly behind the last character of the current sequence */
  unitend = gt_encseq_seqstartpos(encseq, unit) +
            gt_encseq_seqlength(encseq, unit);

  for (pos = 0; pos < total; pos++)
  {
    if (pos != unitend)
    {
      cc = gt_encseq_reader_next_encoded_char(esr);
      if (cc == code[0] || cc == code[1] || cc == code[2] || cc == code[3])
      {
        num_at++;
      }
      else if (cc == code[4] || cc == code[5] ||
               cc == code[6] || cc == code[7])
      {
        num_gc++;
      }
    }
    else
    {
      /* end of the current sequence reached: store its result and set
         everything up for the next one */
      if (!calculate)
      {
        result[unit] = (double) num_gc;
      }
      else
      {
        calculate_gc(encseq, result, with_special, unit, num_gc, num_at);
      }
      unit++;
      unitend = gt_encseq_seqstartpos(encseq, unit) +
                gt_encseq_seqlength(encseq, unit);
      /* nothing was read at <pos>, so continue reading behind it */
      gt_encseq_reader_reinit_with_readmode(esr,
                                            encseq,
                                            GT_READMODE_FORWARD,
                                            pos + 1UL);
      num_gc = 0UL;
      num_at = 0UL;
    }
  }
  /* the last sequence is not followed by a separator position */
  if (!calculate)
  {
    result[unit] = (double) num_gc;
  }
  else
  {
    calculate_gc(encseq, result, with_special, unit, num_gc, num_at);
  }
  gt_encseq_reader_delete(esr);
  if (mirrored)
  {
    const unsigned long lastidx = GT_MULT2(numofunits) - 1;

    for (unit = 0; unit < numofunits; unit++)
    {
      result[lastidx - unit] = result[unit];
    }
  }
  return result;
}
Пример #5
0
/*
  Extract the characters <start>..<end> (1-based, both inclusive) of the
  sequence identified by <seqid> using the region mapping <rm> and store a
  newly allocated buffer holding them in <*seq>.

  The way <seqid> is resolved depends on the configuration of <rm>:
  - userawseq: the range is copied directly from rm->rawseq,
  - MD5 sequence ids are resolved via gt_seq_col_md5_to_seq(),
  - usedesc: via the seqid-to-sequence-number mapping,
  - matchdesc: via gt_seq_col_grep_desc(),
  - useseqno: <seqid> must have the form "seqX" where X is a sequence number
    in rm->encseq,
  - otherwise the first sequence of rm->seq_col is used.

  Returns 0 on success. On failure a non-zero value is returned and, where
  the failure is detected in this function, <err> is set.
*/
int gt_region_mapping_get_sequence(GtRegionMapping *rm, char **seq,
                                   GtStr *seqid, unsigned long start,
                                   unsigned long end, GtError *err)
{
  int had_err = 0;
  unsigned long offset = 1;
  GtRange range = {GT_UNDEF_ULONG, GT_UNDEF_ULONG};
  gt_error_check(err);
  gt_assert(rm && seq && seqid && gt_str_length(seqid) > 0);

  /* start - 1 and end - start + 1 are computed below with unsigned
     arithmetic; reject ranges for which these expressions would wrap */
  if (start == 0 || start > end) {
    gt_error_set(err, "invalid range %lu-%lu requested on sequence ``%s'': "
                 "coordinates are 1-based and start must not exceed end",
                 start, end, gt_str_get(seqid));
    return -1;
  }

  /* handle rawseq access first  */
  if (rm->userawseq) {
    gt_assert(!rm->seqid2seqnum_mapping);
    *seq = gt_calloc(end - start + 1, sizeof (char));
    /* exactly end - start + 1 bytes are allocated, so the copy is not
       NUL-terminated unless rawseq ends inside the range */
    strncpy(*seq, rm->rawseq + start - 1, (end - start + 1) * sizeof (char));
    return 0;
  }

  /* make sure that correct sequence is loaded */
  had_err = update_seq_col_if_necessary(rm, seqid, err);
  if (had_err)
    return had_err;

  /* MD5 sequence id */
  if (gt_md5_seqid_has_prefix(gt_str_get(seqid))) {
    return gt_seq_col_md5_to_seq(rm->seq_col, seq, start - offset,
                                 end - offset, seqid, err);
  }

  /* ``regular'' sequence ID */
  gt_assert(!rm->usedesc || rm->seqid2seqnum_mapping);
  gt_assert(rm->mapping || rm->seq_col);
  if (rm->usedesc) {
    unsigned long seqnum, filenum;
    gt_assert(rm->seqid2seqnum_mapping);
    range.start = start;
    range.end = end;
    /* on success <offset> is set by the mapping */
    had_err = gt_seqid2seqnum_mapping_map(rm->seqid2seqnum_mapping,
                                          gt_str_get(seqid), &range, &seqnum,
                                          &filenum, &offset, err);

    if (!had_err) {
      if (range.end != GT_UNDEF_ULONG && range.start != GT_UNDEF_ULONG &&
            range.end >= gt_seq_col_get_sequence_length(rm->seq_col, filenum,
                                                        seqnum)
            + offset) {
        gt_error_set(err, "trying to extract range %lu-%lu on sequence "
                     "``%s'' which is not covered by that sequence (with "
                     "boundaries %lu-%lu). Has the sequence-region "
                     "to sequence mapping been defined correctly?",
                     start, end, gt_str_get(seqid),
                     range.start, range.end);
        had_err = -1;
      }
    }
    if (!had_err) {
      *seq = gt_seq_col_get_sequence(rm->seq_col, filenum, seqnum,
                                     start - offset, end - offset);
    }
  } else if (rm->matchdesc) {
    gt_assert(!rm->seqid2seqnum_mapping);
    gt_assert(rm->seq_col);
    had_err = gt_seq_col_grep_desc(rm->seq_col, seq, start - 1, end - 1,
                                   seqid, err);
  } else if (rm->useseqno) {
    unsigned long seqno = GT_UNDEF_ULONG;
    gt_assert(rm->encseq);
    if (1 != sscanf(gt_str_get(seqid), "seq%lu", &seqno)) {
      gt_error_set(err, "seqid '%s' does not have the form 'seqX' "
                        "where X is a sequence number in the encoded "
                        "sequence", gt_str_get(seqid));
      had_err = -1;
    }
    gt_assert(had_err || seqno != GT_UNDEF_ULONG);
    if (!had_err && seqno >= gt_encseq_num_of_sequences(rm->encseq)) {
        gt_error_set(err, "trying to access sequence %lu, but encoded "
                          "sequence contains only %lu sequences",
                          seqno, gt_encseq_num_of_sequences(rm->encseq));
        had_err = -1;
    }
    if (!had_err) {
      unsigned long seqlength = gt_encseq_seqlength(rm->encseq, seqno);
      if (start > seqlength || end > seqlength) {
        gt_error_set(err, "trying to extract range %lu-%lu on sequence "
                     "``%s'' which is not covered by that sequence (only "
                     "%lu characters in size). Has the sequence-region "
                     "to sequence mapping been defined correctly?",
                     start, end, gt_str_get(seqid), seqlength);
        had_err = -1;
      }
    }
    if (!had_err) {
      unsigned long seqstartpos;
      *seq = gt_calloc(end - start + 1, sizeof (char));
      seqstartpos = gt_encseq_seqstartpos(rm->encseq, seqno);
      /* translate the 1-based range into 0-based encseq positions */
      gt_encseq_extract_decoded(rm->encseq, *seq, seqstartpos + start - 1,
                                seqstartpos + end - 1);
    }
  } else {
    /* the userawseq case returned early above, so only the plain
       sequence collection access remains here */
    unsigned long seqlength;
    gt_assert(rm->seq_col);
    seqlength = gt_seq_col_get_sequence_length(rm->seq_col, 0, 0);
    if (start > seqlength || end > seqlength) {
      had_err = -1;
      gt_error_set(err, "trying to extract range %lu-%lu on sequence "
                   "``%s'' which is not covered by that sequence (only "
                   "%lu characters in size). Has the sequence-region "
                   "to sequence mapping been defined correctly?",
                   start, end, gt_str_get(seqid), seqlength);
    }
    if (!had_err) {
      *seq = gt_seq_col_get_sequence(rm->seq_col, 0, 0, start - offset,
                                     end - offset);
    }
  }
  return had_err;
}
Пример #6
0
/*
  Take the seed described by <querymatch> (a match between the database
  <encseq> and <query>), extend it in both directions with the xdrop
  extension and report the resulting match.

  <info> must point to a GtXdropmatchinfo; its useq/vseq sequence
  abstractions, best_left/best_right results and querymatchspaceptr are
  overwritten. On the database side the extension is limited to the sequence
  containing the seed, on the query side to [0, <query_totallength>).

  Returns the return value of gt_querymatch_output() for the extended match.
*/
static int gt_processxdropquerymatches(void *info,
                                       const GtEncseq *encseq,
                                       const GtQuerymatch *querymatch,
                                       const GtUchar *query,
                                       GtUword query_totallength,
                                       GtError *err)
{
  GtXdropmatchinfo *xmi = (GtXdropmatchinfo *) info;
  const GtUword seed_db = gt_querymatch_dbstart(querymatch);
  const GtUword seed_query = gt_querymatch_querystart(querymatch);
  const GtUword seedlen = gt_querymatch_querylen(querymatch);
  const GtUword seqnum_db = gt_encseq_seqnum(encseq, seed_db);
  const GtUword seqstart_db = gt_encseq_seqstartpos(encseq, seqnum_db);
  const GtUword seqlen_db = gt_encseq_seqlength(encseq, seqnum_db);
  /* first position behind the database sequence containing the seed */
  const GtUword seqend_db = seqstart_db + seqlen_db;
  /* first positions behind the seed in database and query */
  const GtUword seedend_db = seed_db + seedlen;
  const GtUword seedend_query = seed_query + seedlen;
  GtUword ext_dbstart, ext_dblen, ext_querystart, ext_querylen;
  uint64_t queryseqnum;
  GtXdropscore total_score;

  /* left of the seed: nothing to do if either side has no characters there */
  if (seed_db <= seqstart_db || seed_query == 0)
  {
    xmi->best_left.ivalue = 0;
    xmi->best_left.jvalue = 0;
    xmi->best_left.score = 0;
  } else
  {
    gt_log_log("leftextend: " GT_WU " to " GT_WU
               " and " GT_WU " to " GT_WU,
               seqstart_db, seed_db, (GtUword) 0, seed_query);
    gt_seqabstract_reinit_encseq(xmi->useq, encseq,
                                 seed_db - seqstart_db, seqstart_db);
    gt_seqabstract_reinit_gtuchar(xmi->vseq, query, seed_query, 0);
    gt_evalxdroparbitscoresextend(false, &xmi->best_left, xmi->res,
                                  xmi->useq, xmi->vseq, xmi->belowscore);
  }

  /* right of the seed: nothing to do if either side ends with the seed */
  if (seedend_db >= seqend_db || seedend_query >= query_totallength)
  {
    xmi->best_right.ivalue = 0;
    xmi->best_right.jvalue = 0;
    xmi->best_right.score = 0;
  } else
  {
    gt_log_log("rightextend: " GT_WU " to " GT_WU
               " and " GT_WU " to " GT_WU,
               seedend_db, seqend_db,
               seedend_query, query_totallength - 1);
    gt_seqabstract_reinit_encseq(xmi->useq, encseq,
                                 seqend_db - seedend_db, seedend_db);
    gt_seqabstract_reinit_gtuchar(xmi->vseq, query,
                                  query_totallength - seedend_query,
                                  seedend_query);
    gt_evalxdroparbitscoresextend(true, &xmi->best_right, xmi->res,
                                  xmi->useq, xmi->vseq, xmi->belowscore);
  }

  gt_assert(seed_db >= (GtUword) xmi->best_left.ivalue &&
            seed_query >= (GtUword) xmi->best_left.jvalue);

  /* coordinates and score of seed plus both extensions */
  ext_querystart = seed_query - xmi->best_left.jvalue;
  queryseqnum = gt_querymatch_queryseqnum(querymatch);
  ext_dblen = seedlen + xmi->best_left.ivalue + xmi->best_right.ivalue;
  ext_dbstart = seed_db - xmi->best_left.ivalue;
  ext_querylen = seedlen + xmi->best_left.jvalue + xmi->best_right.jvalue;
  total_score = (GtXdropscore) seedlen * xmi->arbitscores.mat +
                xmi->best_left.score +
                xmi->best_right.score;

  /* let useq/vseq cover the extended match; greedyunitedist() below is
     evaluated on these two substrings */
  gt_seqabstract_reinit_encseq(xmi->useq, encseq, ext_dblen, ext_dbstart);
  gt_seqabstract_reinit_gtuchar(xmi->vseq, query, ext_querylen,
                                ext_querystart);
  gt_querymatch_fill(xmi->querymatchspaceptr,
                     ext_dblen,
                     ext_dbstart,
                     GT_READMODE_FORWARD,
                     false,
                     total_score,
                     greedyunitedist(xmi->frontresource, xmi->useq,
                                     xmi->vseq),
                     false,
                     queryseqnum,
                     ext_querylen,
                     ext_querystart);
  return gt_querymatch_output(info, encseq, xmi->querymatchspaceptr,
                              query, query_totallength, err);
}
Пример #7
0
/*
  Handle a self match of length <len> between the positions <pos1> and
  <pos2> of the encoded sequence referred to by <genericencseq> (which must
  hold an encseq): extend it in both directions with the xdrop extension and
  report the resulting match.

  The smaller of the two positions is treated as the database side, the
  larger one as the query side. Each side is extended only within the
  sequence containing the respective seed position.

  <info> must point to a GtXdropmatchinfo; its useq/vseq sequence
  abstractions, best_left/best_right results and querymatchspaceptr are
  overwritten. Returns the return value of gt_querymatch_output().
*/
static int gt_simplexdropselfmatchoutput(void *info,
                                         const GtGenericEncseq *genericencseq,
                                         GtUword len,
                                         GtUword pos1,
                                         GtUword pos2,
                                         GtError *err)
{
  GtXdropmatchinfo *xmi = (GtXdropmatchinfo *) info;
  const GtEncseq *encseq;
  GtXdropscore total_score;
  GtUword seed_db, seed_query,
          seqnum_db, seqstart_db, seqlen_db, seqend_db,
          seqnum_query, seqstart_query, seqlen_query, seqend_query,
          ext_dbstart, ext_dblen, ext_querystart, ext_querylen;

  gt_assert(genericencseq != NULL && genericencseq->hasencseq);
  encseq = genericencseq->seqptr.encseq;

  /* order the two seed positions */
  seed_db = (pos1 > pos2) ? pos2 : pos1;
  seed_query = (pos1 > pos2) ? pos1 : pos2;

  seqnum_db = gt_encseq_seqnum(encseq, seed_db);
  seqstart_db = gt_encseq_seqstartpos(encseq, seqnum_db);
  seqlen_db = gt_encseq_seqlength(encseq, seqnum_db);
  seqend_db = seqstart_db + seqlen_db;

  if (seed_query >= seqend_db)
  {
    /* the second seed position lies in a later sequence */
    seqnum_query = gt_encseq_seqnum(encseq, seed_query);
    gt_assert(seqnum_db < seqnum_query);
    seqstart_query = gt_encseq_seqstartpos(encseq, seqnum_query);
    seqlen_query = gt_encseq_seqlength(encseq, seqnum_query);
  } else
  {
    /* both seed positions lie in the same sequence */
    seqnum_query = seqnum_db;
    seqstart_query = seqstart_db;
    seqlen_query = seqlen_db;
  }
  seqend_query = seqstart_query + seqlen_query;

  /* left of the seed */
  if (seed_db <= seqstart_db || seed_query <= seqstart_query)
  {
    xmi->best_left.ivalue = 0;
    xmi->best_left.jvalue = 0;
    xmi->best_left.score = 0;
  } else
  {
    gt_seqabstract_reinit_encseq(xmi->useq, encseq,
                                 seed_db - seqstart_db, seqstart_db);
    gt_seqabstract_reinit_encseq(xmi->vseq, encseq,
                                 seed_query - seqstart_query, seqstart_query);
    gt_evalxdroparbitscoresextend(false, &xmi->best_left, xmi->res,
                                  xmi->useq, xmi->vseq, xmi->belowscore);
  }

  /* right of the seed */
  if (seed_db + len >= seqend_db || seed_query + len >= seqend_query)
  {
    xmi->best_right.ivalue = 0;
    xmi->best_right.jvalue = 0;
    xmi->best_right.score = 0;
  } else
  {
    gt_seqabstract_reinit_encseq(xmi->useq, encseq,
                                 seqend_db - (seed_db + len), seed_db + len);
    gt_seqabstract_reinit_encseq(xmi->vseq, encseq,
                                 seqend_query - (seed_query + len),
                                 seed_query + len);
    gt_evalxdroparbitscoresextend(true, &xmi->best_right, xmi->res,
                                  xmi->useq, xmi->vseq, xmi->belowscore);
  }

  gt_assert(seed_db >= (GtUword) xmi->best_left.ivalue &&
            seed_query >= (GtUword) xmi->best_left.jvalue);
  ext_querystart = seed_query - xmi->best_left.jvalue;
  gt_assert(ext_querystart >= seqstart_query);

  /* coordinates and score of seed plus both extensions */
  ext_dblen = len + xmi->best_left.ivalue + xmi->best_right.ivalue;
  ext_dbstart = seed_db - xmi->best_left.ivalue;
  ext_querylen = len + xmi->best_left.jvalue + xmi->best_right.jvalue;
  total_score = (GtXdropscore) len * xmi->arbitscores.mat +
                xmi->best_left.score +
                xmi->best_right.score;

  /* let useq/vseq cover the extended match; greedyunitedist() below is
     evaluated on these two substrings */
  gt_seqabstract_reinit_encseq(xmi->useq, encseq, ext_dblen, ext_dbstart);
  gt_seqabstract_reinit_encseq(xmi->vseq, encseq, ext_querylen,
                               ext_querystart);
  /* the query start is passed relative to the start of its sequence */
  gt_querymatch_fill(xmi->querymatchspaceptr,
                     ext_dblen,
                     ext_dbstart,
                     GT_READMODE_FORWARD,
                     false,
                     total_score,
                     greedyunitedist(xmi->frontresource, xmi->useq,
                                     xmi->vseq),
                     true,
                     (uint64_t) seqnum_query,
                     ext_querylen,
                     ext_querystart - seqstart_query);
  return gt_querymatch_output(info, encseq, xmi->querymatchspaceptr,
                              NULL, gt_encseq_seqlength(encseq, seqnum_query),
                              err);
}
Пример #8
0
/* Return the length of the sequence with index <idx> in <bs>, as reported by
   gt_encseq_seqlength() for the encoded sequence stored in <bs>. */
GtUword gt_bioseq_get_sequence_length(const GtBioseq *bs,
                                      GtUword idx)
{
  GtUword seqlength;

  gt_assert(bs);
  seqlength = gt_encseq_seqlength(bs->encseq, idx);
  return seqlength;
}
/* Decode <encseq> and write it to stdout in the format selected by
   <args>->mode:

   - "fasta":  one FASTA record per sequence. Either the single sequence
               <args>->seq, the inclusive sequence range <args>->seqrng, or
               (if neither is defined) all sequences are written. If the
               encoded sequence has no description support, the header
               "sequence <i>" is used instead.
   - "concat": the characters in the inclusive position range <args>->rng
               (default: the complete encoded sequence), with each separator
               position replaced by the first character of <args>->sepchar.

   Characters are read in readmode <args>->rm, either one by one via random
   access (<args>->singlechars) or sequentially via a GtEncseqReader.
   <filename> is only used for the warning about missing descriptions.

   Returns 0 on success, and -1 with <err> set if a requested sequence number
   or range end lies outside of the encoded sequence. */
static int output_sequence(GtEncseq *encseq, GtEncseqDecodeArguments *args,
                           const char *filename, GtError *err)
{
  GtUword i, j, sfrom, sto;
  int had_err = 0;
  bool has_desc;
  GtEncseqReader *esr;
  gt_assert(encseq);

  has_desc = gt_encseq_has_description_support(encseq);
  if (!has_desc)
    gt_warning("Missing description support for file %s", filename);

  if (strcmp(gt_str_get(args->mode), "fasta") == 0) {
    /* the number of sequences is invariant, so query it only once */
    const GtUword numofseqs = gt_encseq_num_of_sequences(encseq);

    if (args->seq != GT_UNDEF_UWORD) {
      /* specify a single sequence to extract */
      if (args->seq >= numofseqs) {
        gt_error_set(err,
                     "requested sequence "GT_WU" exceeds number of sequences "
                     "("GT_WU")", args->seq,
                     numofseqs);
        return -1;
      }
      sfrom = args->seq;
      sto = args->seq + 1;
    } else if (args->seqrng.start != GT_UNDEF_UWORD
                 && args->seqrng.end != GT_UNDEF_UWORD) {
      /* specify a sequence range to extract */
      if (args->seqrng.start >= numofseqs || args->seqrng.end >= numofseqs) {
        gt_error_set(err,
                     "range "GT_WU"-"GT_WU" includes a sequence number "
                     "exceeding the total number of sequences ("GT_WU")",
                     args->seqrng.start,
                     args->seqrng.end,
                     numofseqs);
        return -1;
      }
      sfrom = args->seqrng.start;
      sto = args->seqrng.end + 1;
    } else {
      /* extract all sequences */
      sfrom = 0;
      sto = numofseqs;
    }
    for (i = sfrom; i < sto; i++) {
      GtUword desclen, startpos, len, seqnum;
      char buf[BUFSIZ];
      const char *desc = NULL;

      /* XXX: maybe make this distinction in the functions via readmode? */
      if (!GT_ISDIRREVERSE(args->rm)) {
        seqnum = i;
        startpos = gt_encseq_seqstartpos(encseq, seqnum);
        len = gt_encseq_seqlength(encseq, seqnum);
      } else {
        /* in a reverse readmode the i-th sequence of the output is stored as
           sequence numofseqs-1-i; its start position is counted from the
           end of the encoded sequence */
        seqnum = numofseqs - 1 - i;
        len = gt_encseq_seqlength(encseq, seqnum);
        startpos = gt_encseq_total_length(encseq)
                     - (gt_encseq_seqstartpos(encseq, seqnum) + len);
      }
      if (has_desc) {
        desc = gt_encseq_description(encseq, &desclen, seqnum);
      } else {
        /* the fallback header uses the output index, not the stored one */
        (void) snprintf(buf, BUFSIZ, "sequence "GT_WU"", i);
        desclen = strlen(buf);
        desc = buf;
      }
      gt_assert(desc);
      /* output description */
      gt_xfputc(GT_FASTA_SEPARATOR, stdout);
      gt_xfwrite(desc, 1, desclen, stdout);
      gt_xfputc('\n', stdout);
      /* XXX: make this more efficient by writing in a buffer first and then
         showing the result */
      if (args->singlechars) {
        for (j = 0; j < len; j++) {
          gt_xfputc(gt_encseq_get_decoded_char(encseq,
                                               startpos + j,
                                               args->rm),
                    stdout);
        }
      } else {
        esr = gt_encseq_create_reader_with_readmode(encseq, args->rm, startpos);
        for (j = 0; j < len; j++) {
          gt_xfputc(gt_encseq_reader_next_decoded_char(esr), stdout);
        }
        gt_encseq_reader_delete(esr);
      }
      gt_xfputc('\n', stdout);
    }
  }

  if (strcmp(gt_str_get(args->mode), "concat") == 0) {
    /* NOTE(review): assumes a total length > 0; otherwise <to> wraps
       around -- confirm that empty encoded sequences cannot occur here */
    GtUword from = 0,
            to = gt_encseq_total_length(encseq) - 1;

    if (args->rng.start != GT_UNDEF_UWORD && args->rng.end != GT_UNDEF_UWORD) {
      if (args->rng.end > to) {
        had_err = -1;
        gt_error_set(err,
                     "end of range ("GT_WU") exceeds encoded sequence length "
                     "("GT_WU")", args->rng.end, to);
      } else {
        from = args->rng.start;
        to = args->rng.end;
      }
    }
    if (!had_err) {
      if (args->singlechars) {
        for (j = from; j <= to; j++) {
          char cc = gt_encseq_get_decoded_char(encseq, j, args->rm);
          if (cc == (char) SEPARATOR)
            cc = gt_str_get(args->sepchar)[0];
          gt_xfputc(cc, stdout);
        }
      } else {
        esr = gt_encseq_create_reader_with_readmode(encseq, args->rm, from);
        if (esr) {
          for (j = from; j <= to; j++) {
            char cc = gt_encseq_reader_next_decoded_char(esr);
            if (cc == (char) SEPARATOR)
              cc = gt_str_get(args->sepchar)[0];
            gt_xfputc(cc, stdout);
          }
          gt_encseq_reader_delete(esr);
        }
      }
      gt_xfputc('\n', stdout);
    }
  }
  return had_err;
}
Пример #10
0
/* Tool runner: iterate over the matches delivered by a seed extension match
   iterator created for <arguments>->matchfilename and process each of them.
   A match whose line carries seed information is either re-extended from its
   seed with the greedy method (option seed_extend; only implemented for the
   case that both encoded sequences are identical) or displayed via
   gt_show_seed_extend_encseq(); any other match is displayed via
   gt_show_seed_extend_plain(). Returns 0 on success and a non-zero value if
   the iterator could not be created or a greedy extension failed. */
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;
  GtUword alignmentwidth;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  alignmentwidth = arguments->show_alignment ? 70 : 0;
  /* presumably the iterator parses the option string in the first line of
     the match file */
  semi = gt_seedextend_match_iterator_new(arguments->matchfilename,err);
  if (semi == NULL)
  {
    had_err = -1;
  } else
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *polishing = NULL;
    GtSequencepairbuffer seqpairbuf = {NULL,NULL,0,0};
    /* the following two are only used if seed_extend is set */
    GtGreedyextendmatchinfo *greedy_info = NULL;
    GtProcessinfo_and_querymatchspaceptr info_and_match;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    GtUchar *show_buffer = NULL;
    GtLinspaceManagement *spacemanager;
    GtScoreHandler *scorehandler;
    GtQuerymatch *querymatchptr;

    if (arguments->show_alignment)
    {
      show_buffer = gt_alignment_buffer_new(alignmentwidth);
    }
    spacemanager = gt_linspaceManagement_new();
    scorehandler = gt_scorehandler_new(0,1,0,1);

    /* polishing of alignment ends is on unless relax_polish was requested */
    if (!arguments->relax_polish)
    {
      const double matchscore_bias
        = gt_seedextend_match_iterator_bias_parameters(semi)
            ? gt_greedy_dna_sequence_bias_get(aencseq)
            : GT_DEFAULT_MATCHSCORE_BIAS;

      polishing = polishing_info_new_with_bias(
                          gt_seedextend_match_iterator_errorpercentage(semi),
                          matchscore_bias,
                          gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(semi,
                                                       true,
                                                       arguments->showeoplist,
                                                       alignmentwidth,
                                                       !arguments->relax_polish,
                                                       arguments->seed_display);
    }
    if (arguments->seed_extend)
    {
      greedy_info
        = gt_greedy_extend_matchinfo_new(70,
                              GT_MAX_ALI_LEN_DIFF,
                              gt_seedextend_match_iterator_history_size(semi),
                              GT_MIN_PERC_MAT_HISTORY,
                              0, /* userdefinedleastlength */
                              GT_EXTEND_CHAR_ACCESS_ANY,
                              100,
                              polishing);
    }
    if (polishing != NULL)
    {
      gt_alignment_polished_ends(alignment,polishing,false);
    }
    info_and_match.processinfo = greedy_info;
    if (arguments->sortmatches)
    {
      (void) gt_seedextend_match_iterator_all_sorted(semi,true);
    }

    /* process the matches one by one until the iterator is exhausted */
    while ((querymatchptr = gt_seedextend_match_iterator_next(semi)) != NULL)
    {
      if (!gt_seedextend_match_iterator_has_seedline(semi))
      {
        gt_show_seed_extend_plain(&seqpairbuf,
                                  spacemanager,
                                  scorehandler,
                                  alignment,
                                  show_buffer,
                                  alignmentwidth,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  querymatchptr);
      } else if (!arguments->seed_extend)
      {
        const GtUword query_totallength
          = gt_encseq_seqlength(bencseq,
                                gt_querymatch_queryseqnum(querymatchptr));

        gt_show_seed_extend_encseq(querymatchptr,
                                   aencseq,
                                   bencseq,
                                   query_totallength);
      } else if (aencseq != bencseq)
      {
        /* re-extension is only available for self matches */
        gt_assert(false);
      } else
      {
        const GtUword
          seedlen = gt_seedextend_match_iterator_seedlen(semi),
          seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
          seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

        info_and_match.querymatchspaceptr = querymatchptr;
        had_err = gt_greedy_extend_selfmatch_with_output(&info_and_match,
                                                         aencseq,
                                                         seedlen,
                                                         seedpos1,
                                                         seedpos2,
                                                         err);
        if (had_err)
        {
          break;
        }
      }
    }
    polishing_info_delete(polishing);
    gt_greedy_extend_matchinfo_delete(greedy_info);
    gt_free(show_buffer);
    gt_scorehandler_delete(scorehandler);
    gt_linspaceManagement_delete(spacemanager);
    gt_free(seqpairbuf.a_sequence);
    gt_free(seqpairbuf.b_sequence);
    gt_alignment_delete(alignment);
  }
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}
Пример #11
0
/* Build a table of match edges from <matches> and cluster the sequences of
   <encseq> into <cs> via gt_cluster_matches().

   For each match, the two sequence ids are translated into sequence numbers
   using <seqdesc2seqnum> (which stores the sequence number + 1 as value, so
   that NULL means "not found"). A match between two different sequences
   becomes an edge if both match ranges cover at least <plarge> percent of
   the longer and at least <psmall> percent of the shorter sequence.

   Returns 0 on success. Returns -1 with <err> set if the number of elements
   in <cs> differs from the number of sequences in <encseq>, if a sequence id
   of a match is not contained in <seqdesc2seqnum>, or if the clustering
   itself fails. */
static int cluster_sequences(GtArray *matches,
                             GtClusteredSet *cs,
                             GtHashmap *seqdesc2seqnum,
                             unsigned int psmall,
                             unsigned int plarge,
                             GtEncseq *encseq,
                             GtError *err)
{
  GtMatchEdgeTable matchedgetab;
  int had_err = 0;
  unsigned long i,
                num_of_seq;

  /* check the arguments before <encseq> is used for the first time */
  gt_assert(matches && cs && seqdesc2seqnum && encseq);
  num_of_seq = gt_encseq_num_of_sequences(encseq);

  /* start with an empty table, so that the cleanup at the end is valid on
     every path */
  matchedgetab.edges = NULL;
  matchedgetab.num_of_edges = 0;

  if (gt_clustered_set_num_of_elements(cs, err) != num_of_seq) {
    had_err = -1;
    gt_error_set(err,
                 "number of sequences (%lu) unequals number of elements in"
                 " clustered set (%lu)",
                 num_of_seq, gt_clustered_set_num_of_elements(cs, err));
  }
  if (!had_err) {
    matchedgetab.edges = gt_array_new(sizeof (GtMatchEdge));

    /* stop at the first error, so that its message is not overwritten */
    for (i = 0; !had_err && i < gt_array_size(matches); i++) {
      GtMatch *match = *(GtMatch**) gt_array_get(matches, i);
      GtMatchEdge matchedge;
      GtRange rng_seq1,
              rng_seq2;
      unsigned long lsmall,
                    llarge,
                    seqlen1,
                    seqlen2,
                    matchlen1,
                    matchlen2,
                    min_large,
                    min_small,
                    seqnum1,
                    seqnum2;
      const char *seqid;
      void *value;

      gt_match_get_range_seq1(match, &rng_seq1);
      gt_match_get_range_seq2(match, &rng_seq2);
      matchlen1 = gt_range_length(&rng_seq1);
      matchlen2 = gt_range_length(&rng_seq2);

      seqid = gt_match_get_seqid1(match);
      value = gt_hashmap_get(seqdesc2seqnum, (void*) seqid);
      if (value == NULL) {
        had_err = -1;
        gt_error_set(err, "key %s not found", seqid);
        break;
      }
      seqnum1 = ((unsigned long) value) - 1;

      seqid = gt_match_get_seqid2(match);
      value = gt_hashmap_get(seqdesc2seqnum, (void*) seqid);
      if (value == NULL) {
        had_err = -1;
        gt_error_set(err, "key %s not found", seqid);
        break;
      }
      seqnum2 = ((unsigned long) value) - 1;

      seqlen1 = gt_encseq_seqlength(encseq, seqnum1);
      seqlen2 = gt_encseq_seqlength(encseq, seqnum2);
      if (seqlen1 > seqlen2) {
        llarge = seqlen1;
        lsmall = seqlen2;
      } else {
        lsmall = seqlen1;
        llarge = seqlen2;
      }
      /* minimum match lengths required w.r.t. the longer/shorter sequence */
      min_large = (llarge * plarge)/100;
      min_small = (lsmall * psmall)/100;
      if (min_large <= matchlen1 && min_small <= matchlen1 &&
          min_large <= matchlen2 && min_small <= matchlen2 &&
          seqnum1 != seqnum2) {
        matchedge.matchnum0 = seqnum1;
        matchedge.matchnum1 = seqnum2;
        gt_array_add(matchedgetab.edges, matchedge);
        matchedgetab.num_of_edges++;
      }
    }
  }
  if (!had_err && gt_cluster_matches(cs, &matchedgetab, err) != 0)
    had_err = -1;
  /* release the edges on success and on error; <edges> is still NULL if the
     table was never allocated, which the delete function accepts */
  gt_array_delete(matchedgetab.edges);
  return had_err;
}
/* Tool runner: determine a bit table over the reads of the readset
   <arguments>->readset with the method selected by <arguments>->test and
   show it via gt_cntlist_show():
   - SHOWLIST:       parse the list from the file <readset> +
                     GT_READJOINER_SUFFIX_CNTLIST
   - BRUTEFORCE/KMP: load the reads and run gt_rdj_pairwise_exact() in mode
                     GT_OVLFIND_CNT
   - ESA:            run gt_contfind_bottomup() on a sequential suffix array
                     reader
   Unless <arguments>->singlestrand is set, the reads are mirrored resp. the
   second half of the sequences is taken as the reverse complements.
   Returns 0 on success and a non-zero value on error. */
static int gt_readjoiner_cnttest_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GT_UNUSED GtError *err)
{
  GtReadjoinerCnttestArguments *arguments = tool_arguments;
  GtBitsequence *bits = NULL;
  GtUword nofreads;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->test == GT_READJOINER_CNTTEST_SHOWLIST)
  {
    GtStr *cntlist_fn = gt_str_clone(arguments->readset);

    gt_str_append_cstr(cntlist_fn, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(cntlist_fn), true, &bits, &nofreads,
        err);
    gt_str_delete(cntlist_fn);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_BRUTEFORCE ||
      arguments->test == GT_READJOINER_CNTTEST_KMP)
  {
    GtEncseqLoader *loader = gt_encseq_loader_new();
    GtEncseq *reads;

    gt_encseq_loader_drop_description_support(loader);
    gt_encseq_loader_disable_autosupport(loader);
    if (!arguments->singlestrand)
      gt_encseq_loader_mirror(loader);
    reads = gt_encseq_loader_load(loader, gt_str_get(arguments->readset), err);
    if (reads != NULL)
    {
      gt_rdj_pairwise_exact(GT_OVLFIND_CNT, reads, !arguments->singlestrand,
          false, arguments->test == GT_READJOINER_CNTTEST_KMP, 1UL, true,
          NULL, NULL, false, NULL, &bits, &nofreads);
    }
    else
    {
      had_err = -1;
    }
    gt_encseq_delete(reads);
    gt_encseq_loader_delete(loader);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_ESA)
  {
    GtUword readlength = 0, firstrevcompl = 0;
    GtLogger *verbose_logger = gt_logger_new(arguments->verbose,
        GT_LOGGER_DEFLT_PREFIX, stderr);
    Sequentialsuffixarrayreader *ssar
      = gt_newSequentialsuffixarrayreaderfromfile(gt_str_get(
          arguments->readset), SARR_LCPTAB | SARR_SUFTAB | SARR_SSPTAB,
        true, verbose_logger, err);

    if (!gt_error_is_set(err))
    {
      nofreads = gt_encseq_num_of_sequences(ssar->encseq);
      if (!arguments->singlestrand)
      {
        /* only the first half of the sequences are counted as reads */
        nofreads = GT_DIV2(nofreads);
        firstrevcompl = nofreads;
      }
      GT_INITBITTAB(bits, nofreads);
      /* the read length is only determined in the double-stranded case and
         only if the access type is GT_ACCESS_TYPE_EQUALLENGTH */
      if (!arguments->singlestrand &&
          gt_encseq_accesstype_get(ssar->encseq) == GT_ACCESS_TYPE_EQUALLENGTH)
      {
        readlength = gt_encseq_seqlength(ssar->encseq, 0);
      }
      /* firstrevcompl is still 0 in the single-stranded case */
      (void) gt_contfind_bottomup(ssar, false, bits, firstrevcompl,
          readlength);
    }
    else
    {
      had_err = -1;
    }
    if (ssar != NULL)
      gt_freeSequentialsuffixarrayreader(&ssar);
    gt_logger_delete(verbose_logger);
  }
  else
  {
    gt_assert(false);
  }
  if (!had_err)
    had_err = gt_cntlist_show(bits, nofreads, NULL, false, err);
  gt_free(bits);
  return had_err;
}